[DRBD-cvs] drbd by lars; fixed: missing inc_ap_pending in the ret...

drbd-user@lists.linbit.com drbd-user@lists.linbit.com
Mon, 17 May 2004 14:55:38 +0200 (CEST)


DRBD CVS committal

Author  : lars
Module  : drbd

Dir     : drbd/drbd


Modified Files:
      Tag: rel-0_7-branch
	drbd_actlog.c drbd_dsender.c drbd_fs.c drbd_main.c 
	drbd_receiver.c drbd_req-2.4.c 


Log Message:
fixed:
missing inc_ap_pending() in the retry remote path,
missing clear_bit(PARTNER_DISKLESS) in the disconnect path,
overly strict assert in bm_set_bit

also added several FIXME comments and D_ASSERTs

===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_actlog.c,v
retrieving revision 1.1.2.100
retrieving revision 1.1.2.101
diff -u -3 -r1.1.2.100 -r1.1.2.101
--- drbd_actlog.c	11 May 2004 13:14:49 -0000	1.1.2.100
+++ drbd_actlog.c	17 May 2004 12:55:32 -0000	1.1.2.101
@@ -446,7 +446,7 @@
 	lc_unlock(mdev->act_log);
 	wake_up(&mdev->al_wait);
 
-	INFO("Marked additional %lu KB as out-of-sync based on AL.\n",add/2);
+	INFO("Marked additional %lu KB as out-of-sync based on AL.\n",(add+1)/2);
 
 	mdev->rs_total += add;
 }
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_dsender.c,v
retrieving revision 1.1.2.107
retrieving revision 1.1.2.108
diff -u -3 -r1.1.2.107 -r1.1.2.108
--- drbd_dsender.c	12 May 2004 10:00:47 -0000	1.1.2.107
+++ drbd_dsender.c	17 May 2004 12:55:32 -0000	1.1.2.108
@@ -392,8 +392,14 @@
 
 	// FIXME: what if partner was SyncTarget, and is out of sync for
 	// this area ?? ... should be handled in the receiver.
+	inc_ap_pending(mdev);
 	ok = drbd_read_remote(mdev,req);
-	if(unlikely(!ok)) ERR("drbd_read_remote() failed\n");
+	if(unlikely(!ok)) {
+		ERR("drbd_read_remote() failed\n");
+		/* dec_ap_pending and bio_io_error are done in
+		 * drbd_fail_pending_reads
+		 */
+	}
 	return ok;
 }
 
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_fs.c,v
retrieving revision 1.28.2.90
retrieving revision 1.28.2.91
diff -u -3 -r1.28.2.90 -r1.28.2.91
--- drbd_fs.c	16 May 2004 18:40:49 -0000	1.28.2.90
+++ drbd_fs.c	17 May 2004 12:55:32 -0000	1.28.2.91
@@ -320,7 +320,7 @@
 	q->max_hw_segments   = 1;
 	q->max_segment_size  = min((unsigned)PAGE_SIZE,b->max_segment_size);
 	q->hardsect_size     = max((unsigned short)512,b->hardsect_size);
-	q->seg_boundary_mask = b->seg_boundary_mask;
+	q->seg_boundary_mask = PAGE_SIZE-1;
 	D_ASSERT(q->hardsect_size <= PAGE_SIZE); // or we are really screwed ;-)
 })
 #undef min_not_zero
@@ -373,12 +373,16 @@
 		drbd_thread_start(&mdev->worker);
 		set_cstate(mdev,StandAlone);
 	}
+
+
+// FIXME why "else" ?? I think allways, and *before send_param!
+	clear_bit(DISKLESS,&mdev->flags);
+	smp_wmb();
+// FIXME explain:
+	clear_bit(MD_IO_ALLOWED,&mdev->flags);
+
 	if(mdev->cstate >= Connected ) {
 		drbd_send_param(mdev,1);
-	} else {
-		clear_bit(DISKLESS,&mdev->flags);
-		smp_wmb();
-		clear_bit(MD_IO_ALLOWED,&mdev->flags);
 	}
 
 	return 0;
@@ -848,6 +852,13 @@
 /* FIXME race with sync start
  */
 		if (mdev->cstate == Connected) drbd_send_param(mdev,0);
+/* FIXME
+ * if you detach while connected, you are *at least* inconsistent now,
+ * and should clear MDF_Consistent in metadata, and maybe even set the bitmap
+ * out of sync.
+ * since if you reattach, this might be a different lo dev, and then it needs
+ * to receive a sync!
+ */
 		if (mdev->cstate == StandAlone) {
 			set_cstate(mdev,Unconfigured);
 			drbd_mdev_cleanup(mdev);
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_main.c,v
retrieving revision 1.73.2.173
retrieving revision 1.73.2.174
diff -u -3 -r1.73.2.173 -r1.73.2.174
--- drbd_main.c	16 May 2004 14:44:08 -0000	1.73.2.173
+++ drbd_main.c	17 May 2004 12:55:32 -0000	1.73.2.174
@@ -402,6 +402,7 @@
 	daemonize("drbd_thread");
 #else
 	daemonize();
+	// VERIFY what about blocking signals ?
 	reparent_to_init();
 #endif
 }
@@ -551,13 +552,13 @@
 	}
 }
 
-inline sigset_t block_sigs_but(unsigned long mask)
+inline sigset_t drbd_block_all_signals(void)
 {
 	unsigned long flags;
 	sigset_t oldset;
 	LOCK_SIGMASK(current,flags);
 	oldset = current->blocked;
-	siginitsetinv(&current->blocked,mask);
+	sigfillset(&current->blocked);
 	RECALC_SIGPENDING(current);
 	UNLOCK_SIGMASK(current,flags);
 	return oldset;
@@ -612,7 +613,7 @@
 	} else
 		down(&mdev->meta.mutex);
 
-	old_blocked = block_sigs_but(0);
+	old_blocked = drbd_block_all_signals();
 	ok = _drbd_send_cmd(mdev,sock,cmd,h,size,0);
 	restore_old_sigset(old_blocked);
 
@@ -874,14 +875,12 @@
 
 	/* Still called directly by drbd_make_request,
 	 * so all sorts of processes may end up here.
-	 * They may be interrupted by DRBD_SIGKILL in response to
-	 * ioctl or some other "connection loast" event.
-	 *
-	 * we also should replace all "LOCK(); sigemptyset(); UNLOCK();"
-	 * with flush_signals(); ...
+	 * They may be interrupted by DRBD_SIG in response to
+	 * ioctl or some other "connection lost" event.
+	 * This is not propagated.
 	 */
 
-	old_blocked = block_sigs_but(0);
+	old_blocked = drbd_block_all_signals();
 	down(&mdev->data.mutex);
 	spin_lock(&mdev->send_task_lock);
 	mdev->send_task=current;
@@ -932,7 +931,7 @@
 	 * This one may be interupted by DRBD_SIG and/or DRBD_SIGKILL
 	 * in response to ioctl or module unload.
 	 */
-	old_blocked = block_sigs_but(0);
+	old_blocked = drbd_block_all_signals();
 	down(&mdev->data.mutex);
 	spin_lock(&mdev->send_task_lock);
 	mdev->send_task=current;
@@ -1014,9 +1013,9 @@
 		}
 		D_ASSERT(rv != 0);
 		if (rv == -EINTR ) {
-			ERR("Got a signal in drbd_send(,%c,)!\n",
+			DBG("Got a signal in drbd_send(,%c,)!\n",
 			    sock == mdev->meta.socket ? 'm' : 's');
-			dump_stack();
+			// dump_stack();
 			drbd_flush_signals(current);
 			rv = 0;
 		}
@@ -1161,7 +1160,6 @@
 	mdev->state                = Secondary;
 	mdev->o_state              = Unknown;
 	mdev->cstate               = Unconfigured;
-
 }
 
 void drbd_init_set_defaults(drbd_dev *mdev)
@@ -1856,7 +1854,7 @@
 	int ret=0;
 	unsigned long flags;
 
-	if (size <= 0 || (size & 0x1ff) != 0 || size > PAGE_SIZE) {
+	if (size <= 0 || (size & 0x1ff) != 0 || ( size > PAGE_SIZE && size != AL_EXTENT_SIZE)) {
 		DUMPI(size);
 		return 0;
 	}
@@ -2231,7 +2229,7 @@
 	return 0;
 }
 
-#if DUMP_MD
+#ifdef DUMP_MD
 #define MeGC(x) mdev->gen_cnt[x]
 #define PeGC(x) be32_to_cpu(peer->gen_cnt[x])
 
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_receiver.c,v
retrieving revision 1.97.2.155
retrieving revision 1.97.2.156
diff -u -3 -r1.97.2.155 -r1.97.2.156
--- drbd_receiver.c	16 May 2004 14:57:05 -0000	1.97.2.155
+++ drbd_receiver.c	17 May 2004 12:55:32 -0000	1.97.2.156
@@ -204,6 +204,10 @@
 	list_del(le);
 
 	page = drbd_bio_get_page(&e->private_bio);
+ONLY_IN_26(
+	D_ASSERT(page == e->ee_bvec.bv_page);
+	// page = e->ee_bvec.bv_page;
+)
 	kmem_cache_free(drbd_ee_cache, e);
 	mdev->ee_vacant--;
 
@@ -321,6 +325,10 @@
 	mdev->ee_vacant--;
 	mdev->ee_in_use++;
 	e=list_entry(le, struct Tl_epoch_entry, w.list);
+ONLY_IN_26(
+	D_ASSERT(e->private_bio.bi_idx == 0);
+	// drbd_ee_init(e,e->ee_bvec.bv_page); // reinitialize
+)
 	e->block_id = !ID_VACANT;
 	SET_MAGIC(e);
 	return e;
@@ -547,8 +555,10 @@
 			INFO("sock was shut down by peer\n");
 			break;
 		} else  {
-			/* signal came in after we read a partial message */
-			D_ASSERT(signal_pending(current));
+			/* signal came in, or peer/link went down,
+			 * after we read a partial message
+			 */
+			// D_ASSERT(signal_pending(current));
 			break;
 		}
 	};
@@ -1241,7 +1251,12 @@
 				drbd_send_bitmap(mdev);
 				set_cstate(mdev,WFBitMapS);
 			} else { // have_good == -1
-				if (mdev->state == Primary) {
+				if ( (mdev->state == Primary) &&
+				     (mdev->gen_cnt[Flags] & MDF_Consistent) ) {
+					/* FIXME
+					 * allow Primary become SyncTarget if it was diskless, and now had a storage reattached.
+					 * only somewhere the MDF_Consistent flag is set where it should not... I think.
+					 */
 					ERR("Current Primary shall become sync TARGET! Aborting to prevent data corruption.\n");
 					set_cstate(mdev,StandAlone);
 					drbd_thread_stop_nowait(&mdev->receiver);
@@ -1526,6 +1541,9 @@
 	clear_bit(ISSUE_BARRIER,&mdev->flags);
 	wait_event( mdev->cstate_wait, atomic_read(&mdev->ap_pending_cnt) == 0 );
 	D_ASSERT(mdev->oldest_barrier->n_req == 0);
+
+	// both
+	clear_bit(PARTNER_DISKLESS,&mdev->flags);
 
 	D_ASSERT(mdev->ee_in_use == 0);
 	D_ASSERT(list_empty(&mdev->read_ee)); // done by termination of worker
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_req-2.4.c,v
retrieving revision 1.33.2.76
retrieving revision 1.33.2.77
diff -u -3 -r1.33.2.76 -r1.33.2.77
--- drbd_req-2.4.c	13 May 2004 10:01:58 -0000	1.33.2.76
+++ drbd_req-2.4.c	17 May 2004 12:55:32 -0000	1.33.2.77
@@ -316,6 +316,15 @@
 		return 0;
 	}
 
+	/*
+	 * what we "blindly" assume:
+	 */
+	D_ASSERT(bio->bi_size > 0);
+	D_ASSERT( (bio->bi_size & 0x1ff) == 0);
+	D_ASSERT(bio->bi_size <= PAGE_SIZE);
+	D_ASSERT(bio->bi_vcnt == 1);
+	D_ASSERT(bio->bi_idx == 0);
+
 	s_enr = bio->bi_sector >> (AL_EXTENT_SIZE_B-9);
 	e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> (AL_EXTENT_SIZE_B-9);
 	D_ASSERT(e_enr >= s_enr);