[DRBD-cvs] drbd by phil; Still hunting bugs .... Unfortunately I ...

drbd-user@lists.linbit.com drbd-user@lists.linbit.com
Fri, 16 Apr 2004 14:19:00 +0200 (CEST)


DRBD CVS committal

Author  : phil
Module  : drbd

Dir     : drbd/drbd


Modified Files:
      Tag: rel-0_7-branch
	drbd_actlog.c drbd_dsender.c drbd_int.h drbd_main.c 


Log Message:
Still hunting bugs... Unfortunately I cannot reproduce this one
any more, but this change should fix the 
"lc_get() failed! Probabely something stays"
" dirty in the on disk BM. (resync LRU too small) "
issue.


===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_actlog.c,v
retrieving revision 1.1.2.83
retrieving revision 1.1.2.84
diff -u -3 -r1.1.2.83 -r1.1.2.84
--- drbd_actlog.c	9 Apr 2004 06:53:43 -0000	1.1.2.83
+++ drbd_actlog.c	16 Apr 2004 12:18:55 -0000	1.1.2.84
@@ -616,14 +616,14 @@
 			// since drbd_rs_begin_io() pulled it already in.
 			ext->rs_left = bm_count_sectors(mdev->mbds_id,enr);
 			lc_changed(mdev->resync,&ext->lce);
-			// wake_up(&mdev->al_wait);
 		}
 		lc_put(mdev->resync,&ext->lce);
-		// if (!lc_put(mdev->resync,&ext->lce))
-		//	wake_up(&mdev->al_wait);
 	} else {
 		ERR("lc_get() failed! Probabely something stays"
 		    " dirty in the on disk BM. (resync LRU too small) \n");
+		ERR("resync_locked=%d nr_elements=%d\n",
+		    atomic_read(&mdev->resync_locked),
+		    mdev->resync->nr_elements);
 	}
 
 	list_for_each_safe(le,tmp,&mdev->resync->lru) {
@@ -639,12 +639,11 @@
 			udw->w.cb = w_update_odbm;
 			drbd_queue_work(mdev,&mdev->data.work,&udw->w);
 			lc_del(mdev->resync,&ext->lce);
-			// wake_up(&mdev->al_wait);
 		}
 	}
 
 	spin_unlock_irqrestore(&mdev->al_lock,flags);
-	// just wake_up unconditional now.
+	// just wake_up unconditional now. [various lc_chaged(), lc_put() here]
 	wake_up(&mdev->al_wait);
 }
 
@@ -679,6 +678,11 @@
 	struct bm_extent  *bm_ext;
 	unsigned long     rs_flags;
 
+	if(atomic_read(&mdev->resync_locked) > mdev->resync->nr_elements-3 ) {
+		ERR("bme_get() does not lock all elements\n");
+		return 0;
+	}
+
 	spin_lock_irq(&mdev->al_lock);
 	bm_ext = (struct bm_extent*) lc_get(mdev->resync,enr);
 	if (bm_ext) {
@@ -744,6 +748,8 @@
 
 	if(test_bit(BME_LOCKED,&bm_ext->flags)) return;
 
+	atomic_inc(&mdev->resync_locked);
+
 	for(i=0;i<SM;i++) {
 		wait_event(mdev->al_wait, !_is_in_al(mdev,enr*SM+i) );
 	}
@@ -767,6 +773,7 @@
 	if( lc_put(mdev->resync,(struct lc_element *)bm_ext) == 0 ) {
 		clear_bit(BME_LOCKED,&bm_ext->flags);
 		clear_bit(BME_NO_WRITES,&bm_ext->flags);
+		atomic_dec(&mdev->resync_locked);
 		wake_up(&mdev->al_wait);
 	}
 
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_dsender.c,v
retrieving revision 1.1.2.90
retrieving revision 1.1.2.91
diff -u -3 -r1.1.2.90 -r1.1.2.91
--- drbd_dsender.c	6 Apr 2004 14:07:00 -0000	1.1.2.90
+++ drbd_dsender.c	16 Apr 2004 12:18:55 -0000	1.1.2.91
@@ -464,7 +464,7 @@
 
 		drbd_rs_begin_io(mdev,sector);
 		if(unlikely(!bm_get_bit(mdev->mbds_id,sector,BM_BLOCK_SIZE))) {
-			INFO("Block got synced while in drbd_rs_begin_io()\n");
+		      //INFO("Block got synced while in drbd_rs_begin_io()\n");
 			drbd_rs_complete_io(mdev,sector);
 			goto next_sector;
 		}
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_int.h,v
retrieving revision 1.58.2.143
retrieving revision 1.58.2.144
diff -u -3 -r1.58.2.143 -r1.58.2.144
--- drbd_int.h	6 Apr 2004 14:07:00 -0000	1.58.2.143
+++ drbd_int.h	16 Apr 2004 12:18:55 -0000	1.58.2.144
@@ -671,6 +671,7 @@
 	struct Drbd_thread asender;
 	struct BitMap* mbds_id;
 	struct lru_cache* resync; // Used to track operations of resync...
+	atomic_t resync_locked;   // Number of locked elements in resync LRU
 	int open_cnt;
 	u32 gen_cnt[GEN_CNT_SIZE];
 	int epoch_size;
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_main.c,v
retrieving revision 1.73.2.149
retrieving revision 1.73.2.150
diff -u -3 -r1.73.2.149 -r1.73.2.150
--- drbd_main.c	6 Apr 2004 14:07:00 -0000	1.73.2.149
+++ drbd_main.c	16 Apr 2004 12:18:55 -0000	1.73.2.150
@@ -1095,6 +1095,7 @@
 	atomic_set(&mdev->rs_pending_cnt,0);
 	atomic_set(&mdev->unacked_cnt,0);
 	atomic_set(&mdev->local_cnt,0);
+	atomic_set(&mdev->resync_locked,0);
 
 	init_MUTEX(&mdev->device_mutex);
 	init_MUTEX(&mdev->md_io_mutex);