[DRBD-cvs] svn commit by simon - r2430 - trunk/drbd - By Simon, this adds support for better handling of bitmap I/O errors

drbd-cvs at lists.linbit.com drbd-cvs at lists.linbit.com
Mon Sep 18 14:23:01 CEST 2006


Author: simon
Date: 2006-09-18 14:22:59 +0200 (Mon, 18 Sep 2006)
New Revision: 2430

Modified:
   trunk/drbd/drbd_bitmap.c
   trunk/drbd/drbd_int.h
   trunk/drbd/drbd_main.c
   trunk/drbd/drbd_nl.c
   trunk/drbd/drbd_receiver.c
Log:
By Simon,

this adds support for better handling of bitmap I/O errors - although
these were handled at the lowest level, the higher levels did not
handle them, leading to bad-ish behavior.



Modified: trunk/drbd/drbd_bitmap.c
===================================================================
--- trunk/drbd/drbd_bitmap.c	2006-09-18 12:11:46 UTC (rev 2429)
+++ trunk/drbd/drbd_bitmap.c	2006-09-18 12:22:59 UTC (rev 2430)
@@ -736,13 +736,14 @@
 /* lel_to_cpu == cpu_to_lel */
 # define bm_lel_to_cpu(x) bm_cpu_to_lel(x)
 
-STATIC void drbd_bm_rw(struct Drbd_Conf *mdev, int rw)
+STATIC int drbd_bm_rw(struct Drbd_Conf *mdev, int rw)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 	/* sector_t sector; */
 	int bm_words, num_pages, i;
 	unsigned long now;
 	char ppb[10];
+	int err = 0;
 
 	MUST_BE_LOCKED();
 
@@ -774,12 +775,10 @@
 	     rw == READ ? "reading" : "writing", jiffies - now);
 
 	if (test_bit(BM_MD_IO_ERROR,&b->bm_flags)) {
-		/* FIXME correct handling of this.
-		 * detach?
-		 */
 		ALERT("we had at least one MD IO ERROR during bitmap IO\n");
 		drbd_chk_io_error(mdev, 1, TRUE);
 		drbd_io_error(mdev, TRUE);
+		err = -EIO;
 	}
 
 	now = jiffies;
@@ -800,15 +799,20 @@
 
 	INFO("%s marked out-of-sync by on disk bit-map.\n",
 	     ppsize(ppb,drbd_bm_total_weight(mdev) << (BM_BLOCK_SIZE_B-10)) );
+
+	return err;
 }
 
-void drbd_bm_read(struct Drbd_Conf *mdev)
+int drbd_bm_read(struct Drbd_Conf *mdev)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 
-	drbd_bm_rw(mdev, READ);
+	int err = drbd_bm_rw(mdev, READ);
 
-	b->bm[b->bm_words] = DRBD_MAGIC;
+	if (err == 0)
+	    b->bm[b->bm_words] = DRBD_MAGIC;
+
+	return err;
 }
 
 /**
@@ -857,12 +861,14 @@
 /**
  * drbd_bm_write: Write the whole bitmap to its on disk location.
  */
-void drbd_bm_write(struct Drbd_Conf *mdev)
+int drbd_bm_write(struct Drbd_Conf *mdev)
 {
-	drbd_bm_rw(mdev, WRITE);
+	int err = drbd_bm_rw(mdev, WRITE);
 
 	INFO("%lu KB now marked out-of-sync by on disk bit-map.\n",
 	      drbd_bm_total_weight(mdev) << (BM_BLOCK_SIZE_B-10) );
+
+	return err;
 }
 
 /* clear all bits in the bitmap */

Modified: trunk/drbd/drbd_int.h
===================================================================
--- trunk/drbd/drbd_int.h	2006-09-18 12:11:46 UTC (rev 2429)
+++ trunk/drbd/drbd_int.h	2006-09-18 12:22:59 UTC (rev 2430)
@@ -1155,8 +1155,8 @@
 extern int  drbd_bm_e_weight  (drbd_dev *mdev, unsigned long enr);
 extern int  drbd_bm_read_sect (drbd_dev *mdev, unsigned long enr);
 extern int  drbd_bm_write_sect(drbd_dev *mdev, unsigned long enr);
-extern void drbd_bm_read      (drbd_dev *mdev);
-extern void drbd_bm_write     (drbd_dev *mdev);
+extern int  drbd_bm_read      (drbd_dev *mdev);
+extern int  drbd_bm_write     (drbd_dev *mdev);
 extern unsigned long drbd_bm_ALe_set_all (drbd_dev *mdev, unsigned long al_enr);
 extern size_t        drbd_bm_words       (drbd_dev *mdev);
 extern sector_t      drbd_bm_capacity    (drbd_dev *mdev);

Modified: trunk/drbd/drbd_main.c
===================================================================
--- trunk/drbd/drbd_main.c	2006-09-18 12:11:46 UTC (rev 2429)
+++ trunk/drbd/drbd_main.c	2006-09-18 12:22:59 UTC (rev 2430)
@@ -494,7 +494,9 @@
 		drbd_state_lock(mdev);
 		if( !drbd_send_state_req(mdev,mask,val) ) {
 			drbd_state_unlock(mdev);
-			return SS_CW_FailedByPeer;
+			rv = SS_CW_FailedByPeer;
+			if( f & ChgStateVerbose ) print_st_err(mdev,os,ns,rv);
+			return rv;
 		}
 
 		wait_event(mdev->cstate_wait,(rv=_req_st_cond(mdev,mask,val)));
@@ -724,7 +726,7 @@
 			   this happen...*/
 
 			if( pre_state_checks(mdev,os) == rv ) {
-				ERR("State change from bad state. "
+				ERR("Forcing state change from bad state. "
 				    "Error would be: '%s'\n", 
 				    set_st_err_name(rv));
 				print_st(mdev,"old",os);
@@ -2899,6 +2901,14 @@
 		INFOP("%s (barrier %u)\n", cmdname(cmd), p->Barrier.barrier);
 		break;
 
+	case ReportUUIDs:
+		INFOP("%s Curr:%016llX, Bitmap:%016llX, HisSt:%016llX, HisEnd:%016llX\n", cmdname(cmd),
+		      p->GenCnt.uuid[Current],
+		      p->GenCnt.uuid[Bitmap],
+		      p->GenCnt.uuid[History_start],
+		      p->GenCnt.uuid[History_end]);
+		break;
+		      
 	case ReportSizes:
 		INFOP("%s (d %lluMiB, u %lluMiB, c %lldMiB, max bio %x, q order %x)\n", cmdname(cmd), 
 		     (long long)(be64_to_cpu(p->Sizes.d_size)>>(20-9)),

Modified: trunk/drbd/drbd_nl.c
===================================================================
--- trunk/drbd/drbd_nl.c	2006-09-18 12:11:46 UTC (rev 2429)
+++ trunk/drbd/drbd_nl.c	2006-09-18 12:22:59 UTC (rev 2430)
@@ -442,7 +442,7 @@
 	if ( la_size_changed || md_moved ) {
 		if( inc_local_if_state(mdev,Attaching) ) {
 			drbd_al_shrink(mdev); // All extents inactive.
-			drbd_bm_write(mdev);  // write bitmap
+			rv = drbd_bm_write(mdev);  // write bitmap
 			// Write mdev->la_size to on disk.
 			drbd_md_mark_dirty(mdev);
 			dec_local(mdev);
@@ -806,11 +806,16 @@
 	if (drbd_md_test_flag(mdev->bc,MDF_FullSync)) {
 		INFO("Assuming that all blocks are out of sync (aka FullSync)\n");
 		drbd_bm_set_all(mdev);
-		drbd_bm_write(mdev);
+		if (unlikely(drbd_bm_write(mdev) < 0)) {
+			retcode = MDIOError;
+			goto release_bdev3_fail;		
+		}
 		drbd_md_clear_flag(mdev,MDF_FullSync);
 	} else {
-		/* FIXME this still does not propagate io errors! */
-		drbd_bm_read(mdev);
+		if (unlikely(drbd_bm_read(mdev) < 0)) {
+			retcode = MDIOError;
+			goto release_bdev3_fail;		
+		}
 	}
 
 	if(test_bit(CRASHED_PRIMARY, &mdev->flags)) {
@@ -853,8 +858,8 @@
 	   this point, because drbd_request_state() modifies these
 	   flags. */
 
-	/* In case we are Connected postpony any desicion on the new disk
-	   state after the negotiatin phase. */
+	/* In case we are Connected postpone any desicion on the new disk
+	   state after the negotiation phase. */
 	if(mdev->state.conn == Connected) {
 		mdev->new_state_tmp.i = ns.i;
 		ns.i = os.i;
@@ -878,7 +883,10 @@
 	return 0;
 
  release_bdev3_fail:
-	nbc = NULL; /* will be freed by state change below */
+	/* The following will be freed by state change below */
+	nbc = NULL; 
+	resync_lru = NULL;
+
 	drbd_force_state(mdev,NS(disk,Diskless));
 	drbd_md_sync(mdev);
 	goto fail;

Modified: trunk/drbd/drbd_receiver.c
===================================================================
--- trunk/drbd/drbd_receiver.c	2006-09-18 12:11:46 UTC (rev 2429)
+++ trunk/drbd/drbd_receiver.c	2006-09-18 12:22:59 UTC (rev 2430)
@@ -1832,8 +1832,11 @@
 		drbd_md_sync(mdev);
 
 		drbd_bm_set_all(mdev);
-		drbd_bm_write(mdev);
 
+		if (unlikely(drbd_bm_write(mdev) < 0)) {
+			return conn_mask;
+		}
+
 		drbd_md_clear_flag(mdev,MDF_FullSync);
 		drbd_md_sync(mdev);
 	}
@@ -1849,7 +1852,8 @@
 		if(drbd_bm_total_weight(mdev)) {
 			INFO("No resync -> clearing bit map.\n");
 			drbd_bm_clear_all(mdev);
-			drbd_bm_write(mdev);
+			if (unlikely(drbd_bm_write(mdev) < 0))
+				return conn_mask;
 		}
 		drbd_bm_unlock(mdev); // }
 	}



More information about the drbd-cvs mailing list