[DRBD-cvs] svn commit by simon - r2431 - in trunk: drbd testing/testsuite - Simon's final few changes for the 1st phase of panic removal

drbd-cvs at lists.linbit.com drbd-cvs at lists.linbit.com
Tue Sep 19 15:14:45 CEST 2006


Author: simon
Date: 2006-09-19 15:14:43 +0200 (Tue, 19 Sep 2006)
New Revision: 2431

Modified:
   trunk/drbd/drbd_bitmap.c
   trunk/drbd/drbd_main.c
   trunk/drbd/drbd_nl.c
   trunk/drbd/drbd_receiver.c
   trunk/testing/testsuite/testsuite.pl
Log:
Simon's final few changes for the 1st phase of panic removal, including:

1. Updated tests - I implemented a new set of fault insertion tests in
   testing/testsuite. NOTE: DRBD does NOT pass these tests yet.
   However, one of the main problems I am having is with the test tool
   itself - it keeps timing out while communicating with the agent
   script for no apparent reason. These tests are worth running
   though - they seem to stress the failure handling quite nicely!
   
2. Need to unlock the bitmap if a failure occurs in drbd_nl_disk_conf
   after the point where the lock is established.
      
3. There are some cases where drbd_bm_read can be called with a
   zero-size bitmap, which leads to a crash -- so test for a NULL
   b->bm before reading.
	 
4. drbd_io_error() needs to call drbd_md_sync. I had removed this
   call, but there are a couple of issues with leaving it out: first,
   the error being reported might not stop the bitmap from being
   written; and second, drbd_md_sync has the side effect of canceling
   the md-sync timer, which is what we want in this case.
	 
5. Minor changes in trace code


Modified: trunk/drbd/drbd_bitmap.c
===================================================================
--- trunk/drbd/drbd_bitmap.c	2006-09-18 12:22:59 UTC (rev 2430)
+++ trunk/drbd/drbd_bitmap.c	2006-09-19 13:14:43 UTC (rev 2431)
@@ -806,11 +806,15 @@
 int drbd_bm_read(struct Drbd_Conf *mdev)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
+	int err=0;
 
-	int err = drbd_bm_rw(mdev, READ);
+	if (b->bm) {
+	    // bitmap size > 0
+	    err = drbd_bm_rw(mdev, READ);
 
-	if (err == 0)
-	    b->bm[b->bm_words] = DRBD_MAGIC;
+	    if (err == 0)
+		b->bm[b->bm_words] = DRBD_MAGIC;
+	}
 
 	return err;
 }

Modified: trunk/drbd/drbd_main.c
===================================================================
--- trunk/drbd/drbd_main.c	2006-09-18 12:22:59 UTC (rev 2430)
+++ trunk/drbd/drbd_main.c	2006-09-19 13:14:43 UTC (rev 2431)
@@ -378,18 +378,12 @@
 	if (ok) WARN("Notified peer that my disk is broken.\n");
 	else ERR("Sending state in drbd_io_error() failed\n");
 
-#if 0
-// warning SPG
-// This code seems wrong -- we only get here if we are set to
-// detach in which case we have no local disk, so there's no
-// point asserting that a full sync is needed.
-// Flushing the meta data is probably also wrong -- we want
-// this node to appear out of date so we should deliberately
-// NOT update the meta data with the latest epoch info!
-	D_ASSERT(drbd_md_test_flag(mdev->bc,MDF_FullSync));
-	D_ASSERT(!drbd_md_test_flag(mdev->bc,MDF_Consistent));
+	// Make sure we try to flush meta-data to disk - we come
+	// in here because of a local disk error so it might fail
+	// but we still need to try -- both because the error might
+	// be in the data portion of the disk and because we need
+	// to ensure the md-sync-timer is stopped if running.
 	drbd_md_sync(mdev);
-#endif
 
 	/* Releasing the backing device is done in after_state_ch() */
 
@@ -2903,10 +2897,10 @@
 
 	case ReportUUIDs:
 		INFOP("%s Curr:%016llX, Bitmap:%016llX, HisSt:%016llX, HisEnd:%016llX\n", cmdname(cmd),
-		      p->GenCnt.uuid[Current],
-		      p->GenCnt.uuid[Bitmap],
-		      p->GenCnt.uuid[History_start],
-		      p->GenCnt.uuid[History_end]);
+		      be64_to_cpu(p->GenCnt.uuid[Current]),
+		      be64_to_cpu(p->GenCnt.uuid[Bitmap]),
+		      be64_to_cpu(p->GenCnt.uuid[History_start]),
+		      be64_to_cpu(p->GenCnt.uuid[History_end]));
 		break;
 		      
 	case ReportSizes:

Modified: trunk/drbd/drbd_nl.c
===================================================================
--- trunk/drbd/drbd_nl.c	2006-09-18 12:22:59 UTC (rev 2430)
+++ trunk/drbd/drbd_nl.c	2006-09-19 13:14:43 UTC (rev 2431)
@@ -883,6 +883,8 @@
 	return 0;
 
  release_bdev3_fail:
+	drbd_bm_unlock(mdev);
+
 	/* The following will be freed by state change below */
 	nbc = NULL; 
 	resync_lru = NULL;

Modified: trunk/drbd/drbd_receiver.c
===================================================================
--- trunk/drbd/drbd_receiver.c	2006-09-18 12:22:59 UTC (rev 2430)
+++ trunk/drbd/drbd_receiver.c	2006-09-19 13:14:43 UTC (rev 2431)
@@ -1668,7 +1668,7 @@
 
 STATIC void drbd_uuid_dump(drbd_dev *mdev,char* text,u64* uuid)
 {
-	WARN("%s %016llX:%016llX:%016llX:%016llX\n",
+	INFO("%s %016llX:%016llX:%016llX:%016llX\n",
 	     text,
 	     uuid[Current],
 	     uuid[Bitmap],
@@ -1748,13 +1748,13 @@
 	int hg;
 	drbd_conns_t rv = conn_mask;
 
-
+	//INFO("drbd_sync_handshake:\n");
 	//drbd_uuid_dump(mdev,"self",mdev->bc->md.uuid);
 	//drbd_uuid_dump(mdev,"peer",mdev->p_uuid);
 
 	hg = drbd_uuid_compare(mdev);
 
-	//WARN("uuid_compare()=%d\n",hg);
+	//INFO("uuid_compare()=%d\n",hg);
 
 	if (hg == 100) {
 		int pcount = (mdev->state.role==Primary) + (peer_role==Primary);

Modified: trunk/testing/testsuite/testsuite.pl
===================================================================
--- trunk/testing/testsuite/testsuite.pl	2006-09-18 12:22:59 UTC (rev 2430)
+++ trunk/testing/testsuite/testsuite.pl	2006-09-19 13:14:43 UTC (rev 2431)
@@ -83,6 +83,16 @@
 #FILESYSTEM:
 $commands{'fs_make'} = 'mkfs.{filesystem} /dev/{device}'; #FIXME FileSystem - agent.conf!!'
 
+#FAULTS
+$commands{'set_fr'} = 'echo 10 >/sys/module/drbd/parameters/fault_rate';
+$commands{'clr_fr'} = 'echo 0 >/sys/module/drbd/parameters/fault_rate; echo 0 >/sys/module/drbd/parameters/enable_faults';
+$commands{'set_md_wr'} = 'echo 1 >/sys/module/drbd/parameters/enable_faults';
+$commands{'set_md_rd'} = 'echo 2 >/sys/module/drbd/parameters/enable_faults';
+$commands{'set_rs_wr'} = 'echo 4 >/sys/module/drbd/parameters/enable_faults';
+$commands{'set_rs_rd'} = 'echo 8 >/sys/module/drbd/parameters/enable_faults';
+$commands{'set_dt_wr'} = 'echo 16 >/sys/module/drbd/parameters/enable_faults';
+$commands{'set_dt_rd'} = 'echo 32 >/sys/module/drbd/parameters/enable_faults';
+
 ###############################################################################
 
 require 'getopts.pl';
@@ -154,10 +164,10 @@
     elsif ($section == 4) {
       push @seqcommands, $_;
       if (/{/) {
-        $seqsection = 1;
+        $seqsection += 1;
       }
       if (/}/) {
-        $seqsection = 0;
+        $seqsection -= 1;
       }
     }
     elsif ($section == 1 or $section == 2) {
@@ -210,7 +220,7 @@
       $section = 4;
     }
     else {
-      ERROR ("unknown configuration");
+      ERROR ("unknown configuration: ".$_);
     }
   }
 }
@@ -335,7 +345,6 @@
   return $reply;
 }
 
-
 ###############################################################################
 ######  functions
 ###############################################################################
@@ -603,7 +612,7 @@
     my $yday;
 
     print LOGFILE "--------- TestSuite --------\n";
-    foreach(sort(@logList)) {
+    foreach(@logList) {
       ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = (localtime($$_[0]));
       printf LOGFILE "%s %02d %02d:%02d:%02d ", $mnames[$mon], $mday, $hour, $min, $sec;
       print LOGFILE $$_[1]."\n";    
@@ -674,6 +683,7 @@
   
   while(($key, $value) = each(%commands)) {
     $seqcommands_eval =~ s/cmd $key/cmd '$value'/g;
+    $seqcommands_eval =~ s/get $key/get '$value'/g;
   }
   
   set_default_vars();
@@ -757,6 +767,19 @@
 }
 
 
+# print info messages
+sub INFO {
+  my ($msg) = @_;
+  
+ if (defined($opt_l)) {
+       LOG($msg);
+  }
+
+  print $msg. "\n";
+
+  return;
+}
+
 # print warn messages
 sub WARN {
   my ($msg) = @_;



More information about the drbd-cvs mailing list