[DRBD-cvs] r1947 - branches/drbd-0.7/drbd

drbd-cvs at lists.linbit.com drbd-cvs at lists.linbit.com
Wed Sep 7 19:05:16 CEST 2005


Author: lars
Date: 2005-09-07 19:05:16 +0200 (Wed, 07 Sep 2005)
New Revision: 1947

Modified:
   branches/drbd-0.7/drbd/drbd_fs.c
Log:

Only use the "degraded wait for connection timeout" (degr-wfc-timeout) when
  * we are currently not primary,
  * the meta data flags indicate we have been primary before
    (so we know we are just recovering from a crash),
  * and the meta data flags indicate we did not have a peer before the crash,
    so it is likely that we won't see it now, either.

In other words: use it only if we are recovering from a crash of an active
but degraded cluster, which was the original intention of this option anyway.



Modified: branches/drbd-0.7/drbd/drbd_fs.c
===================================================================
--- branches/drbd-0.7/drbd/drbd_fs.c	2005-09-06 13:12:34 UTC (rev 1946)
+++ branches/drbd-0.7/drbd/drbd_fs.c	2005-09-07 17:05:16 UTC (rev 1947)
@@ -893,13 +893,27 @@
 	if(copy_from_user(&p,arg,sizeof(p))) {
 		return -EFAULT;
 	}
-
-	if( drbd_md_test_flag(mdev,MDF_ConnectedInd) ) {
+	/* If I am currently not Primary,
+	 * but meta data primary indicator is set,
+	 * I just now recover from a hard crash,
+	 * and have been Primary before that crash.
+	 *
+	 * Now, if I had no connection before that crash
+	 * (have been degraded Primary), chances are that
+	 * I won't find my peer now either.
+	 *
+	 * In that case, and _only_ in that case,
+	 * we use the degr-wfc-timeout instead of the default,
+	 * so we can automatically recover from a crash of a
+	 * degraded but active "cluster" after a certain timeout.
+	 */
+	if ( mdev->state != Primary &&
+	     drbd_md_test_flag(mdev,MDF_PrimaryInd) &&
+	    !drbd_md_test_flag(mdev,MDF_ConnectedInd) ) {
+		time=p.degr_wfc_timeout;
+		if (time) WARN("using degr_wfc_timeout=%ld seconds\n", time);
+	} else {
 		time=p.wfc_timeout;
-		//ERR("using wfc_timeout.\n");
-	} else {
-		time=p.degr_wfc_timeout;
-		//ERR("using degr_wfc_timeout.\n");
 	}
 
 	time=time*HZ;
@@ -1147,10 +1161,26 @@
 		} else {
 			clear_bit(ON_PRI_INC_HUMAN,&mdev->flags);
 			clear_bit(ON_PRI_INC_TIMEOUTEX,&mdev->flags);
+			if (arg == 0) break;
 
-			if (arg & Human ) 
+			// XXX reduce race: don't set it,
+			// if we have a connection.
+			// this does not avoid the race completely, though.
+			if (mdev->cstate > WFConnection) {
+				WARN("race avoidance: did not set "
+				     "the state flags (%s), cstate=%s\n",
+				        arg == (Human|TimeoutExpired)
+				     ?  "Human|TimeoutExpired"
+				     : arg == Human
+				     ? "Human"
+				     : "TimeoutExpired",
+				     cstate_to_name(mdev->cstate));
+				break;
+			}
+
+			if (arg & Human)
 				set_bit(ON_PRI_INC_HUMAN,&mdev->flags);
-			if (arg & TimeoutExpired )
+			if (arg & TimeoutExpired)
 				set_bit(ON_PRI_INC_TIMEOUTEX,&mdev->flags);
 		}
 		break;



More information about the drbd-cvs mailing list