[DRBD-cvs] r1947 - branches/drbd-0.7/drbd
drbd-cvs at lists.linbit.com
drbd-cvs at lists.linbit.com
Wed Sep 7 19:05:16 CEST 2005
Author: lars
Date: 2005-09-07 19:05:16 +0200 (Wed, 07 Sep 2005)
New Revision: 1947
Modified:
branches/drbd-0.7/drbd/drbd_fs.c
Log:
Only use the "degraded wait for connection timeout" (degr-wfc-timeout) when
* we are currently not primary,
* meta data flags indicate we have been primary before
(so we know we are just recovering from a crash),
* and meta data flags indicate we did not have a peer before the crash,
so it is likely that we won't see it now, either.
This translates to: only if we are recovering from a crash of an active
degraded cluster, which was the original intention for this anyway.
Modified: branches/drbd-0.7/drbd/drbd_fs.c
===================================================================
--- branches/drbd-0.7/drbd/drbd_fs.c 2005-09-06 13:12:34 UTC (rev 1946)
+++ branches/drbd-0.7/drbd/drbd_fs.c 2005-09-07 17:05:16 UTC (rev 1947)
@@ -893,13 +893,27 @@
if(copy_from_user(&p,arg,sizeof(p))) {
return -EFAULT;
}
-
- if( drbd_md_test_flag(mdev,MDF_ConnectedInd) ) {
+ /* If I am currently not Primary,
+ * but meta data primary indicator is set,
+ * I just now recover from a hard crash,
+ * and have been Primary before that crash.
+ *
+ * Now, if I had no connection before that crash
+ * (have been degraded Primary), chances are that
+ * I won't find my peer now either.
+ *
+ * In that case, and _only_ in that case,
+ * we use the degr-wfc-timeout instead of the default,
+ * so we can automatically recover from a crash of a
+ * degraded but active "cluster" after a certain timeout.
+ */
+ if ( mdev->state != Primary &&
+ drbd_md_test_flag(mdev,MDF_PrimaryInd) &&
+ !drbd_md_test_flag(mdev,MDF_ConnectedInd) ) {
+ time=p.degr_wfc_timeout;
+ if (time) WARN("using degr_wfc_timeout=%ld seconds\n", time);
+ } else {
time=p.wfc_timeout;
- //ERR("using wfc_timeout.\n");
- } else {
- time=p.degr_wfc_timeout;
- //ERR("using degr_wfc_timeout.\n");
}
time=time*HZ;
@@ -1147,10 +1161,26 @@
} else {
clear_bit(ON_PRI_INC_HUMAN,&mdev->flags);
clear_bit(ON_PRI_INC_TIMEOUTEX,&mdev->flags);
+ if (arg == 0) break;
- if (arg & Human )
+ // XXX reduce race: don't set it,
+ // if we have a connection.
+ // this does not avoid the race completely, though.
+ if (mdev->cstate > WFConnection) {
+ WARN("race avoidance: did not set "
+ "the state flags (%s), cstate=%s\n",
+ arg == (Human|TimeoutExpired)
+ ? "Human|TimeoutExpired"
+ : arg == Human
+ ? "Human"
+ : "TimeoutExpired",
+ cstate_to_name(mdev->cstate));
+ break;
+ }
+
+ if (arg & Human)
set_bit(ON_PRI_INC_HUMAN,&mdev->flags);
- if (arg & TimeoutExpired )
+ if (arg & TimeoutExpired)
set_bit(ON_PRI_INC_TIMEOUTEX,&mdev->flags);
}
break;
More information about the drbd-cvs mailing list