[DRBD-cvs] svn commit by phil - r2602 - in trunk: . drbd drbd/linux
scripts user - Did item 41 of the ROADMAP. Right resync after
a cluste
drbd-cvs at lists.linbit.com
drbd-cvs at lists.linbit.com
Sun Nov 19 13:54:52 CET 2006
Author: phil
Date: 2006-11-19 13:54:50 +0100 (Sun, 19 Nov 2006)
New Revision: 2602
Modified:
trunk/ROADMAP
trunk/drbd/drbd_int.h
trunk/drbd/drbd_main.c
trunk/drbd/drbd_receiver.c
trunk/drbd/linux/drbd.h
trunk/drbd/linux/drbd_limits.h
trunk/drbd/linux/drbd_nl.h
trunk/scripts/drbd.conf
trunk/user/drbdadm_scanner.fl
trunk/user/drbdsetup.c
Log:
Did item 41 of the ROADMAP.
Right resync after a cluster crash or common power failure.
Modified: trunk/ROADMAP
===================================================================
--- trunk/ROADMAP 2006-11-18 10:46:55 UTC (rev 2601)
+++ trunk/ROADMAP 2006-11-19 12:54:50 UTC (rev 2602)
@@ -65,7 +65,7 @@
"consensus" ... discard the version of the secondary if the outcome
of the "after-sb-0pri" algorithm would also destroy
the current secondary's data. Otherwise disconnect.
- "violently-as0p" Alsways take the decission of the "after-sb-0pri"
+ "violently-as0p" Always take the decission of the "after-sb-0pri"
algorithm. Even if that causes case an erratic change
of the primarie's view of the data.
This is only usefull if you use an 1node FS (i.e.
@@ -87,7 +87,7 @@
In case both nodes are primary you control DRBD's strategy by
the "after-sb-2pri" option.
"disconnect" ... Go to StandAlone mode on both sides.
- "violently-as0p" Alsways take the decission of the "after-sb-0pri"
+ "violently-as0p" Always take the decission of the "after-sb-0pri"
algorithm. Even if that causes case an erratic change
of the primarie's view of the data.
This is only usefull if you use an 1node FS (i.e.
@@ -521,7 +521,11 @@
1. C=JC C=JC No Sync
2. C=JC C!=JC I am SyncTarget setting BM
3. C!=JC C=JC I am SyncSource setting BM
- 4. C = C No Sync
+ 4. C = C Common power [off|failure](Examine the roles at crash time)
+ 4.1 sec sec Common power off, no sync.
+ 4.2 pri sec Common power failure, I am SyncSource using BM
+ 4.3 sec pri Common power failure, I am SyncTarget using BM
+ 4.4 pri pri Common power failure, resync in arbitrary direction.
5. C = B I am SyncTarget using BM
6. C = H1|H2 I am SyncTarget setting BM
7. B = C I am SyncSource using BM
@@ -867,9 +871,30 @@
40 Do something with FLUSHBUFS ioctl.
-41 Fix DRBD's behaviour in case of a cluster crash when both nodes
- are in primary state.
+41 Fix DRBD's behaviour in case of a common power failure and when
+ both nodes were in primary role.
+ See the Algorithm of Item 16, sections 4 to 4.4.
+
+ Further we need to have the resync roles conflict "rr-conflict"
+ strategy option with the following values:
+
+ The available options are:
+ "disconnect" ... No automatic resynchronisation, simply disconnect.
+ "violently" .... Sync to the primary node is allowed, violating the
+ assumption that data on a block device is stable
+ for one of the nodes. DANGEROUS, DO NOT USE.
+ "call-pri-lost"
+ Call this helper program on one of the machines.
+ This program is expected to halt or reboot the
+ machine.
+
+ An exception of course is a primary disk-less node that gets a disk
+ attached. Such a node becomes sync target, but since it does not
+ show a violent data change, this state transition is always allowed.
+
+ 99% DONE
+
42 Forward port the abilitiy to resume the TL after IO was frozen,
in case the connection is reestablished again.
Modified: trunk/drbd/drbd_int.h
===================================================================
--- trunk/drbd/drbd_int.h 2006-11-18 10:46:55 UTC (rev 2601)
+++ trunk/drbd/drbd_int.h 2006-11-19 12:54:50 UTC (rev 2602)
@@ -684,7 +684,9 @@
CLUSTER_ST_CHANGE, // Cluster wide state change going on...
CL_ST_CHG_SUCCESS,
CL_ST_CHG_FAIL,
- CRASHED_PRIMARY, // This node was a crashed primary
+ CRASHED_PRIMARY, // This node was a crashed primary. Gets
+ // cleared when the state.conn goes into
+ // Connected state.
WRITE_BM_AFTER_RESYNC // A kmalloc() during resync failed
};
Modified: trunk/drbd/drbd_main.c
===================================================================
--- trunk/drbd/drbd_main.c 2006-11-18 10:46:55 UTC (rev 2601)
+++ trunk/drbd/drbd_main.c 2006-11-19 12:54:50 UTC (rev 2602)
@@ -834,8 +834,7 @@
enum fencing_policy fp;
u32 mdf;
- if ( (os.role != Primary && ns.role == Primary) ||
- (os.conn != Connected && ns.conn == Connected) ) {
+ if ( (os.conn != Connected && ns.conn == Connected) ) {
clear_bit(CRASHED_PRIMARY, &mdev->flags);
}
@@ -1278,6 +1277,7 @@
{
Drbd_GenCnt_Packet p;
int i;
+ u64 uuid_flags = 0;
if(!inc_local_if_state(mdev,Negotiating)) return 1; // ok.
@@ -1289,7 +1289,9 @@
}
p.uuid[UUID_SIZE] = cpu_to_be64(drbd_bm_total_weight(mdev));
- p.uuid[UUID_FLAGS] = cpu_to_be64(mdev->net_conf->want_lose);
+ uuid_flags |= mdev->net_conf->want_lose ? 1 : 0;
+ uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
+ p.uuid[UUID_FLAGS] = cpu_to_be64(uuid_flags);
dec_local(mdev);
Modified: trunk/drbd/drbd_receiver.c
===================================================================
--- trunk/drbd/drbd_receiver.c 2006-11-18 10:46:55 UTC (rev 2601)
+++ trunk/drbd/drbd_receiver.c 2006-11-19 12:54:50 UTC (rev 2602)
@@ -1737,7 +1737,7 @@
if( hg == -1 && mdev->state.role==Secondary) rv=hg;
if( hg == 1 && mdev->state.role==Primary) rv=hg;
break;
- case ViolentlyAS0Pri:
+ case Violently:
rv = drbd_asb_recover_0p(mdev);
break;
case DiscardSecondary:
@@ -1775,7 +1775,7 @@
case DiscardSecondary:
ERR("Configuration error.\n");
break;
- case ViolentlyAS0Pri:
+ case Violently:
rv = drbd_asb_recover_0p(mdev);
break;
case Disconnect:
@@ -1833,8 +1833,23 @@
if (self != UUID_JUST_CREATED &&
peer == UUID_JUST_CREATED) return 2;
- if (self == peer) return 0;
+ if (self == peer) { // Common power [off|failure]
+ int rct; // roles at crash time
+ rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
+ ( mdev->p_uuid[UUID_FLAGS] & 2 );
+ // lowest bit is set when we were primary
+ // next bit (weight 2) is set when peer was primary
+
+ switch(rct) {
+ case 0: /* !self_pri && !peer_pri */ return 0;
+ case 1: /* self_pri && !peer_pri */ return 1;
+ case 2: /* !self_pri && peer_pri */ return -1;
+ case 3: /* self_pri && peer_pri */
+ return test_bit(DISCARD_CONCURRENT,&mdev->flags) ? -1 : 1;
+ }
+ }
+
peer = mdev->p_uuid[Bitmap] & ~((u64)1);
if (self == peer) return -1;
@@ -1925,9 +1940,9 @@
}
}
- if (hg == 0) {
+ if (abs(hg) < 100) {
// This is needed in case someone does an invalidate on an
- // disconnected node.
+ // disconnected node. This has priority.
if(mydisk==Inconsistent && peer_disk>Inconsistent) hg=-1;
if(mydisk>Inconsistent && peer_disk==Inconsistent) hg= 1;
}
@@ -1952,8 +1967,21 @@
return conn_mask;
}
- /* The check "I shall become SyncTarget, but I am primary"
- got removed since, we now do live attachs on primary nodes! */
+ if (hg < 0 && // by intention we do not use mydisk here.
+ mdev->state.role == Primary && mdev->state.disk >= Consistent ) {
+ switch(mdev->net_conf->rr_conflict) {
+ case CallHelper:
+ drbd_khelper(mdev,"pri-lost");
+ // fall through
+ case Disconnect:
+ ERR("I shall become SyncTarget, but I am primary!\n");
+ drbd_force_state(mdev,NS(conn,Disconnecting));
+ return conn_mask;
+ case Violently:
+ WARN("Becoming SyncTarget, violating the stable-data"
+ "assumption\n");
+ }
+ }
if (abs(hg) >= 2) {
drbd_md_set_flag(mdev,MDF_FullSync);
@@ -1976,17 +2004,9 @@
rv = WFBitMapT;
} else {
rv = Connected;
- drbd_bm_lock(mdev); // {
if(drbd_bm_total_weight(mdev)) {
- /* FIXME for two-primaries this is wrong and may lead
- * to diverging data sets! */
- INFO("No resync -> clearing bit map.\n");
- drbd_bm_clear_all(mdev);
- drbd_uuid_set_bm(mdev,0UL);
- if (unlikely(drbd_bm_write(mdev) < 0))
- return conn_mask;
+ INFO("No resync, but bits in bitmap!\n");
}
- drbd_bm_unlock(mdev); // }
}
return rv;
Modified: trunk/drbd/linux/drbd.h
===================================================================
--- trunk/drbd/linux/drbd.h 2006-11-18 10:46:55 UTC (rev 2601)
+++ trunk/drbd/linux/drbd.h 2006-11-19 12:54:50 UTC (rev 2602)
@@ -62,7 +62,7 @@
Consensus,
DiscardSecondary,
CallHelper,
- ViolentlyAS0Pri
+ Violently
};
/* KEEP the order, do not delete or insert!
Modified: trunk/drbd/linux/drbd_limits.h
===================================================================
--- trunk/drbd/linux/drbd_limits.h 2006-11-18 10:46:55 UTC (rev 2601)
+++ trunk/drbd/linux/drbd_limits.h 2006-11-19 12:54:50 UTC (rev 2602)
@@ -113,6 +113,7 @@
#define DRBD_AFTER_SB_0P_DEF Disconnect
#define DRBD_AFTER_SB_1P_DEF Disconnect
#define DRBD_AFTER_SB_2P_DEF Disconnect
+#define DRBD_RR_CONFLICT_DEF Disconnect
#undef RANGE
#endif
Modified: trunk/drbd/linux/drbd_nl.h
===================================================================
--- trunk/drbd/linux/drbd_nl.h 2006-11-18 10:46:55 UTC (rev 2601)
+++ trunk/drbd/linux/drbd_nl.h 2006-11-19 12:54:50 UTC (rev 2602)
@@ -46,6 +46,7 @@
INTEGER( 24, T_MAY_IGNORE, after_sb_0p)
INTEGER( 25, T_MAY_IGNORE, after_sb_1p)
INTEGER( 26, T_MAY_IGNORE, after_sb_2p)
+ INTEGER( 39, T_MAY_IGNORE, rr_conflict)
BIT( 27, T_MAY_IGNORE, want_lose)
BIT( 28, T_MAY_IGNORE, two_primaries)
)
Modified: trunk/scripts/drbd.conf
===================================================================
--- trunk/scripts/drbd.conf 2006-11-18 10:46:55 UTC (rev 2601)
+++ trunk/scripts/drbd.conf 2006-11-19 12:54:50 UTC (rev 2602)
@@ -299,6 +299,14 @@
# "consensus" ... discard the version of the secondary if the outcome
# of the "after-sb-0pri" algorithm would also destroy
# the current secondary's data. Otherwise disconnect.
+ # "violently-as0p" Always take the decision of the "after-sb-0pri"
+ # algorithm. Even if that causes an erratic change
+ # of the primary's view of the data.
+ # This is only useful if you use a 1node FS (i.e.
+ # not OCFS2 or GFS) with the allow-two-primaries
+ # flag, _AND_ you really know what you are doing.
+ # This is DANGEROUS and MAY CRASH YOUR MACHINE if you
+ # have a FS mounted on the primary node.
# "discard-secondary"
# discard the version of the secondary.
# "panic-primary" Always honour the outcome of the "after-sb-0pri"
@@ -311,10 +319,22 @@
# In case both nodes are primary you control DRBD's strategy by
# the "after-sb-2pri" option.
# "disconnect" ... Go to StandAlone mode on both sides.
+ # "violently-as0p" Always take the decision of the "after-sb-0pri".
# "panic" ... Honor the outcome of the "after-sb-0pri" algorithm
# and panic the other node.
after-sb-2pri disconnect;
+ # To solve the cases when the outcome of the resync decisions is
+ # incompatible with the current role assignment in the cluster.
+ # "disconnect" ... No automatic resynchronisation, simply disconnect.
+ # "violently" .... Sync to the primary node is allowed, violating the
+ # assumption that data on a block device is stable
+ # for one of the nodes. DANGEROUS, DO NOT USE.
+ # "call-pri-lost" Call the "pri-lost" helper program on one of the
+ # machines. This program is expected to reboot the
+ # machine. (I.e. make it secondary.)
+ rr-conflict disconnect;
+
# DRBD-0.7's behaviour is equivalent to
# after-sb-0pri discard-younger-primary;
# after-sb-1pri consensus;
Modified: trunk/user/drbdadm_scanner.fl
===================================================================
--- trunk/user/drbdadm_scanner.fl 2006-11-18 10:46:55 UTC (rev 2601)
+++ trunk/user/drbdadm_scanner.fl 2006-11-19 12:54:50 UTC (rev 2602)
@@ -78,7 +78,8 @@
cram-hmac-alg { DP; CP; return TK_NET_OPTION; }
shared-secret { DP; CP; return TK_NET_OPTION; }
max-epoch-size { DP; CP; RC(MAX_EPOCH_SIZE); return TK_NET_OPTION;}
-after-sb-[012]pri { DP; CP; return TK_NET_OPTION; }
+after-sb-[012]pri { DP; CP; return TK_NET_OPTION; }
+rr-conflict { DP; CP; return TK_NET_OPTION; }
unplug-watermark { DP; CP; return TK_NET_OPTION; }
allow-two-primaries { DP; CP; return TK_NET_SWITCH; }
rate { DP; CP; RC(RATE); return TK_SYNCER_OPTION; }
@@ -88,6 +89,7 @@
degr-wfc-timeout { DP; CP; RC(DEGR_WFC_TIMEOUT); return TK_STARTUP_OPTION;}
pri-on-incon-degr { DP; CP; return TK_HANDLER_OPTION; }
pri-lost-after-sb { DP; CP; return TK_HANDLER_OPTION; }
+pri-lost { DP; CP; return TK_HANDLER_OPTION; }
outdate-peer { DP; CP; return TK_HANDLER_OPTION; }
local-io-error { DP; CP; return TK_HANDLER_OPTION; }
{IPV4ADDR} { DP; CP; return TK_IPADDR; }
Modified: trunk/user/drbdsetup.c
===================================================================
--- trunk/user/drbdsetup.c 2006-11-18 10:46:55 UTC (rev 2601)
+++ trunk/user/drbdsetup.c 2006-11-19 12:54:50 UTC (rev 2602)
@@ -206,17 +206,23 @@
const char *asb1p_n[] = {
[Disconnect] = "disconnect",
[Consensus] = "consensus",
- [ViolentlyAS0Pri] = "violently-as0p",
+ [Violently] = "violently-as0p",
[DiscardSecondary] = "discard-secondary",
[CallHelper] = "call-pri-lost-after-sb"
};
const char *asb2p_n[] = {
[Disconnect] = "disconnect",
- [ViolentlyAS0Pri] = "violently-as0p",
+ [Violently] = "violently-as0p",
[CallHelper] = "call-pri-lost-after-sb"
};
+const char *rrcf_n[] = {
+ [Disconnect] = "disconnect",
+ [Violently] = "violently",
+ [CallHelper] = "call-pri-lost"
+};
+
struct option wait_cmds_options[] = {
{ "wfc-timeout",required_argument, 0, 't' },
{ "degr-wfc-timeout",required_argument,0,'d'},
@@ -283,6 +289,7 @@
{ "after-sb-0pri",'A', T_after_sb_0p,EH(asb0p_n,AFTER_SB_0P) },
{ "after-sb-1pri",'B', T_after_sb_1p,EH(asb1p_n,AFTER_SB_1P) },
{ "after-sb-2pri",'C', T_after_sb_2p,EH(asb2p_n,AFTER_SB_2P) },
+ { "rr-conflict",'R', T_rr_conflict,EH(rrcf_n,RR_CONFLICT) },
{ "discard-my-data",'D', T_want_lose, EB },
CLOSE_OPTIONS }} }, },
More information about the drbd-cvs
mailing list