[DRBD-cvs] svn commit by phil - r2270 - in trunk: . drbd drbd/linux
- Made the "drbdadm invalidate" and "invalidate_remote" commands work right
drbd-cvs at lists.linbit.com
drbd-cvs at lists.linbit.com
Tue Jul 11 16:17:52 CEST 2006
Author: phil
Date: 2006-07-11 16:17:49 +0200 (Tue, 11 Jul 2006)
New Revision: 2270
Modified:
trunk/ROADMAP
trunk/drbd/drbd_fs.c
trunk/drbd/drbd_int.h
trunk/drbd/drbd_main.c
trunk/drbd/drbd_receiver.c
trunk/drbd/drbd_strings.c
trunk/drbd/drbd_worker.c
trunk/drbd/linux/drbd.h
Log:
Made the "drbdadm invalidate" and "invalidate_remote" commands
to work right in regard to:
* concurrent calls on both nodes
* calls on disconnected nodes
Modified: trunk/ROADMAP
===================================================================
--- trunk/ROADMAP 2006-07-10 16:09:49 UTC (rev 2269)
+++ trunk/ROADMAP 2006-07-11 14:17:49 UTC (rev 2270)
@@ -781,8 +781,9 @@
Evaluate if it is possible to use it for starting resync. (invalidate)
Evaluate it for the other cases...
- 60 % Is implemented. Changing the role to primary already uses this
- mechanism. Seems to work.
+ 80 % Is implemented. Changing the role to primary already uses this
+ mechanism. Starting resync with invalidate and invalidate_remote
+ now also uses this method.
34 Improve the initial hand-shake, to identify the sockets (and TCP-
links) by an initial message, and not only by the connection timming.
Modified: trunk/drbd/drbd_fs.c
===================================================================
--- trunk/drbd/drbd_fs.c 2006-07-10 16:09:49 UTC (rev 2269)
+++ trunk/drbd/drbd_fs.c 2006-07-11 14:17:49 UTC (rev 2270)
@@ -575,7 +575,7 @@
rv = _drbd_set_state(mdev, ns, ChgStateVerbose);
ns = mdev->state;
spin_unlock_irq(&mdev->req_lock);
- after_state_ch(mdev,os,ns);
+ after_state_ch(mdev,os,ns,ChgStateVerbose);
if(rv >= SS_Success ) {
drbd_thread_start(&mdev->worker);
@@ -1100,7 +1100,7 @@
lc_free(mdev->resync); mdev->resync = NULL;
lc_free(mdev->act_log); mdev->act_log = NULL;
- after_state_ch(mdev, os, ns);
+ after_state_ch(mdev, os, ns, ChgStateVerbose);
return 0;
}
@@ -1119,7 +1119,7 @@
}
ns = mdev->state;
spin_unlock_irq(&mdev->req_lock);
- after_state_ch(mdev,os,ns);
+ after_state_ch(mdev,os,ns, ChgStateVerbose);
if( r == SS_NothingToDo ) return 0;
if( r == -999 ) {
@@ -1173,7 +1173,7 @@
r = _drbd_set_state(mdev, _NS(conn,StandAlone), 0);
ns = mdev->state;
spin_unlock_irq(&mdev->req_lock);
- after_state_ch(mdev,os,ns);
+ after_state_ch(mdev,os,ns,0);
if ( r == SS_NothingToDo ) return 0;
if ( r == SS_PrimaryNOP ) {
@@ -1392,87 +1392,15 @@
goto out_unlocked;
case DRBD_IOCTL_INVALIDATE:
- /* TODO
- * differentiate between different error cases,
- * or report the current connection state and flags back
- * to userspace */
-
- /* disallow "invalidation" of local replica
- * when currently in primary state (would be a Bad Idea),
- * or during a running sync (won't make any sense) */
-
- /* PRE TODO disallow invalidate if we are primary */
- r = drbd_request_state(mdev,NS2(disk,Inconsistent,
- conn,WFBitMapT));
-
- if( r == SS_NothingToDo ) { break; }
- if( r < SS_Success ) {
- err = -EINPROGRESS;
- break;
- }
-
- /* avoid races with set_in_sync
- * for successfull mirrored writes
- */
- wait_event(mdev->cstate_wait,
- atomic_read(&mdev->ap_bio_cnt)==0);
-
- drbd_bm_lock(mdev); // racy...
-
- drbd_md_set_flag(mdev,MDF_FullSync);
- drbd_md_sync(mdev);
-
- drbd_bm_set_all(mdev);
- drbd_bm_write(mdev);
-
- drbd_md_clear_flag(mdev,MDF_FullSync);
- drbd_md_sync(mdev);
-
- if (drbd_send_short_cmd(mdev,BecomeSyncSource)) {
- int ok;
- ok = drbd_request_state(mdev,NS(conn,WFSyncUUID));
- D_ASSERT( ok == 1 );
- }
-
- drbd_bm_unlock(mdev);
-
+ r = drbd_request_state(mdev,NS2(conn,StartingSyncT,
+ disk,Inconsistent));
+ if ( r != SS_Success) err = -EINPROGRESS;
break;
case DRBD_IOCTL_INVALIDATE_REM:
-
- /* PRE TODO disallow invalidate if we peer is primary */
- /* remove EINVAL from error output... */
- r = drbd_request_state(mdev,NS2(pdsk,Inconsistent,
- conn,WFBitMapS));
-
- if( r == SS_NothingToDo ) { break; }
- if( r < SS_Success ) {
- err = -EINPROGRESS;
- break;
- }
-
- drbd_md_set_flag(mdev,MDF_FullSync);
- drbd_md_sync(mdev);
-
- /* avoid races with set_in_sync
- * for successfull mirrored writes
- */
- wait_event(mdev->cstate_wait,
- atomic_read(&mdev->ap_bio_cnt)==0);
-
- drbd_bm_lock(mdev); // racy...
-
- drbd_bm_set_all(mdev);
- drbd_bm_write(mdev);
-
- drbd_md_clear_flag(mdev,MDF_FullSync);
-
- drbd_send_uuids(mdev);
- drbd_send_short_cmd(mdev,BecomeSyncTarget);
- drbd_start_resync(mdev,SyncSource);
-
- drbd_bm_unlock(mdev);
-
+ r = drbd_request_state(mdev,NS2(conn,StartingSyncS,
+ pdsk,Inconsistent));
+ if ( r != SS_Success) err = -EINPROGRESS;
break;
case DRBD_IOCTL_OUTDATE_DISK:
Modified: trunk/drbd/drbd_int.h
===================================================================
--- trunk/drbd/drbd_int.h 2006-07-10 16:09:49 UTC (rev 2269)
+++ trunk/drbd/drbd_int.h 2006-07-11 14:17:49 UTC (rev 2270)
@@ -842,7 +842,8 @@
enum chg_state_flags);
extern int _drbd_set_state(drbd_dev*, drbd_state_t, enum chg_state_flags );
extern void print_st_err(drbd_dev*, drbd_state_t, drbd_state_t, int );
-extern void after_state_ch(drbd_dev* mdev, drbd_state_t os, drbd_state_t ns);
+extern void after_state_ch(drbd_dev* mdev, drbd_state_t os, drbd_state_t ns,
+ enum chg_state_flags);
extern void drbd_thread_start(struct Drbd_thread *thi);
extern void _drbd_thread_stop(struct Drbd_thread *thi, int restart, int wait);
extern void drbd_free_resources(drbd_dev *mdev);
Modified: trunk/drbd/drbd_main.c
===================================================================
--- trunk/drbd/drbd_main.c 2006-07-10 16:09:49 UTC (rev 2269)
+++ trunk/drbd/drbd_main.c 2006-07-11 14:17:49 UTC (rev 2270)
@@ -79,6 +79,7 @@
struct drbd_work w;
drbd_state_t os;
drbd_state_t ns;
+ enum chg_state_flags flags;
};
int drbdd_init(struct Drbd_thread*);
@@ -531,14 +532,14 @@
/**
* cl_wide_st_chg:
* Returns TRUE if this state change should be preformed as a cluster wide
- * transaction. Of courese it returns 0 as soon as the connection is lost.
+ * transaction. Of course it returns 0 as soon as the connection is lost.
*/
STATIC int cl_wide_st_chg(drbd_dev* mdev, drbd_state_t os, drbd_state_t ns)
{
- return ( ns.conn >= Connected &&
+ return ( os.conn >= Connected && ns.conn >= Connected &&
( ( os.role != Primary && ns.role == Primary ) ||
- // ( os.conn != SyncSource && ns.role == SyncSource ) ||
- // ( os.conn != SyncTarget && ns.role == SyncTarget ) ||
+ ( os.conn != StartingSyncT && ns.conn == StartingSyncT ) ||
+ ( os.conn != StartingSyncS && ns.conn == StartingSyncS ) ||
// ( os.disk != Diskless && ns.role == Diskless ) ||
// ( os.conn != TearDown && ns.conn == TearDown ) ||
0
@@ -558,7 +559,7 @@
rv = _drbd_set_state(mdev, ns, f);
ns = mdev->state;
spin_unlock_irqrestore(&mdev->req_lock,flags);
- after_state_ch(mdev,os,ns);
+ if (rv == SS_Success) after_state_ch(mdev,os,ns,f);
return rv;
}
@@ -624,7 +625,10 @@
}
drbd_state_lock(mdev);
- drbd_send_state_req(mdev,mask,val);
+ if( !drbd_send_state_req(mdev,mask,val) ) {
+ drbd_state_unlock(mdev);
+ return SS_CW_FailedByPeer;
+ }
wait_event(mdev->cstate_wait,(rv=_req_st_cond(mdev,mask,val)));
@@ -644,7 +648,7 @@
ns = mdev->state;
spin_unlock_irqrestore(&mdev->req_lock,flags);
- if (rv == SS_Success) after_state_ch(mdev,os,ns);
+ if (rv == SS_Success) after_state_ch(mdev,os,ns,f);
return rv;
}
@@ -748,6 +752,13 @@
os = mdev->state;
+ /* Early state sanitising. Dissalow the invalidate ioctl to connect */
+ if( (ns.conn == StartingSyncS || ns.conn == StartingSyncT) &&
+ os.conn < Connected ) {
+ ns.conn = os.conn;
+ ns.pdsk = os.pdsk;
+ }
+
if( ns.i == os.i ) return SS_NothingToDo;
fp = DontCare;
@@ -901,6 +912,7 @@
if(ascw) {
ascw->os = os;
ascw->ns = ns;
+ ascw->flags = flags;
ascw->w.cb = w_after_state_ch;
_drbd_queue_work_front(&mdev->data.work,&ascw->w);
} else {
@@ -916,13 +928,14 @@
struct after_state_chg_work* ascw;
ascw = (struct after_state_chg_work*) w;
- after_state_ch(mdev, ascw->os, ascw->ns);
+ after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
kfree(ascw);
return 1;
}
-void after_state_ch(drbd_dev* mdev, drbd_state_t os, drbd_state_t ns)
+void after_state_ch(drbd_dev* mdev, drbd_state_t os, drbd_state_t ns,
+ enum chg_state_flags flags)
{
enum fencing_policy fp;
u32 mdf;
@@ -1021,6 +1034,57 @@
drbd_send_short_cmd(mdev,ResumeResync);
}
+ /* We are in the progress to start a full sync... */
+ if ( ( os.conn != StartingSyncT && ns.conn == StartingSyncT ) ||
+ ( os.conn != StartingSyncS && ns.conn == StartingSyncS ) ) {
+
+ /* avoid races with set_in_sync
+ * for successfull mirrored writes
+ */
+ wait_event(mdev->cstate_wait,
+ atomic_read(&mdev->ap_bio_cnt)==0);
+
+ drbd_bm_lock(mdev); // racy...
+
+ drbd_md_set_flag(mdev,MDF_FullSync);
+ drbd_md_sync(mdev);
+
+ drbd_bm_set_all(mdev);
+ drbd_bm_write(mdev);
+
+ drbd_md_clear_flag(mdev,MDF_FullSync);
+ drbd_md_sync(mdev);
+
+ drbd_bm_unlock(mdev);
+
+ if (ns.conn == StartingSyncT) {
+ spin_lock_irq(&mdev->req_lock);
+ _drbd_set_state(mdev,_NS(conn,WFSyncUUID),
+ ChgStateVerbose | ScheduleAfter );
+ spin_unlock_irq(&mdev->req_lock);
+ } else /* StartingSyncS */ {
+ drbd_start_resync(mdev,SyncSource);
+ }
+ }
+
+ /* We are invalidating our self... */
+ if ( os.conn < Connected && ns.conn < Connected &&
+ os.disk > Inconsistent && ns.disk == Inconsistent ) {
+ drbd_bm_lock(mdev); // racy...
+
+ drbd_md_set_flag(mdev,MDF_FullSync);
+ drbd_md_sync(mdev);
+
+ drbd_bm_set_all(mdev);
+ drbd_bm_write(mdev);
+
+ drbd_md_clear_flag(mdev,MDF_FullSync);
+ drbd_md_sync(mdev);
+
+ drbd_bm_unlock(mdev);
+ }
+
+
/* it feels better to have the module_put last ... */
if ( (os.disk > Diskless || os.conn > StandAlone) &&
ns.disk == Diskless && ns.conn == StandAlone ) {
Modified: trunk/drbd/drbd_receiver.c
===================================================================
--- trunk/drbd/drbd_receiver.c 2006-07-10 16:09:49 UTC (rev 2269)
+++ trunk/drbd/drbd_receiver.c 2006-07-11 14:17:49 UTC (rev 2270)
@@ -1696,7 +1696,8 @@
/* drbd_sync_handshake() returns the new conn state on success, or
conn_mask (-1) on failure.
*/
-STATIC drbd_conns_t drbd_sync_handshake(drbd_dev *mdev, drbd_role_t peer_role)
+STATIC drbd_conns_t drbd_sync_handshake(drbd_dev *mdev, drbd_role_t peer_role,
+ drbd_disks_t peer_disk)
{
int hg;
drbd_conns_t rv = conn_mask;
@@ -1734,6 +1735,13 @@
WARN("Split-Brain detected, manually solved.\n");
}
}
+
+ if (hg == 0) {
+ // This is needed in case someone does an invalidate on an
+ // disconnected node.
+ if(mdev->state.disk==Inconsistent && peer_disk>Inconsistent) hg=-1;
+ if(mdev->state.disk>Inconsistent && peer_disk==Inconsistent) hg= 1;
+ }
if (hg == -1000) {
ALERT("Unrelated data, dropping connection!\n");
@@ -1955,7 +1963,7 @@
drbd_bm_unlock(mdev); // }
if (mdev->p_uuid && mdev->state.conn <= Connected && inc_local(mdev)) {
- nconn=drbd_sync_handshake(mdev,mdev->state.peer);
+ nconn=drbd_sync_handshake(mdev,mdev->state.peer,mdev->state.pdsk);
dec_local(mdev);
if(nconn == conn_mask) return FALSE;
@@ -2018,15 +2026,10 @@
static drbd_conns_t c_tab[] = {
[Connected] = Connected,
- [SkippedSyncS] = SkippedSyncT,
- [SkippedSyncT] = SkippedSyncS,
- [WFBitMapS] = WFBitMapT,
- [WFBitMapT] = WFBitMapS,
- [WFSyncUUID] = SyncSource,
- [SyncSource] = SyncTarget,
- [SyncTarget] = WFSyncUUID,
- [PausedSyncS] = PausedSyncT,
- [PausedSyncT] = PausedSyncS,
+
+ [StartingSyncS] = StartingSyncT,
+ [StartingSyncT] = StartingSyncS,
+
[conn_mask] = conn_mask,
};
@@ -2087,7 +2090,7 @@
if (mdev->p_uuid && mdev->state.conn <= Connected &&
inc_md_only(mdev,Attaching) ) {
- nconn=drbd_sync_handshake(mdev,peer_state.role);
+ nconn=drbd_sync_handshake(mdev,peer_state.role,peer_state.disk);
dec_local(mdev);
if(nconn == conn_mask) return FALSE;
@@ -2117,7 +2120,7 @@
if(nconn == Connected && ns.disk == Attaching) ns.disk = UpToDate;
rv = _drbd_set_state(mdev,ns,ChgStateVerbose);
spin_unlock_irq(&mdev->req_lock);
- after_state_ch(mdev,os,ns);
+ after_state_ch(mdev,os,ns,ChgStateVerbose);
if(rv < SS_Success) {
drbd_force_state(mdev,NS(conn,StandAlone));
@@ -2137,8 +2140,11 @@
{
Drbd_SyncUUID_Packet *p = (Drbd_SyncUUID_Packet*)h;
- D_ASSERT( mdev->state.conn == WFSyncUUID );
+ wait_event( mdev->cstate_wait,
+ mdev->state.conn < Connected || mdev->state.conn == WFSyncUUID);
+ // D_ASSERT( mdev->state.conn == WFSyncUUID );
+
ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
if (drbd_recv(mdev, h->payload, h->length) != h->length)
return FALSE;
@@ -2270,32 +2276,6 @@
return (size == 0);
}
-STATIC int receive_BecomeSyncTarget(drbd_dev *mdev, Drbd_Header *h)
-{
- int ok;
-
- ERR_IF(!mdev->bitmap) return FALSE;
- D_ASSERT(mdev->p_uuid);
- drbd_bm_lock(mdev);
- drbd_bm_set_all(mdev);
- drbd_bm_write(mdev);
- ok = drbd_request_state(mdev,NS(conn,WFSyncUUID));
- D_ASSERT( ok == SS_Success );
- drbd_bm_unlock(mdev);
- return ok == SS_Success ? TRUE : FALSE;
-}
-
-STATIC int receive_BecomeSyncSource(drbd_dev *mdev, Drbd_Header *h)
-{
- drbd_send_uuids(mdev);
- drbd_bm_lock(mdev);
- drbd_bm_set_all(mdev);
- drbd_bm_write(mdev);
- drbd_start_resync(mdev,SyncSource);
- drbd_bm_unlock(mdev);
- return TRUE;
-}
-
STATIC int receive_pause_resync(drbd_dev *mdev, Drbd_Header *h)
{
drbd_resync_pause(mdev, PeerImposed);
@@ -2331,7 +2311,7 @@
}
ns = mdev->state;
spin_unlock_irq(&mdev->req_lock);
- after_state_ch(mdev,os,ns);
+ after_state_ch(mdev,os,ns,ChgStateVerbose);
if( r >= 0 ) {
drbd_md_sync(mdev);
@@ -2367,8 +2347,6 @@
[ReportBitMap] = receive_bitmap,
[Ping] = NULL, // via msock: got_Ping,
[PingAck] = NULL, // via msock: got_PingAck,
- [BecomeSyncTarget] = receive_BecomeSyncTarget,
- [BecomeSyncSource] = receive_BecomeSyncSource,
[UnplugRemote] = receive_UnplugRemote,
[DataRequest] = receive_DataRequest,
[RSDataRequest] = receive_DataRequest, //receive_RSDataRequest,
Modified: trunk/drbd/drbd_strings.c
===================================================================
--- trunk/drbd/drbd_strings.c 2006-07-10 16:09:49 UTC (rev 2269)
+++ trunk/drbd/drbd_strings.c 2006-07-11 14:17:49 UTC (rev 2270)
@@ -34,6 +34,8 @@
[Connected] = "Connected",
[SkippedSyncS] = "SkippedSyncS",
[SkippedSyncT] = "SkippedSyncT",
+ [StartingSyncS] = "StartingSyncS",
+ [StartingSyncT] = "StartingSyncT",
[WFBitMapS] = "WFBitMapS",
[WFBitMapT] = "WFBitMapT",
[WFSyncUUID] = "WFSyncUUID",
Modified: trunk/drbd/drbd_worker.c
===================================================================
--- trunk/drbd/drbd_worker.c 2006-07-10 16:09:49 UTC (rev 2269)
+++ trunk/drbd/drbd_worker.c 2006-07-11 14:17:49 UTC (rev 2270)
@@ -759,7 +759,7 @@
* @side: Either SyncSource or SyncTarget
* Start the resync process. Called from process context only,
* either ioctl or drbd_receiver.
- * Note, this function might bring your directly into one of the
+ * Note, this function might bring you directly into one of the
* PausedSync* states.
*/
void drbd_start_resync(drbd_dev *mdev, drbd_conns_t side)
@@ -809,7 +809,7 @@
drbd_global_unlock();
if ( r == SS_Success ) {
- after_state_ch(mdev,os,ns);
+ after_state_ch(mdev,os,ns,ChgStateVerbose);
INFO("Began resync as %s (will sync %lu KB [%lu bits set]).\n",
conns_to_name(ns.conn),
Modified: trunk/drbd/linux/drbd.h
===================================================================
--- trunk/drbd/linux/drbd.h 2006-07-10 16:09:49 UTC (rev 2269)
+++ trunk/drbd/linux/drbd.h 2006-07-11 14:17:49 UTC (rev 2270)
@@ -223,6 +223,8 @@
Connected, // we have introduced each other
SkippedSyncS, // we should have synced, but user said no
SkippedSyncT,
+ StartingSyncS, // starting full sync by IOCTL.
+ StartingSyncT, // stariing full sync by IOCTL.
WFBitMapS,
WFBitMapT,
WFSyncUUID,
More information about the drbd-cvs
mailing list