[DRBD-cvs] svn commit by phil - r2270 - in trunk: . drbd drbd/linux - Made the "drbdadm invalidate" and "invalidate_remote" c

drbd-cvs at lists.linbit.com drbd-cvs at lists.linbit.com
Tue Jul 11 16:17:52 CEST 2006


Author: phil
Date: 2006-07-11 16:17:49 +0200 (Tue, 11 Jul 2006)
New Revision: 2270

Modified:
   trunk/ROADMAP
   trunk/drbd/drbd_fs.c
   trunk/drbd/drbd_int.h
   trunk/drbd/drbd_main.c
   trunk/drbd/drbd_receiver.c
   trunk/drbd/drbd_strings.c
   trunk/drbd/drbd_worker.c
   trunk/drbd/linux/drbd.h
Log:
Made the "drbdadm invalidate" and "invalidate_remote" commands
to work right in regard to:

* concurrent calls on both nodes
* calls on disconnected nodes


Modified: trunk/ROADMAP
===================================================================
--- trunk/ROADMAP	2006-07-10 16:09:49 UTC (rev 2269)
+++ trunk/ROADMAP	2006-07-11 14:17:49 UTC (rev 2270)
@@ -781,8 +781,9 @@
     Evaluate if it is possible to use it for starting resync. (invalidate)
     Evaluate it for the other cases...
 
-  60 % Is implemented. Changing the role to primary already uses this 
-       mechanism. Seems to work.
+  80 % Is implemented. Changing the role to primary already uses this 
+       mechanism. Starting resync with invalidate and invalidate_remote
+       now also uses this method.
 
 34 Improve the initial hand-shake, to identify the sockets (and TCP-
    links) by an initial message, and not only by the connection timming.

Modified: trunk/drbd/drbd_fs.c
===================================================================
--- trunk/drbd/drbd_fs.c	2006-07-10 16:09:49 UTC (rev 2269)
+++ trunk/drbd/drbd_fs.c	2006-07-11 14:17:49 UTC (rev 2270)
@@ -575,7 +575,7 @@
 		rv = _drbd_set_state(mdev, ns, ChgStateVerbose);
 		ns = mdev->state;
 		spin_unlock_irq(&mdev->req_lock);
-		after_state_ch(mdev,os,ns);
+		after_state_ch(mdev,os,ns,ChgStateVerbose);
 
 		if(rv >= SS_Success ) {
 			drbd_thread_start(&mdev->worker);
@@ -1100,7 +1100,7 @@
 	lc_free(mdev->resync);  mdev->resync = NULL;
 	lc_free(mdev->act_log); mdev->act_log = NULL;
 
-	after_state_ch(mdev, os, ns);
+	after_state_ch(mdev, os, ns, ChgStateVerbose);
 
 	return 0;
 }
@@ -1119,7 +1119,7 @@
 	}
 	ns = mdev->state;
 	spin_unlock_irq(&mdev->req_lock);
-	after_state_ch(mdev,os,ns);
+	after_state_ch(mdev,os,ns, ChgStateVerbose);
 
 	if( r == SS_NothingToDo ) return 0;
 	if( r == -999 ) {
@@ -1173,7 +1173,7 @@
 	r = _drbd_set_state(mdev, _NS(conn,StandAlone), 0);
 	ns = mdev->state;
 	spin_unlock_irq(&mdev->req_lock);
-	after_state_ch(mdev,os,ns);
+	after_state_ch(mdev,os,ns,0);
 
 	if ( r == SS_NothingToDo )  return 0;
 	if ( r == SS_PrimaryNOP ) {
@@ -1392,87 +1392,15 @@
 		goto out_unlocked;
 
 	case DRBD_IOCTL_INVALIDATE:
-		/* TODO
-		 * differentiate between different error cases,
-		 * or report the current connection state and flags back
-		 * to userspace */
-
-		/* disallow "invalidation" of local replica
-		 * when currently in primary state (would be a Bad Idea),
-		 * or during a running sync (won't make any sense) */
-
-		/* PRE TODO disallow invalidate if we are primary */
-		r = drbd_request_state(mdev,NS2(disk,Inconsistent,
-					        conn,WFBitMapT));
-
-		if( r == SS_NothingToDo ) { break; }
-		if( r < SS_Success ) {
-			err = -EINPROGRESS;
-			break;
-		}
-
-		/* avoid races with set_in_sync
-		 * for successfull mirrored writes
-		 */
-		wait_event(mdev->cstate_wait,
-			   atomic_read(&mdev->ap_bio_cnt)==0);
-
-		drbd_bm_lock(mdev); // racy...
-
-		drbd_md_set_flag(mdev,MDF_FullSync);
-		drbd_md_sync(mdev);
-
-		drbd_bm_set_all(mdev);
-		drbd_bm_write(mdev);
-
-		drbd_md_clear_flag(mdev,MDF_FullSync);
-		drbd_md_sync(mdev);
-
-		if (drbd_send_short_cmd(mdev,BecomeSyncSource)) {
-			int ok;
-			ok = drbd_request_state(mdev,NS(conn,WFSyncUUID));
-			D_ASSERT( ok == 1 );
-		}
-
-		drbd_bm_unlock(mdev);
-
+		r = drbd_request_state(mdev,NS2(conn,StartingSyncT,
+						disk,Inconsistent));
+		if ( r != SS_Success) err = -EINPROGRESS;
 		break;
 
 	case DRBD_IOCTL_INVALIDATE_REM:
-
-		/* PRE TODO disallow invalidate if we peer is primary */
-		/* remove EINVAL from error output... */
-		r = drbd_request_state(mdev,NS2(pdsk,Inconsistent,
-					        conn,WFBitMapS));
-
-		if( r == SS_NothingToDo ) { break; }
-		if( r < SS_Success ) {
-			err = -EINPROGRESS;
-			break;
-		}
-
-		drbd_md_set_flag(mdev,MDF_FullSync);
-		drbd_md_sync(mdev);
-
-		/* avoid races with set_in_sync
-		 * for successfull mirrored writes
-		 */
-		wait_event(mdev->cstate_wait,
-		     atomic_read(&mdev->ap_bio_cnt)==0);
-
-		drbd_bm_lock(mdev); // racy...
-
-		drbd_bm_set_all(mdev);
-		drbd_bm_write(mdev);
-
-		drbd_md_clear_flag(mdev,MDF_FullSync);
-
-		drbd_send_uuids(mdev);
-		drbd_send_short_cmd(mdev,BecomeSyncTarget);
-		drbd_start_resync(mdev,SyncSource);
-
-		drbd_bm_unlock(mdev);
-
+		r = drbd_request_state(mdev,NS2(conn,StartingSyncS,
+						pdsk,Inconsistent));
+		if ( r != SS_Success) err = -EINPROGRESS;
 		break;
 
 	case DRBD_IOCTL_OUTDATE_DISK:

Modified: trunk/drbd/drbd_int.h
===================================================================
--- trunk/drbd/drbd_int.h	2006-07-10 16:09:49 UTC (rev 2269)
+++ trunk/drbd/drbd_int.h	2006-07-11 14:17:49 UTC (rev 2270)
@@ -842,7 +842,8 @@
 			       enum chg_state_flags);
 extern int _drbd_set_state(drbd_dev*, drbd_state_t, enum chg_state_flags );
 extern void print_st_err(drbd_dev*, drbd_state_t, drbd_state_t, int );
-extern void after_state_ch(drbd_dev* mdev, drbd_state_t os, drbd_state_t ns);
+extern void after_state_ch(drbd_dev* mdev, drbd_state_t os, drbd_state_t ns,
+			   enum chg_state_flags);
 extern void drbd_thread_start(struct Drbd_thread *thi);
 extern void _drbd_thread_stop(struct Drbd_thread *thi, int restart, int wait);
 extern void drbd_free_resources(drbd_dev *mdev);

Modified: trunk/drbd/drbd_main.c
===================================================================
--- trunk/drbd/drbd_main.c	2006-07-10 16:09:49 UTC (rev 2269)
+++ trunk/drbd/drbd_main.c	2006-07-11 14:17:49 UTC (rev 2270)
@@ -79,6 +79,7 @@
 	struct drbd_work w;
 	drbd_state_t os;
 	drbd_state_t ns;
+	enum chg_state_flags flags;
 };
 
 int drbdd_init(struct Drbd_thread*);
@@ -531,14 +532,14 @@
 /** 
  * cl_wide_st_chg:
  * Returns TRUE if this state change should be preformed as a cluster wide
- * transaction. Of courese it returns 0 as soon as the connection is lost.
+ * transaction. Of course it returns 0 as soon as the connection is lost.
  */ 
 STATIC int cl_wide_st_chg(drbd_dev* mdev, drbd_state_t os, drbd_state_t ns)
 {
-	return ( ns.conn >= Connected &&
+	return ( os.conn >= Connected && ns.conn >= Connected &&
 		 ( ( os.role != Primary && ns.role == Primary ) ||
-		   // ( os.conn != SyncSource && ns.role == SyncSource ) ||
-		   // ( os.conn != SyncTarget && ns.role == SyncTarget ) ||
+		   ( os.conn != StartingSyncT && ns.conn == StartingSyncT ) ||
+		   ( os.conn != StartingSyncS && ns.conn == StartingSyncS ) ||
 		   // ( os.disk != Diskless && ns.role == Diskless ) ||
 		   // ( os.conn != TearDown && ns.conn == TearDown ) ||
 		   0
@@ -558,7 +559,7 @@
 	rv = _drbd_set_state(mdev, ns, f);
 	ns = mdev->state;
 	spin_unlock_irqrestore(&mdev->req_lock,flags);
-	after_state_ch(mdev,os,ns);
+	if (rv == SS_Success) after_state_ch(mdev,os,ns,f);
 
 	return rv;
 }
@@ -624,7 +625,10 @@
 		}
 
 		drbd_state_lock(mdev);
-		drbd_send_state_req(mdev,mask,val);
+		if( !drbd_send_state_req(mdev,mask,val) ) {
+			drbd_state_unlock(mdev);
+			return SS_CW_FailedByPeer;
+		}
 
 		wait_event(mdev->cstate_wait,(rv=_req_st_cond(mdev,mask,val)));
 
@@ -644,7 +648,7 @@
 	ns = mdev->state;
 	spin_unlock_irqrestore(&mdev->req_lock,flags);
 
-	if (rv == SS_Success) after_state_ch(mdev,os,ns);
+	if (rv == SS_Success) after_state_ch(mdev,os,ns,f);
 
 	return rv;
 }
@@ -748,6 +752,13 @@
 
 	os = mdev->state;
 
+	/* Early state sanitising. Disallow the invalidate ioctl to connect  */
+	if( (ns.conn == StartingSyncS || ns.conn == StartingSyncT) &&
+		os.conn < Connected ) {
+		ns.conn = os.conn;
+		ns.pdsk = os.pdsk;
+	}
+
 	if( ns.i == os.i ) return SS_NothingToDo;
 
 	fp = DontCare;
@@ -901,6 +912,7 @@
 		if(ascw) {
 			ascw->os = os;
 			ascw->ns = ns;
+			ascw->flags = flags;
 			ascw->w.cb = w_after_state_ch;
 			_drbd_queue_work_front(&mdev->data.work,&ascw->w);
 		} else {
@@ -916,13 +928,14 @@
 	struct after_state_chg_work* ascw;
 
 	ascw = (struct after_state_chg_work*) w;
-	after_state_ch(mdev, ascw->os, ascw->ns);
+	after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
 	kfree(ascw);
 
 	return 1;
 }
 
-void after_state_ch(drbd_dev* mdev, drbd_state_t os, drbd_state_t ns)
+void after_state_ch(drbd_dev* mdev, drbd_state_t os, drbd_state_t ns,
+		    enum chg_state_flags flags)
 {
 	enum fencing_policy fp;
 	u32 mdf;
@@ -1021,6 +1034,57 @@
 		drbd_send_short_cmd(mdev,ResumeResync);
 	}
 
+	/* We are in the progress to start a full sync... */
+	if ( ( os.conn != StartingSyncT && ns.conn == StartingSyncT ) ||
+	     ( os.conn != StartingSyncS && ns.conn == StartingSyncS ) ) {
+
+		/* avoid races with set_in_sync
+		 * for successfull mirrored writes
+		 */
+		wait_event(mdev->cstate_wait,
+			   atomic_read(&mdev->ap_bio_cnt)==0);
+
+		drbd_bm_lock(mdev); // racy...
+
+		drbd_md_set_flag(mdev,MDF_FullSync);
+		drbd_md_sync(mdev);
+
+		drbd_bm_set_all(mdev);
+		drbd_bm_write(mdev);
+
+		drbd_md_clear_flag(mdev,MDF_FullSync);
+		drbd_md_sync(mdev);
+
+		drbd_bm_unlock(mdev);
+
+		if (ns.conn == StartingSyncT) {
+			spin_lock_irq(&mdev->req_lock);
+			_drbd_set_state(mdev,_NS(conn,WFSyncUUID), 
+					ChgStateVerbose | ScheduleAfter );
+			spin_unlock_irq(&mdev->req_lock);
+		} else /* StartingSyncS */ {
+			drbd_start_resync(mdev,SyncSource);
+		}
+	}
+
+	/* We are invalidating our self... */
+	if ( os.conn < Connected && ns.conn < Connected &&
+	       os.disk > Inconsistent && ns.disk == Inconsistent ) {
+		drbd_bm_lock(mdev); // racy...
+
+		drbd_md_set_flag(mdev,MDF_FullSync);
+		drbd_md_sync(mdev);
+
+		drbd_bm_set_all(mdev);
+		drbd_bm_write(mdev);
+
+		drbd_md_clear_flag(mdev,MDF_FullSync);
+		drbd_md_sync(mdev);
+
+		drbd_bm_unlock(mdev);		
+	}
+
+
 	/* it feels better to have the module_put last ... */
 	if ( (os.disk > Diskless || os.conn > StandAlone) &&
 	     ns.disk == Diskless && ns.conn == StandAlone ) {

Modified: trunk/drbd/drbd_receiver.c
===================================================================
--- trunk/drbd/drbd_receiver.c	2006-07-10 16:09:49 UTC (rev 2269)
+++ trunk/drbd/drbd_receiver.c	2006-07-11 14:17:49 UTC (rev 2270)
@@ -1696,7 +1696,8 @@
 /* drbd_sync_handshake() returns the new conn state on success, or
    conn_mask (-1) on failure.
  */
-STATIC drbd_conns_t drbd_sync_handshake(drbd_dev *mdev, drbd_role_t peer_role)
+STATIC drbd_conns_t drbd_sync_handshake(drbd_dev *mdev, drbd_role_t peer_role,
+					drbd_disks_t peer_disk)
 {
 	int hg;
 	drbd_conns_t rv = conn_mask;
@@ -1734,6 +1735,13 @@
 			WARN("Split-Brain detected, manually solved.\n");
 		}
 	}
+	
+	if (hg == 0) {
+		// This is needed in case someone does an invalidate on a
+		// disconnected node.
+		if(mdev->state.disk==Inconsistent && peer_disk>Inconsistent) hg=-1;
+		if(mdev->state.disk>Inconsistent && peer_disk==Inconsistent) hg= 1;
+	}
 
 	if (hg == -1000) {
 		ALERT("Unrelated data, dropping connection!\n");
@@ -1955,7 +1963,7 @@
 	drbd_bm_unlock(mdev); // }
 
 	if (mdev->p_uuid && mdev->state.conn <= Connected && inc_local(mdev)) {
-		nconn=drbd_sync_handshake(mdev,mdev->state.peer);
+		nconn=drbd_sync_handshake(mdev,mdev->state.peer,mdev->state.pdsk);
 		dec_local(mdev);
 
 		if(nconn == conn_mask) return FALSE;
@@ -2018,15 +2026,10 @@
 
 	static drbd_conns_t c_tab[] = {
 		[Connected] = Connected,
-		[SkippedSyncS] = SkippedSyncT,
-		[SkippedSyncT] = SkippedSyncS,
-		[WFBitMapS] = WFBitMapT,
-		[WFBitMapT] = WFBitMapS,
-		[WFSyncUUID] = SyncSource,
-		[SyncSource] = SyncTarget,
-		[SyncTarget] = WFSyncUUID,
-		[PausedSyncS] = PausedSyncT,
-		[PausedSyncT] = PausedSyncS,
+
+		[StartingSyncS] = StartingSyncT,
+		[StartingSyncT] = StartingSyncS,
+
 		[conn_mask]   = conn_mask,
 	};
 
@@ -2087,7 +2090,7 @@
 
 	if (mdev->p_uuid && mdev->state.conn <= Connected && 
 	    inc_md_only(mdev,Attaching) ) {
-		nconn=drbd_sync_handshake(mdev,peer_state.role);
+		nconn=drbd_sync_handshake(mdev,peer_state.role,peer_state.disk);
 		dec_local(mdev);
 
 		if(nconn == conn_mask) return FALSE;
@@ -2117,7 +2120,7 @@
 	if(nconn == Connected && ns.disk == Attaching) ns.disk = UpToDate;
 	rv = _drbd_set_state(mdev,ns,ChgStateVerbose);
 	spin_unlock_irq(&mdev->req_lock);
-	after_state_ch(mdev,os,ns);
+	after_state_ch(mdev,os,ns,ChgStateVerbose);
 
 	if(rv < SS_Success) {
 		drbd_force_state(mdev,NS(conn,StandAlone));
@@ -2137,8 +2140,11 @@
 {
 	Drbd_SyncUUID_Packet *p = (Drbd_SyncUUID_Packet*)h;
 
-	D_ASSERT( mdev->state.conn == WFSyncUUID );
+	wait_event( mdev->cstate_wait, 
+		    mdev->state.conn < Connected || mdev->state.conn == WFSyncUUID);
 
+	// D_ASSERT( mdev->state.conn == WFSyncUUID );
+
 	ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
 	if (drbd_recv(mdev, h->payload, h->length) != h->length)
 		return FALSE;
@@ -2270,32 +2276,6 @@
 	return (size == 0);
 }
 
-STATIC int receive_BecomeSyncTarget(drbd_dev *mdev, Drbd_Header *h)
-{
-	int ok;
-
-	ERR_IF(!mdev->bitmap) return FALSE;
-	D_ASSERT(mdev->p_uuid);
-	drbd_bm_lock(mdev);
-	drbd_bm_set_all(mdev);
-	drbd_bm_write(mdev);
-	ok = drbd_request_state(mdev,NS(conn,WFSyncUUID));
-	D_ASSERT( ok == SS_Success );
-	drbd_bm_unlock(mdev);
-	return ok == SS_Success ? TRUE : FALSE;
-}
-
-STATIC int receive_BecomeSyncSource(drbd_dev *mdev, Drbd_Header *h)
-{
-	drbd_send_uuids(mdev);
-	drbd_bm_lock(mdev);
-	drbd_bm_set_all(mdev);
-	drbd_bm_write(mdev);
-	drbd_start_resync(mdev,SyncSource);
-	drbd_bm_unlock(mdev);
-	return TRUE;
-}
-
 STATIC int receive_pause_resync(drbd_dev *mdev, Drbd_Header *h)
 {
 	drbd_resync_pause(mdev, PeerImposed);
@@ -2331,7 +2311,7 @@
 	}
 	ns = mdev->state;
 	spin_unlock_irq(&mdev->req_lock);
-	after_state_ch(mdev,os,ns);
+	after_state_ch(mdev,os,ns,ChgStateVerbose);
 
 	if( r >= 0 ) {
 		drbd_md_sync(mdev);
@@ -2367,8 +2347,6 @@
 	[ReportBitMap]     = receive_bitmap,
 	[Ping]             = NULL, // via msock: got_Ping,
 	[PingAck]          = NULL, // via msock: got_PingAck,
-	[BecomeSyncTarget] = receive_BecomeSyncTarget,
-	[BecomeSyncSource] = receive_BecomeSyncSource,
 	[UnplugRemote]     = receive_UnplugRemote,
 	[DataRequest]      = receive_DataRequest,
 	[RSDataRequest]    = receive_DataRequest, //receive_RSDataRequest,

Modified: trunk/drbd/drbd_strings.c
===================================================================
--- trunk/drbd/drbd_strings.c	2006-07-10 16:09:49 UTC (rev 2269)
+++ trunk/drbd/drbd_strings.c	2006-07-11 14:17:49 UTC (rev 2270)
@@ -34,6 +34,8 @@
 	[Connected]      = "Connected",
 	[SkippedSyncS]   = "SkippedSyncS",
 	[SkippedSyncT]   = "SkippedSyncT",
+	[StartingSyncS]  = "StartingSyncS",
+	[StartingSyncT]  = "StartingSyncT",
 	[WFBitMapS]      = "WFBitMapS",
 	[WFBitMapT]      = "WFBitMapT",
 	[WFSyncUUID]     = "WFSyncUUID",

Modified: trunk/drbd/drbd_worker.c
===================================================================
--- trunk/drbd/drbd_worker.c	2006-07-10 16:09:49 UTC (rev 2269)
+++ trunk/drbd/drbd_worker.c	2006-07-11 14:17:49 UTC (rev 2270)
@@ -759,7 +759,7 @@
  * @side: Either SyncSource or SyncTarget
  * Start the resync process. Called from process context only,
  * either ioctl or drbd_receiver.
- * Note, this function might bring your directly into one of the
+ * Note, this function might bring you directly into one of the
  * PausedSync* states.
  */
 void drbd_start_resync(drbd_dev *mdev, drbd_conns_t side)
@@ -809,7 +809,7 @@
 	drbd_global_unlock();
 
 	if ( r == SS_Success ) {
-		after_state_ch(mdev,os,ns);
+		after_state_ch(mdev,os,ns,ChgStateVerbose);
 
 		INFO("Began resync as %s (will sync %lu KB [%lu bits set]).\n",
 		     conns_to_name(ns.conn),

Modified: trunk/drbd/linux/drbd.h
===================================================================
--- trunk/drbd/linux/drbd.h	2006-07-10 16:09:49 UTC (rev 2269)
+++ trunk/drbd/linux/drbd.h	2006-07-11 14:17:49 UTC (rev 2270)
@@ -223,6 +223,8 @@
 	Connected,      // we have introduced each other
 	SkippedSyncS,   // we should have synced, but user said no
 	SkippedSyncT,
+	StartingSyncS,  // starting full sync by IOCTL.
+	StartingSyncT,  // starting full sync by IOCTL.
 	WFBitMapS,
 	WFBitMapT,
 	WFSyncUUID,



More information about the drbd-cvs mailing list