[DRBD-cvs] svn commit by phil - r2048 - in trunk: . drbd - Implemented item 27 of the ROADMAP, removed drbd_md_wri

drbd-cvs at lists.linbit.com drbd-cvs at lists.linbit.com
Sat Jan 14 18:28:04 CET 2006


Author: phil
Date: 2006-01-14 18:28:02 +0100 (Sat, 14 Jan 2006)
New Revision: 2048

Modified:
   trunk/ROADMAP
   trunk/drbd/drbd_fs.c
   trunk/drbd/drbd_int.h
   trunk/drbd/drbd_main.c
   trunk/drbd/drbd_receiver.c
   trunk/drbd/drbd_worker.c
Log:
Implemented item 27 of the ROADMAP, removed drbd_md_write() and replaced
this with drbd_md_sync(). I guess this was Lars' idea when he created
the MD_DIRTY flag.

Whenever something is modified that should be written to the meta-data
super block later, we mark this by calling drbd_md_mark_dirty().

A timer ensures that the super block is written out at the latest one
second later. The timer is intended as a debugging aid, to find the code
paths where the call to drbd_md_sync() is missing.



Modified: trunk/ROADMAP
===================================================================
--- trunk/ROADMAP	2006-01-10 14:30:17 UTC (rev 2047)
+++ trunk/ROADMAP	2006-01-14 17:28:02 UTC (rev 2048)
@@ -660,6 +660,25 @@
    that always keeps the on disk-metadata up-to-date implicit. 
    Calling drbd_md_write() explicit is too errorprone.
 
+   90% DONE [ Needs testing, drbd_md_sync() is the new call ]
+
+28 Implement a kind of 'call home', a single HTTP get request, that
+   gets counted in a data base. The initiator calculates a simple
+   hash over the machine and resource names. Each time a meta-data
+   set gets generated, the 'call home' is initiated. The user might
+   of course opt out of this.
+
+29 Make all the commands have 'expert' switches to also show
+   the hidden sub-commands in the usage.
+
+30 The current drbdadm_scanner is 1MB in source and as binary.
+   Use a _basic_ flex scanner, and a hand written parser for superb
+   error reporting.
+
+31 Resizing several GB results in ko-count timeouts, maybe since the
+   secondary node does the enlargement of the bitmap in the receiver (?)
+
+
 plus-banches:
 ----------------------
 

Modified: trunk/drbd/drbd_fs.c
===================================================================
--- trunk/drbd/drbd_fs.c	2006-01-10 14:30:17 UTC (rev 2047)
+++ trunk/drbd/drbd_fs.c	2006-01-14 17:28:02 UTC (rev 2048)
@@ -87,7 +87,8 @@
 		break;
 	}
 }
-
+/* You should call drbd_md_sync() after calling this.
+ */
 STATIC int do_determin_dev_size(struct Drbd_Conf* mdev);
 int drbd_determin_dev_size(struct Drbd_Conf* mdev)
 {
@@ -122,7 +123,7 @@
 		drbd_al_shrink(mdev); // All extents inactive.
 		drbd_bm_write(mdev);  // write bitmap
 		// Write mdev->bc->md.la_size_sect to [possibly new position on] disk.
-		drbd_md_write(mdev);
+		drbd_md_mark_dirty(mdev);
 	}
   out:
 	lc_unlock(mdev->act_log);
@@ -230,7 +231,8 @@
  * drbd_check_al_size:
  * checks that the al lru is of requested size, and if neccessary tries to
  * allocate a new one. returns -EBUSY if current al lru is still used,
- * -ENOMEM when allocation failed, and 0 on success.
+ * -ENOMEM when allocation failed, and 0 on success. You should call
+ * drbd_md_sync() after you called this function.
  */
 STATIC int drbd_check_al_size(drbd_dev *mdev)
 {
@@ -276,7 +278,7 @@
 	} else {
 		if (t) lc_free(t);
 	}
-	drbd_md_write(mdev);
+	drbd_md_mark_dirty(mdev);	//we changed mdev->act_log->nr_elemens
 	return 0;
 }
 
@@ -478,7 +480,6 @@
 		drbd_bm_set_all(mdev);
 		drbd_bm_write(mdev);
 		drbd_md_clear_flag(mdev,MDF_FullSync);
-		drbd_md_write(mdev);
 	} else {
 		/* FIXME this still does not propagate io errors! */
 		drbd_bm_read(mdev);
@@ -521,6 +522,8 @@
 	}
 
 	drbd_bm_unlock(mdev);
+	drbd_md_sync(mdev);
+
 	return 0;
 
  release_bdev3_fail_ioctl:
@@ -866,8 +869,6 @@
 	 * but that means someone is misusing DRBD...
 	 * */
 
-	set_bit(MD_DIRTY,&mdev->flags); // we are changing state!
-
 	if (newstate & Secondary) {
 		set_disk_ro(mdev->vdisk, TRUE );
 	} else {
@@ -890,8 +891,6 @@
 	if(mdev->state.disk > Diskless && (newstate & Secondary)) {
 		drbd_al_to_on_disk_bm(mdev);
 	}
-	/* Primary indicator has changed in any case. */
-	drbd_md_write(mdev);
 
 	if (mdev->state.conn >= WFReportParams) {
 		/* if this was forced, we should consider sync */
@@ -987,6 +986,7 @@
 	mdev->sync_conf.al_extents = sc.al_extents;
 
 	err = drbd_check_al_size(mdev);
+	drbd_md_sync(mdev);
 	if (err) return err;
 
 	if (mdev->state.conn >= Connected)
@@ -1052,7 +1052,7 @@
 	if( r == -999 ) {
 		return -EINVAL;
 	}
-	after_state_ch(mdev,os,ns); // TODO decide if neccesarry.
+	after_state_ch(mdev,os,ns);
 
 	if( r < SS_Success ) {
 		err = put_user(r, reason);
@@ -1060,8 +1060,6 @@
 		return err;
 	}
 
-	drbd_md_write(mdev);
-
 	return 0;
 }
 
@@ -1230,7 +1228,7 @@
 		mdev->bc->u_size = (sector_t)(u64)arg;
 		drbd_bm_lock(mdev);
 		drbd_determin_dev_size(mdev);
-		drbd_md_write(mdev); // Write mdev->bc->md.la_size_sect to disk.
+		drbd_md_sync(mdev);
 		drbd_bm_unlock(mdev);
 		if (mdev->state.conn == Connected) {
 			drbd_send_uuids(mdev); // to start sync...
@@ -1348,13 +1346,13 @@
 		drbd_bm_lock(mdev); // racy...
 
 		drbd_md_set_flag(mdev,MDF_FullSync);
-		drbd_md_write(mdev);
+		drbd_md_sync(mdev);
 
 		drbd_bm_set_all(mdev);
 		drbd_bm_write(mdev);
 
 		drbd_md_clear_flag(mdev,MDF_FullSync);
-		drbd_md_write(mdev);
+		drbd_md_sync(mdev);
 
 		if (drbd_send_short_cmd(mdev,BecomeSyncSource)) {
 			int ok;
@@ -1380,7 +1378,7 @@
 		}
 
 		drbd_md_set_flag(mdev,MDF_FullSync);
-		drbd_md_write(mdev);
+		drbd_md_sync(mdev);
 
 		/* avoid races with set_in_sync
 		 * for successfull mirrored writes
@@ -1394,7 +1392,6 @@
 		drbd_bm_write(mdev);
 
 		drbd_md_clear_flag(mdev,MDF_FullSync);
-		drbd_md_write(mdev);
 
 		drbd_send_uuids(mdev);
 		drbd_send_short_cmd(mdev,BecomeSyncTarget);

Modified: trunk/drbd/drbd_int.h
===================================================================
--- trunk/drbd/drbd_int.h	2006-01-10 14:30:17 UTC (rev 2047)
+++ trunk/drbd/drbd_int.h	2006-01-14 17:28:02 UTC (rev 2048)
@@ -680,7 +680,7 @@
 
 	u64 la_size_sect;	/* last agreed size, unit sectors */
 	u64 uuid[UUID_SIZE];
-	unsigned long flags;
+	u32 flags;
 	u32 md_size_sect;
 
 	s32 al_offset;	/* signed relative sector offset to al area */
@@ -729,8 +729,10 @@
 	volatile unsigned int ko_count;
 	struct drbd_work  resync_work,
 			  barrier_work,
-			  unplug_work;
+			  unplug_work,
+	                  md_sync_work;
 	struct timer_list resync_timer;
+	struct timer_list md_sync_timer;
 
 	drbd_state_t state;
 	wait_queue_head_t cstate_wait; // TODO Rename into "misc_wait".
@@ -862,7 +864,7 @@
 extern void drbd_mdev_cleanup(drbd_dev *mdev);
 
 // drbd_meta-data.c (still in drbd_main.c)
-extern void drbd_md_write(drbd_dev *mdev);
+extern void drbd_md_sync(drbd_dev *mdev);
 extern int  drbd_md_read(drbd_dev *mdev, struct drbd_backing_dev * bdev);
 // maybe define them below as inline?
 extern void drbd_uuid_set(drbd_dev *mdev,int idx, u64 val);
@@ -872,6 +874,7 @@
 extern void drbd_md_set_flag(drbd_dev *mdev, int flags);
 extern void drbd_md_clear_flag(drbd_dev *mdev, int flags);
 extern int drbd_md_test_flag(drbd_dev *mdev, int flag);
+extern void drbd_md_mark_dirty(drbd_dev *mdev);
 
 /* Meta data layout
    We reserve a 128MB Block (4k aligned)

Modified: trunk/drbd/drbd_main.c
===================================================================
--- trunk/drbd/drbd_main.c	2006-01-10 14:30:17 UTC (rev 2047)
+++ trunk/drbd/drbd_main.c	2006-01-14 17:28:02 UTC (rev 2048)
@@ -91,6 +91,8 @@
 STATIC int drbd_open(struct inode *inode, struct file *file);
 STATIC int drbd_close(struct inode *inode, struct file *file);
 STATIC int w_after_state_ch(drbd_dev *mdev, struct drbd_work *w, int unused);
+STATIC int w_md_sync(drbd_dev *mdev, struct drbd_work *w, int unused);
+STATIC void md_sync_timer_fn(unsigned long data);
 
 MODULE_AUTHOR("Philipp Reisner <phil at linbit.com>, Lars Ellenberg <lars at linbit.com>");
 MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
@@ -504,10 +506,7 @@
 
 	D_ASSERT(drbd_md_test_flag(mdev,MDF_FullSync));
 	D_ASSERT(!drbd_md_test_flag(mdev,MDF_Consistent));
-	if (test_bit(MD_DIRTY,&mdev->flags)) {
-		// try to get "inconsistent, need full sync" to MD
-		drbd_md_write(mdev);
-	}
+	drbd_md_sync(mdev);
 
 	if ( wait_event_interruptible_timeout(mdev->cstate_wait,
 		     atomic_read(&mdev->local_cnt) == 0 , HZ ) <= 0) {
@@ -753,6 +752,20 @@
 
 void after_state_ch(drbd_dev* mdev, drbd_state_t os, drbd_state_t ns)
 {
+	u32 mdf;
+
+	mdf = mdev->bc->md.flags & ~(MDF_Consistent|MDF_PrimaryInd|
+				     MDF_ConnectedInd|MDF_WasUpToDate);
+	if (mdev->state.role == Primary)        mdf |= MDF_PrimaryInd;
+	if (mdev->state.conn >= WFReportParams) mdf |= MDF_ConnectedInd;
+	if (mdev->state.disk >  Inconsistent)   mdf |= MDF_Consistent;
+	if (mdev->state.disk >  Outdated)       mdf |= MDF_WasUpToDate;
+
+	if( mdf != mdev->bc->md.flags) {
+		mdev->bc->md.flags = mdf;
+		drbd_md_mark_dirty(mdev);
+	}
+
 	/* Here we have the actions that are performed after a
 	   state change. This function might sleep */
 
@@ -773,7 +786,6 @@
 			/* Only do it if we have not yet done it... */
 			INFO("Creating new current UUID\n");
 			drbd_uuid_new_current(mdev);
-			drbd_md_write(mdev);
 		}
 		if (ns.peer == Primary ) { 
  			/* Note: The condition ns.peer == Primary implies
@@ -786,7 +798,6 @@
 			INFO("Creating new current UUID [no BitMap]\n");
 			get_random_bytes(&uuid, sizeof(u64));
 			drbd_uuid_set(mdev, Current, uuid);
-			drbd_md_write(mdev);
 		}
 	}
 
@@ -807,6 +818,7 @@
 	        ns.aftr_isp == 0 && ns.user_isp == 0   ) {
 		drbd_send_short_cmd(mdev,ResumeResync);
 	}
+	drbd_md_sync(mdev);
 }
 
 
@@ -1159,7 +1171,7 @@
 			goto out;
 		}
 		drbd_md_clear_flag(mdev,MDF_FullSync);
-		drbd_md_write(mdev);
+		drbd_md_sync(mdev);
 	}
 
 	/*
@@ -1791,13 +1803,18 @@
 	INIT_LIST_HEAD(&mdev->resync_work.list);
 	INIT_LIST_HEAD(&mdev->barrier_work.list);
 	INIT_LIST_HEAD(&mdev->unplug_work.list);
+	INIT_LIST_HEAD(&mdev->md_sync_work.list);
 	INIT_LIST_HEAD(&mdev->discard);
 	mdev->resync_work.cb  = w_resync_inactive;
 	mdev->barrier_work.cb = w_try_send_barrier;
 	mdev->unplug_work.cb  = w_send_write_hint;
+	mdev->md_sync_work.cb = w_md_sync;
 	init_timer(&mdev->resync_timer);
+	init_timer(&mdev->md_sync_timer);
 	mdev->resync_timer.function = resync_timer_fn;
 	mdev->resync_timer.data = (unsigned long) mdev;
+	mdev->md_sync_timer.function = md_sync_timer_fn;
+	mdev->md_sync_timer.data = (unsigned long) mdev;
 
 	init_waitqueue_head(&mdev->cstate_wait);
 	init_waitqueue_head(&mdev->ee_wait);
@@ -2392,20 +2409,18 @@
 	u32 bm_offset;         // offset to the bitmap, from here
 } __attribute((packed));
 
-/*
-
-FIXME md_io might fail unnoticed sometimes ...
-
-*/
-void drbd_md_write(drbd_dev *mdev)
+/** 
+ * drbd_md_sync:
+ * Writes the meta data super block if the MD_DIRTY flag bit is set.
+ */ 
+void drbd_md_sync(drbd_dev *mdev)
 {
 	struct meta_data_on_disk * buffer;
-	u32 flags;
 	sector_t sector;
 	int i;
 
-	if(!mdev->bc) return; // because of drbd_check_al_size(mdev) in 
-			      // drbd_ioctl_set_disk() should be removed....
+	if (!test_and_clear_bit(MD_DIRTY,&mdev->flags)) return;
+	del_timer(&mdev->resync_timer);
 
 	ERR_IF(!inc_md_only(mdev,Attaching)) return;
 
@@ -2413,18 +2428,10 @@
 	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
 	memset(buffer,0,512);
 
-	flags = mdev->bc->md.flags & ~(MDF_Consistent|MDF_PrimaryInd|
-				   MDF_ConnectedInd|MDF_WasUpToDate);
-	if (mdev->state.role == Primary)        flags |= MDF_PrimaryInd;
-	if (mdev->state.conn >= WFReportParams) flags |= MDF_ConnectedInd;
-	if (mdev->state.disk >  Inconsistent)   flags |= MDF_Consistent;
-	if (mdev->state.disk >  Outdated)       flags |= MDF_WasUpToDate;
-	mdev->bc->md.flags = flags;
-
 	buffer->la_size=cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
 	for (i = Current; i < UUID_SIZE; i++)
 		buffer->uuid[i]=cpu_to_be64(mdev->bc->md.uuid[i]);
-	buffer->flags = cpu_to_be32(flags);
+	buffer->flags = cpu_to_be32(mdev->bc->md.flags);
 	buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);
 
 	buffer->md_size_sect  = cpu_to_be32(mdev->bc->md.md_size_sect);
@@ -2533,6 +2540,18 @@
 	return rv;
 }
 
+/**
+ * drbd_md_mark_dirty:
+ * Call this function if you change anything that should be written to
+ * the meta-data super block. This function sets MD_DIRTY, and arms
+ * md_sync_timer so that a forgotten drbd_md_sync() is caught one second later.
+ */
+void drbd_md_mark_dirty(drbd_dev *mdev)
+{
+	set_bit(MD_DIRTY,&mdev->flags);
+	mod_timer(&mdev->md_sync_timer,jiffies + HZ );
+}
+
 static void drbd_uuid_move_history(drbd_dev *mdev)
 {
 	int i;
@@ -2549,6 +2568,7 @@
 	} else {
 		mdev->bc->md.uuid[idx] = val & ~((u64)1);
 	}
+	drbd_md_mark_dirty(mdev);
 }
 
 
@@ -2571,6 +2591,7 @@
 	} else {
 		mdev->bc->md.uuid[Current] &= ~((u64)1);
 	}
+	drbd_md_mark_dirty(mdev);
 }
 
 void drbd_uuid_set_bm(drbd_dev *mdev, u64 val)
@@ -2587,6 +2608,7 @@
 		mdev->bc->md.uuid[Bitmap] = val;
 		mdev->bc->md.uuid[Bitmap] &= ~((u64)1);
 	}
+	drbd_md_mark_dirty(mdev);
 }
 
 
@@ -2594,7 +2616,7 @@
 {
 	MUST_HOLD(mdev->req_lock);
 	if ( (mdev->bc->md.flags & flag) != flag) {
-		set_bit(MD_DIRTY,&mdev->flags);
+		drbd_md_mark_dirty(mdev);
 		mdev->bc->md.flags |= flag;
 	}
 }
@@ -2602,7 +2624,7 @@
 {
 	MUST_HOLD(mdev->req_lock);
 	if ( (mdev->bc->md.flags & flag) != 0 ) {
-		set_bit(MD_DIRTY,&mdev->flags);
+		drbd_md_mark_dirty(mdev);
 		mdev->bc->md.flags &= ~flag;
 	}
 }
@@ -2612,5 +2634,21 @@
 	return ((mdev->bc->md.flags & flag) != 0);
 }
 
+STATIC void md_sync_timer_fn(unsigned long data)
+{
+	// Timer callback: warns, then queues md_sync_work (callback w_md_sync).
+	drbd_dev* mdev = (drbd_dev*) data; // registered as md_sync_timer.data
+	WARN("md_sync_timer expired!\n");
+	drbd_queue_work_front(mdev,&mdev->data.work,&mdev->md_sync_work);
+}
+
+STATIC int w_md_sync(drbd_dev *mdev, struct drbd_work *w, int unused)
+{
+	// Work callback of md_sync_work; w and unused are ignored, always returns 1.
+	WARN("Worker calls drbd_md_sync() now.\n");
+	drbd_md_sync(mdev);
+	return 1;
+}
+
 module_init(drbd_init)
 module_exit(drbd_cleanup)

Modified: trunk/drbd/drbd_receiver.c
===================================================================
--- trunk/drbd/drbd_receiver.c	2006-01-10 14:30:17 UTC (rev 2047)
+++ trunk/drbd/drbd_receiver.c	2006-01-14 17:28:02 UTC (rev 2048)
@@ -1703,13 +1703,13 @@
 
 	if (abs(hg) >= 2) {
 		drbd_md_set_flag(mdev,MDF_FullSync);
-		drbd_md_write(mdev);
+		drbd_md_sync(mdev);
 
 		drbd_bm_set_all(mdev);
 		drbd_bm_write(mdev);
 
 		drbd_md_clear_flag(mdev,MDF_FullSync);
-		drbd_md_write(mdev);
+		drbd_md_sync(mdev);
 	}
 
 	if (hg > 0) { // become sync source.
@@ -1991,7 +1991,7 @@
 	mdev->net_conf->want_lose = 0;
 
 	/* FIXME assertion for (gencounts do not diverge) */
-	drbd_md_write(mdev); // update connected indicator, la_size, ...
+	drbd_md_sync(mdev); // update connected indicator, la_size, ...
 
 	return TRUE;
 }
@@ -2196,7 +2196,7 @@
 	after_state_ch(mdev,os,ns);
 
 	if( r >= 0 ) {
-		drbd_md_write(mdev);
+		drbd_md_sync(mdev);
 		drbd_send_short_cmd(mdev, OutdatedReply);
 		return TRUE;
 	}
@@ -2211,7 +2211,7 @@
 	r = drbd_request_state(mdev,NS2(pdsk,Outdated,conn,TearDown));
 	WARN("r=%d\n",r);
 	D_ASSERT(r >= SS_Success);
-	drbd_md_write(mdev); // because drbd_request_state created a new UUID.
+	drbd_md_sync(mdev); // because drbd_request_state created a new UUID.
 
 	return TRUE;
 }
@@ -2418,7 +2418,7 @@
 			drbd_disks_t nps = drbd_try_outdate_peer(mdev);
 			drbd_request_state(mdev,NS(pdsk,nps));
 		}
-		drbd_md_write(mdev);
+		drbd_md_sync(mdev);
 	}
 
 	INFO("Connection lost.\n");

Modified: trunk/drbd/drbd_worker.c
===================================================================
--- trunk/drbd/drbd_worker.c	2006-01-10 14:30:17 UTC (rev 2047)
+++ trunk/drbd/drbd_worker.c	2006-01-14 17:28:02 UTC (rev 2048)
@@ -433,9 +433,6 @@
 	drbd_request_state(mdev,NS3(conn,Connected,
 				    disk,UpToDate,
 				    pdsk,UpToDate));
-
-	drbd_md_write(mdev);
-
 	return 1;
 }
 
@@ -817,8 +814,6 @@
 			return;
 		}
 
-		drbd_md_write(mdev);
-
 		if( ns.conn == SyncTarget ) {
 			D_ASSERT(!test_bit(STOP_SYNC_TIMER,&mdev->flags));
 			mod_timer(&mdev->resync_timer,jiffies);



More information about the drbd-cvs mailing list