[DRBD-cvs] svn commit by phil - r2292 - in trunk: documentation drbd user - Propagated quite a number of fixes from the 0.7-branch.

drbd-cvs at lists.linbit.com
Fri Jul 21 10:24:24 CEST 2006


Author: phil
Date: 2006-07-21 10:24:20 +0200 (Fri, 21 Jul 2006)
New Revision: 2292

Modified:
   trunk/documentation/drbdsetup.sgml
   trunk/drbd/drbd_fs.c
   trunk/drbd/drbd_int.h
   trunk/drbd/drbd_main.c
   trunk/drbd/drbd_receiver.c
   trunk/drbd/drbd_worker.c
   trunk/user/drbd_limits.h
   trunk/user/drbdsetup.c
Log:
Propagated quite a number of fixes from the 0.7-branch.


Modified: trunk/documentation/drbdsetup.sgml
===================================================================
--- trunk/documentation/drbdsetup.sgml	2006-07-20 12:41:44 UTC (rev 2291)
+++ trunk/documentation/drbdsetup.sgml	2006-07-21 08:24:20 UTC (rev 2292)
@@ -396,7 +396,7 @@
 	  </listitem>
 	</varlistentry>
 	<varlistentry>
-	  <term><option>-l</option>,
+	  <term><option>-u</option>,
 	  <option>--unplug-watermark <replaceable>val</replaceable></option></term>
 	  <listitem>
 	    <para>

Modified: trunk/drbd/drbd_fs.c
===================================================================
--- trunk/drbd/drbd_fs.c	2006-07-20 12:41:44 UTC (rev 2291)
+++ trunk/drbd/drbd_fs.c	2006-07-21 08:24:20 UTC (rev 2292)
@@ -1170,9 +1170,10 @@
 long drbd_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg)
 {
 	int ret;
-	// lock_kernel(); Not needed, since we have mdev->device_mutex
 	ret = drbd_ioctl(f->f_dentry->d_inode, f, cmd, arg);
-	// unlock_kernel();
+	/* need to map "unknown" to ENOIOCTLCMD
+	 * to get the generic fallback path going */
+	if (ret == -ENOTTY) ret = -ENOIOCTLCMD;
 	return ret;
 }
 #endif
@@ -1398,7 +1399,7 @@
 		break;
 
 	default:
-		err = -EINVAL;
+		err = -ENOTTY;
 	}
  /* out: */
 	up(&mdev->device_mutex);
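
The compat_ioctl hunk above maps -ENOTTY ("unknown ioctl") to -ENOIOCTLCMD so that the generic 32-bit compat fallback path still gets a chance at the command; for the same reason the default case of the native handler now returns -ENOTTY instead of -EINVAL. Below is a minimal user-space sketch of that mapping; my_ioctl(), my_compat_ioctl() and MY_CMD_GET_VERSION are hypothetical stand-ins for drbd_ioctl(), drbd_compat_ioctl() and the real command numbers, and ENOIOCTLCMD is defined locally with the value the kernel uses internally, since it is not exported to user space.

#include <errno.h>

#define ENOIOCTLCMD 515			/* kernel-internal "No ioctl command" */
#define MY_CMD_GET_VERSION 0x42		/* hypothetical command number */

static int my_ioctl(unsigned int cmd)
{
	switch (cmd) {
	case MY_CMD_GET_VERSION:
		return 0;
	default:
		return -ENOTTY;		/* unknown command, as user space expects */
	}
}

static int my_compat_ioctl(unsigned int cmd)
{
	int ret = my_ioctl(cmd);

	/* map "unknown" so a generic fallback path can take over */
	if (ret == -ENOTTY)
		ret = -ENOIOCTLCMD;
	return ret;
}

int main(void)
{
	return my_compat_ioctl(0xdead) == -ENOIOCTLCMD ? 0 : 1;
}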

Modified: trunk/drbd/drbd_int.h
===================================================================
--- trunk/drbd/drbd_int.h	2006-07-20 12:41:44 UTC (rev 2291)
+++ trunk/drbd/drbd_int.h	2006-07-21 08:24:20 UTC (rev 2292)
@@ -868,7 +868,9 @@
 extern int _drbd_send_cmd(drbd_dev *mdev, struct socket *sock,
 			  Drbd_Packet_Cmd cmd, Drbd_Header *h,
 			  size_t size, unsigned msg_flags);
-extern int drbd_send_cmd(drbd_dev *mdev, struct socket *sock,
+#define USE_DATA_SOCKET 1
+#define USE_META_SOCKET 0
+extern int drbd_send_cmd(drbd_dev *mdev, int use_data_socket,
 			  Drbd_Packet_Cmd cmd, Drbd_Header *h, size_t size);
 extern int drbd_send_cmd2(drbd_dev *mdev, Drbd_Packet_Cmd cmd,
 			  char* data, size_t size);
@@ -1359,8 +1361,9 @@
 	unsigned long flags;
 	spin_lock_irqsave(&mdev->req_lock,flags);
 	list_add(&w->list,&q->q);
+	up(&q->s); /* within the spinlock,
+		      see comment near end of drbd_worker() */
 	spin_unlock_irqrestore(&mdev->req_lock,flags);
-	up(&q->s);
 }
 
 static inline void
@@ -1370,8 +1373,9 @@
 	unsigned long flags;
 	spin_lock_irqsave(&mdev->req_lock,flags);
 	list_add_tail(&w->list,&q->q);
+	up(&q->s); /* within the spinlock,
+		      see comment near end of drbd_worker() */
 	spin_unlock_irqrestore(&mdev->req_lock,flags);
-	up(&q->s);
 }
 
 static inline void wake_asender(drbd_dev *mdev) {
@@ -1388,19 +1392,19 @@
 static inline int drbd_send_short_cmd(drbd_dev *mdev, Drbd_Packet_Cmd cmd)
 {
 	Drbd_Header h;
-	return drbd_send_cmd(mdev,mdev->data.socket,cmd,&h,sizeof(h));
+	return drbd_send_cmd(mdev,USE_DATA_SOCKET,cmd,&h,sizeof(h));
 }
 
 static inline int drbd_send_ping(drbd_dev *mdev)
 {
 	Drbd_Header h;
-	return drbd_send_cmd(mdev,mdev->meta.socket,Ping,&h,sizeof(h));
+	return drbd_send_cmd(mdev,USE_META_SOCKET,Ping,&h,sizeof(h));
 }
 
 static inline int drbd_send_ping_ack(drbd_dev *mdev)
 {
 	Drbd_Header h;
-	return drbd_send_cmd(mdev,mdev->meta.socket,PingAck,&h,sizeof(h));
+	return drbd_send_cmd(mdev,USE_META_SOCKET,PingAck,&h,sizeof(h));
 }
 
 static inline void drbd_thread_stop(struct Drbd_thread *thi)
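
Two things change in drbd_int.h: drbd_send_cmd() now takes a USE_DATA_SOCKET/USE_META_SOCKET flag instead of a raw socket pointer (see drbd_main.c below), and the up() on the work-queue semaphore moves inside the req_lock critical section, so the semaphore count can never run ahead of the list while drbd_worker() drains the list and re-initialises the semaphore under that same lock. A minimal user-space analog of the queuing pattern, with hypothetical names and a pthread mutex plus POSIX semaphore standing in for the spinlock and q->s:

#include <pthread.h>
#include <semaphore.h>
#include <stddef.h>

struct work {
	struct work *next;
	void (*cb)(struct work *w);
};

struct work_queue {
	pthread_mutex_t lock;		/* plays the role of mdev->req_lock */
	sem_t		sem;		/* plays the role of q->s */
	struct work    *head, *tail;
};

/* Post the semaphore while still holding the lock.  If the post happened
 * after unlock, a consumer that drains the list and re-initialises the
 * semaphore under the lock (as drbd_worker() does on termination) could
 * later see a post with no matching list entry. */
static void queue_work(struct work_queue *q, struct work *w)
{
	pthread_mutex_lock(&q->lock);
	w->next = NULL;
	if (q->tail)
		q->tail->next = w;
	else
		q->head = w;
	q->tail = w;
	sem_post(&q->sem);		/* inside the critical section */
	pthread_mutex_unlock(&q->lock);
}

static void noop_cb(struct work *w) { (void)w; }

int main(void)
{
	struct work_queue q = { PTHREAD_MUTEX_INITIALIZER };
	struct work w = { NULL, noop_cb };

	sem_init(&q.sem, 0, 0);
	queue_work(&q, &w);
	return 0;
}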

Modified: trunk/drbd/drbd_main.c
===================================================================
--- trunk/drbd/drbd_main.c	2006-07-20 12:41:44 UTC (rev 2291)
+++ trunk/drbd/drbd_main.c	2006-07-21 08:24:20 UTC (rev 2292)
@@ -1235,6 +1235,7 @@
 	UNLOCK_SIGMASK(current,flags);
 }
 
+/* the appropriate socket mutex must be held already */
 int _drbd_send_cmd(drbd_dev *mdev, struct socket *sock,
 			  Drbd_Packet_Cmd cmd, Drbd_Header *h,
 			  size_t size, unsigned msg_flags)
@@ -1259,25 +1260,36 @@
 	return ok;
 }
 
-int drbd_send_cmd(drbd_dev *mdev, struct socket *sock,
+/* don't pass the socket. we may only look at it
+ * when we hold the appropriate socket mutex.
+ */
+int drbd_send_cmd(drbd_dev *mdev, int use_data_socket,
 		  Drbd_Packet_Cmd cmd, Drbd_Header* h, size_t size)
 {
-	int ok;
+	int ok = 0;
 	sigset_t old_blocked;
+	struct socket *sock;
 
-	if (sock == mdev->data.socket) {
+	if (use_data_socket) {
 		down(&mdev->data.mutex);
 		spin_lock(&mdev->send_task_lock);
 		mdev->send_task=current;
 		spin_unlock(&mdev->send_task_lock);
-	} else
+		sock = mdev->data.socket;
+	} else {
 		down(&mdev->meta.mutex);
+		sock = mdev->meta.socket;
+	}
 
-	old_blocked = drbd_block_all_signals();
-	ok = _drbd_send_cmd(mdev,sock,cmd,h,size,0);
-	restore_old_sigset(old_blocked);
+	/* drbd_disconnect() could have called drbd_free_sock()
+	 * while we were waiting in down()... */
+	if (likely(sock != NULL)) {
+		old_blocked = drbd_block_all_signals();
+		ok = _drbd_send_cmd(mdev, sock, cmd, h, size, 0);
+		restore_old_sigset(old_blocked);
+	}
 
-	if (sock == mdev->data.socket) {
+	if (use_data_socket) {
 		spin_lock(&mdev->send_task_lock);
 		mdev->send_task=NULL;
 		spin_unlock(&mdev->send_task_lock);
@@ -1327,7 +1339,7 @@
 	p.skip      = cpu_to_be32(sc->skip);
 	p.after     = cpu_to_be32(sc->after);
 
-	return drbd_send_cmd(mdev,mdev->data.socket,SyncParam,(Drbd_Header*)&p,sizeof(p));
+	return drbd_send_cmd(mdev,USE_DATA_SOCKET,SyncParam,(Drbd_Header*)&p,sizeof(p));
 }
 
 int drbd_send_protocol(drbd_dev *mdev)
@@ -1336,7 +1348,7 @@
 
 	p.protocol = cpu_to_be32(mdev->net_conf->wire_protocol);
 
-	return drbd_send_cmd(mdev,mdev->data.socket,ReportProtocol,
+	return drbd_send_cmd(mdev,USE_DATA_SOCKET,ReportProtocol,
 			     (Drbd_Header*)&p,sizeof(p));
 }
 
@@ -1352,7 +1364,7 @@
 	p.uuid[UUID_SIZE] = cpu_to_be64(drbd_bm_total_weight(mdev));
 	p.uuid[UUID_FLAGS] = cpu_to_be64(mdev->net_conf->want_lose);
 
-	return drbd_send_cmd(mdev,mdev->data.socket,ReportUUIDs,
+	return drbd_send_cmd(mdev,USE_DATA_SOCKET,ReportUUIDs,
 			     (Drbd_Header*)&p,sizeof(p));
 }
 
@@ -1362,7 +1374,7 @@
 
 	p.uuid = cpu_to_be64(val);
 
-	return drbd_send_cmd(mdev,mdev->data.socket,ReportSyncUUID,
+	return drbd_send_cmd(mdev,USE_DATA_SOCKET,ReportSyncUUID,
 			     (Drbd_Header*)&p,sizeof(p));
 }
 
@@ -1384,7 +1396,7 @@
 	p.max_segment_size = cpu_to_be32(mdev->rq_queue->max_segment_size);
 	p.queue_order_type = cpu_to_be32(drbd_queue_order_type(mdev));
 
-	ok = drbd_send_cmd(mdev,mdev->data.socket,ReportSizes,
+	ok = drbd_send_cmd(mdev,USE_DATA_SOCKET,ReportSizes,
 			   (Drbd_Header*)&p,sizeof(p));
 	return ok;
 }
@@ -1396,7 +1408,7 @@
 	p.block_id = (unsigned long)req;
 	p.seq_num  = cpu_to_be32(req->seq_num);
 
-	return drbd_send_cmd(mdev,mdev->meta.socket,DiscardNote,
+	return drbd_send_cmd(mdev,USE_META_SOCKET,DiscardNote,
 			     (Drbd_Header*)&p,sizeof(p));
 }
 
@@ -1406,7 +1418,7 @@
 
 	p.state    = cpu_to_be32(mdev->state.i);
 
-	return drbd_send_cmd(mdev,mdev->data.socket,ReportState,
+	return drbd_send_cmd(mdev,USE_DATA_SOCKET,ReportState,
 			     (Drbd_Header*)&p,sizeof(p));
 }
 
@@ -1417,7 +1429,7 @@
 	p.mask    = cpu_to_be32(mask.i);
 	p.val     = cpu_to_be32(val.i);
 
-	return drbd_send_cmd(mdev,mdev->data.socket,StateChgRequest,
+	return drbd_send_cmd(mdev,USE_DATA_SOCKET,StateChgRequest,
 			     (Drbd_Header*)&p,sizeof(p));
 }
 
@@ -1427,7 +1439,7 @@
 
 	p.retcode    = cpu_to_be32(retcode);
 
-	return drbd_send_cmd(mdev,mdev->meta.socket,StateChgReply,
+	return drbd_send_cmd(mdev,USE_META_SOCKET,StateChgReply,
 			     (Drbd_Header*)&p,sizeof(p));
 }
 
@@ -1513,7 +1525,7 @@
 	p.barrier  = barrier_nr;
 	p.set_size = cpu_to_be32(set_size);
 
-	ok = drbd_send_cmd(mdev,mdev->meta.socket,BarrierAck,(Drbd_Header*)&p,sizeof(p));
+	ok = drbd_send_cmd(mdev,USE_META_SOCKET,BarrierAck,(Drbd_Header*)&p,sizeof(p));
 	return ok;
 }
 
@@ -1538,7 +1550,7 @@
 #endif
 
 	if (!mdev->meta.socket || mdev->state.conn < Connected) return FALSE;
-	ok=drbd_send_cmd(mdev,mdev->meta.socket,cmd,(Drbd_Header*)&p,sizeof(p));
+	ok=drbd_send_cmd(mdev,USE_META_SOCKET,cmd,(Drbd_Header*)&p,sizeof(p));
 	return ok;
 }
 
@@ -1552,7 +1564,7 @@
 	p.block_id = block_id;
 	p.blksize  = cpu_to_be32(size);
 
-	ok = drbd_send_cmd(mdev,mdev->data.socket,cmd,(Drbd_Header*)&p,sizeof(p));
+	ok = drbd_send_cmd(mdev,USE_DATA_SOCKET,cmd,(Drbd_Header*)&p,sizeof(p));
 	return ok;
 }
 
@@ -1813,7 +1825,7 @@
 */
 
 /*
- * you should have down()ed the appropriate [m]sock_mutex elsewhere!
+ * you must have down()ed the appropriate [m]sock_mutex elsewhere!
  */
 int drbd_send(drbd_dev *mdev, struct socket *sock,
 	      void* buf, size_t size, unsigned msg_flags)
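
The refactored drbd_send_cmd() only dereferences the socket pointer after taking the matching mutex, because drbd_disconnect() may have called drbd_free_sock() while the caller slept in down(). A rough user-space sketch of that pattern with hypothetical struct and function names (the real code additionally tracks send_task and blocks signals around the send):

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define USE_DATA_SOCKET 1
#define USE_META_SOCKET 0

struct conn {
	pthread_mutex_t mutex;
	int		fd;	/* -1 once the connection has been torn down */
};

struct device {
	struct conn data;
	struct conn meta;
};

/* Pick the connection by flag and only look at its fd under the mutex:
 * a concurrent disconnect may have closed it while we waited. */
static bool send_cmd(struct device *dev, int use_data_socket,
		     const void *buf, size_t len)
{
	struct conn *c = use_data_socket ? &dev->data : &dev->meta;
	bool ok = false;

	pthread_mutex_lock(&c->mutex);
	if (c->fd >= 0) {
		/* the actual send(c->fd, buf, len, 0) would go here */
		(void)buf; (void)len;
		ok = true;
	}
	pthread_mutex_unlock(&c->mutex);
	return ok;
}

int main(void)
{
	struct device dev = {
		.data = { PTHREAD_MUTEX_INITIALIZER, 3 },
		.meta = { PTHREAD_MUTEX_INITIALIZER, -1 },	/* torn down */
	};

	printf("data: %d, meta: %d\n",
	       send_cmd(&dev, USE_DATA_SOCKET, "ping", 4),
	       send_cmd(&dev, USE_META_SOCKET, "ping", 4));
	return 0;
}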

Modified: trunk/drbd/drbd_receiver.c
===================================================================
--- trunk/drbd/drbd_receiver.c	2006-07-20 12:41:44 UTC (rev 2291)
+++ trunk/drbd/drbd_receiver.c	2006-07-21 08:24:20 UTC (rev 2292)
@@ -2431,30 +2431,6 @@
 		dec_local(mdev);
 	}
 
-	/* in case we have been syncing, and then we drop the connection,
-	 * we need to "w_resume_next_sg", which we try to achieve by
-	 * setting the STOP_SYNC_TIMER bit, and schedulung the timer for
-	 * immediate execution.
-	 * unfortunately we cannot be sure that the timer already triggered.
-	 *
-	 * so we del_timer_sync here, and check that bit.
-	 * if it is still set, we queue w_resume_next_sg anyways,
-	 * just to be sure.
-	 */
-
-	del_timer_sync(&mdev->resync_timer);
-	spin_lock_irq(&mdev->req_lock);
-	if (test_and_clear_bit(STOP_SYNC_TIMER,&mdev->flags)) {
-		mdev->resync_work.cb = w_resume_next_sg;
-		if (list_empty(&mdev->resync_work.list))
-			_drbd_queue_work(&mdev->data.work,&mdev->resync_work);
-		// else: already queued, we only need to release the lock.
-	} else {
-		D_ASSERT(mdev->resync_work.cb == w_resync_inactive);
-	}
-	spin_unlock_irq(&mdev->req_lock);
-
-
 	drbd_thread_stop_nowait(&mdev->worker);
 	drbd_thread_stop(&mdev->asender);
 

Modified: trunk/drbd/drbd_worker.c
===================================================================
--- trunk/drbd/drbd_worker.c	2006-07-20 12:41:44 UTC (rev 2291)
+++ trunk/drbd/drbd_worker.c	2006-07-21 08:24:20 UTC (rev 2292)
@@ -752,10 +752,9 @@
 
 	drbd_global_lock();
 	_drbd_resume_next(mdev);
+	w->cb = w_resync_inactive;
 	drbd_global_unlock();
 
-	w->cb = w_resync_inactive;
-
 	return 1;
 }
 
@@ -900,7 +899,21 @@
 
 		w = 0;
 		spin_lock_irq(&mdev->req_lock);
-		D_ASSERT(!list_empty(&mdev->data.work.q));
+		ERR_IF(list_empty(&mdev->data.work.q)) {
+			/* something terribly wrong in our logic.
+			 * we were able to down() the semaphore,
+			 * but the list is empty... doh.
+			 *
+			 * what is the best thing to do now?
+			 * try again from scratch, restarting the receiver,
+			 * asender, whatnot? could break even more ugly,
+			 * e.g. when we are primary, but no good local data.
+			 *
+			 * I'll try to get away just starting over this loop.
+			 */
+			spin_unlock_irq(&mdev->req_lock);
+			continue;
+		}
 		w = list_entry(mdev->data.work.q.next,struct drbd_work,list);
 		list_del_init(&w->list);
 		spin_unlock_irq(&mdev->req_lock);
@@ -915,8 +928,49 @@
 
 	drbd_wait_ee_list_empty(mdev,&mdev->read_ee);
 
+	/* When we terminate a resync process, either because it finished
+	 * successfully, or because (like in this case here) we lost
+	 * communications, we need to "w_resume_next_sg".
+	 * We cannot use del_timer_sync from within _set_cstate, and since the
+	 * resync timer may still be scheduled and would then trigger anyways,
+	 * we set the STOP_SYNC_TIMER bit, and schedule the timer for immediate
+	 * execution from within _set_cstate().
+	 * The timer should then clear that bit and queue w_resume_next_sg.
+	 *
+	 * This is fine for the normal "resync finished" case.
+	 *
+	 * In this case (worker thread being stopped), there is a race:
+	 * we cannot be sure that the timer already triggered.
+	 *
+	 * So we del_timer_sync here, and check that "STOP_SYNC_TIMER" bit.
+	 * if it is still set, we queue w_resume_next_sg anyways,
+	 * just to be sure.
+	 */
+
+	del_timer_sync(&mdev->resync_timer);
+	/* possible paranoia check: the STOP_SYNC_TIMER bit should be set
+	 * if and only if del_timer_sync returns true ... */
+
+	spin_lock_irq(&mdev->req_lock);
+	if (test_and_clear_bit(STOP_SYNC_TIMER,&mdev->flags)) {
+		mdev->resync_work.cb = w_resume_next_sg;
+		if (list_empty(&mdev->resync_work.list))
+			_drbd_queue_work(&mdev->data.work,&mdev->resync_work);
+		// else: already queued
+	} else {
+		/* timer already consumed that bit, or it was never set */
+		if (list_empty(&mdev->resync_work.list)) {
+			/* not queued, should be inactive */
+			ERR_IF (mdev->resync_work.cb != w_resync_inactive)
+				mdev->resync_work.cb = w_resync_inactive;
+		} else {
+			/* still queued; should be w_resume_next_sg */
+			ERR_IF (mdev->resync_work.cb != w_resume_next_sg)
+				mdev->resync_work.cb = w_resume_next_sg;
+		}
+	}
+
 	i = 0;
-	spin_lock_irq(&mdev->req_lock);
   again:
 	list_splice_init(&mdev->data.work.q,&work_list);
 	spin_unlock_irq(&mdev->req_lock);
@@ -925,13 +979,18 @@
 		w = list_entry(work_list.next, struct drbd_work,list);
 		list_del_init(&w->list);
 		w->cb(mdev,w,1);
-		i++;
+		i++; /* dead debugging code */
 	}
 
 	spin_lock_irq(&mdev->req_lock);
 	ERR_IF(!list_empty(&mdev->data.work.q))
 		goto again;
 	sema_init(&mdev->data.work.s,0);
+	/* DANGEROUS race: if someone did queue his work within the spinlock,
+	 * but up()ed outside the spinlock, we could get an up() on the
+	 * semaphore without a corresponding list entry.
+	 * So don't do that.
+	 */
 	spin_unlock_irq(&mdev->req_lock);
 
 	INFO("worker terminated\n");
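
The block moved here from drbd_receiver.c closes a race between the resync timer and worker termination: whoever clears the STOP_SYNC_TIMER bit owns the job of queuing w_resume_next_sg, so the work is queued exactly once whether or not the timer managed to fire before del_timer_sync(). A small user-space sketch of that ownership handshake, using C11 atomics and hypothetical names in place of the kernel's test_and_clear_bit(), del_timer_sync() and _drbd_queue_work():

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool stop_sync_timer;	/* analog of the STOP_SYNC_TIMER bit */
static int resume_queued;		/* how often w_resume_next_sg got queued */

static void queue_resume_next_sg(void)
{
	/* analog of _drbd_queue_work(&mdev->data.work, &mdev->resync_work) */
	resume_queued++;
}

/* Timer callback: may or may not run before the worker cancels the timer.
 * It only queues the work if it managed to claim (clear) the bit. */
static void resync_timer_fn(void)
{
	if (atomic_exchange(&stop_sync_timer, false))
		queue_resume_next_sg();
}

/* Worker teardown: after the del_timer_sync() analog, the callback is
 * guaranteed not to be running anymore.  If the bit is still set, the
 * timer never claimed it, so the worker queues the work itself. */
static void worker_teardown(void)
{
	/* ... del_timer_sync(&mdev->resync_timer) would happen here ... */
	if (atomic_exchange(&stop_sync_timer, false))
		queue_resume_next_sg();
}

int main(void)
{
	atomic_store(&stop_sync_timer, true);	/* resync ends: bit gets set */
	resync_timer_fn();			/* timer happens to fire first */
	worker_teardown();			/* worker tears down afterwards */
	printf("queued %d time(s)\n", resume_queued);	/* always exactly 1 */
	return 0;
}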

Modified: trunk/user/drbd_limits.h
===================================================================
--- trunk/user/drbd_limits.h	2006-07-20 12:41:44 UTC (rev 2291)
+++ trunk/user/drbd_limits.h	2006-07-21 08:24:20 UTC (rev 2292)
@@ -52,8 +52,8 @@
   /* @4k PageSize -> 128kB - 512MB */
   RANGE(MAX_BUFFERS, 32, 131072);
 
-  /* @4k PageSize -> 64kB - 512MB */
-  RANGE(UNPLUG_WATERMARK, 16, 131072);
+  /* @4k PageSize -> 4kB - 512MB */
+  RANGE(UNPLUG_WATERMARK, 1, 131072);
 
   /* 0 is disabled.
    * 200 should be more than enough even for very short timeouts */

Modified: trunk/user/drbdsetup.c
===================================================================
--- trunk/user/drbdsetup.c	2006-07-20 12:41:44 UTC (rev 2291)
+++ trunk/user/drbdsetup.c	2006-07-21 08:24:20 UTC (rev 2292)
@@ -165,7 +165,7 @@
      { "timeout",    required_argument, 0, 't' },
      { "max-epoch-size", required_argument, 0, 'e' },
      { "max-buffers",required_argument, 0, 'b' },
-     { "unplug-watermark",required_argument, 0, 'l' },
+     { "unplug-watermark",required_argument, 0, 'u' },
      { "connect-int",required_argument, 0, 'c' },
      { "ping-int",   required_argument, 0, 'i' },
      { "sndbuf-size",required_argument, 0, 'S' },
@@ -557,7 +557,7 @@
 	  cn->config.max_buffers = m_strtoll_range(optarg,1, "max-buffers",
 			  DRBD_MAX_BUFFERS_MIN, DRBD_MAX_BUFFERS_MAX);
 	  break;
-	case 'l':
+	case 'u':
 	  cn->config.unplug_watermark = m_strtoll_range(optarg,1, "unplug-watermark",
 			  DRBD_UNPLUG_WATERMARK_MIN, DRBD_UNPLUG_WATERMARK_MAX);
 	  break;
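
The unplug-watermark option keeps its long name but moves from -l to -u, and its lower bound drops from 16 to 1 (one 4 KiB page). A stand-alone sketch of parsing such a ranged option with getopt_long; parse_range() is a hypothetical stand-in for drbdsetup's m_strtoll_range(), and the limits mirror the new DRBD_UNPLUG_WATERMARK_MIN/MAX:

#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>

/* hypothetical stand-in for drbdsetup's m_strtoll_range() */
static long parse_range(const char *arg, const char *name, long min, long max)
{
	char *end = NULL;
	long v = strtol(arg, &end, 10);

	if (end == arg || *end != '\0' || v < min || v > max) {
		fprintf(stderr, "%s must be a number between %ld and %ld\n",
			name, min, max);
		exit(1);
	}
	return v;
}

int main(int argc, char **argv)
{
	static const struct option opts[] = {
		{ "unplug-watermark", required_argument, 0, 'u' },
		{ 0, 0, 0, 0 }
	};
	long unplug_watermark = 128;	/* arbitrary default for this sketch */
	int c;

	while ((c = getopt_long(argc, argv, "u:", opts, NULL)) != -1) {
		if (c == 'u')	/* the new short option letter */
			unplug_watermark = parse_range(optarg, "unplug-watermark",
						       1, 131072);
	}
	printf("unplug-watermark = %ld\n", unplug_watermark);
	return 0;
}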


