[DRBD-cvs] svn commit by phil - r2292 - in trunk: documentation
drbd user - Propagated quite a number of fixes from the 0.7-branch.
drbd-cvs at lists.linbit.com
drbd-cvs at lists.linbit.com
Fri Jul 21 10:24:24 CEST 2006
Author: phil
Date: 2006-07-21 10:24:20 +0200 (Fri, 21 Jul 2006)
New Revision: 2292
Modified:
trunk/documentation/drbdsetup.sgml
trunk/drbd/drbd_fs.c
trunk/drbd/drbd_int.h
trunk/drbd/drbd_main.c
trunk/drbd/drbd_receiver.c
trunk/drbd/drbd_worker.c
trunk/user/drbd_limits.h
trunk/user/drbdsetup.c
Log:
Propagated quite a number of fixes from the 0.7-branch.
Modified: trunk/documentation/drbdsetup.sgml
===================================================================
--- trunk/documentation/drbdsetup.sgml 2006-07-20 12:41:44 UTC (rev 2291)
+++ trunk/documentation/drbdsetup.sgml 2006-07-21 08:24:20 UTC (rev 2292)
@@ -396,7 +396,7 @@
</listitem>
</varlistentry>
<varlistentry>
- <term><option>-l</option>,
+ <term><option>-u</option>,
<option>--unplug-watermark <replaceable>val</replaceable></option></term>
<listitem>
<para>
Modified: trunk/drbd/drbd_fs.c
===================================================================
--- trunk/drbd/drbd_fs.c 2006-07-20 12:41:44 UTC (rev 2291)
+++ trunk/drbd/drbd_fs.c 2006-07-21 08:24:20 UTC (rev 2292)
@@ -1170,9 +1170,10 @@
long drbd_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg)
{
int ret;
- // lock_kernel(); Not needed, since we have mdev->device_mutex
ret = drbd_ioctl(f->f_dentry->d_inode, f, cmd, arg);
- // unlock_kernel();
+ /* need to map "unknown" to ENOIOCTLCMD
+ * to get the generic fallback path going */
+ if (ret == -ENOTTY) ret = -ENOIOCTLCMD;
return ret;
}
#endif
@@ -1398,7 +1399,7 @@
break;
default:
- err = -EINVAL;
+ err = -ENOTTY;
}
/* out: */
up(&mdev->device_mutex);
Modified: trunk/drbd/drbd_int.h
===================================================================
--- trunk/drbd/drbd_int.h 2006-07-20 12:41:44 UTC (rev 2291)
+++ trunk/drbd/drbd_int.h 2006-07-21 08:24:20 UTC (rev 2292)
@@ -868,7 +868,9 @@
extern int _drbd_send_cmd(drbd_dev *mdev, struct socket *sock,
Drbd_Packet_Cmd cmd, Drbd_Header *h,
size_t size, unsigned msg_flags);
-extern int drbd_send_cmd(drbd_dev *mdev, struct socket *sock,
+#define USE_DATA_SOCKET 1
+#define USE_META_SOCKET 0
+extern int drbd_send_cmd(drbd_dev *mdev, int use_data_socket,
Drbd_Packet_Cmd cmd, Drbd_Header *h, size_t size);
extern int drbd_send_cmd2(drbd_dev *mdev, Drbd_Packet_Cmd cmd,
char* data, size_t size);
@@ -1359,8 +1361,9 @@
unsigned long flags;
spin_lock_irqsave(&mdev->req_lock,flags);
list_add(&w->list,&q->q);
+ up(&q->s); /* within the spinlock,
+ see comment near end of drbd_worker() */
spin_unlock_irqrestore(&mdev->req_lock,flags);
- up(&q->s);
}
static inline void
@@ -1370,8 +1373,9 @@
unsigned long flags;
spin_lock_irqsave(&mdev->req_lock,flags);
list_add_tail(&w->list,&q->q);
+ up(&q->s); /* within the spinlock,
+ see comment near end of drbd_worker() */
spin_unlock_irqrestore(&mdev->req_lock,flags);
- up(&q->s);
}
static inline void wake_asender(drbd_dev *mdev) {
@@ -1388,19 +1392,19 @@
static inline int drbd_send_short_cmd(drbd_dev *mdev, Drbd_Packet_Cmd cmd)
{
Drbd_Header h;
- return drbd_send_cmd(mdev,mdev->data.socket,cmd,&h,sizeof(h));
+ return drbd_send_cmd(mdev,USE_DATA_SOCKET,cmd,&h,sizeof(h));
}
static inline int drbd_send_ping(drbd_dev *mdev)
{
Drbd_Header h;
- return drbd_send_cmd(mdev,mdev->meta.socket,Ping,&h,sizeof(h));
+ return drbd_send_cmd(mdev,USE_META_SOCKET,Ping,&h,sizeof(h));
}
static inline int drbd_send_ping_ack(drbd_dev *mdev)
{
Drbd_Header h;
- return drbd_send_cmd(mdev,mdev->meta.socket,PingAck,&h,sizeof(h));
+ return drbd_send_cmd(mdev,USE_META_SOCKET,PingAck,&h,sizeof(h));
}
static inline void drbd_thread_stop(struct Drbd_thread *thi)
Modified: trunk/drbd/drbd_main.c
===================================================================
--- trunk/drbd/drbd_main.c 2006-07-20 12:41:44 UTC (rev 2291)
+++ trunk/drbd/drbd_main.c 2006-07-21 08:24:20 UTC (rev 2292)
@@ -1235,6 +1235,7 @@
UNLOCK_SIGMASK(current,flags);
}
+/* the appropriate socket mutex must be held already */
int _drbd_send_cmd(drbd_dev *mdev, struct socket *sock,
Drbd_Packet_Cmd cmd, Drbd_Header *h,
size_t size, unsigned msg_flags)
@@ -1259,25 +1260,36 @@
return ok;
}
-int drbd_send_cmd(drbd_dev *mdev, struct socket *sock,
+/* don't pass the socket. we may only look at it
+ * when we hold the appropriate socket mutex.
+ */
+int drbd_send_cmd(drbd_dev *mdev, int use_data_socket,
Drbd_Packet_Cmd cmd, Drbd_Header* h, size_t size)
{
- int ok;
+ int ok = 0;
sigset_t old_blocked;
+ struct socket *sock;
- if (sock == mdev->data.socket) {
+ if (use_data_socket) {
down(&mdev->data.mutex);
spin_lock(&mdev->send_task_lock);
mdev->send_task=current;
spin_unlock(&mdev->send_task_lock);
- } else
+ sock = mdev->data.socket;
+ } else {
down(&mdev->meta.mutex);
+ sock = mdev->meta.socket;
+ }
- old_blocked = drbd_block_all_signals();
- ok = _drbd_send_cmd(mdev,sock,cmd,h,size,0);
- restore_old_sigset(old_blocked);
+ /* drbd_disconnect() could have called drbd_free_sock()
+ * while we were waiting in down()... */
+ if (likely(sock != NULL)) {
+ old_blocked = drbd_block_all_signals();
+ ok = _drbd_send_cmd(mdev, sock, cmd, h, size, 0);
+ restore_old_sigset(old_blocked);
+ }
- if (sock == mdev->data.socket) {
+ if (use_data_socket) {
spin_lock(&mdev->send_task_lock);
mdev->send_task=NULL;
spin_unlock(&mdev->send_task_lock);
@@ -1327,7 +1339,7 @@
p.skip = cpu_to_be32(sc->skip);
p.after = cpu_to_be32(sc->after);
- return drbd_send_cmd(mdev,mdev->data.socket,SyncParam,(Drbd_Header*)&p,sizeof(p));
+ return drbd_send_cmd(mdev,USE_DATA_SOCKET,SyncParam,(Drbd_Header*)&p,sizeof(p));
}
int drbd_send_protocol(drbd_dev *mdev)
@@ -1336,7 +1348,7 @@
p.protocol = cpu_to_be32(mdev->net_conf->wire_protocol);
- return drbd_send_cmd(mdev,mdev->data.socket,ReportProtocol,
+ return drbd_send_cmd(mdev,USE_DATA_SOCKET,ReportProtocol,
(Drbd_Header*)&p,sizeof(p));
}
@@ -1352,7 +1364,7 @@
p.uuid[UUID_SIZE] = cpu_to_be64(drbd_bm_total_weight(mdev));
p.uuid[UUID_FLAGS] = cpu_to_be64(mdev->net_conf->want_lose);
- return drbd_send_cmd(mdev,mdev->data.socket,ReportUUIDs,
+ return drbd_send_cmd(mdev,USE_DATA_SOCKET,ReportUUIDs,
(Drbd_Header*)&p,sizeof(p));
}
@@ -1362,7 +1374,7 @@
p.uuid = cpu_to_be64(val);
- return drbd_send_cmd(mdev,mdev->data.socket,ReportSyncUUID,
+ return drbd_send_cmd(mdev,USE_DATA_SOCKET,ReportSyncUUID,
(Drbd_Header*)&p,sizeof(p));
}
@@ -1384,7 +1396,7 @@
p.max_segment_size = cpu_to_be32(mdev->rq_queue->max_segment_size);
p.queue_order_type = cpu_to_be32(drbd_queue_order_type(mdev));
- ok = drbd_send_cmd(mdev,mdev->data.socket,ReportSizes,
+ ok = drbd_send_cmd(mdev,USE_DATA_SOCKET,ReportSizes,
(Drbd_Header*)&p,sizeof(p));
return ok;
}
@@ -1396,7 +1408,7 @@
p.block_id = (unsigned long)req;
p.seq_num = cpu_to_be32(req->seq_num);
- return drbd_send_cmd(mdev,mdev->meta.socket,DiscardNote,
+ return drbd_send_cmd(mdev,USE_META_SOCKET,DiscardNote,
(Drbd_Header*)&p,sizeof(p));
}
@@ -1406,7 +1418,7 @@
p.state = cpu_to_be32(mdev->state.i);
- return drbd_send_cmd(mdev,mdev->data.socket,ReportState,
+ return drbd_send_cmd(mdev,USE_DATA_SOCKET,ReportState,
(Drbd_Header*)&p,sizeof(p));
}
@@ -1417,7 +1429,7 @@
p.mask = cpu_to_be32(mask.i);
p.val = cpu_to_be32(val.i);
- return drbd_send_cmd(mdev,mdev->data.socket,StateChgRequest,
+ return drbd_send_cmd(mdev,USE_DATA_SOCKET,StateChgRequest,
(Drbd_Header*)&p,sizeof(p));
}
@@ -1427,7 +1439,7 @@
p.retcode = cpu_to_be32(retcode);
- return drbd_send_cmd(mdev,mdev->meta.socket,StateChgReply,
+ return drbd_send_cmd(mdev,USE_META_SOCKET,StateChgReply,
(Drbd_Header*)&p,sizeof(p));
}
@@ -1513,7 +1525,7 @@
p.barrier = barrier_nr;
p.set_size = cpu_to_be32(set_size);
- ok = drbd_send_cmd(mdev,mdev->meta.socket,BarrierAck,(Drbd_Header*)&p,sizeof(p));
+ ok = drbd_send_cmd(mdev,USE_META_SOCKET,BarrierAck,(Drbd_Header*)&p,sizeof(p));
return ok;
}
@@ -1538,7 +1550,7 @@
#endif
if (!mdev->meta.socket || mdev->state.conn < Connected) return FALSE;
- ok=drbd_send_cmd(mdev,mdev->meta.socket,cmd,(Drbd_Header*)&p,sizeof(p));
+ ok=drbd_send_cmd(mdev,USE_META_SOCKET,cmd,(Drbd_Header*)&p,sizeof(p));
return ok;
}
@@ -1552,7 +1564,7 @@
p.block_id = block_id;
p.blksize = cpu_to_be32(size);
- ok = drbd_send_cmd(mdev,mdev->data.socket,cmd,(Drbd_Header*)&p,sizeof(p));
+ ok = drbd_send_cmd(mdev,USE_DATA_SOCKET,cmd,(Drbd_Header*)&p,sizeof(p));
return ok;
}
@@ -1813,7 +1825,7 @@
*/
/*
- * you should have down()ed the appropriate [m]sock_mutex elsewhere!
+ * you must have down()ed the appropriate [m]sock_mutex elsewhere!
*/
int drbd_send(drbd_dev *mdev, struct socket *sock,
void* buf, size_t size, unsigned msg_flags)
Modified: trunk/drbd/drbd_receiver.c
===================================================================
--- trunk/drbd/drbd_receiver.c 2006-07-20 12:41:44 UTC (rev 2291)
+++ trunk/drbd/drbd_receiver.c 2006-07-21 08:24:20 UTC (rev 2292)
@@ -2431,30 +2431,6 @@
dec_local(mdev);
}
- /* in case we have been syncing, and then we drop the connection,
- * we need to "w_resume_next_sg", which we try to achieve by
- * setting the STOP_SYNC_TIMER bit, and schedulung the timer for
- * immediate execution.
- * unfortunately we cannot be sure that the timer already triggered.
- *
- * so we del_timer_sync here, and check that bit.
- * if it is still set, we queue w_resume_next_sg anyways,
- * just to be sure.
- */
-
- del_timer_sync(&mdev->resync_timer);
- spin_lock_irq(&mdev->req_lock);
- if (test_and_clear_bit(STOP_SYNC_TIMER,&mdev->flags)) {
- mdev->resync_work.cb = w_resume_next_sg;
- if (list_empty(&mdev->resync_work.list))
- _drbd_queue_work(&mdev->data.work,&mdev->resync_work);
- // else: already queued, we only need to release the lock.
- } else {
- D_ASSERT(mdev->resync_work.cb == w_resync_inactive);
- }
- spin_unlock_irq(&mdev->req_lock);
-
-
drbd_thread_stop_nowait(&mdev->worker);
drbd_thread_stop(&mdev->asender);
Modified: trunk/drbd/drbd_worker.c
===================================================================
--- trunk/drbd/drbd_worker.c 2006-07-20 12:41:44 UTC (rev 2291)
+++ trunk/drbd/drbd_worker.c 2006-07-21 08:24:20 UTC (rev 2292)
@@ -752,10 +752,9 @@
drbd_global_lock();
_drbd_resume_next(mdev);
+ w->cb = w_resync_inactive;
drbd_global_unlock();
- w->cb = w_resync_inactive;
-
return 1;
}
@@ -900,7 +899,21 @@
w = 0;
spin_lock_irq(&mdev->req_lock);
- D_ASSERT(!list_empty(&mdev->data.work.q));
+ ERR_IF(list_empty(&mdev->data.work.q)) {
+ /* something terribly wrong in our logic.
+ * we were able to down() the semaphore,
+ * but the list is empty... doh.
+ *
+ * what is the best thing to do now?
+ * try again from scratch, restarting the receiver,
+ * asender, whatnot? could break even more ugly,
+ * e.g. when we are primary, but no good local data.
+ *
+ * I'll try to get away just starting over this loop.
+ */
+ spin_unlock_irq(&mdev->req_lock);
+ continue;
+ }
w = list_entry(mdev->data.work.q.next,struct drbd_work,list);
list_del_init(&w->list);
spin_unlock_irq(&mdev->req_lock);
@@ -915,8 +928,49 @@
drbd_wait_ee_list_empty(mdev,&mdev->read_ee);
+ /* When we terminate a resync process, either because it finished
+ * successfully, or because (like in this case here) we lost
+ * communications, we need to "w_resume_next_sg".
+ * We cannot use del_timer_sync from within _set_cstate, and since the
+ * resync timer may still be scheduled and would then trigger anyways,
+ * we set the STOP_SYNC_TIMER bit, and schedule the timer for immediate
+ * execution from within _set_cstate().
+ * The timer should then clear that bit and queue w_resume_next_sg.
+ *
+ * This is fine for the normal "resync finished" case.
+ *
+ * In this case (worker thread being stopped), there is a race:
+ * we cannot be sure that the timer already triggered.
+ *
+ * So we del_timer_sync here, and check that "STOP_SYNC_TIMER" bit.
+ * if it is still set, we queue w_resume_next_sg anyways,
+ * just to be sure.
+ */
+
+ del_timer_sync(&mdev->resync_timer);
+ /* possible paranoia check: the STOP_SYNC_TIMER bit should be set
+ * if and only if del_timer_sync returns true ... */
+
+ spin_lock_irq(&mdev->req_lock);
+ if (test_and_clear_bit(STOP_SYNC_TIMER,&mdev->flags)) {
+ mdev->resync_work.cb = w_resume_next_sg;
+ if (list_empty(&mdev->resync_work.list))
+ _drbd_queue_work(&mdev->data.work,&mdev->resync_work);
+ // else: already queued
+ } else {
+ /* timer already consumed that bit, or it was never set */
+ if (list_empty(&mdev->resync_work.list)) {
+ /* not queued, should be inactive */
+ ERR_IF (mdev->resync_work.cb != w_resync_inactive)
+ mdev->resync_work.cb = w_resync_inactive;
+ } else {
+ /* still queued; should be w_resume_next_sg */
+ ERR_IF (mdev->resync_work.cb != w_resume_next_sg)
+ mdev->resync_work.cb = w_resume_next_sg;
+ }
+ }
+
i = 0;
- spin_lock_irq(&mdev->req_lock);
again:
list_splice_init(&mdev->data.work.q,&work_list);
spin_unlock_irq(&mdev->req_lock);
@@ -925,13 +979,18 @@
w = list_entry(work_list.next, struct drbd_work,list);
list_del_init(&w->list);
w->cb(mdev,w,1);
- i++;
+ i++; /* dead debugging code */
}
spin_lock_irq(&mdev->req_lock);
ERR_IF(!list_empty(&mdev->data.work.q))
goto again;
sema_init(&mdev->data.work.s,0);
+ /* DANGEROUS race: if someone did queue his work within the spinlock,
+ * but up() ed outside the spinlock, we could get an up() on the
+ * semaphore without corresponding list entry.
+ * So don't do that.
+ */
spin_unlock_irq(&mdev->req_lock);
INFO("worker terminated\n");
Modified: trunk/user/drbd_limits.h
===================================================================
--- trunk/user/drbd_limits.h 2006-07-20 12:41:44 UTC (rev 2291)
+++ trunk/user/drbd_limits.h 2006-07-21 08:24:20 UTC (rev 2292)
@@ -52,8 +52,8 @@
/* @4k PageSize -> 128kB - 512MB */
RANGE(MAX_BUFFERS, 32, 131072);
- /* @4k PageSize -> 64kB - 512MB */
- RANGE(UNPLUG_WATERMARK, 16, 131072);
+ /* @4k PageSize -> 4kB - 512MB */
+ RANGE(UNPLUG_WATERMARK, 1, 131072);
/* 0 is disabled.
* 200 should be more than enough even for very short timeouts */
Modified: trunk/user/drbdsetup.c
===================================================================
--- trunk/user/drbdsetup.c 2006-07-20 12:41:44 UTC (rev 2291)
+++ trunk/user/drbdsetup.c 2006-07-21 08:24:20 UTC (rev 2292)
@@ -165,7 +165,7 @@
{ "timeout", required_argument, 0, 't' },
{ "max-epoch-size", required_argument, 0, 'e' },
{ "max-buffers",required_argument, 0, 'b' },
- { "unplug-watermark",required_argument, 0, 'l' },
+ { "unplug-watermark",required_argument, 0, 'u' },
{ "connect-int",required_argument, 0, 'c' },
{ "ping-int", required_argument, 0, 'i' },
{ "sndbuf-size",required_argument, 0, 'S' },
@@ -557,7 +557,7 @@
cn->config.max_buffers = m_strtoll_range(optarg,1, "max-buffers",
DRBD_MAX_BUFFERS_MIN, DRBD_MAX_BUFFERS_MAX);
break;
- case 'l':
+ case 'u':
cn->config.unplug_watermark = m_strtoll_range(optarg,1, "unplug-watermark",
DRBD_UNPLUG_WATERMARK_MIN, DRBD_UNPLUG_WATERMARK_MAX);
break;
More information about the drbd-cvs
mailing list