[DRBD-cvs] drbd by phil; Fix for the "unknown packet type" proble...
drbd-user@lists.linbit.com
drbd-user@lists.linbit.com
Sun, 22 Feb 2004 19:49:44 +0100 (CET)
DRBD CVS committal
Author : phil
Module : drbd
Dir : drbd/drbd
Modified Files:
drbd_int.h drbd_main.c drbd_receiver.c
Log Message:
Fix for the "unknown packet type" problem by LGE
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_int.h,v
retrieving revision 1.77
retrieving revision 1.78
diff -u -3 -r1.77 -r1.78
--- drbd_int.h 6 Jan 2004 10:06:25 -0000 1.77
+++ drbd_int.h 22 Feb 2004 18:49:39 -0000 1.78
@@ -327,6 +327,8 @@
Drbd_CState sync_method;
struct socket *sock; /* for data/barrier/cstate/parameter packets */
struct socket *msock; /* for ping/ack (metadata) packets */
+ Drbd_Packet dont_wait_cmd; /* buffer for short commands, which */
+ int dont_wait_pending; /* need to be sent non-blocking */
kdev_t lo_device;
struct file *lo_file;
int lo_usize; /* user provided size */
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_main.c,v
retrieving revision 1.90
retrieving revision 1.91
diff -u -3 -r1.90 -r1.91
--- drbd_main.c 18 Jan 2004 20:18:19 -0000 1.90
+++ drbd_main.c 22 Feb 2004 18:49:39 -0000 1.91
@@ -508,20 +508,23 @@
}
}
-int drbd_send_cmd_dontwait(struct Drbd_Conf *mdev,Drbd_Packet_Cmd cmd, int via_msock)
+/* currently only used for write_hints which are still sent via
+ * the data socket */
+int drbd_send_cmd_dontwait(struct Drbd_Conf *mdev,Drbd_Packet_Cmd cmd)
{
- Drbd_Packet header;
+ Drbd_Packet *h = &mdev->dont_wait_cmd;
mm_segment_t oldfs;
sigset_t oldset;
struct msghdr msg;
struct iovec iov;
unsigned long flags;
int rv;
+ int via_msock = 0;
struct socket *sock = via_msock ? mdev->msock : mdev->sock;
- int header_size = sizeof(header);
+ int header_size = sizeof(*h);
if (!via_msock) {
- if (down_trylock(&mdev->send_mutex))
+ if (mdev->dont_wait_pending || down_trylock(&mdev->send_mutex))
return -EAGAIN;
};
if (!sock) {
@@ -534,13 +537,13 @@
goto out;
}
- header.command = cpu_to_be16(cmd);
- header.magic = cpu_to_be32(DRBD_MAGIC);
- header.length = 0;
+ h->command = cpu_to_be16(cmd);
+ h->magic = cpu_to_be32(DRBD_MAGIC);
+ h->length = 0;
sock->sk->allocation = GFP_DRBD;
- iov.iov_base = &header;
+ iov.iov_base = h;
iov.iov_len = header_size;
msg.msg_iov = &iov;
@@ -578,12 +581,14 @@
set_cstate(mdev,BrokenPipe);
drbd_thread_restart_nowait(&mdev->receiver);
+ } else if ( 0 < rv && rv < header_size ) {
+ mdev->dont_wait_pending = header_size - rv;
}
out:
if(!via_msock) up(&mdev->send_mutex);
- return rv == header_size ? 0 : rv;
+ return rv;
}
int drbd_send_cmd(struct Drbd_Conf *mdev,Drbd_Packet_Cmd cmd, int via_msock)
@@ -660,10 +665,10 @@
/* printk(KERN_DEBUG DEVICE_NAME": issuing a barrier\n"); */
- r=drbd_send(mdev,(Drbd_Packet*)&head,sizeof(head),0,0,0);
-
inc_pending(mdev);
+ r=drbd_send(mdev,(Drbd_Packet*)&head,sizeof(head),0,0,0);
+
return r;
}
@@ -701,7 +706,7 @@
u64 block_id)
{
Drbd_Data_Packet head;
- int ret,ok;
+ int ret,ok,pend;
head.p.command = cpu_to_be16(Data);
head.h.block_nr = cpu_to_be64(bh->b_blocknr);
@@ -713,17 +718,20 @@
_drbd_send_barrier(mdev);
}
+ pend = (mdev->net.wire_protocol != DRBD_PROT_A) ||
+ (block_id == ID_SYNCER);
+ if (pend)
+ inc_pending(mdev);
+
ret=drbd_send(mdev,(Drbd_Packet*)&head,sizeof(head),bh_kmap(bh),
bh->b_size,0);
bh_kunmap(bh);
ok=(ret == bh->b_size + sizeof(head));
- if( ok ) {
- if( mdev->net.wire_protocol != DRBD_PROT_A ||
- block_id == ID_SYNCER ) {
- inc_pending(mdev);
- }
+ if (ok) {
mdev->send_cnt+=bh->b_size>>10;
+ } else if (pend) {
+ dec_pending(mdev); // necessary ??
}
if(block_id != ID_SYNCER) {
@@ -831,15 +839,15 @@
if conf.timeout expires on sock ko_count times (and there
is no advance in communication) -> drop connection; and go into StandAlone state
*/
-int drbd_send(struct Drbd_Conf *mdev, Drbd_Packet* header, size_t header_size,
- void* data, size_t data_size, int via_msock)
+int drbd_send(struct Drbd_Conf *mdev, Drbd_Packet* header, const size_t header_size,
+ void* data, const size_t data_size, const int via_msock)
{
mm_segment_t oldfs;
sigset_t oldset;
struct msghdr msg;
- struct iovec iov[2];
+ struct iovec iov[3];
unsigned long flags;
- int rv,sent=0;
+ int rv,sent,dwp_size;
int app_got_sig=0;
struct send_timer_info ti;
struct socket *sock = via_msock ? mdev->msock : mdev->sock;
@@ -847,18 +855,31 @@
if (!sock) return -1000;
if (mdev->cstate < WFReportParams) return -1001;
- header->magic = cpu_to_be32(DRBD_MAGIC);
- header->length = cpu_to_be16(data_size);
+ header->magic = cpu_to_be32(DRBD_MAGIC);
+ header->length = cpu_to_be16(data_size);
sock->sk->allocation = GFP_DRBD;
- iov[0].iov_base = header;
- iov[0].iov_len = header_size;
- iov[1].iov_base = data;
- iov[1].iov_len = data_size;
+ dwp_size = mdev->dont_wait_pending;
+ mdev->dont_wait_pending = 0;
+
+ /* NOTE: tcp_sendmsg silently skips over iov with len == 0,
+ * the iov_base is not even dereferenced in that case.
+ * So it is absolutely legal to have only iov[1] populated.
+ * If a partially sent command is still hanging around,
+ * it is iov[0], otherwise iov[0].iov_len is 0.
+ */
+
+ iov[0].iov_base = ((char*)&mdev->dont_wait_cmd)
+ + sizeof(Drbd_Packet)-dwp_size;
+ iov[0].iov_len = dwp_size;
+ iov[1].iov_base = header;
+ iov[1].iov_len = header_size;
+ iov[2].iov_base = data;
+ iov[2].iov_len = data_size;
msg.msg_iov = iov;
- msg.msg_iovlen = data_size > 0 ? 2 : 1;
+ msg.msg_iovlen = 3;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_name = 0;
@@ -896,6 +917,7 @@
RECALC_SIGPENDING(current);
spin_unlock_irqrestore(&current->SIGMASK_LOCK, flags);
+ sent = 0;
while(1) {
rv = sock_sendmsg(sock, &msg, header_size+data_size);
if ( rv == -ERESTARTSYS) {
@@ -916,27 +938,32 @@
}
if (rv <= 0) break;
sent += rv;
- if (sent == header_size+data_size) break;
+ if (sent == dwp_size + header_size + data_size) break;
/*printk(KERN_ERR DEVICE_NAME
"%d: calling sock_sendmsg again\n",
(int)(mdev-drbd_conf));*/
- if( rv < header_size ) {
+ if (rv <= iov[0].iov_len) {
iov[0].iov_base += rv;
iov[0].iov_len -= rv;
- header_size -= rv;
- } else /* rv >= header_size */ {
- if (header_size) {
- iov[0].iov_base = iov[1].iov_base;
- iov[0].iov_len = iov[1].iov_len;
- msg.msg_iovlen = 1;
- rv -= header_size;
- header_size = 0;
- }
- iov[0].iov_base += rv;
- iov[0].iov_len -= rv;
- data_size -= rv;
+ continue;
+ }
+ rv -= iov[0].iov_len;
+ iov[0].iov_len = 0;
+ if (rv <= iov[1].iov_len ) {
+ iov[1].iov_base += rv;
+ iov[1].iov_len -= rv;
+ continue;
+ }
+ rv -= iov[1].iov_len;
+ iov[1].iov_len = 0;
+ if (iov[2].iov_len > 0) {
+ /* rv > iov[0]iov_len + iov[1].iov_len */
+ iov[2].iov_base += rv;
+ iov[2].iov_len -= rv;
+ } else {
+ printk(KERN_ALERT DEVICE_NAME ": THINKO by lge. SCREAM at me :(\n");
}
}
@@ -998,6 +1025,11 @@
drbd_thread_restart_nowait(&mdev->receiver);
}
+ /* caller does not know about our internal nonblocking
+ * buffer, and would be confused by larger return values */
+ if (sent >= dwp_size) sent -= dwp_size;
+ else sent = 0;
+
return sent;
}
@@ -1062,7 +1094,7 @@
Note: to avoid bdflush or similar to get stuck in
wait_for_tcp_memory, we need to make the write_hint command
nonblocking.
- -- lge
+ -- lge
*/
for (i = 0; i < minor_count; i++) {
@@ -1072,16 +1104,22 @@
}
}
- i = drbd_send_cmd_dontwait(mdev,WriteHint,0);
+ i = drbd_send_cmd_dontwait(mdev,WriteHint);
if (i == -EAGAIN) {
- //printk(KERN_INFO DEVICE_NAME
- // ": send_cmd_dontwait would have blocked\n");
queue_task(&mdev->write_hint_tq, &tq_disk);
} else {
- // no need for error handling here,
- // drbd_send_cmd_dontwait already does it.
clear_bit(WRITE_HINT_QUEUED, &mdev->flags);
+ } /* else if (i == sizeof(Drbd_Packet)) {
+ * sent comletely.
+ } else if (i > 0) {
+ * only partially sent.
+ * further invocations will return -EAGAIN as long
+ * as it is not fully sent by someone else.
+ } else if (i <= 0) {
+ * no need for error handling here,
+ * drbd_send_cmd_dontwait already does it.
}
+ */
}
#endif
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_receiver.c,v
retrieving revision 1.134
retrieving revision 1.135
diff -u -3 -r1.134 -r1.135
--- drbd_receiver.c 8 Feb 2004 19:39:57 -0000 1.134
+++ drbd_receiver.c 22 Feb 2004 18:49:39 -0000 1.135
@@ -819,6 +819,8 @@
drbd_thread_start(&mdev->asender);
+ mdev->dont_wait_pending = 0;
+
set_cstate(mdev,WFReportParams);
drbd_send_param(mdev);