[DRBD-cvs] drbd by phil; Fix for the "unknown packet type" problem by LGE

drbd-user@lists.linbit.com drbd-user@lists.linbit.com
Sun, 22 Feb 2004 19:49:44 +0100 (CET)


DRBD CVS committal

Author  : phil
Module  : drbd

Dir     : drbd/drbd


Modified Files:
	drbd_int.h drbd_main.c drbd_receiver.c 


Log Message:
Fix for the "unknown packet type" problem by LGE

===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_int.h,v
retrieving revision 1.77
retrieving revision 1.78
diff -u -3 -r1.77 -r1.78
--- drbd_int.h	6 Jan 2004 10:06:25 -0000	1.77
+++ drbd_int.h	22 Feb 2004 18:49:39 -0000	1.78
@@ -327,6 +327,8 @@
 	Drbd_CState sync_method;
 	struct socket *sock;  /* for data/barrier/cstate/parameter packets */
 	struct socket *msock; /* for ping/ack (metadata) packets */
+	Drbd_Packet dont_wait_cmd;     /* buffer for short commands, which */
+	int         dont_wait_pending; /* need to be sent non-blocking */
 	kdev_t lo_device;
 	struct file *lo_file;
 	int lo_usize;   /* user provided size */
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_main.c,v
retrieving revision 1.90
retrieving revision 1.91
diff -u -3 -r1.90 -r1.91
--- drbd_main.c	18 Jan 2004 20:18:19 -0000	1.90
+++ drbd_main.c	22 Feb 2004 18:49:39 -0000	1.91
@@ -508,20 +508,23 @@
 	}
 }
 
-int drbd_send_cmd_dontwait(struct Drbd_Conf *mdev,Drbd_Packet_Cmd cmd, int via_msock)
+/* currently only used for write_hints which are still sent via
+ * the data socket */
+int drbd_send_cmd_dontwait(struct Drbd_Conf *mdev,Drbd_Packet_Cmd cmd)
 {
-	Drbd_Packet header;
+	Drbd_Packet *h = &mdev->dont_wait_cmd;
 	mm_segment_t oldfs;
 	sigset_t oldset;
 	struct msghdr msg;
 	struct iovec iov;
 	unsigned long flags;
 	int rv;
+	int via_msock = 0;
 	struct socket *sock = via_msock ? mdev->msock : mdev->sock;
-	int header_size = sizeof(header);
+	int header_size = sizeof(*h);
 
 	if (!via_msock) {
-		if (down_trylock(&mdev->send_mutex))
+		if (mdev->dont_wait_pending || down_trylock(&mdev->send_mutex))
 			return -EAGAIN;
 	};
 	if (!sock) {
@@ -534,13 +537,13 @@
 		goto out;
 	}
 
-	header.command = cpu_to_be16(cmd);
-	header.magic   =  cpu_to_be32(DRBD_MAGIC);
-	header.length  = 0;
+	h->command = cpu_to_be16(cmd);
+	h->magic   = cpu_to_be32(DRBD_MAGIC);
+	h->length  = 0;
 
 	sock->sk->allocation = GFP_DRBD;
 
-	iov.iov_base = &header;
+	iov.iov_base = h;
 	iov.iov_len  = header_size;
 
 	msg.msg_iov = &iov;
@@ -578,12 +581,14 @@
 
 		set_cstate(mdev,BrokenPipe);
 		drbd_thread_restart_nowait(&mdev->receiver);
+	} else if ( 0 < rv && rv < header_size ) {
+		mdev->dont_wait_pending = header_size - rv;
 	}
 
 out:
 	if(!via_msock) up(&mdev->send_mutex);
 
-	return rv == header_size ? 0 : rv;
+	return rv;
 }
 
 int drbd_send_cmd(struct Drbd_Conf *mdev,Drbd_Packet_Cmd cmd, int via_msock)
@@ -660,10 +665,10 @@
 
 	/* printk(KERN_DEBUG DEVICE_NAME": issuing a barrier\n"); */
 
-	r=drbd_send(mdev,(Drbd_Packet*)&head,sizeof(head),0,0,0);
-
 	inc_pending(mdev);
 
+	r=drbd_send(mdev,(Drbd_Packet*)&head,sizeof(head),0,0,0);
+
 	return r;
 }
 
@@ -701,7 +706,7 @@
 			  u64 block_id)
 {
         Drbd_Data_Packet head;
-	int ret,ok;
+	int ret,ok,pend;
 
 	head.p.command = cpu_to_be16(Data);
 	head.h.block_nr = cpu_to_be64(bh->b_blocknr);
@@ -713,17 +718,20 @@
 	        _drbd_send_barrier(mdev);
 	}
 
+	pend = (mdev->net.wire_protocol != DRBD_PROT_A) ||
+	       (block_id == ID_SYNCER);
+	if (pend)
+		inc_pending(mdev);
+
 	ret=drbd_send(mdev,(Drbd_Packet*)&head,sizeof(head),bh_kmap(bh),
 		      bh->b_size,0);
 	bh_kunmap(bh);
 	ok=(ret == bh->b_size + sizeof(head));
 
-	if( ok ) {
-		if( mdev->net.wire_protocol != DRBD_PROT_A ||
-		    block_id == ID_SYNCER )  {
-			inc_pending(mdev);
-		}
+	if (ok) {
 		mdev->send_cnt+=bh->b_size>>10;
+	} else if (pend) {
+		dec_pending(mdev); // necessary ??
 	}
 
 	if(block_id != ID_SYNCER) {
@@ -831,15 +839,15 @@
   if conf.timeout expires on sock ko_count times (and there
   is no advance in communication) -> drop connection; and go into StandAlone state
 */
-int drbd_send(struct Drbd_Conf *mdev, Drbd_Packet* header, size_t header_size,
-	      void* data, size_t data_size, int via_msock)
+int drbd_send(struct Drbd_Conf *mdev, Drbd_Packet* header, const size_t header_size,
+	      void* data, const size_t data_size, const int via_msock)
 {
 	mm_segment_t oldfs;
 	sigset_t oldset;
 	struct msghdr msg;
-	struct iovec iov[2];
+	struct iovec iov[3];
 	unsigned long flags;
-	int rv,sent=0;
+	int rv,sent,dwp_size;
 	int app_got_sig=0;
 	struct send_timer_info ti;
 	struct socket *sock = via_msock ? mdev->msock : mdev->sock;
@@ -847,18 +855,31 @@
 	if (!sock) return -1000;
 	if (mdev->cstate < WFReportParams) return -1001;
 
-	header->magic  =  cpu_to_be32(DRBD_MAGIC);
-	header->length  = cpu_to_be16(data_size);
+	header->magic  = cpu_to_be32(DRBD_MAGIC);
+	header->length = cpu_to_be16(data_size);
 
 	sock->sk->allocation = GFP_DRBD;
 
-	iov[0].iov_base = header;
-	iov[0].iov_len = header_size;
-	iov[1].iov_base = data;
-	iov[1].iov_len = data_size;
+	dwp_size = mdev->dont_wait_pending;
+	mdev->dont_wait_pending = 0;
+
+	/* NOTE: tcp_sendmsg silently skips over iov with len == 0,
+	 * the iov_base is not even dereferenced in that case.
+	 * So it is absolutely legal to have only iov[1] populated.
+	 * If a partially sent command is still hanging around,
+	 * it is iov[0], otherwise iov[0].iov_len is 0.
+	 */
+
+	iov[0].iov_base = ((char*)&mdev->dont_wait_cmd)
+	                 + sizeof(Drbd_Packet)-dwp_size;
+	iov[0].iov_len  = dwp_size;
+	iov[1].iov_base = header;
+	iov[1].iov_len  = header_size;
+	iov[2].iov_base = data;
+	iov[2].iov_len  = data_size;
 
 	msg.msg_iov = iov;
-	msg.msg_iovlen = data_size > 0 ? 2 : 1;
+	msg.msg_iovlen = 3;
 	msg.msg_control = NULL;
 	msg.msg_controllen = 0;
 	msg.msg_name = 0;
@@ -896,6 +917,7 @@
 	RECALC_SIGPENDING(current);
 	spin_unlock_irqrestore(&current->SIGMASK_LOCK, flags);
 
+	sent = 0;
 	while(1) {
 		rv = sock_sendmsg(sock, &msg, header_size+data_size);
 		if ( rv == -ERESTARTSYS) {
@@ -916,27 +938,32 @@
 		}
 		if (rv <= 0) break;
 		sent += rv;
-		if (sent == header_size+data_size) break;
+		if (sent == dwp_size + header_size + data_size) break;
 
 		/*printk(KERN_ERR DEVICE_NAME
 		       "%d: calling sock_sendmsg again\n",
 		       (int)(mdev-drbd_conf));*/
 
-		if( rv < header_size ) {
+		if (rv <= iov[0].iov_len) {
 			iov[0].iov_base += rv;
 			iov[0].iov_len  -= rv;
-			header_size -= rv;
-		} else /* rv >= header_size */ {
-			if (header_size) {
-				iov[0].iov_base = iov[1].iov_base;
-				iov[0].iov_len = iov[1].iov_len;
-				msg.msg_iovlen = 1;
-				rv -= header_size;
-				header_size = 0;
-			}
-			iov[0].iov_base += rv;
-			iov[0].iov_len  -= rv;
-			data_size -= rv;
+			continue;
+		}
+		rv -= iov[0].iov_len;
+		iov[0].iov_len   = 0;
+		if (rv <= iov[1].iov_len ) {
+			iov[1].iov_base += rv;
+			iov[1].iov_len  -= rv;
+			continue;
+		}
+		rv -= iov[1].iov_len;
+		iov[1].iov_len   = 0;
+		if (iov[2].iov_len > 0) {
+			/* rv > iov[0].iov_len + iov[1].iov_len */
+			iov[2].iov_base += rv;
+			iov[2].iov_len  -= rv;
+		} else {
+			printk(KERN_ALERT DEVICE_NAME ": THINKO by lge. SCREAM at me :(\n");
 		}
 	}
 
@@ -998,6 +1025,11 @@
 		drbd_thread_restart_nowait(&mdev->receiver);
 	}
 
+	/* caller does not know about our internal nonblocking
+	 * buffer, and would be confused by larger return values */
+	if (sent >= dwp_size) sent -= dwp_size;
+	else sent = 0;
+
 	return sent;
 }
 
@@ -1062,7 +1094,7 @@
 	   Note: to avoid bdflush or similar to get stuck in
 	   wait_for_tcp_memory, we need to make the write_hint command
 	   nonblocking.
-	   			-- lge
+				-- lge
 	*/
 
 	for (i = 0; i < minor_count; i++) {
@@ -1072,16 +1104,22 @@
 		}
 	}
 
-	i = drbd_send_cmd_dontwait(mdev,WriteHint,0);
+	i = drbd_send_cmd_dontwait(mdev,WriteHint);
 	if (i == -EAGAIN) {
-		//printk(KERN_INFO DEVICE_NAME
-		//       ": send_cmd_dontwait would have blocked\n");
 		queue_task(&mdev->write_hint_tq, &tq_disk);
 	} else {
-		// no need for error handling here,
-		// drbd_send_cmd_dontwait already does it.
 		clear_bit(WRITE_HINT_QUEUED, &mdev->flags);
+	} /* else if (i == sizeof(Drbd_Packet)) {
+	   *	sent completely.
+	}    else if (i > 0) {
+	   *	only partially sent.
+	   *	further invocations will return -EAGAIN as long
+	   *	as it is not fully sent by someone else.
+	}    else if (i <= 0) {
+	   *	no need for error handling here,
+	   *	drbd_send_cmd_dontwait already does it.
 	}
+	*/
 }
 #endif
 
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_receiver.c,v
retrieving revision 1.134
retrieving revision 1.135
diff -u -3 -r1.134 -r1.135
--- drbd_receiver.c	8 Feb 2004 19:39:57 -0000	1.134
+++ drbd_receiver.c	22 Feb 2004 18:49:39 -0000	1.135
@@ -819,6 +819,8 @@
 
 	drbd_thread_start(&mdev->asender);
 
+	mdev->dont_wait_pending = 0;
+
 	set_cstate(mdev,WFReportParams);
 	drbd_send_param(mdev);