[DRBD-cvs] drbd by phil; * Do not use zero-copy IO with protocol ...

drbd-user@lists.linbit.com drbd-user@lists.linbit.com
Wed, 30 Jun 2004 11:12:26 +0200 (CEST)


DRBD CVS committal

Author  : phil
Module  : drbd

Dir     : drbd/drbd


Modified Files:
      Tag: rel-0_7-branch
	drbd_compat_wrappers.h drbd_int.h drbd_main.c 


Log Message:
* Do not use zero-copy IO with protocol A (since it could cause
  data corruption). 
* It is safe to use zero-copy IO with protocols B and C!

===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_compat_wrappers.h,v
retrieving revision 1.1.2.47
retrieving revision 1.1.2.48
diff -u -3 -r1.1.2.47 -r1.1.2.48
--- drbd_compat_wrappers.h	25 Jun 2004 09:44:20 -0000	1.1.2.47
+++ drbd_compat_wrappers.h	30 Jun 2004 09:12:20 -0000	1.1.2.48
@@ -279,11 +279,6 @@
 	size_t size = bh->b_size;
 	int offset;
 
-	/*
-	 * CAUTION I do not yet understand this completely.
-	 * I thought I have to kmap the page first... ?
-	 * hm. obviously the tcp stack kmaps internally somewhere.
-	 */
 	if (PageHighMem(page))
 		offset = (int)(long)bh->b_data;
 	else
@@ -292,6 +287,23 @@
 	return _drbd_send_page(mdev,page,offset,size);
 }
 
+static inline int _drbd_send_bio(drbd_dev *mdev, struct buffer_head *bh)
+{
+	struct page *page = bh->b_page;
+	size_t size = bh->b_size;
+	int offset;
+	int ret;
+
+	if (PageHighMem(page))
+		offset = (int)(long)bh->b_data;
+	else
+		offset = (long)bh->b_data - (long)page_address(page);
+
+	ret = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0);
+	kunmap(page);
+	return ret;
+}
+
 #else
 // LINUX_VERSION_CODE > 2,5,0
 
@@ -590,4 +602,18 @@
 	struct bio_vec *bvec = bio_iovec_idx(bio, bio->bi_idx);
 	return _drbd_send_page(mdev,bvec->bv_page,bvec->bv_offset,bvec->bv_len);
 }
+
+static inline int _drbd_send_bio(drbd_dev *mdev, struct bio *bio)
+{
+	struct bio_vec *bvec = bio_iovec_idx(bio, bio->bi_idx);
+	struct page *page = bvec->bv_page;
+	size_t size = bvec->bv_len;
+	int offset = bvec->bv_offset;
+	int ret;
+
+	ret = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0);
+	kunmap(page);
+	return ret;
+}
+
 #endif
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_int.h,v
retrieving revision 1.58.2.178
retrieving revision 1.58.2.179
diff -u -3 -r1.58.2.178 -r1.58.2.179
--- drbd_int.h	26 Jun 2004 08:05:40 -0000	1.58.2.178
+++ drbd_int.h	30 Jun 2004 09:12:21 -0000	1.58.2.179
@@ -762,8 +762,8 @@
 extern void tl_clear(drbd_dev *mdev);
 extern int tl_dependence(drbd_dev *mdev, drbd_request_t * item);
 extern void drbd_free_sock(drbd_dev *mdev);
-/* extern int drbd_send(drbd_dev *mdev, struct socket *sock,
-	      void* buf, size_t size, unsigned msg_flags); */
+extern int drbd_send(drbd_dev *mdev, struct socket *sock,
+		     void* buf, size_t size, unsigned msg_flags);
 extern int drbd_send_param(drbd_dev *mdev, int flags);
 extern int drbd_send_cmd(drbd_dev *mdev, struct socket *sock,
 			  Drbd_Packet_Cmd cmd, Drbd_Header *h, size_t size);
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_main.c,v
retrieving revision 1.73.2.192
retrieving revision 1.73.2.193
diff -u -3 -r1.73.2.192 -r1.73.2.193
--- drbd_main.c	26 Jun 2004 08:05:40 -0000	1.73.2.192
+++ drbd_main.c	30 Jun 2004 09:12:21 -0000	1.73.2.193
@@ -94,8 +94,6 @@
 STATIC int drbd_open(struct inode *inode, struct file *file);
 STATIC int drbd_close(struct inode *inode, struct file *file);
 
-STATIC int drbd_send(drbd_dev*,struct socket*,void*,size_t,unsigned);
-
 #ifdef DEVICE_REQUEST
 #undef DEVICE_REQUEST
 #endif
@@ -864,10 +862,7 @@
 	return drop_it; /* && (mdev->state == Primary) */;
 }
 
-#if 1
-/* We have the following problem with zero copy network IO:
-   
-   The idea of sendpage seems to be to put some kind of reference 
+/* The idea of sendpage seems to be to put some kind of reference 
    to the page into the skb, and to hand it over to the NIC. In 
    this process get_page() gets called.
 
@@ -881,9 +876,8 @@
    But this means that in protocol A we might signal IO completion too early !
 
    In order not to corrupt data during a full sync we must make sure
-   that we do not reuse our own buffer pages (EEs) to early. 
-   Have a look at drbd_get_ee() where we check if the count of the page
-   has already dropped to 1 .
+   that we do not reuse our own buffer pages (EEs) to early, therefore
+   we have the net_ee list. 
 */
 int _drbd_send_page(drbd_dev *mdev, struct page *page,
 		    int offset, size_t size)
@@ -896,9 +890,12 @@
 	spin_unlock(&mdev->send_task_lock);
 
 	do {
-		sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page, offset, len, MSG_NOSIGNAL);
+		sent = mdev->data.socket->ops->sendpage(mdev->data.socket,page,
+							offset,len,
+							MSG_NOSIGNAL);
 		if (sent == -EAGAIN) {
-			if (we_should_drop_the_connection(mdev,mdev->data.socket))
+			if (we_should_drop_the_connection(mdev,
+							  mdev->data.socket))
 				break;
 			else
 				continue;
@@ -922,16 +919,6 @@
 		mdev->send_cnt += size>>9;
 	return ok;
 }
-#else
-int _drbd_send_page(drbd_dev *mdev, struct page *page,
-		    int offset, size_t size)
-{
-	int ret;
-	ret = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0);
-	kunmap(page);
-	return ret;
-}
-#endif
 
 // Used to send write requests: bh->b_rsector !!
 int drbd_send_dblock(drbd_dev *mdev, drbd_request_t *req)
@@ -986,7 +973,11 @@
 		set_bit(UNPLUG_REMOTE,&mdev->flags);
 		ok = drbd_send(mdev,mdev->data.socket,&p,sizeof(p),MSG_MORE) == sizeof(p);
 		if(ok) {
-			ok = _drbd_send_zc_bio(mdev,&req->private_bio);
+			if(mdev->conf.wire_protocol == DRBD_PROT_A) {
+				ok = _drbd_send_bio(mdev,&req->private_bio);
+			} else {
+				ok = _drbd_send_zc_bio(mdev,&req->private_bio);
+			}
 		}
 		if(!ok) tl_cancel(mdev,req);
 	}