[DRBD-cvs] svn commit by phil - r2530 - trunk/drbd - 1) Added tracepoints for EEs with Simon's great tracing

drbd-cvs at lists.linbit.com drbd-cvs at lists.linbit.com
Fri Oct 13 16:38:09 CEST 2006


Author: phil
Date: 2006-10-13 16:38:08 +0200 (Fri, 13 Oct 2006)
New Revision: 2530

Modified:
   trunk/drbd/drbd_int.h
   trunk/drbd/drbd_receiver.c
   trunk/drbd/drbd_worker.c
Log:
1) Added tracepoints for EEs with Simon's great tracing framework.

2) There was a race condition that could cause the asender to
   miss wakeup events. The io-latency-test triggered this race
   rather quickly.
      
   In the asender thread:
	 
   process_done_ee();
   set_bit(SIGNAL_ASENDER);
   receive_from_tcp();
		  
   Some IO completion context:
   list_add(done_ee);
   if (test_bit(SIGNAL_ASENDER)) force_sig();
			   
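   Now, if the IO completion happens after process_done_ee() but before
   set_bit(), the wakeup event is lost: test_bit() still sees the flag
   cleared, so force_sig() is not called, and the asender goes on to
   receive_from_tcp() with an unprocessed entry on the done_ee list.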
  
   The solution is to check whether the done_ee list is empty after we
   have re-enabled signal delivery, and to process it again if it is not.


Modified: trunk/drbd/drbd_int.h
===================================================================
--- trunk/drbd/drbd_int.h	2006-10-13 07:39:38 UTC (rev 2529)
+++ trunk/drbd/drbd_int.h	2006-10-13 14:38:08 UTC (rev 2530)
@@ -1177,6 +1177,7 @@
 	TraceTypeRq     = 0x00000002,
 	TraceTypeUuid	= 0x00000004,
 	TraceTypeResync = 0x00000008,
+	TraceTypeEE     = 0x00000010,
 };
 
 static inline int

Modified: trunk/drbd/drbd_receiver.c
===================================================================
--- trunk/drbd/drbd_receiver.c	2006-10-13 07:39:38 UTC (rev 2529)
+++ trunk/drbd/drbd_receiver.c	2006-10-13 14:38:08 UTC (rev 2530)
@@ -281,6 +281,11 @@
 	e->barrier_nr2 = 0;
 	e->flags = 0;
 
+	MTRACE(TraceTypeEE,TraceLvlAll,
+	       INFO("allocated EE sec=%lld size=%d ee=%p\n",
+		    (long long)sector,data_size,e);
+	       );
+
 	return e;
 
  fail2:
@@ -300,6 +305,11 @@
 	struct bio_vec *bvec;
 	int i;
 
+	MTRACE(TraceTypeEE,TraceLvlAll,
+	       INFO("Free EE sec=%lld size=%d ee=%p\n",
+		    (long long)e->sector,e->size,e);
+	       );
+
 	__bio_for_each_segment(bvec, bio, i, 0) {
 		drbd_pp_free(mdev,bvec->bv_page);
 	}
@@ -378,6 +388,10 @@
 	 * both ignore the last argument.
 	 */
 	list_for_each_entry_safe(e, t, &work_list, w.list) {
+		MTRACE(TraceTypeEE,TraceLvlAll,
+		       INFO("Process EE on done_ee sec=%lld size=%d ee=%p\n",
+			    (long long)e->sector,e->size,e);
+			);
 		// list_del not necessary, next/prev members not touched
 		ok = ok && e->w.cb(mdev,&e->w,0);
 		drbd_free_ee(mdev,e);
@@ -1036,6 +1050,10 @@
 	list_add(&e->w.list,&mdev->sync_ee);
 	spin_unlock_irq(&mdev->req_lock);
 
+	MTRACE(TraceTypeEE,TraceLvlAll,
+	       INFO("submit EE (RS)WRITE sec=%lld size=%d ee=%p\n",
+		    (long long)e->sector,e->size,e);
+	       );
 	drbd_generic_make_request(WRITE,DRBD_FAULT_RS_WR,e->private_bio);
 	/* accounting done in endio */
 
@@ -1443,6 +1461,10 @@
 		drbd_al_begin_io(mdev, e->sector);
 	}
 
+	MTRACE(TraceTypeEE,TraceLvlAll,
+	       INFO("submit EE (DATA)WRITE sec=%lld size=%d ee=%p\n",
+		    (long long)e->sector,e->size,e);
+	       );
 	/* FIXME drbd_al_begin_io in case we have two primaries... */
 	drbd_generic_make_request(WRITE,DRBD_FAULT_DT_WR,e->private_bio);
 	/* accounting done in endio */
@@ -1534,6 +1556,11 @@
 	spin_unlock_irq(&mdev->req_lock);
 
 	inc_unacked(mdev);
+
+	MTRACE(TraceTypeEE,TraceLvlAll,
+	       INFO("submit EE READ sec=%lld size=%d ee=%p\n",
+		    (long long)e->sector,e->size,e);
+	       );
 	/* FIXME actually, it could be a READA originating from the peer ... */
 	drbd_generic_make_request(READ,fault_type,e->private_bio);
 	maybe_kick_lo(mdev);
@@ -3075,6 +3102,7 @@
 	int received = 0;
 	int expect   = sizeof(Drbd_Header);
 	int cmd      = -1;
+	int empty;
 
 	static struct asender_cmd asender_tbl[] = {
 		[Ping]      ={ sizeof(Drbd_Header),           got_Ping },
@@ -3103,9 +3131,16 @@
 				mdev->net_conf->timeout*HZ/20;
 		}
 
-		if (!drbd_process_done_ee(mdev)) goto err;
-		set_bit(SIGNAL_ASENDER, &mdev->flags);
-
+		while(1) {
+			if (!drbd_process_done_ee(mdev)) goto err;
+			set_bit(SIGNAL_ASENDER, &mdev->flags);
+			spin_lock_irq(&mdev->req_lock);
+			empty = list_empty(&mdev->done_ee);
+			spin_unlock_irq(&mdev->req_lock);
+			if(empty) break;
+			clear_bit(SIGNAL_ASENDER, &mdev->flags);
+			flush_signals(current);
+		}
 		rv = drbd_recv_short(mdev, mdev->meta.socket,
 				     buf,expect-received);
 		clear_bit(SIGNAL_ASENDER, &mdev->flags);

Modified: trunk/drbd/drbd_worker.c
===================================================================
--- trunk/drbd/drbd_worker.c	2006-10-13 07:39:38 UTC (rev 2529)
+++ trunk/drbd/drbd_worker.c	2006-10-13 14:38:08 UTC (rev 2530)
@@ -90,6 +90,11 @@
 	drbd_chk_io_error(mdev,error,FALSE);
 	drbd_queue_work(&mdev->data.work,&e->w);
 	dec_local(mdev);
+
+	MTRACE(TraceTypeEE,TraceLvlAll,
+	       INFO("Moved EE (READ) to worker sec=%lld size=%d ee=%p\n",
+		    (long long)e->sector,e->size,e);
+	       );
 	return 0;
 }
 
@@ -138,6 +143,11 @@
 
 	wake_asender(mdev);
 	dec_local(mdev);
+
+	MTRACE(TraceTypeEE,TraceLvlAll,
+	       INFO("Moved EE (WRITE) to done_ee sec=%lld size=%d ee=%p\n",
+		    (long long)e->sector,e->size,e);
+	       );
 	return 0;
 }
 



More information about the drbd-cvs mailing list