[DRBD-cvs] drbd by phil; some improvements by LGE [They were lost...

drbd-user@lists.linbit.com drbd-user@lists.linbit.com
Sat, 6 Mar 2004 09:45:33 +0100 (CET)


DRBD CVS committal

Author  : phil
Module  : drbd

Dir     : drbd/drbd


Modified Files:
      Tag: rel-0_7-branch
	drbd_compat_wrappers.h drbd_dsender.c drbd_int.h drbd_main.c 
	drbd_receiver.c 


Log Message:
Some improvements by LGE [they were lost in my inbox for some time...]:
* Better error reporting in drbd_req_prepare_write()
* Use of SIGHUP (as DRBD_SIGKILL, replacing SIGTERM) for DRBD's
  inter-thread synchronisation, i.e. to stop/restart its threads
* A fix to drbdsetup's "on-io-error" parser.

===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_compat_wrappers.h,v
retrieving revision 1.1.2.30
retrieving revision 1.1.2.31
diff -u -3 -r1.1.2.30 -r1.1.2.31
--- drbd_compat_wrappers.h	5 Mar 2004 13:50:03 -0000	1.1.2.30
+++ drbd_compat_wrappers.h	6 Mar 2004 08:45:28 -0000	1.1.2.31
@@ -195,7 +195,11 @@
 	D_ASSERT(buffer_locked(bh));
 	D_ASSERT(buffer_mapped(bh));
 	// D_ASSERT(buffer_dirty(bh)); // It is not true ?!?
-	D_ASSERT(buffer_uptodate(bh));
+	ERR_IF (!buffer_uptodate(bh)) {
+		ERR("[%s/%d]: bh_src->b_state=%lx bh->b_state=%lx\n",
+		    current->comm, current->pid,
+		    bh_src->b_state, bh->b_state);
+	};
 
 	// FIXME should not be necessary;
 	// remove if the assertions above do not trigger.
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_dsender.c,v
retrieving revision 1.1.2.79
retrieving revision 1.1.2.80
diff -u -3 -r1.1.2.79 -r1.1.2.80
--- drbd_dsender.c	19 Feb 2004 15:45:03 -0000	1.1.2.79
+++ drbd_dsender.c	6 Mar 2004 08:45:28 -0000	1.1.2.80
@@ -729,17 +729,15 @@
 
 	mdev->resync_timer.function = resync_timer_fn;
 	mdev->resync_timer.data = (unsigned long) mdev;
-	
+
 	for (;;) {
 		intr = down_interruptible(&mdev->data.work.s);
 
 		if (intr) {
 			D_ASSERT(intr == -EINTR);
 			LOCK_SIGMASK(current,flags);
-			if (sigismember(&current->pending.signal, SIGTERM)) {
-				sigdelset(&current->pending.signal, SIGTERM);
-				RECALC_SIGPENDING(current);
-			}
+			sigemptyset(&current->pending.signal);
+			RECALC_SIGPENDING(current);
 			UNLOCK_SIGMASK(current,flags);
 			if (thi->t_state != Running )
 				break;
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_int.h,v
retrieving revision 1.58.2.130
retrieving revision 1.58.2.131
diff -u -3 -r1.58.2.130 -r1.58.2.131
--- drbd_int.h	5 Mar 2004 13:59:51 -0000	1.58.2.130
+++ drbd_int.h	6 Mar 2004 08:45:28 -0000	1.58.2.131
@@ -78,7 +78,28 @@
 #endif
 
 #define INITIAL_BLOCK_SIZE (1<<12)  // 4K
+
+/* I don't remember why XCPU ...
+ * This is used to wake the asender,
+ * and to interrupt sending the sending task
+ * on disconnect.
+ */
 #define DRBD_SIG SIGXCPU
+
+/* This is used to stop/restart our threads.
+ * Cannot use SIGTERM nor SIGKILL, since these
+ * are sent out by init on runlevel changes
+ * I choose SIGHUP for now.
+ */
+#define DRBD_SIGKILL SIGHUP
+
+/* To temporarily block signals during network operations.
+ * as long as we send directly from make_request, I'd like to
+ * allow KILL, so the user can kill -9 hanging write processes.
+ * If it does not succeed, it _should_ timeout anyways, but...
+ */
+#define DRBD_SHUTDOWNSIGMASK sigmask(DRBD_SIG)|sigmask(DRBD_SIGKILL)
+
 #define ID_SYNCER (-1LL)
 #define ID_VACANT 0     // All EEs on the free list should have this value
                         // freshly allocated EEs get !ID_VACANT (== 1)
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_main.c,v
retrieving revision 1.73.2.136
retrieving revision 1.73.2.137
diff -u -3 -r1.73.2.136 -r1.73.2.137
--- drbd_main.c	5 Mar 2004 10:59:38 -0000	1.73.2.136
+++ drbd_main.c	6 Mar 2004 08:45:28 -0000	1.73.2.137
@@ -353,10 +353,8 @@
 	daemonize("drbd_thread");
 	// Linux 2.6.x's daemonize blocks all signals. Unblock "our" signals.
 
-	sigemptyset(&enable);
-	sigaddset(&enable,DRBD_SIG);
-	sigaddset(&enable,SIGTERM);
-	sigprocmask(SIG_UNBLOCK, &enable, NULL);	
+	siginitset(&enable, DRBD_SHUTDOWNSIGMASK );
+	sigprocmask(SIG_UNBLOCK, &enable, NULL);
 #else
 	daemonize();
 #endif
@@ -474,7 +472,9 @@
 	else
 		thi->t_state = Exiting;
 
-	drbd_queue_signal(SIGTERM,thi->task);
+	smp_mb(); /* should not be necessary, since the next
+		     instruction is spinlock, but anyways */
+	drbd_queue_signal(DRBD_SIGKILL,thi->task);
 
 	if(wait) {
 		down(&thi->mutex); // wait until thread has exited
@@ -522,11 +522,7 @@
 	h->command = cpu_to_be16(cmd);
 	h->length  = cpu_to_be16(size-sizeof(Drbd_Header));
 
-	/* as long as we send directly from make_request, I'd like to
-	 * allow KILL, so the user can kill -9 hanging write processes.
-	 * if it does not succeed, it _should_ timeout anyways, but...
-	 */
-	old_blocked = block_sigs_but(SIGKILL);
+	old_blocked = block_sigs_but(DRBD_SHUTDOWNSIGMASK);
 	sent = drbd_send(mdev,sock,h,size,msg_flags);
 	restore_old_sigset(old_blocked);
 
@@ -823,8 +819,27 @@
 	      tl_cear() will simulate a RQ_DRBD_SEND and set it out of sync
 	      for everything in the data structure.
 	*/
-	// SIGKILL: see comment in _drbd_send_cmd
-	old_blocked = block_sigs_but(SIGKILL);
+
+	/* Still called directly by drbd_make_request,
+	 * so all sorts of processes may end up here.
+	 * They may be interrupted by DRBD_SIGKILL in response to
+	 * ioctl or some other "connection loast" event.
+
+	 * FIXME
+	 * unfortunatly this may well be a user process like dd,
+	 * and interupted by a user signal.
+	 * (in 0.6.x this was handled with the "app_got_sig" cludge)
+
+	 * THINK
+	 * maybe we should block all signals (so we don't need to
+	 * wory about user signals), and use force_sig() instead
+	 * of drbd_queue_signal.
+	 *
+	 * we also should replace all "LOCK(); sigemptyset(); UNLOCK();"
+	 * with flush_signals(); ...
+	 */
+
+	old_blocked = block_sigs_but(sigmask(DRBD_SIGKILL));
 	down(&mdev->data.mutex);
 	spin_lock(&mdev->send_task_lock);
 	mdev->send_task=current;
@@ -860,11 +875,11 @@
 	p.sector   = cpu_to_be64(drbd_ee_get_sector(e));
 	p.block_id = e->block_id;
 
-	/* only called by our kernel thread.
-	 * that one might get stopped by SIGTERM in responst to
-	 * ioctl or module unload
+	/* Only called by our kernel thread.
+	 * This one may be interupted by DRBD_SIG and/or DRBD_SIGKILL
+	 * in response to ioctl or module unload.
 	 */
-	old_blocked = block_sigs_but(SIGTERM);
+	old_blocked = block_sigs_but(DRBD_SHUTDOWNSIGMASK);
 	down(&mdev->data.mutex);
 	spin_lock(&mdev->send_task_lock);
 	mdev->send_task=current;
@@ -931,10 +946,10 @@
 		 *
 		 * -EAGAIN on timeout, -EINTR on signal.
 		 */
-		/* THINK
-		 * do we need to block DRBD_SIG if sock == &meta.socket ??
-		 * otherwise wake_asender() might interrupt some send_*Ack !
-		 */
+/* THINK
+ * do we need to block DRBD_SIG if sock == &meta.socket ??
+ * otherwise wake_asender() might interrupt some send_*Ack !
+ */
 		rv = sock_sendmsg(sock, &msg, iov.iov_len );
 		if (rv == -EAGAIN) {
 			// FIXME move "retry--" into drbd_retry_send()
@@ -951,7 +966,6 @@
 	} while(sent < size);
 
 	set_fs(oldfs);
-	// unlock_kernel();
 
 	if (rv <= 0) {
 		if (rv != -EAGAIN) {
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_receiver.c,v
retrieving revision 1.97.2.114
retrieving revision 1.97.2.115
diff -u -3 -r1.97.2.114 -r1.97.2.115
--- drbd_receiver.c	2 Mar 2004 13:15:40 -0000	1.97.2.114
+++ drbd_receiver.c	6 Mar 2004 08:45:28 -0000	1.97.2.115
@@ -596,7 +596,20 @@
 			}
 		}
 		if(mdev->cstate==Unconnected) return 0;
-		if(signal_pending(current)) return 0;
+		if(signal_pending(current)) {
+			unsigned long flags;
+			LOCK_SIGMASK(current,flags);
+			sigemptyset(&current->pending.signal);
+			RECALC_SIGPENDING(current);
+			UNLOCK_SIGMASK(current,flags);
+
+			smp_mb();
+			if ((volatile int)mdev->receiver.t_state != Running)
+				return 0;
+
+			WARN("Signal pending x%lx, but t_state still Running??\n",
+				current->pending.signal.sig[0]);
+		}
 	}
 
  connected:
@@ -617,6 +630,7 @@
 	sock->sk->SK_(rcvbuf) = mdev->conf.sndbuf_size;
 	sock->sk->SK_(sndtimeo) = mdev->conf.timeout*HZ/20;
 	sock->sk->SK_(rcvtimeo) = MAX_SCHEDULE_TIMEOUT;
+	sock->sk->SK_(userlocks) |= SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK;
 
 	msock->sk->SK_(priority)=TC_PRIO_INTERACTIVE;
 	NOT_IN_26(sock->sk->tp_pinfo.af_tcp.nonagle=1;)
@@ -1572,16 +1586,16 @@
 		drbdd(mdev);
 		drbd_disconnect(mdev);
 		if (thi->t_state == Exiting) break;
-		if (thi->t_state == Restarting) {
+		else {
 			unsigned long flags;
-			thi->t_state = Running;
-
 			LOCK_SIGMASK(current,flags);
-			if (sigismember(&current->pending.signal, SIGTERM)) {
-				sigdelset(&current->pending.signal, SIGTERM);
-				RECALC_SIGPENDING(current);
-			}
+			sigemptyset(&current->pending.signal);
+			RECALC_SIGPENDING(current);
 			UNLOCK_SIGMASK(current,flags);
+
+			if (thi->t_state != Restarting)
+				ERR("unexpected thread state: %d\n", thi->t_state);
+			thi->t_state = Running;
 		}
 	}