[DRBD-cvs] r1664 - in trunk: . drbd user
svn at svn.drbd.org
svn at svn.drbd.org
Mon Nov 29 16:32:44 CET 2004
Author: phil
Date: 2004-11-29 16:32:41 +0100 (Mon, 29 Nov 2004)
New Revision: 1664
Modified:
trunk/
trunk/Makefile
trunk/drbd/Makefile-2.6
trunk/drbd/drbd_bitmap.c
trunk/drbd/drbd_int.h
trunk/drbd/drbd_main.c
trunk/drbd/drbd_receiver.c
trunk/user/drbdadm_main.c
Log:
svnp run. Investigated 1654 to 1664
r1656 by lars on 2004-11-25 01:52:01 +0100 (Thu, 25 Nov 2004)
Changed paths:
M /branches/drbd-0.7/Makefile
removed debian/ subdir from .filelist,
and thus from tarball and tgz make targets
r1657 by phil on 2004-11-25 09:09:10 +0100 (Thu, 25 Nov 2004)
Changed paths:
M /branches/drbd-0.7/drbd/drbd_bitmap.c
drbd_bm_total_weight() is called from several
places with IRQs disabled! Therefore it has to
use the spin_*_irqsave|irqrestore() variants.
...from __drbd_set_in_sync(), _drbd_rs_resume(),
_drbd_rs_pause()
r1658 by phil on 2004-11-26 14:54:36 +0100 (Fri, 26 Nov 2004)
Changed paths:
M /branches/drbd-0.7/drbd/drbd_receiver.c
There was this schedule_timeout() without set_current_state().
r1659 by phil on 2004-11-26 15:46:08 +0100 (Fri, 26 Nov 2004)
Changed paths:
M /branches/drbd-0.7/drbd/drbd_int.h
M /branches/drbd-0.7/drbd/drbd_main.c
M /branches/drbd-0.7/drbd/drbd_receiver.c
The test for (rs_total != 0) was not okay. If sync was
started with the "invalidate" command on the Secondary,
it (rs_total) is set to a positive value, just after we
send the BecomeSyncSource packet to the peer.
The peer(Primary) writes its whole bitmap and sets
rs_total to a positive value afterwards.
Writes that happen on the Primary while it writes its
bitmap are mirrored to the Secondary (where they are
marked as clean in the BitMap, since rs_total is already
set here), but when the ACK comes back to the Primary
rs_total is still 0 here.
With this patch that problem is fixed, they both consider
the sync to be started when the SYNC_STARTED bit is
set on both nodes, and this happens with the first
ACK of the first RSDATA packet... So they agree on the
same logical point in time.
r1660 by phil on 2004-11-29 09:46:01 +0100 (Mon, 29 Nov 2004)
Changed paths:
M /branches/drbd-0.7/user/drbdadm_main.c
Well, there are cases where the 60 seconds are not
enough. Made it 120 Seconds...
r1661 by phil on 2004-11-29 12:15:50 +0100 (Mon, 29 Nov 2004)
Changed paths:
M /branches/drbd-0.7/drbd/Makefile-2.6
M /branches/drbd-0.7/drbd/drbd_receiver.c
Removed the "struct list_head *head" parameter from
the drbd_process_ee() function, added a "be_sleepy"
flag argument.
With this flag cleared by the asender, we avoid that the asender
thread eventually sleeps in drbd_process_ee().
This change makes a strange lockup condition go
away... where receiver and asender were both sleeping
in drbd_process_ee().
I do not know exactly how the lockup begins, but
with this change it cannot lock up.
Property changes on: trunk
___________________________________________________________________
Name: propagate:at
- 1654
+ 1664
Modified: trunk/Makefile
===================================================================
--- trunk/Makefile 2004-11-29 12:45:29 UTC (rev 1663)
+++ trunk/Makefile 2004-11-29 15:32:41 UTC (rev 1664)
@@ -120,7 +120,7 @@
.PHONY: .filelist
.filelist:
@ svn info >/dev/null || { echo "you need a svn checkout to do this." ; false ; }
- @find $$(svn st -vq | sed 's/^.\{8\} \+[0-9]\+ \+[0-9]\+ [a-z]\+ *//;') \
+ @find $$(svn st -v | sed '/^?/d;s/^.\{8\} \+[0-9]\+ \+[0-9]\+ [a-z]\+ *//;/^debian/d' ) \
\! -type d -maxdepth 0 |\
sed 's:^:drbd-$(DIST_VERSION)/:' > .filelist
@[ -s .filelist ] # assert there is something in .filelist now
Modified: trunk/drbd/Makefile-2.6
===================================================================
--- trunk/drbd/Makefile-2.6 2004-11-29 12:45:29 UTC (rev 1663)
+++ trunk/drbd/Makefile-2.6 2004-11-29 15:32:41 UTC (rev 1664)
@@ -1,4 +1,4 @@
-CFLAGS_drbd_sizeof_sanity_check.o = -Wpadded # -Werror
+#CFLAGS_drbd_sizeof_sanity_check.o = -Wpadded # -Werror
drbd-objs := drbd_sizeof_sanity_check.o \
drbd_buildtag.o drbd_bitmap.o drbd_fs.o drbd_proc.o \
Modified: trunk/drbd/drbd_bitmap.c
===================================================================
--- trunk/drbd/drbd_bitmap.c 2004-11-29 12:45:29 UTC (rev 1663)
+++ trunk/drbd/drbd_bitmap.c 2004-11-29 15:32:41 UTC (rev 1664)
@@ -385,13 +385,14 @@
{
struct drbd_bitmap *b = mdev->bitmap;
unsigned long s;
+ unsigned long flags;
D_BUG_ON(!(b && b->bm));
// MUST_BE_LOCKED(); well. yes. but ...
- spin_lock_irq(&b->bm_lock);
+ spin_lock_irqsave(&b->bm_lock,flags);
s = b->bm_set;
- spin_unlock_irq(&b->bm_lock);
+ spin_unlock_irqrestore(&b->bm_lock,flags);
return s;
}
Modified: trunk/drbd/drbd_int.h
===================================================================
--- trunk/drbd/drbd_int.h 2004-11-29 12:45:29 UTC (rev 1663)
+++ trunk/drbd/drbd_int.h 2004-11-29 15:32:41 UTC (rev 1664)
@@ -625,6 +625,7 @@
MD_IO_ALLOWED, // EXPLAIN
SENT_DISK_FAILURE, // sending it once is enough
MD_DIRTY, // current gen counts and flags not yet on disk
+ SYNC_STARTED, // Needed to agree on the exact point in time..
};
struct drbd_bitmap; // opaque for Drbd_Conf
Modified: trunk/drbd/drbd_main.c
===================================================================
--- trunk/drbd/drbd_main.c 2004-11-29 12:45:29 UTC (rev 1663)
+++ trunk/drbd/drbd_main.c 2004-11-29 15:32:41 UTC (rev 1664)
@@ -525,6 +525,7 @@
/** post-state-change actions **/
if ( os.s.conn >= SyncSource && ns.s.conn <= Connected ) {
+ clear_bit(SYNC_STARTED,&mdev->flags);
set_bit(STOP_SYNC_TIMER,&mdev->flags);
mod_timer(&mdev->resync_timer,jiffies);
}
Modified: trunk/drbd/drbd_receiver.c
===================================================================
--- trunk/drbd/drbd_receiver.c 2004-11-29 12:45:29 UTC (rev 1663)
+++ trunk/drbd/drbd_receiver.c 2004-11-29 15:32:41 UTC (rev 1664)
@@ -237,7 +237,7 @@
#define GFP_TRY ( __GFP_HIGHMEM | __GFP_NOWARN )
-STATIC int _drbd_process_ee(drbd_dev *mdev,struct list_head *head);
+STATIC int _drbd_process_ee(drbd_dev *mdev, int be_sleepy);
/**
* drbd_get_ee: Returns an Tl_epoch_entry; might sleep. Fails only if
@@ -257,7 +257,7 @@
spin_lock_irq(&mdev->ee_lock);
}
- if(list_empty(&mdev->free_ee)) _drbd_process_ee(mdev,&mdev->done_ee);
+ if(list_empty(&mdev->free_ee)) _drbd_process_ee(mdev,1);
if(list_empty(&mdev->free_ee)) {
for (;;) {
@@ -282,7 +282,7 @@
}
// finish wait is inside, so that we are TASK_RUNNING
// in _drbd_process_ee (which might sleep by itself.)
- _drbd_process_ee(mdev,&mdev->done_ee);
+ _drbd_process_ee(mdev,1);
}
finish_wait(&mdev->ee_wait, &wait);
}
@@ -353,9 +353,10 @@
from this function. Note, this function is called from all three
threads (receiver, worker and asender). To ensure this I only allow
one thread at a time in the body of the function */
-STATIC int _drbd_process_ee(drbd_dev *mdev,struct list_head *head)
+STATIC int _drbd_process_ee(drbd_dev *mdev, int be_sleepy)
{
struct Tl_epoch_entry *e;
+ struct list_head *head = &mdev->done_ee;
struct list_head *le;
int ok=1;
int got_sig;
@@ -365,6 +366,10 @@
reclaim_net_ee(mdev);
if( test_and_set_bit(PROCESS_EE_RUNNING,&mdev->flags) ) {
+ if(!be_sleepy) {
+ clear_bit(PROCESS_EE_RUNNING,&mdev->flags);
+ return 3;
+ }
spin_unlock_irq(&mdev->ee_lock);
got_sig = wait_event_interruptible(mdev->ee_wait,
test_and_set_bit(PROCESS_EE_RUNNING,&mdev->flags) == 0);
@@ -388,11 +393,11 @@
return ok;
}
-STATIC int drbd_process_ee(drbd_dev *mdev,struct list_head *head)
+STATIC int drbd_process_ee(drbd_dev *mdev, int be_sleepy)
{
int rv;
spin_lock_irq(&mdev->ee_lock);
- rv=_drbd_process_ee(mdev,head);
+ rv=_drbd_process_ee(mdev,be_sleepy);
spin_unlock_irq(&mdev->ee_lock);
return rv;
}
@@ -638,7 +643,7 @@
for (retry=1; retry <= 10; retry++) {
// give the other side time to call
// bind() & listen()
- current->state = TASK_INTERRUPTIBLE;
+ set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ / 10);
msock=drbd_try_connect(mdev);
if(msock) goto connected;
@@ -754,7 +759,7 @@
drbd_wait_ee(mdev,&mdev->active_ee);
spin_lock_irq(&mdev->ee_lock);
- rv = _drbd_process_ee(mdev,&mdev->done_ee);
+ rv = _drbd_process_ee(mdev,1);
epoch_size=mdev->epoch_size;
mdev->epoch_size=0;
@@ -851,6 +856,7 @@
*/
}
ok = drbd_send_ack(mdev,WriteAck,e);
+ __set_bit(SYNC_STARTED,&mdev->flags);
} else {
ok = drbd_send_ack(mdev,NegAck,e);
ok&= drbd_io_error(mdev);
@@ -971,7 +977,7 @@
if(mdev->conf.wire_protocol == DRBD_PROT_C) {
if(likely(drbd_bio_uptodate(&e->private_bio))) {
ok=drbd_send_ack(mdev,WriteAck,e);
- if (ok && mdev->rs_total)
+ if (ok && test_bit(SYNC_STARTED,&mdev->flags) )
drbd_set_in_sync(mdev,sector,drbd_ee_get_size(e));
} else {
ok = drbd_send_ack(mdev,NegAck,e);
@@ -1682,6 +1688,7 @@
break;
} else {
spin_unlock(&mdev->send_task_lock);
+ set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ / 10);
}
}
@@ -1928,6 +1935,7 @@
if( is_syncer_blk(mdev,p->block_id)) {
drbd_set_in_sync(mdev,sector,blksize);
+ __set_bit(SYNC_STARTED,&mdev->flags);
} else {
req=(drbd_request_t*)(long)p->block_id;
@@ -1935,7 +1943,7 @@
drbd_end_req(req, RQ_DRBD_SENT, 1, sector);
- if (mdev->rs_total &&
+ if (test_bit(SYNC_STARTED,&mdev->flags) &&
mdev->conf.wire_protocol == DRBD_PROT_C)
drbd_set_in_sync(mdev,sector,blksize);
}
@@ -2071,7 +2079,7 @@
*/
set_bit(SIGNAL_ASENDER, &mdev->flags);
- if (!drbd_process_ee(mdev,&mdev->done_ee)) goto err;
+ if (!drbd_process_ee(mdev,0)) goto err;
rv = drbd_recv_short(mdev,buf,expect-received);
clear_bit(SIGNAL_ASENDER, &mdev->flags);
Modified: trunk/user/drbdadm_main.c
===================================================================
--- trunk/user/drbdadm_main.c 2004-11-29 12:45:29 UTC (rev 1663)
+++ trunk/user/drbdadm_main.c 2004-11-29 15:32:41 UTC (rev 1664)
@@ -398,7 +398,7 @@
alarm_raised=0;
switch(flags) {
case SLEEPS_SHORT: timeout = 5; break;
- case SLEEPS_LONG: timeout = 60; break;
+ case SLEEPS_LONG: timeout = 120; break;
case SLEEPS_VERY_LONG: timeout = 600; break;
default:
fprintf(stderr,"logic bug in %s:%d\n",__FILE__,__LINE__);
More information about the drbd-cvs
mailing list