Note: "permalinks" may not be as permanent as we would like;
direct links to old sources may well be a few messages off.
/ 2004-08-02 17:47:10 +0200
\ Horvath Szabolcs:
> Hello!
>
> Thanks for the quick reply.
>
> Just now I can't log into the cluster, but I remember when
> /etc/init.d/drbd start failed,
> in /proc/drbd I saw lines with "unconfigured"
> (no primary/secondary/unknown words), and
> ps aux | grep drbd didn't show drbd threads.
>
> > from reading the code there again,
> > we return BUSY for SET_DISK when
> > - we already have a disk
> > - we are currently diskless Primary
> > - we are currently diskless, but already connected
> > - we have an open_cnt > 1 on the device
>
> How can I check this?
>
> Insert a line with "fuser -mv /dev/nb0" before this? ->
> [...]
> if $DRBDADM adjust $I; then
Wait: maybe drbdadm itself is the problem here...
I just noticed that, in certain cases, we do not properly drain, fclose, and
waitpid for the "drbdsetup show" child process... yes, that is probably it.
A patch to drbdadm is attached.
Lars Ellenberg
--
please use the "List-Reply" function of your email client.
-------------- next part --------------
Index: user/drbdadm_adjust.c
===================================================================
--- user/drbdadm_adjust.c (revision 1480)
+++ user/drbdadm_adjust.c (working copy)
@@ -202,6 +202,13 @@
}
+/* NOTE
+ * return before waitpid is a BUG. use "goto out;" instead!
+ *
+ * calling drbdsetup again before waitpid("drbdsetup show") has a race with
+ * the next ioctl failing because of the zombie still holding an open_cnt on
+ * the drbd device. so don't do that.
+ */
int adm_adjust(struct d_resource* res,char* unused)
{
char* argv[20];
@@ -218,6 +225,7 @@
struct stat sb;
int major, minor;
+ int err = 10;
argv[argc++]=drbdsetup;
argv[argc++]=res->me->device;
@@ -236,11 +244,11 @@
if (stat(res->me->disk, &sb)) {
PERROR("stat '%s' failed:", res->me->device);
- return 10;
+ goto out;
}
if (!S_ISBLK(sb.st_mode)) {
fprintf(stderr, "'%s' not a block device!\n", res->me->disk);
- return 10;
+ goto out;
}
rv=m_fscanf(in,"Lower device: %d:%d (%*[^)])\n",&major,&minor);
if( (rv!=2) || (((major<<8)|minor) != (int)sb.st_rdev)) do_attach=1;
@@ -248,11 +256,11 @@
if (strcmp("internal", res->me->meta_disk)) {
if (stat(res->me->meta_disk, &sb)) {
PERROR("stat '%s' failed:", res->me->meta_disk);
- return 10;
+ goto out;
}
if (!S_ISBLK(sb.st_mode)) {
fprintf(stderr, "'%s' not a block device!\n", res->me->disk);
- return 10;
+ goto out;
}
} else {
sb.st_rdev = 0;
@@ -265,7 +273,7 @@
do_attach = 1;
} else {
fprintf(stderr, "parse error, '%s' read, 'internal' expected\n", str1);
- return 10;
+ goto out;
}
}
if (rv == 2) {
@@ -278,7 +286,7 @@
do_attach = 1;
} else {
fprintf(stderr, "parse error\n");
- return 10;
+ goto out;
}
}
@@ -345,10 +353,15 @@
do_syncer |= complete(res->sync_options);
} else do_syncer=1;
+ do_up:
+ err = 0;
+ out:
+ // drain, close, wait for drbdsetup to "officially die".
+ { static char drain[1024]; while (fgets(drain,1024,in)); }
fclose(in);
waitpid(pid,0,0);
+ if (err) return err;
- do_up:
if(do_attach) {
if( (rv=adm_attach(res,0)) ) return rv;
do_resize=0;