Note: "permalinks" may not be as permanent as we would like;
direct links from old sources may well be a few messages off.
/ 2004-08-02 17:47:10 +0200 \ Horvath Szabolcs: > Hello! > > Thanks for the quick reply. > > Just now I can't log into the cluster, but I remember when > /etc/init.d/drbd start failed, > in /proc/drbd I saw lines with "unconfigured" > (no primary/secondary/unknown words), and > ps aux | grep drbd didn't show drbd threads. > > > from reading the code there again, > > we return BUSY for SET_DISK when > > - we already have a disk > > - we are currently diskless Primary > > - we are currently diskless, but already connected > > - we have an open_cnt > 1 on the device > > How can I check this? > > Insert a line with "fuser -mv /dev/nb0" before this? -> > [...] > if $DRBDADM adjust $I; then wait: maybe drbdadm itself is the problem here... I just see that we do not properly drain, fclose, and waitpid for the drbdsetup show in certain cases... yes, that probably is it. patch to drbdadm attached. Lars Ellenberg -- please use the "List-Reply" function of your email client. -------------- next part -------------- Index: user/drbdadm_adjust.c =================================================================== --- user/drbdadm_adjust.c (revision 1480) +++ user/drbdadm_adjust.c (working copy) @@ -202,6 +202,13 @@ } +/* NOTE + * return before waitpid is a BUG. "goto out;" instead! + * + * calling drbdsetup again before waitpid("drbdsetup show") has a race with + * the next ioctl failing because of the zombie still holding an open_cnt on + * the drbd device. so don't do that. 
+ */ int adm_adjust(struct d_resource* res,char* unused) { char* argv[20]; @@ -218,6 +225,7 @@ struct stat sb; int major, minor; + int err = 10; argv[argc++]=drbdsetup; argv[argc++]=res->me->device; @@ -236,11 +244,11 @@ if (stat(res->me->disk, &sb)) { PERROR("stat '%s' failed:", res->me->device); - return 10; + goto out; } if (!S_ISBLK(sb.st_mode)) { fprintf(stderr, "'%s' not a block device!\n", res->me->disk); - return 10; + goto out; } rv=m_fscanf(in,"Lower device: %d:%d (%*[^)])\n",&major,&minor); if( (rv!=2) || (((major<<8)|minor) != (int)sb.st_rdev)) do_attach=1; @@ -248,11 +256,11 @@ if (strcmp("internal", res->me->meta_disk)) { if (stat(res->me->meta_disk, &sb)) { PERROR("stat '%s' failed:", res->me->meta_disk); - return 10; + goto out; } if (!S_ISBLK(sb.st_mode)) { fprintf(stderr, "'%s' not a block device!\n", res->me->disk); - return 10; + goto out; } } else { sb.st_rdev = 0; @@ -265,7 +273,7 @@ do_attach = 1; } else { fprintf(stderr, "parse error, '%s' read, 'internal' expected\n", str1); - return 10; + goto out; } } if (rv == 2) { @@ -278,7 +286,7 @@ do_attach = 1; } else { fprintf(stderr, "parse error\n"); - return 10; + goto out; } } @@ -345,10 +353,15 @@ do_syncer |= complete(res->sync_options); } else do_syncer=1; + do_up: + err = 0; + out: + // drain, close, wait for drbdsetup to "officially die". + { static char drain[1024]; while (fgets(drain,1024,in)); } fclose(in); waitpid(pid,0,0); + if (err) return err; - do_up: if(do_attach) { if( (rv=adm_attach(res,0)) ) return rv; do_resize=0;