[Drbd-dev] [CASE-41] After reconnecting, although OOS remains, the primary does not start resynchronization or stays in AHEAD mode.
Jaeheon Kim
jhkim at mantech.co.kr
Mon Apr 18 16:05:38 CEST 2016
Dear Phil,
We wrote some code to work around the problem of out-of-sync (OOS) blocks remaining.
If the replication state is L_AHEAD when conn_try_disconnect() is called, we have to
wait until the state changes to SyncSource and the meta-data has been flushed by drbd_md_sync().
What do you think about our temporary workaround?
Thanks.
PS. source code:
1. drbd/drbd_int.h ========================================
struct {/* sender todo per peer_device */
bool was_ahead;
} todo;
#ifdef _WIN32_OOS_TEST
wait_queue_head_t resync_start_wait;
int resync_start_flag;
#endif
};
struct submit_worker {
2. drbd/drbd_main.c ======================================
peer_device->resync_finished_pdsk = D_UNKNOWN;
INIT_WORK(&peer_device->send_acks_work, drbd_send_acks_wf);
#ifdef _WIN32_OOS_TEST
init_waitqueue_head(&peer_device->resync_start_wait);
peer_device->resync_start_flag = 0;
#endif
return peer_device;
}
3. drbd/drbd_nl.c ======================================
static enum drbd_state_rv conn_try_disconnect(struct drbd_connection
*connection, bool force)
{
struct drbd_resource *resource = connection->resource;
enum drbd_state_rv rv;
#ifdef _WIN32_OOS_TEST
// First, Check whether reresync-action occurred or not after AHEAD
{
struct drbd_peer_device *peer_device = conn_peer_device(connection, 0);
DbgPrint("_WIN32_OOS_TEST:conn_try_disconnect: Please check whether
resync-action occurred or not.");
if (peer_device->repl_state[NOW] == L_AHEAD) // check AHEAD status only
{
if (peer_device->resync_start_flag > 0)
{
DbgPrint("_WIN32_OOS_TEST: resync action started already. flag=%d\n",
peer_device->resync_start_flag);
}
else
{
DbgPrint("_WIN32_OOS_TEST: Wait for resync start...\n");// It will update
status and meta data on each node, maybe.
long t = 0; // if 5 seconds exceeding, drbdadm disconnect CLI will be
timeout.
wait_event_timeout(t, peer_device->resync_start_wait,
(peer_device->resync_start_flag > 0), 3000);
if (t == 0)
{
DbgPrint("_WIN32_OOS_TEST: timeout! t=%d. No resync_start_wait event. So,
You can see AHEAD pending problem\n", t);
}
else
{
DbgPrint("_WIN32_OOS_TEST: OK! got it! time=%d\n", t);
}
}
peer_device->resync_start_flag = 0; // reset
}
else
{
DbgPrint("_WIN32_OOS_TEST: repl mode=(%s).",
drbd_repl_str(peer_device->repl_state[NOW]));
}
}
#endif
repeat:
4. drbd/drbd_sender.c ======================================
Part 1)---------------------------------------------------------------------------
drbd_khelper(NULL, connection, "unfence-peer");
}
#ifdef _WIN32_OOS_TEST
if (peer_device->resync_start_flag > 0)
{
DbgPrint("_WIN32_OOS_TEST:drbd_resync_finished:
resync_start_flag=%d. reset!", peer_device->resync_start_flag);
peer_device->resync_start_flag = 0;
}
#endif
return 1;
}
Part 2)-----------------------------------------------------------------------------
spin_lock_irq(&device->resource->req_lock);
repl_state = peer_device->repl_state[NOW];
spin_unlock_irq(&device->resource->req_lock);
#ifdef _WIN32_OOS_TEST
int first_state = repl_state;
DbgPrint("_WIN32_OOS_TEST:(%s) drbd_start_resync: repl(%s) side(%s)",
current->comm, drbd_repl_str(repl_state), drbd_repl_str(side));
#endif
if (repl_state < L_ESTABLISHED) {
/* Connection closed meanwhile. */
return;
Part 3)----------------------------------------------------------------------------
mod_timer(&peer_device->resync_timer, jiffies);
drbd_md_sync(device);
#ifdef _WIN32_OOS_TEST
if (first_state == L_AHEAD)
{
peer_device->resync_start_flag++;
DbgPrint("_WIN32_OOS_TEST: Resync start at AHEAD status.
flag=%d. drbdadm disconnect CLI maybe wake up if exists",
peer_device->resync_start_flag);
wake_up(&peer_device->resync_start_wait);
}
else
{
DbgPrint("_WIN32_OOS_TEST: flag=%d\n",
peer_device->resync_start_flag);
}
#endif
}
put_ldev(device);
out:
------------------------------------------------------------------------------
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.linbit.com/pipermail/drbd-dev/attachments/20160418/0b7ff24a/attachment.htm>
More information about the drbd-dev
mailing list