<div dir="ltr"><div>Dear Phil,</div><div><br></div><div>We wrote some code to avoid the OOS remainder problem.</div><div>If the status is L_AHEAD at conn_try_disconnect(), we have to wait until the state changes to SyncSource and the meta-data has been flushed by drbd_md_sync().</div><div><br></div><div>What do you think about our temporary workaround?</div><div><br></div><div>Thanks.</div><div><br></div><div><br></div><div><br></div><div>PS. Source code:</div><div><br></div><div>1. drbd/drbd_int.h ========================================</div><div><br></div><div>  struct {/* sender todo per peer_device */</div><div>    bool was_ahead;</div><div>  } todo;</div><div>#ifdef _WIN32_OOS_TEST</div><div>  wait_queue_head_t resync_start_wait;</div><div>  int resync_start_flag;</div><div>#endif</div><div>};</div><div><br></div><div>struct submit_worker {</div><div><br></div><div><br></div><div><br></div><div>2. drbd/drbd_main.c ======================================</div><div><br></div><div>  peer_device->resync_finished_pdsk = D_UNKNOWN;</div><div><br></div><div>  INIT_WORK(&peer_device->send_acks_work, drbd_send_acks_wf);</div><div><br></div><div>#ifdef _WIN32_OOS_TEST</div><div>  init_waitqueue_head(&peer_device->resync_start_wait);</div><div>  peer_device->resync_start_flag = 0;</div><div>#endif</div><div>  return peer_device;</div><div>}</div><div><br></div><div><br></div><div>3. 
drbd/drbd_nl.c ======================================</div><div><br></div><div>static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)</div><div>{</div><div><span class="" style="white-space:pre"> </span>struct drbd_resource *resource = connection->resource;</div><div><span class="" style="white-space:pre"> </span>enum drbd_state_rv rv;</div><div><br></div><div>#ifdef _WIN32_OOS_TEST</div><div><span class="" style="white-space:pre"> </span>// First, Check whether reresync-action occurred or not after AHEAD </div><div><span class="" style="white-space:pre"> </span>{</div><div><span class="" style="white-space:pre"> </span>struct drbd_peer_device *peer_device = conn_peer_device(connection, 0);</div><div><br></div><div><span class="" style="white-space:pre"> </span>DbgPrint("_WIN32_OOS_TEST:conn_try_disconnect: Please check whether resync-action occurred or not.");</div><div><span class="" style="white-space:pre"> </span>if (peer_device->repl_state[NOW] == L_AHEAD) // check AHEAD status only</div><div><span class="" style="white-space:pre"> </span>{</div><div><span class="" style="white-space:pre"> </span>if (peer_device->resync_start_flag > 0)</div><div><span class="" style="white-space:pre"> </span>{</div><div><span class="" style="white-space:pre"> </span>DbgPrint("_WIN32_OOS_TEST: resync action started already. 
flag=%d\n", peer_device->resync_start_flag);</div><div><span class="" style="white-space:pre"> </span>}</div><div><span class="" style="white-space:pre"> </span>else</div><div><span class="" style="white-space:pre"> </span>{</div><div><span class="" style="white-space:pre"> </span>DbgPrint("_WIN32_OOS_TEST: Wait for resync start...\n");// It will update status and meta data on each node, maybe.</div><div><br></div><div><span class="" style="white-space:pre"> </span>long t = 0; // if 5 seconds exceeding, drbdadm disconnect CLI will be timeout.</div><div><span class="" style="white-space:pre"> </span>wait_event_timeout(t, peer_device->resync_start_wait, (peer_device->resync_start_flag > 0), 3000);</div><div><br></div><div><span class="" style="white-space:pre"> </span>if (t == 0)</div><div><span class="" style="white-space:pre"> </span>{</div><div><span class="" style="white-space:pre"> </span>DbgPrint("_WIN32_OOS_TEST: timeout! t=%d. No resync_start_wait event. So, You can see AHEAD pending problem\n", t);</div><div><span class="" style="white-space:pre"> </span>}</div><div><span class="" style="white-space:pre"> </span>else</div><div><span class="" style="white-space:pre"> </span>{</div><div><span class="" style="white-space:pre"> </span>DbgPrint("_WIN32_OOS_TEST: OK! got it! 
time=%d\n", t);</div><div><span class="" style="white-space:pre"> </span>}</div><div><span class="" style="white-space:pre"> </span>}</div><div><span class="" style="white-space:pre"> </span>peer_device->resync_start_flag = 0; // reset</div><div><span class="" style="white-space:pre"> </span>}</div><div><span class="" style="white-space:pre"> </span>else</div><div><span class="" style="white-space:pre"> </span>{</div><div><span class="" style="white-space:pre"> </span>DbgPrint("_WIN32_OOS_TEST: repl mode=(%s).", drbd_repl_str(peer_device->repl_state[NOW]));</div><div><span class="" style="white-space:pre"> </span>}</div><div><span class="" style="white-space:pre"> </span>}</div><div>#endif</div><div><br></div><div>  repeat:</div><div><br></div><div><br></div><div><br></div><div>4. drbd/drbd_sender.c ======================================</div><div><br></div><div>Part 1)---------------------------------------------------------------------------</div><div>      drbd_khelper(NULL, connection, "unfence-peer");</div><div>  }</div><div><br></div><div>#ifdef _WIN32_OOS_TEST</div><div>  if (peer_device->resync_start_flag > 0)</div><div>  {</div><div>    DbgPrint("_WIN32_OOS_TEST:drbd_resync_finished: resync_start_flag=%d. 
reset!", peer_device->resync_start_flag);</div><div>    peer_device->resync_start_flag = 0;</div><div>  }</div><div>#endif</div><div><br></div><div>  return 1;</div><div>}</div><div><br></div><div>Part 2)-----------------------------------------------------------------------------</div><div><br></div><div><div>  spin_lock_irq(&device->resource->req_lock);</div><div>  repl_state = peer_device->repl_state[NOW];</div><div>  spin_unlock_irq(&device->resource->req_lock);</div><div><br></div><div>#ifdef _WIN32_OOS_TEST</div><div>  int first_state = repl_state;</div><div>  DbgPrint("_WIN32_OOS_TEST:(%s) drbd_start_resync: repl(%s) side(%s)", current->comm, drbd_repl_str(repl_state), drbd_repl_str(side));</div><div>#endif</div><div><br></div><div>  if (repl_state < L_ESTABLISHED) {</div><div>    /* Connection closed meanwhile. */</div><div>    return;</div><div><br></div></div><div><br></div><div>Part 3)----------------------------------------------------------------------------</div><div><div><br></div><div>      mod_timer(&peer_device->resync_timer, jiffies);</div><div><br></div><div>    drbd_md_sync(device);</div><div><br></div><div>#ifdef _WIN32_OOS_TEST</div><div>    if (first_state == L_AHEAD)</div><div>    {</div><div>      peer_device->resync_start_flag++;</div><div>      DbgPrint("_WIN32_OOS_TEST: Resync start at AHEAD status. flag=%d. drbdadm disconnect CLI maybe wake up if exists", peer_device->resync_start_flag);</div><div>      wake_up(&peer_device->resync_start_wait);</div><div>    }</div><div>    else</div><div>    {</div><div>      DbgPrint("_WIN32_OOS_TEST: flag=%d\n", peer_device->resync_start_flag);</div><div>    }</div><div>#endif</div><div><br></div><div>  }</div><div>  put_ldev(device);</div><div>  out:</div><div><br></div></div><div><br></div><div>------------------------------------------------------------------------------</div><div><br></div><div><br></div></div>