No subject
Thu Dec 3 07:13:49 CET 2020
From time to time when node2 is shut down (forced) the volume on node1 moves from state UpToDate to Consistent, after which (a couple of milliseconds later) the tiebreaker reports an error, resulting in the following drbdadm status:
node1
postgres-zabbix-data7790 role:Secondary suspended:quorum
disk:UpToDate quorum:no blocked:upper
node2 connection:Connecting
tiebreaker connection:Connecting
tiebreaker
postgres-zabbix-data7790 role:Secondary suspended:quorum
disk:Diskless quorum:no blocked:upper
node1 role:StandAlone
node2 connection:Connecting
resource postgres-zabbix-data7790 {
options {
quorum majority;
}
protocol C;
startup {
wfc-timeout 10;
degr-wfc-timeout 5;
}
net {
max-epoch-size 2048;
max-buffers 2048;
sndbuf-size 0;
rcvbuf-size 0;
}
disk {
on-io-error detach;
c-max-rate 900M;
c-min-rate 100M;
c-fill-target 1M;
resync-rate 300M;
}
on node1 {
device /dev/drbd7790;
disk /dev/mapper/lvmdata-postgres--zabbix--data7790;
node-id 1;
meta-disk internal;
address 10.21.24.11:7790;
}
on node2 {
device /dev/drbd7790;
disk /dev/mapper/lvmdata-postgres--zabbix--data7790;
meta-disk internal;
node-id 2;
address 10.21.24.12:7790;
}
on tiebreaker {
device /dev/drbd7790;
disk none;
meta-disk internal;
node-id 3;
address 10.21.24.13:7790;
}
connection-mesh {
hosts node1 node2 tiebreaker;
}
}
Tiebreaker logs:
2021-02-23T23:32:24.299214+00:00;ERR;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: PingAck did not arrive in time.;
2021-02-23T23:32:24.299244+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum, but using tiebreaker logic to keep;
2021-02-23T23:32:24.299266+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: conn( Connected -> NetworkFailure ) peer( Primary -> Unknown );
2021-02-23T23:32:24.299305+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790 node2: pdsk( UpToDate -> DUnknown ) repl( Established -> Off );
2021-02-23T23:32:24.299326+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: ack_receiver terminated;
2021-02-23T23:32:24.299349+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: Terminating ack_recv thread;
2021-02-23T23:32:24.299372+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum, but using tiebreaker logic to keep;
2021-02-23T23:32:24.299394+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum, but using tiebreaker logic to keep;
2021-02-23T23:32:24.318231+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: Restarting sender thread;
2021-02-23T23:32:24.318268+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: Connection closed;
2021-02-23T23:32:24.318290+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum, but using tiebreaker logic to keep;
2021-02-23T23:32:24.318312+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: conn( NetworkFailure -> Unconnected );
2021-02-23T23:32:24.318337+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: Restarting receiver thread;
2021-02-23T23:32:24.318370+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum, but using tiebreaker logic to keep;
2021-02-23T23:32:24.318404+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: conn( Unconnected -> Connecting );
2021-02-23T23:32:24.803484+00:00;ERR;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Got NegDReply; Sector 0s, len 131072.;
2021-02-23T23:32:24.803557+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum, but using tiebreaker logic to keep;
2021-02-23T23:32:24.803587+00:00;ERR;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790: State change failed: Need access to UpToDate data;
2021-02-23T23:32:24.803628+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790 node1: Failed: pdsk( UpToDate -> Consistent );
2021-02-23T23:32:24.803652+00:00;ERR;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: drbd_req_destroy: Logic BUG rq_state: (0:300000, 2:104), completion_ref = 0;
2021-02-23T23:32:24.805509+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790: susp-io( no -> quorum);
2021-02-23T23:32:24.805540+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: conn( Connected -> Disconnecting ) peer( Secondary -> Unknown );
2021-02-23T23:32:24.805570+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: quorum( yes -> no );
2021-02-23T23:32:24.805599+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790 node1: pdsk( UpToDate -> DUnknown ) repl( Established -> Off );
2021-02-23T23:32:24.805627+00:00;ERR;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: error receiving P_STATE, e: -5 l: 0!;
2021-02-23T23:32:24.807322+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: ack_receiver terminated;
2021-02-23T23:32:24.807435+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: Terminating ack_recv thread;
2021-02-23T23:32:24.818354+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: Aborting remote state change 0 commit not possible;
2021-02-23T23:32:24.818465+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: Restarting sender thread;
2021-02-23T23:32:24.818542+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: Connection closed;
2021-02-23T23:32:24.818617+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: conn( Disconnecting -> StandAlone );
2021-02-23T23:32:24.818690+00:00;INFO;tiebreaker;P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: Terminating receiver thread;
Node1 logs:
2021-02-23T23:32:24.800547+00:00;ERR;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: PingAck did not arrive in time.;
2021-02-23T23:32:24.800610+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum, but using tiebreaker logic to keep;
2021-02-23T23:32:24.800645+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: conn( Connected -> NetworkFailure ) peer( Primary -> Unknown );
2021-02-23T23:32:24.800676+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: disk( UpToDate -> Consistent );
2021-02-23T23:32:24.800706+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790 node2: pdsk( UpToDate -> DUnknown ) repl( Established -> Off );
2021-02-23T23:32:24.800742+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: ack_receiver terminated;
2021-02-23T23:32:24.800771+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: Terminating ack_recv thread;
2021-02-23T23:32:24.800800+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum, but using tiebreaker logic to keep;
2021-02-23T23:32:24.800829+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum, but using tiebreaker logic to keep;
2021-02-23T23:32:24.800856+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790 tiebreaker: receive_peer_dagatg(): source-set-bitmap by rule 30;
2021-02-23T23:32:24.800886+00:00;ERR;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Can not satisfy peer's read request, no local data.;
2021-02-23T23:32:24.802664+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum, but using tiebreaker logic to keep;
2021-02-23T23:32:24.811534+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: sock was shut down by peer;
2021-02-23T23:32:24.811594+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790: susp-io( no -> quorum);
2021-02-23T23:32:24.811626+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: conn( Connected -> BrokenPipe ) peer( Secondary -> Unknown );
2021-02-23T23:32:24.811662+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: quorum( yes -> no );
2021-02-23T23:32:24.811692+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790 tiebreaker: pdsk( Diskless -> DUnknown ) repl( Established -> Off );
2021-02-23T23:32:24.811721+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: ack_receiver terminated;
2021-02-23T23:32:24.811750+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: Terminating ack_recv thread;
2021-02-23T23:32:24.811780+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790: Preparing cluster-wide state change 3151158077 (1->-1 0/0);
2021-02-23T23:32:24.811814+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790: Aborting cluster-wide state change 3151158077 (9ms) rv = -19;
2021-02-23T23:32:24.816641+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: Aborting remote state change 0 commit not possible;
2021-02-23T23:32:24.816701+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: Restarting sender thread;
2021-02-23T23:32:24.816925+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: Connection closed;
2021-02-23T23:32:24.816962+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: conn( NetworkFailure -> Unconnected );
2021-02-23T23:32:24.816989+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: Restarting receiver thread;
2021-02-23T23:32:24.817018+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: conn( Unconnected -> Connecting );
2021-02-23T23:32:24.826555+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: Aborting remote state change 0 commit not possible;
2021-02-23T23:32:24.826622+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: Restarting sender thread;
2021-02-23T23:32:24.826651+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: Connection closed;
2021-02-23T23:32:24.826681+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: conn( BrokenPipe -> Unconnected );
2021-02-23T23:32:24.826709+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: Restarting receiver thread;
2021-02-23T23:32:24.826745+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: conn( Unconnected -> Connecting );
2021-02-23T23:32:24.912545+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790: Preparing cluster-wide state change 1764698033 (1->-1 0/0);
2021-02-23T23:32:24.912605+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790: Committing cluster-wide state change 1764698033 (0ms);
2021-02-23T23:32:24.912637+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: disk( Consistent -> UpToDate );
2021-02-23T23:32:34.322531+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: sock was shut down by peer;
2021-02-23T23:32:34.322577+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: conn( Connecting -> BrokenPipe );
2021-02-23T23:32:34.322599+00:00;WARNING;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: short read (expected size 8);
2021-02-23T23:32:34.345524+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: Aborting remote state change 0 commit not possible;
2021-02-23T23:32:34.345582+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: Restarting sender thread;
2021-02-23T23:32:34.347540+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: Connection closed;
2021-02-23T23:32:34.347607+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: conn( BrokenPipe -> Unconnected );
2021-02-23T23:32:35.347753+00:00;INFO;node1;P-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: conn( Unconnected -> Connecting );
Do you have any advice?
Best Regards,
Mihai
--_000_8baf91957cf143bc9b9e4475f8d85052frequentiscom_
Content-Type: text/html; charset="us-ascii"
Content-Transfer-Encoding: quoted-printable
<html xmlns:v=3D"urn:schemas-microsoft-com:vml" xmlns:o=3D"urn:schemas-micr=
osoft-com:office:office" xmlns:w=3D"urn:schemas-microsoft-com:office:word" =
xmlns:m=3D"http://schemas.microsoft.com/office/2004/12/omml" xmlns=3D"http:=
//www.w3.org/TR/REC-html40">
<head>
<meta http-equiv=3D"Content-Type" content=3D"text/html; charset=3Dus-ascii"=
>
<meta name=3D"Generator" content=3D"Microsoft Word 15 (filtered medium)">
<style><!--
/* Font Definitions */
@font-face
{font-family:"Cambria Math";
panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
{font-family:Calibri;
panose-1:2 15 5 2 2 2 4 3 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0in;
font-size:11.0pt;
font-family:"Calibri",sans-serif;
mso-fareast-language:EN-US;}
span.EmailStyle20
{mso-style-type:personal-compose;
font-family:"Calibri",sans-serif;
color:windowtext;}
.MsoChpDefault
{mso-style-type:export-only;
font-size:10.0pt;}
@page WordSection1
{size:8.5in 11.0in;
margin:70.85pt 70.85pt 70.85pt 70.85pt;}
div.WordSection1
{page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext=3D"edit" spidmax=3D"1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext=3D"edit">
<o:idmap v:ext=3D"edit" data=3D"1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang=3D"RO" link=3D"#0563C1" vlink=3D"#954F72" style=3D"word-wrap:bre=
ak-word">
<div class=3D"WordSection1">
<p class=3D"MsoNormal">Hi, <o:p></o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
<p class=3D"MsoNormal">We are using DRBD with a RHEL cluster in two nodes &=
#43; diskless tiebreaker setup: Corosync, Pacemaker and DRBD(node1, node2, =
tiebreaker-diskless), our versions are kmod-drbd-9.0.27 and drbd-utils-9.12=
.1<o:p></o:p></p>
<p class=3D"MsoNormal">We are testing failover scenarios on a physical envi=
ronment with RedHat 7.6 by executing hard shutdowns on node1 and node 2 (we=
always start from a stable state with all nodes connected and up2date).
<o:p></o:p></p>
<p class=3D"MsoNormal">From time to time when node2 is shut down (forced) t=
he volume on node1 moves to state Consistent from Up2Date after which (a co=
uple of milliseconds later) the tiebreaker reports an error resulting in th=
e following drbdadm status:<o:p></o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
<p class=3D"MsoNormal">node1<o:p></o:p></p>
<p class=3D"MsoNormal">postgres-zabbix-data7790 role:Secondary suspended:qu=
orum<o:p></o:p></p>
<p class=3D"MsoNormal">disk:UpToDate quorum:no blocked:upper<o:p></o:p></p>
<p class=3D"MsoNormal">node2 connection:Connecting<o:p></o:p></p>
<p class=3D"MsoNormal">tiebreaker connection:Connecting<o:p></o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
<p class=3D"MsoNormal">tiebreaker <o:p></o:p></p>
<p class=3D"MsoNormal"> postgres-zabbix-data7790 role:Secondary suspen=
ded:quorum<o:p></o:p></p>
<p class=3D"MsoNormal"> disk:Diskless quorum:no blocked:upper<o:p></o=
:p></p>
<p class=3D"MsoNormal"> node1 role:StandAlone<o:p></o:p></p>
<p class=3D"MsoNormal"> node2 connection:Connecting<o:p></o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
<p class=3D"MsoNormal">resource postgres-zabbix-data7790 {<o:p></o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
<p class=3D"MsoNormal"> options {<o:p></o:p></p>
<p class=3D"MsoNormal"> quorum majority;<o:p>=
</o:p></p>
<p class=3D"MsoNormal"> }<o:p></o:p></p>
<p class=3D"MsoNormal"> protocol C;<o:p></o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
<p class=3D"MsoNormal"> startup {<o:p></o:p></p>
<p class=3D"MsoNormal"> wfc-timeout 10;<o:p><=
/o:p></p>
<p class=3D"MsoNormal"> degr-wfc-timeout 5;<o=
:p></o:p></p>
<p class=3D"MsoNormal"> }<o:p></o:p></p>
<p class=3D"MsoNormal"> net {<o:p></o:p></p>
<p class=3D"MsoNormal"> max-epoch-size 2048;<=
o:p></o:p></p>
<p class=3D"MsoNormal"> max-buffers 2048;<o:p=
></o:p></p>
<p class=3D"MsoNormal"> sndbuf-size 0;<o:p></=
o:p></p>
<p class=3D"MsoNormal"> rcvbuf-size 0;<o:p></=
o:p></p>
<p class=3D"MsoNormal"> }<o:p></o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
<p class=3D"MsoNormal"> disk {<o:p></o:p></p>
<p class=3D"MsoNormal"> on-io-error detach;<o=
:p></o:p></p>
<p class=3D"MsoNormal"> c-max-rate 900M;<o:p>=
</o:p></p>
<p class=3D"MsoNormal"> c-min-rate 100M;<o:p>=
</o:p></p>
<p class=3D"MsoNormal"> c-fill-target 1M;<o:p=
></o:p></p>
<p class=3D"MsoNormal"> resync-rate 300M;<o:p=
></o:p></p>
<p class=3D"MsoNormal"> }<o:p></o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
<p class=3D"MsoNormal"> on node1 {<o:p></o:p></p>
<p class=3D"MsoNormal"> device /dev/drbd7790;=
<o:p></o:p></p>
<p class=3D"MsoNormal"> disk /dev/mapper/lvmd=
ata-postgres--zabbix--data7790;<o:p></o:p></p>
<p class=3D"MsoNormal"> node-id 1;<o:p></o:p>=
</p>
<p class=3D"MsoNormal"> meta-disk internal;<o=
:p></o:p></p>
<p class=3D"MsoNormal"> address 10.21.24.11:7=
790;<o:p></o:p></p>
<p class=3D"MsoNormal"> }<o:p></o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
<p class=3D"MsoNormal"> on node2 {<o:p></o:p></p>
<p class=3D"MsoNormal"> device /dev/drbd7790;=
<o:p></o:p></p>
<p class=3D"MsoNormal"> disk /dev/mapper/lvmd=
ata-postgres--zabbix--data7790;<o:p></o:p></p>
<p class=3D"MsoNormal"> meta-disk internal;<o=
:p></o:p></p>
<p class=3D"MsoNormal"> node-id 2;<o:p></o:p>=
</p>
<p class=3D"MsoNormal"> address 10.21.24.12:7=
790;<o:p></o:p></p>
<p class=3D"MsoNormal"> }<o:p></o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
<p class=3D"MsoNormal"> on tiebreaker {<o:p></o:p></p>
<p class=3D"MsoNormal"> device /dev/drbd7790;=
<o:p></o:p></p>
<p class=3D"MsoNormal"> disk none;<o:p></o:p>=
</p>
<p class=3D"MsoNormal"> meta-disk internal;<o=
:p></o:p></p>
<p class=3D"MsoNormal"> node-id 3;<o:p></o:p>=
</p>
<p class=3D"MsoNormal"> address 10.21.24.13:7=
790;<o:p></o:p></p>
<p class=3D"MsoNormal"> }<o:p></o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
<p class=3D"MsoNormal"> connection-mesh {<o:p></o:p></p>
<p class=3D"MsoNormal"> hosts node1 node2 tie=
breaker;<o:p></o:p></p>
<p class=3D"MsoNormal"> }<o:p></o:p></p>
<p class=3D"MsoNormal">}<o:p></o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
<p class=3D"MsoNormal">Tiebreaker logs:<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.299214+00:00;ERR;tiebreaker;=
P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: PingAck did not arrive =
in time.;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.299244+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum=
, but using tiebreaker logic to keep;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.299266+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: conn( Connected -> =
NetworkFailure ) peer( Primary -> Unknown );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.299305+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790 node2: pdsk( UpToD=
ate -> DUnknown ) repl( Established -> Off );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.299326+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: ack_receiver terminate=
d;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.299349+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: Terminating ack_recv t=
hread;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.299372+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum=
, but using tiebreaker logic to keep;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.299394+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum=
, but using tiebreaker logic to keep;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.318231+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: Restarting sender thre=
ad;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.318268+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: Connection closed;<o:p=
></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.318290+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum=
, but using tiebreaker logic to keep;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.318312+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: conn( NetworkFailure -=
> Unconnected );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.318337+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: Restarting receiver th=
read;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.318370+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum=
, but using tiebreaker logic to keep;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.318404+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790 node2: conn( Unconnected ->=
; Connecting );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.803484+00:00;ERR;tiebreaker;=
P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Got NegDReply; Sec=
tor 0s, len 131072.;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.803557+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum=
, but using tiebreaker logic to keep;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.803587+00:00;ERR;tiebreaker;=
P-/;[kernel/]; drbd postgres-zabbix-data7790: State change failed: Need acc=
ess to UpToDate data;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.803628+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790 node1: Failed: pds=
k( UpToDate -> Consistent );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.803652+00:00;ERR;tiebreaker;=
P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: drbd_req_destroy: =
Logic BUG rq_state: (0:300000, 2:104), completion_ref =3D 0;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.805509+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790: susp-io( no -> quorum);<o=
:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.805540+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: conn( Connected -> =
Disconnecting ) peer( Secondary -> Unknown );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.805570+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: quorum( yes ->=
no );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.805599+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790 node1: pdsk( UpToD=
ate -> DUnknown ) repl( Established -> Off );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.805627+00:00;ERR;tiebreaker;=
P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: error receiving P_STATE=
, e: -5 l: 0!;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.807322+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: ack_receiver terminate=
d;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.807435+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: Terminating ack_recv t=
hread;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.818354+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: Aborting remote state =
change 0 commit not possible;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.818465+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: Restarting sender thre=
ad;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.818542+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: Connection closed;<o:p=
></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.818617+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: conn( Disconnecting -&=
gt; StandAlone );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.818690+00:00;INFO;tiebreaker=
;P-/;[kernel/]; drbd postgres-zabbix-data7790 node1: Terminating receiver t=
hread;<o:p></o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
<p class=3D"MsoNormal">Node1 logs:<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.800547+00:00;ERR;node1;P-/;[=
kernel/]; drbd postgres-zabbix-data7790 node2: PingAck did not arrive in ti=
me.;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.800610+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum, but=
using tiebreaker logic to keep;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.800645+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 node2: conn( Connected -> Netwo=
rkFailure ) peer( Primary -> Unknown );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.800676+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: disk( UpToDate -> C=
onsistent );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.800706+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790 node2: pdsk( UpToDate -=
> DUnknown ) repl( Established -> Off );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.800742+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 node2: ack_receiver terminated;<o:=
p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.800771+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 node2: Terminating ack_recv thread=
;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.800800+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum, but=
using tiebreaker logic to keep;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.800829+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum, but=
using tiebreaker logic to keep;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.800856+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790 tiebreaker: receive_pee=
r_dagatg(): source-set-bitmap by rule 30;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.800886+00:00;ERR;node1;P-/;[=
kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Can not satisfy peer's =
read request, no local data.;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.802664+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: Would lose quorum, but=
using tiebreaker logic to keep;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.811534+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: sock was shut down by =
peer;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.811594+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790: susp-io( no -> quorum);<o:p></=
o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.811626+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: conn( Connected -> =
BrokenPipe ) peer( Secondary -> Unknown );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.811662+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: quorum( yes -> no )=
;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.811692+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790 tiebreaker: pdsk( Diskl=
ess -> DUnknown ) repl( Established -> Off );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.811721+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: ack_receiver terminate=
d;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.811750+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: Terminating ack_recv t=
hread;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.811780+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790: Preparing cluster-wide state chan=
ge 3151158077 (1->-1 0/0);<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.811814+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790: Aborting cluster-wide state chang=
e 3151158077 (9ms) rv =3D -19;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.816641+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 node2: Aborting remote state chang=
e 0 commit not possible;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.816701+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 node2: Restarting sender thread;<o=
:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.816925+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 node2: Connection closed;<o:p></o:=
p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.816962+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 node2: conn( NetworkFailure -> =
Unconnected );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.816989+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 node2: Restarting receiver thread;=
<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.817018+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 node2: conn( Unconnected -> Con=
necting );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.826555+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: Aborting remote state =
change 0 commit not possible;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.826622+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: Restarting sender thre=
ad;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.826651+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: Connection closed;<o:p=
></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.826681+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: conn( BrokenPipe ->=
Unconnected );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.826709+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: Restarting receiver th=
read;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.826745+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: conn( Unconnected ->=
; Connecting );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.912545+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790: Preparing cluster-wide state chan=
ge 1764698033 (1->-1 0/0);<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.912605+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790: Committing cluster-wide state cha=
nge 1764698033 (0ms);<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:24.912637+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790/0 drbd7790: disk( Consistent ->=
UpToDate );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:34.322531+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: sock was shut down by =
peer;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:34.322577+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: conn( Connecting ->=
BrokenPipe );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:34.322599+00:00;WARNING;node1;P=
-/;[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: short read (expecte=
d size 8);<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:34.345524+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: Aborting remote state =
change 0 commit not possible;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:34.345582+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: Restarting sender thre=
ad;<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:34.347540+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: Connection closed;<o:p=
></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:34.347607+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: conn( BrokenPipe ->=
Unconnected );<o:p></o:p></p>
<p class=3D"MsoNormal">2021-02-23T23:32:35.347753+00:00;INFO;node1;P-/;=
[kernel/]; drbd postgres-zabbix-data7790 tiebreaker: conn( Unconnected ->=
; Connecting );<o:p></o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
<p class=3D"MsoNormal">Do you have any advice?<o:p></o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
<p class=3D"MsoNormal">Best Regards,<o:p></o:p></p>
<p class=3D"MsoNormal">Mihai<o:p></o:p></p>
<p class=3D"MsoNormal"><o:p> </o:p></p>
</div>
</body>
</html>
--_000_8baf91957cf143bc9b9e4475f8d85052frequentiscom_--
More information about the drbd-user
mailing list