-
Notifications
You must be signed in to change notification settings - Fork 138
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[FIXED] cleaning up sanitize=thread found several races #771
Changes from all commits
943b315
eb9412c
b1a6e9c
7723d1a
23e8b68
4154667
9966b92
f54999c
4846196
ee40b68
6804cae
caddb65
80a9b40
bf65830
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1137,17 +1137,29 @@ | |
|
||
adjustedMax = 0; | ||
natsSub_Lock(sub); | ||
if (sub->libDlvWorker != NULL) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There are macros for lock/unlock of this in
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @kozlovic I stayed away from further exposing these macros. I am about to submit a re-factor (just passed all tests now without kicking!) that hopefully eliminates the need for double-locking, leaving the worker lock almost exclusively to protect the message queue. Ditto for the It's ok with me if we keep this PR "hanging" for now until you see the upcoming solution, and then we can decide which way to fix? I just want to have it as my base branch to get the CI/unrelated test fixes out of the way. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not sure I understand the last part, that is, do you want to merge this PR to be the base for the other work, or will the other work be the (re)base for the CI changes? Either way, I am going to LGTM this PR so that you can proceed as you wish. |
||
{ | ||
natsMutex_Lock(sub->libDlvWorker->lock); | ||
} | ||
// If JS ordered consumer, trigger a reset. Don't check the error | ||
// condition here. If there is a failure, it will be retried | ||
// at the next HB interval. | ||
if ((sub->jsi != NULL) && (sub->jsi->ordered)) | ||
{ | ||
jsSub_resetOrderedConsumer(sub, sub->jsi->sseq+1); | ||
if (sub->libDlvWorker != NULL) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. and:
|
||
{ | ||
natsMutex_Unlock(sub->libDlvWorker->lock); | ||
} | ||
natsSub_Unlock(sub); | ||
continue; | ||
} | ||
if (natsSub_drainStarted(sub)) | ||
{ | ||
if (sub->libDlvWorker != NULL) | ||
{ | ||
natsMutex_Unlock(sub->libDlvWorker->lock); | ||
} | ||
natsSub_Unlock(sub); | ||
continue; | ||
} | ||
|
@@ -1160,6 +1172,10 @@ | |
// messages have reached the max, if so, unsubscribe. | ||
if (adjustedMax == 0) | ||
{ | ||
if (sub->libDlvWorker != NULL) | ||
{ | ||
natsMutex_Unlock(sub->libDlvWorker->lock); | ||
} | ||
natsSub_Unlock(sub); | ||
s = natsConn_sendUnsubProto(nc, sub->sid, 0); | ||
continue; | ||
|
@@ -1172,6 +1188,10 @@ | |
|
||
// Hold the lock up to that point so we are sure not to resend | ||
// any SUB/UNSUB for a subscription that is in draining mode. | ||
if (sub->libDlvWorker != NULL) | ||
{ | ||
natsMutex_Unlock(sub->libDlvWorker->lock); | ||
} | ||
natsSub_Unlock(sub); | ||
} | ||
|
||
|
@@ -3435,7 +3455,7 @@ | |
return nats_setDefaultError(NATS_CONNECTION_CLOSED); | ||
} | ||
|
||
natsSock_Close(nc->sockCtx.fd); | ||
natsSock_Shutdown(nc->sockCtx.fd); | ||
|
||
natsConn_Unlock(nc); | ||
return NATS_OK; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2045,6 +2045,10 @@ | |
natsStatus s = NATS_OK; | ||
|
||
natsSub_Lock(sub); | ||
if (sub->libDlvWorker != NULL) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same as for |
||
{ | ||
natsMutex_Lock(sub->libDlvWorker->lock); | ||
} | ||
alert = !jsi->active; | ||
oc = jsi->ordered; | ||
jsi->active = false; | ||
|
@@ -2062,10 +2066,18 @@ | |
natsCondition_Signal(sub->cond); | ||
natsTimer_Stop(timer); | ||
} | ||
if (sub->libDlvWorker != NULL) | ||
{ | ||
natsMutex_Unlock(sub->libDlvWorker->lock); | ||
} | ||
natsSub_Unlock(sub); | ||
return; | ||
} | ||
nc = sub->conn; | ||
if (sub->libDlvWorker != NULL) | ||
{ | ||
natsMutex_Unlock(sub->libDlvWorker->lock); | ||
} | ||
natsSub_Unlock(sub); | ||
|
||
if (!alert) | ||
|
@@ -2075,12 +2087,20 @@ | |
if (oc) | ||
{ | ||
natsSub_Lock(sub); | ||
if (sub->libDlvWorker != NULL) | ||
{ | ||
natsMutex_Lock(sub->libDlvWorker->lock); | ||
} | ||
if (!sub->closed) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Humm... there are many places where we check this under the sub's lock only. Sure, it is set under both locks and checked only under the lib's lock in |
||
{ | ||
// If we fail in that call, we will report to async err callback | ||
// (if one is specified). | ||
s = jsSub_resetOrderedConsumer(sub, sub->jsi->sseq+1); | ||
} | ||
if (sub->libDlvWorker != NULL) | ||
{ | ||
natsMutex_Unlock(sub->libDlvWorker->lock); | ||
} | ||
natsSub_Unlock(sub); | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Not sure why you would shorten to
san-
here while you used sanitize-
for the address matrix run above :-) There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
shorter is better? :) (will change for consistency)