Fix #408: Race condition when StateSync runs

When StateSync() runs and triggers Untrack() on items
that have just been removed from the state but on which
Untrack() is underway, the operation tracker would be
reset to phase queued and in some cases stay so.

Also happened for Track()

This caused failures of TestClustersPin as SyncStatus()
is triggered regularly while Tracks() and Untracks() happen.

License: MIT
Signed-off-by: Hector Sanjuan <code@hector.link>
This commit is contained in:
Hector Sanjuan 2018-05-11 19:38:40 +02:00
parent 7b9aac969e
commit 73aabfa8ec
2 changed files with 26 additions and 40 deletions

View File

@ -403,16 +403,11 @@ func TestClustersPin(t *testing.T) {
}
}
delay()
delay()
delay()
delay()
fpinned := func(t *testing.T, c *Cluster) {
status := c.tracker.StatusAll()
for _, v := range status {
if v.Status != api.TrackerStatusPinned {
t.Errorf("%s should have been pinned but it is %s",
v.Cid,
v.Status.String())
t.Errorf("%s should have been pinned but it is %s", v.Cid, v.Status)
}
}
if l := len(status); l != nPins {
@ -438,19 +433,10 @@ func TestClustersPin(t *testing.T) {
}
delay()
delay()
delay()
delay()
funpinned := func(t *testing.T, c *Cluster) {
status := c.tracker.StatusAll()
if l := len(status); l != 0 {
// workaround for this test failing randomly
t.Logf("%d items still around. Will wait more", l)
time.Sleep(10 * time.Second)
status = c.tracker.StatusAll()
if l := len(status); l != 0 {
t.Errorf("Nothing should be pinned: %d items still around after waiting 10 secs", l)
}
for _, v := range status {
t.Errorf("%s should have been unpinned but it is %s", v.Cid, v.Status)
}
}
runF(t, clusters, funpinned)

View File

@ -279,6 +279,20 @@ func (mpt *MapPinTracker) unpin(c api.Pin) error {
// possibly triggering Pin operations on the IPFS daemon.
func (mpt *MapPinTracker) Track(c api.Pin) error {
logger.Debugf("tracking %s", c.Cid)
if opc, ok := mpt.optracker.get(c.Cid); ok {
switch {
case opc.op == operationPin:
return nil // already ongoing
case opc.op == operationUnpin && opc.phase == phaseQueued:
mpt.optracker.finish(c.Cid)
return nil // cancelled while in queue, all done
case opc.op == operationUnpin && opc.phase == phaseInProgress:
mpt.optracker.finish(c.Cid)
// cancelled while unpinning: continue and trigger pin
}
}
if mpt.isRemote(c) {
if mpt.get(c.Cid).Status == api.TrackerStatusPinned {
mpt.optracker.trackNewOperation(
@ -292,20 +306,6 @@ func (mpt *MapPinTracker) Track(c api.Pin) error {
return nil
}
if opc, ok := mpt.optracker.get(c.Cid); ok {
if opc.op == operationUnpin {
switch opc.phase {
case phaseQueued:
mpt.optracker.finish(c.Cid)
return nil
case phaseInProgress:
mpt.optracker.finish(c.Cid)
// NOTE: this may leave the api.PinInfo in an error state
// so a pin operation needs to be run on it (same as Recover)
}
}
}
mpt.optracker.trackNewOperation(mpt.ctx, c.Cid, operationPin)
mpt.set(c.Cid, api.TrackerStatusPinQueued)
@ -326,15 +326,15 @@ func (mpt *MapPinTracker) Track(c api.Pin) error {
func (mpt *MapPinTracker) Untrack(c *cid.Cid) error {
logger.Debugf("untracking %s", c)
if opc, ok := mpt.optracker.get(c); ok {
if opc.op == operationPin {
mpt.optracker.finish(c) // cancel it
switch opc.phase {
case phaseQueued:
return nil
case phaseInProgress:
// continues below to run a full unpin
}
switch {
case opc.op == operationUnpin:
return nil // already ongoing
case opc.op == operationPin && opc.phase == phaseQueued:
mpt.optracker.finish(c)
return nil // cancelled while in queue, all done
case opc.op == operationPin && opc.phase == phaseInProgress:
mpt.optracker.finish(c)
// cancelled while pinning: continue and trigger unpin
}
}