01f7a9e4e8
This commit attempts to fix race issues in the maptracker since the introduction of the OperationTracker. There were two main problems:

* Duplicate tracking of the state, both in the state map and the opTracker.
* Non-atomicity of operations, with different threads being able to affect other threads' operations.

A test performing random Track/Untracks on the same Cid quickly showed that items would sometimes stay as pin_queued or unpin_queued. That happened because operations could be cancelled under the hood by a different request while leaving the map status untouched. It was not simple to deal with these issues without a refactoring.

First, the state map has been removed, and the operation tracker now provides status information for any Cid. This implies that the tracker keeps all operations and that operations have a `PhaseDone`. There is also a new `OperationRemote` type.

Secondly, operations are only created in the tracker and can only be removed by their creators (they can, however, be overwritten by other operations). Operations cannot be accessed directly, and modifications are limited to setting an Error on `PhaseDone` operations. After creation, *Operations are queued in the pinWorker queues, which handle any status updates. This means that, even when an operation has been removed from the tracker, its status updates will not interfere with any newer operations.

In the maptracker, only the Unpin worker Cleans operations once processed. A successful unpin is the only way that a delete() happens in the tracker map. Otherwise, operations stay there until a newer operation for the Cid arrives and 1) cancels the existing one, 2) takes its place. The tracker refuses to create a new operation if a similar "ongoing" operation of the same type exists.

The final change is that Recover() and RecoverAll() are now async and play by the same rules as Track() and Untrack(), queueing the items to be recovered.

Note: for the stateless pintracker, the tracker will need to Clean() operations of type OperationPin as well, and complement the Status reported by the tracker with the status coming from IPFS.

License: MIT
Signed-off-by: Hector Sanjuan <code@hector.link>
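
A minimal sketch of the lifecycle described above. The `Phase` and `OperationType` names mirror this message, but the plain map and the `trackOp` helper are illustrative assumptions, not the actual optracker API:

package optrackersketch

type Phase int

const (
	PhaseQueued Phase = iota
	PhaseInProgress
	PhaseDone // operations are kept after finishing; only their Error may still be set
)

type OperationType int

const (
	OperationPin OperationType = iota
	OperationUnpin
	OperationRemote // the pin is allocated to other peers; nothing to do locally
)

type Operation struct {
	Type  OperationType
	Phase Phase
	Err   error
}

// trackOp models the creation rule: refuse a new operation while a similar
// ongoing one of the same type exists; otherwise the newcomer cancels and
// replaces whatever was there, done or not.
func trackOp(ops map[string]*Operation, c string, typ OperationType) bool {
	if cur, ok := ops[c]; ok {
		if cur.Type == typ && cur.Phase != PhaseDone {
			return false // similar "ongoing" operation: refused
		}
		// a real tracker would cancel cur's context before replacing it
	}
	ops[c] = &Operation{Type: typ, Phase: PhaseQueued}
	return true
}
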
671 lines · 13 KiB · Go

package maptracker

import (
	"context"
	"errors"
	"math/rand"
	"sync"
	"testing"
	"time"

	rpc "github.com/hsanjuan/go-libp2p-gorpc"
	cid "github.com/ipfs/go-cid"
	peer "github.com/libp2p/go-libp2p-peer"

	"github.com/ipfs/ipfs-cluster/api"
	"github.com/ipfs/ipfs-cluster/test"
)

var (
	pinCancelCid      = test.TestCid3
	unpinCancelCid    = test.TestCid2
	ErrPinCancelCid   = errors.New("should not have received rpc.IPFSPin operation")
	ErrUnpinCancelCid = errors.New("should not have received rpc.IPFSUnpin operation")
)

// mockService stands in for the Cluster RPC service so that the IPFS pin
// and unpin calls can be intercepted by the tests.
type mockService struct {
	rpcClient *rpc.Client
}

func mockRPCClient(t *testing.T) *rpc.Client {
	s := rpc.NewServer(nil, "mock")
	c := rpc.NewClientWithServer(nil, "mock", s)
	err := s.RegisterName("Cluster", &mockService{})
	if err != nil {
		t.Fatal(err)
	}
	return c
}

// IPFSPin mocks the IPFS pin call: it hangs on the slow test CID and errors
// on pinCancelCid, which should never reach IPFS if cancellation works.
func (mock *mockService) IPFSPin(ctx context.Context, in api.PinSerial, out *struct{}) error {
	c := in.ToPin().Cid
	switch c.String() {
	case test.TestSlowCid1:
		time.Sleep(2 * time.Second)
	case pinCancelCid:
		return ErrPinCancelCid
	}
	return nil
}

// IPFSUnpin mocks the IPFS unpin call analogously, erroring on unpinCancelCid.
func (mock *mockService) IPFSUnpin(ctx context.Context, in api.PinSerial, out *struct{}) error {
	c := in.ToPin().Cid
	switch c.String() {
	case test.TestSlowCid1:
		time.Sleep(2 * time.Second)
	case unpinCancelCid:
		return ErrUnpinCancelCid
	}
	return nil
}

// testSlowMapPinTracker returns a tracker whose IPFS pin/unpin RPCs block on
// the slow test CID (see mockService above).
func testSlowMapPinTracker(t *testing.T) *MapPinTracker {
	cfg := &Config{}
	cfg.Default()
	cfg.ConcurrentPins = 1
	mpt := NewMapPinTracker(cfg, test.TestPeerID1)
	mpt.SetClient(mockRPCClient(t))
	return mpt
}

func testMapPinTracker(t *testing.T) *MapPinTracker {
	cfg := &Config{}
	cfg.Default()
	cfg.ConcurrentPins = 1
	mpt := NewMapPinTracker(cfg, test.TestPeerID1)
	mpt.SetClient(test.NewMockRPCClient(t))
	return mpt
}

func TestNew(t *testing.T) {
	mpt := testMapPinTracker(t)
	defer mpt.Shutdown()
}

func TestShutdown(t *testing.T) {
	mpt := testMapPinTracker(t)
	err := mpt.Shutdown()
	if err != nil {
		t.Fatal(err)
	}
	// Shutdown must be idempotent.
	err = mpt.Shutdown()
	if err != nil {
		t.Fatal(err)
	}
}

func TestTrack(t *testing.T) {
	mpt := testMapPinTracker(t)
	defer mpt.Shutdown()

	h, _ := cid.Decode(test.TestCid1)

	// Let's start with a local pin
	c := api.Pin{
		Cid:                  h,
		Allocations:          []peer.ID{},
		ReplicationFactorMin: -1,
		ReplicationFactorMax: -1,
	}

	err := mpt.Track(c)
	if err != nil {
		t.Fatal(err)
	}

	time.Sleep(200 * time.Millisecond) // let it be pinned

	st := mpt.Status(h)
	if st.Status != api.TrackerStatusPinned {
		t.Fatalf("cid should be pinned and is %s", st.Status)
	}

	// Unpin and set remote
	c = api.Pin{
		Cid:                  h,
		Allocations:          []peer.ID{test.TestPeerID2},
		ReplicationFactorMin: 1,
		ReplicationFactorMax: 1,
	}
	err = mpt.Track(c)
	if err != nil {
		t.Fatal(err)
	}

	time.Sleep(200 * time.Millisecond) // let it be unpinned

	st = mpt.Status(h)
	if st.Status != api.TrackerStatusRemote {
		t.Fatalf("cid should be tracked as remote and is %s", st.Status)
	}
}

func TestUntrack(t *testing.T) {
	mpt := testMapPinTracker(t)
	defer mpt.Shutdown()

	h1, _ := cid.Decode(test.TestCid1)
	h2, _ := cid.Decode(test.TestCid2)

	// LocalPin
	c := api.Pin{
		Cid:                  h1,
		Allocations:          []peer.ID{},
		ReplicationFactorMin: -1,
		ReplicationFactorMax: -1,
	}

	err := mpt.Track(c)
	if err != nil {
		t.Fatal(err)
	}

	// Remote pin
	c = api.Pin{
		Cid:                  h2,
		Allocations:          []peer.ID{test.TestPeerID2},
		ReplicationFactorMin: 1,
		ReplicationFactorMax: 1,
	}
	err = mpt.Track(c)
	if err != nil {
		t.Fatal(err)
	}

	time.Sleep(time.Second / 2)

	err = mpt.Untrack(h2)
	if err != nil {
		t.Fatal(err)
	}
	err = mpt.Untrack(h1)
	if err != nil {
		t.Fatal(err)
	}
	// Untracking twice in a row should not error.
	err = mpt.Untrack(h1)
	if err != nil {
		t.Fatal(err)
	}

	time.Sleep(time.Second / 2)

	st := mpt.Status(h1)
	if st.Status != api.TrackerStatusUnpinned {
		t.Fatalf("cid should be unpinned and is %s", st.Status)
	}

	st = mpt.Status(h2)
	if st.Status != api.TrackerStatusUnpinned {
		t.Fatalf("cid should be unpinned and is %s", st.Status)
	}
}

func TestStatusAll(t *testing.T) {
	mpt := testMapPinTracker(t)
	defer mpt.Shutdown()

	h1, _ := cid.Decode(test.TestCid1)
	h2, _ := cid.Decode(test.TestCid2)

	// LocalPin
	c := api.Pin{
		Cid:                  h1,
		Allocations:          []peer.ID{},
		ReplicationFactorMin: -1,
		ReplicationFactorMax: -1,
	}
	mpt.Track(c)
	c = api.Pin{
		Cid:                  h2,
		Allocations:          []peer.ID{},
		ReplicationFactorMin: 1,
		ReplicationFactorMax: 1,
	}
	mpt.Track(c)

	time.Sleep(200 * time.Millisecond)

	stAll := mpt.StatusAll()
	if len(stAll) != 2 {
		t.Logf("%+v", stAll)
		t.Fatal("expected 2 pins")
	}

	for _, st := range stAll {
		if st.Cid.Equals(h1) && st.Status != api.TrackerStatusPinned {
			t.Fatal("expected pinned")
		}
		if st.Cid.Equals(h2) && st.Status != api.TrackerStatusRemote {
			t.Fatal("expected remote")
		}
	}
}

func TestSyncAndRecover(t *testing.T) {
	mpt := testMapPinTracker(t)
	defer mpt.Shutdown()

	h1, _ := cid.Decode(test.TestCid1)
	h2, _ := cid.Decode(test.TestCid2)

	c := api.Pin{
		Cid:                  h1,
		Allocations:          []peer.ID{},
		ReplicationFactorMin: -1,
		ReplicationFactorMax: -1,
	}
	mpt.Track(c)
	c = api.Pin{
		Cid:                  h2,
		Allocations:          []peer.ID{},
		ReplicationFactorMin: -1,
		ReplicationFactorMax: -1,
	}
	mpt.Track(c)

	time.Sleep(100 * time.Millisecond)

	// IPFSPinLS RPC returns unpinned for anything != Cid1 or Cid3
	info, err := mpt.Sync(h2)
	if err != nil {
		t.Fatal(err)
	}
	if info.Status != api.TrackerStatusPinError {
		t.Error("expected pin_error")
	}

	info, err = mpt.Sync(h1)
	if err != nil {
		t.Fatal(err)
	}
	if info.Status != api.TrackerStatusPinned {
		t.Error("expected pinned")
	}

	info, err = mpt.Recover(h1)
	if err != nil {
		t.Fatal(err)
	}
	if info.Status != api.TrackerStatusPinned {
		t.Error("expected pinned")
	}

	_, err = mpt.Recover(h2)
	if err != nil {
		t.Fatal(err)
	}

	// Recover is async: give the queued operation time to complete.
	time.Sleep(100 * time.Millisecond)

	info = mpt.Status(h2)
	if info.Status != api.TrackerStatusPinned {
		t.Error("expected pinned")
	}
}

func TestRecoverAll(t *testing.T) {
	mpt := testMapPinTracker(t)
	defer mpt.Shutdown()

	h1, _ := cid.Decode(test.TestCid1)

	c := api.Pin{
		Cid:                  h1,
		Allocations:          []peer.ID{},
		ReplicationFactorMin: -1,
		ReplicationFactorMax: -1,
	}
	mpt.Track(c)
	time.Sleep(100 * time.Millisecond)
	// Simulate a failed pin so RecoverAll has something to recover.
	mpt.optracker.SetError(h1, errors.New("fakeerror"))
	pins, err := mpt.RecoverAll()
	if err != nil {
		t.Fatal(err)
	}
	if len(pins) != 1 {
		t.Fatal("there should be only one pin")
	}

	time.Sleep(100 * time.Millisecond)
	info := mpt.Status(h1)

	if info.Status != api.TrackerStatusPinned {
		t.Error("the pin should have been recovered")
	}
}

func TestSyncAll(t *testing.T) {
	mpt := testMapPinTracker(t)
	defer mpt.Shutdown()

	synced, err := mpt.SyncAll()
	if err != nil {
		t.Fatal(err)
	}
	// This relies on the rpc mock implementation
	if len(synced) != 0 {
		t.Fatal("should not have synced anything when it tracks nothing")
	}

	h1, _ := cid.Decode(test.TestCid1)
	h2, _ := cid.Decode(test.TestCid2)

	c := api.Pin{
		Cid:                  h1,
		Allocations:          []peer.ID{},
		ReplicationFactorMin: -1,
		ReplicationFactorMax: -1,
	}
	mpt.Track(c)
	c = api.Pin{
		Cid:                  h2,
		Allocations:          []peer.ID{},
		ReplicationFactorMin: -1,
		ReplicationFactorMax: -1,
	}
	mpt.Track(c)

	time.Sleep(100 * time.Millisecond)

	synced, err = mpt.SyncAll()
	if err != nil {
		t.Fatal(err)
	}
	if len(synced) != 1 || synced[0].Status != api.TrackerStatusPinError {
		t.Logf("%+v", synced)
		t.Fatal("should have synced h2")
	}
}

func TestUntrackTrack(t *testing.T) {
	mpt := testMapPinTracker(t)
	defer mpt.Shutdown()

	h1, _ := cid.Decode(test.TestCid1)

	// LocalPin
	c := api.Pin{
		Cid:                  h1,
		Allocations:          []peer.ID{},
		ReplicationFactorMin: -1,
		ReplicationFactorMax: -1,
	}

	err := mpt.Track(c)
	if err != nil {
		t.Fatal(err)
	}

	time.Sleep(time.Second / 2)

	err = mpt.Untrack(h1)
	if err != nil {
		t.Fatal(err)
	}
}

func TestTrackUntrackWithCancel(t *testing.T) {
	mpt := testSlowMapPinTracker(t)
	defer mpt.Shutdown()

	slowPinCid, _ := cid.Decode(test.TestSlowCid1)

	// LocalPin
	slowPin := api.Pin{
		Cid:                  slowPinCid,
		Allocations:          []peer.ID{},
		ReplicationFactorMin: -1,
		ReplicationFactorMax: -1,
	}

	err := mpt.Track(slowPin)
	if err != nil {
		t.Fatal(err)
	}

	time.Sleep(100 * time.Millisecond) // let pinning start

	pInfo := mpt.Status(slowPin.Cid)
	if pInfo.Status == api.TrackerStatusUnpinned {
		t.Fatal("slowPin should be tracked")
	}

	if pInfo.Status == api.TrackerStatusPinning {
		go func() {
			// t.Fatal must not be called outside the test goroutine.
			if err := mpt.Untrack(slowPinCid); err != nil {
				t.Error(err)
			}
		}()
		select {
		case <-mpt.optracker.GetOpContext(slowPinCid).Done():
			return
		case <-time.After(100 * time.Millisecond):
			t.Errorf("operation context should have been cancelled by now")
		}
	} else {
		t.Error("slowPin should be pinning and is:", pInfo.Status)
	}
}

func TestTrackUntrackWithNoCancel(t *testing.T) {
	mpt := testSlowMapPinTracker(t)
	defer mpt.Shutdown()

	slowPinCid, _ := cid.Decode(test.TestSlowCid1)
	fastPinCid, _ := cid.Decode(pinCancelCid)

	// SlowLocalPin
	slowPin := api.Pin{
		Cid:                  slowPinCid,
		Allocations:          []peer.ID{},
		ReplicationFactorMin: -1,
		ReplicationFactorMax: -1,
	}

	// LocalPin
	fastPin := api.Pin{
		Cid:                  fastPinCid,
		Allocations:          []peer.ID{},
		ReplicationFactorMin: -1,
		ReplicationFactorMax: -1,
	}

	err := mpt.Track(slowPin)
	if err != nil {
		t.Fatal(err)
	}

	err = mpt.Track(fastPin)
	if err != nil {
		t.Fatal(err)
	}

	// fastPin should be queued because slow pin is pinning
	pInfo := mpt.Status(fastPinCid)
	if pInfo.Status == api.TrackerStatusPinQueued {
		err = mpt.Untrack(fastPinCid)
		if err != nil {
			t.Fatal(err)
		}
		// If the queued pin had reached IPFS, the mock would have
		// returned ErrPinCancelCid.
		pi := mpt.Status(fastPinCid)
		if pi.Error == ErrPinCancelCid.Error() {
			t.Fatal(ErrPinCancelCid)
		}
	} else {
		t.Error("fastPin should be queued to pin:", pInfo.Status)
	}

	time.Sleep(100 * time.Millisecond)
	pInfo = mpt.Status(fastPinCid)
	if pInfo.Status != api.TrackerStatusUnpinned {
		t.Error("fastPin should have been removed from tracker:", pInfo.Status)
	}
}

func TestUntrackTrackWithCancel(t *testing.T) {
	mpt := testSlowMapPinTracker(t)
	defer mpt.Shutdown()

	slowPinCid, _ := cid.Decode(test.TestSlowCid1)

	// LocalPin
	slowPin := api.Pin{
		Cid:                  slowPinCid,
		Allocations:          []peer.ID{},
		ReplicationFactorMin: -1,
		ReplicationFactorMax: -1,
	}

	err := mpt.Track(slowPin)
	if err != nil {
		t.Fatal(err)
	}

	time.Sleep(time.Second / 2)

	// Untrack should cancel the ongoing request
	// and unpin right away
	err = mpt.Untrack(slowPinCid)
	if err != nil {
		t.Fatal(err)
	}

	time.Sleep(100 * time.Millisecond)

	pInfo := mpt.Status(slowPinCid)
	if pInfo.Status == api.TrackerStatusUnpinned {
		t.Fatal("expected slowPin to be tracked")
	}

	if pInfo.Status == api.TrackerStatusUnpinning {
		go func() {
			// t.Fatal must not be called outside the test goroutine.
			if err := mpt.Track(slowPin); err != nil {
				t.Error(err)
			}
		}()
		select {
		case <-mpt.optracker.GetOpContext(slowPinCid).Done():
			return
		case <-time.After(100 * time.Millisecond):
			t.Errorf("operation context should have been cancelled by now")
		}
	} else {
		t.Error("slowPin should be unpinning and is:", pInfo.Status)
	}
}

func TestUntrackTrackWithNoCancel(t *testing.T) {
	mpt := testSlowMapPinTracker(t)
	defer mpt.Shutdown()

	slowPinCid, _ := cid.Decode(test.TestSlowCid1)
	fastPinCid, _ := cid.Decode(unpinCancelCid)

	// SlowLocalPin
	slowPin := api.Pin{
		Cid:                  slowPinCid,
		Allocations:          []peer.ID{},
		ReplicationFactorMin: -1,
		ReplicationFactorMax: -1,
	}

	// LocalPin
	fastPin := api.Pin{
		Cid:                  fastPinCid,
		Allocations:          []peer.ID{},
		ReplicationFactorMin: -1,
		ReplicationFactorMax: -1,
	}

	err := mpt.Track(slowPin)
	if err != nil {
		t.Fatal(err)
	}

	err = mpt.Track(fastPin)
	if err != nil {
		t.Fatal(err)
	}

	time.Sleep(3 * time.Second)

	err = mpt.Untrack(slowPin.Cid)
	if err != nil {
		t.Fatal(err)
	}

	err = mpt.Untrack(fastPin.Cid)
	if err != nil {
		t.Fatal(err)
	}

	pInfo := mpt.Status(fastPinCid)
	if pInfo.Status == api.TrackerStatusUnpinned {
		t.Fatal("fastPin should still be tracked")
	}

	if pInfo.Status == api.TrackerStatusUnpinQueued {
		err = mpt.Track(fastPin)
		if err != nil {
			t.Fatal(err)
		}

		// If the queued unpin had reached IPFS, the mock would have
		// returned ErrUnpinCancelCid.
		pi := mpt.Status(fastPinCid)
		if pi.Error == ErrUnpinCancelCid.Error() {
			t.Fatal(ErrUnpinCancelCid)
		}
	} else {
		t.Error("fastPin should be queued to unpin and is:", pInfo.Status)
	}
}

// TestTrackUntrackConcurrent hammers a single Cid with random Track/Untrack
// calls from many goroutines; the final status must settle as pinned or
// unpinned, never stay queued.
func TestTrackUntrackConcurrent(t *testing.T) {
	mpt := testMapPinTracker(t)
	defer mpt.Shutdown()

	h1, _ := cid.Decode(test.TestCid1)

	// LocalPin
	c := api.Pin{
		Cid:                  h1,
		Allocations:          []peer.ID{},
		ReplicationFactorMin: -1,
		ReplicationFactorMax: -1,
	}

	var wg sync.WaitGroup

	for i := 0; i < 50; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := 0; j < 50; j++ {
				var err error
				op := rand.Intn(2)
				if op == 1 {
					err = mpt.Track(c)
				} else {
					err = mpt.Untrack(c.Cid)
				}
				if err != nil {
					t.Error(err)
				}
			}
		}()
	}

	wg.Wait()

	time.Sleep(200 * time.Millisecond)
	st := mpt.Status(h1)
	t.Log(st.Status)
	if st.Status != api.TrackerStatusUnpinned && st.Status != api.TrackerStatusPinned {
		t.Fatal("should be pinned or unpinned")
	}
}