Make the adder modules ipld.DAGService implementations.

This removes much of the channel dance and block forwarding: the adder
submodules are now DAGServices themselves and receive Add() calls directly
from the ipfsAdder.

License: MIT
Signed-off-by: Hector Sanjuan <code@hector.link>
Hector Sanjuan 2018-08-07 20:01:02 +02:00
parent e2e84dfad1
commit 87f4fcf958
16 changed files with 810 additions and 1088 deletions
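For orientation, a minimal illustrative sketch of how the pieces fit together after this change, mirroring the Cluster.AddFile and REST API changes further down. The addLocally helper and the package name are hypothetical; everything else is taken from the code in this commit.

package adderexample

import (
	"context"
	"mime/multipart"

	adder "github.com/ipfs/ipfs-cluster/adder"
	"github.com/ipfs/ipfs-cluster/adder/local"
	"github.com/ipfs/ipfs-cluster/api"

	rpc "github.com/hsanjuan/go-libp2p-gorpc"
	cid "github.com/ipfs/go-cid"
)

// addLocally is a hypothetical helper: it chunks the multipart content,
// block-puts each node to the allocated peers through the local DAGService
// and pins the result, returning the root CID.
func addLocally(ctx context.Context, client *rpc.Client, r *multipart.Reader) (*cid.Cid, error) {
	params := api.DefaultAddParams()
	// local.New returns a ClusterDAGService (no sharding).
	dags := local.New(client, params.PinOptions)
	// A nil output channel makes the Adder discard progress updates.
	add := adder.New(ctx, dags, params, nil)
	// FromMultipart calls dags.Add() for every chunked node and then
	// dags.Finalize(), which pins the root and returns its CID.
	return add.FromMultipart(r)
}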

View File

@@ -2,23 +2,131 @@ package adder
import (
"context"
"io"
"mime/multipart"
"github.com/ipfs/ipfs-cluster/adder/ipfsadd"
"github.com/ipfs/ipfs-cluster/api"
cid "github.com/ipfs/go-cid"
files "github.com/ipfs/go-ipfs-cmdkit/files"
ipld "github.com/ipfs/go-ipld-format"
logging "github.com/ipfs/go-log"
)
var logger = logging.Logger("adder")
// Adder represents a module capable of adding content to IPFS Cluster.
type Adder interface {
// FromMultipart adds from a multipart reader and returns
// the resulting CID.
FromMultipart(context.Context, *multipart.Reader, *api.AddParams) (*cid.Cid, error)
// Output returns a channel from which to read updates during the adding
// process.
Output() <-chan *api.AddedOutput
// ClusterDAGService is an implementation of ipld.DAGService plus a Finalize
// method. ClusterDAGServices can be used to create Adders with different
// add implementations.
type ClusterDAGService interface {
ipld.DAGService
Finalize(context.Context) (*cid.Cid, error)
}
// Adder is used to add content to IPFS Cluster using an implementation of
// ClusterDAGService.
type Adder struct {
ctx context.Context
cancel context.CancelFunc
dags ClusterDAGService
params *api.AddParams
output chan *api.AddedOutput
}
// New returns a new Adder with the given ClusterDAGService, add options and a
// channel to send updates during the adding process.
//
// An Adder may only be used once.
func New(ctx context.Context, ds ClusterDAGService, p *api.AddParams, out chan *api.AddedOutput) *Adder {
ctx2, cancel := context.WithCancel(ctx)
// Discard all output
if out == nil {
out = make(chan *api.AddedOutput, 100)
go func() {
for range out {
}
}()
}
return &Adder{
ctx: ctx2,
cancel: cancel,
dags: ds,
params: p,
output: out,
}
}
// FromMultipart adds content from a multipart.Reader. The adder will
// no longer be usable after calling this method.
func (a *Adder) FromMultipart(r *multipart.Reader) (*cid.Cid, error) {
logger.Debugf("adding from multipart with params: %+v", a.params)
f := &files.MultipartFile{
Mediatype: "multipart/form-data",
Reader: r,
}
defer f.Close()
return a.FromFiles(f)
}
// FromFiles adds content from a files.File. The adder will no longer
// be usable after calling this method.
func (a *Adder) FromFiles(f files.File) (*cid.Cid, error) {
logger.Debugf("adding from files")
if a.ctx.Err() != nil { // don't allow running twice
return nil, a.ctx.Err()
}
defer a.cancel()
defer close(a.output)
ipfsAdder, err := ipfsadd.NewAdder(a.ctx, a.dags)
if err != nil {
return nil, err
}
ipfsAdder.Hidden = a.params.Hidden
ipfsAdder.Trickle = a.params.Layout == "trickle"
ipfsAdder.RawLeaves = a.params.RawLeaves
ipfsAdder.Wrap = true
ipfsAdder.Chunker = a.params.Chunker
ipfsAdder.Out = a.output
for {
select {
case <-a.ctx.Done():
return nil, a.ctx.Err()
default:
err := addFile(f, ipfsAdder)
if err == io.EOF {
goto FINALIZE
}
if err != nil {
return nil, err
}
}
}
FINALIZE:
_, err = ipfsAdder.Finalize()
if err != nil {
return nil, err
}
root, err := a.dags.Finalize(a.ctx)
return root, err
}
func addFile(fs files.File, ipfsAdder *ipfsadd.Adder) error {
f, err := fs.NextFile()
if err != nil {
return err
}
logger.Debugf("ipfsAdder AddFile(%s)", f.FullPath())
return ipfsAdder.AddFile(f)
}

135
adder/adder_test.go Normal file
View File

@@ -0,0 +1,135 @@
package adder
import (
"context"
"errors"
"mime/multipart"
"sync"
"testing"
"time"
"github.com/ipfs/ipfs-cluster/api"
"github.com/ipfs/ipfs-cluster/test"
cid "github.com/ipfs/go-cid"
ipld "github.com/ipfs/go-ipld-format"
)
type mockCDagServ struct {
BaseDAGService
resultCids map[string]struct{}
lastCid *cid.Cid
}
func (dag *mockCDagServ) Add(ctx context.Context, node ipld.Node) error {
dag.resultCids[node.Cid().String()] = struct{}{}
dag.lastCid = node.Cid()
return nil
}
func (dag *mockCDagServ) AddMany(ctx context.Context, nodes []ipld.Node) error {
for _, node := range nodes {
err := dag.Add(ctx, node)
if err != nil {
return err
}
}
return nil
}
func (dag *mockCDagServ) Finalize(ctx context.Context) (*cid.Cid, error) {
if dag.lastCid == nil {
return nil, errors.New("nothing added")
}
return dag.lastCid, nil
}
func TestAdder(t *testing.T) {
sth := test.NewShardingTestHelper()
defer sth.Clean()
mr := sth.GetTreeMultiReader(t)
r := multipart.NewReader(mr, mr.Boundary())
p := api.DefaultAddParams()
expectedCids := test.ShardingDirCids[:]
dags := &mockCDagServ{
resultCids: make(map[string]struct{}),
}
adder := New(context.Background(), dags, p, nil)
root, err := adder.FromMultipart(r)
if err != nil {
t.Fatal(err)
}
if root.String() != test.ShardingDirBalancedRootCID {
t.Error("expected the right content root")
}
if len(expectedCids) != len(dags.resultCids) {
t.Fatal("unexpected number of blocks imported")
}
for _, c := range expectedCids {
_, ok := dags.resultCids[c]
if !ok {
t.Fatal("unexpected block emitted:", c)
}
}
}
func TestAdder_DoubleStart(t *testing.T) {
sth := test.NewShardingTestHelper()
defer sth.Clean()
f := sth.GetTreeSerialFile(t)
p := api.DefaultAddParams()
dags := &mockCDagServ{
resultCids: make(map[string]struct{}),
}
adder := New(context.Background(), dags, p, nil)
_, err := adder.FromFiles(f)
if err != nil {
t.Fatal(err)
}
f = sth.GetTreeSerialFile(t)
_, err = adder.FromFiles(f)
if err == nil {
t.Fatal("expected an error: cannot run importer twice")
}
}
func TestAdder_ContextCancelled(t *testing.T) {
sth := test.NewShardingTestHelper()
defer sth.Clean()
mr := sth.GetRandFileMultiReader(t, 50000) // 50 MB
r := multipart.NewReader(mr, mr.Boundary())
p := api.DefaultAddParams()
dags := &mockCDagServ{
resultCids: make(map[string]struct{}),
}
ctx, cancel := context.WithCancel(context.Background())
adder := New(ctx, dags, p, nil)
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
_, err := adder.FromMultipart(r)
if err == nil {
t.Error("expected a context cancelled error")
}
t.Log(err)
}()
time.Sleep(200 * time.Millisecond)
cancel()
wg.Wait()
}

View File

@@ -1,119 +0,0 @@
package adder
import (
"context"
"errors"
"fmt"
"github.com/ipfs/ipfs-cluster/api"
cid "github.com/ipfs/go-cid"
ipld "github.com/ipfs/go-ipld-format"
)
var errNotFound = errors.New("dagservice: block not found")
func isNotFound(err error) bool {
return err == errNotFound
}
// adderDAGService implements a DAG Service and
// outputs any nodes added using this service to an Adder.
type adderDAGService struct {
addedSet *cid.Set
addedChan chan<- *api.NodeWithMeta
}
func newAdderDAGService(ch chan *api.NodeWithMeta) ipld.DAGService {
set := cid.NewSet()
return &adderDAGService{
addedSet: set,
addedChan: ch,
}
}
// Add passes the provided node through the output channel
func (dag *adderDAGService) Add(ctx context.Context, node ipld.Node) error {
// FIXME ? This set will grow in memory.
// Maybe better to use a bloom filter
ok := dag.addedSet.Visit(node.Cid())
if !ok {
// don't re-add
return nil
}
size, err := node.Size()
if err != nil {
return err
}
nodeSerial := api.NodeWithMeta{
Cid: node.Cid().String(),
Data: node.RawData(),
CumSize: size,
}
select {
case dag.addedChan <- &nodeSerial:
return nil
case <-ctx.Done():
close(dag.addedChan)
return errors.New("canceled context preempted dagservice add")
}
}
// AddMany passes the provided nodes through the output channel
func (dag *adderDAGService) AddMany(ctx context.Context, nodes []ipld.Node) error {
for _, node := range nodes {
err := dag.Add(ctx, node)
if err != nil {
return err
}
}
return nil
}
// Get always returns errNotFound
func (dag *adderDAGService) Get(ctx context.Context, key *cid.Cid) (ipld.Node, error) {
return nil, errNotFound
}
// GetMany returns an output channel that always emits an error
func (dag *adderDAGService) GetMany(ctx context.Context, keys []*cid.Cid) <-chan *ipld.NodeOption {
out := make(chan *ipld.NodeOption, 1)
out <- &ipld.NodeOption{Err: fmt.Errorf("failed to fetch all nodes")}
close(out)
return out
}
// Remove is a nop
func (dag *adderDAGService) Remove(ctx context.Context, key *cid.Cid) error {
return nil
}
// RemoveMany is a nop
func (dag *adderDAGService) RemoveMany(ctx context.Context, keys []*cid.Cid) error {
return nil
}
// // printDAGService will "add" a node by printing it. printDAGService cannot Get nodes
// // that have already been seen and calls to Remove are noops. Nodes are
// // recorded after being added so that they will only be printed once.
// type printDAGService struct {
// ads ipld.DAGService
// }
// func newPDagService() *printDAGService {
// ch := make(chan *api.NodeWithMeta)
// ads := newAdderDAGService(ch)
// go func() {
// for n := range ch {
// fmt.Printf(n.Cid, " | ", n.Size)
// }
// }()
// return &printDAGService{
// ads: ads,
// }
// }

View File

@@ -1,171 +0,0 @@
package adder
import (
"context"
"errors"
"io"
"sync"
"github.com/ipfs/ipfs-cluster/adder/ipfsadd"
"github.com/ipfs/ipfs-cluster/api"
"github.com/ipfs/go-ipfs-cmdkit/files"
)
// BlockHandler is a function used to process a block as is received by the
// Importer. Used in Importer.Run().
type BlockHandler func(ctx context.Context, n *api.NodeWithMeta) (string, error)
// Importer facilitates converting a file into a stream
// of chunked blocks.
type Importer struct {
startedMux sync.Mutex
started bool
files files.File
params *api.AddParams
output chan *api.AddedOutput
blocks chan *api.NodeWithMeta
errors chan error
}
// NewImporter sets up an Importer ready to Go().
func NewImporter(f files.File, p *api.AddParams, out chan *api.AddedOutput) (*Importer, error) {
blocks := make(chan *api.NodeWithMeta, 1)
errors := make(chan error, 1)
return &Importer{
started: false,
files: f,
params: p,
output: out,
blocks: blocks,
errors: errors,
}, nil
}
// Blocks returns a channel where each imported block is sent.
func (imp *Importer) Blocks() <-chan *api.NodeWithMeta {
return imp.blocks
}
// Errors returns a channel to which any errors during the import
// process are sent.
func (imp *Importer) Errors() <-chan error {
return imp.errors
}
func (imp *Importer) start() bool {
imp.startedMux.Lock()
defer imp.startedMux.Unlock()
retVal := imp.started
imp.started = true
return !retVal
}
func (imp *Importer) addFile(ipfsAdder *ipfsadd.Adder) error {
f, err := imp.files.NextFile()
if err != nil {
return err
}
logger.Debugf("ipfsAdder AddFile(%s)", f.FullPath())
return ipfsAdder.AddFile(f)
}
func (imp *Importer) addFiles(ctx context.Context, ipfsAdder *ipfsadd.Adder) {
defer close(imp.blocks)
defer close(imp.errors)
for {
select {
case <-ctx.Done():
imp.errors <- ctx.Err()
return
default:
err := imp.addFile(ipfsAdder)
if err == io.EOF {
goto FINALIZE
}
if err != nil {
imp.errors <- err
return
}
}
}
FINALIZE:
_, err := ipfsAdder.Finalize()
if err != nil {
imp.errors <- err
}
}
// Go starts a goroutine which reads the blocks as outputted by the
// ipfsadd module called with the parameters of this importer. The blocks,
// errors and output are placed in the respective importer channels for
// further processing. When there are no more blocks, or an error happen,
// the channels will be closed.
func (imp *Importer) Go(ctx context.Context) error {
if !imp.start() {
return errors.New("importing process already started or finished")
}
dagsvc := newAdderDAGService(imp.blocks)
ipfsAdder, err := ipfsadd.NewAdder(ctx, dagsvc)
if err != nil {
return err
}
ipfsAdder.Hidden = imp.params.Hidden
ipfsAdder.Trickle = imp.params.Layout == "trickle"
ipfsAdder.RawLeaves = imp.params.RawLeaves
ipfsAdder.Wrap = true
ipfsAdder.Chunker = imp.params.Chunker
ipfsAdder.Out = imp.output
go imp.addFiles(ctx, ipfsAdder)
return nil
}
// Run triggers the importing process (calling Go) and calls the given BlockHandler
// on every node read from the importer.
// It returns the value returned by the last-called BlockHandler.
func (imp *Importer) Run(ctx context.Context, blockF BlockHandler) (string, error) {
var retVal string
errors := imp.Errors()
blocks := imp.Blocks()
err := imp.Go(ctx)
if err != nil {
return retVal, err
}
for {
select {
case <-ctx.Done():
return retVal, ctx.Err()
case err, ok := <-errors:
if ok {
logger.Error(err)
return retVal, err
}
case node, ok := <-blocks:
if !ok {
goto BREAK // finished importing
}
retVal, err = blockF(ctx, node)
if err != nil {
return retVal, err
}
}
}
BREAK:
// grab any last errors from errors if necessary
// (this waits for errors to be closed)
err = <-errors
return retVal, err
}

View File

@@ -1,92 +0,0 @@
package adder
import (
"context"
"testing"
"github.com/ipfs/ipfs-cluster/api"
"github.com/ipfs/ipfs-cluster/test"
)
func TestImporter(t *testing.T) {
sth := test.NewShardingTestHelper()
defer sth.Clean()
f := sth.GetTreeSerialFile(t)
p := api.DefaultAddParams()
out := make(chan *api.AddedOutput)
go func() {
for range out {
}
}()
imp, err := NewImporter(f, p, out)
if err != nil {
t.Fatal(err)
}
expectedCids := test.ShardingDirCids[:]
resultCids := make(map[string]struct{})
blockHandler := func(ctx context.Context, n *api.NodeWithMeta) (string, error) {
resultCids[n.Cid] = struct{}{}
return n.Cid, nil
}
_, err = imp.Run(context.Background(), blockHandler)
if err != nil {
t.Fatal(err)
}
// for i, c := range expectedCids {
// fmt.Printf("%d: %s\n", i, c)
// }
// for c := range resultCids {
// fmt.Printf("%s\n", c)
// }
if len(expectedCids) != len(resultCids) {
t.Fatal("unexpected number of blocks imported")
}
for _, c := range expectedCids {
_, ok := resultCids[c]
if !ok {
t.Fatal("unexpected block emitted:", c)
}
}
}
func TestImporter_DoubleStart(t *testing.T) {
sth := test.NewShardingTestHelper()
defer sth.Clean()
f := sth.GetTreeSerialFile(t)
p := api.DefaultAddParams()
out := make(chan *api.AddedOutput)
go func() {
for range out {
}
}()
imp, err := NewImporter(f, p, out)
if err != nil {
t.Fatal(err)
}
blockHandler := func(ctx context.Context, n *api.NodeWithMeta) (string, error) {
return "", nil
}
_, err = imp.Run(context.Background(), blockHandler)
if err != nil {
t.Fatal(err)
}
_, err = imp.Run(context.Background(), blockHandler)
if err == nil {
t.Fatal("expected an error: cannot run importer twice")
}
}

View File

@@ -1,155 +0,0 @@
// Package local implements an ipfs-cluster Adder that chunks and adds content
// to a local peer, before pinning it.
package local
import (
"context"
"errors"
"mime/multipart"
"github.com/ipfs/ipfs-cluster/adder"
"github.com/ipfs/ipfs-cluster/api"
"github.com/ipfs/ipfs-cluster/rpcutil"
rpc "github.com/hsanjuan/go-libp2p-gorpc"
cid "github.com/ipfs/go-cid"
files "github.com/ipfs/go-ipfs-cmdkit/files"
logging "github.com/ipfs/go-log"
peer "github.com/libp2p/go-libp2p-peer"
)
var logger = logging.Logger("addlocal")
var outputBuffer = 200
// Adder is an implementation of IPFS Cluster Adder interface,
// which allows adding content directly to IPFS daemons attached
// to the Cluster (without sharding).
type Adder struct {
rpcClient *rpc.Client
output chan *api.AddedOutput
}
// New returns a new Adder with the given rpc Client. The client is used
// to perform calls to IPFSBlockPut and Pin content on Cluster.
func New(rpc *rpc.Client, discardOutput bool) *Adder {
output := make(chan *api.AddedOutput, outputBuffer)
if discardOutput {
go func() {
for range output {
}
}()
}
return &Adder{
rpcClient: rpc,
output: output,
}
}
// Output returns a channel for output updates during the adding process.
func (a *Adder) Output() <-chan *api.AddedOutput {
return a.output
}
func (a *Adder) putBlock(ctx context.Context, n *api.NodeWithMeta, dests []peer.ID) error {
logger.Debugf("put block: %s", n.Cid)
c, err := cid.Decode(n.Cid)
if err != nil {
return err
}
format, ok := cid.CodecToStr[c.Type()]
if !ok {
format = ""
logger.Warning("unsupported cid type, treating as v0")
}
if c.Prefix().Version == 0 {
format = "v0"
}
n.Format = format
ctxs, cancels := rpcutil.CtxsWithCancel(ctx, len(dests))
defer rpcutil.MultiCancel(cancels)
logger.Debugf("block put %s", n.Cid)
errs := a.rpcClient.MultiCall(
ctxs,
dests,
"Cluster",
"IPFSBlockPut",
*n,
rpcutil.RPCDiscardReplies(len(dests)),
)
return rpcutil.CheckErrs(errs)
}
// FromMultipart allows to add a file encoded as multipart.
func (a *Adder) FromMultipart(ctx context.Context, r *multipart.Reader, p *api.AddParams) (*cid.Cid, error) {
f := &files.MultipartFile{
Mediatype: "multipart/form-data",
Reader: r,
}
defer close(a.output)
defer f.Close()
ctxRun, cancelRun := context.WithCancel(ctx)
defer cancelRun()
var allocsStr []string
err := a.rpcClient.CallContext(
ctx,
"",
"Cluster",
"Allocate",
api.PinWithOpts(nil, p.PinOptions).ToSerial(),
&allocsStr,
)
if err != nil {
return nil, err
}
allocations := api.StringsToPeers(allocsStr)
localBlockPut := func(ctx context.Context, n *api.NodeWithMeta) (string, error) {
retVal := n.Cid
return retVal, a.putBlock(ctx, n, allocations)
}
importer, err := adder.NewImporter(f, p, a.output)
if err != nil {
return nil, err
}
lastCidStr, err := importer.Run(ctxRun, localBlockPut)
if err != nil {
cancelRun()
return nil, err
}
lastCid, err := cid.Decode(lastCidStr)
if err != nil {
return nil, errors.New("nothing imported: invalid Cid")
}
// Finally, cluster pin the result
pinS := api.PinSerial{
Cid: lastCidStr,
Type: int(api.DataType),
MaxDepth: -1,
PinOptions: api.PinOptions{
ReplicationFactorMin: p.ReplicationFactorMin,
ReplicationFactorMax: p.ReplicationFactorMax,
Name: p.Name,
},
}
err = a.rpcClient.CallContext(
ctx,
"",
"Cluster",
"Pin",
pinS,
&struct{}{},
)
return lastCid, err
}

116
adder/local/dag_service.go Normal file
View File

@@ -0,0 +1,116 @@
// Package local implements a ClusterDAGService that chunks and adds content
// to a local peer, before pinning it.
package local
import (
"context"
"errors"
adder "github.com/ipfs/ipfs-cluster/adder"
"github.com/ipfs/ipfs-cluster/api"
rpc "github.com/hsanjuan/go-libp2p-gorpc"
cid "github.com/ipfs/go-cid"
ipld "github.com/ipfs/go-ipld-format"
logging "github.com/ipfs/go-log"
peer "github.com/libp2p/go-libp2p-peer"
)
var errNotFound = errors.New("dagservice: block not found")
var logger = logging.Logger("localdags")
// DAGService is an implementation of an adder.ClusterDAGService which
// puts the added blocks directly in the peers allocated to them (without
// sharding).
type DAGService struct {
adder.BaseDAGService
rpcClient *rpc.Client
dests []peer.ID
pinOpts api.PinOptions
lastCid *cid.Cid
}
// New returns a new DAGService with the given rpc Client. The client is used
// to perform calls to IPFSBlockPut and Pin content on Cluster.
func New(rpc *rpc.Client, opts api.PinOptions) *DAGService {
return &DAGService{
rpcClient: rpc,
dests: nil,
pinOpts: opts,
}
}
// Add puts the given node in the destination peers.
func (dag *DAGService) Add(ctx context.Context, node ipld.Node) error {
if dag.dests == nil {
// Find where to allocate this file
var allocsStr []string
err := dag.rpcClient.CallContext(
ctx,
"",
"Cluster",
"Allocate",
api.PinWithOpts(nil, dag.pinOpts).ToSerial(),
&allocsStr,
)
if err != nil {
return err
}
dag.dests = api.StringsToPeers(allocsStr)
}
size, err := node.Size()
if err != nil {
return err
}
nodeSerial := &api.NodeWithMeta{
Cid: node.Cid().String(),
Data: node.RawData(),
CumSize: size,
}
dag.lastCid = node.Cid()
return adder.PutBlock(ctx, dag.rpcClient, nodeSerial, dag.dests)
}
// Finalize pins the last Cid added to this DAGService.
func (dag *DAGService) Finalize(ctx context.Context) (*cid.Cid, error) {
if dag.lastCid == nil {
return nil, errors.New("nothing was added")
}
// Cluster pin the result
pinS := api.PinSerial{
Cid: dag.lastCid.String(),
Type: int(api.DataType),
MaxDepth: -1,
PinOptions: dag.pinOpts,
}
dag.dests = nil
return dag.lastCid, dag.rpcClient.CallContext(
ctx,
"",
"Cluster",
"Pin",
pinS,
&struct{}{},
)
}
// AddMany calls Add for every given node.
func (dag *DAGService) AddMany(ctx context.Context, nodes []ipld.Node) error {
for _, node := range nodes {
err := dag.Add(ctx, node)
if err != nil {
return err
}
}
return nil
}

View File

@@ -7,6 +7,7 @@ import (
"sync"
"testing"
adder "github.com/ipfs/ipfs-cluster/adder"
"github.com/ipfs/ipfs-cluster/api"
"github.com/ipfs/ipfs-cluster/test"
@@ -36,7 +37,7 @@ func (rpcs *testRPC) Allocate(ctx context.Context, in api.PinSerial, out *[]stri
return nil
}
func TestFromMultipart(t *testing.T) {
func TestAdd(t *testing.T) {
t.Run("balanced", func(t *testing.T) {
rpcObj := &testRPC{}
server := rpc.NewServer(nil, "mock")
@@ -45,16 +46,17 @@ func TestFromMultipart(t *testing.T) {
t.Fatal(err)
}
client := rpc.NewClientWithServer(nil, "mock", server)
params := api.DefaultAddParams()
add := New(client, true)
dags := New(client, params.PinOptions)
add := adder.New(context.Background(), dags, params, nil)
sth := test.NewShardingTestHelper()
defer sth.Clean()
mr := sth.GetTreeMultiReader(t)
r := multipart.NewReader(mr, mr.Boundary())
params := api.DefaultAddParams()
params.ShardSize = 0
rootCid, err := add.FromMultipart(context.Background(), r, api.DefaultAddParams())
rootCid, err := add.FromMultipart(r)
if err != nil {
t.Fatal(err)
}
@@ -85,16 +87,18 @@ func TestFromMultipart(t *testing.T) {
t.Fatal(err)
}
client := rpc.NewClientWithServer(nil, "mock", server)
params := api.DefaultAddParams()
params.Layout = "trickle"
dags := New(client, params.PinOptions)
add := adder.New(context.Background(), dags, params, nil)
add := New(client, true)
sth := test.NewShardingTestHelper()
defer sth.Clean()
mr := sth.GetTreeMultiReader(t)
r := multipart.NewReader(mr, mr.Boundary())
p := api.DefaultAddParams()
p.Layout = "trickle"
rootCid, err := add.FromMultipart(context.Background(), r, p)
rootCid, err := add.FromMultipart(r)
if err != nil {
t.Fatal(err)
}

View File

@@ -1,128 +0,0 @@
// Package sharding implements a sharding adder that chunks and
// shards content while it's added, creating Cluster DAGs and
// pinning them.
package sharding
import (
"context"
"fmt"
"mime/multipart"
"github.com/ipfs/ipfs-cluster/adder"
"github.com/ipfs/ipfs-cluster/api"
rpc "github.com/hsanjuan/go-libp2p-gorpc"
cid "github.com/ipfs/go-cid"
files "github.com/ipfs/go-ipfs-cmdkit/files"
logging "github.com/ipfs/go-log"
)
var logger = logging.Logger("addshard")
var outputBuffer = 200
// Adder is an implementation of IPFS Cluster's Adder interface which
// shards content while adding among several IPFS Cluster peers,
// creating a Cluster DAG to track and pin that content selectively
// in the IPFS daemons allocated to it.
type Adder struct {
rpcClient *rpc.Client
output chan *api.AddedOutput
}
// New returns a new Adder, which uses the given rpc client to perform
// Allocate, IPFSBlockPut and Pin requests to other cluster components.
func New(rpc *rpc.Client, discardOutput bool) *Adder {
output := make(chan *api.AddedOutput, outputBuffer)
if discardOutput {
go func() {
for range output {
}
}()
}
return &Adder{
rpcClient: rpc,
output: output,
}
}
// Output returns a channel for output updates during the adding process.
func (a *Adder) Output() <-chan *api.AddedOutput {
return a.output
}
// FromMultipart allows to add (and shard) a file encoded as multipart.
func (a *Adder) FromMultipart(ctx context.Context, r *multipart.Reader, p *api.AddParams) (*cid.Cid, error) {
logger.Debugf("adding from multipart with params: %+v", p)
f := &files.MultipartFile{
Mediatype: "multipart/form-data",
Reader: r,
}
defer close(a.output)
defer f.Close()
ctxRun, cancelRun := context.WithCancel(ctx)
defer cancelRun()
pinOpts := api.PinOptions{
ReplicationFactorMin: p.ReplicationFactorMin,
ReplicationFactorMax: p.ReplicationFactorMax,
Name: p.Name,
ShardSize: p.ShardSize,
}
dagBuilder := newClusterDAGBuilder(a.rpcClient, pinOpts, a.output)
// Always stop the builder
defer dagBuilder.Cancel()
blockHandle := func(ctx context.Context, n *api.NodeWithMeta) (string, error) {
logger.Debugf("handling block %s (size %d)", n.Cid, n.Size())
select {
case <-dagBuilder.Done():
return "", dagBuilder.Err()
case <-ctx.Done():
return "", ctx.Err()
case dagBuilder.Blocks() <- n:
return n.Cid, nil
}
}
logger.Debug("creating importer")
importer, err := adder.NewImporter(f, p, a.output)
if err != nil {
return nil, err
}
logger.Infof("importing file to Cluster (name '%s')", p.Name)
rootCidStr, err := importer.Run(ctxRun, blockHandle)
if err != nil {
cancelRun()
logger.Error("Importing aborted: ", err)
return nil, err
}
// Trigger shard finalize
close(dagBuilder.Blocks())
select {
case <-dagBuilder.Done(): // wait for the builder to finish
err = dagBuilder.Err()
case <-ctx.Done():
err = ctx.Err()
}
if err != nil {
logger.Info("import process finished with error: ", err)
return nil, err
}
rootCid, err := cid.Decode(rootCidStr)
if err != nil {
return nil, fmt.Errorf("bad root cid: %s", err)
}
logger.Info("import process finished successfully")
return rootCid, nil
}

View File

@@ -1,352 +0,0 @@
package sharding
import (
"context"
"errors"
"fmt"
"time"
"github.com/ipfs/ipfs-cluster/api"
"github.com/ipfs/ipfs-cluster/rpcutil"
humanize "github.com/dustin/go-humanize"
rpc "github.com/hsanjuan/go-libp2p-gorpc"
cid "github.com/ipfs/go-cid"
peer "github.com/libp2p/go-libp2p-peer"
)
// A clusterDAGBuilder is in charge of creating a full cluster dag upon receiving
// a stream of blocks (NodeWithMeta)
type clusterDAGBuilder struct {
ctx context.Context
cancel context.CancelFunc
error error
pinOpts api.PinOptions
rpc *rpc.Client
blocks chan *api.NodeWithMeta
// Current shard being built
currentShard *shard
// Last flushed shard CID
previousShard *cid.Cid
// shard tracking
shards map[string]*cid.Cid
startTime time.Time
totalSize uint64
output chan *api.AddedOutput
}
func newClusterDAGBuilder(rpc *rpc.Client, opts api.PinOptions, output chan *api.AddedOutput) *clusterDAGBuilder {
ctx, cancel := context.WithCancel(context.Background())
// By caching one node don't block sending something
// to the channel.
blocks := make(chan *api.NodeWithMeta, 0)
cdb := &clusterDAGBuilder{
ctx: ctx,
cancel: cancel,
rpc: rpc,
blocks: blocks,
pinOpts: opts,
shards: make(map[string]*cid.Cid),
startTime: time.Now(),
output: output,
}
go cdb.ingestBlocks()
return cdb
}
// Blocks returns a channel on which to send blocks to be processed by this
// clusterDAGBuilder (ingested). Close channel when done.
func (cdb *clusterDAGBuilder) Blocks() chan<- *api.NodeWithMeta {
return cdb.blocks
}
// Done returns a channel that is closed when the clusterDAGBuilder has finished
// processing blocks. Use Err() to check for any errors after done.
func (cdb *clusterDAGBuilder) Done() <-chan struct{} {
return cdb.ctx.Done()
}
// Err returns any error after the clusterDAGBuilder is Done().
// Err always returns nil if not Done().
func (cdb *clusterDAGBuilder) Err() error {
select {
case <-cdb.ctx.Done():
return cdb.error
default:
return nil
}
}
// Cancel cancels the clusterDAGBulder and all associated operations
func (cdb *clusterDAGBuilder) Cancel() {
cdb.cancel()
}
// shortcut to pin something in Cluster
func (cdb *clusterDAGBuilder) pin(p api.Pin) error {
return cdb.rpc.CallContext(
cdb.ctx,
"",
"Cluster",
"Pin",
p.ToSerial(),
&struct{}{},
)
}
// flushes the cdb.currentShard and returns the LastLink()
func (cdb *clusterDAGBuilder) flushCurrentShard() (*cid.Cid, error) {
shard := cdb.currentShard
if shard == nil {
return nil, errors.New("cannot flush a nil shard")
}
lens := len(cdb.shards)
shardCid, err := shard.Flush(cdb.ctx, lens, cdb.previousShard)
if err != nil {
return shardCid, err
}
cdb.totalSize += shard.Size()
cdb.shards[fmt.Sprintf("%d", lens)] = shardCid
cdb.previousShard = shardCid
cdb.currentShard = nil
cdb.output <- &api.AddedOutput{
Name: fmt.Sprintf("shard-%d", lens),
Hash: shardCid.String(),
Size: fmt.Sprintf("%d", shard.Size()),
}
return shard.LastLink(), nil
}
// finalize is used to signal that we need to wrap up this clusterDAG
//. It is called when the Blocks() channel is closed.
func (cdb *clusterDAGBuilder) finalize() error {
dataRootCid, err := cdb.flushCurrentShard()
if err != nil {
return err
}
clusterDAGNodes, err := makeDAG(cdb.shards)
if err != nil {
return err
}
// PutDAG to ourselves
err = putDAG(cdb.ctx, cdb.rpc, clusterDAGNodes, []peer.ID{""})
if err != nil {
return err
}
clusterDAG := clusterDAGNodes[0].Cid()
cdb.output <- &api.AddedOutput{
Name: fmt.Sprintf("%s-clusterDAG", cdb.pinOpts.Name),
Hash: clusterDAG.String(),
Size: fmt.Sprintf("%d", cdb.totalSize),
}
// Pin the ClusterDAG
clusterDAGPin := api.PinCid(clusterDAG)
clusterDAGPin.ReplicationFactorMin = -1
clusterDAGPin.ReplicationFactorMax = -1
clusterDAGPin.MaxDepth = 0 // pin direct
clusterDAGPin.Name = fmt.Sprintf("%s-clusterDAG", cdb.pinOpts.Name)
clusterDAGPin.Type = api.ClusterDAGType
clusterDAGPin.Reference = dataRootCid
err = cdb.pin(clusterDAGPin)
if err != nil {
return err
}
// Pin the META pin
metaPin := api.PinWithOpts(dataRootCid, cdb.pinOpts)
metaPin.Type = api.MetaType
metaPin.Reference = clusterDAG
metaPin.MaxDepth = 0 // irrelevant. Meta-pins are not pinned
err = cdb.pin(metaPin)
if err != nil {
return err
}
// Log some stats
cdb.logStats(metaPin.Cid, clusterDAGPin.Cid)
// Consider doing this? Seems like overkill
//
// // Ammend ShardPins to reference clusterDAG root hash as a Parent
// shardParents := cid.NewSet()
// shardParents.Add(clusterDAG)
// for shardN, shard := range cdb.shardNodes {
// pin := api.PinWithOpts(shard, cdb.pinOpts)
// pin.Name := fmt.Sprintf("%s-shard-%s", pin.Name, shardN)
// pin.Type = api.ShardType
// pin.Parents = shardParents
// // FIXME: We don't know anymore the shard pin maxDepth
// // so we'd need to get the pin first.
// err := cdb.pin(pin)
// if err != nil {
// return err
// }
// }
return nil
}
// returns the value for continue in ingestBlocks()
func (cdb *clusterDAGBuilder) handleBlock(n *api.NodeWithMeta, more bool) bool {
if !more {
err := cdb.finalize()
if err != nil {
logger.Error(err)
cdb.error = err
}
return false
}
err := cdb.ingestBlock(n)
if err != nil {
logger.Error(err)
cdb.error = err
return false
}
return true
}
func (cdb *clusterDAGBuilder) ingestBlocks() {
// if this function returns, it means we are Done().
// we auto-cancel ourselves in that case.
// if it was due to an error, it will be in Err().
defer cdb.Cancel()
cont := true
for cont {
select {
case <-cdb.ctx.Done(): // cancelled from outside
return
case n, ok := <-cdb.blocks:
cont = cdb.handleBlock(n, ok)
}
}
}
// ingests a block to the current shard. If it get's full, it
// Flushes the shard and retries with a new one.
func (cdb *clusterDAGBuilder) ingestBlock(n *api.NodeWithMeta) error {
shard := cdb.currentShard
// if we have no currentShard, create one
if shard == nil {
logger.Infof("new shard for '%s': #%d", cdb.pinOpts.Name, len(cdb.shards))
var err error
shard, err = newShard(cdb.ctx, cdb.rpc, cdb.pinOpts)
if err != nil {
return err
}
cdb.currentShard = shard
}
logger.Debugf("ingesting block %s in shard %d (%s)", n.Cid, len(cdb.shards), cdb.pinOpts.Name)
c, err := cid.Decode(n.Cid)
if err != nil {
return err
}
// add the block to it if it fits and return
if shard.Size()+n.Size() < shard.Limit() {
shard.AddLink(c, n.Size())
return cdb.putBlock(n, shard.Allocations())
}
logger.Debugf("shard %d full: block: %d. shard: %d. limit: %d",
len(cdb.shards),
n.Size(),
shard.Size(),
shard.Limit(),
)
// -------
// Below: block DOES NOT fit in shard
// Flush and retry
// if shard is empty, error
if shard.Size() == 0 {
return errors.New("block doesn't fit in empty shard: shard size too small?")
}
_, err = cdb.flushCurrentShard()
if err != nil {
return err
}
return cdb.ingestBlock(n) // <-- retry ingest
}
// performs an IPFSBlockPut of this Node to the given destinations
func (cdb *clusterDAGBuilder) putBlock(n *api.NodeWithMeta, dests []peer.ID) error {
c, err := cid.Decode(n.Cid)
if err != nil {
return err
}
format, ok := cid.CodecToStr[c.Type()]
if !ok {
format = ""
logger.Warning("unsupported cid type, treating as v0")
}
if c.Prefix().Version == 0 {
format = "v0"
}
n.Format = format
ctxs, cancels := rpcutil.CtxsWithCancel(cdb.ctx, len(dests))
defer rpcutil.MultiCancel(cancels)
logger.Debugf("block put %s", n.Cid)
errs := cdb.rpc.MultiCall(
ctxs,
dests,
"Cluster",
"IPFSBlockPut",
*n,
rpcutil.RPCDiscardReplies(len(dests)),
)
return rpcutil.CheckErrs(errs)
}
func (cdb *clusterDAGBuilder) logStats(metaPin, clusterDAGPin *cid.Cid) {
duration := time.Since(cdb.startTime)
seconds := uint64(duration) / uint64(time.Second)
var rate string
if seconds == 0 {
rate = "∞ B"
} else {
rate = humanize.Bytes(cdb.totalSize / seconds)
}
logger.Infof(`sharding session sucessful:
CID: %s
ClusterDAG: %s
Total shards: %d
Total size: %s
Total time: %s
Ingest Rate: %s/s
`,
metaPin,
clusterDAGPin,
len(cdb.shards),
humanize.Bytes(cdb.totalSize),
duration,
rate,
)
}

View File

@@ -0,0 +1,293 @@
// Package sharding implements a sharding ClusterDAGService which places
// content in different shards while it's being added, creating
// a final Cluster DAG and pinning it.
package sharding
import (
"context"
"errors"
"fmt"
"time"
"github.com/ipfs/ipfs-cluster/adder"
"github.com/ipfs/ipfs-cluster/api"
humanize "github.com/dustin/go-humanize"
rpc "github.com/hsanjuan/go-libp2p-gorpc"
cid "github.com/ipfs/go-cid"
ipld "github.com/ipfs/go-ipld-format"
logging "github.com/ipfs/go-log"
peer "github.com/libp2p/go-libp2p-peer"
)
var errNotFound = errors.New("dagservice: block not found")
var logger = logging.Logger("shardingdags")
// DAGService is an implementation of a ClusterDAGService which
// shards content while adding among several IPFS Cluster peers,
// creating a Cluster DAG to track and pin that content selectively
// in the IPFS daemons allocated to it.
type DAGService struct {
adder.BaseDAGService
rpcClient *rpc.Client
pinOpts api.PinOptions
output chan<- *api.AddedOutput
addedSet *cid.Set
// Current shard being built
currentShard *shard
// Last flushed shard CID
previousShard *cid.Cid
// shard tracking
shards map[string]*cid.Cid
startTime time.Time
totalSize uint64
}
// New returns a new ClusterDAGService, which uses the given rpc client to perform
// Allocate, IPFSBlockPut and Pin requests to other cluster components.
func New(rpc *rpc.Client, opts api.PinOptions, out chan<- *api.AddedOutput) *DAGService {
return &DAGService{
rpcClient: rpc,
pinOpts: opts,
output: out,
addedSet: cid.NewSet(),
shards: make(map[string]*cid.Cid),
startTime: time.Now(),
}
}
// Add puts the given node in its corresponding shard and sends it to the
// destination peers.
func (dag *DAGService) Add(ctx context.Context, node ipld.Node) error {
// FIXME: This will grow in memory
if !dag.addedSet.Visit(node.Cid()) {
return nil
}
size, err := node.Size()
if err != nil {
return err
}
nodeSerial := &api.NodeWithMeta{
Cid: node.Cid().String(),
Data: node.RawData(),
CumSize: size,
}
return dag.ingestBlock(ctx, nodeSerial)
}
// Finalize finishes sharding, creates the cluster DAG and pins it along
// with the meta pin for the root node of the content.
func (dag *DAGService) Finalize(ctx context.Context) (*cid.Cid, error) {
dataRootCid, err := dag.flushCurrentShard(ctx)
if err != nil {
return dataRootCid, err
}
clusterDAGNodes, err := makeDAG(dag.shards)
if err != nil {
return dataRootCid, err
}
// PutDAG to ourselves
err = putDAG(ctx, dag.rpcClient, clusterDAGNodes, []peer.ID{""})
if err != nil {
return dataRootCid, err
}
clusterDAG := clusterDAGNodes[0].Cid()
dag.sendOutput(&api.AddedOutput{
Name: fmt.Sprintf("%s-clusterDAG", dag.pinOpts.Name),
Hash: clusterDAG.String(),
Size: fmt.Sprintf("%d", dag.totalSize),
})
// Pin the ClusterDAG
clusterDAGPin := api.PinCid(clusterDAG)
clusterDAGPin.ReplicationFactorMin = -1
clusterDAGPin.ReplicationFactorMax = -1
clusterDAGPin.MaxDepth = 0 // pin direct
clusterDAGPin.Name = fmt.Sprintf("%s-clusterDAG", dag.pinOpts.Name)
clusterDAGPin.Type = api.ClusterDAGType
clusterDAGPin.Reference = dataRootCid
err = dag.pin(ctx, clusterDAGPin)
if err != nil {
return dataRootCid, err
}
// Pin the META pin
metaPin := api.PinWithOpts(dataRootCid, dag.pinOpts)
metaPin.Type = api.MetaType
metaPin.Reference = clusterDAG
metaPin.MaxDepth = 0 // irrelevant. Meta-pins are not pinned
err = dag.pin(ctx, metaPin)
if err != nil {
return dataRootCid, err
}
// Log some stats
dag.logStats(metaPin.Cid, clusterDAGPin.Cid)
// Consider doing this? Seems like overkill
//
// // Amend ShardPins to reference clusterDAG root hash as a Parent
// shardParents := cid.NewSet()
// shardParents.Add(clusterDAG)
// for shardN, shard := range dag.shardNodes {
// pin := api.PinWithOpts(shard, dag.pinOpts)
// pin.Name := fmt.Sprintf("%s-shard-%s", pin.Name, shardN)
// pin.Type = api.ShardType
// pin.Parents = shardParents
// // FIXME: We don't know anymore the shard pin maxDepth
// // so we'd need to get the pin first.
// err := dag.pin(pin)
// if err != nil {
// return err
// }
// }
return dataRootCid, nil
}
// ingests a block into the current shard. If it gets full, it
// flushes the shard and retries with a new one.
func (dag *DAGService) ingestBlock(ctx context.Context, n *api.NodeWithMeta) error {
shard := dag.currentShard
// if we have no currentShard, create one
if shard == nil {
logger.Infof("new shard for '%s': #%d", dag.pinOpts.Name, len(dag.shards))
var err error
shard, err = newShard(ctx, dag.rpcClient, dag.pinOpts)
if err != nil {
return err
}
dag.currentShard = shard
}
logger.Debugf("ingesting block %s in shard %d (%s)", n.Cid, len(dag.shards), dag.pinOpts.Name)
c, err := cid.Decode(n.Cid)
if err != nil {
return err
}
// add the block to it if it fits and return
if shard.Size()+n.Size() < shard.Limit() {
shard.AddLink(c, n.Size())
return adder.PutBlock(ctx, dag.rpcClient, n, shard.Allocations())
}
logger.Debugf("shard %d full: block: %d. shard: %d. limit: %d",
len(dag.shards),
n.Size(),
shard.Size(),
shard.Limit(),
)
// -------
// Below: block DOES NOT fit in shard
// Flush and retry
// if shard is empty, error
if shard.Size() == 0 {
return errors.New("block doesn't fit in empty shard: shard size too small?")
}
_, err = dag.flushCurrentShard(ctx)
if err != nil {
return err
}
return dag.ingestBlock(ctx, n) // <-- retry ingest
}
func (dag *DAGService) logStats(metaPin, clusterDAGPin *cid.Cid) {
duration := time.Since(dag.startTime)
seconds := uint64(duration) / uint64(time.Second)
var rate string
if seconds == 0 {
rate = "∞ B"
} else {
rate = humanize.Bytes(dag.totalSize / seconds)
}
logger.Infof(`sharding session successful:
CID: %s
ClusterDAG: %s
Total shards: %d
Total size: %s
Total time: %s
Ingest Rate: %s/s
`,
metaPin,
clusterDAGPin,
len(dag.shards),
humanize.Bytes(dag.totalSize),
duration,
rate,
)
}
func (dag *DAGService) sendOutput(ao *api.AddedOutput) {
if dag.output != nil {
dag.output <- ao
}
}
// flushes the dag.currentShard and returns the LastLink()
func (dag *DAGService) flushCurrentShard(ctx context.Context) (*cid.Cid, error) {
shard := dag.currentShard
if shard == nil {
return nil, errors.New("cannot flush a nil shard")
}
lens := len(dag.shards)
shardCid, err := shard.Flush(ctx, lens, dag.previousShard)
if err != nil {
return shardCid, err
}
dag.totalSize += shard.Size()
dag.shards[fmt.Sprintf("%d", lens)] = shardCid
dag.previousShard = shardCid
dag.currentShard = nil
dag.sendOutput(&api.AddedOutput{
Name: fmt.Sprintf("shard-%d", lens),
Hash: shardCid.String(),
Size: fmt.Sprintf("%d", shard.Size()),
})
return shard.LastLink(), nil
}
// shortcut to pin something in Cluster
func (dag *DAGService) pin(ctx context.Context, p api.Pin) error {
return dag.rpcClient.CallContext(
ctx,
"",
"Cluster",
"Pin",
p.ToSerial(),
&struct{}{},
)
}
// AddMany calls Add for every given node.
func (dag *DAGService) AddMany(ctx context.Context, nodes []ipld.Node) error {
for _, node := range nodes {
err := dag.Add(ctx, node)
if err != nil {
return err
}
}
return nil
}
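For completeness, an illustrative sketch of the sharding variant with progress output, mirroring the restapi addHandler change below. The addSharded helper, the package name and the shard size value are hypothetical; the APIs used are the ones added in this commit.

package adderexample

import (
	"context"
	"fmt"
	"mime/multipart"

	adder "github.com/ipfs/ipfs-cluster/adder"
	"github.com/ipfs/ipfs-cluster/adder/sharding"
	"github.com/ipfs/ipfs-cluster/api"

	rpc "github.com/hsanjuan/go-libp2p-gorpc"
	cid "github.com/ipfs/go-cid"
)

// addSharded is a hypothetical helper showing how the sharding DAGService is
// wired: blocks are grouped into shards and, on Finalize, the cluster DAG and
// the meta pin are created and pinned.
func addSharded(ctx context.Context, client *rpc.Client, r *multipart.Reader) (*cid.Cid, error) {
	// Progress updates (files, shards, clusterDAG) arrive on this channel;
	// the Adder closes it when FromMultipart returns.
	out := make(chan *api.AddedOutput, 200)
	go func() {
		for v := range out {
			fmt.Printf("added %s: %s (%s bytes)\n", v.Name, v.Hash, v.Size)
		}
	}()

	params := api.DefaultAddParams()
	params.ShardSize = 1024 * 1024 * 100 // hypothetical 100MB shard size

	dags := sharding.New(client, params.PinOptions, out)
	add := adder.New(ctx, dags, params, out)
	return add.FromMultipart(r)
}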

View File

@@ -7,12 +7,12 @@ import (
"sync"
"testing"
adder "github.com/ipfs/ipfs-cluster/adder"
"github.com/ipfs/ipfs-cluster/api"
"github.com/ipfs/ipfs-cluster/test"
rpc "github.com/hsanjuan/go-libp2p-gorpc"
cid "github.com/ipfs/go-cid"
files "github.com/ipfs/go-ipfs-cmdkit/files"
logging "github.com/ipfs/go-log"
)
@@ -60,7 +60,7 @@ func (rpcs *testRPC) BlockGet(c *cid.Cid) ([]byte, error) {
return bI.([]byte), nil
}
func makeAdder(t *testing.T, multiReaderF func(*testing.T) *files.MultiFileReader) (*Adder, *testRPC, *multipart.Reader) {
func makeAdder(t *testing.T, params *api.AddParams) (*adder.Adder, *testRPC) {
rpcObj := &testRPC{}
server := rpc.NewServer(nil, "mock")
err := server.RegisterName("Cluster", rpcObj)
@@ -69,17 +69,18 @@ func makeAdder(t *testing.T, multiReaderF func(*testing.T) *files.MultiFileReade
}
client := rpc.NewClientWithServer(nil, "mock", server)
add := New(client, false)
out := make(chan *api.AddedOutput, 1)
dags := New(client, params.PinOptions, out)
add := adder.New(context.Background(), dags, params, out)
go func() {
for v := range add.Output() {
for v := range out {
t.Logf("Output: Name: %s. Cid: %s. Size: %s", v.Name, v.Hash, v.Size)
}
}()
mr := multiReaderF(t)
r := multipart.NewReader(mr, mr.Boundary())
return add, rpcObj, r
return add, rpcObj
}
func TestFromMultipart(t *testing.T) {
@@ -87,9 +88,6 @@ func TestFromMultipart(t *testing.T) {
defer sth.Clean()
t.Run("Test tree", func(t *testing.T) {
add, rpcObj, r := makeAdder(t, sth.GetTreeMultiReader)
_ = rpcObj
p := api.DefaultAddParams()
// Total data is about
p.ShardSize = 1024 * 300 // 300kB
@@ -98,7 +96,13 @@ func TestFromMultipart(t *testing.T) {
p.ReplicationFactorMin = 1
p.ReplicationFactorMax = 2
rootCid, err := add.FromMultipart(context.Background(), r, p)
add, rpcObj := makeAdder(t, p)
_ = rpcObj
mr := sth.GetTreeMultiReader(t)
r := multipart.NewReader(mr, mr.Boundary())
rootCid, err := add.FromMultipart(r)
if err != nil {
t.Fatal(err)
}
@@ -145,12 +149,6 @@ func TestFromMultipart(t *testing.T) {
})
t.Run("Test file", func(t *testing.T) {
mrF := func(t *testing.T) *files.MultiFileReader {
return sth.GetRandFileMultiReader(t, 1024*50) // 50 MB
}
add, rpcObj, r := makeAdder(t, mrF)
_ = rpcObj
p := api.DefaultAddParams()
// Total data is about
p.ShardSize = 1024 * 1024 * 2 // 2MB
@@ -159,7 +157,13 @@ func TestFromMultipart(t *testing.T) {
p.ReplicationFactorMin = 1
p.ReplicationFactorMax = 2
rootCid, err := add.FromMultipart(context.Background(), r, p)
add, rpcObj := makeAdder(t, p)
_ = rpcObj
mr := sth.GetRandFileMultiReader(t, 1024*50) // 50 MB
r := multipart.NewReader(mr, mr.Boundary())
rootCid, err := add.FromMultipart(r)
if err != nil {
t.Fatal(err)
}
@@ -233,21 +237,16 @@ func TestFromMultipart_Errors(t *testing.T) {
sth := test.NewShardingTestHelper()
defer sth.Clean()
for _, tc := range tcs {
add, _, r := makeAdder(t, sth.GetTreeMultiReader)
_, err := add.FromMultipart(context.Background(), r, tc.params)
add, rpcObj := makeAdder(t, tc.params)
_ = rpcObj
f := sth.GetTreeSerialFile(t)
_, err := add.FromFiles(f)
if err == nil {
t.Error(tc.name, ": expected an error")
} else {
t.Log(tc.name, ":", err)
}
}
// Test running with a cancelled context
ctx, cancel := context.WithCancel(context.Background())
cancel()
add, _, r := makeAdder(t, sth.GetTreeMultiReader)
_, err := add.FromMultipart(ctx, r, api.DefaultAddParams())
if err != ctx.Err() {
t.Error("expected context error:", err)
}
}

80
adder/util.go Normal file
View File

@@ -0,0 +1,80 @@
package adder
import (
"context"
"errors"
"fmt"
"github.com/ipfs/ipfs-cluster/api"
"github.com/ipfs/ipfs-cluster/rpcutil"
rpc "github.com/hsanjuan/go-libp2p-gorpc"
cid "github.com/ipfs/go-cid"
ipld "github.com/ipfs/go-ipld-format"
peer "github.com/libp2p/go-libp2p-peer"
)
// PutBlock sends a NodeWithMeta to the given destinations.
func PutBlock(ctx context.Context, rpc *rpc.Client, n *api.NodeWithMeta, dests []peer.ID) error {
logger.Debugf("put block: %s", n.Cid)
c, err := cid.Decode(n.Cid)
if err != nil {
return err
}
format, ok := cid.CodecToStr[c.Type()]
if !ok {
format = ""
logger.Warning("unsupported cid type, treating as v0")
}
if c.Prefix().Version == 0 {
format = "v0"
}
n.Format = format
ctxs, cancels := rpcutil.CtxsWithCancel(ctx, len(dests))
defer rpcutil.MultiCancel(cancels)
logger.Debugf("block put %s", n.Cid)
errs := rpc.MultiCall(
ctxs,
dests,
"Cluster",
"IPFSBlockPut",
*n,
rpcutil.RPCDiscardReplies(len(dests)),
)
return rpcutil.CheckErrs(errs)
}
// ErrDAGNotFound is returned whenever we try to get a block from the DAGService.
var ErrDAGNotFound = errors.New("dagservice: block not found")
// BaseDAGService partially implements an ipld.DAGService.
// It provides the methods that ClusterDAGServices do not need to implement
// themselves (Get*, Remove*), so they can avoid duplicating this code.
type BaseDAGService struct {
}
// Get always returns ErrDAGNotFound
func (dag BaseDAGService) Get(ctx context.Context, key *cid.Cid) (ipld.Node, error) {
return nil, ErrDAGNotFound
}
// GetMany returns an output channel that always emits an error
func (dag BaseDAGService) GetMany(ctx context.Context, keys []*cid.Cid) <-chan *ipld.NodeOption {
out := make(chan *ipld.NodeOption, 1)
out <- &ipld.NodeOption{Err: fmt.Errorf("failed to fetch all nodes")}
close(out)
return out
}
// Remove is a nop
func (dag BaseDAGService) Remove(ctx context.Context, key *cid.Cid) error {
return nil
}
// RemoveMany is a nop
func (dag BaseDAGService) RemoveMany(ctx context.Context, keys []*cid.Cid) error {
return nil
}

View File

@@ -514,11 +514,13 @@ func (api *API) addHandler(w http.ResponseWriter, r *http.Request) {
return
}
var add adder.Adder
output := make(chan *types.AddedOutput, 200)
var dags adder.ClusterDAGService
if params.Shard {
add = sharding.New(api.rpcClient, false)
dags = sharding.New(api.rpcClient, params.PinOptions, output)
} else {
add = local.New(api.rpcClient, false)
dags = local.New(api.rpcClient, params.PinOptions)
}
enc := json.NewEncoder(w)
@@ -529,7 +531,7 @@ func (api *API) addHandler(w http.ResponseWriter, r *http.Request) {
wg.Add(1)
go func() {
defer wg.Done()
for v := range add.Output() {
for v := range output {
err := enc.Encode(v)
if err != nil {
logger.Error(err)
@@ -537,7 +539,8 @@ func (api *API) addHandler(w http.ResponseWriter, r *http.Request) {
}
}()
c, err := add.FromMultipart(api.ctx, reader, params)
add := adder.New(api.ctx, dags, params, output)
c, err := add.FromMultipart(reader)
_ = c
wg.Wait()

View File

@@ -1113,13 +1113,14 @@ func (c *Cluster) unpinClusterDag(metaPin api.Pin) error {
// DAG can be added locally to the calling cluster peer's ipfs repo, or
// sharded across the entire cluster.
func (c *Cluster) AddFile(reader *multipart.Reader, params *api.AddParams) (*cid.Cid, error) {
var add adder.Adder
var dags adder.ClusterDAGService
if params.Shard {
add = sharding.New(c.rpcClient, true)
dags = sharding.New(c.rpcClient, params.PinOptions, nil)
} else {
add = local.New(c.rpcClient, true)
dags = local.New(c.rpcClient, params.PinOptions)
}
return add.FromMultipart(c.ctx, reader, params)
add := adder.New(c.ctx, dags, params, nil)
return add.FromMultipart(reader)
}
// Version returns the current IPFS Cluster version.

View File

@@ -7,20 +7,20 @@ var logger = logging.Logger("cluster")
// LoggingFacilities provides a list of logging identifiers
// used by cluster and their default logging level.
var LoggingFacilities = map[string]string{
"cluster": "INFO",
"restapi": "INFO",
"ipfshttp": "INFO",
"monitor": "INFO",
"mapstate": "INFO",
"consensus": "INFO",
"pintracker": "INFO",
"ascendalloc": "INFO",
"diskinfo": "INFO",
"apitypes": "INFO",
"config": "INFO",
"addshard": "INFO",
"addlocal": "INFO",
"adder": "INFO",
"cluster": "INFO",
"restapi": "INFO",
"ipfshttp": "INFO",
"monitor": "INFO",
"mapstate": "INFO",
"consensus": "INFO",
"pintracker": "INFO",
"ascendalloc": "INFO",
"diskinfo": "INFO",
"apitypes": "INFO",
"config": "INFO",
"shardingdags": "INFO",
"localdags": "INFO",
"adder": "INFO",
}
// LoggingFacilitiesExtra provides logging identifiers