1d98538411
This commit fixes #810 and adds block streaming to the final destinations when adding. This should add major performance gains when adding data to clusters. Before, every time cluster issued a block, it was broadcast individually to all destinations (new libp2p stream), where it was block/put to IPFS (a single block/put http roundtrip per block). Now, blocks are streamed all the way from the adder module to the ipfs daemon, by making every block as it arrives a single part in a multipart block/put request. Before, block-broadcast needed to wait for all destinations to finish in order to process the next block. Now, buffers allow some destinations to be faster than others while sending and receiving blocks. Before, if a block put request failed to be broadcast everywhere, an error would happen at that moment. Now, we keep streaming until the end and only then report any errors. The operation succeeds as long as at least one stream finished successfully. Errors block/putting to IPFS will not abort streams. Instead, subsequent blocks are retried with a new request, although the method will return an error when the stream finishes if there were errors at any point.
332 lines
8.6 KiB
Go
332 lines
8.6 KiB
Go
// Package adder implements functionality to add content to IPFS daemons
|
|
// managed by the Cluster.
|
|
package adder
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"mime/multipart"
|
|
"strings"
|
|
|
|
"github.com/ipfs/go-unixfs"
|
|
"github.com/ipfs/ipfs-cluster/adder/ipfsadd"
|
|
"github.com/ipfs/ipfs-cluster/api"
|
|
"github.com/ipld/go-car"
|
|
peer "github.com/libp2p/go-libp2p-core/peer"
|
|
|
|
cid "github.com/ipfs/go-cid"
|
|
files "github.com/ipfs/go-ipfs-files"
|
|
cbor "github.com/ipfs/go-ipld-cbor"
|
|
ipld "github.com/ipfs/go-ipld-format"
|
|
logging "github.com/ipfs/go-log/v2"
|
|
merkledag "github.com/ipfs/go-merkledag"
|
|
multihash "github.com/multiformats/go-multihash"
|
|
)
|
|
|
|
// logger is the package-level logger, created under the name "adder".
var logger = logging.Logger("adder")
|
|
|
|
// go-merkledag does this, but it may be moved.
|
|
// We include for explicitness.
|
|
func init() {
|
|
ipld.Register(cid.DagProtobuf, merkledag.DecodeProtobufBlock)
|
|
ipld.Register(cid.Raw, merkledag.DecodeRawBlock)
|
|
ipld.Register(cid.DagCBOR, cbor.DecodeBlock)
|
|
}
|
|
|
|
// ClusterDAGService is an implementation of ipld.DAGService plus a Finalize
|
|
// method. ClusterDAGServices can be used to provide Adders with a different
|
|
// add implementation.
|
|
type ClusterDAGService interface {
|
|
ipld.DAGService
|
|
// Finalize receives the IPFS content root CID as
|
|
// returned by the ipfs adder.
|
|
Finalize(ctx context.Context, ipfsRoot cid.Cid) (cid.Cid, error)
|
|
// Allocations returns the allocations made by the cluster DAG service
|
|
// for the added content.
|
|
Allocations() []peer.ID
|
|
}
|
|
|
|
// A dagFormatter can create dags from files.Node. It can keep state
|
|
// to add several files to the same dag.
|
|
type dagFormatter interface {
|
|
Add(name string, f files.Node) (cid.Cid, error)
|
|
}
|
|
|
|
// Adder is used to add content to IPFS Cluster using an implementation of
|
|
// ClusterDAGService.
|
|
type Adder struct {
|
|
ctx context.Context
|
|
cancel context.CancelFunc
|
|
|
|
dgs ClusterDAGService
|
|
|
|
params api.AddParams
|
|
|
|
// AddedOutput updates are placed on this channel
|
|
// whenever a block is processed. They contain information
|
|
// about the block, the CID, the Name etc. and are mostly
|
|
// meant to be streamed back to the user.
|
|
output chan api.AddedOutput
|
|
}
|
|
|
|
// New returns a new Adder with the given ClusterDAGService, add options and a
|
|
// channel to send updates during the adding process.
|
|
//
|
|
// An Adder may only be used once.
|
|
func New(ds ClusterDAGService, p api.AddParams, out chan api.AddedOutput) *Adder {
|
|
// Discard all progress update output as the caller has not provided
|
|
// a channel for them to listen on.
|
|
if out == nil {
|
|
out = make(chan api.AddedOutput, 100)
|
|
go func() {
|
|
for range out {
|
|
}
|
|
}()
|
|
}
|
|
|
|
return &Adder{
|
|
dgs: ds,
|
|
params: p,
|
|
output: out,
|
|
}
|
|
}
|
|
|
|
func (a *Adder) setContext(ctx context.Context) {
|
|
if a.ctx == nil { // only allows first context
|
|
ctxc, cancel := context.WithCancel(ctx)
|
|
a.ctx = ctxc
|
|
a.cancel = cancel
|
|
}
|
|
}
|
|
|
|
// FromMultipart adds content from a multipart.Reader. The adder will
|
|
// no longer be usable after calling this method.
|
|
func (a *Adder) FromMultipart(ctx context.Context, r *multipart.Reader) (cid.Cid, error) {
|
|
logger.Debugf("adding from multipart with params: %+v", a.params)
|
|
|
|
f, err := files.NewFileFromPartReader(r, "multipart/form-data")
|
|
if err != nil {
|
|
return cid.Undef, err
|
|
}
|
|
defer f.Close()
|
|
return a.FromFiles(ctx, f)
|
|
}
|
|
|
|
// FromFiles adds content from a files.Directory. The adder will no longer
|
|
// be usable after calling this method.
|
|
func (a *Adder) FromFiles(ctx context.Context, f files.Directory) (cid.Cid, error) {
|
|
logger.Debug("adding from files")
|
|
a.setContext(ctx)
|
|
|
|
if a.ctx.Err() != nil { // don't allow running twice
|
|
return cid.Undef, a.ctx.Err()
|
|
}
|
|
|
|
defer a.cancel()
|
|
defer close(a.output)
|
|
|
|
var dagFmtr dagFormatter
|
|
var err error
|
|
switch a.params.Format {
|
|
case "", "unixfs":
|
|
dagFmtr, err = newIpfsAdder(ctx, a.dgs, a.params, a.output)
|
|
|
|
case "car":
|
|
dagFmtr, err = newCarAdder(ctx, a.dgs, a.params, a.output)
|
|
default:
|
|
err = errors.New("bad dag formatter option")
|
|
}
|
|
if err != nil {
|
|
return cid.Undef, err
|
|
}
|
|
|
|
// setup wrapping
|
|
if a.params.Wrap {
|
|
f = files.NewSliceDirectory(
|
|
[]files.DirEntry{files.FileEntry("", f)},
|
|
)
|
|
}
|
|
|
|
it := f.Entries()
|
|
var adderRoot cid.Cid
|
|
for it.Next() {
|
|
select {
|
|
case <-a.ctx.Done():
|
|
return cid.Undef, a.ctx.Err()
|
|
default:
|
|
logger.Debugf("ipfsAdder AddFile(%s)", it.Name())
|
|
|
|
adderRoot, err = dagFmtr.Add(it.Name(), it.Node())
|
|
if err != nil {
|
|
logger.Error("error adding to cluster: ", err)
|
|
return cid.Undef, err
|
|
}
|
|
}
|
|
// TODO (hector): We can only add a single CAR file for the
|
|
// moment.
|
|
if a.params.Format == "car" {
|
|
break
|
|
}
|
|
}
|
|
if it.Err() != nil {
|
|
return cid.Undef, it.Err()
|
|
}
|
|
|
|
clusterRoot, err := a.dgs.Finalize(a.ctx, adderRoot)
|
|
if err != nil {
|
|
logger.Error("error finalizing adder:", err)
|
|
return cid.Undef, err
|
|
}
|
|
logger.Infof("%s successfully added to cluster", clusterRoot)
|
|
return clusterRoot, nil
|
|
}
|
|
|
|
// A wrapper around the ipfsadd.Adder to satisfy the dagFormatter interface.
|
|
type ipfsAdder struct {
|
|
*ipfsadd.Adder
|
|
}
|
|
|
|
func newIpfsAdder(ctx context.Context, dgs ClusterDAGService, params api.AddParams, out chan api.AddedOutput) (*ipfsAdder, error) {
|
|
iadder, err := ipfsadd.NewAdder(ctx, dgs, dgs.Allocations)
|
|
if err != nil {
|
|
logger.Error(err)
|
|
return nil, err
|
|
}
|
|
|
|
iadder.Trickle = params.Layout == "trickle"
|
|
iadder.RawLeaves = params.RawLeaves
|
|
iadder.Chunker = params.Chunker
|
|
iadder.Out = out
|
|
iadder.Progress = params.Progress
|
|
iadder.NoCopy = params.NoCopy
|
|
|
|
// Set up prefi
|
|
prefix, err := merkledag.PrefixForCidVersion(params.CidVersion)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("bad CID Version: %s", err)
|
|
}
|
|
|
|
hashFunCode, ok := multihash.Names[strings.ToLower(params.HashFun)]
|
|
if !ok {
|
|
return nil, fmt.Errorf("unrecognized hash function: %s", params.HashFun)
|
|
}
|
|
prefix.MhType = hashFunCode
|
|
prefix.MhLength = -1
|
|
iadder.CidBuilder = &prefix
|
|
return &ipfsAdder{
|
|
Adder: iadder,
|
|
}, nil
|
|
}
|
|
|
|
func (ia *ipfsAdder) Add(name string, f files.Node) (cid.Cid, error) {
|
|
// In order to set the AddedOutput names right, we use
|
|
// OutputPrefix:
|
|
//
|
|
// When adding a folder, this is the root folder name which is
|
|
// prepended to the addedpaths. When adding a single file,
|
|
// this is the name of the file which overrides the empty
|
|
// AddedOutput name.
|
|
//
|
|
// After coreunix/add.go was refactored in go-ipfs and we
|
|
// followed suit, it no longer receives the name of the
|
|
// file/folder being added and does not emit AddedOutput
|
|
// events with the right names. We addressed this by adding
|
|
// OutputPrefix to our version. go-ipfs modifies emitted
|
|
// events before sending to user).
|
|
ia.OutputPrefix = name
|
|
|
|
nd, err := ia.AddAllAndPin(f)
|
|
if err != nil {
|
|
return cid.Undef, err
|
|
}
|
|
return nd.Cid(), nil
|
|
}
|
|
|
|
// An adder to add CAR files. It is at the moment very basic, and can
|
|
// add a single CAR file with a single root. Ideally, it should be able to
|
|
// add more complex, or several CARs by wrapping them with a single root.
|
|
// But for that we would need to keep state and track an MFS root similarly to
|
|
// what the ipfsadder does.
|
|
type carAdder struct {
|
|
ctx context.Context
|
|
dgs ClusterDAGService
|
|
params api.AddParams
|
|
output chan api.AddedOutput
|
|
}
|
|
|
|
func newCarAdder(ctx context.Context, dgs ClusterDAGService, params api.AddParams, out chan api.AddedOutput) (*carAdder, error) {
|
|
return &carAdder{
|
|
ctx: ctx,
|
|
dgs: dgs,
|
|
params: params,
|
|
output: out,
|
|
}, nil
|
|
}
|
|
|
|
// Add takes a node which should be a CAR file and nothing else and
|
|
// adds its blocks using the ClusterDAGService.
|
|
func (ca *carAdder) Add(name string, fn files.Node) (cid.Cid, error) {
|
|
if ca.params.Wrap {
|
|
return cid.Undef, errors.New("cannot wrap a CAR file upload")
|
|
}
|
|
|
|
f, ok := fn.(files.File)
|
|
if !ok {
|
|
return cid.Undef, errors.New("expected CAR file is not of type file")
|
|
}
|
|
carReader, err := car.NewCarReader(f)
|
|
if err != nil {
|
|
return cid.Undef, err
|
|
}
|
|
|
|
if len(carReader.Header.Roots) != 1 {
|
|
return cid.Undef, errors.New("only CAR files with a single root are supported")
|
|
}
|
|
|
|
root := carReader.Header.Roots[0]
|
|
bytes := uint64(0)
|
|
size := uint64(0)
|
|
|
|
for {
|
|
block, err := carReader.Next()
|
|
if err != nil && err != io.EOF {
|
|
return cid.Undef, err
|
|
} else if block == nil {
|
|
break
|
|
}
|
|
|
|
bytes += uint64(len(block.RawData()))
|
|
|
|
nd, err := ipld.Decode(block)
|
|
if err != nil {
|
|
return cid.Undef, err
|
|
}
|
|
|
|
// If the root is in the CAR and the root is a UnixFS
|
|
// node, then set the size in the output object.
|
|
if nd.Cid().Equals(root) {
|
|
ufs, err := unixfs.ExtractFSNode(nd)
|
|
if err == nil {
|
|
size = ufs.FileSize()
|
|
}
|
|
}
|
|
|
|
err = ca.dgs.Add(ca.ctx, nd)
|
|
if err != nil {
|
|
return cid.Undef, err
|
|
}
|
|
}
|
|
|
|
ca.output <- api.AddedOutput{
|
|
Name: name,
|
|
Cid: root,
|
|
Bytes: bytes,
|
|
Size: size,
|
|
Allocations: ca.dgs.Allocations(),
|
|
}
|
|
|
|
return root, nil
|
|
}
|