2019-08-28 09:50:55 +00:00
|
|
|
// Package single implements a ClusterDAGService that chunks and adds content
|
|
|
|
// to cluster without sharding, before pinning it.
|
|
|
|
package single
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
|
2022-06-15 09:19:17 +00:00
|
|
|
adder "github.com/ipfs-cluster/ipfs-cluster/adder"
|
|
|
|
"github.com/ipfs-cluster/ipfs-cluster/api"
|
2019-08-28 09:50:55 +00:00
|
|
|
|
|
|
|
cid "github.com/ipfs/go-cid"
|
|
|
|
ipld "github.com/ipfs/go-ipld-format"
|
2020-03-13 20:40:02 +00:00
|
|
|
logging "github.com/ipfs/go-log/v2"
|
2019-08-28 09:50:55 +00:00
|
|
|
peer "github.com/libp2p/go-libp2p-core/peer"
|
|
|
|
rpc "github.com/libp2p/go-libp2p-gorpc"
|
|
|
|
)
|
|
|
|
|
|
|
|
var logger = logging.Logger("singledags")
|
2020-04-14 17:58:00 +00:00
|
|
|
var _ = logger // otherwise unused
|
2019-08-28 09:50:55 +00:00
|
|
|
|
|
|
|
// DAGService is an implementation of an adder.ClusterDAGService which
// puts the added blocks directly in the peers allocated to them (without
// sharding).
type DAGService struct {
	adder.BaseDAGService

	// ctx is the lifetime context of this DAGService; streaming aborts
	// when it is canceled.
	ctx context.Context

	// rpcClient is used to allocate destinations, stream blocks to them
	// and pin the final root on Cluster.
	rpcClient *rpc.Client

	// dests are the peers allocated to receive the blocks. It stays nil
	// until the first Add call triggers allocation.
	dests []peer.ID

	// addParams carries the add/pin options for this operation.
	addParams api.AddParams

	// local, when true, streams blocks only to the local peer (the
	// final pin still carries the full allocations).
	local bool

	// bs streams blocks read from the blocks channel to the
	// destinations.
	bs *adder.BlockStreamer

	// blocks buffers nodes on their way to the BlockStreamer.
	blocks chan api.NodeWithMeta

	// recentBlocks remembers the last two added CIDs so that
	// consecutive re-adds of the same node can be skipped.
	recentBlocks *recentBlocks
}
|
|
|
|
|
|
|
|
// New returns a new Adder with the given rpc Client. The client is used
|
Adders: stream blocks to destinations
This commit fixes #810 and adds block streaming to the final destinations when
adding. This should add major performance gains when adding data to clusters.
Before, everytime cluster issued a block, it was broadcasted individually to
all destinations (new libp2p stream), where it was block/put to IPFS (a single
block/put http roundtrip per block).
Now, blocks are streamed all the way from the adder module to the ipfs daemon,
by making every block as it arrives a single part in a multipart block/put
request.
Before, block-broadcast needed to wait for all destinations to finish in order
to process the next block. Now, buffers allow some destinations to be faster
than others while sending and receiving blocks.
Before, if a block put request failed to be broadcasted everywhere, an error
would happen at that moment.
Now, we keep streaming until the end and only then report any errors. The
operation succeeds as long as at least one stream finished successfully.
Errors block/putting to IPFS will not abort streams. Instead, subsequent
blocks are retried with a new request, although the method will return an
error when the stream finishes if there were errors at any point.
2022-03-24 01:17:10 +00:00
|
|
|
// to perform calls to IPFS.BlockStream and Pin content on Cluster.
|
|
|
|
func New(ctx context.Context, rpc *rpc.Client, opts api.AddParams, local bool) *DAGService {
|
2020-04-20 13:15:52 +00:00
|
|
|
// ensure don't Add something and pin it in direct mode.
|
|
|
|
opts.Mode = api.PinModeRecursive
|
2019-08-28 09:50:55 +00:00
|
|
|
return &DAGService{
|
2022-03-28 18:05:01 +00:00
|
|
|
ctx: ctx,
|
|
|
|
rpcClient: rpc,
|
|
|
|
dests: nil,
|
|
|
|
addParams: opts,
|
|
|
|
local: local,
|
|
|
|
blocks: make(chan api.NodeWithMeta, 256),
|
|
|
|
recentBlocks: &recentBlocks{},
|
2019-08-28 09:50:55 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add puts the given node in the destination peers.
|
|
|
|
func (dgs *DAGService) Add(ctx context.Context, node ipld.Node) error {
|
2022-03-28 18:05:01 +00:00
|
|
|
// Avoid adding the same node multiple times in a row.
|
|
|
|
// This is done by the ipfsadd-er, because some nodes are added
|
|
|
|
// via dagbuilder, then via MFS, and root nodes once more.
|
|
|
|
if dgs.recentBlocks.Has(node) {
|
|
|
|
return nil
|
|
|
|
}
|
Adders: stream blocks to destinations
This commit fixes #810 and adds block streaming to the final destinations when
adding. This should add major performance gains when adding data to clusters.
Before, everytime cluster issued a block, it was broadcasted individually to
all destinations (new libp2p stream), where it was block/put to IPFS (a single
block/put http roundtrip per block).
Now, blocks are streamed all the way from the adder module to the ipfs daemon,
by making every block as it arrives a single part in a multipart block/put
request.
Before, block-broadcast needed to wait for all destinations to finish in order
to process the next block. Now, buffers allow some destinations to be faster
than others while sending and receiving blocks.
Before, if a block put request failed to be broadcasted everywhere, an error
would happen at that moment.
Now, we keep streaming until the end and only then report any errors. The
operation succeeds as long as at least one stream finished successfully.
Errors block/putting to IPFS will not abort streams. Instead, subsequent
blocks are retried with a new request, although the method will return an
error when the stream finishes if there were errors at any point.
2022-03-24 01:17:10 +00:00
|
|
|
|
|
|
|
// FIXME: can't this happen on initialization? Perhaps the point here
|
|
|
|
// is the adder only allocates and starts streaming when the first
|
|
|
|
// block arrives and not on creation.
|
2019-08-28 09:50:55 +00:00
|
|
|
if dgs.dests == nil {
|
2022-02-28 18:44:04 +00:00
|
|
|
dests, err := adder.BlockAllocate(ctx, dgs.rpcClient, dgs.addParams.PinOptions)
|
2019-08-28 09:50:55 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2022-02-01 19:38:41 +00:00
|
|
|
|
2022-03-15 10:32:12 +00:00
|
|
|
hasLocal := false
|
|
|
|
localPid := dgs.rpcClient.ID()
|
|
|
|
for i, d := range dests {
|
|
|
|
if d == localPid || d == "" {
|
|
|
|
hasLocal = true
|
|
|
|
// ensure our allocs do not carry an empty peer
|
|
|
|
// mostly an issue with testing mocks
|
|
|
|
dests[i] = localPid
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-28 09:50:55 +00:00
|
|
|
dgs.dests = dests
|
2019-09-18 07:51:04 +00:00
|
|
|
|
|
|
|
if dgs.local {
|
2022-02-28 18:44:04 +00:00
|
|
|
// If this is a local pin, make sure that the local
|
|
|
|
// peer is among the allocations..
|
|
|
|
// UNLESS user-allocations are defined!
|
2022-03-15 10:32:12 +00:00
|
|
|
if !hasLocal && localPid != "" && len(dgs.addParams.UserAllocations) == 0 {
|
2022-02-01 19:38:41 +00:00
|
|
|
// replace last allocation with local peer
|
|
|
|
dgs.dests[len(dgs.dests)-1] = localPid
|
|
|
|
}
|
|
|
|
|
Adders: stream blocks to destinations
This commit fixes #810 and adds block streaming to the final destinations when
adding. This should add major performance gains when adding data to clusters.
Before, everytime cluster issued a block, it was broadcasted individually to
all destinations (new libp2p stream), where it was block/put to IPFS (a single
block/put http roundtrip per block).
Now, blocks are streamed all the way from the adder module to the ipfs daemon,
by making every block as it arrives a single part in a multipart block/put
request.
Before, block-broadcast needed to wait for all destinations to finish in order
to process the next block. Now, buffers allow some destinations to be faster
than others while sending and receiving blocks.
Before, if a block put request failed to be broadcasted everywhere, an error
would happen at that moment.
Now, we keep streaming until the end and only then report any errors. The
operation succeeds as long as at least one stream finished successfully.
Errors block/putting to IPFS will not abort streams. Instead, subsequent
blocks are retried with a new request, although the method will return an
error when the stream finishes if there were errors at any point.
2022-03-24 01:17:10 +00:00
|
|
|
dgs.bs = adder.NewBlockStreamer(dgs.ctx, dgs.rpcClient, []peer.ID{localPid}, dgs.blocks)
|
2019-09-18 07:51:04 +00:00
|
|
|
} else {
|
Adders: stream blocks to destinations
This commit fixes #810 and adds block streaming to the final destinations when
adding. This should add major performance gains when adding data to clusters.
Before, everytime cluster issued a block, it was broadcasted individually to
all destinations (new libp2p stream), where it was block/put to IPFS (a single
block/put http roundtrip per block).
Now, blocks are streamed all the way from the adder module to the ipfs daemon,
by making every block as it arrives a single part in a multipart block/put
request.
Before, block-broadcast needed to wait for all destinations to finish in order
to process the next block. Now, buffers allow some destinations to be faster
than others while sending and receiving blocks.
Before, if a block put request failed to be broadcasted everywhere, an error
would happen at that moment.
Now, we keep streaming until the end and only then report any errors. The
operation succeeds as long as at least one stream finished successfully.
Errors block/putting to IPFS will not abort streams. Instead, subsequent
blocks are retried with a new request, although the method will return an
error when the stream finishes if there were errors at any point.
2022-03-24 01:17:10 +00:00
|
|
|
dgs.bs = adder.NewBlockStreamer(dgs.ctx, dgs.rpcClient, dgs.dests, dgs.blocks)
|
2019-09-18 07:51:04 +00:00
|
|
|
}
|
2019-08-28 09:50:55 +00:00
|
|
|
}
|
|
|
|
|
Adders: stream blocks to destinations
This commit fixes #810 and adds block streaming to the final destinations when
adding. This should add major performance gains when adding data to clusters.
Before, everytime cluster issued a block, it was broadcasted individually to
all destinations (new libp2p stream), where it was block/put to IPFS (a single
block/put http roundtrip per block).
Now, blocks are streamed all the way from the adder module to the ipfs daemon,
by making every block as it arrives a single part in a multipart block/put
request.
Before, block-broadcast needed to wait for all destinations to finish in order
to process the next block. Now, buffers allow some destinations to be faster
than others while sending and receiving blocks.
Before, if a block put request failed to be broadcasted everywhere, an error
would happen at that moment.
Now, we keep streaming until the end and only then report any errors. The
operation succeeds as long as at least one stream finished successfully.
Errors block/putting to IPFS will not abort streams. Instead, subsequent
blocks are retried with a new request, although the method will return an
error when the stream finishes if there were errors at any point.
2022-03-24 01:17:10 +00:00
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
case <-dgs.ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
case dgs.blocks <- adder.IpldNodeToNodeWithMeta(node):
|
2022-03-28 18:05:01 +00:00
|
|
|
dgs.recentBlocks.Add(node)
|
Adders: stream blocks to destinations
This commit fixes #810 and adds block streaming to the final destinations when
adding. This should add major performance gains when adding data to clusters.
Before, everytime cluster issued a block, it was broadcasted individually to
all destinations (new libp2p stream), where it was block/put to IPFS (a single
block/put http roundtrip per block).
Now, blocks are streamed all the way from the adder module to the ipfs daemon,
by making every block as it arrives a single part in a multipart block/put
request.
Before, block-broadcast needed to wait for all destinations to finish in order
to process the next block. Now, buffers allow some destinations to be faster
than others while sending and receiving blocks.
Before, if a block put request failed to be broadcasted everywhere, an error
would happen at that moment.
Now, we keep streaming until the end and only then report any errors. The
operation succeeds as long as at least one stream finished successfully.
Errors block/putting to IPFS will not abort streams. Instead, subsequent
blocks are retried with a new request, although the method will return an
error when the stream finishes if there were errors at any point.
2022-03-24 01:17:10 +00:00
|
|
|
return nil
|
|
|
|
}
|
2019-08-28 09:50:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Finalize pins the last Cid added to this DAGService.
|
2022-04-07 11:53:30 +00:00
|
|
|
func (dgs *DAGService) Finalize(ctx context.Context, root api.Cid) (api.Cid, error) {
|
Adders: stream blocks to destinations
This commit fixes #810 and adds block streaming to the final destinations when
adding. This should add major performance gains when adding data to clusters.
Before, everytime cluster issued a block, it was broadcasted individually to
all destinations (new libp2p stream), where it was block/put to IPFS (a single
block/put http roundtrip per block).
Now, blocks are streamed all the way from the adder module to the ipfs daemon,
by making every block as it arrives a single part in a multipart block/put
request.
Before, block-broadcast needed to wait for all destinations to finish in order
to process the next block. Now, buffers allow some destinations to be faster
than others while sending and receiving blocks.
Before, if a block put request failed to be broadcasted everywhere, an error
would happen at that moment.
Now, we keep streaming until the end and only then report any errors. The
operation succeeds as long as at least one stream finished successfully.
Errors block/putting to IPFS will not abort streams. Instead, subsequent
blocks are retried with a new request, although the method will return an
error when the stream finishes if there were errors at any point.
2022-03-24 01:17:10 +00:00
|
|
|
close(dgs.blocks)
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-dgs.ctx.Done():
|
|
|
|
return root, ctx.Err()
|
|
|
|
case <-ctx.Done():
|
|
|
|
return root, ctx.Err()
|
|
|
|
case <-dgs.bs.Done():
|
|
|
|
}
|
|
|
|
|
|
|
|
// If the streamer failed to put blocks.
|
|
|
|
if err := dgs.bs.Err(); err != nil {
|
|
|
|
return root, err
|
|
|
|
}
|
|
|
|
|
2022-02-28 18:44:04 +00:00
|
|
|
// Do not pin, just block put.
|
|
|
|
// Why? Because some people are uploading CAR files with partial DAGs
|
|
|
|
// and ideally they should be pinning only when the last partial CAR
|
|
|
|
// is uploaded. This gives them that option.
|
|
|
|
if dgs.addParams.NoPin {
|
|
|
|
return root, nil
|
|
|
|
}
|
|
|
|
|
2019-08-28 09:50:55 +00:00
|
|
|
// Cluster pin the result
|
2022-02-28 18:44:04 +00:00
|
|
|
rootPin := api.PinWithOpts(root, dgs.addParams.PinOptions)
|
2019-08-31 14:10:56 +00:00
|
|
|
rootPin.Allocations = dgs.dests
|
2019-08-28 09:50:55 +00:00
|
|
|
|
|
|
|
return root, adder.Pin(ctx, dgs.rpcClient, rootPin)
|
|
|
|
}
|
|
|
|
|
2022-03-14 14:45:51 +00:00
|
|
|
// Allocations returns the add destinations decided by the DAGService.
|
|
|
|
func (dgs *DAGService) Allocations() []peer.ID {
|
2022-03-15 10:32:12 +00:00
|
|
|
// using rpc clients without a host results in an empty peer
|
|
|
|
// which cannot be parsed to peer.ID on deserialization.
|
|
|
|
if len(dgs.dests) == 1 && dgs.dests[0] == "" {
|
|
|
|
return nil
|
|
|
|
}
|
2022-03-14 14:45:51 +00:00
|
|
|
return dgs.dests
|
|
|
|
}
|
|
|
|
|
2019-08-28 09:50:55 +00:00
|
|
|
// AddMany calls Add for every given node.
|
|
|
|
func (dgs *DAGService) AddMany(ctx context.Context, nodes []ipld.Node) error {
|
|
|
|
for _, node := range nodes {
|
|
|
|
err := dgs.Add(ctx, node)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
2022-03-28 18:05:01 +00:00
|
|
|
|
|
|
|
// recentBlocks remembers the CIDs of the last two nodes passed through
// Add, acting as a two-slot ring buffer used to skip immediately
// repeated nodes.
type recentBlocks struct {
	// blocks holds the two most recently added CIDs.
	blocks [2]cid.Cid
	// cur is the index (0 or 1) of the slot to overwrite next.
	cur int
}
|
|
|
|
|
|
|
|
func (rc *recentBlocks) Add(n ipld.Node) {
|
|
|
|
rc.blocks[rc.cur] = n.Cid()
|
|
|
|
rc.cur = (rc.cur + 1) % 2
|
|
|
|
}
|
|
|
|
|
|
|
|
func (rc *recentBlocks) Has(n ipld.Node) bool {
|
|
|
|
c := n.Cid()
|
|
|
|
return rc.blocks[0].Equals(c) || rc.blocks[1].Equals(c)
|
|
|
|
}
|