7cc91cae4f
* Bump github.com/libp2p/go-libp2p-kad-dht from 0.18.0 to 0.20.0 (#1821) Bumps [github.com/libp2p/go-libp2p-kad-dht](https://github.com/libp2p/go-libp2p-kad-dht) from 0.18.0 to 0.20.0. - [Release notes](https://github.com/libp2p/go-libp2p-kad-dht/releases) - [Commits](https://github.com/libp2p/go-libp2p-kad-dht/compare/v0.18.0...v0.20.0) --- updated-dependencies: - dependency-name: github.com/libp2p/go-libp2p-kad-dht dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump github.com/dgraph-io/badger/v3 from 3.2103.4 to 3.2103.5 (#1822) Bumps [github.com/dgraph-io/badger/v3](https://github.com/dgraph-io/badger) from 3.2103.4 to 3.2103.5. - [Release notes](https://github.com/dgraph-io/badger/releases) - [Changelog](https://github.com/dgraph-io/badger/blob/main/CHANGELOG.md) - [Commits](https://github.com/dgraph-io/badger/compare/v3.2103.4...v3.2103.5) --- updated-dependencies: - dependency-name: github.com/dgraph-io/badger/v3 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump go.uber.org/multierr from 1.8.0 to 1.9.0 (#1823) Bumps [go.uber.org/multierr](https://github.com/uber-go/multierr) from 1.8.0 to 1.9.0. - [Release notes](https://github.com/uber-go/multierr/releases) - [Changelog](https://github.com/uber-go/multierr/blob/master/CHANGELOG.md) - [Commits](https://github.com/uber-go/multierr/compare/v1.8.0...v1.9.0) --- updated-dependencies: - dependency-name: go.uber.org/multierr dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump github.com/rs/cors from 1.8.2 to 1.8.3 (#1826) Bumps [github.com/rs/cors](https://github.com/rs/cors) from 1.8.2 to 1.8.3. - [Release notes](https://github.com/rs/cors/releases) - [Commits](https://github.com/rs/cors/compare/v1.8.2...v1.8.3) --- updated-dependencies: - dependency-name: github.com/rs/cors dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump github.com/ipfs/go-merkledag from 0.8.1 to 0.9.0 (#1830) Bumps [github.com/ipfs/go-merkledag](https://github.com/ipfs/go-merkledag) from 0.8.1 to 0.9.0. - [Release notes](https://github.com/ipfs/go-merkledag/releases) - [Commits](https://github.com/ipfs/go-merkledag/compare/v0.8.1...v0.9.0) --- updated-dependencies: - dependency-name: github.com/ipfs/go-merkledag dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump github.com/dustin/go-humanize from 1.0.0 to 1.0.1 (#1831) Bumps [github.com/dustin/go-humanize](https://github.com/dustin/go-humanize) from 1.0.0 to 1.0.1. - [Release notes](https://github.com/dustin/go-humanize/releases) - [Commits](https://github.com/dustin/go-humanize/compare/v1.0.0...v1.0.1) --- updated-dependencies: - dependency-name: github.com/dustin/go-humanize dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump github.com/urfave/cli from 1.22.10 to 1.22.12 (#1834) Bumps [github.com/urfave/cli](https://github.com/urfave/cli) from 1.22.10 to 1.22.12. - [Release notes](https://github.com/urfave/cli/releases) - [Changelog](https://github.com/urfave/cli/blob/main/docs/CHANGELOG.md) - [Commits](https://github.com/urfave/cli/compare/v1.22.10...v1.22.12) --- updated-dependencies: - dependency-name: github.com/urfave/cli dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump github.com/ipfs/go-unixfs from 0.4.1 to 0.4.2 (#1829) Bumps [github.com/ipfs/go-unixfs](https://github.com/ipfs/go-unixfs) from 0.4.1 to 0.4.2. - [Release notes](https://github.com/ipfs/go-unixfs/releases) - [Commits](https://github.com/ipfs/go-unixfs/compare/v0.4.1...v0.4.2) --- updated-dependencies: - dependency-name: github.com/ipfs/go-unixfs dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump go.opencensus.io from 0.23.0 to 0.24.0 (#1794) Dependabot couldn't find the original pull request head commit, 9dedea6eeef2c8b5f7358cc29b5b088ac6cf7aba. Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump github.com/ugorji/go/codec from 1.2.7 to 1.2.8 (#1827) Bumps [github.com/ugorji/go/codec](https://github.com/ugorji/go) from 1.2.7 to 1.2.8. 
- [Release notes](https://github.com/ugorji/go/releases) - [Commits](https://github.com/ugorji/go/compare/v1.2.7...v1.2.8) --- updated-dependencies: - dependency-name: github.com/ugorji/go/codec dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump github.com/libp2p/go-libp2p-pubsub from 0.8.2 to 0.8.3 (#1838) Bumps [github.com/libp2p/go-libp2p-pubsub](https://github.com/libp2p/go-libp2p-pubsub) from 0.8.2 to 0.8.3. - [Release notes](https://github.com/libp2p/go-libp2p-pubsub/releases) - [Commits](https://github.com/libp2p/go-libp2p-pubsub/compare/v0.8.2...v0.8.3) --- updated-dependencies: - dependency-name: github.com/libp2p/go-libp2p-pubsub dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * update go-libp2p * Bump github.com/ipfs/go-block-format from 0.0.3 to 0.1.1 (#1837) Bumps [github.com/ipfs/go-block-format](https://github.com/ipfs/go-block-format) from 0.0.3 to 0.1.1. - [Release notes](https://github.com/ipfs/go-block-format/releases) - [Commits](https://github.com/ipfs/go-block-format/compare/v0.0.3...v0.1.1) --- updated-dependencies: - dependency-name: github.com/ipfs/go-block-format dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Update go-mfs (#1818) * Fix docker build failure (git safe directory) (#1836) Git refuses to run `git rev-parse HEAD` now, in docker. 
* Remove go-block-format and use go-libipfs (deprecation) * Update ipfs-lite * Bump golang.org/x/crypto from 0.3.0 to 0.5.0 (#1839) Bumps [golang.org/x/crypto](https://github.com/golang/crypto) from 0.3.0 to 0.5.0. - [Release notes](https://github.com/golang/crypto/releases) - [Commits](https://github.com/golang/crypto/compare/v0.3.0...v0.5.0) --- updated-dependencies: - dependency-name: golang.org/x/crypto dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
187 lines
6.2 KiB
Go
187 lines
6.2 KiB
Go
package sharding
|
|
|
|
// dag.go defines functions for constructing and parsing ipld-cbor nodes
|
|
// of the clusterDAG used to track sharded DAGs in ipfs-cluster
|
|
|
|
// Most logic goes into handling the edge cases in which clusterDAG
|
|
// metadata for a single shard cannot fit within a single shard node. We
|
|
// make the following simplifying assumption: a single shard will not track
|
|
// more than 35,808,256 links (~2^25). This is the limit at which the current
|
|
// shard node format would need 2 levels of indirect nodes to reference
|
|
// all of the links. Note that this limit is only reached at shard sizes 7
|
|
// times the size of the current default and then only when files are all
|
|
// 1 byte in size. In the future we may generalize the shard dag to multiple
|
|
// indirect nodes to accommodate much bigger shard sizes. Also note that the
|
|
// move to using the identity hash function in cids of very small data
|
|
// will improve link density in shard nodes and further reduce the need for
|
|
// multiple levels of indirection.
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
cid "github.com/ipfs/go-cid"
|
|
cbor "github.com/ipfs/go-ipld-cbor"
|
|
ipld "github.com/ipfs/go-ipld-format"
|
|
blocks "github.com/ipfs/go-libipfs/blocks"
|
|
dag "github.com/ipfs/go-merkledag"
|
|
mh "github.com/multiformats/go-multihash"
|
|
)
|
|
|
|
// go-merkledag does this, but it may be moved.
|
|
// We include for explicitness.
|
|
func init() {
|
|
ipld.Register(cid.DagProtobuf, dag.DecodeProtobufBlock)
|
|
ipld.Register(cid.Raw, dag.DecodeRawBlock)
|
|
ipld.Register(cid.DagCBOR, cbor.DecodeBlock)
|
|
}
|
|
|
|
// MaxLinks is the max number of links that, when serialized fit into a block
|
|
const MaxLinks = 5984
|
|
const hashFn = mh.SHA2_256
|
|
|
|
// CborDataToNode parses cbor data into a clusterDAG node while making a few
|
|
// checks
|
|
func CborDataToNode(raw []byte, format string) (ipld.Node, error) {
|
|
if format != "cbor" {
|
|
return nil, fmt.Errorf("unexpected shard node format %s", format)
|
|
}
|
|
shardCid, err := cid.NewPrefixV1(cid.DagCBOR, hashFn).Sum(raw)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
shardBlk, err := blocks.NewBlockWithCid(raw, shardCid)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
shardNode, err := ipld.Decode(shardBlk)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return shardNode, nil
|
|
}
|
|
|
|
func makeDAGSimple(ctx context.Context, dagObj map[string]cid.Cid) (ipld.Node, error) {
|
|
node, err := cbor.WrapObject(
|
|
dagObj,
|
|
hashFn, mh.DefaultLengths[hashFn],
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return node, err
|
|
}
|
|
|
|
// makeDAG parses a dagObj which stores all of the node-links a shardDAG
|
|
// is responsible for tracking. In general a single node of links may exceed
|
|
// the capacity of an ipfs block. In this case an indirect node in the
|
|
// shardDAG is constructed that references "leaf shardNodes" that themselves
|
|
// carry links to the data nodes being tracked. The head of the output slice
|
|
// is always the root of the shardDAG, i.e. the ipld node that should be
|
|
// recursively pinned to track the shard
|
|
func makeDAG(ctx context.Context, dagObj map[string]cid.Cid) ([]ipld.Node, error) {
|
|
// FIXME: We have a 4MB limit on the block size enforced by bitswap:
|
|
// https://github.com/libp2p/go-libp2p/core/blob/master/network/network.go#L23
|
|
|
|
// No indirect node
|
|
if len(dagObj) <= MaxLinks {
|
|
n, err := makeDAGSimple(ctx, dagObj)
|
|
return []ipld.Node{n}, err
|
|
}
|
|
// Indirect node required
|
|
leafNodes := make([]ipld.Node, 0) // shardNodes with links to data
|
|
indirectObj := make(map[string]cid.Cid) // shardNode with links to shardNodes
|
|
numFullLeaves := len(dagObj) / MaxLinks
|
|
for i := 0; i <= numFullLeaves; i++ {
|
|
leafObj := make(map[string]cid.Cid)
|
|
for j := 0; j < MaxLinks; j++ {
|
|
c, ok := dagObj[fmt.Sprintf("%d", i*MaxLinks+j)]
|
|
if !ok { // finished with this leaf before filling all the way
|
|
if i != numFullLeaves {
|
|
panic("bad state, should never be here")
|
|
}
|
|
break
|
|
}
|
|
leafObj[fmt.Sprintf("%d", j)] = c
|
|
}
|
|
leafNode, err := makeDAGSimple(ctx, leafObj)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
indirectObj[fmt.Sprintf("%d", i)] = leafNode.Cid()
|
|
leafNodes = append(leafNodes, leafNode)
|
|
}
|
|
indirectNode, err := makeDAGSimple(ctx, indirectObj)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
nodes := append([]ipld.Node{indirectNode}, leafNodes...)
|
|
return nodes, nil
|
|
}
|
|
|
|
// TODO: decide whether this is worth including. Is precision important for
|
|
// most usecases? Is being a little over the shard size a serious problem?
|
|
// Is precision worth the cost to maintain complex accounting for metadata
|
|
// size (cid sizes will vary in general, cluster dag cbor format may
|
|
// grow to vary unpredictably in size)
|
|
// byteCount returns the number of bytes the dagObj will occupy when
|
|
// serialized into an ipld DAG
|
|
/*func byteCount(obj dagObj) uint64 {
|
|
// 1 byte map overhead
|
|
// for each entry:
|
|
// 1 byte indicating text
|
|
// 1 byte*(number digits) for key
|
|
// 2 bytes for link tag
|
|
// 35 bytes for each cid
|
|
count := 1
|
|
for key := range obj {
|
|
count += fixedPerLink
|
|
count += len(key)
|
|
}
|
|
return uint64(count) + indirectCount(len(obj))
|
|
}
|
|
|
|
// indirectCount returns the number of bytes needed to serialize the indirect
|
|
// node structure of the shardDAG based on the number of links being tracked.
|
|
func indirectCount(linkNum int) uint64 {
|
|
q := linkNum / MaxLinks
|
|
if q == 0 { // no indirect node needed
|
|
return 0
|
|
}
|
|
dummyIndirect := make(map[string]cid.Cid)
|
|
for key := 0; key <= q; key++ {
|
|
dummyIndirect[fmt.Sprintf("%d", key)] = nil
|
|
}
|
|
// Count bytes of entries of single indirect node and add the map
|
|
// overhead for all leaf nodes other than the original
|
|
return byteCount(dummyIndirect) + uint64(q)
|
|
}
|
|
|
|
// Return the number of bytes added to the total shard node metadata DAG when
|
|
// adding a new link to the given dagObj.
|
|
func deltaByteCount(obj dagObj) uint64 {
|
|
linkNum := len(obj)
|
|
q1 := linkNum / MaxLinks
|
|
q2 := (linkNum + 1) / MaxLinks
|
|
count := uint64(fixedPerLink)
|
|
count += uint64(len(fmt.Sprintf("%d", len(obj))))
|
|
|
|
// new shard nodes created by adding link
|
|
if q1 != q2 {
|
|
// first new leaf node created, i.e. indirect created too
|
|
if q2 == 1 {
|
|
count++ // map overhead of indirect node
|
|
count += 1 + fixedPerLink // fixedPerLink + len("0")
|
|
}
|
|
|
|
// added to indirect node
|
|
count += fixedPerLink
|
|
count += uint64(len(fmt.Sprintf("%d", q2)))
|
|
|
|
// overhead of new leaf node
|
|
count++
|
|
}
|
|
return count
|
|
}
|
|
*/
|