ipfs-cluster/adder/sharding/dag.go

178 lines
5.9 KiB
Go
Raw Normal View History

package sharding
// dag.go defines functions for constructing and parsing ipld-cbor nodes
// of the clusterDAG used to track sharded DAGs in ipfs-cluster
// Most logic goes into handling the edge cases in which clusterDAG
// metadata for a single shard cannot fit within a single shard node. We
// make the following simplifying assumption: a single shard will not track
// more than 35,808,256 links (~2^25). This is the limit at which the current
// shard node format would need 2 levels of indirect nodes to reference
// all of the links. Note that this limit is only reached at shard sizes 7
// times the size of the current default and then only when files are all
// 1 byte in size. In the future we may generalize the shard dag to multiple
// indirect nodes to accommodate much bigger shard sizes. Also note that the
// move to using the identity hash function in cids of very small data
// will improve link density in shard nodes and further reduce the need for
// multiple levels of indirection.
import (
"context"
"fmt"
2023-03-27 19:49:08 +00:00
blocks "github.com/ipfs/go-block-format"
cid "github.com/ipfs/go-cid"
cbor "github.com/ipfs/go-ipld-cbor"
ipld "github.com/ipfs/go-ipld-format"
mh "github.com/multiformats/go-multihash"
)
// MaxLinks is the max number of links that, when serialized fit into a block
const MaxLinks = 5984
const hashFn = mh.SHA2_256
// CborDataToNode parses cbor data into a clusterDAG node while making a few
// checks
func CborDataToNode(raw []byte, format string) (ipld.Node, error) {
if format != "cbor" {
return nil, fmt.Errorf("unexpected shard node format %s", format)
}
shardCid, err := cid.NewPrefixV1(cid.DagCBOR, hashFn).Sum(raw)
if err != nil {
return nil, err
}
shardBlk, err := blocks.NewBlockWithCid(raw, shardCid)
if err != nil {
return nil, err
}
shardNode, err := cbor.DecodeBlock(shardBlk)
if err != nil {
return nil, err
}
return shardNode, nil
}
func makeDAGSimple(ctx context.Context, dagObj map[string]cid.Cid) (ipld.Node, error) {
node, err := cbor.WrapObject(
dagObj,
hashFn, mh.DefaultLengths[hashFn],
)
if err != nil {
return nil, err
}
return node, err
}
// makeDAG parses a dagObj which stores all of the node-links a shardDAG
// is responsible for tracking. In general a single node of links may exceed
// the capacity of an ipfs block. In this case an indirect node in the
// shardDAG is constructed that references "leaf shardNodes" that themselves
// carry links to the data nodes being tracked. The head of the output slice
// is always the root of the shardDAG, i.e. the ipld node that should be
// recursively pinned to track the shard
func makeDAG(ctx context.Context, dagObj map[string]cid.Cid) ([]ipld.Node, error) {
// FIXME: We have a 4MB limit on the block size enforced by bitswap:
Dependency upgrades (#1755) * Update go-libp2p to v0.22.0 * Testing with go1.19 * build(deps): bump github.com/multiformats/go-multicodec Bumps [github.com/multiformats/go-multicodec](https://github.com/multiformats/go-multicodec) from 0.5.0 to 0.6.0. - [Release notes](https://github.com/multiformats/go-multicodec/releases) - [Commits](https://github.com/multiformats/go-multicodec/compare/v0.5.0...v0.6.0) --- updated-dependencies: - dependency-name: github.com/multiformats/go-multicodec dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> * build(deps): bump github.com/ipld/go-car from 0.4.0 to 0.5.0 Bumps [github.com/ipld/go-car](https://github.com/ipld/go-car) from 0.4.0 to 0.5.0. - [Release notes](https://github.com/ipld/go-car/releases) - [Commits](https://github.com/ipld/go-car/compare/v0.4.0...v0.5.0) --- updated-dependencies: - dependency-name: github.com/ipld/go-car dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> * build(deps): bump github.com/prometheus/client_golang Bumps [github.com/prometheus/client_golang](https://github.com/prometheus/client_golang) from 1.12.2 to 1.13.0. - [Release notes](https://github.com/prometheus/client_golang/releases) - [Changelog](https://github.com/prometheus/client_golang/blob/main/CHANGELOG.md) - [Commits](https://github.com/prometheus/client_golang/compare/v1.12.2...v1.13.0) --- updated-dependencies: - dependency-name: github.com/prometheus/client_golang dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> * build(deps): bump github.com/hashicorp/go-hclog from 1.2.1 to 1.3.0 Bumps [github.com/hashicorp/go-hclog](https://github.com/hashicorp/go-hclog) from 1.2.1 to 1.3.0. - [Release notes](https://github.com/hashicorp/go-hclog/releases) - [Commits](https://github.com/hashicorp/go-hclog/compare/v1.2.1...v1.3.0) --- updated-dependencies: - dependency-name: github.com/hashicorp/go-hclog dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> * build(deps): bump github.com/ipfs/go-ds-crdt from 0.3.6 to 0.3.7 Bumps [github.com/ipfs/go-ds-crdt](https://github.com/ipfs/go-ds-crdt) from 0.3.6 to 0.3.7. - [Release notes](https://github.com/ipfs/go-ds-crdt/releases) - [Commits](https://github.com/ipfs/go-ds-crdt/compare/v0.3.6...v0.3.7) --- updated-dependencies: - dependency-name: github.com/ipfs/go-ds-crdt dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> * build(deps): bump github.com/urfave/cli/v2 from 2.10.2 to 2.14.1 Bumps [github.com/urfave/cli/v2](https://github.com/urfave/cli) from 2.10.2 to 2.14.1. - [Release notes](https://github.com/urfave/cli/releases) - [Changelog](https://github.com/urfave/cli/blob/main/docs/CHANGELOG.md) - [Commits](https://github.com/urfave/cli/compare/v2.10.2...v2.14.1) --- updated-dependencies: - dependency-name: github.com/urfave/cli/v2 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> * build(deps): bump github.com/libp2p/go-libp2p-http from 0.3.0 to 0.4.0 Bumps [github.com/libp2p/go-libp2p-http](https://github.com/libp2p/go-libp2p-http) from 0.3.0 to 0.4.0. - [Release notes](https://github.com/libp2p/go-libp2p-http/releases) - [Commits](https://github.com/libp2p/go-libp2p-http/compare/v0.3.0...v0.4.0) --- updated-dependencies: - dependency-name: github.com/libp2p/go-libp2p-http dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> * build(deps): bump github.com/libp2p/go-libp2p-gorpc from 0.4.0 to 0.5.0 Bumps [github.com/libp2p/go-libp2p-gorpc](https://github.com/libp2p/go-libp2p-gorpc) from 0.4.0 to 0.5.0. - [Release notes](https://github.com/libp2p/go-libp2p-gorpc/releases) - [Commits](https://github.com/libp2p/go-libp2p-gorpc/compare/v0.4.0...v0.5.0) --- updated-dependencies: - dependency-name: github.com/libp2p/go-libp2p-gorpc dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> * build(deps): bump contrib.go.opencensus.io/exporter/prometheus Bumps [contrib.go.opencensus.io/exporter/prometheus](https://github.com/census-ecosystem/opencensus-go-exporter-prometheus) from 0.4.1 to 0.4.2. - [Release notes](https://github.com/census-ecosystem/opencensus-go-exporter-prometheus/releases) - [Commits](https://github.com/census-ecosystem/opencensus-go-exporter-prometheus/compare/v0.4.1...v0.4.2) --- updated-dependencies: - dependency-name: contrib.go.opencensus.io/exporter/prometheus dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> * build(deps): bump github.com/libp2p/go-libp2p-raft from 0.1.8 to 0.2.0 Bumps [github.com/libp2p/go-libp2p-raft](https://github.com/libp2p/go-libp2p-raft) from 0.1.8 to 0.2.0. - [Release notes](https://github.com/libp2p/go-libp2p-raft/releases) - [Commits](https://github.com/libp2p/go-libp2p-raft/compare/v0.1.8...v0.2.0) --- updated-dependencies: - dependency-name: github.com/libp2p/go-libp2p-raft dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> * build(deps): bump github.com/urfave/cli from 1.22.9 to 1.22.10 Bumps [github.com/urfave/cli](https://github.com/urfave/cli) from 1.22.9 to 1.22.10. - [Release notes](https://github.com/urfave/cli/releases) - [Changelog](https://github.com/urfave/cli/blob/main/docs/CHANGELOG.md) - [Commits](https://github.com/urfave/cli/compare/v1.22.9...v1.22.10) --- updated-dependencies: - dependency-name: github.com/urfave/cli dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> * Fix checker/linter/staticcheck warnings Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-09-06 14:57:17 +00:00
// https://github.com/libp2p/go-libp2p/core/blob/master/network/network.go#L23
// No indirect node
if len(dagObj) <= MaxLinks {
n, err := makeDAGSimple(ctx, dagObj)
return []ipld.Node{n}, err
}
// Indirect node required
leafNodes := make([]ipld.Node, 0) // shardNodes with links to data
indirectObj := make(map[string]cid.Cid) // shardNode with links to shardNodes
numFullLeaves := len(dagObj) / MaxLinks
for i := 0; i <= numFullLeaves; i++ {
leafObj := make(map[string]cid.Cid)
for j := 0; j < MaxLinks; j++ {
c, ok := dagObj[fmt.Sprintf("%d", i*MaxLinks+j)]
if !ok { // finished with this leaf before filling all the way
if i != numFullLeaves {
panic("bad state, should never be here")
}
break
}
leafObj[fmt.Sprintf("%d", j)] = c
}
leafNode, err := makeDAGSimple(ctx, leafObj)
if err != nil {
return nil, err
}
indirectObj[fmt.Sprintf("%d", i)] = leafNode.Cid()
leafNodes = append(leafNodes, leafNode)
}
indirectNode, err := makeDAGSimple(ctx, indirectObj)
if err != nil {
return nil, err
}
nodes := append([]ipld.Node{indirectNode}, leafNodes...)
return nodes, nil
}
// TODO: decide whether this is worth including. Is precision important for
// most usecases? Is being a little over the shard size a serious problem?
// Is precision worth the cost to maintain complex accounting for metadata
// size (cid sizes will vary in general, cluster dag cbor format may
// grow to vary unpredictably in size)
// byteCount returns the number of bytes the dagObj will occupy when
//serialized into an ipld DAG
/*func byteCount(obj dagObj) uint64 {
// 1 byte map overhead
// for each entry:
// 1 byte indicating text
// 1 byte*(number digits) for key
// 2 bytes for link tag
// 35 bytes for each cid
count := 1
for key := range obj {
count += fixedPerLink
count += len(key)
}
return uint64(count) + indirectCount(len(obj))
}
// indirectCount returns the number of bytes needed to serialize the indirect
// node structure of the shardDAG based on the number of links being tracked.
func indirectCount(linkNum int) uint64 {
q := linkNum / MaxLinks
if q == 0 { // no indirect node needed
return 0
}
dummyIndirect := make(map[string]cid.Cid)
for key := 0; key <= q; key++ {
dummyIndirect[fmt.Sprintf("%d", key)] = nil
}
// Count bytes of entries of single indirect node and add the map
// overhead for all leaf nodes other than the original
return byteCount(dummyIndirect) + uint64(q)
}
// Return the number of bytes added to the total shard node metadata DAG when
// adding a new link to the given dagObj.
func deltaByteCount(obj dagObj) uint64 {
linkNum := len(obj)
q1 := linkNum / MaxLinks
q2 := (linkNum + 1) / MaxLinks
count := uint64(fixedPerLink)
count += uint64(len(fmt.Sprintf("%d", len(obj))))
// new shard nodes created by adding link
if q1 != q2 {
// first new leaf node created, i.e. indirect created too
if q2 == 1 {
count++ // map overhead of indirect node
count += 1 + fixedPerLink // fixedPerLink + len("0")
}
// added to indirect node
count += fixedPerLink
count += uint64(len(fmt.Sprintf("%d", q2)))
// overhead of new leaf node
count++
}
return count
}
*/