From ede1b8925221a8084a22a34d0790c4b19955950d Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Fri, 6 Mar 2020 16:35:15 +0100 Subject: [PATCH] Adder: fix #1006: Specify block format, hash and length when adding Currently we were only specifying the block format. When adding with a custom hash function, even though we produced the right cids, IPFS did not know the hash function and ended up storing them using SHA256. Additionally, since NodeWithMeta serializes the CID, we do not need to carry a Format parameter (which specifies the Codec): it is already embedded. Tests have been added and BlockPut in ipfshttp now checks that the response's CID matches the data sent. This will catch errors like what was happening, but also any data corruption between cluster and IPFS during the block upload. --- adder/util.go | 10 ------ api/types.go | 1 - ipfsconn/ipfshttp/ipfshttp.go | 49 +++++++++++++++++++++++++++--- ipfsconn/ipfshttp/ipfshttp_test.go | 11 +++---- test/cids.go | 12 +++++--- test/ipfs_mock.go | 41 ++++++++++++++----------- 6 files changed, 78 insertions(+), 46 deletions(-) diff --git a/adder/util.go b/adder/util.go index de0794cc..8cec78ee 100644 --- a/adder/util.go +++ b/adder/util.go @@ -93,20 +93,10 @@ func ipldNodeToNodeWithMeta(n ipld.Node) *api.NodeWithMeta { logger.Warning(err) } - format, ok := cid.CodecToStr[n.Cid().Type()] - if !ok { - format = "" - logger.Warning("unsupported cid type, treating as v0") - } - if n.Cid().Prefix().Version == 0 { - format = "v0" - } - return &api.NodeWithMeta{ Cid: n.Cid(), Data: n.RawData(), CumSize: size, - Format: format, } } diff --git a/api/types.go b/api/types.go index a7aea655..3758441c 100644 --- a/api/types.go +++ b/api/types.go @@ -883,7 +883,6 @@ type NodeWithMeta struct { Data []byte `codec:"d,omitempty"` Cid cid.Cid `codec:"c,omitempty"` CumSize uint64 `codec:"s,omitempty"` // Cumulative size - Format string `codec:"f,omitempty"` } // Size returns how big is the block. It is different from CumSize, which diff --git a/ipfsconn/ipfshttp/ipfshttp.go b/ipfsconn/ipfshttp/ipfshttp.go index dea89cff..719479c8 100644 --- a/ipfsconn/ipfshttp/ipfshttp.go +++ b/ipfsconn/ipfshttp/ipfshttp.go @@ -27,6 +27,7 @@ import ( rpc "github.com/libp2p/go-libp2p-gorpc" madns "github.com/multiformats/go-multiaddr-dns" manet "github.com/multiformats/go-multiaddr-net" + "github.com/multiformats/go-multihash" "go.opencensus.io/plugin/ochttp" "go.opencensus.io/plugin/ochttp/propagation/tracecontext" @@ -112,6 +113,11 @@ type ipfsSwarmPeersResp struct { Peers []ipfsPeer } +type ipfsBlockPutResp struct { + Key string + Size int +} + type ipfsPeer struct { Peer string } @@ -887,14 +893,47 @@ func (ipfs *Connector) BlockPut(ctx context.Context, b *api.NodeWithMeta) error ) multiFileR := files.NewMultiFileReader(mapDir, true) - if b.Format == "" { - b.Format = "v0" + + q := make(url.Values, 3) + prefix := b.Cid.Prefix() + format, ok := cid.CodecToStr[prefix.Codec] + if !ok { + return fmt.Errorf("cannot find name for the blocks' CID codec: %x", prefix.Codec) } - url := "block/put?f=" + b.Format + q.Set("format", format) + + mhType, ok := multihash.Codes[prefix.MhType] + if !ok { + return fmt.Errorf("cannot find name for the blocks' Multihash type: %x", prefix.MhType) + } + q.Set("mhtype", mhType) + q.Set("mhlen", strconv.Itoa(prefix.MhLength)) + + url := "block/put?" + q.Encode() contentType := "multipart/form-data; boundary=" + multiFileR.Boundary() - _, err := ipfs.postCtx(ctx, url, contentType, multiFileR) - return err + body, err := ipfs.postCtx(ctx, url, contentType, multiFileR) + if err != nil { + return err + } + + var res ipfsBlockPutResp + err = json.Unmarshal(body, &res) + if err != nil { + return err + } + + logger.Debug("block/put response CID", res.Key) + respCid, err := cid.Decode(res.Key) + if err != nil { + logger.Error("cannot parse CID from BlockPut response") + return err + } + + if !respCid.Equals(b.Cid) { + return fmt.Errorf("blockPut response CID (%s) does not match the block sent (%s)", respCid, b.Cid) + } + return nil } // BlockGet retrieves an ipfs block with the given cid diff --git a/ipfsconn/ipfshttp/ipfshttp_test.go b/ipfsconn/ipfshttp/ipfshttp_test.go index 9508dcaf..8bdb5f85 100644 --- a/ipfsconn/ipfshttp/ipfshttp_test.go +++ b/ipfsconn/ipfshttp/ipfshttp_test.go @@ -288,11 +288,9 @@ func TestBlockPut(t *testing.T) { defer mock.Close() defer ipfs.Shutdown(ctx) - data := []byte(test.Cid4Data) err := ipfs.BlockPut(ctx, &api.NodeWithMeta{ - Data: data, - Cid: test.Cid4, - Format: "raw", + Data: []byte(test.Cid4Data), + Cid: test.Cid4, }) if err != nil { t.Fatal(err) @@ -314,9 +312,8 @@ func TestBlockGet(t *testing.T) { // Put and then successfully get err = ipfs.BlockPut(ctx, &api.NodeWithMeta{ - Data: test.ShardData, - Cid: test.ShardCid, - Format: "cbor", + Data: test.ShardData, + Cid: test.ShardCid, }) if err != nil { t.Fatal(err) diff --git a/test/cids.go b/test/cids.go index 079d9c96..3c577b85 100644 --- a/test/cids.go +++ b/test/cids.go @@ -7,12 +7,14 @@ import ( // Common variables used all around tests. var ( - Cid1, _ = cid.Decode("QmP63DkAFEnDYNjDYBpyNDfttu1fvUw99x1brscPzpqmmq") - Cid2, _ = cid.Decode("QmP63DkAFEnDYNjDYBpyNDfttu1fvUw99x1brscPzpqmma") - Cid3, _ = cid.Decode("QmP63DkAFEnDYNjDYBpyNDfttu1fvUw99x1brscPzpqmmb") - Cid4, _ = cid.Decode("zb2rhiKhUepkTMw7oFfBUnChAN7ABAvg2hXUwmTBtZ6yxuc57") + Cid1, _ = cid.Decode("QmP63DkAFEnDYNjDYBpyNDfttu1fvUw99x1brscPzpqmmq") + Cid2, _ = cid.Decode("QmP63DkAFEnDYNjDYBpyNDfttu1fvUw99x1brscPzpqmma") + Cid3, _ = cid.Decode("QmP63DkAFEnDYNjDYBpyNDfttu1fvUw99x1brscPzpqmmb") + Cid4Data = "Cid4Data" + // Cid resulting from block put using blake2b-256 and raw format + Cid4, _ = cid.Decode("bafk2bzaceawsyhsnrwwy5mtit2emnjfalkxsyq2p2ptd6fuliolzwwjbs42fq") + Cid5, _ = cid.Decode("QmP63DkAFEnDYNjDYBpyNDfttu1fvUw99x1brscPzpqmmd") - Cid4Data = "Cid4Data" // Cid resulting from block put NOT ipfs add SlowCid1, _ = cid.Decode("QmP63DkAFEnDYNjDYBpyNDfttu1fvUw99x1brscPzpqmmd") CidResolved, _ = cid.Decode("zb2rhiKhUepkTMw7oFfBUnChAN7ABAvg2hXUwmTBtZ6yxuabc") // ErrorCid is meant to be used as a Cid which causes errors. i.e. the diff --git a/test/ipfs_mock.go b/test/ipfs_mock.go index 19f8c2c4..e6646a5c 100644 --- a/test/ipfs_mock.go +++ b/test/ipfs_mock.go @@ -18,9 +18,9 @@ import ( "github.com/ipfs/ipfs-cluster/datastore/inmem" "github.com/ipfs/ipfs-cluster/state" "github.com/ipfs/ipfs-cluster/state/dsstate" + "github.com/multiformats/go-multihash" cid "github.com/ipfs/go-cid" - u "github.com/ipfs/go-ipfs-util" cors "github.com/rs/cors" ) @@ -291,8 +291,9 @@ func (m *IpfsMock) handler(w http.ResponseWriter, r *http.Request) { goto ERROR } if ok { - if c.Equals(Cid4) { // this a v1 cid. Do not return default-base32 - w.Write([]byte(`{ "Keys": { "zb2rhiKhUepkTMw7oFfBUnChAN7ABAvg2hXUwmTBtZ6yxuc57": { "Type": "recursive" }}}`)) + if c.Equals(Cid4) { + // this a v1 cid. Do not return default-base32 but base58btc encoding of it + w.Write([]byte(`{ "Keys": { "zCT5htkdztJi3x4zBNHo8TRvGHPLTdHUdCLKgTGMgQcRKSLoWxK1": { "Type": "recursive" }}}`)) return } @@ -349,25 +350,29 @@ func (m *IpfsMock) handler(w http.ResponseWriter, r *http.Request) { } // Parse cid from data and format and add to mock block-store query := r.URL.Query() - format, ok := query["f"] - if !ok || len(format) != 1 { - goto ERROR - } - var c string - hash := u.Hash(data) - codec, ok := cid.Codecs[format[0]] - if !ok { - goto ERROR - } - if format[0] == "v0" { - c = cid.NewCidV0(hash).String() + format := cid.Codecs[query.Get("format")] + mhType := multihash.Names[query.Get("mhtype")] + mhLen, _ := strconv.Atoi(query.Get("mhLen")) + + var builder cid.Builder + if format == cid.DagProtobuf && mhType == multihash.SHA2_256 { + builder = cid.V0Builder{} } else { - c = cid.NewCidV1(codec, hash).String() + builder = cid.V1Builder{ + Codec: format, + MhType: mhType, + MhLength: mhLen, + } } - m.BlockStore[c] = data + + c, err := builder.Sum(data) + if err != nil { + goto ERROR + } + m.BlockStore[c.String()] = data resp := mockBlockPutResp{ - Key: c, + Key: c.String(), } j, _ := json.Marshal(resp) w.Write(j)