Fix warnings on block-cid mismatch when adding

Because we are adding blocks on a single call, and we choose the format
parameter based on the prefix of the first block, IPFS will return block CIDs
based on that option.

This caused warnings when adding content that has multiple CID prefixes: for
example, any cid-version=1 file will include both dag-pb CIDs and raw
CIDs. Since the first block is usually a leaf, IPFS will only return
raw CIDs, causing a warning because of the CID mismatch.

This fixes things by comparing multihashes only.

But! We might be writing blocks with the wrong CID and then the good CID won't
work!

Correct, we might, in some corner cases.

In go-ipfs >= 0.12.0, all blocks are addressed by multihash so CID prefixes
are irrelevant. This problem does not exist in that case.

In go-ipfs < 0.12.0, if a read for a CIDv1 DAG-PB fails, it is retried as if
it were raw. This means that if we wrote something with cidv1/format=raw, that
should have been a cidv1/format=dag-pb, the read will still work. That covers
some common cases (i.e. adding with cid-version=1) because the first block
should be a raw-leaf. Default-params (cidv0) is not affected since everything
is raw multihashes. However, there are still possible cases (certain CAR
layouts, etc.) where cluster will write blocks with the wrong CID prefix to
older IPFS versions.
This commit is contained in:
Hector Sanjuan 2022-04-11 20:26:06 +02:00
parent 2827427be8
commit cd2fe8f655

View File

@ -934,7 +934,7 @@ type chanIterator struct {
err error
seenMu sync.Mutex
seen *cid.Set
seen *multihash.Set
}
func (ci *chanIterator) Name() string {
@ -949,16 +949,17 @@ func (ci *chanIterator) Node() files.Node {
if !ci.current.Cid.Defined() {
return nil
}
logger.Debugf("it.node(): %s", ci.current.Cid)
ci.seenMu.Lock()
ci.seen.Add(ci.current.Cid.Cid)
ci.seen.Add(ci.current.Cid.Hash())
ci.seenMu.Unlock()
return files.NewBytesFile(ci.current.Data)
}
func (ci *chanIterator) Seen(c api.Cid) bool {
ci.seenMu.Lock()
has := ci.seen.Has(c.Cid)
ci.seen.Remove(c.Cid)
has := ci.seen.Has(c.Cid.Hash())
ci.seen.Remove(c.Cid.Hash())
ci.seenMu.Unlock()
return has
}
@ -1005,7 +1006,7 @@ func (ci *chanIterator) Next() bool {
ci.done = true
return false
}
logger.Debugf("block %s", next.Cid)
logger.Debugf("it.Next() %s", next.Cid)
ci.current = next
return true
}
@ -1037,6 +1038,7 @@ func blockPutQuery(prefix cid.Prefix) (url.Values, error) {
q.Set("mhtype", mhType)
q.Set("mhlen", strconv.Itoa(prefix.MhLength))
q.Set("pin", "false")
return q, nil
}
@ -1054,7 +1056,7 @@ func (ipfs *Connector) BlockStream(ctx context.Context, blocks <-chan api.NodeWi
it := &chanIterator{
ctx: ctx,
blocks: blocks,
seen: cid.NewSet(),
seen: multihash.NewSet(),
}
dir := &chanDirectory{
iterator: it,
@ -1106,8 +1108,9 @@ func (ipfs *Connector) BlockStream(ctx context.Context, blocks <-chan api.NodeWi
errs = multierr.Append(errs, err)
break
}
logger.Debugf("response block: %s", res.Key)
if !it.Seen(res.Key) {
logger.Debugf("blockPut response CID (%s) does not match any blocks sent", res.Key)
logger.Warningf("blockPut response CID (%s) does not match the multihash of any blocks sent", res.Key)
}
}
// continue until it.Done()