Fix #1320: Add automatic GC to Badger datastore

This takes advantage of the go-ds-badger "auto-gc" feature.

It automatically runs GC cycles, each made of multiple GC rounds (repeated
until nothing more can be collected). The behaviour is enabled by default in
the configuration and can be disabled by setting "gc_interval" to "0m".
Hopefully this prevents badger datastores from growing without bound.
This commit is contained in:
Hector Sanjuan 2021-06-28 21:48:17 +02:00
parent d37df4fd95
commit 4ac2cf3eb0
7 changed files with 91 additions and 4 deletions

View File

@ -77,6 +77,11 @@ func SetIfNotDefault(src interface{}, dest interface{}) {
if n != 0 {
*dest.(*int) = n
}
case float64:
n := src.(float64)
if n != 0 {
*dest.(*float64) = n
}
case bool:
b := src.(bool)
if b {

View File

@ -65,6 +65,8 @@ var testingCrdtCfg = []byte(`{
var testingBadgerCfg = []byte(`{
"folder": "badgerFromTests",
"gc_interval": "0m",
"gc_sleep": "0m",
"badger_options": {
"max_table_size": 1048576
}

View File

@ -18,7 +18,12 @@ func New(cfg *Config) (ds.Datastore, error) {
if err != nil {
return nil, errors.Wrap(err, "creating badger folder")
}
opts := badgerds.Options{Options: cfg.BadgerOptions}
opts := badgerds.Options{
GcDiscardRatio: cfg.GCDiscardRatio,
GcInterval: cfg.GCInterval,
GcSleep: cfg.GCSleep,
Options: cfg.BadgerOptions,
}
return badgerds.NewDatastore(folder, &opts)
}

View File

@ -4,6 +4,7 @@ import (
"encoding/json"
"errors"
"path/filepath"
"time"
"github.com/dgraph-io/badger"
"github.com/dgraph-io/badger/options"
@ -25,6 +26,13 @@ var (
// DefaultBadgerOptions has to be a var because badger.DefaultOptions
// is. Values are customized during Init().
DefaultBadgerOptions badger.Options
// DefaultGCDiscardRatio for GC operations. See Badger docs.
DefaultGCDiscardRatio float64 = 0.2
// DefaultGCInterval specifies interval between GC cycles.
DefaultGCInterval time.Duration = 15 * time.Minute
// DefaultGCSleep specifies sleep time between GC rounds.
DefaultGCSleep time.Duration = 10 * time.Second
)
func init() {
@ -49,6 +57,16 @@ type Config struct {
// the base configuration folder.
Folder string
// For GC operation. See Badger documentation.
GCDiscardRatio float64
// Interval between GC cycles. Each GC cycle runs one or more
// rounds separated by GCSleep.
GCInterval time.Duration
// Time between rounds in a GC cycle
GCSleep time.Duration
BadgerOptions badger.Options
}
@ -132,8 +150,11 @@ func (bo *badgerOptions) Marshal(badgerOpts *badger.Options) {
}
// jsonConfig is the JSON representation of Config: toJSONConfig fills it
// from a Config and applyJSONConfig copies its values back into one.
// NOTE(review): the first Folder/BadgerOptions pair below appears to be the
// pre-change side of this diff hunk; the later pair is the current one.
type jsonConfig struct {
Folder string `json:"folder,omitempty"`
BadgerOptions badgerOptions `json:"badger_options,omitempty"`
Folder string `json:"folder,omitempty"`
// GCDiscardRatio maps to Config.GCDiscardRatio. A zero value is treated
// as "not set" when applied, leaving the Config value untouched.
GCDiscardRatio float64 `json:"gc_discard_ratio"`
// GCInterval is a duration string parsed into Config.GCInterval.
GCInterval string `json:"gc_interval"`
// GCSleep is a duration string parsed into Config.GCSleep.
GCSleep string `json:"gc_sleep"`
BadgerOptions badgerOptions `json:"badger_options,omitempty"`
}
// ConfigKey returns a human-friendly identifier for this type of Datastore.
@ -144,6 +165,9 @@ func (cfg *Config) ConfigKey() string {
// Default resets the folder, the GC settings and the Badger options of this
// Config to the package-level default values. It always returns nil.
func (cfg *Config) Default() error {
	// Storage engine options first, then the GC tuning knobs, then the folder.
	cfg.BadgerOptions = DefaultBadgerOptions

	cfg.GCSleep = DefaultGCSleep
	cfg.GCInterval = DefaultGCInterval
	cfg.GCDiscardRatio = DefaultGCDiscardRatio

	cfg.Folder = DefaultSubFolder
	return nil
}
@ -167,6 +191,10 @@ func (cfg *Config) Validate() error {
return errors.New("folder is unset")
}
if cfg.GCDiscardRatio <= 0 || cfg.GCDiscardRatio >= 1 {
return errors.New("gc_discard_ratio must be more than 0 and less than 1")
}
return nil
}
@ -186,6 +214,19 @@ func (cfg *Config) LoadJSON(raw []byte) error {
func (cfg *Config) applyJSONConfig(jcfg *jsonConfig) error {
config.SetIfNotDefault(jcfg.Folder, &cfg.Folder)
// 0 is an invalid option anyways. In that case, set default (0.2)
config.SetIfNotDefault(jcfg.GCDiscardRatio, &cfg.GCDiscardRatio)
// If these durations are unset, GC is enabled by default with default
// values.
err := config.ParseDurations("badger",
&config.DurationOpt{Duration: jcfg.GCInterval, Dst: &cfg.GCInterval, Name: "gc_interval"},
&config.DurationOpt{Duration: jcfg.GCSleep, Dst: &cfg.GCSleep, Name: "gc_sleep"},
)
if err != nil {
return err
}
badgerOpts := jcfg.BadgerOptions.Unmarshal()
if err := mergo.Merge(&cfg.BadgerOptions, badgerOpts, mergo.WithOverride); err != nil {
@ -219,6 +260,10 @@ func (cfg *Config) toJSONConfig() *jsonConfig {
jCfg.Folder = cfg.Folder
}
jCfg.GCDiscardRatio = cfg.GCDiscardRatio
jCfg.GCInterval = cfg.GCInterval.String()
jCfg.GCSleep = cfg.GCSleep.String()
bo := &badgerOptions{}
bo.Marshal(&cfg.BadgerOptions)
jCfg.BadgerOptions = *bo

View File

@ -2,6 +2,7 @@ package badger
import (
"testing"
"time"
"github.com/dgraph-io/badger"
"github.com/dgraph-io/badger/options"
@ -10,6 +11,8 @@ import (
var cfgJSON = []byte(`
{
"folder": "test",
"gc_discard_ratio": 0.1,
"gc_sleep": "2m",
"badger_options": {
"max_levels": 4,
"value_log_loading_mode": 0
@ -29,6 +32,18 @@ func TestToJSON(t *testing.T) {
cfg := &Config{}
cfg.LoadJSON(cfgJSON)
if cfg.GCDiscardRatio != 0.1 {
t.Fatal("GCDiscardRatio should be 0.1")
}
if cfg.GCInterval != DefaultGCInterval {
t.Fatal("GCInterval should default as it is unset")
}
if cfg.GCSleep != 2*time.Minute {
t.Fatal("GCSleep should be 2m")
}
if cfg.BadgerOptions.ValueLogLoadingMode != options.FileIO {
t.Fatalf("got: %d, want: %d", cfg.BadgerOptions.ValueLogLoadingMode, options.FileIO)
}
@ -60,3 +75,16 @@ func TestToJSON(t *testing.T) {
t.Fatal(err)
}
}
// TestDefault checks that a Config populated by Default() passes Validate(),
// and that Validate() rejects gc_discard_ratio values at both ends of the
// allowed open interval (0, 1).
func TestDefault(t *testing.T) {
	cfg := &Config{}
	// Default returns an error; do not drop it silently.
	if err := cfg.Default(); err != nil {
		t.Fatalf("error setting defaults: %v", err)
	}
	if err := cfg.Validate(); err != nil {
		t.Fatalf("error validating: %v", err)
	}

	// Validate requires 0 < GCDiscardRatio < 1, so both bounds must fail.
	for _, ratio := range []float64{0, 1} {
		cfg.GCDiscardRatio = ratio
		if cfg.Validate() == nil {
			t.Fatalf("expected error validating with GCDiscardRatio %v", ratio)
		}
	}
}

2
go.mod
View File

@ -18,7 +18,7 @@ require (
github.com/ipfs/go-block-format v0.0.3
github.com/ipfs/go-cid v0.0.7
github.com/ipfs/go-datastore v0.4.5
github.com/ipfs/go-ds-badger v0.2.6
github.com/ipfs/go-ds-badger v0.2.7
github.com/ipfs/go-ds-crdt v0.1.20
github.com/ipfs/go-ds-leveldb v0.4.2
github.com/ipfs/go-fs-lock v0.0.6

2
go.sum
View File

@ -408,6 +408,8 @@ github.com/ipfs/go-ds-badger v0.2.1/go.mod h1:Tx7l3aTph3FMFrRS838dcSJh+jjA7cX9Dr
github.com/ipfs/go-ds-badger v0.2.3/go.mod h1:pEYw0rgg3FIrywKKnL+Snr+w/LjJZVMTBRn4FS6UHUk=
github.com/ipfs/go-ds-badger v0.2.6 h1:Hy8jw4rifxtRDrqpvC1yh36oIyE37KDzsUzlHUPOFiU=
github.com/ipfs/go-ds-badger v0.2.6/go.mod h1:02rnztVKA4aZwDuaRPTf8mpqcKmXP7mLl6JPxd14JHA=
github.com/ipfs/go-ds-badger v0.2.7 h1:ju5REfIm+v+wgVnQ19xGLYPHYHbYLR6qJfmMbCDSK1I=
github.com/ipfs/go-ds-badger v0.2.7/go.mod h1:02rnztVKA4aZwDuaRPTf8mpqcKmXP7mLl6JPxd14JHA=
github.com/ipfs/go-ds-crdt v0.1.20 h1:4iJPmZSXq4/2gLOq0fVH3ROYDjw39vgdCyJF7akkdvE=
github.com/ipfs/go-ds-crdt v0.1.20/go.mod h1:1LiDiHfnunQ6UfilPCkgtlWTX8vWP9hiQt4Q5GK+jaE=
github.com/ipfs/go-ds-leveldb v0.0.1/go.mod h1:feO8V3kubwsEF22n0YRQCffeb79OOYIykR4L04tMOYc=