Fix #1320: Add automatic GC to Badger datastore
This takes advantage of the go-ds-badger "auto-gc" feature. It automatically runs GC cycles, each made of multiple GC rounds (repeated until nothing more can be collected). The behaviour is enabled by default in the configuration and can be disabled by setting "gc_interval" to "0m". Hopefully this prevents Badger datastores from growing out of control.
This commit is contained in:
parent
d37df4fd95
commit
4ac2cf3eb0
|
@ -77,6 +77,11 @@ func SetIfNotDefault(src interface{}, dest interface{}) {
|
|||
if n != 0 {
|
||||
*dest.(*int) = n
|
||||
}
|
||||
case float64:
|
||||
n := src.(float64)
|
||||
if n != 0 {
|
||||
*dest.(*float64) = n
|
||||
}
|
||||
case bool:
|
||||
b := src.(bool)
|
||||
if b {
|
||||
|
|
|
@ -65,6 +65,8 @@ var testingCrdtCfg = []byte(`{
|
|||
|
||||
var testingBadgerCfg = []byte(`{
|
||||
"folder": "badgerFromTests",
|
||||
"gc_interval": "0m",
|
||||
"gc_sleep": "0m",
|
||||
"badger_options": {
|
||||
"max_table_size": 1048576
|
||||
}
|
||||
|
|
|
@ -18,7 +18,12 @@ func New(cfg *Config) (ds.Datastore, error) {
|
|||
if err != nil {
|
||||
return nil, errors.Wrap(err, "creating badger folder")
|
||||
}
|
||||
opts := badgerds.Options{Options: cfg.BadgerOptions}
|
||||
opts := badgerds.Options{
|
||||
GcDiscardRatio: cfg.GCDiscardRatio,
|
||||
GcInterval: cfg.GCInterval,
|
||||
GcSleep: cfg.GCSleep,
|
||||
Options: cfg.BadgerOptions,
|
||||
}
|
||||
return badgerds.NewDatastore(folder, &opts)
|
||||
}
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@ import (
|
|||
"encoding/json"
|
||||
"errors"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/dgraph-io/badger"
|
||||
"github.com/dgraph-io/badger/options"
|
||||
|
@ -25,6 +26,13 @@ var (
|
|||
// DefaultBadgerOptions has to be a var because badger.DefaultOptions
|
||||
// is. Values are customized during Init().
|
||||
DefaultBadgerOptions badger.Options
|
||||
|
||||
// DefaultGCDiscardRatio for GC operations. See Badger docs.
|
||||
DefaultGCDiscardRatio float64 = 0.2
|
||||
// DefaultGCInterval specifies interval between GC cycles.
|
||||
DefaultGCInterval time.Duration = 15 * time.Minute
|
||||
// DefaultGCSleep specifies sleep time between GC rounds.
|
||||
DefaultGCSleep time.Duration = 10 * time.Second
|
||||
)
|
||||
|
||||
func init() {
|
||||
|
@ -49,6 +57,16 @@ type Config struct {
|
|||
// the base configuration folder.
|
||||
Folder string
|
||||
|
||||
// For GC operation. See Badger documentation.
|
||||
GCDiscardRatio float64
|
||||
|
||||
// Interval between GC cycles. Each GC cycle runs one or more
|
||||
// rounds separated by GCSleep.
|
||||
GCInterval time.Duration
|
||||
|
||||
// Time between rounds in a GC cycle
|
||||
GCSleep time.Duration
|
||||
|
||||
BadgerOptions badger.Options
|
||||
}
|
||||
|
||||
|
@ -132,8 +150,11 @@ func (bo *badgerOptions) Marshal(badgerOpts *badger.Options) {
|
|||
}
|
||||
|
||||
type jsonConfig struct {
|
||||
Folder string `json:"folder,omitempty"`
|
||||
BadgerOptions badgerOptions `json:"badger_options,omitempty"`
|
||||
Folder string `json:"folder,omitempty"`
|
||||
GCDiscardRatio float64 `json:"gc_discard_ratio"`
|
||||
GCInterval string `json:"gc_interval"`
|
||||
GCSleep string `json:"gc_sleep"`
|
||||
BadgerOptions badgerOptions `json:"badger_options,omitempty"`
|
||||
}
|
||||
|
||||
// ConfigKey returns a human-friendly identifier for this type of Datastore.
|
||||
|
@ -144,6 +165,9 @@ func (cfg *Config) ConfigKey() string {
|
|||
// Default initializes this Config with sensible values: the default
// sub-folder, the default GC settings (discard ratio, interval between
// cycles and sleep between rounds) and the default Badger options.
// It always returns nil.
|
||||
func (cfg *Config) Default() error {
|
||||
cfg.Folder = DefaultSubFolder
|
||||
cfg.GCDiscardRatio = DefaultGCDiscardRatio
|
||||
cfg.GCInterval = DefaultGCInterval
|
||||
cfg.GCSleep = DefaultGCSleep
|
||||
cfg.BadgerOptions = DefaultBadgerOptions
|
||||
return nil
|
||||
}
|
||||
|
@ -167,6 +191,10 @@ func (cfg *Config) Validate() error {
|
|||
return errors.New("folder is unset")
|
||||
}
|
||||
|
||||
if cfg.GCDiscardRatio <= 0 || cfg.GCDiscardRatio >= 1 {
|
||||
return errors.New("gc_discard_ratio must be more than 0 and less than 1")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -186,6 +214,19 @@ func (cfg *Config) LoadJSON(raw []byte) error {
|
|||
func (cfg *Config) applyJSONConfig(jcfg *jsonConfig) error {
|
||||
config.SetIfNotDefault(jcfg.Folder, &cfg.Folder)
|
||||
|
||||
// 0 is an invalid option anyways. In that case, set default (0.2)
|
||||
config.SetIfNotDefault(jcfg.GCDiscardRatio, &cfg.GCDiscardRatio)
|
||||
|
||||
// If these durations are unset, the defaults are kept, so GC stays
|
||||
// enabled with default values.
|
||||
err := config.ParseDurations("badger",
|
||||
&config.DurationOpt{Duration: jcfg.GCInterval, Dst: &cfg.GCInterval, Name: "gc_interval"},
|
||||
&config.DurationOpt{Duration: jcfg.GCSleep, Dst: &cfg.GCSleep, Name: "gc_sleep"},
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
badgerOpts := jcfg.BadgerOptions.Unmarshal()
|
||||
|
||||
if err := mergo.Merge(&cfg.BadgerOptions, badgerOpts, mergo.WithOverride); err != nil {
|
||||
|
@ -219,6 +260,10 @@ func (cfg *Config) toJSONConfig() *jsonConfig {
|
|||
jCfg.Folder = cfg.Folder
|
||||
}
|
||||
|
||||
jCfg.GCDiscardRatio = cfg.GCDiscardRatio
|
||||
jCfg.GCInterval = cfg.GCInterval.String()
|
||||
jCfg.GCSleep = cfg.GCSleep.String()
|
||||
|
||||
bo := &badgerOptions{}
|
||||
bo.Marshal(&cfg.BadgerOptions)
|
||||
jCfg.BadgerOptions = *bo
|
||||
|
|
|
@ -2,6 +2,7 @@ package badger
|
|||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/dgraph-io/badger"
|
||||
"github.com/dgraph-io/badger/options"
|
||||
|
@ -10,6 +11,8 @@ import (
|
|||
var cfgJSON = []byte(`
|
||||
{
|
||||
"folder": "test",
|
||||
"gc_discard_ratio": 0.1,
|
||||
"gc_sleep": "2m",
|
||||
"badger_options": {
|
||||
"max_levels": 4,
|
||||
"value_log_loading_mode": 0
|
||||
|
@ -29,6 +32,18 @@ func TestToJSON(t *testing.T) {
|
|||
cfg := &Config{}
|
||||
cfg.LoadJSON(cfgJSON)
|
||||
|
||||
if cfg.GCDiscardRatio != 0.1 {
|
||||
t.Fatal("GCDiscardRatio should be 0.1")
|
||||
}
|
||||
|
||||
if cfg.GCInterval != DefaultGCInterval {
|
||||
t.Fatal("GCInterval should default as it is unset")
|
||||
}
|
||||
|
||||
if cfg.GCSleep != 2*time.Minute {
|
||||
t.Fatal("GCSleep should be 2m")
|
||||
}
|
||||
|
||||
if cfg.BadgerOptions.ValueLogLoadingMode != options.FileIO {
|
||||
t.Fatalf("got: %d, want: %d", cfg.BadgerOptions.ValueLogLoadingMode, options.FileIO)
|
||||
}
|
||||
|
@ -60,3 +75,16 @@ func TestToJSON(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDefault checks that a Config populated by Default() passes
// Validate(), and that Validate() rejects a GCDiscardRatio of 0.
func TestDefault(t *testing.T) {
|
||||
cfg := &Config{}
|
||||
cfg.Default()
|
||||
if cfg.Validate() != nil {
|
||||
t.Fatal("error validating")
|
||||
}
|
||||
|
||||
// Validate() requires 0 < GCDiscardRatio < 1, so 0 must be rejected.
cfg.GCDiscardRatio = 0
|
||||
if cfg.Validate() == nil {
|
||||
t.Fatal("expected error validating")
|
||||
}
|
||||
}
|
||||
|
|
2
go.mod
2
go.mod
|
@ -18,7 +18,7 @@ require (
|
|||
github.com/ipfs/go-block-format v0.0.3
|
||||
github.com/ipfs/go-cid v0.0.7
|
||||
github.com/ipfs/go-datastore v0.4.5
|
||||
github.com/ipfs/go-ds-badger v0.2.6
|
||||
github.com/ipfs/go-ds-badger v0.2.7
|
||||
github.com/ipfs/go-ds-crdt v0.1.20
|
||||
github.com/ipfs/go-ds-leveldb v0.4.2
|
||||
github.com/ipfs/go-fs-lock v0.0.6
|
||||
|
|
2
go.sum
2
go.sum
|
@ -408,6 +408,8 @@ github.com/ipfs/go-ds-badger v0.2.1/go.mod h1:Tx7l3aTph3FMFrRS838dcSJh+jjA7cX9Dr
|
|||
github.com/ipfs/go-ds-badger v0.2.3/go.mod h1:pEYw0rgg3FIrywKKnL+Snr+w/LjJZVMTBRn4FS6UHUk=
|
||||
github.com/ipfs/go-ds-badger v0.2.6 h1:Hy8jw4rifxtRDrqpvC1yh36oIyE37KDzsUzlHUPOFiU=
|
||||
github.com/ipfs/go-ds-badger v0.2.6/go.mod h1:02rnztVKA4aZwDuaRPTf8mpqcKmXP7mLl6JPxd14JHA=
|
||||
github.com/ipfs/go-ds-badger v0.2.7 h1:ju5REfIm+v+wgVnQ19xGLYPHYHbYLR6qJfmMbCDSK1I=
|
||||
github.com/ipfs/go-ds-badger v0.2.7/go.mod h1:02rnztVKA4aZwDuaRPTf8mpqcKmXP7mLl6JPxd14JHA=
|
||||
github.com/ipfs/go-ds-crdt v0.1.20 h1:4iJPmZSXq4/2gLOq0fVH3ROYDjw39vgdCyJF7akkdvE=
|
||||
github.com/ipfs/go-ds-crdt v0.1.20/go.mod h1:1LiDiHfnunQ6UfilPCkgtlWTX8vWP9hiQt4Q5GK+jaE=
|
||||
github.com/ipfs/go-ds-leveldb v0.0.1/go.mod h1:feO8V3kubwsEF22n0YRQCffeb79OOYIykR4L04tMOYc=
|
||||
|
|
Loading…
Reference in New Issue
Block a user