package badger3

import (
	"encoding/json"
	"errors"
	"path/filepath"
	"time"

	"dario.cat/mergo"
	"github.com/dgraph-io/badger/v3"
	"github.com/dgraph-io/badger/v3/options"
	"github.com/kelseyhightower/envconfig"

	"github.com/ipfs-cluster/ipfs-cluster/config"
)

const configKey = "badger3"
const envConfigKey = "cluster_badger3"

// Default values for badger Config
const (
	DefaultSubFolder = "badger3"
)

var (
	// DefaultBadgerOptions has to be a var because badger.DefaultOptions
	// is. Values are customized during Init().
	DefaultBadgerOptions badger.Options

	// DefaultGCDiscardRatio for GC operations. See Badger docs.
	DefaultGCDiscardRatio float64 = 0.2
	// DefaultGCInterval specifies the interval between GC cycles.
	DefaultGCInterval time.Duration = 15 * time.Minute
	// DefaultGCSleep specifies the sleep time between GC rounds.
	DefaultGCSleep time.Duration = 10 * time.Second
)

func init() {
	DefaultBadgerOptions = badger.DefaultOptions("")

	// Better to slow down starts than shutdowns.
	DefaultBadgerOptions.CompactL0OnClose = false

	// Defaults to 1MB! For us that means everything goes into the LSM
	// tree and the LSM tree is supposed to be loaded into memory in full.
	// We only put very small things on the LSM tree by default (i.e. a
	// single CID).
	DefaultBadgerOptions.ValueThreshold = 100

	// Disable Block Cache: the cluster read-pattern at scale requires
	// regularly looping over all keys. The CRDT read-pattern avoids
	// reading something twice. In general, it probably does not add much,
	// and it is recommended to be disabled when not using compression.
	DefaultBadgerOptions.BlockCacheSize = 0

	// Let's disable compression for values: better perf when reading, and
	// usually the ratio between data stored by badger and the cluster
	// should be small. Users can always enable it.
	DefaultBadgerOptions.Compression = options.None

	// There is a write lock in go-ds-crdt that writes batches one by one.
	// Also, NewWriteBatch says that there can never be transaction
	// conflicts when doing batches. And IPFS will only write a block
	// once, or do it with the same values. In general, we probably don't
	// care about conflicts much (rows updated while a commit transaction
	// was open). Increases perf too.
	DefaultBadgerOptions.DetectConflicts = false

	// TODO: Increase memtable size. This will use some more memory, but any
	// normal system should be able to deal with using 256MiB for the
	// memtable. Badger puts a lot of things in memory anyways,
	// i.e. IndexCacheSize is set to 0. Note NumMemTables is 5.
	// DefaultBadgerOptions.MemTableSize = 268435456 // 256MiB
}

// Config is used to initialize a BadgerDB datastore. It implements the
// ComponentConfig interface.
type Config struct {
	config.Saver

	// The folder for this datastore. Non-absolute paths are relative to
	// the base configuration folder.
	Folder string

	// For GC operation. See Badger documentation.
	GCDiscardRatio float64

	// Interval between GC cycles. Each GC cycle runs one or more
	// rounds separated by GCSleep.
	GCInterval time.Duration

	// Time between rounds in a GC cycle.
	GCSleep time.Duration

	BadgerOptions badger.Options
}
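
// customizedConfig is a hypothetical sketch (not part of the upstream file;
// the function name is made up for illustration) of how a caller might start
// from the defaults above and then tune the embedded badger.Options, for
// example re-enabling value compression (disabled by default here) or using
// the larger memtable suggested by the TODO in init().
func customizedConfig() (*Config, error) {
	cfg := &Config{}
	if err := cfg.Default(); err != nil {
		return nil, err
	}
	// Users can always re-enable compression for values.
	cfg.BadgerOptions.Compression = options.ZSTD
	cfg.BadgerOptions.ZSTDCompressionLevel = 1
	// 256MiB memtable, as suggested (but commented out) in init().
	cfg.BadgerOptions.MemTableSize = 256 << 20
	return cfg, cfg.Validate()
}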

// badgerOptions is a copy of badger.Options so it can be marshaled by us.
type badgerOptions struct {
	Dir                      string                            `json:"dir"`
	ValueDir                 string                            `json:"value_dir"`
	SyncWrites               bool                              `json:"sync_writes"`
	NumVersionsToKeep        int                               `json:"num_versions_to_keep"`
	ReadOnly                 bool                              `json:"read_only"`
	// Logger
	Compression              options.CompressionType           `json:"compression"`
	InMemory                 bool                              `json:"in_memory"`
	MetricsEnabled           bool                              `json:"metrics_enabled"`
	NumGoroutines            int                               `json:"num_goroutines"`
	MemTableSize             int64                             `json:"mem_table_size"`
	BaseTableSize            int64                             `json:"base_table_size"`
	BaseLevelSize            int64                             `json:"base_level_size"`
	LevelSizeMultiplier      int                               `json:"level_size_multiplier"`
	TableSizeMultiplier      int                               `json:"table_size_multiplier"`
	MaxLevels                int                               `json:"max_levels"`
	VLogPercentile           float64                           `json:"v_log_percentile"`
	ValueThreshold           int64                             `json:"value_threshold"`
	NumMemtables             int                               `json:"num_memtables"`
	BlockSize                int                               `json:"block_size"`
	BloomFalsePositive       float64                           `json:"bloom_false_positive"`
	BlockCacheSize           int64                             `json:"block_cache_size"`
	IndexCacheSize           int64                             `json:"index_cache_size"`
	NumLevelZeroTables       int                               `json:"num_level_zero_tables"`
	NumLevelZeroTablesStall  int                               `json:"num_level_zero_tables_stall"`
	ValueLogFileSize         int64                             `json:"value_log_file_size"`
	ValueLogMaxEntries       uint32                            `json:"value_log_max_entries"`
	NumCompactors            int                               `json:"num_compactors"`
	CompactL0OnClose         bool                              `json:"compact_l_0_on_close"`
	LmaxCompaction           bool                              `json:"lmax_compaction"`
	ZSTDCompressionLevel     int                               `json:"zstd_compression_level"`
	VerifyValueChecksum      bool                              `json:"verify_value_checksum"`
	ChecksumVerificationMode options.ChecksumVerificationMode  `json:"checksum_verification_mode"`
	DetectConflicts          bool                              `json:"detect_conflicts"`
	NamespaceOffset          int                               `json:"namespace_offset"`
}

func (bo *badgerOptions) Unmarshal() *badger.Options {
	badgerOpts := &badger.Options{}
	badgerOpts.Dir = bo.Dir
	badgerOpts.ValueDir = bo.ValueDir
	badgerOpts.SyncWrites = bo.SyncWrites
	badgerOpts.NumVersionsToKeep = bo.NumVersionsToKeep
	badgerOpts.ReadOnly = bo.ReadOnly
	badgerOpts.Compression = bo.Compression
	badgerOpts.InMemory = bo.InMemory
	badgerOpts.MetricsEnabled = bo.MetricsEnabled
	badgerOpts.NumGoroutines = bo.NumGoroutines
	badgerOpts.MemTableSize = bo.MemTableSize
	badgerOpts.BaseTableSize = bo.BaseTableSize
	badgerOpts.BaseLevelSize = bo.BaseLevelSize
	badgerOpts.LevelSizeMultiplier = bo.LevelSizeMultiplier
	badgerOpts.TableSizeMultiplier = bo.TableSizeMultiplier
	badgerOpts.MaxLevels = bo.MaxLevels
	badgerOpts.VLogPercentile = bo.VLogPercentile
	badgerOpts.ValueThreshold = bo.ValueThreshold
	badgerOpts.NumMemtables = bo.NumMemtables
	badgerOpts.BlockSize = bo.BlockSize
	badgerOpts.BloomFalsePositive = bo.BloomFalsePositive
	badgerOpts.BlockCacheSize = bo.BlockCacheSize
	badgerOpts.IndexCacheSize = bo.IndexCacheSize
	badgerOpts.NumLevelZeroTables = bo.NumLevelZeroTables
	badgerOpts.NumLevelZeroTablesStall = bo.NumLevelZeroTablesStall
	badgerOpts.ValueLogFileSize = bo.ValueLogFileSize
	badgerOpts.ValueLogMaxEntries = bo.ValueLogMaxEntries
	badgerOpts.NumCompactors = bo.NumCompactors
	badgerOpts.CompactL0OnClose = bo.CompactL0OnClose
	badgerOpts.LmaxCompaction = bo.LmaxCompaction
	badgerOpts.ZSTDCompressionLevel = bo.ZSTDCompressionLevel
	badgerOpts.VerifyValueChecksum = bo.VerifyValueChecksum
	badgerOpts.ChecksumVerificationMode = bo.ChecksumVerificationMode
	badgerOpts.DetectConflicts = bo.DetectConflicts
	badgerOpts.NamespaceOffset = bo.NamespaceOffset

	return badgerOpts
}

func (bo *badgerOptions) Marshal(badgerOpts *badger.Options) {
	bo.Dir = badgerOpts.Dir
	bo.ValueDir = badgerOpts.ValueDir
	bo.SyncWrites = badgerOpts.SyncWrites
	bo.NumVersionsToKeep = badgerOpts.NumVersionsToKeep
	bo.ReadOnly = badgerOpts.ReadOnly
	bo.Compression = badgerOpts.Compression
	bo.InMemory = badgerOpts.InMemory
	bo.MetricsEnabled = badgerOpts.MetricsEnabled
	bo.NumGoroutines = badgerOpts.NumGoroutines
	bo.MemTableSize = badgerOpts.MemTableSize
	bo.BaseTableSize = badgerOpts.BaseTableSize
	bo.BaseLevelSize = badgerOpts.BaseLevelSize
	bo.LevelSizeMultiplier = badgerOpts.LevelSizeMultiplier
	bo.TableSizeMultiplier = badgerOpts.TableSizeMultiplier
	bo.MaxLevels = badgerOpts.MaxLevels
	bo.VLogPercentile = badgerOpts.VLogPercentile
	bo.ValueThreshold = badgerOpts.ValueThreshold
	bo.NumMemtables = badgerOpts.NumMemtables
	bo.BlockSize = badgerOpts.BlockSize
	bo.BloomFalsePositive = badgerOpts.BloomFalsePositive
	bo.BlockCacheSize = badgerOpts.BlockCacheSize
	bo.IndexCacheSize = badgerOpts.IndexCacheSize
	bo.NumLevelZeroTables = badgerOpts.NumLevelZeroTables
	bo.NumLevelZeroTablesStall = badgerOpts.NumLevelZeroTablesStall
	bo.ValueLogFileSize = badgerOpts.ValueLogFileSize
	bo.ValueLogMaxEntries = badgerOpts.ValueLogMaxEntries
	bo.NumCompactors = badgerOpts.NumCompactors
	bo.CompactL0OnClose = badgerOpts.CompactL0OnClose
	bo.LmaxCompaction = badgerOpts.LmaxCompaction
	bo.ZSTDCompressionLevel = badgerOpts.ZSTDCompressionLevel
	bo.VerifyValueChecksum = badgerOpts.VerifyValueChecksum
	bo.ChecksumVerificationMode = badgerOpts.ChecksumVerificationMode
	bo.DetectConflicts = badgerOpts.DetectConflicts
	bo.NamespaceOffset = badgerOpts.NamespaceOffset
}

type jsonConfig struct {
	Folder         string        `json:"folder,omitempty"`
	GCDiscardRatio float64       `json:"gc_discard_ratio"`
	GCInterval     string        `json:"gc_interval"`
	GCSleep        string        `json:"gc_sleep"`
	BadgerOptions  badgerOptions `json:"badger_options,omitempty"`
}

// ConfigKey returns a human-friendly identifier for this type of Datastore.
func (cfg *Config) ConfigKey() string {
	return configKey
}

// Default initializes this Config with sensible values.
func (cfg *Config) Default() error {
	cfg.Folder = DefaultSubFolder
	cfg.GCDiscardRatio = DefaultGCDiscardRatio
	cfg.GCInterval = DefaultGCInterval
	cfg.GCSleep = DefaultGCSleep
	cfg.BadgerOptions = DefaultBadgerOptions
	cfg.BadgerOptions.Logger = logger
	return nil
}

// ApplyEnvVars fills in any Config fields found as environment variables.
func (cfg *Config) ApplyEnvVars() error {
	jcfg := cfg.toJSONConfig()

	err := envconfig.Process(envConfigKey, jcfg)
	if err != nil {
		return err
	}

	return cfg.applyJSONConfig(jcfg)
}

// Validate checks that the fields of this Config have working values,
// at least in appearance.
func (cfg *Config) Validate() error {
	if cfg.Folder == "" {
		return errors.New("folder is unset")
	}

	if cfg.GCDiscardRatio <= 0 || cfg.GCDiscardRatio >= 1 {
		return errors.New("gc_discard_ratio must be more than 0 and less than 1")
	}

	return nil
}

// LoadJSON reads the fields of this Config from a JSON byteslice as
// generated by ToJSON.
func (cfg *Config) LoadJSON(raw []byte) error {
	jcfg := &jsonConfig{}
	err := json.Unmarshal(raw, jcfg)
	if err != nil {
		return err
	}

	cfg.Default()

	return cfg.applyJSONConfig(jcfg)
}

func (cfg *Config) applyJSONConfig(jcfg *jsonConfig) error {
	config.SetIfNotDefault(jcfg.Folder, &cfg.Folder)

	// 0 is an invalid value anyway; when unset, the default (0.2) applied
	// by Default() is kept.
	config.SetIfNotDefault(jcfg.GCDiscardRatio, &cfg.GCDiscardRatio)

	// If these durations are set, GC is enabled by default with default
	// values.
	err := config.ParseDurations("badger",
		&config.DurationOpt{Duration: jcfg.GCInterval, Dst: &cfg.GCInterval, Name: "gc_interval"},
		&config.DurationOpt{Duration: jcfg.GCSleep, Dst: &cfg.GCSleep, Name: "gc_sleep"},
	)
	if err != nil {
		return err
	}

	badgerOpts := jcfg.BadgerOptions.Unmarshal()
	if err := mergo.Merge(&cfg.BadgerOptions, badgerOpts, mergo.WithOverride); err != nil {
		return err
	}

	return cfg.Validate()
}

// ToJSON generates a JSON-formatted human-friendly representation of this
// Config.
func (cfg *Config) ToJSON() (raw []byte, err error) {
	jcfg := cfg.toJSONConfig()

	raw, err = config.DefaultJSONMarshal(jcfg)
	return
}

func (cfg *Config) toJSONConfig() *jsonConfig {
	jCfg := &jsonConfig{}

	if cfg.Folder != DefaultSubFolder {
		jCfg.Folder = cfg.Folder
	}

	jCfg.GCDiscardRatio = cfg.GCDiscardRatio
	jCfg.GCInterval = cfg.GCInterval.String()
	jCfg.GCSleep = cfg.GCSleep.String()

	bo := &badgerOptions{}
	bo.Marshal(&cfg.BadgerOptions)
	jCfg.BadgerOptions = *bo

	return jCfg
}

// GetFolder returns the BadgerDB folder.
func (cfg *Config) GetFolder() string {
	if filepath.IsAbs(cfg.Folder) {
		return cfg.Folder
	}

	return filepath.Join(cfg.BaseDir, cfg.Folder)
}

// ToDisplayJSON returns JSON config as a string.
func (cfg *Config) ToDisplayJSON() ([]byte, error) {
	return config.DisplayJSON(cfg.toJSONConfig())
}
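
// exampleLoad is a hypothetical sketch (not part of the upstream file; the
// function name and its raw argument are made up for illustration) of the
// usual lifecycle of this Config: apply defaults, then environment variables
// (prefixed per envConfigKey, i.e. CLUSTER_BADGER3_*), then any user-provided
// JSON, which applyJSONConfig merges over the defaults and validates.
func exampleLoad(raw []byte) (*Config, error) {
	cfg := &Config{}
	if err := cfg.Default(); err != nil {
		return nil, err
	}
	if err := cfg.ApplyEnvVars(); err != nil {
		return nil, err
	}
	if len(raw) > 0 {
		// LoadJSON resets to defaults first and applies the given JSON
		// on top.
		if err := cfg.LoadJSON(raw); err != nil {
			return nil, err
		}
	}
	return cfg, cfg.Validate()
}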