Created
March 29, 2019 14:11
-
-
Save florinutz/24d972665e1f9a047b9697a4fceebf4d to your computer and use it in GitHub Desktop.
Compressing a boltdb database using gz
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bytes" | |
"compress/gzip" | |
"crypto/sha256" | |
"encoding/gob" | |
"fmt" | |
"io/ioutil" | |
"log" | |
"net/http" | |
"os" | |
"strings" | |
"time" | |
"github.com/florinutz/filme/pkg/collector" | |
"github.com/florinutz/filme/pkg/collector/google/search" | |
"github.com/pkg/errors" | |
bolt "go.etcd.io/bbolt" | |
) | |
var urls = []string{search.TestSearch} | |
const BucketName = "store" | |
func main() { | |
if len(os.Args) < 2 { | |
log.Fatal("output path is required as argument") | |
} | |
outputPath := strings.Join(os.Args[1:], " ") | |
reqs, err := collector.GenerateSimpleRequests(urls, func(req *http.Request) { | |
req.Header.Set("Accept-Language", "en-US;q=0.8,es;q=0.5,fr;q=0.3") | |
}) | |
if err != nil { | |
log.Fatal(err) | |
} | |
// load db from compressed outputPath of create a new tmp file for it | |
db, err := loadDbFromGz(outputPath) | |
if err != nil { | |
var tmpFile *os.File | |
tmpFile, err = ioutil.TempFile("", "bolt-*.db") | |
if err != nil { | |
log.Fatal("cannot create temporary file") | |
} | |
defer os.Remove(tmpFile.Name()) | |
db, err = bolt.Open(tmpFile.Name(), 0600, &bolt.Options{Timeout: 1 * time.Second}) | |
if err != nil { | |
log.Fatal("cannot create temporary db") | |
} | |
} | |
defer db.Close() | |
responses, errs := collector.FetchUrls(reqs, *http.DefaultClient) | |
for _, err := range errs { | |
fmt.Fprintln(os.Stderr, err) | |
} | |
if err = db.Update(func(tx *bolt.Tx) error { | |
bucket, err := tx.CreateBucketIfNotExists([]byte(BucketName)) | |
if err != nil { | |
return fmt.Errorf("create bucket: %s", err) | |
} | |
if bucket == nil { | |
return fmt.Errorf("Could not retrieve bucket '%s'\n", BucketName) | |
} | |
for _, resp := range responses { | |
var buf bytes.Buffer | |
encoder := gob.NewEncoder(&buf) | |
err := encoder.Encode(resp.Request) | |
if err != nil { | |
return errors.Wrapf(err, "couldn't encode a key from the request into bucket %s", BucketName) | |
} | |
b := sha256.Sum256(buf.Bytes()) | |
if err = bucket.Put(b[:], buf.Bytes()); err != nil { | |
return errors.Wrapf(err, "couldn't save response into bucket '%s'", BucketName) | |
} | |
} | |
return nil | |
}); err != nil { | |
log.Fatal(err) | |
} | |
err = dumpDbToGz(db, outputPath) | |
if err != nil { | |
log.Fatal(err) | |
} | |
} | |
func loadDbFromGz(filename string) (*bolt.DB, error) { | |
f, err := os.OpenFile(filename, os.O_RDONLY, 0700) | |
if err != nil { | |
return nil, errors.Wrapf(err, "could not open file '%s'", filename) | |
} | |
defer f.Close() | |
zr, err := gzip.NewReader(f) | |
if err != nil { | |
return nil, errors.Wrapf(err, "could not instantiate gz reader from '%s'", filename) | |
} | |
var uncompressed []byte | |
_, err = zr.Read(uncompressed) | |
if err != nil { | |
return nil, errors.Wrapf(err, "could not read gz contents from '%s'", filename) | |
} | |
tmpFile, err := ioutil.TempFile("", "bolt-*.db") | |
if err != nil { | |
return nil, errors.Wrap(err, "cannot create temporary file") | |
} | |
defer os.Remove(tmpFile.Name()) | |
_, err = tmpFile.Write(uncompressed) | |
if err != nil { | |
return nil, errors.Wrap(err, "can't write contents to tmp file") | |
} | |
db, err := bolt.Open(tmpFile.Name(), 0640, &bolt.Options{Timeout: 1 * time.Second}) | |
if err != nil { | |
return nil, errors.Wrapf(err, "couldn't create/open bolt db at path '%s'", filename) | |
} | |
return db, nil | |
} | |
func dumpDbToGz(db *bolt.DB, filename string) error { | |
f, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0700) | |
if err != nil { | |
return err | |
} | |
defer f.Close() | |
zw := gzip.NewWriter(f) | |
zw.Comment = "unit test db" | |
defer zw.Close() | |
err = db.View(func(tx *bolt.Tx) error { | |
_, err := tx.WriteTo(zw) | |
return err | |
}) | |
return err | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment