Created
September 17, 2015 07:34
-
-
Save tomotaka/f978feceae0b0aab1603 to your computer and use it in GitHub Desktop.
JSON Lines dumper for LevelDB
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import time | |
# pip install click plyvel simplejson | |
import click | |
import plyvel | |
import simplejson as json | |
def _format_progress(n, t_passed):
    """Return the progress line for ``n`` records dumped in ``t_passed`` seconds."""
    # A very fast (or empty) run can measure zero elapsed time; report a
    # speed of 0 instead of raising ZeroDivisionError.
    speed = n / t_passed if t_passed > 0 else 0.0
    return 'processed=%d, speed=%.2f/rps, time-passed=%.3fsec' % (n, speed, t_passed)


@click.command()
@click.option('-l', '--ldb', required=True, help='input leveldb')
@click.option('-o', '--out', required=True, help='output file')
def main(ldb, out):
    """Dump every key/value pair of a LevelDB database as JSON lines.

    Each record is written to the output file as one line of the form
    {"key": ..., "value": ...}. A progress line is printed every 10000
    records and a summary is printed once the dump is finished.
    """
    t_start = time.time()
    n = 0
    # Open the input first: if the database cannot be opened, the output
    # file is neither created nor truncated.
    db = plyvel.DB(ldb)
    try:
        with open(out, 'wb') as fh:
            for k, v in db:
                json_line = json.dumps(dict(key=k, value=v)) + '\n'
                # The file is opened in binary mode. On Python 2 the dumped
                # line is already a byte string; on Python 3 it is text and
                # must be encoded before it can be written.
                if not isinstance(json_line, bytes):
                    json_line = json_line.encode('utf-8')
                fh.write(json_line)
                n += 1
                if n % 10000 == 0:
                    print(_format_progress(n, time.time() - t_start))
    finally:
        # Release the LevelDB handle even when the dump fails part-way.
        db.close()

    t_passed = time.time() - t_start
    print(_format_progress(n, t_passed))
    print('----------------')
    print('n-record=%d' % n)
    print('time=%.3fsec' % t_passed)
    print('input-leveldb=%s' % ldb)
    print('out=%s' % out)
# Run the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment