Forked from egguy/mongo_db_recover_delete_record.py
Last active
December 25, 2015 19:49
-
-
Save saml/7030714 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""A little script to recover deleted recording of a mongoDB db file | |
There's no optimization, but it works and has saved me.
""" | |
import struct | |
import bson | |
import sys | |
def decode_chunck(chunck):
    """Decode a candidate chunck as BSON and keep it only if it is a wanted record.

    Returns the first decoded document when it is non-empty and contains
    both the 'path' and 'storageId' keys. Returns None in every other
    case, including when decoding raises.
    """
    required = ('path', 'storageId')
    try:
        document = bson.decode_all(chunck)[0]
        matched = bool(document) and all(key in document for key in required)
    except Exception:
        # Any failure while decoding or inspecting means "no record here".
        return None
    return document if matched else None
def generate_chunck(data, pos=0):
    """Yield every byte range of a file that is framed like a BSON document.

    The file is read fully into memory, then every offset from ``pos``
    onwards is tried as a document start: the little-endian uint32 found
    there is taken as the document length, and the candidate is yielded
    when it fits inside the file and its last byte is the NUL terminator
    required by the BSON specification. Candidates are only framed
    correctly, not validated; the caller is expected to try decoding them.

    Args:
        data: Path of the file to scan.
        pos: Byte offset at which to start scanning.

    Yields:
        Byte strings ``a[start:start + length]`` for each plausible document.
    """
    # Smallest possible BSON document: 4-byte length + 1-byte terminator.
    min_bson_size = 5

    with open(data, 'rb') as f:
        a = f.read()
    size = len(a)

    # Stop while a complete minimal document can still fit; this also
    # guarantees four bytes are available for the length prefix, so
    # unpacking never fails near the end of the file.
    last_start = size - min_bson_size
    while pos <= last_start:
        start = pos
        pos += 1

        # Determine the size of the possible BSON-encoded data.
        bson_size = struct.unpack_from("<I", a, start)[0]
        end = start + bson_size

        # Reject sizes that are impossibly small or run past the file.
        if bson_size < min_bson_size or end > size:
            continue

        # The declared length includes the trailing \x00, so the terminator
        # is the LAST byte of the document. Slicing (rather than indexing)
        # keeps the comparison bytes-to-bytes on both Python 2 and 3.
        # http://bsonspec.org/#/specification
        if a[end - 1:end] != b"\x00":
            continue

        yield a[start:end]
#generate_chunck(sys.argv[1]) | |
# argv[1] = the file to recover
# argv[2] = optional byte offset at which to start scanning (defaults to 0)
start_offset = int(sys.argv[2]) if len(sys.argv) > 2 else 0

# Print every candidate chunck that decodes to a wanted record.
for chunck in generate_chunck(sys.argv[1], start_offset):
    result = decode_chunck(chunck)
    if result:
        print(result)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import json | |
class Parser(object):
    """Line-driven state machine that pairs 'path' and 'storageId' values.

    The stream is consumed one line at a time. A line equal to 'path'
    announces that the next line is a path value; after that, lines are
    skipped until one equal to 'storageId', whose following line is the
    storage id value. Each completed pair is written to the output as one
    JSON object per line. A 'path' marker seen before a record is complete
    discards the partial record and prints a trace of the transition.
    """

    def __init__(self, stream):
        """Remember the iterable of text lines to parse."""
        self.stream = stream
        self.current_state = None  # bound method handling the next line
        self.asset = None          # record currently being assembled

    def _start(self, line):
        """Idle state: wait for a 'path' marker."""
        if line == 'path':
            self.current_state = self._path_begin

    def _back_to_path_begin(self, from_state):
        """Drop the partial record and expect a path value next."""
        print('%s -> _path_begin: %s' % (from_state, self.asset))
        self.asset = None
        self.current_state = self._path_begin

    def _path_begin(self, line):
        """Take this line as the path value of a new record."""
        if line == 'path':
            return self._back_to_path_begin('_path_begin')
        self.asset = {'path': line}
        self.current_state = self._path_end

    def _path_end(self, line):
        """Skip lines until the 'storageId' marker (or a new 'path' marker)."""
        if line == 'path':
            return self._back_to_path_begin('_path_end')
        if line == 'storageId':
            self.current_state = self._storageId_begin

    def _storageId_begin(self, line):
        """Take this line as the storage id, complete the record, return it."""
        if line == 'path':
            return self._back_to_path_begin('_storageId_begin')
        self.asset['storageId'] = line
        self.current_state = self._start
        return self.asset

    def start(self, output):
        """Run the state machine over the stream, writing JSON lines to output.

        Args:
            output: Object with a ``write`` method accepting text.
        """
        self.current_state = self._start
        for line in self.stream:
            # Strip only a real trailing newline: a final line without one
            # must keep its last character.
            if line.endswith("\n"):
                line = line[:-1]
            result = self.current_state(line)
            if result is not None:
                output.write(json.dumps(result))
                output.write("\n")
# Usage: $0 output.json
# Lines are read from standard input; completed records are written to the
# file named by the first command-line argument.
destination_path = sys.argv[1]
with open(destination_path, 'w') as output:
    parser = Parser(sys.stdin)
    parser.start(output)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment