Last active
August 29, 2015 14:16
-
-
Save swstack/ba4ea05be18feb20b416 to your computer and use it in GitHub Desktop.
Small web server built with webapp2 and MongoDB that handles large file uploads by streaming the request body to disk in chunks.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
from pymongo import MongoClient | |
from bson.binary import MD5_SUBTYPE, Binary | |
class Database(object):
    """Encapsulation of the MongoDB database and collection(s).

    For the context of this application this class only provides an API
    for storing and fetching file meta-data.
    """

    def __init__(self, host, port=27017):
        """Create the MongoDB client.

        :param host: host passed straight to ``MongoClient``
        :param port: port passed straight to ``MongoClient``
        """
        self._client = MongoClient(host, port)

    @property
    def _files(self):
        """The ``files`` collection inside the ``files_collection`` database."""
        return self._client.files_collection.files

    def get_file_data(self, file_id=None):
        """Return file data for one or more files based on file_id.

        A falsy ``file_id`` (``None`` or ``''``) selects every document;
        otherwise only the document whose ``_id`` equals ``file_id``.
        This method always returns the iterable produced by ``find``.
        """
        query = {'_id': file_id} if file_id else {}
        return self._files.find(query)

    def update_file(self, file_id, path, size, checksum):
        """Record a file in the database with its meta-data.

        :param file_id: value used as the document ``_id``
        :param path: path to the file on disk
        :param size: file size in bytes (coerced with ``int``)
        :param checksum: raw MD5 digest, stored as BSON binary
        """
        # TODO: Could store checksum as hexlified plain text
        meta_data = {
            'timestamp': int(time.time()),  # UNIX timestamp (secs since epoch)
            'path': path,  # Path to file
            'size': int(size),  # File size in bytes
            'checksum': Binary(checksum, MD5_SUBTYPE),  # MD5 checksum
        }
        # One upsert creates the document when it is missing and updates it
        # otherwise, instead of a separate find / insert / update sequence
        # that could interleave with another writer.
        self._files.update({'_id': file_id}, {'$set': meta_data}, upsert=True)

    def get_unique_file_id(self):
        """Create a blank document in the files database and return its ID.

        The ID is returned (and stored) as a string, so that a later
        ``update_file`` call made with that same string addresses this
        document rather than creating a second one next to it.
        """
        # Imported here so the method carries its own dependency; ``bson``
        # is the package the module already imports ``Binary`` from.
        from bson.objectid import ObjectId

        file_id = str(ObjectId())
        self._files.insert({'_id': file_id})
        return file_id
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# TODO: Stream requests | |
import requests | |
# Address of the streaming file server this client talks to.
HOST, PORT = 'localhost', 8080

# Sample upload body: a leading newline followed by six identical lines.
_SAMPLE_LINE = (
    'sadfjf3098f324j09fj8a9sdh8ff92308fjsl;kdajfj1243890jfasdh9fp8hf120f8h\n'
)
my_not_so_large_file = '\n' + _SAMPLE_LINE * 6
def _make_uri(file_id):
    """Build the server URI for the whole collection or for one file.

    ``None`` addresses the collection (``.../file/``); any other value is
    interpolated to give ``.../file/<file_id>/``.
    """
    location = 'file' if file_id is None else 'file/%s' % file_id

    # FIXME:
    #   The trailing forward slash here seems to matter...not sure if
    #   it should or if i'm doing something wrong...
    return "http://{host}:{port}/{location}/".format(
        host=HOST,
        port=PORT,
        location=location,
    )
def get(file_id=None):
    """GET the meta-data for one file, or for every file when no id is given.

    Returns the decoded JSON body when the response object is truthy,
    otherwise the response object itself so the caller can inspect it.
    """
    uri = _make_uri(file_id)
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Performing GET to %s" % uri)
    response = requests.get(uri)
    if response:
        return response.json()
    return response
def put(file_id=None):
    """PUT the sample payload to the server and return the raw response.

    With no ``file_id`` the request goes to the collection URI.
    """
    uri = _make_uri(file_id)
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Performing PUT to %s" % uri)
    return requests.put(
        uri,
        data=my_not_so_large_file,
        headers={'content-type': 'text/plain'},
    )
def main():
    """Run a sample request against the server and print the result."""
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(get())
    # Other calls to try by hand:
    # print(get('newidizzle'))
    # print(put('newidizzle'))
    # print(put())


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import hashlib | |
import operator | |
import base64 | |
import os | |
from paste import httpserver | |
from webapp2 import WSGIApplication, Route, RequestHandler | |
from database import Database | |
# Module-level database handle shared by the request handlers below.
db = Database(host='localhost')
class StreamingFileHandler(RequestHandler):
    """Request handler for route /file/<id>"""

    # Directory uploaded files are written into.
    file_store = '/tmp'
    # Number of bytes pulled from the request body per read.
    chunk_size = 64 * 1024

    @staticmethod
    def _normalize_file_id(file_id):
        """Strip surrounding slashes; return None when no id remains."""
        if file_id is None:
            return None
        return file_id.strip('/') or None

    def _resolve_path(self, file_id):
        """Return the path under `file_store` for file_id.

        Aborts the request with 400 when the id is not a single plain path
        component, so a crafted id cannot address a file outside
        `file_store`.
        """
        unsafe = (
            file_id in ('.', '..')
            or '\x00' in file_id
            or os.path.basename(file_id) != file_id
        )
        if unsafe:
            self.abort(400)
        return os.path.join(self.file_store, file_id)

    def get(self, file_id=None):
        """Get meta data for file(s)

        Writes a JSON list of documents, sorted by size, to the response.
        Documents without 'checksum' or 'size' (for example a blank
        placeholder that never received an upload) are left out.
        """
        file_id = self._normalize_file_id(file_id)

        documents = []
        for document in db.get_file_data(file_id=file_id):
            if 'checksum' not in document or 'size' not in document:
                # No meta-data recorded yet; nothing useful to report and
                # the sort below would fail on it.
                continue
            # Base64 encode the checksum so it goes across the wire nicely;
            # decode to text so json can serialize it.
            encoded = base64.b64encode(document['checksum'])
            document['checksum'] = encoded.decode('ascii')
            documents.append(document)

        documents.sort(key=operator.itemgetter('size'))  # Sort in place

        # TODO: Could stream response
        self.response.write(json.dumps(documents))

    def put(self, file_id=None):
        """Read the contents of the body and write to disk

        If no file_id is provided we will create a new file. This method will
        return an ID so that the client can look up the file at a later time.

        NOTE: The PUT method in a pure REST implementation would likely not
        allow the creation of files, only updating existing ones. We could
        introduce a POST method to the root collection /file/ to create a
        file and return the newly created file ID.

        This implementation of PUT also allows the user to specify an
        arbitrary file_id and it will create or update that file. The id
        must be a single path component; anything else is answered with 400.
        """
        file_id = self._normalize_file_id(file_id)
        if file_id is None:
            file_id = str(db.get_unique_file_id())

        # Open a file on disk located under `self.file_store`
        file_path = self._resolve_path(file_id)

        md5_checksum = hashlib.md5()
        file_size = 0
        with open(file_path, 'wb') as out:
            while True:
                chunk = self.request.body_file.read(self.chunk_size)
                # An empty read marks the end of the body; `not chunk`
                # holds for both '' and b''.
                if not chunk:
                    break
                # Write the chunk out to disk and update checksum and size
                out.write(chunk)
                md5_checksum.update(chunk)
                file_size += len(chunk)

        db.update_file(file_id,
                       file_path,
                       file_size,
                       md5_checksum.digest())

        self.response.write(json.dumps({'file_id': file_id}))
# Routing table: anything under /file/ is dispatched to StreamingFileHandler
# with the rest of the path captured as ``file_id``.
_ROUTES = [
    Route('/file/<file_id:(.*)>',
          handler=StreamingFileHandler,
          methods=['PUT', 'GET']),
]

# The WSGI application object, with debug mode switched on.
app = WSGIApplication(_ROUTES, debug=True)
def main():
    """Hand ``app`` to paste's HTTP server on 127.0.0.1, port 8080."""
    httpserver.serve(app, port='8080', host='127.0.0.1')


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unittest | |
import streaming_server | |
import webapp2 | |
import mock | |
class TestStreamingServer(unittest.TestCase):
    """Exercise the WSGI app with the database and file system stubbed out."""

    def setUp(self):
        # Patch (rather than overwrite) the db methods so every stub is
        # removed again when the test finishes.
        self.get_file_data = self._patch_db('get_file_data', return_value=[])
        self.update_file = self._patch_db('update_file')
        self.get_unique_file_id = self._patch_db('get_unique_file_id',
                                                 return_value=1)

    def _patch_db(self, name, **kwargs):
        """Replace ``streaming_server.db.<name>`` with a mock for one test."""
        patcher = mock.patch.object(streaming_server.db, name, **kwargs)
        stub = patcher.start()
        self.addCleanup(patcher.stop)
        return stub

    def _send(self, path, method='GET', body=None):
        """Send a request to the app and return its response."""
        request = webapp2.Request.blank(path)
        request.method = method
        if body is not None:
            request.body = body
        return request.get_response(streaming_server.app)

    def _put(self, path, body):
        """PUT ``body`` to ``path`` with ``open`` mocked; return (response, open mock)."""
        opener = mock.mock_open()
        # Patch ``open`` as seen from the module under test only; this works
        # the same regardless of what the builtins module is called.
        with mock.patch.object(streaming_server, 'open', opener, create=True):
            response = self._send(path, method='PUT', body=body)
        return response, opener

    def test_root_404(self):
        response = self._send('/')
        self.assertEqual(response.status_int, 404)

    def test_get_file_with_id(self):
        response = self._send('/file/foobar')
        self.assertEqual(response.status_int, 200)
        self.assertEqual(response.body, '[]')

    def test_get_file_no_id(self):
        response = self._send('/file/')
        self.assertEqual(response.status_int, 200)
        self.assertEqual(response.body, '[]')

    def test_put_file_with_id(self):
        response, opener = self._put('/file/foobar', 'mytestdata')
        self.assertEqual(response.status_int, 200)
        self.assertEqual(response.body, '{"file_id": "foobar"}')
        opener.return_value.write.assert_called_once_with('mytestdata')
        # The meta-data must be recorded under the same id that was returned.
        self.assertEqual(self.update_file.call_count, 1)
        self.assertEqual(self.update_file.call_args[0][0], 'foobar')

    def test_put_file_no_id(self):
        response, opener = self._put('/file/', 'mytestdata')
        self.assertEqual(response.status_int, 200)
        self.assertEqual(response.body, '{"file_id": "1"}')
        opener.return_value.write.assert_called_once_with('mytestdata')
        # The meta-data must be recorded under the same id that was returned.
        self.assertEqual(self.update_file.call_count, 1)
        self.assertEqual(self.update_file.call_args[0][0], '1')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment