Skip to content

Instantly share code, notes, and snippets.

@swstack
Last active August 29, 2015 14:16
Show Gist options
  • Save swstack/ba4ea05be18feb20b416 to your computer and use it in GitHub Desktop.
Save swstack/ba4ea05be18feb20b416 to your computer and use it in GitHub Desktop.
A small web server built with webapp2 and MongoDB that handles large file uploads by streaming the request body to disk.
import time
from pymongo import MongoClient
from bson.binary import MD5_SUBTYPE, Binary
class Database(object):
    """Encapsulation of the MongoDB database and collection(s).

    For the context of this application this class only provides an API
    for storing and fetching file meta-data.
    """

    def __init__(self, host, port=27017):
        """Create the MongoDB client.

        :param host: host passed to ``MongoClient``
        :param port: port passed to ``MongoClient`` (27017 by default)
        """
        self._client = MongoClient(host, port)

    @property
    def _files(self):
        """The ``files`` collection inside the ``files_collection`` database."""
        return self._client.files_collection.files

    def get_file_data(self, file_id=None):
        """Return file data for one or more files based on file_id.

        A falsy ``file_id`` (``None`` or ``''``) selects every document;
        otherwise only the document whose ``_id`` equals ``file_id`` is
        selected.  This method always returns an iterable.
        """
        query = {'_id': file_id} if file_id else {}
        return self._files.find(query)

    def update_file(self, file_id, path, size, checksum):
        """Record a file in the database with its meta-data.

        :param file_id: ``_id`` of the document to create or update
        :param path: path to the file
        :param size: file size in bytes (coerced with ``int``)
        :param checksum: MD5 checksum, stored as BSON binary
        """
        # TODO: Could store checksum as hexlified plain text
        meta_data = {
            'timestamp': int(time.time()),  # UNIX timestamp (secs since epoch)
            'path': path,
            'size': int(size),
            'checksum': Binary(checksum, MD5_SUBTYPE),
        }
        # A single upsert creates the document when it does not exist yet and
        # updates it otherwise, without the find/insert/update race.
        self._files.update({'_id': file_id}, {'$set': meta_data}, upsert=True)

    def get_unique_file_id(self):
        """Create a blank document in the files collection and return its ID.

        The ID is generated as a *string* so that a caller which passes it
        around as text (URLs, JSON, ``str(...)``) and later hands it to
        ``update_file`` addresses this same document instead of creating a
        second one and leaving the blank document orphaned.
        """
        # Local import: bson ships with pymongo, which this module already uses.
        from bson.objectid import ObjectId

        # insert() returns the ``_id`` of the inserted document.
        return self._files.insert({'_id': str(ObjectId())})
# TODO: Stream requests
import requests
# Host and port used by _make_uri() to build request URLs.
HOST = 'localhost'
PORT = 8080
# Sample text payload sent as the request body by put().
my_not_so_large_file = """
sadfjf3098f324j09fj8a9sdh8ff92308fjsl;kdajfj1243890jfasdh9fp8hf120f8h
sadfjf3098f324j09fj8a9sdh8ff92308fjsl;kdajfj1243890jfasdh9fp8hf120f8h
sadfjf3098f324j09fj8a9sdh8ff92308fjsl;kdajfj1243890jfasdh9fp8hf120f8h
sadfjf3098f324j09fj8a9sdh8ff92308fjsl;kdajfj1243890jfasdh9fp8hf120f8h
sadfjf3098f324j09fj8a9sdh8ff92308fjsl;kdajfj1243890jfasdh9fp8hf120f8h
sadfjf3098f324j09fj8a9sdh8ff92308fjsl;kdajfj1243890jfasdh9fp8hf120f8h
"""
def _make_uri(file_id):
    """Build the request URL for one file, or for the collection.

    With ``file_id`` set to ``None`` the URL points at ``file/``; otherwise
    it points at ``file/<file_id>/``.  Host and port come from the
    module-level ``HOST`` and ``PORT``.
    """
    resource = 'file' if file_id is None else 'file/%s' % file_id
    # FIXME:
    # The trailing forward slash in the template seems to matter to the
    # server... not sure whether it should or whether something is wrong.
    template = "http://{host}:{port}/{location}/"
    return template.format(host=HOST, port=PORT, location=resource)
def get(file_id=None):
    """Perform a GET for ``file_id`` (or for all files when it is ``None``).

    Returns the decoded JSON body when the response object is truthy,
    otherwise the response object itself so the caller can inspect it.
    """
    uri = _make_uri(file_id)
    # Parenthesised form is valid on both Python 2 and Python 3.
    print("Performing GET to %s" % uri)
    response = requests.get(uri)
    if response:
        return response.json()
    return response
def put(file_id=None):
    """PUT the sample payload to ``file_id`` (or to the collection root).

    The body is ``my_not_so_large_file`` sent as ``text/plain``.  Returns
    the ``requests`` response object unmodified.
    """
    uri = _make_uri(file_id)
    # Parenthesised form is valid on both Python 2 and Python 3.
    print("Performing PUT to %s" % uri)
    return requests.put(
        uri,
        data=my_not_so_large_file,
        headers={'content-type': 'text/plain'},
    )
def main():
    """Run a sample request against the server and print the result."""
    # Parenthesised form is valid on both Python 2 and Python 3.
    print(get())
    # Other calls to try by hand:
    #   print(get('newidizzle'))
    #   print(put('newidizzle'))
    #   print(put())


if __name__ == '__main__':
    main()
import json
import hashlib
import operator
import base64
import os
from paste import httpserver
from webapp2 import WSGIApplication, Route, RequestHandler
from database import Database
# Module-level database handle used by StreamingFileHandler's get() and put().
db = Database('localhost')
class StreamingFileHandler(RequestHandler):
    """Request handler for route /file/<id>"""

    # Directory that uploaded files are written into.
    file_store = '/tmp'
    # Number of bytes read from the request body per iteration.
    chunk_size = 128

    @staticmethod
    def _normalize_file_id(file_id):
        """Strip surrounding slashes; return None when nothing is left."""
        return (file_id or '').strip('/') or None

    @staticmethod
    def _is_safe_file_id(file_id):
        """Return True when ``file_id`` is a plain file name.

        Anything that could make the joined path leave ``file_store`` is
        refused: path separators, the ``.``/``..`` entries and NUL bytes.
        """
        if file_id in ('.', '..'):
            return False
        return not any(char in file_id for char in ('/', '\\', '\x00'))

    def get(self, file_id=None):
        """Get meta data for file(s).

        Writes a JSON list of documents, sorted by ``size``, to the
        response.  Without a ``file_id`` every document is returned.
        """
        file_id = self._normalize_file_id(file_id)
        documents = []
        for document in db.get_file_data(file_id=file_id):
            # Base64 encode the checksum so it goes across the wire nicely.
            # Documents that have no checksum yet are passed through as-is
            # instead of failing the whole request.
            checksum = document.get('checksum')
            if checksum is not None:
                encoded = base64.b64encode(checksum)
                document['checksum'] = encoded.decode('ascii')
            documents.append(document)
        # Documents without a recorded size sort as if they were empty.
        documents.sort(key=lambda doc: doc.get('size', 0))
        # TODO: Could stream response
        # default=str is deliberate: it renders ``_id`` values that are not
        # natively JSON serializable as text.
        self.response.write(json.dumps(documents, default=str))

    def put(self, file_id=None):
        """Read the contents of the body and write to disk.

        If no file_id is provided we will create a new file.  This method
        returns an ID so that the client can look up the file at a later
        time.  A file_id that is not a plain file name (it contains a path
        separator, is ``.``/``..`` or contains a NUL byte) is rejected with
        400, because it would otherwise let the client write outside
        ``file_store``.

        NOTE: The PUT method in a pure REST implementation would likely not
        allow the creation of files, only updating existing ones.  We could
        introduce a POST method to the root collection /file/ to create a
        file and return the newly created file ID.

        This implementation of PUT still allows the user to specify an
        arbitrary (safe) file_id and it will create or update that file,
        which may not be a good idea.
        """
        file_id = self._normalize_file_id(file_id)
        if file_id is None:
            file_id = str(db.get_unique_file_id())
        elif not self._is_safe_file_id(file_id):
            # abort() raises, so nothing below runs for a rejected id.
            self.abort(400)

        file_path = os.path.join(self.file_store, file_id)
        md5_checksum = hashlib.md5()
        file_size = 0
        with open(file_path, 'wb') as out:
            while True:
                chunk = self.request.body_file.read(self.chunk_size)
                # An empty read means end of body.  Testing truthiness works
                # for both text and bytes, where ``== ''`` would never match
                # an empty bytes object.
                if not chunk:
                    break
                # Write the chunk to disk; update checksum and running size.
                out.write(chunk)
                md5_checksum.update(chunk)
                file_size += len(chunk)

        db.update_file(file_id,
                       file_path,
                       file_size,
                       md5_checksum.digest())
        self.response.write(json.dumps({'file_id': file_id}))
# WSGI application with a single route, handled by StreamingFileHandler.
app = WSGIApplication(
# Routes
[
# The (.*) pattern also matches the empty string, so both /file/ and
# /file/<id> are dispatched to the handler; only PUT and GET are accepted.
Route('/file/<file_id:(.*)>',
handler=StreamingFileHandler,
methods=['PUT', 'GET'])
],
# Other options
debug=True
)
def main():
    """Serve ``app`` with Paste's HTTP server on 127.0.0.1, port 8080."""
    bind_address = {'host': '127.0.0.1', 'port': '8080'}
    httpserver.serve(app, **bind_address)


if __name__ == '__main__':
    main()
import unittest
import streaming_server
import webapp2
import mock
class TestStreamingServer(unittest.TestCase):
    """Route-level tests for ``streaming_server.app`` with the database stubbed."""

    def setUp(self):
        """Stub the database methods for the duration of one test.

        The stubs are installed with ``patch.object`` and undone through
        ``addCleanup`` so the shared ``streaming_server.db`` object is
        restored after every test instead of staying mocked.
        """
        stubs = {
            'get_file_data': mock.Mock(return_value=[]),
            'update_file': mock.Mock(),
            'get_unique_file_id': mock.Mock(return_value=1),
        }
        for name, stub in stubs.items():
            patcher = mock.patch.object(streaming_server.db, name, stub)
            patcher.start()
            self.addCleanup(patcher.stop)

    def _request(self, path, method, body=None):
        """Send a request to the app and return its response."""
        request = webapp2.Request.blank(path)
        request.method = method
        if body is not None:
            request.body = body
        return request.get_response(streaming_server.app)

    def _put_with_mocked_open(self, path, body):
        """PUT ``body`` to ``path`` with ``open`` mocked out.

        Returns the response and the ``mock_open`` object so the caller can
        inspect what was written.
        """
        opener = mock.mock_open()
        with mock.patch('__builtin__.open', opener, create=True):
            response = self._request(path, 'PUT', body)
        return response, opener

    def test_root_404(self):
        response = self._request('/', 'GET')
        self.assertEqual(response.status_int, 404)

    def test_get_file_with_id(self):
        response = self._request('/file/foobar', 'GET')
        self.assertEqual(response.status_int, 200)
        self.assertEqual(response.body, '[]')

    def test_get_file_no_id(self):
        response = self._request('/file/', 'GET')
        self.assertEqual(response.status_int, 200)
        self.assertEqual(response.body, '[]')

    def test_put_file_with_id(self):
        response, opener = self._put_with_mocked_open('/file/foobar',
                                                      'mytestdata')
        self.assertEqual(response.status_int, 200)
        self.assertEqual(response.body, '{"file_id": "foobar"}')
        handle = opener()
        handle.write.assert_called_once_with('mytestdata')
        # The upload must also be recorded in the database exactly once.
        self.assertEqual(streaming_server.db.update_file.call_count, 1)

    def test_put_file_no_id(self):
        response, opener = self._put_with_mocked_open('/file/', 'mytestdata')
        self.assertEqual(response.status_int, 200)
        self.assertEqual(response.body, '{"file_id": "1"}')
        handle = opener()
        handle.write.assert_called_once_with('mytestdata')
        # The upload must also be recorded in the database exactly once.
        self.assertEqual(streaming_server.db.update_file.call_count, 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment