Skip to content

Instantly share code, notes, and snippets.

@swstack
Last active August 29, 2015 14:16

Revisions

  1. Stephen Stack revised this gist Mar 4, 2015. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions streaming_server.py
    Original file line number Diff line number Diff line change
    @@ -43,6 +43,7 @@ def put(self, file_id=None):
    If no file_id is provided we will create a new file. This method will
    return an ID so that the client can look up the file at a later time.
    NOTE: The PUT method in a pure REST implementation would likely not allow
    the creation of files, only updating existing ones. We could introduce a
    POST method to the root collection /file/ to create a file and return the
  2. Stephen Stack revised this gist Mar 4, 2015. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions streaming_server.py
    Original file line number Diff line number Diff line change
    @@ -40,6 +40,7 @@ def get(self, file_id=None):

    def put(self, file_id=None):
    """Read the contents of the body and write to disk
    If no file_id is provided we will create a new file. This method will
    return an ID so that the client can look up the file at a later time.
    NOTE: The PUT method in a pure REST implementation would likely not allow
  3. Stephen Stack revised this gist Mar 4, 2015. 3 changed files with 17 additions and 4 deletions.
    10 changes: 6 additions & 4 deletions database.py
    Original file line number Diff line number Diff line change
    @@ -31,11 +31,13 @@ def get_file_data(self, file_id=None):
    def update_file(self, file_id, path, size, checksum):
    """Record a file in the database with its meta-data"""

    # TODO: Could store checksum as hexlified plain text

    meta_data = {
    'timestamp': int(time.time()),
    'path': path,
    'size': int(size),
    'checksum': Binary(checksum, MD5_SUBTYPE),
    'timestamp': int(time.time()), # UNIX Timestamp (secs since epoch)
    'path': path, # Path to file
    'size': int(size), # File size in bytes
    'checksum': Binary(checksum, MD5_SUBTYPE), # MD5 Checksum
    }

    if self._files.find_one(file_id) is None:
    2 changes: 2 additions & 0 deletions streaming_client.py
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,5 @@
    # TODO: Stream requests

    import requests

    HOST = 'localhost'
    9 changes: 9 additions & 0 deletions streaming_server.py
    Original file line number Diff line number Diff line change
    @@ -34,12 +34,21 @@ def get(self, file_id=None):
    documents.append(document)

    documents.sort(key=operator.itemgetter('size')) # Sort in place

    # TODO: Could stream response
    self.response.write(json.dumps(documents))

    def put(self, file_id=None):
    """Read the contents of the body and write to disk
    If no file_id is provided we will create a new file. This method will
    return an ID so that the client can look up the file at a later time.
    NOTE: The PUT method in a pure REST implementation would likely not allow
    the creation of files, only updating existing ones. We could introduce a
    POST method to the root collection /file/ to create a file and return the
    newly created file ID.
    This implementation of PUT also allows user to specify any arbitrary file_id
    and it will create or update that file, which may not be a good idea or secure.
    """

    if file_id is None or file_id == '':
  4. Stephen Stack revised this gist Mar 3, 2015. No changes.
  5. Stephen Stack created this gist Mar 3, 2015.
    50 changes: 50 additions & 0 deletions database.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,50 @@
    import time

    from pymongo import MongoClient
    from bson.binary import MD5_SUBTYPE, Binary


    class Database(object):
        """Encapsulation of the MongoDB database and collection(s)

        For the context of this application this class will only provide an
        API for storing and fetching file meta-data.
        """

        def __init__(self, host, port=27017):
            """Create the MongoDB client.

            :param host: Host name or address handed to ``MongoClient``.
            :param port: Port handed to ``MongoClient`` (27017 by default).
            """
            self._client = MongoClient(host, port)

        @property
        def _files(self):
            """The ``files`` collection of the ``files_collection`` database."""
            return self._client.files_collection.files

        def get_file_data(self, file_id=None):
            """Return file data for one or more files based on file_id

            :param file_id: ``_id`` of the document to look up. When it is
                falsy (``None`` or ``''``) every document is returned.
            :returns: The result of the collection's ``find`` call, so this
                method always returns an iterable, even for a single ID.
            """
            query = {'_id': file_id} if file_id else {}
            return self._files.find(query)

        def update_file(self, file_id, path, size, checksum):
            """Record a file in the database with its meta-data

            :param file_id: ``_id`` of the document to create or update.
            :param path: Path to the file.
            :param size: File size in bytes; coerced with ``int``.
            :param checksum: Raw MD5 digest, stored as BSON binary data.
            """
            meta_data = {
                'timestamp': int(time.time()),  # secs since the epoch
                'path': path,
                'size': int(size),
                'checksum': Binary(checksum, MD5_SUBTYPE),
            }

            # One upsert creates the document when it does not exist yet and
            # updates it otherwise. This replaces a find/insert/update
            # sequence that needed three round-trips and could race with a
            # concurrent writer between the lookup and the insert.
            self._files.update(
                {'_id': file_id},
                {'$set': meta_data},
                upsert=True
            )

        def get_unique_file_id(self):
            """Create a blank document in the files collection and return its ID"""
            return self._files.insert({})
    61 changes: 61 additions & 0 deletions streaming_client.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,61 @@
    import requests

    # Host and port of the streaming file server; `_make_uri` builds every
    # request URI from these two values.
    HOST = 'localhost'
    PORT = 8080

    # Small sample payload that `put` sends as the body of its PUT request.
    my_not_so_large_file = """
    sadfjf3098f324j09fj8a9sdh8ff92308fjsl;kdajfj1243890jfasdh9fp8hf120f8h
    sadfjf3098f324j09fj8a9sdh8ff92308fjsl;kdajfj1243890jfasdh9fp8hf120f8h
    sadfjf3098f324j09fj8a9sdh8ff92308fjsl;kdajfj1243890jfasdh9fp8hf120f8h
    sadfjf3098f324j09fj8a9sdh8ff92308fjsl;kdajfj1243890jfasdh9fp8hf120f8h
    sadfjf3098f324j09fj8a9sdh8ff92308fjsl;kdajfj1243890jfasdh9fp8hf120f8h
    sadfjf3098f324j09fj8a9sdh8ff92308fjsl;kdajfj1243890jfasdh9fp8hf120f8h
    """


    def _make_uri(file_id):
        """Build the request URI for one file, or for the whole collection.

        :param file_id: ID of the file, or None to address the collection.
        :returns: ``http://HOST:PORT/file/`` or ``http://HOST:PORT/file/<id>/``.
        """
        resource = 'file' if file_id is None else 'file/%s' % file_id

        # NOTE: the trailing slash is required at least for the collection
        # URI: the server routes on the template '/file/<file_id:(.*)>', so
        # a bare '/file' does not match it.
        template = "http://{host}:{port}/{location}/"
        return template.format(host=HOST, port=PORT, location=resource)


    def get(file_id=None):
        """Fetch the meta-data of one file, or of all files.

        :param file_id: ID of the file to look up, or None for every file.
        :returns: The decoded JSON body when the response object is truthy,
            otherwise the ``requests`` response object itself so that the
            caller can inspect the failure.
        """
        uri = _make_uri(file_id)
        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3; the bare print statement is a SyntaxError on 3.
        print("Performing GET to %s" % uri)
        response = requests.get(uri)
        if response:
            return response.json()
        return response


    def put(file_id=None):
        """Upload the sample payload with a PUT request.

        :param file_id: ID to store the payload under, or None to PUT to the
            collection URI.
        :returns: The ``requests`` response object.
        """
        uri = _make_uri(file_id)
        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3; the bare print statement is a SyntaxError on 3.
        print("Performing PUT to %s" % uri)
        response = requests.put(
            uri,
            data=my_not_so_large_file,
            headers={'content-type': 'text/plain'},
        )
        return response


    def main():
        """Run a sample request against the server and print the result."""
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print(get())
        # Other calls to try:
        # print(get('newidizzle'))
        # print(put('newidizzle'))
        # print(put())


    if __name__ == '__main__':
        main()
    101 changes: 101 additions & 0 deletions streaming_server.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,101 @@
    import json
    import hashlib
    import operator
    import base64

    import os

    from paste import httpserver
    from webapp2 import WSGIApplication, Route, RequestHandler
    from database import Database


    # Module-level database handle shared by the request handlers below. It is
    # created at import time and points at the MongoDB host 'localhost'.
    db = Database('localhost')


    class StreamingFileHandler(RequestHandler):
        """Request handler for route /file/<id>"""

        # Directory that uploaded files are written to.
        file_store = '/tmp'

        # Number of bytes read from the request body per iteration.
        chunk_size = 64 * 1024

        @staticmethod
        def _clean_file_id(file_id):
            """Strip surrounding slashes; return None when nothing is left."""
            return (file_id or '').strip('/') or None

        def _path_for(self, file_id):
            """Return the path under `file_store` for file_id, or None if the
            ID is not a plain file name."""
            # The ID comes straight from the URL. Anything containing a path
            # separator, or naming the current/parent directory, could point
            # outside of `file_store`.
            if file_id in ('.', '..') or os.path.basename(file_id) != file_id:
                return None
            return os.path.join(self.file_store, file_id)

        def get(self, file_id=None):
            """Get meta data for file(s)

            Writes a JSON list of documents, sorted by file size, to the
            response. Without a file_id every document is listed.

            :param file_id: ID taken from the URL; may be None or empty.
            """
            file_id = self._clean_file_id(file_id)

            documents = []
            for document in db.get_file_data(file_id=file_id):
                # Base64 encode the checksum so it goes across the wire
                # nicely. Documents created by db.get_unique_file_id() start
                # out blank, so 'checksum' and 'size' may be missing.
                md5_checksum = document.get('checksum')
                if md5_checksum is not None:
                    encoded = base64.b64encode(md5_checksum)
                    document['checksum'] = encoded.decode('ascii')
                documents.append(document)

            # Documents without a size sort first instead of raising KeyError.
            documents.sort(key=lambda document: document.get('size', 0))

            self.response.headers['Content-Type'] = 'application/json'
            # default=str is deliberate: it only kicks in for values json
            # cannot encode itself, such as database-generated `_id` objects.
            self.response.write(json.dumps(documents, default=str))

        def put(self, file_id=None):
            """Read the contents of the body and write to disk

            If no file_id is provided we will create a new file. This method
            will return an ID so that the client can look up the file at a
            later time.

            :param file_id: ID taken from the URL; may be None or empty.
            :raises: An HTTP 400 error (via ``self.abort``) when file_id is
                not a plain file name.
            """
            file_id = self._clean_file_id(file_id)
            if file_id is None:
                file_id = str(db.get_unique_file_id())

            # Resolve the target under `self.file_store`, refusing IDs that
            # could escape that directory.
            file_path = self._path_for(file_id)
            if file_path is None:
                self.abort(400)

            md5_checksum = hashlib.md5()
            file_size = 0

            with open(file_path, 'wb') as out:
                while True:
                    chunk = self.request.body_file.read(self.chunk_size)
                    # An empty read marks the end of the body; `not chunk`
                    # covers both '' and b''.
                    if not chunk:
                        break

                    # Write the chunk out to disk, update our checksum and
                    # count the bytes so no stat call is needed afterwards.
                    out.write(chunk)
                    md5_checksum.update(chunk)
                    file_size += len(chunk)

            db.update_file(file_id,
                           file_path,
                           file_size,
                           md5_checksum.digest())

            self.response.headers['Content-Type'] = 'application/json'
            self.response.write(json.dumps({'file_id': file_id}))


    # WSGI application with a single route: GET and PUT on /file/<file_id>,
    # where the ID part of the path may be empty.
    app = WSGIApplication(
        routes=[
            Route(
                '/file/<file_id:(.*)>',
                handler=StreamingFileHandler,
                methods=['PUT', 'GET'],
            ),
        ],
        debug=True,
    )


    def main():
        """Serve `app` on 127.0.0.1:8080 with paste's HTTP server"""
        listen_on = {'host': '127.0.0.1', 'port': '8080'}
        httpserver.serve(app, **listen_on)


    if __name__ == '__main__':
        main()
    55 changes: 55 additions & 0 deletions test_streaming_server.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,55 @@
    import unittest
    import streaming_server
    import webapp2
    import mock


    class TestStreamingServer(unittest.TestCase):
        """Exercise the /file/ routes with database and disk access stubbed."""

        def setUp(self):
            # Stub the shared database object for the duration of one test
            # only. Each patch is undone in cleanup so the stubs cannot leak
            # into other tests that import streaming_server.
            stubs = {
                'get_file_data': mock.Mock(return_value=[]),
                'update_file': mock.Mock(),
                'get_unique_file_id': mock.Mock(return_value=1),
            }
            for name, stub in stubs.items():
                patcher = mock.patch.object(streaming_server.db, name, stub)
                patcher.start()
                self.addCleanup(patcher.stop)

        def _send(self, method, path, body=None):
            """Send one request to the application and return the response."""
            request = webapp2.Request.blank(path)
            request.method = method
            if body is not None:
                request.body = body
            return request.get_response(streaming_server.app)

        def _put(self, path, body):
            """PUT `body` to `path` with open() stubbed out.

            Returns the response and the mocked file handle.
            """
            opener = mock.mock_open()
            # Patch the name `open` as seen by streaming_server only, rather
            # than the process-wide builtin. This also avoids the Python 2
            # specific '__builtin__' module name.
            with mock.patch.object(streaming_server, 'open', opener,
                                   create=True):
                response = self._send('PUT', path, body)
            return response, opener()

        def test_root_404(self):
            response = self._send('GET', '/')
            self.assertEqual(response.status_int, 404)

        def test_get_file_with_id(self):
            response = self._send('GET', '/file/foobar')
            self.assertEqual(response.status_int, 200)
            self.assertEqual(response.body, '[]')

        def test_get_file_no_id(self):
            response = self._send('GET', '/file/')
            self.assertEqual(response.status_int, 200)
            self.assertEqual(response.body, '[]')

        def test_put_file_with_id(self):
            response, handle = self._put('/file/foobar', 'mytestdata')
            self.assertEqual(response.status_int, 200)
            self.assertEqual(response.body, '{"file_id": "foobar"}')
            handle.write.assert_called_once_with('mytestdata')

        def test_put_file_no_id(self):
            response, handle = self._put('/file/', 'mytestdata')
            self.assertEqual(response.status_int, 200)
            self.assertEqual(response.body, '{"file_id": "1"}')
            handle.write.assert_called_once_with('mytestdata')