Skip to content

Instantly share code, notes, and snippets.

@abshkd
Forked from ewindisch/glacier.py
Created August 22, 2012 10:05

Revisions

  1. abshkd revised this gist Aug 22, 2012. No changes.
  2. @ewindisch ewindisch created this gist Aug 21, 2012.
    114 changes: 114 additions & 0 deletions glacier.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,114 @@
    #!/usr/bin/env python
    #
    # Copyright (c) 2012 Eric Windisch <[email protected]>
    #
    # Permission is hereby granted, free of charge, to any person obtaining a copy
    # of this software and associated documentation files (the "Software"), to deal
    # in the Software without restriction, including without limitation the rights
    # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    # copies of the Software, and to permit persons to whom the Software is
    # furnished to do so, subject to the following conditions:
    #
    # The above copyright notice and this permission notice shall be included in
    # all copies or substantial portions of the Software.
    #
    # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    # THE SOFTWARE.

    import hashlib
    import httplib
    import io
    import urllib

    from boto.connection import AWSAuthConnection


    class GlacierArchive(io.FileIO):
        '''A raw binary file that can compute the checksums sent with an upload.

        The file is read once, in CHUNK_SIZE pieces, to build both the
        whole-file SHA-256 and the list of per-chunk SHA-256 hashes (the
        leaves of the tree hash). Both results are memoized on the instance.

        All hash values are returned as ``hashlib`` hash objects; ``header()``
        converts them to hex strings.
        '''

        # Size of each leaf chunk of the tree hash: 1 megabyte.
        CHUNK_SIZE = 1024 * 1024

        # Memoized results of _raw_hash(). Declared at class level so the
        # attributes exist even if __init__ was bypassed.
        _sha = None
        _tree = None

        def __init__(self, *args, **kwargs):
            '''Open the file; arguments are passed through to ``io.FileIO``.

            The default mode of ``io.FileIO`` is binary read, which is what
            hashing requires.
            '''
            super(GlacierArchive, self).__init__(*args, **kwargs)
            self._sha = None
            self._tree = None

        @classmethod
        def open(cls, file_name):
            '''Alternate constructor: return an archive opened on file_name.'''
            return cls(file_name)

        def sha256(self):
            '''Return the SHA-256 hash object of the whole file.'''
            return self._raw_hash()[0]

        def tree(self):
            '''Return the list of per-chunk SHA-256 hash objects (the leaves).'''
            return self._raw_hash()[1]

        def tree_hash(self, tree=None):
            '''Return the root of the tree hash as a hash object.

            tree -- optional list of hash objects to reduce; when omitted or
                    empty, the leaves computed from the file are used.

            Adjacent pairs are combined left to right by hashing the
            concatenation of their binary digests; an unpaired trailing hash
            is carried up to the next level unchanged. This repeats until one
            hash is left. With no leaves at all (an empty file) the SHA-256
            of the empty string is returned.
            '''
            # Work on a copy so the memoized leaf list is never mutated.
            level = list(tree or self.tree())
            if not level:
                return hashlib.sha256(b'')

            while len(level) > 1:
                parents = []
                for i in range(0, len(level) - 1, 2):
                    combined = level[i].digest() + level[i + 1].digest()
                    parents.append(hashlib.sha256(combined))
                if len(level) % 2 == 1:
                    # Odd one out is promoted without re-hashing.
                    parents.append(level[-1])
                level = parents

            return level[0]

        def _read_chunk(self, size):
            '''Read up to size bytes, returning fewer only at end of file.

            A raw read may return fewer bytes than requested, which would
            shift every later chunk boundary, so keep reading until the
            chunk is full or the file is exhausted.
            '''
            pieces = []
            remaining = size
            while remaining > 0:
                piece = self.read(remaining)
                if not piece:
                    break
                pieces.append(piece)
                remaining -= len(piece)
            return b''.join(pieces)

        def _raw_hash(self):
            '''Build the leaf hashes of the tree and the whole-file sha256.

            Returns a ``(sha, tree)`` tuple. The result is memoized so the
            file is only read once. When the file is seekable it is hashed
            from the beginning and the previous position is restored
            afterwards, so computing the headers does not consume the data
            that is about to be uploaded.
            '''
            # Memoize
            if self._sha is not None and self._tree is not None:
                return (self._sha, self._tree)

            sha = hashlib.sha256()
            tree = []

            seekable = self.seekable()
            if seekable:
                position = self.tell()
                self.seek(0)

            try:
                # Read initial tree from file.
                while True:
                    r_bytes = self._read_chunk(self.CHUNK_SIZE)
                    if not r_bytes:
                        break

                    sha.update(r_bytes)
                    tree.append(hashlib.sha256(r_bytes))
            finally:
                if seekable:
                    self.seek(position)

            self._sha = sha
            self._tree = tree
            return (sha, tree)

        def header(self):
            '''Return the request headers describing this archive.

            The two checksum headers carry hex digests; the description is
            the name the file was opened with.
            '''
            return {
                'x-amz-glacier-version': '2012-06-01',
                'x-amz-archive-description': self.name,
                'x-amz-sha256-tree-hash': self.tree_hash().hexdigest(),
                'x-amz-content-sha256': self.sha256().hexdigest()
            }


    class GlacierAPI(object):
        '''Minimal HTTP client for uploading an archive to a vault.'''
        # TODO(ewindisch) implement multi-part uploads.
        @classmethod
        def post(cls, account_id, region, vault_name, archive):
            '''POST archive to the vault and return ``(status, reason)``.

            account_id -- first path component of the request URI
            region     -- region identifier used to build the endpoint host
            vault_name -- name of the destination vault
            archive    -- object used as the request body; must provide a
                          ``header()`` method returning the request headers

            NOTE(review): no Authorization header is added here, so the
            service will presumably reject the request until signing is
            implemented -- confirm against the API documentation.
            '''
            host = '.'.join(['glacier', region, 'amazonaws.com'])
            # The request path must be absolute, hence the leading slash.
            uri = '/' + '/'.join([account_id, 'vaults', vault_name, 'archives'])

            conn = httplib.HTTPConnection(host)
            try:
                conn.request("POST", uri, archive, archive.header())
                response = conn.getresponse()
                return (response.status, response.reason)
            finally:
                # Release the socket whether or not the request succeeded.
                conn.close()


    if __name__ == '__main__':
        # Demo upload. Guarded so that importing this module to reuse the
        # classes above does not trigger a network request.
        # NOTE(review): '-' as the account id presumably means "the account
        # that owns the credentials" -- confirm against the API documentation.
        with GlacierArchive('/etc/passwd') as archive:
            print(GlacierAPI.post('-', 'us-east-1', 'backups', archive))