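An S3 object exposed as a file object, for reading a tar archive without a full download.
Copied from the Stack Overflow question "How to list files inside tar in AWS S3 without downloading it?"
See my answer there for more details: https://stackoverflow.com/questions/56086604/how-to-list-files-inside-tar-in-aws-s3-without-downloading-it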
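An S3 object exposed as a file object, for reading a tar archive without a full download.
Copied from the Stack Overflow question "How to list files inside tar in AWS S3 without downloading it?"
See my answer there for more details: https://stackoverflow.com/questions/56086604/how-to-list-files-inside-tar-in-aws-s3-without-downloading-it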
| # https://stackoverflow.com/questions/56086604/how-to-list-files-inside-tar-in-aws-s3-without-downloading-it | |
| import boto3 | |
| import io | |
| import tarfile | |
class S3File(io.BytesIO):
    """Read-only, seekable file object backed by ranged S3 ``get_object`` calls.

    Each ``read`` issues one ``get_object`` request with a ``Range`` header
    for exactly the bytes asked for, so a consumer that seeks between small
    reads (``tarfile`` listing members, for instance) never downloads the
    whole object.

    The class derives from ``io.BytesIO`` only to inherit the file-object
    interface; the in-memory buffer of the base class is never filled.

    Attributes:
        bucket_name: Bucket passed as ``Bucket`` to the client.
        key_name: Key passed as ``Key`` to the client.
        s3client: Object providing ``get_object`` (and ``head_object`` when
            seeking relative to the end).
        offset: Current absolute read position, in bytes.
        total_download: Number of bytes actually returned by ``read`` so far.
    """

    def __init__(self, bucket_name, key_name, s3client):
        """Store the object location and client; no request is made here."""
        super().__init__()
        self.bucket_name = bucket_name
        self.key_name = key_name
        self.s3client = s3client
        self.offset = 0
        self.total_download = 0
        # Object size, fetched lazily the first time SEEK_END needs it.
        self._size = None

    def close(self):
        """Do nothing, so the object stays usable after a consumer closes it."""
        return

    @staticmethod
    def _is_past_eof(err):
        """Return True if *err* looks like an unsatisfiable-range response.

        Checks the ``response`` dict carried by the exception for the
        ``InvalidRange`` error code or HTTP status 416.
        """
        response = getattr(err, "response", None)
        if not isinstance(response, dict):
            return False
        code = response.get("Error", {}).get("Code")
        status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")
        return code == "InvalidRange" or status == 416

    def _object_size(self):
        """Return the object's length in bytes, asking the client only once."""
        if self._size is None:
            head = self.s3client.head_object(
                Bucket=self.bucket_name, Key=self.key_name)
            self._size = head['ContentLength']
        return self._size

    def read(self, size=-1):
        """Read up to *size* bytes from the current offset.

        Args:
            size: Maximum number of bytes to fetch. ``None`` or a negative
                value reads from the current offset to the end of the object.

        Returns:
            The bytes returned by the service, which may be fewer than
            *size* near the end of the object; ``b""`` when *size* is 0 or
            the offset is at or past the end.

        Raises:
            Exception: Any client error other than an unsatisfiable range is
                re-raised instead of being reported as end-of-file.
        """
        if size == 0:
            # "bytes=N-(N-1)" is not a valid range; nothing to fetch anyway.
            return b""
        start = self.offset
        if size is None or size < 0:
            byte_range = "bytes=%d-" % start
        else:
            byte_range = "bytes=%d-%d" % (start, start + size - 1)
        try:
            s3_object = self.s3client.get_object(
                Bucket=self.bucket_name, Key=self.key_name, Range=byte_range)
        except Exception as err:
            if self._is_past_eof(err):
                return b""
            raise
        result = s3_object['Body'].read()
        # Advance by what was actually received so tell() stays truthful
        # after a short read at the end of the object.
        self.offset = start + len(result)
        self.total_download += len(result)
        print('read: offset = {}, size = {}, total download = {}'.format(start, size, self.total_download))
        return result

    def seek(self, offset, whence=io.SEEK_SET):
        """Move the read position and return the new absolute offset.

        Args:
            offset: Byte offset, interpreted according to *whence*.
            whence: ``io.SEEK_SET`` (default), ``io.SEEK_CUR`` or
                ``io.SEEK_END``. ``SEEK_END`` needs the object size, which
                is fetched with ``head_object`` and then cached.

        Raises:
            ValueError: If *whence* is not one of the three constants or the
                resulting position would be negative.
        """
        if whence == io.SEEK_SET:
            target = offset
        elif whence == io.SEEK_CUR:
            target = self.offset + offset
        elif whence == io.SEEK_END:
            target = self._object_size() + offset
        else:
            raise ValueError("invalid whence ({!r}, should be 0, 1 or 2)".format(whence))
        if target < 0:
            raise ValueError("negative seek position {}".format(target))
        print('seek: offset {} -> {} (diff = {} kB)'.format(self.offset, target, (target-self.offset)//1000))
        self.offset = target
        return target

    def tell(self):
        """Return the current absolute read position."""
        return self.offset