Created
February 20, 2016 14:36
-
-
Save kitroed/9cccd7a4fda1ea16d766 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The idea here is to throw together a quick
# SQLite database to store the hash of all
# the files found in a given subdir.
import hashlib | |
import os | |
import time | |
import datetime | |
import socket | |
from sqlalchemy import create_engine | |
from sqlalchemy import Column, Integer, String, DateTime, Boolean, MetaData | |
from sqlalchemy.orm import sessionmaker | |
from sqlalchemy.ext.declarative import declarative_base | |
from sqlalchemy.orm import relationship, backref | |
from sqlalchemy.sql.schema import ForeignKey | |
# Declarative base class that the ORM model below derives from.
Base = declarative_base()


# Should make a Host class?
class File(Base):
    """ORM model for one scanned file, stored in the ``files`` table.

    Rows are keyed by ``full_path``; the remaining columns hold the host
    name, the MD5 digest and the file-system metadata captured by the scan.
    """

    __tablename__ = 'files'

    # Name of the machine the scan ran on.
    host = Column(String(50), nullable=False)
    # Directory and file name joined together; the primary key of the table.
    full_path = Column(String, primary_key=True)
    # Hexadecimal MD5 digest of the file contents (32 characters).
    md5_hash = Column(String(32), nullable=False)
    # Directory that contains the file.
    path = Column(String)
    size = Column(Integer)
    filename = Column(String)
    extension = Column(String)
    modified = Column(DateTime)
    created = Column(DateTime)
    can_read = Column(Boolean)
    last_checked = Column(DateTime)

    def __repr__(self):
        """Return a short debug string showing the file name and its hash."""
        details = (self.filename, self.md5_hash)
        return "<File(Filename='%s' Hash='%s')>" % details
################################################################################ | |
def _md5_of_file(file_path, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is read in pieces of *chunk_size* bytes so that large files are
    never loaded into memory all at once, and the handle is closed by the
    ``with`` block even when a read fails.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(file_path, 'rb') as handle:
        # iter() with a sentinel keeps calling read() until it returns b''.
        for chunk in iter(lambda: handle.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


# The SQLite database file lives next to this script.
basedir = os.path.abspath(os.path.dirname(__file__))
engine = create_engine('sqlite:///' + os.path.join(basedir, 'filehashdata.sqlite'),
                       echo=False)
Base.metadata.create_all(engine)

Session = sessionmaker()
Session.configure(bind=engine)
session = Session()

# Root directory to scan and the host name recorded on every row.
path = 'D:\\'
hostname = socket.gethostname()

for dir_path, dir_names, file_names in os.walk(path):
    for file_name in file_names:
        file = File(full_path=os.path.join(dir_path, file_name))
        file.host = hostname
        file.path = dir_path
        file.filename = file_name
        file.extension = os.path.splitext(file_name)[1]
        file.last_checked = datetime.datetime.now()
        file.can_read = False
        try:
            file.size = os.path.getsize(file.full_path)
            file.modified = datetime.datetime.fromtimestamp(os.path.getmtime(file.full_path))
            file.created = datetime.datetime.fromtimestamp(os.path.getctime(file.full_path))
            file.md5_hash = _md5_of_file(file.full_path)
            # Refresh the timestamp now that hashing has actually finished.
            file.last_checked = datetime.datetime.now()
            file.can_read = True
            print(file)
        except OSError:
            # PermissionError and FileNotFoundError are subclasses of OSError,
            # so this single clause covers everything the original tuple did.
            print("Permission or FileNotFound error when hashing %s" % file.full_path)
            # Unreadable files are skipped; nothing is stored for them.
            continue
        # save info to database
        # we'll use merge since the path is unique
        session.merge(file)
        session.commit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment