Last active
October 27, 2017 15:49
-
-
Save davidlukac/b1e76141c88727b155b5032d5f09f5df to your computer and use it in GitHub Desktop.
Joining CSVs the rough way
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from modules.modules import CsvRepository, FileResource | |
def read_and_write(source_file: str, f, rm_header: bool):
    """Append the lines of ``source_file`` to the open text stream ``f``.

    Args:
        source_file: Path of the text file to read.
        f: Writable text stream (any object with a ``write`` method) that
            receives the copied lines.
        rm_header: When true, the first line of ``source_file`` is skipped
            and only the remaining lines are copied.

    If the last copied line has no trailing newline, one is written so that
    whatever is appended to ``f`` afterwards starts on its own line.
    """
    with open(source_file) as f_in:
        if rm_header:
            # Drop the first line; the default keeps an empty file from
            # raising StopIteration.
            next(f_in, None)

        last_line = None
        for last_line in f_in:
            f.write(last_line)

        # Without this, a source lacking a final newline would have its last
        # row fused with the first row of the next file written to ``f``.
        if last_line is not None and not last_line.endswith("\n"):
            f.write("\n")
if __name__ == '__main__':
    # Concatenate every matching CSV into out.csv, keeping a single header row.
    repo = CsvRepository(FileResource.get_resource_filename('data_Q2_2017/2017-*.csv'))

    # glob returns matches in arbitrary order; sort so that the file supplying
    # the header and the order of rows in the output are reproducible.
    in_files = sorted(repo.matching_files)
    print(in_files)
    print(len(in_files))

    with open(FileResource.get_resource_filename('data_Q2_2017/out.csv'), 'w') as f:
        for i, in_file in enumerate(in_files):
            # Only the first file contributes its header line.
            read_and_write(in_file, f, i != 0)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob
from typing import List
class CsvRepository(object):
    """Lazily resolved, cached list of paths matching a glob pattern.

    Nothing here is CSV-specific: whatever the pattern matches is returned.
    """

    def __init__(self, path_pattern: str, recursive: bool = True):
        """Store the search parameters; the filesystem is not touched here.

        Args:
            path_pattern: Pattern handed to ``glob.glob``.
            recursive: Forwarded as ``glob.glob``'s ``recursive`` argument.
        """
        self._path_pattern = path_pattern
        self._recursive = recursive
        # None means "not globbed yet"; filled on first ``matching_files`` access.
        self._matching_files = None

    @property
    def matching_files(self) -> List[str]:
        """Return the paths matching the pattern.

        The glob runs on first access only; later accesses return the same
        cached list object, so files created afterwards are not picked up.
        """
        if self._matching_files is None:
            self._matching_files = glob.glob(self._path_pattern, recursive=self._recursive)
        return self._matching_files
class FileResource(object):
    """Static helpers that resolve resource names via ``resource_filename``.

    NOTE(review): ``resource_filename`` is not defined in the visible code;
    presumably it is ``pkg_resources.resource_filename`` - confirm.
    """

    @staticmethod
    def get_resource_filename(filename: str) -> str:
        """Resolve ``filename`` against the module in which ``Resources`` is defined."""
        anchor_module = Resources.__module__
        return resource_filename(anchor_module, filename)

    @staticmethod
    def get_test_resource_filename(filename: str) -> str:
        """Resolve ``filename`` against the module in which ``TestResources`` is defined."""
        anchor_module = TestResources.__module__
        return resource_filename(anchor_module, filename)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Resources(object):
    """Empty marker class.

    It carries no behaviour of its own; ``FileResource.get_resource_filename``
    reads ``Resources.__module__`` to decide where resources are looked up.
    """
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment