Created
October 3, 2019 19:09
-
-
Save matabares/63606b59737f9bc13161a5223a9eccd7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from tabulate import tabulate | |
#chunksize = 10 ** 6 | |
#for chunk in pd.read_csv('', chunksize=chunksize): | |
import os | |
# Directory that is scanned (recursively) for pipe-delimited amenity files.
source = 'c:/borreme/isa2/'
# The merged output is written inside the scanned directory, so it must be
# excluded from the scan; otherwise a second run would ingest its own result.
result_path = os.path.join(source, 'result.csv')
# The input files have no header row; these names are assigned on read.
COLUMN_NAMES = ['hotelcode', 'amenitycode', 'name', 'type']


def load_amenity_frames(source_dir, skip_path=None):
    """Read every file under *source_dir* into a per-file de-duplicated frame.

    Each file is parsed as a header-less, ``|``-delimited CSV with the columns
    in ``COLUMN_NAMES``; within a file only the first row for each
    ``amenitycode`` is kept.

    Args:
        source_dir: Directory to walk; subdirectories are included.
        skip_path: Optional path of a file to leave out of the scan
            (used for the merged output file).

    Returns:
        A list with one ``DataFrame`` per file read, in ``os.walk`` order.
        The list is empty when the directory is missing or has no files.
    """
    skip_key = None
    if skip_path is not None:
        skip_key = os.path.normcase(os.path.abspath(skip_path))

    frames = []
    for root, _dirs, filenames in os.walk(source_dir):
        for file in filenames:
            # Join with the directory os.walk is currently in, so files that
            # live in subdirectories resolve to their real location.
            path = os.path.join(root, file)
            if os.path.normcase(os.path.abspath(path)) == skip_key:
                continue
            print(file)
            df = pd.read_csv(path, delimiter='|', names=COLUMN_NAMES,
                             header=None)
            frames.append(df.drop_duplicates(subset='amenitycode',
                                             keep='first'))
    return frames


def merge_amenity_frames(frames):
    """Concatenate *frames* and keep the first row for each ``amenitycode``.

    NOTE: "first" follows the order of *frames*, which comes from ``os.walk``
    and is therefore not guaranteed to be stable across platforms.

    Raises:
        ValueError: If *frames* is empty (raised by ``pd.concat``).
    """
    merged = pd.concat(frames)
    merged.drop_duplicates(subset='amenitycode', keep='first', inplace=True)
    return merged


allFrames = load_amenity_frames(source, skip_path=result_path)
print(len(allFrames))
if allFrames:
    totFrame = merge_amenity_frames(allFrames)
    totFrame.to_csv(result_path)
else:
    # pd.concat refuses an empty list; report the situation instead of
    # crashing with an opaque ValueError.
    print('No input files found under ' + source)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment