Forked from WinstonN/flatten_additional_attributes.py
Created
March 16, 2021 18:41
-
-
Save speedupmate/fb1f5e4927ae62e4c3c8e9a8986b10b9 to your computer and use it in GitHub Desktop.
Flatten additional_attributes from Magento 2 export csv
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This script formats a magento product export file, and sets each | |
additional_attribute as a new column in a target file | |
""" | |
import csv | |
import pandas as pd | |
# testing files | |
# source_file = './importexport_605048471de17.csv' | |
# target_file = './importexport_605048471de177_formatted.csv' | |
# real files | |
source_file = './catalog_product_20210316_044037.csv' | |
target_file = './catalog_product_20210316_044037_formatted.csv' | |
def get_modified_headers(): | |
reader = csv.reader(open(source_file, 'r')) | |
headers = next(reader) | |
# print('headers original') | |
# print(headers) | |
# get all headers for additional_attributes | |
additional_attributes_headers = [] | |
i = 0 | |
for row in reader: | |
# print(f'processing row: {i}') | |
target_index = headers.index("additional_attributes") | |
attributes = row[target_index].split(",") | |
for attribute in attributes: | |
data = attribute.split("=") | |
if len(data) != 1: | |
if data[0] != 'publish_date': | |
header = data[0] | |
# add attribute as header | |
if header not in additional_attributes_headers: | |
additional_attributes_headers.append(header) | |
# iterator | |
i += 1 | |
# print('headers modified') | |
headers_modified = headers + additional_attributes_headers | |
# print(headers_modified) | |
return [headers_modified, additional_attributes_headers] | |
def run_with_writer(): | |
""" Main run function """ | |
reader = csv.reader(open(source_file, 'r')) | |
writer = csv.writer(open(target_file, 'w')) | |
# get original headers | |
headers = next(reader) | |
# print('original headers') | |
# print(headers) | |
# add additional attributes to headers | |
modified_headers = get_modified_headers()[0] | |
additional_attributes_headers = get_modified_headers()[1] | |
# print('modified headers') | |
# print(modified_headers) | |
# print('additional_attribute headers') | |
# print(additional_attributes_headers) | |
# write headers to file | |
writer.writerow(modified_headers) | |
# extract data and populate rows | |
i = 0 | |
for row in reader: | |
print(f'processing row: {i}') | |
# print('original row') | |
# print(row) | |
result = [None] * len(additional_attributes_headers) | |
# print(result) | |
# pre-populate result (to avoid shifting items as we add insert them at the index) | |
target_index = headers.index("additional_attributes") | |
# print(row[target_index]) | |
attributes = row[target_index].split(",") | |
# print(attributes) | |
for attribute in attributes: | |
# print(attribute) | |
data = attribute.split("=") | |
if len(data) != 1: | |
if data[0] != 'publish_date': | |
header = data[0] | |
value = data[1] | |
# print('get target index for value') | |
target_index = additional_attributes_headers.index(header) | |
# print(f'insert {value} at index {target_index}') | |
result[target_index] = value | |
modified_row = row + result | |
# print('modified headers') | |
# print(modified_headers) | |
# print('modified row') | |
# print(modified_row) | |
# write data | |
# write original row | |
# writer.writerow(row) | |
# write modified row | |
writer.writerow(modified_row) | |
# break loop | |
# if i == 10: | |
# break | |
i += 1 | |
# Main entry point | |
if __name__ == '__main__': | |
run_with_writer() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment