Forked from sssurvey/monument_string_android_ios_cleaning.py
Created
May 6, 2019 09:29
-
-
Save sdex/99c42f6d9c0ffe446fef6083da53a92a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Python 3 !! | |
# folder structures: | |
''' | |
haominshi@MacBook-Pro-de-Haomin:~/Desktop$ tree diff | |
diff | |
├── Android_xml_content.txt | |
├── Android_xml_key.txt | |
├── Android_xml_key_stylized.txt | |
├── android_string_sorted.txt | |
├── android_string_sorted_final.txt | |
├── clean.py // this is this gist file | |
├── iOS_git.txt | |
├── iOS_git_cleaned.txt | |
├── iOS_git_en.txt | |
├── iOS_git_en_lower_case.txt | |
├── iOS_string_matched_android.txt | |
├── iOS_string_sorted.txt | |
├── langs | |
│ ├── english.txt | |
│ ├── english_sort.txt | |
│ ├── franch.txt | |
│ ├── franch_sort.txt | |
│ ├── german.txt | |
│ ├── german_sort.txt | |
│ └── strings_comparisons_with_title.txt | |
├── original.txt | |
├── text2.txt | |
└── text2_cleaned_all.txt | |
1 directory, 22 files | |
''' | |
import re | |
# remove all the blank lines in the stream | |
def clean_spaces_in_file(read_file_dir, write_file_dir):
    """Copy a text file, dropping every line that consists only of a newline.

    Lines that contain other whitespace (for example a single space) are
    kept unchanged; only lines equal to "\\n" are removed.

    Args:
        read_file_dir: Path of the text file to read.
        write_file_dir: Path of the file to create or overwrite.
    """
    # The context managers close both files, which also guarantees the
    # output is flushed to disk before the function returns.
    with open(read_file_dir, "r") as file_r, \
            open(write_file_dir, "w") as file_w:
        file_w.writelines(line for line in file_r if line != "\n")
# Extract the quoted key of each line and remove its double quotes and spaces,
# e.g. the line '"some key" = "value";' yields 'somekey'.
def clean_file_format_sorted(read_file_dir, write_file_dir):
    """Extract the quoted key of every line and write the keys sorted.

    For each input line, the text matched by the pattern '"(.*)" ' is taken
    and its double quotes and spaces are removed. Lines without a match
    produce a separator row of pipes that is written immediately, so all
    separator rows come before the sorted keys in the output file.

    Args:
        read_file_dir: Path of the text file to read.
        write_file_dir: Path of the file to create or overwrite.
    """
    key_pattern = re.compile('"(.*)" ')  # compiled once, outside the loop
    separator = "||||||||||||||||||||||||||||||||||||||||||||||||||| \n"
    keys = []
    # ``with`` closes (and therefore flushes) both files on exit.
    with open(read_file_dir, "r") as file_r, \
            open(write_file_dir, "w") as file_w:
        for line in file_r:
            match = key_pattern.search(line)
            if match is None:
                file_w.write(separator)
                continue
            keys.append(match.group().replace('"', '').replace(' ', ''))
        keys.sort()
        for key in keys:
            # Compare by value; ``is`` on a string literal tests identity.
            if key == "\n":
                continue
            file_w.write(key + "\n")
def sort_strings(read_file_dir, write_file_dir):
    """Write the lines of a text file in sorted order, one per output line.

    Lines read from a file already carry their newline, so a newline is
    appended only when it is missing (typically on the last line). Appending
    one unconditionally would put a blank line after every entry.

    Args:
        read_file_dir: Path of the text file to read.
        write_file_dir: Path of the file to create or overwrite.
    """
    with open(read_file_dir, "r") as file_r:
        sorted_lines = sorted(file_r)
    with open(write_file_dir, "w") as file_w:
        for line in sorted_lines:
            file_w.write(line if line.endswith("\n") else line + "\n")
# This function takes the sorted language files (./langs/english_sort.txt, etc.)
# and writes each string together with its translations to
# "./langs/strings_comparisons_with_title.txt", grouped by title.
def group_different_langs_via_title(
        de_dir, fr_dir, en_dir,
        write_file_dir="./langs/strings_comparisons_with_title.txt"):
    """Interleave three line-aligned files as English, French, German rows.

    Row ``i`` of the output is line ``i`` of the English file, followed by
    line ``i`` of the French file and line ``i`` of the German file. The
    lines are concatenated as read, including their newlines.

    If the three files do not have the same number of lines, nothing is
    written (the output file is still created/truncated) and a message is
    printed so the mismatch does not pass unnoticed.

    Args:
        de_dir: Path of the German input file.
        fr_dir: Path of the French input file.
        en_dir: Path of the English input file.
        write_file_dir: Path of the output file; defaults to the location
            that used to be hard-coded.
    """
    with open(de_dir, "r") as de_r:
        de_arr = de_r.readlines()
    with open(fr_dir, "r") as fr_r:
        fr_arr = fr_r.readlines()
    with open(en_dir, "r") as en_r:
        en_arr = en_r.readlines()

    # Opened before the length check so the output is truncated even when
    # nothing is written, exactly as before.
    with open(write_file_dir, "w") as file_w:
        if not (len(en_arr) == len(de_arr) == len(fr_arr)):
            print("group_different_langs_via_title: line counts differ "
                  "(en={}, fr={}, de={}); nothing written".format(
                      len(en_arr), len(fr_arr), len(de_arr)))
            return
        for en_line, fr_line, de_line in zip(en_arr, fr_arr, de_arr):
            file_w.write(en_line + fr_line + de_line)
def concatanate(base_file_dir="./original.txt",
                strings_file_dir="./iOS_git_en.txt"):
    """Print the title of every strings-file line containing a base string.

    Step 1 collects the stripped, non-empty lines of the base file and
    prints them. Step 2 looks at every line of the strings file for each
    base string: when the base string occurs in the line, the text matched
    by the pattern '"(.*)" = ' is recorded, or a separator row of pipes if
    the line has no such match. The recorded titles are printed last.

    Args:
        base_file_dir: Path of the file holding one base string per line.
        strings_file_dir: Path of the file whose lines are searched.
    """
    # step 1: save all the base strings to a list. Lines that are empty
    # after stripping are skipped: an empty string is "in" every line and
    # would match the whole strings file.
    with open(base_file_dir, "r") as basefile:
        original_arr_title = [
            line.strip() for line in basefile if line.strip()]

    # Read the strings file into a list once. A file object can only be
    # iterated a single time, so looping over it directly inside the
    # per-sentence loop would find matches for the first sentence only.
    with open(strings_file_dir, "r") as strings_file:
        strings_lines = strings_file.readlines()

    for sentence in original_arr_title:
        print(sentence)

    # step 2: use the base strings to look up their titles
    title_pattern = re.compile('"(.*)" = ')
    separator = "||||||||||||||||||||||||||||||||||||||||||||||||||| \n"
    title_arr = []
    for sentence in original_arr_title:
        for line in strings_lines:
            if sentence not in line:
                continue
            match = title_pattern.search(line)
            title_arr.append(separator if match is None else match.group())

    for title in title_arr:
        print(title)
def everythingToLowerCase(intput_filename, output_filename):
    """Copy a text file with every line converted to lower case.

    Args:
        intput_filename: Path of the text file to read.
        output_filename: Path of the file to create or overwrite.
    """
    # ``with`` closes both files so the output is flushed on return.
    with open(intput_filename, 'r') as intput_file_r, \
            open(output_filename, 'w') as output_file_w:
        for line in intput_file_r:
            output_file_w.write(line.lower())
def stylizeXlsForDiff_1(intput_filename_1, intput_filename_2,
                        output_filename, trailing=""):
    """Pair the lines of two files as '"<line 1>"<trailing>"<line 2>"' rows.

    Every line of the first file is stripped, lower-cased and wrapped in
    double quotes, followed by ``trailing`` and the equally treated next
    line of the second file. When the second file runs out of lines, its
    part of the row is an empty pair of quotes.

    Args:
        intput_filename_1: Path of the file providing the left-hand values.
        intput_filename_2: Path of the file providing the right-hand values.
        output_filename: Path of the file to create or overwrite.
        trailing: Text placed between the two quoted values, e.g. " = ".
    """
    with open(intput_filename_1, 'r') as intput_file_r_1, \
            open(intput_filename_2, 'r') as intput_file_r_2, \
            open(output_filename, 'w') as output_file_w:
        for line in intput_file_r_1:
            left = line.strip().lower()
            # readline() returns '' once the second file is exhausted.
            right = intput_file_r_2.readline().lower().strip()
            output_file_w.write(
                '"' + left + '"' + trailing + '"' + right + '"\n')
def sortTxt(intput_filename, output_filename):
    """Write the lines of a text file in sorted order.

    A line that lacks a trailing newline (typically the last line of the
    input) gets one appended, so it cannot be glued to the line that
    follows it after sorting.

    Args:
        intput_filename: Path of the text file to read.
        output_filename: Path of the file to create or overwrite.
    """
    with open(intput_filename, "r") as input_file_r:
        sorted_lines = sorted(input_file_r)
    with open(output_filename, "w") as output_file_w:
        for line in sorted_lines:
            output_file_w.write(line if line.endswith("\n") else line + "\n")
def extractAndroidStringFromIOS(intput_filename, input_keys, output_filename):
    """Copy the input lines that contain at least one of the given keys.

    Each matching line is written once, in input order, even when several
    keys occur in it.

    Args:
        intput_filename: Path of the text file whose lines are filtered.
        input_keys: Path of a file with one key per line; surrounding
            whitespace is stripped and blank lines are ignored.
        output_filename: Path of the file to create or overwrite.
    """
    # Load the keys once: a file object is exhausted after one pass, so
    # iterating it inside the per-line loop would test the first input line
    # only. Empty keys are dropped because '' is contained in every line.
    with open(input_keys, 'r') as input_keys_r:
        keys = [key for key in (raw.strip() for raw in input_keys_r) if key]

    with open(intput_filename, 'r') as input_file_r, \
            open(output_filename, 'w') as output_file_w:
        for line in input_file_r:
            if any(key in line for key in keys):
                output_file_w.write(line)
def stripAndAddTrailing(input_filename="./android_string_sorted.txt",
                        output_filename="./android_string_sorted_final.txt"):
    """Strip every line and terminate it with a semicolon.

    Each input line has its surrounding whitespace removed and is written
    followed by ";" and a newline. A blank input line therefore becomes a
    line holding only ";".

    Args:
        input_filename: Path of the text file to read; defaults to the
            location that used to be hard-coded.
        output_filename: Path of the file to create or overwrite; defaults
            to the location that used to be hard-coded.
    """
    with open(input_filename, 'r') as input_file_r, \
            open(output_filename, 'w') as output_file_w:
        for line in input_file_r:
            output_file_w.write(line.strip() + ";\n")
# Control: list the numbers of the steps to run, in order.
run_steps = [9]
for step in run_steps:
    # Steps are compared with ``==``: ``is`` tests object identity, which is
    # not guaranteed for equal ints and raises a SyntaxWarning on literals.
    if step == 1:
        # step 1 -
        # clean up the iOS git raw file and the sheets export, sorted so the
        # titles can be compared to identify unmatched strings
        # text2.txt // source = sheets-export-ENGLISH
        # iOS_git.txt // source = iOS git repo
        clean_file_format_sorted("./text2.txt", "./text2_cleaned_all.txt")
        clean_file_format_sorted("./iOS_git.txt", "./iOS_git_cleaned.txt")
        # Above CHECKED: all TITLE match
    if step == 2:
        # step 2 -
        # compare the differences in the english strings
        # we still use text2.txt and iOS_git.txt
        # we first sort each file and output it
        # text2.txt // source = sheets-export-ENGLISH
        sort_strings("./text2.txt", "./text2_sorted.txt")
        sort_strings("./iOS_git.txt", "./iOS_git_sorted.txt")
        # Above SORTED: now can do diff
    if step == 3:
        # step 3 -
        # compare the titles of DE, FR, EN to ensure there are no differences
        # or to identify differences
        clean_file_format_sorted("./langs/english.txt",
                                 "./langs/english_title_sort.txt")
        clean_file_format_sorted(
            "./langs/franch.txt", "./langs/franch_title_sort.txt")
        clean_file_format_sorted(
            "./langs/german.txt", "./langs/german_title_sort.txt")
        # Above is SORTED TITLE export: now can do diff - CHECKS OUT
    if step == 4:
        sort_strings("./langs/english.txt", "./langs/english_sort.txt")
        sort_strings("./langs/franch.txt", "./langs/franch_sort.txt")
        sort_strings("./langs/german.txt", "./langs/german_sort.txt")
        # Keyword arguments on purpose: the parameter order is
        # (de_dir, fr_dir, en_dir), so passing english/franch/german
        # positionally would swap the English and German files.
        group_different_langs_via_title(
            de_dir="./langs/german_sort.txt",
            fr_dir="./langs/franch_sort.txt",
            en_dir="./langs/english_sort.txt")
    if step == 5:
        concatanate()
    if step == 6:
        # set the iOS localized string output to lower case
        everythingToLowerCase("./iOS_git_en.txt",
                              "./iOS_git_en_lower_case.txt")
    if step == 7:
        # create a lower-cased file of the android localized strings for diff
        # the params for this method are files produced as follows:
        # first export the android studio xml string res to xlsx via
        # http://convertcsv.com/xml-to-csv.htm
        # put the keys in param 1 // just column 1
        # put the values in param 2 // just column 2
        # the method concatenates these first 2 params into one single file
        # "./Android_xml_key_stylized.txt", with " = " separating key and value
        stylizeXlsForDiff_1("./Android_xml_key.txt",
                            "./Android_xml_content.txt",
                            "./Android_xml_key_stylized.txt", " = ")
    if step == 8:
        # sort the output key/value pairs of the xml files to do comparisons
        sortTxt("./Android_xml_key_stylized.txt",
                "./android_string_sorted.txt")
        sortTxt("./iOS_git_en_lower_case.txt", "./iOS_string_sorted.txt")
    # Earlier attempt at step 9, kept for reference:
    # if step == 9:  # FAILED, the KEYS are different
    #     extractAndroidStringFromIOS(
    #         "./iOS_string_sorted.txt", "./Android_xml_key.txt",
    #         "./iOS_string_matched_android.txt")
    if step == 9:
        stripAndAddTrailing()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment