Skip to content

Instantly share code, notes, and snippets.

@trungkak
Last active July 3, 2018 06:27
Show Gist options
  • Save trungkak/1a70c3727c31d1ccf213e659fcd91afb to your computer and use it in GitHub Desktop.
Save trungkak/1a70c3727c31d1ccf213e659fcd91afb to your computer and use it in GitHub Desktop.
def words_map(filepaths, start_pos, end_post, word_pos, delim=' '):
    """Index every word occurrence found in a set of delimited text files.

    Each non-empty line of each file is split on ``delim``; one field is
    taken as the word and two others as its start and end markers.

    Args:
        filepaths: Iterable of paths to the text files to read.
        start_pos: Index of the field holding the start value.
        end_post: Index of the field holding the end value (the parameter
            name is kept as-is for backward compatibility).
        word_pos: Index of the field holding the word (may be negative,
            e.g. ``-1`` for the last field).
        delim: Field separator passed to ``str.split``. Defaults to a
            single space.

    Returns:
        A nested dict ``{word: {filepath: [{'start': ..., 'end': ...}]}}``.
        Field values are kept as the strings read from the file; each
        word/file pair lists its occurrences in file order.

    Raises:
        OSError: If a file cannot be opened.
        IndexError: If a non-empty line has too few fields for the
            requested positions.
    """
    mapper = {}
    for filepath in filepaths:
        with open(filepath, 'r') as f:
            for line in f:
                # Drop the line terminator before splitting; otherwise the
                # last field keeps a trailing '\n' and the same word gets a
                # different key depending on whether its line ends the file.
                line = line.rstrip('\n')
                if not line:
                    # Empty lines carry no fields; skip them instead of
                    # failing with IndexError.
                    continue
                row = line.split(delim)
                occurrences = mapper.setdefault(row[word_pos], {})
                occurrences.setdefault(filepath, []).append({
                    'start': row[start_pos],
                    'end': row[end_post],
                })
    return mapper
# Example run: start value in field 0, end value in field 1, word in the
# last field of each line of the two sample files.
print(
    words_map(
        filepaths=['file1.txt', 'file2.txt'],
        start_pos=0,
        end_post=1,
        word_pos=-1,
    )
)
INPUT:
file1.txt
0.0 1.0 THIS
1.1 1.3 IS
1.4 1.6 AMERICA
file2.txt
0.0 0.6 COLUMBUS
0.7 1.5 DISCOVERED
1.6 2.0 AMERICA
OUTPUT:
{
'THIS\n': {
'file1.txt': [{'start': '0.0', 'end': '1.0'}]
},
'IS\n': {
'file1.txt': [{'start': '1.1', 'end': '1.3'}]
},
'AMERICA\n': {
'file1.txt': [{'start': '1.4', 'end': '1.6'}],
'file2.txt': [{'start': '1.6', 'end': '2.0'}]
},
'COLUMBUS\n': {
'file2.txt': [{'start': '0.0', 'end': '0.6'}]
},
'DISCOVERED\n': {
'file2.txt': [{'start': '0.7', 'end': '1.5'}]
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment