"""Export word pairs of the Concepticon list Vulic-2020-2244 to scores-russian.tsv."""
from pyconcepticon import Concepticon

# Concepticon catalog handle; pass an explicit location instead if needed,
# e.g. Concepticon("path/to/concepticon").
concepticon = Concepticon()  # or Concepticon("path/to/concepticon")

# Index every concept of the "Vulic-2020-2244" concept list by its `number`.
cl = {
    entry.number: entry
    for entry in concepticon.conceptlists["Vulic-2020-2244"].concepts.values()
}
|
|
|
# msl maps each entry of a concept's "simlex_ids" attribute to one row:
#   [concepticon_id, concepticon_gloss, english, russian,
#    english_score, russian_score]
# A missing Concepticon id or gloss is stored as the empty string.
msl = {}

for concept in cl.values():
    # The six attributes are parallel lists; fetch them in a fixed order so
    # they can be walked in lockstep below.
    columns = [
        concept.attributes[key]
        for key in (
            "simlex_ids",
            "links",
            "english_in_source",
            "russian_in_source",
            "english_score",
            "russian_score",
        )
    ]
    # `_link` takes part in the zip (so it still bounds the iteration length)
    # but its value is not stored.
    for simlex_id, _link, english, russian, english_score, russian_score in zip(
        *columns
    ):
        msl[simlex_id] = [
            concept.concepticon_id or "",
            concept.concepticon_gloss or "",
            english,
            russian,
            english_score,
            russian_score,
        ]
|
|
|
# NOTE(review): `pairs` is never read in the code visible here; it is kept in
# case something further down relies on it.
pairs = []

# Write one tab-separated row per word pair to "scores-russian.tsv".
#
# The encoding is set explicitly: the RUSSIAN_* columns presumably hold
# Cyrillic text, and the platform default encoding (used when `encoding` is
# omitted) is not guaranteed to be able to represent it.
with open("scores-russian.tsv", "w", encoding="utf-8") as f:
    header = [
        "ID",
        "CONCEPTICON_ID_1",
        "CONCEPTICON_GLOSS_1",
        "CONCEPTICON_ID_2",
        "CONCEPTICON_GLOSS_2",
        "ENGLISH_1",
        "ENGLISH_2",
        "RUSSIAN_1",
        "RUSSIAN_2",
        "ENGLISH_SCORE",
        "RUSSIAN_SCORE",
    ]
    f.write("\t".join(header) + "\n")

    # Pair ids run from 1 to 1888 inclusive; the two members of pair `i` are
    # stored in `msl` under the keys "<i>:1" and "<i>:2". A missing key raises
    # KeyError.
    for i in range(1, 1889):
        cidA, cglA, engA, rusA, eng_scoreA, rus_scoreA = msl[str(i) + ":1"]
        cidB, cglB, engB, rusB, eng_scoreB, rus_scoreB = msl[str(i) + ":2"]

        # Both members of a pair must carry the same scores. These checks are
        # explicit raises rather than `assert` so they still run under
        # `python -O`.
        if rus_scoreA != rus_scoreB:
            raise ValueError(
                "pair {0}: Russian scores differ ({1!r} != {2!r})".format(
                    i, rus_scoreA, rus_scoreB
                )
            )
        if eng_scoreA != eng_scoreB:
            raise ValueError(
                "pair {0}: English scores differ ({1!r} != {2!r})".format(
                    i, eng_scoreA, eng_scoreB
                )
            )

        row = [
            str(i),
            cidA,
            cglA,
            cidB,
            cglB,
            engA,
            engB,
            rusA,
            rusB,
            # Scores are written with two decimal places.
            "{0:.2f}".format(eng_scoreA),
            "{0:.2f}".format(rus_scoreA),
        ]
        f.write("\t".join(row) + "\n")