Created
December 18, 2017 11:48
-
-
Save fvicente/91d7331b3a33f4e9d156253cf993f9ab to your computer and use it in GitHub Desktop.
Dictionary of the most used language per country, based on https://stackoverflow.com/a/22202770/2962940
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import contextlib
import pprint
import urllib
import xml.etree.ElementTree as etree
# Address the script has always downloaded the CLDR supplemental data from.
# NOTE(review): the CLDR sources may no longer be served from this location --
# confirm, and pass a different ``url`` to the function below if needed.
_CLDR_SUPPLEMENTAL_DATA_URL = (
    'http://unicode.org/repos/cldr/trunk/common/supplemental/supplementalData.xml'
)


def get_most_used_language_by_territory(url=_CLDR_SUPPLEMENTAL_DATA_URL):
    """Return a dict mapping each territory code to its most used language.

    Downloads the CLDR ``supplementalData.xml`` document from *url*, then, for
    every ``<territory>`` element under ``<territoryInfo>``, maps the
    territory's ``type`` attribute to the ``type`` attribute of its first
    ``<languagePopulation>`` child.

    Args:
        url: Location of the supplemental data XML document. Anything the
            standard ``urlopen`` accepts works, including ``file://`` URLs.
            Defaults to the address this script has always used.

    Returns:
        A dict of territory code -> language code. A territory with no
        ``<languagePopulation>`` child, or whose first one has no ``type``
        attribute, is mapped to ``'en'``.
    """
    # ``urllib.urlopen`` only exists on Python 2; on Python 3 the function
    # lives in ``urllib.request``. Resolve whichever one is available.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    # ``closing`` releases the connection even when reading fails, and also
    # works with Python 2 responses, which are not context managers.
    with contextlib.closing(urlopen(url)) as response:
        langtree = etree.XML(response.read())

    langs = {}
    for territory in langtree.find('territoryInfo').findall('territory'):
        # The languages appear to be ordered by population percent, so the
        # first entry is taken as the most used one. The 'official' status is
        # deliberately ignored: the most used language serves the user best.
        most_used = territory.find('languagePopulation')
        # Compare against None explicitly: an Element without children is
        # falsy, so a plain truth test would wrongly discard it.
        if most_used is not None:
            langs[territory.get('type')] = most_used.get('type', 'en')
        else:
            langs[territory.get('type')] = 'en'
    return langs
def main():
    """Fetch the territory -> most used language mapping and pretty-print it."""
    # Module-level helper with the same 4-space indent the script always used.
    pprint.pprint(get_most_used_language_by_territory(), indent=4)
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment