-
-
Save sudo-shubham/613a396919bf5e8931516193c8ec5f4b to your computer and use it in GitHub Desktop.
Numbers to Chinese representations converter in Python. 中文数字转换
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
# Licensed under WTFPL or the Unlicense or CC0. | |
# This uses Python 3, but it's easy to port to Python 2 by changing | |
# strings to u'xx'. | |
import itertools | |
def num2chinese(num, big=False, simp=True, o=False, twoalt=False): | |
""" | |
Converts numbers to Chinese representations. | |
`big` : use financial characters. | |
`simp` : use simplified characters instead of traditional characters. | |
`o` : use 〇 for zero. | |
`twoalt`: use 两/兩 for two when appropriate. | |
Note that `o` and `twoalt` is ignored when `big` is used, | |
and `twoalt` is ignored when `o` is used for formal representations. | |
""" | |
# check num first | |
nd = str(num) | |
if abs(float(nd)) >= 1e48: | |
raise ValueError('number out of range') | |
elif 'e' in nd: | |
raise ValueError('scientific notation is not supported') | |
c_symbol = '正负点' if simp else '正負點' | |
if o: # formal | |
twoalt = False | |
if big: | |
c_basic = '零壹贰叁肆伍陆柒捌玖' if simp else '零壹貳參肆伍陸柒捌玖' | |
c_unit1 = '拾佰仟' | |
c_twoalt = '贰' if simp else '貳' | |
else: | |
c_basic = '〇一二三四五六七八九' if o else '零一二三四五六七八九' | |
c_unit1 = '十百千' | |
if twoalt: | |
c_twoalt = '两' if simp else '兩' | |
else: | |
c_twoalt = '二' | |
c_unit2 = '万亿兆京垓秭穰沟涧正载' if simp else '萬億兆京垓秭穰溝澗正載' | |
revuniq = lambda l: ''.join(k for k, g in itertools.groupby(reversed(l))) | |
nd = str(num) | |
result = [] | |
if nd[0] == '+': | |
result.append(c_symbol[0]) | |
elif nd[0] == '-': | |
result.append(c_symbol[1]) | |
if '.' in nd: | |
integer, remainder = nd.lstrip('+-').split('.') | |
else: | |
integer, remainder = nd.lstrip('+-'), None | |
if int(integer): | |
splitted = [integer[max(i - 4, 0):i] | |
for i in range(len(integer), 0, -4)] | |
intresult = [] | |
for nu, unit in enumerate(splitted): | |
# special cases | |
if int(unit) == 0: # 0000 | |
intresult.append(c_basic[0]) | |
continue | |
elif nu > 0 and int(unit) == 2: # 0002 | |
intresult.append(c_twoalt + c_unit2[nu - 1]) | |
continue | |
ulist = [] | |
unit = unit.zfill(4) | |
for nc, ch in enumerate(reversed(unit)): | |
if ch == '0': | |
if ulist: # ???0 | |
ulist.append(c_basic[0]) | |
elif nc == 0: | |
ulist.append(c_basic[int(ch)]) | |
elif nc == 1 and ch == '1' and unit[1] == '0': | |
# special case for tens | |
# edit the 'elif' if you don't like | |
# 十四, 三千零十四, 三千三百一十四 | |
ulist.append(c_unit1[0]) | |
elif nc > 1 and ch == '2': | |
ulist.append(c_twoalt + c_unit1[nc - 1]) | |
else: | |
ulist.append(c_basic[int(ch)] + c_unit1[nc - 1]) | |
ustr = revuniq(ulist) | |
if nu == 0: | |
intresult.append(ustr) | |
else: | |
intresult.append(ustr + c_unit2[nu - 1]) | |
result.append(revuniq(intresult).strip(c_basic[0])) | |
else: | |
result.append(c_basic[0]) | |
if remainder: | |
result.append(c_symbol[2]) | |
result.append(''.join(c_basic[int(ch)] for ch in remainder)) | |
return ''.join(result) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment