Last active
February 13, 2022 21:18
-
-
Save yjyao/2ae3fdfecb47a2ee9981e0165241ddba to your computer and use it in GitHub Desktop.
arabic to chinese number conversion 数字中文读法生成器
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import bisect | |
import re | |
import unittest | |
# Digit characters, indexed by the digit's value (zhdigits[3] == '三').
zhdigits = '零一二三四五六七八九'

# Names of the decimal places, keyed by how many digits trail that place
# (e.g. 2 trailing digits -> 百, 4 -> 万, 8 -> 亿).
zhplaces = {0: '', 1: '十', 2: '百', 3: '千', 4: '万', 8: '亿'}

# The place sizes in ascending order, so they can be searched with bisect.
zhplace_keys = sorted(zhplaces)
def numdigits(n):
    """Return the length of the string form of `n` with its sign dropped.

    For an integer this is its number of decimal digits.
    """
    magnitude = abs(n)
    return len(f'{magnitude}')
# # An alternative to using regex for 二 vs 两. | |
# def _place_with_alttwo(amount, unit): | |
# if amount == 2 and unit in set('亿万千'): | |
# return f'两{unit}' | |
# return f'{_zhnum(amount)}{unit}' | |
def _zhnum(n):
    """Spell `n` in Chinese numerals without any grammar adjustments.

    Expects a non-negative integer (zhnum passes abs(n)). The result is the
    raw reading: exceptions such as '十' in place of a leading '一十', or
    '两' in place of '二', are not applied here.
    """
    # A single digit maps directly to its character.
    if n < 10:
        return zhdigits[n]

    # Find the largest named place, measured in trailing digits, that fits
    # inside `n`:
    #
    #   n       | trailing digits | name
    #   ------- | --------------- | ----
    #   100     | 2               | 百
    #   10_0000 | 4               | 万
    trailing = numdigits(n) - 1
    split_at = zhplace_keys[bisect.bisect_right(zhplace_keys, trailing) - 1]

    # Cut `n` at that place and spell each side recursively:
    #
    #     reading(head) + place name + reading(tail)
    #
    # When the tail has leading zeros (it is shorter than the place size),
    # a single '零' is read in front of it; a tail of zero is not read at all.
    #
    #            208|0300
    #           /        \
    #     2|08    万  零   3|00
    #     /  \             /  \
    #   二 百 零 八       三 百 (empty)
    head, tail = divmod(n, 10 ** split_at)
    pieces = [_zhnum(head), zhplaces[split_at]]
    if tail:
        if numdigits(tail) != split_at:
            pieces.append(zhdigits[0])
        pieces.append(_zhnum(tail))
    return ''.join(pieces)
def zhnum(n):
    """Return the Chinese reading of the integer `n`.

    The raw digit-by-digit reading comes from `_zhnum`; two spoken-language
    adjustments are then applied:

    * a leading '一十' is shortened to '十' (10 -> '十', 14 -> '十四');
    * '二' directly before 千/万/亿 becomes '两', unless it follows '零' or
      '十' (2000 -> '两千', but 12_0000 -> '十二万').

    Negative numbers are prefixed with '负'.
    """
    reading = _zhnum(abs(n))
    # Adjust the magnitude *before* adding the sign. Anchoring '^一十' on a
    # string that already starts with '负' would never match, leaving
    # '负一十' for -10 instead of '负十'.
    reading = re.sub(r'^一十', '十', reading)
    reading = re.sub(r'(?<![零十])二(?=[千万亿])', r'两', reading)
    sign = '负' if n < 0 else ''
    return sign + reading
class TestZhnum(unittest.TestCase):
    """Checks zhnum against a table of known readings."""

    # (number, expected reading) pairs.
    CASES = (
        (-1, '负一'),
        (0, '零'),
        (-0, '零'),
        (6, '六'),
        (10, '十'),
        (14, '十四'),
        (28, '二十八'),
        (59, '五十九'),
        (100, '一百'),
        (101, '一百零一'),
        (110, '一百一十'),
        (132, '一百三十二'),
        (1000, '一千'),
        (2001, '两千零一'),
        (3010, '三千零一十'),
        (4012, '四千零一十二'),
        (5230, '五千二百三十'),
        (6234, '六千二百三十四'),
        (9999, '九千九百九十九'),
        (1_0000, '一万'),
        (20_0000, '二十万'),
        (123_4567, '一百二十三万四千五百六十七'),
        (500_1024, '五百万一千零二十四'),
        (360_5000, '三百六十万五千'),
        (3_0000_0000, '三亿'),
        (2_7600_2010, '两亿七千六百万两千零一十'),
        (2_0000_0000_0000_0000, '两亿亿'),
        (2_2002_2222, '两亿两千零二万两千二百二十二'),
        (22_2222_2222, '二十二亿两千二百二十二万两千二百二十二'),
    )

    def test(self):
        # subTest reports every failing case instead of stopping at the
        # first one, and labels each failure with the offending input.
        for number, expected in self.CASES:
            with self.subTest(number=number):
                self.assertEqual(zhnum(number), expected)
# Run the test suite when this file is executed as a script (not on import).
if __name__ == '__main__':
    unittest.main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment