Created
September 15, 2023 09:56
-
-
Save yanweijia/bdcb40adbe8fc3c4ef0ee85c60b382dc to your computer and use it in GitHub Desktop.
敏感信息脱敏工具类
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
def desensitize_text(input_text): | |
# 替换 手机/证件号/车架号(10到20位大小写字母+数字) | |
id_pattern = re.compile(r'([a-zA-Z0-9]{2})[a-zA-Z0-9]*([a-zA-Z0-9]{2})') | |
input_text = re.sub(id_pattern, r'\1****\2', input_text) | |
# 地址脱敏, 文本包含以下任意三个, 就对这几个字前面两个字符用*代替 | |
addr_words = ["省","市","区","县", "镇","街", "号", "座", "楼","路"] | |
if sum(input_text.count(word) for word in addr_words) >= 3: | |
for word in addr_words: | |
addr_pattern = re.compile(r'(?<!\*\*)\w{2,}'+word) | |
matches = addr_pattern.search(input_text) | |
while None!=matches: | |
input_text = input_text[:matches.start()]+'**' + word +input_text[matches.end():] | |
matches = addr_pattern.search(input_text) | |
return input_text | |
# from faker import Faker | |
# fake = Faker('zh_CN') | |
# print(desensitize_text("我的手机号是 13812345678,身份证号是 320123198012345678,驾驶证号是 AB1234567890cdEF12345。")) | |
# print(desensitize_text("我的车架号:LJ1E6A2D1AG085640, 车辆订单号:8100045677713437, 车牌号是沪ABB2271")) | |
# print(desensitize_text("安徽省合肥市蜀山区秀康达城锦园 1 号楼101")) | |
# for i in range(5): | |
# print(desensitize_text(fake.address())) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment