Skip to content

Instantly share code, notes, and snippets.

@yanweijia
Created September 15, 2023 09:56
Show Gist options
  • Save yanweijia/bdcb40adbe8fc3c4ef0ee85c60b382dc to your computer and use it in GitHub Desktop.
Save yanweijia/bdcb40adbe8fc3c4ef0ee85c60b382dc to your computer and use it in GitHub Desktop.
敏感信息脱敏工具类
import re
# Matches any run of four or more ASCII letters/digits and captures its first
# two and last two characters so they can be kept on either side of the mask.
# NOTE: the stated intent is phone / ID-card / VIN numbers of 10 to 20
# characters, but the pattern enforces no length beyond the 2 + 2 captured
# characters, so shorter alphanumeric runs are masked as well.
_ID_PATTERN = re.compile(r'([a-zA-Z0-9]{2})[a-zA-Z0-9]*([a-zA-Z0-9]{2})')

# Single-character address keywords (province, city, district, county, town,
# street, number, block, building, road), in the order they are processed.
_ADDR_WORDS = ("省", "市", "区", "县", "镇", "街", "号", "座", "楼", "路")

# Address masking only runs when the keyword occurrences in the text add up to
# at least this many (repeated keywords each count).
_ADDR_MIN_HITS = 3

# One compiled pattern per keyword: a run of two or more word characters
# immediately followed by the keyword. The negative lookbehind refuses a match
# that would start directly after "**", which keeps a keyword that has just
# been masked ("**省") from being swallowed into the next match.
_ADDR_PATTERNS = tuple(
    (word, re.compile(r'(?<!\*\*)\w{2,}' + word)) for word in _ADDR_WORDS
)


def desensitize_text(input_text):
    """Mask identifier-like and address-like fragments of a string.

    Two passes are applied:

    1. Every run of four or more ASCII letters/digits is reduced to its first
       two characters, ``****``, and its last two characters
       (``13812345678`` -> ``13****78``).
    2. If the address keywords occur at least three times in total, then for
       each keyword, in list order, every run of two or more word characters
       directly in front of it is replaced by ``**``. The run is greedy, so it
       can include ordinary text and even other keywords that appear earlier
       in the same run; all of it collapses to ``**``.

    Args:
        input_text: The text to mask. ``None`` is passed through unchanged.

    Returns:
        The masked text, or ``None`` when ``input_text`` is ``None``.
    """
    if input_text is None:
        return None

    masked = _ID_PATTERN.sub(r'\1****\2', input_text)

    keyword_hits = sum(masked.count(word) for word in _ADDR_WORDS)
    if keyword_hits >= _ADDR_MIN_HITS:
        for word, pattern in _ADDR_PATTERNS:
            # A single left-to-right substitution is enough: text to the left
            # of a replacement can never match again, and the inserted "**"
            # is not a word character, so rescanning from the start after each
            # replacement would find exactly the same matches.
            masked = pattern.sub('**' + word, masked)
    return masked
# from faker import Faker
# fake = Faker('zh_CN')
# print(desensitize_text("我的手机号是 13812345678,身份证号是 320123198012345678,驾驶证号是 AB1234567890cdEF12345。"))
# print(desensitize_text("我的车架号:LJ1E6A2D1AG085640, 车辆订单号:8100045677713437, 车牌号是沪ABB2271"))
# print(desensitize_text("安徽省合肥市蜀山区秀康达城锦园 1 号楼101"))
# for i in range(5):
# print(desensitize_text(fake.address()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment