Created
April 14, 2023 14:45
-
-
Save blcarlson01/c0b748801e917d60e6e1ab964f2a4bd9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ujson : https://pypi.org/project/ujson/ | |
**** | |
# Public names exported by ``from <module> import *``.
# NOTE(review): "last_downloaded_timestamp" is exported here but no such
# function is defined in the visible part of this module -- confirm it exists.
__all__ = (
    "findall",
    "findall_list",
    "last_downloaded_timestamp",
    "replace",
    "replace_with_desc",
)

# Version string of this module.
__version__ = "1.1.0"
import datetime | |
import functools | |
import logging | |
import os.path | |
import re | |
import sys | |
import json | |
# Attach a do-nothing handler to this module's logger; any real logging
# configuration is left to the importing application.
logging.getLogger(__name__).addHandler(logging.NullHandler())

# Download endpoint
EMOJI_VERSION = "13.1"
URL = f"https://unicode.org/Public/emoji/{EMOJI_VERSION}/emoji-test.txt"

# This variable is updated automatically from scripts/download_codes.py
_LDT = datetime.datetime(2021, 7, 18, 19, 57, 25, 20304, tzinfo=datetime.timezone.utc)

# Load codes from file and compile regex pattern.
# The encoding is passed explicitly: without it ``open`` falls back to the
# locale's preferred encoding, which can fail (or mis-decode) on a JSON file
# that stores emoji as UTF-8.
with open(
    os.path.join(os.path.dirname(__file__), "codes.json"), encoding="utf-8"
) as f:
    # Mapping used below as ``{emoji code: description}``.
    _CODE_TO_DESC = json.load(f)

# Longest escaped codes first: regex alternation takes the first alternative
# that matches, so longer sequences must be tried before shorter codes that
# they contain.
_ESCAPED_CODES = sorted(
    (re.escape(code) for code in _CODE_TO_DESC), key=len, reverse=True
)
_EMOJI_PATTERN = re.compile("|".join(_ESCAPED_CODES))
def findall(string):
    """Collect the distinct emojis that occur in ``string``.

    :param string: The input text to search
    :type string: str
    :return: A dictionary of ``{emoji: description}`` with one entry per
        distinct emoji matched in ``string``
    :rtype: dict
    """
    # De-duplicate the matches first; each emoji is looked up only once.
    unique_emojis = set(_EMOJI_PATTERN.findall(string))
    descriptions = {}
    for emoji in unique_emojis:
        descriptions[emoji] = _CODE_TO_DESC[emoji]
    return descriptions
def findall_list(string, desc=True):
    """List every emoji occurrence in ``string``, duplicates included.

    :param string: The input text to search
    :type string: str
    :param desc: Whether to return the description rather than emoji
    :type desc: bool
    :return: A list of ``[description, ...]`` (or of the emojis themselves
        when ``desc`` is false) in the order in which they are found.
    :rtype: list
    """
    matches = _EMOJI_PATTERN.findall(string)
    if not desc:
        # Caller asked for the raw emojis, not their descriptions.
        return matches
    return [_CODE_TO_DESC[emoji] for emoji in matches]
def replace(string, repl=""):
    """Substitute every emoji in ``string`` with ``repl``.

    :param string: The input text to search
    :type string: str
    :param repl: Replacement handed to the compiled pattern's ``sub``;
        defaults to the empty string, which removes the emojis
    :type repl: str
    :return: Modified ``str`` with replacements made
    :rtype: str
    """
    substituted = _EMOJI_PATTERN.sub(repl, string)
    return substituted
def replace_with_desc(string, sep=":"):
    """Replace emojis in ``string`` with their description.

    Each emoji is replaced by its description with ``sep`` placed
    immediately before and after it.

    The substitution is done in a single regex pass. Replacing emoji by
    emoji with ``str.replace`` can corrupt the text: when one emoji's code is
    a substring of another's (for example inside a multi-codepoint sequence),
    replacing the shorter one first breaks the longer one, and already
    substituted text gets scanned again.

    :param string: The input text to search
    :type string: str
    :param sep: String to put before and after the emoji description
    :type sep: str
    :return: New copy of ``string`` with replacements made and ``sep``
        immediately before and after each description
    :rtype: str
    """

    def _describe(match):
        # ``match.group(0)`` is the exact emoji code the pattern matched.
        return sep + _CODE_TO_DESC[match.group(0)] + sep

    return _EMOJI_PATTERN.sub(_describe, string)
def set_emoji_pattern():
    """Initialise the module-level emoji pattern if it is not set yet.

    When ``_EMOJI_PAT`` is ``None``, codes are loaded with
    ``_load_codes_from_file``, compiled into ``_EMOJI_PAT`` with
    ``_compile_codes``, and ``_CODE_TO_DESC`` is rebuilt from them.
    When ``_EMOJI_PAT`` is already set, nothing happens.

    NOTE(review): ``_EMOJI_PAT``, ``_load_codes_from_file`` and
    ``_compile_codes`` are not defined in the visible part of this module
    (the other functions use ``_EMOJI_PATTERN``) -- confirm this function is
    still wired up.
    """
    global _EMOJI_PAT, _CODE_TO_DESC
    if _EMOJI_PAT is not None:
        return
    loaded = _load_codes_from_file()
    _EMOJI_PAT = _compile_codes(loaded)
    # ``loaded`` is iterated as {description: codes}; invert it so that each
    # individual code maps to its description.
    _CODE_TO_DESC = {
        code: description
        for description, code_list in loaded.items()
        for code in code_list
    }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment