Last active
December 13, 2024 09:32
-
-
Save frostming/6a275875722191cdaf6b77ea50df9d27 to your computer and use it in GitHub Desktop.
Python JSON Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import annotations | |
import re | |
from typing import Any, Never | |
class JSONParseError(Exception):
    """Raised when the input text cannot be parsed as JSON."""
# Single-character escapes allowed after a backslash inside a JSON string,
# mapped to the character each one stands for.  "\u" escapes are not listed
# because they carry four extra hex digits.
ESCAPE_CHARS = {
    '"': '"',
    "\\": "\\",
    "/": "/",  # JSON permits an escaped solidus ("\/").
    "b": "\b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
}
class JSONParser: | |
def __init__(self, text: str) -> None: | |
self.text = text | |
self.pos = 0 | |
def _expect(self, text: str) -> bool: | |
if self.text[self.pos : self.pos + len(text)] == text: | |
self.pos += len(text) | |
return True | |
return False | |
def _expect_re(self, pattern: str) -> str | None: | |
match = re.compile(pattern).match(self.text, self.pos) | |
if match: | |
self.pos = match.end() | |
return match.group() | |
return None | |
def _parse_error(self, message: str = "", pos: int | None = None) -> Never: | |
if pos is None: | |
pos = self.pos | |
if not message: | |
message = f"Unexpected character '{self.text[pos]}'" | |
context = f"{self.text[pos-10:pos+5]}\n{' ' * min(10, pos)}^\n" | |
raise JSONParseError(f"at pos {pos}: {message}\n{context}") | |
def _skip_whitespace(self) -> None: | |
while self.pos < len(self.text) and self.text[self.pos].isspace(): | |
self.pos += 1 | |
def _parse_bool(self) -> bool: | |
if self._expect("true"): | |
return True | |
if self._expect("false"): | |
return False | |
self._parse_error() | |
def _parse_null(self) -> None: | |
if self._expect("null"): | |
return | |
self._parse_error() | |
def _parse_number(self) -> int | float: | |
pattern = r"-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][+-]?[0-9]+)?" | |
number = self._expect_re(pattern) | |
if number is None: | |
self._parse_error() | |
return int(number) if number.isdigit() else float(number) | |
def _parse_string(self) -> str: | |
pos = self.pos | |
text = self.text | |
if text[pos] != '"': | |
self._parse_error() | |
pos += 1 | |
buffer = "" | |
while pos < len(text): | |
if text[pos] == "\\": | |
pos += 1 | |
for escape_char in ESCAPE_CHARS: | |
if text[pos] == escape_char: | |
buffer += ESCAPE_CHARS[escape_char] | |
pos += 1 | |
break | |
else: | |
if text[pos] == "u": | |
pos += 1 | |
if pos + 4 >= len(text): | |
self._parse_error( | |
f"Truncated unicode escape sequence {text[pos - 2:pos + 4]}", | |
pos, | |
) | |
else: | |
try: | |
buffer += chr(int(text[pos : pos + 4], 16)) | |
pos += 4 | |
except ValueError: | |
self._parse_error( | |
f"Invalid unicode escape sequence {text[pos - 2:pos + 4]}", | |
pos, | |
) | |
else: | |
self._parse_error("Invalid escape sequence", pos) | |
elif text[pos] == '"': | |
pos += 1 | |
break | |
else: | |
buffer += text[pos] | |
pos += 1 | |
else: | |
self._parse_error("Unterminated string", pos) | |
self.pos = pos | |
return buffer | |
def _parse_array(self) -> list[Any]: | |
if not self._expect("["): | |
self._parse_error() | |
elements: list[Any] = [] | |
last_comma = False | |
while self.pos < len(self.text): | |
self._skip_whitespace() | |
if self._expect("]"): | |
if last_comma: | |
self._parse_error(pos=self.pos - 1) | |
return elements | |
if elements and not last_comma: | |
self._parse_error() | |
elements.append(self.parse_value()) | |
self._skip_whitespace() | |
last_comma = self._expect(",") | |
self._parse_error("Unclosed array") | |
def _parse_object(self) -> dict[str, Any]: | |
if not self._expect("{"): | |
self._parse_error() | |
obj: dict[str, Any] = {} | |
last_comma = False | |
while self.pos < len(self.text): | |
self._skip_whitespace() | |
if self._expect("}"): | |
if last_comma: | |
self._parse_error(pos=self.pos - 1) | |
return obj | |
if obj and not last_comma: | |
self._parse_error() | |
key = self._parse_string() | |
self._skip_whitespace() | |
if not self._expect(":"): | |
self._parse_error() | |
self._skip_whitespace() | |
obj[key] = self.parse_value() | |
self._skip_whitespace() | |
last_comma = self._expect(",") | |
self._parse_error("Unclosed object") | |
def parse_value(self) -> Any: | |
self._skip_whitespace() | |
if self.pos >= len(self.text): | |
self._parse_error("Unexpected end of input") | |
c = self.text[self.pos] | |
if c == '"': | |
return self._parse_string() | |
elif c == "[": | |
return self._parse_array() | |
elif c == "{": | |
return self._parse_object() | |
elif c in "tf": | |
return self._parse_bool() | |
elif c == "n": | |
self._parse_null() | |
return None | |
else: | |
return self._parse_number() | |
def loads(text: str) -> Any:
    """Parse *text* as a single JSON document and return the Python value.

    Whitespace around the value is allowed.  Raises ``JSONParseError`` when
    the text is not valid JSON or when anything other than whitespace
    follows the value.
    """
    parser = JSONParser(text)
    value = parser.parse_value()
    # A document holds exactly one value; leftovers such as "1 2" or
    # "truex" must be rejected rather than silently ignored.
    parser._skip_whitespace()
    if parser.pos < len(parser.text):
        parser._parse_error()
    return value
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment