Skip to content

Instantly share code, notes, and snippets.

@frostming
Last active December 13, 2024 09:32
Show Gist options
  • Save frostming/6a275875722191cdaf6b77ea50df9d27 to your computer and use it in GitHub Desktop.
Save frostming/6a275875722191cdaf6b77ea50df9d27 to your computer and use it in GitHub Desktop.
Python JSON Parser
from __future__ import annotations
import re
from typing import Any, Never
class JSONParseError(Exception):
    """Raised by the parser when the input text is not well-formed JSON."""
# Single-character escapes that may follow a backslash inside a JSON string,
# mapped to the character each one stands for.  The "\uXXXX" form is not
# listed here because it carries four hex digits and is decoded separately.
ESCAPE_CHARS = {
    '"': '"',
    "\\": "\\",
    "/": "/",  # the JSON grammar allows an escaped solidus ("\/")
    "b": "\b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
}
class JSONParser:
def __init__(self, text: str) -> None:
self.text = text
self.pos = 0
def _expect(self, text: str) -> bool:
if self.text[self.pos : self.pos + len(text)] == text:
self.pos += len(text)
return True
return False
def _expect_re(self, pattern: str) -> str | None:
match = re.compile(pattern).match(self.text, self.pos)
if match:
self.pos = match.end()
return match.group()
return None
def _parse_error(self, message: str = "", pos: int | None = None) -> Never:
if pos is None:
pos = self.pos
if not message:
message = f"Unexpected character '{self.text[pos]}'"
context = f"{self.text[pos-10:pos+5]}\n{' ' * min(10, pos)}^\n"
raise JSONParseError(f"at pos {pos}: {message}\n{context}")
def _skip_whitespace(self) -> None:
while self.pos < len(self.text) and self.text[self.pos].isspace():
self.pos += 1
def _parse_bool(self) -> bool:
if self._expect("true"):
return True
if self._expect("false"):
return False
self._parse_error()
def _parse_null(self) -> None:
if self._expect("null"):
return
self._parse_error()
def _parse_number(self) -> int | float:
pattern = r"-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][+-]?[0-9]+)?"
number = self._expect_re(pattern)
if number is None:
self._parse_error()
return int(number) if number.isdigit() else float(number)
def _parse_string(self) -> str:
pos = self.pos
text = self.text
if text[pos] != '"':
self._parse_error()
pos += 1
buffer = ""
while pos < len(text):
if text[pos] == "\\":
pos += 1
for escape_char in ESCAPE_CHARS:
if text[pos] == escape_char:
buffer += ESCAPE_CHARS[escape_char]
pos += 1
break
else:
if text[pos] == "u":
pos += 1
if pos + 4 >= len(text):
self._parse_error(
f"Truncated unicode escape sequence {text[pos - 2:pos + 4]}",
pos,
)
else:
try:
buffer += chr(int(text[pos : pos + 4], 16))
pos += 4
except ValueError:
self._parse_error(
f"Invalid unicode escape sequence {text[pos - 2:pos + 4]}",
pos,
)
else:
self._parse_error("Invalid escape sequence", pos)
elif text[pos] == '"':
pos += 1
break
else:
buffer += text[pos]
pos += 1
else:
self._parse_error("Unterminated string", pos)
self.pos = pos
return buffer
def _parse_array(self) -> list[Any]:
if not self._expect("["):
self._parse_error()
elements: list[Any] = []
last_comma = False
while self.pos < len(self.text):
self._skip_whitespace()
if self._expect("]"):
if last_comma:
self._parse_error(pos=self.pos - 1)
return elements
if elements and not last_comma:
self._parse_error()
elements.append(self.parse_value())
self._skip_whitespace()
last_comma = self._expect(",")
self._parse_error("Unclosed array")
def _parse_object(self) -> dict[str, Any]:
if not self._expect("{"):
self._parse_error()
obj: dict[str, Any] = {}
last_comma = False
while self.pos < len(self.text):
self._skip_whitespace()
if self._expect("}"):
if last_comma:
self._parse_error(pos=self.pos - 1)
return obj
if obj and not last_comma:
self._parse_error()
key = self._parse_string()
self._skip_whitespace()
if not self._expect(":"):
self._parse_error()
self._skip_whitespace()
obj[key] = self.parse_value()
self._skip_whitespace()
last_comma = self._expect(",")
self._parse_error("Unclosed object")
def parse_value(self) -> Any:
self._skip_whitespace()
if self.pos >= len(self.text):
self._parse_error("Unexpected end of input")
c = self.text[self.pos]
if c == '"':
return self._parse_string()
elif c == "[":
return self._parse_array()
elif c == "{":
return self._parse_object()
elif c in "tf":
return self._parse_bool()
elif c == "n":
self._parse_null()
return None
else:
return self._parse_number()
def loads(text: str) -> Any:
    """Parse *text* as a single JSON document and return the Python value.

    Whitespace around the value is allowed.  Any other text after the
    value raises ``JSONParseError``, as does malformed JSON.
    """
    parser = JSONParser(text)
    value = parser.parse_value()
    # parse_value stops after the first value; without this check input
    # such as "1 2" or "[] x" would be accepted with the remainder dropped.
    parser._skip_whitespace()
    if parser.pos < len(parser.text):
        parser._parse_error("Unexpected data after JSON value")
    return value
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment