Skip to content

Instantly share code, notes, and snippets.

@ClarkeRemy
Last active November 7, 2025 06:48
Show Gist options
  • Save ClarkeRemy/7a0abcc3d42d364c630b6c12b4819bf2 to your computer and use it in GitHub Desktop.
Save ClarkeRemy/7a0abcc3d42d364c630b6c12b4819bf2 to your computer and use it in GitHub Desktop.
JSON object to indicies format
use std::{fmt::Debug, rc::Rc};
const THE_JSON : &str = r#"{
"first_name": "John",
"last_name": "Smith",
"is_alive": true,
"age": 27,
"address": {
"street_address": "21 2nd Street",
"city": "New York",
"state": "NY",
"postal_code": "10021-3100"
},
"phone_numbers": [
{
"type": "home",
"number": "212 555-1234"
},
{
"type": "office",
"number": "646 555-4567"
}
],
"children": ["Catherine", "Thomas", "Trevor"],
"spouse": null
}"#;
/// ```ignore
/// integer
/// digit
/// onenine digits
/// '-' digit
/// '-' onenine digits
/// ```
fn leading_signed_integer_length(i : &str) -> usize {
let bytes = i.as_bytes();
let cursor = if let [b'-', .. ] = bytes {1} else {0};
cursor+leading_digits_length(&i[cursor..])
}
/// ```ignore
/// fraction
/// ""
/// '.' digits
/// ```
fn leading_fraction_length(f : &str) -> usize{
if let [ b'.', frac @ .. ] = f.as_bytes()
&& let [b'0'..=b'9', ..] = frac{
1+leading_digits_length(&f[1..])
} else { 0 }
}
/// ```ignore
/// exponent
/// ""
/// 'E' sign digits
/// 'e' sign digits
/// ```
fn leading_exponent_length(e : &str) -> usize {
match e.as_bytes() {
[b'e'|b'E', b'+' | b'*', b'0'..=b'9', ..] => 2+leading_digits_length(&e[2..]),
[b'e'|b'E', b'0'..=b'9', ..] => 1+leading_digits_length(&e[1..]),
_ => 0,
}
}
/// ```ignore
/// string
/// '"' characters '"'
///
/// characters
/// ""
/// character characters
///
/// character
/// '0020' . '10FFFF' - '"' - '\'
/// '\' escape
///
/// ```
fn parse_escaped_string(s : &str) -> Result<(&str, &str), &str> {
match parse_ascii(s) {
Some((b'"', rest0)) => {
let mut state = rest0;
loop {
let i = leading_chars_length(state, |c| {
matches!(c, '\u{0020}'..='\u{10FFFF}') && !matches!(c, '"'|'\\')
});
match parse_ascii(&state[i..]) {
Some((b'\\', rest1)) => (_, state) = parse_escape(rest1)?,
Some((b'"', rest1)) => return Ok((&s[..s.len()-rest1.len()],rest1)),
_ => return Err(&state[i..]),
}
}
}
_ => return Err(s)
}
// WHERE :
/// ```ignore
/// escape
/// '"'
/// '\'
/// '/'
/// 'b'
/// 'f'
/// 'n'
/// 'r'
/// 't'
/// 'u' hex hex hex hex
///
/// hex
/// digit
/// 'A' . 'F'
/// 'a' . 'f'
/// ```
fn parse_escape(e : &str) -> Result<(&str, &str), &str> {
match parse_ascii(e) {
Some((b'"'|b'\\'|b'/'|b'f'|b'n'|b'r'|b't', rest)) => Ok((&e[..rest.len()], rest)),
Some((b'u', rest)) => {
// hex
let mut loop_state = rest;
'hex : for _ in 0..4 {
if let Some((h, rest1)) = parse_ascii(loop_state) && !h.is_ascii_hexdigit() {
loop_state = rest1;
continue 'hex;
}
return Err(loop_state);
}
Ok((&e[..loop_state.len()], loop_state))
},
_ => return Err(e),
}
}
}
/// ```ignore
/// digits
/// digit
/// digit digits
///
/// digit
/// '0'
/// onenine
///
/// onenine
/// '1' . '9'
/// ```
fn leading_digits_length(s : &str) -> usize {
leading_chars_length(s, |c|c.is_ascii_digit())
}
fn leading_chars_length(s : &str, mut pred : impl FnMut(char)->bool) -> usize {
let mut index = 0;
for (i, c) in str::char_indices(&s) {
if pred(c) { index = i+c.len_utf8() ; continue; }
break
}
index
}
fn leading_whitespace_length(s : &str) -> usize {
leading_chars_length(s, char::is_whitespace)
}
fn parse_ascii(s : &str) -> Option<(u8, &str)> {
if let [b, rest @ ..] = s.as_bytes() && b.is_ascii() {
Some( (*b, unsafe { str::from_utf8_unchecked(rest) }) )
} else {
None
}
}
fn next_token<'a>(src : &'a str) -> Result<(JsonToken<'a>, &'a str), &'a str> {
fn stripped_whitespace(s : &str) -> &str { &s[leading_whitespace_length(s)..]}
fn leading_ascii(s : &str) -> Option<(u8, &str)> {parse_ascii(stripped_whitespace(s))}
let stripped_whitespace = stripped_whitespace(src);
// element
// ws value ws
//
// value
// object
// array
// string
// number
// "true"
// "false"
// "null"
match parse_ascii(stripped_whitespace) {
None => Err(""),
Some((b, rest)) => {
match b {
// object
// '{' ws '}'
// '{' members '}'
b'{' => {
match leading_ascii(rest) {
None => Err(src),
Some((b'}', rest_)) => Ok((JsonToken::EmptyObject, rest_)),
_ => Ok((JsonToken::OpenObject, rest ))
}
}
b'}' => {
Ok((JsonToken::CloseObject, rest))
}
// array
// '[' ws ']'
// '[' elements ']'
b'[' => {
match leading_ascii(src) {
None => Err(src),
Some((b']', rest_)) => Ok((JsonToken::EmptyArray , rest_)),
_ => Ok((JsonToken::OpenArray(0), rest ))
}
}
b']' => {
Ok((JsonToken::CloseArray, rest))
}
// number
// integer fraction exponent
b'0'..=b'9' => {
let int_end = leading_signed_integer_length(rest);
let frc_end = int_end+leading_fraction_length(&rest[int_end..]);
let exp_end = frc_end+leading_exponent_length(&rest[frc_end..]);
Ok((JsonToken::Number(&rest[..exp_end]), &rest[exp_end..]))
}
// string
// '"' characters '"'
b'"' => {
let (string, rest1) = parse_escaped_string(stripped_whitespace)?;
Ok((JsonToken::String(string), rest1))
}
// elements
// element
// element ',' elements
b',' => {
Ok((JsonToken::Comma, rest))
}
// member
// ws string ws ':' element
b':' => {
Ok((JsonToken::Colon, rest))
}
b't' | b'f' | b'n' => {
match stripped_whitespace.split_at(4) {
("true", rest1) => Ok((JsonToken::True, rest1)),
("null", rest1) => Ok((JsonToken::Null, rest1)),
_ => match src.split_at(5) {
("false", rest1) => Ok((JsonToken::False, rest1)),
_=> Err(src)
},
}
}
_ => Err(src)
}
}
}
}
#[derive(Clone)]
struct JsonParser<'a>{
stack : LList<JsonToken<'a>>,
src : &'a str,
}
macro_rules! base_cases {() => {
JsonToken::Null
| JsonToken::True
| JsonToken::False
| JsonToken::Number(_)
| JsonToken::String(_)
| JsonToken::EmptyObject
| JsonToken::EmptyArray
};}
impl<'a> JsonParser<'a> {
const fn new(src : &'a str) -> Self { JsonParser{ stack: LList::nil(), src } }
fn write_next_path(self)->Result<(String, Self), &'a str> {
let next = self.advance_to_next_base_case()?;
let mut stack = next.stack.top();
let mut reversed = LList::<&JsonToken>::nil();
loop {
(stack, reversed) = match stack {
Some((JsonToken::Colon,l)) => (l.top(), reversed),
Some((t,l)) => (l.top(), LList::cons(t, reversed)),
None => break ,
}
}
// we used a linked list representation to be more "functional, but it means we need to reverse it"
let mut rev_cursor = reversed.top();
let mut string = String::new();
string.push_str("JSON");
loop {
(rev_cursor) = match rev_cursor {
Some((t,l)) => {
match t {
JsonToken::OpenObject => {},
JsonToken::OpenArray(i) => string.push_str(&format!( "[{i}]")),
JsonToken::Member(member) => string.push_str(&format!( "[{member}]")),
// base cases
JsonToken::Null => string.push_str(" = null"),
JsonToken::True => string.push_str(" = true"),
JsonToken::False => string.push_str(" = false"),
JsonToken::String(s)
| JsonToken::Number(s) => string.push_str(&format!(" = {s}" )),
JsonToken::EmptyObject => string.push_str(" = {}"),
JsonToken::EmptyArray => string.push_str(" = []"),
// the following should have been filtered out
JsonToken::Colon
| JsonToken::Comma
| JsonToken::CloseObject
| JsonToken::CloseArray => unreachable!(),
}
l.top()
},
None => {string.push(';'); break Ok((string, next))},
}
}
}
fn advance_to_next_base_case(self) -> Result<Self, &'a str> {
let mut state = self;
loop {
state = match state.advance_token() {
Err(e) => return Err(e),
Ok(p) => {
match p.stack.top() {
None => return Ok(JsonParser { stack : LList(None), src : p.src}),
Some((t, _)) => {
if let base_cases!() = t {
return Ok(p);
} else {
p
}
},
}
},
}
}
}
fn advance_token(self) -> Result<Self, &'a str> {
macro_rules! closed {() => {
JsonToken::CloseArray | JsonToken::CloseObject
};}
let (token, next) = next_token(self.src)?;
// println!("{:?}", token);
match token {
JsonToken::Member(_) => unreachable!("tokenizer never creates members, ony strings"),
JsonToken::String(s) => if let Some((JsonToken::OpenObject, _)) = self.stack.top() {
Ok(JsonParser { stack: LList::cons(JsonToken::Member(s), self.stack), src: next })
} else {
Ok(JsonParser { stack: LList::cons(token, self.stack), src: next })
}
| JsonToken::Null
| JsonToken::True
| JsonToken::False
| JsonToken::Number(_)
| JsonToken::EmptyObject
| JsonToken::EmptyArray
| JsonToken::OpenObject
| JsonToken::OpenArray(_) => Ok(JsonParser { stack: LList::cons(token, self.stack), src: next }),
JsonToken::CloseObject => {
if let Some((base_cases!()|closed!(), s0)) = self.stack.top()
&& let Some((JsonToken::Colon, s1)) = s0.top()
&& let Some((JsonToken::Member(_), s2)) = s1.top()
&& let Some((JsonToken::OpenObject, s3)) = s2.top() {
Ok(JsonParser{stack : LList::cons(JsonToken::CloseObject, s3.clone()), src : next})
} else {
Err(next)
}
},
JsonToken::CloseArray => {
if let Some((base_cases!()|closed!(), s0)) = self.stack.top()
&& let Some((JsonToken::OpenArray(_), s2)) = s0.top() {
Ok(JsonParser{stack : LList::cons(JsonToken::CloseArray,s2.clone()), src : next})
} else {
Err(next)
}
},
JsonToken::Colon =>
match self.stack.top() {
Some((JsonToken::Member(_), _)) => Ok(JsonParser{ stack: LList::cons(token, self.stack), src: next }),
_ => Err(next),
},
JsonToken::Comma => {
if let Some((base_cases!()|closed!(), s0)) = self.stack.top() {
match s0.top() {
Some((JsonToken::OpenArray(i), s1)) => Ok(JsonParser{stack: LList::cons(JsonToken::OpenArray(i+1), s1.clone()), src : next}),
Some((JsonToken::Colon, s1)) => {
if let Some((JsonToken::Member(_), s2)) = s1.top()
&& let Some((JsonToken::OpenObject, _)) = s2.top() {
Ok(JsonParser{stack: s2.clone(), src: next})
} else {
Err(next)
}
}
_ => Err(next),
}
} else {
Err(next)
}
}
}
}
}
impl<'a> Debug for JsonParser<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "JsonParser{{ stack : {:?},\t\tsrc : ...{:?}... }}", self.stack, &self.src[0..30.min(self.src.len())])
}
}
#[derive(Clone)]
struct LList<T>(Option<Rc<(T,LList<T>)>>);
impl<T> LList<T> {
fn cons(t : T, l : LList<T>) -> Self { LList(Some(Rc::new((t,l))))}
const fn nil() -> Self { LList(None) }
fn top(&self) -> Option<&(T, LList<T>)> {
match self {
LList(None) => None,
LList(Some(rc)) => Some(&**rc),
}
}
}
impl<T:Debug> Debug for LList<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut cursor = self;
write!(f, "LList([",)?;
loop {
cursor = match cursor {
LList(None) => return write!(f,"])"),
LList(Some(rc)) => {
let (t,next @ LList(l)) = &**rc;
write!(f, "{:?}", t)?;
match l {
None => return write!(f,"])"),
Some(_) => {write!(f, ", ")?; next},
}
}
}
}
}
}
#[derive(Debug,Clone)]
enum JsonToken<'a> {
OpenObject,
CloseObject,
OpenArray(usize),
CloseArray,
Colon,
Comma,
Member(&'a str),
Null,
True,
False,
Number(&'a str),
String(&'a str),
EmptyObject,
EmptyArray,
}
fn main() {
let mut src = THE_JSON;
while let Ok((token, cont)) = next_token(src) {
// println!("{:?}\n\t\t\t{:?}...", token, &cont[..cont.len().min(20) ]);
src = cont
}
let mut parser = JsonParser::new(THE_JSON);
loop {
match parser.write_next_path() {
Ok((s, p)) => {
println!("{}", s);
parser = p
},
e @ Err(s) => {
if s.is_empty() {
println!("DONE");
} else {
println!("ERROR : {:?}", e);
}
break;
},
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment