Last active
November 7, 2025 06:48
-
-
Save ClarkeRemy/7a0abcc3d42d364c630b6c12b4819bf2 to your computer and use it in GitHub Desktop.
JSON object to indicies format
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| use std::{fmt::Debug, rc::Rc}; | |
| const THE_JSON : &str = r#"{ | |
| "first_name": "John", | |
| "last_name": "Smith", | |
| "is_alive": true, | |
| "age": 27, | |
| "address": { | |
| "street_address": "21 2nd Street", | |
| "city": "New York", | |
| "state": "NY", | |
| "postal_code": "10021-3100" | |
| }, | |
| "phone_numbers": [ | |
| { | |
| "type": "home", | |
| "number": "212 555-1234" | |
| }, | |
| { | |
| "type": "office", | |
| "number": "646 555-4567" | |
| } | |
| ], | |
| "children": ["Catherine", "Thomas", "Trevor"], | |
| "spouse": null | |
| }"#; | |
| /// ```ignore | |
| /// integer | |
| /// digit | |
| /// onenine digits | |
| /// '-' digit | |
| /// '-' onenine digits | |
| /// ``` | |
| fn leading_signed_integer_length(i : &str) -> usize { | |
| let bytes = i.as_bytes(); | |
| let cursor = if let [b'-', .. ] = bytes {1} else {0}; | |
| cursor+leading_digits_length(&i[cursor..]) | |
| } | |
| /// ```ignore | |
| /// fraction | |
| /// "" | |
| /// '.' digits | |
| /// ``` | |
| fn leading_fraction_length(f : &str) -> usize{ | |
| if let [ b'.', frac @ .. ] = f.as_bytes() | |
| && let [b'0'..=b'9', ..] = frac{ | |
| 1+leading_digits_length(&f[1..]) | |
| } else { 0 } | |
| } | |
| /// ```ignore | |
| /// exponent | |
| /// "" | |
| /// 'E' sign digits | |
| /// 'e' sign digits | |
| /// ``` | |
| fn leading_exponent_length(e : &str) -> usize { | |
| match e.as_bytes() { | |
| [b'e'|b'E', b'+' | b'*', b'0'..=b'9', ..] => 2+leading_digits_length(&e[2..]), | |
| [b'e'|b'E', b'0'..=b'9', ..] => 1+leading_digits_length(&e[1..]), | |
| _ => 0, | |
| } | |
| } | |
| /// ```ignore | |
| /// string | |
| /// '"' characters '"' | |
| /// | |
| /// characters | |
| /// "" | |
| /// character characters | |
| /// | |
| /// character | |
| /// '0020' . '10FFFF' - '"' - '\' | |
| /// '\' escape | |
| /// | |
| /// ``` | |
| fn parse_escaped_string(s : &str) -> Result<(&str, &str), &str> { | |
| match parse_ascii(s) { | |
| Some((b'"', rest0)) => { | |
| let mut state = rest0; | |
| loop { | |
| let i = leading_chars_length(state, |c| { | |
| matches!(c, '\u{0020}'..='\u{10FFFF}') && !matches!(c, '"'|'\\') | |
| }); | |
| match parse_ascii(&state[i..]) { | |
| Some((b'\\', rest1)) => (_, state) = parse_escape(rest1)?, | |
| Some((b'"', rest1)) => return Ok((&s[..s.len()-rest1.len()],rest1)), | |
| _ => return Err(&state[i..]), | |
| } | |
| } | |
| } | |
| _ => return Err(s) | |
| } | |
| // WHERE : | |
| /// ```ignore | |
| /// escape | |
| /// '"' | |
| /// '\' | |
| /// '/' | |
| /// 'b' | |
| /// 'f' | |
| /// 'n' | |
| /// 'r' | |
| /// 't' | |
| /// 'u' hex hex hex hex | |
| /// | |
| /// hex | |
| /// digit | |
| /// 'A' . 'F' | |
| /// 'a' . 'f' | |
| /// ``` | |
| fn parse_escape(e : &str) -> Result<(&str, &str), &str> { | |
| match parse_ascii(e) { | |
| Some((b'"'|b'\\'|b'/'|b'f'|b'n'|b'r'|b't', rest)) => Ok((&e[..rest.len()], rest)), | |
| Some((b'u', rest)) => { | |
| // hex | |
| let mut loop_state = rest; | |
| 'hex : for _ in 0..4 { | |
| if let Some((h, rest1)) = parse_ascii(loop_state) && !h.is_ascii_hexdigit() { | |
| loop_state = rest1; | |
| continue 'hex; | |
| } | |
| return Err(loop_state); | |
| } | |
| Ok((&e[..loop_state.len()], loop_state)) | |
| }, | |
| _ => return Err(e), | |
| } | |
| } | |
| } | |
| /// ```ignore | |
| /// digits | |
| /// digit | |
| /// digit digits | |
| /// | |
| /// digit | |
| /// '0' | |
| /// onenine | |
| /// | |
| /// onenine | |
| /// '1' . '9' | |
| /// ``` | |
| fn leading_digits_length(s : &str) -> usize { | |
| leading_chars_length(s, |c|c.is_ascii_digit()) | |
| } | |
| fn leading_chars_length(s : &str, mut pred : impl FnMut(char)->bool) -> usize { | |
| let mut index = 0; | |
| for (i, c) in str::char_indices(&s) { | |
| if pred(c) { index = i+c.len_utf8() ; continue; } | |
| break | |
| } | |
| index | |
| } | |
| fn leading_whitespace_length(s : &str) -> usize { | |
| leading_chars_length(s, char::is_whitespace) | |
| } | |
| fn parse_ascii(s : &str) -> Option<(u8, &str)> { | |
| if let [b, rest @ ..] = s.as_bytes() && b.is_ascii() { | |
| Some( (*b, unsafe { str::from_utf8_unchecked(rest) }) ) | |
| } else { | |
| None | |
| } | |
| } | |
| fn next_token<'a>(src : &'a str) -> Result<(JsonToken<'a>, &'a str), &'a str> { | |
| fn stripped_whitespace(s : &str) -> &str { &s[leading_whitespace_length(s)..]} | |
| fn leading_ascii(s : &str) -> Option<(u8, &str)> {parse_ascii(stripped_whitespace(s))} | |
| let stripped_whitespace = stripped_whitespace(src); | |
| // element | |
| // ws value ws | |
| // | |
| // value | |
| // object | |
| // array | |
| // string | |
| // number | |
| // "true" | |
| // "false" | |
| // "null" | |
| match parse_ascii(stripped_whitespace) { | |
| None => Err(""), | |
| Some((b, rest)) => { | |
| match b { | |
| // object | |
| // '{' ws '}' | |
| // '{' members '}' | |
| b'{' => { | |
| match leading_ascii(rest) { | |
| None => Err(src), | |
| Some((b'}', rest_)) => Ok((JsonToken::EmptyObject, rest_)), | |
| _ => Ok((JsonToken::OpenObject, rest )) | |
| } | |
| } | |
| b'}' => { | |
| Ok((JsonToken::CloseObject, rest)) | |
| } | |
| // array | |
| // '[' ws ']' | |
| // '[' elements ']' | |
| b'[' => { | |
| match leading_ascii(src) { | |
| None => Err(src), | |
| Some((b']', rest_)) => Ok((JsonToken::EmptyArray , rest_)), | |
| _ => Ok((JsonToken::OpenArray(0), rest )) | |
| } | |
| } | |
| b']' => { | |
| Ok((JsonToken::CloseArray, rest)) | |
| } | |
| // number | |
| // integer fraction exponent | |
| b'0'..=b'9' => { | |
| let int_end = leading_signed_integer_length(rest); | |
| let frc_end = int_end+leading_fraction_length(&rest[int_end..]); | |
| let exp_end = frc_end+leading_exponent_length(&rest[frc_end..]); | |
| Ok((JsonToken::Number(&rest[..exp_end]), &rest[exp_end..])) | |
| } | |
| // string | |
| // '"' characters '"' | |
| b'"' => { | |
| let (string, rest1) = parse_escaped_string(stripped_whitespace)?; | |
| Ok((JsonToken::String(string), rest1)) | |
| } | |
| // elements | |
| // element | |
| // element ',' elements | |
| b',' => { | |
| Ok((JsonToken::Comma, rest)) | |
| } | |
| // member | |
| // ws string ws ':' element | |
| b':' => { | |
| Ok((JsonToken::Colon, rest)) | |
| } | |
| b't' | b'f' | b'n' => { | |
| match stripped_whitespace.split_at(4) { | |
| ("true", rest1) => Ok((JsonToken::True, rest1)), | |
| ("null", rest1) => Ok((JsonToken::Null, rest1)), | |
| _ => match src.split_at(5) { | |
| ("false", rest1) => Ok((JsonToken::False, rest1)), | |
| _=> Err(src) | |
| }, | |
| } | |
| } | |
| _ => Err(src) | |
| } | |
| } | |
| } | |
| } | |
| #[derive(Clone)] | |
| struct JsonParser<'a>{ | |
| stack : LList<JsonToken<'a>>, | |
| src : &'a str, | |
| } | |
| macro_rules! base_cases {() => { | |
| JsonToken::Null | |
| | JsonToken::True | |
| | JsonToken::False | |
| | JsonToken::Number(_) | |
| | JsonToken::String(_) | |
| | JsonToken::EmptyObject | |
| | JsonToken::EmptyArray | |
| };} | |
| impl<'a> JsonParser<'a> { | |
| const fn new(src : &'a str) -> Self { JsonParser{ stack: LList::nil(), src } } | |
| fn write_next_path(self)->Result<(String, Self), &'a str> { | |
| let next = self.advance_to_next_base_case()?; | |
| let mut stack = next.stack.top(); | |
| let mut reversed = LList::<&JsonToken>::nil(); | |
| loop { | |
| (stack, reversed) = match stack { | |
| Some((JsonToken::Colon,l)) => (l.top(), reversed), | |
| Some((t,l)) => (l.top(), LList::cons(t, reversed)), | |
| None => break , | |
| } | |
| } | |
| // we used a linked list representation to be more "functional, but it means we need to reverse it" | |
| let mut rev_cursor = reversed.top(); | |
| let mut string = String::new(); | |
| string.push_str("JSON"); | |
| loop { | |
| (rev_cursor) = match rev_cursor { | |
| Some((t,l)) => { | |
| match t { | |
| JsonToken::OpenObject => {}, | |
| JsonToken::OpenArray(i) => string.push_str(&format!( "[{i}]")), | |
| JsonToken::Member(member) => string.push_str(&format!( "[{member}]")), | |
| // base cases | |
| JsonToken::Null => string.push_str(" = null"), | |
| JsonToken::True => string.push_str(" = true"), | |
| JsonToken::False => string.push_str(" = false"), | |
| JsonToken::String(s) | |
| | JsonToken::Number(s) => string.push_str(&format!(" = {s}" )), | |
| JsonToken::EmptyObject => string.push_str(" = {}"), | |
| JsonToken::EmptyArray => string.push_str(" = []"), | |
| // the following should have been filtered out | |
| JsonToken::Colon | |
| | JsonToken::Comma | |
| | JsonToken::CloseObject | |
| | JsonToken::CloseArray => unreachable!(), | |
| } | |
| l.top() | |
| }, | |
| None => {string.push(';'); break Ok((string, next))}, | |
| } | |
| } | |
| } | |
| fn advance_to_next_base_case(self) -> Result<Self, &'a str> { | |
| let mut state = self; | |
| loop { | |
| state = match state.advance_token() { | |
| Err(e) => return Err(e), | |
| Ok(p) => { | |
| match p.stack.top() { | |
| None => return Ok(JsonParser { stack : LList(None), src : p.src}), | |
| Some((t, _)) => { | |
| if let base_cases!() = t { | |
| return Ok(p); | |
| } else { | |
| p | |
| } | |
| }, | |
| } | |
| }, | |
| } | |
| } | |
| } | |
| fn advance_token(self) -> Result<Self, &'a str> { | |
| macro_rules! closed {() => { | |
| JsonToken::CloseArray | JsonToken::CloseObject | |
| };} | |
| let (token, next) = next_token(self.src)?; | |
| // println!("{:?}", token); | |
| match token { | |
| JsonToken::Member(_) => unreachable!("tokenizer never creates members, ony strings"), | |
| JsonToken::String(s) => if let Some((JsonToken::OpenObject, _)) = self.stack.top() { | |
| Ok(JsonParser { stack: LList::cons(JsonToken::Member(s), self.stack), src: next }) | |
| } else { | |
| Ok(JsonParser { stack: LList::cons(token, self.stack), src: next }) | |
| } | |
| | JsonToken::Null | |
| | JsonToken::True | |
| | JsonToken::False | |
| | JsonToken::Number(_) | |
| | JsonToken::EmptyObject | |
| | JsonToken::EmptyArray | |
| | JsonToken::OpenObject | |
| | JsonToken::OpenArray(_) => Ok(JsonParser { stack: LList::cons(token, self.stack), src: next }), | |
| JsonToken::CloseObject => { | |
| if let Some((base_cases!()|closed!(), s0)) = self.stack.top() | |
| && let Some((JsonToken::Colon, s1)) = s0.top() | |
| && let Some((JsonToken::Member(_), s2)) = s1.top() | |
| && let Some((JsonToken::OpenObject, s3)) = s2.top() { | |
| Ok(JsonParser{stack : LList::cons(JsonToken::CloseObject, s3.clone()), src : next}) | |
| } else { | |
| Err(next) | |
| } | |
| }, | |
| JsonToken::CloseArray => { | |
| if let Some((base_cases!()|closed!(), s0)) = self.stack.top() | |
| && let Some((JsonToken::OpenArray(_), s2)) = s0.top() { | |
| Ok(JsonParser{stack : LList::cons(JsonToken::CloseArray,s2.clone()), src : next}) | |
| } else { | |
| Err(next) | |
| } | |
| }, | |
| JsonToken::Colon => | |
| match self.stack.top() { | |
| Some((JsonToken::Member(_), _)) => Ok(JsonParser{ stack: LList::cons(token, self.stack), src: next }), | |
| _ => Err(next), | |
| }, | |
| JsonToken::Comma => { | |
| if let Some((base_cases!()|closed!(), s0)) = self.stack.top() { | |
| match s0.top() { | |
| Some((JsonToken::OpenArray(i), s1)) => Ok(JsonParser{stack: LList::cons(JsonToken::OpenArray(i+1), s1.clone()), src : next}), | |
| Some((JsonToken::Colon, s1)) => { | |
| if let Some((JsonToken::Member(_), s2)) = s1.top() | |
| && let Some((JsonToken::OpenObject, _)) = s2.top() { | |
| Ok(JsonParser{stack: s2.clone(), src: next}) | |
| } else { | |
| Err(next) | |
| } | |
| } | |
| _ => Err(next), | |
| } | |
| } else { | |
| Err(next) | |
| } | |
| } | |
| } | |
| } | |
| } | |
| impl<'a> Debug for JsonParser<'a> { | |
| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
| write!(f, "JsonParser{{ stack : {:?},\t\tsrc : ...{:?}... }}", self.stack, &self.src[0..30.min(self.src.len())]) | |
| } | |
| } | |
| #[derive(Clone)] | |
| struct LList<T>(Option<Rc<(T,LList<T>)>>); | |
| impl<T> LList<T> { | |
| fn cons(t : T, l : LList<T>) -> Self { LList(Some(Rc::new((t,l))))} | |
| const fn nil() -> Self { LList(None) } | |
| fn top(&self) -> Option<&(T, LList<T>)> { | |
| match self { | |
| LList(None) => None, | |
| LList(Some(rc)) => Some(&**rc), | |
| } | |
| } | |
| } | |
| impl<T:Debug> Debug for LList<T> { | |
| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
| let mut cursor = self; | |
| write!(f, "LList([",)?; | |
| loop { | |
| cursor = match cursor { | |
| LList(None) => return write!(f,"])"), | |
| LList(Some(rc)) => { | |
| let (t,next @ LList(l)) = &**rc; | |
| write!(f, "{:?}", t)?; | |
| match l { | |
| None => return write!(f,"])"), | |
| Some(_) => {write!(f, ", ")?; next}, | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| #[derive(Debug,Clone)] | |
| enum JsonToken<'a> { | |
| OpenObject, | |
| CloseObject, | |
| OpenArray(usize), | |
| CloseArray, | |
| Colon, | |
| Comma, | |
| Member(&'a str), | |
| Null, | |
| True, | |
| False, | |
| Number(&'a str), | |
| String(&'a str), | |
| EmptyObject, | |
| EmptyArray, | |
| } | |
| fn main() { | |
| let mut src = THE_JSON; | |
| while let Ok((token, cont)) = next_token(src) { | |
| // println!("{:?}\n\t\t\t{:?}...", token, &cont[..cont.len().min(20) ]); | |
| src = cont | |
| } | |
| let mut parser = JsonParser::new(THE_JSON); | |
| loop { | |
| match parser.write_next_path() { | |
| Ok((s, p)) => { | |
| println!("{}", s); | |
| parser = p | |
| }, | |
| e @ Err(s) => { | |
| if s.is_empty() { | |
| println!("DONE"); | |
| } else { | |
| println!("ERROR : {:?}", e); | |
| } | |
| break; | |
| }, | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment