Created
August 11, 2020 08:50
-
-
Save StephenWakely/f7d811bf3a35168cd8840b872c3bd18c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::collections::HashMap; | |
use std::env; | |
// Alias some types | |
type Count = HashMap<String, usize>; | |
type ParseError = String; | |
/// Take a list of tuples of strings and counts and | |
/// convert them into a hash table. | |
fn make_count(tpls: Vec<(String, usize)>) -> Count { | |
tpls.iter().cloned().collect::<Count>() | |
} | |
/// Eats a single atom, or a grouped set of atoms (within parens) from the string. | |
/// Returns a Result (Ok for a successful parse or Err(ParseError) for a failed parse) | |
/// of (String, Count) a tuple of the remaining yet to be parsed string | |
/// and Count - a Hashmap of atoms to counts. | |
fn eat_atom(molecule: &str) -> Result<(String, Count), ParseError> { | |
if molecule.len() == 0 { | |
return Err("Empty molecule".to_string()); | |
} | |
let (head, tail) = molecule.split_at(1); | |
if matches!(head, "(" | "{" | "[") { | |
// We are in a paren, recursively parse all the inner contents of this. | |
let (rest, count) = parse_molecule(&tail)?; | |
if rest.len() > 0 { | |
let (head, tail) = rest.split_at(1); | |
// We should really match against the same paren that we opened with, | |
// but life is short.. | |
if matches!(head, ")" | "}" | "]") { | |
Ok((tail.to_string(), count)) | |
} else { | |
Err("No closing paren".to_string()) | |
} | |
} else { | |
Err("No closing paren".to_string()) | |
} | |
} else { | |
// Its just a single atom, which can be one upper case followed by n lowercase chars. | |
let tail = tail | |
.chars() | |
.take_while(|c| c.is_lowercase()) | |
.collect::<String>(); | |
let atom = format!("{}{}", head, tail); | |
let remaining = molecule.chars().skip(atom.len()).collect::<String>(); | |
Ok((remaining, make_count(vec![(atom, 1)]))) | |
} | |
} | |
/// Eats the mulitplier from the string. | |
/// The multiplier is any numeric characters. | |
/// We may not have any multiplier, so return None if there aren't any. | |
fn eat_multiplier(molecule: &str) -> Option<(String, usize)> { | |
let num = molecule | |
.chars() | |
.take_while(|c| c.is_numeric()) | |
.collect::<String>(); | |
let remaining = molecule.chars().skip(num.len()).collect::<String>(); | |
match num.parse() { | |
Ok(num) => Some((remaining, num)), | |
Err(_) => None, | |
} | |
} | |
/// Multiply every element in our Count hash by the given multiplier. | |
/// Note count is mutable, so we are directly changing it's contents. | |
fn multiply_count(count: &mut Count, multiplier: usize) { | |
count.iter_mut().for_each(|(_, val)| *val *= multiplier); | |
} | |
/// Add all the counts from one hash table to another. | |
fn add_counts(count1: &mut Count, count2: Count) { | |
for (key, value) in count2 { | |
match count1.get_mut(&key) { | |
Some(val) => { | |
*val += value; | |
} | |
None => { | |
count1.insert(key, value); | |
} | |
} | |
} | |
} | |
/// Parse the atom and the count returning the hash table of the counts. | |
fn parse_atom(molecule: &str) -> Result<(String, Count), ParseError> { | |
let (mut remaining, mut count) = eat_atom(molecule)?; | |
if let Some((remaining_, multiplier)) = eat_multiplier(&remaining) { | |
multiply_count(&mut count, multiplier); | |
remaining = remaining_; | |
} | |
Ok((remaining, count)) | |
} | |
/// Is this the end of a molecule? | |
/// Either the closing bracket or end of the string. | |
fn end_molecule(molecule: &str) -> bool { | |
match molecule.chars().nth(0) { | |
Some(m) => matches!(m, ')' | '}' | ']'), | |
None => true, | |
} | |
} | |
/// Parse the molecule - or part of if we are within a bracketed bit. | |
/// Returns the remaining unparsed string and the counts. | |
fn parse_molecule(molecule: &str) -> Result<(String, Count), ParseError> { | |
let mut count = HashMap::new(); | |
let mut remaining = molecule.to_string(); | |
while !end_molecule(&remaining) { | |
let (remaining_, count_) = parse_atom(&remaining)?; | |
add_counts(&mut count, count_); | |
remaining = remaining_; | |
} | |
Ok((remaining, count)) | |
} | |
/// Parse the molecule, returning either the counts of the atoms, or an error if the parsing failed. | |
fn parse(molecule: &str) -> Result<Count, ParseError> { | |
let (remaining, count) = parse_molecule(molecule)?; | |
if remaining != "" { | |
// We should have parsed the entire string by the end. | |
Err(format!("Still some molecule remaining {}", remaining)) | |
} else { | |
Ok(count) | |
} | |
} | |
fn main() { | |
for molecule in env::args().skip(1) { | |
println!("{:?}", parse(&molecule)); | |
} | |
} | |
#[test] | |
fn test_h20() { | |
assert_eq!( | |
make_count(vec![("H".to_string(), 2), ("O".to_string(), 1)]), | |
parse("H2O").unwrap(), | |
); | |
} | |
#[test] | |
fn test_magnesium_hydroxide() { | |
assert_eq!( | |
make_count(vec![ | |
("Mg".to_string(), 1), | |
("O".to_string(), 2), | |
("H".to_string(), 2) | |
]), | |
parse("Mg(OH)2").unwrap(), | |
); | |
} | |
#[test] | |
fn test_fremy_salt() { | |
assert_eq!( | |
make_count(vec![ | |
("K".to_string(), 4), | |
("O".to_string(), 14), | |
("N".to_string(), 2), | |
("S".to_string(), 4), | |
]), | |
parse("K4[ON(SO3)2]2").unwrap(), | |
); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment