-
-
Save rust-play/be19d9261bc71bdf30f1feda426e4b0c to your computer and use it in GitHub Desktop.
Code shared from the Rust Playground
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//! # `braces`
//!
//! This module implements Bash-like brace expansion. It supports
//! comma-separated options (e.g. `"a{b,c}d"` expands to `["abd", "acd"]`),
//! numeric sequences (e.g. `"file{1..3}.txt"`), alpha sequences (e.g.
//! `"{a..e}"`), and nested brace expressions. Sequences may carry a step:
//! `"file{10..30..10}.txt"` generates `file10.txt`, `file20.txt` and
//! `file30.txt`.
//!
//! The overall algorithm is similar to the TypeScript version: first the
//! string is "escaped" by swapping backslash-escaped literal characters for
//! tokens, then balanced brace regions are recursively expanded, and finally
//! the tokens are restored to the characters they stand for.
//!
//! ## Acknowledgements
//!
//! This crate is inspired by the JavaScript packages `braces` by Jon Schlinkert
//! and `brace-expansion` by Julian Gruber, both published under the MIT license.
//!
use regex::Regex; | |
use std::cmp; | |
// | |
// A simple result from a balanced match. | |
// | |
#[derive(Debug)] | |
pub struct BalancedResult { | |
pub start: usize, | |
pub end: usize, | |
pub pre: String, | |
pub body: String, | |
pub post: String, | |
} | |
/// Finds the first balanced substring delimited by `a` and `b` in `s`. | |
pub fn balanced(a: &str, b: &str, s: &str) -> Option<BalancedResult> { | |
range(a, b, s).map(|(start, end)| BalancedResult { | |
start, | |
end, | |
pre: s[0..start].to_string(), | |
body: s[start + a.len()..end].to_string(), | |
post: s[end + b.len()..].to_string(), | |
}) | |
} | |
/// Returns the range `(start, end)` of the first balanced region delimited by `a` and `b` in `s`.
///
/// `start` is the byte offset of the first occurrence of `a`; `end` is the
/// byte offset of the `b` that closes it, taking nested `a`/`b` pairs into
/// account. At every position `a` is tested before `b`.
///
/// Returns `None` when `a` does not occur, when the first `a` is never
/// closed, or when either delimiter is empty (an empty delimiter cannot
/// delimit anything, and an empty `a` would never advance the scan).
pub fn range(a: &str, b: &str, s: &str) -> Option<(usize, usize)> {
    if a.is_empty() || b.is_empty() {
        return None;
    }
    let start = s.find(a)?;
    // The scan begins on an opener, so `depth` is at least 1 whenever a
    // closer is seen and the subtraction below cannot underflow.
    let mut depth = 0usize;
    let mut i = start;
    while i < s.len() {
        let rest = &s[i..];
        if rest.starts_with(a) {
            depth += 1;
            i += a.len();
        } else if rest.starts_with(b) {
            depth -= 1;
            if depth == 0 {
                return Some((start, i));
            }
            i += b.len();
        } else {
            // Step over one whole character so `i` always stays on a UTF-8
            // boundary; stepping a single byte would make the next slice
            // panic inside a multi-byte character.
            i += rest.chars().next().map_or(1, char::len_utf8);
        }
    }
    None
}
//
// --- Internal helper functions ---
//
/// Converts a sequence endpoint to a number.
///
/// A string that parses as an `i32` yields that value. Anything else yields
/// the code point of its first character, or `0` for an empty string.
fn numeric(s: &str) -> i32 {
    match s.parse::<i32>() {
        Ok(value) => value,
        Err(_) => s.chars().next().map_or(0, |c| c as i32),
    }
}
/// Checks if a string element has padded numeric formatting (e.g., "01" or "-02").
///
/// A string is considered padded when it starts with `0` and has at least
/// one more character, or when it starts with `-0`.
fn is_padded(s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        // After a sign, the very next character decides.
        Some('-') => chars.next() == Some('0'),
        // A lone "0" is just zero, not padding.
        Some('0') => chars.next().is_some(),
        _ => false,
    }
}
/// Splits a comma‑separated string while preserving nested braced sections. | |
fn parse_comma_parts(s: &str) -> Vec<String> { | |
if s.is_empty() { | |
return vec!["".to_string()]; | |
} | |
if let Some(m) = balanced("{", "}", s) { | |
let mut parts: Vec<String> = m.pre.split(',').map(|s| s.to_string()).collect(); | |
if let Some(last) = parts.pop() { | |
parts.push(format!("{}{{{}}}", last, m.body)); | |
} | |
let mut post_parts = parse_comma_parts(&m.post); | |
if !m.post.is_empty() && !post_parts.is_empty() { | |
if let Some(last) = parts.pop() { | |
let new_last = format!("{}{}", last, post_parts.remove(0)); | |
parts.push(new_last); | |
} | |
parts.extend(post_parts); | |
} | |
parts | |
} else { | |
s.split(',').map(|s| s.trim().to_string()).collect() | |
} | |
} | |
/// Wraps a string in braces: `s` becomes `{s}`.
fn embrace(s: &str) -> String {
    let mut wrapped = String::with_capacity(s.len() + 2);
    wrapped.push('{');
    wrapped.push_str(s);
    wrapped.push('}');
    wrapped
}
/// Returns `true` when `i` is less than or equal to `y`.
fn lte(i: i32, y: i32) -> bool {
    i <= y
}
/// Returns `true` when `i` is greater than or equal to `y`.
fn gte(i: i32, y: i32) -> bool {
    i >= y
}
//
// --- Escaping/unescaping helpers ---
//
/// Each pattern is a pair of pairs. The first inner pair is used during unescaping, | |
/// and the second is used during escaping. | |
type PatternPair = ((Regex, String), (Regex, String)); | |
/// Given a label and a character, returns a pair of regex/replacement tuples. | |
fn create_patterns(label: &str, ch: char) -> PatternPair { | |
// Generate a random nonce to ensure uniqueness. | |
let nonce = ::std::time::SystemTime::now() | |
.duration_since(::std::time::UNIX_EPOCH) | |
.unwrap() | |
.as_nanos() | |
.to_string(); | |
// Create a token such as "\0OPEN_RANDOM\0" | |
let token = format!("\0{}_{}\0", label.to_uppercase(), nonce); | |
// Regex to match the literal token. | |
let token_regex = Regex::new(®ex::escape(&token)).unwrap(); | |
// Create the pattern to match the literal escaped character. | |
let escaped_char = format!("\\{}", ch); | |
let ch_regex = Regex::new(®ex::escape(&escaped_char)).unwrap(); | |
// Return two pairs: | |
// first: (ch_regex, token) used by unescape_braces, | |
// second: (token_regex, ch.to_string()) used by escape_braces. | |
((ch_regex, token.clone()), (token_regex, ch.to_string())) | |
} | |
lazy_static::lazy_static! { | |
/// Global array of patterns for "open", "close", "comma", "period", and "slash". | |
static ref PATTERNS: Vec<PatternPair> = { | |
vec![ | |
create_patterns("open", '{'), | |
create_patterns("close", '}'), | |
create_patterns("comma", ','), | |
create_patterns("period", '.'), | |
create_patterns("slash", '\\'), | |
] | |
}; | |
} | |
/// Escapes backslashes and brace‑related characters in a string. | |
fn escape_braces(s: &str) -> String { | |
let mut result = s.to_string(); | |
// For each pattern, use the SECOND pair (token_regex, original char). | |
for (_, (search, replace)) in PATTERNS.iter() { | |
result = search.replace_all(&result, replace.as_str()).into_owned(); | |
} | |
result | |
} | |
/// Reverses the escaping operation. | |
fn unescape_braces(s: &str) -> String { | |
let mut result = s.to_string(); | |
// For each pattern, use the FIRST pair (regex for the escaped char, token). | |
for ((search, replace), _) in PATTERNS.iter() { | |
result = search.replace_all(&result, replace.as_str()).into_owned(); | |
} | |
result | |
} | |
//
// --- Core expansion routines ---
//
/// Recursively expands brace expressions in a string. | |
fn expand(s: &str, is_top: bool) -> Vec<String> { | |
let mut expansions = Vec::new(); | |
if let Some(m) = balanced("{", "}", s) { | |
let pre: &str = m.pre.as_ref(); | |
let post = if !m.post.is_empty() { | |
expand(&m.post, false) | |
} else { | |
vec!["".to_string()] | |
}; | |
if (&pre).ends_with('$') { | |
// If the pre ends with '$', treat the brace set as literal. | |
for p in post.clone().iter() { | |
expansions.push(format!("{}{{{}}}{}", &pre, &m.body, p)); | |
} | |
} else { | |
let is_numeric_sequence = Regex::new(r"^-?\d+\.\.-?\d+(?:\.\.-?\d+)?$") | |
.unwrap() | |
.is_match(&m.body); | |
let is_alpha_sequence = Regex::new(r"^[a-zA-Z]\.\.[a-zA-Z](?:\.\.-?\d+)?$") | |
.unwrap() | |
.is_match(&m.body); | |
let is_sequence = is_numeric_sequence || is_alpha_sequence; | |
let is_options = m.body.contains(','); | |
if !is_sequence && !is_options { | |
// Handle cases like "{a},b}" | |
if Regex::new(r",.*\}").unwrap().is_match(&m.post) { | |
// Here we need the escape token for "close" (from our patterns for "close"). | |
let close_token = &PATTERNS[1].0 .1; // first pair of "close" | |
let new_str = format!("{}{{{}{}{}", pre, m.body, close_token, m.post); | |
return expand(&new_str, is_top); | |
} | |
return vec![s.to_string()]; | |
} | |
let mut n: Vec<String>; | |
if is_sequence { | |
n = (&m.body).split("..").map(|s| s.to_string()).collect(); | |
} else { | |
n = parse_comma_parts(&m.body); | |
if n.len() == 1 { | |
n = expand(&n[0], false) | |
.into_iter() | |
.map(|part| embrace(&part)) | |
.collect(); | |
if n.len() == 1 { | |
return post | |
.iter() | |
.map(|p| format!("{}{}{}", &m.pre, n[0], p)) | |
.collect(); | |
} | |
} | |
} | |
let mut n_vec = Vec::new(); | |
if is_sequence { | |
// Sequence expansion. | |
let parts: Vec<&str> = n.iter().map(|s| s.as_str()).collect(); | |
let x = numeric(parts[0]); | |
let y = numeric(parts[1]); | |
let width = cmp::max(parts[0].len(), parts[1].len()); | |
let incr = if n.len() == 3 { | |
numeric(&n[2]).abs() | |
} else { | |
1 | |
}; | |
let incr = if y < x { -(incr as i32) } else { incr as i32 }; | |
let pad = n.iter().any(|el| is_padded(el)); | |
let mut i_val = x; | |
if y < x { | |
while gte(i_val, y) { | |
let c = if is_alpha_sequence { | |
let ch = std::char::from_u32(i_val as u32).unwrap_or('?'); | |
if ch == '\\' { | |
"".to_string() | |
} else { | |
ch.to_string() | |
} | |
} else { | |
let mut s_val = i_val.to_string(); | |
if pad && s_val.len() < width { | |
let need = width - s_val.len(); | |
let zeros = "0".repeat(need); | |
if i_val < 0 { | |
s_val = format!("-{}{}", zeros, s_val.trim_start_matches('-')); | |
} else { | |
s_val = format!("{}{}", zeros, s_val); | |
} | |
} | |
s_val | |
}; | |
n_vec.push(c); | |
i_val += incr; | |
} | |
} else { | |
while lte(i_val, y) { | |
let c = if is_alpha_sequence { | |
let ch = std::char::from_u32(i_val as u32).unwrap_or('?'); | |
if ch == '\\' { | |
"".to_string() | |
} else { | |
ch.to_string() | |
} | |
} else { | |
let mut s_val = i_val.to_string(); | |
if pad && s_val.len() < width { | |
let need = width - s_val.len(); | |
let zeros = "0".repeat(need); | |
if i_val < 0 { | |
s_val = format!("-{}{}", zeros, s_val.trim_start_matches('-')); | |
} else { | |
s_val = format!("{}{}", zeros, s_val); | |
} | |
} | |
s_val | |
}; | |
n_vec.push(c); | |
i_val += incr; | |
} | |
} | |
} else { | |
for part in n.iter() { | |
n_vec.extend(expand(part, false)); | |
} | |
} | |
for item in n_vec.iter() { | |
for p in post.iter() { | |
let expansion = format!("{}{}{}", pre, item, p); | |
if !is_top || is_sequence || !expansion.is_empty() { | |
expansions.push(expansion); | |
} | |
} | |
} | |
} | |
expansions | |
} else { | |
vec![s.to_string()] | |
} | |
} | |
/// Expands brace expressions in a string using Bash-like rules. | |
/// | |
/// # Example | |
/// | |
/// ``` | |
/// let result = braces("a{b,c}d"); | |
/// assert_eq!(result, vec!["abd", "acd"]); | |
/// ``` | |
pub fn braces(s: &str) -> Vec<String> { | |
if s.is_empty() { | |
return vec![]; | |
} | |
let mut s = s.to_string(); | |
// Bash 4.3 quirk: if a string starts with "{}" escape the leading braces. | |
if s.starts_with("{}") { | |
s = format!("\\{{\\}}{}", &s[2..]); | |
} | |
let escaped = escape_braces(&s); | |
let expanded = expand(&escaped, true); | |
expanded.into_iter().map(|e| unescape_braces(&e)).collect() | |
} | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Comma lists expand in order, and several groups in one string
    /// combine as a cross product with the leftmost group varying slowest.
    #[test]
    fn test_basic_brace_expansion() {
        let cases: Vec<(&str, Vec<&str>)> = vec![
            (
                "He{llo,y there} world",
                vec!["Hello world", "Hey there world"],
            ),
            (
                "a{b,c}d{e,f}g",
                vec!["abdeg", "abdfg", "acdeg", "acdfg"],
            ),
        ];
        for (input, expected) in cases {
            assert_eq!(braces(input), expected, "expanding {:?}", input);
        }
    }
}
fn main() { | |
use std::io::{self, BufRead, Write}; | |
println!("Enter brace expression(s) to expand (type 'exit' to quit):"); | |
let stdin = io::stdin(); | |
let mut stdout = io::stdout(); | |
for line in stdin.lock().lines() { | |
match line { | |
Ok(input) => { | |
let expr = input.trim(); | |
if expr.eq_ignore_ascii_case("exit") { | |
break; | |
} | |
if expr.is_empty() { | |
continue; | |
} | |
let result = braces(expr); | |
println!("Expanding `{}` yields:", expr); | |
for expansion in result { | |
println!(" {}", expansion); | |
} | |
stdout.flush().unwrap(); | |
} | |
Err(e) => { | |
eprintln!("Error reading input: {}", e); | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment