Skip to content

Instantly share code, notes, and snippets.

@rust-play
Created March 15, 2025 05:35
Show Gist options
  • Save rust-play/be19d9261bc71bdf30f1feda426e4b0c to your computer and use it in GitHub Desktop.
Save rust-play/be19d9261bc71bdf30f1feda426e4b0c to your computer and use it in GitHub Desktop.
Code shared from the Rust Playground
//! # `braces`
//!
//! This module implements Bash-like brace expansion. It supports
//! comma-separated options (e.g. `"a{b,c}d"` expands to `["abd", "acd"]`),
//! numeric sequences (e.g. `"file{1..3}.txt"`), alpha sequences, and even
//! nested brace expressions. It also supports stepped sequences (e.g. to
//! generate 10, 20, 30, etc. use `"file{10..30..10}.txt"`).
//!
//! The overall algorithm is similar to the TypeScript version: first the
//! string is “escaped” by swapping literal chars for tokens, balanced brace
//! regions are recursively expanded, and finally the tokens are restored.
//!
//! ## Acknowledgements
//!
//! This crate is inspired by the JavaScript packages `braces` by Jon Schlinkert
//! and `brace-expansion` by Julian Gruber, both published under the MIT license.
//!
use regex::Regex;
use std::cmp;
/// The outcome of a successful balanced-delimiter search.
///
/// All offsets are byte offsets into the string that was searched.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BalancedResult {
    /// Byte offset at which the opening delimiter begins.
    pub start: usize,
    /// Byte offset at which the matching closing delimiter begins.
    pub end: usize,
    /// Everything before the opening delimiter.
    pub pre: String,
    /// The text between the opening and closing delimiters (delimiters excluded).
    pub body: String,
    /// Everything after the closing delimiter.
    pub post: String,
}
/// Locates the first balanced region delimited by `a` and `b` in `s` and
/// splits `s` around it.
///
/// Returns `None` when `range` finds no balanced region. Otherwise the result
/// carries the byte offsets of both delimiters together with owned copies of
/// the text before the opening delimiter, between the delimiters, and after
/// the closing delimiter.
pub fn balanced(a: &str, b: &str, s: &str) -> Option<BalancedResult> {
    let (start, end) = range(a, b, s)?;
    let body_from = start + a.len();
    let post_from = end + b.len();
    Some(BalancedResult {
        start,
        end,
        pre: s[..start].to_owned(),
        body: s[body_from..end].to_owned(),
        post: s[post_from..].to_owned(),
    })
}
/// Returns the range `(start, end)` of the first balanced region delimited by
/// `a` and `b` in `s`.
///
/// `start` is the byte offset of the first occurrence of `a`; `end` is the
/// byte offset of the `b` that brings the nesting depth back to zero. Returns
/// `None` when `a` does not occur, when the region opened at `start` is never
/// closed, or when `a` is empty (an empty opening delimiter can never be
/// consumed, so there is nothing to balance).
///
/// The scan is intended for short delimiter tokens such as `"{"` and `"}"`.
pub fn range(a: &str, b: &str, s: &str) -> Option<(usize, usize)> {
    // An empty opener matches everywhere without advancing the cursor.
    if a.is_empty() {
        return None;
    }
    let start = s.find(a)?;
    let mut depth = 0;
    let mut i = start;
    while i < s.len() {
        let rest = &s[i..];
        if rest.starts_with(a) {
            depth += 1;
            i += a.len();
        } else if rest.starts_with(b) {
            depth -= 1;
            if depth == 0 {
                return Some((start, i));
            }
            i += b.len();
        } else {
            // Step over one whole character so `i` always stays on a UTF-8
            // boundary; stepping a single byte would make the next slice panic
            // on multi-byte characters.
            i += rest.chars().next().map_or(1, char::len_utf8);
        }
    }
    None
}
//
// --- Internal helper functions ---
//
/// Interprets `s` as an `i32`.
///
/// When `s` does not parse as an `i32`, the code point of its first character
/// is returned instead; an empty string yields `0`.
fn numeric(s: &str) -> i32 {
    match s.parse::<i32>() {
        Ok(value) => value,
        Err(_) => match s.chars().next() {
            Some(first) => first as i32,
            None => 0,
        },
    }
}
/// Checks whether a sequence term is written with zero padding.
///
/// A term is padded when, after an optional leading `-`, it starts with `0`
/// followed by at least one more ASCII digit (e.g. `"01"` or `"-02"`). A bare
/// `"0"` or `"-0"` is an ordinary number and is not considered padded.
fn is_padded(s: &str) -> bool {
    let unsigned = s.strip_prefix('-').unwrap_or(s);
    let mut chars = unsigned.chars();
    chars.next() == Some('0') && matches!(chars.next(), Some(c) if c.is_ascii_digit())
}
/// Splits a comma-separated string into its top-level parts while keeping
/// nested braced sections intact.
///
/// An empty input yields a single empty part. Commas inside a balanced
/// `{...}` region do not split: the region is glued back onto the part that
/// precedes it, and the first part of whatever follows the region is glued
/// onto that same part. Parts are returned verbatim; surrounding whitespace
/// is preserved in every branch so nested and non-nested input are treated
/// the same way.
fn parse_comma_parts(s: &str) -> Vec<String> {
    if s.is_empty() {
        return vec![String::new()];
    }
    let m = match balanced("{", "}", s) {
        Some(m) => m,
        None => return s.split(',').map(str::to_string).collect(),
    };

    let mut parts: Vec<String> = m.pre.split(',').map(str::to_string).collect();
    // The braced region belongs to the part that was open when it started.
    if let Some(last) = parts.last_mut() {
        last.push('{');
        last.push_str(&m.body);
        last.push('}');
    }

    if !m.post.is_empty() {
        let mut post_parts = parse_comma_parts(&m.post).into_iter();
        // Text up to the next top-level comma continues the current part.
        if let Some(last) = parts.last_mut() {
            if let Some(first) = post_parts.next() {
                last.push_str(&first);
            }
        }
        parts.extend(post_parts);
    }
    parts
}
/// Returns `s` surrounded by a single pair of curly braces.
fn embrace(s: &str) -> String {
    let mut wrapped = String::with_capacity(s.len() + 2);
    wrapped.push('{');
    wrapped.push_str(s);
    wrapped.push('}');
    wrapped
}
/// Returns `true` when `i` is less than or equal to `y`.
fn lte(i: i32, y: i32) -> bool {
    y >= i
}
/// Returns `true` when `i` is greater than or equal to `y`.
fn gte(i: i32, y: i32) -> bool {
    y <= i
}
//
// --- Escaping/unescaping helpers ---
//
/// Two `(regex, replacement)` pairs describing one special character.
///
/// As built by `create_patterns`, the first pair matches the backslash-escaped
/// character and carries its placeholder token as the replacement; the second
/// pair matches that placeholder token and carries the bare character as the
/// replacement.
type PatternPair = ((Regex, String), (Regex, String));
/// Builds the two `(regex, replacement)` pairs for one special character.
///
/// A placeholder token of the form `"\0LABEL_NONCE\0"` is generated, where
/// `LABEL` is `label` upper-cased and `NONCE` is the current time since the
/// Unix epoch in nanoseconds (a timestamp, not a random value).
///
/// The returned tuple is:
/// * first: a regex matching the literal text `\` + `ch`, with the token as
///   its replacement;
/// * second: a regex matching the literal token, with `ch` as its replacement.
///
/// Panics if the system clock reports a time before the Unix epoch.
fn create_patterns(label: &str, ch: char) -> PatternPair {
    let since_epoch = ::std::time::SystemTime::now()
        .duration_since(::std::time::UNIX_EPOCH)
        .unwrap();
    let nonce = since_epoch.as_nanos().to_string();
    let token = format!("\0{}_{}\0", label.to_uppercase(), nonce);

    // Both regexes match literal text, so everything is escaped before compiling.
    let backslashed = format!("\\{}", ch);
    let char_matcher = Regex::new(&regex::escape(&backslashed)).unwrap();
    let token_matcher = Regex::new(&regex::escape(&token)).unwrap();

    let char_to_token = (char_matcher, token);
    let token_to_char = (token_matcher, ch.to_string());
    (char_to_token, token_to_char)
}
lazy_static::lazy_static! {
    /// Lazily built table of patterns, in the fixed order "open", "close",
    /// "comma", "period", "slash". The order is significant: `expand` reads
    /// the "close" entry by its index (1).
    static ref PATTERNS: Vec<PatternPair> = [
        ("open", '{'),
        ("close", '}'),
        ("comma", ','),
        ("period", '.'),
        ("slash", '\\'),
    ]
    .iter()
    .map(|&(label, ch)| create_patterns(label, ch))
    .collect();
}
/// Hides backslash-escaped special characters from the expander.
///
/// Every occurrence of `\\`, `\{`, `\}`, `\,` and `\.` in `s` is replaced by
/// the placeholder token of the corresponding `PATTERNS` entry, so that the
/// expansion step only ever sees unescaped braces, commas and dots.
fn escape_braces(s: &str) -> String {
    let mut result = s.to_string();
    // The FIRST pair of each pattern maps an escaped character to its token.
    // `PATTERNS` lists "slash" last, but an escaped backslash has to be
    // tokenised before anything else: otherwise the second backslash of `\\{`
    // would be misread as escaping the brace. Hence the reverse walk.
    for ((escaped_char, token), _) in PATTERNS.iter().rev() {
        result = escaped_char
            .replace_all(&result, token.as_str())
            .into_owned();
    }
    result
}
/// Reverses `escape_braces`: every placeholder token in `s` is replaced by the
/// bare character it stands for (`\`, `{`, `}`, `,` or `.`).
fn unescape_braces(s: &str) -> String {
    let mut result = s.to_string();
    // The SECOND pair of each pattern maps a token back to its character.
    for (_, (token, literal)) in PATTERNS.iter() {
        result = token.replace_all(&result, literal.as_str()).into_owned();
    }
    result
}
//
// --- Core expansion routines ---
//
/// Recursively expands brace expressions in a string.
fn expand(s: &str, is_top: bool) -> Vec<String> {
let mut expansions = Vec::new();
if let Some(m) = balanced("{", "}", s) {
let pre: &str = m.pre.as_ref();
let post = if !m.post.is_empty() {
expand(&m.post, false)
} else {
vec!["".to_string()]
};
if (&pre).ends_with('$') {
// If the pre ends with '$', treat the brace set as literal.
for p in post.clone().iter() {
expansions.push(format!("{}{{{}}}{}", &pre, &m.body, p));
}
} else {
let is_numeric_sequence = Regex::new(r"^-?\d+\.\.-?\d+(?:\.\.-?\d+)?$")
.unwrap()
.is_match(&m.body);
let is_alpha_sequence = Regex::new(r"^[a-zA-Z]\.\.[a-zA-Z](?:\.\.-?\d+)?$")
.unwrap()
.is_match(&m.body);
let is_sequence = is_numeric_sequence || is_alpha_sequence;
let is_options = m.body.contains(',');
if !is_sequence && !is_options {
// Handle cases like "{a},b}"
if Regex::new(r",.*\}").unwrap().is_match(&m.post) {
// Here we need the escape token for "close" (from our patterns for "close").
let close_token = &PATTERNS[1].0 .1; // first pair of "close"
let new_str = format!("{}{{{}{}{}", pre, m.body, close_token, m.post);
return expand(&new_str, is_top);
}
return vec![s.to_string()];
}
let mut n: Vec<String>;
if is_sequence {
n = (&m.body).split("..").map(|s| s.to_string()).collect();
} else {
n = parse_comma_parts(&m.body);
if n.len() == 1 {
n = expand(&n[0], false)
.into_iter()
.map(|part| embrace(&part))
.collect();
if n.len() == 1 {
return post
.iter()
.map(|p| format!("{}{}{}", &m.pre, n[0], p))
.collect();
}
}
}
let mut n_vec = Vec::new();
if is_sequence {
// Sequence expansion.
let parts: Vec<&str> = n.iter().map(|s| s.as_str()).collect();
let x = numeric(parts[0]);
let y = numeric(parts[1]);
let width = cmp::max(parts[0].len(), parts[1].len());
let incr = if n.len() == 3 {
numeric(&n[2]).abs()
} else {
1
};
let incr = if y < x { -(incr as i32) } else { incr as i32 };
let pad = n.iter().any(|el| is_padded(el));
let mut i_val = x;
if y < x {
while gte(i_val, y) {
let c = if is_alpha_sequence {
let ch = std::char::from_u32(i_val as u32).unwrap_or('?');
if ch == '\\' {
"".to_string()
} else {
ch.to_string()
}
} else {
let mut s_val = i_val.to_string();
if pad && s_val.len() < width {
let need = width - s_val.len();
let zeros = "0".repeat(need);
if i_val < 0 {
s_val = format!("-{}{}", zeros, s_val.trim_start_matches('-'));
} else {
s_val = format!("{}{}", zeros, s_val);
}
}
s_val
};
n_vec.push(c);
i_val += incr;
}
} else {
while lte(i_val, y) {
let c = if is_alpha_sequence {
let ch = std::char::from_u32(i_val as u32).unwrap_or('?');
if ch == '\\' {
"".to_string()
} else {
ch.to_string()
}
} else {
let mut s_val = i_val.to_string();
if pad && s_val.len() < width {
let need = width - s_val.len();
let zeros = "0".repeat(need);
if i_val < 0 {
s_val = format!("-{}{}", zeros, s_val.trim_start_matches('-'));
} else {
s_val = format!("{}{}", zeros, s_val);
}
}
s_val
};
n_vec.push(c);
i_val += incr;
}
}
} else {
for part in n.iter() {
n_vec.extend(expand(part, false));
}
}
for item in n_vec.iter() {
for p in post.iter() {
let expansion = format!("{}{}{}", pre, item, p);
if !is_top || is_sequence || !expansion.is_empty() {
expansions.push(expansion);
}
}
}
}
expansions
} else {
vec![s.to_string()]
}
}
/// Expands brace expressions in a string using Bash-like rules.
///
/// The input is passed through `escape_braces`, expanded with `expand`, and
/// every resulting string is passed through `unescape_braces`. An empty input
/// produces an empty vector.
///
/// # Example
///
/// ```
/// let result = braces("a{b,c}d");
/// assert_eq!(result, vec!["abd", "acd"]);
/// ```
pub fn braces(s: &str) -> Vec<String> {
    if s.is_empty() {
        return Vec::new();
    }
    // Bash 4.3 quirk: a leading "{}" is taken literally, so backslash-escape it.
    let prepared = match s.strip_prefix("{}") {
        Some(rest) => format!("\\{{\\}}{}", rest),
        None => s.to_string(),
    };
    let escaped = escape_braces(&prepared);
    expand(&escaped, true)
        .iter()
        .map(|expansion| unescape_braces(expansion))
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks option lists with a single brace set and with two consecutive sets.
    #[test]
    fn test_basic_brace_expansion() {
        let cases = vec![
            (
                "He{llo,y there} world",
                vec!["Hello world", "Hey there world"],
            ),
            ("a{b,c}d{e,f}g", vec!["abdeg", "abdfg", "acdeg", "acdfg"]),
        ];
        for (input, expected) in cases {
            assert_eq!(braces(input), expected);
        }
    }
}
/// Interactive loop: reads brace expressions from standard input, one per
/// line, and prints their expansions.
///
/// Blank lines are skipped, `exit` (case-insensitive) ends the loop, and a
/// line that cannot be read is reported on standard error before reading
/// continues.
fn main() {
    use std::io::{self, BufRead, Write};

    println!("Enter brace expression(s) to expand (type 'exit' to quit):");
    let input = io::stdin();
    let mut output = io::stdout();

    for line in input.lock().lines() {
        let raw = match line {
            Ok(text) => text,
            Err(e) => {
                eprintln!("Error reading input: {}", e);
                continue;
            }
        };
        let expr = raw.trim();
        if expr.eq_ignore_ascii_case("exit") {
            break;
        }
        if expr.is_empty() {
            continue;
        }
        let expansions = braces(expr);
        println!("Expanding `{}` yields:", expr);
        expansions
            .iter()
            .for_each(|expansion| println!(" {}", expansion));
        output.flush().unwrap();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment