Last active
April 21, 2021 13:22
-
-
Save honboubao/fc6b4479b9f82bd6a7414be5cc778a8d to your computer and use it in GitHub Desktop.
split/join with escape (Java)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Joins strings into a single '|'-separated string and splits such a string
 * back into its parts, using backslash escaping so that parts may themselves
 * contain '|' and backslash characters.
 *
 * <p>Escaping scheme applied to every part before joining:
 * <ul>
 *   <li>a backslash is written as two backslashes</li>
 *   <li>a '|' is written as a backslash followed by '|'</li>
 * </ul>
 * A '|' in the joined string is therefore a separator exactly when it is
 * preceded by an even number (including zero) of backslashes.
 *
 * <p>Example: the parts {@code a|\|\}, {@code b} and {@code c} are joined to
 * {@code a\|\\\|\\|b|c}; splitting that string yields the three parts again.
 *
 * <p>Note: when this class is nested inside another class it should be
 * declared {@code static}; as a top-level class that modifier is not allowed.
 */
public class StringUtils {

    /** Character that separates the parts in the joined string. */
    private static final char SEPARATOR = '|';

    /** Character that escapes a following separator or escape character. */
    private static final char ESCAPE = '\\';

    /**
     * Escapes every part and concatenates the results with '|' in between.
     *
     * @param parts the strings to join; neither the array nor its elements may be null
     * @return the joined string; the empty string when no parts are given
     * @throws NullPointerException if {@code parts} or one of its elements is null
     */
    public static String join(String... parts) {
        return Arrays.stream(parts)
                .map(StringUtils::escape)
                .collect(Collectors.joining("|"));
    }

    /**
     * Splits a string produced by {@link #join(String...)} at every unescaped
     * '|' and removes the escaping from each part.
     *
     * <p>The input is scanned once from left to right, so the running time is
     * linear and there is no limit on the number of consecutive backslashes.
     * Empty parts, including leading and trailing ones, are kept; the empty
     * string yields a single empty part. A backslash that is not followed by a
     * backslash or a '|' (which {@code join} never produces) is kept literally.
     *
     * @param joinedParts the joined string; must not be null
     * @return the unescaped parts, always at least one element
     * @throws NullPointerException if {@code joinedParts} is null
     */
    public static String[] split(String joinedParts) {
        // Fully qualified so the snippet needs no imports beyond those it already relied on.
        java.util.List<String> parts = new java.util.ArrayList<>();
        StringBuilder current = new StringBuilder();
        int length = joinedParts.length();
        for (int i = 0; i < length; i++) {
            char c = joinedParts.charAt(i);
            if (c == ESCAPE && i + 1 < length && isSpecial(joinedParts.charAt(i + 1))) {
                // Escape sequence: drop the backslash, keep the escaped character.
                current.append(joinedParts.charAt(++i));
            } else if (c == SEPARATOR) {
                // Unescaped separator: the current part is complete.
                parts.add(current.toString());
                current.setLength(0);
            } else {
                // Ordinary character, or a stray backslash that escapes nothing.
                current.append(c);
            }
        }
        // The text after the last separator (possibly empty) is the final part.
        parts.add(current.toString());
        return parts.toArray(new String[0]);
    }

    /** Escapes backslashes first, then separators, so added backslashes are not doubled again. */
    private static String escape(String part) {
        return part.replace("\\", "\\\\").replace("|", "\\|");
    }

    /** Returns whether {@code c} is one of the two characters that can be escaped. */
    private static boolean isSpecial(char c) {
        return c == ESCAPE || c == SEPARATOR;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment