Last active
August 29, 2015 14:17
-
-
Save twillouer/e4180390dc503fe886d0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) | |
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) | |
@Fork(3) | |
@BenchmarkMode(Mode.Throughput) | |
@OutputTimeUnit(TimeUnit.NANOSECONDS) | |
@State(Scope.Benchmark) | |
public class ReplaceAllBench { | |
@Param({ "10", "100", "1000" }) | |
private int size; | |
String string; | |
@Setup | |
public void setup() throws Throwable | |
{ | |
string = randomAlphanumericString(size / 3) + "\n\r " + randomAlphanumericString(size / 3) + "\n " | |
+ randomAlphanumericString(size / 3); | |
} | |
static class Replacer { | |
private static final Pattern COMPILE = Pattern.compile("[\n\r]+ "); | |
static String unfold_original(String s) | |
{ | |
s = s.replaceAll("\n\r ", ""); | |
s = s.replaceAll("\r\n ", ""); | |
s = s.replaceAll("\n ", ""); | |
s = s.replaceAll("\r ", ""); | |
return s; | |
} | |
static String unfold_regexp(String s) | |
{ | |
s = s.replaceAll("\n\r |\r\n |\n |\r ", ""); | |
return s; | |
} | |
static String unfold_regexpcompiled(String s) | |
{ | |
s = COMPILE.matcher(s).replaceAll(""); | |
return s; | |
} | |
private enum NLStatus { | |
NONE, RC, NL, RC_NL, NL_RC; | |
} | |
/** | |
* Remove all the '\n' and ’\r' followed by a ' ' from a LDIF String. | |
* | |
* @param s The String to unfold | |
* @return The resulting String | |
*/ | |
protected static String unfold_optim_emmanuel_1(String s) | |
{ | |
int pos = 0; | |
char[] unfold = new char[s.length()]; | |
NLStatus newLine = NLStatus.NONE; | |
for (char c : s.toCharArray()) { | |
switch (c) { | |
case '\n': | |
switch (newLine) { | |
case NONE: | |
newLine = NLStatus.NL; | |
break; | |
case RC: | |
newLine = NLStatus.RC_NL; | |
break; | |
case NL: | |
unfold[pos++] = '\n'; | |
break; | |
case RC_NL: | |
unfold[pos++] = '\r'; | |
unfold[pos++] = '\n'; | |
newLine = NLStatus.NL; | |
break; | |
case NL_RC: | |
unfold[pos++] = '\n'; | |
unfold[pos++] = '\r'; | |
newLine = NLStatus.NL; | |
break; | |
} | |
break; | |
case '\r': | |
switch (newLine) { | |
case NONE: | |
newLine = NLStatus.RC; | |
break; | |
case NL: | |
newLine = NLStatus.NL_RC; | |
break; | |
case RC: | |
unfold[pos++] = '\r'; | |
break; | |
case RC_NL: | |
unfold[pos++] = '\r'; | |
unfold[pos++] = '\n'; | |
newLine = NLStatus.RC; | |
break; | |
case NL_RC: | |
unfold[pos++] = '\n'; | |
unfold[pos++] = '\r'; | |
newLine = NLStatus.RC; | |
break; | |
} | |
break; | |
case ' ': | |
if (newLine == NLStatus.NONE) { | |
unfold[pos++] = c; | |
} else { | |
newLine = NLStatus.NONE; | |
} | |
break; | |
default: | |
switch (newLine) { | |
case NONE: | |
break; | |
case NL: | |
unfold[pos++] = '\n'; | |
newLine = NLStatus.NONE; | |
break; | |
case RC: | |
unfold[pos++] = '\r'; | |
newLine = NLStatus.NONE; | |
break; | |
case NL_RC: | |
unfold[pos++] = '\n'; | |
unfold[pos++] = '\r'; | |
newLine = NLStatus.NONE; | |
break; | |
case RC_NL: | |
unfold[pos++] = '\r'; | |
unfold[pos++] = '\n'; | |
newLine = NLStatus.NONE; | |
break; | |
} | |
unfold[pos++] = c; | |
} | |
} | |
switch (newLine) { | |
case NONE: | |
break; | |
case NL: | |
unfold[pos++] = '\n'; | |
break; | |
case RC: | |
unfold[pos++] = '\r'; | |
break; | |
case NL_RC: | |
unfold[pos++] = '\n'; | |
unfold[pos++] = '\r'; | |
break; | |
case RC_NL: | |
unfold[pos++] = '\r'; | |
unfold[pos++] = '\n'; | |
break; | |
} | |
return new String(unfold, 0, pos); | |
} | |
private static final String[] TODO = { "\n\r ", "\r\n ", "\r ", "\n " }; | |
private static final String[] TO = { "", "", "", "" }; | |
protected static String unfold_with_stringutils_on_apache_common(final String string) | |
{ | |
return StringUtils.replaceEach(string, TODO, TO); | |
} | |
public static String unfold_olivier(String test) | |
{ | |
// Null -> null | |
if (test == null) { | |
return null; | |
} | |
// 0 or 1 char | |
if (test.length() < 2) | |
return test; | |
// 2 chars | |
if (test.length() == 2) { | |
if (test.charAt(1) == ' ') { | |
char c0 = test.charAt(0); | |
if (c0 == '\r' || c0 == '\n') { | |
return ""; | |
} | |
} | |
return test; | |
} | |
// More than 2 chars | |
char[] chars = test.toCharArray(); | |
char[] dest = new char[chars.length]; | |
int p = chars.length - 1; | |
int d = p; | |
while (p >= 2) { | |
char c = chars[p]; | |
// Not a space : keep as is | |
if (c != ' ') { | |
dest[d] = c; | |
p--; | |
d--; | |
} | |
// Space | |
else { | |
char c1 = chars[p - 1]; | |
// Previous char is special : investigate deeper | |
if (c1 == '\r' || c1 == '\n') { | |
p--; | |
char c2 = chars[p - 1]; | |
if ((c2 == '\r' || c2 == '\n') && c2 != c1) { | |
p--; | |
} | |
p--; | |
} | |
// It was just a space : keep it | |
else { | |
dest[d] = c; | |
p--; | |
d--; | |
} | |
} | |
} | |
// Keep the remaining chars as it (special cases already covered) | |
while (p >= 0) { | |
dest[d--] = chars[p--]; | |
} | |
return new String(dest, d + 1, chars.length - d - 1); | |
} | |
public static String unfold_olivier_optimise_par_emmanuel(String test) | |
{ | |
// Null -> null | |
if ( test == null ) | |
{ | |
return null; | |
} | |
char[] chars = test.toCharArray(); | |
// 0 or 1 char | |
if ( chars.length < 2 ) | |
{ | |
return test; | |
} | |
// 2 chars | |
if ( chars.length == 2 ) | |
{ | |
if ( chars[1] == ' ' ) | |
{ | |
if ( chars[0] == '\r' || chars[0] == '\n' ) | |
{ | |
return ""; | |
} | |
} | |
return test; | |
} | |
// More than 2 chars | |
int p = chars.length - 1; | |
int d = p; | |
while ( p >= 0 ) | |
{ | |
char c = chars[p]; | |
// Not a space : keep as is | |
if ( c != ' ' ) | |
{ | |
chars[d] = c; | |
p--; | |
d--; | |
} | |
// Space | |
else | |
{ | |
char c1 = chars[p - 1]; | |
// Previous char is special : investigate deeper | |
if ( c1 == '\r' ) | |
{ | |
if ( ( chars[p - 2] == '\n' ) ) | |
{ | |
p -= 3; | |
} | |
else | |
{ | |
p -= 2; | |
} | |
} | |
else if ( c1 == '\n' ) | |
{ | |
if ( ( chars[p - 2] == '\r' ) ) | |
{ | |
p -= 3; | |
} | |
else | |
{ | |
p -= 2; | |
} | |
} | |
// It was just a space : keep it | |
else | |
{ | |
chars[d] = c; | |
p--; | |
d--; | |
} | |
} | |
} | |
// Keep the remaining chars as it (special cases already covered) | |
while ( p >= 0 ) | |
{ | |
chars[d--] = chars[p--]; | |
} | |
return new String( chars, d + 1, chars.length - d - 1 ); | |
} | |
} | |
@Benchmark | |
public String unfold_original() | |
{ | |
return Replacer.unfold_original(string); | |
} | |
@Benchmark | |
public String unfold_regexp() | |
{ | |
return Replacer.unfold_regexp(string); | |
} | |
@Benchmark | |
public String unfold_regexpcompiled() | |
{ | |
return Replacer.unfold_regexpcompiled(string); | |
} | |
@Benchmark | |
public String unfold_optim_emmanuel_1() | |
{ | |
return Replacer.unfold_optim_emmanuel_1(string); | |
} | |
@Benchmark | |
public String unfold_with_stringutils_on_apache_common() | |
{ | |
return Replacer.unfold_with_stringutils_on_apache_common(string); | |
} | |
@Benchmark | |
public String unfold_unfold_olivier() | |
{ | |
return Replacer.unfold_olivier(string); | |
} | |
@Benchmark | |
public String unfold_olivier_optimise_par_emmanuel() | |
{ | |
return Replacer.unfold_olivier_optimise_par_emmanuel(string); | |
} | |
public static void main(String[] args) throws RunnerException, IOException | |
{ | |
Options opt = new OptionsBuilder().include(".*" + ReplaceAllBench.class.getSimpleName() + ".*") | |
.warmupIterations(20) | |
.warmupTime(TimeValue.seconds(1)) | |
.measurementIterations(20) | |
.timeUnit(TimeUnit.NANOSECONDS) | |
.forks(1) | |
// .addProfiler(LinuxPerfAsmProfiler.class) | |
.build(); | |
new Runner(opt).run(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hmmm, effectivement, un cas n'était pas géré. J'en ai profité pour essayer d'améliorer un peu l'algo. Tu peux le tester ?