Skip to content

Instantly share code, notes, and snippets.

@scolladon
Created July 23, 2024 16:29
Show Gist options
  • Save scolladon/8a9cd4b24472bd0ecec046f8b1da428c to your computer and use it in GitHub Desktop.
Save scolladon/8a9cd4b24472bd0ecec046f8b1da428c to your computer and use it in GitHub Desktop.
/**
 * Split a string of any size around matches of a regular expression, while
 * avoiding the dreaded 'Regex too complicated' error, which the
 * String.split(String) method causes on some large inputs.
 *
 * Every piece between consecutive matches is returned, followed by whatever
 * remains after the last match. An input that ends with a delimiter therefore
 * yields a trailing empty string, and an input with no match is returned as a
 * single-element list.
 *
 * Note that this method does not avoid other errors, such as those related to
 * excess heap size or CPU time.
 *
 * NOTE(review): delimiters that can match the empty string are not specially
 * handled here -- confirm the desired behavior before relying on them.
 *
 * @param inStr the text to split
 * @param delim the delimiter, compiled as a regular expression
 * @return the pieces of inStr, in order of appearance
 */
List<String> safeSplit(String inStr, String delim)
{
    // Number of find() calls performed between two matcher resets.
    Integer regexFindLimit = 100;
    List<String> output = new List<String>();
    Matcher m = Pattern.compile(delim).matcher(inStr);
    Integer lastEnd = 0;

    // Termination is driven by find() returning false, not by hitEnd().
    // hitEnd() only reports that the engine looked at the end of the input:
    // it can be true after a successful match (e.g. a lookahead or a greedy
    // quantifier probing the end), which would drop the remaining tail.
    Boolean exhausted = false;
    while (!exhausted)
    {
        Integer regexFindCount = 0;
        while (!exhausted && regexFindCount < regexFindLimit)
        {
            if (m.find())
            {
                output.add(inStr.substring(lastEnd, m.start()));
                lastEnd = m.end();
            }
            else
            {
                // No further delimiter: everything left is the last piece.
                output.add(inStr.substring(lastEnd));
                exhausted = true;
            }
            regexFindCount++;
        }

        if (!exhausted)
        {
            // Note: Using region() to advance instead of substring() saves
            // drastically on heap size. Nonetheless, we still must reset the
            // (unmodified) input sequence to avoid a 'Regex too complicated'
            // error.
            m.reset(inStr);
            m.region(lastEnd, inStr.length());
        }
    }
    return output;
}
// Testing code
///////////////////
// Build one large input out of many CRLF-terminated copies of the same line.
Integer lineCount = 50000;
String singleLine = 'All work and no play makes Jack a dull boy.\r\n';
String hugeText = singleLine.repeat(lineCount);

// This generates a 'Regex too complicated' exception.
//
// List<String> pieces = hugeText.split('\r\n');

// This avoids a 'Regex too complicated' exception.
//
List<String> pieces = safeSplit(hugeText, '\r\n');

// The input ends with a delimiter, so one extra (empty) piece follows the
// last line.
Integer expectedPieces = lineCount + 1;
System.assertEquals(expectedPieces, pieces.size());
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment