Skip to content

Instantly share code, notes, and snippets.

@Groostav
Created January 19, 2025 07:36
Show Gist options
  • Save Groostav/fe317850377fc4e17e68c4993aa49c7e to your computer and use it in GitHub Desktop.
Save Groostav/fe317850377fc4e17e68c4993aa49c7e to your computer and use it in GitHub Desktop.
non-functioning scanner impl
object CommaSeparatedTextMatrixLexer {
private val LINE_END = Pattern.compile(";?\r?\n")
private val COMMA = Pattern.compile(",\\s*")
private val NAME = Pattern.compile("[^;\r\n,]+")
private val NAME_AND_LINE_END = Pattern.compile("[^;\r\n,]+;?\r?\n")
fun tokenize(stream: InputStream): Sequence<MatrixToken> {
val chars = Scanner(stream)
return sequence<MatrixToken> {
chars.use {
while(chars.hasNext()) {
val token = when {
fail; //this doesnt work;
// its structured around the delimeter used by Scanner;
// that is: the default scanner uses spaces as delimeters,
// and the provided regex must match the entire text between delimeters.
// and there doesnt appear to be any mechanism to actually capture the delimeters themselves.
chars.hasNext(COMMA) -> {
chars.next(COMMA)
MatrixToken.CellEnd
}
chars.hasNext(LINE_END) -> {
chars.next(LINE_END)
MatrixToken.RowEnd
}
chars.hasNextBigDecimal() -> {
val decimalValue = chars.nextBigDecimal()
MatrixToken.Decimal(decimalValue)
}
chars.hasNext(NAME) -> {
val name = chars.next(NAME)
MatrixToken.Name(name)
}
else -> {
val errChar = chars.next()
MatrixToken.Error(errChar)
}
// whitespace?
}
yield(token)
}
yield(MatrixToken.EndOfFile)
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment