Created
January 19, 2025 07:36
-
-
Save Groostav/fe317850377fc4e17e68c4993aa49c7e to your computer and use it in GitHub Desktop.
non-functioning scanner impl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
object CommaSeparatedTextMatrixLexer { | |
private val LINE_END = Pattern.compile(";?\r?\n") | |
private val COMMA = Pattern.compile(",\\s*") | |
private val NAME = Pattern.compile("[^;\r\n,]+") | |
private val NAME_AND_LINE_END = Pattern.compile("[^;\r\n,]+;?\r?\n") | |
fun tokenize(stream: InputStream): Sequence<MatrixToken> { | |
val chars = Scanner(stream) | |
return sequence<MatrixToken> { | |
chars.use { | |
while(chars.hasNext()) { | |
val token = when { | |
fail; //this doesnt work; | |
// its structured around the delimeter used by Scanner; | |
// that is: the default scanner uses spaces as delimeters, | |
// and the provided regex must match the entire text between delimeters. | |
// and there doesnt appear to be any mechanism to actually capture the delimeters themselves. | |
chars.hasNext(COMMA) -> { | |
chars.next(COMMA) | |
MatrixToken.CellEnd | |
} | |
chars.hasNext(LINE_END) -> { | |
chars.next(LINE_END) | |
MatrixToken.RowEnd | |
} | |
chars.hasNextBigDecimal() -> { | |
val decimalValue = chars.nextBigDecimal() | |
MatrixToken.Decimal(decimalValue) | |
} | |
chars.hasNext(NAME) -> { | |
val name = chars.next(NAME) | |
MatrixToken.Name(name) | |
} | |
else -> { | |
val errChar = chars.next() | |
MatrixToken.Error(errChar) | |
} | |
// whitespace? | |
} | |
yield(token) | |
} | |
yield(MatrixToken.EndOfFile) | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment