Last active
December 20, 2015 09:09
-
-
Save rlankenau/6105437 to your computer and use it in GitHub Desktop.
Code to split RetroSheet records by game
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
private boolean isStartLine(Text t) | |
{ | |
/* Find the end of the first field */ | |
int fieldTerm = t.find(","); | |
int idTerm = t.find("id"); | |
return (idTerm != -1 && fieldTerm != -1 && idTerm < fieldTerm); | |
} | |
public boolean nextKeyValue() throws IOException | |
{ | |
Text line = new Text(); | |
Text newline = new Text("\n"); | |
Date d = new Date(); | |
if(key == null) | |
key = new LongWritable(); | |
key.set(pos); | |
if(value == null) | |
value = new Text(); | |
value.clear(); | |
int newSize = 0; | |
if(deferred_line != null) { | |
/* We held onto a line on the last invocation. Copy it in before we start */ | |
value.append(deferred_line.getBytes(), 0, deferred_line.getLength()); | |
value.append(newline.getBytes(), 0, newline.getLength()); | |
this.pos+=deferred_line.getLength(); | |
deferred_line=null; | |
} | |
newSize = in.readLine(line, maxLineLength); | |
value.append(line.getBytes(), 0, line.getLength()); | |
value.append(newline.getBytes(), 0, newline.getLength()); | |
this.pos+=newSize; | |
if(newSize == 0) { | |
/* If we got 0 bytes, we're at EOF and need to bail. | |
since we didn't get a first record, return nothing | |
*/ | |
key = null; | |
value = null; | |
d = new Date(); | |
return false; | |
} | |
while(true) { | |
line.clear(); | |
newSize = in.readLine(line, maxLineLength); | |
if(isStartLine(line)) | |
{ | |
/* Save this line for the next record */ | |
this.deferred_line = line; | |
/* Return the current version. */ | |
return true; | |
} else if (newSize == 0) { | |
/* At EOF */ | |
return true; | |
} else { | |
value.append(line.getBytes(), 0, line.getLength()); | |
value.append(newline.getBytes(), 0, newline.getLength()); | |
this.pos+=newSize; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment