Last active
October 28, 2020 09:02
-
-
Save Matt-V50/8204df96cae877b9a0a0d8367c556788 to your computer and use it in GitHub Desktop.
This may be the fastest way to randomly read any line of the file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import lombok.extern.slf4j.Slf4j; | |
import java.io.*; | |
import java.net.URISyntaxException; | |
import java.net.URL; | |
import java.nio.charset.StandardCharsets; | |
import java.util.ArrayList; | |
import java.util.List; | |
import java.util.Random; | |
/** | |
* Created on 2020/10/27-15:43 | |
* | |
*/ | |
@Slf4j | |
public abstract class RandomLineReader { | |
protected File file; | |
protected Random random = new Random(); | |
public RandomLineReader(String fileName) { | |
this.file = getFileFromResource(fileName); | |
} | |
protected File getFileFromResource(String fileName) { | |
ClassLoader classLoader = getClass().getClassLoader(); | |
URL resource = classLoader.getResource(fileName); | |
if (resource != null) { | |
try { | |
return new File(resource.toURI()); | |
} catch (URISyntaxException e) { | |
log.error("", e); | |
} | |
} else { | |
log.error("file not found: {}", fileName); | |
} | |
return null; | |
} | |
public String read() { | |
return innerRead(); | |
} | |
public void close() { | |
log.info("close"); | |
innerClose(); | |
} | |
protected abstract String innerRead(); | |
abstract void init(); | |
protected abstract void innerClose(); | |
/** | |
* 通过建立换行索引多次随机读取 | |
*/ | |
@Slf4j | |
public static class LineIndex extends RandomLineReader { | |
List<Integer> index = new ArrayList<>(); | |
RandomAccessFile randomAccessFile; | |
public LineIndex(String fileName) { | |
super(fileName); | |
} | |
@Override | |
protected String innerRead() { | |
int loc = random.nextInt(index.size()); | |
int from; | |
int to; | |
if (loc == 0) from = 0; | |
else from = index.get(loc - 1); | |
to = index.get(loc); | |
int size = to - from; //to - from = line + \n + [\r] | |
try { | |
randomAccessFile.seek(from); | |
byte[] bytes = new byte[size]; | |
randomAccessFile.read(bytes, 0, size); | |
return new String(bytes, StandardCharsets.UTF_8); | |
} catch (IOException e) { | |
log.info("", e); | |
} | |
return null; | |
} | |
@Override | |
void init() { | |
log.info("init"); | |
int count = 0; | |
try (LineNumberReader reader = new LineNumberReader(new BufferedReader(new FileReader(file)))) { | |
String line = reader.readLine(); | |
while (line != null) { | |
// windows : \r\n | |
// linux: \r | |
log.info("line number {}", reader.getLineNumber()); | |
count += line.length() + 1; | |
index.add(count); | |
reader.mark(0); | |
line = reader.readLine(); | |
} | |
} catch (IOException e) { | |
log.error("", e); | |
} | |
try { | |
randomAccessFile = new RandomAccessFile(file, "r"); | |
} catch (FileNotFoundException e) { | |
log.error("", e); | |
} | |
} | |
@Override | |
protected void innerClose() { | |
try { | |
randomAccessFile.close(); | |
} catch (IOException e) { | |
log.info("can not close file", e); | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment