Last active
November 24, 2022 13:47
-
-
Save aaronanderson/e370f2bbe45b4c38e59ab73d47a9b43c to your computer and use it in GitHub Desktop.
Apache POI Excel SAX Handler for large Excel files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Example handler, in this case Excel rows are stored into an Apache Ignite cache | |
public class MappingSheetHandler extends SheetHandler { | |
IgniteCache<BinaryObject, BinaryObject> mappingCache; | |
protected Map<String, String> headerRowMapping = new HashedMap<>(); | |
protected LocationSheetHandler() { | |
mappingCache = ignite.cache("mappings").withKeepBinary(); | |
} | |
@Override | |
protected boolean processSheet() { | |
return "Sheet 1".equals(sheetName); | |
} | |
@Override | |
protected void startSheet() { | |
log.info(String.format("********************** Processing Mapping Sheet - %s ***************************", sheetName)); | |
} | |
@Override | |
protected void endSheet() { | |
log.info(String.format("********************** Processed Mapping Sheet - %s ***************************", sheetName)); | |
} | |
@Override | |
protected void processRow() { | |
if (rowNumber > 1 && !rowValues.isEmpty()) { | |
String oldID = rowValues.get("A"); | |
String newID = rowValues.get("B"); | |
if (newID == null) { | |
return; | |
} | |
String color = "white"; | |
CTXf style = rowStyles.get("A"); | |
if (style.getApplyFill()) { | |
XSSFCellFill fill = stylesTable.getFillAt((int) style.getFillId()); | |
if ("FFFFFF00".contentEquals(fill.getFillForegroundColor().getARGBHex())) { | |
color = "yellow"; | |
} | |
} | |
BinaryObjectBuilder keyBuilder = ignite.binary().builder("Key"); | |
keyBuilder.setField("srtype", "SomeType"); | |
keyBuilder.setField("srid", oldID); | |
keyBuilder.setField("color", color); | |
BinaryObjectBuilder valBuilder = ignite.binary().builder("Value"); | |
valBuilder.setField("trtype", "SomeType"); | |
valBuilder.setField("trid", newID); | |
mappingCache.put(keyBuilder.build(), valBuilder.build()); | |
} | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.InputStream; | |
import java.util.HashMap; | |
import java.util.Iterator; | |
import java.util.Map; | |
import javax.xml.parsers.SAXParser; | |
import javax.xml.parsers.SAXParserFactory; | |
import org.apache.poi.openxml4j.opc.OPCPackage; | |
import org.apache.poi.xssf.eventusermodel.XSSFReader; | |
import org.apache.poi.xssf.model.SharedStringsTable; | |
import org.apache.poi.xssf.model.StylesTable; | |
import org.apache.poi.xssf.usermodel.XSSFRichTextString; | |
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTXf; | |
import org.xml.sax.Attributes; | |
import org.xml.sax.ContentHandler; | |
import org.xml.sax.SAXException; | |
import org.xml.sax.helpers.DefaultHandler; | |
//Used to handle large Excel files where the full object model doesn't fit into the JVM heap so stream it instead. Based of off this example - https://dzone.com/articles/introduction-to-apache-poi-library | |
public class SheetHandler extends DefaultHandler { | |
private SharedStringsTable sharedStringsTable; | |
private String contents; | |
private boolean isCellValue; | |
private boolean fromSST; | |
protected String sheetName; | |
protected int sheetNumber = 0; | |
protected Map<String, String> header = new HashMap(); | |
protected Map<String, String> rowValues = new HashMap(); | |
protected StylesTable stylesTable; | |
protected Map<String, CTXf> rowStyles = new HashMap(); | |
protected long rowNumber = 0; | |
protected String cellId; | |
@Override | |
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { | |
// Clear contents cache | |
contents = ""; | |
// element row represents Row | |
if (name.equals("row")) { | |
String rowNumStr = attributes.getValue("r"); | |
//System.out.println("Row# " + rowNumStr); | |
rowNumber = Long.parseLong(rowNumStr); | |
} | |
// element c represents Cell | |
else if (name.equals("c")) { | |
cellId = getColumnId(attributes.getValue("r")); | |
// attribute r represents the cell reference | |
//System.out.print(attributes.getValue("r") + " - "); | |
// attribute t represents the cell type | |
String cellType = attributes.getValue("t"); | |
if (cellType != null && cellType.equals("s")) { | |
// cell type s means value will be extracted from SharedStringsTable | |
fromSST = true; | |
} | |
String cellStyleStr = attributes.getValue("s"); | |
if (cellStyleStr != null) { | |
int cellStyleInt = Integer.parseInt(cellStyleStr); | |
CTXf cellStyle = stylesTable.getCellXfAt(cellStyleInt); | |
rowStyles.put(cellId, cellStyle); | |
} | |
// element v represents value of Cell | |
} else if (name.equals("v")) { | |
isCellValue = true; | |
} | |
} | |
@Override | |
public void characters(char[] ch, int start, int length) throws SAXException { | |
if (isCellValue) { | |
contents += new String(ch, start, length); | |
} | |
} | |
@Override | |
public void endElement(String uri, String localName, String name) throws SAXException { | |
if (isCellValue && fromSST) { | |
int index = Integer.parseInt(contents); | |
contents = new XSSFRichTextString(sharedStringsTable.getItemAt(index).getString()).toString(); | |
//System.out.format("%d %s %s\n", rowNumber, cellId, contents); | |
rowValues.put(cellId, contents); | |
cellId = null; | |
isCellValue = false; | |
fromSST = false; | |
} else if (isCellValue) { | |
rowValues.put(cellId, contents); | |
isCellValue = false; | |
} else if (name.equals("row")) { | |
if (rowNumber == 1) { | |
for (Map.Entry<String, String> row : rowValues.entrySet()) { | |
header.put(row.getValue(), row.getKey()); | |
} | |
} | |
processRow(); | |
rowValues.clear(); | |
} | |
} | |
protected boolean processSheet() { | |
return true; | |
} | |
protected void startSheet() { | |
} | |
protected void endSheet() { | |
} | |
protected void processRow() { | |
} | |
protected static String getColumnId(String attribute) throws SAXException { | |
for (int i = 0; i < attribute.length(); i++) { | |
if (!Character.isAlphabetic(attribute.charAt(i))) { | |
return attribute.substring(0, i); | |
} | |
} | |
throw new SAXException("Invalid format " + attribute); | |
} | |
public void readExcelFile(String filename) throws Exception { | |
SAXParserFactory factory = SAXParserFactory.newInstance(); | |
SAXParser saxParser = factory.newSAXParser(); | |
try (OPCPackage opcPackage = OPCPackage.open(filename);) { | |
XSSFReader xssfReader = new XSSFReader(opcPackage); | |
sharedStringsTable = xssfReader.getSharedStringsTable(); | |
stylesTable = xssfReader.getStylesTable(); | |
ContentHandler handler = this; | |
Iterator<InputStream> sheets = xssfReader.getSheetsData(); | |
if (sheets instanceof XSSFReader.SheetIterator) { | |
XSSFReader.SheetIterator sheetiterator = (XSSFReader.SheetIterator) sheets; | |
while (sheetiterator.hasNext()) { | |
try (InputStream sheet = sheetiterator.next();) { | |
sheetName = sheetiterator.getSheetName(); | |
sheetNumber++; | |
if (!processSheet()) { | |
continue; | |
} | |
startSheet(); | |
saxParser.parse(sheet, (DefaultHandler) handler); | |
endSheet(); | |
} | |
} | |
} | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class Test { | |
public static void main (String ... args) throws Exception { | |
MappingSheetHandler handler = new MappingSheetHandler(); | |
handler.readExcelFile("Test.xlsx"); | |
} | |
} |
Can we use sax for writing or exporting a big excel file?
Yeb, But how can you read multiple files and multi request at once without writing the wrong data? I'm rewriting the function based on your sample and am having this problem. Thanks!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Can we use sax for writing or exporting a big excel file?