Created
May 20, 2019 13:29
-
-
Save hoijui/922f5f564ee2d27c078526fd24f2b75b to your computer and use it in GitHub Desktop.
fast, stream-based XML pretty-printer (Java 8, LGPL v3+)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Copyright (C) 2019, Robin Vobruba <[email protected]>.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
import java.io.FileInputStream; | |
import java.io.FileOutputStream; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.io.OutputStream; | |
import java.io.PrintStream; | |
/**
 * Takes XML content as input,
 * and reproduces the same content as output,
 * but more pleasing to the human eye,
 * by adding proper line-endings and indents.
 *
 * This implementation is:
 * + fast
 * + memory efficient (only the row currently being read is kept in memory)
 * + uses no external dependencies
 * + handles invalid XML gracefully
 * + byte-transparent for ASCII-compatible encodings (UTF-8, ISO-8859-x, ...):
 *   apart from the white-space that is added or removed around rows,
 *   the bytes of the input are passed through unchanged
 * - may produce strange results if there are '{@literal <}' or '{@literal >}' characters
 *   elsewhere than in tags
 * - does not support encodings that are not ASCII-compatible (for example UTF-16)
 */
@SuppressWarnings("WeakerAccess")
public class XmlFastRoughAndPrettyFormatter {

    /**
     * Charset used to map bytes to chars and back, one to one.
     * Every byte becomes exactly one char and vice versa,
     * so multi-byte sequences of the real document encoding can never be damaged,
     * no matter where a read operation happens to cut the input.
     * This charset is guaranteed to be available on every Java platform.
     */
    private static final String BYTE_TRANSPARENT_CHARSET = "ISO-8859-1";

    private final String indent;

    /**
     * Creates an instance with specific values.
     *
     * @param indent what string to use for one indent
     *   (this might be two spaces or one TAB, for example);
     *   it should consist of ASCII characters only
     */
    public XmlFastRoughAndPrettyFormatter(final String indent) {
        this.indent = indent;
    }

    /**
     * Creates an instance with default values.
     */
    public XmlFastRoughAndPrettyFormatter() {
        this(" ");
    }

    /**
     * Command line entry point.
     * Without arguments, it filters stdin to stdout;
     * with one argument, it formats that file to stdout;
     * with two arguments, it formats the first file into the second one.
     * Any other number of arguments prints the usage and exits with status 1.
     *
     * @param args optionally the input file, optionally followed by the output file
     * @throws IOException if any input or output fails
     */
    public static void main(final String[] args) throws IOException {
        final XmlFastRoughAndPrettyFormatter formatter = new XmlFastRoughAndPrettyFormatter();
        switch (args.length) {
            case 0:
                formatter.prettify(System.in, System.out, createBuffer());
                break;
            case 1:
                try (final InputStream source = new FileInputStream(args[0])) {
                    formatter.prettify(source, System.out, createBuffer());
                }
                break;
            case 2:
                try (final InputStream source = new FileInputStream(args[0]);
                        final OutputStream target = new FileOutputStream(args[1]))
                {
                    formatter.prettify(source, target, createBuffer());
                }
                break;
            default:
                final String name = XmlFastRoughAndPrettyFormatter.class.getSimpleName();
                System.err.println("Usage:");
                System.err.printf("\t%s in-file.xml out-file.xml%n", name);
                System.err.printf("\t%s in-file.xml > out-file.xml%n", name);
                System.err.printf("\t%s < in-file.xml > out-file.xml%n", name);
                System.exit(1);
                break;
        }
    }

    private static byte[] createBuffer() {
        return new byte[2048];
    }

    /**
     * Rejects buffers that can not hold any data.
     * Reading into a zero-length buffer always yields zero bytes,
     * which would make a copy loop spin forever without ever reaching the end of the input.
     */
    private static void requireUsableBuffer(final byte[] buffer) {
        if (buffer.length == 0) {
            throw new IllegalArgumentException("The buffer has to have a length of at least 1");
        }
    }

    /**
     * Reformats XML content to be easy on the human eye.
     * If the formatter itself fails with an unexpected (runtime) exception,
     * the stack-trace is printed to stderr and the remaining, not yet consumed input
     * is copied to the output as-is; input that was already consumed
     * but not yet written at that point is lost.
     * I/O failures are not covered by this fallback; they are reported to the caller.
     *
     * @param xmlIn the supplier of XML content to pretty-print
     * @param xmlOut where the pretty XML content shall be written to
     * @param buffer may be used internally for whatever in- or out-buffering there might be;
     *   has to have a length of at least 1
     * @throws IOException if any input or output fails
     * @throws IllegalArgumentException if {@code buffer} has a length of zero
     */
    public void prettify(final InputStream xmlIn, final OutputStream xmlOut, final byte[] buffer)
            throws IOException {
        requireUsableBuffer(buffer);
        try {
            prettifyRoughAndFast(xmlIn, xmlOut, buffer);
        } catch (final RuntimeException ex) {
            ex.printStackTrace(System.err);
            // In case of failure of pretty-printing, use the (rest of the) XML as-is
            transferTo(xmlIn, xmlOut, buffer);
        }
    }

    /**
     * Copies input content to output.
     * The same like Java 9's {@code InputStream#transferTo(OutputStream)}.
     *
     * @param source the source of the data
     * @param target where the source data should be copied to
     * @param buffer the buffer to use for transferring;
     *   no more than {@code buffer.length} bytes are read at a time;
     *   has to have a length of at least 1
     * @throws IOException if any input or output fails
     * @throws IllegalArgumentException if {@code buffer} has a length of zero
     */
    public static void transferTo(final InputStream source, final OutputStream target, final byte[] buffer)
            throws IOException
    {
        requireUsableBuffer(buffer);
        int numRead;
        while ((numRead = source.read(buffer)) >= 0) {
            target.write(buffer, 0, numRead);
        }
    }

    /**
     * Does the actual pretty-printing, without any fallback.
     * The input is cut into rows before each '{@literal <}', after each '{@literal >}'
     * and at each line-feed; each non-blank row is then trimmed
     * and written on its own line through {@link #handleRow(PrintStream, String, int)}.
     * The result does not depend on where the individual read operations cut the input.
     *
     * @param xmlIn the supplier of XML content to pretty-print
     * @param xmlOut where the pretty XML content shall be written to
     * @param buffer used for reading from {@code xmlIn};
     *   has to have a length of at least 1
     * @throws IOException if any input or output fails
     * @throws IllegalArgumentException if {@code buffer} has a length of zero
     */
    public void prettifyRoughAndFast(final InputStream xmlIn, final OutputStream xmlOut, final byte[] buffer)
            throws IOException {
        requireUsableBuffer(buffer);
        // this is a kind of stack, denoting the number of indents
        int numIndents = 0;
        // holds the row that is currently being assembled
        final StringBuilder row = new StringBuilder();
        // the out stream wrapper allows to write string data more comfortably;
        // it maps each char back to the single byte it was created from
        final PrintStream xmlOutPrinter = new PrintStream(xmlOut, false, BYTE_TRANSPARENT_CHARSET);
        for (int readBytes = xmlIn.read(buffer); readBytes >= 0; readBytes = xmlIn.read(buffer)) {
            for (int ib = 0; ib < readBytes; ib++) {
                // one byte becomes one char (see BYTE_TRANSPARENT_CHARSET)
                final char chr = (char) (buffer[ib] & 0xFF);
                if (chr == '<') {
                    // a tag starts: whatever came before is a row of its own
                    numIndents = flushRow(xmlOutPrinter, row, numIndents);
                    row.append(chr);
                } else if (chr == '>') {
                    // a tag ends: it is the last char of its row
                    row.append(chr);
                    numIndents = flushRow(xmlOutPrinter, row, numIndents);
                } else if (chr == '\n') {
                    numIndents = flushRow(xmlOutPrinter, row, numIndents);
                } else {
                    row.append(chr);
                }
            }
        }
        // handle the last row
        flushRow(xmlOutPrinter, row, numIndents);
        xmlOutPrinter.flush();
        // PrintStream never throws on write failures, it only remembers them
        if (xmlOutPrinter.checkError()) {
            throw new IOException("Failed to write the pretty-printed XML content");
        }
    }

    /**
     * Hands the trimmed content of {@code row} over to {@code handleRow}
     * and empties {@code row}; returns the (possibly changed) number of indents.
     */
    private int flushRow(final PrintStream xmlOut, final StringBuilder row, final int numIndents) {
        if (row.length() == 0) {
            return numIndents;
        }
        final String content = row.toString().trim();
        row.setLength(0);
        return handleRow(xmlOut, content, numIndents);
    }

    private static void appendIndents(final PrintStream output, final int numIndents, String indent) {
        for (int ii = 0; ii < numIndents; ii++) {
            output.append(indent);
        }
    }

    /**
     * Writes a single row, followed by a line-feed, and keeps track of the nesting depth.
     * <ul>
     * <li>empty rows are skipped</li>
     * <li>rows starting with {@code <?} (XML declaration, processing instructions)
     *   are written without any indent</li>
     * <li>rows starting with {@code <!} (comments, DOCTYPE, CDATA)
     *   are written at the current depth, and do not change it</li>
     * <li>closing tags reduce the depth by one (but never below zero),
     *   and are written at the reduced depth,
     *   which makes them line up with their opening tag</li>
     * <li>opening tags are written at the current depth,
     *   and then increase it by one for their content</li>
     * <li>everything else (self-closing tags, text) is written at the current depth</li>
     * </ul>
     *
     * @param xmlOut where to write the row to
     * @param row the already trimmed content of the row
     * @param numIndents the nesting depth before this row
     * @return the nesting depth after this row
     */
    public int handleRow(final PrintStream xmlOut, final String row, int numIndents) {
        if (row.isEmpty()) {
            return numIndents;
        }
        if (row.startsWith("<?")) {
            xmlOut.append(row).append("\n");
        } else if (row.startsWith("<!")) {
            appendIndents(xmlOut, numIndents, indent);
            xmlOut.append(row).append("\n");
        } else if (row.startsWith("</")) {
            // a closing tag without matching opening tag must not lead to a negative depth
            numIndents = Math.max(0, numIndents - 1);
            appendIndents(xmlOut, numIndents, indent);
            xmlOut.append(row).append("\n");
        } else if (row.startsWith("<") && !row.endsWith("/>")) {
            appendIndents(xmlOut, numIndents, indent);
            xmlOut.append(row).append("\n");
            // a '<' within a CDATA section looks like an opening tag;
            // do not nest if the row ends that section
            if (!row.endsWith("]]>")) {
                numIndents++;
            }
        } else {
            appendIndents(xmlOut, numIndents, indent);
            xmlOut.append(row).append("\n");
        }
        return numIndents;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment