Skip to content

Instantly share code, notes, and snippets.

@asdf913
Created May 4, 2026 05:25
Show Gist options
  • Select an option

  • Save asdf913/06b216e44c589d7984fb31c32a5ca621 to your computer and use it in GitHub Desktop.

Select an option

Save asdf913/06b216e44c589d7984fb31c32a5ca621 to your computer and use it in GitHub Desktop.
Docx Document to generate Ruby HTML
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Proxy;
import java.util.Base64;
import java.util.Base64.Decoder;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;
import java.util.stream.Collector;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.xml.namespace.QName;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.xmlbeans.XmlAnySimpleType;
import org.apache.xmlbeans.XmlCursor;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRuby;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRubyContent;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
public class DocxToRubyHTML {
public static void main(final String[] args) throws IOException {
//
try (final InputStream is = new ByteArrayInputStream(decode(Base64.getDecoder(),
"UEsDBBQACAgIALMepFwAAAAAAAAAAAAAAAALAAAAX3JlbHMvLnJlbHOt0sFKAzEQBuB7n2KZe3e2VURks72I0JtIfYCQzO4Gm0xIplrf3lAKulBWwR4z+efnI6TdHP2+eqeUHQcFq7qBioJh68Kg4HX3tLyHTbdoX2ivpUTy6GKuyk7ICkaR+ICYzUhe55ojhXLTc/JayjENGLV50wPhumnuMP3sgG7SWW2tgrS1K6h2n5H+142eRFstGg0nWsZUtpM4yqVcp4FEgWXzXMb5lKhLM+Bl0PrvIO57Z+iRzcFTkEsuOgoFS3aepGOcE91cU2QOWdj/8kSnzBzp9pqkaeLb88HJoj2Pz5pFi5Of2X0BUEsHCOVy9kToAAAA0AIAAFBLAwQUAAgICACzHqRcAAAAAAAAAAAAAAAAEQAAAGRvY1Byb3BzL2NvcmUueG1sjVLLTsMwELzzFZHvifNoqspKUglQT1RCahGIm7G3qSFxLNt9/T1O0qQFeuC2szOefTmbH+vK24M2opE5ioIQeSBZw4Usc/SyXvgz5BlLJadVIyFHJzBoXtxlTBHWaHjWjQJtBRjPGUlDmMrR1lpFMDZsCzU1gVNIR24aXVProC6xouyLloDjMJziGizl1FLcGvpqdERnS85GS7XTVWfAGYYKapDW4CiI8EVrQdfm5oOOuVLWwp4U3JQO5Kg+GjEKD4dDcEg6qes/wm/Lp1U3qi9kuyoGqMjOjRCmgVrgnjMgfbmBeU0eHtcLVMRhPPXD1A8n6ygiaUqiyXuGf71vDfu40UXLXoCLORimhbLuhj35I+FwRWW5cwsvQPovq04yptpTVtTYpTv6RgC/PzmPG7mho/qc++dIMUkTksyuRhoMusoa9qL9e0XcFR1h27XZfXwCs/1II3CxFbaCPj2Ef/5j8Q1QSwcI7TYTzWQBAADbAgAAUEsDBBQACAgIALMepFwAAAAAAAAAAAAAAAAQAAAAZG9jUHJvcHMvYXBwLnhtbJ1RXW/CIBR9369omr1aaG27zlDMsmVPJjNZp3szCFfL0gIBNPrvh5p1zR7H0z0fOQe4ZH7qu+gI1kmt6jhNcByB4lpIta/jj+Z1UsWR80wJ1mkFdXwGF8/pHVlabcB6CS4KCcrVceu9mSHkeAs9c0mQVVB22vbMB2j3SO92ksOL5ocelEcZxiWCkwclQEzMEBjfEmdH/99Qofnlfm7VnE3Io6SB3nTMAyXod2y0Z10je6A40AMgT8Z0kjMffoQu5NbC27UCZWWSJdMku19IdThtPqtyU+bRyLEJb/gC7pHIRQEif9iWuMrTrNhllcjKR1aUBU5xIYpcCMDTnKBx16V4ddsETYsEh3M1/HBkyfbgaErQbSBrbYWjJUG3gTy3zDLug/1CjtBIWUvfvhvG4Y9nxIcey/aWmdbR7FI2oACGLdFvUEsHCNxvdy1DAQAAOwIAAFBLAwQUAAgICACzHqRcAAAAAAAAAAAAAAAAEwAAAGRvY1Byb3BzL2N1c3RvbS54bWydzrEKwjAUheHdpwjZ21QHkdK0izg7VPeQ3rYBc2/ITYt9eyOC7o6HHz5O0z39Q6wQ2RFquS8rKQAtDQ4nLW/9pThJwcngYB6EoOUGLLt211wjBYjJAYssIGs5pxRqpdjO4A2XOWMuI0VvUp5xUjSOzsKZ7OIBkzpU1VHZhRP5Inw5+fHqNf1LDmTf7/jebyF7baN+Z9sXUEsHCOHWAICXAAAA8QAAAFBLAwQUAAgICACzHqRcAAAAAAAAAAAAAAAAHAAAAHdvcmQvX3JlbHMvZG9jdW1lbnQueG1sLnJlbHOtUssKwjAQvPsVYe82rYqINPUiglepHxDT7QPbJCSr6N8bVLSCiIceZzY7M0w2XV26lp3R+cZoAUkUA0OtTNHoSsA+34wXsMpG6Q5bSeGJrxvrWdjRXkBNZJece1VjJ31kLOowKY3rJAXoKm6lOsoK+SSO59z1NSD70GTbQoDbFgmw/GrxH21Tlo3CtVGnDjV9seCeri36oChdhSTggaOgA/y7/WRI+9JoyuWhxXeCF/UrxHTQDpAo/GW/hSfzK8JsyAgUdnsd3OGDTJ4ZRin/OLDsBlBLBwh2ZKpt1AAAAJcCAABQSwMEFAAICAgAsx6kXAAAAAAAAAAAAAAAABEAAAB3b3JkL2RvY3VtZW50LnhtbO1WzW7TQBC+8xSW76njtFQlalKVVK0AUUUkPMBmvbYX9k+766TpLVwRR4TEBc4ceKkIicdg1vbaSUAotBLigBRl7W9mvvlmdtb26dkNZ8GcaEOlGITxQTcMiMAyoSIbhC+nl52TMDAWiQQxKcggXBITng0fnC76icQFJ8IGwCBMXw7CQou+wTnhyHQ4xVoamdoOlrwv05RiUi9hHaEHYW6t6kdRHXQgFRFgS6XmyMKtzqIq5KLOFfW63eNIE4Ys6DU5VcazzX+Xf86Z91vsk3UhdaK0xMQYaARnVV6OqGho4u4eBTueJkLtkznRaLGRclvIRWX0jIriO1BClC00aWWZn0iaWg6glnoLSinAEHd3RE1ypDbYsvuxXWlZKM/G96qPI/26UK7tCsZiRhm1y7LUVlR8dD9VO41f3I1vYwjjh39G0GsIOO4/yYTUaMbgOIKSwJUXAGM4hFM5k8nSrar8G+tymdglI8GiP0dsEF671rEwKr1pQj3eraBX2ANw7LWtQO2IonatiWuwvoS/Yrb067hBzhnNhCfFcIqJrlhzZTwc9xroBaKmEds7avDHyJApubG7JkYTD93mndG1Q6MtDbYVWCOXUliXGxlMqWuJlcGEaJoGo6fPgskoBFt+LswvbWVWc7sr3dyOdsqJmoR2uF69X68+O8j6pkWNNJDqqtuWuRn97evHrdCN1keb8ZHfg+1t2aVbv1mt37zdFaP+z82/ODcf1qtP8Lvr6Hz/8u4vjY4h2NZuS9XshIDWj1FGqmJVNnENgJdwHD9yT2joF1wfnxyeeIfnSAPKSGqd0+GR89E0yzdus8LCMLjRc/EEJc2Nlap1S6Vs3WbSWslrY53quuDTSmrKgT4hmDYj7t4wYy2bsUkRM3URFkq6oBrKhY8Qb2d6OqvM8GF0pcvhcn1wtCkqmHUiGBVkTC12NXdLWThHeqIQJvVBito+Rv5pHrUfW8MfUEsHCA1Cwl7YAgAAsQkAAFBLAwQUAAgICACzHqRcAAAAAAAAAAAAAAAADwAAAHdvcmQvc3R5bGVzLnhtbM2Vb0/bMBDG3+9TRH5f0lYVQ9UCYkWIbqib+PMBXOfSeCS2ZzuU8ul3TuISkhQKQ9retPVz9eXuuV+vX04e8iy4B224FBEZHQxJAILJmItVRG5vzgdHJDCWiphmUkBENmDIyfGnL+upsZsMTID3hZmuI5Jaq6ZhaFgKOTUHUoHAWCJ1Ti0e9SpcSx0rLRkYg+nzLBwPh4dhTrkgPs1o0kmUc6alkYk9YDIPZZJwBmUqvD4alp/yzCfI2T6F5FTfFWqA+RS1fMkzbjdlMSTI2XS+ElLTZYbdYj3kGHuNJTuDhBaZNe6of+r6WJ/Kt3MprAnWU2oY5xG55EvQmF6K4Bo0TwiG0lNhdoSAGntqOI3IQlpZ6cHs2/fgeubCzPgAFSY4g3sq6IpqTkL37DvQAr90T7OIjCvJPG6FiVdmpq1lVKy8BmJwe/28lMd0MFs4acljrDvlg/nCXQzrrsO2F6p9Kh9cKKVx6KeFlRcblYLY1mF1AXVCVSdspgg71pfU4W27UTgfRTVdaapSV2MZmsfOJxx1Vg5O0Bz8s2q57Pv3eYlD2KhyzWO5nuEMtczCPcuujPHqsPZZUcZLW5eA1IELOCwSC3r7pV/M38Jfl7ZND/4LopjMpPYlUnTgn4NWjreLAEuRAYbWPkPgqlhy3FRtBGq5g0Bt+FMro3G3Fa8VXhC4ESsJ8rb2euH97F4Adbu3U7nXK+iogfiH6ANbwIP1+lcZb27wvBP5OwC1aFx4wrrL8HjSpHg0Hr4ZWcRsB7F1pA2s43JfXhsUHvVQePQ3M9n62B6KCwRPDr8wFr8Jt7ZmXMBV4f5jyh9XrWClnw/Jjs0xmvR5/t6mLrnpNlSKfb08R6mxqvtm/9Kk3lvujCrHSqdiVuuvDaCHfr/fL9H4RZEjjmYH+472N7D/Mqm8ep2Zvbfnez2bixgeOo5V6of59VEY+E/m+A9QSwcIq1djtssCAACUCgAAUEsDBBQACAgIALMepFwAAAAAAAAAAAAAAAASAAAAd29yZC9mb250VGFibGUueG1srZLPbsIwDMbve4ood0hB2jRVFDQx7bBNHFb2ACa4NFL+VHEg4+0XWpCm0cMG3BJ/zuef7UxmX0azHXpSzhZ8NMw4QyvdWtlNwT+XL4NHziiAXYN2Fgu+R+Kz6d0k5pWzgVh6bimPBa9DaHIhSNZogIauQZu0ynkDIV39RkTn1413EomSu9FinGUPwoCy/Gjj/2LjqkpJfHZya9CGzsSjhpA6oFo1xKdHOhZzCyZBL5VBYguM7MMZsG2CrMETHnJ2oAuepca7IMiAvsRQ8Kjs2kUajMb3Yy5aVzBK709vfGvWCo0Ksj7Fd+AVrDQeJNGhnCGVe7Nyupfk5rWeUkp/qQuapqiILgR5Vyv07aJYiV5VLRPosEjqyef3rkQf9+icexuq9FdvPLmfwGCpj7cb7tWU14x14YLrBsrmr2+snPfu+j80sA3urzDHA02/AVBLBwhiMrCATgEAAEwEAABQSwMEFAAICAgAsx6kXAAAAAAAAAAAAAAAABEAAAB3b3JkL3NldHRpbmdzLnhtbKWST2/CMAzF7/sUVe6jhf1HFMQOiEnsBNOk3Uzr0mxJXCUuHfv0MxS0SpO4cHTe8/vZVkaTb2uiLfqgyaWq30tUhC6jXLtNqt5Ws+tHFQUGl4Mhh6naYVCT8dWoGQZkFleIJMGFYZOqkrkaxnHISrQQelShE60gb4Gl9Ju4IZ9XnjIMQVqtiQdJch9b0E6NJfKHyEbNsEKfoWMZ5zZR8V7IsYDa8ArWS6ZKLFswqXpInloZaqb5rirRAcseJ519ja2h/BM/ZI2T4Zieka2AD0H5Zx14oR3OUW9KfnGCNNhxLdutJcGBlXu0r3qtjebdK+WoRKq9/ncNqzNPgQruSUtMRaEzPNxDnabp36kzoDrgu7gHSf9m5SH7eiZmsp2tLwCf44Ix1HQwU+7gL2TG3duztOKMHC/ggD/Y9n6EwNOgoa3WOhfesfv0B8e/UEsHCLLODwtMAQAAyAIAAFBLAwQUAAgICACzHqRcAAAAAAAAAAAAAAAAFQAAAHdvcmQvdGhlbWUvdGhlbWUxLnhtbN2VTY/aMBCG7/0Vlu9dE7IgQIQVBaIeVuqBtvfBcRIvthPZ3t3y72ucAPmqtqoqVdtc8IyfeT3jmZDlww8p0AvThhcqwsHdCCOmaJFwlUX429f44wwjY0ElIArFInxiBj+sPixhYXMmGXLhyiwgwrm15YIQQ50bzF1RMuX20kJLsM7UGUk0vDpZKch4NJoSCVzhOl7/TnyRppyybUGfJVO2EtFMgHWpm5yXBiMF0uX4xYN4dUlyJ9g5wpwdVOg99ZlX7CM/aNYISI7B+cfo7LARGr2AiPDIP5isluQKCNvnYv/UXA0kx/FbeuNKr8919DwAlLpS+mcHs3U4Cmu2AVXLgRzC6Xzd5hv6YY9fh+Guox/e+PseP3N0R//+xk96/GY+31zvpAFVy+kAPw6CXYv3UC64Og7e+O5CX5G0EJ8H8ckkWM8+1fiNIo3xqeKVbQ1TY44kPBU6doBvrptRheypZClQx601B4FRyS3NY5BcnFyKGNEctGHWNfN8NCwYNGK27Am+P6M9KPN2JDV/Fkk6iUuu3mkVt8RJs1G+bbJpcCH29iTYo/FFmkLwJHZOb3jsOhZl7pbYK153KqsV9M8VSL8sodoWeo3wNJycrw7KCKeut24pyyTCRmUYgcjc54Ba7Ye51MZuweRVCv6kqkOSW6br/yf1PpVJ93JYmjJqf+G5mW6vEhnc/fswGcrskMX/5/x2CyOt15b0PuwXz+onUEsHCOT/VYAhAgAA0QgAAFBLAwQUAAgICACzHqRcAAAAAAAAAAAAAAAAEwAAAFtDb250ZW50X1R5cGVzXS54bWy1VLtugzAU3fMViLUCJx2qqoJk6GNsM6Qf4JoLcYsfsm/S5O97DYghotA07WIJ3/PSwXa2Oqg62oPz0ug8XqTzOAItTCF1lcevm6fkNl4tZ9nmaMFHhNU+j7eI9o4xL7aguE+NBU2T0jjFkT5dxSwXH7wCdj2f3zBhNILGBINGvMweoOS7GqPHA223vqXGgiOPo/sWG+zymFtbS8GRIOyQlKYDsUGJdwvVCV+qkKEZDHOsHqaE/WGGg9qPpNzr4qSOpKsiJWaD8Vtp/RUBvnEIk5EaWt4L/TEnC4jW3OEzV4RihRFrZ6ynvh2k4zIjOQM7sSQEDiX0SUcdSfp8Q1OWUgBp7BRRUggVFFCc6y12Ho262L6V+aH5p3EF66mXWgc18hXgPd06Vaf9RHGpJ3N4PNbg/z5FqztpH67lhr/VvzhyUwl66ekOAJE4/9FCpzwZAekxhHZdXByjkeksZxlrXt/lF1BLBwiDfvUJZAEAAKwFAABQSwECFAAUAAgICACzHqRc5XL2ROgAAADQAgAACwAAAAAAAAAAAAAAAAAAAAAAX3JlbHMvLnJlbHNQSwECFAAUAAgICACzHqRc7TYTzWQBAADbAgAAEQAAAAAAAAAAAAAAAAAhAQAAZG9jUHJvcHMvY29yZS54bWxQSwECFAAUAAgICACzHqRc3G93LUMBAAA7AgAAEAAAAAAAAAAAAAAAAADEAgAAZG9jUHJvcHMvYXBwLnhtbFBLAQIUABQACAgIALMepFzh1gCAlwAAAPEAAAATAAAAAAAAAAAAAAAAAEUEAABkb2NQcm9wcy9jdXN0b20ueG1sUEsBAhQAFAAICAgAsx6kXHZkqm3UAAAAlwIAABwAAAAAAAAAAAAAAAAAHQUAAHdvcmQvX3JlbHMvZG9jdW1lbnQueG1sLnJlbHNQSwECFAAUAAgICACzHqRcDULCXtgCAACxCQAAEQAAAAAAAAAAAAAAAAA7BgAAd29yZC9kb2N1bWVudC54bWxQSwECFAAUAAgICACzHqRcq1djtssCAACUCgAADwAAAAAAAAAAAAAAAABSCQAAd29yZC9zdHlsZXMueG1sUEsBAhQAFAAICAgAsx6kXGIysIBOAQAATAQAABIAAAAAAAAAAAAAAAAAWgwAAHdvcmQvZm9udFRhYmxlLnhtbFBLAQIUABQACAgIALMepFyyzg8LTAEAAMgCAAARAAAAAAAAAAAAAAAAAOgNAAB3b3JkL3NldHRpbmdzLnhtbFBLAQIUABQACAgIALMepFzk/1WAIQIAANEIAAAVAAAAAAAAAAAAAAAAAHMPAAB3b3JkL3RoZW1lL3RoZW1lMS54bWxQSwECFAAUAAgICACzHqRcg371CWQBAACsBQAAEwAAAAAAAAAAAAAAAADXEQAAW0NvbnRlbnRfVHlwZXNdLnhtbFBLBQYAAAAACwALAMACAAB8EwAAAAA="));
final XWPFDocument xwdfDocument = new XWPFDocument(is)) {
//
StringBuilder sb = null;
//
CTR ctr = null;
//
XmlCursor xmlCursor = null;
//
CTRuby ctRuby = null;
//
final Iterable<XWPFParagraph> paragraphs = xwdfDocument.getParagraphs();
//
if (paragraphs != null && paragraphs.iterator() != null) {
//
Iterable<XWPFRun> runs = null;
//
for (final XWPFParagraph p : paragraphs) {
//
if ((runs = p.getRuns()) == null || runs.iterator() == null) {
//
continue;
//
} // if
//
if (sb == null) {
//
sb = new StringBuilder();
//
} else if (StringUtils.isNotEmpty(sb)) {
//
sb.delete(0, StringUtils.length(sb));
//
} // if
//
for (final XWPFRun r : runs) {
//
xmlCursor = newCursor(ctr = r.getCTR());
//
while (xmlCursor != null && xmlCursor.toNextToken() != XmlCursor.TokenType.END) {
//
if (xmlCursor.isStart()) {
//
if (Objects.equals(getLocalPart(xmlCursor.getName()), "ruby")) {
//
sb.append(String.format("<ruby>%1$s<rt>%2$s</rt></ruby>", collect(map(
flatMap(stream(getRList((ctRuby = (CTRuby) xmlCursor.getObject()) != null
? ctRuby.getRubyBase()
: null)), x -> stream(getTList(x))),
DocxToRubyHTML::getStringValue), Collectors.joining()),
collect(map(
flatMap(stream(getRList(ctRuby != null ? ctRuby.getRt() : null)),
x -> stream(getTList(x))),
DocxToRubyHTML::getStringValue), Collectors.joining())));
//
} // if
//
} // if
//
} // while
//
sb.append(collect(map(stream(getTList(ctr)), DocxToRubyHTML::getStringValue),
Collectors.joining()));
//
} // for
//
System.out.println(sb);
//
} // for
//
} // if
//
} // try
//
}
private static String getLocalPart(final QName instance) {
return instance != null ? instance.getLocalPart() : null;
}
private static XmlCursor newCursor(final CTR instance) {
return instance != null ? instance.newCursor() : null;
}
private static byte[] decode(final Decoder instance, final String src) {
return instance != null ? instance.decode(src) : null;
}
private static List<CTR> getRList(final CTRubyContent instance) {
return instance != null ? instance.getRList() : null;
}
private static <T, R, A> R collect(final Stream<T> instance, final Collector<? super T, A, R> collector) {
//
return instance != null && (collector != null || Proxy.isProxyClass(getClass(instance)))
? instance.collect(collector)
: null;
}
private static <T, R> Stream<R> flatMap(final Stream<T> instance,
final Function<? super T, ? extends Stream<? extends R>> mapper) {
//
return instance != null && (Proxy.isProxyClass(getClass(instance)) || mapper != null) ? instance.flatMap(mapper)
: null;
//
}
private static <T, R> Stream<R> map(final Stream<T> instance, final Function<? super T, ? extends R> mapper) {
//
return instance != null && (Proxy.isProxyClass(getClass(instance)) || mapper != null) ? instance.map(mapper)
: null;
//
}
private static Class<?> getClass(final Object instance) {
return instance != null ? instance.getClass() : null;
}
private static <E> Stream<E> stream(final Collection<E> instance) {
return instance != null ? instance.stream() : null;
}
private static List<CTText> getTList(final CTR instance) {
return instance != null ? instance.getTList() : null;
}
private static String getStringValue(final XmlAnySimpleType instance) {
return instance != null ? instance.getStringValue() : null;
}
}
<dependencies>
<!--https://mvnrepository.com/artifact/org.apache.commons/commons-lang3-->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.20.0</version>
</dependency>
<!--https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml-->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.5.1</version>
</dependency>
</dependencies>
@asdf913
Copy link
Copy Markdown
Author

asdf913 commented May 4, 2026

Output

<ruby>定<rt>さだ</rt></ruby>める
<ruby>認<rt>したた</rt></ruby>める

HTML

さだめる
したためる

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment