Created
May 4, 2026 05:25
-
-
Save asdf913/06b216e44c589d7984fb31c32a5ca621 to your computer and use it in GitHub Desktop.
Convert the ruby annotations in a Docx document to HTML <ruby> markup
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import java.io.ByteArrayInputStream; | |
| import java.io.IOException; | |
| import java.io.InputStream; | |
| import java.lang.reflect.Proxy; | |
| import java.util.Base64; | |
| import java.util.Base64.Decoder; | |
| import java.util.Collection; | |
| import java.util.List; | |
| import java.util.Objects; | |
| import java.util.function.Function; | |
| import java.util.stream.Collector; | |
| import java.util.stream.Collectors; | |
| import java.util.stream.Stream; | |
| import javax.xml.namespace.QName; | |
| import org.apache.commons.lang3.StringUtils; | |
| import org.apache.poi.xwpf.usermodel.XWPFDocument; | |
| import org.apache.poi.xwpf.usermodel.XWPFParagraph; | |
| import org.apache.poi.xwpf.usermodel.XWPFRun; | |
| import org.apache.xmlbeans.XmlAnySimpleType; | |
| import org.apache.xmlbeans.XmlCursor; | |
| import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR; | |
| import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRuby; | |
| import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRubyContent; | |
| import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText; | |
| /** | |
|  * Prints every paragraph of an embedded sample .docx as one line of HTML in | |
|  * which each WordprocessingML {@code ruby} element of a run is rendered as | |
|  * {@code <ruby>base<rt>annotation</rt></ruby>} and each {@code t} element as | |
|  * HTML-escaped plain text. | |
|  */ | |
| public class DocxToRubyHTML { | |
|   /** | |
|    * Base64-encoded sample .docx converted by {@link #main(String[])}. The | |
|    * literal is split into chunks only to keep source lines manageable; the | |
|    * chunks are joined into a single constant at compile time. | |
|    */ | |
|   private static final String SAMPLE_DOCX_BASE64 = | |
|       "UEsDBBQACAgIALMepFwAAAAAAAAAAAAAAAALAAAAX3JlbHMvLnJlbHOt0sFKAzEQBuB7n2KZe3e2VURks72I0JtIfYCQzO4Gm0xIplrf3lAKulBWwR4z+efnI6TdHP2+eqeUHQcFq7qBioJh68Kg4HX3tLyHTbdoX2ivpUTy6GKuyk7ICkaR+ICYzUhe55ojhXLTc/JayjENGLV50wPhumnuMP3sgG7SWW2tgrS1K6h2n5H+142eRFstGg0nWsZUtpM4yqVcp4FEgWXzXMb5lKhLM+Bl0PrvIO57Z+iRzcFTkEsuOgoFS3aepGOcE91cU2QOWdj/8kSnzBzp9pqkaeLb88HJoj2Pz5pFi5Of2X0BUEsHCOVy9kToAAAA0AIAAFBLAwQUAAgICACzHqRcAAAAAAAAAAAAAAAAEQAAAGRvY1Byb3BzL2NvcmUueG1sjVLLTsMwELzzFZHvifNoqspKUglQT1RCahGIm7G3qSFxLNt9/T1O0qQFeuC2szOefTmbH+vK24M2opE5ioIQeSBZw4Usc/SyXvgz5BlLJadVIyFHJzBoXtxlTBHWaHjWjQJtBRjPGUlDmMrR1lpFMDZsCzU1gVNIR24aXVProC6xouyLloDjMJziGizl1FLcGvpqdERnS85GS7XTVWfAGYYKapDW4CiI8EVrQdfm5oOOuVLWwp4U3JQO5Kg+GjEKD4dDcEg6qes/wm/Lp1U3qi9kuyoGqMjOjRCmgVrgnjMgfbmBeU0eHtcLVMRhPPXD1A8n6ygiaUqiyXuGf71vDfu40UXLXoCLORimhbLuhj35I+FwRWW5cwsvQPovq04yptpTVtTYpTv6RgC/PzmPG7mho/qc++dIMUkTksyuRhoMusoa9qL9e0XcFR1h27XZfXwCs/1II3CxFbaCPj2Ef/5j8Q1QSwcI7TYTzWQBAADbAgAAUEsDBBQACAgIALMepFwAAAAAAAAAAAAAAAAQAAAAZG9jUHJvcHMvYXBwLnhtbJ1RXW/CIBR9369omr1aaG27zlDMsmVPJjNZp3szCFfL0gIBNPrvh5p1zR7H0z0fOQe4ZH7qu+gI1kmt6jhNcByB4lpIta/jj+Z1UsWR80wJ1mkFdXwGF8/pHVlabcB6CS4KCcrVceu9mSHkeAs9c0mQVVB22vbMB2j3SO92ksOL5ocelEcZxiWCkwclQEzMEBjfEmdH/99Qofnlfm7VnE3Io6SB3nTMAyXod2y0Z10je6A40AMgT8Z0kjMffoQu5NbC27UCZWWSJdMku19IdThtPqtyU+bRyLEJb/gC7pHIRQEif9iWuMrTrNhllcjKR1aUBU5xIYpcCMDTnKBx16V4ddsETYsEh3M1/HBkyfbgaErQbSBrbYWjJUG3gTy3zDLug/1CjtBIWUvfvhvG4Y9nxIcey/aWmdbR7FI2oACGLdFvUEsHCNxvdy1DAQAAOwIAAFBLAwQUAAgICACzHqRcAAAAAAAAAAAAAAAAEwAAAGRvY1Byb3BzL2N1c3RvbS54bWydzrEKwjAUheHdpwjZ21QHkdK0izg7VPeQ3rYBc2/ITYt9eyOC7o6HHz5O0z39Q6wQ2RFquS8rKQAtDQ4nLW/9pThJwcngYB6EoOUGLLt211wjBYjJAYssIGs5pxRqpdjO4A2XOWMuI0VvUp5xUjSOzsKZ7OIBkzpU1VHZhRP5Inw5+fHqNf1LDmTf7/jebyF7baN+Z9sXUEsHCOHWAICXAAAA8QAAAFBLAwQUAAgICACzHqRcAAAAAAAAAAAAAAAAHAAAAHdvcmQvX3JlbHMvZG9jdW1lbnQueG1sLnJlbHOtUssKwjAQvPsVYe82rYqINPUiglepHxDT7QPbJCSr6N8bVLSCiIceZzY7M0w2XV26lp3R+cZoAUkUA0OtTNHoSsA+34wXsMpG6Q5bSeGJrxvrWdjRXkBNZJece1VjJ31kLOowKY3rJAXoKm6lOsoK+SSO59z1NSD70GTbQoDbFgmw/" | |
|       + "GrxH21Tlo3CtVGnDjV9seCeri36oChdhSTggaOgA/y7/WRI+9JoyuWhxXeCF/UrxHTQDpAo/GW/hSfzK8JsyAgUdnsd3OGDTJ4ZRin/OLDsBlBLBwh2ZKpt1AAAAJcCAABQSwMEFAAICAgAsx6kXAAAAAAAAAAAAAAAABEAAAB3b3JkL2RvY3VtZW50LnhtbO1WzW7TQBC+8xSW76njtFQlalKVVK0AUUUkPMBmvbYX9k+766TpLVwRR4TEBc4ceKkIicdg1vbaSUAotBLigBRl7W9mvvlmdtb26dkNZ8GcaEOlGITxQTcMiMAyoSIbhC+nl52TMDAWiQQxKcggXBITng0fnC76icQFJ8IGwCBMXw7CQou+wTnhyHQ4xVoamdoOlrwv05RiUi9hHaEHYW6t6kdRHXQgFRFgS6XmyMKtzqIq5KLOFfW63eNIE4Ys6DU5VcazzX+Xf86Z91vsk3UhdaK0xMQYaARnVV6OqGho4u4eBTueJkLtkznRaLGRclvIRWX0jIriO1BClC00aWWZn0iaWg6glnoLSinAEHd3RE1ypDbYsvuxXWlZKM/G96qPI/26UK7tCsZiRhm1y7LUVlR8dD9VO41f3I1vYwjjh39G0GsIOO4/yYTUaMbgOIKSwJUXAGM4hFM5k8nSrar8G+tymdglI8GiP0dsEF671rEwKr1pQj3eraBX2ANw7LWtQO2IonatiWuwvoS/Yrb067hBzhnNhCfFcIqJrlhzZTwc9xroBaKmEds7avDHyJApubG7JkYTD93mndG1Q6MtDbYVWCOXUliXGxlMqWuJlcGEaJoGo6fPgskoBFt+LswvbWVWc7sr3dyOdsqJmoR2uF69X68+O8j6pkWNNJDqqtuWuRn97evHrdCN1keb8ZHfg+1t2aVbv1mt37zdFaP+z82/ODcf1qtP8Lvr6Hz/8u4vjY4h2NZuS9XshIDWj1FGqmJVNnENgJdwHD9yT2joF1wfnxyeeIfnSAPKSGqd0+GR89E0yzdus8LCMLjRc/EEJc2Nlap1S6Vs3WbSWslrY53quuDTSmrKgT4hmDYj7t4wYy2bsUkRM3URFkq6oBrKhY8Qb2d6OqvM8GF0pcvhcn1wtCkqmHUiGBVkTC12NXdLWThHeqIQJvVBito+Rv5pHrUfW8MfUEsHCA1Cwl7YAgAAsQkAAFBLAwQUAAgICACzHqRcAAAAAAAAAAAAAAAADwAAAHdvcmQvc3R5bGVzLnhtbM2Vb0/bMBDG3+9TRH5f0lYVQ9UCYkWIbqib+PMBXOfSeCS2ZzuU8ul3TuISkhQKQ9retPVz9eXuuV+vX04e8iy4B224FBEZHQxJAILJmItVRG5vzgdHJDCWiphmUkBENmDIyfGnL+upsZsMTID3hZmuI5Jaq6ZhaFgKOTUHUoHAWCJ1Ti0e9SpcSx0rLRkYg+nzLBwPh4dhTrkgPs1o0kmUc6alkYk9YDIPZZJwBmUqvD4alp/yzCfI2T6F5FTfFWqA+RS1fMkzbjdlMSTI2XS+ElLTZYbdYj3kGHuNJTuDhBaZNe6of+r6WJ/Kt3MprAnWU2oY5xG55EvQmF6K4Bo0TwiG0lNhdoSAGntqOI3IQlpZ6cHs2/fgeubCzPgAFSY4g3sq6IpqTkL37DvQAr90T7OIjCvJPG6FiVdmpq1lVKy8BmJwe/28lMd0MFs4acljrDvlg/nCXQzrrsO2F6p9Kh9cKKVx6KeFlRcblYLY1mF1AXVCVSdspgg71pfU4W27UTgfRTVdaapSV2MZmsfOJxx1Vg5O0Bz8s2q57Pv3eYlD2KhyzWO5nuEMtczCPcuujPHqsPZZUcZLW5eA1IELOCwSC3r7pV/M38Jfl7ZND/4LopjMpPYlUnTgn4NWjreLAEuRAYbWPkPgqlhy3FRtBGq5g0Bt+FMro3G3Fa8VXhC4ESsJ8rb2euH97F4Adbu3U7nXK+iogfiH6ANbwIP1+lcZb27wvBP5OwC1aFx4wrrL8HjSp" | |
|       + "Hg0Hr4ZWcRsB7F1pA2s43JfXhsUHvVQePQ3M9n62B6KCwRPDr8wFr8Jt7ZmXMBV4f5jyh9XrWClnw/Jjs0xmvR5/t6mLrnpNlSKfb08R6mxqvtm/9Kk3lvujCrHSqdiVuuvDaCHfr/fL9H4RZEjjmYH+472N7D/Mqm8ep2Zvbfnez2bixgeOo5V6of59VEY+E/m+A9QSwcIq1djtssCAACUCgAAUEsDBBQACAgIALMepFwAAAAAAAAAAAAAAAASAAAAd29yZC9mb250VGFibGUueG1srZLPbsIwDMbve4ood0hB2jRVFDQx7bBNHFb2ACa4NFL+VHEg4+0XWpCm0cMG3BJ/zuef7UxmX0azHXpSzhZ8NMw4QyvdWtlNwT+XL4NHziiAXYN2Fgu+R+Kz6d0k5pWzgVh6bimPBa9DaHIhSNZogIauQZu0ynkDIV39RkTn1413EomSu9FinGUPwoCy/Gjj/2LjqkpJfHZya9CGzsSjhpA6oFo1xKdHOhZzCyZBL5VBYguM7MMZsG2CrMETHnJ2oAuepca7IMiAvsRQ8Kjs2kUajMb3Yy5aVzBK709vfGvWCo0Ksj7Fd+AVrDQeJNGhnCGVe7Nyupfk5rWeUkp/qQuapqiILgR5Vyv07aJYiV5VLRPosEjqyef3rkQf9+icexuq9FdvPLmfwGCpj7cb7tWU14x14YLrBsrmr2+snPfu+j80sA3urzDHA02/AVBLBwhiMrCATgEAAEwEAABQSwMEFAAICAgAsx6kXAAAAAAAAAAAAAAAABEAAAB3b3JkL3NldHRpbmdzLnhtbKWST2/CMAzF7/sUVe6jhf1HFMQOiEnsBNOk3Uzr0mxJXCUuHfv0MxS0SpO4cHTe8/vZVkaTb2uiLfqgyaWq30tUhC6jXLtNqt5Ws+tHFQUGl4Mhh6naYVCT8dWoGQZkFleIJMGFYZOqkrkaxnHISrQQelShE60gb4Gl9Ju4IZ9XnjIMQVqtiQdJch9b0E6NJfKHyEbNsEKfoWMZ5zZR8V7IsYDa8ArWS6ZKLFswqXpInloZaqb5rirRAcseJ519ja2h/BM/ZI2T4Zieka2AD0H5Zx14oR3OUW9KfnGCNNhxLdutJcGBlXu0r3qtjebdK+WoRKq9/ncNqzNPgQruSUtMRaEzPNxDnabp36kzoDrgu7gHSf9m5SH7eiZmsp2tLwCf44Ix1HQwU+7gL2TG3duztOKMHC/ggD/Y9n6EwNOgoa3WOhfesfv0B8e/UEsHCLLODwtMAQAAyAIAAFBLAwQUAAgICACzHqRcAAAAAAAAAAAAAAAAFQAAAHdvcmQvdGhlbWUvdGhlbWUxLnhtbN2VTY/aMBCG7/0Vlu9dE7IgQIQVBaIeVuqBtvfBcRIvthPZ3t3y72ucAPmqtqoqVdtc8IyfeT3jmZDlww8p0AvThhcqwsHdCCOmaJFwlUX429f44wwjY0ElIArFInxiBj+sPixhYXMmGXLhyiwgwrm15YIQQ50bzF1RMuX20kJLsM7UGUk0vDpZKch4NJoSCVzhOl7/TnyRppyybUGfJVO2EtFMgHWpm5yXBiMF0uX4xYN4dUlyJ9g5wpwdVOg99ZlX7CM/aNYISI7B+cfo7LARGr2AiPDIP5isluQKCNvnYv/UXA0kx/FbeuNKr8919DwAlLpS+mcHs3U4Cmu2AVXLgRzC6Xzd5hv6YY9fh+Guox/e+PseP3N0R//+xk96/GY+31zvpAFVy+kAPw6CXYv3UC64Og7e+O5CX5G0EJ8H8ckkWM8+1fiNIo3xqeKVbQ1TY44kPBU6doBvrptRheypZClQx601B4FRyS3NY5BcnFyKGNEctGHWNfN8NCwYNGK27Am+P6M9KPN2JDV/Fkk6iUuu3mkVt8RJs1G+bbJpcCH29iTYo/FFmkLwJHZOb3jsOhZl7pbYK153KqsV9M8VSL8sodoWeo3wNJycrw7KCKeut24pyyTCRmUYgcjc54Ba7Ye51MZuweRVC" | |
|       + "v6kqkOSW6br/yf1PpVJ93JYmjJqf+G5mW6vEhnc/fswGcrskMX/5/x2CyOt15b0PuwXz+onUEsHCOT/VYAhAgAA0QgAAFBLAwQUAAgICACzHqRcAAAAAAAAAAAAAAAAEwAAAFtDb250ZW50X1R5cGVzXS54bWy1VLtugzAU3fMViLUCJx2qqoJk6GNsM6Qf4JoLcYsfsm/S5O97DYghotA07WIJ3/PSwXa2Oqg62oPz0ug8XqTzOAItTCF1lcevm6fkNl4tZ9nmaMFHhNU+j7eI9o4xL7aguE+NBU2T0jjFkT5dxSwXH7wCdj2f3zBhNILGBINGvMweoOS7GqPHA223vqXGgiOPo/sWG+zymFtbS8GRIOyQlKYDsUGJdwvVCV+qkKEZDHOsHqaE/WGGg9qPpNzr4qSOpKsiJWaD8Vtp/RUBvnEIk5EaWt4L/TEnC4jW3OEzV4RihRFrZ6ynvh2k4zIjOQM7sSQEDiX0SUcdSfp8Q1OWUgBp7BRRUggVFFCc6y12Ho262L6V+aH5p3EF66mXWgc18hXgPd06Vaf9RHGpJ3N4PNbg/z5FqztpH67lhr/VvzhyUwl66ekOAJE4/9FCpzwZAekxhHZdXByjkeksZxlrXt/lF1BLBwiDfvUJZAEAAKwFAABQSwECFAAUAAgICACzHqRc5XL2ROgAAADQAgAACwAAAAAAAAAAAAAAAAAAAAAAX3JlbHMvLnJlbHNQSwECFAAUAAgICACzHqRc7TYTzWQBAADbAgAAEQAAAAAAAAAAAAAAAAAhAQAAZG9jUHJvcHMvY29yZS54bWxQSwECFAAUAAgICACzHqRc3G93LUMBAAA7AgAAEAAAAAAAAAAAAAAAAADEAgAAZG9jUHJvcHMvYXBwLnhtbFBLAQIUABQACAgIALMepFzh1gCAlwAAAPEAAAATAAAAAAAAAAAAAAAAAEUEAABkb2NQcm9wcy9jdXN0b20ueG1sUEsBAhQAFAAICAgAsx6kXHZkqm3UAAAAlwIAABwAAAAAAAAAAAAAAAAAHQUAAHdvcmQvX3JlbHMvZG9jdW1lbnQueG1sLnJlbHNQSwECFAAUAAgICACzHqRcDULCXtgCAACxCQAAEQAAAAAAAAAAAAAAAAA7BgAAd29yZC9kb2N1bWVudC54bWxQSwECFAAUAAgICACzHqRcq1djtssCAACUCgAADwAAAAAAAAAAAAAAAABSCQAAd29yZC9zdHlsZXMueG1sUEsBAhQAFAAICAgAsx6kXGIysIBOAQAATAQAABIAAAAAAAAAAAAAAAAAWgwAAHdvcmQvZm9udFRhYmxlLnhtbFBLAQIUABQACAgIALMepFyyzg8LTAEAAMgCAAARAAAAAAAAAAAAAAAAAOgNAAB3b3JkL3NldHRpbmdzLnhtbFBLAQIUABQACAgIALMepFzk/1WAIQIAANEIAAAVAAAAAAAAAAAAAAAAAHMPAAB3b3JkL3RoZW1lL3RoZW1lMS54bWxQSwECFAAUAAgICACzHqRcg371CWQBAACsBQAAEwAAAAAAAAAAAAAAAADXEQAAW0NvbnRlbnRfVHlwZXNdLnhtbFBLBQYAAAAACwALAMACAAB8EwAAAAA="; | |
|   /** Characters that must not reach the HTML output unescaped. */ | |
|   private static final String[] HTML_SPECIAL = { "&", "<", ">", "\"" }; | |
|   /** Entity for each entry of {@link #HTML_SPECIAL}, at the same index. */ | |
|   private static final String[] HTML_ENTITY = { "&amp;", "&lt;", "&gt;", "&quot;" }; | |
|   /** | |
|    * Decodes the embedded sample document and writes one HTML line per | |
|    * paragraph to standard output. | |
|    * | |
|    * @param args ignored | |
|    * @throws IOException if the embedded bytes cannot be opened as a .docx | |
|    */ | |
|   public static void main(final String[] args) throws IOException { | |
|     final byte[] docx = Base64.getDecoder().decode(SAMPLE_DOCX_BASE64); | |
|     try (final InputStream is = new ByteArrayInputStream(docx); | |
|         final XWPFDocument document = new XWPFDocument(is)) { | |
|       for (final XWPFParagraph paragraph : document.getParagraphs()) { | |
|         final List<XWPFRun> runs = paragraph != null ? paragraph.getRuns() : null; | |
|         // A paragraph without a run list prints nothing; one with an empty | |
|         // list still prints an empty line. | |
|         if (runs != null) { | |
|           System.out.println(toHtml(runs)); | |
|         } | |
|       } | |
|     } | |
|   } | |
|   /** | |
|    * Concatenates the HTML of all given runs. | |
|    * | |
|    * @param runs runs of one paragraph, in order; {@code null} entries are skipped | |
|    * @return the HTML for the paragraph, possibly empty, never {@code null} | |
|    */ | |
|   private static String toHtml(final List<XWPFRun> runs) { | |
|     final StringBuilder html = new StringBuilder(); | |
|     for (final XWPFRun run : runs) { | |
|       appendRun(html, run != null ? run.getCTR() : null); | |
|     } | |
|     return html.toString(); | |
|   } | |
|   /** | |
|    * Appends the HTML for one run: its direct child elements are visited in | |
|    * document order, {@code ruby} children become {@code <ruby>} markup and | |
|    * {@code t} children become escaped text. Other children are ignored. | |
|    * | |
|    * @param html destination buffer | |
|    * @param ctr  the run's XML bean; {@code null} appends nothing | |
|    */ | |
|   private static void appendRun(final StringBuilder html, final CTR ctr) { | |
|     if (ctr == null) { | |
|       return; | |
|     } | |
|     // try-with-resources releases the cursor even if a cast or getter throws. | |
|     try (final XmlCursor cursor = ctr.newCursor()) { | |
|       if (!cursor.toFirstChild()) { | |
|         return; | |
|       } | |
|       // Sibling iteration (rather than a token walk that ends at the first | |
|       // END token) keeps going past leading children such as run properties. | |
|       do { | |
|         final String name = getLocalPart(cursor.getName()); | |
|         final Object child = cursor.getObject(); | |
|         if ("ruby".equals(name) && child instanceof CTRuby) { | |
|           final CTRuby ruby = (CTRuby) child; | |
|           html.append("<ruby>").append(escapeHtml(rubyText(ruby.getRubyBase()))) | |
|               .append("<rt>").append(escapeHtml(rubyText(ruby.getRt()))) | |
|               .append("</rt></ruby>"); | |
|         } else if ("t".equals(name) && child instanceof CTText) { | |
|           html.append(escapeHtml(((CTText) child).getStringValue())); | |
|         } | |
|       } while (cursor.toNextSibling()); | |
|     } | |
|   } | |
|   /** | |
|    * Joins the text of every {@code t} element of every run inside a ruby | |
|    * base or ruby annotation. | |
|    * | |
|    * @param content the ruby base or annotation; may be {@code null} | |
|    * @return the joined text, or an empty string when there is none | |
|    */ | |
|   private static String rubyText(final CTRubyContent content) { | |
|     if (content == null) { | |
|       return ""; | |
|     } | |
|     return content.getRList().stream() | |
|         .filter(Objects::nonNull) | |
|         .flatMap(r -> r.getTList().stream()) | |
|         .filter(Objects::nonNull) | |
|         .map(CTText::getStringValue) | |
|         // joining() would otherwise render a null value as the text "null" | |
|         .filter(Objects::nonNull) | |
|         .collect(Collectors.joining()); | |
|   } | |
|   /** | |
|    * Escapes the characters in {@link #HTML_SPECIAL} so document text cannot | |
|    * be interpreted as markup. | |
|    * | |
|    * @param text raw text; may be {@code null} | |
|    * @return the escaped text, or an empty string for {@code null} | |
|    */ | |
|   private static String escapeHtml(final String text) { | |
|     // replaceEach makes a single pass, so the '&' of an inserted entity is | |
|     // not escaped a second time. | |
|     return Objects.toString(StringUtils.replaceEach(text, HTML_SPECIAL, HTML_ENTITY), ""); | |
|   } | |
|   /** | |
|    * Null-safe accessor for the local part of a qualified name. | |
|    * | |
|    * @param instance the name; may be {@code null} | |
|    * @return the local part, or {@code null} when {@code instance} is {@code null} | |
|    */ | |
|   private static String getLocalPart(final QName instance) { | |
|     return instance != null ? instance.getLocalPart() : null; | |
|   } | |
| } | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <dependencies> | |
| <!--https://mvnrepository.com/artifact/org.apache.commons/commons-lang3--> | |
| <dependency> | |
| <groupId>org.apache.commons</groupId> | |
| <artifactId>commons-lang3</artifactId> | |
| <version>3.20.0</version> | |
| </dependency> | |
| <!--https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml--> | |
| <dependency> | |
| <groupId>org.apache.poi</groupId> | |
| <artifactId>poi-ooxml</artifactId> | |
| <version>5.5.1</version> | |
| </dependency> | |
| </dependencies> |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output
HTML