asdf913 · February 2, 2025 09:56 · asdf913 · Feb 2, 2025 · asdf913 · Feb 2, 2025
diff --git a/JlptAdverbList.java b/JlptAdverbList.java
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.net.MalformedURLException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.Collection;
 import java.util.function.Function;
 import java.util.function.Predicate;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;

 import org.apache.commons.codec.binary.StringUtils;
 import org.apache.commons.collections4.IterableUtils;
 import org.apache.commons.lang3.ObjectUtils;
 import org.apache.poi.ss.usermodel.Cell;
 import org.apache.poi.ss.usermodel.Row;
 import org.apache.poi.ss.usermodel.Sheet;
 import org.apache.poi.ss.usermodel.Workbook;
 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.nodes.Node;
 import org.jsoup.select.Elements;

 /**
  * Command-line scraper that downloads the adverb tables from jlptsensei.com and
  * saves them into an Excel workbook: one sheet named "all" for the complete
  * list (start page plus the pagination pages it links to) and one sheet per
  * "JLPT Nx adverbs list" link found on the start page.
  */
 public class JlptAdverbList {

 	/** Link text of a per-level list page; group 1 captures the level token used as sheet name. */
 	private static final Pattern LEVEL_LINK_PATTERN = Pattern.compile("^JLPT\\s+(N\\d+)\\s+adverbs\\s+list$");

 	/**
 	 * Timeout handed to every {@code Jsoup.parse(URL, int)} call. The value 0 is
 	 * kept from the original code. NOTE(review): jsoup documents a zero timeout
 	 * as "infinite" - consider a finite value so a stalled server cannot hang the run.
 	 */
 	private static final int TIMEOUT_MILLIS = 0;

 	/**
 	 * Downloads the start page, fills the workbook and writes it to
 	 * {@code jlpt-advert.xlsx} in the current working directory.
 	 *
 	 * @param args ignored
 	 * @throws MalformedURLException if a page address cannot be converted to a URL
 	 * @throws IOException           if a download or the final file write fails
 	 * @throws URISyntaxException    if a page address is not a valid URI
 	 */
 	public static void main(String[] args) throws MalformedURLException, IOException, URISyntaxException {
 		final String url = "https://jlptsensei.com/complete-japanese-adverbs-list/";
 		final Document document = Jsoup.parse(new URI(url).toURL(), TIMEOUT_MILLIS);
 		// NOTE(review): "advert" looks like a typo for "adverb"; the name is left
 		// untouched because it is observable output of the program.
 		final File file = new File("jlpt-advert.xlsx");
 		System.out.println(file.getAbsolutePath());
 		try (final Workbook workbook = new XSSFWorkbook()) {
 			writeAllSheet(workbook.createSheet("all"), document, url);
 			writeLevelSheets(workbook, document);
 			// The output stream is opened only after scraping succeeded, so a failed
 			// run does not leave a truncated file behind.
 			try (final OutputStream os = new FileOutputStream(file)) {
 				workbook.write(os);
 			}
 		}
 	}

 	/**
 	 * Fills the "all" sheet: the table of the already downloaded start page
 	 * (with header row) followed by the tables of every page referenced by an
 	 * {@code a.page-numbers} link (without header row).
 	 */
 	private static void writeAllSheet(final Sheet sheet, final Document document, final String url)
 			throws IOException, URISyntaxException {
 		// Reuse the document that was already fetched instead of downloading the start page again.
 		write(sheet, single(getElementsByTag(document, "table")), true);
 		final Iterable<Element> pageLinks = document != null ? document.select("a.page-numbers") : null;
 		if (pageLinks == null) {
 			return;
 		}
 		// Remembers every address already scraped so that a link repeating a page
 		// (for example a "next" link or a second pagination bar) is not appended twice.
 		// Fully qualified to leave the file's import list untouched.
 		final Collection<String> visited = new java.util.HashSet<>();
 		visited.add(url);
 		for (final Element pageLink : pageLinks) {
 			// NOTE(review): hasAttr("next") tests for an HTML attribute named "next";
 			// if the CSS class was meant, hasClass("next") is the call - confirm against the page markup.
 			if (pageLink == null || pageLink.hasAttr("next")) {
 				continue;
 			}
 			// absUrl resolves relative links against the document's base URI.
 			final String href = absUrl(pageLink, "href");
 			if (href == null || href.isEmpty() || !visited.add(href)) {
 				continue;
 			}
 			write(sheet, href, false);
 		}
 	}

 	/**
 	 * Creates one sheet per link whose text matches {@link #LEVEL_LINK_PATTERN}
 	 * and whose {@code href} is already absolute, and fills it with the table of
 	 * the linked page. A level that already has a sheet is skipped, because
 	 * creating a second sheet with the same name would throw.
 	 */
 	private static void writeLevelSheets(final Workbook workbook, final Document document)
 			throws IOException, URISyntaxException {
 		final Collection<Element> anchors = getElementsByTag(document, "a");
 		if (anchors == null) {
 			return;
 		}
 		for (final Element anchor : anchors) {
 			final String label = text(anchor);
 			final Matcher matcher = label != null ? LEVEL_LINK_PATTERN.matcher(label) : null;
 			if (matcher == null || !matcher.matches()) {
 				continue;
 			}
 			final String href = absUrl(anchor, "href");
 			// As in the original filter, only links whose raw href equals its absolute form are used.
 			if (href == null || href.isEmpty() || !StringUtils.equals(anchor.attr("href"), href)) {
 				continue;
 			}
 			final String level = matcher.group(1);
 			if (workbook.getSheet(level) != null) {
 				continue;
 			}
 			write(workbook.createSheet(level), href, true);
 		}
 	}

 	/** Null-safe {@link Node#absUrl(String)}; returns {@code null} for a {@code null} node. */
 	private static String absUrl(final Node instance, final String attributeKey) {
 		return instance != null ? instance.absUrl(attributeKey) : null;
 	}

 	/** Null-safe {@link String#matches(String)}; a {@code null} string never matches. */
 	private static boolean matches(final String instance, final String regex) {
 		return instance != null && instance.matches(regex);
 	}

 	/**
 	 * Downloads {@code url} and appends its table to {@code sheet}. Nothing is
 	 * written when the page does not contain exactly one {@code table} element.
 	 *
 	 * @param sheet  target sheet
 	 * @param url    absolute page address; {@code null} or empty is ignored
 	 * @param header whether the table's header row is written as well
 	 */
 	private static void write(final Sheet sheet, final String url, final boolean header)
 			throws MalformedURLException, IOException, URISyntaxException {
 		if (url == null || url.isEmpty()) {
 			return;
 		}
 		write(sheet, single(getElementsByTag(Jsoup.parse(new URI(url).toURL(), TIMEOUT_MILLIS), "table")), header);
 	}

 	/**
 	 * Appends the rows of {@code table} to {@code sheet}.
 	 * <p>
 	 * The expected number of columns is taken from the single row of the single
 	 * {@code thead}, independently of {@code header}; body rows with a different
 	 * number of cells are skipped. (Previously the count was only determined
 	 * when {@code header} was true, so appended pages lost all their rows.)
 	 *
 	 * @param sheet  target sheet; nothing happens when {@code null}
 	 * @param table  table element, may be {@code null}
 	 * @param header whether the header row is written before the body rows
 	 */
 	private static void write(final Sheet sheet, final Element table, final boolean header) {
 		if (sheet == null) {
 			return;
 		}
 		final Element headerRow = single(children(single(getElementsByTag(table, "thead"))));
 		final int columnCount = headerRow != null ? headerRow.childrenSize() : 0;
 		if (header && headerRow != null) {
 			writeCells(createRow(sheet, nextRowIndex(sheet)), headerRow.children());
 		}
 		final Iterable<Element> bodyRows = children(single(getElementsByTag(table, "tbody")));
 		if (bodyRows == null) {
 			return;
 		}
 		for (final Element bodyRow : bodyRows) {
 			final Elements cells = children(bodyRow);
 			if (cells == null || cells.size() != columnCount) {
 				continue;
 			}
 			writeCells(createRow(sheet, nextRowIndex(sheet)), cells);
 		}
 	}

 	/** Writes the text of each element into consecutive cells of {@code row}, starting at column 0. */
 	private static void writeCells(final Row row, final Iterable<Element> cells) {
 		if (row == null || cells == null) {
 			return;
 		}
 		int column = 0;
 		for (final Element cell : cells) {
 			setCellValue(row.createCell(column++), text(cell));
 		}
 	}

 	/** Index of the first row after the last existing one; 0 for an empty or {@code null} sheet. */
 	private static int nextRowIndex(final Sheet sheet) {
 		return getPhysicalNumberOfRows(sheet) == 0 ? 0 : sheet.getLastRowNum() + 1;
 	}

 	/** Returns the only element of {@code elements}, or {@code null} when there is not exactly one. */
 	private static Element single(final Iterable<Element> elements) {
 		return testAndApply(x -> IterableUtils.size(x) == 1, elements, x -> IterableUtils.get(x, 0), null);
 	}

 	/** Applies {@code functionTrue} when the predicate accepts {@code value}, otherwise {@code functionFalse}. */
 	private static <T, R> R testAndApply(final Predicate<T> predicate, final T value, final Function<T, R> functionTrue,
 			final Function<T, R> functionFalse) {
 		return test(predicate, value) ? apply(functionTrue, value) : apply(functionFalse, value);
 	}

 	/** Null-safe {@link Predicate#test(Object)}; a {@code null} predicate yields {@code false}. */
 	private static <T> boolean test(final Predicate<T> instance, final T value) {
 		return instance != null && instance.test(value);
 	}

 	/** Null-safe {@link Function#apply(Object)}; a {@code null} function yields {@code null}. */
 	private static <T, R> R apply(final Function<T, R> instance, final T value) {
 		return instance != null ? instance.apply(value) : null;
 	}

 	/** Null-safe {@link Element#getElementsByTag(String)}. */
 	private static Elements getElementsByTag(final Element instance, final String tagName) {
 		return instance != null ? instance.getElementsByTag(tagName) : null;
 	}

 	/** Null-safe {@link Element#text()}. */
 	private static String text(final Element instance) {
 		return instance != null ? instance.text() : null;
 	}

 	/** Sets the cell value unless the cell is {@code null}. */
 	private static void setCellValue(final Cell instance, final String value) {
 		if (instance != null) {
 			instance.setCellValue(value);
 		}
 	}

 	/** Null-safe {@link Element#children()}. */
 	private static Elements children(final Element instance) {
 		return instance != null ? instance.children() : null;
 	}

 	/** Null-safe {@link Sheet#getPhysicalNumberOfRows()}; 0 for a {@code null} sheet. */
 	private static int getPhysicalNumberOfRows(final Sheet instance) {
 		return instance != null ? instance.getPhysicalNumberOfRows() : 0;
 	}

 	/** Null-safe {@link Sheet#createRow(int)}. */
 	private static Row createRow(final Sheet instance, final int rownum) {
 		return instance != null ? instance.createRow(rownum) : null;
 	}

 }
diff --git a/pom.xml b/pom.xml
 		<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
 		<dependency>
 			<groupId>org.jsoup</groupId>
 			<artifactId>jsoup</artifactId>
 			<version>1.18.3</version>
 		</dependency>
 		<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-collections4 -->
 		<dependency>
 			<groupId>org.apache.commons</groupId>
 			<artifactId>commons-collections4</artifactId>
 			<version>4.4</version>
 		</dependency>
 		<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
 		<dependency>
 			<groupId>org.apache.poi</groupId>
 			<artifactId>poi-ooxml</artifactId>
 			<version>5.4.0</version>
 		</dependency>
	import java.io.File;
	import java.io.FileOutputStream;
	import java.io.IOException;
	import java.io.OutputStream;
	import java.net.MalformedURLException;
	import java.net.URI;
	import java.net.URISyntaxException;
	import java.util.Collection;
	import java.util.function.Function;
	import java.util.function.Predicate;
	import java.util.regex.Matcher;
	import java.util.regex.Pattern;
	import java.util.stream.Collectors;

	import org.apache.commons.codec.binary.StringUtils;
	import org.apache.commons.collections4.IterableUtils;
	import org.apache.commons.lang3.ObjectUtils;
	import org.apache.poi.ss.usermodel.Cell;
	import org.apache.poi.ss.usermodel.Row;
	import org.apache.poi.ss.usermodel.Sheet;
	import org.apache.poi.ss.usermodel.Workbook;
	import org.apache.poi.xssf.usermodel.XSSFWorkbook;
	import org.jsoup.Jsoup;
	import org.jsoup.nodes.Document;
	import org.jsoup.nodes.Element;
	import org.jsoup.nodes.Node;
	import org.jsoup.select.Elements;

	public class JlptAdverbList {

	public static void main(String[] args) throws MalformedURLException, IOException, URISyntaxException {
	//
	final String url = "https://jlptsensei.com/complete-japanese-adverbs-list/";
	//
	final Document document = Jsoup.parse(new URI(url).toURL(), 0);
	//
	final File file = new File("jlpt-advert.xlsx");
	//
	System.out.println(file.getAbsolutePath());
	//
	try (final Workbook workbook = new XSSFWorkbook(); final OutputStream os = new FileOutputStream(file)) {
	//
	final Sheet sheet = workbook.createSheet("all");
	//
	write(sheet, url, true);
	//
	final Iterable<Element> elements = document != null ? document.select("a.page-numbers") : null;
	//
	Element element = null;
	//
	for (int i = 0; i < IterableUtils.size(elements); i++) {
	//
	if ((element = IterableUtils.get(elements, i)) == null \|\| element.hasAttr("next")) {
	//
	continue;
	//
	} // if
	//
	write(sheet, element.attr("href"), false);
	//
	} // for
	//
	Collection<Element> as = getElementsByTag(document, "a");
	//
	as = as != null ? as.stream()
	.filter(x -> x != null && matches(text(x), "^JLPT\\s+(N\\d+)\\s+adverbs\\s+list$")
	&& StringUtils.equals(x.attr("href"), absUrl(x, "href")))
	.collect(Collectors.toList()) : null;
	//
	Pattern pattern = null;
	//
	Matcher matcher = null;
	//
	Element a = null;
	//
	for (int i = 0; i < IterableUtils.size(as)
	&& (pattern = ObjectUtils.getIfNull(pattern,
	() -> Pattern.compile("^JLPT\\s+(N\\d+)\\s+adverbs\\s+list$"))) != null
	&& (matcher = pattern.matcher(text(a = IterableUtils.get(as, i)))) != null && matcher.matches()
	&& matcher.groupCount() > 0; i++) {
	//
	write(workbook.createSheet(matcher.group(1)), absUrl(a, "href"), true);
	//
	} // for
	//
	workbook.write(os);
	//
	} // try
	//
	}

	private static String absUrl(final Node instance, final String attributeKey) {
	return instance != null ? instance.absUrl(attributeKey) : null;
	}

	private static boolean matches(final String instance, final String regex) {
	return instance != null && instance.matches(regex);
	}

	private static void write(final Sheet sheet, final String url, final boolean header)
	throws MalformedURLException, IOException, URISyntaxException {
	//
	write(sheet,
	testAndApply(x -> IterableUtils.size(x) == 1,
	getElementsByTag(Jsoup.parse(url != null ? new URI(url).toURL() : null, 0), "table"),
	x -> IterableUtils.get(x, 0), null),
	header);
	//
	}

	private static void write(final Sheet sheet, final Element table, final boolean header) {
	//
	Iterable<Element> elements = children(
	IterableUtils.size(elements = getElementsByTag(table, "thead")) == 1 ? IterableUtils.get(elements, 0)
	: null);
	//
	final Element tr = IterableUtils.size(elements) == 1 ? IterableUtils.get(elements, 0) : null;
	//
	Row row = null;
	//
	int maxCellCount = 0;
	//
	if (tr != null && (row = createRow(sheet, getPhysicalNumberOfRows(sheet))) != null && header) {
	//
	for (int i = 0; i < tr.childrenSize(); i++) {
	//
	setCellValue(row.createCell(row.getPhysicalNumberOfCells()), text(tr.child(i)));
	//
	} // for
	//
	maxCellCount = Math.max(maxCellCount, tr.childrenSize());
	//
	} // if
	//
	elements = children(
	IterableUtils.size(elements = getElementsByTag(table, "tbody")) == 1 ? IterableUtils.get(elements, 0)
	: null);
	//
	Iterable<Element> trs = null;
	//
	for (int i = 0; i < IterableUtils.size(elements)
	&& (trs = children(IterableUtils.get(elements, i))) != null; i++) {
	//
	if (IterableUtils.size(trs) != maxCellCount
	\|\| (row = createRow(sheet, sheet.getLastRowNum() + 1)) == null) {
	//
	continue;
	//
	} // if
	//
	for (int j = 0; j < IterableUtils.size(trs); j++) {
	//
	setCellValue(row.createCell(row.getPhysicalNumberOfCells()), text(IterableUtils.get(trs, j)));
	//
	} // for
	//
	} // for
	//
	}

	private static <T, R> R testAndApply(final Predicate<T> predicate, final T value, final Function<T, R> functionTrue,
	final Function<T, R> functionFalse) {
	return test(predicate, value) ? apply(functionTrue, value) : apply(functionFalse, value);
	}

	private static <T> boolean test(final Predicate<T> instance, final T value) {
	return instance != null && instance.test(value);
	}

	private static <T, R> R apply(final Function<T, R> instance, final T value) {
	return instance != null ? instance.apply(value) : null;
	}

	private static Elements getElementsByTag(final Element instance, final String tagName) {
	return instance != null ? instance.getElementsByTag(tagName) : null;
	}

	private static String text(final Element instance) {
	return instance != null ? instance.text() : null;
	}

	private static void setCellValue(final Cell instance, final String value) {
	if (instance != null) {
	instance.setCellValue(value);
	}
	}

	private static Elements children(final Element instance) {
	return instance != null ? instance.children() : null;
	}

	private static int getPhysicalNumberOfRows(final Sheet instance) {
	return instance != null ? instance.getPhysicalNumberOfRows() : 0;
	}

	private static Row createRow(final Sheet instance, final int rownum) {
	return instance != null ? instance.createRow(rownum) : null;
	}

	}
	<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
	<dependency>
	<groupId>org.jsoup</groupId>
	<artifactId>jsoup</artifactId>
	<version>1.18.3</version>
	</dependency>
	<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-collections4 -->
	<dependency>
	<groupId>org.apache.commons</groupId>
	<artifactId>commons-collections4</artifactId>
	<version>4.4</version>
	</dependency>
	<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
	<dependency>
	<groupId>org.apache.poi</groupId>
	<artifactId>poi-ooxml</artifactId>
	<version>5.4.0</version>
	</dependency>