Last active
February 11, 2025 06:33
-
-
Save asdf913/d0f10efbcb90604b748e38752cecd160 to your computer and use it in GitHub Desktop.
Check if string is plain text or not
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.lang.reflect.Field; | |
import java.util.Arrays; | |
import java.util.List; | |
import java.util.Objects; | |
import java.util.function.Predicate; | |
import org.apache.commons.collections4.IterableUtils; | |
import org.apache.commons.lang3.ArrayUtils; | |
import org.apache.commons.lang3.StringUtils; | |
import org.apache.commons.lang3.function.FailableFunction; | |
import org.apache.commons.lang3.reflect.FieldUtils; | |
import org.jsoup.Jsoup; | |
import org.jsoup.nodes.Element; | |
import org.jsoup.nodes.Node; | |
import org.jsoup.nodes.TextNode; | |
import org.jsoup.select.Elements; | |
import org.slf4j.Logger; | |
import org.slf4j.LoggerFactory; | |
import io.github.toolfactory.narcissus.Narcissus; | |
public class Main { | |
public static void main(final String[] args) { | |
// | |
final Iterable<String> ss = Arrays.asList(null, "", " ", "a", "字", "<b/>"); | |
// | |
String s = null; | |
// | |
for (int i = 0; i < IterableUtils.size(ss); i++) { | |
// | |
System.out.println((s = IterableUtils.get(ss, i)) + "\t" + isPlainText(s)); | |
// | |
} // for | |
// | |
} | |
private static boolean isPlainText(final String string) { | |
// | |
Element element = testAndApply(x -> IterableUtils.size(x) == 1, | |
children(testAndApply(Objects::nonNull, string, Jsoup::parse, null)), x -> IterableUtils.get(x, 0), | |
null); | |
// | |
final List<Element> es = children(element); | |
// | |
boolean plainText = true; | |
// | |
if (Boolean.logicalAnd(StringUtils.isNotBlank(string), IterableUtils.size(es) == 2)) { | |
// | |
plainText &= and(childrenSize(element = IterableUtils.get(es, 0)) == 0, attributesSize(element) == 0, | |
// | |
ArrayUtils.contains(new int[] { 0, 1 }, childNodeSize(element = IterableUtils.get(es, 1))), | |
attributesSize(element) == 0 | |
// | |
, | |
StringUtils.equals( | |
text(cast(TextNode.class, | |
testAndApply(x -> childNodeSize(x) == 1, element, x -> childNode(x, 0), null))), | |
string)); | |
// | |
} // if | |
// | |
return plainText; | |
// | |
} | |
private static boolean and(final boolean a, final boolean b, final boolean... bs) { | |
// | |
boolean result = Boolean.logicalAnd(a, b); | |
// | |
for (int i = 0; bs != null && i < bs.length; i++) { | |
// | |
result &= bs[i]; | |
// | |
} // for | |
// | |
return result; | |
// | |
} | |
private static Node childNode(final Node instance, final int index) { | |
return instance != null ? instance.childNode(index) : null; | |
} | |
private static String text(final TextNode instance) { | |
// | |
if (instance == null) { | |
// | |
return null; | |
// | |
} // if | |
// | |
try { | |
// | |
if (FieldUtils.readField(instance, "value", true) == null) { | |
// | |
return null; | |
// | |
} // if | |
// | |
} catch (final IllegalAccessException e) { | |
// | |
throw new RuntimeException(e); | |
// | |
} // try | |
// | |
return instance.text(); | |
// | |
} | |
private static int childNodeSize(final Node instance) { | |
// | |
final Element element = cast(Element.class, instance); | |
// | |
if (element != null) { | |
// | |
try { | |
// | |
if (FieldUtils.readField(element, "childNodes", true) == null) { | |
// | |
return 0; | |
// | |
} // if | |
// | |
} catch (final IllegalAccessException e) { | |
// | |
throw new RuntimeException(e); | |
// | |
} // try | |
// | |
} // if | |
// | |
return instance != null ? instance.childNodeSize() : 0; | |
// | |
} | |
private static <T> T cast(final Class<T> clz, final Object instance) { | |
return clz != null && clz.isInstance(instance) ? clz.cast(instance) : null; | |
} | |
private static int attributesSize(final Node instance) { | |
return instance != null ? instance.attributesSize() : 0; | |
} | |
private static int childrenSize(final Element instance) { | |
// | |
if (instance == null) { | |
// | |
return 0; | |
// | |
} // if | |
// | |
final Class<?> clz = getClass(instance); | |
// | |
try { | |
// | |
final Field field = clz != null ? clz.getDeclaredField("childNodes") : null; | |
// | |
if (Narcissus.getField(instance, field) == null) { | |
// | |
return 0; | |
// | |
} // if | |
// | |
} catch (final NoSuchFieldException e) { | |
// | |
error(LoggerFactory.getLogger(Main.class), e.getMessage(), e); | |
// | |
} // try | |
// | |
return instance.childrenSize(); | |
// | |
} | |
private static void error(final Logger instance, final String msg, final Throwable t) { | |
// | |
if (instance != null) { | |
// | |
instance.error(msg, t); | |
// | |
} // if | |
// | |
} | |
private static Class<?> getClass(final Object instance) { | |
return instance != null ? instance.getClass() : null; | |
} | |
private static <T, R, E extends Throwable> R testAndApply(final Predicate<T> predicate, final T value, | |
final FailableFunction<T, R, E> functionTrue, final FailableFunction<T, R, E> functionFalse) throws E { | |
return test(predicate, value) ? apply(functionTrue, value) : apply(functionFalse, value); | |
} | |
private static <T, R, E extends Throwable> R apply(final FailableFunction<T, R, E> instance, final T value) | |
throws E { | |
return instance != null ? instance.apply(value) : null; | |
} | |
private static final <T> boolean test(final Predicate<T> instance, final T value) { | |
return instance != null && instance.test(value); | |
} | |
private static Elements children(final Element instance) { | |
// | |
try { | |
// | |
return instance != null && FieldUtils.readField(instance, "childNodes", true) != null ? instance.children() | |
: null; | |
// | |
} catch (final IllegalAccessException e) { | |
// | |
return null; | |
// | |
} // try | |
// | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!-- Apache Commons Lang (StringUtils, FieldUtils, FailableFunction): https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-lang3</artifactId>
    <version>3.13.0</version>
</dependency>
<!-- Apache Commons Collections (IterableUtils): https://mvnrepository.com/artifact/org.apache.commons/commons-collections4 -->
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-collections4</artifactId>
    <version>4.4</version>
</dependency>
<!-- jsoup HTML parser: https://mvnrepository.com/artifact/org.jsoup/jsoup -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.18.3</version>
</dependency>
<!-- Narcissus reflection library: https://mvnrepository.com/artifact/io.github.toolfactory/narcissus -->
<dependency>
    <groupId>io.github.toolfactory</groupId>
    <artifactId>narcissus</artifactId>
    <version>1.0.7</version>
</dependency>
<!-- SLF4J logging API: https://mvnrepository.com/artifact/org.slf4j/slf4j-api -->
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-api</artifactId>
    <version>2.0.16</version>
</dependency>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Sample output