Created
October 27, 2019 14:45
-
-
Save AlexTitovWork/089d9478e490f35ca03e65d9197c4a6f to your computer and use it in GitHub Desktop.
NlpProductClassifier main test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.IOException; | |
public class NlpProductClassifier { | |
public static void main(String[] args) throws IOException { | |
NLPClassifier CL = new NLPClassifier(); | |
/** | |
* Sentence detector test | |
*/ | |
String[] sentences = CL.sentenceDetect("My text about boots and dress. It is next sentence. It is last sentence."); | |
for (String sent:sentences) { | |
System.out.println(sent); | |
} | |
/** | |
* Detecting tokens in first sentence | |
*/ | |
String[] tokens = CL.tokenize(sentences[0]); | |
for (String tok:tokens) { | |
System.out.println(tok); | |
} | |
/** | |
* Detecting category for intrest tokens | |
*/ | |
String[] tags = CL.tag(tokens); | |
/** | |
* Focusing on 3 and 5 elements of sentence | |
* | |
*/ | |
System.out.println(tokens[3] + " it is " + tags[3]); | |
System.out.println(tokens[5] + " it is " + tags[5]); | |
/** | |
* For example veiw model proccess generating, model it is not good for data analisys, but illustrated tools usage. | |
* For getting good model your data set must be larger than 5000 elements, better it will be about 50000 elements. | |
*/ | |
CL.makeDataTrainingModel(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment