First, you have to train an NER model to test, or you can directly download existing models from this link.
Please refer to my previous blog post on training a model.
Then you can load your NER model and test sentences with the following code, which I tried myself. This code extracts locations from a sentence and prints them in the terminal. First, you have to tokenize your sentence; you can do this either with a model or manually. Here I have used a trained "en-token.bin" OpenNLP model for that. You can also use the split command if you want to do it manually, but I strongly recommend using a model for tokenization.
After that, you can load your NER model and run it on the tokens. Here I have loaded my previously trained model "en-location.bin" for that.
Please refer to my previous blog post on training a model.
Then you can load your NER model and test sentences with the following code, which I tried myself. This code extracts locations from a sentence and prints them in the terminal. First, you have to tokenize your sentence; you can do this either with a model or manually. Here I have used a trained "en-token.bin" OpenNLP model for that. You can also use the split command if you want to do it manually, but I strongly recommend using a model for tokenization.
After that, you can load your NER model and run it on the tokens. Here I have loaded my previously trained model "en-location.bin" for that.
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.Span;
/**
 * Small demo that tokenizes a sentence with an OpenNLP tokenizer model and then
 * runs an OpenNLP name-finder model over the tokens to extract location names.
 *
 * <p>Both models are read from the relative paths {@code en-token.bin} and
 * {@code en-location.bin}.
 */
public class train {

    /** Sample sentence that {@link #main(String[])} passes to {@link #findLocation(String)}. */
    static String sentence = "RT @Madhawa:there is traffic in Kandy";

    /** Fixed length of the array returned by {@link #findLocation(String)}. */
    private static final int MAX_LOCATIONS = 6;

    /**
     * Tokenizes {@code sentence}, finds location spans in the tokens and prints every
     * token, every span's covered text and every span's probability to standard output.
     *
     * <p>If a model file is missing, malformed or unreadable, the error is reported on
     * standard error and whatever was collected so far is returned; no exception is
     * propagated for those cases.
     *
     * @param sentence the text to analyse; {@code null} yields an array of {@code null}s
     * @return an array of length {@value #MAX_LOCATIONS} whose leading entries hold the
     *         covered text of the spans found (in the order the finder returned them);
     *         unused entries are {@code null}. Spans beyond the array's capacity are
     *         printed but not stored.
     */
    public String[] findLocation(String sentence) {
        String[] stream = new String[MAX_LOCATIONS];
        if (sentence == null) {
            // Nothing to tokenize; avoid handing null to the tokenizer.
            return stream;
        }

        InputStream modelInToken = null;
        InputStream modelInLocation = null;
        try {
            // 1. Convert the sentence into tokens using the pre-trained tokenizer model.
            modelInToken = new FileInputStream("en-token.bin");
            TokenizerModel modelToken = new TokenizerModel(modelInToken);
            Tokenizer tokenizer = new TokenizerME(modelToken);
            String[] tokens = tokenizer.tokenize(sentence);
            for (int i = 0; i < tokens.length; i++) {
                System.out.println(tokens[i]);
            }

            // 2. Find location spans using the location model.
            modelInLocation = new FileInputStream("en-location.bin");
            TokenNameFinderModel modelLocation = new TokenNameFinderModel(modelInLocation);
            NameFinderME locationFinder = new NameFinderME(modelLocation);
            Span[] nameSpans = locationFinder.find(tokens);

            // Probability of each span, index-aligned with nameSpans.
            double[] spanProbs = locationFinder.probs(nameSpans);

            // Full text of each span (all tokens from start to end, not only the
            // first one), so multi-token names are not truncated.
            String[] coveredTexts = Span.spansToStrings(nameSpans, tokens);

            // 3. Print the names and keep as many as fit in the result array.
            for (int i = 0; i < nameSpans.length; i++) {
                if (i < stream.length) {
                    // Guard against overrunning the fixed-size result array.
                    stream[i] = coveredTexts[i];
                }
                System.out.println("Covered text is: " + coveredTexts[i]);
                System.out.println("Probability is: " + spanProbs[i]);
            }
        } catch (FileNotFoundException e) {
            System.err.println("Model file not found: " + e.getMessage());
            e.printStackTrace();
        } catch (InvalidFormatException e) {
            System.err.println("Model file has an invalid format: " + e.getMessage());
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("Failed to read model file: " + e.getMessage());
            e.printStackTrace();
        } finally {
            // Always release the file handles, whether or not loading succeeded.
            closeQuietly(modelInLocation);
            closeQuietly(modelInToken);
        }
        return stream;
    }

    /** Closes {@code in} if it is non-null, ignoring any failure raised by the close itself. */
    private static void closeQuietly(InputStream in) {
        if (in == null) {
            return;
        }
        try {
            in.close();
        } catch (IOException ignored) {
            // The stream was only read from; a failed close leaves nothing to recover.
        }
    }

    /**
     * Runs {@link #findLocation(String)} on the sample {@link #sentence}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        new train().findLocation(sentence);
    }
}
ReplyDeleteThanks for sharing this amazing blog
Java Online Training