001    package org.nlp2rdf.annotator;
002    
003    import edu.stanford.nlp.ling.CoreAnnotations;
004    import edu.stanford.nlp.ling.CoreLabel;
005    import edu.stanford.nlp.pipeline.Annotation;
006    import edu.stanford.nlp.util.CoreMap;
007    import opennlp.tools.util.Span;
008    import org.nlp2rdf.core.Tokenizer;
009    
010    import java.util.List;
011    
012    /**
013     * @author Sebastian Hellmann - http://bis.informatik.uni-leipzig.de/SebastianHellmann
014     */
015    public class StanfordTokenizer implements Tokenizer {
016    
017        private final Annotation annotatedDocument;
018          private final List<CoreMap> sentences ;
019    
020        /*public StanfordTokenizer() {
021            Properties props = new Properties();
022            props.put("annotators", "tokenize, ssplit");
023            pipeline = new StanfordCoreNLP(props);
024    
025        }*/
026    
027        public StanfordTokenizer(Annotation annotatedDocument) {
028            this.annotatedDocument = annotatedDocument;
029    
030            // these are all the sentences in this document
031            // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
032            sentences = annotatedDocument.get(CoreAnnotations.SentencesAnnotation.class);
033        }
034    
035        public Span[] detectSentences(String text) {
036    
037            for (CoreMap sentence : sentences)
038    
039            {
040            }
041    
042            return null;
043        }
044    
045        public Span[] detectWords(String sentence) {
046    
047            for (CoreMap s : sentences)
048    
049            {
050                // traversing the words in the current sentence
051                // a CoreLabel is a CoreMap with additional token-specific methods
052                for (CoreLabel token : s.get(CoreAnnotations.TokensAnnotation.class)) {
053                    // this is the text of the token
054                    String word = token.get(CoreAnnotations.TextAnnotation.class);
055                }
056    
057            }
058    
059            return null;
060        }
061    
062    }