01 /*
02 * LuceneTokeniser.java
03 *
04 * Niraj Aswani, 19/March/07
05 *
06 * $Id: LuceneTokeniser.html,v 1.0 2007/03/19 16:22:01 niraj Exp $
07 */
08 package gate.creole.annic.lucene;
09
10 import gate.creole.annic.apache.lucene.analysis.*;
11 import java.io.*;
12 import gate.*;
13 import java.util.*;
14
15 /**
16 * Implementation of token stream.
17 * @author niraj
18 *
19 */
20 public class LuceneTokenizer extends TokenStream {
21 Document document;
22 ArrayList tokens;
23 ArrayList featuresToExclude;
24 int pointer = 0;
25
26 /**
27 * Constructor
28 * @param tokenStream
29 */
30 public LuceneTokenizer(ArrayList tokenStream) {
31 this.tokens = tokenStream;
32 pointer = 0;
33 }
34
35 /**
36 * Returns the next token in the token stream.
37 */
38 public Token next() throws IOException {
39 while (pointer < tokens.size()) {
40 Token token = (Token) tokens.get(pointer);
41 pointer++;
42 if (token == null)
43 continue;
44 return token;
45 }
46 return null;
47 }
48 }
|