001 /* Generated By:JavaCC: Do not edit this line. StandardTokenizer.java */
002 package gate.creole.annic.apache.lucene.analysis.standard;
003
004 import java.io.*;
005
006 /** A grammar-based tokenizer constructed with JavaCC.
007 *
008 * <p> This should be a good tokenizer for most European-language documents.
009 *
010 * <p>Many applications have specific tokenizer needs. If this tokenizer does
011 * not suit your application, please consider copying this source code
012 * directory to your project and maintaining your own grammar-based tokenizer.
013 */
// NOTE(review): JavaCC-generated parser (see file header). The comments below were
// added for readability only — do not hand-edit the logic; regenerate from the
// StandardTokenizer.jj grammar instead.
public class StandardTokenizer extends gate.creole.annic.apache.lucene.analysis.Tokenizer implements StandardTokenizerConstants {

  /** Constructs a tokenizer for this Reader. */
  public StandardTokenizer(Reader reader) {
    this(new FastCharStream(reader));
    // Stash the Reader on the inherited Tokenizer "input" field so the base
    // class (e.g. close()) can reach the underlying stream.
    this.input = reader;
  }

  /** Returns the next token in the stream, or null at EOS.
   * <p>The returned token's type is set to an element of {@link
   * StandardTokenizerConstants#tokenImage}.
   * @return the next Lucene analysis Token, or {@code null} at end of stream
   * @throws ParseException if the next token's kind is not one of the cases below
   * @throws IOException declared for API compatibility with Tokenizer.next()
   */
  final public gate.creole.annic.apache.lucene.analysis.Token next() throws ParseException, IOException {
    Token token = null;
    // Dispatch on the kind of the lookahead token. jj_ntk caches that kind;
    // -1 means "stale", in which case jj_ntk() fetches the next token from the
    // token manager and refreshes the cache.
    switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
    case ALPHANUM:
      token = jj_consume_token(ALPHANUM);
      break;
    case APOSTROPHE:
      token = jj_consume_token(APOSTROPHE);
      break;
    case ACRONYM:
      token = jj_consume_token(ACRONYM);
      break;
    case COMPANY:
      token = jj_consume_token(COMPANY);
      break;
    case EMAIL:
      token = jj_consume_token(EMAIL);
      break;
    case HOST:
      token = jj_consume_token(HOST);
      break;
    case NUM:
      token = jj_consume_token(NUM);
      break;
    case CJK:
      token = jj_consume_token(CJK);
      break;
    case 0:
      // Kind 0 is the EOF token in JavaCC-generated token managers.
      token = jj_consume_token(0);
      break;
    default:
      // Unexpected token kind: record the generation stamp for error
      // reporting, force a mismatch (-1 never matches), then throw.
      jj_la1[0] = jj_gen;
      jj_consume_token(-1);
      throw new ParseException();
    }
    if (token.kind == EOF) {
      // End of stream: the Tokenizer contract is to return null.
      // ({if (true) return ...;} is JavaCC's standard emission for "return".)
      {if (true) return null;}
    } else {
      // Wrap the parser Token in a Lucene analysis Token, labelled with the
      // token-type name from tokenImage.
      // NOTE(review): begin/endColumn appear to carry character offsets here,
      // presumably because FastCharStream reports offsets through the column
      // fields — confirm against FastCharStream before relying on this.
      {if (true) return
      new gate.creole.annic.apache.lucene.analysis.Token(token.image,
                                        token.beginColumn,token.endColumn,
                                        tokenImage[token.kind]);}
    }
    // Unreachable: both branches above return. Emitted by JavaCC to satisfy
    // the compiler's definite-return analysis.
    throw new Error("Missing return statement in function");
  }

  /** Generated Token Manager that produces raw tokens from the char stream. */
  public StandardTokenizerTokenManager token_source;
  /** Current token ({@code token}) and next lookahead token ({@code jj_nt}). */
  public Token token, jj_nt;
  /** Cached kind of the next token; -1 means the cache is stale. */
  private int jj_ntk;
  /** Generation counter, incremented on each consumed token; used to decide
   *  which lookahead data is current when building a ParseException. */
  private int jj_gen;
  /** Per-choice-point generation stamps (this grammar has one choice point). */
  final private int[] jj_la1 = new int[1];
  /** Bitmask of token kinds expected at choice point 0. */
  static private int[] jj_la1_0;
  static {
      jj_la1_0();
   }
   private static void jj_la1_0() {
      // 0x10ff: one bit per expected token kind at the switch in next().
      // NOTE(review): presumably bits 0-7 plus bit 12 correspond to the kinds
      // handled above — confirm against StandardTokenizerConstants.
      jj_la1_0 = new int[] {0x10ff,};
   }

  /** Constructs the tokenizer from an already-wrapped character stream. */
  public StandardTokenizer(CharStream stream) {
    token_source = new StandardTokenizerTokenManager(stream);
    token = new Token();
    jj_ntk = -1;
    jj_gen = 0;
    // -1 stamps mean "no lookahead failure recorded yet" for error reporting.
    for (int i = 0; i < 1; i++) jj_la1[i] = -1;
  }

  /** Reinitialises this tokenizer over a new character stream, resetting all
   *  lookahead state so the instance can be reused. */
  public void ReInit(CharStream stream) {
    token_source.ReInit(stream);
    token = new Token();
    jj_ntk = -1;
    jj_gen = 0;
    for (int i = 0; i < 1; i++) jj_la1[i] = -1;
  }

  /** Constructs the tokenizer from an existing Token Manager. */
  public StandardTokenizer(StandardTokenizerTokenManager tm) {
    token_source = tm;
    token = new Token();
    jj_ntk = -1;
    jj_gen = 0;
    for (int i = 0; i < 1; i++) jj_la1[i] = -1;
  }

  /** Reinitialises this tokenizer over an existing Token Manager. */
  public void ReInit(StandardTokenizerTokenManager tm) {
    token_source = tm;
    token = new Token();
    jj_ntk = -1;
    jj_gen = 0;
    for (int i = 0; i < 1; i++) jj_la1[i] = -1;
  }

  /** Consumes and returns the next token if its kind matches {@code kind};
   *  otherwise restores the previous position, records the expected kind in
   *  jj_kind, and throws the generated ParseException. */
  final private Token jj_consume_token(int kind) throws ParseException {
    Token oldToken;
    // Tokens form a linked list; reuse an already-fetched lookahead token if
    // present, otherwise pull a fresh one from the token manager.
    if ((oldToken = token).next != null) token = token.next;
    else token = token.next = token_source.getNextToken();
    jj_ntk = -1;  // the cached lookahead kind is now stale
    if (token.kind == kind) {
      jj_gen++;
      return token;
    }
    // Mismatch: roll back so error reporting sees the pre-consume position.
    token = oldToken;
    jj_kind = kind;
    throw generateParseException();
  }

  /** Unconditionally advances to and returns the next token. */
  final public Token getNextToken() {
    if (token.next != null) token = token.next;
    else token = token.next = token_source.getNextToken();
    jj_ntk = -1;
    jj_gen++;
    return token;
  }

  /** Returns the token {@code index} positions ahead of the current one
   *  (0 = current), fetching and linking new tokens as needed. */
  final public Token getToken(int index) {
    Token t = token;
    for (int i = 0; i < index; i++) {
      if (t.next != null) t = t.next;
      else t = t.next = token_source.getNextToken();
    }
    return t;
  }

  /** Returns the kind of the next token, fetching it from the token manager
   *  if not yet linked, and caches the kind in jj_ntk. */
  final private int jj_ntk() {
    if ((jj_nt=token.next) == null)
      return (jj_ntk = (token.next=token_source.getNextToken()).kind);
    else
      return (jj_ntk = jj_nt.kind);
  }

  // Scratch state for building ParseException "expected token" sequences.
  // (Raw Vector is JavaCC's emission style; left as generated.)
  private java.util.Vector jj_expentries = new java.util.Vector();
  private int[] jj_expentry;
  /** Kind most recently expected by a failed jj_consume_token, or -1. */
  private int jj_kind = -1;

  /** Builds a ParseException describing the current token and the set of
   *  token kinds that would have been legal at this point. */
  public ParseException generateParseException() {
    jj_expentries.removeAllElements();
    // NOTE(review): 15 is presumably the total number of token kinds in
    // StandardTokenizerConstants — confirm if the grammar changes.
    boolean[] la1tokens = new boolean[15];
    for (int i = 0; i < 15; i++) {
      la1tokens[i] = false;
    }
    if (jj_kind >= 0) {
      la1tokens[jj_kind] = true;
      jj_kind = -1;
    }
    // Include kinds from any choice point whose recorded generation stamp
    // matches the current generation (i.e. whose lookahead just failed).
    for (int i = 0; i < 1; i++) {
      if (jj_la1[i] == jj_gen) {
        for (int j = 0; j < 32; j++) {
          if ((jj_la1_0[i] & (1<<j)) != 0) {
            la1tokens[j] = true;
          }
        }
      }
    }
    // Each expected kind becomes a one-element "expected sequence".
    for (int i = 0; i < 15; i++) {
      if (la1tokens[i]) {
        jj_expentry = new int[1];
        jj_expentry[0] = i;
        jj_expentries.addElement(jj_expentry);
      }
    }
    int[][] exptokseq = new int[jj_expentries.size()][];
    for (int i = 0; i < jj_expentries.size(); i++) {
      exptokseq[i] = (int[])jj_expentries.elementAt(i);
    }
    return new ParseException(token, exptokseq, tokenImage);
  }

  /** No-op: tracing is disabled in this generated parser. */
  final public void enable_tracing() {
  }

  /** No-op: tracing is disabled in this generated parser. */
  final public void disable_tracing() {
  }

}
|