001 /*
002 * DFSMState.java
003 *
004 * Copyright (c) 1995-2010, The University of Sheffield. See the file
005 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
006 *
007 * This file is part of GATE (see http://gate.ac.uk/), and is free
008 * software, licenced under the GNU Library General Public License,
009 * Version 2, June 1991 (in the distribution as file licence.html,
010 * and also available at http://gate.ac.uk/gate/licence.html).
011 *
012 * Valentin Tablan, 27/06/2000
013 *
014 * $Id: DFSMState.java 12006 2009-12-01 17:24:28Z thomas_heitz $
015 */
016
017 /*
018 modified by OntoText, Aug 29
019
020 */
021
022 package gate.creole.tokeniser;
023
024 import java.util.*;
025
026 /** Implements a state of the deterministic finite state machine of the
027 * tokeniser.
028 * It differs from {@link FSMState FSMState} by the definition of the
029 * transition function which in this case maps character types to other states
030 * as oposed to the transition function from FSMState which maps character
031 * types to sets of states, hence the nondeterministic character.
032 * @see FSMState
033 */
034 class DFSMState implements java.io.Serializable { //extends FSMState{
035
036 /** Debug flag */
037 private static final boolean DEBUG = false;
038
039 /** Constructs a new DFSMState object and adds it to the list of deterministic
040 * states of the {@link DefaultTokeniser DefaultTokeniser} provided as owner.
041 * @param owner a {@link DefaultTokeniser DefaultTokeniser} object
042 */
043 public DFSMState(SimpleTokeniser owner){
044 myIndex = index++;
045 owner.dfsmStates.add(this);
046 }
047
048 /** Adds a new mapping in the transition function of this state
049 * @param type the UnicodeType for this mapping
050 * @param state the next state of the FSM Machine when a character of type type
051 * is read from the input.
052 */
053 void put(UnicodeType type, DFSMState state){
054 put(type.type, state);
055 } // put(UnicodeType type, DFSMState state)
056
057 /** Adds a new mapping using the actual index in the internal array.
058 * This method is for internal use only. Use
059 * {@link #put(gate.creole.tokeniser.UnicodeType,
060 * gate.creole.tokeniser.DFSMState)} instead.
061 */
062 void put(int index, DFSMState state){
063 transitionFunction[index] = state;
064 } // put(int index, DFSMState state)
065
066 /** This method is used to access the transition function of this state.
067 * @param type the Unicode type identifier as the corresponding static value
068 * on {@link java.lang.Character}
069 */
070 DFSMState next(int type){//UnicodeType type){
071 return transitionFunction[type];
072 } // next
073
074 /** Returns a GML (Graph Modelling Language) representation of the edges
075 * emerging from this state
076 */
077 String getEdgesGML(){
078 ///String res = "";
079 //OT
080 StringBuffer res = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
081 Set nextSet;
082 Iterator nextSetIter;
083 DFSMState nextState;
084
085 for(int i = 0; i< transitionFunction.length; i++){
086 nextState = transitionFunction[i];
087 if(null != nextState){
088 /*
089 res += "edge [ source " + myIndex +
090 " target " + nextState.getIndex() +
091 " label \"";
092 res += SimpleTokeniser.typeMnemonics[i];
093 res += "\" ]\n";
094 */
095 //OT
096 res.append("edge [ source ");
097 res.append(myIndex);
098 res.append(" target ");
099 res.append(nextState.getIndex());
100 res.append(" label \"");
101 res.append(SimpleTokeniser.typeMnemonics[i]);
102 res.append("\" ]\n");
103 }
104 };
105 return res.toString();
106 } // getEdgesGML
107
108 /** Builds the token description for the token that will be generated when
109 * this <b>final</b> state will be reached and the action associated with it
110 * will be fired.
111 * See also {@link #setRhs(String)}.
112 */
113 void buildTokenDesc() throws TokeniserException{
114 String ignorables = " \t\f";
115 String token = null,
116 type = null,
117 attribute = null,
118 value = null
119 ///prefix = null,
120 ///read =""
121 ;
122 //OT
123 StringBuffer prefix = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
124 StringBuffer read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
125
126 LinkedList attributes = new LinkedList(),
127 values = new LinkedList();
128 StringTokenizer mainSt =
129 new StringTokenizer(rhs, ignorables + "\\\";=", true);
130
131 int descIndex = 0;
132 //phase means:
133 //0 == looking for type;
134 //1 == looking for attribute;
135 //2 == looking for value;
136 //3 == write the attr/value pair
137 int phase = 0;
138
139 while(mainSt.hasMoreTokens()) {
140 token = SimpleTokeniser.skipIgnoreTokens(mainSt);
141
142 if(token.equals("\\")){
143 if(null == prefix)
144 ///prefix = mainSt.nextToken();
145 //OT
146 prefix = new StringBuffer(mainSt.nextToken());
147 else ///prefix += mainSt.nextToken();
148 //OT
149 prefix.append(mainSt.nextToken());
150 continue;
151 } else if(null != prefix) {
152 ///read += prefix;
153 //OT
154 read.append(prefix.toString());
155 prefix = null;
156 }
157
158 if(token.equals("\"")){
159 ///read = mainSt.nextToken("\"");
160 //OT
161 read = new StringBuffer(mainSt.nextToken("\""));
162 if(read.equals("\"")) ///read = "";
163 read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
164 else {
165 //delete the remaining enclosing quote and restore the delimiters
166 mainSt.nextToken(ignorables + "\\\";=");
167 }
168
169 } else if(token.equals("=")) {
170
171 if(phase == 1){
172 ///attribute = read;
173 //OT
174 attribute = read.toString();
175 ///read = "";
176 //OT
177 read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
178 phase = 2;
179 }else throw new TokeniserException("Invalid attribute format: " +
180 read);
181 } else if(token.equals(";")) {
182 if(phase == 0){
183 ///type = read;
184 type = read.toString();
185 ///read = "";
186 read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
187 //Out.print("Type: " + type);
188 attributes.addLast(type);
189 values.addLast("");
190 phase = 1;
191 } else if(phase == 2) {
192 ///value = read;
193 value = read.toString();
194 ///read = "";
195 read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
196 phase = 3;
197 } else throw new TokeniserException("Invalid value format: " +
198 read);
199 } else ///read += token;
200 read.append(token);
201
202 if(phase == 3) {
203 // Out.print("; " + attribute + "=" + value);
204 attributes.addLast(attribute);
205 values.addLast(value);
206 phase = 1;
207 }
208 }
209 //Out.println();
210 if(attributes.size() < 1)
211 throw new InvalidRuleException("Invalid right hand side " + rhs);
212 tokenDesc = new String[attributes.size()][2];
213
214 for(int i = 0; i < attributes.size(); i++) {
215 tokenDesc[i][0] = (String)attributes.get(i);
216 tokenDesc[i][1] = (String)values.get(i);
217 }
218
219 // for(int i = 0; i < attributes.size(); i++){
220 // Out.println(tokenDesc[i][0] + "=" +
221 // tokenDesc[i][1]);
222 // }
223 } // buildTokenDesc
224
225 /** Sets the right hand side associated with this state. The RHS is
226 * represented as a string value that will be parsed by the
227 * {@link #buildTokenDesc()} method being converted in a table of strings
228 * with 2 columns and as many lines as necessary.
229 * @param rhs the RHS string
230 */
231 void setRhs(String rhs) { this.rhs = rhs; }
232
233 /** Returns the RHS string*/
234 String getRhs(){return rhs;}
235
236 /** Checks whether this state is a final one*/
237 boolean isFinal() { return (null != rhs); }
238
239 /** Returns the unique ID of this state.*/
240 int getIndex() { return myIndex; }
241
242 /** Returns the token description associated with this state. This description
243 * is built by {@link #buildTokenDesc()} method and consists of a table of
244 * strings having two columns.
245 * The first line of the table contains the annotation type on the first
246 * position and nothing on the second.
247 * Each line after the first one contains a attribute on the first position
248 * and its associated value on the second.
249 */
250 String[][] getTokenDesc() {
251 return tokenDesc;
252 }
253
254 /** A table of strings describing an annotation.
255 * The first line of the table contains the annotation type on the first
256 * position and nothing on the second.
257 * Each line after the first one contains a attribute on the first position
258 * and its associated value on the second.
259 */
260 String[][] tokenDesc;
261
262 /** The transition function of this state.
263 */
264 DFSMState[] transitionFunction = new DFSMState[SimpleTokeniser.maxTypeId];
265
266 /** The string of the RHS of the rule from which the token
267 * description is built
268 */
269 String rhs;
270
271 /** The unique index of this state*/
272 int myIndex;
273
274 /** Used to generate unique indices for all the objects of this class*/
275 static int index;
276
277 static {
278 index = 0;
279 }
280
281 } // class DFSMState
|