001 /*
002 * FSMState.java
003 *
004 * Copyright (c) 1995-2010, The University of Sheffield. See the file
005 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
006 *
007 * This file is part of GATE (see http://gate.ac.uk/), and is free
008 * software, licenced under the GNU Library General Public License,
009 * Version 2, June 1991 (in the distribution as file licence.html,
010 * and also available at http://gate.ac.uk/gate/licence.html).
011 *
012 * Valentin Tablan, 27/06/2000
013 *
014 * $Id: FSMState.java 12006 2009-12-01 17:24:28Z thomas_heitz $
015 */
016
017 package gate.creole.tokeniser;
018
019 import java.util.*;
020
021 /** A state of the finite state machine that is the kernel tokeniser
022 */
023 class FSMState implements java.io.Serializable {
024
025 /** Debug flag */
026 private static final boolean DEBUG = false;
027
028 /** Creates a new FSMState belonging to a specified tokeniser
029 * @param owner the tokeniser that contains this new state
030 */
031 public FSMState(SimpleTokeniser owner) {
032 myIndex = index++;
033 owner.fsmStates.add(this);
034 }
035
036 /** Returns the value of the transition function of this state for a given
037 * Unicode type.
038 * As this state can belong to a non-deterministic automaton, the result
039 * will be a set.
040 */
041 Set nextSet(UnicodeType type) {
042 if(null == type) return transitionFunction[SimpleTokeniser.maxTypeId];
043 else return transitionFunction[type.type];
044 } // nextSet(UnicodeType type)
045
046 /** Returns the value of the transition function of this state for a given
047 * Unicode type specified using the internal ids used by the tokeniser.
048 * As this state can belong to a non-deterministic automaton, the result
049 * will be a set.
050 */
051 Set nextSet(int type) {
052 return transitionFunction[type];
053 } // nextSet(int type)
054
055 /** Adds a new transition to the transition function of this state
056 * @param type the restriction for the new transition; if <code>null</code>
057 * this transition will be unrestricted.
058 * @param state the vaule of the transition function for the given type
059 */
060 void put(UnicodeType type, FSMState state) {
061 if(null == type) put(SimpleTokeniser.maxTypeId, state);
062 else put(type.type, state);
063 } // put(UnicodeType type, FSMState state)
064
065 /** Adds a new transition to the transition function of this state
066 * @param index the internal index of the Unicode type representing the
067 * restriction for the new transition;
068 * @param state the vaule of the transition function for the given type
069 */
070 void put(int index, FSMState state) {
071 if(null == transitionFunction[index])
072 transitionFunction[index] = new HashSet();
073 transitionFunction[index].add(state);
074 } // put(int index, FSMState state)
075
076 /** Sets the RHS string value */
077 void setRhs(String rhs) { this.rhs = rhs; }
078
079 /** Gets the RHS string value */
080 String getRhs() { return rhs; }
081
082 /** Checks whether this state is a final one */
083 boolean isFinal() { return (null != rhs); }
084
085 /** Gets the unique id of this state */
086 int getIndex() { return myIndex; }
087
088 /** Returns a GML representation of all the edges emerging
089 * from this state */
090 String getEdgesGML() {
091 /// String res = "";
092 StringBuffer res = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
093 Set nextSet;
094 Iterator nextSetIter;
095 FSMState nextState;
096
097 for(int i = 0; i <= SimpleTokeniser.maxTypeId; i++){
098 nextSet = transitionFunction[i];
099 if(null != nextSet){
100 nextSetIter = nextSet.iterator();
101 while(nextSetIter.hasNext()){
102 nextState = (FSMState)nextSetIter.next();
103 /* res += "edge [ source " + myIndex +
104 " target " + nextState.getIndex() +
105 " label \"";
106 */
107 res.append("edge [ source ");
108 res.append(myIndex);
109 res.append(" target ");
110 res.append(nextState.getIndex());
111 res.append(" label \"");
112
113 if(i == SimpleTokeniser.maxTypeId) ///res += "[]";
114 res.append("[]");
115 else ///res += SimpleTokeniser.typeMnemonics[i];
116 res.append(SimpleTokeniser.typeMnemonics[i]);
117
118 ///res += "\" ]\n";
119 res.append("\" ]\n");
120 }//while(nextSetIter.hasNext())
121 }
122 };
123 return res.toString();
124 } // getIndex
125
126 /** The transition function of this state. It's an array mapping from int
127 * (the ids used internally by the tokeniser for the Unicode types) to sets
128 * of states.
129 */
130 Set[] transitionFunction = new Set[SimpleTokeniser.maxTypeId + 1];
131
132 /** The RHS string value from which the annotation associated to
133 * final states is constructed.
134 */
135 String rhs;
136
137 /**the unique index of this state*/
138 int myIndex;
139
140 /**used for generating unique ids*/
141 static int index;
142
143 static{
144 index = 0;
145 }
146
147 } // class FSMState
|