001 package gate.creole.morph;
002
003 import java.lang.reflect.Method;
004 import java.net.MalformedURLException;
005 import java.net.URL;
006 import java.util.ArrayList;
007 import java.util.Comparator;
008 import java.util.HashSet;
009 import java.util.Iterator;
010 import java.util.TreeSet;
011 import java.util.regex.Pattern;
012
013 import gate.creole.ResourceInstantiationException;
014
015 /**
016 * <p>
017 * Title: Interpret.java
018 * </p>
019 * <p>
020 * Description: This is the main class, which should be invoked to load the
021 * rule file into the system and then to run the program that finds the root
022 * word of a given word and its affix.
023 * </p>
024 */
025 public class Interpret {
026
027 /**
028 * instance of the ReadFile class which reads the file and stores each line
029 * of the given program in the arraylist which can be read using different
030 * methods of the ReadFile class
031 */
032 private ReadFile file;
033
034 /** Boolean variables to keep track on which section is being read */
035 private boolean isDefineVarSession, isDefineRulesSession;
036
037 /** Instance of Storage class, which is used store all the variables details */
038 private Storage variables;
039
040 /** This variables keeps the record of available methods for the morphing */
041 private Method[] methods;
042
043 /** This variables holds the affix */
044 private String affix;
045
046 private Pattern vPat;
047
048 private Pattern nPat;
049
050 MorphFunctions morphInst;
051
052 ArrayList patterns = new ArrayList();
053 ArrayList fsms = new ArrayList();
054
055 /**
056 * The initial state of the FSM that backs this morpher
057 */
058 protected FSMState initialState;
059
060 protected HashSet lastStates;
061
062 /**
063 * It starts the actual program
064 *
065 * @param ruleFileName
066 */
067 public void init(URL ruleFileURL) throws ResourceInstantiationException {
068 RHS.patIndex = 0;
069 vPat = Pattern.compile("((VB)[DGNPZ]?)|(MD)");
070 nPat = Pattern.compile("(NN)(S)*");
071 variables = new Storage();
072 prepareListOfMorphMethods();
073 file = new ReadFile(ruleFileURL);
074 affix = null;
075 isDefineRulesSession = false;
076 isDefineVarSession = false;
077 morphInst = new MorphFunctions();
078
079 readProgram();
080 initialState = new FSMState(-1);
081
082 lastStates = new HashSet();
083 interpretProgram();
084
085 variables = null;
086 file = null;
087 lastStates = null;
088 }
089
090 class CharClass {
091 char ch;
092 FSMState st;
093 }
094
095 public void addState(char ch, FSMState fsm, int index) {
096 if(index == fsms.size()) {
097 fsms.add(new ArrayList());
098 }
099
100 ArrayList fs = (ArrayList) fsms.get(index);
101 for(int i=0;i<fs.size();i++) {
102 CharClass cc = (CharClass) fs.get(i);
103 if(cc.ch == ch)
104 return;
105 }
106
107 CharClass cc = new CharClass();
108 cc.ch = ch;
109 cc.st = fsm;
110 fs.add(cc);
111 }
112
113
114 public FSMState getState(char ch, int index) {
115 if(index >= fsms.size()) return null;
116 ArrayList fs = (ArrayList) fsms.get(index);
117 for(int i=0;i<fs.size();i++) {
118 CharClass cc = (CharClass) fs.get(i);
119 if(cc.ch == ch)
120 return cc.st;
121 }
122 return null;
123 }
124
125 private HashSet getStates(char ch, HashSet states) {
126 HashSet newStates = new HashSet();
127 Iterator iter = states.iterator();
128 while (iter.hasNext()) {
129 FSMState st = (FSMState) iter.next();
130 FSMState chState = st.next(ch, FSMState.CHILD_STATE);
131 if (chState != null) {
132 newStates.add(chState);
133 }
134
135 FSMState adState = st.next(ch, FSMState.ADJ_STATE);
136 if (adState != null) {
137 newStates.add(adState);
138 }
139 }
140 return newStates;
141 }
142
143 private boolean validCategory(String category) {
144 if (category.equals("*")) {
145 return true;
146 } else if (vPat.matcher(category).matches()) {
147 return true;
148 } else if (nPat.matcher(category).matches()) {
149 return true;
150 }
151 return false;
152 }
153
154 /**
155 * lookup <br>
156 *
157 * @param singleItem
158 * a single string to be looked up by the gazetteer
159 * @return set of the Lookups associated with the parameter
160 */
161 public String runMorpher(String word, String category) {
162 affix = null;
163 if(!validCategory(category)) {
164 return word;
165 }
166
167 foundRule = false;
168 HashSet states = new HashSet();
169 states.add(initialState);
170 for (int i = 0; i < word.length(); i++) {
171 char ch = word.charAt(i);
172 states = getStates(ch, states);
173 if (states.isEmpty()) {
174 return word;
175 }
176
177 }
178
179 // we have all states here
180 // we obtain all RHSes
181 TreeSet rhses = new TreeSet(new Comparator() {
182 public int compare(Object o1, Object o2) {
183 RHS r1 = (RHS) o1;
184 RHS r2 = (RHS) o2;
185 return r1.getPatternIndex() - r2.getPatternIndex();
186 }
187 });
188
189 Iterator iter = states.iterator();
190 while (iter.hasNext()) {
191 FSMState st = (FSMState) iter.next();
192 rhses.addAll(st.getRHSes());
193 }
194
195 if (rhses.isEmpty()) {
196 return word;
197 }
198
199 return executeRHSes(rhses, word, category);
200 }
201
202 protected int patternIndex = -1;
203 public int getPatternIndex() {
204 return patternIndex;
205 }
206
207 protected String executeRHSes(TreeSet rhses, String word, String category) {
208 foundRule = false;
209 // rhses are in sorted order
210 // we need to check if the word is compatible with pattern
211 Iterator rhsiter = rhses.iterator();
212 while (rhsiter.hasNext()){
213 RHS r1 = (RHS) rhsiter.next();
214 String answer = executeRHS(word, category, r1);
215
216 if (foundRule) {
217 patternIndex = r1.getPatternIndex();
218 return answer;
219 }
220 }
221 return word;
222 }
223
224 protected boolean foundRule = false;
225
226 protected String executeRHS(String word, String category, RHS rhs) {
227 if (category.equals("*")) {
228 return executeRule(word, rhs);
229 } else if (rhs.isVerb() && vPat.matcher(category).matches()) {
230 return executeRule(word, rhs);
231 } else if (rhs.isNoun() && nPat.matcher(category).matches()) {
232 return executeRule(word, rhs);
233 }
234 return word;
235 }
236
237 private String executeRule(String word, RHS rhs) {
238 Pattern p = (Pattern) patterns.get(rhs.getPatternIndex());
239
240 short methodIndex = rhs.getMethodIndex();
241 if (!p.matcher(word).matches()) {
242 foundRule = false;
243 return word;
244 }
245
246 // call the appropriate function
247 String[] parameters = rhs.getParameters();
248
249 // set the given word in that morph program
250 morphInst.setInput(word);
251 String answer = null;
252 switch (methodIndex) {
253 case ParsingFunctions.IRREG_STEM:
254 answer = morphInst.irreg_stem(parameters[0], parameters[1]);
255 break;
256 case ParsingFunctions.NULL_STEM:
257 answer = morphInst.null_stem();
258 break;
259 case ParsingFunctions.SEMIREG_STEM:
260 answer = morphInst.semi_reg_stem(Integer.parseInt(parameters[0]),
261 parameters[1]);
262 break;
263 case ParsingFunctions.STEM:
264 answer = morphInst.stem(Integer.parseInt(parameters[0]),
265 parameters[1], parameters[2]);
266 break;
267 default:
268 answer = null;
269 break;
270 }
271
272 if(answer != null) {
273 this.affix = morphInst.getAffix();
274 foundRule = true;
275 return answer;
276 } else {
277 foundRule = false;
278 return word;
279 }
280 }
281
282 /**
283 * This method prepares the list of available methods in the MorphFunctions
284 * class
285 */
286 private void prepareListOfMorphMethods()
287 throws ResourceInstantiationException {
288 methods = MorphFunctions.class.getDeclaredMethods();
289 }
290
291 /**
292 * read the program file
293 */
294 private void readProgram() throws ResourceInstantiationException {
295 // read the program file
296 boolean readStatus = file.read();
297
298 // check if read was success
299 if (!readStatus) {
300 // not it wasn't so simply display the message and ask user to check
301 // it
302 generateError("Some errors reading program file.. please check the"
303 + "program and try again");
304 }
305 }
306
307 /**
308 * This method reads each line of the program and interpret them
309 */
310 private void interpretProgram() throws ResourceInstantiationException {
311 // read each line and parse it
312 while (file.hasNext()) {
313 String currentLine = file.getNext();
314
315 if (currentLine == null || currentLine.trim().length() == 0) {
316 continue;
317 }
318
319 // remove all the leading spaces
320 currentLine = currentLine.trim();
321
322 /*
323 * if commandType is 0 ==> defineVars command if commandType is 1
324 * ==> defineRules command if commandType is 2 ==> variable
325 * declaration if commandType is 3 ==> rule declaration otherwise //
326 * unknown generate error
327 */
328 int commandType = findCommandType(currentLine);
329 switch (commandType) {
330 case -1:
331 // comment command
332 continue;
333 case 0:
334 // defineVars command
335 defineVarsCommand();
336 break;
337 case 1:
338 // defineRules command
339 defineRulesCommand();
340 break;
341 case 2:
342 // variable declaration
343 variableDeclarationCommand(currentLine);
344 break;
345 case 3:
346 // rule declaration
347 ruleDeclarationCommand(currentLine);
348 break;
349 default:
350 generateError("Syntax Error at line " + file.getPointer()
351 + " : " + currentLine);
352 break;
353 }
354 } // end while
355 }
356
357 /**
358 * This method interprets the line and finds out the type of command and
359 * returns the integer indicating the type of the command
360 *
361 * @param line
362 * The program command to be interpreted
363 * @return and <tt>int</tt> value
364 */
365 private int findCommandType(String line) {
366
367 // check for the comment command
368 if (line.substring(0, 2).equals("//") || line.charAt(0) == '#') {
369 return -1;
370 } else if (line.equals("defineVars")) {
371 return 0;
372 } else if (line.equals("defineRules")) {
373 return 1;
374 } else if (isDefineVarSession && line.split("==>").length == 2) {
375 return 2;
376 } else if (isDefineRulesSession &&
377 /*
378 * (line.charAt(0) == '{' || line.charAt(0) == '[' || line.charAt(0) ==
379 * '(' || line.charAt(0) == '\"')
380 */(line.charAt(0) == '<') && line.split("==>").length == 2) {
381 return 3;
382 } else {
383 return Codes.ERROR_CODE;
384 }
385 }
386
387 /**
388 * This method processes the command to define the variable section
389 */
390 private void defineVarsCommand() throws ResourceInstantiationException {
391
392 // variable section can only be defined once
393 if (isDefineVarSession) {
394 generateError("Variable Section already defined - " + "see line "
395 + file.getPointer());
396 } else if (isDefineRulesSession) {
397 generateError("Variable section must be declared before the Rule "
398 + "Section - see line " + file.getPointer());
399 } else {
400 isDefineVarSession = true;
401 }
402 }
403
404 /**
405 * This method processes the command to define the rule section
406 */
407 private void defineRulesCommand() throws ResourceInstantiationException {
408 if (isDefineRulesSession) {
409 generateError("Rule Section already defined - see " + "line "
410 + file.getPointer());
411 } else {
412 isDefineVarSession = false;
413 isDefineRulesSession = true;
414 }
415 }
416
417 /**
418 * This method processes the command to declare the variable
419 *
420 * @param line
421 */
422 private void variableDeclarationCommand(String line)
423 throws ResourceInstantiationException {
424 // ok so first find the variable name and the value for it
425 String varName = (line.split("==>"))[0].trim();
426 String varValue = (line.split("==>"))[1].trim();
427
428 // find the type of variable it is
429 int valueType = ParsingFunctions.findVariableType(varValue
430 .trim());
431 if (valueType == Codes.ERROR_CODE) {
432 generateError(varName + " - Variable Syntax Error - see " + "line"
433 + file.getPointer() + " : " + line);
434 }
435
436 // based on the variable type create the instance
437 Variable varInst = null;
438 switch (valueType) {
439 case Codes.CHARACTER_RANGE_CODE:
440 varInst = new CharacterRange();
441 break;
442 case Codes.CHARACTER_SET_CODE:
443 varInst = new CharacterSet();
444 break;
445 case Codes.STRING_SET_CODE:
446 varInst = new StringSet();
447 break;
448 }
449
450 // set the values in the variable
451 if (!varInst.set(varName, varValue)) {
452 generateError(varName
453 + " - Syntax Error while assigning value to the "
454 + "variable - see line" + file.getPointer() + " : " + line);
455 }
456
457 // and finally add the variable in
458 if (!variables.add(varName, varInst.getPattern())) {
459 generateError(varName.trim() + " - Variable already defined - see "
460 + "line " + file.getPointer() + " : " + line);
461 }
462
463 varInst.resetPointer();
464 }
465
466 /**
467 * This method processes the command to declare the rule
468 *
469 * @param line
470 */
471 private void ruleDeclarationCommand(String line)
472 throws ResourceInstantiationException {
473 // lets divide the rule into two parts
474 // LHS and RHS.
475 // LHS is a part which requires to be parsed and
476 // RHS should be checked for the legal function name and valid arguments
477 // we process RHS first and then the LHS
478 String[] ruleParts = line.split("==>");
479 if (ruleParts.length != 2) {
480 generateError("Error in declaring rule at line : "
481 + file.getPointer() + " : " + line);
482 }
483
484 // now check if the method which has been called in this rule actually
485 // available in the MorphFunction Class
486 String methodCalled = ruleParts[1].trim();
487 if (!isMethodAvailable(methodCalled)) {
488
489 // no method is not available so print the syntax error
490 generateError("Syntax error - method does not exists - see "
491 + "line " + file.getPointer() + " : " + line);
492 }
493
494 // so RHS part is Ok
495 // now we need to check if LHS is written properly
496 // and convert it to the pattern that is recognized by the java
497 String category = "";
498 // we need to find out the category
499 int i = 1;
500 for (; i < ruleParts[0].length(); i++) {
501 if (ruleParts[0].charAt(i) == '>')
502 break;
503 category = category + ruleParts[0].charAt(i);
504 }
505
506 if (i >= ruleParts[0].length()) {
507 generateError("Syntax error - pattern not written properly - see "
508 + "line " + file.getPointer() + " : " + line);
509 }
510
511 RHS rhs = new RHS(ruleParts[1], category);
512 ruleParts[0] = ruleParts[0].substring(i + 1, ruleParts[0].length())
513 .trim();
514 String regExp = ParsingFunctions.convertToRegExp(
515 ruleParts[0], variables);
516 patterns.add(Pattern.compile(regExp));
517 String[] rules = ParsingFunctions.normlizePattern(regExp);
518 for (int m = 0; m < rules.length; m++) {
519 HashSet lss = new HashSet();
520 lss.clear();
521 HashSet newSet = new HashSet();
522 newSet.add(initialState);
523 lss.add(newSet);
524 PatternPart parts[] = ParsingFunctions
525 .getPatternParts(rules[m].trim());
526 for (int j = 0; j < parts.length; j++) {
527 lss = ParsingFunctions.createFSMs(parts[j].getPartString(), parts[j].getType(), lss, this);
528 }
529 Iterator iter = lss.iterator();
530 while (iter.hasNext()) {
531 HashSet set = (HashSet) iter.next();
532 Iterator subIter = set.iterator();
533 while (subIter.hasNext()) {
534 FSMState st = (FSMState) subIter.next();
535 st.addRHS(rhs);
536 }
537 }
538 }
539 //drawFSM();
540 }
541
542 private HashSet intersect(HashSet a, HashSet b) {
543 HashSet result = new HashSet();
544 Iterator iter = a.iterator();
545 while (iter.hasNext()) {
546 FSMState st = (FSMState) iter.next();
547 if (b.contains(st)) {
548 result.add(st);
549 }
550 }
551 return result;
552 }
553
554 private void drawFSM() {
555 // we start with initialState
556 System.out.println("Initial:");
557 String space = "";
558 drawFSM(initialState, space);
559 }
560
561 private void drawFSM(FSMState st, String space) {
562 CharMap map = st.getTransitionFunction();
563 char[] keys = map.getItemsKeys();
564 if (keys != null) {
565 System.out.println(space + "Child:");
566 for (int i = 0; i < keys.length; i++) {
567 System.out.println(space + "'" + keys[i] + "':");
568 drawFSM(map.get(keys[i], FSMState.CHILD_STATE), space + " ");
569 }
570 }
571 keys = map.getAdjitemsKeys();
572 if (keys != null) {
573 System.out.println("ADJ:");
574 for (int i = 0; i < keys.length; i++) {
575 System.out.println(space + "'" + keys[i] + "' :");
576 // drawFSM(map.get(keys[i], FSMState.ADJ_STATE), space+" ");
577 }
578 }
579 }
580
581 /**
582 * This method takes a method signature and searches if the method
583 *
584 * @param method
585 * @return a <tt>boolean</tt> value.
586 */
587 private boolean isMethodAvailable(String method) {
588 // now first find the name of the method
589 // their parameters and their types
590 int index = method.indexOf("(");
591 if (index == -1 || index == 0
592 || method.charAt(method.length() - 1) != ')') {
593 return false;
594 }
595
596 String methodName = method.substring(0, index);
597 // now get the parameters
598
599 String[] parameters;
600 int[] userMethodParams;
601
602 String arguments = method.substring(index + 1, method.length() - 1);
603 if (arguments == null || arguments.trim().length() == 0) {
604 parameters = null;
605 userMethodParams = null;
606 } else {
607 parameters = method.substring(index + 1, method.length() - 1)
608 .split(",");
609 userMethodParams = new int[parameters.length];
610 }
611
612 // find the parameter types
613 // here we define only three types of arguments
614 // String, boolean and int
615 if (parameters != null) {
616 for (int i = 0; i < parameters.length; i++) {
617 if (parameters[i].startsWith("\"")
618 && parameters[i].endsWith("\"")) {
619 userMethodParams[i] = 7;
620 parameters[i] = "java.lang.String";
621 continue;
622 } else if (ParsingFunctions.isBoolean(parameters[i])) {
623 userMethodParams[i] = 6;
624 parameters[i] = "boolean";
625 } else if (ParsingFunctions.isInteger(parameters[i])) {
626 userMethodParams[i] = 2;
627 parameters[i] = "int";
628 } else {
629 // type cannot be recognized so generate error
630 return false;
631 }
632 }
633 }
634
635 // now parameters have been found, so check them with the available
636 // methods
637 // in the morph function
638 Outer: for (int i = 0; i < methods.length; i++) {
639 if (methods[i].getName().equals(methodName)) {
640 // yes method has found now check for the parameters
641 // compatibility
642 Class[] methodParams = methods[i].getParameterTypes();
643 // first check for the number of parameters
644 if (methods[i].getName().equals("null_stem")) {
645 return true;
646 }
647 if (methodParams.length == parameters.length) {
648 // yes arity has matched
649 // now set the precedence
650 int[] paramPrecedence = new int[methodParams.length];
651
652 // assign precedence
653 for (int j = 0; j < methodParams.length; j++) {
654 if (methodParams[j].getName()
655 .equals("java.lang.String"))
656 paramPrecedence[j] = 7;
657 else if (methodParams[j].getName().equals("boolean"))
658 paramPrecedence[j] = 6;
659 else if (methodParams[j].getName().equals("int"))
660 paramPrecedence[j] = 2;
661 else
662 return false;
663 }
664
665 // if we are here that means all the type matched
666 // so valid method declaration
667 return true;
668 }
669 }
670 }
671 // if we are here that means method doesnot found
672 return false;
673 }
674
675 /**
676 * Generates the error and stop the execution
677 *
678 * @param mess -
679 * message to be displayed as an error on the standard output
680 */
681 private void generateError(String mess)
682 throws ResourceInstantiationException {
683 System.out.println("\n\n" + mess);
684 System.out.println("Program terminated...");
685 throw new ResourceInstantiationException("\n\n" + mess);
686 }
687
688 /**
689 * Main method
690 *
691 * @param args
692 */
693 public static void main(String[] args)
694 throws ResourceInstantiationException {
695 if (args == null || args.length < 3) {
696 System.out
697 .println("Usage : Interpret <Rules fileName> <word> <POS>");
698 System.exit(-1);
699 }
700 Interpret interpret = new Interpret();
701 try {
702 interpret.init(new URL((String) args[0]));
703 } catch (MalformedURLException mue) {
704 throw new RuntimeException(mue);
705 }
706 String rootWord = interpret.runMorpher(args[1], args[2]);
707 String affix = interpret.getAffix();
708 System.out.println("Root : " + rootWord);
709 System.out.println("affix : " + affix);
710 }
711
712 /**
713 * This method tells what was the affix to the provided word
714 *
715 * @return affix
716 */
717 public String getAffix() {
718 return this.affix;
719 }
720
721 public FSMState getInitialState() {
722 return initialState;
723 }
724 }
|