01 package gate.creole.orthomatcher;
02
03 import gate.Annotation;
04
05 import java.util.Iterator;
06
07 import static gate.creole.orthomatcher.OrthoMatcher.*;
08
09 /**
10 * RULE #4: Does the first non-punctuation token from the long string match
11 * the first token from the short string?
12 * e.g. "fred jones" == "fred"
13 * Condition(s): case-insensitive match
14 * Applied to: person annotations
15 *
16 * Modified by Andrew Borthwick, Spock Networks: Disallow stop words
17 */
18 public class MatchRule4 implements OrthoMatcherRule {
19
20 OrthoMatcher orthomatcher;
21
22 public MatchRule4(OrthoMatcher orthmatcher){
23 this.orthomatcher=orthmatcher;
24 }
25
26 public boolean value(String s1, String s2) {
27
28 boolean allTokensMatch = true;
29 // Out.prln("MR4: Matching" + s1 + " with " + s2);
30
31 Iterator tokensLongAnnotIter = orthomatcher.tokensLongAnnot.iterator();
32 Iterator tokensShortAnnotIter = orthomatcher.tokensShortAnnot.iterator();
33 while (tokensLongAnnotIter.hasNext() && tokensShortAnnotIter.hasNext()) {
34 Annotation token = (Annotation) tokensLongAnnotIter.next();
35 if (((String)token.getFeatures().get(TOKEN_KIND_FEATURE_NAME)).equals(PUNCTUATION_VALUE) ||
36 token.getFeatures().containsKey("ortho_stop"))
37 continue;
38 if (! ((String)(((Annotation) tokensShortAnnotIter.next()).
39 getFeatures().get(TOKEN_STRING_FEATURE_NAME))).equals(
40 (String) token.getFeatures().get(TOKEN_STRING_FEATURE_NAME))) {
41 allTokensMatch = false;
42 break;
43 } // if (!tokensLongAnnot.nextToken()
44 } // while
45 // if (allTokensMatch)
46 // Out.prln("rule4 fired. result is: " + allTokensMatch);
47 if (allTokensMatch && log.isDebugEnabled()) {
48 log.debug("rule 4 matched " + s1 + "(id: " + orthomatcher.longAnnot.getId() + ") to " + s2+ "(id: " + orthomatcher.shortAnnot.getId() + ")");
49 }
50
51 if (allTokensMatch) OrthoMatcherHelper.usedRule(4);
52
53 return allTokensMatch;
54 }
55
56 public String getId(){
57 return "MatchRule4";
58 }
59 }
|