01 package gate.creole.orthomatcher;
02
03 import gate.Annotation;
04
/**
 * RULE #13: do multi-word names match except for
 * one token, e.g.
 * "Second Force Recon Company" == "Force Recon Company".
 * Note that this rule has NOT been used in LaSIE's 1.5
 * namematcher.
 * NOTE(review): this header says #13, but the class is named MatchRule14
 * and reports itself as rule 14 - confirm which numbering is intended.
 * Restrictions: - remove cdg first
 *               - the shortest name should be 2 words or more
 *               - if N is the number of tokens of the longest
 *                 name, then N-1 tokens should be matched
 * Condition(s): case-sensitive match
 * Applied to: organisation or person annotations only
 */
18 public class MatchRule14 implements OrthoMatcherRule {
19
20 OrthoMatcher orthomatcher;
21
22 public MatchRule14(OrthoMatcher orthmatcher){
23 this.orthomatcher=orthmatcher;
24 }
25
26 public boolean value(String s1, String s2) {
27
28 boolean result = false;
29
30 int matched_tokens = 0, mismatches = 0;
31
32 // if names < 2 words then rule is invalid
33 if (orthomatcher.tokensLongAnnot.size() < 3 || orthomatcher.tokensShortAnnot.size() < 2)
34 result = false;
35 else {
36 // now do the matching
37 for (int i=0,j= 0; i < orthomatcher.tokensShortAnnot.size() && mismatches < 2; i++) {
38
39 // Out.prln("i = " + i);
40 // Out.prln("j = " + j);
41 if ( ((Annotation) orthomatcher.tokensLongAnnot.get(j)).getFeatures().get(orthomatcher.TOKEN_STRING_FEATURE_NAME).equals(
42 ((Annotation) orthomatcher.tokensShortAnnot.get(i)).getFeatures().get(orthomatcher.TOKEN_STRING_FEATURE_NAME)) ) {
43 matched_tokens++;
44 j++;
45 } else
46 mismatches++;
47 } // for
48
49 if (matched_tokens >= orthomatcher.tokensLongAnnot.size()-1)
50 result = true;
51 }
52
53 if (result) OrthoMatcherHelper.usedRule(14);
54 return result;
55 }
56
57 public String getId(){
58 return "MatchRule14";
59 }
60 }
|