01 package gate.creole.orthomatcher;
02
03 import gate.Annotation;
04
05 import java.util.HashSet;
06 import java.util.Iterator;
07 import java.util.Map;
08
09 import static gate.creole.orthomatcher.OrthoMatcher.*;
10 /**
11 * RULE #4Name: Does all the non-punctuation tokens from the long string match the corresponding tokens
12 * in the short string?
13 * This basically identifies cases where the two strings match token for token, excluding punctuation
14 * Applied to: person annotations
15 *
16 * Modified by Andrew Borthwick, Spock Networks: Allowed for nickname match
17 */
18 public class MatchRule5 implements OrthoMatcherRule {
19
20 OrthoMatcher orthomatcher;
21
22 public MatchRule5(OrthoMatcher orthmatcher){
23 this.orthomatcher=orthmatcher;
24 }
25
26 public boolean value(String s1, String s2) {
27
28 boolean allTokensMatch = true;
29 // if (s1.equals("wilson")) {
30 // log.debug("MR4 Name: Matching" + tokensLongAnnot + " with " + tokensShortAnnot);
31 // log.debug("MR4 Name: Matching " + s1 + " with " + s2);
32 // }
33 if (orthomatcher.tokensLongAnnot.size() == 0 || orthomatcher.tokensShortAnnot.size() == 0) {
34 log.debug("Rule 5 rejecting " + s1 + " and " + s2 + " because one doesn't have any tokens");
35 return false;
36 }
37 Iterator<Annotation> tokensLongAnnotIter = orthomatcher.tokensLongAnnot.iterator();
38 Iterator<Annotation> tokensShortAnnotIter = orthomatcher.tokensShortAnnot.iterator();
39 while (tokensLongAnnotIter.hasNext() && tokensShortAnnotIter.hasNext()) {
40 Annotation token = (Annotation) tokensLongAnnotIter.next();
41 if (((String)token.getFeatures().get(TOKEN_KIND_FEATURE_NAME)).equals(PUNCTUATION_VALUE))
42 continue;
43 if (! orthomatcher.getOrthography().fuzzyMatch((String)(tokensShortAnnotIter.next().
44 getFeatures().get(TOKEN_STRING_FEATURE_NAME)),
45 (String) token.getFeatures().get(TOKEN_STRING_FEATURE_NAME))) {
46 allTokensMatch = false;
47 break;
48 }
49 }
50 if (allTokensMatch && log.isDebugEnabled()) {
51 log.debug("rule 5 matched " + s1 + "(id: " + orthomatcher.longAnnot.getId() + ", offset: " + orthomatcher.longAnnot.getStartNode().getOffset() + ") to " +
52 s2+ "(id: " + orthomatcher.shortAnnot.getId() + ", offset: " + orthomatcher.shortAnnot.getStartNode().getOffset() + ")");
53 }
54
55 if (allTokensMatch) OrthoMatcherHelper.usedRule(5);
56
57 return allTokensMatch;
58 }
59
60 public String getId(){
61 return "MatchRule5";
62 }
63 }
|