gate.creole.orthomatcher
Class BasicAnnotationOrthography

java.lang.Object
  extended by gate.creole.orthomatcher.BasicAnnotationOrthography
All Implemented Interfaces:
AnnotationOrthography

public class BasicAnnotationOrthography
extends Object
implements AnnotationOrthography


Field Summary
private  boolean extLists
           
protected static org.apache.log4j.Logger log
           
private  Double minimumNicknameLikelihood
           
private  Map<String,HashSet<String>> nicknameMap
           
private  String personType
           
private  String unknownType
           
 
Constructor Summary
BasicAnnotationOrthography(String personType, boolean extLists, String unknownType, URL nicknameFile, Double minimumNicknameLikelihood, String encoding)
           
 
Method Summary
 boolean allNonStopTokensInOtherAnnot(ArrayList<Annotation> firstName, ArrayList<Annotation> secondName, String TOKEN_STRING_FEATURE_NAME, boolean caseSensitive)
           
 HashSet buildTables(AnnotationSet nameAllAnnots)
          Tables for namematch info (used by the namematch rules)
 boolean fuzzyMatch(String s1, String s2)
           
 String getStringForAnnotation(Annotation a, Document d)
          Returns normalized content of an annotation - removes extra white spaces.
protected  Map<String,HashSet<String>> initNicknames(String nicknameFileEncoding, URL fileURL)
           
 boolean isUnknownGender(String gender)
           
 boolean matchedAlready(Annotation annot1, Annotation annot2, List matchesDocFeature, AnnotationSet nameAllAnnots)
           
 String stripPersonTitle(String annotString, Annotation annot, Document doc, Map<Integer,List<Annotation>> tokensMap, HashMap normalizedTokensMap, AnnotationSet nameAllAnnots)
          Returns a person name without a title.
 void updateMatches(Annotation newAnnot, Annotation prevAnnot, List matchesDocFeature, AnnotationSet nameAllAnnots)
           
 Annotation updateMatches(Annotation newAnnot, String annotString, HashMap processedAnnots, AnnotationSet nameAllAnnots, List matchesDocFeature)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

extLists

private final boolean extLists

personType

private final String personType

unknownType

private final String unknownType

nicknameMap

private Map<String,HashSet<String>> nicknameMap

minimumNicknameLikelihood

private final Double minimumNicknameLikelihood

log

protected static final org.apache.log4j.Logger log
Constructor Detail

BasicAnnotationOrthography

public BasicAnnotationOrthography(String personType,
                                  boolean extLists,
                                  String unknownType,
                                  URL nicknameFile,
                                  Double minimumNicknameLikelihood,
                                  String encoding)
Method Detail

getStringForAnnotation

public String getStringForAnnotation(Annotation a,
                                     Document d)
                              throws ExecutionException
Description copied from interface: AnnotationOrthography
Returns normalized content of an annotation - removes extra white spaces.

Specified by:
getStringForAnnotation in interface AnnotationOrthography
Returns:
the normalized content of the annotation, with extra white spaces removed
Throws:
ExecutionException

fuzzyMatch

public boolean fuzzyMatch(String s1,
                          String s2)
Specified by:
fuzzyMatch in interface AnnotationOrthography

allNonStopTokensInOtherAnnot

public boolean allNonStopTokensInOtherAnnot(ArrayList<Annotation> firstName,
                                            ArrayList<Annotation> secondName,
                                            String TOKEN_STRING_FEATURE_NAME,
                                            boolean caseSensitive)
Specified by:
allNonStopTokensInOtherAnnot in interface AnnotationOrthography
Returns:
true if all of the tokens in firstName are either found in secondName or are stop words

stripPersonTitle

public String stripPersonTitle(String annotString,
                               Annotation annot,
                               Document doc,
                               Map<Integer,List<Annotation>> tokensMap,
                               HashMap normalizedTokensMap,
                               AnnotationSet nameAllAnnots)
                        throws ExecutionException
Returns a person name without a title. Also removes the title from the supplied tokensMap.

Specified by:
stripPersonTitle in interface AnnotationOrthography
Throws:
ExecutionException

matchedAlready

public boolean matchedAlready(Annotation annot1,
                              Annotation annot2,
                              List matchesDocFeature,
                              AnnotationSet nameAllAnnots)
Specified by:
matchedAlready in interface AnnotationOrthography

updateMatches

public Annotation updateMatches(Annotation newAnnot,
                                String annotString,
                                HashMap processedAnnots,
                                AnnotationSet nameAllAnnots,
                                List matchesDocFeature)
Specified by:
updateMatches in interface AnnotationOrthography

updateMatches

public void updateMatches(Annotation newAnnot,
                          Annotation prevAnnot,
                          List matchesDocFeature,
                          AnnotationSet nameAllAnnots)
Specified by:
updateMatches in interface AnnotationOrthography

buildTables

public HashSet buildTables(AnnotationSet nameAllAnnots)
Tables for namematch info (used by the namematch rules)

Specified by:
buildTables in interface AnnotationOrthography
Returns:

isUnknownGender

public boolean isUnknownGender(String gender)
Specified by:
isUnknownGender in interface AnnotationOrthography

initNicknames

protected Map<String,HashSet<String>> initNicknames(String nicknameFileEncoding,
                                                    URL fileURL)
                                             throws IOException
Throws:
IOException