gate.creole.orthomatcher
Class BasicAnnotationOrthography
java.lang.Object
gate.creole.orthomatcher.BasicAnnotationOrthography
- All Implemented Interfaces:
- AnnotationOrthography
public class BasicAnnotationOrthography
- extends Object
- implements AnnotationOrthography
|
Field Summary |
protected static org.apache.log4j.Logger |
log
|
|
Method Summary |
boolean |
allNonStopTokensInOtherAnnot(ArrayList<Annotation> firstName,
ArrayList<Annotation> secondName,
String TOKEN_STRING_FEATURE_NAME,
boolean caseSensitive)
|
HashSet |
buildTables(AnnotationSet nameAllAnnots)
Builds the tables of name-match information
(used by the name-match rules). |
boolean |
fuzzyMatch(String s1,
String s2)
|
String |
getStringForAnnotation(Annotation a,
Document d)
Returns the normalized content of an annotation, with extra white space removed. |
protected Map<String,HashSet<String>> |
initNicknames(String nicknameFileEncoding,
URL fileURL)
|
boolean |
isUnknownGender(String gender)
|
boolean |
matchedAlready(Annotation annot1,
Annotation annot2,
List matchesDocFeature,
AnnotationSet nameAllAnnots)
|
String |
stripPersonTitle(String annotString,
Annotation annot,
Document doc,
Map<Integer,List<Annotation>> tokensMap,
HashMap normalizedTokensMap,
AnnotationSet nameAllAnnots)
Returns a person name without a title. |
void |
updateMatches(Annotation newAnnot,
Annotation prevAnnot,
List matchesDocFeature,
AnnotationSet nameAllAnnots)
|
Annotation |
updateMatches(Annotation newAnnot,
String annotString,
HashMap processedAnnots,
AnnotationSet nameAllAnnots,
List matchesDocFeature)
|
| Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
log
protected static final org.apache.log4j.Logger log
BasicAnnotationOrthography
public BasicAnnotationOrthography(String personType,
boolean extLists,
String unknownType,
URL nicknameFile,
Double minimumNicknameLikelihood,
String encoding)
getStringForAnnotation
public String getStringForAnnotation(Annotation a,
Document d)
throws ExecutionException
- Description copied from interface:
AnnotationOrthography
- Returns the normalized content of an annotation, with extra white space removed.
- Specified by:
getStringForAnnotation in interface AnnotationOrthography
- Returns:
- the normalized content of the annotation, with extra white space removed
- Throws:
ExecutionException
fuzzyMatch
public boolean fuzzyMatch(String s1,
String s2)
- Specified by:
fuzzyMatch in interface AnnotationOrthography
allNonStopTokensInOtherAnnot
public boolean allNonStopTokensInOtherAnnot(ArrayList<Annotation> firstName,
ArrayList<Annotation> secondName,
String TOKEN_STRING_FEATURE_NAME,
boolean caseSensitive)
- Specified by:
allNonStopTokensInOtherAnnot in interface AnnotationOrthography
- Returns:
- true if all of the tokens in firstName are either found in secondName or are stop words
stripPersonTitle
public String stripPersonTitle(String annotString,
Annotation annot,
Document doc,
Map<Integer,List<Annotation>> tokensMap,
HashMap normalizedTokensMap,
AnnotationSet nameAllAnnots)
throws ExecutionException
- Returns a person name without a title. Also removes the title from the supplied
tokensMap
- Specified by:
stripPersonTitle in interface AnnotationOrthography
- Throws:
ExecutionException
matchedAlready
public boolean matchedAlready(Annotation annot1,
Annotation annot2,
List matchesDocFeature,
AnnotationSet nameAllAnnots)
- Specified by:
matchedAlready in interface AnnotationOrthography
updateMatches
public Annotation updateMatches(Annotation newAnnot,
String annotString,
HashMap processedAnnots,
AnnotationSet nameAllAnnots,
List matchesDocFeature)
- Specified by:
updateMatches in interface AnnotationOrthography
updateMatches
public void updateMatches(Annotation newAnnot,
Annotation prevAnnot,
List matchesDocFeature,
AnnotationSet nameAllAnnots)
- Specified by:
updateMatches in interface AnnotationOrthography
buildTables
public HashSet buildTables(AnnotationSet nameAllAnnots)
- Builds the tables of name-match information
(used by the name-match rules).
- Specified by:
buildTables in interface AnnotationOrthography
- Returns:
isUnknownGender
public boolean isUnknownGender(String gender)
- Specified by:
isUnknownGender in interface AnnotationOrthography
initNicknames
protected Map<String,HashSet<String>> initNicknames(String nicknameFileEncoding,
URL fileURL)
throws IOException
- Throws:
IOException