001 /*
002 * AnnicIndexing.java
003 *
004 * Niraj Aswani, 19/March/07
005 *
006 * $Id: AnnicIndexing.html,v 1.0 2007/03/19 16:22:01 niraj Exp $
007 */
008 package gate.creole.annic.test;
009
010 import java.util.HashMap;
011 import java.util.List;
012 import java.util.ArrayList;
013 import java.util.Map;
014
015 import gate.creole.*;
016 import gate.creole.annic.Constants;
017 import gate.creole.annic.IndexException;
018 import gate.creole.annic.lucene.LuceneIndexer;
019
020 import java.io.IOException;
021 import java.net.URL;
022
023 /**
024 * The class is an example of how to index a corpus using the ANNIC
025 * functionalities. The class is used by the TestAnnic.java.
026 *
027 * @author niraj
028 *
029 */
030 public class AnnicIndexing {
031
032 /**
033 * serial version id
034 */
035 private static final long serialVersionUID = 3688785860577212210L;
036
037 /**
038 * Corpus to index.
039 */
040 private gate.Corpus corpus;
041
042 /**
043 * Features to exclude from index.
044 */
045 private ArrayList<String> featuresToExclude = new ArrayList<String>();
046
047 /**
048 * Where to store the index.
049 */
050 private java.net.URL indexOutputDirectoryLocation;
051
052 /**
053 * AnnotationSet to index.
054 */
055 private String annotationSetName = "";
056
057 /**
058 * Base Token Annotation Type e.g. Token
059 */
060 private String baseTokenAnnotationType = "";
061
062 /**
063 * Index Unit Annotation type e.g. Sentence
064 */
065 private String indexUnitAnnotationType = "";
066
067 /**
068 * Instance of a Lucene Indexer
069 */
070 private LuceneIndexer indexer = null;
071
072 /**
073 * Constructor
074 * @throws IOException
075 */
076 public AnnicIndexing() throws IOException {
077 corpus = null;
078 annotationSetName = "";
079 indexer = new LuceneIndexer((URL)null);
080
081 }
082
083 /**
084 * This method creates a lucene index.
085 */
086 public void execute() throws ExecutionException {
087 long start = System.currentTimeMillis();
088 try {
089 Map parameters = new HashMap();
090 ArrayList<String> toinclude = new ArrayList<String>();
091 toinclude.add(getAnnotationSetName());
092 parameters.put(Constants.ANNOTATION_SETS_NAMES_TO_INCLUDE, toinclude);
093 parameters.put(Constants.ANNOTATION_SETS_NAMES_TO_EXCLUDE, new ArrayList<String>());
094 parameters.put(Constants.BASE_TOKEN_ANNOTATION_TYPE, getBaseTokenAnnotationType());
095 parameters.put(Constants.FEATURES_TO_EXCLUDE, getFeaturesToExclude());
096 parameters.put(Constants.FEATURES_TO_INCLUDE, new ArrayList<String>());
097 parameters.put(Constants.INDEX_UNIT_ANNOTATION_TYPE, getIndexUnitAnnotationType());
098 parameters.put(Constants.INDEX_LOCATION_URL, getIndexOutputDirectoryLocation());
099 indexer.setCorpus(getCorpus());
100 indexer.createIndex(parameters);
101 }
102 catch(IndexException ie) {
103 throw new ExecutionException(ie);
104 }
105 }
106
107 /**
108 * Gets the location of index output directory
109 * @return
110 */
111 public java.net.URL getIndexOutputDirectoryLocation() {
112 return indexOutputDirectoryLocation;
113 }
114
115 /**
116 * Sets the location of index output directory
117 * @param dir
118 */
119 public void setIndexOutputDirectoryLocation(java.net.URL dir) {
120 indexOutputDirectoryLocation = dir;
121 }
122
123 /**
124 * Gets the annotation set name to be indexed
125 * @return
126 */
127 public String getAnnotationSetName() {
128 return annotationSetName;
129 }
130
131 /**
132 * Sets the annotation set name
133 * @param annotationSetName
134 */
135 public void setAnnotationSetName(String annotationSetName) {
136 if(annotationSetName != null && annotationSetName.trim().equals("")) {
137 annotationSetName = null;
138 }
139
140 this.annotationSetName = annotationSetName;
141 }
142
143 /**
144 * Gets the base token annotation type
145 * @return
146 */
147 public String getBaseTokenAnnotationType() {
148 return this.baseTokenAnnotationType;
149 }
150
151 /**
152 * Sets the base token annotation type
153 * @param baseTokenAnnotationType
154 */
155 public void setBaseTokenAnnotationType(String baseTokenAnnotationType) {
156 this.baseTokenAnnotationType = baseTokenAnnotationType;
157 }
158
159 /**
160 * Sets the corpus to index
161 * @param corpus
162 */
163 public void setCorpus(gate.Corpus corpus) {
164 this.corpus = corpus;
165 }
166
167 /**
168 * Gets the corpus to index
169 * @return
170 */
171 public gate.Corpus getCorpus() {
172 return this.corpus;
173 }
174
175 /**
176 * Gets the features of annotation to be excluded from being indexed
177 * @return
178 */
179 public List getFeaturesToExclude() {
180 return featuresToExclude;
181 }
182
183 /**
184 * Sets the features of annotations to be excluded from being indexed
185 * @param featuresToExclude
186 */
187 public void setFeaturesToExclude(ArrayList<String> featuresToExclude) {
188 this.featuresToExclude = featuresToExclude;
189 }
190
191 /**
192 * Gets the Index Unit Annotation type.
193 * @return
194 */
195 public String getIndexUnitAnnotationType() {
196 return indexUnitAnnotationType;
197 }
198
199 /**
200 * Sets the Index Unit annotation type.
201 * @param indexUnitAnnotationType
202 */
203 public void setIndexUnitAnnotationType(String indexUnitAnnotationType) {
204 this.indexUnitAnnotationType = indexUnitAnnotationType;
205 }
206 }
|