001 package gate.creole.annic.apache.lucene.search;
002
003 /**
004 * Copyright 2004 The Apache Software Foundation
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 import java.io.IOException;
020 import java.util.*;
021
022 import gate.creole.annic.apache.lucene.store.Directory;
023 import gate.creole.annic.apache.lucene.document.Document;
024 import gate.creole.annic.apache.lucene.index.IndexReader;
025 import gate.creole.annic.apache.lucene.index.Term;
026
027 /**
028 * Implements search over a single IndexReader.
029 *
030 * <p>
031 * Applications usually need only call the inherited {@link #search(Query)} or
032 * {@link #search(Query,Filter)} methods.
033 */
034 public class IndexSearcher extends Searcher {
035 IndexReader reader;
036
037 private boolean closeReader;
038
039 /** Creates a searcher searching the index in the named directory. */
040 public IndexSearcher(String path) throws IOException {
041 this(IndexReader.open(path), true);
042 }
043
044 /** Creates a searcher searching the index in the provided directory. */
045 public IndexSearcher(Directory directory) throws IOException {
046 this(IndexReader.open(directory), true);
047 }
048
049 /** Creates a searcher searching the provided index. */
050 public IndexSearcher(IndexReader r) {
051 this(r, false);
052 }
053
054 private IndexSearcher(IndexReader r, boolean closeReader) {
055 reader = r;
056 this.closeReader = closeReader;
057 }
058
059 /**
060 * Note that the underlying IndexReader is not closed, if IndexSearcher was
061 * constructed with IndexSearcher(IndexReader r). If the IndexReader was
062 * supplied implicitly by specifying a directory, then the IndexReader gets
063 * closed.
064 */
065 public void close() throws IOException {
066 if (closeReader)
067 reader.close();
068 }
069
070 // inherit javadoc
071 public int docFreq(Term term) throws IOException {
072 return reader.docFreq(term);
073 }
074
075 // inherit javadoc
076 public Document doc(int i) throws IOException {
077 return reader.document(i);
078 }
079
080 // inherit javadoc
081 public int maxDoc() throws IOException {
082 return reader.maxDoc();
083 }
084
085 // inherit javadoc
086 public TopDocs search(Query query, Filter filter, final int nDocs)
087 throws IOException {
088 initializeTermPositions();
089 Scorer scorer = query.weight(this)
090 .scorer(reader,/* Niraj */this /* End */);
091 if (scorer == null)
092 return new TopDocs(0, new ScoreDoc[0]);
093 final BitSet bits = filter != null ? filter.bits(reader) : null;
094 final HitQueue hq = new HitQueue(nDocs);
095 final int[] totalHits = new int[1];
096 scorer.score(new HitCollector() {
097 public final void collect(int doc, float score) {
098 if (score > 0.0f && // ignore zeroed buckets
099 (bits == null || bits.get(doc))) { // skip docs not in
100 // bits
101 totalHits[0]++;
102 hq.insert(new ScoreDoc(doc, score));
103 }
104 }
105 },/* Niraj */this /* End */);
106
107 ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
108 for (int i = hq.size() - 1; i >= 0; i--)
109 // put docs in array
110 scoreDocs[i] = (ScoreDoc) hq.pop();
111
112 return new TopDocs(totalHits[0], scoreDocs);
113 }
114
115 // inherit javadoc
116 public TopFieldDocs search(Query query, Filter filter, final int nDocs,
117 Sort sort) throws IOException {
118 initializeTermPositions();
119 Scorer scorer = query.weight(this).scorer(reader, this);
120 if (scorer == null)
121 return new TopFieldDocs(0, new ScoreDoc[0], sort.fields);
122
123 final BitSet bits = filter != null ? filter.bits(reader) : null;
124 final FieldSortedHitQueue hq = new FieldSortedHitQueue(reader,
125 sort.fields, nDocs);
126 final int[] totalHits = new int[1];
127 scorer.score(new HitCollector() {
128 public final void collect(int doc, float score) {
129 if (score > 0.0f && // ignore zeroed buckets
130 (bits == null || bits.get(doc))) { // skip docs not in
131 // bits
132 totalHits[0]++;
133 hq.insert(new FieldDoc(doc, score));
134 }
135 }
136 }, this);
137
138 ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
139 for (int i = hq.size() - 1; i >= 0; i--)
140 // put docs in array
141 scoreDocs[i] = hq.fillFields((FieldDoc) hq.pop());
142
143 return new TopFieldDocs(totalHits[0], scoreDocs, hq.getFields());
144 }
145
146 // inherit javadoc
147 public void search(Query query, Filter filter, final HitCollector results)
148 throws IOException {
149 initializeTermPositions();
150 HitCollector collector = results;
151 if (filter != null) {
152 final BitSet bits = filter.bits(reader);
153 collector = new HitCollector() {
154 public final void collect(int doc, float score) {
155 if (bits.get(doc)) { // skip docs not in bits
156 results.collect(doc, score);
157 }
158 }
159 };
160 }
161
162 Scorer scorer = query.weight(this).scorer(reader, this);
163 if (scorer == null)
164 return;
165 scorer.score(collector, this);
166 }
167
168 public Query rewrite(Query original) throws IOException {
169 Query query = original;
170 for (Query rewrittenQuery = query.rewrite(reader); rewrittenQuery != query; rewrittenQuery = query
171 .rewrite(reader)) {
172 query = rewrittenQuery;
173 }
174 return query;
175 }
176
177 public Explanation explain(Query query, int doc) throws IOException {
178 return query.weight(this).explain(reader, doc);
179 }
180
181 /**
182 * Each pattern is a result of either simple or a boolean query. The
183 * type number indicates if the query used to retrieve that pattern
184 * was simple or boolean.
185 */
186 private ArrayList<Integer> queryType = new ArrayList<Integer>();
187
188 /**
189 * Each terms has a frequency.
190 */
191 private ArrayList<Integer> frequencies = new ArrayList<Integer>();
192
193 /**
194 * Each Integer value in this list is an index of first annotation of
195 * the pattern that matches with the user query.
196 */
197 private ArrayList firstTermPositions = new ArrayList();
198
199 /**
200 * document numbers
201 */
202 private ArrayList<Integer> documentNumbers = new ArrayList<Integer>();
203
204 /**
205 * Stores how long each pattern is (in terms of number of
206 * annotations).
207 */
208 private ArrayList<Integer> patternLengths = new ArrayList<Integer>();
209
210 /**
211 * Sets the firstTermPositions.
212 *
213 * @param qType
214 * @param doc
215 * @param positions
216 * @param patternLength
217 */
218 public void setFirstTermPositions(int qType, int doc, ArrayList positions,
219 int patternLength, int frequency) {
220 queryType.add(new Integer(qType));
221 firstTermPositions.add(positions);
222 documentNumbers.add(new Integer(doc));
223 patternLengths.add(new Integer(patternLength));
224 frequencies.add(new Integer(frequency));
225 }
226
227 /**
228 * Initializes all local variables
229 *
230 */
231 public void initializeTermPositions() {
232 queryType = new ArrayList<Integer>();
233 firstTermPositions = new ArrayList();
234 documentNumbers = new ArrayList<Integer>();
235 patternLengths = new ArrayList<Integer>();
236 frequencies = new ArrayList<Integer>();
237 }
238
239 /**
240 * Returns an array of arrayLists where the first list contains
241 * document numbers, second list contains first term positions, third
242 * list contains the pattern lengths and the fourth one contains the
243 * query type for each pattern.
244 *
245 * @return
246 */
247 public ArrayList[] getFirstTermPositions() {
248 return new ArrayList[] {documentNumbers, firstTermPositions,
249 patternLengths, queryType, frequencies};
250 }
251 }
|