001 package gate.creole.annic.apache.lucene.search;
002
003 /**
004 * Copyright 2004 The Apache Software Foundation
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 import java.io.IOException;
020 import java.util.Vector;
021
022 import gate.creole.annic.apache.lucene.document.Document;
023
024 /** A ranked list of documents, used to hold search results. */
025 public final class Hits {
026 private Query query;
027 private Searcher searcher;
028 private Filter filter = null;
029 private Sort sort = null;
030
031 private int length; // the total number of hits
032 private Vector hitDocs = new Vector(); // cache of hits retrieved
033
034 private HitDoc first; // head of LRU cache
035 private HitDoc last; // tail of LRU cache
036 private int numDocs = 0; // number cached
037 private int maxDocs = 200; // max to cache
038
039 Hits(Searcher s, Query q, Filter f) throws IOException {
040 query = q;
041 searcher = s;
042 filter = f;
043 getMoreDocs(50); // retrieve 100 initially
044 }
045
046 Hits(Searcher s, Query q, Filter f, Sort o) throws IOException {
047 query = q;
048 searcher = s;
049 filter = f;
050 sort = o;
051 getMoreDocs(50); // retrieve 100 initially
052 }
053
054 /**
055 * Tries to add new documents to hitDocs.
056 * Ensures that the hit numbered <code>min</code> has been retrieved.
057 */
058 private final void getMoreDocs(int min) throws IOException {
059 if (hitDocs.size() > min) {
060 min = hitDocs.size();
061 }
062
063 int n = min * 2; // double # retrieved
064 TopDocs topDocs = (sort == null) ? searcher.search(query, filter, n) : searcher.search(query, filter, n, sort);
065 length = topDocs.totalHits;
066 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
067
068 float scoreNorm = 1.0f;
069 if (length > 0 && scoreDocs[0].score > 1.0f) {
070 scoreNorm = 1.0f / scoreDocs[0].score;
071 }
072
073 int end = scoreDocs.length < length ? scoreDocs.length : length;
074 for (int i = hitDocs.size(); i < end; i++) {
075 hitDocs.addElement(new HitDoc(scoreDocs[i].score * scoreNorm,
076 scoreDocs[i].doc));
077 }
078 }
079
080 /** Returns the total number of hits available in this set. */
081 public final int length() {
082 return length;
083 }
084
085 /** Returns the stored fields of the n<sup>th</sup> document in this set.
086 <p>Documents are cached, so that repeated requests for the same element may
087 return the same Document object. */
088 public final Document doc(int n) throws IOException {
089 HitDoc hitDoc = hitDoc(n);
090
091 // Update LRU cache of documents
092 remove(hitDoc); // remove from list, if there
093 addToFront(hitDoc); // add to front of list
094 if (numDocs > maxDocs) { // if cache is full
095 HitDoc oldLast = last;
096 remove(last); // flush last
097 oldLast.doc = null; // let doc get gc'd
098 }
099
100 if (hitDoc.doc == null) {
101 hitDoc.doc = searcher.doc(hitDoc.id); // cache miss: read document
102 }
103
104 return hitDoc.doc;
105 }
106
107 /** Returns the score for the nth document in this set. */
108 public final float score(int n) throws IOException {
109 return hitDoc(n).score;
110 }
111
112 /** Returns the id for the nth document in this set. */
113 public final int id(int n) throws IOException {
114 return hitDoc(n).id;
115 }
116
117
118 private final HitDoc hitDoc(int n) throws IOException {
119 if (n >= length) {
120 throw new IndexOutOfBoundsException("Not a valid hit number: " + n);
121 }
122
123 if (n >= hitDocs.size()) {
124 getMoreDocs(n);
125 }
126
127 return (HitDoc) hitDocs.elementAt(n);
128 }
129
130 private final void addToFront(HitDoc hitDoc) { // insert at front of cache
131 if (first == null) {
132 last = hitDoc;
133 } else {
134 first.prev = hitDoc;
135 }
136
137 hitDoc.next = first;
138 first = hitDoc;
139 hitDoc.prev = null;
140
141 numDocs++;
142 }
143
144 private final void remove(HitDoc hitDoc) { // remove from cache
145 if (hitDoc.doc == null) { // it's not in the list
146 return; // abort
147 }
148
149 if (hitDoc.next == null) {
150 last = hitDoc.prev;
151 } else {
152 hitDoc.next.prev = hitDoc.prev;
153 }
154
155 if (hitDoc.prev == null) {
156 first = hitDoc.next;
157 } else {
158 hitDoc.prev.next = hitDoc.next;
159 }
160
161 numDocs--;
162 }
163 }
164
165 final class HitDoc {
166 float score;
167 int id;
168 Document doc = null;
169
170 HitDoc next; // in doubly-linked cache
171 HitDoc prev; // in doubly-linked cache
172
173 HitDoc(float s, int i) {
174 score = s;
175 id = i;
176 }
177 }
|