001 package gate.creole.annic.apache.lucene.search;
002
003 /**
004 * Copyright 2004 The Apache Software Foundation
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 import java.io.IOException;
020 import java.util.Vector;
021 import gate.creole.annic.apache.lucene.index.IndexReader;
022
023 /** A Query that matches documents matching boolean combinations of other
024 queries, typically {@link TermQuery}s or {@link PhraseQuery}s.
025 */
026 public class BooleanQuery extends Query {
027
028 /**
029 * Default value is 1024. Use <code>gate.creole.annic.apache.lucene.maxClauseCount</code>
030 * system property to override.
031 */
032 public static int maxClauseCount =
033 Integer.parseInt(System.getProperty("gate.creole.annic.apache.lucene.maxClauseCount",
034 "1024"));
035
036 /** Thrown when an attempt is made to add more than {@link
037 * #getMaxClauseCount()} clauses. */
038 public static class TooManyClauses extends RuntimeException {}
039
040 /** Return the maximum number of clauses permitted, 1024 by default.
041 * Attempts to add more than the permitted number of clauses cause {@link
042 * TooManyClauses} to be thrown.*/
043 public static int getMaxClauseCount() { return maxClauseCount; }
044
045 /** Set the maximum number of clauses permitted. */
046 public static void setMaxClauseCount(int maxClauseCount) {
047 BooleanQuery.maxClauseCount = maxClauseCount;
048 }
049
050 private Vector clauses = new Vector();
051
052 /** Constructs an empty boolean query. */
053 public BooleanQuery() {}
054
055 /** Adds a clause to a boolean query. Clauses may be:
056 * <ul>
057 * <li><code>required</code> which means that documents which <i>do not</i>
058 * match this sub-query will <i>not</i> match the boolean query;
059 * <li><code>prohibited</code> which means that documents which <i>do</i>
060 * match this sub-query will <i>not</i> match the boolean query; or
061 * <li>neither, in which case matched documents are neither prohibited from
062 * nor required to match the sub-query. However, a document must match at
063 * least 1 sub-query to match the boolean query.
064 * </ul>
065 * It is an error to specify a clause as both <code>required</code> and
066 * <code>prohibited</code>.
067 *
068 * @see #getMaxClauseCount()
069 */
070 public void add(Query query, boolean required, boolean prohibited) {
071 add(new BooleanClause(query, required, prohibited));
072 }
073
074 /** Adds a clause to a boolean query.
075 * @see #getMaxClauseCount()
076 */
077 public void add(BooleanClause clause) {
078 if (clauses.size() >= maxClauseCount)
079 throw new TooManyClauses();
080
081 clauses.addElement(clause);
082 }
083
084 /** Returns the set of clauses in this query. */
085 public BooleanClause[] getClauses() {
086 return (BooleanClause[])clauses.toArray(new BooleanClause[0]);
087 }
088
089 private class BooleanWeight implements Weight {
090 private Searcher searcher;
091 private Vector weights = new Vector();
092
093 public BooleanWeight(Searcher searcher) {
094 this.searcher = searcher;
095 for (int i = 0 ; i < clauses.size(); i++) {
096 BooleanClause c = (BooleanClause)clauses.elementAt(i);
097 weights.add(c.query.createWeight(searcher));
098 }
099 }
100
101 public Query getQuery() { return BooleanQuery.this; }
102 public float getValue() { return getBoost(); }
103
104 public float sumOfSquaredWeights() throws IOException {
105 float sum = 0.0f;
106 for (int i = 0 ; i < weights.size(); i++) {
107 BooleanClause c = (BooleanClause)clauses.elementAt(i);
108 Weight w = (Weight)weights.elementAt(i);
109 if (!c.prohibited)
110 sum += w.sumOfSquaredWeights(); // sum sub weights
111 }
112
113 sum *= getBoost() * getBoost(); // boost each sub-weight
114
115 return sum ;
116 }
117
118
119 public void normalize(float norm) {
120 norm *= getBoost(); // incorporate boost
121 for (int i = 0 ; i < weights.size(); i++) {
122 BooleanClause c = (BooleanClause)clauses.elementAt(i);
123 Weight w = (Weight)weights.elementAt(i);
124 if (!c.prohibited)
125 w.normalize(norm);
126 }
127 }
128
129 public Scorer scorer(IndexReader reader, Searcher searcher) throws IOException {
130 this.searcher = searcher;
131 // First see if the (faster) ConjunctionScorer will work. This can be
132 // used when all clauses are required. Also, at this point a
133 // BooleanScorer cannot be embedded in a ConjunctionScorer, as the hits
134 // from a BooleanScorer are not always sorted by document number (sigh)
135 // and hence BooleanScorer cannot implement skipTo() correctly, which is
136 // required by ConjunctionScorer.
137 boolean allRequired = true;
138 boolean noneBoolean = true;
139 for (int i = 0 ; i < weights.size(); i++) {
140 BooleanClause c = (BooleanClause)clauses.elementAt(i);
141 if (!c.required)
142 allRequired = false;
143 if (c.query instanceof BooleanQuery)
144 noneBoolean = false;
145 }
146
147 if (allRequired && noneBoolean) { // ConjunctionScorer is okay
148 ConjunctionScorer result =
149 new ConjunctionScorer(getSimilarity(searcher));
150 for (int i = 0 ; i < weights.size(); i++) {
151 Weight w = (Weight)weights.elementAt(i);
152 Scorer subScorer = w.scorer(reader, searcher);
153 if (subScorer == null)
154 return null;
155 result.add(subScorer);
156 }
157 return result;
158 }
159
160 // Use good-old BooleanScorer instead.
161 BooleanScorer result = new BooleanScorer(getSimilarity(searcher));
162
163 for (int i = 0 ; i < weights.size(); i++) {
164 BooleanClause c = (BooleanClause)clauses.elementAt(i);
165 Weight w = (Weight)weights.elementAt(i);
166 Scorer subScorer = w.scorer(reader, searcher);
167 if (subScorer != null)
168 result.add(subScorer, c.required, c.prohibited);
169 else if (c.required)
170 return null;
171 }
172
173 return result;
174 }
175
176 public Explanation explain(IndexReader reader, int doc)
177 throws IOException {
178 Explanation sumExpl = new Explanation();
179 sumExpl.setDescription("sum of:");
180 int coord = 0;
181 int maxCoord = 0;
182 float sum = 0.0f;
183 for (int i = 0 ; i < weights.size(); i++) {
184 BooleanClause c = (BooleanClause)clauses.elementAt(i);
185 Weight w = (Weight)weights.elementAt(i);
186 Explanation e = w.explain(reader, doc);
187 if (!c.prohibited) maxCoord++;
188 if (e.getValue() > 0) {
189 if (!c.prohibited) {
190 sumExpl.addDetail(e);
191 sum += e.getValue();
192 coord++;
193 } else {
194 return new Explanation(0.0f, "match prohibited");
195 }
196 } else if (c.required) {
197 return new Explanation(0.0f, "match required");
198 }
199 }
200 sumExpl.setValue(sum);
201
202 if (coord == 1) // only one clause matched
203 sumExpl = sumExpl.getDetails()[0]; // eliminate wrapper
204
205 float coordFactor = getSimilarity(searcher).coord(coord, maxCoord);
206 if (coordFactor == 1.0f) // coord is no-op
207 return sumExpl; // eliminate wrapper
208 else {
209 Explanation result = new Explanation();
210 result.setDescription("product of:");
211 result.addDetail(sumExpl);
212 result.addDetail(new Explanation(coordFactor,
213 "coord("+coord+"/"+maxCoord+")"));
214 result.setValue(sum*coordFactor);
215 return result;
216 }
217 }
218 }
219
220 protected Weight createWeight(Searcher searcher) {
221 return new BooleanWeight(searcher);
222 }
223
224 public Query rewrite(IndexReader reader) throws IOException {
225 if (clauses.size() == 1) { // optimize 1-clause queries
226 BooleanClause c = (BooleanClause)clauses.elementAt(0);
227 if (!c.prohibited) { // just return clause
228
229 Query query = c.query.rewrite(reader); // rewrite first
230
231 if (getBoost() != 1.0f) { // incorporate boost
232 if (query == c.query) // if rewrite was no-op
233 query = (Query)query.clone(); // then clone before boost
234 query.setBoost(getBoost() * query.getBoost());
235 }
236
237 return query;
238 }
239 }
240
241 BooleanQuery clone = null; // recursively rewrite
242 for (int i = 0 ; i < clauses.size(); i++) {
243 BooleanClause c = (BooleanClause)clauses.elementAt(i);
244 Query query = c.query.rewrite(reader);
245 if (query != c.query) { // clause rewrote: must clone
246 if (clone == null)
247 clone = (BooleanQuery)this.clone();
248 clone.clauses.setElementAt
249 (new BooleanClause(query, c.required, c.prohibited), i);
250 }
251 }
252 if (clone != null) {
253 return clone; // some clauses rewrote
254 } else
255 return this; // no clauses rewrote
256 }
257
258
259 public Object clone() {
260 BooleanQuery clone = (BooleanQuery)super.clone();
261 clone.clauses = (Vector)this.clauses.clone();
262 return clone;
263 }
264
265 /** Prints a user-readable version of this query. */
266 public String toString(String field) {
267 StringBuffer buffer = new StringBuffer();
268 if (getBoost() != 1.0) {
269 buffer.append("(");
270 }
271
272 for (int i = 0 ; i < clauses.size(); i++) {
273 BooleanClause c = (BooleanClause)clauses.elementAt(i);
274 if (c.prohibited)
275 buffer.append("-");
276 else if (c.required)
277 buffer.append("+");
278
279 Query subQuery = c.query;
280 if (subQuery instanceof BooleanQuery) { // wrap sub-bools in parens
281 buffer.append("(");
282 buffer.append(c.query.toString(field));
283 buffer.append(")");
284 } else
285 buffer.append(c.query.toString(field));
286
287 if (i != clauses.size()-1)
288 buffer.append(" ");
289 }
290
291 if (getBoost() != 1.0) {
292 buffer.append(")^");
293 buffer.append(getBoost());
294 }
295
296 return buffer.toString();
297 }
298
299 /** Returns true iff <code>o</code> is equal to this. */
300 public boolean equals(Object o) {
301 if (!(o instanceof BooleanQuery))
302 return false;
303 BooleanQuery other = (BooleanQuery)o;
304 return (this.getBoost() == other.getBoost())
305 && this.clauses.equals(other.clauses);
306 }
307
308 /** Returns a hash code value for this object.*/
309 public int hashCode() {
310 return Float.floatToIntBits(getBoost()) ^ clauses.hashCode();
311 }
312
313 }
|