001 package gate.creole.annic.apache.lucene.search;
002
003 /**
004 * Copyright 2004 The Apache Software Foundation
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 import java.io.IOException;
020 import java.util.Vector;
021
022 import gate.creole.annic.apache.lucene.index.Term;
023 import gate.creole.annic.apache.lucene.index.TermPositions;
024 import gate.creole.annic.apache.lucene.index.IndexReader;
025
026 /** A Query that matches documents containing a particular sequence of terms.
027 This may be combined with other terms with a {@link BooleanQuery}.
028 */
029 public class PhraseQuery extends Query {
030 private String field;
031 private Vector terms = new Vector();
032 /* Niraj */
033 private Vector positions = new Vector();
034 /* End */
035 private int slop = 0;
036
037 /** Constructs an empty phrase query. */
038 public PhraseQuery() {}
039
040 /** Sets the number of other words permitted between words in query phrase.
041 If zero, then this is an exact phrase search. For larger values this works
042 like a <code>WITHIN</code> or <code>NEAR</code> operator.
043
044 <p>The slop is in fact an edit-distance, where the units correspond to
045 moves of terms in the query phrase out of position. For example, to switch
046 the order of two words requires two moves (the first move places the words
047 atop one another), so to permit re-orderings of phrases, the slop must be
048 at least two.
049
050 <p>More exact matches are scored higher than sloppier matches, thus search
051 results are sorted by exactness.
052
053 <p>The slop is zero by default, requiring exact matches.*/
054 public void setSlop(int s) { slop = s; }
055 /** Returns the slop. See setSlop(). */
056 public int getSlop() { return slop; }
057
058 int totalTerms = 0;
059 /** Niraj */
060 /** Adds a term to the end of the query phrase. */
061 public void add(Term term, Integer position, boolean considerAsATerm) {
062 if (terms.size() == 0)
063 field = term.field();
064 else if (term.field() != field)
065 throw new IllegalArgumentException
066 ("All phrase terms must be in the same field: " + term);
067
068 terms.addElement(term);
069 positions.addElement(position);
070
071 if(considerAsATerm) {
072 totalTerms++;
073 }
074 }
075
076 public void setTotalTerms(int totalTerms) {
077 this.totalTerms = totalTerms;
078 }
079
080 /* End */
081
082 public void add(Term term) {
083 if (terms.size() == 0)
084 field = term.field();
085 else if (term.field() != field)
086 throw new IllegalArgumentException
087 ("All phrase terms must be in the same field: " + term);
088
089 terms.addElement(term);
090 }
091
092 /** Returns the set of terms in this phrase. */
093 public Term[] getTerms() {
094 return (Term[])terms.toArray(new Term[0]);
095 }
096
097 private class PhraseWeight implements Weight {
098 private Searcher searcher;
099 private float value;
100 private float idf;
101 private float queryNorm;
102 private float queryWeight;
103
104 public PhraseWeight(Searcher searcher) {
105 this.searcher = searcher;
106 }
107
108 public String toString() { return "weight(" + PhraseQuery.this + ")"; }
109
110 public Query getQuery() { return PhraseQuery.this; }
111 public float getValue() { return value; }
112
113 public float sumOfSquaredWeights() throws IOException {
114 idf = getSimilarity(searcher).idf(terms, searcher);
115 queryWeight = idf * getBoost(); // compute query weight
116 return queryWeight * queryWeight; // square it
117 }
118
119 public void normalize(float queryNorm) {
120 this.queryNorm = queryNorm;
121 queryWeight *= queryNorm; // normalize query weight
122 value = queryWeight * idf; // idf for document
123 }
124
125 public Scorer scorer(IndexReader reader, Searcher searcher) throws IOException {
126 if (terms.size() == 0) // optimize zero-term case
127 return null;
128
129 TermPositions[] tps = new TermPositions[terms.size()];
130 for (int i = 0; i < terms.size(); i++) {
131 TermPositions p = reader.termPositions((Term)terms.elementAt(i));
132 if (p == null)
133 return null;
134 tps[i] = p;
135 }
136
137 if (slop == 0) { // optimize exact case
138 return new ExactPhraseScorer(this, tps, /*Niraj*/positions, totalTerms, getSimilarity(searcher),
139 reader.norms(field), searcher);
140
141 }
142 else
143 return
144 new SloppyPhraseScorer(this, tps, getSimilarity(searcher), slop,
145 reader.norms(field));
146
147 }
148
149
150 public Explanation explain(IndexReader reader, int doc)
151 throws IOException {
152
153 Explanation result = new Explanation();
154 result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
155
156 StringBuffer docFreqs = new StringBuffer();
157 StringBuffer query = new StringBuffer();
158 query.append('\"');
159 for (int i = 0; i < terms.size(); i++) {
160 if (i != 0) {
161 docFreqs.append(" ");
162 query.append(" ");
163 }
164
165 Term term = (Term)terms.elementAt(i);
166
167 docFreqs.append(term.text());
168 docFreqs.append("=");
169 docFreqs.append(searcher.docFreq(term));
170
171 query.append(term.text());
172 }
173 query.append('\"');
174
175 Explanation idfExpl =
176 new Explanation(idf, "idf(" + field + ": " + docFreqs + ")");
177
178 // explain query weight
179 Explanation queryExpl = new Explanation();
180 queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");
181
182 Explanation boostExpl = new Explanation(getBoost(), "boost");
183 if (getBoost() != 1.0f)
184 queryExpl.addDetail(boostExpl);
185 queryExpl.addDetail(idfExpl);
186
187 Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
188 queryExpl.addDetail(queryNormExpl);
189
190 queryExpl.setValue(boostExpl.getValue() *
191 idfExpl.getValue() *
192 queryNormExpl.getValue());
193
194 result.addDetail(queryExpl);
195
196 // explain field weight
197 Explanation fieldExpl = new Explanation();
198 fieldExpl.setDescription("fieldWeight("+field+":"+query+" in "+doc+
199 "), product of:");
200
201 Explanation tfExpl = scorer(reader, this.searcher).explain(doc);
202 fieldExpl.addDetail(tfExpl);
203 fieldExpl.addDetail(idfExpl);
204
205 Explanation fieldNormExpl = new Explanation();
206 byte[] fieldNorms = reader.norms(field);
207 float fieldNorm =
208 fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
209 fieldNormExpl.setValue(fieldNorm);
210 fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
211 fieldExpl.addDetail(fieldNormExpl);
212
213 fieldExpl.setValue(tfExpl.getValue() *
214 idfExpl.getValue() *
215 fieldNormExpl.getValue());
216
217 result.addDetail(fieldExpl);
218
219 // combine them
220 result.setValue(queryExpl.getValue() * fieldExpl.getValue());
221
222 if (queryExpl.getValue() == 1.0f)
223 return fieldExpl;
224
225 return result;
226 }
227 }
228
229 protected Weight createWeight(Searcher searcher) {
230 if (terms.size() == 1) { // optimize one-term case
231 Term term = (Term)terms.elementAt(0);
232 Query termQuery = new TermQuery(term);
233 termQuery.setBoost(getBoost());
234 return termQuery.createWeight(searcher);
235 }
236 return new PhraseWeight(searcher);
237 }
238
239
240 /** Prints a user-readable version of this query. */
241 public String toString(String f) {
242 StringBuffer buffer = new StringBuffer();
243 if (!field.equals(f)) {
244 buffer.append(field);
245 buffer.append(":");
246 }
247
248 buffer.append("\"");
249 for (int i = 0; i < terms.size(); i++) {
250 buffer.append(((Term)terms.elementAt(i)).text());
251 if (i != terms.size()-1)
252 buffer.append(" ");
253 }
254 buffer.append("\"");
255
256 if (slop != 0) {
257 buffer.append("~");
258 buffer.append(slop);
259 }
260
261 if (getBoost() != 1.0f) {
262 buffer.append("^");
263 buffer.append(Float.toString(getBoost()));
264 }
265
266 return buffer.toString();
267 }
268
269 /** Returns true iff <code>o</code> is equal to this. */
270 public boolean equals(Object o) {
271 if (!(o instanceof PhraseQuery))
272 return false;
273 PhraseQuery other = (PhraseQuery)o;
274 return (this.getBoost() == other.getBoost())
275 && (this.slop == other.slop)
276 && this.terms.equals(other.terms);
277 }
278
279 /** Returns a hash code value for this object.*/
280 public int hashCode() {
281 return Float.floatToIntBits(getBoost())
282 ^ Float.floatToIntBits(slop)
283 ^ terms.hashCode();
284 }
285
286 }
|