001 package gate.creole.annic.apache.lucene.search;
002
003 /**
004 * Copyright 2004 The Apache Software Foundation
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 import java.io.Serializable;
020
021
022 /**
023 * Encapsulates sort criteria for returned hits.
024 *
025 * <p>The fields used to determine sort order must be carefully chosen.
026 * Documents must contain a single term in such a field,
027 * and the value of the term should indicate the document's relative position in
028 * a given sort order. The field must be indexed, but should not be tokenized,
029 * and does not need to be stored (unless you happen to want it back with the
030 * rest of your document data). In other words:
031 *
032 * <dl><dd><code>document.add (new Field ("byNumber", Integer.toString(x), false, true, false));</code>
033 * </dd></dl>
034 *
035 * <p><h3>Valid Types of Values</h3>
036 *
037 * <p>There are three possible kinds of term values which may be put into
038 * sorting fields: Integers, Floats, or Strings. Unless
039 * {@link SortField SortField} objects are specified, the type of value
040 * in the field is determined by parsing the first term in the field.
041 *
042 * <p>Integer term values should contain only digits and an optional
043 * preceding negative sign. Values must be base 10 and in the range
044 * <code>Integer.MIN_VALUE</code> to <code>Integer.MAX_VALUE</code> inclusive.
045 * Documents which should appear first in the sort
046 * should have low value integers, later documents high values
047 * (i.e. the documents should be numbered <code>1..n</code> where
048 * <code>1</code> is the first and <code>n</code> the last).
049 *
050 * <p>Float term values should conform to values accepted by
051 * {@link Float Float.valueOf(String)} (except that <code>NaN</code>
052 * and <code>Infinity</code> are not supported).
053 * Documents which should appear first in the sort
054 * should have low values, later documents high values.
055 *
056 * <p>String term values can contain any valid String, but should
057 * not be tokenized. The values are sorted according to their
058 * {@link Comparable natural order}. Note that using this type
059 * of term value has higher memory requirements than the other
060 * two types.
061 *
062 * <p><h3>Object Reuse</h3>
063 *
064 * <p>One of these objects can be
065 * used multiple times and the sort order changed between usages.
066 *
067 * <p>This class is thread safe.
068 *
069 * <p><h3>Memory Usage</h3>
070 *
071 * <p>Sorting uses caches of term values maintained by the
072 * internal HitQueue(s). The cache is static and contains an integer
073 * or float array of length <code>IndexReader.maxDoc()</code> for each field
074 * name for which a sort is performed. In other words, the size of the
075 * cache in bytes is:
076 *
077 * <p><code>4 * IndexReader.maxDoc() * (# of different fields actually used to sort)</code>
078 *
079 * <p>For String fields, the cache is larger: in addition to the
080 * above array, the value of every term in the field is kept in memory.
081 * If there are many unique terms in the field, this could
082 * be quite large.
083 *
084 * <p>Note that the size of the cache is not affected by how many
085 * fields are in the index and <i>might</i> be used to sort - only by
086 * the ones actually used to sort a result set.
087 *
088 * <p>The cache is cleared each time a new <code>IndexReader</code> is
089 * passed in, or if the value returned by <code>maxDoc()</code>
090 * changes for the current IndexReader. This class is not set up to
091 * be able to efficiently sort hits from more than one index
092 * simultaneously.
093 *
094 * <p>Created: Feb 12, 2004 10:53:57 AM
095 *
096 * @author Tim Jones (Nacimiento Software)
097 * @since lucene 1.4
098 * @version $Id: Sort.java 529 2004-10-05 11:55:26Z niraj $
099 */
100 public class Sort
101 implements Serializable {
102
103 /** Represents sorting by computed relevance. Using this sort criteria
104 * returns the same results as calling {@link Searcher#search(Query) Searcher#search()}
105 * without a sort criteria, only with slightly more overhead. */
106 public static final Sort RELEVANCE = new Sort();
107
108 /** Represents sorting by index order. */
109 public static final Sort INDEXORDER = new Sort (SortField.FIELD_DOC);
110
111 // internal representation of the sort criteria
112 SortField[] fields;
113
114
115 /** Sorts by computed relevance. This is the same sort criteria as
116 * calling {@link Searcher#search(Query) Searcher#search()} without a sort criteria, only with
117 * slightly more overhead. */
118 public Sort() {
119 this (new SortField[]{SortField.FIELD_SCORE, SortField.FIELD_DOC});
120 }
121
122
123 /** Sorts by the terms in <code>field</code> then by index order (document
124 * number). The type of value in <code>field</code> is determined
125 * automatically.
126 * @see SortField#AUTO
127 */
128 public Sort (String field) {
129 setSort (field, false);
130 }
131
132
133 /** Sorts possibly in reverse by the terms in <code>field</code> then by
134 * index order (document number). The type of value in <code>field</code> is determined
135 * automatically.
136 * @see SortField#AUTO
137 */
138 public Sort (String field, boolean reverse) {
139 setSort (field, reverse);
140 }
141
142
143 /** Sorts in succession by the terms in each field.
144 * The type of value in <code>field</code> is determined
145 * automatically.
146 * @see SortField#AUTO
147 */
148 public Sort (String[] fields) {
149 setSort (fields);
150 }
151
152
153 /** Sorts by the criteria in the given SortField. */
154 public Sort (SortField field) {
155 setSort (field);
156 }
157
158
159 /** Sorts in succession by the criteria in each SortField. */
160 public Sort (SortField[] fields) {
161 setSort (fields);
162 }
163
164
165 /** Sets the sort to the terms in <code>field</code> then by index order
166 * (document number). */
167 public final void setSort (String field) {
168 setSort (field, false);
169 }
170
171
172 /** Sets the sort to the terms in <code>field</code> possibly in reverse,
173 * then by index order (document number). */
174 public void setSort (String field, boolean reverse) {
175 SortField[] nfields = new SortField[]{
176 new SortField (field, SortField.AUTO, reverse),
177 SortField.FIELD_DOC
178 };
179 fields = nfields;
180 }
181
182
183 /** Sets the sort to the terms in each field in succession. */
184 public void setSort (String[] fieldnames) {
185 final int n = fieldnames.length;
186 SortField[] nfields = new SortField[n];
187 for (int i = 0; i < n; ++i) {
188 nfields[i] = new SortField (fieldnames[i], SortField.AUTO);
189 }
190 fields = nfields;
191 }
192
193
194 /** Sets the sort to the given criteria. */
195 public void setSort (SortField field) {
196 this.fields = new SortField[]{field};
197 }
198
199
200 /** Sets the sort to the given criteria in succession. */
201 public void setSort (SortField[] fields) {
202 this.fields = fields;
203 }
204
205 public String toString() {
206 StringBuffer buffer = new StringBuffer();
207
208 for (int i = 0; i < fields.length; i++) {
209 buffer.append(fields[i].toString());
210 if ((i +1) < fields.length)
211 buffer.append(',');
212 }
213
214 return buffer.toString();
215 }
216 }
|