001 package gate.creole.annic.apache.lucene.index;
002
003 /**
004 * Copyright 2004 The Apache Software Foundation
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 import java.io.IOException;
020
021 import gate.creole.annic.apache.lucene.store.Directory;
022
023 /** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
024 * Directory. Pairs are accessed either by Term or by ordinal position the
025 * set. */
026
027 final class TermInfosReader {
028 private Directory directory;
029 private String segment;
030 private FieldInfos fieldInfos;
031
032 private ThreadLocal enumerators = new ThreadLocal();
033 private SegmentTermEnum origEnum;
034 private long size;
035
036 TermInfosReader(Directory dir, String seg, FieldInfos fis)
037 throws IOException {
038 directory = dir;
039 segment = seg;
040 fieldInfos = fis;
041
042 origEnum = new SegmentTermEnum(directory.openFile(segment + ".tis"),
043 fieldInfos, false);
044 size = origEnum.size;
045 readIndex();
046 }
047
048 public int getSkipInterval() {
049 return origEnum.skipInterval;
050 }
051
052 final void close() throws IOException {
053 if (origEnum != null)
054 origEnum.close();
055 }
056
057 /** Returns the number of term/value pairs in the set. */
058 final long size() {
059 return size;
060 }
061
062 private SegmentTermEnum getEnum() {
063 SegmentTermEnum termEnum = (SegmentTermEnum)enumerators.get();
064 if (termEnum == null) {
065 termEnum = terms();
066 enumerators.set(termEnum);
067 }
068 return termEnum;
069 }
070
071 Term[] indexTerms = null;
072 TermInfo[] indexInfos;
073 long[] indexPointers;
074
075 private final void readIndex() throws IOException {
076 SegmentTermEnum indexEnum =
077 new SegmentTermEnum(directory.openFile(segment + ".tii"),
078 fieldInfos, true);
079 try {
080 int indexSize = (int)indexEnum.size;
081
082 indexTerms = new Term[indexSize];
083 indexInfos = new TermInfo[indexSize];
084 indexPointers = new long[indexSize];
085
086 for (int i = 0; indexEnum.next(); i++) {
087 indexTerms[i] = indexEnum.term();
088 indexInfos[i] = indexEnum.termInfo();
089 indexPointers[i] = indexEnum.indexPointer;
090 }
091 } finally {
092 indexEnum.close();
093 }
094 }
095
096 /** Returns the offset of the greatest index entry which is less than or equal to term.*/
097 private final int getIndexOffset(Term term) throws IOException {
098 int lo = 0; // binary search indexTerms[]
099 int hi = indexTerms.length - 1;
100
101 while (hi >= lo) {
102 int mid = (lo + hi) >>> 1;
103 int delta = term.indexCompareTo(indexTerms[mid]);
104 if (delta < 0)
105 hi = mid - 1;
106 else if (delta > 0)
107 lo = mid + 1;
108 else
109 return mid;
110 }
111 return hi;
112 }
113
114 private final void seekEnum(int indexOffset) throws IOException {
115 getEnum().seek(indexPointers[indexOffset],
116 (indexOffset * getEnum().indexInterval) - 1,
117 indexTerms[indexOffset], indexInfos[indexOffset]);
118 }
119
120 /** Returns the TermInfo for a Term in the set, or null. */
121 TermInfo get(Term term) throws IOException {
122 if (size == 0) return null;
123
124 // optimize sequential access: first try scanning cached enum w/o seeking
125 SegmentTermEnum enumerator = getEnum();
126 if (enumerator.term() != null // term is at or past current
127 && ((enumerator.prev != null && term.indexCompareTo(enumerator.prev) > 0)
128 || term.indexCompareTo(enumerator.term()) >= 0)) {
129 int enumOffset = (int)(enumerator.position/enumerator.indexInterval)+1;
130 if (indexTerms.length == enumOffset // but before end of block
131 || term.indexCompareTo(indexTerms[enumOffset]) < 0)
132 return scanEnum(term); // no need to seek
133 }
134
135 // random-access: must seek
136 seekEnum(getIndexOffset(term));
137 return scanEnum(term);
138 }
139
140 /** Scans within block for matching term. */
141 private final TermInfo scanEnum(Term term) throws IOException {
142 SegmentTermEnum enumerator = getEnum();
143 while (term.indexCompareTo(enumerator.term()) > 0 && enumerator.next()) {}
144 if (enumerator.term() != null && term.indexCompareTo(enumerator.term()) == 0)
145 return enumerator.termInfo();
146 else
147 return null;
148 }
149
150 /** Returns the nth term in the set. */
151 final Term get(int position) throws IOException {
152 if (size == 0) return null;
153
154 SegmentTermEnum enumerator = getEnum();
155 if (enumerator != null && enumerator.term() != null &&
156 position >= enumerator.position &&
157 position < (enumerator.position + enumerator.indexInterval))
158 return scanEnum(position); // can avoid seek
159
160 seekEnum(position / enumerator.indexInterval); // must seek
161 return scanEnum(position);
162 }
163
164 private final Term scanEnum(int position) throws IOException {
165 SegmentTermEnum enumerator = getEnum();
166 while(enumerator.position < position)
167 if (!enumerator.next())
168 return null;
169
170 return enumerator.term();
171 }
172
173 /** Returns the position of a Term in the set or -1. */
174 final long getPosition(Term term) throws IOException {
175 if (size == 0) return -1;
176
177 int indexOffset = getIndexOffset(term);
178 seekEnum(indexOffset);
179
180 SegmentTermEnum enumerator = getEnum();
181 while(term.indexCompareTo(enumerator.term()) > 0 && enumerator.next()) {}
182
183 if (term.indexCompareTo(enumerator.term()) == 0)
184 return enumerator.position;
185 else
186 return -1;
187 }
188
189 /** Returns an enumeration of all the Terms and TermInfos in the set. */
190 public SegmentTermEnum terms() {
191 return (SegmentTermEnum)origEnum.clone();
192 }
193
194 /** Returns an enumeration of terms starting at or after the named term. */
195 public SegmentTermEnum terms(Term term) throws IOException {
196 get(term);
197 return (SegmentTermEnum)getEnum().clone();
198 }
199 }
|