001 package gate.creole.annic.apache.lucene.index;
002
003 /**
004 * Copyright 2004 The Apache Software Foundation
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 import java.io.IOException;
020 import gate.creole.annic.apache.lucene.util.BitVector;
021 import gate.creole.annic.apache.lucene.store.InputStream;
022
023 class SegmentTermDocs implements TermDocs {
024 protected SegmentReader parent;
025 private InputStream freqStream;
026 private int count;
027 private int df;
028 private BitVector deletedDocs;
029 int doc = 0;
030 int freq;
031
032 private int skipInterval;
033 private int numSkips;
034 private int skipCount;
035 private InputStream skipStream;
036 private int skipDoc;
037 private long freqPointer;
038 private long proxPointer;
039 private long skipPointer;
040 private boolean haveSkipped;
041
042 SegmentTermDocs(SegmentReader parent)
043 throws IOException {
044 this.parent = parent;
045 this.freqStream = (InputStream) parent.freqStream.clone();
046 this.deletedDocs = parent.deletedDocs;
047 this.skipInterval = parent.tis.getSkipInterval();
048 }
049
050 public void seek(Term term) throws IOException {
051 TermInfo ti = parent.tis.get(term);
052 seek(ti);
053 }
054
055 public void seek(TermEnum termEnum) throws IOException {
056 TermInfo ti;
057
058 // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
059 if (termEnum instanceof SegmentTermEnum && ((SegmentTermEnum) termEnum).fieldInfos == parent.fieldInfos) // optimized case
060 ti = ((SegmentTermEnum) termEnum).termInfo();
061 else // punt case
062 ti = parent.tis.get(termEnum.term());
063
064 seek(ti);
065 }
066
067 void seek(TermInfo ti) throws IOException {
068 count = 0;
069 if (ti == null) {
070 df = 0;
071 } else {
072 df = ti.docFreq;
073 doc = 0;
074 skipDoc = 0;
075 skipCount = 0;
076 numSkips = df / skipInterval;
077 freqPointer = ti.freqPointer;
078 proxPointer = ti.proxPointer;
079 skipPointer = freqPointer + ti.skipOffset;
080 freqStream.seek(freqPointer);
081 haveSkipped = false;
082 }
083 }
084
085 public void close() throws IOException {
086 freqStream.close();
087 if (skipStream != null)
088 skipStream.close();
089 }
090
091 public final int doc() { return doc; }
092 public final int freq() { return freq; }
093
094 protected void skippingDoc() throws IOException {
095 }
096
097 public boolean next() throws IOException {
098 while (true) {
099 if (count == df)
100 return false;
101
102 int docCode = freqStream.readVInt();
103 doc += docCode >>> 1; // shift off low bit
104 if ((docCode & 1) != 0) // if low bit is set
105 freq = 1; // freq is one
106 else
107 freq = freqStream.readVInt(); // else read freq
108
109 count++;
110
111 if (deletedDocs == null || !deletedDocs.get(doc))
112 break;
113 skippingDoc();
114 }
115 return true;
116 }
117
118 /** Optimized implementation. */
119 public int read(final int[] docs, final int[] freqs)
120 throws IOException {
121 final int length = docs.length;
122 int i = 0;
123 while (i < length && count < df) {
124
125 // manually inlined call to next() for speed
126 final int docCode = freqStream.readVInt();
127 doc += docCode >>> 1; // shift off low bit
128 if ((docCode & 1) != 0) // if low bit is set
129 freq = 1; // freq is one
130 else
131 freq = freqStream.readVInt(); // else read freq
132 count++;
133
134 if (deletedDocs == null || !deletedDocs.get(doc)) {
135 docs[i] = doc;
136 freqs[i] = freq;
137 ++i;
138 }
139 }
140 return i;
141 }
142
143 /** Overridden by SegmentTermPositions to skip in prox stream. */
144 protected void skipProx(long proxPointer) throws IOException {}
145
146 /** Optimized implementation. */
147 public boolean skipTo(int target) throws IOException {
148 if (df >= skipInterval) { // optimized case
149
150 if (skipStream == null)
151 skipStream = (InputStream) freqStream.clone(); // lazily clone
152
153 if (!haveSkipped) { // lazily seek skip stream
154 skipStream.seek(skipPointer);
155 haveSkipped = true;
156 }
157
158 // scan skip data
159 int lastSkipDoc = skipDoc;
160 long lastFreqPointer = freqStream.getFilePointer();
161 long lastProxPointer = -1;
162 int numSkipped = -1 - (count % skipInterval);
163
164 while (target > skipDoc) {
165 lastSkipDoc = skipDoc;
166 lastFreqPointer = freqPointer;
167 lastProxPointer = proxPointer;
168
169 if (skipDoc != 0 && skipDoc >= doc)
170 numSkipped += skipInterval;
171
172 if(skipCount >= numSkips)
173 break;
174
175 skipDoc += skipStream.readVInt();
176 freqPointer += skipStream.readVInt();
177 proxPointer += skipStream.readVInt();
178
179 skipCount++;
180 }
181
182 // if we found something to skip, then skip it
183 if (lastFreqPointer > freqStream.getFilePointer()) {
184 freqStream.seek(lastFreqPointer);
185 skipProx(lastProxPointer);
186
187 doc = lastSkipDoc;
188 count += numSkipped;
189 }
190
191 }
192
193 // done skipping, now just scan
194 do {
195 if (!next())
196 return false;
197 } while (target > doc);
198 return true;
199 }
200
201 }
|