001 package gate.creole.annic.apache.lucene.index;
002
003 /**
004 * Copyright 2004 The Apache Software Foundation
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 import java.io.IOException;
020 import gate.creole.annic.apache.lucene.store.InputStream;
021
022 final class SegmentTermEnum extends TermEnum implements Cloneable {
023 private InputStream input;
024 FieldInfos fieldInfos;
025 long size;
026 long position = -1;
027
028 private Term term = new Term("", "", "");/*, 0);*/
029 private TermInfo termInfo = new TermInfo();
030
031 private int format;
032 private boolean isIndex = false;
033 long indexPointer = 0;
034 int indexInterval;
035 int skipInterval;
036 private int formatM1SkipInterval;
037 Term prev;
038
039 private char[] buffer = {};
040
041 SegmentTermEnum(InputStream i, FieldInfos fis, boolean isi)
042 throws IOException {
043 input = i;
044 fieldInfos = fis;
045 isIndex = isi;
046
047 int firstInt = input.readInt();
048 if (firstInt >= 0) {
049 // original-format file, without explicit format version number
050 format = 0;
051 size = firstInt;
052
053 // back-compatible settings
054 indexInterval = 128;
055 skipInterval = Integer.MAX_VALUE; // switch off skipTo optimization
056
057 } else {
058 // we have a format version number
059 format = firstInt;
060
061 // check that it is a format we can understand
062 if (format < TermInfosWriter.FORMAT)
063 throw new IOException("Unknown format version:" + format);
064
065 size = input.readLong(); // read the size
066
067 if(format == -1){
068 if (!isIndex) {
069 indexInterval = input.readInt();
070 formatM1SkipInterval = input.readInt();
071 }
072 // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
073 // skipTo implementation of these versions
074 skipInterval = Integer.MAX_VALUE;
075 }
076 else{
077 indexInterval = input.readInt();
078 skipInterval = input.readInt();
079 }
080 }
081
082 }
083
084 protected Object clone() {
085 SegmentTermEnum clone = null;
086 try {
087 clone = (SegmentTermEnum) super.clone();
088 } catch (CloneNotSupportedException e) {}
089
090 clone.input = (InputStream) input.clone();
091 clone.termInfo = new TermInfo(termInfo);
092 if (term != null) clone.growBuffer(term.text.length());
093
094 return clone;
095 }
096
097 final void seek(long pointer, int p, Term t, TermInfo ti)
098 throws IOException {
099 input.seek(pointer);
100 position = p;
101 term = t;
102 prev = null;
103 termInfo.set(ti);
104 growBuffer(term.text.length()); // copy term text into buffer
105 }
106
107 /** Increments the enumeration to the next element. True if one exists.*/
108 public final boolean next() throws IOException {
109 if (position++ >= size - 1) {
110 term = null;
111 return false;
112 }
113
114 prev = term;
115 term = readTerm();
116
117 termInfo.docFreq = input.readVInt(); // read doc freq
118 termInfo.freqPointer += input.readVLong(); // read freq pointer
119 termInfo.proxPointer += input.readVLong(); // read prox pointer
120
121 if(format == -1){
122 // just read skipOffset in order to increment file pointer;
123 // value is never used since skipTo is switched off
124 if (!isIndex) {
125 if (termInfo.docFreq > formatM1SkipInterval) {
126 termInfo.skipOffset = input.readVInt();
127 }
128 }
129 }
130 else{
131 if (termInfo.docFreq >= skipInterval)
132 termInfo.skipOffset = input.readVInt();
133 }
134
135 if (isIndex)
136 indexPointer += input.readVLong(); // read index pointer
137
138 return true;
139 }
140
141 private final Term readTerm() throws IOException {
142 //int start = input.readVInt();
143 int length = input.readVInt();
144 //int totalLength = start + length;
145 if (buffer.length < length/*totalLength*/)
146 growBuffer(length);
147
148 input.readChars(buffer, 0, length);
149 String text = new String(buffer, 0, length);
150 /* Niraj */
151 int typeLen = input.readVInt();
152 if(buffer.length < typeLen)
153 growBuffer(typeLen);
154
155 input.readChars(buffer,0,typeLen);
156 /*int position = input.readVInt();*/
157 /* End */
158 int fieldPos = input.readVInt();
159 return new Term(fieldInfos.fieldName(fieldPos), text, new String(buffer,0,typeLen)/*, position*/, false);
160 /*return new Term(fieldInfos.fieldName(fieldPos),
161 new String(buffer, 0, totalLength), false);*/
162 }
163
164 private final void growBuffer(int length) {
165 buffer = new char[length];
166 for (int i = 0; i < term.text.length(); i++) // copy contents
167 buffer[i] = term.text.charAt(i);
168 }
169
170 /** Returns the current Term in the enumeration.
171 Initially invalid, valid after next() called for the first time.*/
172 public final Term term() {
173 return term;
174 }
175
176 /** Returns the current TermInfo in the enumeration.
177 Initially invalid, valid after next() called for the first time.*/
178 final TermInfo termInfo() {
179 return new TermInfo(termInfo);
180 }
181
182 /** Sets the argument to the current TermInfo in the enumeration.
183 Initially invalid, valid after next() called for the first time.*/
184 final void termInfo(TermInfo ti) {
185 ti.set(termInfo);
186 }
187
188 /** Returns the docFreq from the current TermInfo in the enumeration.
189 Initially invalid, valid after next() called for the first time.*/
190 public final int docFreq() {
191 return termInfo.docFreq;
192 }
193
194 /* Returns the freqPointer from the current TermInfo in the enumeration.
195 Initially invalid, valid after next() called for the first time.*/
196 final long freqPointer() {
197 return termInfo.freqPointer;
198 }
199
200 /* Returns the proxPointer from the current TermInfo in the enumeration.
201 Initially invalid, valid after next() called for the first time.*/
202 final long proxPointer() {
203 return termInfo.proxPointer;
204 }
205
206 /** Closes the enumeration to further activity, freeing resources. */
207 public final void close() throws IOException {
208 input.close();
209 }
210 }
|