001 package gate.creole.annic.apache.lucene.index;
002
003 /**
004 * Copyright 2004 The Apache Software Foundation
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 import java.util.*;
020 import java.io.IOException;
021
022 import gate.creole.annic.apache.lucene.document.Document;
023 import gate.creole.annic.apache.lucene.document.Field;
024
025 import gate.creole.annic.apache.lucene.store.Directory;
026 import gate.creole.annic.apache.lucene.store.OutputStream;
027 import gate.creole.annic.apache.lucene.store.InputStream;
028
029 /** Access to the Field Info file that describes document fields and whether or
030 * not they are indexed. Each segment has a separate Field Info file. Objects
031 * of this class are thread-safe for multiple readers, but only one thread can
032 * be adding documents at a time, with no other reader or writer threads
033 * accessing this object.
034 */
035 final class FieldInfos {
036 private Vector byNumber = new Vector();
037 private Hashtable byName = new Hashtable();
038
039 FieldInfos() {
040 add("", false);
041 }
042
043 /**
044 * Construct a FieldInfos object using the directory and the name of the file
045 * InputStream
046 * @param d The directory to open the InputStream from
047 * @param name The name of the file to open the InputStream from in the Directory
048 * @throws IOException
049 *
050 * @see #read
051 */
052 FieldInfos(Directory d, String name) throws IOException {
053 InputStream input = d.openFile(name);
054 try {
055 read(input);
056 } finally {
057 input.close();
058 }
059 }
060
061 /** Adds field info for a Document. */
062 public void add(Document doc) {
063 Enumeration fields = doc.fields();
064 while (fields.hasMoreElements()) {
065 Field field = (Field) fields.nextElement();
066 add(field.name(), field.isIndexed(), field.isTermVectorStored());
067 }
068 }
069
070 /**
071 * @param names The names of the fields
072 * @param storeTermVectors Whether the fields store term vectors or not
073 */
074 public void addIndexed(Collection names, boolean storeTermVectors) {
075 Iterator i = names.iterator();
076 int j = 0;
077 while (i.hasNext()) {
078 add((String)i.next(), true, storeTermVectors);
079 }
080 }
081
082 /**
083 * Assumes the field is not storing term vectors
084 * @param names The names of the fields
085 * @param isIndexed Whether the fields are indexed or not
086 *
087 * @see #add(String, boolean)
088 */
089 public void add(Collection names, boolean isIndexed) {
090 Iterator i = names.iterator();
091 int j = 0;
092 while (i.hasNext()) {
093 add((String)i.next(), isIndexed);
094 }
095 }
096
097 /**
098 * Calls three parameter add with false for the storeTermVector parameter
099 * @param name The name of the Field
100 * @param isIndexed true if the field is indexed
101 * @see #add(String, boolean, boolean)
102 */
103 public void add(String name, boolean isIndexed) {
104 add(name, isIndexed, false);
105 }
106
107
108 /** If the field is not yet known, adds it. If it is known, checks to make
109 * sure that the isIndexed flag is the same as was given previously for this
110 * field. If not - marks it as being indexed. Same goes for storeTermVector
111 *
112 * @param name The name of the field
113 * @param isIndexed true if the field is indexed
114 * @param storeTermVector true if the term vector should be stored
115 */
116 public void add(String name, boolean isIndexed, boolean storeTermVector) {
117 FieldInfo fi = fieldInfo(name);
118 if (fi == null) {
119 addInternal(name, isIndexed, storeTermVector);
120 } else {
121 if (fi.isIndexed != isIndexed) {
122 fi.isIndexed = true; // once indexed, always index
123 }
124 if (fi.storeTermVector != storeTermVector) {
125 fi.storeTermVector = true; // once vector, always vector
126 }
127 }
128 }
129
130 private void addInternal(String name, boolean isIndexed,
131 boolean storeTermVector) {
132 FieldInfo fi =
133 new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector);
134 byNumber.addElement(fi);
135 byName.put(name, fi);
136 }
137
138 public int fieldNumber(String fieldName) {
139 FieldInfo fi = fieldInfo(fieldName);
140 if (fi != null)
141 return fi.number;
142 else
143 return -1;
144 }
145
146 public FieldInfo fieldInfo(String fieldName) {
147 return (FieldInfo) byName.get(fieldName);
148 }
149
150 public String fieldName(int fieldNumber) {
151 return fieldInfo(fieldNumber).name;
152 }
153
154 public FieldInfo fieldInfo(int fieldNumber) {
155 return (FieldInfo) byNumber.elementAt(fieldNumber);
156 }
157
158 public int size() {
159 return byNumber.size();
160 }
161
162 public boolean hasVectors() {
163 boolean hasVectors = false;
164 for (int i = 0; i < size(); i++) {
165 if (fieldInfo(i).storeTermVector)
166 hasVectors = true;
167 }
168 return hasVectors;
169 }
170
171 public void write(Directory d, String name) throws IOException {
172 OutputStream output = d.createFile(name);
173 try {
174 write(output);
175 } finally {
176 output.close();
177 }
178 }
179
180 public void write(OutputStream output) throws IOException {
181 output.writeVInt(size());
182 for (int i = 0; i < size(); i++) {
183 FieldInfo fi = fieldInfo(i);
184 byte bits = 0x0;
185 if (fi.isIndexed) bits |= 0x1;
186 if (fi.storeTermVector) bits |= 0x2;
187 output.writeString(fi.name);
188 //Was REMOVE
189 //output.writeByte((byte)(fi.isIndexed ? 1 : 0));
190 output.writeByte(bits);
191 }
192 }
193
194 private void read(InputStream input) throws IOException {
195 int size = input.readVInt();//read in the size
196 for (int i = 0; i < size; i++) {
197 String name = input.readString().intern();
198 byte bits = input.readByte();
199 boolean isIndexed = (bits & 0x1) != 0;
200 boolean storeTermVector = (bits & 0x2) != 0;
201 addInternal(name, isIndexed, storeTermVector);
202 }
203 }
204
205 }
|