001 // FastCharStream.java
002 package gate.creole.annic.apache.lucene.analysis.standard;
003
004 /**
005 * Copyright 2004 The Apache Software Foundation
006 *
007 * Licensed under the Apache License, Version 2.0 (the "License");
008 * you may not use this file except in compliance with the License.
009 * You may obtain a copy of the License at
010 *
011 * http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020 import java.io.*;
021
022 /** An efficient implementation of JavaCC's CharStream interface. <p>Note that
023 * this does not do line-number counting, but instead keeps track of the
024 * character position of the token in the input, as required by Lucene's {@link
025 * gate.creole.annic.apache.lucene.analysis.Token} API. */
026 public final class FastCharStream implements CharStream {
027 char[] buffer = null;
028
029 int bufferLength = 0; // end of valid chars
030 int bufferPosition = 0; // next char to read
031
032 int tokenStart = 0; // offset in buffer
033 int bufferStart = 0; // position in file of buffer
034
035 Reader input; // source of chars
036
037 /** Constructs from a Reader. */
038 public FastCharStream(Reader r) {
039 input = r;
040 }
041
042 public final char readChar() throws IOException {
043 if (bufferPosition >= bufferLength)
044 refill();
045 return buffer[bufferPosition++];
046 }
047
048 private final void refill() throws IOException {
049 int newPosition = bufferLength - tokenStart;
050
051 if (tokenStart == 0) { // token won't fit in buffer
052 if (buffer == null) { // first time: alloc buffer
053 buffer = new char[2048];
054 } else if (bufferLength == buffer.length) { // grow buffer
055 char[] newBuffer = new char[buffer.length*2];
056 System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
057 buffer = newBuffer;
058 }
059 } else { // shift token to front
060 System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
061 }
062
063 bufferLength = newPosition; // update state
064 bufferPosition = newPosition;
065 bufferStart += tokenStart;
066 tokenStart = 0;
067
068 int charsRead = // fill space in buffer
069 input.read(buffer, newPosition, buffer.length-newPosition);
070 if (charsRead == -1)
071 throw new IOException("read past eof");
072 else
073 bufferLength += charsRead;
074 }
075
076 public final char BeginToken() throws IOException {
077 tokenStart = bufferPosition;
078 return readChar();
079 }
080
081 public final void backup(int amount) {
082 bufferPosition -= amount;
083 }
084
085 public final String GetImage() {
086 return new String(buffer, tokenStart, bufferPosition - tokenStart);
087 }
088
089 public final char[] GetSuffix(int len) {
090 char[] value = new char[len];
091 System.arraycopy(buffer, bufferPosition - len, value, 0, len);
092 return value;
093 }
094
095 public final void Done() {
096 try {
097 input.close();
098 } catch (IOException e) {
099 System.err.println("Caught: " + e + "; ignoring.");
100 }
101 }
102
103 @Deprecated public final int getColumn() {
104 return bufferStart + bufferPosition;
105 }
106 @Deprecated public final int getLine() {
107 return 1;
108 }
109 public final int getEndColumn() {
110 return bufferStart + bufferPosition;
111 }
112 public final int getEndLine() {
113 return 1;
114 }
115 public final int getBeginColumn() {
116 return bufferStart + tokenStart;
117 }
118 public final int getBeginLine() {
119 return 1;
120 }
121 }
|