001 /*
002 * Copyright (c) 1995-2010, The University of Sheffield. See the file
003 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
004 *
005 * This file is part of GATE (see http://gate.ac.uk/), and is free
006 * software, licenced under the GNU Library General Public License,
007 * Version 2, June 1991 (in the distribution as file licence.html,
008 * and also available at http://gate.ac.uk/gate/licence.html).
009 *
010 * Benson Margulies 28/07/2010
011 *
012 * $Id: GukBomStrippingInputStreamReader.java 13021 2010-08-25 09:59:38Z ian_roberts $
013 */
014
015 package guk;
016
017 import java.io.BufferedReader;
018 import java.io.IOException;
019 import java.io.InputStream;
020 import java.io.InputStreamReader;
021 import java.io.UnsupportedEncodingException;
022 import java.nio.CharBuffer;
023 import java.nio.charset.Charset;
024 import java.nio.charset.CharsetDecoder;
025
/**
 * <p>Reader that removes a leading Unicode byte order mark (BOM, U+FEFF)
 * from the decoded character stream, even when the JDK decoder for the
 * chosen charset leaves it in the output. Since a buffer is required, and
 * since most of GATE was coded to use BufferedReaders around the
 * InputStreamReader, this 'isa' BufferedReader.</p>
 *
 * <p>Note that there are differences in exception behaviour on the
 * different InputStreamReader constructors, so each constructor here is
 * careful to call the matching one.</p>
 *
 * <p>The BOM check needs to read one character during construction.
 * Constructors of this class do not declare {@link IOException}, so an
 * I/O failure (or an immediate end of stream) seen during that check is
 * remembered and reported by the first subsequent read-style call
 * instead.</p>
 *
 * <p>The BOM check uses the inherited mark/reset machinery. After
 * construction the internal mark always sits at the first character of
 * the real content (i.e. after the BOM if there was one), so calling
 * {@link #reset()} before calling {@link #mark(int)} can never re-expose
 * the BOM. Callers should still call {@code mark} themselves before
 * relying on {@code reset}.</p>
 *
 * <p><b>Note</b> This class is a copy of BomStrippingInputStreamReader
 * from gate.util. GUK cannot reference gate.util directly at runtime
 * due to classloader issues, and we do not want to force GATE embedded
 * to depend on GUK, therefore copying the class was considered the
 * least bad option. If the gate.util class is modified, this class
 * should also be modified to match.</p>
 */
public class GukBomStrippingInputStreamReader extends BufferedReader {

  /** The Unicode byte order mark as it appears in a decoded char stream. */
  private static final int BOM = 0xfeff;

  /** I/O failure raised while probing for the BOM in the constructor. */
  private IOException pendingConstructionException;

  /** True if the stream was already at EOF when probed for the BOM. */
  private boolean pendingEOF;

  /**
   * Set once a check has found nothing pending, so that later calls can
   * skip straight to the underlying reader.
   */
  private boolean pendingChecked;

  /**
   * Wraps the stream using the platform default charset.
   *
   * @param in the byte stream to decode
   */
  public GukBomStrippingInputStreamReader(InputStream in) {
    this(new InputStreamReader(in));
  }

  /**
   * Wraps the stream using the named charset.
   *
   * @param in the byte stream to decode
   * @param charsetName name of the charset to decode with
   * @throws UnsupportedEncodingException if the named charset is not
   *           supported
   */
  public GukBomStrippingInputStreamReader(InputStream in, String charsetName)
          throws UnsupportedEncodingException {
    this(new InputStreamReader(in, charsetName));
  }

  /**
   * Wraps the stream using the named charset and a specific buffer size.
   *
   * @param in the byte stream to decode
   * @param charsetName name of the charset to decode with
   * @param bufferSize size of the character buffer
   * @throws UnsupportedEncodingException if the named charset is not
   *           supported
   */
  public GukBomStrippingInputStreamReader(InputStream in, String charsetName,
          int bufferSize) throws UnsupportedEncodingException {
    this(new InputStreamReader(in, charsetName), bufferSize);
  }

  /**
   * Wraps the stream using the given charset.
   *
   * @param in the byte stream to decode
   * @param cs the charset to decode with
   */
  public GukBomStrippingInputStreamReader(InputStream in, Charset cs) {
    this(new InputStreamReader(in, cs));
  }

  /**
   * Wraps the stream using the platform default charset and a specific
   * buffer size.
   *
   * @param in the byte stream to decode
   * @param bufferSize size of the character buffer
   */
  public GukBomStrippingInputStreamReader(InputStream in, int bufferSize) {
    this(new InputStreamReader(in), bufferSize);
  }

  /**
   * Wraps the stream using the given decoder and a specific buffer size.
   *
   * @param in the byte stream to decode
   * @param dec the decoder to use
   * @param bufferSize size of the character buffer
   */
  public GukBomStrippingInputStreamReader(InputStream in, CharsetDecoder dec,
          int bufferSize) {
    this(new InputStreamReader(in, dec), bufferSize);
  }

  /**
   * Wraps the stream using the given decoder.
   *
   * @param in the byte stream to decode
   * @param dec the decoder to use
   */
  public GukBomStrippingInputStreamReader(InputStream in, CharsetDecoder dec) {
    this(new InputStreamReader(in, dec));
  }

  /** Common construction path for an explicit buffer size. */
  private GukBomStrippingInputStreamReader(InputStreamReader isr,
          int bufferSize) {
    super(isr, bufferSize);
    stripBomIfPresent();
  }

  /** Common construction path for the default buffer size. */
  private GukBomStrippingInputStreamReader(InputStreamReader isr) {
    super(isr);
    stripBomIfPresent();
  }

  /**
   * Checks whether the first character is a BOM and positions the reader
   * past it, if that's the case. An immediate EOF or an IOException is
   * recorded in the pending fields rather than thrown, because this runs
   * from the constructors.
   */
  private void stripBomIfPresent() {
    try {
      super.mark(1);
      int firstChar = super.read();
      if(firstChar == -1) {
        // If we hit EOF, note to return it from the next call.
        pendingEOF = true;
      }
      else if(firstChar == BOM) {
        // Leave the BOM consumed, and move the internal mark past it.
        // Without this the mark set above would still point at the BOM
        // and a caller's reset() (before any mark()) would hand it back.
        super.mark(0);
      }
      else {
        // We read a non-BOM character: push it back.
        super.reset();
      }
    }
    catch(IOException e) {
      pendingConstructionException = e;
    }
  }

  /** Identity-based, exactly as inherited. */
  @Override
  public int hashCode() {
    return super.hashCode();
  }

  /** Identity-based, exactly as inherited. */
  @Override
  public boolean equals(Object obj) {
    return super.equals(obj);
  }

  /**
   * Reports any condition left over from construction.
   *
   * <p>{@code pendingChecked} is only set when neither condition is
   * pending, so a pending EOF or a pending exception is reported on
   * every call, not just the first one.</p>
   *
   * @return true if the stream was already at EOF during construction
   * @throws IOException the exception captured during construction, if any
   */
  private boolean checkPending() throws IOException {
    if(pendingChecked) {
      return false;
    }
    if(pendingEOF) {
      return true;
    }
    if(pendingConstructionException != null) {
      throw pendingConstructionException;
    }
    pendingChecked = true;
    return false;
  }

  @Override
  public int read(CharBuffer target) throws IOException {
    if(checkPending()) {
      return -1;
    }
    return super.read(target);
  }

  @Override
  public int read(char[] cbuf) throws IOException {
    if(checkPending()) {
      return -1;
    }
    return super.read(cbuf);
  }

  @Override
  public int read() throws IOException {
    if(checkPending()) {
      return -1;
    }
    return super.read();
  }

  @Override
  public int read(char[] cbuf, int off, int len) throws IOException {
    if(checkPending()) {
      return -1;
    }
    return super.read(cbuf, off, len);
  }

  @Override
  public String readLine() throws IOException {
    if(checkPending()) {
      return null;
    }
    return super.readLine();
  }

  @Override
  public long skip(long n) throws IOException {
    if(checkPending()) {
      return 0;
    }
    return super.skip(n);
  }

  @Override
  public boolean ready() throws IOException {
    if(checkPending()) {
      return false;
    }
    return super.ready();
  }

  @Override
  public boolean markSupported() {
    return super.markSupported();
  }

  /**
   * Marks the current position. A pending construction exception is
   * thrown first; a pending EOF does not prevent marking.
   */
  @Override
  public void mark(int readAheadLimit) throws IOException {
    checkPending();
    super.mark(readAheadLimit);
  }

  /**
   * Resets to the most recent mark. A pending construction exception is
   * thrown first; a pending EOF does not prevent the reset.
   */
  @Override
  public void reset() throws IOException {
    checkPending();
    super.reset();
  }

  @Override
  public void close() throws IOException {
    // go ahead and close on this call even if we have an IOException
    // sitting around.
    super.close();
  }

}
|