001 /*
002 * Copyright (c) 1995-2010, The University of Sheffield. See the file
003 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
004 *
005 * This file is part of GATE (see http://gate.ac.uk/), and is free
006 * software, licenced under the GNU Library General Public License,
007 * Version 2, June 1991 (in the distribution as file licence.html,
008 * and also available at http://gate.ac.uk/gate/licence.html).
009 *
010 * Benson Margulies 28/07/2010
011 *
012 * $Id: BomStrippingInputStreamReader.java 12918 2010-08-03 09:41:33Z valyt $
013 */
014
015 package gate.util;
016
017 import java.io.BufferedReader;
018 import java.io.IOException;
019 import java.io.InputStream;
020 import java.io.InputStreamReader;
021 import java.io.UnsupportedEncodingException;
022 import java.nio.CharBuffer;
023 import java.nio.charset.Charset;
024 import java.nio.charset.CharsetDecoder;
025
/**
 * A {@link BufferedReader} over an {@link InputStream} that drops a leading
 * Unicode byte-order mark (U+FEFF) when the underlying decoder hands it
 * through as an ordinary character. Since a buffer is required to peek at the
 * first character, and since most of GATE wraps its InputStreamReaders in
 * BufferedReaders anyway, this class <em>is a</em> BufferedReader.
 *
 * <p>The different {@link InputStreamReader} constructors differ in the
 * exceptions they may throw, so every public constructor here forwards to the
 * InputStreamReader constructor with the matching signature.
 *
 * <p>The first character is read during construction. Constructors cannot
 * usefully report an I/O failure or end-of-stream at that point, so the
 * outcome is recorded and surfaced by the first (and, for these two
 * conditions, every subsequent) read-type call.
 */
public class BomStrippingInputStreamReader extends BufferedReader {
  /** Value a decoded byte-order mark has when returned by {@code read()}. */
  private static final int BOM = 0xfeff;

  /** I/O failure hit while peeking at the first character, if any. */
  private IOException pendingConstructionException;

  /** True when the stream turned out to be empty during construction. */
  private boolean pendingEOF;

  /**
   * Set once a call has established that neither {@link #pendingEOF} nor
   * {@link #pendingConstructionException} applies, so later calls can skip
   * the check.
   */
  private boolean pendingChecked;

  /**
   * Creates a reader using the platform default charset.
   *
   * @param in the byte stream to decode
   */
  public BomStrippingInputStreamReader(InputStream in) {
    this(new InputStreamReader(in));
  }

  /**
   * Creates a reader using the named charset.
   *
   * @param in the byte stream to decode
   * @param charsetName name of the charset to decode with
   * @throws UnsupportedEncodingException if the named charset is not supported
   */
  public BomStrippingInputStreamReader(InputStream in, String charsetName)
          throws UnsupportedEncodingException {
    this(new InputStreamReader(in, charsetName));
  }

  /**
   * Creates a reader using the named charset and the given buffer size.
   *
   * @param in the byte stream to decode
   * @param charsetName name of the charset to decode with
   * @param bufferSize size of the character buffer
   * @throws UnsupportedEncodingException if the named charset is not supported
   */
  public BomStrippingInputStreamReader(InputStream in, String charsetName,
          int bufferSize) throws UnsupportedEncodingException {
    this(new InputStreamReader(in, charsetName), bufferSize);
  }

  /**
   * Creates a reader using the given charset.
   *
   * @param in the byte stream to decode
   * @param cs the charset to decode with
   */
  public BomStrippingInputStreamReader(InputStream in, Charset cs) {
    this(new InputStreamReader(in, cs));
  }

  /**
   * Creates a reader using the platform default charset and the given buffer
   * size.
   *
   * @param in the byte stream to decode
   * @param bufferSize size of the character buffer
   */
  public BomStrippingInputStreamReader(InputStream in, int bufferSize) {
    this(new InputStreamReader(in), bufferSize);
  }

  /**
   * Creates a reader using the given decoder and buffer size.
   *
   * @param in the byte stream to decode
   * @param dec the decoder to use
   * @param bufferSize size of the character buffer
   */
  public BomStrippingInputStreamReader(InputStream in, CharsetDecoder dec,
          int bufferSize) {
    this(new InputStreamReader(in, dec), bufferSize);
  }

  /**
   * Creates a reader using the given decoder.
   *
   * @param in the byte stream to decode
   * @param dec the decoder to use
   */
  public BomStrippingInputStreamReader(InputStream in, CharsetDecoder dec) {
    this(new InputStreamReader(in, dec));
  }

  /** Common entry point for all constructors that specify a buffer size. */
  private BomStrippingInputStreamReader(InputStreamReader isr, int bufferSize) {
    super(isr, bufferSize);
    stripBomIfPresent();
  }

  /** Common entry point for all constructors using the default buffer size. */
  private BomStrippingInputStreamReader(InputStreamReader isr) {
    super(isr);
    stripBomIfPresent();
  }

  /**
   * Peeks at the first character and leaves the stream positioned after it
   * if it is a BOM, or before it otherwise. End-of-stream and I/O failures
   * are recorded for {@link #checkPending()} rather than thrown.
   *
   * <p>Only {@code super} methods are used so that the pending-state checks
   * in this class's overrides are not triggered during construction.
   */
  private void stripBomIfPresent() {
    try {
      super.mark(1);
      int firstChar = super.read();
      if(firstChar == -1) {
        // Empty stream: remember it so that reads report EOF.
        pendingEOF = true;
      }
      else if(firstChar == BOM) {
        // Leave the BOM consumed, and move the mark past it. Otherwise the
        // mark set above stays valid until the buffer is refilled and a
        // caller's reset() (without a mark() of their own) would rewind
        // onto the BOM and hand it out after all.
        super.mark(0);
      }
      else {
        // Ordinary character: push it back.
        super.reset();
      }
    }
    catch(IOException e) {
      pendingConstructionException = e;
    }
  }

  /** Identity-based, as inherited. */
  @Override
  public int hashCode() {
    return super.hashCode();
  }

  /** Identity-based, as inherited. */
  @Override
  public boolean equals(Object obj) {
    return super.equals(obj);
  }

  /**
   * Surfaces whatever was recorded while peeking at the first character.
   *
   * @return {@code true} if the stream was empty at construction time (this
   *         answer is sticky), {@code false} if reading may proceed
   * @throws IOException the failure recorded during construction; it is
   *         rethrown on every call
   */
  private boolean checkPending() throws IOException {
    if(pendingChecked) {
      return false;
    }
    if(pendingEOF) {
      return true;
    }
    if(pendingConstructionException != null) {
      throw pendingConstructionException;
    }
    // Nothing pending: no need to look again.
    pendingChecked = true;
    return false;
  }

  /** {@inheritDoc} Returns -1 if the stream was empty at construction. */
  @Override
  public int read(CharBuffer target) throws IOException {
    if(checkPending()) {
      return -1;
    }
    return super.read(target);
  }

  /** {@inheritDoc} Returns -1 if the stream was empty at construction. */
  @Override
  public int read(char[] cbuf) throws IOException {
    if(checkPending()) {
      return -1;
    }
    return super.read(cbuf);
  }

  /** {@inheritDoc} Returns -1 if the stream was empty at construction. */
  @Override
  public int read() throws IOException {
    if(checkPending()) {
      return -1;
    }
    return super.read();
  }

  /** {@inheritDoc} Returns -1 if the stream was empty at construction. */
  @Override
  public int read(char[] cbuf, int off, int len) throws IOException {
    if(checkPending()) {
      return -1;
    }
    return super.read(cbuf, off, len);
  }

  /** {@inheritDoc} Returns null if the stream was empty at construction. */
  @Override
  public String readLine() throws IOException {
    if(checkPending()) {
      return null;
    }
    return super.readLine();
  }

  /** {@inheritDoc} Returns 0 if the stream was empty at construction. */
  @Override
  public long skip(long n) throws IOException {
    if(checkPending()) {
      return 0;
    }
    return super.skip(n);
  }

  /** {@inheritDoc} Returns false if the stream was empty at construction. */
  @Override
  public boolean ready() throws IOException {
    if(checkPending()) {
      return false;
    }
    return super.ready();
  }

  /** {@inheritDoc} */
  @Override
  public boolean markSupported() {
    return super.markSupported();
  }

  /**
   * {@inheritDoc} A failure recorded during construction is thrown first; a
   * pending EOF does not prevent marking.
   */
  @Override
  public void mark(int readAheadLimit) throws IOException {
    checkPending();
    super.mark(readAheadLimit);
  }

  /**
   * {@inheritDoc} A failure recorded during construction is thrown first; a
   * pending EOF does not prevent resetting.
   */
  @Override
  public void reset() throws IOException {
    checkPending();
    super.reset();
  }

  /**
   * Closes the underlying reader. This is done unconditionally, even when an
   * IOException from construction is still pending, so that the stream is
   * always released.
   */
  @Override
  public void close() throws IOException {
    super.close();
  }

}
|