001 /***************************************************************************/
002 /* Copyright (C) 2010-2011, Sebastian Hellmann */
003 /* Note: If you need parts of NLP2RDF in another licence due to licence */
004 /* incompatibility, please mail hellmann@informatik.uni-leipzig.de */
005 /* */
006 /* This file is part of NLP2RDF. */
007 /* */
008 /* NLP2RDF is free software; you can redistribute it and/or modify */
009 /* it under the terms of the GNU General Public License as published by */
010 /* the Free Software Foundation; either version 3 of the License, or */
011 /* (at your option) any later version. */
012 /* */
013 /* NLP2RDF is distributed in the hope that it will be useful, */
014 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
015 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
016 /* GNU General Public License for more details. */
017 /* */
018 /* You should have received a copy of the GNU General Public License */
019 /* along with this program. If not, see <http://www.gnu.org/licenses/>. */
020 /***************************************************************************/
021
022 package org.nlp2rdf.core;
023
024 /**
025 * I copied this class from OpenNLP
026 * @author Sebastian Hellmann
027 * Date: 11/8/11
028 */
029 /*
030 * Licensed to the Apache Software Foundation (ASF) under one or more
031 * contributor license agreements. See the NOTICE file distributed with
032 * this work for additional information regarding copyright ownership.
033 * The ASF licenses this file to You under the Apache License, Version 2.0
034 * (the "License"); you may not use this file except in compliance with
035 * the License. You may obtain a copy of the License at
036 *
037 * http://www.apache.org/licenses/LICENSE-2.0
038 *
039 * Unless required by applicable law or agreed to in writing, software
040 * distributed under the License is distributed on an "AS IS" BASIS,
041 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
042 * See the License for the specific language governing permissions and
043 * limitations under the License.
044 */
045
/**
 * Immutable pair of non-negative start and end integer offsets with an
 * optional type label.
 *
 * <p>Most operations ({@link #length()}, {@link #getCoveredText(CharSequence)},
 * {@link #intersects(Span)}, {@link #crosses(Span)} and the
 * {@code spansToStrings} helpers) treat {@code end} as exclusive.
 * {@link #contains(int)} is the exception: it accepts {@code end} itself.
 */
public class Span implements Comparable<Span> {

  private final int start;
  private final int end;

  /** Optional label of the span; may be {@code null}. */
  private final String type;

  /**
   * Initializes a new Span object.
   *
   * @param s start of span.
   * @param e end of span.
   * @param type the type of the span, may be {@code null}
   *
   * @throws IllegalArgumentException if either offset is negative or if
   *     {@code s} is larger than {@code e}
   */
  public Span(int s, int e, String type) {
    if (s < 0 || e < 0) {
      throw new IllegalArgumentException("start and end index must be zero or greater!");
    }

    if (s > e) {
      throw new IllegalArgumentException("start index must not be larger than end index!");
    }

    start = s;
    end = e;
    this.type = type;
  }

  /**
   * Initializes a new Span object without a type.
   *
   * @param s start of span.
   * @param e end of span.
   *
   * @throws IllegalArgumentException if either offset is negative or if
   *     {@code s} is larger than {@code e}
   */
  public Span(int s, int e) {
    this(s, e, null);
  }

  /**
   * Initializes a new Span object from an existing Span whose start and end
   * are both shifted by an offset. The type is copied from the given span.
   *
   * @param span the span to copy
   * @param offset the amount added to both start and end
   *
   * @throws IllegalArgumentException if a shifted offset is negative
   */
  public Span(Span span, int offset) {
    this(span.start + offset, span.end + offset, span.getType());
  }

  /**
   * Return the start of a span.
   *
   * @return the start of a span.
   */
  public int getStart() {
    return start;
  }

  /**
   * Return the end of a span.
   *
   * @return the end of a span.
   */
  public int getEnd() {
    return end;
  }

  /**
   * Retrieves the type of the span.
   *
   * @return the type or null if not set
   */
  public String getType() {
    return type;
  }

  /**
   * Returns the length of this span.
   *
   * @return the difference between end and start.
   */
  public int length() {
    return end - start;
  }

  /**
   * Returns true if the specified span is contained by this span.
   * Identical spans are considered to contain each other. The type of
   * the spans is not taken into account.
   *
   * @param s The span to compare with this span.
   *
   * @return true if the specified span is contained by this span;
   *     false otherwise.
   */
  public boolean contains(Span s) {
    return start <= s.getStart() && s.getEnd() <= end;
  }

  /**
   * Returns true if the specified index lies between start and end.
   *
   * <p>NOTE(review): unlike the other methods of this class, the end offset
   * is accepted here (inclusive bound). This is kept as is because callers
   * may rely on it.
   *
   * @param index the index to test
   *
   * @return true if {@code start <= index <= end}; false otherwise.
   */
  public boolean contains(int index) {
    return start <= index && index <= end;
  }

  /**
   * Returns true if the specified span begins at the same offset as this
   * span and is contained in this span.
   *
   * @param s The span to compare with this span.
   *
   * @return true if both spans share their start and the specified span is
   *     contained in this span; false otherwise
   */
  public boolean startsWith(Span s) {
    return getStart() == s.getStart() && contains(s);
  }

  /**
   * Returns true if the specified span intersects with this span.
   *
   * @param s The span to compare with this span.
   *
   * @return true if one span contains the other or the spans overlap;
   *     false otherwise.
   */
  public boolean intersects(Span s) {
    int sstart = s.getStart();
    // either one span contains the other, or s's start is in this,
    // or this' start is in s
    return this.contains(s) || s.contains(this)
        || (getStart() <= sstart && sstart < getEnd())
        || (sstart <= getStart() && getStart() < s.getEnd());
  }

  /**
   * Returns true if the specified span crosses this span.
   *
   * @param s The span to compare with this span.
   *
   * @return true if the specified span overlaps this span and neither span
   *     contains the other; false otherwise.
   */
  public boolean crosses(Span s) {
    int sstart = s.getStart();
    // neither span contains the other, and either s's start is in this
    // or this' start is in s
    return !this.contains(s) && !s.contains(this)
        && ((getStart() <= sstart && sstart < getEnd())
            || (sstart <= getStart() && getStart() < s.getEnd()));
  }

  /**
   * Retrieves the part of the specified text that is covered by this span.
   *
   * @param text the text to cut the span out of
   *
   * @return the subsequence from start (inclusive) to end (exclusive)
   *
   * @throws IllegalArgumentException if the end of this span lies behind
   *     the end of the text
   */
  public CharSequence getCoveredText(CharSequence text) {
    if (getEnd() > text.length()) {
      throw new IllegalArgumentException("The span " + toString()
          + " is outside the given text!");
    }

    return text.subSequence(getStart(), getEnd());
  }

  /**
   * Compares the specified span to the current span.
   *
   * <p>Spans are ordered by ascending start; spans with the same start are
   * ordered by descending end, so the longer span comes first. The type is
   * not compared, hence two spans that differ only in their type compare as
   * 0 although {@link #equals(Object)} reports them as different.
   *
   * @param s the span to compare with
   *
   * @return -1, 0 or 1 if this span sorts before, equal to or after
   *     the specified span
   */
  @Override
  public int compareTo(Span s) {
    if (getStart() < s.getStart()) {
      return -1;
    }
    if (getStart() > s.getStart()) {
      return 1;
    }

    // same start: the span reaching further sorts first
    if (getEnd() > s.getEnd()) {
      return -1;
    }
    if (getEnd() < s.getEnd()) {
      return 1;
    }
    return 0;
  }

  /**
   * Generates a hash code of the current span from start, end and type, so
   * that it is consistent with {@link #equals(Object)}.
   */
  @Override
  public int hashCode() {
    int result = start;
    result = 31 * result + end;
    result = 31 * result + (type == null ? 0 : type.hashCode());
    return result;
  }

  /**
   * Checks if the specified span is equal to the current span.
   *
   * <p>Two spans are equal if start, end and type are equal. A span without
   * a type is only equal to another span without a type, which keeps the
   * relation symmetric.
   */
  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (!(o instanceof Span)) {
      return false;
    }

    Span s = (Span) o;
    // null-safe comparison of the types, evaluated the same way in both directions
    boolean sameType = (type == null) ? s.getType() == null : type.equals(s.getType());

    return getStart() == s.getStart() && getEnd() == s.getEnd() && sameType;
  }

  /**
   * Generates a human readable string of the form {@code start..end}.
   */
  @Override
  public String toString() {
    return getStart() + ".." + getEnd();
  }

  /**
   * Converts an array of {@link Span}s to an array of {@link String}s by
   * cutting each span out of the given text.
   *
   * @param spans the spans, interpreted as character offsets into {@code s}
   * @param s the text the spans refer to
   *
   * @return the covered strings, in the order of {@code spans}
   *
   * @throws IllegalArgumentException if a span ends behind the end of the text
   */
  public static String[] spansToStrings(Span[] spans, CharSequence s) {
    String[] tokens = new String[spans.length];

    for (int si = 0; si < spans.length; si++) {
      tokens[si] = spans[si].getCoveredText(s).toString();
    }

    return tokens;
  }

  /**
   * Converts an array of {@link Span}s to an array of {@link String}s by
   * joining, for each span, the tokens from start (inclusive) to end
   * (exclusive) with a single space.
   *
   * @param spans the spans, interpreted as indices into {@code tokens}
   * @param tokens the tokens the spans refer to
   *
   * @return the joined chunks, in the order of {@code spans}; an empty span
   *     yields an empty string
   */
  public static String[] spansToStrings(Span[] spans, String[] tokens) {
    String[] chunks = new String[spans.length];
    StringBuilder cb = new StringBuilder();

    for (int si = 0; si < spans.length; si++) {
      cb.setLength(0);
      for (int ti = spans[si].getStart(); ti < spans[si].getEnd(); ti++) {
        // separator goes in front of every token but the first, so nothing
        // has to be trimmed afterwards (which failed for empty spans)
        if (cb.length() > 0) {
          cb.append(' ');
        }
        cb.append(tokens[ti]);
      }
      chunks[si] = cb.toString();
    }

    return chunks;
  }
}