001    /***************************************************************************/
002    /*  Copyright (C) 2010-2011, Sebastian Hellmann                            */
003    /*  Note: If you need parts of NLP2RDF in another licence due to licence   */
004    /*  incompatibility, please mail hellmann@informatik.uni-leipzig.de        */
005    /*                                                                         */
006    /*  This file is part of NLP2RDF.                                          */
007    /*                                                                         */
008    /*  NLP2RDF is free software; you can redistribute it and/or modify        */
009    /*  it under the terms of the GNU General Public License as published by   */
010    /*  the Free Software Foundation; either version 3 of the License, or      */
011    /*  (at your option) any later version.                                    */
012    /*                                                                         */
013    /*  NLP2RDF is distributed in the hope that it will be useful,             */
014    /*  but WITHOUT ANY WARRANTY; without even the implied warranty of         */
015    /*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           */
016    /*  GNU General Public License for more details.                           */
017    /*                                                                         */
018    /*  You should have received a copy of the GNU General Public License      */
019    /*  along with this program. If not, see <http://www.gnu.org/licenses/>.   */
020    /***************************************************************************/
021    
022    package org.nlp2rdf.core;
023    
024    /**
025     * I copied this class from OpenNLP
026     * @author Sebastian Hellmann
027     *         Date: 11/8/11
028     */
029    /*
030     * Licensed to the Apache Software Foundation (ASF) under one or more
031     * contributor license agreements.  See the NOTICE file distributed with
032     * this work for additional information regarding copyright ownership.
033     * The ASF licenses this file to You under the Apache License, Version 2.0
034     * (the "License"); you may not use this file except in compliance with
035     * the License. You may obtain a copy of the License at
036     *
037     *     http://www.apache.org/licenses/LICENSE-2.0
038     *
039     * Unless required by applicable law or agreed to in writing, software
040     * distributed under the License is distributed on an "AS IS" BASIS,
041     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
042     * See the License for the specific language governing permissions and
043     * limitations under the License.
044     */
045    
/**
 * Stores a start and an end integer offset together with an optional type.
 *
 * <p>The start offset is inclusive and the end offset is exclusive: the
 * length of a span is {@code end - start} and the covered text is
 * {@code text.subSequence(start, end)}. All fields are final, so a span
 * never changes after construction.
 */
public class Span implements Comparable<Span> {

  private final int start;
  private final int end;

  private final String type;

  /**
   * Initializes a new Span object.
   *
   * @param s start of span (inclusive).
   * @param e end of span (exclusive).
   * @param type the type of the span, may be null
   *
   * @throws IllegalArgumentException if {@code s} or {@code e} is negative,
   * or if {@code s} is larger than {@code e}
   */
  public Span(int s, int e, String type) {

    if (s < 0 || e < 0) {
      throw new IllegalArgumentException("start and end index must be zero or greater!");
    }

    if (s > e) {
      throw new IllegalArgumentException("start index must not be larger than end index!");
    }

    start = s;
    end = e;
    this.type = type;
  }

  /**
   * Initializes a new Span object without a type.
   *
   * @param s start of span (inclusive).
   * @param e end of span (exclusive).
   *
   * @throws IllegalArgumentException if {@code s} or {@code e} is negative,
   * or if {@code s} is larger than {@code e}
   */
  public Span(int s, int e) {
    this(s, e, null);
  }

  /**
   * Initializes a new Span object with an existing Span
   * which is shifted by an offset. The type of the existing span is kept.
   *
   * @param span the span to copy
   * @param offset the value added to both the start and the end of the span
   *
   * @throws IllegalArgumentException if a shifted offset is negative
   */
  public Span(Span span, int offset) {
    this(span.start + offset, span.end + offset, span.getType());
  }

  /**
   * Return the start of a span.
   *
   * @return the start of a span.
   */
  public int getStart() {
    return start;
  }

  /**
   * Return the end of a span.
   *
   * @return the end of a span.
   */
  public int getEnd() {
    return end;
  }

  /**
   * Retrieves the type of the span.
   *
   * @return the type or null if not set
   */
  public String getType() {
    return type;
  }

  /**
   * Returns the length of this span.
   *
   * @return the length of the span, that is {@code end - start}.
   */
  public int length() {
    return end - start;
  }

  /**
   * Returns true if the specified span is contained by this span.
   * Identical spans are considered to contain each other.
   *
   * @param s The span to compare with this span.
   *
   * @return true if the specified span is contained by this span;
   * false otherwise.
   */
  public boolean contains(Span s) {
    return start <= s.getStart() && s.getEnd() <= end;
  }

  /**
   * Returns true if the specified index lies inside this span.
   * Because the end offset is exclusive, an index equal to the end
   * of the span is considered to be outside of it.
   *
   * @param index the index to test.
   *
   * @return true if {@code start <= index < end}; false otherwise.
   */
  public boolean contains(int index) {
    return start <= index && index < end;
  }

  /**
   * Returns true if the specified span is the begin of this span and the
   * specified span is contained in this span.
   *
   * @param s The span to compare with this span.
   *
   * @return true if the specified span starts with this span and is
   * contained in this span; false otherwise
   */
  public boolean startsWith(Span s) {
    return getStart() == s.getStart() && contains(s);
  }

  /**
   * Returns true if the specified span intersects with this span.
   *
   * @param s The span to compare with this span.
   *
   * @return true if the spans overlap; false otherwise.
   */
  public boolean intersects(Span s) {
    int sstart = s.getStart();
    // either s's start is in this or this' start is in s
    return this.contains(s) || s.contains(this) ||
           getStart() <= sstart && sstart < getEnd() ||
           sstart <= getStart() && getStart() < s.getEnd();
  }

  /**
   * Returns true if the specified span crosses this span.
   *
   * @param s The span to compare with this span.
   *
   * @return true if the specified span overlaps this span and contains a
   * non-overlapping section; false otherwise.
   */
  public boolean crosses(Span s) {
    int sstart = s.getStart();
    // neither span contains the other, and
    // either s's start is in this or this' start is in s
    return !this.contains(s) && !s.contains(this) &&
           (getStart() <= sstart && sstart < getEnd() ||
           sstart <= getStart() && getStart() < s.getEnd());
  }

  /**
   * Retrieves the string covered by the current span of the specified text.
   *
   * @param text the text the span offsets refer to
   *
   * @return the substring covered by the current span
   *
   * @throws IllegalArgumentException if the end of the span lies behind
   * the end of the text
   */
  public CharSequence getCoveredText(CharSequence text) {
    if (getEnd() > text.length()) {
      throw new IllegalArgumentException("The span " + toString() +
          " is outside the given text!");
    }

    return text.subSequence(getStart(), getEnd());
  }

  /**
   * Compares the specified span to the current span.
   *
   * <p>Spans are ordered by ascending start; spans with the same start are
   * ordered by descending end, so the longer span comes first. The type is
   * not taken into account, hence two spans can compare as 0 without being
   * equal according to {@link #equals(Object)}.
   *
   * @param s the span to compare with this span.
   *
   * @return -1, 0 or 1 if this span is ordered before, together with or
   * after the specified span.
   */
  @Override
  public int compareTo(Span s) {
    if (getStart() < s.getStart()) {
      return -1;
    }
    else if (getStart() == s.getStart()) {
      if (getEnd() > s.getEnd()) {
        return -1;
      }
      else if (getEnd() < s.getEnd()) {
        return 1;
      }
      else {
        return 0;
      }
    }
    else {
      return 1;
    }
  }

  /**
   * Generates a hash code of the current span from its start, end and type,
   * the same fields {@link #equals(Object)} compares.
   */
  @Override
  public int hashCode() {
    int result = 23;
    result = 37 * result + start;
    result = 37 * result + end;
    result = 37 * result + (type == null ? 0 : type.hashCode());
    return result;
  }

  /**
   * Checks if the specified span is equal to the current span.
   * Two spans are equal if start, end and type are equal; a span
   * without a type is only equal to another span without a type.
   */
  @Override
  public boolean equals(Object o) {

    if (o == this) {
      return true;
    }

    if (!(o instanceof Span)) {
      return false;
    }

    Span s = (Span) o;

    // null-safe in both directions to keep equals symmetric
    boolean sameType = (type == null) ? s.type == null : type.equals(s.type);

    return start == s.start && end == s.end && sameType;
  }

  /**
   * Generates a human readable string of the form {@code start..end}.
   */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder(15);
    text.append(getStart());
    text.append("..");
    text.append(getEnd());

    return text.toString();
  }

  /**
   * Converts an array of {@link Span}s to an array of {@link String}s.
   *
   * @param spans the spans to convert, as character offsets into {@code s}
   * @param s the text the spans refer to
   *
   * @return the strings covered by the spans, in the order of the spans
   *
   * @throws IllegalArgumentException if a span ends behind the end of the text
   */
  public static String[] spansToStrings(Span[] spans, CharSequence s) {
    String[] tokens = new String[spans.length];

    for (int si = 0, sl = spans.length; si < sl; si++) {
      tokens[si] = spans[si].getCoveredText(s).toString();
    }

    return tokens;
  }

  /**
   * Converts an array of {@link Span}s over token indices to an array of
   * {@link String}s by joining the covered tokens with a single space.
   *
   * @param spans the spans to convert, as indices into {@code tokens}
   * @param tokens the tokens the spans refer to
   *
   * @return the joined tokens of each span, in the order of the spans;
   * an empty span yields an empty string
   */
  public static String[] spansToStrings(Span[] spans, String[] tokens) {
    String[] chunks = new String[spans.length];
    StringBuilder cb = new StringBuilder();

    for (int si = 0, sl = spans.length; si < sl; si++) {
      cb.setLength(0);

      int first = spans[si].getStart();
      int last = spans[si].getEnd();

      for (int ti = first; ti < last; ti++) {
        // separator goes between tokens, so an empty span needs no trimming
        if (ti > first) {
          cb.append(" ");
        }
        cb.append(tokens[ti]);
      }

      chunks[si] = cb.toString();
    }

    return chunks;
  }
}