01 /*
02 * DocumentContent.java
03 *
04 * Copyright (c) 1995-2010, The University of Sheffield. See the file
05 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
06 *
07 * This file is part of GATE (see http://gate.ac.uk/), and is free
08 * software, licenced under the GNU Library General Public License,
09 * Version 2, June 1991 (in the distribution as file licence.html,
10 * and also available at http://gate.ac.uk/gate/licence.html).
11 *
12 * Hamish Cunningham, 15/Feb/2000
13 *
14 * $Id: DocumentContent.java 13078 2010-09-15 10:31:37Z thomas_heitz $
15 */
16
17 package gate;
18
19 import java.io.Serializable;
20
21 import gate.util.InvalidOffsetException;
22
23 /** The content of Documents.
24 */
25 public interface DocumentContent extends Serializable {
26
27 /**
28 * Return the contents under a particular span.
29 * <p>
30 * Conceptually the annotation offsets are defined as falling in between
31 * characters, with "0" pointing before the fist character.
32 * Because of that, the offsets where an annotation ends and the space after
33 * it starts are the same.
34 * <p>
35 * So this is what the "abcde" string looks like with the offsets explicitly
36 * included: 0a1b2c3d4e5
37 * <p>
38 * "ab cd" would then look like this: 0a1b2 3c4d5
39 * <p>
40 * with the following annotations:<br>
41 * Token "ab" [0,2]<br>
42 * SpaceToken " " [2,3]<br>
43 * Token "cd" [3,5]
44 * <p>
45 * @param start the beginning index, inclusive.
46 * @param end the ending index, exclusive.
47 * @return the specified substring for the document.
48 * @throws gate.util.InvalidOffsetException if the
49 * <code>start</code> is negative, or
50 * <code>end</code> is larger than the length of
51 * this <code>DocumentContent</code> object, or
52 * <code>start</code> is larger than
53 * <code>end</code>.
54 */
55 public DocumentContent getContent(Long start, Long end)
56 throws InvalidOffsetException;
57
58 /** The size of this content (e.g. character length for textual
59 * content).
60 */
61 public Long size();
62
63 } // interface DocumentContent
|