/*
 * Document.java
 *
 * Copyright (c) 1995-2010, The University of Sheffield. See the file
 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 * This file is part of GATE (see http://gate.ac.uk/), and is free
 * software, licenced under the GNU Library General Public License,
 * Version 2, June 1991 (in the distribution as file licence.html,
 * and also available at http://gate.ac.uk/gate/licence.html).
 *
 * Hamish Cunningham, 19/Jan/2000
 *
 * $Id: Document.java 12006 2009-12-01 17:24:28Z thomas_heitz $
 */
016
package gate;

import java.net.URL;
import java.util.Map;
import java.util.Set;

import gate.event.DocumentListener;
import gate.util.InvalidOffsetException;
025
026
027 /** Represents the commonalities between all sorts of documents.
028 */
029 public interface Document extends SimpleDocument {
030
031 /**
032 * The parameter name that determines whether or not a document is markup aware
033 */
034 public static final String
035 DOCUMENT_MARKUP_AWARE_PARAMETER_NAME = "markupAware";
036
037 public static final String
038 DOCUMENT_ENCODING_PARAMETER_NAME = "encoding";
039
040 public static final String
041 DOCUMENT_PRESERVE_CONTENT_PARAMETER_NAME = "preserveOriginalContent";
042
043 public static final String
044 DOCUMENT_STRING_CONTENT_PARAMETER_NAME = "stringContent";
045
046 public static final String
047 DOCUMENT_MIME_TYPE_PARAMETER_NAME = "mimeType";
048
049 public static final String
050 DOCUMENT_REPOSITIONING_PARAMETER_NAME = "collectRepositioningInfo";
051
052 public static final String
053 DOCUMENT_START_OFFSET_PARAMETER_NAME = "sourceUrlStartOffset";
054
055 public static final String
056 DOCUMENT_END_OFFSET_PARAMETER_NAME = "sourceUrlEndOffset";
057
058 /* parameter to store additional info about the document type,
059 * e.g. publication, javadoc, etc. */
060 public static final String
061 DOCUMENT_TYPE_PARAMETER_NAME = "documentType";
062
063 /** Documents may be packed within files; in this case an optional pair of
064 * offsets refer to the location of the document.
065 */
066 public Long[] getSourceUrlOffsets();
067
068 /** Documents may be packed within files; in this case an optional pair of
069 * offsets refer to the location of the document. This method gets the
070 * start offset.
071 */
072 public Long getSourceUrlStartOffset();
073
074 /** Documents may be packed within files; in this case an optional pair of
075 * offsets refer to the location of the document. This method gets the
076 * end offset.
077 */
078 public Long getSourceUrlEndOffset();
079
080 /** Returns a map with the named annotation sets
081 */
082 public Map<String, AnnotationSet> getNamedAnnotationSets();
083
084 /** Make the document markup-aware. This will trigger the creation
085 * of a DocumentFormat object at Document initialisation time; the
086 * DocumentFormat object will unpack the markup in the Document and
087 * add it as annotations. Documents are <B>not</B> markup-aware by default.
088 *
089 * @param b markup awareness status.
090 */
091 public void setMarkupAware(Boolean b);
092
093 /** Get the markup awareness status of the Document.
094 *
095 * @return whether the Document is markup aware.
096 */
097 public Boolean getMarkupAware();
098
099 /**
100 * Allow/disallow preserving of the original document content.
101 * If is <B>true</B> the original content will be retrieved from
102 * the DocumentContent object and preserved as document feature.
103 */
104 public void setPreserveOriginalContent(Boolean b);
105
106 /** Get the preserving of content status of the Document.
107 *
108 * @return whether the Document should preserve it's original content.
109 */
110 public Boolean getPreserveOriginalContent();
111
112 /**
113 * Allow/disallow collecting of repositioning information.
114 * If is <B>true</B> information will be retrieved and preserved
115 * as document feature.<BR>
116 * Preserving of repositioning information give the possibilities
117 * for converting of coordinates between the original document content and
118 * extracted from the document text.
119 */
120 public void setCollectRepositioningInfo(Boolean b);
121
122 /** Get the collectiong and preserving of repositioning information
123 * for the Document. <BR>
124 * Preserving of repositioning information give the possibilities
125 * for converting of coordinates between the original document content and
126 * extracted from the document text.
127 *
128 * @return whether the Document should collect and preserve information.
129 */
130 public Boolean getCollectRepositioningInfo();
131
132 /** Returns a GateXml document. This document is actually a serialization of
133 * a Gate Document in XML.
134 * @return a string representing a Gate Xml document
135 */
136 public String toXml();
137
138 /** Returns an XML document aming to preserve the original markups(
139 * the original markup will be in the same place and format as it was
140 * before processing the document) and include (if possible)
141 * the annotations specified in the aSourceAnnotationSet.
142 * <b>Warning:</b> Annotations from the aSourceAnnotationSet will be lost
143 * if they will cause a crosed over situation.
144 * @param aSourceAnnotationSet is an annotation set containing all the
145 * annotations that will be combined with the original marup set.
146 * @param includeFeatures determines whether or not features and gate IDs
147 * of the annotations should be included as attributes on the tags or not.
148 * If false, then only the annotation types are exported as tags, with no
149 * attributes.
150 * @return a string representing an XML document containing the original
151 * markup + dumped annotations form the aSourceAnnotationSet
152 */
153 public String toXml(Set aSourceAnnotationSet, boolean includeFeatures);
154
155 /**
156 * Equivalent to toXml(aSourceAnnotationSet, true).
157 */
158 public String toXml(Set aSourceAnnotationSet);
159
160 /** Make changes to the content.
161 */
162 public void edit(Long start, Long end, DocumentContent replacement)
163 throws InvalidOffsetException;
164
165 /**
166 * Adds a {@link gate.event.DocumentListener} to this document.
167 * All the registered listeners will be notified of changes occured to the
168 * document.
169 */
170 public void addDocumentListener(DocumentListener l);
171
172 /**
173 * Removes one of the previously registered document listeners.
174 */
175 public void removeDocumentListener(DocumentListener l);
176
177
178 /** Documents may be packed within files; in this case an optional pair of
179 * offsets refer to the location of the document. This method sets the
180 * end offset.
181 */
182 public void setSourceUrlEndOffset(Long sourceUrlEndOffset);
183
184
185 /** Documents may be packed within files; in this case an optional pair of
186 * offsets refer to the location of the document. This method sets the
187 * start offset.
188 */
189 public void setSourceUrlStartOffset(Long sourceUrlStartOffset);
190
191 } // interface Document
|