001 /*
002 * XmlDocumentHandler.java
003 *
004 * Copyright (c) 1995-2010, The University of Sheffield. See the file
005 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
006 *
007 * This file is part of GATE (see http://gate.ac.uk/), and is free
008 * software, licenced under the GNU Library General Public License,
009 * Version 2, June 1991 (in the distribution as file licence.html,
010 * and also available at http://gate.ac.uk/gate/licence.html).
011 *
012 * Cristian URSU, 9 May 2000
013 *
014 * $Id: XmlDocumentHandler.java 13627 2011-04-06 09:20:09Z philgooch $
015 */
016 package gate.xml;
017
018 import java.util.*;
019
020 import org.xml.sax.*;
021
022 import gate.*;
023 import gate.corpora.DocumentContentImpl;
024 import gate.corpora.RepositioningInfo;
025 import gate.event.StatusListener;
026 import gate.util.*;
027
/**
 * Implements the behaviour of the XML reader.
 * The SAX parser calls the methods of an object of this class as parsing
 * events occur.
 * The idea is to parse the XML document and to construct GATE annotation
 * objects from its elements.
 * This class also replaces the content of the GATE document with a new one
 * containing only the text of the XML document.
 */
037 public class XmlDocumentHandler extends XmlPositionCorrectionHandler {
038
039 /** Debug flag */
040 private static final boolean DEBUG = false;
041 /** Keep the refference to this structure */
042 private RepositioningInfo reposInfo = null;
043 /** Keep the refference to this structure */
044 private RepositioningInfo ampCodingInfo = null;
045 /** This is used to capture all data within two tags before calling the actual characters method */
046 private StringBuffer contentBuffer = new StringBuffer("");
047 /** This is a variable that shows if characters have been read */
048 private boolean readCharacterStatus = false;
049
050
051 /** Flag to determine whether to deserialize namespace information into
052 * annotation features within Original markups AS
053 */
054 private boolean deserializeNamespaceInfo = false;
055 /** Feature name to use for namespace uri in namespaced elements */
056 private String namespaceURIFeature = null;
057 /** Feature name to use for namespace prefix in namespaced elements */
058 private String namespacePrefixFeature = null;
059
060 /** Set repositioning information structure refference. If you set this
061 * refference to <B>null</B> information wouldn't be collected.
062 */
063 public void setRepositioningInfo(RepositioningInfo info) {
064 reposInfo = info;
065 } // setRepositioningInfo
066
067 /** Return current RepositioningInfo object */
068 public RepositioningInfo getRepositioningInfo() {
069 return reposInfo;
070 } // getRepositioningInfo
071
072 /** Set repositioning information structure refference for ampersand coding.
073 * If you set this refference to <B>null</B> information wouldn't be used.
074 */
075 public void setAmpCodingInfo(RepositioningInfo info) {
076 ampCodingInfo = info;
077 } // setRepositioningInfo
078
079 /** Return current RepositioningInfo object for ampersand coding. */
080 public RepositioningInfo getAmpCodingInfo() {
081 return ampCodingInfo;
082 } // getRepositioningInfo
083
084
/**
 * Creates a handler without an explicit annotation set. This simply
 * delegates to the four-argument constructor, passing {@code null} as the
 * annotation set.
 *
 * @param aDocument the GATE document that will be processed.
 * @param aMarkupElementsMap the names of the elements for which annotations
 *        should be created.
 * @param anElement2StringMap the strings that will be added to the text
 *        contained by the key element.
 */
public XmlDocumentHandler(gate.Document aDocument, Map aMarkupElementsMap,
    Map anElement2StringMap) {
  this(aDocument, aMarkupElementsMap, anElement2StringMap,
      (gate.AnnotationSet) null);
}
098
099 /**
100 * Constructs a XmlDocumentHandler object.
101 * @param aDocument the Gate document that will be processed.
102 * @param aMarkupElementsMap this map contains the elements name that we
103 * want to create.
104 * @param anElement2StringMap this map contains the strings that will be
105 * added to the text contained by the key element.
106 * @param anAnnotationSet is the annotation set that will be filled when the
107 * document was processed
108 */
109 public XmlDocumentHandler(gate.Document aDocument,
110 Map aMarkupElementsMap,
111 Map anElement2StringMap,
112 gate.AnnotationSet anAnnotationSet) {
113 // init parent
114 super();
115 // init stack
116 stack = new java.util.Stack();
117
118 // this string contains the plain text (the text without markup)
119 tmpDocContent = new StringBuffer(aDocument.getContent().size().intValue());
120
121 // colector is used later to transform all custom objects into annotation
122 // objects
123 colector = new LinkedList();
124
125 // the Gate document
126 doc = aDocument;
127
128 // this map contains the elements name that we want to create
129 // if it's null all the elements from the XML documents will be transformed
130 // into Gate annotation objects
131 markupElementsMap = aMarkupElementsMap;
132
133 // this map contains the string that we want to insert iside the document
134 // content, when a certain element is found
135 // if the map is null then no string is added
136 element2StringMap = anElement2StringMap;
137
138 basicAS = anAnnotationSet;
139 customObjectsId = 0;
140 }// XmlDocumentHandler()/
141
142
143 /**
144 * This method is called when the SAX parser encounts the beginning of the
145 * XML document.
146 */
147 public void startDocument() throws org.xml.sax.SAXException {
148 // init of variables in the parent
149 super.startDocument();
150
151 /** We will attempt to add namespace feature info to each namespaced element
152 * only if three parameters are set in the global or local config file:
153 * ADD_NAMESPACE_FEATURES: boolean flag
154 * ELEMENT_NAMESPACE_URI: feature name to use to hold namespace uri
155 * ELEMENT_NAMESPACE_PREFIX: feature name to use to hold namespace prefix
156 */
157 Map configData = Gate.getUserConfig();
158
159 boolean addNSFeature = Boolean.parseBoolean((String) configData.get(GateConstants.ADD_NAMESPACE_FEATURES));
160 namespaceURIFeature = (String) configData.get(GateConstants.ELEMENT_NAMESPACE_URI);
161 namespacePrefixFeature = (String) configData.get(GateConstants.ELEMENT_NAMESPACE_PREFIX);
162
163 deserializeNamespaceInfo = (addNSFeature && namespacePrefixFeature != null && !namespacePrefixFeature.isEmpty() && namespaceURIFeature != null && !namespaceURIFeature.isEmpty());
164
165 }
166
167 /**
168 * This method is called when the SAX parser encounts the end of the
169 * XML document.
170 * Here we set the content of the gate Document to be the one generated
171 * inside this class (tmpDocContent).
172 * After that we use the colector to generate all the annotation reffering
173 * this new gate document.
174 */
175 public void endDocument() throws org.xml.sax.SAXException {
176
177 // replace the document content with the one without markups
178 doc.setContent(new DocumentContentImpl(tmpDocContent.toString()));
179
180 // fire the status listener
181 fireStatusChangedEvent("Total elements: " + elements);
182
183 // If basicAs is null then get the default AnnotationSet,
184 // based on the gate document.
185 if (basicAS == null) {
186 basicAS = doc.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
187 }
188
189 // sort colector ascending on its id
190 Collections.sort(colector);
191 Set testIdsSet = new HashSet();
192 // create all the annotations (on this new document) from the collector
193 while (!colector.isEmpty()) {
194 CustomObject obj = (CustomObject) colector.getFirst();
195 // Test to see if there are two annotation objects with the same id.
196 if (testIdsSet.contains(obj.getId())) {
197 throw new GateSaxException("Found two annotations with the same Id(" +
198 obj.getId() +
199 ").The document is inconsistent.");
200 } else {
201 testIdsSet.add(obj.getId());
202 }// End iff
203 // create a new annotation and add it to the annotation set
204 try {
205 // the annotation type will be conforming with markupElementsMap
206 //add the annotation to the Annotation Set
207 if (markupElementsMap == null) {
208 basicAS.add(obj.getId(),
209 obj.getStart(),
210 obj.getEnd(),
211 obj.getElemName(),
212 obj.getFM());
213 } else {
214 // get the type of the annotation from Map
215 String annotationType = (String) markupElementsMap.get(obj.getElemName());
216 if (annotationType != null) {
217 basicAS.add(obj.getId(),
218 obj.getStart(),
219 obj.getEnd(),
220 annotationType,
221 obj.getFM());
222 }
223 }// End if
224 } catch (gate.util.InvalidOffsetException e) {
225 Err.prln("InvalidOffsetException for annot :" + obj.getElemName() +
226 " with Id =" + obj.getId() + ". Discarded...");
227 }// End try
228 colector.remove(obj);
229 }// End while
230 }// endDocument();
231
232
233 /**
234 * This method is called when the SAX parser encounts the beginning of an
235 * XML element.
236 */
237 /**
238 *
239 * @param uri - namespace uri
240 * @param localName - local, unprefixed element name
241 * @param qName - fully qualified, prefixed element name
242 * @param atts
243 * @throws SAXException
244 */
245 public void startElement(String uri, String localName, String qName,
246 Attributes atts) throws SAXException {
247
248 // call characterActions
249 if (readCharacterStatus) {
250 readCharacterStatus = false;
251 charactersAction(new String(contentBuffer).toCharArray(), 0, contentBuffer.length());
252 }
253
254 // Inform the progress listener to fire only if no of elements processed
255 // so far is a multiple of ELEMENTS_RATE
256 if ((++elements % ELEMENTS_RATE) == 0) {
257 fireStatusChangedEvent("Processed elements : " + elements);
258 }
259
260 Integer customObjectId = null;
261 // Construct a SimpleFeatureMapImpl from the list of attributes
262 FeatureMap fm = Factory.newFeatureMap();
263
264 /** Use localName rather than qName and add the namespace prefix and uri
265 * as features if global flag is set
266 */
267 String elemName = qName;
268 boolean hasNSUri = (uri != null && !uri.isEmpty());
269 if (deserializeNamespaceInfo && hasNSUri) {
270 elemName = localName;
271 StringTokenizer strToken = new StringTokenizer(qName, ":");
272 if (strToken.countTokens() > 1) {
273 String nsPrefix = strToken.nextToken();
274 fm.put(namespaceURIFeature, uri);
275 fm.put(namespacePrefixFeature, nsPrefix);
276 }
277 }
278
279
280 //Get the name and the value of the attributes and add them to a FeaturesMAP
281 for (int i = 0; i < atts.getLength(); i++) {
282 String attName = atts.getLocalName(i);
283 String attValue = atts.getValue(i);
284 String attUri = atts.getURI(i);
285 if (attUri != null && Gate.URI.equals(attUri)) {
286 if ("gateId".equals(attName)) {
287 customObjectId = new Integer(attValue);
288 }// End if
289 if ("annotMaxId".equals(attName)) {
290 customObjectsId = new Integer(attValue).intValue();
291 }// End if
292 if ("matches".equals(attName)) {
293 StringTokenizer strTokenizer = new StringTokenizer(attValue, ";");
294 List list = new ArrayList();
295 // Take all tokens,create Integers and add them to the list
296 while (strTokenizer.hasMoreTokens()) {
297 String token = strTokenizer.nextToken();
298 list.add(new Integer(token));
299 }// End while
300 fm.put(attName, list);
301 }// End if
302 } else {
303 fm.put(atts.getQName(i), attValue);
304 }// End if
305 }// End for
306
307 // create the START index of the annotation
308 Long startIndex = new Long(tmpDocContent.length());
309
310 // initialy the Start index is equal with End index
311 CustomObject obj = new CustomObject(customObjectId, elemName, fm,
312 startIndex, startIndex);
313
314 // put this object into the stack
315 stack.push(obj);
316 }// startElement();
317
318
319 /**
320 * This method is called when the SAX parser encounts the end of an
321 * XML element.
322 * Here we extract
323 */
324 /**
325 *
326 * @param uri - namespace uri
327 * @param localName - local, unprefixed element name
328 * @param qName - fully qualified, prefixed element name
329 * @throws SAXException
330 */
331 public void endElement(String uri, String localName, String qName)
332 throws SAXException {
333
334 /** Use localName rather than qName if global flag is set */
335 String elemName = qName;
336 boolean hasNSUri = (uri != null && !uri.isEmpty());
337 if (deserializeNamespaceInfo && hasNSUri)
338 elemName = localName;
339
340 // call characterActions
341 if (readCharacterStatus) {
342 readCharacterStatus = false;
343 charactersAction(new String(contentBuffer).toCharArray(), 0, contentBuffer.length());
344 }
345
346 // obj is for internal use
347 CustomObject obj = null;
348
349 // if the stack is not empty, we extract the custom object and delete it
350 if (!stack.isEmpty()) {
351 obj = (CustomObject) stack.pop();
352 }// End if
353
354 // Before adding it to the colector, we need to check if is an
355 // emptyAndSpan one. See CustomObject's isEmptyAndSpan field.
356 if (obj.getStart().equals(obj.getEnd())) {
357 // The element had an end tag and its start was equal to its end. Hence
358 // it is anEmptyAndSpan one.
359 obj.getFM().put("isEmptyAndSpan", "true");
360 }// End iff
361
362 // Put the object into colector
363 // Later, when the document ends we will use colector to create all the
364 // annotations
365 colector.add(obj);
366
367 // if element is found on Element2String map, then add the string to the
368 // end of the document content
369 if (element2StringMap != null) {
370 String stringFromMap = null;
371
372 // test to see if element is inside the map
373 // if it is then get the string value and add it to the document content
374 stringFromMap = (String) element2StringMap.get(elemName);
375 if (stringFromMap != null) {
376 tmpDocContent.append(stringFromMap);
377 }
378 }// End if
379 }// endElement();
380
381 /**
382 * This method is called when the SAX parser encounts text in the XML doc.
383 * Here we calculate the end indices for all the elements present inside the
384 * stack and update with the new values. For entities, this method is called
385 * separatley regardless of the text sourinding the entity.
386 */
387 public void characters(char[] text, int start, int length) throws SAXException {
388 if (!readCharacterStatus) {
389 contentBuffer = new StringBuffer(new String(text, start, length));
390 } else {
391 contentBuffer.append(new String(text, start, length));
392 }
393 readCharacterStatus = true;
394 }
395
396 /**
397 * This method is called when all characters between specific tags have been read completely
398 */
399 public void charactersAction(char[] text, int start, int length) throws SAXException {
400 // correction of real offset. Didn't affect on other data.
401 super.characters(text, start, length);
402 // create a string object based on the reported text
403 String content = new String(text, start, length);
404 StringBuffer contentBuffer = new StringBuffer("");
405 int tmpDocContentSize = tmpDocContent.length();
406 boolean incrementStartIndex = false;
407 boolean addExtraSpace = true;
408 if (Gate.getUserConfig().get(
409 GateConstants.DOCUMENT_ADD_SPACE_ON_UNPACK_FEATURE_NAME) != null) {
410 addExtraSpace =
411 Gate.getUserConfig().getBoolean(
412 GateConstants.DOCUMENT_ADD_SPACE_ON_UNPACK_FEATURE_NAME).booleanValue();
413 }
414 // If the first char of the text just read "text[0]" is NOT whitespace AND
415 // the last char of the tmpDocContent[SIZE-1] is NOT whitespace then
416 // concatenation "tmpDocContent + content" will result into a new different
417 // word... and we want to avoid that, because the tokenizer, gazetter and
418 // Jape work on the raw text and concatenating tokens might be not good.
419 if (tmpDocContentSize != 0 &&
420 content.length() != 0 &&
421 !Character.isWhitespace(content.charAt(0)) &&
422 !Character.isWhitespace(tmpDocContent.charAt(tmpDocContentSize - 1))) {
423
424 // If we are here it means that a concatenation between the last
425 // token in the tmpDocContent and the content(which doesn't start
426 // with a white space) will be performed. In order to prevent this,
427 // we will add a " " space char in order to assure that the 2 tokens
428 // stay apart. Howerver we will except from this rule the most known
429 // internal entities like &, <, >, etc
430 if (( // Testing the length against 1 makes it more likely that
431 // an internal entity was called. characters() gets called for
432 // each entity separately.
433 (content.length() == 1) &&
434 (content.charAt(0) == '&' ||
435 content.charAt(0) == '<' ||
436 content.charAt(0) == '>' ||
437 content.charAt(0) == '"' ||
438 content.charAt(0) == '\'')) ||
439 (tmpDocContent.charAt(tmpDocContentSize - 1) == '&' ||
440 tmpDocContent.charAt(tmpDocContentSize - 1) == '<' ||
441 tmpDocContent.charAt(tmpDocContentSize - 1) == '>' ||
442 tmpDocContent.charAt(tmpDocContentSize - 1) == '"' ||
443 tmpDocContent.charAt(tmpDocContentSize - 1) == '\'')) {// do nothing. The content will be appended
444 } else if (!addExtraSpace) {
445 } else {
446 // In all other cases append " "
447 contentBuffer.append(" ");
448 incrementStartIndex = true;
449 }// End if
450 }// End if
451
452 // put the repositioning information
453 if (reposInfo != null) {
454 if (!(start == 0 && length == 1 && text.length <= 2)) {
455 // normal piece of text
456 reposInfo.addPositionInfo(getRealOffset(), content.length(),
457 tmpDocContent.length() + contentBuffer.length(),
458 content.length());
459 if (DEBUG) {
460 Out.println("Info: " + getRealOffset() + ", " + content.length());
461 Out.println("Start: " + start + " len" + length);
462 } // DEBUG
463 } else {
464 // unicode char or &xxx; coding
465 // Reported from the parser offset is 0
466 // The real offset should be found in the ampCodingInfo structure.
467
468 long lastPosition = 0;
469 RepositioningInfo.PositionInfo pi;
470
471 if (reposInfo.size() > 0) {
472 pi =
473 (RepositioningInfo.PositionInfo) reposInfo.get(reposInfo.size() - 1);
474 lastPosition = pi.getOriginalPosition();
475 } // if
476
477 for (int i = 0; i < ampCodingInfo.size(); ++i) {
478 pi = (RepositioningInfo.PositionInfo) ampCodingInfo.get(i);
479 if (pi.getOriginalPosition() > lastPosition) {
480 // found
481 reposInfo.addPositionInfo(pi.getOriginalPosition(),
482 pi.getOriginalLength(),
483 tmpDocContent.length() + contentBuffer.length(),
484 content.length());
485 break;
486 } // if
487 } // for
488 } // if
489 } // if
490
491 // update the document content
492 contentBuffer.append(content);
493 // calculate the End index for all the elements of the stack
494 // the expression is : End index = Current doc length + text length
495 Long end = new Long(tmpDocContent.length() + contentBuffer.length());
496
497 CustomObject obj = null;
498 // Iterate through stack to modify the End index of the existing elements
499
500 java.util.Iterator anIterator = stack.iterator();
501 while (anIterator.hasNext()) {
502 // get the object and move to the next one
503 obj = (CustomObject) anIterator.next();
504 if (incrementStartIndex && obj.getStart().equals(obj.getEnd())) {
505 obj.setStart(new Long(obj.getStart().longValue() + 1));
506 }// End if
507 // sets its End index
508 obj.setEnd(end);
509 }// End while
510
511 tmpDocContent.append(contentBuffer.toString());
512 }// characters();
513
514 /**
515 * This method is called when the SAX parser encounts white spaces
516 */
517 public void ignorableWhitespace(char ch[], int start, int length) throws
518 SAXException {
519
520 // internal String object
521 String text = new String(ch, start, length);
522 // if the last character in tmpDocContent is \n and the read whitespace is
523 // \n then don't add it to tmpDocContent...
524
525 if (tmpDocContent.length() != 0) {
526 if (tmpDocContent.charAt(tmpDocContent.length() - 1) != '\n' ||
527 !text.equalsIgnoreCase("\n")) {
528 tmpDocContent.append(text);
529 }
530 }
531 }
532
533 /**
534 * Error method.We deal with this exception inside SimpleErrorHandler class
535 */
536 public void error(SAXParseException ex) throws SAXException {
537 // deal with a SAXParseException
538 // see SimpleErrorhandler class
539 _seh.error(ex);
540 }
541
542 /**
543 * FatalError method.
544 */
545 public void fatalError(SAXParseException ex) throws SAXException {
546 // deal with a SAXParseException
547 // see SimpleErrorhandler class
548 _seh.fatalError(ex);
549 }
550
551 /**
552 * Warning method comment.
553 */
554 public void warning(SAXParseException ex) throws SAXException {
555 // deal with a SAXParseException
556 // see SimpleErrorhandler class
557 _seh.warning(ex);
558 }
559
560 /**
561 * This method is called when the SAX parser encounts a comment
562 * It works only if the XmlDocumentHandler implements a
563 * com.sun.parser.LexicalEventListener
564 */
565 public void comment(String text) throws SAXException {
566 // create a FeatureMap and then add the comment to the annotation set.
567 /*
568 gate.util.SimpleFeatureMapImpl fm = new gate.util.SimpleFeatureMapImpl();
569 fm.put ("text_comment",text);
570 Long node = new Long (tmpDocContent.length());
571 CustomObject anObject = new CustomObject("Comment",fm,node,node);
572 colector.add(anObject);
573 */
574 }
575
576 /**
577 * This method is called when the SAX parser encounts a start of a CDATA
578 * section
579 * It works only if the XmlDocumentHandler implements a
580 * com.sun.parser.LexicalEventListener
581 */
582 public void startCDATA() throws SAXException {
583 }
584
585 /**
586 * This method is called when the SAX parser encounts the end of a CDATA
587 * section.
588 * It works only if the XmlDocumentHandler implements a
589 * com.sun.parser.LexicalEventListener
590 */
591 public void endCDATA() throws SAXException {
592 }
593
594 /**
595 * This method is called when the SAX parser encounts a parsed Entity
596 * It works only if the XmlDocumentHandler implements a
597 * com.sun.parser.LexicalEventListener
598 */
599 public void startParsedEntity(String name) throws SAXException {
600 }
601
602 /**
603 * This method is called when the SAX parser encounts a parsed entity and
604 * informs the application if that entity was parsed or not
605 * It's working only if the CustomDocumentHandler implements a
606 * com.sun.parser.LexicalEventListener
607 */
608 public void endParsedEntity(String name, boolean included) throws SAXException {
609 }
610
611 //StatusReporter Implementation
612 /**
613 * This methos is called when a listener is registered with this class
614 */
615 public void addStatusListener(StatusListener listener) {
616 myStatusListeners.add(listener);
617 }
618
619 /**
620 * This methos is called when a listener is removed
621 */
622 public void removeStatusListener(StatusListener listener) {
623 myStatusListeners.remove(listener);
624 }
625
626 /**
627 * This methos is called whenever we need to inform the listener about an
628 * event.
629 */
630 protected void fireStatusChangedEvent(String text) {
631 Iterator listenersIter = myStatusListeners.iterator();
632 while (listenersIter.hasNext()) {
633 ((StatusListener) listenersIter.next()).statusChanged(text);
634 }
635 }
636
637 /** This method is a workaround of the java 4 non namespace supporting parser
638 * It receives a qualified name and returns its local name.
639 * For eg. if it receives gate:gateId it will return gateId
640 */
641 private String getMyLocalName(String aQName) {
642 if (aQName == null) {
643 return "";
644 }
645 StringTokenizer strToken = new StringTokenizer(aQName, ":");
646 if (strToken.countTokens() <= 1) {
647 return aQName;
648 }
649 // The nr of tokens is >= than 2
650 // Skip the first token which is the QName
651 strToken.nextToken();
652 return strToken.nextToken();
653 }//getMyLocalName()
654
655 /** Also a workaround for URI identifier. If the QName is gate it will return
656 * GATE's. Otherwhise it will return the empty string
657 */
658 private String getMyURI(String aQName) {
659 if (aQName == null) {
660 return "";
661 }
662 StringTokenizer strToken = new StringTokenizer(aQName, ":");
663 if (strToken.countTokens() <= 1) {
664 return "";
665 }
666 // If first token is "gate" then return GATE's URI
667 if ("gate".equalsIgnoreCase(strToken.nextToken())) {
668 return Gate.URI;
669 }
670 return "";
671 }// getMyURI()
672 // XmlDocumentHandler member data
673 // this constant indicates when to fire the status listener
674 // this listener will add an overhead and we don't want a big overhead
675 // this listener will be callled from ELEMENTS_RATE to ELEMENTS_RATE
676 final static int ELEMENTS_RATE = 128;
677 // this map contains the elements name that we want to create
678 // if it's null all the elements from the XML documents will be transformed
679 // into Gate annotation objects otherwise only the elements it contains will
680 // be transformed
681 private Map markupElementsMap = null;
682 // this map contains the string that we want to insert iside the document
683 // content, when a certain element is found
684 // if the map is null then no string is added
685 private Map element2StringMap = null;
686 /**This object inducates what to do when the parser encounts an error*/
687 private SimpleErrorHandler _seh = new SimpleErrorHandler();
688 /**The content of the XML document, without any tag for internal use*/
689 private StringBuffer tmpDocContent = null;
690 /**A stack used to remember elements and to keep the order */
691 private java.util.Stack stack = null;
692 /**A gate document */
693 private gate.Document doc = null;
694 /**An annotation set used for creating annotation reffering the doc */
695 private gate.AnnotationSet basicAS = null;
696 /**Listeners for status report */
697 protected List myStatusListeners = new LinkedList();
698 /**This reports the the number of elements that have beed processed so far*/
699 private int elements = 0;
700 /** We need a colection to retain all the CustomObjects that will be
701 * transformed into annotation over the gate document...
702 * the transformation will take place inside onDocumentEnd() method
703 */
704 private LinkedList colector = null;
705 /** This is used to generate unique Ids for the CustomObjects read*/
706 protected int customObjectsId = 0;
707
708 /** Accesor method for the customObjectsId field*/
709 public int getCustomObjectsId() {
710 return customObjectsId;
711 }
712
713 //////// INNER CLASS
714 /**
715 * The objects belonging to this class are used inside the stack.
716 * This class is for internal needs
717 */
718 class CustomObject implements Comparable {
719
720 // constructor
721 public CustomObject(Integer anId, String anElemName, FeatureMap aFm,
722 Long aStart, Long anEnd) {
723 elemName = anElemName;
724 fm = aFm;
725 start = aStart;
726 end = anEnd;
727 if (anId == null) {
728 id = new Integer(customObjectsId++);
729 } else {
730 id = anId;
731 if (customObjectsId <= anId.intValue()) {
732 customObjectsId = anId.intValue() + 1;
733 }
734 }// End if
735 }// End CustomObject()
736
737 // Methos implemented as required by Comparable interface
738 public int compareTo(Object o) {
739 CustomObject obj = (CustomObject) o;
740 return this.id.compareTo(obj.getId());
741 }// compareTo();
742
743 // accesor
744 public String getElemName() {
745 return elemName;
746 }// getElemName()
747
748 public FeatureMap getFM() {
749 return fm;
750 }// getFM()
751
752 public Long getStart() {
753 return start;
754 }// getStart()
755
756 public Long getEnd() {
757 return end;
758 }// getEnd()
759
760 public Integer getId() {
761 return id;
762 }
763
764 // mutator
765 public void setElemName(String anElemName) {
766 elemName = anElemName;
767 }// getElemName()
768
769 public void setFM(FeatureMap aFm) {
770 fm = aFm;
771 }// setFM();
772
773 public void setStart(Long aStart) {
774 start = aStart;
775 }// setStart();
776
777 public void setEnd(Long anEnd) {
778 end = anEnd;
779 }// setEnd();
780 // data fields
781 private String elemName = null;
782 private FeatureMap fm = null;
783 private Long start = null;
784 private Long end = null;
785 private Integer id = null;
786 } // End inner class CustomObject
787 } //XmlDocumentHandler
788
789
|