001 /*
002 * DatabaseCorpusImpl.java
003 *
004 * Copyright (c) 1995-2010, The University of Sheffield. See the file
005 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
006 *
007 * This file is part of GATE (see http://gate.ac.uk/), and is free
008 * software, licenced under the GNU Library General Public License,
009 * Version 2, June 1991 (in the distribution as file licence.html,
010 * and also available at http://gate.ac.uk/gate/licence.html).
011 *
012 * Marin Dimitrov, 05/Nov/2001
013 *
014 * $Id: DatabaseCorpusImpl.java 12455 2010-04-08 13:27:56Z ian_roberts $
015 */
016
017 package gate.corpora;
018
019 import java.util.*;
020
021 import junit.framework.Assert;
022
023 import gate.*;
024 import gate.creole.ResourceInstantiationException;
025 import gate.event.*;
026 import gate.persist.*;
027 import gate.security.SecurityInfo;
028 import gate.util.*;
029
030
031 public class DatabaseCorpusImpl extends CorpusImpl
032 implements DatastoreListener,
033 EventAwareCorpus {
034
/** Debug flag: when true, diagnostic messages are printed through Out.prln. */
private static final boolean DEBUG = false;

/**
 * Set when the feature map is replaced or reports an update; cleared when
 * the datastore reports that this corpus was written.
 */
private boolean featuresChanged;
/**
 * Set by setName(); cleared when the datastore reports that this corpus
 * was written.
 */
private boolean nameChanged;
/**
 * The listener for the events coming from the features.
 */
protected EventsHandler eventHandler;
/**
 * One DocumentData (document name + persistent ID) per corpus entry; kept
 * index-aligned with the inherited supportList.
 */
protected List documentData;
/** Persistent IDs of documents removed since this corpus was last written. */
protected List removedDocuments;
/**
 * Persistent IDs of already-persistent documents added since this corpus
 * was last written.
 */
protected List addedDocuments;
047
048 public DatabaseCorpusImpl() {
049 super();
050 }
051
052
053 public DatabaseCorpusImpl(String _name,
054 DatabaseDataStore _ds,
055 Long _persistenceID,
056 FeatureMap _features,
057 Vector _dbDocs) {
058
059 super();
060
061 this.name = _name;
062 this.dataStore = _ds;
063 this.lrPersistentId = _persistenceID;
064 this.features = _features;
065 this.documentData = _dbDocs;
066 this.supportList = new ArrayList(this.documentData.size());
067 this.removedDocuments = new ArrayList();
068 //just allocate space for this one, don't initialize it -
069 //invokations of add() will add elements to it
070 this.addedDocuments = new ArrayList();
071
072 //init the document list
073 for (int i=0; i< this.documentData.size(); i++) {
074 this.supportList.add(null);
075 }
076
077 this.featuresChanged = false;
078 this.nameChanged = false;
079
080 //3. add the listeners for the features
081 if (eventHandler == null)
082 eventHandler = new EventsHandler();
083 this.features.addFeatureMapListener(eventHandler);
084
085
086 //4. add self as listener for the data store, so that we'll know when the DS is
087 //synced and we'll clear the isXXXChanged flags
088 this.dataStore.addDatastoreListener(this);
089 }
090
091
092 public boolean add(Object o){
093
094 Assert.assertNotNull(o);
095 boolean result = false;
096
097 //accept only documents
098 if (false == o instanceof Document) {
099 throw new IllegalArgumentException();
100 }
101
102 Document doc = (Document)o;
103
104 //assert docs are either transient or from the same datastore
105 if (isValidForAdoption(doc)) {
106 result = super.add(doc);
107 }
108 else {
109 return false;
110 }
111
112 //add to doc data too
113 /* Was:
114 DocumentData newDocData = new DocumentData(doc.getName(),null);
115 */
116 DocumentData newDocData = new DocumentData(doc.getName(),
117 doc.getLRPersistenceId());
118
119 this.documentData.add(newDocData);
120
121 //add the LRID to the set of newly added documents so that upon sync() a reference
122 // from the doc to the corpus will be added in the database
123 if (null != doc.getLRPersistenceId()) {
124 this.addedDocuments.add(doc.getLRPersistenceId());
125 //Out.prln("adding [" + doc.getLRPersistenceId() + "] to NewlyAddedDocs...");
126 }
127
128 if (result) {
129 fireDocumentAdded(new CorpusEvent(this,
130 doc,
131 this.supportList.size()-1,
132 doc.getLRPersistenceId(),
133 CorpusEvent.DOCUMENT_ADDED));
134 }
135
136 return result;
137 }
138
139
140 public void add(int index, Object element){
141
142 Assert.assertNotNull(element);
143 Assert.assertTrue(index >= 0);
144
145 long collInitialSize = this.supportList.size();
146
147 //accept only documents
148 if (false == element instanceof Document) {
149 throw new IllegalArgumentException();
150 }
151
152 Document doc = (Document)element;
153
154 //assert docs are either transient or from the same datastore
155 if (isValidForAdoption(doc)) {
156 super.add(index,doc);
157
158 //add to doc data too
159 DocumentData newDocData = new DocumentData(doc.getName(),null);
160 this.documentData.add(index,newDocData);
161
162 //add the LRID to the set of newly added documents so that upon sync() a reference
163 // from the doc to the corpus will be added in the database
164 if (null != doc.getLRPersistenceId()) {
165
166 this.addedDocuments.add(doc.getLRPersistenceId());
167 //Out.prln("adding ["+doc.getLRPersistenceId()+"] to NewlyAddedDocs...");
168 }
169
170 //if added then fire event
171 if (this.supportList.size() > collInitialSize) {
172 fireDocumentAdded(new CorpusEvent(this,
173 doc,
174 index,
175 doc.getLRPersistenceId(),
176 CorpusEvent.DOCUMENT_ADDED));
177 }
178 }
179 }
180
181
182
183 public boolean addAll(Collection c){
184
185 boolean collectionChanged = false;
186
187 Iterator it = c.iterator();
188 while (it.hasNext()) {
189 Document doc = (Document)it.next();
190 if (isValidForAdoption(doc)) {
191 collectionChanged |= add(doc);
192 }
193 }
194
195 return collectionChanged;
196 }
197
198
199 public boolean addAll(int index, Collection c){
200
201 Assert.assertTrue(index >=0);
202
203 //funny enough add(index,element) returns void and not boolean
204 //so we can't use it
205 boolean collectionChanged = false;
206 int collInitialSize = this.supportList.size();
207 int currIndex = index;
208
209 Iterator it = c.iterator();
210 while (it.hasNext()) {
211 Document doc = (Document)it.next();
212 if (isValidForAdoption(doc)) {
213 add(currIndex++,doc);
214 }
215 }
216
217 return (this.supportList.size() > collInitialSize);
218 }
219
220
221 private boolean isValidForAdoption(LanguageResource lr) {
222
223 Long lrID = (Long)lr.getLRPersistenceId();
224
225 if (null == lrID ||
226 (this.getDataStore() != null && lr.getDataStore().equals(this.getDataStore()))) {
227 return true;
228 }
229 else {
230 return false;
231 }
232 }
233
234 public void resourceAdopted(DatastoreEvent evt){
235 }
236
237 public void resourceDeleted(DatastoreEvent evt){
238
239 Assert.assertNotNull(evt);
240 Long deletedID = (Long)evt.getResourceID();
241 Assert.assertNotNull(deletedID);
242
243 //unregister self as listener from the DataStore
244 if (deletedID.equals(this.getLRPersistenceId())) {
245 //someone deleted this corpus
246 this.supportList.clear();
247 getDataStore().removeDatastoreListener(this);
248 }
249
250 //check if the ID is of a document the corpus contains
251 Iterator it = this.supportList.iterator();
252 while (it.hasNext()) {
253 Document doc = (Document)it.next();
254 if (doc.getLRPersistenceId().equals(deletedID)) {
255 this.supportList.remove(doc);
256 break;
257 }
258 }
259 }
260
261 public void resourceWritten(DatastoreEvent evt){
262 Assert.assertNotNull(evt);
263 Assert.assertNotNull(evt.getResourceID());
264
265 //is the event for us?
266 if (evt.getResourceID().equals(this.getLRPersistenceId())) {
267 //wow, the event is for me
268 //clear all flags, the content is synced with the DB
269 this.featuresChanged =
270 this.nameChanged = false;
271
272 this.removedDocuments.clear();
273 this.addedDocuments.clear();
274 }
275 }
276
277
278 public void resourceUnloaded(CreoleEvent e) {
279
280 Assert.assertNotNull(e);
281 Assert.assertNotNull(e.getResource());
282
283 Resource res = e.getResource();
284
285 if (res instanceof Document) {
286
287 Document doc = (Document) res;
288
289 if (DEBUG) {
290 Out.prln("resource Unloaded called ");
291 }
292
293 //remove from the corpus too, if a transient one
294 if (null == doc.getLRPersistenceId()) {
295 //@FIXME - not sure we need this
296 super.remove(doc);
297 }
298 else {
299 //unload all occurences
300 //see if we can find it first. If not, then judt return
301 int index = findDocument(doc);
302 if (index == -1) {
303 //not our document
304 return;
305 }
306 else {
307 //3. unload from internal data structures
308
309 //@FIXME - not sure we need this
310 //super.remove(doc);
311
312 //remove from the list of loaded documents
313 //System.out.println("resourceUnloaded called -- removing doc[" + index +"] from supportList...");
314 //WAS: Document oldDoc = (Document)this.supportList.remove(index);
315 this.supportList.set(index, null);
316
317 if (DEBUG)
318 Out.prln("corpus: document " + index + " unloaded and set to null");
319 } //else
320 } //else
321 } //if
322 }
323
324
325 public boolean isResourceChanged(int changeType) {
326
327 switch(changeType) {
328
329 case EventAwareLanguageResource.RES_FEATURES:
330 return this.featuresChanged;
331 case EventAwareLanguageResource.RES_NAME:
332 return this.nameChanged;
333 default:
334 throw new IllegalArgumentException();
335 }
336 }
337
338 /**
339 * Returns true of an LR has been modified since the last sync.
340 * Always returns false for transient LRs.
341 */
342 public boolean isModified() {
343 return this.isResourceChanged(EventAwareLanguageResource.RES_FEATURES) ||
344 this.isResourceChanged(EventAwareLanguageResource.RES_NAME);
345 }
346
347
348
349 /** Sets the name of this resource*/
350 public void setName(String name){
351 super.setName(name);
352
353 this.nameChanged = true;
354 }
355
356
357 /** Set the feature set */
358 public void setFeatures(FeatureMap features) {
359 //1. save them first, so we can remove the listener
360 FeatureMap oldFeatures = this.features;
361
362 super.setFeatures(features);
363
364 this.featuresChanged = true;
365
366 //4. sort out the listeners
367 if (eventHandler != null)
368 oldFeatures.removeFeatureMapListener(eventHandler);
369 else
370 eventHandler = new EventsHandler();
371 this.features.addFeatureMapListener(eventHandler);
372 }
373
374
375 /**
376 * All the events from the features are handled by
377 * this inner class.
378 */
379 class EventsHandler implements gate.event.FeatureMapListener {
380 public void featureMapUpdated(){
381 //tell the document that its features have been updated
382 featuresChanged = true;
383 }
384 }
385
386 /**
387 * Overriden to remove the features listener, when the document is closed.
388 */
389 public void cleanup() {
390 super.cleanup();
391 if (eventHandler != null)
392 this.features.removeFeatureMapListener(eventHandler);
393 }///inner class EventsHandler
394
395
396
397 public void setInitData__$$__(Object data) {
398
399 HashMap initData = (HashMap)data;
400
401 this.name = (String)initData.get("CORP_NAME");
402 this.dataStore = (DatabaseDataStore)initData.get("DS");
403 this.lrPersistentId = (Long)initData.get("LR_ID");
404 this.features = (FeatureMap)initData.get("CORP_FEATURES");
405 this.supportList = new ArrayList((List)initData.get("CORP_SUPPORT_LIST"));
406
407 this.documentData = new ArrayList(this.supportList.size());
408 this.removedDocuments = new ArrayList();
409 this.addedDocuments = new ArrayList();
410
411 //init the documentData list
412 for (int i=0; i< this.supportList.size(); i++) {
413 Document dbDoc = (Document)this.supportList.get(i);
414 DocumentData dd = new DocumentData(dbDoc.getName(),dbDoc.getLRPersistenceId());
415 this.documentData.add(dd);
416 }
417
418 this.featuresChanged = false;
419 this.nameChanged = false;
420
421 //3. add the listeners for the features
422 if (eventHandler == null)
423 eventHandler = new EventsHandler();
424 this.features.addFeatureMapListener(eventHandler);
425
426
427 //4. add self as listener for the data store, so that we'll know when the DS is
428 //synced and we'll clear the isXXXChanged flags
429 this.dataStore.addDatastoreListener(this);
430 }
431
432 public Object getInitData__$$__(Object initData) {
433 return null;
434 }
435
436 /**
437 * Gets the names of the documents in this corpus.
438 * @return a {@link List} of Strings representing the names of the documents
439 * in this corpus.
440 */
441 public List<String> getDocumentNames(){
442
443 List<String> docsNames = new ArrayList<String>();
444
445 if(this.documentData == null)
446 return docsNames;
447
448 for (Object aDocumentData : this.documentData) {
449 DocumentData data = (DocumentData) aDocumentData;
450 docsNames.add(data.getDocumentName());
451 }
452
453 return docsNames;
454 }
455
456
457 /**
458 * Gets the name of a document in this corpus.
459 * @param index the index of the document
460 * @return a String value representing the name of the document at
461 * <tt>index</tt> in this corpus.<P>
462 */
463 public String getDocumentName(int index){
464
465 if (index >= this.documentData.size()) return "No such document";
466
467 return ((DocumentData)this.documentData.get(index)).getDocumentName();
468 }
469
470 /**
471 * returns a document in the coprus by index
472 * @param index the index of the document
473 * @return an Object value representing DatabaseDocumentImpl
474 */
475 public Object get(int index){
476
477 //0. preconditions
478 Assert.assertTrue(index >= 0);
479 Assert.assertTrue(index < this.documentData.size());
480 Assert.assertTrue(index < this.supportList.size());
481 Assert.assertTrue(this.documentData.size() == this.supportList.size());
482
483 if (index >= this.documentData.size())
484 return null;
485
486 Object res = this.supportList.get(index);
487
488 //if the document is null, then I must get it from the database
489 if (null == res) {
490 Long currLRID = (Long)((DocumentData)this.documentData.get(index)).getPersistentID();
491 FeatureMap params = Factory.newFeatureMap();
492 params.put(DataStore.DATASTORE_FEATURE_NAME, this.getDataStore());
493 params.put(DataStore.LR_ID_FEATURE_NAME, currLRID);
494
495 try {
496 Document dbDoc = (Document)Factory.createResource(DBHelper.DOCUMENT_CLASS, params);
497
498 if (DEBUG) {
499 Out.prln("Loaded document :" + dbDoc.getName());
500 }
501
502 //change the result to the newly loaded doc
503 res = dbDoc;
504
505 //finally replace the doc with the instantiated version
506 Assert.assertNull(this.supportList.get(index));
507 this.supportList.set(index, dbDoc);
508 }
509 catch (ResourceInstantiationException ex) {
510 Err.prln("Error reading document inside a serialised corpus.");
511 throw new GateRuntimeException(ex.getMessage());
512 }
513 }
514
515 return res;
516 }
517
518 public Object remove(int index){
519
520 //1. get the persistent id and add it to the removed list
521 DocumentData docData = (DocumentData)this.documentData.get(index);
522 Long removedID = (Long)docData.getPersistentID();
523 // Assert.assertTrue(null != removedID);
524 //removedID may be NULL if the doc is still transient
525
526 //2. add to the list of removed documents but only if it's not newly added
527 //othewrwise just ignore
528 if (null != removedID && false == this.addedDocuments.contains(removedID)) {
529 this.removedDocuments.add(removedID);
530 //Out.prln("adding ["+removedID+"] to RemovedDocs...");
531 }
532
533 //3. delete
534 this.documentData.remove(index);
535 Document res = (Document)this.supportList.remove(index);
536
537 //4. remove the LRID to the set of newly added documents (if there) so that upon sync() a reference
538 // from the doc to the corpus will NOT be added in the database
539 if (this.addedDocuments.contains(removedID)) {
540 this.addedDocuments.remove(removedID);
541 //Out.prln("removing ["+removedID+"] from NewlyAddedDocs...");
542 }
543
544 //5, fire events
545 fireDocumentRemoved(new CorpusEvent(DatabaseCorpusImpl.this,
546 res,
547 index,
548 removedID,
549 CorpusEvent.DOCUMENT_REMOVED));
550 return res;
551
552 }
553
554
555 public boolean remove(Object obj){
556
557 //0. preconditions
558 Assert.assertNotNull(obj);
559 Assert.assertTrue(obj instanceof DatabaseDocumentImpl);
560
561 if (false == obj instanceof Document) {
562 return false;
563 }
564
565 Document doc = (Document) obj;
566
567 //see if we can find it first. If not, then judt return
568 int index = findDocument(doc);
569 if (index == -1) {
570 return false;
571 }
572
573 if(index < this.documentData.size()) {
574 //we found it, so remove it
575
576 //1. get the persistent id and add it to the removed list
577 DocumentData docData = (DocumentData)this.documentData.get(index);
578 Long removedID = (Long)docData.getPersistentID();
579 //Assert.assertTrue(null != removedID);
580 //removed ID may be null - doc is still transient
581
582 //2. add to the list of removed documents
583 if (null != removedID && false == this.addedDocuments.contains(removedID)) {
584 this.removedDocuments.add(removedID);
585 //Out.prln("adding ["+removedID+"] to RemovedDocs...");
586 }
587
588 //3. delete
589 this.documentData.remove(index);
590 Document oldDoc = (Document) this.supportList.remove(index);
591
592 //4. remove the LRID to the set of newly added documents (if there) so that upon sync() a reference
593 // from the doc to the corpus will NOT be added in the database
594 if (this.addedDocuments.contains(removedID)) {
595 this.addedDocuments.remove(removedID);
596 //Out.prln("removing ["+removedID+"] from NewlyAddedDocs...");
597 }
598
599 //5. fire events
600 fireDocumentRemoved(new CorpusEvent(DatabaseCorpusImpl.this,
601 oldDoc,
602 index,
603 removedID,
604 CorpusEvent.DOCUMENT_REMOVED));
605 }
606
607 return true;
608 }
609
610
611 public int findDocument(Document doc) {
612
613 boolean found = false;
614 DocumentData docData = null;
615
616 //first try finding the document in memory
617 int index = this.supportList.indexOf(doc);
618
619 if (index > -1 && index < this.documentData.size()) {
620 return index;
621 }
622
623 //else try finding a document with the same name and persistent ID
624 Iterator iter = this.documentData.iterator();
625
626 for (index = 0; iter.hasNext(); index++) {
627 docData = (DocumentData) iter.next();
628 if (docData.getDocumentName().equals(doc.getName()) &&
629 docData.getPersistentID().equals(doc.getLRPersistenceId())) {
630 found = true;
631 break;
632 }
633 }
634
635 if (found && index < this.documentData.size()) {
636 return index;
637 }
638 else {
639 return -1;
640 }
641 }//findDocument
642
643
644 public boolean contains(Object o){
645 //return true if:
646 // - the document data list contains a document with such a name
647 // and persistent id
648
649 if(false == o instanceof Document)
650 return false;
651
652 int index = findDocument((Document) o);
653
654 if (index < 0) {
655 return false;
656 }
657 else {
658 return true;
659 }
660 }
661
662 public Iterator iterator(){
663 return new DatabaseCorpusIterator(this.documentData);
664 }
665
666 public List getLoadedDocuments() {
667 return new ArrayList(this.supportList);
668 }
669
670 public List getRemovedDocuments() {
671 return new ArrayList(this.removedDocuments);
672 }
673
674 public List getAddedDocuments() {
675 return new ArrayList(this.addedDocuments);
676 }
677
678 private class DatabaseCorpusIterator implements Iterator {
679
680 private Iterator docDataIter;
681 private List docDataList;
682
683 public DatabaseCorpusIterator(List docDataList) {
684 this.docDataList = docDataList;
685 this.docDataIter = this.docDataList.iterator();
686 }
687
688 public boolean hasNext() {
689 return docDataIter.hasNext();
690 }
691
692 public Object next(){
693
694 //try finding a document with the same name and persistent ID
695 DocumentData docData = (DocumentData)docDataIter.next();
696 int index = this.docDataList.indexOf(docData);
697 return DatabaseCorpusImpl.this.get(index);
698 }
699
700 public void remove() {
701 throw new UnsupportedOperationException("DatabaseCorpusImpl does not " +
702 "support remove in the iterators");
703 }
704 }
705
706
707 /**
708 * Unloads the document from memory, but calls sync() first, to store the
709 * changes
710 */
711 public void unloadDocument(int index) {
712
713 //preconditions
714 Assert.assertTrue(index >= 0);
715
716 //1. check whether its been loaded and is a persistent one
717 // if a persistent doc is not loaded, there's nothing we need to do
718 if ( (! isDocumentLoaded(index)) && isPersistentDocument(index)) {
719 return;
720 }
721
722 //2. sync the document before releasing it from memory, because the
723 //creole register garbage collects all LRs which are not used any more
724 Document doc = (Document)this.supportList.get(index);
725 Assert.assertNotNull(doc);
726
727 try {
728
729 //if the document is not already adopted, we need to do that first
730 if (doc.getLRPersistenceId() == null) {
731
732 //3.2 get the security info for the corpus
733 SecurityInfo si = this.getDataStore().getSecurityInfo(this);
734 Document dbDoc = (Document) this.getDataStore().adopt(doc, si);
735 }
736 else {
737 //if it is adopted, just sync it
738 this.getDataStore().sync(doc);
739 }
740
741 //3. remove the document from the memory
742 //do this, only if the saving has succeeded
743 // WAS this.supportList.remove(index);
744 this.supportList.set(index,null);
745 }
746 catch (PersistenceException pex) {
747 throw new GateRuntimeException("Error unloading document from corpus"
748 + "because document sync failed: " + pex.getMessage());
749 }
750 catch (gate.security.SecurityException sex) {
751 throw new GateRuntimeException("Error unloading document from corpus"
752 + "because of document access error: " + sex.getMessage());
753 }
754
755 }
756
757 /**
758 * Unloads a document from memory
759 */
760 public void unloadDocument(Document doc) {
761
762 Assert.assertNotNull(doc);
763
764 //1. determine the index of the document; if not there, do nothing
765 int index = findDocument(doc);
766
767 if (index == -1) {
768 return;
769 }
770
771 unloadDocument(index);
772 }
773
774
775 /**
776 * This method returns true when the document is already loaded in memory
777 */
778 public boolean isDocumentLoaded(int index) {
779
780 //preconditions
781 Assert.assertTrue(index >= 0);
782
783 if (this.supportList == null || this.supportList.isEmpty()) {
784 return false;
785 }
786
787 return this.supportList.get(index) != null;
788 }
789
790 /**
791 * This method returns true when the document is already stored on disk
792 * i.e., is not transient
793 */
794 public boolean isPersistentDocument(int index) {
795
796 //preconditions
797 Assert.assertTrue(index >= 0);
798
799 if (this.supportList == null || this.supportList.isEmpty()) {
800 return false;
801 }
802
803 return (((DocumentData)this.documentData.get(index)).getPersistentID() != null);
804 }
805
806
807 public boolean equals(Object o){
808
809 if (! (o instanceof DatabaseCorpusImpl))
810 return false;
811
812 DatabaseCorpusImpl dbCorp = (DatabaseCorpusImpl)o;
813
814 if (this.getDataStore() != null && this.getDataStore() != dbCorp.getDataStore()) {
815 return false;
816 }
817
818 if (this.getLRPersistenceId() != null && this.getLRPersistenceId() != dbCorp.getLRPersistenceId()) {
819 return false;
820 }
821
822 return supportList.equals(o);
823 }
824
825 /**
826 * DatabaseCorpusImpl does not support duplication.
827 */
828 public Resource duplicate(Factory.DuplicationContext ctx)
829 throws ResourceInstantiationException {
830 throw new ResourceInstantiationException("Duplication of "
831 + this.getClass().getName() + " not permitted");
832 }
833
834
835 }
|