0001 /*
0002 * DatabaseDocumentImpl.java
0003 *
0004 * Copyright (c) 1995-2010, The University of Sheffield. See the file
0005 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
0006 *
0007 * This file is part of GATE (see http://gate.ac.uk/), and is free
0008 * software, licenced under the GNU Library General Public License,
0009 * Version 2, June 1991 (in the distribution as file licence.html,
0010 * and also available at http://gate.ac.uk/gate/licence.html).
0011 *
0012 * Marin Dimitrov, 16/Oct/2001
0013 *
0014 * $Id: DatabaseDocumentImpl.java 12006 2009-12-01 17:24:28Z thomas_heitz $
0015 */
0016
0017 package gate.corpora;
0018
0019
0020 import java.io.IOException;
0021 import java.net.URL;
0022 import java.sql.*;
0023 import java.util.*;
0024
0025 import junit.framework.Assert;
0026 import oracle.jdbc.driver.OraclePreparedStatement;
0027
0028 import gate.*;
0029 import gate.annotation.AnnotationSetImpl;
0030 import gate.annotation.DatabaseAnnotationSetImpl;
0031 import gate.creole.ResourceInstantiationException;
0032 import gate.event.*;
0033 import gate.persist.*;
0034 import gate.util.*;
0035
0036 public class DatabaseDocumentImpl extends DocumentImpl
0037 implements //DatastoreListener,
0038 //Document,
0039 EventAwareDocument {
0040
/** When true, extra tracing is printed while annotations are read from the DB. */
private static final boolean DEBUG = false;

/** Set to true once _readContent() has loaded content and encoding from the DB. */
private boolean isContentRead;
/** Monitor guarding the lazy load of content/encoding (see getContent(), getEncoding()). */
private Object contentLock;
/** JDBC connection used for all queries issued by this document. */
private Connection jdbcConn;
/** Schema prefix (from DBHelper.getSchemaPrefix) prepended to every view name in SQL. */
private String jdbcSchema;
/** Database flavour (from DBHelper.getDatabaseType); ORACLE_DB or POSTGRES_DB. */
protected int dbType;

// "dirty" flags reported through isResourceChanged(int)
/** Set by setContent(). */
private boolean contentChanged;
/** Set by setFeatures() and by the feature map listener. */
private boolean featuresChanged;
/** Set by setName(). */
private boolean nameChanged;
/** Set by the source URL / offset / markup-aware setters. */
private boolean documentChanged;

/** Initialised empty by the constructors; presumably names of removed sets - TODO confirm. */
private Collection removedAnotationSets;
/** Names of the named annotation sets created by getAnnotations(String). */
private Collection addedAnotationSets;

/** When non-null, getContent() delegates to it and setContent() is refused. */
private Document parentDocument;
/** Highest annotation id seen so far by _setAnnotations(). */
private int maxAnnotationId;

/**
 * The listener for the events coming from the features.
 */
protected EventsHandler eventHandler;
0064
0065
/**
 * Creates a document that is not yet bound to a JDBC connection.
 * Nothing is loaded and every "changed" flag starts out cleared.
 */
public DatabaseDocumentImpl() {

  // lazy-loading state
  this.contentLock = new Object();
  this.isContentRead = false;

  // annotation set bookkeeping
  this.namedAnnotSets = new HashMap();
  this.addedAnotationSets = new Vector();
  this.removedAnotationSets = new Vector();

  // nothing has been modified yet
  this.documentChanged = false;
  this.nameChanged = false;
  this.featuresChanged = false;
  this.contentChanged = false;

  // a freshly created document is not a child of another document
  this.parentDocument = null;
}
0086
0087 private void setDatabaseInfo(Connection conn)
0088 throws PersistenceException {
0089
0090 String url = null;
0091
0092 try {
0093 url = conn.getMetaData().getURL();
0094 }
0095 catch(SQLException sqle) {
0096 throw new PersistenceException("cannot get jdbc metadata: ["+sqle.getMessage()+"]");
0097 }
0098
0099 this.jdbcSchema = DBHelper.getSchemaPrefix(url);
0100 this.dbType = DBHelper.getDatabaseType(url);
0101 Assert.assertNotNull(this.jdbcSchema);
0102 Assert.assertTrue(this.dbType == DBHelper.ORACLE_DB ||
0103 this.dbType == DBHelper.POSTGRES_DB);
0104
0105 }
0106
0107
/**
 * Creates a document bound to the given JDBC connection. The schema prefix
 * and database type are derived from the connection; nothing is loaded yet
 * and every "changed" flag starts out cleared.
 *
 * @param conn the JDBC connection used for all later queries
 * @throws PersistenceException if the connection metadata cannot be read
 */
public DatabaseDocumentImpl(Connection conn)
  throws PersistenceException {

  // lazy-loading state
  this.contentLock = new Object();
  this.isContentRead = false;

  // annotation set bookkeeping
  this.namedAnnotSets = new HashMap();
  this.addedAnotationSets = new Vector();
  this.removedAnotationSets = new Vector();

  // nothing has been modified yet
  this.documentChanged = false;
  this.nameChanged = false;
  this.featuresChanged = false;
  this.contentChanged = false;

  // a freshly created document is not a child of another document
  this.parentDocument = null;

  // bind to the database and work out schema prefix / DB flavour
  this.jdbcConn = conn;
  setDatabaseInfo(this.jdbcConn);
}
0131
0132
0133 /* public DatabaseDocumentImpl(Connection _conn,
0134 String _name,
0135 DatabaseDataStore _ds,
0136 Long _persistenceID,
0137 DocumentContent _content,
0138 FeatureMap _features,
0139 Boolean _isMarkupAware,
0140 URL _sourceURL,
0141 Long _urlStartOffset,
0142 Long _urlEndOffset,
0143 AnnotationSet _default,
0144 Map _named) {
0145
0146 //this.jdbcConn = _conn;
0147 this(_conn);
0148
0149 this.name = _name;
0150 this.dataStore = _ds;
0151 this.lrPersistentId = _persistenceID;
0152 this.content = _content;
0153 this.isContentRead = true;
0154 this.features = _features;
0155 this.markupAware = _isMarkupAware;
0156 this.sourceUrl = _sourceURL;
0157 this.sourceUrlStartOffset = _urlStartOffset;
0158 this.sourceUrlEndOffset = _urlEndOffset;
0159
0160 //annotations
0161 //1. default
0162 _setAnnotations(null,_default);
0163
0164 //2. named (if any)
0165 if (null != _named) {
0166 Iterator itNamed = _named.values().iterator();
0167 while (itNamed.hasNext()){
0168 AnnotationSet currSet = (AnnotationSet)itNamed.next();
0169 //add them all to the DBAnnotationSet
0170 _setAnnotations(currSet.getName(),currSet);
0171 }
0172 }
0173
0174 //3. add the listeners for the features
0175 if (eventHandler == null)
0176 eventHandler = new EventsHandler();
0177 this.features.addFeatureMapListener(eventHandler);
0178
0179 //4. add self as listener for the data store, so that we'll know when the DS is
0180 //synced and we'll clear the isXXXChanged flags
0181 this.dataStore.addDatastoreListener(this);
0182 }
0183 */
0184
0185 /** The content of the document: a String for text; MPEG for video; etc. */
0186 public DocumentContent getContent() {
0187
0188 //1. if this is a child document then return the content of the parent resource
0189 if (null != this.parentDocument) {
0190 return this.parentDocument.getContent();
0191 }
0192 else {
0193 //2. assert that no one is reading from DB now
0194 synchronized(this.contentLock) {
0195 if (false == this.isContentRead) {
0196 _readContent();
0197 this.isContentRead = true;
0198 }
0199 }
0200
0201 //return content
0202 return super.getContent();
0203 }
0204 }
0205
0206 private void _readContent() {
0207
0208 //preconditions
0209 if (null == getLRPersistenceId()) {
0210 throw new GateRuntimeException("can't construct a DatabaseDocument - not associated " +
0211 " with any data store");
0212 }
0213
0214 if (false == getLRPersistenceId() instanceof Long) {
0215 throw new GateRuntimeException("can't construct a DatabaseDocument - " +
0216 " invalid persistence ID");
0217 }
0218
0219 Long lrID = (Long)getLRPersistenceId();
0220 //0. preconditions
0221 Assert.assertNotNull(lrID);
0222 Assert.assertTrue(false == this.isContentRead);
0223 Assert.assertNotNull(this.content);
0224
0225 //1. read from DB
0226 PreparedStatement pstmt = null;
0227 ResultSet rs = null;
0228
0229 try {
0230
0231 String sql = " select v1.enc_name, " +
0232 " v1.dc_character_content, " +
0233 " v1.dc_binary_content, " +
0234 " v1.dc_content_type " +
0235 " from "+this.jdbcSchema+"v_content v1 " +
0236 " where v1.lr_id = ? ";
0237 pstmt = this.jdbcConn.prepareStatement(sql);
0238 pstmt.setLong(1,lrID.longValue());
0239 pstmt.execute();
0240 rs = pstmt.getResultSet();
0241
0242 if (false == rs.next()) {
0243 throw new SynchronisationException("empty reault set");
0244 }
0245
0246 if (this.dbType == DBHelper.ORACLE_DB) {
0247
0248 String encoding = rs.getString("enc_name");
0249 if (encoding.equals(DBHelper.DUMMY_ENCODING)) {
0250 //no encoding was specified for this document
0251 encoding = "";
0252 }
0253 Clob clb = rs.getClob("dc_character_content");
0254 Blob blb = rs.getBlob("dc_binary_content");
0255 long contentType = rs.getLong("dc_content_type");
0256
0257 //binary documents are not supported yet
0258 Assert.assertTrue(DBHelper.CHARACTER_CONTENT == contentType ||
0259 DBHelper.EMPTY_CONTENT == contentType);
0260
0261 StringBuffer buff = new StringBuffer();
0262 OracleDataStore.readCLOB(clb,buff);
0263
0264 //2. set data members that were not previously initialized
0265 this.encoding = encoding;
0266
0267 //be aware than document content may be empty
0268 if (null != buff) {
0269 this.content = new DocumentContentImpl(buff.toString());
0270 }
0271 else {
0272 this.content = new DocumentContentImpl();
0273 }
0274
0275 }
0276
0277 else if (this.dbType == DBHelper.POSTGRES_DB) {
0278
0279 String encoding = rs.getString("enc_name");
0280 if (encoding.equals(DBHelper.DUMMY_ENCODING)) {
0281 //no encoding was specified for this document
0282 encoding = "";
0283 }
0284
0285 String content = rs.getString("dc_character_content");
0286 long contentType = rs.getLong("dc_content_type");
0287
0288 //binary documents are not supported yet
0289 Assert.assertTrue(DBHelper.CHARACTER_CONTENT == contentType ||
0290 DBHelper.EMPTY_CONTENT == contentType);
0291
0292 //2. set data members that were not previously initialized
0293
0294 this.encoding = encoding;
0295
0296 //be aware than document content may be empty
0297 if (null != content) {
0298 this.content = new DocumentContentImpl(content);
0299 }
0300 else {
0301 this.content = new DocumentContentImpl();
0302 }
0303 }
0304 else {
0305 Assert.fail();
0306 }
0307 }
0308 catch(SQLException sqle) {
0309 throw new SynchronisationException("can't read content from DB: ["+ sqle.getMessage()+"]");
0310 }
0311 catch(IOException ioe) {
0312 throw new SynchronisationException(ioe);
0313 }
0314 finally {
0315 try {
0316 DBHelper.cleanup(rs);
0317 DBHelper.cleanup(pstmt);
0318 }
0319 catch(PersistenceException pe) {
0320 throw new SynchronisationException("JDBC error: ["+ pe.getMessage()+"]");
0321 }
0322 }
0323 }
0324
0325
0326 /** Get the encoding of the document content source */
0327 public String getEncoding() {
0328
0329 //1. assert that no one is reading from DB now
0330 synchronized(this.contentLock) {
0331 if (false == this.isContentRead) {
0332 _readContent();
0333
0334 this.isContentRead = true;
0335 }
0336 }
0337
0338 return super.getEncoding();
0339 }
0340
0341 /** Returns a map with the named annotation sets. It returns <code>null</code>
0342 * if no named annotaton set exists. */
0343 public Map<String, AnnotationSet> getNamedAnnotationSets() {
0344
0345 Vector annNames = new Vector();
0346
0347 PreparedStatement pstmt = null;
0348 ResultSet rs = null;
0349
0350 //1. get the names of all sets
0351 try {
0352 String sql = " select as_name " +
0353 " from "+this.jdbcSchema+"v_annotation_set " +
0354 " where lr_id = ? " +
0355 " and as_name is not null";
0356
0357 pstmt = this.jdbcConn.prepareStatement(sql);
0358 pstmt.setLong(1,((Long)this.lrPersistentId).longValue());
0359 pstmt.execute();
0360 rs = pstmt.getResultSet();
0361
0362 while (rs.next()) {
0363 annNames.add(rs.getString("as_name"));
0364 }
0365 }
0366 catch(SQLException sqle) {
0367 throw new SynchronisationException("can't get named annotatios: ["+ sqle.getMessage()+"]");
0368 }
0369 finally {
0370 try {
0371 DBHelper.cleanup(rs);
0372 DBHelper.cleanup(pstmt);
0373 }
0374 catch(PersistenceException pe) {
0375 throw new SynchronisationException("JDBC error: ["+ pe.getMessage()+"]");
0376 }
0377 }
0378
0379 //2. read annotations
0380 for (int i=0; i< annNames.size(); i++) {
0381 //delegate because of the data is already read getAnnotations() will just return
0382 getAnnotations((String)annNames.elementAt(i));
0383 }
0384
0385 //3. delegate to the parent method
0386 return super.getNamedAnnotationSets();
0387
0388 } // getNamedAnnotationSets
0389
0390
0391 /** Get the default set of annotations. The set is created if it
0392 * doesn't exist yet.
0393 */
0394 public AnnotationSet getAnnotations() {
0395
0396 //1. read from DB
0397 _getAnnotations(null);
0398
0399 //2. is there such set in the DB?
0400 if (null == this.defaultAnnots) {
0401 //create a DatabaseAnnotationSetImpl
0402 //NOTE: we create the set and then delegate to the super mehtod, otherwise
0403 //the super mehtod will create AnnotationSetImpl instead of DatabaseAnnotationSetImpl
0404 //which will not work with DatabaseDocumentImpl
0405 AnnotationSet aset = new DatabaseAnnotationSetImpl(this);
0406
0407 //set internal member
0408 this.defaultAnnots = aset;
0409
0410 //3. fire events
0411 fireAnnotationSetAdded(new DocumentEvent(this,
0412 DocumentEvent.ANNOTATION_SET_ADDED,
0413 null));
0414 }
0415
0416 //4. delegate
0417 return super.getAnnotations();
0418 } // getAnnotations()
0419
0420
0421 /** Get a named set of annotations. Creates a new set if one with this
0422 * name doesn't exist yet.
0423 * If the provided name is null then it returns the default annotation set.
0424 */
0425 public AnnotationSet getAnnotations(String name) {
0426
0427 if(name == null || "".equals(name)) return getAnnotations();
0428 //0. preconditions
0429 Assert.assertNotNull(name);
0430
0431 //1. read from DB if the set is there at all
0432 _getAnnotations(name);
0433
0434 //2. is there such set in the DB?
0435 if (false == this.namedAnnotSets.keySet().contains(name)) {
0436 //create a DatabaseAnnotationSetImpl
0437 //NOTE: we create the set and then delegate to the super mehtod, otherwise
0438 //the super mehtod will create AnnotationSetImpl instead of DatabaseAnnotationSetImpl
0439 //which will not work with DatabaseDocumentImpl
0440 AnnotationSet aset = new DatabaseAnnotationSetImpl(this,name);
0441
0442 //add to internal collection
0443 this.namedAnnotSets.put(name,aset);
0444
0445 //add the set name to the list with the recently created sets
0446 this.addedAnotationSets.add(name);
0447
0448 //3. fire events
0449 DocumentEvent evt = new DocumentEvent(this, DocumentEvent.ANNOTATION_SET_ADDED, name);
0450 fireAnnotationSetAdded(evt);
0451 }
0452
0453 //3. delegate
0454 return super.getAnnotations(name);
0455 }
0456
0457
0458 private void _getAnnotations(String name) {
0459
0460 AnnotationSet as = null;
0461
0462 //preconditions
0463 if (null == getLRPersistenceId()) {
0464 throw new GateRuntimeException("can't construct a DatabaseDocument - not associated " +
0465 " with any data store");
0466 }
0467
0468 if (false == getLRPersistenceId() instanceof Long) {
0469 throw new GateRuntimeException("can't construct a DatabaseDocument - " +
0470 " invalid persistence ID");
0471 }
0472
0473 //have we already read this set?
0474
0475 if (null == name) {
0476 //default set
0477 if (this.defaultAnnots != null) {
0478 //the default set is alredy read - do nothing
0479 //super methods will take care
0480 return;
0481 }
0482 }
0483 else {
0484 //named set
0485 if (this.namedAnnotSets.containsKey(name)) {
0486 //we've already read it - do nothing
0487 //super methods will take care
0488 return;
0489 }
0490 }
0491
0492 Long lrID = (Long)getLRPersistenceId();
0493 Long asetID = null;
0494 //0. preconditions
0495 Assert.assertNotNull(lrID);
0496
0497 //1. read a-set info
0498 PreparedStatement pstmt = null;
0499 ResultSet rs = null;
0500 try {
0501 String sql = " select as_id " +
0502 " from "+this.jdbcSchema+"v_annotation_set " +
0503 " where lr_id = ? ";
0504 //do we have aset name?
0505 String clause = null;
0506 if (null != name) {
0507 clause = " and as_name = ? ";
0508 }
0509 else {
0510 clause = " and as_name is null ";
0511 }
0512 sql = sql + clause;
0513
0514 pstmt = this.jdbcConn.prepareStatement(sql);
0515 pstmt.setLong(1,lrID.longValue());
0516 if (null != name) {
0517 pstmt.setString(2,name);
0518 }
0519 pstmt.execute();
0520 rs = pstmt.getResultSet();
0521
0522 if (rs.next()) {
0523 //ok, there is such aset in the DB
0524 asetID = new Long(rs.getLong(1));
0525 }
0526 else {
0527 //wow, there is no such aset, so create new ...
0528 //... by delegating to the super method
0529 return;
0530 }
0531
0532 //1.5 cleanup
0533 DBHelper.cleanup(rs);
0534 DBHelper.cleanup(pstmt);
0535
0536 //2. read annotation Features
0537 HashMap featuresByAnnotationID = _readFeatures(asetID);
0538
0539 //3. read annotations
0540 AnnotationSetImpl transSet = new AnnotationSetImpl(this);
0541
0542 String hint;
0543
0544 if (this.dbType == DBHelper.ORACLE_DB) {
0545 hint = "/*+ use_nl(v.t_annotation v.t_as_annotation) " +
0546 " use_nl(v.t_annotation_type v.t_annotation) "+
0547 " */";
0548 }
0549 else {
0550 hint = "";
0551 }
0552
0553 String sql1 = " select "+hint+
0554 " ann_local_id, " +
0555 " at_name, " +
0556 " start_offset, " +
0557 " end_offset " +
0558 " from "+this.jdbcSchema+"v_annotation v" +
0559 " where asann_as_id = ? ";
0560
0561 if (DEBUG) Out.println(">>>>> asetID=["+asetID+"]");
0562
0563 pstmt = this.jdbcConn.prepareStatement(sql1);
0564 pstmt.setLong(1,asetID.longValue());
0565
0566 if (this.dbType == DBHelper.ORACLE_DB) {
0567 ((OraclePreparedStatement)pstmt).setRowPrefetch(DBHelper.CHINK_SIZE_LARGE);
0568 }
0569 pstmt.execute();
0570 rs = pstmt.getResultSet();
0571
0572 while (rs.next()) {
0573 //1. read data memebers
0574 Integer annID = new Integer(rs.getInt(1));
0575 String type = rs.getString(2);
0576 Long startOffset = new Long(rs.getLong(3));
0577 Long endOffset = new Long(rs.getLong(4));
0578
0579 if (DEBUG) Out.println("ann_local_id=["+annID+"]");
0580 if (DEBUG) Out.println("start_off=["+startOffset+"]");
0581 if (DEBUG) Out.println("end_off=["+endOffset+"]");
0582
0583 //2. get the features
0584 FeatureMap fm = (FeatureMap)featuresByAnnotationID.get(annID);
0585 //fm should NOT be null
0586 if (null == fm) {
0587 fm = new SimpleFeatureMapImpl();
0588 }
0589
0590 //3. add to annotation set
0591 transSet.add(annID,startOffset,endOffset,type,fm);
0592 }//while
0593
0594 //1.5, create a-set
0595 if (null == name) {
0596 as = new DatabaseAnnotationSetImpl(this, transSet);
0597 }
0598 else {
0599 as = new DatabaseAnnotationSetImpl(this,name, transSet);
0600 }
0601 }
0602 catch(SQLException sqle) {
0603 throw new SynchronisationException("can't read annotations from DB: ["+ sqle.getMessage()+"]");
0604 }
0605 catch(InvalidOffsetException oe) {
0606 throw new SynchronisationException(oe);
0607 }
0608 catch(PersistenceException pe) {
0609 throw new SynchronisationException("JDBC error: ["+ pe.getMessage()+"]");
0610 }
0611 finally {
0612 try {
0613 DBHelper.cleanup(rs);
0614 DBHelper.cleanup(pstmt);
0615 }
0616 catch(PersistenceException pe) {
0617 throw new SynchronisationException("JDBC error: ["+ pe.getMessage()+"]");
0618 }
0619 }
0620
0621
0622 //4. update internal data members
0623 if (name == null) {
0624 //default as
0625 this.defaultAnnots = as;
0626 }
0627 else {
0628 //named as
0629 this.namedAnnotSets.put(name,as);
0630 }
0631
0632 //don't return the new aset, the super method will take care
0633 return;
0634 }
0635
0636
0637
0638
0639 private HashMap _readFeatures(Long asetID) {
0640
0641 PreparedStatement pstmt = null;
0642 ResultSet rs = null;
0643
0644 //1
0645 String prevKey = DBHelper.DUMMY_FEATURE_KEY;
0646 String currKey = null;
0647
0648 Integer prevAnnID = null;
0649 Integer currAnnID = null;
0650
0651 Object currFeatureValue = null;
0652 Vector currFeatureArray = new Vector();
0653
0654 HashMap currFeatures = new HashMap();
0655 FeatureMap annFeatures = null;
0656
0657 HashMap featuresByAnnotID = new HashMap();
0658
0659 //2. read the features from DB
0660
0661 try {
0662
0663 if (this.dbType == DBHelper.ORACLE_DB) {
0664 String sql = " select /*+ use_nl(v.t_annotation v.t_as_annotation) "+
0665 " use_nl(v.t_feature v.t_annotation) "+
0666 " index(v.t_feature xt_feature_01) "+
0667 " use_nl(v.t_feature_key v.t_feature) "+
0668 " full(v.t_feature_key) "+
0669 " */ "+
0670 " " +
0671 " ann_local_id, " +
0672 " key, " +
0673 " ft_value_type, " +
0674 " ft_number_value, " +
0675 " ft_character_value, " +
0676 " ft_long_character_value, " +
0677 " ft_binary_value " +
0678 " from "+this.jdbcSchema+"v_annotation_features v" +
0679 " where set_id = ? " +
0680 " order by ann_local_id,key ";
0681
0682 pstmt = this.jdbcConn.prepareStatement(sql);
0683 pstmt.setLong(1,asetID.longValue());
0684 ((OraclePreparedStatement)pstmt).setRowPrefetch(DBHelper.CHINK_SIZE_LARGE);
0685 pstmt.execute();
0686 rs = pstmt.getResultSet();
0687 }
0688
0689 else if (this.dbType == DBHelper.POSTGRES_DB) {
0690
0691 String sql = " select " +
0692 " ann_local_id, " +
0693 " key, " +
0694 " ft_value_type, " +
0695 " ft_int_value, " +
0696 " ft_float_value, " +
0697 " ft_character_value, " +
0698 " ft_binary_value " +
0699 " from "+this.jdbcSchema+"v_annotation_features " +
0700 " where set_id = ? " +
0701 " order by ann_local_id,key ";
0702
0703 pstmt = this.jdbcConn.prepareStatement(sql);
0704 pstmt.setLong(1,asetID.longValue());
0705 pstmt.execute();
0706 rs = pstmt.getResultSet();
0707 }
0708
0709 else {
0710 Assert.fail();
0711 }
0712
0713 while (rs.next()) {
0714 //NOTE: because there are LOBs in the resulset
0715 //the columns should be read in the order they appear
0716 //in the query
0717
0718 prevAnnID = currAnnID;
0719 currAnnID = new Integer(rs.getInt("ann_local_id"));
0720
0721 //2.1 is this a new Annotation?
0722 if (!currAnnID.equals(prevAnnID) && prevAnnID != null) {
0723 //new one
0724 //2.1.1 normalize the hashmap with the features, and add
0725 //the elements into a new FeatureMap
0726 annFeatures = new SimpleFeatureMapImpl();
0727 Set entries = currFeatures.entrySet();
0728 Iterator itFeatureArrays = entries.iterator();
0729
0730 while(itFeatureArrays.hasNext()) {
0731 Map.Entry currEntry = (Map.Entry)itFeatureArrays.next();
0732 String key = (String)currEntry.getKey();
0733 Vector val = (Vector)currEntry.getValue();
0734
0735 //add to feature map normalized array
0736 Assert.assertTrue(val.size() >= 1);
0737
0738 if (val.size() == 1) {
0739 //the single elemnt of the array
0740 annFeatures.put(key,val.firstElement());
0741 }
0742 else {
0743 //the whole array
0744 annFeatures.put(key,val);
0745 }
0746 }//while
0747
0748 //2.1.2. add the featuremap for this annotation to the hashmap
0749 featuresByAnnotID.put(prevAnnID,annFeatures);
0750 //2.1.3. clear temp hashtable with feature vectors
0751 currFeatures.clear();
0752 /*??*/ prevAnnID = currAnnID;
0753 }//if -- is new annotation
0754
0755 currKey = rs.getString("key");
0756 Long valueType = new Long(rs.getLong("ft_value_type"));
0757
0758 //we don't quite know what is the type of the NUMBER
0759 //stored in DB
0760 Object numberValue = null;
0761
0762 //for all numeric types + boolean -> read from DB as appropriate
0763 //Java object
0764 switch(valueType.intValue()) {
0765
0766 case DBHelper.VALUE_TYPE_BOOLEAN:
0767
0768 if (this.dbType == DBHelper.ORACLE_DB) {
0769 numberValue = new Boolean(rs.getBoolean("ft_number_value"));
0770 }
0771 else if (this.dbType == DBHelper.POSTGRES_DB){
0772 numberValue = new Boolean(rs.getBoolean("ft_int_value"));
0773 }
0774 else {
0775 Assert.fail();
0776 }
0777
0778 break;
0779
0780
0781 case DBHelper.VALUE_TYPE_FLOAT:
0782
0783 if (this.dbType == DBHelper.ORACLE_DB) {
0784 numberValue = new Float(rs.getFloat("ft_number_value"));
0785 }
0786 else if (this.dbType == DBHelper.POSTGRES_DB){
0787 numberValue = new Float(rs.getFloat("ft_float_value"));
0788 }
0789 else {
0790 Assert.fail();
0791 }
0792
0793 break;
0794
0795 case DBHelper.VALUE_TYPE_INTEGER:
0796
0797 if (this.dbType == DBHelper.ORACLE_DB) {
0798 numberValue = new Integer(rs.getInt("ft_number_value"));
0799 }
0800 else if (this.dbType == DBHelper.POSTGRES_DB){
0801 numberValue = new Integer(rs.getInt("ft_int_value"));
0802 }
0803 else {
0804 Assert.fail();
0805 }
0806
0807 break;
0808
0809 case DBHelper.VALUE_TYPE_LONG:
0810
0811 if (this.dbType == DBHelper.ORACLE_DB) {
0812 numberValue = new Long(rs.getLong("ft_number_value"));
0813 }
0814 else if (this.dbType == DBHelper.POSTGRES_DB){
0815 numberValue = new Long(rs.getLong("ft_int_value"));
0816 }
0817 else {
0818 Assert.fail();
0819 }
0820
0821 break;
0822
0823 default:
0824 //do nothing, will be handled in the next switch statement
0825 }
0826
0827 //don't forget to read the rest of the current row
0828 String stringValue = rs.getString("ft_character_value");
0829 Clob clobValue = null;
0830 Blob blobValue = null;
0831
0832 if (this.dbType == DBHelper.ORACLE_DB) {
0833 clobValue = rs.getClob("ft_long_character_value");
0834 blobValue = rs.getBlob("ft_binary_value");
0835 }
0836
0837 switch(valueType.intValue()) {
0838
0839 case DBHelper.VALUE_TYPE_NULL:
0840 currFeatureValue = null;
0841 break;
0842
0843 case DBHelper.VALUE_TYPE_BINARY:
0844 throw new MethodNotImplementedException();
0845
0846 case DBHelper.VALUE_TYPE_BOOLEAN:
0847 case DBHelper.VALUE_TYPE_FLOAT:
0848 case DBHelper.VALUE_TYPE_INTEGER:
0849 case DBHelper.VALUE_TYPE_LONG:
0850 currFeatureValue = numberValue;
0851 break;
0852
0853 case DBHelper.VALUE_TYPE_STRING:
0854
0855 if (this.dbType == DBHelper.ORACLE_DB && null == stringValue) {
0856 //this one is tricky too
0857 //if the string is < 4000 bytes long then it's stored as varchar2
0858 //otherwise as CLOB
0859
0860 StringBuffer temp = new StringBuffer();
0861 OracleDataStore.readCLOB(clobValue,temp);
0862 currFeatureValue = temp.toString();
0863 }
0864 else { /* PostgresDB or (Oracle DB + value is stored in varchar column) */
0865 currFeatureValue = stringValue;
0866 }
0867 break;
0868
0869 default:
0870 throw new SynchronisationException("Invalid feature type found in DB, value is ["+valueType+"]");
0871 }//switch
0872
0873 //ok, we got the key/value pair now
0874 //2.2 is this a new feature key?
0875 if (false == currFeatures.containsKey(currKey)) {
0876 //new key
0877 Vector keyValue = new Vector();
0878 keyValue.add(currFeatureValue);
0879 currFeatures.put(currKey,keyValue);
0880 }
0881 else {
0882 //key is present, append to existing vector
0883 ((Vector)currFeatures.get(currKey)).add(currFeatureValue);
0884 }
0885
0886 prevKey = currKey;
0887 }//while
0888
0889
0890 //2.3 process the last Annotation left
0891 annFeatures = new SimpleFeatureMapImpl();
0892
0893 Set entries = currFeatures.entrySet();
0894 Iterator itFeatureArrays = entries.iterator();
0895
0896 while(itFeatureArrays.hasNext()) {
0897 Map.Entry currEntry = (Map.Entry)itFeatureArrays.next();
0898 String key = (String)currEntry.getKey();
0899 Vector val = (Vector)currEntry.getValue();
0900
0901 //add to feature map normalized array
0902 Assert.assertTrue(val.size() >= 1);
0903
0904 if (val.size() == 1) {
0905 //the single elemnt of the array
0906 annFeatures.put(key,val.firstElement());
0907 }
0908 else {
0909 //the whole array
0910 annFeatures.put(key,val);
0911 }
0912 }//while
0913
0914 //2.3.1. add the featuremap for this annotation to the hashmap
0915 if (null != currAnnID) {
0916 // do we have features at all for this annotation?
0917 featuresByAnnotID.put(currAnnID,annFeatures);
0918 }
0919
0920 //3. return the hashmap
0921 return featuresByAnnotID;
0922 }
0923 catch(SQLException sqle) {
0924 throw new SynchronisationException("can't read content from DB: ["+ sqle.getMessage()+"]");
0925 }
0926 catch(IOException sqle) {
0927 throw new SynchronisationException("can't read content from DB: ["+ sqle.getMessage()+"]");
0928 }
0929 finally {
0930 try {
0931 DBHelper.cleanup(rs);
0932 DBHelper.cleanup(pstmt);
0933 }
0934 catch(PersistenceException pe) {
0935 throw new SynchronisationException("JDBC error: ["+ pe.getMessage()+"]");
0936 }
0937 }
0938 }
0939
0940
0941 /** Set method for the document content */
0942 public void setContent(DocumentContent content) {
0943
0944 //if the document is a child document then setContent()is prohibited
0945 if (null != this.parentDocument) {
0946 Err.prln("content of document ["+this.name+"] cannot be changed!");
0947 return;
0948 }
0949 else {
0950 super.setContent(content);
0951 this.contentChanged = true;
0952 }
0953 }
0954
0955 /** Set the feature set */
0956 public void setFeatures(FeatureMap features) {
0957 //1. save them first, so we can remove the listener
0958 FeatureMap oldFeatures = this.features;
0959
0960 super.setFeatures(features);
0961
0962 this.featuresChanged = true;
0963
0964 //4. sort out the listeners
0965 if (eventHandler != null)
0966 oldFeatures.removeFeatureMapListener(eventHandler);
0967 else
0968 eventHandler = new EventsHandler();
0969 this.features.addFeatureMapListener(eventHandler);
0970 }
0971
0972 /** Sets the name of this resource*/
0973 public void setName(String name){
0974 super.setName(name);
0975
0976 this.nameChanged = true;
0977 }
0978
0979
0980 private List getAnnotationsForOffset(AnnotationSet aDumpAnnotSet,Long offset){
0981 throw new MethodNotImplementedException();
0982 }
0983
0984
0985 public void setNextNodeId(int nextID){
0986 Assert.assertTrue(nextID >= 0);
0987 this.nextNodeId = nextID;
0988 }
0989
0990
0991 public boolean isResourceChanged(int changeType) {
0992
0993 switch(changeType) {
0994
0995 case EventAwareLanguageResource.DOC_CONTENT:
0996 return this.contentChanged;
0997 case EventAwareLanguageResource.RES_FEATURES:
0998 return this.featuresChanged;
0999 case EventAwareLanguageResource.RES_NAME:
1000 return this.nameChanged;
1001 case EventAwareLanguageResource.DOC_MAIN:
1002 return this.documentChanged;
1003 default:
1004 throw new IllegalArgumentException();
1005 }
1006
1007 }
1008
1009 private void _setAnnotations(String setName,Collection annotations)
1010 throws InvalidOffsetException {
1011
1012 AnnotationSet tempSet = null;
1013
1014 if (null == setName) {
1015 Assert.assertTrue(null == this.defaultAnnots);
1016 // this.defaultAnnots = new DatabaseAnnotationSetImpl(this,annotations);
1017 tempSet = new DatabaseAnnotationSetImpl(this);
1018 this.defaultAnnots = tempSet;
1019 }
1020 else {
1021 Assert.assertTrue(false == this.namedAnnotSets.containsKey(setName));
1022 // AnnotationSet annSet = new DatabaseAnnotationSetImpl(this,setName,annotations);
1023 tempSet = new DatabaseAnnotationSetImpl(this,setName);
1024 this.namedAnnotSets.put(setName,tempSet);
1025 }
1026
1027 //NOTE - the source aset is not from this document, so we can't use the proper constructor -
1028 //we should iterate all elements from the original aset and create equiva elements in the new aset
1029 Iterator itAnnotations = annotations.iterator();
1030 while (itAnnotations.hasNext()) {
1031 Annotation currAnn = (Annotation)itAnnotations.next();
1032 tempSet.add(currAnn.getId(),
1033 currAnn.getStartNode().getOffset(),
1034 currAnn.getEndNode().getOffset(),
1035 currAnn.getType(),
1036 currAnn.getFeatures());
1037
1038 //adjust the maxAnnotationID
1039 this.maxAnnotationId = (currAnn.getId().intValue() >= this.maxAnnotationId)
1040 ? currAnn.getId().intValue()
1041 : this.maxAnnotationId;
1042 }
1043
1044 }
1045
1046 /** Set method for the document's URL */
1047 public void setSourceUrl(URL sourceUrl) {
1048
1049 this.documentChanged = true;
1050 super.setSourceUrl(sourceUrl);
1051 } // setSourceUrl
1052
1053
1054 /** Documents may be packed within files; in this case an optional pair of
1055 * offsets refer to the location of the document. This method sets the
1056 * end offset.
1057 */
1058 public void setSourceUrlEndOffset(Long sourceUrlEndOffset) {
1059
1060 this.documentChanged = true;
1061 super.setSourceUrlEndOffset(sourceUrlEndOffset);
1062 } // setSourceUrlStartOffset
1063
1064
1065 /** Documents may be packed within files; in this case an optional pair of
1066 * offsets refer to the location of the document. This method sets the
1067 * start offset.
1068 */
1069 public void setSourceUrlStartOffset(Long sourceUrlStartOffset) {
1070
1071 this.documentChanged = true;
1072 super.setSourceUrlStartOffset(sourceUrlStartOffset);
1073 } // setSourceUrlStartOffset
1074
1075 /** Make the document markup-aware. This will trigger the creation
1076 * of a DocumentFormat object at Document initialisation time; the
1077 * DocumentFormat object will unpack the markup in the Document and
1078 * add it as annotations. Documents are <B>not</B> markup-aware by default.
1079 *
1080 * @param newMarkupAware markup awareness status.
1081 */
1082 public void setMarkupAware(Boolean newMarkupAware) {
1083
1084 this.documentChanged = true;
1085 super.setMarkupAware(newMarkupAware);
1086 }
1087
1088 /**
1089 * All the events from the features are handled by
1090 * this inner class.
1091 */
1092 class EventsHandler implements gate.event.FeatureMapListener {
1093 public void featureMapUpdated(){
1094 //tell the document that its features have been updated
1095 featuresChanged = true;
1096 }
1097 }
1098
1099 /**
1100 * Overriden to remove the features listener, when the document is closed.
1101 */
1102 public void cleanup() {
1103
1104 if (eventHandler != null)
1105
1106 this.features.removeFeatureMapListener(eventHandler);
1107 getDataStore().removeDatastoreListener(this);
1108
1109 //unregister annot-sets
1110 if (null != this.defaultAnnots) {
1111 getDataStore().removeDatastoreListener((DatastoreListener)this.defaultAnnots);
1112 }
1113
1114 Set loadedNamedAnnots = this.namedAnnotSets.entrySet();
1115 Iterator it = loadedNamedAnnots.iterator();
1116 while (it.hasNext()) {
1117 Map.Entry currEntry = (Map.Entry)it.next();
1118 AnnotationSet currSet = (AnnotationSet)currEntry.getValue();
1119 //unregister
1120 getDataStore().removeDatastoreListener((DatastoreListener)currSet);
1121 }
1122
1123 super.cleanup();
1124 }///inner class EventsHandler
1125
1126
1127 /**
1128 * Called by a datastore when a new resource has been adopted
1129 */
1130 public void resourceAdopted(DatastoreEvent evt){
1131 }
1132
1133 /**
1134 * Called by a datastore when a resource has been deleted
1135 */
1136 public void resourceDeleted(DatastoreEvent evt){
1137
1138 Assert.assertNotNull(evt);
1139 Assert.assertNotNull(evt.getResourceID());
1140
1141 //unregister self as listener from the DataStore
1142 if (evt.getResourceID().equals(this.getLRPersistenceId())) {
1143
1144 //someone deleted this document
1145 getDataStore().removeDatastoreListener(this);
1146
1147 //unregister annot-sets
1148 if (null != this.defaultAnnots) {
1149 getDataStore().removeDatastoreListener((DatastoreListener)this.defaultAnnots);
1150 }
1151
1152 Set loadedNamedAnnots = this.namedAnnotSets.entrySet();
1153 Iterator it = loadedNamedAnnots.iterator();
1154 while (it.hasNext()) {
1155 Map.Entry currEntry = (Map.Entry)it.next();
1156 AnnotationSet currSet = (AnnotationSet)currEntry.getValue();
1157 //unregister
1158 getDataStore().removeDatastoreListener((DatastoreListener)currSet);
1159 }
1160 }
1161 }//resourceDeleted
1162
1163 /**
1164 * Called by a datastore when a resource has been wrote into the datastore
1165 */
1166 public void resourceWritten(DatastoreEvent evt){
1167
1168 Assert.assertNotNull(evt);
1169 Assert.assertNotNull(evt.getResourceID());
1170
1171 //is the event for us?
1172 if (evt.getResourceID().equals(this.getLRPersistenceId())) {
1173 //wow, the event is for me
1174 //clear all flags, the content is synced with the DB
1175 this.contentChanged =
1176 this.documentChanged =
1177 this.featuresChanged =
1178 this.nameChanged = false;
1179
1180 this.removedAnotationSets.clear();
1181 this.addedAnotationSets.clear();
1182 }
1183
1184
1185 }
1186
1187 public Collection getLoadedAnnotationSets() {
1188
1189 //never return the data member - return a clone
1190 Assert.assertNotNull(this.namedAnnotSets);
1191 Vector result = new Vector(this.namedAnnotSets.values());
1192 if (null != this.defaultAnnots) {
1193 result.add(this.defaultAnnots);
1194 }
1195
1196 return result;
1197 }
1198
1199
1200 public Collection getRemovedAnnotationSets() {
1201
1202 //return a clone
1203 return new Vector(this.removedAnotationSets);
1204 }
1205
1206 public Collection getAddedAnnotationSets() {
1207
1208 //return a clone
1209 return new Vector(this.addedAnotationSets);
1210 }
1211
1212 public void removeAnnotationSet(String name) {
1213
1214 //1. add to the list of removed a-sets
1215 this.removedAnotationSets.add(name);
1216
1217 //if the set was read from the DB then it is registered as datastore listener and ...
1218 //there may be chnges in it
1219 //NOTE that default set cannot be reoved, so we just ignore it
1220
1221 if (this.namedAnnotSets.keySet().contains(name)) {
1222 //set was loaded
1223 AnnotationSet aset = (AnnotationSet)this.namedAnnotSets.get(name);
1224
1225 Assert.assertNotNull(aset);
1226 Assert.assertTrue(aset instanceof DatabaseAnnotationSetImpl);
1227
1228 //3. unregister it as a DataStoreListener
1229 this.dataStore.removeDatastoreListener((DatastoreListener)aset);
1230 }
1231
1232 //4. delegate
1233 super.removeAnnotationSet(name);
1234 }
1235
1236 /**
1237 * Returns true of an LR has been modified since the last sync.
1238 * Always returns false for transient LRs.
1239 */
1240 public boolean isModified() {
1241 return this.isResourceChanged(EventAwareLanguageResource.DOC_CONTENT) ||
1242 this.isResourceChanged(EventAwareLanguageResource.RES_FEATURES) ||
1243 this.isResourceChanged(EventAwareLanguageResource.RES_NAME) ||
1244 this.isResourceChanged(EventAwareLanguageResource.DOC_MAIN);
1245 }
1246
1247
1248 /**
1249 * Returns the parent LR of this LR.
1250 * Only relevant for LRs that support shadowing. Most do not by default.
1251 */
1252 public LanguageResource getParent()
1253 throws PersistenceException,SecurityException {
1254
1255 return this.parentDocument;
1256 }//getParent
1257
1258 /**
1259 * Sets the parent LR of this LR.
1260 * Only relevant for LRs that support shadowing. Most do not by default.
1261 */
1262 public void setParent(LanguageResource parentLR)
1263 throws PersistenceException,SecurityException {
1264
1265 //0. preconditions
1266 Assert.assertNotNull(parentLR);
1267
1268 if (false == parentLR instanceof DatabaseDocumentImpl) {
1269 throw new IllegalArgumentException("invalid parent resource set");
1270 }
1271
1272 //1.
1273 this.parentDocument = (Document)parentLR;
1274
1275 }//setParent
1276
1277 public void setInitData__$$__(Object data)
1278 throws PersistenceException, InvalidOffsetException {
1279
1280 HashMap initData = (HashMap)data;
1281
1282 this.jdbcConn = (Connection)initData.get("JDBC_CONN");
1283 setDatabaseInfo(this.jdbcConn);
1284 this.dataStore = (DatabaseDataStore)initData.get("DS");
1285 this.lrPersistentId = (Long)initData.get("LR_ID");
1286 this.name = (String)initData.get("DOC_NAME");
1287 this.content = (DocumentContent)initData.get("DOC_CONTENT");
1288 this.isContentRead = true;
1289 this.features = (FeatureMap)initData.get("DOC_FEATURES");
1290 this.markupAware = (Boolean)initData.get("DOC_MARKUP_AWARE");
1291 this.sourceUrl = (URL)initData.get("DOC_SOURCE_URL");
1292 this.sourceUrlStartOffset = (Long)initData.get("DOC_SOURCE_URL_START");
1293 this.sourceUrlEndOffset = (Long)initData.get("DOC_SOURCE_URL_END");
1294 if(initData.containsKey("DOC_STRING_CONTENT"))
1295 this.setStringContent((String)initData.get("DOC_STRING_CONTENT"));
1296
1297
1298 Integer nextNodeID = (Integer)initData.get("DOC_NEXT_NODE_ID");
1299 if (null != nextNodeID) {
1300 this.setNextNodeId(nextNodeID.intValue());
1301 }
1302
1303 Integer nextAnnID = (Integer)initData.get("DOC_NEXT_ANN_ID");
1304 if (null != nextAnnID) {
1305 this.setNextAnnotationId(nextAnnID.intValue());
1306 }
1307
1308 this.parentDocument = (Document)initData.get("PARENT_LR");
1309
1310 //annotations
1311 //1. default
1312 AnnotationSet _default = (AnnotationSet)initData.get("DOC_DEFAULT_ANNOTATIONS");
1313 if (null != _default) {
1314 _setAnnotations(null,_default);
1315 }
1316
1317 //2. named (if any)
1318 Map _named = (Map)initData.get("DOC_NAMED_ANNOTATION_SETS");
1319 if (null != _named) {
1320 Iterator itNamed = _named.values().iterator();
1321 while (itNamed.hasNext()){
1322 AnnotationSet currSet = (AnnotationSet)itNamed.next();
1323 //add them all to the DBAnnotationSet, except the ORIGINAL MARKUPS - handled in the super init()
1324 if (false == currSet.getName().equals(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME)) {
1325 _setAnnotations(currSet.getName(),currSet);
1326 }
1327 }
1328 }
1329
1330 //3. add the listeners for the features (if any)
1331 if (null != this.features) {
1332 if (eventHandler == null)
1333 eventHandler = new EventsHandler();
1334 this.features.addFeatureMapListener(eventHandler);
1335 }
1336
1337 //4. add self as listener for the data store, so that we'll know when the DS is
1338 //synced and we'll clear the isXXXChanged flags
1339 if (null != this.dataStore) {
1340 this.dataStore.addDatastoreListener(this);
1341 }
1342
1343 }
1344
1345 public Object getInitData__$$__(Object initData) {
1346 return null;
1347 }
1348
1349 /** Initialise this resource, and return it. */
1350 public Resource init() throws ResourceInstantiationException {
1351
1352 Resource result = super.init();
1353
1354 if (this.nextAnnotationId <= this.maxAnnotationId) {
1355 this.nextAnnotationId = this.maxAnnotationId +1;
1356 }
1357
1358 return result;
1359 }
1360
1361 }
|