001 /*
002 * PatternValidator.java
003 *
004 * Niraj Aswani, 19/March/07
005 *
006 * $Id: PatternValidator.html,v 1.0 2007/03/19 16:22:01 niraj Exp $
007 */
008 package gate.creole.annic.lucene;
009
010 import java.util.*;
011 import gate.creole.annic.apache.lucene.index.*;
012 import gate.creole.annic.apache.lucene.analysis.*;
013
014 /**
015 * Pattern Validator that given a position of first term, retrieves the
016 * entire pattern from the token stream. If it is not able to retrieve
017 * the entire pattern, the class reports it as an invalid pattern.
018 *
019 * @author niraj
020 *
021 */
022 public class PatternValidator {
023
024 /**
025 * AND operator
026 */
027 private final int AND = 0;
028
029 /**
030 * OR operator
031 */
032 private final int OR = 1;
033
034 /**
035 * Negation operator
036 */
037 private final int NOT = 2;
038
039 private int index = 0;
040
041 private int patLen = 0;
042
043 /**
044 * Gets the length of the pattern.
045 *
046 * @return
047 */
048 public int getPatternLength() {
049 return patLen;
050 }
051
052 /**
053 * This method takes two parameters the actual query issued and
054 * annotations in which it checks if the annotations exist that are
055 * validating for the given query
056 *
057 * @param query String
058 * @param annotations ArrayList
059 * @throws SearchException
060 * @return int positive number indicates the offset of the last
061 * annotation of the pattern. -1 indicates invalid pattern.
062 */
063 public int validate(List<String> queryTokens, List<Token> annotations, int from,
064 QueryParser queryParser) throws gate.creole.ir.SearchException {
065 patLen = 0;
066
067 // and now for each token we need to create Term(s)
068 int enOffset = -1;
069 int stOffset = -1;
070 int position = -1;
071
072 for(int i = 0; i < queryTokens.size(); i++) {
073 queryParser.position = 0;
074 ArrayList[] termpositions = queryParser.createTerms(queryTokens.get(i));
075 ArrayList terms = termpositions[0];
076 ArrayList consider = termpositions[2];
077 // process each term individually
078 for(int k = 0; k < terms.size(); k++) {
079 // when consider is true, that means we should change the start
080 // offset conditions
081 Term term = (Term)terms.get(k);
082 if(((Boolean)consider.get(k)).booleanValue()) {
083 patLen++;
084 // find relavant annotations where type and text should
085 // match with terms type and text
086 boolean found = false;
087 // among this if we are able to find the token that has
088 // start offset > previous enOffset
089 innerLoop: for(int j = from; j < annotations.size(); j++) {
090 Token tk = annotations.get(j);
091 // if the term is equal to one of the tokens
092 if(!isEqual(tk, term)) continue;
093 // the next token with consider must be starting with
094 // the end of last token
095 // or after 1 space
096 if(enOffset == -1 || tk.startOffset() == enOffset
097 || tk.startOffset() == enOffset + 1) {
098 // the position of the new token must be +1
099 if(tk.getPosition() > position) {
100 found = true;
101 // set the current position to the position of
102 // the found token
103 position = tk.getPosition();
104 enOffset = tk.endOffset();
105 stOffset = tk.startOffset();
106 // as the annotation is found
107 // break the innerLoop
108 // and search for the next term
109 break innerLoop;
110 }
111 }
112 }
113
114 if(!found) {
115 // we could not find any annotation that means this
116 // pattern is not valid
117 return -1;
118 }
119
120 }
121 else {
122 // if consider is false
123 boolean found = false;
124 for(int j = 0; j < annotations.size(); j++) {
125 Token tk = annotations.get(j);
126 if(tk.getPosition() != position) continue;
127 if(tk.endOffset() != enOffset || tk.startOffset() != stOffset)
128 continue;
129 if(!isEqual(tk, term))
130 continue;
131 else {
132 found = true;
133 break;
134 }
135 }
136 if(!found) {
137 return -1;
138 }
139 }
140 }
141 }
142 return enOffset;
143 }
144
145 /**
146 * Checks whether two terms are equal.
147 *
148 * @param tk
149 * @param term
150 * @return
151 */
152 private boolean isEqual(Token tk, Term term) {
153 return (term.text().equals(tk.termText()) && term.type().equals(tk.type()));
154 }
155 }
|