001 /***************************************************************************/
002 /* Copyright (C) 2010-2011, Sebastian Hellmann */
003 /* Note: If you need parts of NLP2RDF in another licence due to licence */
004 /* incompatibility, please mail hellmann@informatik.uni-leipzig.de */
005 /* */
006 /* This file is part of NLP2RDF. */
007 /* */
008 /* NLP2RDF is free software; you can redistribute it and/or modify */
009 /* it under the terms of the GNU General Public License as published by */
010 /* the Free Software Foundation; either version 3 of the License, or */
011 /* (at your option) any later version. */
012 /* */
013 /* NLP2RDF is distributed in the hope that it will be useful, */
014 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
015 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
016 /* GNU General Public License for more details. */
017 /* */
018 /* You should have received a copy of the GNU General Public License */
019 /* along with this program. If not, see <http://www.gnu.org/licenses/>. */
020 /***************************************************************************/
021
022 package org.nlp2rdf.core.util;
023
024 import com.hp.hpl.jena.ontology.OntModel;
025 import eu.lod2.nlp2rdf.schema.str.ContextHashBasedString;
026 import eu.lod2.nlp2rdf.schema.str.OffsetBasedString;
027 import org.nlp2rdf.core.Span;
028 import org.nlp2rdf.core.URIGenerator;
029 import org.nlp2rdf.core.impl.MD5Based;
030 import org.nlp2rdf.core.impl.OffsetBased;
031 import org.slf4j.Logger;
032 import org.slf4j.LoggerFactory;
033
034 import java.io.UnsupportedEncodingException;
035 import java.net.URLEncoder;
036 import java.security.InvalidParameterException;
037 import java.util.*;
038
039 /**
040 * @author Sebastian Hellmann
041 * Created: 29.06.11
042 * <p/>
043 * This class contains static helper methods for handling and validating a model
044 */
045 public class URIGeneratorHelper {
046 private static Logger log = LoggerFactory.getLogger(URIGeneratorHelper.class);
047
048
049 @Deprecated
050 private static URIGenerator initURIGenerator(String text, TreeMap<Span, List<Span>> tokenizedText, String uriRecipe) {
051 /* URIGenerator uriGenerator = URIGeneratorHelper.determineGenerator(uriRecipe);
052 //calculate the minimal contextlength
053 if (uriGenerator instanceof MD5Based) {
054 Set<Span> allSpans = new HashSet<Span>();
055 //document span
056 allSpans.add(new Span(0, text.length()));
057 allSpans.addAll(tokenizedText.keySet());
058 for (Span key : tokenizedText.keySet()) {
059 allSpans.addAll(tokenizedText.get(key));
060 }
061 //uriGenerator.init(text, allSpans);
062 }
063 return uriGenerator;
064 */
065 return null;
066 }
067
068
069 /**
070 * Based on the recipe string as sepicfied the method returns an urigenerator object
071 * default for contextlength should be 10 if client did not provide anything else
072 * @param recipe
073 * @param contextLength default is 10 and it is legal to put it here as a "Magic Number"
074 * @return
075 */
076 public static URIGenerator determineGenerator(String recipe, int contextLength) {
077
078 if (recipe.equalsIgnoreCase("context-hash")) {
079 return new MD5Based(contextLength);
080 } else if (recipe.equalsIgnoreCase("offset")) {
081 return new OffsetBased();
082 }
083 throw new InvalidParameterException(recipe + " <- recipe not known ");
084
085 }
086
087
088 /**
089 * @param span the span of the addressed String
090 * @param text
091 * @param contextLength
092 * @return
093 */
094 public static String getContextBefore(Span span, String text, int contextLength) {
095 int before = (span.getStart() - contextLength < 0) ? 0 : span.getStart() - contextLength;
096 return text.substring(before, span.getStart());
097 }
098
099 /**
100 * @param span the span of the addressed String
101 * @param text
102 * @param contextLength
103 * @return
104 */
105 public static String getContextAfter(Span span, String text, int contextLength) {
106 int after = (span.getEnd() + contextLength > text.length()) ? text.length() : span.getEnd() + contextLength;
107 return text.substring(span.getEnd(), after);
108 }
109
110
111 /**
112 * return the first characters of the anchored part urlencoded
113 *
114 * @param anchoredPart
115 * @param firstCharLength
116 * @return
117 */
118 public static String getFirstCharacters(String anchoredPart, int firstCharLength) {
119 String firstChars = (anchoredPart.length() > firstCharLength) ? anchoredPart.substring(0, firstCharLength) : anchoredPart;
120 try {
121 firstChars = URLEncoder.encode(firstChars, "UTF-8").replaceAll("\\+", "%20");
122 } catch (UnsupportedEncodingException e) {
123 log.error("", e);
124 }
125 return firstChars;
126 }
127
128 }