001 /***************************************************************************/
002 /* Copyright (C) 2010-2011, Sebastian Hellmann */
003 /* Note: If you need parts of NLP2RDF in another licence due to licence */
004 /* incompatibility, please mail hellmann@informatik.uni-leipzig.de */
005 /* */
006 /* This file is part of NLP2RDF. */
007 /* */
008 /* NLP2RDF is free software; you can redistribute it and/or modify */
009 /* it under the terms of the GNU General Public License as published by */
010 /* the Free Software Foundation; either version 3 of the License, or */
011 /* (at your option) any later version. */
012 /* */
013 /* NLP2RDF is distributed in the hope that it will be useful, */
014 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
015 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
016 /* GNU General Public License for more details. */
017 /* */
018 /* You should have received a copy of the GNU General Public License */
019 /* along with this program. If not, see <http://www.gnu.org/licenses/>. */
020 /***************************************************************************/
021
022 package org.nlp2rdf.core.util;
023
024 import com.hp.hpl.jena.ontology.OntModel;
025 import eu.lod2.nlp2rdf.schema.str.ContextHashBasedString;
026 import eu.lod2.nlp2rdf.schema.str.IString;
027 import eu.lod2.nlp2rdf.schema.str.OffsetBasedString;
028 import opennlp.tools.util.Span;
029 import org.nlp2rdf.core.URIGenerator;
030 import org.nlp2rdf.core.impl.MD5Based;
031 import org.nlp2rdf.core.impl.OffsetBased;
032 import org.slf4j.Logger;
033 import org.slf4j.LoggerFactory;
034
035 import java.io.UnsupportedEncodingException;
036 import java.net.URLEncoder;
037 import java.security.InvalidParameterException;
038 import java.util.ArrayList;
039 import java.util.List;
040
041 /**
042 * @author Sebastian Hellmann - http://bis.informatik.uni-leipzig.de/SebastianHellmann
043 * Created: 29.06.11
044 * <p/>
045 * This class contains static helper methods for handling and validating a model
046 */
047 public class URIGeneratorHelper {
048 private static Logger log = LoggerFactory.getLogger(URIGeneratorHelper.class);
049
050
051 public static List<Span> getSpans(List<IString> strings, String prefix, String text, URIGenerator uriGenerator) {
052 List<Span> spans = new ArrayList<Span>();
053 for (IString s : strings) {
054 spans.add(uriGenerator.getSpanFor(s.getURI(), prefix, text, s.getAnchorOf()));
055 }
056 return spans;
057 }
058
059 public static URIGenerator determineGenerator(String recipe) {
060 if (recipe.equalsIgnoreCase("ContextHashBasedString") || recipe.equalsIgnoreCase("context-hash") || recipe.equalsIgnoreCase("http://nlp2rdf.lod2.eu/schema/string/ContextHashBasedString")) {
061 return new MD5Based();
062 } else if (recipe.equalsIgnoreCase("OffsetBasedString") || recipe.equalsIgnoreCase("offset") || recipe.equalsIgnoreCase("http://nlp2rdf.lod2.eu/schema/string/OffsetBasedString")) {
063 return new OffsetBased();
064 }
065 throw new InvalidParameterException(recipe + " <- recipe not known ");
066
067 }
068
069 /**
070 * determines the respective generator from a model
071 * currently it is only recognized if the respective class is assigned once.
072 * No heuristic is used to determine the URI Recipe, otherwise.
073 *
074 * @param model
075 * @return
076 */
077 public static URIGenerator determineGenerator(OntModel model) {
078 if (!(ContextHashBasedString.list(model).isEmpty())) {
079 return new MD5Based();
080 } else if (!(OffsetBasedString.list(model).isEmpty())) {
081 return new OffsetBased();
082 } else {
083 return new OffsetBased();
084 }
085
086 }
087
088
089 public static String getContextBefore(Span span, String text, int contextLength) {
090 int before = (span.getStart() - contextLength < 0) ? 0 : span.getStart() - contextLength;
091 return text.substring(before, span.getStart());
092 }
093
094 public static String getContextAfter(Span span, String text, int contextLength) {
095 int after = (span.getEnd() + contextLength > text.length()) ? text.length() : span.getEnd() + contextLength;
096 return text.substring(span.getEnd(), after);
097 }
098
099
100 /**
101 * return the first characters of the anchored part urlencoded
102 *
103 * @param anchoredPart
104 * @param firstCharLength
105 * @return
106 */
107 public static String getFirstCharacters(String anchoredPart, int firstCharLength) {
108 String firstChars = "";
109 try {
110 firstChars = URLEncoder.encode((anchoredPart.length() > firstCharLength) ? anchoredPart.substring(0, firstCharLength) : anchoredPart, "UTF-8");
111 } catch (UnsupportedEncodingException e) {
112 log.error("", e);
113 }
114 return firstChars;
115 }
116
117 }