001    /***************************************************************************/
002    /*  Copyright (C) 2010-2011, Sebastian Hellmann                            */
003    /*  Note: If you need parts of NLP2RDF in another licence due to licence   */
004    /*  incompatibility, please mail hellmann@informatik.uni-leipzig.de        */
005    /*                                                                         */
006    /*  This file is part of NLP2RDF.                                          */
007    /*                                                                         */
008    /*  NLP2RDF is free software; you can redistribute it and/or modify        */
009    /*  it under the terms of the GNU General Public License as published by   */
010    /*  the Free Software Foundation; either version 3 of the License, or      */
011    /*  (at your option) any later version.                                    */
012    /*                                                                         */
013    /*  NLP2RDF is distributed in the hope that it will be useful,             */
014    /*  but WITHOUT ANY WARRANTY; without even the implied warranty of         */
015    /*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           */
016    /*  GNU General Public License for more details.                           */
017    /*                                                                         */
018    /*  You should have received a copy of the GNU General Public License      */
019    /*  along with this program. If not, see <http://www.gnu.org/licenses/>.   */
020    /***************************************************************************/
021    
022    package org.nlp2rdf.core.util;
023    
024    import com.hp.hpl.jena.ontology.OntModel;
025    import eu.lod2.nlp2rdf.schema.str.ContextHashBasedString;
026    import eu.lod2.nlp2rdf.schema.str.IString;
027    import eu.lod2.nlp2rdf.schema.str.OffsetBasedString;
028    import opennlp.tools.util.Span;
029    import org.nlp2rdf.core.URIGenerator;
030    import org.nlp2rdf.core.impl.MD5Based;
031    import org.nlp2rdf.core.impl.OffsetBased;
032    import org.slf4j.Logger;
033    import org.slf4j.LoggerFactory;
034    
035    import java.io.UnsupportedEncodingException;
036    import java.net.URLEncoder;
037    import java.security.InvalidParameterException;
038    import java.util.ArrayList;
039    import java.util.List;
040    
041    /**
042     * @author Sebastian Hellmann - http://bis.informatik.uni-leipzig.de/SebastianHellmann
043     *         Created: 29.06.11
044     *         <p/>
045     *         This class contains static helper methods for handling and validating a model
046     */
047    public class URIGeneratorHelper {
048        private static Logger log = LoggerFactory.getLogger(URIGeneratorHelper.class);
049    
050    
051        public static List<Span> getSpans(List<IString> strings, String prefix, String text, URIGenerator uriGenerator) {
052            List<Span> spans = new ArrayList<Span>();
053            for (IString s : strings) {
054                spans.add(uriGenerator.getSpanFor(s.getURI(), prefix, text, s.getAnchorOf()));
055            }
056            return spans;
057        }
058    
059        public static URIGenerator determineGenerator(String recipe) {
060            if (recipe.equalsIgnoreCase("ContextHashBasedString") || recipe.equalsIgnoreCase("context-hash") || recipe.equalsIgnoreCase("http://nlp2rdf.lod2.eu/schema/string/ContextHashBasedString")) {
061                return new MD5Based();
062            } else if (recipe.equalsIgnoreCase("OffsetBasedString") || recipe.equalsIgnoreCase("offset") || recipe.equalsIgnoreCase("http://nlp2rdf.lod2.eu/schema/string/OffsetBasedString")) {
063                return new OffsetBased();
064            }
065            throw new InvalidParameterException(recipe + " <- recipe not known ");
066    
067        }
068    
069        /**
070         * determines the respective generator from a model
071         * currently it is only recognized if the respective class is assigned once.
072         * No heuristic is used to determine the URI Recipe, otherwise.
073         *
074         * @param model
075         * @return
076         */
077        public static URIGenerator determineGenerator(OntModel model) {
078            if (!(ContextHashBasedString.list(model).isEmpty())) {
079                return new MD5Based();
080            } else if (!(OffsetBasedString.list(model).isEmpty())) {
081                return new OffsetBased();
082            } else {
083                return new OffsetBased();
084            }
085    
086        }
087    
088    
089        public static String getContextBefore(Span span, String text, int contextLength) {
090            int before = (span.getStart() - contextLength < 0) ? 0 : span.getStart() - contextLength;
091            return text.substring(before, span.getStart());
092        }
093    
094        public static String getContextAfter(Span span, String text, int contextLength) {
095            int after = (span.getEnd() + contextLength > text.length()) ? text.length() : span.getEnd() + contextLength;
096            return text.substring(span.getEnd(), after);
097        }
098    
099    
100        /**
101         * return the first characters of the anchored part urlencoded
102         *
103         * @param anchoredPart
104         * @param firstCharLength
105         * @return
106         */
107        public static String getFirstCharacters(String anchoredPart, int firstCharLength) {
108            String firstChars = "";
109            try {
110                firstChars = URLEncoder.encode((anchoredPart.length() > firstCharLength) ? anchoredPart.substring(0, firstCharLength) : anchoredPart, "UTF-8");
111            } catch (UnsupportedEncodingException e) {
112                log.error("", e);
113            }
114            return firstChars;
115        }
116    
117    }