001    /***************************************************************************/
002    /*  Copyright (C) 2010-2011, Sebastian Hellmann                            */
003    /*  Note: If you need parts of NLP2RDF in another licence due to licence   */
004    /*  incompatibility, please mail hellmann@informatik.uni-leipzig.de        */
005    /*                                                                         */
006    /*  This file is part of NLP2RDF.                                          */
007    /*                                                                         */
008    /*  NLP2RDF is free software; you can redistribute it and/or modify        */
009    /*  it under the terms of the GNU General Public License as published by   */
010    /*  the Free Software Foundation; either version 3 of the License, or      */
011    /*  (at your option) any later version.                                    */
012    /*                                                                         */
013    /*  NLP2RDF is distributed in the hope that it will be useful,             */
014    /*  but WITHOUT ANY WARRANTY; without even the implied warranty of         */
015    /*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           */
016    /*  GNU General Public License for more details.                           */
017    /*                                                                         */
018    /*  You should have received a copy of the GNU General Public License      */
019    /*  along with this program. If not, see <http://www.gnu.org/licenses/>.   */
020    /***************************************************************************/
021    
022    package org.nlp2rdf.core.util;
023    
024    import com.hp.hpl.jena.ontology.OntModel;
025    import eu.lod2.nlp2rdf.schema.str.ContextHashBasedString;
026    import eu.lod2.nlp2rdf.schema.str.OffsetBasedString;
027    import org.nlp2rdf.core.Span;
028    import org.nlp2rdf.core.URIGenerator;
029    import org.nlp2rdf.core.impl.MD5Based;
030    import org.nlp2rdf.core.impl.OffsetBased;
031    import org.slf4j.Logger;
032    import org.slf4j.LoggerFactory;
033    
034    import java.io.UnsupportedEncodingException;
035    import java.net.URLEncoder;
036    import java.security.InvalidParameterException;
037    import java.util.*;
038    
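/**
 * Static helper methods for URI generation: selecting a {@link URIGenerator} from a recipe
 * name, extracting the text surrounding a {@link Span}, and producing the URL-encoded
 * first characters of an anchored string.
 *
 * @author Sebastian Hellmann
 *         Created: 29.06.11
 */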
045    public class URIGeneratorHelper {
046        private static Logger log = LoggerFactory.getLogger(URIGeneratorHelper.class);
047    
048    
049        @Deprecated
050        private static URIGenerator initURIGenerator(String text, TreeMap<Span, List<Span>> tokenizedText, String uriRecipe) {
051            /* URIGenerator uriGenerator = URIGeneratorHelper.determineGenerator(uriRecipe);
052            //calculate the minimal contextlength
053            if (uriGenerator instanceof MD5Based) {
054                Set<Span> allSpans = new HashSet<Span>();
055                //document span
056                allSpans.add(new Span(0, text.length()));
057                allSpans.addAll(tokenizedText.keySet());
058                for (Span key : tokenizedText.keySet()) {
059                    allSpans.addAll(tokenizedText.get(key));
060                }
061                //uriGenerator.init(text, allSpans);
062            }
063            return uriGenerator;
064            */
065            return null;
066        }
067    
068    
069        /**
070         *  Based on the recipe string as sepicfied the method returns an urigenerator object
071         *  default for contextlength should be 10 if client did not provide anything else
072         * @param recipe
073         * @param contextLength  default is 10 and it is legal to put it here as a "Magic Number"
074         * @return
075         */
076        public static URIGenerator determineGenerator(String recipe, int contextLength) {
077    
078            if (recipe.equalsIgnoreCase("context-hash")) {
079                return new MD5Based(contextLength);
080            } else if (recipe.equalsIgnoreCase("offset")) {
081                return new OffsetBased();
082            }
083            throw new InvalidParameterException(recipe + " <- recipe not known ");
084    
085        }
086    
087    
088        /**
089         * @param span          the span of the addressed String
090         * @param text
091         * @param contextLength
092         * @return
093         */
094        public static String getContextBefore(Span span, String text, int contextLength) {
095            int before = (span.getStart() - contextLength < 0) ? 0 : span.getStart() - contextLength;
096            return text.substring(before, span.getStart());
097        }
098    
099        /**
100         * @param span          the span of the addressed String
101         * @param text
102         * @param contextLength
103         * @return
104         */
105        public static String getContextAfter(Span span, String text, int contextLength) {
106            int after = (span.getEnd() + contextLength > text.length()) ? text.length() : span.getEnd() + contextLength;
107            return text.substring(span.getEnd(), after);
108        }
109    
110    
111        /**
112         * return the first characters of the anchored part urlencoded
113         *
114         * @param anchoredPart
115         * @param firstCharLength
116         * @return
117         */
118        public static String getFirstCharacters(String anchoredPart, int firstCharLength) {
119            String firstChars = (anchoredPart.length() > firstCharLength) ? anchoredPart.substring(0, firstCharLength) : anchoredPart;
120            try {
121                firstChars = URLEncoder.encode(firstChars, "UTF-8").replaceAll("\\+", "%20");
122            } catch (UnsupportedEncodingException e) {
123                log.error("", e);
124            }
125            return firstChars;
126        }
127    
128    }