package org.aksw.gerbil.annotator.impl.aida;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.aksw.gerbil.annotator.A2KBAnnotator;
import org.aksw.gerbil.annotator.http.AbstractHttpBasedAnnotator;
import org.aksw.gerbil.datatypes.ErrorTypes;
import org.aksw.gerbil.exceptions.GerbilException;
import org.aksw.gerbil.transfer.nif.Document;
import org.aksw.gerbil.transfer.nif.Meaning;
import org.aksw.gerbil.transfer.nif.MeaningSpan;
import org.aksw.gerbil.transfer.nif.Span;
import org.aksw.gerbil.transfer.nif.data.DocumentImpl;
import org.aksw.gerbil.transfer.nif.data.NamedEntity;
import org.aksw.gerbil.transfer.nif.data.ScoredNamedEntity;
import org.aksw.gerbil.transfer.nif.data.StartPosBasedComparator;
import org.aksw.gerbil.transfer.nif.vocabulary.ITSRDF;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.http.Consts;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.util.EntityUtils;
import org.json.JSONArray;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.StreamUtils;

/* loaded from: input_file:org/aksw/gerbil/annotator/impl/aida/AidaAnnotator.class */
/**
 * Annotator that POSTs a document's text to an AIDA web service and converts
 * the {@code mentions} of the JSON response into NIF markings.
 *
 * <p>The request body is form-encoded ({@code text=...}, optionally followed
 * by {@code tag_mode=manual}). For every mention of the response that carries
 * {@code offset}, {@code length} and a {@code bestEntity} with a
 * {@code kbIdentifier}, a {@link NamedEntity} (or a {@link ScoredNamedEntity}
 * if a {@code disambiguationScore} is present) is added to the result
 * document.
 */
public class AidaAnnotator extends AbstractHttpBasedAnnotator implements A2KBAnnotator {
    private static final Logger LOGGER = LoggerFactory.getLogger(AidaAnnotator.class);
    private static final Charset CHARSET = Charset.forName("utf-8");
    private static final ContentType REQUEST_CONTENT_TYPE = ContentType.create("application/x-www-form-urlencoded", Consts.UTF_8);
    private static final String FRED_DENOTES_URI = "http://ontologydesignpatterns.org/cp/owl/semiotics.owl#denotes";
    /** Prefix of knowledge base identifiers that are mapped to Wikipedia URIs. */
    private static final String YAGO_PREFIX = "YAGO:";
    /** Namespace prepended to the local name of a {@code YAGO:} identifier. */
    private static final String WIKIPEDIA_URI_PREFIX = "http://en.wikipedia.org/wiki/";
    /** Markers written around every given span by {@link #createTextWithSpans(Document)}. */
    private static final String MENTION_START = "[[";
    private static final String MENTION_END = "]]";

    /** URL the POST requests are sent to. */
    private final String serviceUrl;
    /** Orders spans before they are marked inside the text. */
    private final Comparator<Span> spanComparator = new StartPosBasedComparator();

    /**
     * Creates an annotator that talks to the given service.
     *
     * @param serviceUrl URL of the AIDA service that receives the POST requests
     */
    public AidaAnnotator(String serviceUrl) {
        this.serviceUrl = serviceUrl;
    }

    /**
     * Sends the given text to the service and returns a new document
     * containing the markings parsed from the response.
     *
     * @param documentUri URI assigned to the returned document
     * @param text        text that is sent to the service and stored in the
     *                    returned document
     * @param manualMode  if {@code true}, the form parameter
     *                    {@code tag_mode=manual} is added to the request
     * @return a new document holding {@code text}, {@code documentUri} and the
     *         parsed markings
     * @throws GerbilException if the request can't be created, sent or its
     *                         response can't be parsed
     */
    protected Document requestAnnotations(String documentUri, String text, boolean manualMode) throws GerbilException {
        LOGGER.info("Started request for {}", documentUri);
        HttpPost request;
        try {
            request = createPostRequest(this.serviceUrl);
        } catch (Exception e) {
            throw new GerbilException("Couldn't create HTTP request.", e, ErrorTypes.UNEXPECTED_EXCEPTION);
        }

        HttpEntity entity = null;
        CloseableHttpResponse response = null;
        Document document;
        try {
            request.setEntity(new StringEntity(buildRequestBody(text, manualMode), REQUEST_CONTENT_TYPE));
            // NOTE(review): Accept-Encoding names content codings (gzip, ...) rather
            // than charsets, so Accept-Charset was probably intended. Kept unchanged
            // to avoid altering the request that is sent - confirm with the service.
            request.addHeader("Accept-Encoding", Consts.UTF_8.name());
            response = sendRequest(request);
            entity = response.getEntity();
            document = parseResponse(entity, documentUri, text);
        } catch (GerbilException e) {
            // Already describes the failing step; don't hide it behind a second wrapper.
            throw e;
        } catch (Exception e) {
            throw new GerbilException("Couldn't request annotations from the service.", e,
                    ErrorTypes.UNEXPECTED_EXCEPTION);
        } finally {
            closeRequest(request);
            if (entity != null) {
                try {
                    EntityUtils.consume(entity);
                } catch (IOException ignored) {
                    // Best-effort cleanup: a failure while draining the entity must not
                    // replace the result or the exception of the request itself.
                }
            }
            IOUtils.closeQuietly(response);
        }
        LOGGER.info("Finished request for {}", document.getDocumentURI());
        return document;
    }

    /**
     * Builds the form-encoded request body: {@code text=<url encoded text>},
     * followed by {@code &tag_mode=manual} if manual mode is requested.
     */
    private static String buildRequestBody(String text, boolean manualMode) throws GerbilException {
        StringBuilder body = new StringBuilder("text=");
        try {
            body.append(URLEncoder.encode(text, Consts.UTF_8.name()));
        } catch (UnsupportedEncodingException e) {
            throw new GerbilException("Couldn't encode text.", e, ErrorTypes.UNEXPECTED_EXCEPTION);
        }
        if (manualMode) {
            // Pairs of a form-encoded body are separated by '&'; a line break
            // would not start a new parameter.
            body.append("&tag_mode=manual");
        }
        return body.toString();
    }

    /**
     * Reads the response entity as UTF-8 JSON and creates the result document
     * from it.
     */
    private Document parseResponse(HttpEntity entity, String documentUri, String text) throws GerbilException {
        if (entity == null) {
            throw new GerbilException("Couldn't parse the response.",
                    new IllegalStateException("The response contains no entity."), ErrorTypes.UNEXPECTED_EXCEPTION);
        }
        try {
            JSONObject json = new JSONObject(IOUtils.toString(entity.getContent(), Consts.UTF_8.name()));
            Document document = new DocumentImpl(text, documentUri);
            parseMarkings(json, document);
            return document;
        } catch (Exception e) {
            LOGGER.error("Couldn't parse the response.", e);
            throw new GerbilException("Couldn't parse the response.", e, ErrorTypes.UNEXPECTED_EXCEPTION);
        }
    }

    /**
     * Adds one marking to the given document for every usable element of the
     * {@code mentions} array of the given response.
     *
     * <p>A mention is skipped silently if it lacks {@code bestEntity},
     * {@code offset} or {@code length}. It is skipped with a warning if its
     * {@code bestEntity} has no {@code kbIdentifier}. Nothing happens if the
     * response is {@code null} or has no {@code mentions}.
     *
     * @param response JSON object returned by the service, may be {@code null}
     * @param document document the markings are added to
     * @throws GerbilException if any exception occurs while reading the JSON
     */
    protected void parseMarkings(JSONObject response, Document document) throws GerbilException {
        if (response == null) {
            return;
        }
        try {
            if (!response.has("mentions")) {
                return;
            }
            JSONArray mentions = response.getJSONArray("mentions");
            if (mentions == null) {
                return;
            }
            for (int i = 0; i < mentions.length(); i++) {
                JSONObject mention = mentions.getJSONObject(i);
                if (mention == null || !mention.has("bestEntity") || !mention.has("offset")
                        || !mention.has("length")) {
                    continue;
                }
                int offset = mention.getInt("offset");
                int length = mention.getInt("length");
                JSONObject bestEntity = mention.getJSONObject("bestEntity");
                if (bestEntity == null || !bestEntity.has("kbIdentifier")) {
                    // The object itself is handed to the logger, which is safe for null.
                    LOGGER.warn("Got an incomplete mention from AIDA: {}. It will be ignored", bestEntity);
                    continue;
                }
                Set<String> uris = generateUriSet(bestEntity.getString("kbIdentifier"));
                // -1 marks a missing score; only larger values create a scored entity.
                double score = bestEntity.has("disambiguationScore") ? bestEntity.getDouble("disambiguationScore")
                        : -1.0d;
                if (uris == null) {
                    continue;
                }
                if (score > -1.0d) {
                    document.addMarking(new ScoredNamedEntity(offset, length, uris, score));
                } else {
                    document.addMarking(new NamedEntity(offset, length, uris));
                }
            }
        } catch (Exception e) {
            throw new GerbilException("Got an Exception while parsing the response of AIDA.", e,
                    ErrorTypes.UNEXPECTED_EXCEPTION);
        }
    }

    /**
     * Creates the URI set for a knowledge base identifier of the response.
     *
     * <p>An identifier starting with {@code YAGO:} is turned into
     * {@code http://en.wikipedia.org/wiki/} followed by the remainder of the
     * identifier, with Java escape sequences unescaped and spaces replaced by
     * underscores. Every other identifier is used as it is.
     *
     * @param kbIdentifier identifier taken from the response, may be {@code null}
     * @return a set containing exactly one URI, or {@code null} if the
     *         identifier is {@code null}
     */
    protected Set<String> generateUriSet(String kbIdentifier) {
        if (kbIdentifier == null) {
            return null;
        }
        Set<String> uris = new HashSet<String>();
        if (kbIdentifier.startsWith(YAGO_PREFIX)) {
            String title = StringEscapeUtils.unescapeJava(kbIdentifier.substring(YAGO_PREFIX.length()));
            uris.add(WIKIPEDIA_URI_PREFIX + title.replace(' ', '_'));
        } else {
            uris.add(kbIdentifier);
        }
        return uris;
    }

    /**
     * Reads the given stream completely as UTF-8 and replaces every literal
     * occurrence of the FRED {@code denotes} URI with the URI of
     * {@code ITSRDF.taIdentRef}.
     *
     * @param inputStream stream to read; it is not closed by this method's own code
     * @return a reader over the rewritten content
     * @throws IOException if the stream can't be read
     */
    protected Reader replaceDenotesUri(InputStream inputStream) throws IOException {
        String content = StreamUtils.copyToString(inputStream, CHARSET);
        // Literal replacement: the URI is not a regular expression ('.' must not
        // match arbitrary characters) and the replacement must not be parsed for
        // '$' or '\' group references.
        return new StringReader(content.replace(FRED_DENOTES_URI, ITSRDF.taIdentRef.getURI()));
    }

    /**
     * Sends the plain document text (without manual mode) and returns the
     * {@link Meaning} markings of the response.
     */
    @Override // org.aksw.gerbil.annotator.C2KBAnnotator
    public List<Meaning> performC2KB(Document document) throws GerbilException {
        return requestAnnotations(document.getDocumentURI(), document.getText(), false).getMarkings(Meaning.class);
    }

    /**
     * Sends the plain document text (without manual mode) and returns the
     * {@link MeaningSpan} markings of the response.
     */
    @Override // org.aksw.gerbil.annotator.A2KBAnnotator
    public List<MeaningSpan> performA2KBTask(Document document) throws GerbilException {
        return requestAnnotations(document.getDocumentURI(), document.getText(), false).getMarkings(MeaningSpan.class);
    }

    /**
     * Sends the document text with its spans marked by
     * {@link #createTextWithSpans(Document)} in manual mode and returns the
     * {@link MeaningSpan} markings of the response.
     */
    @Override // org.aksw.gerbil.annotator.D2KBAnnotator
    public List<MeaningSpan> performD2KBTask(Document document) throws GerbilException {
        return requestAnnotations(document.getDocumentURI(), createTextWithSpans(document), true)
                .getMarkings(MeaningSpan.class);
    }

    /**
     * Sends the plain document text (without manual mode) and returns the
     * {@link Span} markings of the response.
     */
    @Override // org.aksw.gerbil.annotator.EntityRecognizer
    public List<Span> performRecognition(Document document) throws GerbilException {
        return requestAnnotations(document.getDocumentURI(), document.getText(), false).getMarkings(Span.class);
    }

    /**
     * Returns the text of the document in which every span of the document is
     * surrounded by {@code [[} and {@code ]]}.
     *
     * <p>The spans are sorted with the span comparator of this annotator
     * before they are processed. The list returned by
     * {@code getMarkings(Span.class)} is sorted in place; whether this is
     * visible to the document depends on that method's implementation. A span
     * that starts before the end of the previously marked span can't be
     * expressed with this markup; it is skipped with a warning.
     *
     * @param document document providing the text and the spans
     * @return the text including the span markers
     */
    protected String createTextWithSpans(Document document) {
        String text = document.getText();
        List<Span> spans = document.getMarkings(Span.class);
        Collections.sort(spans, this.spanComparator);
        StringBuilder marked = new StringBuilder(
                text.length() + spans.size() * (MENTION_START.length() + MENTION_END.length()));
        int position = 0;
        for (Span span : spans) {
            int start = span.getStartPosition();
            if (start < position) {
                LOGGER.warn("Span starting at {} with length {} in document {} overlaps a preceding span."
                        + " It will be ignored.", start, span.getLength(), document.getDocumentURI());
                continue;
            }
            int end = start + span.getLength();
            marked.append(text, position, start);
            marked.append(MENTION_START);
            marked.append(text, start, end);
            marked.append(MENTION_END);
            position = end;
        }
        if (position < text.length()) {
            marked.append(text, position, text.length());
        }
        return marked.toString();
    }
}
