package org.aksw.gerbil.transfer.nif;

import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.NodeIterator;
import com.hp.hpl.jena.rdf.model.ResIterator;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.vocabulary.RDF;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.aksw.gerbil.transfer.nif.data.Annotation;
import org.aksw.gerbil.transfer.nif.data.DocumentImpl;
import org.aksw.gerbil.transfer.nif.data.EndPosBasedComparator;
import org.aksw.gerbil.transfer.nif.data.NamedEntity;
import org.aksw.gerbil.transfer.nif.data.ScoredAnnotation;
import org.aksw.gerbil.transfer.nif.data.ScoredNamedEntity;
import org.aksw.gerbil.transfer.nif.data.SpanImpl;
import org.aksw.gerbil.transfer.nif.data.StartPosBasedComparator;
import org.aksw.gerbil.transfer.nif.vocabulary.ITSRDF;
import org.aksw.gerbil.transfer.nif.vocabulary.NIF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/aksw/gerbil/transfer/nif/AbstractNIFDocumentParser.class */
/**
 * Skeleton implementation of {@link NIFDocumentParser}. Subclasses only have to turn the
 * serialized input into a Jena {@link Model} (see {@link #parseNIFModelFromReader(Reader)});
 * this class then extracts the document text and its markings from that model.
 */
public abstract class AbstractNIFDocumentParser implements NIFDocumentParser {
    private static final Logger LOGGER = LoggerFactory.getLogger(AbstractNIFDocumentParser.class);

    /** Content type reported by {@link #getHttpContentType()}; set once at construction. */
    private final String httpContentType;

    /**
     * Creates a parser that reports the given content type.
     *
     * @param str the value returned by {@link #getHttpContentType()}
     */
    public AbstractNIFDocumentParser(String str) {
        this.httpContentType = str;
    }

    /**
     * Parses a document from a NIF string by wrapping it into a {@link StringReader} and
     * delegating to {@link #getDocumentFromNIFReader(Reader)}.
     *
     * @param str the serialized NIF data
     * @return the parsed document
     * @throws Exception if the data cannot be parsed or contains no usable document
     */
    @Override
    public Document getDocumentFromNIFString(String str) throws Exception {
        return getDocumentFromNIFReader(new StringReader(str));
    }

    /**
     * Parses a document from the given reader.
     *
     * @param reader source of the serialized NIF data
     * @return the parsed document
     * @throws Exception if the data cannot be parsed or contains no usable document
     */
    @Override
    public Document getDocumentFromNIFReader(Reader reader) throws Exception {
        return createAnnotatedDocument(parseNIFModelFromReader(reader));
    }

    /**
     * Reads the serialized NIF data from the reader into an RDF model.
     *
     * @param reader source of the serialized NIF data
     * @return the model containing the parsed triples
     * @throws Exception if the data cannot be read or parsed
     */
    protected abstract Model parseNIFModelFromReader(Reader reader) throws Exception;

    /**
     * Builds a document from the first {@code nif:Context} resource of the model.
     * <p>
     * The text is taken from the context's {@code nif:isString} literal. Resources that
     * reference the context via {@code nif:referenceContext} become spans (or named entities if
     * they carry an {@code itsrdf:taIdentRef}), and the context's {@code nif:topic} objects
     * become document-level annotations. Finally the span positions are passed through
     * {@link #correctAnnotationPositions(Document)}.
     *
     * @param model the parsed NIF model
     * @return the document with its markings
     * @throws Exception if the model has no {@code nif:Context} resource or that resource has
     *         no {@code nif:isString} value
     */
    protected Document createAnnotatedDocument(Model model) throws Exception {
        Resource context = findContextResource(model);
        Literal text = readDocumentText(model, context);

        DocumentImpl document = new DocumentImpl(text.getString());

        // The document URI is the context URI without its fragment. getURI() returns null for
        // anonymous resources, in which case no document URI can be derived.
        String contextUri = context.getURI();
        if (contextUri != null) {
            int fragmentStart = contextUri.lastIndexOf('#');
            if (fragmentStart > 0) {
                document.setDocumentURI(contextUri.substring(0, fragmentStart));
            }
        }

        List<Marking> markings = document.getMarkings();
        addSpanMarkings(model, context, markings);
        addTopicMarkings(model, context, markings);

        correctAnnotationPositions(document);
        return document;
    }

    /** Returns the first resource typed as nif:Context, warning if there are several. */
    private static Resource findContextResource(Model model) throws Exception {
        ResIterator contexts = model.listResourcesWithProperty(RDF.type, NIF.Context);
        try {
            if (!contexts.hasNext()) {
                LOGGER.error("Couldn't find the document resource inside the parsed NIF model.");
                throw new Exception("Couldn't find the document resource inside the parsed NIF model.");
            }
            Resource context = contexts.nextResource();
            if (contexts.hasNext()) {
                LOGGER.warn("Got a NIF model with more than one resource of the type nif:Context. Only the first one will be used.");
            }
            return context;
        } finally {
            // The iterator may not have been consumed completely.
            contexts.close();
        }
    }

    /** Returns the nif:isString literal of the context; the last one wins if there are several. */
    private static Literal readDocumentText(Model model, Resource context) throws Exception {
        NodeIterator texts = model.listObjectsOfProperty(context, NIF.isString);
        Literal text = null;
        while (texts.hasNext()) {
            if (text != null) {
                LOGGER.warn("Got a document with more than one nif:isString properties. Using the last one.");
            }
            text = texts.next().asLiteral();
        }
        if (text == null) {
            LOGGER.error("Got a document node without a text.");
            throw new Exception("Got a document node without a text.");
        }
        return text;
    }

    /** Adds one span-based marking for every resource that references the context. */
    private static void addSpanMarkings(Model model, Resource context, List<Marking> markings) {
        ResIterator annotations = model.listSubjectsWithProperty(NIF.referenceContext, context);
        while (annotations.hasNext()) {
            Resource annotation = annotations.nextResource();

            // -1 marks an index that is missing in the model.
            int begin = -1;
            int end = -1;
            NodeIterator beginIndexes = model.listObjectsOfProperty(annotation, NIF.beginIndex);
            if (beginIndexes.hasNext()) {
                begin = beginIndexes.next().asLiteral().getInt();
            }
            NodeIterator endIndexes = model.listObjectsOfProperty(annotation, NIF.endIndex);
            if (endIndexes.hasNext()) {
                end = endIndexes.next().asLiteral().getInt();
            }
            if (begin < 0 || end < 0) {
                LOGGER.warn(
                        "Found an annotation resource (\"{}\") without a start or end index. This annotation will be ignored.",
                        annotation.getURI());
                continue;
            }

            int length = end - begin;
            NodeIterator references = model.listObjectsOfProperty(annotation, ITSRDF.taIdentRef);
            if (!references.hasNext()) {
                markings.add(new SpanImpl(begin, length));
                continue;
            }
            String entityUri = references.next().toString();
            NodeIterator confidences = model.listObjectsOfProperty(annotation, ITSRDF.taConfidence);
            if (confidences.hasNext()) {
                double confidence = confidences.next().asLiteral().getDouble();
                markings.add(new ScoredNamedEntity(begin, length, entityUri, confidence));
            } else {
                markings.add(new NamedEntity(begin, length, entityUri));
            }
        }
    }

    /** Adds one position-less annotation for every nif:topic of the context that has a taIdentRef. */
    private static void addTopicMarkings(Model model, Resource context, List<Marking> markings) {
        NodeIterator topics = model.listObjectsOfProperty(context, NIF.topic);
        while (topics.hasNext()) {
            Resource topic = topics.next().asResource();
            NodeIterator references = model.listObjectsOfProperty(topic, ITSRDF.taIdentRef);
            if (!references.hasNext()) {
                // Topics without an identifier are skipped.
                continue;
            }
            String entityUri = references.next().toString();
            NodeIterator confidences = model.listObjectsOfProperty(topic, ITSRDF.taConfidence);
            if (confidences.hasNext()) {
                markings.add(new ScoredAnnotation(entityUri, confidences.next().asLiteral().getDouble()));
            } else {
                markings.add(new Annotation(entityUri));
            }
        }
    }

    /**
     * Rewrites the positions of all {@link Span} markings of the document from code point
     * offsets into {@code char} indices of the document text.
     * <p>
     * On entry, the start position and (start + length) of every span are interpreted as
     * counts of Unicode code points. On exit they are indices into the Java string, in which a
     * supplementary character occupies two {@code char}s. For texts without surrogate pairs
     * both units coincide and the spans are left as they are. Offsets that lie beyond the end
     * of the text are not modified.
     *
     * @param document the document whose spans are adjusted in place
     */
    protected void correctAnnotationPositions(Document document) {
        List<Span> byStart = document.getMarkings(Span.class);
        Collections.sort(byStart, new StartPosBasedComparator());
        List<Span> byEnd = new ArrayList<Span>(byStart);
        Collections.sort(byEnd, new EndPosBasedComparator());

        // Snapshot the code point offsets first because the spans are modified while scanning.
        int spanCount = byStart.size();
        int[] starts = new int[spanCount];
        int[] ends = new int[spanCount];
        for (int i = 0; i < spanCount; i++) {
            starts[i] = byStart.get(i).getStartPosition();
            Span span = byEnd.get(i);
            ends[i] = span.getStartPosition() + span.getLength();
        }

        String text = document.getText();
        int textLength = text.length();
        int seenCodePoints = 0;
        int nextStart = 0;
        int nextEnd = 0;
        for (int i = 0; i < textLength; i++) {
            // The second half of a surrogate pair does not begin a new code point. This has to
            // be checked explicitly: String.codePointCount(i, i + 1) is 1 for every single
            // char, including lone surrogates, and therefore cannot detect pairs.
            if (i > 0 && Character.isLowSurrogate(text.charAt(i))
                    && Character.isHighSurrogate(text.charAt(i - 1))) {
                continue;
            }
            seenCodePoints++;
            // Here i is the char index at which code point number (seenCodePoints - 1) begins.
            while (nextStart < spanCount && seenCodePoints > starts[nextStart]) {
                byStart.get(nextStart).setStartPosition(i);
                nextStart++;
            }
            // Starts are handled before ends, so the start of such a span is already converted.
            while (nextEnd < spanCount && seenCodePoints > ends[nextEnd]) {
                Span span = byEnd.get(nextEnd);
                span.setLength(i - span.getStartPosition());
                nextEnd++;
            }
        }

        // Offsets equal to the total number of code points address the end of the text, which
        // the loop above never reaches.
        while (nextStart < spanCount && starts[nextStart] == seenCodePoints) {
            byStart.get(nextStart).setStartPosition(textLength);
            nextStart++;
        }
        while (nextEnd < spanCount && ends[nextEnd] == seenCodePoints) {
            Span span = byEnd.get(nextEnd);
            span.setLength(textLength - span.getStartPosition());
            nextEnd++;
        }
    }

    /**
     * Returns the content type passed to the constructor.
     *
     * @return the HTTP content type of this parser
     */
    @Override
    public String getHttpContentType() {
        return this.httpContentType;
    }
}
