package edu.northwestern.at.morphadorner.xgtagger;

import edu.northwestern.at.morphadorner.MorphAdornerLogger;
import edu.northwestern.at.morphadorner.MorphAdornerSettings;
import edu.northwestern.at.utils.MapFactory;
import edu.northwestern.at.utils.UnicodeReader;
import edu.northwestern.at.utils.corpuslinguistics.inputter.TextInputter;
import edu.northwestern.at.utils.corpuslinguistics.outputter.AdornedWordOutputter;
import edu.northwestern.at.utils.corpuslinguistics.outputter.ByteStreamAdornedWordOutputter;
import edu.northwestern.at.utils.corpuslinguistics.outputter.ListAdornedWordOutputter;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Comment;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.Entity;
import org.w3c.dom.EntityReference;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/* loaded from: input_file:edu/northwestern/at/morphadorner/xgtagger/XGParser.class */
public class XGParser {
    // Parser state. Every field is package-private and is mutated in place while
    // extractText(Node) and modifyDOM(...) walk the document, so one instance
    // carries the bookkeeping from the extraction pass over to the merge pass.

    /** Options handed to the constructor. */
    XGOptions options;
    /** Entity declarations of the document's DOCTYPE; remains null when the document has no DOCTYPE. */
    NamedNodeMap nnmEntities;
    /** Not referenced anywhere else in this class. */
    int intLongWord;
    /** Outputter holding the adorned words; assigned by mergeAdornments and read by read(). */
    AdornedWordOutputter adornerOutputter;
    /** Surround marker exactly as returned by the options. */
    String surroundMarker;
    /** Surround marker with leading/trailing whitespace trimmed. */
    String surroundMarkerTrim;
    /** Length of the trimmed surround marker. */
    int surroundMarkerLength;
    /** Incremented once per createNewNode call that gets past its early return. */
    int wordNodesCreated;
    /** Platform file separator; the methods of this class use File.separator instead. */
    static final String FILE_SEPARATOR = System.getProperty("file.separator");
    /** Running word ID -> number of createNewNode calls that emitted nodes for that ID. */
    Map<Integer, Integer> splitWords = MapFactory.createNewMap();
    /** Text-node ordinal (value of intCountTags) -> begin/end offsets recorded by countNonBlankCharacters. */
    Map<Integer, XGPair> hMap = MapFactory.createNewMap();
    /** Attribute name -> value for the adorned word currently loaded by getNextEntry. */
    Map<String, String> hmAttributes = MapFactory.createNewMap();
    /** Not referenced anywhere else in this class. */
    String strLine = null;
    /** True right after extractText appended a surround marker behind a non-soft element; cleared by non-blank text or a soft tag. */
    boolean boolDot = false;
    /** Position counter advanced by modifyDOM and compared against the offsets stored in hMap. */
    int intCpt = 0;
    /** Running total of non-blank characters plus surround-marker lengths accumulated during extraction. */
    int intCountNonBlanks = 0;
    /** Ordinal of the text node being handled; bumped by countNonBlankCharacters and by modifyDOM, reset by mergeAdornments. */
    int intCountTags = 0;
    /** Spelling of the adorned word currently being matched against the text. */
    String strWord = "";
    /** Characters collected for the word element that createNewNode will emit next. */
    StringBuffer sbWord = new StringBuffer();
    /** Index of the next character of strWord to consume. */
    int intStrWordIndex = 0;
    /** Cached length of strWord. */
    int intStrWordLength = 0;
    /** Running word ID; incremented on every getNextEntry call. */
    int intID = 0;
    /** Reader over the adorned-word bytes; opened lazily by read(). */
    UnicodeReader frCurrent = null;
    /** Buffered wrapper around frCurrent; opened lazily by read(). */
    BufferedReader brCurrent = null;
    /** Index into adornedWordDataList of the next entry getNextEntry will load. */
    int nextAdornedWord = 0;
    /** Adorned word rows (each row is itself a List of String fields); assigned by mergeAdornments. */
    List adornedWordDataList = null;

    public XGParser(XGOptions xGOptions, Document document) {
        this.wordNodesCreated = 0;
        this.options = xGOptions;
        this.surroundMarker = this.options.getSurroundMarker();
        this.surroundMarkerTrim = this.surroundMarker.trim();
        this.surroundMarkerLength = this.surroundMarkerTrim.length();
        this.wordNodesCreated = 0;
        if (document.getDoctype() != null) {
            this.nnmEntities = document.getDoctype().getEntities();
        }
    }

    /**
     * Sets the running word ID.
     *
     * @param i new value of the running word ID
     */
    public void setRunningWordID(int i) {
        intID = i;
    }

    /**
     * Returns the running word ID.
     *
     * @return current value of the running word ID
     */
    public int getRunningWordID() {
        return intID;
    }

    /**
     * Returns the value of the created-word-node counter.
     *
     * @return number of times createNewNode has emitted nodes since construction
     */
    public int getNumberOfAdornedWords() {
        return wordNodesCreated;
    }

    /**
     * Reads one character from the adorned-word byte stream.
     *
     * <p>On first use the bytes held by the outputter are wrapped in a UTF-8
     * {@link UnicodeReader} and a {@link BufferedReader}; later calls reuse them.
     * The outputter must be a {@link ByteStreamAdornedWordOutputter}.
     *
     * @return the character read, as defined by {@link BufferedReader#read()}
     * @throws IOException if the underlying reader fails
     */
    protected int read() throws IOException, FileNotFoundException {
        if (this.frCurrent == null) {
            // Lazily open the reader chain over the outputter's bytes.
            ByteStreamAdornedWordOutputter byteOutputter =
                (ByteStreamAdornedWordOutputter) this.adornerOutputter;
            ByteArrayInputStream rawBytes = new ByteArrayInputStream(byteOutputter.getBytes());
            this.frCurrent = new UnicodeReader(rawBytes, "utf-8");
            this.brCurrent = new BufferedReader(this.frCurrent);
        }
        return this.brCurrent.read();
    }

    /**
     * Loads the next adorned word into the parser state.
     *
     * <p>Clears the current attributes and spelling, then, if another row is
     * available, copies the configured word field into {@code strWord} and every
     * field that has a non-empty XML attribute name into {@code hmAttributes}.
     * The running word ID is incremented and the word cursor reset whether or
     * not a row was available.
     *
     * @throws IOException declared for callers; not thrown by the visible code
     */
    protected void getNextEntry() throws IOException, FileNotFoundException {
        this.hmAttributes.clear();
        this.strWord = "";

        if (this.nextAdornedWord < this.adornedWordDataList.size()) {
            List fields = (List) this.adornedWordDataList.get(this.nextAdornedWord++);
            for (int column = 0; column < fields.size(); column++) {
                String value = (String) fields.get(column);
                // The word field option is 1-based, the row is 0-based.
                if (column + 1 == this.options.getWordField()) {
                    this.strWord = value;
                }
                String attributeName = MorphAdornerSettings.getXMLWordAttribute(column);
                if (!attributeName.isEmpty()) {
                    this.hmAttributes.put(attributeName, value);
                }
            }
        }

        this.intID++;
        this.intStrWordIndex = 0;
        this.intStrWordLength = this.strWord.length();
    }

    /**
     * Collects the text below {@code node}, inserting surround markers around
     * non-soft elements, and records the non-blank character span of every
     * text node visited (via {@code countNonBlankCharacters}).
     *
     * <p>Children are handled in document order, except that elements whose
     * name is a jump tag are deferred and processed after all other children,
     * each preceded by a surround marker.
     *
     * <p>Entity references: a reference to an external-file entity is descended
     * into unless file entities are ignored; any other reference is descended
     * into only when all entities are to be treated, otherwise it contributes a
     * single blank. A reference whose declaration cannot be found (no DOCTYPE,
     * or the name is not declared) is handled like an internal entity.
     *
     * <p>NOTE: if an external-file entity is met while the output is neither a
     * directory nor set to merge entities, an error is logged and the JVM is
     * terminated with {@code System.exit(-1)}.
     *
     * @param node node whose children are scanned
     * @return the accumulated text, including surround markers
     * @throws IOException propagated from nested calls
     */
    public StringBuffer extractText(Node node) throws IOException {
        StringBuffer text = new StringBuffer();
        // Set by blank-only text or a soft tag; makes the next non-soft element
        // get a marker in front of it. Cleared after a non-soft element.
        boolean markerBeforeNextHardTag = false;
        NodeList children = node.getChildNodes();
        int childCount = children.getLength();
        Vector<Integer> jumpTagIndexes = new Vector<Integer>();

        for (int i = 0; i < childCount; i++) {
            Node child = children.item(i);
            String childName = child.getNodeName();

            if (child instanceof EntityReference) {
                // The entity table is null without a DOCTYPE, and the lookup is
                // null for undeclared names; neither may crash the scan.
                Entity entity = this.nnmEntities == null
                    ? null
                    : (Entity) this.nnmEntities.getNamedItem(childName);
                boolean isFileEntity = entity != null && entity.getSystemId() != null;

                if (isFileEntity && !this.options.getEntityIgnoreFiles()) {
                    if (!this.options.isOutputDirectory() && !this.options.getEntityMerging()) {
                        MorphAdornerLogger.logError("Error: XML input  contains external file entity references.\n  Specified output should be a directory, or options xml.entities_not_files or xml.entities_merge should be set.\n");
                        System.exit(-1);
                    }
                    text.append(extractText(child));
                } else if (this.options.getEntityTreatAll()) {
                    text.append(extractText(child));
                } else {
                    text.append(" ");
                }
            } else if (child instanceof Text) {
                // Every whitespace character becomes a plain blank.
                String normalized = child.getNodeValue().replaceAll("\\s", " ");
                int nonBlank = countNonBlankCharacters(normalized);
                text.append(normalized);
                if (nonBlank > 0) {
                    this.boolDot = false;
                } else {
                    markerBeforeNextHardTag = true;
                }
            } else if (this.options.isJumpTag(childName)) {
                // Deferred: handled after the remaining siblings.
                jumpTagIndexes.add(Integer.valueOf(i));
            } else {
                boolean softTag = this.options.isSoftTag(childName);
                if (markerBeforeNextHardTag && !softTag) {
                    text.append(this.surroundMarker);
                    this.intCountNonBlanks += this.surroundMarkerLength;
                }
                StringBuffer childText = extractText(child);
                // NOTE(review): StringBuffer does not override equals, so this
                // comparison is always true and the branch always runs, even for
                // empty elements. Kept as is: a real emptiness check would stop
                // markers from being emitted after empty elements and so change
                // how the surrounding text is split into words.
                if (!childText.equals("")) {
                    text.append(childText);
                    if (softTag) {
                        markerBeforeNextHardTag = true;
                        this.boolDot = false;
                    } else {
                        // Avoid two markers in a row.
                        if (!this.boolDot) {
                            text.append(this.surroundMarker);
                            this.intCountNonBlanks += this.surroundMarkerLength;
                        }
                        this.boolDot = true;
                        markerBeforeNextHardTag = false;
                    }
                }
            }
        }

        for (int j = 0; j < jumpTagIndexes.size(); j++) {
            Node jumpNode = children.item(jumpTagIndexes.get(j).intValue());
            // The marker must be counted before descending so that the offsets
            // recorded for the jump tag's text nodes come after it.
            this.intCountNonBlanks += this.surroundMarkerLength;
            text.append(this.surroundMarker).append(extractText(jumpNode));
        }
        return text;
    }

    /**
     * Emits word elements for the characters collected in {@code sbWord} and
     * inserts them into {@code node} before {@code node2}.
     *
     * <p>Nothing is emitted when the buffer is empty or contains the trimmed
     * surround marker. Otherwise the buffered word is split on the special
     * separator (when one is configured) and one element is created per piece.
     * The buffer is always emptied before returning.
     *
     * <p>The separator is matched literally, as it is when the word is scanned
     * in {@code modifyDOM}; it is not interpreted as a regular expression.
     *
     * @param document factory for the new element and text nodes
     * @param node     parent that receives the new elements
     * @param node2    existing child of {@code node} before which elements are inserted
     * @param str      path prefix used for the word path attribute (only read when word paths are written)
     * @param num      number of word elements already numbered under this parent, or null for none
     * @return number of elements inserted
     */
    protected int createNewNode(Document document, Node node, Node node2, String str, Integer num) {
        if (this.sbWord.length() == 0 || this.sbWord.indexOf(this.surroundMarkerTrim) >= 0) {
            this.sbWord.setLength(0);
            return 0;
        }

        // Take the word and clear the buffer up front so that nothing can leak
        // into the next word, even if the split yields no pieces at all.
        String word = this.sbWord.toString();
        this.sbWord.setLength(0);

        String separator = this.options.getSpecialSeparator();
        String[] pieces = separator != null
            ? word.split(java.util.regex.Pattern.quote(separator))
            : new String[]{word};

        // Count how many times nodes were emitted for the current word ID.
        Integer wordId = Integer.valueOf(this.intID);
        Integer emittedSoFar = this.splitWords.get(wordId);
        this.splitWords.put(wordId, Integer.valueOf(emittedSoFar == null ? 1 : emittedSoFar.intValue() + 1));

        for (int i = 0; i < pieces.length; i++) {
            Element wordElement = document.createElement(this.options.getWordTagName());
            Text wordText = document.createTextNode(pieces[i]);
            if (this.options.getWriteIds()) {
                wordElement.setAttribute(this.options.getIdArgumentName(), String.valueOf(this.intID));
            }
            // Odd write-path values request a path attribute on word elements.
            if (this.options.getWritePath() % 2 == 1) {
                num = num == null ? Integer.valueOf(1) : Integer.valueOf(num.intValue() + 1);
                wordElement.setAttribute(this.options.getWordPathArgumentName(), str + File.separator + this.options.getWordTagName() + "[" + num.toString() + "]");
            }
            // Adornment attributes go on the first piece only, unless repeated.
            if (i == 0 || this.options.repeatAttributes()) {
                for (Map.Entry<String, String> attribute : this.hmAttributes.entrySet()) {
                    wordElement.setAttribute(attribute.getKey(), attribute.getValue());
                }
            }
            wordElement.appendChild(wordText);
            node.insertBefore(wordElement, node2);
        }
        this.wordNodesCreated++;
        return pieces.length;
    }

    /**
     * Returns a deep copy of {@code node}, built by shallow-cloning the node
     * and recursively cloning and appending each child.
     *
     * <p>If appending a cloned child is rejected with a {@link DOMException},
     * the manual copy is abandoned and the DOM's own deep clone,
     * {@code node.cloneNode(true)}, is returned instead. Returning at once
     * matters: continuing the loop would append the remaining children a second
     * time onto a clone that already contains them.
     *
     * @param node node to copy
     * @return a detached deep copy of {@code node}
     */
    protected static Node cloneNode(Node node) {
        Node copy = node.cloneNode(false);
        NodeList children = node.getChildNodes();
        int childCount = children.getLength();
        for (int i = 0; i < childCount; i++) {
            try {
                copy.appendChild(cloneNode(children.item(i)));
            } catch (DOMException rejected) {
                // The shallow copy cannot take children this way;
                // let the DOM implementation produce the complete deep copy.
                return node.cloneNode(true);
            }
        }
        return copy;
    }

    /**
     * Copies the content of an entity reference into a fresh, detached
     * {@code entityReferenceRoot} element.
     *
     * @param entityReference reference whose children are copied
     * @param document        factory for the wrapper element
     * @return the wrapper element holding a copy of every child of the reference
     */
    protected Node cloneEntityReference(EntityReference entityReference, Document document) {
        Element wrapper = document.createElement("entityReferenceRoot");
        // The reference itself is only read, so walking the sibling chain
        // visits the same children as an index loop would.
        for (Node child = entityReference.getFirstChild(); child != null; child = child.getNextSibling()) {
            wrapper.appendChild(cloneNode(child));
        }
        return wrapper;
    }

    /**
     * Rewrites the children of {@code node} in place, replacing each text node
     * with word elements built from the adorned words, and recursing into
     * child elements.
     *
     * <p>Handling per child:
     * <ul>
     *   <li>DOCTYPE: replaced by two explanatory comments.</li>
     *   <li>Entity reference to an internal (or undeclared) entity: when all
     *       entities are treated, replaced in place by a copy of its content,
     *       which is then processed like ordinary children.</li>
     *   <li>Entity reference to an external file: unless file entities are
     *       ignored, its content is copied and processed; with entity merging
     *       the processed content is inserted between two marker comments in
     *       place of the reference. If the output is neither a directory nor
     *       merged, an error is logged and the JVM exits.</li>
     *   <li>Text: consumed character by character against the offsets recorded
     *       during extraction; completed words become elements inserted before
     *       the text node, which is removed once fully consumed.</li>
     *   <li>Jump tags: deferred until the other children are done.</li>
     *   <li>Other elements: optionally given a path attribute, then recursed into.</li>
     * </ul>
     *
     * <p>The loop index and bound are adjusted by hand whenever children are
     * inserted or removed, because the child list is live.
     *
     * @param node     node whose children are rewritten
     * @param document owner document, used to create nodes
     * @param str      path of {@code node}, used as prefix for path attributes
     * @return the document to keep using (as returned by nested calls)
     * @throws DOMException if a DOM operation is rejected
     * @throws IOException  propagated from getNextEntry
     */
    public Document modifyDOM(Node node, Document document, String str) throws DOMException, IOException {
        NodeList childNodes = node.getChildNodes();
        String childPath = null;
        int length = childNodes.getLength();
        // Element name -> number of same-named siblings numbered so far.
        Map<String, Integer> siblingCounts = MapFactory.createNewMap();
        Vector<Integer> jumpTagIndexes = new Vector<Integer>();
        int i = 0;
        while (i < length) {
            Node item = childNodes.item(i);
            String nodeName = item.getNodeName();
            if (item instanceof DocumentType) {
                Comment removedNote = document.createComment("Document Type Description element (DOCTYPE \"" + item.getNodeName() + "\") has been removed. ");
                Comment dtdHint = document.createComment("To build a correct DTD for this document, change all #PCDATA into '" + this.options.getWordTagName() + "' element, containing #PCDATA.");
                node.insertBefore(removedNote, item);
                node.insertBefore(dtdHint, item);
                node.removeChild(item);
                // Two nodes in, one out: skip both comments.
                i++;
                length++;
                MorphAdornerLogger.logError(" *** Element DOCTYPE (\"" + item.getNodeName() + "\") removed in the output (out of date) *** ");
            } else if (item instanceof EntityReference) {
                // No DOCTYPE or an undeclared name yields no declaration;
                // such a reference is handled like an internal entity.
                Entity entity = this.nnmEntities == null
                    ? null
                    : (Entity) this.nnmEntities.getNamedItem(nodeName);
                if (entity == null || entity.getSystemId() == null) {
                    if (this.options.getEntityTreatAll()) {
                        Node expansion = cloneEntityReference((EntityReference) item, document);
                        // Move the copied content in front of the reference, in order.
                        // Always take the first remaining child: the child list is
                        // live and shrinks with every move.
                        Node moved;
                        while ((moved = expansion.getFirstChild()) != null) {
                            node.insertBefore(moved, item);
                            length++;
                        }
                        node.removeChild(item);
                        length--;
                        // Re-visit this index: it now holds the first moved node.
                        i--;
                    }
                } else if (!this.options.getEntityIgnoreFiles()) {
                    if (this.options.isOutputDirectory() || this.options.getEntityMerging()) {
                        Node expansion = cloneEntityReference((EntityReference) item, document);
                        document = modifyDOM(expansion, document, str);
                        if (this.options.getEntityMerging()) {
                            node.insertBefore(document.createComment(" ++ " + item.getNodeName() + " ++ Here begins the content of  entity " + item.getNodeName() + " inserted here in place of a reference to this entity in  the original document."), item);
                            i++;
                            length++;
                            // Already processed above, so every moved node is skipped.
                            Node moved;
                            while ((moved = expansion.getFirstChild()) != null) {
                                node.insertBefore(moved, item);
                                i++;
                                length++;
                            }
                            // The closing comment takes the slot of the removed reference.
                            node.insertBefore(document.createComment(" -- " + item.getNodeName() + " -- End of entity " + item.getNodeName()), item);
                            node.removeChild(item);
                        } else {
                            MorphAdornerLogger.logError("Internal error:  attempted to write secondary XML output file.");
                        }
                    } else {
                        MorphAdornerLogger.logError("Error: XML output file  contains some external file entity references.\n  Specified output should be a directory.");
                        System.exit(-1);
                    }
                }
            } else if (item instanceof Text) {
                this.intCountTags++;
                XGPair span = this.hMap.get(Integer.valueOf(this.intCountTags));
                int begin = span.begin;
                int end = span.end;
                // Skip adorned entries that are surround markers preceding this text.
                while (this.intCpt < begin && this.strWord.equals(this.surroundMarkerTrim)) {
                    getNextEntry();
                    this.intCpt++;
                }
                String separator = this.options.getSpecialSeparator();
                while (this.intCpt < end) {
                    if (!this.strWord.equals("")) {
                        this.sbWord.append(this.strWord.charAt(this.intStrWordIndex));
                    }
                    if (this.intStrWordIndex >= this.intStrWordLength - 1) {
                        // Current adorned word is complete: emit it and load the next.
                        int created = insertWordNodes(document, node, item, str, siblingCounts);
                        length += created;
                        i += created;
                        getNextEntry();
                    } else {
                        this.intStrWordIndex++;
                    }
                    // A separator inside the adorned word is not part of the text:
                    // copy it to the buffer and step over it without counting it.
                    if (separator != null && this.strWord.startsWith(separator, this.intStrWordIndex)) {
                        this.sbWord.append(separator);
                        this.intStrWordIndex += separator.length();
                    }
                    this.intCpt++;
                }
                // The text node ended in the middle of an adorned word.
                if (this.sbWord.length() > 0) {
                    int created = insertWordNodes(document, node, item, str, siblingCounts);
                    length += created;
                    i += created;
                }
                if (this.intCpt >= end) {
                    node.removeChild(item);
                    length--;
                    i--;
                }
            } else if (this.options.isJumpTag(nodeName)) {
                jumpTagIndexes.add(Integer.valueOf(i));
            } else {
                if (this.options.getWritePath() > 0) {
                    Integer seen = siblingCounts.get(nodeName);
                    Integer ordinal = seen == null ? Integer.valueOf(1) : Integer.valueOf(seen.intValue() + 1);
                    childPath = str + File.separator + nodeName + "[" + ordinal.toString() + "]";
                    if (this.options.getWritePath() >= 2) {
                        ((Element) item).setAttribute(this.options.getTagsPathArgumentName(), childPath);
                    }
                    siblingCounts.put(nodeName, ordinal);
                }
                document = modifyDOM(item, document, childPath);
            }
            i++;
        }

        // Jump tags are processed last, mirroring the order used during extraction.
        for (int j = 0; j < jumpTagIndexes.size(); j++) {
            Node jumpNode = childNodes.item(jumpTagIndexes.get(j).intValue());
            String jumpName = jumpNode.getNodeName();
            if (this.options.getWritePath() > 0) {
                Integer seen = siblingCounts.get(jumpName);
                Integer ordinal = seen == null ? Integer.valueOf(1) : Integer.valueOf(seen.intValue() + 1);
                childPath = str + File.separator + jumpName + "[" + ordinal.toString() + "]";
                if (this.options.getWritePath() >= 2) {
                    ((Element) jumpNode).setAttribute(this.options.getTagsPathArgumentName(), childPath);
                }
                siblingCounts.put(jumpName, ordinal);
            }
            // NOTE(review): extraction adds the marker length to this counter for
            // a jump tag, here it is bumped by one; left unchanged - confirm intent.
            this.intCountNonBlanks++;
            document = modifyDOM(jumpNode, document, childPath);
        }
        return document;
    }

    /**
     * Emits the buffered word before {@code textNode} and, when word paths are
     * written, keeps the per-parent count of word elements up to date.
     *
     * @return number of elements inserted
     */
    private int insertWordNodes(Document document, Node parent, Node textNode, String path, Map<String, Integer> siblingCounts) {
        if (this.options.getWritePath() % 2 != 1) {
            return createNewNode(document, parent, textNode, null, Integer.valueOf(0));
        }
        String wordTag = this.options.getWordTagName();
        Integer numbered = siblingCounts.get(wordTag);
        int created = createNewNode(document, parent, textNode, path, numbered);
        siblingCounts.put(wordTag, Integer.valueOf(numbered == null ? created : numbered.intValue() + created));
        return created;
    }

    /**
     * Counts the characters of {@code str} that are not a plain blank and
     * records the span they occupy in the running non-blank count.
     *
     * <p>Each call advances the text-node ordinal and stores, under that
     * ordinal, a pair whose end is the running total after this text and whose
     * begin is the previous total, plus one when the text has any non-blank
     * character.
     *
     * @param str text in which whitespace has already been mapped to blanks
     * @return number of non-blank characters in {@code str}
     * @throws IOException declared for callers; not thrown by the visible code
     */
    protected int countNonBlankCharacters(String str) throws IOException {
        this.intCountTags++;

        int nonBlank = 0;
        for (char c : str.toCharArray()) {
            if (c != ' ') {
                nonBlank++;
            }
        }

        int begin = nonBlank > 0 ? this.intCountNonBlanks + 1 : this.intCountNonBlanks;
        this.intCountNonBlanks += nonBlank;
        int end = this.intCountNonBlanks;

        this.hMap.put(Integer.valueOf(this.intCountTags), new XGPair(begin, end));
        return nonBlank;
    }

    /**
     * Extracts the text of a document with a fresh parser.
     *
     * @param xGOptions options for the parser
     * @param document  document to scan
     * @return a two-element array: the extracted text as a {@code String},
     *         followed by the {@code XGParser} that produced it (its recorded
     *         offsets are needed later by {@code mergeAdornments})
     * @throws IOException propagated from the extraction
     */
    public static Object[] extractText(XGOptions xGOptions, Document document) throws IOException {
        XGParser parser = new XGParser(xGOptions, document);
        // The text has to be extracted before the result array can be built.
        StringBuffer text = parser.extractText(document);
        return new Object[]{text.toString(), parser};
    }

    /**
     * Merges the adorned words held by {@code adornedWordOutputter} into the
     * document and hands the result to the text inputter.
     *
     * <p>The parser is rewound (text-node ordinal and next-word index set to
     * zero), primed with the first adorned word, and run over the document.
     * The rewritten document is printed to a temporary file that is deleted on
     * JVM exit; when printing reports {@code 1}, the file is registered with
     * {@code textInputter} under the name {@code str}.
     *
     * <p>The outputter must be a {@link ListAdornedWordOutputter}.
     *
     * @param xGOptions            not used by this method
     * @param xGParser             parser that previously extracted the text of {@code document}
     * @param document             document to rewrite in place
     * @param str                  segment name passed to the text inputter
     * @param adornedWordOutputter source of the adorned word rows
     * @param textInputter         receives the temporary file
     * @return the parser's map of word ID to number of node emissions
     * @throws IOException if the temporary file cannot be created or merging fails
     */
    public static Map<Integer, Integer> mergeAdornments(XGOptions xGOptions, XGParser xGParser, Document document, String str, AdornedWordOutputter adornedWordOutputter, TextInputter textInputter) throws IOException {
        // Rewind the parser and attach the adorned words.
        xGParser.adornerOutputter = adornedWordOutputter;
        xGParser.intCountTags = 0;
        xGParser.nextAdornedWord = 0;
        ListAdornedWordOutputter listOutputter = (ListAdornedWordOutputter) adornedWordOutputter;
        xGParser.adornedWordDataList = listOutputter.getAdornedWordDataList();
        xGParser.getNextEntry();

        Document adorned = xGParser.modifyDOM(document, document, "");

        File scratch = File.createTempFile("mad", null);
        scratch.deleteOnExit();
        int printStatus = XGMisc.printNodeToFile(adorned, scratch.getAbsolutePath());
        if (printStatus == 1) {
            textInputter.setSegmentText(str, scratch);
        }
        return xGParser.splitWords;
    }

    /**
     * Parses XML text into a DOM document, keeping entity references as
     * nodes instead of expanding them.
     *
     * <p>Parser configuration and SAX errors are not propagated: their message
     * is printed to standard output and {@code null} is returned.
     *
     * <p>NOTE(review): DTD processing and external entities are not disabled
     * on the factory, so the input should come from a trusted source.
     *
     * @param xGOptions not used by this method
     * @param str       XML text to parse
     * @return the parsed document, or {@code null} when parsing failed
     * @throws IOException if reading the text fails
     */
    public static Document textToDOM(XGOptions xGOptions, String str) throws IOException {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setExpandEntityReferences(false);
            return factory.newDocumentBuilder().parse(new InputSource(new StringReader(str)));
        } catch (ParserConfigurationException configError) {
            System.out.println(configError.getMessage());
        } catch (SAXException parseError) {
            System.out.println(parseError.getMessage());
        }
        return null;
    }
}
