package edu.northwestern.at.morphadorner.tools.fixquotes;

import edu.northwestern.at.utils.FileNameUtils;
import edu.northwestern.at.utils.FileUtils;
import edu.northwestern.at.utils.Formatters;
import edu.northwestern.at.utils.SetUtils;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.TaggedStrings;
import edu.northwestern.at.utils.xml.DOMUtils;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;

/* loaded from: input_file:edu/northwestern/at/morphadorner/tools/fixquotes/FixXMLQuotes.class */
/**
 * Command-line tool that repairs quotation marks in the text nodes of TEI XML files.
 *
 * <p>Command-line parameters, in order:
 * <ol>
 *   <li>file name of the set of "soft" tag names,</li>
 *   <li>file name of the set of "jump" tag names,</li>
 *   <li>output directory,</li>
 *   <li>zero or more input file names, which may contain wildcards.</li>
 * </ol>
 *
 * <p>Each input file is parsed, every {@code text} child of the {@code TEI} element is
 * walked, each text node is passed through {@code FixQuotes.repairQuotes}, and the result
 * is written to the output directory under the input file's own name.
 *
 * <p>All state is held in static fields, so the class is not safe for concurrent use.
 */
public class FixXMLQuotes {
    /** Document currently being processed. */
    protected static Document document;

    /** Number of leading command-line parameters that precede the input file names. */
    protected static final int INITPARAMS = 3;

    /** Directory that receives the repaired files. */
    protected static String outputDirectory;

    /** Contractions loaded from {@code resources/contractions.txt}. */
    protected static TaggedStrings contractions;

    /** Reusable matcher built from the contractions pattern. */
    protected static Matcher contractionsMatcher;

    protected static final String lsquo = "&lsquo;";
    protected static final String ldquo = "&ldquo;";
    protected static final String rsquo = "&rsquo;";
    protected static final String rdquo = "&rdquo;";
    protected static final String apos = "&apos;";

    /** Private-use-area character; not referenced within this class. */
    protected static final String sq = "\ue060";

    /** Private-use-area character; not referenced within this class. */
    protected static final String dq = "\ue061";

    /**
     * Private-use-area placeholder that stands in for {@code &apos;} while the document
     * is parsed and serialized; it is swapped back just before the file is written.
     */
    protected static final String ap = "\ue062";

    /** Tag names loaded from the first command-line parameter. */
    protected static Set<String> softTags;

    /** Tag names loaded from the second command-line parameter. */
    protected static Set<String> jumpTags;

    /** Total number of input files after wildcard expansion. */
    protected static int docsToProcess = 0;

    /** One-based index of the file currently being processed. */
    protected static int currentDocNumber = 0;

    /** Single-character context handed to the quote repairer ahead of the next text node. */
    protected static String prevChar = " ";

    protected static boolean debug = false;

    /**
     * Program entry point.
     *
     * @param strArr command-line parameters as described in the class comment
     */
    public static void main(String[] strArr) {
        if (!initialize(strArr)) {
            System.exit(1);
        }
        // Capture the start time BEFORE processing so the reported duration is real.
        long startTime = System.currentTimeMillis();
        int filesProcessed = processFiles(strArr);
        long elapsedSeconds = ((System.currentTimeMillis() - startTime) + 999) / 1000;
        terminate(filesProcessed, elapsedSeconds);
    }

    /**
     * Loads the tag sets, the output directory name and the contractions data.
     *
     * @param strArr command-line parameters; at least {@link #INITPARAMS} are required
     * @return {@code true} when everything loaded, {@code false} when there are too few
     *         parameters or a resource could not be read
     */
    protected static boolean initialize(String[] strArr) {
        // All INITPARAMS leading parameters are read below, so require every one of them.
        if (strArr == null || strArr.length < INITPARAMS) {
            System.out.println("Not enough parameters.");
            return false;
        }
        try {
            softTags = SetUtils.loadSet(strArr[0], "utf-8");
            jumpTags = SetUtils.loadSet(strArr[1], "utf-8");
            outputDirectory = strArr[2];
            contractions = FixQuotes.loadContractions("resources/contractions.txt");
            contractionsMatcher = FixQuotes.buildContractionsPattern(contractions).matcher("");
            return true;
        } catch (IOException e) {
            // Report the cause instead of exiting silently.
            System.out.println("Unable to load settings: " + e.getMessage());
            return false;
        }
    }

    /**
     * Repairs the quotes in one XML file and writes the result to the output directory.
     * Any failure is reported on the console and does not stop the run.
     *
     * @param str name of the input file
     */
    protected static void processOneFile(String str) {
        currentDocNumber++;
        System.out.println("Processing " + str + " (" + currentDocNumber + "/" + docsToProcess + ")");
        // A previous file that failed part-way may have left stale context behind.
        prevChar = " ";
        try {
            long loadStart = System.currentTimeMillis();
            // Hide &apos; behind a placeholder character for the duration of the DOM round trip.
            String xmlText = FileUtils.readTextFile(str, "utf-8").replace(apos, ap);
            document = DOMUtils.parseText(xmlText);
            // A document without a DOCTYPE yields a null doctype here; the resulting
            // exception is reported by the catch block below.
            String systemId = document.getDoctype().getSystemId();
            long loadSeconds = ((System.currentTimeMillis() - loadStart) + 999) / 1000;
            System.out.println("   Document loaded and parsed in " + Formatters.formatLongWithCommas(loadSeconds) + StringUtils.pluralize(loadSeconds, " second.", " seconds."));

            List<Node> textElements = DOMUtils.getChildren(DOMUtils.getChild(document, "TEI"), "text");
            long fixStart = System.currentTimeMillis();
            for (Node textElement : textElements) {
                traverse(textElement);
            }
            // Serialization escapes the ampersand of the inserted quote entities; undo that
            // and restore the original &apos; entities.
            String fixedXml = DOMUtils.saveToString(document, systemId)
                    .replace("&amp;ldquo", "&ldquo")
                    .replace("&amp;rdquo", "&rdquo")
                    .replace("&amp;lsquo", "&lsquo")
                    .replace("&amp;rsquo", "&rsquo")
                    .replace("&amp;apos", "&apos")
                    .replace(ap, apos);
            long fixSeconds = ((System.currentTimeMillis() - fixStart) + 999) / 1000;
            System.out.println("   Quotes fixed in " + Formatters.formatLongWithCommas(fixSeconds) + StringUtils.pluralize(fixSeconds, " second.", " seconds."));

            String outputPath = new File(outputDirectory, FileNameUtils.stripPathName(str)).getCanonicalPath();
            FileUtils.writeTextFile(outputPath, false, fixedXml, "utf-8");
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("   *** Failed");
        }
    }

    /**
     * Recursively walks a DOM subtree and repairs the quotes in every non-empty text node.
     *
     * <p>{@link #prevChar} carries the context character from one text node to the next:
     * a hard tag resets it to a blank both inside and after the element, a jump tag resets
     * it to a blank inside the element only, and a soft tag leaves it alone on entry.
     *
     * @param node root of the subtree to process
     */
    protected static void traverse(Node node) {
        String nodeName = node.getNodeName();
        // Context to reinstate once this (non-text) node has been processed.
        String contextAfterNode = prevChar;
        if (isHardTag(nodeName)) {
            contextAfterNode = " ";
            prevChar = " ";
        } else if (isJumpTag(nodeName)) {
            prevChar = " ";
        }

        NodeList childNodes = node.getChildNodes();
        if (childNodes != null) {
            for (int i = 0; i < childNodes.getLength(); i++) {
                traverse(childNodes.item(i));
            }
        }

        if (node.getNodeType() != Node.TEXT_NODE) {
            // NOTE(review): for soft tags this also discards the context produced inside the
            // element, restoring the value seen on entry - confirm that is intended.
            prevChar = contextAfterNode;
            return;
        }

        Text text = (Text) node;
        String data = text.getData();
        if (data == null || data.isEmpty()) {
            return;
        }
        String lastChar = data.substring(data.length() - 1);
        // Surround the text with one context character on each side, then strip both again.
        String repaired = FixQuotes.repairQuotes(prevChar + data + " ", contractionsMatcher, contractions);
        text.setData(repaired.substring(1, repaired.length() - 1));
        prevChar = lastChar;
    }

    /**
     * Expands the input file names and processes each resulting file.
     *
     * @param strArr full command-line parameters; file names start at index {@link #INITPARAMS}
     * @return number of files after wildcard expansion
     */
    protected static int processFiles(String[] strArr) {
        // Never request a negative array size when only the leading parameters are present.
        int nameCount = Math.max(0, strArr.length - INITPARAMS);
        String[] fileSpecs = new String[nameCount];
        if (nameCount > 0) {
            System.arraycopy(strArr, INITPARAMS, fileSpecs, 0, nameCount);
        }
        String[] fileNames = FileNameUtils.expandFileNameWildcards(fileSpecs);
        docsToProcess = fileNames.length;
        for (String fileName : fileNames) {
            processOneFile(fileName);
        }
        return fileNames.length;
    }

    /**
     * Prints the end-of-run summary.
     *
     * @param i number of files processed
     * @param j elapsed time in seconds
     */
    protected static void terminate(int i, long j) {
        System.out.println("Processed " + Formatters.formatIntegerWithCommas(i) + " files in " + Formatters.formatLongWithCommas(j) + StringUtils.pluralize(j, " second.", " seconds."));
    }

    /**
     * Tests whether a tag name is in the soft tag set, as given or lower-cased.
     *
     * @param str tag name
     * @return {@code true} when the name is a soft tag
     */
    protected static boolean isSoftTag(String str) {
        // Locale.ROOT keeps the lower-casing independent of the default locale.
        return softTags.contains(str) || softTags.contains(str.toLowerCase(Locale.ROOT));
    }

    /**
     * Tests whether a tag name is in the jump tag set, as given or lower-cased.
     *
     * @param str tag name
     * @return {@code true} when the name is a jump tag
     */
    protected static boolean isJumpTag(String str) {
        // Locale.ROOT keeps the lower-casing independent of the default locale.
        return jumpTags.contains(str) || jumpTags.contains(str.toLowerCase(Locale.ROOT));
    }

    /**
     * Tests whether a tag name is neither a soft tag nor a jump tag.
     *
     * @param str tag name
     * @return {@code true} when the name is a hard tag
     */
    protected static boolean isHardTag(String str) {
        return !isSoftTag(str) && !isJumpTag(str);
    }

    /** Not meant to be instantiated; every member is static. */
    protected FixXMLQuotes() {
    }
}
