package edu.northwestern.at.morphadorner.tools.addcharacteroffsets;

import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.FileUtils;
import edu.northwestern.at.utils.PatternReplacer;
import edu.northwestern.at.utils.UnicodeReader;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;

/* loaded from: input_file:edu/northwestern/at/morphadorner/tools/addcharacteroffsets/AddCharacterOffsets.class */
/**
 * Command-line tool that derives two files from an adorned input file.
 *
 * <p>The input is read line by line. Lines containing a {@code <w ...>} tag
 * are copied to the adorned output with a {@code cof="..."} attribute added;
 * the attribute value is the length of the unadorned text accumulated so far,
 * i.e. the character offset at which the word starts in the unadorned output.
 * Lines containing a {@code <c>} tag and all other lines are copied to the
 * adorned output unchanged.
 *
 * <p>The unadorned output is built in memory from the word text, the
 * {@code <c>} text and the remaining lines, with a line break inserted once
 * the running line length exceeds {@link #MAXLINEWIDTH}. It is written only
 * after the whole input has been processed.
 */
public class AddCharacterOffsets {
    /** Group index in {@link #wPattern}: text before the {@code <w } tag. */
    protected static final int LEFT = 1;
    /** Group index in {@link #wPattern}: attribute text inside the {@code <w ...>} tag. */
    protected static final int ATTRS = 2;
    /** Group index in {@link #wPattern}: text between {@code <w ...>} and {@code </w>}. */
    protected static final int WORD = 3;
    /** Group index in {@link #wPattern}: text after {@code </w>}. */
    protected static final int RIGHT = 4;
    /** Group index in {@link #cPattern}: text before the {@code <c>} tag. */
    protected static final int CLEFT = 1;
    /** Group index in {@link #cPattern}: text between {@code <c>} and {@code </c>}. */
    protected static final int CDATA = 2;
    /**
     * Not used by this class; note that {@link #cPattern} as declared here
     * only defines two capturing groups.
     */
    protected static final int CRIGHT = 3;
    /** Running line length above which a line break is added to the unadorned text. */
    protected static final int MAXLINEWIDTH = 80;
    /** Platform line separator used for every break in the unadorned text. */
    protected static final String LINE_SEPARATOR = System.getProperty("line.separator");
    /** Regular expression splitting a line around a single {@code <w ...>...</w>} element. */
    protected static String wPattern = "^(.*)<w (.*)>(.*)</w>(.*)$";
    /** Matcher for {@link #wPattern}; only its {@code matchGroups} method is used here. */
    protected static PatternReplacer wreplacer = new PatternReplacer(wPattern, "");
    /** Regular expression splitting a line that ends in a {@code <c>...</c>} element. */
    protected static String cPattern = "^(.*)<c>(.*)</c>$";
    /** Matcher for {@link #cPattern}; only its {@code matchGroups} method is used here. */
    protected static PatternReplacer creplacer = new PatternReplacer(cPattern, "");

    /**
     * Program entry point. Runs the conversion when at least three arguments
     * are supplied; otherwise prints the usage text and exits with status 1.
     *
     * @param strArr input file, adorned output file, unadorned output file
     */
    public static void main(String[] strArr) {
        if (strArr.length < 3) {
            displayUsage();
            System.exit(1);
            return;
        }
        new AddCharacterOffsets(strArr);
    }

    /** Prints the command-line usage text to standard output. */
    public static void displayUsage() {
        System.out.println();
        System.out.println("Usage:");
        System.out.println();
        System.out.println("java edu.northwestern.at.morphadorner.tools.addcharacteroffsets.AddCharacterOffsets adornedinput.xml adornedoutput.xml unadornedoutput.xml");
        System.out.println();
        System.out.println("adornedinput.xml -- Standard MorphAdorner adorned output file.");
        System.out.println("adornedoutput.xml -- Derived adorned file with character");
        System.out.println("offsets added to tags.");
        System.out.println("unadornedoutput.xml -- Derived unadorned file whose word");
        System.out.println("offsets are given in adornedoutput.xml file.");
        System.out.println();
        System.out.println("The derived adorned output file adornedoutput.xml adds a");
        System.out.println("cof= attribute to each <w> tag. The cof= attribute");
        System.out.println("specifies the character (not byte) offset of each word in");
        System.out.println("the unadornedoutput.xml file. The latter file removes the");
        System.out.println("<w> and <c> tags from the adorned input file and outputs");
        System.out.println("the word and whitespace text as specified by the <w> and");
        System.out.println("<c> tags.");
    }

    /**
     * Performs the whole conversion.
     *
     * <p>Any exception is caught and its stack trace printed; nothing is
     * rethrown. Both streams are released even when processing fails
     * part-way. If processing fails, the unadorned output file is not written.
     *
     * @param strArr {@code [0]} adorned input file, {@code [1]} adorned output
     *               file (overwritten), {@code [2]} unadorned output file
     */
    public AddCharacterOffsets(String[] strArr) {
        String inputFileName = strArr[0];
        String adornedOutputFileName = strArr[1];
        String unadornedOutputFileName = strArr[2];

        BufferedReader reader = null;
        PrintWriter adornedWriter = null;
        try {
            reader = new BufferedReader(new UnicodeReader(new FileInputStream(new File(inputFileName)), "utf-8"));
            adornedWriter = new PrintWriter(new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(adornedOutputFileName, false)), "utf-8"));

            // Accumulated unadorned text; its current length is the offset
            // recorded for the next word.
            StringBuilder unadornedText = new StringBuilder();
            // True while word/whitespace text has been emitted since the last
            // plain (non <w>/<c>) line, so that line gets a break before it.
            boolean inTextRun = false;
            // True when the last thing appended to unadornedText was a line separator.
            boolean atLineStart = false;
            // Length of the unadorned line currently being built.
            int lineLength = 0;
            // Length the line had when it was last broken; restored when the
            // break is undone in front of punctuation.
            int lengthBeforeBreak = 0;
            // True until the first <w> line after the start or after a plain line.
            boolean firstWordAfterPlainLine = true;

            // NOTE(review): matchGroups is assumed to return an array indexed
            // by capturing-group number - confirm against PatternReplacer.
            String line = reader.readLine();
            while (line != null) {
                boolean hasWordTag = line.indexOf("<w ") >= 0;
                boolean hasCharTag = line.indexOf("<c>") >= 0;

                if (hasWordTag) {
                    String[] groups = wreplacer.matchGroups(line);
                    String word = groups[WORD];
                    if (atLineStart) {
                        if (firstWordAfterPlainLine || !CharUtils.isPunctuation(word)) {
                            // Start the new unadorned line with the text that
                            // precedes the tag on the input line.
                            unadornedText.append(groups[LEFT]);
                            atLineStart = false;
                            lineLength += groups[LEFT].length();
                        } else {
                            // Keep punctuation on the previous line: drop the
                            // separator just added and resume that line's length.
                            unadornedText.setLength(unadornedText.length() - LINE_SEPARATOR.length());
                            lineLength = lengthBeforeBreak;
                        }
                    }
                    // The offset must be taken before the word itself is appended.
                    line = groups[LEFT] + "<w " + groups[ATTRS] + " cof=\"" + unadornedText.length() + "\">" + groups[WORD] + "</w>" + groups[RIGHT];
                    unadornedText.append(word);
                    lineLength += word.length();
                    inTextRun = true;
                    if (lineLength > MAXLINEWIDTH) {
                        unadornedText.append(LINE_SEPARATOR);
                        lengthBeforeBreak = lineLength;
                        lineLength = 0;
                        atLineStart = true;
                    }
                    firstWordAfterPlainLine = false;
                } else if (hasCharTag) {
                    String[] groups = creplacer.matchGroups(line);
                    if (atLineStart) {
                        // At a line start the text before the tag is emitted
                        // instead of the <c> content.
                        unadornedText.append(groups[CLEFT]);
                        atLineStart = false;
                        lineLength += groups[CLEFT].length();
                    } else {
                        unadornedText.append(groups[CDATA]);
                    }
                    // NOTE(review): the <c> length is counted even when its
                    // content was not emitted above; kept as in the original.
                    lineLength += groups[CDATA].length();
                    inTextRun = true;
                    if (lineLength > MAXLINEWIDTH) {
                        unadornedText.append(LINE_SEPARATOR);
                        lengthBeforeBreak = lineLength;
                        lineLength = 0;
                        atLineStart = true;
                    }
                } else {
                    // Plain line: close any running text line, then copy the
                    // line through verbatim on a line of its own.
                    if (inTextRun) {
                        unadornedText.append(LINE_SEPARATOR);
                        inTextRun = false;
                    }
                    unadornedText.append(line);
                    unadornedText.append(LINE_SEPARATOR);
                    atLineStart = true;
                    lengthBeforeBreak = 0;
                    lineLength = 0;
                    firstWordAfterPlainLine = true;
                }

                adornedWriter.println(line);
                line = reader.readLine();
            }

            // Close both streams before writing the unadorned file, as the
            // original ordering did.
            reader.close();
            adornedWriter.close();
            FileUtils.writeTextFile(unadornedOutputFileName, false, unadornedText.toString(), "utf-8");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the streams when processing failed part-way. On the
            // success path they are already closed and closing again has no effect.
            if (adornedWriter != null) {
                adornedWriter.close();
            }
            if (reader != null) {
                try {
                    reader.close();
                } catch (java.io.IOException closeFailure) {
                    closeFailure.printStackTrace();
                }
            }
        }
    }
}
