package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ie.pascal.PascalTemplate;
import edu.stanford.nlp.ling.ChineseCoreAnnotations;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.tagger.maxent.TaggerConfig;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.Timing;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.Set;

/* loaded from: input_file:edu/stanford/nlp/pipeline/ChineseSegmenterAnnotator.class */
/**
 * Annotator that splits raw text into per-character {@link CoreLabel}s and then runs a
 * sequence classifier's {@code segmentString} over the text to produce word tokens.
 *
 * <p>For every sentence found under {@code CoreAnnotations.SentencesAnnotation} (or for the
 * annotation itself when no sentences are present) this annotator sets
 * {@code ChineseCoreAnnotations.CharactersAnnotation} and {@code CoreAnnotations.TokensAnnotation}.
 */
public class ChineseSegmenterAnnotator implements Annotator {
    private static final String DEFAULT_SEG_LOC = "/u/nlp/data/gale/segtool/stanford-seg/classifiers-2010/05202008-ctb6.processed-chris6.lex.gz";
    private static final String DEFAULT_SER_DICTIONARY = "/u/nlp/data/gale/segtool/stanford-seg/classifiers/dict-chris6.ser.gz";
    private static final String DEFAULT_SIGHAN_CORPORA_DICT = "/u/nlp/data/gale/segtool/stanford-seg/releasedata";

    /** Accumulated time reported by the timer across verbose {@link #annotate} calls; not synchronized. */
    private static long millisecondsAnnotating = 0;

    /** The loaded segmentation model; assigned by one of the {@code loadModel} overloads. */
    private AbstractSequenceClassifier<?> segmenter;
    private final Timing timer = new Timing();
    /** When true, progress and debugging output is written to {@code System.err}. */
    private final boolean verbose;

    /** Creates a non-verbose annotator using the default model location. */
    public ChineseSegmenterAnnotator() {
        this(DEFAULT_SEG_LOC, false);
    }

    /**
     * Creates an annotator using the default model location.
     *
     * @param z whether to print progress information to {@code System.err}
     */
    public ChineseSegmenterAnnotator(boolean z) {
        this(DEFAULT_SEG_LOC, z);
    }

    /**
     * Creates an annotator using the default dictionary locations.
     *
     * @param str location of the segmentation model
     * @param z   whether to print progress information to {@code System.err}
     */
    public ChineseSegmenterAnnotator(String str, boolean z) {
        this(str, z, DEFAULT_SER_DICTIONARY, DEFAULT_SIGHAN_CORPORA_DICT);
    }

    /**
     * Creates an annotator and loads its model immediately.
     *
     * @param str  location of the segmentation model
     * @param z    whether to print progress information to {@code System.err}
     * @param str2 value passed to the classifier as the {@code serDictionary} property
     * @param str3 value passed to the classifier as the {@code sighanCorporaDict} property
     */
    public ChineseSegmenterAnnotator(String str, boolean z, String str2, String str3) {
        this.verbose = z;
        Properties modelProps = new Properties();
        modelProps.setProperty("serDictionary", str2);
        modelProps.setProperty("sighanCorporaDict", str3);
        loadModel(str, modelProps);
    }

    /**
     * Creates an annotator configured from prefixed properties.
     *
     * <p>Every property named {@code <str>.<key>} is considered: {@code <str>.model} selects the
     * model location, and every other such property is forwarded to the classifier under the
     * un-prefixed {@code <key>}. {@code <str>.verbose} controls logging and defaults to
     * {@code true} when absent.
     *
     * @param str        the property-name prefix (without the trailing dot)
     * @param properties the properties to read the configuration from
     * @throws RuntimeException if no {@code <str>.model} property is present
     */
    public ChineseSegmenterAnnotator(String str, Properties properties) {
        String prefix = str + ".";
        String model = null;
        Properties modelProps = new Properties();
        for (String key : properties.stringPropertyNames()) {
            if (!key.startsWith(prefix)) {
                continue;
            }
            String shortKey = key.substring(prefix.length());
            if (shortKey.equals("model")) {
                model = properties.getProperty(key);
            } else {
                modelProps.setProperty(shortKey, properties.getProperty(key));
            }
        }
        this.verbose = PropertiesUtils.getBool(properties, str + ".verbose", true);
        if (model == null) {
            throw new RuntimeException("Expected a property " + str + ".model");
        }
        loadModel(model, modelProps);
    }

    /**
     * Loads the segmenter via {@code CRFClassifier.getClassifierNoExceptions}.
     * Not called from within this class at present.
     *
     * @param str location of the segmentation model
     */
    private void loadModel(String str) {
        if (this.verbose) {
            this.timer.start();
            System.err.print("Loading Segmentation Model [" + str + "]...");
        }
        this.segmenter = CRFClassifier.getClassifierNoExceptions(str);
        if (this.verbose) {
            this.timer.stop("done.");
        }
    }

    /**
     * Loads the segmenter via {@code CRFClassifier.getClassifier}, rethrowing runtime exceptions
     * unchanged and wrapping any other exception in a {@link RuntimeException}.
     *
     * @param str        location of the segmentation model
     * @param properties properties handed to the classifier
     */
    private void loadModel(String str, Properties properties) {
        if (this.verbose) {
            this.timer.start();
            System.err.print("Loading Segmentation Model [" + str + "]...");
        }
        try {
            this.segmenter = CRFClassifier.getClassifier(str, properties);
            if (this.verbose) {
                this.timer.stop("done.");
            }
        } catch (RuntimeException e) {
            throw e;
        } catch (Exception e) {
            // Preserve the cause so loading failures remain diagnosable.
            throw new RuntimeException(e);
        }
    }

    /**
     * Segments every sentence of the annotation, or the annotation itself when it carries no
     * {@code SentencesAnnotation}.
     *
     * @param annotation the annotation to segment in place
     */
    @Override // edu.stanford.nlp.pipeline.Annotator
    public void annotate(Annotation annotation) {
        if (this.verbose) {
            this.timer.start();
            System.err.print("Adding Segmentation annotation...");
        }
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        if (sentences != null) {
            for (CoreMap sentence : sentences) {
                doOneSentence(sentence);
            }
        } else {
            doOneSentence(annotation);
        }
        if (this.verbose) {
            millisecondsAnnotating += this.timer.stop("done.");
        }
    }

    /**
     * Runs character splitting followed by segmentation on a single sentence.
     *
     * @param coreMap the sentence (or document) to process; must carry a {@code TextAnnotation}
     */
    public void doOneSentence(CoreMap coreMap) {
        splitCharacters(coreMap);
        runSegmentation(coreMap);
    }

    /**
     * Builds one {@link CoreLabel} per non-whitespace, non-control character of the text and
     * stores the list under {@code ChineseCoreAnnotations.CharactersAnnotation}.
     *
     * <p>Each label records the character, its begin/end character offsets, and a segmentation
     * marker that differs depending on whether the character is the first one of the text or
     * directly follows a skipped (whitespace/control) character.
     *
     * @param coreMap the sentence whose {@code TextAnnotation} is split
     */
    public void splitCharacters(CoreMap coreMap) {
        String text = coreMap.get(CoreAnnotations.TextAnnotation.class);
        List<CoreLabel> chars = new ArrayList<CoreLabel>();
        // True at the start of the text and right after any skipped character.
        boolean afterBreak = true;
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (Character.isWhitespace(c) || Character.isISOControl(c)) {
                afterBreak = true;
                continue;
            }
            CoreLabel label = new CoreLabel();
            label.set(CoreAnnotations.ChineseCharAnnotation.class, String.valueOf(c));
            // NOTE(review): these two constants look like decompiler-resolved aliases for the
            // segment-start / segment-continuation marker literals; confirm their values.
            if (afterBreak) {
                label.set(CoreAnnotations.ChineseSegAnnotation.class, TaggerConfig.NTHREADS);
            } else {
                label.set(CoreAnnotations.ChineseSegAnnotation.class, PascalTemplate.BACKGROUND_SYMBOL);
            }
            label.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, Integer.valueOf(i));
            label.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, Integer.valueOf(i + 1));
            chars.add(label);
            afterBreak = false;
        }
        coreMap.set(ChineseCoreAnnotations.CharactersAnnotation.class, chars);
        if (this.verbose) {
            System.err.println("output: " + chars);
        }
    }

    /**
     * Segments the text and stores the resulting word tokens under
     * {@code CoreAnnotations.TokensAnnotation}.
     *
     * <p>Each token's character offsets are taken from the first and last entries of
     * {@code CharactersAnnotation} that the word covers, so {@link #splitCharacters} must have
     * run first. Empty segments produce no token.
     *
     * @param coreMap the sentence to segment
     * @throws IndexOutOfBoundsException if the segmenter returns more characters than
     *         {@code CharactersAnnotation} holds
     */
    public void runSegmentation(CoreMap coreMap) {
        String text = coreMap.get(CoreAnnotations.TextAnnotation.class);
        List<CoreLabel> chars = coreMap.get(ChineseCoreAnnotations.CharactersAnnotation.class);
        List<CoreLabel> tokens = new ArrayList<CoreLabel>();
        coreMap.set(CoreAnnotations.TokensAnnotation.class, tokens);
        List<String> words = this.segmenter.segmentString(text);
        if (this.verbose) {
            System.err.println(text);
            System.err.println("--->");
            System.err.println(words);
        }
        // Index into chars of the first character of the current word.
        int pos = 0;
        for (String word : words) {
            if (word.length() == 0) {
                // An empty segment yields no token. Only touch the character list when there is
                // still a character at this position, so a trailing empty segment (or one on
                // whitespace-only text) no longer triggers an IndexOutOfBoundsException.
                if (pos < chars.size()) {
                    chars.get(pos).set(CoreAnnotations.ChineseSegAnnotation.class, TaggerConfig.NTHREADS);
                }
                continue;
            }
            int end = pos + word.length();
            if (end > chars.size()) {
                throw new IndexOutOfBoundsException("Segmenter output does not align with the input characters: word \""
                        + word + "\" needs characters [" + pos + ", " + end + ") but only "
                        + chars.size() + " are available");
            }
            CoreLabel first = chars.get(pos);
            first.set(CoreAnnotations.ChineseSegAnnotation.class, TaggerConfig.NTHREADS);
            CoreLabel last = chars.get(end - 1);
            CoreLabel token = new CoreLabel();
            token.setWord(word);
            token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, first.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
            token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, last.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
            tokens.add(token);
            pos = end;
        }
    }

    /**
     * @return an empty set; this annotator declares no prerequisites
     */
    @Override // edu.stanford.nlp.pipeline.Annotator
    public Set<Annotator.Requirement> requires() {
        return Collections.emptySet();
    }

    /**
     * @return a singleton set containing {@code TOKENIZE_REQUIREMENT}
     */
    @Override // edu.stanford.nlp.pipeline.Annotator
    public Set<Annotator.Requirement> requirementsSatisfied() {
        return Collections.singleton(TOKENIZE_REQUIREMENT);
    }
}
