package edu.stanford.nlp.process;

import edu.stanford.nlp.ling.CoreAnnotations$WordAnnotation;
import edu.stanford.nlp.ling.Document;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/process/WordToSentenceProcessor.class */
/**
 * Groups a flat list of tokens into sentences (lists of tokens).
 *
 * <p>Each token's word form is obtained from a {@link HasWord}, a {@code String}, or a
 * {@link CoreMap} (via its word annotation) and is then classified as follows:
 * <ul>
 *   <li><b>Boundary token</b>: appended to the current sentence, which is then closed.</li>
 *   <li><b>Boundary follower</b>: if it appears right after a closed sentence (i.e. while the
 *       current sentence is still empty), it is appended to that previous sentence;
 *       otherwise it is handled like any other token.</li>
 *   <li><b>Token to discard</b>: closes the current sentence and is itself dropped.</li>
 *   <li><b>Any other token</b>: appended to the current sentence.</li>
 * </ul>
 *
 * <p>Optionally, sentence splitting can be limited to regions delimited by tokens that fully
 * match a begin pattern and an end pattern. Tokens outside a region, as well as the delimiter
 * tokens themselves, are dropped from the output.
 *
 * @param <IN> the token type; instances must be a {@code HasWord}, {@code String} or
 *     {@code CoreMap} at runtime
 */
public class WordToSentenceProcessor<IN> implements ListProcessor<IN, List<IN>> {
    /** Words that end a sentence and are kept as its last token. */
    private final Set<String> sentenceBoundaryTokens;
    /** Words that are attached to the previous sentence when they directly follow its end. */
    private final Set<String> sentenceBoundaryFollowers;
    /** Words that end a sentence and are removed from the output. */
    private final Set<String> sentenceBoundaryToDiscard;
    /** Pattern whose full match opens a region; {@code null} means every token is in a region. */
    private final Pattern sentenceRegionBeginPattern;
    /** Pattern whose full match closes a region (and the current sentence); may be {@code null}. */
    private final Pattern sentenceRegionEndPattern;

    /**
     * Splits the given tokens into sentences.
     *
     * @param list the tokens to split, in document order
     * @return the non-empty sentences found, in order; each sentence is a mutable list of tokens
     * @throws IllegalArgumentException if a token is not a {@code HasWord}, {@code String} or
     *     {@code CoreMap}
     */
    @Override // edu.stanford.nlp.process.ListProcessor
    public List<List<IN>> process(List<? extends IN> list) {
        List<List<IN>> sentences = Generics.newArrayList();
        List<IN> currentSentence = null;
        List<IN> lastSentence = null;
        boolean insideRegion = false;
        for (IN token : list) {
            String word = wordOf(token);
            if (currentSentence == null) {
                currentSentence = new ArrayList<IN>();
            }

            // Region mode: skip everything until a token opens a region; the opener is dropped.
            if (this.sentenceRegionBeginPattern != null && !insideRegion) {
                if (this.sentenceRegionBeginPattern.matcher(word).matches()) {
                    insideRegion = true;
                }
                continue;
            }

            // Trailing material (e.g. a closing quote) right after a sentence end belongs to
            // the sentence that was just closed, not to the next one.
            if (this.sentenceBoundaryFollowers.contains(word) && lastSentence != null && currentSentence.isEmpty()) {
                lastSentence.add(token);
                continue;
            }

            boolean endOfSentence;
            if (this.sentenceBoundaryToDiscard.contains(word)) {
                // The token ends the sentence but is not kept.
                endOfSentence = true;
            } else if (this.sentenceRegionEndPattern != null && this.sentenceRegionEndPattern.matcher(word).matches()) {
                // The region closer ends both the region and the sentence and is not kept.
                insideRegion = false;
                endOfSentence = true;
            } else {
                endOfSentence = this.sentenceBoundaryTokens.contains(word);
                currentSentence.add(token);
            }

            // Never emit empty sentences (e.g. for consecutive discarded tokens).
            if (endOfSentence && !currentSentence.isEmpty()) {
                sentences.add(currentSentence);
                lastSentence = currentSentence;
                currentSentence = null;
            }
        }
        // Flush a trailing sentence that was not terminated by a boundary.
        if (currentSentence != null && !currentSentence.isEmpty()) {
            sentences.add(currentSentence);
        }
        return sentences;
    }

    /**
     * Extracts the word form used for boundary decisions from a token.
     *
     * @param token a {@code HasWord}, {@code String} or {@code CoreMap}
     * @return the token's word
     * @throws IllegalArgumentException if the token is of any other type (including {@code null})
     */
    private static String wordOf(Object token) {
        if (token instanceof HasWord) {
            return ((HasWord) token).word();
        }
        if (token instanceof String) {
            return (String) token;
        }
        if (token instanceof CoreMap) {
            return (String) ((CoreMap) token).get(CoreAnnotations$WordAnnotation.class);
        }
        String actualType = (token == null) ? "null" : token.getClass().getName();
        throw new IllegalArgumentException(
                "Expected token to be a HasWord, String, or CoreMap, but got: " + actualType);
    }

    /**
     * Splits a document into sentences.
     *
     * @param document the document whose tokens are split with {@link #process(List)}
     * @param <L> the first type parameter of {@code Document}, passed through unchanged
     * @param <F> the second type parameter of {@code Document}, passed through unchanged
     * @return the result of {@code document.blankDocument()} with all found sentences added
     */
    public <L, F> Document<L, F, List<IN>> processDocument(Document<L, F, IN> document) {
        // The blank document starts without elements, so viewing it with a different element
        // type is expected to be safe. NOTE(review): confirm against Document.blankDocument().
        @SuppressWarnings("unchecked")
        Document<L, F, List<IN>> sentenceDocument = (Document<L, F, List<IN>>) document.blankDocument();
        sentenceDocument.addAll(process(document));
        return sentenceDocument;
    }

    /**
     * Creates a processor that ends sentences at {@code "."}, {@code "?"} and {@code "!"} and
     * uses the default boundary followers and discarded tokens of the other constructors.
     */
    public WordToSentenceProcessor() {
        this(new HashSet<String>(Arrays.asList(".", "?", "!")));
    }

    /**
     * Creates a processor with the given boundary tokens and a default set of boundary
     * followers: {@code ")"}, {@code "]"}, {@code "\""}, {@code "'"}, {@code "-RSB-"} and
     * several {@code PTBLexer} constants (presumably the PTB-escaped closing quote, parenthesis
     * and brace; verify against {@code PTBLexer}).
     *
     * @param boundaryTokens words that end a sentence and are kept in it
     */
    public WordToSentenceProcessor(Set<String> boundaryTokens) {
        this(boundaryTokens, Generics.newHashSet(Arrays.asList(")", "]", "\"", "'", PTBLexer.closedblquote, PTBLexer.closeparen, "-RSB-", PTBLexer.closebrace)));
    }

    /**
     * Creates a processor with the given boundary tokens and followers that additionally ends
     * a sentence at, and discards, the newline token {@code "\n"}.
     *
     * @param boundaryTokens words that end a sentence and are kept in it
     * @param boundaryFollowers words attached to the previous sentence when they directly
     *     follow its end
     */
    public WordToSentenceProcessor(Set<String> boundaryTokens, Set<String> boundaryFollowers) {
        this(boundaryTokens, boundaryFollowers, Collections.singleton("\n"));
    }

    /**
     * Creates a processor with fully specified token sets and no region restriction.
     *
     * @param boundaryTokens words that end a sentence and are kept in it
     * @param boundaryFollowers words attached to the previous sentence when they directly
     *     follow its end
     * @param boundaryToDiscard words that end a sentence and are dropped from the output
     */
    public WordToSentenceProcessor(Set<String> boundaryTokens, Set<String> boundaryFollowers, Set<String> boundaryToDiscard) {
        this(boundaryTokens, boundaryFollowers, boundaryToDiscard, null, null);
    }

    /**
     * Creates a processor that splits purely by regions: all three token sets are empty, so a
     * sentence is everything between a token matching {@code regionBeginPattern} and the next
     * token matching {@code regionEndPattern}.
     *
     * @param regionBeginPattern pattern a token must fully match to open a region; if
     *     {@code null}, all tokens are treated as being inside a region
     * @param regionEndPattern pattern a token must fully match to close a region; if
     *     {@code null}, a region is never closed
     */
    public WordToSentenceProcessor(Pattern regionBeginPattern, Pattern regionEndPattern) {
        this(Collections.<String>emptySet(), Collections.<String>emptySet(), Collections.<String>emptySet(), regionBeginPattern, regionEndPattern);
    }

    /**
     * Stores the configuration as given; the sets are not copied, so later changes made by the
     * caller to a passed-in set are visible to this processor.
     */
    private WordToSentenceProcessor(Set<String> boundaryTokens, Set<String> boundaryFollowers, Set<String> boundaryToDiscard, Pattern regionBeginPattern, Pattern regionEndPattern) {
        this.sentenceBoundaryTokens = boundaryTokens;
        this.sentenceBoundaryFollowers = boundaryFollowers;
        this.sentenceBoundaryToDiscard = boundaryToDiscard;
        this.sentenceRegionBeginPattern = regionBeginPattern;
        this.sentenceRegionEndPattern = regionEndPattern;
    }
}
