package edu.northwestern.at.utils.corpuslinguistics.tokenizer;

import edu.northwestern.at.utils.IsCloseableObject;
import edu.northwestern.at.utils.PatternReplacer;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.logger.DummyLogger;
import edu.northwestern.at.utils.logger.Logger;
import edu.northwestern.at.utils.logger.UsesLogger;

/* loaded from: input_file:edu/northwestern/at/utils/corpuslinguistics/tokenizer/AbstractPreTokenizer.class */
/**
 * Base class for pre-tokenizers.
 *
 * <p>Holds the logger required by {@link UsesLogger} and supplies a default
 * {@link #pretokenize(String)} that runs the input through two shared
 * {@link PatternReplacer} instances whose replacement templates put blanks
 * around the matched separator text.
 *
 * <p>NOTE(review): {@code PatternReplacer.replace} is presumably a regex
 * "replace all"; its exact semantics are defined outside this file.
 */
public abstract class AbstractPreTokenizer extends IsCloseableObject implements PreTokenizer, UsesLogger {

    // ---- Regular expression fragments -------------------------------------

    /** Matches a run of three or more consecutive periods. */
    protected static final String periods = "(\\.{3,})";

    /** Matches a run of one or more asterisks. Not referenced inside this class. */
    protected static final String asterisks = "([\\*]+)";

    /** Matches a run of two or more consecutive hyphens. */
    protected static final String hyphens = "(-{2,})";

    /**
     * Matches text that is always treated as a separator: a run of two or
     * more hyphens, a run of three or more periods, or a single character
     * from a class made up of brackets, braces, parentheses, the double
     * quote, several punctuation marks, and a number of Unicode blocks.
     * The General Punctuation block is included minus the bullet, the
     * prime marks, the curly single quotes and the Unicode hyphens; the
     * Geometric Shapes block is included minus the black circle.
     */
    protected static final String alwaysSeparators = "((-{2,})|(\\.{3,})|[\\(\\)\\[\\]\\{\\}\";:/=`¶<>“”—\\|¦❘[\\p{InGeneralPunctuation}&&[^•′″‴‘’‐‑]]\\p{InLetterlikeSymbols}\\p{InMathematicalOperators}\\p{InMiscellaneousTechnical}[\\p{InGeometricShapes}&&[^●]]\\p{InMiscellaneousSymbols}\\p{InDingbats}\\p{InAlphabeticPresentationForms}])";

    /**
     * Matches a comma (group 1) followed by one character that is not an
     * ASCII digit (group 2). A comma followed by a digit, or a comma that
     * is the last character of the input, does not match this pattern.
     */
    protected static final String commaSeparator = "(,)([^0-9])";

    // ---- Shared replacers -------------------------------------------------

    /** Rewrites each {@link #alwaysSeparators} match as the match with a blank on either side. */
    protected static PatternReplacer alwaysSeparatorsReplacer = new PatternReplacer(alwaysSeparators, " $1 ");

    /**
     * Rewrites each {@link #commaSeparator} match so that the comma has a
     * blank on either side and is followed by the character that came
     * after it.
     */
    protected static PatternReplacer commaSeparatorReplacer = new PatternReplacer(commaSeparator, " $1 $2");

    // ---- Instance state ---------------------------------------------------

    /** Logger used by this pre-tokenizer; starts out as a {@link DummyLogger}. */
    protected Logger logger = new DummyLogger();

    /**
     * Returns the logger currently attached to this pre-tokenizer.
     *
     * @return the current logger
     */
    @Override
    public Logger getLogger() {
        return logger;
    }

    /**
     * Replaces the logger attached to this pre-tokenizer.
     *
     * @param logger the logger to use from now on
     */
    @Override
    public void setLogger(Logger logger) {
        this.logger = logger;
    }

    /**
     * Prepares text for tokenization in three steps: tab characters are
     * replaced by blanks, then the always-separator replacer is applied,
     * then the comma replacer is applied to that result.
     *
     * @param str the text to prepare
     * @return the text after all three replacement steps
     */
    @Override
    public String pretokenize(String str) {
        // Step 1: turn tabs into plain blanks.
        String withoutTabs = StringUtils.replaceAll(str, "\t", " ");

        // Step 2: surround unconditional separators with blanks.
        String withSeparatorsSpaced = alwaysSeparatorsReplacer.replace(withoutTabs);

        // Step 3: split off commas that are not followed by a digit.
        return commaSeparatorReplacer.replace(withSeparatorsSpaced);
    }
}
