package com.intel.analytics.bigdl.models.rnn;

import com.intel.analytics.bigdl.dataset.DataSet$;
import com.intel.analytics.bigdl.dataset.text.Dictionary;
import com.intel.analytics.bigdl.dataset.text.Dictionary$;
import com.intel.analytics.bigdl.dataset.text.SentenceBiPadding$;
import com.intel.analytics.bigdl.dataset.text.SentenceSplitter;
import com.intel.analytics.bigdl.dataset.text.SentenceSplitter$;
import com.intel.analytics.bigdl.dataset.text.SentenceTokenizer;
import com.intel.analytics.bigdl.dataset.text.SentenceTokenizer$;
import java.io.File;
import org.apache.spark.SparkContext;
import org.apache.spark.rdd.RDD;
import scala.Array$;
import scala.Option;
import scala.Predef$;
import scala.Tuple4;
import scala.collection.Iterator;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ArrayOps;
import scala.io.Codec$;
import scala.io.Source$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.ScalaRunTime$;

/* compiled from: Utils.scala */
/* loaded from: input_file:com/intel/analytics/bigdl/models/rnn/SequencePreprocess$.class */
public final class SequencePreprocess$ {
    /** Singleton instance of this object; assigned by the private constructor. */
    public static SequencePreprocess$ MODULE$;

    static {
        // The result is intentionally discarded: the constructor itself stores the new
        // instance in MODULE$, so nothing needs to be assigned here.
        new SequencePreprocess$();
    }

    /**
     * Loads a text file and turns it into a distributed collection of tokenized sentences.
     *
     * <p>The pipeline is: read all lines with {@link #load(String)}, run them through a
     * {@code SentenceSplitter}, flatten the resulting string arrays into individual strings,
     * apply {@code SentenceBiPadding} (with its default arguments) per partition, and finally
     * apply a {@code SentenceTokenizer} per partition.
     *
     * @param str path of the text file to read
     * @param sparkContext Spark context used to distribute the data
     * @param option argument handed to the sentence splitter factory
     *     (presumably an optional model file location -- confirm against SentenceSplitter)
     * @param option2 argument handed to the sentence tokenizer factory
     *     (presumably an optional model file location -- confirm against SentenceTokenizer)
     * @return an RDD whose elements are the token arrays produced by the tokenizer
     */
    public RDD<String[]> apply(String str, SparkContext sparkContext, Option<String> option, Option<String> option2) {
        SentenceSplitter splitter = SentenceSplitter$.MODULE$.apply(option);
        SentenceTokenizer tokenizer = SentenceTokenizer$.MODULE$.apply(option2);

        // Split the raw lines, then flatten every String[] into its individual strings.
        RDD sentences = DataSet$.MODULE$
                .array(load(str), sparkContext, ClassTag$.MODULE$.apply(String.class))
                .transform(splitter, ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(String.class)))
                .toDistributed()
                .data(false)
                .flatMap(chunk -> new ArrayOps.ofRef($anonfun$apply$1(chunk)), ClassTag$.MODULE$.apply(String.class));

        // Pad each sentence, one partition at a time, using the padder's default arguments.
        RDD padded = sentences.mapPartitions(
                part -> SentenceBiPadding$.MODULE$.apply(SentenceBiPadding$.MODULE$.apply$default$1(), SentenceBiPadding$.MODULE$.apply$default$2()).apply(part),
                sentences.mapPartitions$default$2(),
                ClassTag$.MODULE$.apply(String.class));

        // Tokenize each padded sentence, one partition at a time.
        return padded.mapPartitions(
                part -> tokenizer.apply(part),
                padded.mapPartitions$default$2(),
                ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(String.class)));
    }

    /**
     * Reads the three PTB files from a directory and converts them to word-index sequences.
     *
     * <p>The dictionary is built from the words of {@code ptb.train.txt} only and is then
     * used to index the training, validation and test files.
     *
     * @param str directory containing {@code ptb.train.txt}, {@code ptb.valid.txt} and
     *     {@code ptb.test.txt}
     * @param i size setting; {@code i - 1} is passed to the dictionary factory
     *     (presumably the vocabulary size -- confirm against Dictionary)
     * @return the train, validation and test index sequences, plus the dictionary used
     */
    public Tuple4<float[], float[], float[], Dictionary> apply(String str, int i) {
        String trainPath = new File(str, "ptb.train.txt").toString();
        String validPath = new File(str, "ptb.valid.txt").toString();
        String testPath = new File(str, "ptb.test.txt").toString();

        String[] trainWords = (String[]) readWords(trainPath).toArray(ClassTag$.MODULE$.apply(String.class));
        Dictionary dictionary = Dictionary$.MODULE$.apply(trainWords, i - 1);

        return new Tuple4<>(
                fileToWordIdx(trainPath, dictionary).toArray(ClassTag$.MODULE$.Float()),
                fileToWordIdx(validPath, dictionary).toArray(ClassTag$.MODULE$.Float()),
                fileToWordIdx(testPath, dictionary).toArray(ClassTag$.MODULE$.Float()),
                dictionary);
    }

    /**
     * Cuts a flat sequence into windows of {@code i + 1} values.
     *
     * <p>Window {@code k} starts at offset {@code k * i}, so consecutive windows overlap by
     * exactly one element: the last value of one window is the first value of the next.
     * Trailing values that cannot fill a complete window are dropped.
     *
     * @param fArr the flat sequence to slice
     * @param i distance between window starts; every window holds {@code i + 1} values
     * @return the windows in order of their start offset; an empty array when {@code fArr}
     *     holds fewer than {@code i + 1} values
     * @throws IllegalArgumentException if at least one window would fit but {@code i} is not
     *     positive (a zero stride would never advance through the input)
     */
    public float[][] reader(float[] fArr, int i) {
        // Largest offset at which a full window of i + 1 values still fits.
        int lastStart = (fArr.length - 1) - i;
        if (lastStart < 0) {
            return new float[0][];
        }
        if (i <= 0) {
            throw new IllegalArgumentException("window stride must be positive, got " + i);
        }

        // Starts are 0, i, 2i, ... up to and including lastStart.
        int windowCount = lastStart / i + 1;
        float[][] windows = new float[windowCount][];
        for (int k = 0; k < windowCount; k++) {
            float[] window = new float[i + 1];
            System.arraycopy(fArr, k * i, window, 0, i + 1);
            windows[k] = window;
        }
        return windows;
    }

    /**
     * Maps every word of a file to a boxed float index.
     *
     * <p>Words come from {@link #readWords(String)}; each one is converted by
     * {@code $anonfun$fileToWordIdx$1}, i.e. the dictionary index plus one.
     *
     * @param str path of the file to read
     * @param dictionary dictionary used to look up each word
     * @return an iterator over the boxed float indices, in file order
     */
    public Iterator<Object> fileToWordIdx(String str, Dictionary dictionary) {
        Iterator<String> words = readWords(str);
        return words.map(word -> BoxesRunTime.boxToFloat($anonfun$fileToWordIdx$1(dictionary, word)));
    }

    /**
     * Reads a text file into a flat stream of words.
     *
     * <p>Every line contributes its tokens (split on a single space) followed by one
     * {@code <eos>} marker. The whole file is read eagerly into a buffer, so the returned
     * iterator does not depend on the file once this method returns.
     *
     * @param str path of the file to read
     * @return an iterator over the buffered words, in file order
     */
    public Iterator<String> readWords(String str) {
        ArrayBuffer arrayBuffer = new ArrayBuffer();
        scala.io.BufferedSource source = Source$.MODULE$.fromFile(str, Codec$.MODULE$.fallbackSystemCodec());
        try {
            source.getLines().foreach(line -> {
                $anonfun$readWords$1(arrayBuffer, line);
                return BoxedUnit.UNIT;
            });
        } finally {
            // foreach has consumed every line by now; release the file handle even on failure.
            source.close();
        }
        return arrayBuffer.toIterator();
    }

    /**
     * Reads all lines of a text file into an array.
     *
     * @param str path of the file to read
     * @return the lines of the file, in order
     * @throws IllegalArgumentException if no file exists at {@code str}
     *     (raised by Scala's {@code require})
     */
    public String[] load(String str) {
        Predef$.MODULE$.require(new File(str).exists(), () -> {
            return new StringBuilder(22).append("data file ").append(str).append(" not exists!").toString();
        });
        scala.io.BufferedSource source = Source$.MODULE$.fromFile(str, Codec$.MODULE$.fallbackSystemCodec());
        try {
            // toArray drains the line iterator completely, so the source can be closed right after.
            return (String[]) source.getLines().toArray(ClassTag$.MODULE$.apply(String.class));
        } finally {
            source.close();
        }
    }

    /**
     * Compiler-generated body of the flatMap lambda in the Spark-based {@code apply}:
     * passes the string array through {@code Predef.refArrayOps}.
     */
    public static final /* synthetic */ Object[] $anonfun$apply$1(String[] strArr) {
        Object[] elements = Predef$.MODULE$.refArrayOps(strArr);
        return elements;
    }

    /**
     * Compiler-generated body of the mapping lambda in {@code fileToWordIdx}:
     * returns the dictionary index of the word, shifted up by one, as a float.
     */
    public static final /* synthetic */ float $anonfun$fileToWordIdx$1(Dictionary dictionary, String str) {
        float index = dictionary.getIndex(str);
        return index + 1.0f;
    }

    /**
     * Compiler-generated body of the inner lambda in {@code readWords}:
     * appends a single word to the buffer.
     */
    public static final /* synthetic */ void $anonfun$readWords$2(ArrayBuffer arrayBuffer, String str) {
        String[] single = {str};
        arrayBuffer.append(Predef$.MODULE$.wrapRefArray(single));
    }

    /**
     * Compiler-generated body of the per-line lambda in {@code readWords}:
     * splits the line on single spaces, appends every resulting token to the buffer,
     * then appends an {@code <eos>} marker.
     */
    public static final /* synthetic */ void $anonfun$readWords$1(ArrayBuffer arrayBuffer, String str) {
        String[] tokens = str.split(" ");
        new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(tokens)).foreach(token -> {
            $anonfun$readWords$2(arrayBuffer, token);
            return BoxedUnit.UNIT;
        });
        String[] endMarker = {"<eos>"};
        arrayBuffer.append(Predef$.MODULE$.wrapRefArray(endMarker));
    }

    /** Private so the only instance is the one created by the static initializer. */
    private SequencePreprocess$() {
        // Publish this instance as the singleton.
        MODULE$ = this;
    }
}
