package com.intel.analytics.bigdl.example.udfpredictor;

import com.intel.analytics.bigdl.example.udfpredictor.Utils;
import com.intel.analytics.bigdl.example.utils.TextClassifier;
import com.intel.analytics.bigdl.example.utils.WordMeta;
import com.intel.analytics.bigdl.models.utils.ModelBroadcast;
import com.intel.analytics.bigdl.models.utils.ModelBroadcast$;
import com.intel.analytics.bigdl.nn.Module$;
import com.intel.analytics.bigdl.nn.abstractnn.AbstractModule;
import com.intel.analytics.bigdl.nn.abstractnn.Activity;
import com.intel.analytics.bigdl.tensor.ConvertableFrom$ConvertableFromFloat$;
import com.intel.analytics.bigdl.tensor.ConvertableTo$ConvertableToInt$;
import com.intel.analytics.bigdl.tensor.Storage$;
import com.intel.analytics.bigdl.tensor.Tensor;
import com.intel.analytics.bigdl.tensor.Tensor$;
import com.intel.analytics.bigdl.tensor.TensorNumericMath;
import java.io.File;
import java.io.PrintWriter;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkContext;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.rdd.RDD;
import scala.Array$;
import scala.Function1;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.Tuple3;
import scala.Tuple4;
import scala.collection.IndexedSeq;
import scala.collection.immutable.Map;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.io.BufferedSource;
import scala.io.Codec$;
import scala.io.Source$;
import scala.math.Numeric$IntIsIntegral$;
import scala.math.Ordering$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.ScalaRunTime$;
import scopt.OptionParser;
import scopt.Read$;

/* compiled from: Utils.scala */
/* loaded from: input_file:com/intel/analytics/bigdl/example/udfpredictor/Utils$.class */
public final class Utils$ {
    /** Singleton instance of this class; assigned by the private constructor. */
    public static Utils$ MODULE$;
    /** Cached classifier; stays {@code null} until {@code getTextClassifier} creates it. */
    private TextClassifier textClassification;
    /** Command-line option parser, built once in the constructor. */
    private final OptionParser<TextClassificationUDFParams> localParser;

    // Instantiating the class runs the private constructor, which stores the new instance in MODULE$.
    static {
        new Utils$();
    }

    /** Returns the cached classifier, or {@code null} if none has been created yet. */
    private TextClassifier textClassification() {
        return textClassification;
    }

    /** Replaces the cached classifier with the given instance. */
    private void textClassification_$eq(TextClassifier textClassifier) {
        textClassification = textClassifier;
    }

    /**
     * Returns the shared classifier, creating it from the given parameters on first use.
     * Once a classifier exists, later calls return it and ignore their argument.
     *
     * @param textClassificationUDFParams parameters used to construct the classifier if none is cached
     * @return the cached (possibly just created) classifier
     */
    public TextClassifier getTextClassifier(TextClassificationUDFParams textClassificationUDFParams) {
        if (textClassification() != null) {
            return textClassification();
        }
        textClassification_$eq(new TextClassifier(textClassificationUDFParams));
        return textClassification();
    }

    /**
     * Produces the model used for prediction together with its input shape.
     *
     * <p>If {@code modelPath} is defined, the model is loaded from that path and both map slots of
     * the result are {@code None}. Otherwise the classifier's data is fetched, the word-to-index
     * pairs are written as text to {@code <baseDir>/word2Meta.txt}, a model is trained from the
     * data, and (if {@code checkpoint} is defined) saved to {@code <checkpoint>/model.1} with the
     * overwrite flag set.
     *
     * @param sparkContext                Spark context used for data loading, training and saving
     * @param textClassificationUDFParams run parameters (paths, sequence length, embedding dim)
     * @return (model, optional word metadata map, optional word vector map,
     *         {maxSequenceLength, embeddingDim})
     */
    public Tuple4<AbstractModule<Activity, Activity, Object>, Option<Map<String, WordMeta>>, Option<Map<Object, float[]>>, int[]> getModel(SparkContext sparkContext, TextClassificationUDFParams textClassificationUDFParams) {
        TextClassifier classifier = getTextClassifier(textClassificationUDFParams);

        // A pre-trained model was supplied: load it and skip training entirely.
        if (textClassificationUDFParams.modelPath().isDefined()) {
            return new Tuple4<>(Module$.MODULE$.load((String) textClassificationUDFParams.modelPath().get(), ClassTag$.MODULE$.Float()), None$.MODULE$, None$.MODULE$, new int[]{textClassificationUDFParams.maxSequenceLength(), textClassificationUDFParams.embeddingDim()});
        }

        Tuple3<RDD<Tuple2<float[][], Object>>[], Map<String, WordMeta>, Map<Object, float[]>> data = textClassifier(classifier, sparkContext);
        if (data == null) {
            throw new MatchError(data);
        }
        RDD<Tuple2<float[][], Object>>[] dataSplits = data._1();
        Map word2Meta = data._2();
        Map word2Vec = data._3();

        // Persist the word -> index pairs next to the training data.
        RDD wordIndexRdd = sparkContext.parallelize(word2Meta.mapValues(wordMeta -> BoxesRunTime.boxToInteger(wordMeta.index())).toSeq(), sparkContext.parallelize$default$2(), ClassTag$.MODULE$.apply(Tuple2.class));
        wordIndexRdd.saveAsTextFile(textClassificationUDFParams.baseDir() + "/word2Meta.txt");

        AbstractModule<Activity, Activity, Object> trained = classifier.trainFromData(sparkContext, dataSplits);
        if (textClassificationUDFParams.checkpoint().isDefined()) {
            trained.save(textClassificationUDFParams.checkpoint().get() + "/model.1", true);
        }
        return new Tuple4<>(trained.evaluate2(), new Some(word2Meta), new Some(word2Vec), new int[]{textClassificationUDFParams.maxSequenceLength(), textClassificationUDFParams.embeddingDim()});
    }

    /** Fetches the (data splits, word metadata, word vectors) triple from the classifier. */
    private Tuple3<RDD<Tuple2<float[][], Object>>[], Map<String, WordMeta>, Map<Object, float[]>> textClassifier(TextClassifier classifier, SparkContext sparkContext) {
        return classifier.getData(sparkContext);
    }

    /**
     * Delegates to the cached classifier's {@code buildWord2VecWithIndex}.
     * The classifier must already have been created via {@code getTextClassifier}; otherwise the
     * cached reference is {@code null} and this call fails.
     *
     * @param map word-to-index mapping passed through to the classifier
     * @return the map produced by the classifier
     */
    public Map<Object, float[]> getWord2Vec(Map<String, Object> map) {
        TextClassifier classifier = textClassification();
        return classifier.buildWord2VecWithIndex(map);
    }

    /**
     * Builds a string-to-label prediction function.
     * The model and both lookup maps are broadcast once here; the returned function hands the
     * broadcast handles, the shape array and the input text to {@code $anonfun$genUdf$1} and boxes
     * the resulting int.
     *
     * @param sparkContext   context used to create the broadcasts
     * @param abstractModule model to broadcast
     * @param iArr           shape array forwarded to the prediction routine
     * @param map            word lookup map (broadcast as-is)
     * @param map2           vector lookup map (broadcast as-is)
     * @param tensorNumeric  numeric helper forwarded to the prediction routine
     * @return function mapping a text to a boxed integer prediction
     */
    public Function1<String, Object> genUdf(SparkContext sparkContext, AbstractModule<Activity, Activity, Object> abstractModule, int[] iArr, Map<String, Object> map, Map<Object, float[]> map2, TensorNumericMath.TensorNumeric<Object> tensorNumeric) {
        ModelBroadcast modelHandle = ModelBroadcast$.MODULE$.apply(ClassTag$.MODULE$.Float(), tensorNumeric).broadcast(sparkContext, abstractModule);
        Broadcast wordIndexHandle = sparkContext.broadcast(map, ClassTag$.MODULE$.apply(Map.class));
        Broadcast wordVectorHandle = sparkContext.broadcast(map2, ClassTag$.MODULE$.apply(Map.class));
        return text -> BoxesRunTime.boxToInteger($anonfun$genUdf$1(iArr, wordIndexHandle, wordVectorHandle, tensorNumeric, modelHandle, text));
    }

    /**
     * Reads every regular file in a directory whose name consists only of digits and turns each
     * one into a {@code Utils.Sample} of (file name, file text).
     * Files are processed in sorted order and decoded as ISO-8859-1; lines are re-joined with
     * {@code "\n"}.
     *
     * @param str path of the directory to scan
     * @return one sample per matching file
     */
    public IndexedSeq<Utils.Sample> loadTestData(String str) {
        File[] entries = new File(str).listFiles();

        // Keep regular files only.
        Object[] regularFiles = (Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(entries)).filter(file -> BoxesRunTime.boxToBoolean(file.isFile()));

        // Keep files whose name is all digits.
        Object[] numericFiles = (Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(regularFiles)).filter(file2 -> BoxesRunTime.boxToBoolean($anonfun$loadTestData$2(file2)));

        File[] sortedFiles = (File[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(numericFiles)).sorted(Ordering$.MODULE$.ordered(Predef$.MODULE$.$conforms()));

        Utils.Sample[] samples = (Utils.Sample[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(sortedFiles)).map(file3 -> {
            String fileName = file3.getName();
            BufferedSource source = Source$.MODULE$.fromFile(file3, "ISO-8859-1");
            String text;
            try {
                text = source.getLines().toList().mkString("\n");
            } finally {
                // Always release the file handle, whether or not reading succeeded.
                source.close();
            }
            return new Utils.Sample(fileName, text);
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Utils.Sample.class)));

        return Predef$.MODULE$.wrapRefArray(samples);
    }

    /**
     * Copies a classpath resource into a newly created temporary file and returns that file's
     * absolute path.
     *
     * <p>The resource is read as text with the fallback system codec and written back with
     * {@link PrintWriter}'s default encoding, so it is intended for text resources.
     *
     * <p>NOTE(review): {@code File.createTempFile} and {@code new PrintWriter(File)} can raise
     * {@code IOException}; the signature does not declare it (this class is compiled from Scala),
     * and the signature is intentionally left unchanged for callers.
     *
     * @param str resource name, resolved relative to this class via {@code getResourceAsStream};
     *            also used as the temp-file name prefix
     * @return absolute path of the temporary copy
     * @throws NullPointerException if the resource cannot be found
     */
    public String getResourcePath(String str) {
        // Fail with a descriptive message instead of an anonymous NPE deep inside the reader.
        BufferedSource source = Source$.MODULE$.fromInputStream(
                java.util.Objects.requireNonNull(getClass().getResourceAsStream(str), "Resource not found on classpath: " + str),
                Codec$.MODULE$.fallbackSystemCodec());
        String contents;
        try {
            contents = source.mkString();
        } finally {
            // Previously the source (and the stream behind it) was never closed.
            source.close();
        }

        File tempFile = File.createTempFile(str, "");
        try (PrintWriter writer = new PrintWriter(tempFile)) {
            writer.write(contents);
        }
        return tempFile.getAbsolutePath();
    }

    /** Returns the command-line option parser built in the constructor. */
    public OptionParser<TextClassificationUDFParams> localParser() {
        return localParser;
    }

    /** Token filter: accepts only strings of at least three characters. */
    public static final /* synthetic */ boolean $anonfun$genUdf$2(String str) {
        return str.length() >= 3;
    }

    /**
     * Looks up the vector stored under the boxed float key {@code f}; when the map has no such
     * key, returns an array of {@code i} zeros instead.
     */
    public static final /* synthetic */ float[] $anonfun$genUdf$6(Map map, int i, float f) {
        if (map.contains(BoxesRunTime.boxToFloat(f))) {
            return (float[]) map.apply(BoxesRunTime.boxToFloat(f));
        }
        return (float[]) Array$.MODULE$.fill(i, () -> 0.0f, ClassTag$.MODULE$.Float());
    }

    /**
     * Body of the function returned by {@code genUdf}: turns one text into an int prediction.
     *
     * <p>Steps, as visible below: tokenize the text, map known tokens to float indices, truncate
     * or zero-pad to {@code iArr[0]} entries, expand every index to a vector (zeros of length
     * {@code iArr[1]} when unknown), load the flattened values into a tensor of size
     * {@code [1] ++ iArr}, run the broadcast model on that tensor with dimensions 2 and 3 swapped,
     * and convert the first element of the max result to an int.
     *
     * @param iArr           shape array; element 0 is the token count, element 1 the fallback vector length
     * @param broadcast      broadcast whose value is the token lookup map
     * @param broadcast2     broadcast whose value is the index-to-vector map
     * @param tensorNumeric  numeric helper used for element conversion
     * @param modelBroadcast broadcast model handle
     * @param str            input text
     * @return the prediction as an int
     * @throws IllegalArgumentException if the model output has neither 1 nor 2 dimensions
     */
    public static final /* synthetic */ int $anonfun$genUdf$1(int[] iArr, Broadcast broadcast, Broadcast broadcast2, TensorNumericMath.TensorNumeric tensorNumeric, ModelBroadcast modelBroadcast, String str) {
        float[] fArr;
        // i: number of token slots kept per text; i2: length of the zero vector used for unknown indices.
        int i = iArr[0];
        int i2 = iArr[1];
        Map map = (Map) broadcast.value();
        Map map2 = (Map) broadcast2.value();
        // Replace every non-letter with a space, lower-case, split on whitespace, keep tokens longer
        // than two characters, and convert each token found in `map` to its index as a float;
        // tokens missing from `map` are dropped by the flatten.
        float[] fArr2 = (float[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(str.replaceAll("[^a-zA-Z]", StringUtils.SPACE).toLowerCase().split("\\s+"))).filter(str2 -> {
            return BoxesRunTime.boxToBoolean($anonfun$genUdf$2(str2));
        }))).map(str3 -> {
            return map.contains(str3) ? new Some(BoxesRunTime.boxToFloat(BoxesRunTime.unboxToInt(map.apply(str3)))) : None$.MODULE$;
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Option.class))))).flatten(option -> {
            return Option$.MODULE$.option2Iterable(option);
        }, ClassTag$.MODULE$.Float());
        // Fit to exactly i entries: keep the LAST i indices when there are too many, otherwise
        // append 0.0f padding. Then look each index up via $anonfun$genUdf$6 and flatten the
        // resulting vectors into one float array.
        float[] fArr3 = (float[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofFloat(Predef$.MODULE$.floatArrayOps(fArr2.length > i ? (float[]) new ArrayOps.ofFloat(Predef$.MODULE$.floatArrayOps(fArr2)).slice(fArr2.length - i, fArr2.length) : (float[]) new ArrayOps.ofFloat(Predef$.MODULE$.floatArrayOps(fArr2)).$plus$plus(new ArrayOps.ofFloat(Predef$.MODULE$.floatArrayOps((float[]) Array$.MODULE$.fill(i - fArr2.length, () -> {
            return 0.0f;
        }, ClassTag$.MODULE$.Float()))), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.Float())))).map(obj -> {
            return $anonfun$genUdf$6(map2, i2, BoxesRunTime.unboxToFloat(obj));
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Float.TYPE)))))).flatten(fArr4 -> {
            return Predef$.MODULE$.wrapFloatArray(fArr4);
        }, ClassTag$.MODULE$.Float());
        Tensor apply = Tensor$.MODULE$.apply(ClassTag$.MODULE$.Float(), tensorNumeric);
        float[] fArr5 = null;
        // Number of values in one sample: the product of all entries of iArr.
        int unboxToInt = BoxesRunTime.unboxToInt(new ArrayOps.ofInt(Predef$.MODULE$.intArrayOps(iArr)).product(Numeric$IntIsIntegral$.MODULE$));
        AbstractModule value = modelBroadcast.value(modelBroadcast.value$default$1(), modelBroadcast.value$default$2());
        // The condition is constant-true, so the buffer is always allocated here.
        if (0 == 0) {
            fArr5 = new float[1 * unboxToInt];
        }
        // Convert each value through tensorNumeric and copy unboxToInt of them into the buffer.
        Array$.MODULE$.copy(new ArrayOps.ofFloat(Predef$.MODULE$.floatArrayOps(fArr3)).map(f -> {
            return tensorNumeric.fromType$mcF$sp(BoxesRunTime.boxToFloat(f), ConvertableFrom$ConvertableFromFloat$.MODULE$);
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.Float())), 0, fArr5, 0, unboxToInt);
        // Back the tensor with the buffer, using sizes {1} followed by the entries of iArr.
        apply.set(Storage$.MODULE$.apply(fArr5, ClassTag$.MODULE$.Float()), apply.set$default$2(), (int[]) new ArrayOps.ofInt(Predef$.MODULE$.intArrayOps(new int[]{1})).$plus$plus(new ArrayOps.ofInt(Predef$.MODULE$.intArrayOps(iArr)), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.Int())), apply.set$default$4());
        // Swap dimensions 2 and 3 before running the model.
        Tensor tensor = value.forward(apply.transpose(2, 3)).toTensor(tensorNumeric);
        // Take the second component of max() along the last dimension of the output
        // (presumably the arg-max indices — TODO confirm against Tensor.max).
        if (tensor.dim() == 2) {
            fArr = (float[]) ((Tensor) tensor.max(2)._2()).squeeze().storage().array();
        } else {
            if (tensor.dim() != 1) {
                throw new IllegalArgumentException();
            }
            fArr = (float[]) ((Tensor) tensor.max(1)._2()).squeeze().storage().array();
        }
        // Only the first element is used, converted to int.
        return BoxesRunTime.unboxToInt(tensorNumeric.toType$mcF$sp(fArr[0], ConvertableTo$ConvertableToInt$.MODULE$));
    }

    /** File filter: true when every character of the file's name is a digit. */
    public static final /* synthetic */ boolean $anonfun$loadTestData$2(File file) {
        StringOps fileName = new StringOps(Predef$.MODULE$.augmentString(file.getName()));
        return fileName.forall(ch -> BoxesRunTime.boxToBoolean(Character.isDigit(BoxesRunTime.unboxToChar(ch))));
    }

    /**
     * Initializes the singleton: publishes the instance through {@code MODULE$}, clears the cached
     * classifier, and builds the command-line parser.
     *
     * <p>Every option is read as a string; numeric options are converted with
     * {@code toInt}/{@code toDouble} inside their action. Each action returns a copy of the
     * parameters with exactly one field replaced and all others kept at their current values.
     *
     * <p>The help text of {@code --checkpoint} previously repeated the {@code --modelPath} text
     * ("where to load the model"); {@code getModel} saves the trained model under the checkpoint
     * path, so the text now says "save".
     */
    private Utils$() {
        MODULE$ = this;
        this.textClassification = null;
        this.localParser = new OptionParser<TextClassificationUDFParams>() { // from class: com.intel.analytics.bigdl.example.udfpredictor.Utils$$anon$1
            {
                // -b / --baseDir: replaces field 1.
                opt('b', "baseDir", Read$.MODULE$.stringRead()).text("Base dir containing the training and word2Vec data").action((str, textClassificationUDFParams) -> {
                    return textClassificationUDFParams.copy(str, textClassificationUDFParams.copy$default$2(), textClassificationUDFParams.copy$default$3(), textClassificationUDFParams.copy$default$4(), textClassificationUDFParams.copy$default$5(), textClassificationUDFParams.copy$default$6(), textClassificationUDFParams.copy$default$7(), textClassificationUDFParams.copy$default$8(), textClassificationUDFParams.copy$default$9(), textClassificationUDFParams.copy$default$10());
                });
                // -p / --partitionNum: parsed as int, replaces field 7.
                opt('p', "partitionNum", Read$.MODULE$.stringRead()).text("you may want to tune the partitionNum if run into spark mode").action((str2, textClassificationUDFParams2) -> {
                    return textClassificationUDFParams2.copy(textClassificationUDFParams2.copy$default$1(), textClassificationUDFParams2.copy$default$2(), textClassificationUDFParams2.copy$default$3(), textClassificationUDFParams2.copy$default$4(), textClassificationUDFParams2.copy$default$5(), textClassificationUDFParams2.copy$default$6(), new StringOps(Predef$.MODULE$.augmentString(str2)).toInt(), textClassificationUDFParams2.copy$default$8(), textClassificationUDFParams2.copy$default$9(), textClassificationUDFParams2.copy$default$10());
                });
                // -s / --maxSequenceLength: parsed as int, replaces field 2.
                opt('s', "maxSequenceLength", Read$.MODULE$.stringRead()).text("maxSequenceLength").action((str3, textClassificationUDFParams3) -> {
                    return textClassificationUDFParams3.copy(textClassificationUDFParams3.copy$default$1(), new StringOps(Predef$.MODULE$.augmentString(str3)).toInt(), textClassificationUDFParams3.copy$default$3(), textClassificationUDFParams3.copy$default$4(), textClassificationUDFParams3.copy$default$5(), textClassificationUDFParams3.copy$default$6(), textClassificationUDFParams3.copy$default$7(), textClassificationUDFParams3.copy$default$8(), textClassificationUDFParams3.copy$default$9(), textClassificationUDFParams3.copy$default$10());
                });
                // -w / --maxWordsNum: parsed as int, replaces field 3.
                opt('w', "maxWordsNum", Read$.MODULE$.stringRead()).text("maxWordsNum").action((str4, textClassificationUDFParams4) -> {
                    return textClassificationUDFParams4.copy(textClassificationUDFParams4.copy$default$1(), textClassificationUDFParams4.copy$default$2(), new StringOps(Predef$.MODULE$.augmentString(str4)).toInt(), textClassificationUDFParams4.copy$default$4(), textClassificationUDFParams4.copy$default$5(), textClassificationUDFParams4.copy$default$6(), textClassificationUDFParams4.copy$default$7(), textClassificationUDFParams4.copy$default$8(), textClassificationUDFParams4.copy$default$9(), textClassificationUDFParams4.copy$default$10());
                });
                // -l / --trainingSplit: parsed as double, replaces field 4.
                opt('l', "trainingSplit", Read$.MODULE$.stringRead()).text("trainingSplit").action((str5, textClassificationUDFParams5) -> {
                    return textClassificationUDFParams5.copy(textClassificationUDFParams5.copy$default$1(), textClassificationUDFParams5.copy$default$2(), textClassificationUDFParams5.copy$default$3(), new StringOps(Predef$.MODULE$.augmentString(str5)).toDouble(), textClassificationUDFParams5.copy$default$5(), textClassificationUDFParams5.copy$default$6(), textClassificationUDFParams5.copy$default$7(), textClassificationUDFParams5.copy$default$8(), textClassificationUDFParams5.copy$default$9(), textClassificationUDFParams5.copy$default$10());
                });
                // -z / --batchSize: parsed as int, replaces field 5.
                opt('z', "batchSize", Read$.MODULE$.stringRead()).text("batchSize").action((str6, textClassificationUDFParams6) -> {
                    return textClassificationUDFParams6.copy(textClassificationUDFParams6.copy$default$1(), textClassificationUDFParams6.copy$default$2(), textClassificationUDFParams6.copy$default$3(), textClassificationUDFParams6.copy$default$4(), new StringOps(Predef$.MODULE$.augmentString(str6)).toInt(), textClassificationUDFParams6.copy$default$6(), textClassificationUDFParams6.copy$default$7(), textClassificationUDFParams6.copy$default$8(), textClassificationUDFParams6.copy$default$9(), textClassificationUDFParams6.copy$default$10());
                });
                // --modelPath: wrapped in Some, replaces field 8.
                opt("modelPath", Read$.MODULE$.stringRead()).text("where to load the model").action((str7, textClassificationUDFParams7) -> {
                    return textClassificationUDFParams7.copy(textClassificationUDFParams7.copy$default$1(), textClassificationUDFParams7.copy$default$2(), textClassificationUDFParams7.copy$default$3(), textClassificationUDFParams7.copy$default$4(), textClassificationUDFParams7.copy$default$5(), textClassificationUDFParams7.copy$default$6(), textClassificationUDFParams7.copy$default$7(), new Some(str7), textClassificationUDFParams7.copy$default$9(), textClassificationUDFParams7.copy$default$10());
                });
                // --checkpoint: wrapped in Some, replaces field 9. This is the directory the trained
                // model is saved to, hence "save" rather than the copy-pasted "load".
                opt("checkpoint", Read$.MODULE$.stringRead()).text("where to save the model").action((str8, textClassificationUDFParams8) -> {
                    return textClassificationUDFParams8.copy(textClassificationUDFParams8.copy$default$1(), textClassificationUDFParams8.copy$default$2(), textClassificationUDFParams8.copy$default$3(), textClassificationUDFParams8.copy$default$4(), textClassificationUDFParams8.copy$default$5(), textClassificationUDFParams8.copy$default$6(), textClassificationUDFParams8.copy$default$7(), textClassificationUDFParams8.copy$default$8(), new Some(str8), textClassificationUDFParams8.copy$default$10());
                });
                // -f / --dataDir: replaces field 10.
                opt('f', "dataDir", Read$.MODULE$.stringRead()).text("Text dir containing the text data").action((str9, textClassificationUDFParams9) -> {
                    return textClassificationUDFParams9.copy(textClassificationUDFParams9.copy$default$1(), textClassificationUDFParams9.copy$default$2(), textClassificationUDFParams9.copy$default$3(), textClassificationUDFParams9.copy$default$4(), textClassificationUDFParams9.copy$default$5(), textClassificationUDFParams9.copy$default$6(), textClassificationUDFParams9.copy$default$7(), textClassificationUDFParams9.copy$default$8(), textClassificationUDFParams9.copy$default$9(), str9);
                });
            }
        };
    }
}
