package com.johnsnowlabs.nlp.embeddings;

import com.johnsnowlabs.nlp.Annotation;
import com.johnsnowlabs.nlp.AnnotatorModel;
import com.johnsnowlabs.nlp.AnnotatorType$;
import com.johnsnowlabs.nlp.HasSimpleAnnotate;
import com.johnsnowlabs.nlp.annotators.common.TokenPieceEmbeddings;
import com.johnsnowlabs.nlp.annotators.common.WordpieceEmbeddingsSentence;
import com.johnsnowlabs.nlp.annotators.common.WordpieceEmbeddingsSentence$;
import com.johnsnowlabs.nlp.serialization.MapFeature;
import com.johnsnowlabs.storage.Database;
import com.johnsnowlabs.storage.HasStorageRef;
import com.johnsnowlabs.storage.RocksDBConnection;
import org.apache.spark.ml.param.IntParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamPair;
import org.apache.spark.ml.param.ParamValidators$;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLReader;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.expressions.UserDefinedFunction;
import scala.Array$;
import scala.Function3;
import scala.MatchError;
import scala.Option;
import scala.Predef$;
import scala.Some;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.Map;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.math.Ordering$Int$;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.ScalaRunTime$;

/* compiled from: Word2VecModel.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005\re\u0001\u0002\f\u0018\u0001\u0001B\u0001B\u000e\u0001\u0003\u0006\u0004%\te\u000e\u0005\t\u000b\u0002\u0011\t\u0011)A\u0005q!)a\t\u0001C\u0001\u000f\")a\t\u0001C\u0001\u0013\"9!\n\u0001b\u0001\n\u0003Z\u0005B\u0002,\u0001A\u0003%A\nC\u0004X\u0001\t\u0007I\u0011I\u001c\t\ra\u0003\u0001\u0015!\u00039\u0011\u001dI\u0006A1A\u0005\u0002iCa!\u001b\u0001!\u0002\u0013Y\u0006\"\u00026\u0001\t\u0003Y\u0007\"B8\u0001\t\u0003\u0001\bbB:\u0001\u0005\u0004%\t\u0001\u001e\u0005\u0007\u007f\u0002\u0001\u000b\u0011B;\t\u000f\u0005\u0005\u0001\u0001\"\u0001\u0002\u0004!9\u0011Q\u0002\u0001\u0005B\u0005=\u0001bBA\u0017\u0001\u0011E\u0013qF\u0004\b\u0003+:\u0002\u0012AA,\r\u00191r\u0003#\u0001\u0002Z!1ai\u0005C\u0001\u0003[B\u0011\"a\u001c\u0014\u0003\u0003%I!!\u001d\u0003\u001b]{'\u000f\u001a\u001aWK\u000elu\u000eZ3m\u0015\tA\u0012$\u0001\u0006f[\n,G\rZ5oONT!AG\u000e\u0002\u00079d\u0007O\u0003\u0002\u001d;\u0005a!n\u001c5og:|w\u000f\\1cg*\ta$A\u0002d_6\u001c\u0001a\u0005\u0004\u0001C\u001dR\u0003g\r\t\u0004E\r*S\"A\r\n\u0005\u0011J\"AD!o]>$\u0018\r^8s\u001b>$W\r\u001c\t\u0003M\u0001i\u0011a\u0006\t\u0004E!*\u0013BA\u0015\u001a\u0005EA\u0015m]*j[BdW-\u00118o_R\fG/\u001a\t\u0003W9j\u0011\u0001\f\u0006\u0003[m\tqa\u001d;pe\u0006<W-\u0003\u00020Y\ti\u0001*Y:Ti>\u0014\u0018mZ3SK\u001a\u0004\"AJ\u0019\n\u0005I:\"a\u0006%bg\u0016k'-\u001a3eS:<7\u000f\u0015:pa\u0016\u0014H/[3t!\t\u0011C'\u0003\u000263\tI\u0002+\u0019:b[N\fe\u000e\u001a$fCR,(/Z:Xe&$\u0018M\u00197f\u0003\r)\u0018\u000eZ\u000b\u0002qA\u0011\u0011H\u0011\b\u0003u\u0001\u0003\"a\u000f 
\u000e\u0003qR!!P\u0010\u0002\rq\u0012xn\u001c;?\u0015\u0005y\u0014!B:dC2\f\u0017BA!?\u0003\u0019\u0001&/\u001a3fM&\u00111\t\u0012\u0002\u0007'R\u0014\u0018N\\4\u000b\u0005\u0005s\u0014\u0001B;jI\u0002\na\u0001P5oSRtDCA\u0013I\u0011\u001514\u00011\u00019)\u0005)\u0013aE5oaV$\u0018I\u001c8pi\u0006$xN\u001d+za\u0016\u001cX#\u0001'\u0011\u00075s\u0005+D\u0001?\u0013\tyeHA\u0003BeJ\f\u0017\u0010\u0005\u0002R%6\t\u0001!\u0003\u0002T)\ni\u0011I\u001c8pi\u0006$xN\u001d+za\u0016L!!V\r\u0003-!\u000b7oT;uaV$\u0018I\u001c8pi\u0006$xN\u001d+za\u0016\fA#\u001b8qkR\feN\\8uCR|'\u000fV=qKN\u0004\u0013aE8viB,H/\u00118o_R\fGo\u001c:UsB,\u0017\u0001F8viB,H/\u00118o_R\fGo\u001c:UsB,\u0007%\u0001\u0006wK\u000e$xN]*ju\u0016,\u0012a\u0017\t\u00039\u001el\u0011!\u0018\u0006\u0003=~\u000bQ\u0001]1sC6T!\u0001Y1\u0002\u00055d'B\u00012d\u0003\u0015\u0019\b/\u0019:l\u0015\t!W-\u0001\u0004ba\u0006\u001c\u0007.\u001a\u0006\u0002M\u0006\u0019qN]4\n\u0005!l&\u0001C%oiB\u000b'/Y7\u0002\u0017Y,7\r^8s'&TX\rI\u0001\u000eO\u0016$h+Z2u_J\u001c\u0016N_3\u0016\u00031\u0004\"!T7\n\u00059t$aA%oi\u0006i1/\u001a;WK\u000e$xN]*ju\u0016$\"!U9\t\u000bId\u0001\u0019\u00017\u0002\u000bY\fG.^3\u0002\u0017]|'\u000f\u001a,fGR|'o]\u000b\u0002kB!a/\u001f\u001d|\u001b\u00059(B\u0001=\u001a\u00035\u0019XM]5bY&T\u0018\r^5p]&\u0011!p\u001e\u0002\u000b\u001b\u0006\u0004h)Z1ukJ,\u0007cA'OyB\u0011Q*`\u0005\u0003}z\u0012QA\u00127pCR\fAb^8sIZ+7\r^8sg\u0002\nab]3u/>\u0014HMV3di>\u00148\u000fF\u0002R\u0003\u000bAaA]\bA\u0002\u0005\u001d\u0001#B\u001d\u0002\naZ\u0018bAA\u0006\t\n\u0019Q*\u00199\u0002\u0011\u0005tgn\u001c;bi\u0016$B!!\u0005\u0002*A1\u00111CA\u000f\u0003GqA!!\u0006\u0002\u001a9\u00191(a\u0006\n\u0003}J1!a\u0007?\u0003\u001d\u0001\u0018mY6bO\u0016LA!a\b\u0002\"\t\u00191+Z9\u000b\u0007\u0005ma\bE\u0002#\u0003KI1!a\n\u001a\u0005)\teN\\8uCRLwN\u001c\u0005\b\u0003W\u0001\u0002\u0019AA\t\u0003-\tgN\\8uCRLwN\\:\u0002\u001b\u00054G/\u001a:B]:|G/\u0019;f)\u0011\t\t$!\u0015\u0011\t\u0005M\u00121\n\b\u0005\u0003k\t9E\u0004\u0003
\u00028\u0005\rc\u0002BA\u001d\u0003\u0003rA!a\u000f\u0002@9\u00191(!\u0010\n\u0003\u0019L!\u0001Z3\n\u0005\t\u001c\u0017bAA#C\u0006\u00191/\u001d7\n\t\u0005m\u0011\u0011\n\u0006\u0004\u0003\u000b\n\u0017\u0002BA'\u0003\u001f\u0012\u0011\u0002R1uC\u001a\u0013\u0018-\\3\u000b\t\u0005m\u0011\u0011\n\u0005\b\u0003'\n\u0002\u0019AA\u0019\u0003\u001d!\u0017\r^1tKR\fQbV8sIJ2VmY'pI\u0016d\u0007C\u0001\u0014\u0014'\u001d\u0019\u00121LA1\u0003O\u00022!TA/\u0013\r\tyF\u0010\u0002\u0007\u0003:L(+\u001a4\u0011\u0007\u0019\n\u0019'C\u0002\u0002f]\u0011!DU3bI\u0006\u0014G.\u001a)sKR\u0014\u0018-\u001b8fI^{'\u000f\u001a\u001aWK\u000e\u00042!TA5\u0013\r\tYG\u0010\u0002\r'\u0016\u0014\u0018.\u00197ju\u0006\u0014G.\u001a\u000b\u0003\u0003/\n1B]3bIJ+7o\u001c7wKR\u0011\u00111\u000f\t\u0005\u0003k\ny(\u0004\u0002\u0002x)!\u0011\u0011PA>\u0003\u0011a\u0017M\\4\u000b\u0005\u0005u\u0014\u0001\u00026bm\u0006LA!!!\u0002x\t1qJ\u00196fGR\u0004")
/* loaded from: input_file:com/johnsnowlabs/nlp/embeddings/Word2VecModel.class */
/**
 * Annotator model that turns {@code TOKEN} annotations into {@code WORD_EMBEDDINGS}
 * annotations by looking each token's text up in an in-memory word-to-vector map
 * ({@link #wordVectors()}).
 *
 * <p>This source is decompiled from {@code Word2VecModel.scala}. Methods that exist only
 * because of Scala trait mix-in (the forwarders and the {@code _setter_} hooks) are kept
 * so the class still satisfies the interfaces it implements.
 *
 * <p>Defaults installed by the constructor: input columns {@code [TOKEN]}, output column
 * {@code "word2vec"}, {@code vectorSize = 100}.
 */
public class Word2VecModel extends AnnotatorModel<Word2VecModel> implements HasSimpleAnnotate<Word2VecModel>, HasStorageRef, HasEmbeddingsProperties {
    /** Unique identifier of this model instance. */
    private final String uid;
    /** Annotator types accepted as input; a single-element array holding TOKEN. */
    private final String[] inputAnnotatorTypes;
    /** Annotator type produced by this model (WORD_EMBEDDINGS). */
    private final String outputAnnotatorType;
    /** Length of each embedding vector; validated to be greater than 0. */
    private final IntParam vectorSize;
    /** Word-to-vector lookup table used by {@link #annotate(Seq)}. */
    private final MapFeature<String, float[]> wordVectors;
    // Not final: both fields are assigned through the trait "_setter_" hooks below, which
    // Java does not allow for final fields outside a constructor body.
    /** Backing field for the {@code dimension} param declared by HasEmbeddingsProperties. */
    private IntParam dimension;
    /** Backing field for the {@code storageRef} param declared by HasStorageRef. */
    private Param<String> storageRef;

    // ------------------------------------------------------------------------------------
    // Static forwarders to the companion-object singleton (Word2VecModel$.MODULE$).
    // NOTE(review): the "moNNN" prefixes look like decompiler-assigned aliases for the
    // overloaded pretrained(...) methods - confirm against the companion object source.
    // ------------------------------------------------------------------------------------

    /** Forwards to the two-argument {@code pretrained} overload on the companion object. */
    public static Word2VecModel pretrained(String str, String str2) {
        return Word2VecModel$.MODULE$.mo121pretrained(str, str2);
    }

    /** Forwards to the one-argument {@code pretrained} overload on the companion object. */
    public static Word2VecModel pretrained(String str) {
        return Word2VecModel$.MODULE$.mo122pretrained(str);
    }

    /** Forwards to the no-argument {@code pretrained} overload on the companion object. */
    public static Word2VecModel pretrained() {
        return Word2VecModel$.MODULE$.mo123pretrained();
    }

    /** Forwards to the three-argument {@code pretrained} overload on the companion object. */
    public static Word2VecModel pretrained(String str, String str2, String str3) {
        return Word2VecModel$.MODULE$.mo120pretrained(str, str2, str3);
    }

    /** Returns the companion object's default model name. */
    public static Some<String> defaultModelName() {
        return Word2VecModel$.MODULE$.mo124defaultModelName();
    }

    /** Returns the companion object's {@code defaultLoc} value. */
    public static String defaultLoc() {
        return Word2VecModel$.MODULE$.defaultLoc();
    }

    /** Returns the companion object's {@code defaultLang} value. */
    public static String defaultLang() {
        return Word2VecModel$.MODULE$.defaultLang();
    }

    /** Returns the companion object's {@link MLReader} for this model type. */
    public static MLReader<Word2VecModel> read() {
        return Word2VecModel$.MODULE$.read();
    }

    /** Registers an additional reader callback on the companion object. */
    public static void addReader(Function3<Word2VecModel, String, SparkSession, BoxedUnit> function3) {
        Word2VecModel$.MODULE$.addReader(function3);
    }

    /** Forwards to the companion object's {@code load} for the given path string. */
    public static Object load(String str) {
        return Word2VecModel$.MODULE$.load(str);
    }

    // ------------------------------------------------------------------------------------
    // Trait forwarders. Each one delegates to the implementation inherited from the
    // interface via Iface.super.m(...). Calling the bare method name here would invoke this
    // override again and recurse until StackOverflowError.
    // NOTE(review): this relies on the Scala traits exposing their bodies as interface
    // default methods (Scala 2.12+ encoding) - confirm against the compiled interfaces.
    // ------------------------------------------------------------------------------------

    @Override // com.johnsnowlabs.nlp.embeddings.HasEmbeddingsProperties
    public HasEmbeddingsProperties setDimension(int i) {
        return HasEmbeddingsProperties.super.setDimension(i);
    }

    @Override // com.johnsnowlabs.nlp.embeddings.HasEmbeddingsProperties
    public int getDimension() {
        return HasEmbeddingsProperties.super.getDimension();
    }

    @Override // com.johnsnowlabs.nlp.embeddings.HasEmbeddingsProperties
    public Column wrapEmbeddingsMetadata(Column column, int i, Option<String> option) {
        return HasEmbeddingsProperties.super.wrapEmbeddingsMetadata(column, i, option);
    }

    @Override // com.johnsnowlabs.nlp.embeddings.HasEmbeddingsProperties
    public Option<String> wrapEmbeddingsMetadata$default$3() {
        return HasEmbeddingsProperties.super.wrapEmbeddingsMetadata$default$3();
    }

    @Override // com.johnsnowlabs.nlp.embeddings.HasEmbeddingsProperties
    public Column wrapSentenceEmbeddingsMetadata(Column column, int i, Option<String> option) {
        return HasEmbeddingsProperties.super.wrapSentenceEmbeddingsMetadata(column, i, option);
    }

    @Override // com.johnsnowlabs.nlp.embeddings.HasEmbeddingsProperties
    public Option<String> wrapSentenceEmbeddingsMetadata$default$3() {
        return HasEmbeddingsProperties.super.wrapSentenceEmbeddingsMetadata$default$3();
    }

    @Override // com.johnsnowlabs.storage.HasStorageRef
    public RocksDBConnection createDatabaseConnection(Database database) {
        return HasStorageRef.super.createDatabaseConnection(database);
    }

    @Override // com.johnsnowlabs.storage.HasStorageRef
    public HasStorageRef setStorageRef(String str) {
        return HasStorageRef.super.setStorageRef(str);
    }

    @Override // com.johnsnowlabs.storage.HasStorageRef
    public String getStorageRef() {
        return HasStorageRef.super.getStorageRef();
    }

    @Override // com.johnsnowlabs.storage.HasStorageRef
    public void validateStorageRef(Dataset<?> dataset, String[] strArr, String str) {
        HasStorageRef.super.validateStorageRef(dataset, strArr, str);
    }

    @Override // com.johnsnowlabs.nlp.HasSimpleAnnotate
    public UserDefinedFunction dfAnnotate() {
        return HasSimpleAnnotate.super.dfAnnotate();
    }

    // ------------------------------------------------------------------------------------
    // Accessors and trait-field setter hooks.
    // ------------------------------------------------------------------------------------

    /** Returns the {@code dimension} param object. */
    @Override // com.johnsnowlabs.nlp.embeddings.HasEmbeddingsProperties
    public IntParam dimension() {
        return this.dimension;
    }

    /** Trait-field initializer hook; the constructor calls it to install the {@code dimension} param. */
    @Override // com.johnsnowlabs.nlp.embeddings.HasEmbeddingsProperties
    public void com$johnsnowlabs$nlp$embeddings$HasEmbeddingsProperties$_setter_$dimension_$eq(IntParam intParam) {
        this.dimension = intParam;
    }

    /** Returns the {@code storageRef} param object. */
    @Override // com.johnsnowlabs.storage.HasStorageRef
    public Param<String> storageRef() {
        return this.storageRef;
    }

    /**
     * Trait-field initializer hook for the {@code storageRef} param.
     * NOTE(review): presumably invoked by {@code HasStorageRef.$init$} during construction - confirm.
     */
    @Override // com.johnsnowlabs.storage.HasStorageRef
    public void com$johnsnowlabs$storage$HasStorageRef$_setter_$storageRef_$eq(Param<String> param) {
        this.storageRef = param;
    }

    /** Returns the unique identifier given at construction time. */
    public String uid() {
        return this.uid;
    }

    /** Returns the accepted input annotator types (a single TOKEN entry). */
    @Override // com.johnsnowlabs.nlp.HasInputAnnotationCols
    public String[] inputAnnotatorTypes() {
        return this.inputAnnotatorTypes;
    }

    /** Returns the produced annotator type (WORD_EMBEDDINGS). */
    @Override // com.johnsnowlabs.nlp.HasOutputAnnotatorType
    public String outputAnnotatorType() {
        return this.outputAnnotatorType;
    }

    /** Returns the {@code vectorSize} param object. */
    public IntParam vectorSize() {
        return this.vectorSize;
    }

    /** Returns the current value of the {@code vectorSize} param (default 100). */
    public int getVectorSize() {
        return BoxesRunTime.unboxToInt($(vectorSize()));
    }

    /**
     * Sets {@code vectorSize} only when no value has been set explicitly yet. Once a value
     * has been set, later calls are silently ignored.
     *
     * @param i the vector size to install
     * @return this model, for call chaining
     */
    public Word2VecModel setVectorSize(int i) {
        // The casts select the untyped set/get overloads, as in the decompiled original.
        if (get((Param) vectorSize()).isEmpty()) {
            set((Param) vectorSize(), (Object) BoxesRunTime.boxToInteger(i));
        }
        return this;
    }

    /** Returns the feature holding the word-to-vector map. */
    public MapFeature<String, float[]> wordVectors() {
        return this.wordVectors;
    }

    /**
     * Installs the word-to-vector lookup table.
     *
     * @param map word text mapped to its embedding vector
     * @return this model, for call chaining
     */
    public Word2VecModel setWordVectors(Map<String, float[]> map) {
        return (Word2VecModel) set(wordVectors(), map);
    }

    // ------------------------------------------------------------------------------------
    // Annotation logic.
    // ------------------------------------------------------------------------------------

    /**
     * Produces embedding annotations for the TOKEN annotations in {@code seq}.
     *
     * <p>Tokens are grouped by the integer parsed from their {@code "sentence"} metadata
     * entry and the groups are processed in ascending order of that integer. Each token's
     * {@code result()} text is looked up in {@link #wordVectors()}; tokens without an entry
     * receive an all-zero vector of length {@link #getVectorSize()}.
     *
     * @param seq incoming annotations; non-TOKEN annotations are ignored
     * @return the packed embedding annotations, or an empty sequence when {@code seq}
     *         contains no TOKEN annotations
     */
    @Override // com.johnsnowlabs.nlp.HasSimpleAnnotate
    public Seq<Annotation> annotate(Seq<Annotation> seq) {
        // (sentenceIndex, tokens) pairs, ordered by sentence index.
        Seq tokensBySentence = (Seq) ((TraversableLike) seq.filter(annotation -> {
            return BoxesRunTime.boxToBoolean($anonfun$annotate$1(annotation));
        })).groupBy(annotation2 -> {
            return BoxesRunTime.boxToInteger($anonfun$annotate$2(annotation2));
        }).toSeq().sortBy(tuple2 -> {
            return BoxesRunTime.boxToInteger(tuple2._1$mcI$sp());
        }, Ordering$Int$.MODULE$);
        if (tokensBySentence.isEmpty()) {
            return Seq$.MODULE$.empty();
        }
        return WordpieceEmbeddingsSentence$.MODULE$.pack((Seq) tokensBySentence.map(tuple22 -> {
            if (tuple22 == null) {
                throw new MatchError(tuple22);
            }
            int sentenceIndex = tuple22._1$mcI$sp();
            Seq sentenceTokens = (Seq) tuple22._2();
            // Fallback vector for words missing from the table. The same array instance is
            // handed to every unknown token of this sentence.
            float[] zeroVector = (float[]) Array$.MODULE$.fill(this.getVectorSize(), () -> {
                return 0.0f;
            }, ClassTag$.MODULE$.Float());
            return new WordpieceEmbeddingsSentence((TokenPieceEmbeddings[]) ((TraversableOnce) sentenceTokens.map(annotation3 -> {
                float[] vector = (float[]) this.$$(this.wordVectors()).getOrElse(annotation3.result(), () -> {
                    return zeroVector;
                });
                // The boolean compares the vector element-wise with the zero fallback, so a
                // known word whose stored vector is all zeros is flagged the same way.
                // NOTE(review): presumably the out-of-vocabulary flag - confirm against
                // the TokenPieceEmbeddings constructor.
                boolean matchesZeroVector = new ArrayOps.ofFloat(Predef$.MODULE$.floatArrayOps(vector)).sameElements(Predef$.MODULE$.wrapFloatArray(zeroVector));
                return new TokenPieceEmbeddings(annotation3.result(), annotation3.result(), -1, true, matchesZeroVector, vector, annotation3.begin(), annotation3.end());
            }, Seq$.MODULE$.canBuildFrom())).toArray(ClassTag$.MODULE$.apply(TokenPieceEmbeddings.class)), sentenceIndex);
        }, Seq$.MODULE$.canBuildFrom()));
    }

    /**
     * Rewrites the output column by passing it through {@code wrapEmbeddingsMetadata}
     * together with the current vector size and storage reference.
     *
     * @param dataset dataset that already contains the output column
     * @return the dataset with the output column replaced by its wrapped version
     */
    @Override // com.johnsnowlabs.nlp.AnnotatorModel
    public Dataset<Row> afterAnnotate(Dataset<Row> dataset) {
        String outputColumn = getOutputCol();
        Column wrapped = wrapEmbeddingsMetadata(dataset.col(outputColumn), getVectorSize(), new Some($(storageRef())));
        return dataset.withColumn(outputColumn, wrapped);
    }

    /** Lambda body: true when the annotation's type equals TOKEN (null-safe comparison). */
    public static final /* synthetic */ boolean $anonfun$annotate$1(Annotation annotation) {
        String annotatorType = annotation.annotatorType();
        String TOKEN = AnnotatorType$.MODULE$.TOKEN();
        return annotatorType != null ? annotatorType.equals(TOKEN) : TOKEN == null;
    }

    /** Lambda body: parses the annotation's {@code "sentence"} metadata entry as an int. */
    public static final /* synthetic */ int $anonfun$annotate$2(Annotation annotation) {
        return new StringOps(Predef$.MODULE$.augmentString((String) annotation.metadata().apply("sentence"))).toInt();
    }

    /**
     * Creates a model with the given unique identifier and installs the params and their
     * defaults.
     *
     * @param str unique identifier for this instance
     */
    public Word2VecModel(String str) {
        this.uid = str;
        // Trait initializers must run before the params they create are referenced below.
        HasSimpleAnnotate.$init$(this);
        HasStorageRef.$init$((HasStorageRef) this);
        com$johnsnowlabs$nlp$embeddings$HasEmbeddingsProperties$_setter_$dimension_$eq(new IntParam(this, "dimension", "Number of embedding dimensions"));
        this.inputAnnotatorTypes = new String[]{AnnotatorType$.MODULE$.TOKEN()};
        this.outputAnnotatorType = AnnotatorType$.MODULE$.WORD_EMBEDDINGS();
        this.vectorSize = new IntParam(this, "vectorSize", "the dimension of codes after transforming from words (> 0)", ParamValidators$.MODULE$.gt(0.0d));
        this.wordVectors = new MapFeature<>(this, "wordVectors", ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Float.TYPE)));
        setDefault(Predef$.MODULE$.wrapRefArray(new ParamPair[]{inputCols().$minus$greater(new String[]{AnnotatorType$.MODULE$.TOKEN()}), outputCol().$minus$greater("word2vec"), vectorSize().$minus$greater(BoxesRunTime.boxToInteger(100))}));
    }

    /** Creates a model with a freshly generated identifier prefixed {@code "Word2VecModel"}. */
    public Word2VecModel() {
        this(Identifiable$.MODULE$.randomUID("Word2VecModel"));
    }
}
