package com.johnsnowlabs.nlp.annotators;

import com.johnsnowlabs.nlp.AnnotatorApproach;
import com.johnsnowlabs.nlp.AnnotatorType$;
import com.johnsnowlabs.nlp.HasFeatures;
import com.johnsnowlabs.nlp.ParamsAndFeaturesWritable;
import com.johnsnowlabs.nlp.serialization.ArrayFeature;
import com.johnsnowlabs.nlp.serialization.Feature;
import com.johnsnowlabs.nlp.serialization.MapFeature;
import com.johnsnowlabs.nlp.serialization.SetFeature;
import com.johnsnowlabs.nlp.serialization.StructFeature;
import org.apache.spark.ml.PipelineModel;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.StringArrayParam;
import org.apache.spark.ml.util.DefaultParamsWritable;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLWriter;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import scala.Function0;
import scala.Option;
import scala.Predef$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Set;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ArrayBuffer$;
import scala.math.Ordering$Int$;
import scala.reflect.ScalaSignature;

/* compiled from: RecursiveTokenizer.scala */
@ScalaSignature(bytes = "\u0006\u0001\u00055b\u0001B\u0001\u0003\u0001-\u0011!CU3dkJ\u001c\u0018N^3U_.,g.\u001b>fe*\u00111\u0001B\u0001\u000bC:tw\u000e^1u_J\u001c(BA\u0003\u0007\u0003\rqG\u000e\u001d\u0006\u0003\u000f!\tAB[8i]Ntwn\u001e7bENT\u0011!C\u0001\u0004G>l7\u0001A\n\u0004\u00011!\u0002cA\u0007\u000f!5\tA!\u0003\u0002\u0010\t\t\t\u0012I\u001c8pi\u0006$xN]!qaJ|\u0017m\u00195\u0011\u0005E\u0011R\"\u0001\u0002\n\u0005M\u0011!a\u0006*fGV\u00148/\u001b<f)>\\WM\\5{KJlu\u000eZ3m!\tiQ#\u0003\u0002\u0017\t\tI\u0002+\u0019:b[N\fe\u000e\u001a$fCR,(/Z:Xe&$\u0018M\u00197f\u0011!A\u0002A!b\u0001\n\u0003J\u0012aA;jIV\t!\u0004\u0005\u0002\u001cC9\u0011AdH\u0007\u0002;)\ta$A\u0003tG\u0006d\u0017-\u0003\u0002!;\u00051\u0001K]3eK\u001aL!AI\u0012\u0003\rM#(/\u001b8h\u0015\t\u0001S\u0004\u0003\u0005&\u0001\t\u0005\t\u0015!\u0003\u001b\u0003\u0011)\u0018\u000e\u001a\u0011\t\u000b\u001d\u0002A\u0011\u0001\u0015\u0002\rqJg.\u001b;?)\tI#\u0006\u0005\u0002\u0012\u0001!)\u0001D\na\u00015!)q\u0005\u0001C\u0001YQ\t\u0011\u0006C\u0004/\u0001\t\u0007I\u0011A\u0018\u0002\u0011A\u0014XMZ5yKN,\u0012\u0001\r\t\u0003cqj\u0011A\r\u0006\u0003gQ\nQ\u0001]1sC6T!!\u000e\u001c\u0002\u00055d'BA\u001c9\u0003\u0015\u0019\b/\u0019:l\u0015\tI$(\u0001\u0004ba\u0006\u001c\u0007.\u001a\u0006\u0002w\u0005\u0019qN]4\n\u0005u\u0012$\u0001E*ue&tw-\u0011:sCf\u0004\u0016M]1n\u0011\u0019y\u0004\u0001)A\u0005a\u0005I\u0001O]3gSb,7\u000f\t\u0005\u0006\u0003\u0002!\tAQ\u0001\fg\u0016$\bK]3gSb,7\u000f\u0006\u0002D\t6\t\u0001\u0001C\u0003F\u0001\u0002\u0007a)A\u0001q!\rarIG\u0005\u0003\u0011v\u0011Q!\u0011:sCfDqA\u0013\u0001C\u0002\u0013\u0005q&\u0001\u0005tk\u001a4\u0017\u000e_3t\u0011\u0019a\u0005\u0001)A\u0005a\u0005I1/\u001e4gSb,7\u000f\t\u0005\u0006\u001d\u0002!\taT\u0001\fg\u0016$8+\u001e4gSb,7\u000f\u0006\u0002D!\")\u0011+\u0014a\u0001\r\u0006\t1\u000fC\u0004T\u0001\t\u0007I\u0011A\u0018\u0002\u000f%tg-\u001b=fg\"1Q\u000b\u0001Q\u0001\nA\n\u0001\"\u001b8gSb,7\u000f\t\u0005\u0006/\u0002!\t\u0001W\u0001\u000bg\u001
6$\u0018J\u001c4jq\u0016\u001cHCA\"Z\u0011\u0015)e\u000b1\u0001G\u0011\u001dY\u0006A1A\u0005\u0002=\n\u0011b\u001e5ji\u0016d\u0017n\u001d;\t\ru\u0003\u0001\u0015!\u00031\u0003)9\b.\u001b;fY&\u001cH\u000f\t\u0005\u0006?\u0002!\t\u0001Y\u0001\rg\u0016$x\u000b[5uK2L7\u000f\u001e\u000b\u0003\u0007\u0006DQA\u00190A\u0002\u0019\u000b\u0011a\u001e\u0005\bI\u0002\u0011\r\u0011\"\u0011f\u0003MyW\u000f\u001e9vi\u0006sgn\u001c;bi>\u0014H+\u001f9f+\u00051\u0007CA\"h\u0013\tA\u0017NA\u0007B]:|G/\u0019;peRK\b/Z\u0005\u0003U\u0012\u0011a\u0003S1t\u001fV$\b/\u001e;B]:|G/\u0019;peRK\b/\u001a\u0005\u0007Y\u0002\u0001\u000b\u0011\u00024\u0002)=,H\u000f];u\u0003:tw\u000e^1u_J$\u0016\u0010]3!\u0011\u001dq\u0007A1A\u0005B=\f1#\u001b8qkR\feN\\8uCR|'\u000fV=qKN,\u0012A\u0012\u0005\u0007c\u0002\u0001\u000b\u0011\u0002$\u0002)%t\u0007/\u001e;B]:|G/\u0019;peRK\b/Z:!\u0011\u001d\u0019\bA1A\u0005Be\t1\u0002Z3tGJL\u0007\u000f^5p]\"1Q\u000f\u0001Q\u0001\ni\tA\u0002Z3tGJL\u0007\u000f^5p]\u0002BQa\u001e\u0001\u0005Ba\fQ\u0001\u001e:bS:$B\u0001E=\u0002\u001c!)!P\u001ea\u0001w\u00069A-\u0019;bg\u0016$\bg\u0001?\u0002\nA)Q0!\u0001\u0002\u00065\taP\u0003\u0002��m\u0005\u00191/\u001d7\n\u0007\u0005\raPA\u0004ECR\f7/\u001a;\u0011\t\u0005\u001d\u0011\u0011\u0002\u0007\u0001\t-\tY!_A\u0001\u0002\u0003\u0015\t!!\u0004\u0003\u0007}#\u0013'\u0005\u0003\u0002\u0010\u0005U\u0001c\u0001\u000f\u0002\u0012%\u0019\u00111C\u000f\u0003\u000f9{G\u000f[5oOB\u0019A$a\u0006\n\u0007\u0005eQDA\u0002B]fD\u0011\"!\bw!\u0003\u0005\r!a\b\u0002#I,7-\u001e:tSZ,\u0007+\u001b9fY&tW\rE\u0003\u001d\u0003C\t)#C\u0002\u0002$u\u0011aa\u00149uS>t\u0007\u0003BA\u0014\u0003Si\u0011\u0001N\u0005\u0004\u0003W!$!\u0004)ja\u0016d\u0017N\\3N_\u0012,G\u000e")
/* loaded from: input_file:com/johnsnowlabs/nlp/annotators/RecursiveTokenizer.class */
/*
 * Annotator approach that collects tokenizer settings and produces a RecursiveTokenizerModel.
 *
 * The settings are four string-array params: prefixes, suffixes, infixes and whitelist.
 * The annotator declares DOCUMENT as its only input annotator type and TOKEN as its output
 * annotator type. train() does not read the dataset; it only copies the current param values
 * into a new model.
 *
 * This file is decompiled from Scala, so many members are compiler-generated forwarders to the
 * implementation classes of the HasFeatures and ParamsAndFeaturesWritable traits.
 */
public class RecursiveTokenizer extends AnnotatorApproach<RecursiveTokenizerModel> implements ParamsAndFeaturesWritable {
    /** Identifier of this instance, supplied to the constructor. */
    private final String uid;
    /** Param "prefixes": strings that will be split when found at the beginning of a token. */
    private final StringArrayParam prefixes;
    /** Param "suffixes": strings that will be split when found at the end of a token. */
    private final StringArrayParam suffixes;
    /** Param "infixes": strings that will be split when found in the middle of a token. */
    private final StringArrayParam infixes;
    /** Param "whitelist"; train() hands its value to the model as a set. */
    private final StringArrayParam whitelist;
    /** Output annotator type; set to AnnotatorType.TOKEN in the constructor. */
    private final String outputAnnotatorType;
    /** Required input annotator types; a single-element array holding AnnotatorType.DOCUMENT. */
    private final String[] inputAnnotatorTypes;
    /** Human-readable description of this annotator. */
    private final String description;
    /**
     * Feature registry required by HasFeatures.
     *
     * Deliberately not {@code final}: the value is assigned through the trait setter
     * {@link #com$johnsnowlabs$nlp$HasFeatures$_setter_$features_$eq(ArrayBuffer)}, which is an
     * ordinary instance method, and Java forbids assigning a final field outside a
     * constructor or initializer.
     */
    private ArrayBuffer<Feature<?, ?, ?>> features;

    /**
     * Compiler-generated accessor that forwards to the DefaultParamsWritable write implementation.
     *
     * NOTE(review): the expression below looks like a decompiler rendering of a call into the
     * Scala trait implementation class and is not valid Java as written — confirm before
     * attempting to recompile.
     *
     * @return the writer produced by the DefaultParamsWritable implementation
     */
    @Override // com.johnsnowlabs.nlp.ParamsAndFeaturesWritable
    public /* synthetic */ MLWriter com$johnsnowlabs$nlp$ParamsAndFeaturesWritable$$super$write() {
        return DefaultParamsWritable.class.write(this);
    }

    /**
     * Forwards to the ParamsAndFeaturesWritable trait implementation of onWrite.
     *
     * @param str          passed through unchanged (presumably the target path — confirm against the trait)
     * @param sparkSession passed through unchanged
     */
    @Override // com.johnsnowlabs.nlp.ParamsAndFeaturesWritable
    public void onWrite(String str, SparkSession sparkSession) {
        ParamsAndFeaturesWritable.Cclass.onWrite(this, str, sparkSession);
    }

    /**
     * Forwards to the ParamsAndFeaturesWritable trait implementation of write.
     *
     * @return the writer created by the trait implementation
     */
    @Override // com.johnsnowlabs.nlp.AnnotatorApproach, com.johnsnowlabs.nlp.ParamsAndFeaturesWritable
    public MLWriter write() {
        return ParamsAndFeaturesWritable.Cclass.write(this);
    }

    /**
     * @return the feature buffer held by this instance
     */
    @Override // com.johnsnowlabs.nlp.HasFeatures
    public ArrayBuffer<Feature<?, ?, ?>> features() {
        return this.features;
    }

    /**
     * Trait setter for the feature buffer; the constructor calls it with an empty buffer.
     *
     * @param arrayBuffer the buffer to store
     */
    @Override // com.johnsnowlabs.nlp.HasFeatures
    public void com$johnsnowlabs$nlp$HasFeatures$_setter_$features_$eq(ArrayBuffer arrayBuffer) {
        this.features = arrayBuffer;
    }

    // ---- HasFeatures forwarders: set -------------------------------------------------------
    // Each overload delegates unchanged to the HasFeatures trait implementation.

    @Override // com.johnsnowlabs.nlp.HasFeatures
    public <T> HasFeatures set(ArrayFeature<T> arrayFeature, Object obj) {
        return HasFeatures.Cclass.set(this, arrayFeature, obj);
    }

    @Override // com.johnsnowlabs.nlp.HasFeatures
    public <T> HasFeatures set(SetFeature<T> setFeature, Set<T> set) {
        return HasFeatures.Cclass.set(this, setFeature, set);
    }

    @Override // com.johnsnowlabs.nlp.HasFeatures
    public <K, V> HasFeatures set(MapFeature<K, V> mapFeature, Map<K, V> map) {
        return HasFeatures.Cclass.set(this, mapFeature, map);
    }

    @Override // com.johnsnowlabs.nlp.HasFeatures
    public <T> HasFeatures set(StructFeature<T> structFeature, T t) {
        return HasFeatures.Cclass.set(this, structFeature, t);
    }

    // ---- HasFeatures forwarders: setDefault ------------------------------------------------
    // Each overload delegates unchanged to the HasFeatures trait implementation.

    @Override // com.johnsnowlabs.nlp.HasFeatures
    public <T> HasFeatures setDefault(ArrayFeature<T> arrayFeature, Function0<Object> function0) {
        return HasFeatures.Cclass.setDefault(this, arrayFeature, function0);
    }

    @Override // com.johnsnowlabs.nlp.HasFeatures
    public <T> HasFeatures setDefault(SetFeature<T> setFeature, Function0<Set<T>> function0) {
        return HasFeatures.Cclass.setDefault(this, setFeature, function0);
    }

    @Override // com.johnsnowlabs.nlp.HasFeatures
    public <K, V> HasFeatures setDefault(MapFeature<K, V> mapFeature, Function0<Map<K, V>> function0) {
        return HasFeatures.Cclass.setDefault(this, mapFeature, function0);
    }

    @Override // com.johnsnowlabs.nlp.HasFeatures
    public <T> HasFeatures setDefault(StructFeature<T> structFeature, Function0<T> function0) {
        return HasFeatures.Cclass.setDefault(this, structFeature, function0);
    }

    // ---- HasFeatures forwarders: get -------------------------------------------------------
    // Each overload delegates unchanged to the HasFeatures trait implementation.

    @Override // com.johnsnowlabs.nlp.HasFeatures
    public <T> Option<Object> get(ArrayFeature<T> arrayFeature) {
        return HasFeatures.Cclass.get(this, arrayFeature);
    }

    @Override // com.johnsnowlabs.nlp.HasFeatures
    public <T> Option<Set<T>> get(SetFeature<T> setFeature) {
        return HasFeatures.Cclass.get(this, setFeature);
    }

    @Override // com.johnsnowlabs.nlp.HasFeatures
    public <K, V> Option<Map<K, V>> get(MapFeature<K, V> mapFeature) {
        return HasFeatures.Cclass.get(this, mapFeature);
    }

    @Override // com.johnsnowlabs.nlp.HasFeatures
    public <T> Option<T> get(StructFeature<T> structFeature) {
        return HasFeatures.Cclass.get(this, structFeature);
    }

    // ---- HasFeatures accessors: $$ ---------------------------------------------------------
    // Each overload returns the result of the feature's own getOrDefault().

    @Override // com.johnsnowlabs.nlp.HasFeatures
    public <T> Object $$(ArrayFeature<T> arrayFeature) {
        return arrayFeature.getOrDefault();
    }

    @Override // com.johnsnowlabs.nlp.HasFeatures
    public <T> Set<T> $$(SetFeature<T> setFeature) {
        return setFeature.getOrDefault();
    }

    @Override // com.johnsnowlabs.nlp.HasFeatures
    public <K, V> Map<K, V> $$(MapFeature<K, V> mapFeature) {
        return mapFeature.getOrDefault();
    }

    @Override // com.johnsnowlabs.nlp.HasFeatures
    public <T> T $$(StructFeature<T> structFeature) {
        return (T) structFeature.getOrDefault();
    }

    /**
     * @return the identifier given to the constructor
     */
    public String uid() {
        return this.uid;
    }

    /**
     * @return the "prefixes" param definition
     */
    public StringArrayParam prefixes() {
        return this.prefixes;
    }

    /**
     * Stores the prefixes after sorting them by an integer key and reversing the result,
     * so the entry with the largest key comes first.
     *
     * The key function lives in a separate generated class that is not visible here;
     * presumably it is the string length — confirm.
     *
     * @param strArr prefixes to store
     * @return this instance, for chaining
     */
    public RecursiveTokenizer setPrefixes(String[] strArr) {
        return (RecursiveTokenizer) set((Param) prefixes(), Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(strArr).sortBy(new RecursiveTokenizer$$anonfun$setPrefixes$1(this), Ordering$Int$.MODULE$)).reverse());
    }

    /**
     * @return the "suffixes" param definition
     */
    public StringArrayParam suffixes() {
        return this.suffixes;
    }

    /**
     * Stores the suffixes after sorting them by an integer key and reversing the result,
     * so the entry with the largest key comes first.
     *
     * The key function lives in a separate generated class that is not visible here;
     * presumably it is the string length — confirm.
     *
     * @param strArr suffixes to store
     * @return this instance, for chaining
     */
    public RecursiveTokenizer setSuffixes(String[] strArr) {
        return (RecursiveTokenizer) set((Param) suffixes(), Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(strArr).sortBy(new RecursiveTokenizer$$anonfun$setSuffixes$1(this), Ordering$Int$.MODULE$)).reverse());
    }

    /**
     * @return the "infixes" param definition
     */
    public StringArrayParam infixes() {
        return this.infixes;
    }

    /**
     * Stores the infixes after sorting them by an integer key and reversing the result,
     * so the entry with the largest key comes first.
     *
     * The key function lives in a separate generated class that is not visible here;
     * presumably it is the string length — confirm.
     *
     * @param strArr infixes to store
     * @return this instance, for chaining
     */
    public RecursiveTokenizer setInfixes(String[] strArr) {
        return (RecursiveTokenizer) set((Param) infixes(), Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(strArr).sortBy(new RecursiveTokenizer$$anonfun$setInfixes$1(this), Ordering$Int$.MODULE$)).reverse());
    }

    /**
     * @return the "whitelist" param definition
     */
    public StringArrayParam whitelist() {
        return this.whitelist;
    }

    /**
     * Stores the whitelist exactly as given; unlike the prefix/suffix/infix setters, no sorting is applied.
     *
     * @param strArr whitelist entries to store
     * @return this instance, for chaining
     */
    public RecursiveTokenizer setWhitelist(String[] strArr) {
        return (RecursiveTokenizer) set((Param) whitelist(), (Object) strArr);
    }

    /**
     * @return the output annotator type (AnnotatorType.TOKEN, assigned in the constructor)
     */
    @Override // com.johnsnowlabs.nlp.HasOutputAnnotatorType
    public String outputAnnotatorType() {
        return this.outputAnnotatorType;
    }

    /**
     * @return the required input annotator types (a single DOCUMENT entry, assigned in the constructor)
     */
    @Override // com.johnsnowlabs.nlp.HasInputAnnotationCols
    public String[] inputAnnotatorTypes() {
        return this.inputAnnotatorTypes;
    }

    /**
     * @return the description string assigned in the constructor
     */
    @Override // com.johnsnowlabs.nlp.AnnotatorApproach
    public String description() {
        return this.description;
    }

    /**
     * Builds a RecursiveTokenizerModel from the current (or default) param values.
     *
     * Neither argument is read: the model receives the prefixes, suffixes and infixes arrays
     * as stored, and the whitelist converted to a set.
     *
     * @param dataset unused
     * @param option  unused
     * @return a new model configured from this approach's params
     */
    /* JADX WARN: Can't rename method to resolve collision */
    @Override // com.johnsnowlabs.nlp.AnnotatorApproach
    public RecursiveTokenizerModel train(Dataset<?> dataset, Option<PipelineModel> option) {
        return new RecursiveTokenizerModel().setPrefixes((String[]) getOrDefault(prefixes())).setSuffixes((String[]) getOrDefault(suffixes())).setInfixes((String[]) getOrDefault(infixes())).setWhitelist(Predef$.MODULE$.refArrayOps((Object[]) getOrDefault(whitelist())).toSet());
    }

    /**
     * Compiler-generated bridge that forwards to the typed train overload above.
     */
    @Override // com.johnsnowlabs.nlp.AnnotatorApproach
    public /* bridge */ /* synthetic */ RecursiveTokenizerModel train(Dataset dataset, Option option) {
        return train((Dataset<?>) dataset, (Option<PipelineModel>) option);
    }

    /**
     * Creates a tokenizer approach with the given identifier, defines the four params and
     * registers their default values.
     *
     * @param str identifier for this instance
     */
    public RecursiveTokenizer(String str) {
        this.uid = str;
        // The feature buffer is installed first, ahead of the trait initializer and the params.
        com$johnsnowlabs$nlp$HasFeatures$_setter_$features_$eq((ArrayBuffer) ArrayBuffer$.MODULE$.empty());
        ParamsAndFeaturesWritable.Cclass.$init$(this);
        this.prefixes = new StringArrayParam(this, "prefixes", "Strings that will be split when found at the beginning of token.");
        this.suffixes = new StringArrayParam(this, "suffixes", "Strings that will be split when found at the end of token.");
        this.infixes = new StringArrayParam(this, "infixes", "Strings that will be split when found at the middle of token.");
        this.whitelist = new StringArrayParam(this, "whitelist", "Whitelist.");
        // NOTE(review): defaults are registered with setDefault directly, so they keep the literal
        // order below and do not go through the sort-and-reverse step of the setters — confirm
        // that downstream code does not rely on that ordering for defaults.
        setDefault((Param) infixes(), (Object) new String[]{"\n", "(", ")"});
        setDefault((Param) prefixes(), (Object) new String[]{"'", "\"", "(", "[", "\n"});
        setDefault((Param) suffixes(), (Object) new String[]{".", ":", "%", ",", ";", "?", "'", "\"", ")", "]", "\n", "!", "'s"});
        setDefault((Param) whitelist(), (Object) new String[]{"it's", "that's", "there's", "he's", "she's", "what's", "let's", "who's", "It's", "That's", "There's", "He's", "She's", "What's", "Let's", "Who's"});
        this.outputAnnotatorType = AnnotatorType$.MODULE$.TOKEN();
        this.inputAnnotatorTypes = new String[]{AnnotatorType$.MODULE$.DOCUMENT()};
        this.description = "Simplest possible tokenizer";
    }

    /**
     * Creates a tokenizer approach with a random identifier generated from the prefix "SILLY_TOKENIZER".
     */
    public RecursiveTokenizer() {
        this(Identifiable$.MODULE$.randomUID("SILLY_TOKENIZER"));
    }
}
