/*
 * Decompiled with CFR 0.152.
 */
package cc.factorie.app.topics.lda;

import cc.factorie.app.strings.Stopwords$;
import cc.factorie.app.strings.alphaSegmenter$;
import cc.factorie.app.topics.lda.Doc;
import cc.factorie.app.topics.lda.LDA;
import cc.factorie.app.topics.lda.LDA$;
import cc.factorie.app.topics.lda.Tag;
import cc.factorie.app.topics.lda.TaggedDocument;
import cc.factorie.directed.DirectedModel$;
import cc.factorie.directed.Dirichlet$;
import cc.factorie.directed.ItemizedDirectedModel;
import cc.factorie.directed.package$;
import cc.factorie.variable.CategoricalDomain;
import cc.factorie.variable.CategoricalSeqDomain;
import cc.factorie.variable.MassesVariable;
import cc.factorie.variable.MassesVariable$;
import java.io.File;
import scala.Console$;
import scala.Function1;
import scala.Predef$;
import scala.Serializable;
import scala.collection.Iterable$;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableOnce;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.StringBuilder;
import scala.io.Codec$;
import scala.io.Source$;
import scala.math.Numeric;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.util.Random;

/**
 * Singleton holder for a tag-aware LDA run (decompiled form of a Scala object).
 *
 * <p>The single instance is created by the static initializer and published through
 * {@link #MODULE$}. The private constructor builds a fixed list of {@code Tag}s, the word
 * domains, a directed model, a seeded random generator and an {@code LDA} instance.
 * {@link #main(String[])} reads text files into {@code TaggedDocument}s, attaches every
 * tag that matches a file's text, and then runs topic inference.
 */
public final class TaggedLDA$ {
    /** The singleton instance; assigned as the first statement of the private constructor. */
    public static final TaggedLDA$ MODULE$;
    /** Tags tested against every document's text; populated in the constructor. */
    private final ArrayBuffer<Tag> tags;
    /** Topic count passed to the LDA constructor: number of tags plus 10. */
    private final int numTopics;
    /** Value 0.01; passed to the LDA constructor and used to fill per-document dense masses. */
    private final double alpha1;
    /** Iteration count (50) passed to {@code inferTopics} in {@link #main(String[])}. */
    private final int numIterations;
    /** Sequence domain handed to the LDA instance and to every TaggedDocument. */
    private final CategoricalSeqDomain<String> WordSeqDomain;
    /** Element domain of {@link #WordSeqDomain}; its size is reported as "word types". */
    private final CategoricalDomain<String> WordDomain;
    /** Segmenter applied to raw file text in {@link #main(String[])}. */
    private final alphaSegmenter$ tokenizer;
    /** Directed model passed to the LDA constructor and to the {@code $tilde} call in main. */
    private final ItemizedDirectedModel model;
    /** Random generator constructed with the fixed seed 0. */
    private final Random random;
    /** The LDA instance that documents are added to and on which inference is run. */
    private final LDA lda;

    // Instantiating the class once triggers the constructor, which assigns MODULE$.
    static {
        new TaggedLDA$();
    }

    /** @return the mutable buffer of tags built in the constructor */
    public ArrayBuffer<Tag> tags() {
        return this.tags;
    }

    /** @return the topic count (number of tags plus 10) */
    public int numTopics() {
        return this.numTopics;
    }

    /** @return the alpha1 value (0.01) */
    public double alpha1() {
        return this.alpha1;
    }

    /** @return the number of inference iterations (50) */
    public int numIterations() {
        return this.numIterations;
    }

    /** @return the word sequence domain */
    public CategoricalSeqDomain<String> WordSeqDomain() {
        return this.WordSeqDomain;
    }

    /** @return the element domain of the word sequence domain */
    public CategoricalDomain<String> WordDomain() {
        return this.WordDomain;
    }

    /** @return the segmenter used to split file text */
    public alphaSegmenter$ tokenizer() {
        return this.tokenizer;
    }

    /** @return the directed model created in the constructor */
    public ItemizedDirectedModel model() {
        return this.model;
    }

    /** @return the random generator seeded with 0 */
    public Random random() {
        return this.random;
    }

    /** @return the LDA instance */
    public LDA lda() {
        return this.lda;
    }

    /**
     * Reads documents from a list of directories, tags them, and runs topic inference.
     *
     * <p>This is an instance method on the singleton, not a static method. For every regular
     * file in every directory it prints a progress dot, reads the file into a string, segments
     * it, lower-cases the pieces, drops those contained in {@code Stopwords$}, and adds the
     * resulting {@code TaggedDocument} to the LDA instance. It then prints document, word-type
     * and word-token counts, calls {@code inferTopics}, and prints the elapsed seconds.
     *
     * @param args directories to read; when empty, four hard-coded default directories are used
     */
    public void main(String[] args) {
        // Defaults: only the first four suffixes ("11", "12", "10", "09") survive take(4); each is
        // appended to a hard-coded absolute path prefix.
        // NOTE(review): the default prefix is a user-specific absolute path - confirm it is intended.
        List directories = args.length > 0 ? Predef$.MODULE$.refArrayOps((Object[])args).toList() : (List)List$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"11", "12", "10", "09", "08"})).take(4).map((Function1)new Serializable(){

            public final String apply(String x$2) {
                return new StringBuilder().append((Object)"/Users/mccallum/research/data/text/nipstxt/nips").append((Object)x$2).toString();
            }
        }, List$.MODULE$.canBuildFrom());
        directories.foreach((Function1)new Serializable(){

            public final void apply(String directory) {
                Predef$.MODULE$.println((Object)new StringBuilder().append((Object)"Reading files from directory ").append((Object)directory).toString());
                // NOTE(review): java.io.File.listFiles() is documented to return null when the path is
                // not a directory; no null check is visible here - confirm how refArrayOps handles that.
                Predef$.MODULE$.refArrayOps((Object[])new File(directory).listFiles()).withFilter((Function1)new Serializable(this){

                    // Keep only regular files.
                    public final boolean apply(File file) {
                        return file.isFile();
                    }
                }).foreach((Function1)new Serializable(this){

                    public final void apply(File file) {
                        // One progress dot per file, flushed immediately.
                        Predef$.MODULE$.print((Object)".");
                        Console$.MODULE$.flush();
                        // NOTE(review): the Source returned by fromFile is consumed by mkString() and is
                        // never explicitly closed in this block.
                        String text = Source$.MODULE$.fromFile(file, Codec$.MODULE$.fallbackSystemCodec()).mkString();
                        // Document name is the file path; content is the segmented, lower-cased,
                        // stopword-filtered text.
                        TaggedDocument doc = new TaggedDocument(TaggedLDA$.MODULE$.WordSeqDomain(), file.toString(), (Seq<String>)TaggedLDA$.MODULE$.tokenizer().apply(text).map((Function1)new Serializable(this){

                            public final String apply(String x$5) {
                                return x$5.toLowerCase();
                            }
                        }).filter((Function1)new Serializable(this){

                            public final boolean apply(String x$6) {
                                return !Stopwords$.MODULE$.contains(x$6);
                            }
                        }).toIndexedSeq());
                        TaggedLDA$.MODULE$.lda().addDocument(doc, TaggedLDA$.MODULE$.random());
                        // Tags are matched against the raw file text, not the filtered token sequence.
                        TaggedLDA$.MODULE$.tags().foreach((Function1)new Serializable(this, text, doc){
                            private final String text$2;
                            private final TaggedDocument doc$1;

                            public final Object apply(Tag tag2) {
                                Object object;
                                if (tag2.matches(this.text$2)) {
                                    // Record the tag on the document.
                                    this.doc$1.tags().$plus$eq((Object)tag2);
                                    // Dense masses with numTopics entries, each set to alpha1.
                                    MassesVariable masses = MassesVariable$.MODULE$.dense(TaggedLDA$.MODULE$.numTopics(), TaggedLDA$.MODULE$.alpha1());
                                    // Remove the existing parent factor of the document's theta from the LDA
                                    // model, then apply $tilde with a Dirichlet over the new masses.
                                    // NOTE(review): this runs once per matching tag, so a document matching
                                    // several tags repeats the remove/re-add - confirm that is intended.
                                    TaggedLDA$.MODULE$.lda().model().$minus$eq(TaggedLDA$.MODULE$.lda().model().parentFactor(this.doc$1.theta()));
                                    object = package$.MODULE$.generatedVarExtras(this.doc$1).$tilde(Dirichlet$.MODULE$.apply(masses), TaggedLDA$.MODULE$.model());
                                } else {
                                    object = BoxedUnit.UNIT;
                                }
                                return object;
                            }
                            // Captured locals from the enclosing closure (decompiler rendering).
                            {
                                this.text$2 = text$2;
                                this.doc$1 = doc$1;
                            }
                        });
                    }
                });
                // Terminate the line of progress dots for this directory.
                Predef$.MODULE$.println();
            }
        });
        // Summary: document count, size of the word domain, and total length of all documents' ws().
        Predef$.MODULE$.println((Object)new StringBuilder().append((Object)"Read ").append((Object)BoxesRunTime.boxToInteger((int)this.lda().documents().size())).append((Object)" documents, ").append((Object)BoxesRunTime.boxToInteger((int)this.WordDomain().size())).append((Object)" word types, ").append(((TraversableOnce)this.lda().documents().map((Function1)new Serializable(){

            public final int apply(Doc x$7) {
                return x$7.ws().length();
            }
        }, Iterable$.MODULE$.canBuildFrom())).sum((Numeric)Numeric.IntIsIntegral$.MODULE$)).append((Object)" word tokens.").toString());
        long startTime = System.currentTimeMillis();
        // Second argument is the literal 10; the third and fourth use inferTopics' declared defaults.
        this.lda().inferTopics(this.numIterations(), 10, this.lda().inferTopics$default$3(), this.lda().inferTopics$default$4());
        // Elapsed wall-clock time, converted from milliseconds to seconds.
        Predef$.MODULE$.println((Object)new StringBuilder().append((Object)"Finished in ").append((Object)BoxesRunTime.boxToDouble((double)((double)(System.currentTimeMillis() - startTime) / 1000.0))).append((Object)" seconds").toString());
    }

    /**
     * Builds the singleton's state. Statement order matters: later fields are computed through
     * accessors that read earlier fields (numTopics from tags, WordDomain from WordSeqDomain,
     * lda from the domain, topic count, alpha1, model and random).
     */
    private TaggedLDA$() {
        // Publish the instance before any other initialization.
        MODULE$ = this;
        this.tags = new ArrayBuffer();
        // Four tags; two of them are constructed with an extra sequence holding one more string.
        this.tags().$plus$eq((Object)new Tag("machine learning"));
        this.tags().$plus$eq((Object)new Tag("natural language processing", (Seq<String>)((Seq)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"part-of-speech"})))));
        this.tags().$plus$eq((Object)new Tag("speech recognition"));
        this.tags().$plus$eq((Object)new Tag("neural networks", (Seq<String>)((Seq)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"neural network"})))));
        // Four tags were added above, so this evaluates to 14.
        this.numTopics = this.tags().length() + 10;
        this.alpha1 = 0.01;
        this.numIterations = 50;
        this.WordSeqDomain = new CategoricalSeqDomain();
        this.WordDomain = this.WordSeqDomain().elementDomain();
        this.tokenizer = alphaSegmenter$.MODULE$;
        this.model = DirectedModel$.MODULE$.apply();
        // Fixed seed 0.
        this.random = new Random(0);
        // The fourth and fifth constructor arguments use LDA's declared default values.
        this.lda = new LDA(this.WordSeqDomain(), this.numTopics(), this.alpha1(), LDA$.MODULE$.$lessinit$greater$default$4(), LDA$.MODULE$.$lessinit$greater$default$5(), this.model(), this.random());
    }
}

