package org.nuxeo.ecm.platform.categorization.categorizer.tfidf;

import java.io.Serializable;
import java.util.List;

/* loaded from: input_file:org/nuxeo/ecm/platform/categorization/categorizer/tfidf/HashingVectorizer.class */
/**
 * Turns a list of string tokens into a fixed-size array of occurrence counts
 * by hashing each feature to a bucket index.
 *
 * <p>Every token contributes a single-token feature. When {@link #window} is
 * positive, each token is additionally paired with up to {@code window} tokens
 * that precede it in the list, and every such pair contributes a feature too.
 * Each feature increments {@link #probes} buckets, one per probe number.
 *
 * <p>Instances are configured through the chainable {@link #dimension(int)},
 * {@link #window(int)} and {@link #probes(int)} methods.
 */
public class HashingVectorizer implements Serializable {
    private static final long serialVersionUID = 1;

    /** Number of buckets; this is the length of the arrays returned by {@link #count(List)}. */
    protected int dim = 524288;

    /** Number of buckets incremented for each feature (one per probe number). */
    protected int probes = 2;

    /** Number of preceding tokens paired with each token; values {@code <= 0} disable pairing. */
    protected int window = 0;

    /**
     * Sets the number of buckets.
     *
     * @param i the new number of buckets, must be strictly positive
     * @return this instance, for call chaining
     * @throws IllegalArgumentException if {@code i} is zero or negative
     */
    public HashingVectorizer dimension(int i) {
        // Fail fast: a non-positive size would otherwise only surface later as a
        // NegativeArraySizeException, a division by zero in hash(), or bad indexes.
        if (i <= 0) {
            throw new IllegalArgumentException("dimension must be positive, got " + i);
        }
        this.dim = i;
        return this;
    }

    /**
     * Sets how many preceding tokens are paired with each token.
     *
     * @param i the window size; zero or a negative value disables token pairing
     * @return this instance, for call chaining
     */
    public HashingVectorizer window(int i) {
        this.window = i;
        return this;
    }

    /**
     * Sets how many buckets are incremented for each feature.
     *
     * @param i the number of probes; with a value {@code <= 0} no bucket is ever incremented
     * @return this instance, for call chaining
     */
    public HashingVectorizer probes(int i) {
        this.probes = i;
        return this;
    }

    /**
     * Counts the features of the given tokens into a freshly allocated array.
     *
     * @param list the tokens to vectorize
     * @return a new array of length {@link #dim} holding the bucket counts
     */
    public long[] count(List<String> list) {
        long[] jArr = new long[this.dim];
        addCounts(list, jArr);
        return jArr;
    }

    /**
     * Adds the feature counts of the given tokens to an existing array, so that
     * several token lists can be accumulated into the same array.
     *
     * <p>Preceding tokens are read with {@code list.get(index)}, so a list with
     * cheap indexed access is preferable when {@link #window} is positive.
     *
     * @param list the tokens to vectorize
     * @param jArr the array to update in place; bucket indexes range over
     *        {@code [0, dim)}, so it needs at least {@link #dim} elements
     */
    public void addCounts(List<String> list, long[] jArr) {
        int position = 0;
        for (String str : list) {
            // Single-token feature: one bucket per probe.
            for (int probe = 0; probe < this.probes; probe++) {
                jArr[hash(str, probe)]++;
            }
            if (this.window > 0) {
                // Pair the current token with each of the (at most) `window` tokens before it.
                int first = Math.max(0, position - this.window);
                for (int prev = first; prev < position; prev++) {
                    String previous = list.get(prev);
                    for (int probe = 0; probe < this.probes; probe++) {
                        jArr[hash(str, previous, probe)]++;
                    }
                }
            }
            position++;
        }
    }

    /**
     * Computes the bucket index of a single-token feature.
     *
     * <p>Delegates to {@link #hash(String, String, int)} with a {@code null}
     * second token, which is rendered as the text {@code "null"} in the hashed
     * key. This is kept as-is so that bucket indexes stay stable.
     *
     * @param str the token
     * @param i the probe number
     * @return a bucket index in {@code [0, dim)}
     */
    protected int hash(String str, int i) {
        return hash(str, null, i);
    }

    /**
     * Computes the bucket index of a feature made of one or two tokens.
     *
     * <p>The hashed key is {@code str + " " + str2 + " " + i}; its
     * {@link String#hashCode()} is reduced modulo {@link #dim}.
     *
     * @param str the current token
     * @param str2 the paired preceding token, or {@code null} for a single-token feature
     * @param i the probe number
     * @return a bucket index in {@code [0, dim)}
     */
    protected int hash(String str, String str2, int i) {
        int bucket = (str + " " + str2 + " " + i).hashCode() % this.dim;
        // Java's % keeps the sign of the dividend; shift negative remainders into range.
        if (bucket < 0) {
            bucket += this.dim;
        }
        return bucket;
    }
}
