package org.nuxeo.common.utils;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.regex.Pattern;

/* loaded from: input_file:WEB-INF/lib/nuxeo-common-8.3.jar:org/nuxeo/common/utils/FullTextUtils.class */
/**
 * Static helpers that turn free text into a set of simplified words for simple fulltext indexing.
 * <p>
 * Each word is lower-cased, optionally stripped of Latin-1 diacritics, has one trailing plural "s" removed,
 * and is dropped when it is shorter than {@link #MIN_SIZE} or listed in {@link #stopWords}.
 */
public class FullTextUtils {
    /** Minimum number of characters a word must have to be kept. */
    public static final int MIN_SIZE = 3;

    /**
     * Replacement table for the lower-case characters U+00E0 to U+00FF, indexed by {@code c - 0xE0}.
     * <p>
     * Written with Unicode escapes so that the table has exactly 32 entries whatever charset the compiler
     * uses to read this file. The entries for U+00F0 (eth), U+00F7 (division sign) and U+00FE (thorn) map
     * to themselves. The entry for U+00E6 is never read because {@link #parseWord} expands that character
     * to "ae" before consulting the table.
     */
    public static final String UNACCENTED = "aaaaaaaceeeeiiii\u00f0nooooo\u00f7ouuuuy\u00fey";

    /** Word separator: one or more whitespace or ASCII punctuation characters. */
    public static final Pattern wordPattern = Pattern.compile("[\\s\\p{Punct}]+");

    /** Space-separated stop words: English and French function words plus common URL fragments. */
    public static final String STOP_WORDS = "a an are and as at be by for from how i in is it of on or that the this to was what when where who will with car donc est il ils je la le les mais ni nous or ou pour tu un une vous www com net org";

    /**
     * The entries of {@link #STOP_WORDS} as a set.
     * <p>
     * NOTE(review): this set is public and mutable; it is left that way so that existing callers which may
     * modify it keep working.
     */
    public static final Set<String> stopWords = new HashSet<String>(Arrays.asList(STOP_WORDS.split(" ")));

    private FullTextUtils() {
        // utility class, not instantiable
    }

    /**
     * Extracts the simplified words of a text.
     * <p>
     * The text is split on {@link #wordPattern} and every piece goes through {@link #parseWord}. The
     * order of first occurrence is preserved and duplicates are removed.
     *
     * @param str the text to analyze, may be {@code null}
     * @param z {@code true} to remove diacritics from the words
     * @return the resulting words in encounter order; an empty set when {@code str} is {@code null}
     */
    public static Set<String> parseFullText(String str, boolean z) {
        if (str == null) {
            return Collections.emptySet();
        }
        Set<String> words = new LinkedHashSet<String>();
        for (String token : wordPattern.split(str)) {
            String word = parseWord(token, z);
            if (word != null) {
                words.add(word);
            }
        }
        return words;
    }

    /**
     * Simplifies a single word: lower-cases it, optionally removes diacritics and strips one trailing "s".
     *
     * @param str the word, may be {@code null}
     * @param z {@code true} to remove diacritics
     * @return the simplified word, or {@code null} when the input is {@code null}, shorter than
     *         {@link #MIN_SIZE}, or a stop word (either as written or once its trailing "s" is removed)
     */
    public static String parseWord(String str, boolean z) {
        // Treat a missing word like any other rejected word instead of failing with an NPE.
        if (str == null) {
            return null;
        }
        int length = str.length();
        if (length < MIN_SIZE) {
            return null;
        }
        StringBuilder buf = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            char c = Character.toLowerCase(str.charAt(i));
            if (z) {
                appendUnaccented(buf, c);
            } else {
                buf.append(c);
            }
        }
        String word = buf.toString();
        // Check before stripping the plural: otherwise stop words that themselves end in "s"
        // ("this", "vous", "nous", "mais") would be shortened first and slip through the filter.
        if (stopWords.contains(word)) {
            return null;
        }
        // Simple plural heuristic; only applied when the result keeps at least MIN_SIZE characters.
        int last = buf.length() - 1;
        if (last >= MIN_SIZE && buf.charAt(last) == 's') {
            buf.setLength(last);
            word = buf.toString();
            // The singular form may be a stop word too (e.g. "cars" becomes "car").
            if (stopWords.contains(word)) {
                return null;
            }
        }
        return word;
    }

    /** Appends the diacritic-free form of the lower-case character {@code c} to {@code buf}. */
    private static void appendUnaccented(StringBuilder buf, char c) {
        if (c == '\u00e6') {
            // ligature ae; must be tested before the table range, which contains it
            buf.append("ae");
        } else if (c >= '\u00e0' && c <= '\u00ff') {
            buf.append(UNACCENTED.charAt(c - '\u00e0'));
        } else if (c == '\u0153') {
            // ligature oe
            buf.append("oe");
        } else {
            buf.append(c);
        }
    }
}
