package org.nuxeo.ecm.core.storage;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import net.htmlparser.jericho.Renderer;
import net.htmlparser.jericho.Source;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.nuxeo.runtime.api.Framework;

/* loaded from: input_file:org/nuxeo/ecm/core/storage/DefaultFulltextParser.class */
/**
 * Default {@link FulltextParser} implementation.
 * <p>
 * A text is first preprocessed (HTML markup rendered away when the text contains a {@code <}, then HTML entities
 * unescaped), then split into words using {@link #WORD_SPLIT_PATTERN}; each non-empty word is lowercased.
 * <p>
 * A {@code null} text is treated as containing no words.
 */
public class DefaultFulltextParser implements FulltextParser {
    /** Name of the framework property looked up for the word-split regular expression. */
    public static final String WORD_SPLIT_PROP = "org.nuxeo.fulltext.wordsplit";

    /** Fallback word-split regular expression: one or more whitespace or punctuation characters. */
    public static final String WORD_SPLIT_DEF = "[\\s\\p{Punct}]+";

    /**
     * Word separator pattern, compiled once at class initialization from
     * {@code Framework.getProperty(WORD_SPLIT_PROP, WORD_SPLIT_DEF)}.
     */
    protected static final Pattern WORD_SPLIT_PATTERN = Pattern.compile(Framework.getProperty(WORD_SPLIT_PROP, WORD_SPLIT_DEF));

    /**
     * Parses the text into lowercased words and returns them joined by a single space.
     *
     * @param str the text to parse, may be {@code null}
     * @param str2 passed through unchanged to {@link #parse(String, String, List)}
     * @return the space-separated words; an empty string when no word was found
     */
    @Override
    public String parse(String str, String str2) {
        List<String> words = new ArrayList<String>();
        parse(str, str2, words);
        return StringUtils.join(words, ' ');
    }

    /**
     * Parses the text into lowercased words and appends them, in order, to the given list.
     *
     * @param str the text to parse, may be {@code null} (nothing is appended then)
     * @param str2 passed through unchanged to {@link #preprocessField(String, String)}
     * @param list the list receiving the words
     */
    @Override
    public void parse(String str, String str2, List<String> list) {
        String text = preprocessField(str, str2);
        if (text == null) {
            // preprocessField yields null for a null input (and an overriding subclass may too);
            // Pattern.split(null) would throw a NullPointerException, so there is simply nothing to add.
            return;
        }
        for (String word : WORD_SPLIT_PATTERN.split(text)) {
            // split() may produce an empty leading token when the text starts with a separator
            if (!word.isEmpty()) {
                // NOTE(review): toLowerCase() uses the JVM default locale; kept as is so that produced
                // tokens do not change, but consider an explicit locale if indexes must be locale-independent.
                list.add(word.toLowerCase());
            }
        }
    }

    /**
     * Prepares a raw text for word splitting: removes HTML markup if the text contains a {@code <} character,
     * then unescapes HTML entities.
     *
     * @param str the raw text, may be {@code null}
     * @param str2 not used by this implementation
     * @return the preprocessed text, or {@code null} if {@code str} is {@code null}
     */
    protected String preprocessField(String str, String str2) {
        if (str == null) {
            return null;
        }
        String text = str;
        if (text.contains("<")) {
            // only pay for HTML rendering when the text can actually contain a tag
            text = removeHtml(text);
        }
        return StringEscapeUtils.unescapeHtml(text);
    }

    /**
     * Renders the given HTML as plain text using the Jericho {@link Renderer}, with hyperlink URL inclusion and
     * font style decoration both turned off.
     *
     * @param str the HTML text, must not be {@code null}
     * @return the rendered text
     */
    protected String removeHtml(String str) {
        Source source = new Source(str);
        Renderer renderer = source.getRenderer();
        renderer.setIncludeHyperlinkURLs(false);
        renderer.setDecorateFontStyles(false);
        return renderer.toString();
    }
}
