/*
 * Decompiled with CFR 0.152.
 */
package org.nuxeo.ecm.core.storage;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import net.htmlparser.jericho.Renderer;
import net.htmlparser.jericho.Source;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.nuxeo.ecm.core.storage.FulltextParser;
import org.nuxeo.runtime.api.Framework;

/**
 * Default {@link FulltextParser} implementation: strips HTML markup, unescapes
 * HTML entities, splits the remaining text into words and lower-cases them.
 * <p>
 * The word separator is a regular expression read once, at class
 * initialization, from the framework property {@value #WORD_SPLIT_PROP}; when
 * the property is not set, {@link #WORD_SPLIT_DEF} is used.
 */
public class DefaultFulltextParser
implements FulltextParser {
    /** Name of the framework property holding the word-separator regular expression. */
    public static final String WORD_SPLIT_PROP = "org.nuxeo.fulltext.wordsplit";

    /** Default word separator: one or more whitespace or punctuation characters. */
    public static final String WORD_SPLIT_DEF = "[\\s\\p{Punct}]+";

    /** Compiled word separator, shared by all instances. */
    protected static final Pattern WORD_SPLIT_PATTERN = Pattern.compile(Framework.getProperty(WORD_SPLIT_PROP, WORD_SPLIT_DEF));

    /**
     * Parses a field value into a single string of lower-cased words separated
     * by one space.
     *
     * @param s the raw field value, may be {@code null}
     * @param path the path of the field being parsed
     * @return the space-joined words; empty when {@code s} is {@code null} or
     *         contains no word
     */
    @Override
    public String parse(String s, String path) {
        List<String> strings = new ArrayList<String>();
        this.parse(s, path, strings);
        return StringUtils.join(strings, ' ');
    }

    /**
     * Parses a field value and appends each lower-cased word to {@code strings}.
     *
     * @param s the raw field value, may be {@code null}
     * @param path the path of the field being parsed
     * @param strings the list receiving the words, in order of appearance
     */
    @Override
    public void parse(String s, String path, List<String> strings) {
        String text = this.preprocessField(s, path);
        if (text == null) {
            // preprocessField maps a null value to null; there is nothing to
            // split, and Pattern.split(null) would throw a NullPointerException.
            return;
        }
        for (String word : WORD_SPLIT_PATTERN.split(text)) {
            // split() can yield an empty leading token when the text starts
            // with a separator; such tokens are not words.
            if (!word.isEmpty()) {
                strings.add(word.toLowerCase());
            }
        }
    }

    /**
     * Prepares a field value for word splitting: removes HTML markup when the
     * value contains a {@code <} character, then unescapes HTML entities.
     *
     * @param s the raw field value, may be {@code null}
     * @param path the path of the field being parsed (not used by this
     *            implementation)
     * @return the cleaned-up text, or {@code null} when {@code s} is
     *         {@code null}
     */
    protected String preprocessField(String s, String path) {
        if (s == null) {
            return null;
        }
        String text = s;
        // Only run the HTML renderer when the value can actually contain a tag.
        if (text.contains("<")) {
            text = this.removeHtml(text);
        }
        return StringEscapeUtils.unescapeHtml(text);
    }

    /**
     * Renders an HTML fragment as plain text using the Jericho renderer.
     *
     * @param s the text containing HTML markup
     * @return the text produced by the renderer
     */
    protected String removeHtml(String s) {
        Source source = new Source(s);
        Renderer renderer = source.getRenderer();
        // Presumably keeps link targets and font-style decorations out of the
        // rendered text so only the visible words remain -- confirm against
        // the Jericho Renderer documentation.
        renderer.setIncludeHyperlinkURLs(false);
        renderer.setDecorateFontStyles(false);
        return renderer.toString();
    }
}

