/*
 * Decompiled with CFR 0.152.
 */
package org.nuxeo.ecm.core.convert.plugins.text.extractors;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.xerces.parsers.AbstractSAXParser;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.NamespaceContext;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XMLLocator;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLParserConfiguration;
import org.cyberneko.html.HTMLConfiguration;
import org.xml.sax.SAXException;

/**
 * SAX parser built on a NekoHTML {@link HTMLConfiguration} that accumulates
 * the character data of an HTML document and exposes it as normalised plain
 * text through {@link #getContents()}.
 *
 * <p>When a tag filter is supplied, only character data reported while the
 * parser is inside an element whose local name matches the filter
 * (case-insensitively) is collected; without a filter all character data is
 * collected.
 *
 * <p>The filter is a simple on/off flag, not a depth counter: the first
 * matching end tag switches collection off, even if matching elements were
 * nested.
 */
public class HtmlParser extends AbstractSAXParser {
    private static final Log log = LogFactory.getLog(HtmlParser.class);

    /** Raw character data collected for the current document; null until a document starts. */
    private StringBuilder buffer;

    /** Local name of the element to restrict extraction to; only read when {@link #noFilter} is false. */
    private String tagFilter;

    /** True while the parser is inside an element matching {@link #tagFilter}. */
    private boolean inFilter;

    /** True when no tag filter is configured and all character data is collected. */
    private boolean noFilter;

    /**
     * Creates a parser that collects the text of the whole document.
     */
    public HtmlParser() {
        this(null);
    }

    /**
     * Creates a parser that collects only the text found inside elements
     * named {@code tagFilter}.
     *
     * @param tagFilter element name to restrict extraction to; {@code null}
     *                  or the empty string disables filtering
     */
    public HtmlParser(String tagFilter) {
        super(new HTMLConfiguration());
        this.init(tagFilter);
    }

    /**
     * Configures the parser as non-validating and installs the tag filter.
     *
     * <p>Failure to set the parser features is not fatal; it is only logged
     * at debug level.
     *
     * @param tagFilter element name to restrict extraction to; {@code null}
     *                  or the empty string disables filtering
     */
    public void init(String tagFilter) {
        try {
            this.setFeature("http://xml.org/sax/features/validation", false);
            this.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        }
        catch (SAXException e) {
            // Best effort only: keep going, but log the cause instead of just its message.
            log.debug("Could not switch parser to non-validating: " + e.getMessage(), e);
        }
        this.inFilter = false;
        if (tagFilter == null || tagFilter.isEmpty()) {
            this.noFilter = true;
            // Drop any filter left over from a previous init call.
            this.tagFilter = null;
        } else {
            this.tagFilter = tagFilter;
            this.noFilter = false;
        }
    }

    /**
     * Forwards the event to the superclass and starts collecting text when
     * the element matches the configured filter.
     */
    public void startElement(QName element, XMLAttributes attributes, Augmentations augs) throws XNIException {
        super.startElement(element, attributes, augs);
        if (this.matchesFilter(element)) {
            this.inFilter = true;
        }
    }

    /**
     * Forwards the event to the superclass and stops collecting text when
     * the element matches the configured filter.
     */
    public void endElement(QName element, Augmentations augs) throws XNIException {
        super.endElement(element, augs);
        // Must use the same (case-insensitive) comparison as startElement;
        // otherwise a filter whose case differs from the reported element
        // name would be switched on but never switched off again.
        if (this.matchesFilter(element)) {
            this.inFilter = false;
        }
    }

    /**
     * Forwards the event to the superclass and resets the per-document state
     * so the parser instance can be reused for several documents.
     */
    public void startDocument(XMLLocator locator, String encoding, NamespaceContext namespaceContext, Augmentations augs) throws XNIException {
        super.startDocument(locator, encoding, namespaceContext, augs);
        this.buffer = new StringBuilder();
        // A previous document may have ended while still inside the filter element.
        this.inFilter = false;
    }

    /**
     * Forwards the event to the superclass and appends the character data to
     * the buffer when filtering is disabled or the parser is currently
     * inside the filter element.
     */
    public void characters(XMLString xmlString, Augmentations augmentations) throws XNIException {
        super.characters(xmlString, augmentations);
        if (this.noFilter || this.inFilter) {
            if (this.buffer == null) {
                // Defensive: character data reported without a startDocument event.
                this.buffer = new StringBuilder();
            }
            this.buffer.append(xmlString.toString());
        }
    }

    /**
     * Tells whether a tag filter is configured and {@code element}'s local
     * name equals it, ignoring case.
     */
    private boolean matchesFilter(QName element) {
        return !this.noFilter && this.tagFilter.equalsIgnoreCase(element.localpart);
    }

    /**
     * Normalises extracted text: every run of characters that are neither
     * letters nor digits (whitespace, punctuation, symbols) is replaced by a
     * single space; letters and digits are kept unchanged.
     *
     * @param text raw text to normalise
     * @return the normalised text
     */
    private String filterAndJoin(String text) {
        StringBuilder joined = new StringBuilder(text.length());
        boolean lastWasSpace = false;
        for (int i = 0; i < text.length(); ++i) {
            char c = text.charAt(i);
            if (Character.isLetterOrDigit(c)) {
                joined.append(c);
                lastWasSpace = false;
            } else if (!lastWasSpace) {
                // First separator of a run: emit one space, swallow the rest.
                joined.append(' ');
                lastWasSpace = true;
            }
        }
        return joined.toString();
    }

    /**
     * Returns the text collected from the most recently parsed document,
     * normalised so that any run of non-alphanumeric characters becomes a
     * single space.
     *
     * @return the normalised text, or the empty string if no document has
     *         been parsed yet
     */
    public String getContents() {
        if (this.buffer == null) {
            return "";
        }
        return this.filterAndJoin(this.buffer.toString());
    }
}

