package org.apache.nutch.parse;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Vector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.nutch.plugin.Extension;
import org.apache.nutch.plugin.ExtensionPoint;
import org.apache.nutch.plugin.PluginRepository;
import org.apache.nutch.plugin.PluginRuntimeException;
import org.apache.nutch.util.MimeUtil;
import org.apache.nutch.util.ObjectCache;
import org.bouncycastle.cms.CMSAttributeTableGenerator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.Marker;

/* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/parse/ParserFactory.class */
/**
 * Creates and caches {@link Parser} plugin instances.
 *
 * <p>Parsers are selected either by content type ({@link #getParsers(String, String)}) or by
 * extension id / alias ({@link #getParserById(String)}). The mapping from content type to
 * plugin ids comes from the {@link ParsePluginList} produced by {@link ParsePluginsReader};
 * the candidate extensions come from the {@code Parser.X_POINT_ID} extension point.
 *
 * <p>Resolved extension lists and instantiated parsers are stored in the {@link ObjectCache}
 * obtained for the configuration, keyed by cleaned MIME type and by extension id respectively.
 */
public final class ParserFactory {
    public static final Logger LOG = LoggerFactory.getLogger(ParserFactory.class);

    /** Wildcard used both as the catch-all plugin-list key and as the "any content type" value. */
    public static final String DEFAULT_PLUGIN = "*";

    /** Name of the extension attribute that declares the supported content type. */
    private static final String ATTR_CONTENT_TYPE = "contentType";

    /**
     * Sentinel stored in the object cache to record "no extension found for this MIME type".
     * It is compared by identity, so it must stay the shared empty-list singleton.
     */
    private static final List<Extension> EMPTY_EXTENSION_LIST = Collections.emptyList();

    private final Configuration conf;
    private final ExtensionPoint extensionPoint;
    private final ParsePluginList parsePluginList;

    /**
     * Builds a factory for the given configuration.
     *
     * @param configuration configuration used to look up the plugin repository, the object
     *                      cache and the parse-plugins preferences
     * @throws RuntimeException if the parser extension point is missing or the parse-plugins
     *                          preferences could not be loaded
     */
    public ParserFactory(Configuration configuration) {
        this.conf = configuration;
        ObjectCache objectCache = ObjectCache.get(configuration);
        this.extensionPoint = PluginRepository.get(configuration).getExtensionPoint(Parser.X_POINT_ID);

        // Reuse the parsed preferences if a previous factory already cached them.
        String cacheKey = ParsePluginList.class.getName();
        ParsePluginList pluginList = (ParsePluginList) objectCache.getObject(cacheKey);
        if (pluginList == null) {
            pluginList = new ParsePluginsReader().parse(configuration);
            objectCache.setObject(cacheKey, pluginList);
        }
        this.parsePluginList = pluginList;

        if (this.extensionPoint == null) {
            throw new RuntimeException("x point " + Parser.X_POINT_ID + " not found.");
        }
        if (this.parsePluginList == null) {
            throw new RuntimeException("Parse Plugins preferences could not be loaded.");
        }
    }

    /**
     * Returns the parsers able to handle the given content type, in preference order.
     *
     * <p>Extensions whose instantiation fails are logged and skipped, so the returned array
     * may be shorter than the list of matching extensions (and may even be empty).
     *
     * @param str  content type to find parsers for
     * @param str2 value passed as the first argument of {@link ParserNotFound} when nothing
     *             matches (presumably the URL being parsed; confirm against ParserNotFound)
     * @return the instantiated parsers, never {@code null}
     * @throws ParserNotFound if no extension is mapped to or claims the content type
     */
    public Parser[] getParsers(String str, String str2) throws ParserNotFound {
        ObjectCache objectCache = ObjectCache.get(this.conf);
        List<Extension> extensions = getExtensions(str);
        if (extensions == null) {
            throw new ParserNotFound(str2, str);
        }
        List<Parser> parsers = new ArrayList<Parser>(extensions.size());
        for (Extension extension : extensions) {
            try {
                Parser parser = (Parser) objectCache.getObject(extension.getId());
                if (parser == null) {
                    parser = (Parser) extension.getExtensionInstance();
                    objectCache.setObject(extension.getId(), parser);
                }
                parsers.add(parser);
            } catch (PluginRuntimeException e) {
                // Best effort: one broken plugin must not prevent the others from being used.
                if (LOG.isWarnEnabled()) {
                    LOG.warn("ParserFactory:PluginRuntimeException when initializing parser plugin " + extension.getDescriptor().getPluginId() + " instance in getParsers function: attempting to continue instantiating parsers", e);
                }
            }
        }
        return parsers.toArray(new Parser[0]);
    }

    /**
     * Returns the parser whose extension id, or whose alias in the parse-plugins preferences,
     * equals the given id.
     *
     * @param str extension id or alias; {@code null} and unknown ids yield {@link ParserNotFound}
     * @return the cached or newly instantiated parser
     * @throws ParserNotFound if no extension matches or the parser cannot be instantiated
     */
    public Parser getParserById(String str) throws ParserNotFound {
        Extension[] extensions = this.extensionPoint.getExtensions();
        ObjectCache objectCache = ObjectCache.get(this.conf);

        // Direct id match first, alias lookup second; both helpers tolerate a null id.
        Extension extension = getExtension(extensions, str);
        if (extension == null) {
            extension = getExtensionFromAlias(extensions, str);
        }
        if (extension == null) {
            throw new ParserNotFound("No Parser Found for id [" + str + "]");
        }

        Parser cached = (Parser) objectCache.getObject(extension.getId());
        if (cached != null) {
            return cached;
        }
        try {
            Parser parser = (Parser) extension.getExtensionInstance();
            objectCache.setObject(extension.getId(), parser);
            return parser;
        } catch (PluginRuntimeException e) {
            if (LOG.isWarnEnabled()) {
                LOG.warn("Cannot initialize parser " + extension.getDescriptor().getPluginId() + " (cause: " + e.toString() + ")", e);
            }
            throw new ParserNotFound("Cannot init parser for id [" + str + "]");
        }
    }

    /**
     * Resolves the extensions for a content type, consulting and updating the object cache.
     *
     * @param str raw content type; it is normalised with {@link MimeUtil#cleanMimeType(String)}
     *            before being used as cache key
     * @return the matching extensions, or {@code null} if none were found (now or previously)
     */
    protected List<Extension> getExtensions(String str) {
        ObjectCache objectCache = ObjectCache.get(this.conf);
        String cleanMimeType = MimeUtil.cleanMimeType(str);

        @SuppressWarnings("unchecked") // only this class stores lists under MIME-type keys
        List<Extension> cached = (List<Extension>) objectCache.getObject(cleanMimeType);
        if (cached == EMPTY_EXTENSION_LIST) {
            // A previous lookup already established that nothing handles this type.
            return null;
        }
        if (cached != null) {
            return cached;
        }

        List<Extension> found = findExtensions(cleanMimeType);
        // Cache misses too, using the sentinel, so the search is not repeated.
        objectCache.setObject(cleanMimeType, found != null ? found : EMPTY_EXTENSION_LIST);
        return found;
    }

    /**
     * Looks up extensions for the content type, falling back to the plugins mapped to the
     * {@link #DEFAULT_PLUGIN} wildcard when the specific lookup finds nothing.
     */
    private List<Extension> findExtensions(String str) {
        Extension[] extensions = this.extensionPoint.getExtensions();
        List<Extension> specific = matchExtensions(this.parsePluginList.getPluginList(str), extensions, str);
        if (specific != null) {
            return specific;
        }
        return matchExtensions(this.parsePluginList.getPluginList(DEFAULT_PLUGIN), extensions, DEFAULT_PLUGIN);
    }

    /**
     * Selects the extensions to use for a content type.
     *
     * <p>When {@code list} is non-null, each listed plugin id is resolved to an extension,
     * preferring one that also claims the content type and otherwise accepting an id-only
     * match (with a warning). When {@code list} is null, every extension whose declared
     * content type is the wildcard or matches {@code str} is selected; wildcard extensions
     * are inserted at the front.
     *
     * @param list         plugin ids mapped to the content type, or {@code null} if unmapped
     * @param extensionArr all available parser extensions
     * @param str          content type being resolved
     * @return the selected extensions, or {@code null} if none qualified
     */
    private List<Extension> matchExtensions(List<String> list, Extension[] extensionArr, String str) {
        List<Extension> matched = new ArrayList<Extension>();
        if (list != null) {
            for (String pluginId : list) {
                Extension extension = getExtension(extensionArr, pluginId, str);
                if (extension == null) {
                    // Fall back to an id-only match and tell the operator why it is suspicious.
                    extension = getExtension(extensionArr, pluginId);
                    if (LOG.isWarnEnabled()) {
                        if (extension != null) {
                            LOG.warn("ParserFactory:Plugin: " + pluginId + " mapped to contentType " + str + " via parse-plugins.xml, but its plugin.xml file does not claim to support contentType: " + str);
                        } else {
                            LOG.warn("ParserFactory: Plugin: " + pluginId + " mapped to contentType " + str + " via parse-plugins.xml, but not enabled via plugin.includes in nutch-default.xml");
                        }
                    }
                }
                if (extension != null) {
                    matched.add(extension);
                }
            }
        } else {
            for (Extension candidate : extensionArr) {
                String supported = candidate.getAttribute(ATTR_CONTENT_TYPE);
                if (DEFAULT_PLUGIN.equals(supported)) {
                    matched.add(0, candidate);
                } else if (supported != null && str.matches(escapeContentType(supported))) {
                    matched.add(candidate);
                }
            }
            if (!matched.isEmpty()) {
                if (LOG.isInfoEnabled()) {
                    StringBuilder ids = new StringBuilder("[");
                    boolean first = true;
                    for (Extension extension : matched) {
                        if (first) {
                            first = false;
                        } else {
                            ids.append(" - ");
                        }
                        ids.append(extension.getId());
                    }
                    ids.append("]");
                    LOG.info("The parsing plugins: " + ids.toString() + " are enabled via the plugin.includes system property, and all claim to support the content type " + str + ", but they are not mapped to it  in the parse-plugins.xml file");
                }
            } else if (LOG.isDebugEnabled()) {
                LOG.debug("ParserFactory:No parse plugins mapped or enabled for contentType " + str);
            }
        }
        return matched.isEmpty() ? null : matched;
    }

    /**
     * Escapes the literal {@code +} and {@code .} characters of a declared content type so it
     * can be handed to {@link String#matches(String)}. Any other regex metacharacter is left
     * as is and therefore keeps its regex meaning.
     */
    private String escapeContentType(String str) {
        return str.replace("+", "\\+").replace(".", "\\.");
    }

    /**
     * Tells whether the extension has the given id and is applicable to the content type.
     *
     * @param extension candidate extension
     * @param str       required extension id
     * @param str2      content type; the wildcard accepts any extension with the right id
     * @return {@code true} if the id matches and the extension declares the wildcard, declares
     *         a pattern matching {@code str2}, or {@code str2} itself is the wildcard
     */
    private boolean match(Extension extension, String str, String str2) {
        if (!str.equals(extension.getId())) {
            return false;
        }
        String supported = extension.getAttribute(ATTR_CONTENT_TYPE);
        if (supported == null) {
            // No declared content type: only the wildcard request can still accept it.
            return DEFAULT_PLUGIN.equals(str2);
        }
        return DEFAULT_PLUGIN.equals(supported)
                || str2.matches(escapeContentType(supported))
                || DEFAULT_PLUGIN.equals(str2);
    }

    /** Returns the first extension accepted by {@link #match}, or {@code null} if none is. */
    private Extension getExtension(Extension[] extensionArr, String str, String str2) {
        for (Extension candidate : extensionArr) {
            if (match(candidate, str, str2)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Returns the first extension whose id equals {@code str}, or {@code null} if there is
     * none or {@code str} is {@code null}.
     */
    private Extension getExtension(Extension[] extensionArr, String str) {
        if (str == null) {
            // Callers pass the result of an alias lookup here, which is null for unknown ids.
            return null;
        }
        for (Extension candidate : extensionArr) {
            if (str.equals(candidate.getId())) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Resolves {@code str} through the alias table of the parse-plugins preferences and
     * returns the extension with the aliased id, or {@code null} if the alias is unknown.
     */
    private Extension getExtensionFromAlias(Extension[] extensionArr, String str) {
        if (str == null || this.parsePluginList.getAliases() == null) {
            return null;
        }
        return getExtension(extensionArr, this.parsePluginList.getAliases().get(str));
    }
}
