package org.apache.nutch.parse;

import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.protocol.Content;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/parse/ParseUtil.class */
/**
 * Helper that locates suitable {@link Parser} implementations through a
 * {@link ParserFactory} and runs them against a {@link Content} instance.
 *
 * <p>Unless the {@code parser.timeout} setting is {@code -1}, every parser
 * invocation runs on its own thread and is abandoned (and cancelled) when it
 * does not finish within the configured number of seconds.
 */
public class ParseUtil {
    public static final Logger LOG = LoggerFactory.getLogger(ParseUtil.class);

    /** Configuration key holding the per-parser time limit, in seconds. */
    private static final String PARSER_TIMEOUT_KEY = "parser.timeout";

    /** Time limit used when {@link #PARSER_TIMEOUT_KEY} is not configured. */
    private static final int DEFAULT_PARSE_TIMEOUT_SECONDS = 30;

    /** Value of the time limit that makes parsers run directly on the calling thread. */
    private static final int NO_TIMEOUT = -1;

    private final ParserFactory parserFactory;

    /** Per-parser time limit in seconds, or {@link #NO_TIMEOUT}. */
    private final int maxParseTime;

    /**
     * Creates a helper whose parser factory and time limit are taken from the
     * given configuration.
     *
     * @param configuration source of the {@code parser.timeout} setting
     *                      (default 30); also handed to the {@link ParserFactory}
     */
    public ParseUtil(Configuration configuration) {
        this.parserFactory = new ParserFactory(configuration);
        this.maxParseTime = configuration.getInt(PARSER_TIMEOUT_KEY, DEFAULT_PARSE_TIMEOUT_SECONDS);
    }

    /**
     * Tries each parser the factory returns for the content's type and URL, in
     * order, and returns the first result that is neither {@code null} nor empty.
     *
     * @param content the content to parse
     * @return the first usable parse result, or an empty result carrying a
     *         failure {@link ParseStatus} when no parser produced one
     * @throws ParseException if the factory reports {@link ParserNotFound}
     */
    public ParseResult parse(Content content) throws ParseException {
        try {
            String url = content.getUrl() != null ? content.getUrl() : "";
            for (Parser parser : this.parserFactory.getParsers(content.getContentType(), url)) {
                LOG.debug("Parsing [{}] with [{}]", content.getUrl(), parser);
                ParseResult result = invokeParser(parser, content);
                if (isUsable(result)) {
                    return result;
                }
            }
            return unsuccessfulResult(content);
        } catch (ParserNotFound e) {
            throw parserNotFound(content, e);
        }
    }

    /**
     * Parses the content with the single parser registered under the given
     * extension id.
     *
     * @param str     id passed to {@link ParserFactory#getParserById}
     * @param content the content to parse
     * @return the parser's result when it is neither {@code null} nor empty,
     *         otherwise an empty result carrying a failure {@link ParseStatus}
     * @throws ParseException if the factory reports {@link ParserNotFound}
     */
    public ParseResult parseByExtensionId(String str, Content content) throws ParseException {
        try {
            ParseResult result = invokeParser(this.parserFactory.getParserById(str), content);
            return isUsable(result) ? result : unsuccessfulResult(content);
        } catch (ParserNotFound e) {
            throw parserNotFound(content, e);
        }
    }

    /** Runs the parser under the time limit, or directly when the limit is disabled. */
    private ParseResult invokeParser(Parser parser, Content content) {
        return this.maxParseTime != NO_TIMEOUT ? runParser(parser, content) : parser.getParse(content);
    }

    /** Tells whether a parser produced something worth returning to the caller. */
    private static boolean isUsable(ParseResult result) {
        return result != null && !result.isEmpty();
    }

    /** Logs the failure and builds the empty result returned when no parser succeeded. */
    private static ParseResult unsuccessfulResult(Content content) {
        LOG.warn("Unable to successfully parse content {} of type {}", content.getUrl(), content.getContentType());
        return new ParseStatus(new ParseException("Unable to successfully parse content"))
                .getEmptyParseResult(content.getUrl(), null);
    }

    /** Logs a missing-parser condition and converts it into the exception callers expect. */
    private static ParseException parserNotFound(Content content, ParserNotFound cause) {
        LOG.warn("No suitable parser found when trying to parse content {} of type {}",
                content.getUrl(), content.getContentType());
        ParseException wrapped = new ParseException(cause.getMessage());
        // Keep the original stack trace reachable; only the String constructor
        // of ParseException is known here, so the cause is attached afterwards.
        wrapped.initCause(cause);
        return wrapped;
    }

    /**
     * Runs the parser on a dedicated thread and waits at most
     * {@code maxParseTime} seconds for its result.
     *
     * @return the parser's result, or {@code null} when the parser timed out,
     *         failed, or the wait was interrupted
     */
    private ParseResult runParser(Parser parser, Content content) {
        FutureTask<ParseResult> task = new FutureTask<ParseResult>(new ParseCallable(parser, content));
        new Thread(task).start();
        try {
            return task.get(this.maxParseTime, TimeUnit.SECONDS);
        } catch (TimeoutException e) {
            LOG.warn("TIMEOUT parsing {} with {}", content.getUrl(), parser);
            // Interrupt the worker so a stuck parser does not keep running
            // after the caller has already given up on it.
            task.cancel(true);
        } catch (InterruptedException e) {
            task.cancel(true);
            // Restore the flag so code further up the stack can see the interruption.
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            LOG.warn("Error parsing " + content.getUrl() + " with " + parser, e);
            task.cancel(true);
        }
        return null;
    }
}
