package org.apache.nutch.protocol.http.api;

import java.io.IOException;
import java.net.URL;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.Protocol;
import org.apache.nutch.protocol.ProtocolException;
import org.apache.nutch.protocol.ProtocolOutput;
import org.apache.nutch.protocol.ProtocolStatus;
import org.apache.nutch.protocol.RobotRules;
import org.apache.nutch.util.DeflateUtils;
import org.apache.nutch.util.GZIPUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:plugins/lib-http/lib-http.jar:org/apache/nutch/protocol/http/api/HttpBase.class */
/**
 * Base class for HTTP protocol implementations.
 *
 * <p>Holds the HTTP settings read from the {@link Configuration} (proxy, timeout,
 * content limit, user agent, accept headers) and turns the {@link Response}
 * produced by a subclass's {@link #getResponse(URL, CrawlDatum, boolean)} into a
 * {@link ProtocolOutput} according to its HTTP status code.
 */
public abstract class HttpBase implements Protocol {
    public static final int BUFFER_SIZE = 8192;

    private static final byte[] EMPTY_CONTENT = new byte[0];
    private static final Logger LOGGER = LoggerFactory.getLogger(HttpBase.class);
    private static final String USAGE = "Usage: Http [-verbose] [-timeout N] url";

    private final RobotRulesParser robots;

    /** Proxy host name, or {@code null} when none is configured. */
    protected String proxyHost = null;

    /** Proxy port; defaults to 8080. */
    protected int proxyPort = 8080;

    /** {@code true} when a non-empty proxy host is configured. */
    protected boolean useProxy = false;

    /** Value of {@code http.timeout}; defaults to 10000. */
    protected int timeout = 10000;

    /** Value of {@code http.content.limit}; a negative value is treated as "no limit". */
    protected int maxContent = 65536;

    /** User-Agent string built by {@link #getAgentString}. */
    protected String userAgent = getAgentString("NutchCVS", null, "Nutch", "http://lucene.apache.org/nutch/bot.html", "nutch-agent@lucene.apache.org");

    /** Value of {@code http.accept.language}. */
    protected String acceptLanguage = "en-us,en-gb,en;q=0.7,*;q=0.3";

    /** Value of {@code http.accept}. */
    protected String accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";

    /** Instance logger: the one passed to the constructor, or the class logger. */
    private final Logger logger;

    private Configuration conf = null;

    /** Value of {@code http.useHttp11}. */
    protected boolean useHttp11 = false;

    /** Never assigned from the configuration in this class; stays at -1. */
    protected long maxCrawlDelay = -1L;

    /** Creates an instance that logs through the class-level logger. */
    public HttpBase() {
        this(null);
    }

    /**
     * Creates an instance with built-in default settings.
     *
     * @param logger logger to use for instance-level messages; when {@code null}
     *               the class-level logger is used instead
     */
    public HttpBase(Logger logger) {
        this.logger = logger != null ? logger : LOGGER;
        this.robots = new RobotRulesParser();
    }

    /**
     * Reads the HTTP settings from the given configuration, forwards the
     * configuration to the robots parser and logs the resulting settings.
     *
     * @param configuration configuration to read from
     */
    @Override
    public void setConf(Configuration configuration) {
        this.conf = configuration;
        this.proxyHost = configuration.get("http.proxy.host");
        this.proxyPort = configuration.getInt("http.proxy.port", 8080);
        this.useProxy = this.proxyHost != null && this.proxyHost.length() > 0;
        this.timeout = configuration.getInt("http.timeout", 10000);
        this.maxContent = configuration.getInt("http.content.limit", 65536);
        this.userAgent = getAgentString(
                configuration.get("http.agent.name"),
                configuration.get("http.agent.version"),
                configuration.get("http.agent.description"),
                configuration.get("http.agent.url"),
                configuration.get("http.agent.email"));
        // The current field values act as defaults for the accept headers.
        this.acceptLanguage = configuration.get("http.accept.language", this.acceptLanguage);
        this.accept = configuration.get("http.accept", this.accept);
        this.useHttp11 = configuration.getBoolean("http.useHttp11", false);
        this.robots.setConf(configuration);
        logConf();
    }

    /**
     * @return the configuration last passed to {@link #setConf}, or {@code null}
     */
    @Override
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Fetches the given URL through {@link #getResponse} and maps the HTTP status
     * code of the response to a {@link ProtocolOutput}.
     *
     * <p>Any {@link Throwable} raised while fetching or mapping is logged and
     * returned as a {@code ProtocolOutput} with no content and a status built
     * from the throwable; nothing is propagated from the fetch itself.
     *
     * <p>NOTE(review): the numeric status values used below (11, 12, 13, 14, 16,
     * 17, 21) are presumably inlined {@code ProtocolStatus} constants from the
     * decompiled class; confirm the names before replacing the literals.
     *
     * @param text       the URL to fetch
     * @param crawlDatum crawl datum passed through to {@link #getResponse}
     * @return the protocol output for the response
     */
    @Override
    public ProtocolOutput getProtocolOutput(Text text, CrawlDatum crawlDatum) {
        String urlString = text.toString();
        try {
            URL url = new URL(urlString);
            Response response = getResponse(url, crawlDatum, false);
            int code = response.getCode();
            byte[] body = response.getContent();
            Content content = new Content(
                    url.toString(),
                    url.toString(),
                    body == null ? EMPTY_CONTENT : body,
                    response.getHeader("Content-Type"),
                    response.getHeaders(),
                    this.conf);

            if (code == 200) {
                return new ProtocolOutput(content);
            }
            if (code == 410) {
                return new ProtocolOutput(content, new ProtocolStatus(11, "Http: " + code + " url=" + text));
            }
            if (code >= 300 && code < 400) {
                String location = response.getHeader("Location");
                if (location == null) {
                    // Fall back to the lower-case header name.
                    location = response.getHeader("location");
                }
                if (location == null) {
                    // With no Location header the target is resolved from an empty spec.
                    location = "";
                }
                URL target = new URL(url, location);
                return new ProtocolOutput(content, new ProtocolStatus(redirectStatusCode(code), target));
            }
            if (code == 400) {
                this.logger.trace("400 Bad request: {}", url);
                return new ProtocolOutput(content, new ProtocolStatus(11, url));
            }
            if (code == 401) {
                this.logger.trace("401 Authentication Required");
                return new ProtocolOutput(content, new ProtocolStatus(17, "Authentication required: " + urlString));
            }
            if (code == 404) {
                return new ProtocolOutput(content, new ProtocolStatus(14, url));
            }
            return new ProtocolOutput(content, new ProtocolStatus(16, "Http code=" + code + ", url=" + url));
        } catch (Throwable th) {
            // Deliberately broad: every failure is reported through the returned status.
            this.logger.error("Failed to get protocol output", th);
            return new ProtocolOutput(null, new ProtocolStatus(th));
        }
    }

    /**
     * Maps a 3xx HTTP code to the numeric protocol status used for redirects:
     * 302, 303 and 307 map to 13, 304 maps to 21, every other code maps to 12.
     */
    private static int redirectStatusCode(int httpCode) {
        switch (httpCode) {
            case 302:
            case 303:
            case 307:
                return 13;
            case 304:
                return 21;
            default:
                return 12;
        }
    }

    /** @return the configured proxy host, or {@code null} */
    public String getProxyHost() {
        return this.proxyHost;
    }

    /** @return the configured proxy port */
    public int getProxyPort() {
        return this.proxyPort;
    }

    /** @return {@code true} when a non-empty proxy host is configured */
    public boolean useProxy() {
        return this.useProxy;
    }

    /** @return the configured timeout */
    public int getTimeout() {
        return this.timeout;
    }

    /** @return the configured content limit */
    public int getMaxContent() {
        return this.maxContent;
    }

    /** @return the User-Agent string */
    public String getUserAgent() {
        return this.userAgent;
    }

    /** @return the Accept-Language value */
    public String getAcceptLanguage() {
        return this.acceptLanguage;
    }

    /** @return the Accept value */
    public String getAccept() {
        return this.accept;
    }

    /** @return the value of {@code http.useHttp11} */
    public boolean getUseHttp11() {
        return this.useHttp11;
    }

    /**
     * Builds a User-Agent string of the form
     * {@code name/version (description; url; email)}.
     *
     * <p>Null or empty components are left out entirely, so the result never
     * contains the text {@code "null"}, a trailing {@code "/"} or a dangling
     * {@code "; "} separator. A missing or blank name is reported as an error
     * in the log but does not prevent a string from being built.
     */
    private static String getAgentString(String agentName, String agentVersion, String agentDesc, String agentUrl, String agentEmail) {
        if (agentName == null || agentName.trim().length() == 0) {
            LOGGER.error("No User-Agent string set (http.agent.name)!");
        }

        StringBuilder agent = new StringBuilder();
        if (agentName != null) {
            agent.append(agentName);
        }
        if (isNonEmpty(agentVersion)) {
            agent.append("/").append(agentVersion);
        }

        // Join only the parts that are present so separators never dangle.
        StringBuilder details = new StringBuilder();
        String[] parts = {agentDesc, agentUrl, agentEmail};
        for (String part : parts) {
            if (isNonEmpty(part)) {
                if (details.length() > 0) {
                    details.append("; ");
                }
                details.append(part);
            }
        }
        if (details.length() > 0) {
            agent.append(" (").append(details).append(")");
        }
        return agent.toString();
    }

    /** Returns {@code true} when the string is neither {@code null} nor empty. */
    private static boolean isNonEmpty(String value) {
        return value != null && value.length() != 0;
    }

    /** Logs the current HTTP settings at info level. */
    protected void logConf() {
        if (this.logger.isInfoEnabled()) {
            this.logger.info("http.proxy.host = {}", this.proxyHost);
            this.logger.info("http.proxy.port = {}", this.proxyPort);
            this.logger.info("http.timeout = {}", this.timeout);
            this.logger.info("http.content.limit = {}", this.maxContent);
            this.logger.info("http.agent = {}", this.userAgent);
            this.logger.info("http.accept.language = {}", this.acceptLanguage);
            this.logger.info("http.accept = {}", this.accept);
        }
    }

    /**
     * Uncompresses gzip-encoded content, limited to {@link #getMaxContent()}
     * bytes when that limit is non-negative.
     *
     * @param bArr the compressed bytes
     * @param url  the URL the content came from (used for logging only)
     * @return the uncompressed bytes
     * @throws IOException if the unzip helper returns {@code null}
     */
    public byte[] processGzipEncoded(byte[] bArr, URL url) throws IOException {
        LOGGER.trace("uncompressing....");
        int limit = getMaxContent();
        byte[] unzipped = limit >= 0
                ? GZIPUtils.unzipBestEffort(bArr, limit)
                : GZIPUtils.unzipBestEffort(bArr);
        if (unzipped == null) {
            throw new IOException("unzipBestEffort returned null");
        }
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("fetched " + bArr.length + " bytes of compressed content (expanded to " + unzipped.length + " bytes) from " + url);
        }
        return unzipped;
    }

    /**
     * Inflates deflate-encoded content, limited to {@link #getMaxContent()}
     * bytes when that limit is non-negative.
     *
     * @param bArr the compressed bytes
     * @param url  the URL the content came from (used for logging only)
     * @return the inflated bytes
     * @throws IOException if the inflate helper returns {@code null}
     */
    public byte[] processDeflateEncoded(byte[] bArr, URL url) throws IOException {
        LOGGER.trace("inflating....");
        // A negative content limit means "no limit", as in processGzipEncoded;
        // never hand the raw negative value to the inflater as a size limit.
        int limit = getMaxContent();
        byte[] inflated = DeflateUtils.inflateBestEffort(bArr, limit >= 0 ? limit : Integer.MAX_VALUE);
        if (inflated == null) {
            throw new IOException("inflateBestEffort returned null");
        }
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("fetched " + bArr.length + " bytes of compressed content (expanded to " + inflated.length + " bytes) from " + url);
        }
        return inflated;
    }

    /**
     * Command-line driver: fetches a single URL with the given protocol instance
     * and prints the status and content to standard output.
     *
     * <p>Arguments: {@code [-verbose] [-timeout N] url}. {@code -timeout} takes a
     * value that is multiplied by 1000 before being stored in {@link #timeout};
     * {@code -verbose} is accepted but has no effect in this method. The URL must
     * be the last argument. Invalid usage prints the usage line to standard error
     * and exits with status -1.
     *
     * @param httpBase the protocol instance to use
     * @param strArr   the command-line arguments
     * @throws Exception if the timeout value is not a valid integer or output fails
     */
    public static void main(HttpBase httpBase, String[] strArr) throws Exception {
        if (strArr.length == 0) {
            usageAndExit();
            return;
        }
        String url = null;
        for (int i = 0; i < strArr.length; i++) {
            String arg = strArr[i];
            if (arg.equals("-timeout")) {
                if (i + 1 >= strArr.length) {
                    // "-timeout" given without a value.
                    usageAndExit();
                    return;
                }
                i++;
                httpBase.timeout = Integer.parseInt(strArr[i]) * 1000;
            } else if (!arg.equals("-verbose")) {
                if (i != strArr.length - 1) {
                    usageAndExit();
                    return;
                }
                url = arg;
            }
        }
        if (url == null) {
            // Only flags were supplied; there is nothing to fetch.
            usageAndExit();
            return;
        }

        ProtocolOutput protocolOutput = httpBase.getProtocolOutput(new Text(url), new CrawlDatum());
        Content content = protocolOutput.getContent();
        System.out.println("Status: " + protocolOutput.getStatus());
        if (content != null) {
            System.out.println("Content Type: " + content.getContentType());
            System.out.println("Content Length: " + content.getMetadata().get("Content-Length"));
            System.out.println("Content:");
            System.out.println(new String(content.getContent()));
        }
    }

    /** Prints the usage line to standard error and terminates the JVM with status -1. */
    private static void usageAndExit() {
        System.err.println(USAGE);
        System.exit(-1);
    }

    /**
     * Performs the actual HTTP request; implemented by concrete protocol plugins.
     *
     * @param url        the URL to fetch
     * @param crawlDatum the crawl datum for the URL
     * @param z          NOTE(review): meaning not visible in this class (presumably
     *                   "follow redirects" - confirm); {@link #getProtocolOutput}
     *                   always passes {@code false}
     * @return the HTTP response
     * @throws ProtocolException declared for implementations
     * @throws IOException       declared for implementations
     */
    public abstract Response getResponse(URL url, CrawlDatum crawlDatum, boolean z) throws ProtocolException, IOException;

    /**
     * Returns the robot rules for the given URL by delegating to the robots
     * parser, passing this instance along with the URL.
     *
     * @param text       the URL to get rules for
     * @param crawlDatum not used by this implementation
     * @return the robot rules returned by the parser
     */
    @Override
    public RobotRules getRobotRules(Text text, CrawlDatum crawlDatum) {
        return this.robots.getRobotRulesSet(this, text);
    }
}
