package org.apache.nutch.tools.proxy;

import java.io.Closeable;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletResponse;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.MapFileOutputFormat;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.lib.HashPartitioner;
import org.apache.hadoop.util.StringUtils;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.ProtocolStatus;
import org.apache.pdfbox.pdmodel.interactive.action.type.PDActionURI;
import org.mortbay.jetty.Request;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/tools/proxy/SegmentHandler.class */
public class SegmentHandler extends AbstractTestbedHandler {
    private Segment seg;
    private static final Logger LOG = LoggerFactory.getLogger(SegmentHandler.class);
    private static HashMap<Integer, Integer> protoCodes = new HashMap<>();

    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/tools/proxy/SegmentHandler$Segment.class */
    /**
     * Lazily opened view over the map files stored under one segment directory.
     *
     * <p>Reader arrays are created on first use and cached. Creation of the
     * "content" readers and of the fetch readers is guarded by separate lock
     * objects. {@link #close()} closes every reader array that has been opened.
     */
    private static class Segment implements Closeable {
        private static final Partitioner PARTITIONER = new HashPartitioner();
        private final FileSystem fs;
        private final Path segmentDir;
        private final Object cLock = new Object();
        private final Object crawlLock = new Object();
        private MapFile.Reader[] content;
        private MapFile.Reader[] parseText;
        private MapFile.Reader[] parseData;
        private MapFile.Reader[] crawl;
        private final Configuration conf;

        /**
         * Stores the arguments; no file is opened here.
         *
         * @param fileSystem    file system handle kept with the segment
         * @param path          root directory of the segment
         * @param configuration configuration used to resolve file systems and open readers
         * @throws IOException declared for callers; nothing in this constructor throws it
         */
        public Segment(FileSystem fileSystem, Path path, Configuration configuration) throws IOException {
            this.fs = fileSystem;
            this.segmentDir = path;
            this.conf = configuration;
        }

        /**
         * Looks up the {@link CrawlDatum} stored for the given key in the
         * {@code CrawlDatum.FETCH_DIR_NAME} sub-directory, opening its readers on first use.
         *
         * @param text lookup key
         * @return the value returned by {@code MapFileOutputFormat.getEntry}; callers in
         *         this file treat {@code null} as "no entry"
         * @throws IOException if the readers cannot be opened or the lookup fails
         */
        public CrawlDatum getCrawlDatum(Text text) throws IOException {
            MapFile.Reader[] readers;
            synchronized (this.crawlLock) {
                if (this.crawl == null) {
                    this.crawl = getReaders(CrawlDatum.FETCH_DIR_NAME);
                }
                // Capture the array while the lock is held so the lookup uses a fully built array.
                readers = this.crawl;
            }
            return (CrawlDatum) getEntry(readers, text, new CrawlDatum());
        }

        /**
         * Looks up the {@link Content} stored for the given key in the "content"
         * sub-directory, opening its readers on first use.
         *
         * @param text lookup key
         * @return the value returned by {@code MapFileOutputFormat.getEntry}; callers in
         *         this file treat {@code null} as "no entry"
         * @throws IOException if the readers cannot be opened or the lookup fails
         */
        public Content getContent(Text text) throws IOException {
            MapFile.Reader[] readers;
            synchronized (this.cLock) {
                if (this.content == null) {
                    this.content = getReaders("content");
                }
                readers = this.content;
            }
            return (Content) getEntry(readers, text, new Content());
        }

        /**
         * Opens one {@link MapFile.Reader} per {@code part-*} entry of the named
         * sub-directory, in sorted path order.
         *
         * @param str name of the sub-directory below the segment directory
         * @return the opened readers, never empty
         * @throws IOException if the listing yields no part files or a reader cannot be
         *                     opened; readers opened before the failure are closed again
         */
        private MapFile.Reader[] getReaders(String str) throws IOException {
            Path dir = new Path(this.segmentDir, str);
            FileSystem dirFs = dir.getFileSystem(this.conf);
            Path[] parts = FileUtil.stat2Paths(dirFs.listStatus(dir, SegmentPathFilter.INSTANCE));
            // A null or empty listing would otherwise surface later as an NPE in the sort
            // or as a failure inside the partitioner; fail early with a readable message.
            if (parts == null || parts.length == 0) {
                throw new IOException("No part-* map files found under " + dir);
            }
            Arrays.sort(parts);
            MapFile.Reader[] readers = new MapFile.Reader[parts.length];
            try {
                for (int i = 0; i < parts.length; i++) {
                    readers[i] = new MapFile.Reader(dirFs, parts[i].toString(), this.conf);
                }
            } catch (IOException | RuntimeException e) {
                // Do not leak the readers that were opened before the failure.
                for (MapFile.Reader opened : readers) {
                    if (opened != null) {
                        try {
                            opened.close();
                        } catch (IOException closeError) {
                            e.addSuppressed(closeError);
                        }
                    }
                }
                throw e;
            }
            return readers;
        }

        /**
         * Delegates the lookup to {@code MapFileOutputFormat.getEntry} using the shared partitioner.
         */
        private Writable getEntry(MapFile.Reader[] readerArr, Text text, Writable writable) throws IOException {
            return MapFileOutputFormat.getEntry(readerArr, PARTITIONER, text, writable);
        }

        /**
         * Closes every reader array that has been opened. All arrays are attempted even
         * if one fails; the first failure is rethrown with later ones attached as suppressed.
         *
         * @throws IOException if at least one reader could not be closed
         */
        @Override // java.io.Closeable, java.lang.AutoCloseable
        public void close() throws IOException {
            IOException failure = null;
            MapFile.Reader[][] groups = {this.content, this.parseText, this.parseData, this.crawl};
            for (MapFile.Reader[] group : groups) {
                if (group == null) {
                    continue;
                }
                try {
                    closeReaders(group);
                } catch (IOException e) {
                    if (failure == null) {
                        failure = e;
                    } else {
                        failure.addSuppressed(e);
                    }
                }
            }
            if (failure != null) {
                throw failure;
            }
        }

        /**
         * Closes each reader in the array, continuing past failures.
         *
         * @throws IOException the first close failure, with later ones attached as suppressed
         */
        private void closeReaders(MapFile.Reader[] readerArr) throws IOException {
            IOException failure = null;
            for (MapFile.Reader reader : readerArr) {
                if (reader == null) {
                    continue;
                }
                try {
                    reader.close();
                } catch (IOException e) {
                    if (failure == null) {
                        failure = e;
                    } else {
                        failure.addSuppressed(e);
                    }
                }
            }
            if (failure != null) {
                throw failure;
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/tools/proxy/SegmentHandler$SegmentPathFilter.class */
    /**
     * Path filter that accepts only entries whose last path component begins
     * with "part-". A single shared instance is exposed through {@link #INSTANCE}.
     */
    public static class SegmentPathFilter implements PathFilter {
        /** Required beginning of an accepted file name. */
        private static final String PART_PREFIX = "part-";

        public static final SegmentPathFilter INSTANCE = new SegmentPathFilter();

        /** Not instantiable from outside; use {@link #INSTANCE}. */
        private SegmentPathFilter() {
        }

        /**
         * @param path candidate path
         * @return {@code true} when the path's name starts with "part-"
         */
        @Override // org.apache.hadoop.fs.PathFilter
        public boolean accept(Path path) {
            final String fileName = path.getName();
            return fileName.startsWith(PART_PREFIX);
        }
    }

    /**
     * Creates a handler backed by the segment stored at the given path.
     *
     * @param configuration configuration used to obtain the file system and passed on to the segment
     * @param path          directory of the segment to serve from
     * @throws Exception if the file system cannot be obtained or the segment cannot be created
     */
    public SegmentHandler(Configuration configuration, Path path) throws Exception {
        FileSystem defaultFs = FileSystem.get(configuration);
        this.seg = new Segment(defaultFs, path, configuration);
    }

    /**
     * Answers a request from the data recorded in the segment.
     *
     * <p>The request URI is used as the lookup key. When no {@link CrawlDatum} exists for
     * it, only a "not found" marker header is added and the request is left unhandled.
     * Otherwise the response status is derived from the recorded protocol status (default
     * 200), the stored metadata is replayed as response headers and the stored content
     * bytes are written as the body.
     *
     * <p>Any exception raised while doing so is logged and reported in a marker header
     * instead of being propagated.
     *
     * @param request             the Jetty request being served
     * @param httpServletResponse response to populate
     * @param str                 not used by this implementation
     * @param i                   not used by this implementation
     * @throws IOException      declared by the overridden method; failures are caught here
     * @throws ServletException declared by the overridden method; failures are caught here
     */
    @Override // org.apache.nutch.tools.proxy.AbstractTestbedHandler
    public void handle(Request request, HttpServletResponse httpServletResponse, String str, int i) throws IOException, ServletException {
        try {
            String uri = request.getUri().toString();
            LOG.info("URI: {}", uri);
            // NOTE(review): PDActionURI.SUB_TYPE looks like a decompiler-substituted constant
            // standing in for a plain header-name literal — confirm and replace.
            addMyHeader(httpServletResponse, PDActionURI.SUB_TYPE, uri);
            Text key = new Text(uri);
            CrawlDatum datum = this.seg.getCrawlDatum(key);
            if (datum == null) {
                // The request is deliberately not marked as handled in this case.
                addMyHeader(httpServletResponse, "Res", "not found");
                LOG.info(" -not found {}", key);
                return;
            }
            addMyHeader(httpServletResponse, "Res", "found");
            LOG.info("-got {}", datum);

            ProtocolStatus protocolStatus = (ProtocolStatus) datum.getMetaData().get(Nutch.WRITABLE_PROTO_STATUS_KEY);
            int httpStatus = HttpServletResponse.SC_OK;
            if (protocolStatus != null) {
                Integer mapped = protoCodes.get(Integer.valueOf(protocolStatus.getCode()));
                if (mapped != null) {
                    httpStatus = mapped.intValue();
                }
            }
            httpServletResponse.setStatus(httpStatus);
            if (protocolStatus != null) {
                addMyHeader(httpServletResponse, "ProtocolStatus", protocolStatus.toString());
            }

            Content content = this.seg.getContent(key);
            if (content == null) {
                // Status and marker headers are already set; there is simply no body to send.
                markHandled(request, httpServletResponse);
                return;
            }
            byte[] body = content.getContent();
            LOG.debug("-data len={}", body.length);
            Metadata metadata = content.getMetadata();
            String[] names = metadata.names();
            LOG.debug("- {} meta", names.length);
            for (String name : names) {
                if (name == null || name.isEmpty()) {
                    // An empty name has no first character to classify and is not a usable header name.
                    continue;
                }
                char first = name.charAt(0);
                // Names starting with an upper-case letter are emitted verbatim;
                // everything else goes through addMyHeader.
                boolean verbatim = Character.isLetter(first) && Character.isUpperCase(first);
                for (String value : metadata.getValues(name)) {
                    if (verbatim) {
                        httpServletResponse.addHeader(name, value);
                    } else {
                        addMyHeader(httpServletResponse, name, value);
                    }
                }
            }
            markHandled(request, httpServletResponse);
            httpServletResponse.setContentType(metadata.get("Content-Type"));
            httpServletResponse.setContentLength(body.length);
            httpServletResponse.getOutputStream().write(body, 0, body.length);
            httpServletResponse.flushBuffer();
        } catch (Exception e) {
            LOG.warn("Failed to serve request from segment", e);
            // Header values must stay on a single line, so collapse the stack trace's line breaks.
            String detail = StringUtils.stringifyException(e).replaceAll("\\s*[\\r\\n]+\\s*", " ").trim();
            addMyHeader(httpServletResponse, "Res", "Exception: " + detail);
        }
    }

    /** Marks the request as handled and records this handler's simple class name in "X-Handled-By". */
    private void markHandled(Request request, HttpServletResponse httpServletResponse) {
        request.setHandled(true);
        httpServletResponse.addHeader("X-Handled-By", getClass().getSimpleName());
    }

    static {
        protoCodes.put(17, 401);
        protoCodes.put(23, 503);
        protoCodes.put(16, 500);
        protoCodes.put(2, 400);
        protoCodes.put(11, 410);
        protoCodes.put(12, 301);
        protoCodes.put(20, 400);
        protoCodes.put(14, 404);
        protoCodes.put(21, 304);
        protoCodes.put(10, 400);
        protoCodes.put(19, 400);
        protoCodes.put(15, 400);
        protoCodes.put(18, 403);
        protoCodes.put(1, 200);
        protoCodes.put(13, 302);
        protoCodes.put(22, 400);
    }
}
