package org.apache.nutch.fetcher;

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.InvalidJobConfException;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.Progressable;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.NutchWritable;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseOutputFormat;
import org.apache.nutch.protocol.Content;

/* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/fetcher/FetcherOutputFormat.class */
/**
 * {@link OutputFormat} for fetcher output that routes each record to a
 * different underlying writer depending on the type wrapped by the
 * {@link NutchWritable} value:
 * <ul>
 *   <li>{@link CrawlDatum} values go to a {@link MapFile} under
 *       {@code CrawlDatum.FETCH_DIR_NAME};</li>
 *   <li>{@link Content} values go to a {@link MapFile} under
 *       {@code "content"} (opened only when {@code Fetcher.isStoringContent}
 *       is true);</li>
 *   <li>{@link Parse} values are delegated to a {@link ParseOutputFormat}
 *       record writer (opened only when {@code Fetcher.isParsing} is true).</li>
 * </ul>
 */
public class FetcherOutputFormat implements OutputFormat<Text, NutchWritable> {

    /**
     * Verifies that the job has an output directory and that the segment has
     * not been fetched already.
     *
     * @param fileSystem file system to check against; when {@code null} it is
     *                   resolved from the output path
     * @param jobConf    job configuration holding the output path
     * @throws InvalidJobConfException if no output directory is configured and
     *                                 the job has reduce tasks
     * @throws IOException             if the fetch directory already exists
     *                                 below the output path
     */
    @Override
    public void checkOutputSpecs(FileSystem fileSystem, JobConf jobConf) throws IOException {
        Path outputPath = FileOutputFormat.getOutputPath(jobConf);
        if (outputPath == null) {
            if (jobConf.getNumReduceTasks() != 0) {
                throw new InvalidJobConfException("Output directory not set in JobConf.");
            }
            // No output directory and no reduce tasks: there is nothing to
            // check. Continuing would only dereference the null path.
            return;
        }
        FileSystem fs = fileSystem != null ? fileSystem : outputPath.getFileSystem(jobConf);
        if (fs.exists(new Path(outputPath, CrawlDatum.FETCH_DIR_NAME))) {
            throw new IOException("Segment already fetched!");
        }
    }

    /**
     * Creates the record writer that fans records out to the fetch, content
     * and parse outputs.
     *
     * @param fileSystem   file system the output files are created on
     * @param jobConf      job configuration (output path, compression type,
     *                     content-storing and parsing flags)
     * @param str          name of the output file within each output directory
     * @param progressable progress callback handed to the underlying writers
     * @return a writer dispatching on the runtime type of each value
     * @throws IOException if one of the underlying writers cannot be created
     */
    @Override
    public RecordWriter<Text, NutchWritable> getRecordWriter(final FileSystem fileSystem, final JobConf jobConf, final String str, final Progressable progressable) throws IOException {
        Path outputPath = FileOutputFormat.getOutputPath(jobConf);
        Path fetchPath = new Path(new Path(outputPath, CrawlDatum.FETCH_DIR_NAME), str);
        final Path contentPath = new Path(new Path(outputPath, "content"), str);
        final SequenceFile.CompressionType compressionType = SequenceFileOutputFormat.getOutputCompressionType(jobConf);
        final MapFile.Writer fetchOut = new MapFile.Writer(jobConf, fileSystem, fetchPath.toString(), Text.class, CrawlDatum.class, compressionType, progressable);

        return new RecordWriter<Text, NutchWritable>() {
            /** Content output; stays {@code null} unless content storing is enabled. */
            private MapFile.Writer contentOut;
            /** Parse output; stays {@code null} unless parsing is enabled. */
            private RecordWriter<Text, Parse> parseOut;

            {
                // The optional outputs are opened only when the job asks for them.
                if (Fetcher.isStoringContent(jobConf)) {
                    this.contentOut = new MapFile.Writer(jobConf, fileSystem, contentPath.toString(), Text.class, Content.class, compressionType, progressable);
                }
                if (Fetcher.isParsing(jobConf)) {
                    this.parseOut = new ParseOutputFormat().getRecordWriter(fileSystem, jobConf, str, progressable);
                }
            }

            /**
             * Appends the wrapped value to the output matching its type.
             * Values that are none of CrawlDatum, Content or Parse are ignored.
             */
            @Override
            public void write(Text text, NutchWritable nutchWritable) throws IOException {
                Writable value = nutchWritable.get();
                if (value instanceof CrawlDatum) {
                    fetchOut.append(text, value);
                } else if (value instanceof Content) {
                    // NOTE(review): contentOut is null when content storing is
                    // disabled; presumably no Content is emitted then - confirm.
                    this.contentOut.append(text, value);
                } else if (value instanceof Parse) {
                    // NOTE(review): parseOut is null when parsing is disabled;
                    // presumably no Parse is emitted then - confirm.
                    this.parseOut.write(text, (Parse) value);
                }
            }

            /**
             * Closes every writer that was opened. The nested finally blocks
             * make sure a failure while closing one writer does not prevent
             * the remaining writers from being closed.
             */
            @Override
            public void close(Reporter reporter) throws IOException {
                try {
                    fetchOut.close();
                } finally {
                    try {
                        if (this.contentOut != null) {
                            this.contentOut.close();
                        }
                    } finally {
                        if (this.parseOut != null) {
                            this.parseOut.close(reporter);
                        }
                    }
                }
            }
        };
    }
}
