package org.apache.nutch.segment;

import com.beust.jcommander.Parameters;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.SequenceFileRecordReader;
import org.apache.hadoop.util.Progressable;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Generator;
import org.apache.nutch.metadata.MetaWrapper;
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.net.URLFilters;
import org.apache.nutch.net.URLNormalizers;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.ParseText;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.util.HadoopFSUtil;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/segment/SegmentMerger.class */
public class SegmentMerger extends Configured implements Mapper<Text, MetaWrapper, Text, MetaWrapper>, Reducer<Text, MetaWrapper, Text, MetaWrapper> {
    private static final Logger LOG = LoggerFactory.getLogger(SegmentMerger.class);
    // MetaWrapper metadata key holding the string form of the SegmentPart a record came from
    // (set by ObjectInputFormat, read by reduce() and SegmentOutputFormat).
    private static final String SEGMENT_PART_KEY = "part";
    // MetaWrapper metadata key holding the slice index assigned in reduce(); absent when slicing is off.
    private static final String SEGMENT_SLICE_KEY = "slice";
    // URL filters applied to keys in map(); only created when "segment.merger.filter" is true.
    private URLFilters filters;
    // URL normalizers applied to keys in map(); only created when "segment.merger.normalizer" is true.
    private URLNormalizers normalizers;
    // Merge filters consulted once per key in reduce(); created together with `filters`.
    private SegmentMergeFilters mergeFilters;
    // Value of "segment.merger.slice"; values <= 0 disable slicing. configure() divides it by the
    // number of reduce tasks.
    private long sliceSize;
    // Number of keys this instance has passed through reduce() (after merge filtering).
    private long curCount;
    // Reusable output key for map().
    private Text newKey;

    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/segment/SegmentMerger$ObjectInputFormat.class */
    public static class ObjectInputFormat extends SequenceFileInputFormat<Text, MetaWrapper> {
        @Override // org.apache.hadoop.mapred.SequenceFileInputFormat, org.apache.hadoop.mapred.FileInputFormat, org.apache.hadoop.mapred.InputFormat
        public RecordReader<Text, MetaWrapper> getRecordReader(InputSplit inputSplit, JobConf jobConf, Reporter reporter) throws IOException {
            reporter.setStatus(inputSplit.toString());
            FileSplit fileSplit = (FileSplit) inputSplit;
            try {
                final String segmentPart = SegmentPart.get(fileSplit).toString();
                SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(jobConf), fileSplit.getPath(), jobConf);
                try {
                    try {
                        final Writable writable = (Writable) reader.getValueClass().newInstance();
                        final SequenceFileRecordReader sequenceFileRecordReader = new SequenceFileRecordReader(jobConf, (FileSplit) inputSplit);
                        try {
                            return new SequenceFileRecordReader<Text, MetaWrapper>(jobConf, fileSplit) { // from class: org.apache.nutch.segment.SegmentMerger.ObjectInputFormat.1
                                @Override // org.apache.hadoop.mapred.SequenceFileRecordReader, org.apache.hadoop.mapred.RecordReader
                                public synchronized boolean next(Text text, MetaWrapper metaWrapper) throws IOException {
                                    FileInputFormat.LOG.debug("Running OIF.next()");
                                    boolean next = sequenceFileRecordReader.next(text, writable);
                                    metaWrapper.set(writable);
                                    metaWrapper.setMeta(SegmentMerger.SEGMENT_PART_KEY, segmentPart);
                                    return next;
                                }

                                @Override // org.apache.hadoop.mapred.SequenceFileRecordReader, org.apache.hadoop.mapred.RecordReader
                                public synchronized void close() throws IOException {
                                    sequenceFileRecordReader.close();
                                }

                                @Override // org.apache.hadoop.mapred.SequenceFileRecordReader, org.apache.hadoop.mapred.RecordReader
                                public MetaWrapper createValue() {
                                    return new MetaWrapper();
                                }
                            };
                        } catch (IOException e) {
                            throw new RuntimeException("Cannot create RecordReader: ", e);
                        }
                    } catch (Exception e2) {
                        throw new IOException(e2.toString());
                    }
                } finally {
                    try {
                        reader.close();
                    } catch (Exception e3) {
                    }
                }
            } catch (IOException e4) {
                throw new RuntimeException("Cannot identify segment:", e4);
            }
        }
    }

    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/segment/SegmentMerger$SegmentOutputFormat.class */
    /**
     * Output format that distributes merged records over per-part writers
     * (crawl_generate, crawl_fetch, crawl_parse, content, parse_data, parse_text)
     * below {@code <output>/<segmentName>[-<slice>]/<part>/<task file name>}.
     *
     * <p>The part is taken from the record's {@code SEGMENT_PART_KEY} metadata and
     * the optional slice from its {@code SEGMENT_SLICE_KEY} metadata.
     */
    public static class SegmentOutputFormat extends FileOutputFormat<Text, MetaWrapper> {
        /** Writer-cache key prefix used for records that carry no slice metadata. */
        private static final String DEFAULT_SLICE = "default";

        /**
         * Creates a writer that lazily opens one underlying file per (slice, part) pair.
         *
         * @param fileSystem   file system the output files are created on
         * @param jobConf      job configuration; supplies the output path, the compression type
         *                     and the target segment name ("segment.merger.segmentName")
         * @param str          file name of this task's output within each part directory
         * @param progressable progress callback handed to the underlying writers
         */
        @Override // org.apache.hadoop.mapred.FileOutputFormat, org.apache.hadoop.mapred.OutputFormat
        public RecordWriter<Text, MetaWrapper> getRecordWriter(final FileSystem fileSystem, final JobConf jobConf, final String str, final Progressable progressable) throws IOException {
            return new RecordWriter<Text, MetaWrapper>() {
                /** Open writers keyed by slice + part name; values are SequenceFile.Writer or MapFile.Writer. */
                private final HashMap<String, Object> sliceWriters = new HashMap<String, Object>();
                private final String segmentName = jobConf.get("segment.merger.segmentName");

                @Override // org.apache.hadoop.mapred.RecordWriter
                public void write(Text text, MetaWrapper metaWrapper) throws IOException {
                    SegmentPart part = SegmentPart.parse(metaWrapper.getMeta(SegmentMerger.SEGMENT_PART_KEY));
                    Writable value = metaWrapper.get();
                    String slice = metaWrapper.getMeta(SegmentMerger.SEGMENT_SLICE_KEY);

                    if (value instanceof CrawlDatum) {
                        // CrawlDatum records exist in three parts; the part name decides the target.
                        if (part.partName.equals(CrawlDatum.GENERATE_DIR_NAME)) {
                            ensureSequenceFile(slice, CrawlDatum.GENERATE_DIR_NAME).append((Writable) text, value);
                        } else if (part.partName.equals(CrawlDatum.FETCH_DIR_NAME)) {
                            ensureMapFile(slice, CrawlDatum.FETCH_DIR_NAME, CrawlDatum.class).append(text, value);
                        } else if (part.partName.equals(CrawlDatum.PARSE_DIR_NAME)) {
                            ensureSequenceFile(slice, CrawlDatum.PARSE_DIR_NAME).append((Writable) text, value);
                        } else {
                            throw new IOException("Cannot determine segment part: " + part.partName);
                        }
                    } else if (value instanceof Content) {
                        ensureMapFile(slice, "content", Content.class).append(text, value);
                    } else if (value instanceof ParseData) {
                        // Re-point the parse data at the segment (and slice) it is being written into.
                        String targetSegment = slice == null ? this.segmentName : this.segmentName + "-" + slice;
                        ((ParseData) value).getContentMeta().set(Nutch.SEGMENT_NAME_KEY, targetSegment);
                        ensureMapFile(slice, ParseData.DIR_NAME, ParseData.class).append(text, value);
                    } else if (value instanceof ParseText) {
                        ensureMapFile(slice, ParseText.DIR_NAME, ParseText.class).append(text, value);
                    }
                    // Any other value type is silently dropped.
                }

                /** Builds {@code <output>/<segmentName>[-<slice>]/<dirName>/<task file name>}. */
                private Path partPath(String slice, String dirName) {
                    String segmentDir = DEFAULT_SLICE.equals(slice) ? this.segmentName : this.segmentName + "-" + slice;
                    Path outputPath = FileOutputFormat.getOutputPath(jobConf);
                    return new Path(new Path(new Path(outputPath, segmentDir), dirName), str);
                }

                /** Returns the cached sequence-file writer for the slice/part, creating it on first use. */
                private SequenceFile.Writer ensureSequenceFile(String slice, String dirName) throws IOException {
                    if (slice == null) {
                        slice = DEFAULT_SLICE;
                    }
                    String cacheKey = slice + dirName;
                    SequenceFile.Writer cached = (SequenceFile.Writer) this.sliceWriters.get(cacheKey);
                    if (cached != null) {
                        return cached;
                    }
                    SequenceFile.Writer created = SequenceFile.createWriter(fileSystem, jobConf, partPath(slice, dirName), Text.class, CrawlDatum.class, SequenceFileOutputFormat.getOutputCompressionType(jobConf), progressable);
                    this.sliceWriters.put(cacheKey, created);
                    return created;
                }

                /** Returns the cached map-file writer for the slice/part, creating it on first use. */
                private MapFile.Writer ensureMapFile(String slice, String dirName, Class<? extends Writable> cls) throws IOException {
                    if (slice == null) {
                        slice = DEFAULT_SLICE;
                    }
                    String cacheKey = slice + dirName;
                    MapFile.Writer cached = (MapFile.Writer) this.sliceWriters.get(cacheKey);
                    if (cached != null) {
                        return cached;
                    }
                    SequenceFile.CompressionType compression = SequenceFileOutputFormat.getOutputCompressionType(jobConf);
                    if (cls.isAssignableFrom(ParseText.class)) {
                        // Parse text is always written with record-level compression.
                        compression = SequenceFile.CompressionType.RECORD;
                    }
                    MapFile.Writer created = new MapFile.Writer(jobConf, fileSystem, partPath(slice, dirName).toString(), (Class<? extends WritableComparable>) Text.class, cls, compression, progressable);
                    this.sliceWriters.put(cacheKey, created);
                    return created;
                }

                /**
                 * Closes every open writer. All writers are attempted even if one fails;
                 * the first failure is rethrown afterwards and later ones are logged.
                 */
                @Override // org.apache.hadoop.mapred.RecordWriter
                public void close(Reporter reporter) throws IOException {
                    IOException firstFailure = null;
                    for (Object writer : this.sliceWriters.values()) {
                        try {
                            if (writer instanceof SequenceFile.Writer) {
                                ((SequenceFile.Writer) writer).close();
                            } else {
                                ((MapFile.Writer) writer).close();
                            }
                        } catch (IOException e) {
                            if (firstFailure == null) {
                                firstFailure = e;
                            } else {
                                SegmentMerger.LOG.warn("Additional failure while closing segment writers", e);
                            }
                        }
                    }
                    if (firstFailure != null) {
                        throw firstFailure;
                    }
                }
            };
        }
    }

    /**
     * Creates a merger without a configuration; equivalent to passing
     * {@code null} to {@link #SegmentMerger(Configuration)}.
     */
    public SegmentMerger() {
        this((Configuration) null);
    }

    /**
     * Creates a merger bound to the given configuration.
     *
     * <p>The field defaults below are assigned after the superclass constructor
     * returns. NOTE(review): if the superclass constructor invokes
     * {@link #setConf(Configuration)}, anything that call stored in these fields
     * is reset here — confirm; {@code merge} and {@code configure} call
     * {@code setConf} again before the fields are used.
     *
     * @param configuration configuration handed to the superclass; may be {@code null}
     */
    public SegmentMerger(Configuration configuration) {
        super(configuration);
        newKey = new Text();
        curCount = 0L;
        sliceSize = -1L;
        mergeFilters = null;
        normalizers = null;
        filters = null;
    }

    /**
     * Stores the configuration and derives the merger's settings from it.
     *
     * <p>With a non-null configuration: URL and merge filters are created when
     * "segment.merger.filter" is true, URL normalizers when
     * "segment.merger.normalizer" is true, and the slice size is read from
     * "segment.merger.slice" (default -1). A {@code null} configuration is only
     * passed on to the superclass.
     *
     * @param configuration the configuration to apply; may be {@code null}
     */
    @Override // org.apache.hadoop.conf.Configured, org.apache.hadoop.conf.Configurable
    public void setConf(Configuration configuration) {
        super.setConf(configuration);
        if (configuration != null) {
            if (configuration.getBoolean("segment.merger.filter", false)) {
                filters = new URLFilters(configuration);
                mergeFilters = new SegmentMergeFilters(configuration);
            }
            if (configuration.getBoolean("segment.merger.normalizer", false)) {
                normalizers = new URLNormalizers(configuration, "default");
            }
            sliceSize = configuration.getLong("segment.merger.slice", -1L);
            // Only announce the slice size when slicing is actually enabled.
            if (sliceSize > 0 && LOG.isInfoEnabled()) {
                LOG.info("Slice size: " + sliceSize + " URLs.");
            }
        }
    }

    /**
     * No-op: this class holds nothing that needs releasing when the mapper/reducer is closed.
     */
    @Override // java.io.Closeable, java.lang.AutoCloseable
    public void close() throws IOException {
    }

    /**
     * Applies the job configuration and converts the job-wide slice size into a
     * per-reducer slice size by dividing it by the number of reduce tasks.
     *
     * <p>When the job reports zero reduce tasks the slice size is left untouched
     * instead of failing with a division by zero.
     *
     * @param jobConf the job configuration
     */
    @Override // org.apache.hadoop.mapred.JobConfigurable
    public void configure(JobConf jobConf) {
        setConf(jobConf);
        int reduceTasks = jobConf.getNumReduceTasks();
        if (this.sliceSize > 0 && reduceTasks > 0) {
            this.sliceSize /= reduceTasks;
        }
    }

    /**
     * Optionally normalizes and filters the record's URL key and re-emits the
     * value under the resulting key.
     *
     * <p>The record is dropped when normalization or filtering throws (a warning
     * is logged) or when either step yields {@code null}.
     *
     * @param text            the URL key
     * @param metaWrapper     the wrapped segment record, passed through unchanged
     * @param outputCollector receives the (possibly rewritten) key with the value
     * @param reporter        unused
     */
    @Override // org.apache.hadoop.mapred.Mapper
    public void map(Text text, MetaWrapper metaWrapper, OutputCollector<Text, MetaWrapper> outputCollector, Reporter reporter) throws IOException {
        String url = text.toString();

        if (normalizers != null) {
            try {
                url = normalizers.normalize(url, "default");
            } catch (Exception ex) {
                LOG.warn("Skipping " + url + ":" + ex.getMessage());
                return;
            }
        }
        if (url == null) {
            return;
        }

        if (filters != null) {
            try {
                url = filters.filter(url);
            } catch (Exception ex) {
                LOG.warn("Skipping key " + url + ": " + ex.getMessage());
                return;
            }
            if (url == null) {
                return;
            }
        }

        newKey.set(url);
        outputCollector.collect(newKey, metaWrapper);
    }

    /**
     * Merges all records of one URL coming from several segments.
     *
     * <p>For each kind of record (generate datum, fetch datum, parse-signature
     * datum, content, parse data, parse text) the one from the segment with the
     * greatest name is kept; on equal names the first one seen wins. Other
     * crawl_parse datums are grouped per segment and only the group of the
     * greatest segment name is emitted. If merge filters are configured and
     * reject the key, nothing is emitted.
     *
     * <p>When slicing is enabled, every emitted record carries the slice index
     * {@code (number of keys emitted before this one) / sliceSize}, so each slice,
     * including the first, receives {@code sliceSize} keys from this reducer.
     *
     * @param text            the URL key
     * @param it              all wrapped records for the key
     * @param outputCollector receives the surviving records, tagged with part and slice
     * @param reporter        unused
     * @throws IOException if a record carries no segment part or an unknown CrawlDatum part
     */
    @Override // org.apache.hadoop.mapred.Reducer
    public void reduce(Text text, Iterator<MetaWrapper> it, OutputCollector<Text, MetaWrapper> outputCollector, Reporter reporter) throws IOException {
        CrawlDatum lastGenerate = null;
        CrawlDatum lastFetch = null;
        CrawlDatum lastSignature = null;
        Content lastContent = null;
        ParseData lastParseData = null;
        ParseText lastParseText = null;
        String generateSegment = null;
        String fetchSegment = null;
        String signatureSegment = null;
        String contentSegment = null;
        String parseDataSegment = null;
        String parseTextSegment = null;
        // Non-signature crawl_parse datums, grouped by the segment they came from.
        TreeMap<String, ArrayList<CrawlDatum>> linkedBySegment = new TreeMap<String, ArrayList<CrawlDatum>>();

        while (it.hasNext()) {
            MetaWrapper wrapper = it.next();
            Writable value = wrapper.get();
            String partMeta = wrapper.getMeta(SEGMENT_PART_KEY);
            if (partMeta == null) {
                throw new IOException("Null segment part, key=" + text);
            }
            SegmentPart part = SegmentPart.parse(partMeta);
            String segment = part.segmentName;

            if (value instanceof CrawlDatum) {
                CrawlDatum datum = (CrawlDatum) value;
                if (part.partName.equals(CrawlDatum.GENERATE_DIR_NAME)) {
                    if (lastGenerate == null || generateSegment.compareTo(segment) < 0) {
                        lastGenerate = datum;
                        generateSegment = segment;
                    }
                } else if (part.partName.equals(CrawlDatum.FETCH_DIR_NAME)) {
                    if (lastFetch == null || fetchSegment.compareTo(segment) < 0) {
                        lastFetch = datum;
                        fetchSegment = segment;
                    }
                } else if (part.partName.equals(CrawlDatum.PARSE_DIR_NAME)) {
                    // NOTE(review): status 65 (0x41) looks like CrawlDatum.STATUS_SIGNATURE — confirm.
                    if (datum.getStatus() == 65) {
                        if (lastSignature == null || signatureSegment.compareTo(segment) < 0) {
                            lastSignature = datum;
                            signatureSegment = segment;
                        }
                    } else {
                        ArrayList<CrawlDatum> group = linkedBySegment.get(segment);
                        if (group == null) {
                            group = new ArrayList<CrawlDatum>();
                            linkedBySegment.put(segment, group);
                        }
                        group.add(datum);
                    }
                } else {
                    throw new IOException("Cannot determine segment part: " + part.partName);
                }
            } else if (value instanceof Content) {
                if (lastContent == null || contentSegment.compareTo(segment) < 0) {
                    lastContent = (Content) value;
                    contentSegment = segment;
                }
            } else if (value instanceof ParseData) {
                if (lastParseData == null || parseDataSegment.compareTo(segment) < 0) {
                    lastParseData = (ParseData) value;
                    parseDataSegment = segment;
                }
            } else if (value instanceof ParseText) {
                if (lastParseText == null || parseTextSegment.compareTo(segment) < 0) {
                    lastParseText = (ParseText) value;
                    parseTextSegment = segment;
                }
            }
        }

        if (this.mergeFilters != null) {
            ArrayList<CrawlDatum> latestLinked = linkedBySegment.isEmpty() ? null : linkedBySegment.lastEntry().getValue();
            if (!this.mergeFilters.filter(text, lastGenerate, lastFetch, lastSignature, lastContent, lastParseData, lastParseText, latestLinked)) {
                return;
            }
        }

        MetaWrapper outWrapper = new MetaWrapper();
        if (this.sliceSize > 0) {
            // Use the count of keys emitted so far, so that slice 0 also gets sliceSize keys.
            outWrapper.setMeta(SEGMENT_SLICE_KEY, String.valueOf(this.curCount / this.sliceSize));
        }
        this.curCount++;

        // One wrapper and one SegmentPart are reused for every record emitted for this key.
        SegmentPart outPart = new SegmentPart();
        collectPart(outputCollector, text, outWrapper, outPart, lastGenerate, CrawlDatum.GENERATE_DIR_NAME, generateSegment);
        collectPart(outputCollector, text, outWrapper, outPart, lastFetch, CrawlDatum.FETCH_DIR_NAME, fetchSegment);
        collectPart(outputCollector, text, outWrapper, outPart, lastSignature, CrawlDatum.PARSE_DIR_NAME, signatureSegment);
        collectPart(outputCollector, text, outWrapper, outPart, lastContent, "content", contentSegment);
        collectPart(outputCollector, text, outWrapper, outPart, lastParseData, ParseData.DIR_NAME, parseDataSegment);
        collectPart(outputCollector, text, outWrapper, outPart, lastParseText, ParseText.DIR_NAME, parseTextSegment);

        if (!linkedBySegment.isEmpty()) {
            String latestSegment = linkedBySegment.lastKey();
            outPart.partName = CrawlDatum.PARSE_DIR_NAME;
            outPart.segmentName = latestSegment;
            outWrapper.setMeta(SEGMENT_PART_KEY, outPart.toString());
            for (CrawlDatum linked : linkedBySegment.get(latestSegment)) {
                outWrapper.set(linked);
                outputCollector.collect(text, outWrapper);
            }
        }
    }

    /** Emits {@code value} tagged with the given part and segment name; does nothing when {@code value} is null. */
    private static void collectPart(OutputCollector<Text, MetaWrapper> outputCollector, Text key, MetaWrapper wrapper, SegmentPart part, Writable value, String partName, String segmentName) throws IOException {
        if (value == null) {
            return;
        }
        wrapper.set(value);
        part.partName = partName;
        part.segmentName = segmentName;
        wrapper.setMeta(SEGMENT_PART_KEY, part.toString());
        outputCollector.collect(key, wrapper);
    }

    /**
     * Configures and runs the merge job.
     *
     * <p>Input segments that do not exist are skipped with a warning. A segment
     * part (content, crawl_generate, ...) is only used as input when it exists in
     * every existing input segment. The caller's array is not modified.
     *
     * @param path    parent directory for the output segment(s)
     * @param pathArr input segment directories
     * @param z       stored as "segment.merger.filter" (enables URL and merge filters)
     * @param z2      stored as "segment.merger.normalizer" (enables URL normalizers)
     * @param j       stored as "segment.merger.slice" (slice size; values &lt;= 0 disable slicing)
     * @throws Exception if file-system access or the job fails
     */
    public void merge(Path path, Path[] pathArr, boolean z, boolean z2, long j) throws Exception {
        String segmentName = Generator.generateSegmentName();
        if (LOG.isInfoEnabled()) {
            LOG.info("Merging " + pathArr.length + " segments to " + path + "/" + segmentName);
        }
        NutchJob job = new NutchJob(getConf());
        job.setJobName("mergesegs " + path + "/" + segmentName);
        job.setBoolean("segment.merger.filter", z);
        job.setBoolean("segment.merger.normalizer", z2);
        job.setLong("segment.merger.slice", j);
        job.set("segment.merger.segmentName", segmentName);

        FileSystem fs = FileSystem.get(getConf());
        // Existing segments are collected locally rather than nulling entries in the caller's array.
        ArrayList<Path> segments = new ArrayList<Path>(pathArr.length);
        boolean allGenerate = true;
        boolean allFetch = true;
        boolean allParse = true;
        boolean allContent = true;
        boolean allParseData = true;
        boolean allParseText = true;
        for (Path segment : pathArr) {
            if (!fs.exists(segment)) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn("Input dir " + segment + " doesn't exist, skipping.");
                }
                continue;
            }
            if (LOG.isInfoEnabled()) {
                LOG.info("SegmentMerger:   adding " + segment);
            }
            segments.add(segment);
            // Short-circuit: once a part is known to be missing somewhere, stop probing for it.
            allContent = allContent && fs.exists(new Path(segment, "content"));
            allGenerate = allGenerate && fs.exists(new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
            allFetch = allFetch && fs.exists(new Path(segment, CrawlDatum.FETCH_DIR_NAME));
            allParse = allParse && fs.exists(new Path(segment, CrawlDatum.PARSE_DIR_NAME));
            allParseData = allParseData && fs.exists(new Path(segment, ParseData.DIR_NAME));
            allParseText = allParseText && fs.exists(new Path(segment, ParseText.DIR_NAME));
        }

        StringBuilder used = new StringBuilder();
        if (allContent) {
            used.append(" content");
        }
        if (allGenerate) {
            used.append(" crawl_generate");
        }
        if (allFetch) {
            used.append(" crawl_fetch");
        }
        if (allParse) {
            used.append(" crawl_parse");
        }
        if (allParseData) {
            used.append(" parse_data");
        }
        if (allParseText) {
            used.append(" parse_text");
        }
        if (LOG.isInfoEnabled()) {
            LOG.info("SegmentMerger: using segment data from:" + used.toString());
        }

        for (Path segment : segments) {
            if (allGenerate) {
                FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
            }
            if (allContent) {
                FileInputFormat.addInputPath(job, new Path(segment, "content"));
            }
            if (allFetch) {
                FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.FETCH_DIR_NAME));
            }
            if (allParse) {
                FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.PARSE_DIR_NAME));
            }
            if (allParseData) {
                FileInputFormat.addInputPath(job, new Path(segment, ParseData.DIR_NAME));
            }
            if (allParseText) {
                FileInputFormat.addInputPath(job, new Path(segment, ParseText.DIR_NAME));
            }
        }

        job.setInputFormat(ObjectInputFormat.class);
        job.setMapperClass(SegmentMerger.class);
        job.setReducerClass(SegmentMerger.class);
        FileOutputFormat.setOutputPath(job, path);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(MetaWrapper.class);
        job.setOutputFormat(SegmentOutputFormat.class);
        // Re-apply the job settings (filter/normalizer/slice flags) to this instance before running.
        setConf(job);
        JobClient.runJob(job);
    }

    /**
     * Command-line entry point.
     *
     * <p>Usage: {@code SegmentMerger output_dir (-dir segments | seg1 seg2 ...) [-filter] [-normalize] [-slice NNNN]}
     *
     * @param strArr command-line arguments; the first is the output directory
     * @throws IllegalArgumentException if {@code -dir} or {@code -slice} is given without a value,
     *                                  or the slice size is not a number
     * @throws Exception                if the merge itself fails
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr.length < 2) {
            System.err.println("SegmentMerger output_dir (-dir segments | seg1 seg2 ...) [-filter] [-normalize] [-slice NNNN]");
            System.err.println("\toutput_dir\tname of the parent dir for output segment slice(s)");
            System.err.println("\t-dir segments\tparent dir containing several segments");
            System.err.println("\tseg1 seg2 ...\tlist of segment dirs");
            System.err.println("\t-filter\t\tfilter out URL-s prohibited by current URLFilters");
            System.err.println("\t-normalize\tnormalize URL-s with the current URLNormalizers");
            System.err.println("\t-slice NNNN\tcreate many output segments, each containing NNNN URLs");
            return;
        }
        Configuration conf = NutchConfiguration.create();
        FileSystem fs = FileSystem.get(conf);
        Path output = new Path(strArr[0]);
        ArrayList<Path> segments = new ArrayList<Path>();
        long sliceSize = 0;
        boolean filter = false;
        boolean normalize = false;

        for (int i = 1; i < strArr.length; i++) {
            String arg = strArr[i];
            if (arg.equals("-dir")) {
                if (++i >= strArr.length) {
                    throw new IllegalArgumentException("Missing segments directory after -dir");
                }
                Path[] found = HadoopFSUtil.getPaths(fs.listStatus(new Path(strArr[i]), HadoopFSUtil.getPassDirectoriesFilter(fs)));
                // Guard in case the listing yields no array (e.g. a missing directory) — TODO confirm.
                if (found != null) {
                    for (Path segment : found) {
                        segments.add(segment);
                    }
                }
            } else if (arg.equals("-filter")) {
                filter = true;
            } else if (arg.equals("-normalize")) {
                normalize = true;
            } else if (arg.equals("-slice")) {
                if (++i >= strArr.length) {
                    throw new IllegalArgumentException("Missing slice size after -slice");
                }
                sliceSize = Long.parseLong(strArr[i]);
            } else {
                segments.add(new Path(arg));
            }
        }

        if (segments.isEmpty()) {
            System.err.println("ERROR: No input segments.");
            return;
        }
        new SegmentMerger(conf).merge(output, segments.toArray(new Path[segments.size()]), filter, normalize, sliceSize);
    }
}
