package org.apache.nutch.crawl;

import java.io.Closeable;
import java.io.DataOutputStream;
import java.io.IOException;
import java.net.URL;
import java.util.Date;
import java.util.Iterator;
import java.util.Map;
import java.util.Random;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapFileOutputFormat;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.HashPartitioner;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.util.Progressable;
import org.apache.nutch.net.URLNormalizers;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.StringUtil;
import org.apache.pdfbox.pdmodel.interactive.measurement.PDNumberFormatDictionary;
import org.apache.tools.ant.taskdefs.SQLExec;
import org.apache.tools.ant.types.selectors.FilenameSelector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.internal.Parameters;

/* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/crawl/CrawlDbReader.class */
public class CrawlDbReader implements Closeable {
    public static final Logger LOG = LoggerFactory.getLogger(CrawlDbReader.class);
    private MapFile.Reader[] readers = null;

    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/crawl/CrawlDbReader$CrawlDatumCsvOutputFormat.class */
    /**
     * Output format that dumps CrawlDb entries as semicolon-separated text: one
     * header line followed by one line per {@link CrawlDatum}.
     */
    public static class CrawlDatumCsvOutputFormat extends FileOutputFormat<Text, CrawlDatum> {

        /**
         * Record writer producing the CSV lines. The header is written when the
         * writer is constructed; every call to {@link #write} appends one line.
         */
        protected static class LineRecordWriter implements RecordWriter<Text, CrawlDatum> {
            private static final String HEADER = "Url;Status code;Status name;Fetch Time;Modified Time;Retries since fetch;Retry interval seconds;Retry interval days;Score;Signature\n";
            private static final String CHARSET = "UTF-8";
            private static final char QUOTE = '"';
            private static final char SEPARATOR = ';';
            private static final char NEWLINE = '\n';

            private DataOutputStream out;

            /** Failure raised while writing the header; reported by the first write. */
            private IOException headerFailure;

            /**
             * Wraps the given stream and writes the CSV header to it.
             *
             * @param dataOutputStream stream receiving the CSV output
             */
            public LineRecordWriter(DataOutputStream dataOutputStream) {
                this.out = dataOutputStream;
                try {
                    dataOutputStream.writeBytes(HEADER);
                } catch (IOException e) {
                    // The constructor cannot throw a checked exception without
                    // breaking callers, so remember the failure and surface it
                    // from write() instead of dropping it.
                    this.headerFailure = e;
                }
            }

            /**
             * Appends one CSV line for the given entry.
             *
             * @param text       the URL of the entry
             * @param crawlDatum the entry to serialize
             * @throws IOException if the header or the line cannot be written
             */
            @Override
            public synchronized void write(Text text, CrawlDatum crawlDatum) throws IOException {
                if (this.headerFailure != null) {
                    throw this.headerFailure;
                }
                writeQuoted(text.toString());
                this.out.writeByte(SEPARATOR);
                writeText(Integer.toString(crawlDatum.getStatus()));
                this.out.writeByte(SEPARATOR);
                writeQuoted(CrawlDatum.getStatusName(crawlDatum.getStatus()));
                this.out.writeByte(SEPARATOR);
                writeText(new Date(crawlDatum.getFetchTime()).toString());
                this.out.writeByte(SEPARATOR);
                writeText(new Date(crawlDatum.getModifiedTime()).toString());
                this.out.writeByte(SEPARATOR);
                writeText(Integer.toString(crawlDatum.getRetriesSinceFetch()));
                this.out.writeByte(SEPARATOR);
                writeText(Float.toString(crawlDatum.getFetchInterval()));
                this.out.writeByte(SEPARATOR);
                // The explicit float cast guards against integer division in
                // case both operands are integral (their declarations are not
                // visible here), which would truncate the day count.
                writeText(Float.toString(((float) crawlDatum.getFetchInterval()) / FetchSchedule.SECONDS_PER_DAY));
                this.out.writeByte(SEPARATOR);
                writeText(Float.toString(crawlDatum.getScore()));
                this.out.writeByte(SEPARATOR);
                writeQuoted(crawlDatum.getSignature() != null ? StringUtil.toHexString(crawlDatum.getSignature()) : "null");
                this.out.writeByte(NEWLINE);
            }

            /** Closes the underlying stream. */
            @Override
            public synchronized void close(Reporter reporter) throws IOException {
                this.out.close();
            }

            /** Writes the value surrounded by double quotes. */
            private void writeQuoted(String value) throws IOException {
                this.out.writeByte(QUOTE);
                writeText(value);
                this.out.writeByte(QUOTE);
            }

            /**
             * Writes the value as UTF-8. DataOutputStream.writeBytes keeps only
             * the low byte of every char and would corrupt non-ASCII URLs.
             */
            private void writeText(String value) throws IOException {
                this.out.write(value.getBytes(CHARSET));
            }
        }

        /**
         * Creates the CSV writer for the file {@code str} inside the job's
         * output directory.
         */
        @Override
        public RecordWriter<Text, CrawlDatum> getRecordWriter(FileSystem fileSystem, JobConf jobConf, String str, Progressable progressable) throws IOException {
            Path file = new Path(FileOutputFormat.getOutputPath(jobConf), str);
            return new LineRecordWriter(fileSystem.create(file, progressable));
        }
    }

    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/crawl/CrawlDbReader$CrawlDbDumpMapper.class */
    /**
     * Mapper of the dump job: passes an entry through only when it matches the
     * optional status filter and the optional URL regular expression.
     */
    public static class CrawlDbDumpMapper implements Mapper<Text, CrawlDatum, Text, CrawlDatum> {
        Pattern pattern = null;
        Matcher matcher = null;
        String status = null;

        /**
         * Reads the optional filters from the job configuration: key
         * {@code "regex"} (URL pattern) and key {@code "status"} (status name).
         */
        @Override
        public void configure(JobConf jobConf) {
            // Plain literal instead of a constant borrowed from an unrelated
            // library; the value is looked up only once.
            String regex = jobConf.get("regex", null);
            if (regex != null) {
                this.pattern = Pattern.compile(regex);
            }
            this.status = jobConf.get("status", null);
        }

        @Override
        public void close() {
        }

        /**
         * Emits the entry unchanged when it passes both filters.
         *
         * @param text       URL of the entry
         * @param crawlDatum the entry
         */
        @Override
        public void map(Text text, CrawlDatum crawlDatum, OutputCollector<Text, CrawlDatum> outputCollector, Reporter reporter) throws IOException {
            if (this.status != null && !this.status.equalsIgnoreCase(CrawlDatum.getStatusName(crawlDatum.getStatus()))) {
                return;
            }
            if (this.pattern != null) {
                // The whole URL has to match the expression, not just a part.
                this.matcher = this.pattern.matcher(text.toString());
                if (!this.matcher.matches()) {
                    return;
                }
            }
            outputCollector.collect(text, crawlDatum);
        }
    }

    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/crawl/CrawlDbReader$CrawlDbStatCombiner.class */
    /**
     * Combiner of the statistics job. Values of key {@code "s"} are condensed
     * into the three keys {@code "scn"} (minimum), {@code "scx"} (maximum) and
     * {@code "sct"} (sum); the values of every other key are summed up.
     */
    public static class CrawlDbStatCombiner implements Reducer<Text, LongWritable, Text, LongWritable> {
        LongWritable val = new LongWritable();

        @Override
        public void configure(JobConf jobConf) {
        }

        @Override
        public void close() {
        }

        @Override
        public void reduce(Text text, Iterator<LongWritable> it, OutputCollector<Text, LongWritable> outputCollector, Reporter reporter) throws IOException {
            this.val.set(0L);
            if (text.toString().equals("s")) {
                long lowest = Long.MAX_VALUE;
                long highest = Long.MIN_VALUE;
                long sum = 0;
                while (it.hasNext()) {
                    long current = it.next().get();
                    lowest = Math.min(lowest, current);
                    highest = Math.max(highest, current);
                    sum += current;
                }
                outputCollector.collect(new Text("scn"), new LongWritable(lowest));
                outputCollector.collect(new Text("scx"), new LongWritable(highest));
                outputCollector.collect(new Text("sct"), new LongWritable(sum));
                return;
            }
            // Plain counter: accumulate into the reusable writable.
            long counted = 0L;
            while (it.hasNext()) {
                counted += it.next().get();
            }
            this.val.set(counted);
            outputCollector.collect(text, this.val);
        }
    }

    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/crawl/CrawlDbReader$CrawlDbStatMapper.class */
    /**
     * Mapper of the statistics job. For every entry it emits a count of one
     * under the keys {@code "T"}, {@code "status <code>"} and
     * {@code "retry <n>"}, and the score multiplied by 1000 under key
     * {@code "s"}. When sorting is enabled an additional count is emitted under
     * {@code "status <code> <host>"}.
     */
    public static class CrawlDbStatMapper implements Mapper<Text, CrawlDatum, Text, LongWritable> {
        LongWritable COUNT_1 = new LongWritable(1);
        private boolean sort = false;

        /** Reads the {@code db.reader.stats.sort} flag (default {@code false}). */
        @Override
        public void configure(JobConf jobConf) {
            this.sort = jobConf.getBoolean("db.reader.stats.sort", false);
        }

        @Override
        public void close() {
        }

        /**
         * @param text       URL of the entry
         * @param crawlDatum the entry
         * @throws IOException if emitting fails or, with sorting enabled, the
         *                     key is not a well-formed URL
         */
        @Override
        public void map(Text text, CrawlDatum crawlDatum, OutputCollector<Text, LongWritable> outputCollector, Reporter reporter) throws IOException {
            int statusCode = (int) crawlDatum.getStatus();
            // "T" is the key of the overall URL count; it is written as a plain
            // literal instead of a constant borrowed from an unrelated library.
            outputCollector.collect(new Text("T"), this.COUNT_1);
            outputCollector.collect(new Text("status " + statusCode), this.COUNT_1);
            outputCollector.collect(new Text("retry " + ((int) crawlDatum.getRetriesSinceFetch())), this.COUNT_1);
            // The score is scaled by 1000 so it can travel as a long.
            outputCollector.collect(new Text("s"), new LongWritable((long) (crawlDatum.getScore() * 1000.0d)));
            if (this.sort) {
                String host = new URL(text.toString()).getHost();
                outputCollector.collect(new Text("status " + statusCode + " " + host), this.COUNT_1);
            }
        }
    }

    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/crawl/CrawlDbReader$CrawlDbStatReducer.class */
    /**
     * Reducer of the statistics job. Keys {@code "T"}, {@code "sct"} and those
     * starting with {@code "status"} or {@code "retry"} are summed;
     * {@code "scx"} keeps the maximum and {@code "scn"} the minimum. Any other
     * key is dropped.
     */
    public static class CrawlDbStatReducer implements Reducer<Text, LongWritable, Text, LongWritable> {
        @Override
        public void configure(JobConf jobConf) {
        }

        @Override
        public void close() {
        }

        @Override
        public void reduce(Text text, Iterator<LongWritable> it, OutputCollector<Text, LongWritable> outputCollector, Reporter reporter) throws IOException {
            String name = text.toString();
            if (name.equals("scx")) {
                outputCollector.collect(text, new LongWritable(max(it)));
            } else if (name.equals("scn")) {
                outputCollector.collect(text, new LongWritable(min(it)));
            } else if (name.equals("T") || name.equals("sct") || name.startsWith("status") || name.startsWith("retry")) {
                outputCollector.collect(text, new LongWritable(sum(it)));
            }
            // Keys of any other kind are intentionally not emitted.
        }

        /** Adds up all values; yields 0 for an empty iterator. */
        private static long sum(Iterator<LongWritable> values) {
            long total = 0L;
            while (values.hasNext()) {
                total += values.next().get();
            }
            return total;
        }

        /** Largest value; yields {@code Long.MIN_VALUE} for an empty iterator. */
        private static long max(Iterator<LongWritable> values) {
            long highest = Long.MIN_VALUE;
            while (values.hasNext()) {
                highest = Math.max(highest, values.next().get());
            }
            return highest;
        }

        /** Smallest value; yields {@code Long.MAX_VALUE} for an empty iterator. */
        private static long min(Iterator<LongWritable> values) {
            long lowest = Long.MAX_VALUE;
            while (values.hasNext()) {
                lowest = Math.min(lowest, values.next().get());
            }
            return lowest;
        }
    }

    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/crawl/CrawlDbReader$CrawlDbTopNMapper.class */
    /**
     * Mapper of the first topN job: emits {@code (-score, url)} for every entry
     * whose score is not below the configured minimum. The score is negated so
     * that an ascending sort on the key lists the highest scores first.
     */
    public static class CrawlDbTopNMapper implements Mapper<Text, CrawlDatum, FloatWritable, Text> {
        // Reusable output key. Kept per instance: a static writable would be
        // shared mutable state between all mapper instances in the same JVM.
        private final FloatWritable fw = new FloatWritable();
        private float min = 0.0f;

        /**
         * Reads {@code db.reader.topn.min}, the minimum score multiplied by
         * 1,000,000 (0 or absent keeps the current minimum).
         */
        @Override
        public void configure(JobConf jobConf) {
            long j = jobConf.getLong("db.reader.topn.min", 0L);
            if (j != 0) {
                this.min = ((float) j) / 1000000.0f;
            }
        }

        @Override
        public void close() {
        }

        /**
         * @param text       URL of the entry
         * @param crawlDatum the entry
         */
        @Override
        public void map(Text text, CrawlDatum crawlDatum, OutputCollector<FloatWritable, Text> outputCollector, Reporter reporter) throws IOException {
            if (crawlDatum.getScore() < this.min) {
                return;
            }
            this.fw.set(-crawlDatum.getScore());
            outputCollector.collect(this.fw, text);
        }
    }

    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/crawl/CrawlDbReader$CrawlDbTopNReducer.class */
    /**
     * Reducer of the second topN job: turns the negated scores back into real
     * scores and emits at most {@code topN} {@code (score, url)} pairs per
     * reducer instance.
     */
    public static class CrawlDbTopNReducer implements Reducer<FloatWritable, Text, FloatWritable, Text> {
        private long topN;
        private long count = 0;

        /** Reusable output key holding the restored (positive) score. */
        private final FloatWritable score = new FloatWritable();

        /**
         * @param floatWritable negated score shared by all given URLs
         * @param it            URLs having that score
         */
        @Override
        public void reduce(FloatWritable floatWritable, Iterator<Text> it, OutputCollector<FloatWritable, Text> outputCollector, Reporter reporter) throws IOException {
            // Restore the sign once, in a separate writable. Negating the input
            // key in place inside the loop would flip the sign again for every
            // further URL with the same score, and would also modify the key
            // object owned by the caller.
            this.score.set(-floatWritable.get());
            while (it.hasNext() && this.count < this.topN) {
                outputCollector.collect(this.score, it.next());
                this.count++;
            }
        }

        /**
         * Reads {@code db.reader.topn} (default 100) and divides it by the
         * number of reduce tasks to obtain the quota of this reducer.
         */
        @Override
        public void configure(JobConf jobConf) {
            this.topN = jobConf.getLong("db.reader.topn", 100L) / jobConf.getNumReduceTasks();
        }

        @Override
        public void close() {
        }
    }

    /**
     * Opens the map-file readers on the {@code current} sub-directory of the
     * given CrawlDb, unless readers are already held by this instance.
     *
     * @param str           CrawlDb directory
     * @param configuration configuration used to obtain the file system
     * @throws IOException if the readers cannot be opened
     */
    private void openReaders(String str, Configuration configuration) throws IOException {
        if (this.readers == null) {
            FileSystem fs = FileSystem.get(configuration);
            Path currentDir = new Path(str, "current");
            this.readers = MapFileOutputFormat.getReaders(fs, currentDir, configuration);
        }
    }

    /**
     * Closes all open readers, if any, and forgets them so that the next
     * lookup opens fresh ones instead of reusing closed readers. Closing is
     * best effort: a failure on one reader is logged and the remaining readers
     * are still closed.
     */
    private void closeReaders() {
        if (this.readers == null) {
            return;
        }
        for (int i = 0; i < this.readers.length; i++) {
            try {
                this.readers[i].close();
            } catch (Exception e) {
                LOG.warn("Could not close CrawlDb reader " + i, e);
            }
        }
        // Without this reset openReaders() would consider the closed readers
        // still usable.
        this.readers = null;
    }

    /** Releases the readers held by this instance; safe to call repeatedly. */
    @Override
    public void close() {
        closeReaders();
    }

    /**
     * Runs the statistics job on the CrawlDb and logs the aggregated result
     * (total URLs, min/max/avg score, counts per status and retry) at INFO
     * level. The temporary job output below the CrawlDb directory is removed
     * afterwards, also when the job or the evaluation fails.
     *
     * @param str           CrawlDb directory
     * @param configuration base configuration of the job
     * @param z             whether to additionally count statuses per host
     * @throws IOException if the job fails or its output cannot be read
     */
    public void processStatJob(String str, Configuration configuration, boolean z) throws IOException {
        if (LOG.isInfoEnabled()) {
            LOG.info("CrawlDb statistics start: " + str);
        }
        Path tmpFolder = new Path(str, "stat_tmp" + System.currentTimeMillis());
        NutchJob job = new NutchJob(configuration);
        job.setJobName("stats " + str);
        job.setBoolean("db.reader.stats.sort", z);
        FileInputFormat.addInputPath(job, new Path(str, "current"));
        job.setInputFormat(SequenceFileInputFormat.class);
        job.setMapperClass(CrawlDbStatMapper.class);
        job.setCombinerClass(CrawlDbStatCombiner.class);
        job.setReducerClass(CrawlDbStatReducer.class);
        FileOutputFormat.setOutputPath(job, tmpFolder);
        job.setOutputFormat(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        job.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

        boolean finished = false;
        try {
            JobClient.runJob(job);
            TreeMap<String, LongWritable> stats = readStats(configuration, tmpFolder);
            if (LOG.isInfoEnabled()) {
                logStats(str, stats);
            }
            finished = true;
        } finally {
            if (!finished) {
                // Best-effort cleanup that must not hide the original failure.
                try {
                    FileSystem.get(configuration).delete(tmpFolder, true);
                } catch (IOException e) {
                    LOG.warn("CrawlDb statistics: could not remove " + tmpFolder, e);
                }
            }
        }
        FileSystem.get(configuration).delete(tmpFolder, true);
        if (LOG.isInfoEnabled()) {
            LOG.info("CrawlDb statistics: done");
        }
    }

    /** Reads the output files of the statistics job and merges them into one sorted map. */
    private static TreeMap<String, LongWritable> readStats(Configuration configuration, Path statDir) throws IOException {
        TreeMap<String, LongWritable> stats = new TreeMap<String, LongWritable>();
        SequenceFile.Reader[] statReaders = SequenceFileOutputFormat.getReaders(configuration, statDir);
        Text key = new Text();
        LongWritable value = new LongWritable();
        try {
            for (SequenceFile.Reader reader : statReaders) {
                while (reader.next(key, value)) {
                    mergeStat(stats, key.toString(), value.get());
                }
            }
        } finally {
            for (SequenceFile.Reader reader : statReaders) {
                try {
                    reader.close();
                } catch (IOException e) {
                    LOG.warn("CrawlDb statistics: could not close reader", e);
                }
            }
        }
        return stats;
    }

    /** Folds one value into the map: maximum for "scx", minimum for "scn", sum otherwise. */
    private static void mergeStat(Map<String, LongWritable> stats, String name, long value) {
        LongWritable current = stats.get(name);
        if (current == null) {
            stats.put(name, new LongWritable(value));
        } else if (name.equals("scx")) {
            current.set(Math.max(current.get(), value));
        } else if (name.equals("scn")) {
            current.set(Math.min(current.get(), value));
        } else {
            current.set(current.get() + value);
        }
    }

    /** Logs the merged statistics; scores are stored multiplied by 1000. */
    private static void logStats(String crawlDb, Map<String, LongWritable> stats) {
        LOG.info("Statistics for CrawlDb: " + crawlDb);
        // An empty CrawlDb produces no "T" record at all.
        LongWritable totalEntry = stats.get("T");
        long total = totalEntry == null ? 0L : totalEntry.get();
        LOG.info("TOTAL urls:\t" + total);
        for (Map.Entry<String, LongWritable> entry : stats.entrySet()) {
            String name = entry.getKey();
            LongWritable value = entry.getValue();
            if (name.equals("T")) {
                continue;
            }
            if (name.equals("scn")) {
                LOG.info("min score:\t" + (((float) value.get()) / 1000.0f));
            } else if (name.equals("scx")) {
                LOG.info("max score:\t" + (((float) value.get()) / 1000.0f));
            } else if (name.equals("sct")) {
                // Divide as double: a long division would truncate the average
                // and fail with an ArithmeticException for a total of zero.
                float average = total > 0 ? (float) ((((double) value.get()) / total) / 1000.0d) : 0.0f;
                LOG.info("avg score:\t" + average);
            } else if (name.startsWith("status")) {
                String[] parts = name.split(" ");
                int code = Integer.parseInt(parts[1]);
                if (parts.length > 2) {
                    // Per-host line ("status <code> <host>").
                    LOG.info("   " + parts[2] + " :\t" + value);
                } else {
                    LOG.info(parts[0] + " " + code + " (" + CrawlDatum.getStatusName((byte) code) + "):\t" + value);
                }
            } else {
                LOG.info(name + ":\t" + value);
            }
        }
    }

    /**
     * Looks up the entry stored for a URL, opening the CrawlDb readers first
     * if this instance does not hold any yet.
     *
     * @param str           CrawlDb directory
     * @param str2          URL to look up
     * @param configuration configuration used to open the readers
     * @return the result of the map-file lookup for the URL
     * @throws IOException if the readers cannot be opened or read
     */
    public CrawlDatum get(String str, String str2, Configuration configuration) throws IOException {
        final Text urlKey = new Text(str2);
        final CrawlDatum holder = new CrawlDatum();
        openReaders(str, configuration);
        HashPartitioner partitioner = new HashPartitioner();
        return (CrawlDatum) MapFileOutputFormat.getEntry(this.readers, partitioner, urlKey, holder);
    }

    /**
     * Prints the CrawlDb entry of a URL to standard output, or
     * {@code not found} when the lookup yields nothing.
     *
     * @param str           CrawlDb directory
     * @param str2          URL to look up
     * @param configuration configuration used to open the readers
     * @throws IOException if the lookup fails
     */
    public void readUrl(String str, String str2, Configuration configuration) throws IOException {
        final CrawlDatum found = get(str, str2, configuration);
        System.out.println("URL: " + str2);
        if (found == null) {
            System.out.println("not found");
            return;
        }
        System.out.println(found);
    }

    /**
     * Dumps the CrawlDb into an output directory, optionally restricted to
     * entries with a given status and/or URLs matching a regular expression.
     *
     * @param str           CrawlDb directory
     * @param str2          output directory
     * @param configuration base configuration of the job
     * @param str3          output format: {@code "csv"}, {@code "crawldb"}
     *                      (map file); anything else, including {@code null},
     *                      selects plain text
     * @param str4          URL regular expression, or {@code null} for no filter
     * @param str5          status name, or {@code null} for no filter
     * @throws IOException if the job fails
     */
    public void processDumpJob(String str, String str2, Configuration configuration, String str3, String str4, String str5) throws IOException {
        if (LOG.isInfoEnabled()) {
            LOG.info("CrawlDb dump: starting");
            LOG.info("CrawlDb db: " + str);
        }
        Path outFolder = new Path(str2);
        NutchJob job = new NutchJob(configuration);
        job.setJobName("dump " + str);
        FileInputFormat.addInputPath(job, new Path(str, "current"));
        job.setInputFormat(SequenceFileInputFormat.class);
        FileOutputFormat.setOutputPath(job, outFolder);
        // Literal-first comparison: a null format falls back to plain text
        // instead of failing with a NullPointerException.
        if ("csv".equals(str3)) {
            job.setOutputFormat(CrawlDatumCsvOutputFormat.class);
        } else if ("crawldb".equals(str3)) {
            job.setOutputFormat(MapFileOutputFormat.class);
        } else {
            job.setOutputFormat(TextOutputFormat.class);
        }
        // The filters travel to the mapper through the job configuration.
        if (str5 != null) {
            job.set("status", str5);
        }
        if (str4 != null) {
            job.set("regex", str4);
        }
        job.setMapperClass(CrawlDbDumpMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(CrawlDatum.class);
        JobClient.runJob(job);
        if (LOG.isInfoEnabled()) {
            LOG.info("CrawlDb dump: done");
        }
    }

    /**
     * Writes the {@code j} highest-scored URLs of the CrawlDb to a text output
     * directory. Two jobs are run: the first emits {@code (-score, url)} for
     * all entries not below the minimum score into a temporary directory, the
     * second collects the top entries with a single reducer. The temporary
     * directory is removed afterwards, also when one of the jobs fails.
     *
     * @param str           CrawlDb directory
     * @param j             number of URLs to output
     * @param f             minimum score of the entries to consider
     * @param str2          output directory
     * @param configuration base configuration of the jobs
     * @throws IOException if a job fails
     */
    public void processTopNJob(String str, long j, float f, String str2, Configuration configuration) throws IOException {
        if (LOG.isInfoEnabled()) {
            LOG.info("CrawlDb topN: starting (topN=" + j + ", min=" + f + ")");
            LOG.info("CrawlDb db: " + str);
        }
        Path outFolder = new Path(str2);
        Path tempDir = new Path(configuration.get("mapred.temp.dir", Path.CUR_DIR) + "/readdb-topN-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

        boolean finished = false;
        try {
            NutchJob prepareJob = new NutchJob(configuration);
            prepareJob.setJobName("topN prepare " + str);
            FileInputFormat.addInputPath(prepareJob, new Path(str, "current"));
            prepareJob.setInputFormat(SequenceFileInputFormat.class);
            prepareJob.setMapperClass(CrawlDbTopNMapper.class);
            prepareJob.setReducerClass(IdentityReducer.class);
            FileOutputFormat.setOutputPath(prepareJob, tempDir);
            prepareJob.setOutputFormat(SequenceFileOutputFormat.class);
            prepareJob.setOutputKeyClass(FloatWritable.class);
            prepareJob.setOutputValueClass(Text.class);
            // The minimum score is passed as a long, scaled by 1,000,000.
            prepareJob.setLong("db.reader.topn.min", Math.round(1000000.0d * f));
            JobClient.runJob(prepareJob);

            if (LOG.isInfoEnabled()) {
                LOG.info("CrawlDb topN: collecting topN scores.");
            }
            NutchJob collectJob = new NutchJob(configuration);
            collectJob.setJobName("topN collect " + str);
            collectJob.setLong("db.reader.topn", j);
            FileInputFormat.addInputPath(collectJob, tempDir);
            collectJob.setInputFormat(SequenceFileInputFormat.class);
            collectJob.setMapperClass(IdentityMapper.class);
            collectJob.setReducerClass(CrawlDbTopNReducer.class);
            FileOutputFormat.setOutputPath(collectJob, outFolder);
            collectJob.setOutputFormat(TextOutputFormat.class);
            collectJob.setOutputKeyClass(FloatWritable.class);
            collectJob.setOutputValueClass(Text.class);
            // A single reducer sees all scores in sorted order.
            collectJob.setNumReduceTasks(1);
            JobClient.runJob(collectJob);
            finished = true;
        } finally {
            if (!finished) {
                // Best-effort cleanup that must not hide the original failure.
                try {
                    FileSystem.get(configuration).delete(tempDir, true);
                } catch (IOException e) {
                    LOG.warn("CrawlDb topN: could not remove " + tempDir, e);
                }
            }
        }
        FileSystem.get(configuration).delete(tempDir, true);
        if (LOG.isInfoEnabled()) {
            LOG.info("CrawlDb topN: done");
        }
    }

    /**
     * Command line entry point. The first argument is the CrawlDb directory;
     * it is followed by one or more commands ({@code -stats}, {@code -dump},
     * {@code -url}, {@code -topN}) which are executed in order. Usage is
     * printed when no command is given or when a command lacks a required
     * argument.
     *
     * @param strArr command line arguments
     * @throws IOException if a command fails
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr.length < 2) {
            // Nothing to do without at least one command.
            printUsage();
            return;
        }
        String crawlDb = strArr[0];
        Configuration conf = NutchConfiguration.create();
        CrawlDbReader dbr = new CrawlDbReader();
        try {
            int i = 1;
            while (i < strArr.length) {
                String command = strArr[i];
                if (command.equals("-stats")) {
                    boolean sort = false;
                    if (i < strArr.length - 1 && "-sort".equals(strArr[i + 1])) {
                        sort = true;
                        i++;
                    }
                    dbr.processStatJob(crawlDb, conf, sort);
                } else if (command.equals("-dump")) {
                    if (i + 1 >= strArr.length) {
                        reportMissingArgument(command);
                        return;
                    }
                    i++;
                    String outDir = strArr[i];
                    String format = "normal";
                    String regex = null;
                    String status = null;
                    int j = i + 1;
                    while (j < strArr.length) {
                        String option = strArr[j];
                        boolean isFormat = option.equals("-format");
                        boolean isRegex = option.equals("-regex");
                        boolean isStatus = option.equals("-status");
                        if (!isFormat && !isRegex && !isStatus) {
                            j++;
                            continue;
                        }
                        if (j + 1 >= strArr.length) {
                            reportMissingArgument(option);
                            return;
                        }
                        String value = strArr[j + 1];
                        if (isFormat) {
                            format = value;
                        } else if (isRegex) {
                            regex = value;
                        } else {
                            status = value;
                        }
                        j += 2;
                        // Skip the option and its value in the outer loop too.
                        i += 2;
                    }
                    dbr.processDumpJob(crawlDb, outDir, conf, format, regex, status);
                } else if (command.equals("-url")) {
                    if (i + 1 >= strArr.length) {
                        reportMissingArgument(command);
                        return;
                    }
                    i++;
                    dbr.readUrl(crawlDb, strArr[i], conf);
                } else if (command.equals("-topN")) {
                    if (i + 2 >= strArr.length) {
                        reportMissingArgument(command);
                        return;
                    }
                    long topN = Long.parseLong(strArr[i + 1]);
                    i += 2;
                    String outDir = strArr[i];
                    float min = 0.0f;
                    if (i < strArr.length - 1) {
                        i++;
                        min = Float.parseFloat(strArr[i]);
                    }
                    dbr.processTopNJob(crawlDb, topN, min, outDir, conf);
                } else {
                    System.err.println("\nError: wrong argument " + command);
                }
                i++;
            }
        } finally {
            // Releases the readers a -url lookup may have opened.
            dbr.close();
        }
    }

    /** Reports a command or option that lacks its required value, followed by the usage. */
    private static void reportMissingArgument(String option) {
        System.err.println("\nError: missing argument for " + option);
        printUsage();
    }

    /** Prints the command line help to standard error. */
    private static void printUsage() {
        System.err.println("Usage: CrawlDbReader <crawldb> (-stats | -dump <out_dir> | -topN <nnnn> <out_dir> [<min>] | -url <url>)");
        System.err.println("\t<crawldb>\tdirectory name where crawldb is located");
        System.err.println("\t-stats [-sort] \tprint overall statistics to System.out");
        System.err.println("\t\t[-sort]\tlist status sorted by host");
        System.err.println("\t-dump <out_dir> [-format normal|csv|crawldb]\tdump the whole db to a text file in <out_dir>");
        System.err.println("\t\t[-format csv]\tdump in Csv format");
        System.err.println("\t\t[-format normal]\tdump in standard format (default option)");
        System.err.println("\t\t[-format crawldb]\tdump as CrawlDB");
        System.err.println("\t\t[-regex <expr>]\tfilter records with expression");
        System.err.println("\t\t[-status <status>]\tfilter records by CrawlDatum status");
        System.err.println("\t-url <url>\tprint information on <url> to System.out");
        System.err.println("\t-topN <nnnn> <out_dir> [<min>]\tdump top <nnnn> urls sorted by score to <out_dir>");
        System.err.println("\t\t[<min>]\tskip records with scores below this value.");
        System.err.println("\t\t\tThis can significantly improve performance.");
    }
}
