package org.apache.nutch.indexer.solr;

import java.io.IOException;
import java.net.MalformedURLException;
import java.text.SimpleDateFormat;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.lib.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.TimingUtil;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/indexer/solr/SolrClean.class */
public class SolrClean implements Tool {
    /** Logger for this tool; the nested {@code SolrDeleter} reducer logs through it as well. */
    public static final Logger LOG = LoggerFactory.getLogger(SolrClean.class);
    /** Configuration stored by {@code setConf} and handed back by {@code getConf}. */
    private Configuration conf;

    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/indexer/solr/SolrClean$DBFilter.class */
    /**
     * Mapper that selects the crawl-db entries to be removed from the index.
     *
     * <p>Only entries whose status byte equals {@link #SELECTED_STATUS} are
     * emitted. Every emitted record carries the same constant key, with the
     * entry's own key as the value.
     */
    public static class DBFilter implements Mapper<Text, CrawlDatum, ByteWritable, Text> {
        /**
         * Status value an entry must have to be emitted.
         * NOTE(review): 3 looks like the compiler-inlined value of
         * CrawlDatum.STATUS_DB_GONE -- confirm and reference that constant directly.
         */
        private static final byte SELECTED_STATUS = (byte) 3;

        /** Constant map-output key shared by every emitted record. */
        private final ByteWritable outKey = new ByteWritable(SELECTED_STATUS);

        /** No per-task configuration is needed. */
        @Override
        public void configure(JobConf jobConf) {
            // intentionally empty
        }

        /** Nothing to release. */
        @Override
        public void close() throws IOException {
            // intentionally empty
        }

        /**
         * Emits {@code text} under the constant key when the datum has the selected status.
         *
         * @param text            key of the crawl-db entry
         * @param crawlDatum      crawl-db entry whose status is inspected
         * @param outputCollector sink for the selected keys
         * @param reporter        unused
         * @throws IOException if the collector fails
         */
        @Override
        public void map(Text text, CrawlDatum crawlDatum, OutputCollector<ByteWritable, Text> outputCollector, Reporter reporter) throws IOException {
            if (crawlDatum.getStatus() != SELECTED_STATUS) {
                return;
            }
            outputCollector.collect(outKey, text);
        }
    }

    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/indexer/solr/SolrClean$SolrDeleter.class */
    /**
     * Reducer that issues Solr delete-by-id requests for every value it receives.
     *
     * <p>Ids are buffered in an {@link UpdateRequest} and sent to the server once
     * {@link #NUM_MAX_DELETE_REQUEST} of them have accumulated; any remainder is
     * sent from {@link #close()}. Unless the job was configured with
     * {@code noCommit=true}, {@link #close()} also commits when at least one
     * document was deleted. The reducer produces no output records.
     */
    public static class SolrDeleter implements Reducer<ByteWritable, Text, Text, ByteWritable> {
        /** Number of buffered delete ids that triggers sending a request. */
        private static final int NUM_MAX_DELETE_REQUEST = 1000;
        private SolrServer solr;
        /** Ids buffered in {@link #updateRequest} and not yet sent. */
        private int numDeletes = 0;
        /** Ids sent to the server so far by this reducer instance. */
        private int totalDeleted = 0;
        private UpdateRequest updateRequest = new UpdateRequest();
        private boolean noCommit = false;

        /**
         * Creates the Solr client from the job configuration and reads the
         * {@code noCommit} flag (default {@code false}).
         *
         * @param jobConf job configuration
         * @throws RuntimeException wrapping a {@link MalformedURLException} raised while
         *                          creating the client
         */
        @Override
        public void configure(JobConf jobConf) {
            try {
                this.solr = SolrUtils.getCommonsHttpSolrServer(jobConf);
                this.noCommit = jobConf.getBoolean("noCommit", false);
            } catch (MalformedURLException e) {
                throw new RuntimeException(e);
            }
        }

        /**
         * Sends any still-buffered deletes, commits if something was deleted and
         * committing is enabled, and logs the total.
         *
         * @throws IOException on I/O failure or wrapping a {@link SolrServerException}
         */
        @Override
        public void close() throws IOException {
            // flush() resets the buffer, so a repeated close() cannot resend the same ids.
            flush();
            if (this.totalDeleted > 0 && !this.noCommit) {
                try {
                    this.solr.commit();
                } catch (SolrServerException e) {
                    throw new IOException(e);
                }
            }
            SolrClean.LOG.info("SolrClean: deleted a total of {} documents", this.totalDeleted);
        }

        /**
         * Buffers a delete for each value and sends the buffer whenever it reaches
         * {@link #NUM_MAX_DELETE_REQUEST} entries. Increments the
         * {@code SolrCleanStatus / Deleted documents} counter once per value.
         *
         * @param byteWritable    reduce key (unused)
         * @param it              ids of the documents to delete
         * @param outputCollector unused; nothing is emitted
         * @param reporter        receives the counter increments
         * @throws IOException on I/O failure or wrapping a {@link SolrServerException}
         */
        @Override
        public void reduce(ByteWritable byteWritable, Iterator<Text> it, OutputCollector<Text, ByteWritable> outputCollector, Reporter reporter) throws IOException {
            while (it.hasNext()) {
                this.updateRequest.deleteById(it.next().toString());
                this.numDeletes++;
                reporter.incrCounter("SolrCleanStatus", "Deleted documents", 1L);
                if (this.numDeletes >= NUM_MAX_DELETE_REQUEST) {
                    flush();
                }
            }
        }

        /**
         * Sends the buffered deletes, if any, then starts a fresh request and moves
         * the buffered count into the running total. State is only reset after the
         * request succeeded, so a failed batch stays buffered.
         */
        private void flush() throws IOException {
            if (this.numDeletes == 0) {
                return;
            }
            SolrClean.LOG.info("SolrClean: deleting {} documents", this.numDeletes);
            try {
                this.updateRequest.process(this.solr);
            } catch (SolrServerException e) {
                throw new IOException(e);
            }
            this.updateRequest = new UpdateRequest();
            this.totalDeleted += this.numDeletes;
            this.numDeletes = 0;
        }
    }

    /**
     * Returns the configuration previously supplied through {@code setConf}.
     *
     * @return the stored configuration, or {@code null} if none has been set
     */
    @Override
    public Configuration getConf() {
        return conf;
    }

    /**
     * Stores the configuration that {@code getConf} will return.
     *
     * @param configuration configuration to keep
     */
    @Override
    public void setConf(Configuration configuration) {
        conf = configuration;
    }

    /**
     * Configures and runs the map/reduce job that removes documents from Solr,
     * logging the start time, end time and elapsed time.
     *
     * <p>The job reads the {@code current} sub-directory of {@code str} as
     * sequence files, maps with {@code DBFilter}, reduces with
     * {@code SolrDeleter}, and writes no output.
     *
     * @param str  crawl-db directory (first command-line argument of {@code run})
     * @param str2 Solr server URL, stored under {@code SolrConstants.SERVER_URL}
     * @param z    value stored under the {@code noCommit} job property
     * @throws IOException if the job cannot be run
     */
    public void delete(String str, String str2, boolean z) throws IOException {
        final SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        final long startMillis = System.currentTimeMillis();
        LOG.info("SolrClean: starting at {}", timestampFormat.format(startMillis));

        final NutchJob job = new NutchJob(getConf());

        // Input: the "current" segment of the crawl db.
        final Path currentDb = new Path(str, "current");
        FileInputFormat.addInputPath(job, currentDb);

        // Settings read back by the reducer.
        job.setBoolean("noCommit", z);
        job.set(SolrConstants.SERVER_URL, str2);

        job.setInputFormat(SequenceFileInputFormat.class);
        job.setOutputFormat(NullOutputFormat.class);
        job.setMapOutputKeyClass(ByteWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setMapperClass(DBFilter.class);
        job.setReducerClass(SolrDeleter.class);

        JobClient.runJob(job);

        final long endMillis = System.currentTimeMillis();
        LOG.info("SolrClean: finished at {}, elapsed: {}",
                timestampFormat.format(endMillis),
                TimingUtil.elapsedTime(startMillis, endMillis));
    }

    /**
     * Command-line entry point: {@code SolrClean <crawldb> <solrurl> [-noCommit]}.
     *
     * <p>The {@code -noCommit} flag is honoured only when it is the third of
     * exactly three arguments.
     *
     * @param strArr command-line arguments
     * @return {@code 1} after printing usage when fewer than two arguments are
     *         given, otherwise {@code 0} once the delete job has run
     * @throws IOException if the delete job fails
     */
    @Override
    public int run(String[] strArr) throws IOException {
        final int argCount = strArr.length;
        if (argCount < 2) {
            System.err.println("Usage: SolrClean <crawldb> <solrurl> [-noCommit]");
            return 1;
        }

        final boolean skipCommit = argCount == 3 && strArr[2].equals("-noCommit");
        delete(strArr[0], strArr[1], skipCommit);
        return 0;
    }

    /**
     * Runs the tool through {@code ToolRunner} with a fresh Nutch configuration
     * and exits the JVM with the tool's return code.
     *
     * @param strArr command-line arguments, forwarded to {@code run}
     * @throws Exception if the tool fails
     */
    public static void main(String[] strArr) throws Exception {
        final Configuration nutchConf = NutchConfiguration.create();
        final int exitCode = ToolRunner.run(nutchConf, new SolrClean(), strArr);
        System.exit(exitCode);
    }
}
