package org.apache.nutch.crawl;

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.nutch.net.URLFilters;
import org.apache.nutch.net.URLNormalizers;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/crawl/LinkDbFilter.class */
/**
 * Mapper that optionally normalizes and/or filters the URLs of link-db entries.
 *
 * <p>Both the record key (the target URL) and the source URL of every
 * {@link Inlink} in the value go through the same two optional steps:
 * normalization (enabled by {@link #URL_NORMALIZING}) followed by filtering
 * (enabled by {@link #URL_FILTERING}). A URL for which either step yields
 * {@code null} or throws is dropped. A record is emitted only when its key
 * survives and at least one of its inlinks survives.
 */
public class LinkDbFilter implements Mapper<Text, Inlinks, Text, Inlinks> {
    /** Boolean job property that switches URL filtering on; read with a default of {@code false}. */
    public static final String URL_FILTERING = "linkdb.url.filters";
    /** Boolean job property that switches URL normalization on; read with a default of {@code false}. */
    public static final String URL_NORMALIZING = "linkdb.url.normalizer";
    /** Job property naming the normalizer scope; falls back to {@link URLNormalizers#SCOPE_LINKDB}. */
    public static final String URL_NORMALIZING_SCOPE = "linkdb.url.normalizer.scope";
    public static final Logger LOG = LoggerFactory.getLogger(LinkDbFilter.class);

    /** Whether URLs are passed through {@link #filters}. */
    private boolean filter;
    /** Whether URLs are passed through {@link #normalizers}. */
    private boolean normalize;
    /** Only initialised by {@link #configure(JobConf)} when {@link #filter} is {@code true}. */
    private URLFilters filters;
    /** Only initialised by {@link #configure(JobConf)} when {@link #normalize} is {@code true}. */
    private URLNormalizers normalizers;
    /** Normalizer scope; only initialised when {@link #normalize} is {@code true}. */
    private String scope;
    /** Output key instance shared by all {@code map} calls; its content is overwritten before each emit. */
    private final Text newKey = new Text();

    /**
     * Reads the filtering/normalizing switches from the job configuration and
     * creates only the helpers that are actually enabled.
     *
     * @param jobConf job configuration supplying {@link #URL_FILTERING},
     *                {@link #URL_NORMALIZING} and {@link #URL_NORMALIZING_SCOPE}
     */
    @Override
    public void configure(JobConf jobConf) {
        this.filter = jobConf.getBoolean(URL_FILTERING, false);
        this.normalize = jobConf.getBoolean(URL_NORMALIZING, false);
        if (this.filter) {
            this.filters = new URLFilters(jobConf);
        }
        if (this.normalize) {
            this.scope = jobConf.get(URL_NORMALIZING_SCOPE, URLNormalizers.SCOPE_LINKDB);
            this.normalizers = new URLNormalizers(jobConf, this.scope);
        }
    }

    /** Does nothing; this mapper holds nothing that needs to be released. */
    @Override
    public void close() {
    }

    /**
     * Normalizes/filters the key URL and the source URL of each inlink, then
     * emits the surviving inlinks under the (possibly rewritten) key.
     *
     * @param text            record key holding the target URL
     * @param inlinks         inlinks pointing at the target URL
     * @param outputCollector receives the rewritten key together with the surviving inlinks
     * @param reporter        not used by this mapper
     * @throws IOException propagated from {@code outputCollector.collect}
     */
    @Override
    public void map(Text text, Inlinks inlinks, OutputCollector<Text, Inlinks> outputCollector, Reporter reporter) throws IOException {
        String targetUrl = normalizeAndFilter(text.toString());
        if (targetUrl == null) {
            // Key URL was rejected or failed processing: drop the whole record.
            return;
        }

        Inlinks kept = new Inlinks();
        Iterator<Inlink> it = inlinks.iterator();
        while (it.hasNext()) {
            Inlink inlink = it.next();
            String fromUrl = normalizeAndFilter(inlink.getFromUrl());
            if (fromUrl != null) {
                // Re-create the inlink so it carries the processed source URL.
                kept.add(new Inlink(fromUrl, inlink.getAnchor()));
            }
        }

        // Records that lost every inlink are not emitted.
        if (kept.size() > 0) {
            this.newKey.set(targetUrl);
            outputCollector.collect(this.newKey, kept);
        }
    }

    /**
     * Applies the enabled steps (normalization first, then filtering) to one URL.
     *
     * @param url URL to process
     * @return the processed URL, or {@code null} when an enabled step returned
     *         {@code null} or threw; with both steps disabled {@code url} is
     *         returned unchanged
     */
    private String normalizeAndFilter(String url) {
        String result = url;
        if (this.normalize) {
            try {
                result = this.normalizers.normalize(result, this.scope);
            } catch (Exception e) {
                logSkipped(result, e);
                return null;
            }
        }
        // A null result from normalization must not be handed to the filters.
        if (result != null && this.filter) {
            try {
                result = this.filters.filter(result);
            } catch (Exception e) {
                logSkipped(result, e);
                return null;
            }
        }
        return result;
    }

    /** Logs a warning that {@code url} is being skipped because processing it raised {@code cause}. */
    private static void logSkipped(String url, Exception cause) {
        // Pass the exception as text so it always fills the second placeholder,
        // independent of how the SLF4J version treats a trailing Throwable argument.
        LOG.warn("Skipping {}:{}", url, cause.toString());
    }
}
