/*
 * Decompiled with CFR 0.152.
 */
package org.jahia.services.textextraction;

import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.lang.StringUtils;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.WriteOutContentHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.core.io.Resource;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
 * Text and metadata extraction service backed by Apache Tika parsers.
 *
 * <p>The service is configured through its setters ({@link #setConfig(Resource)},
 * {@link #setConfigMetadata(Resource)}, ...) and builds its parsers lazily on the first call
 * that needs them. If it is disabled, or the configuration cannot be loaded, the service turns
 * itself off: {@link #canHandle(InputStream, Metadata)} returns {@code false},
 * {@link #parse(InputStream, Metadata, int)} returns {@code null} and
 * {@link #extractMetadata(InputStream, Metadata)} does nothing.
 */
public class TextExtractionService {
    private static final Logger logger = LoggerFactory.getLogger(TextExtractionService.class);

    /** Metadata key under which the declared content type of a document is stored. */
    private static final String CONTENT_TYPE = "Content-Type";

    private boolean autoDetectType = true;
    private Resource config;
    private Resource configMetadata;
    private boolean enabled = true;
    // volatile: read outside the lock in ensureInitialized(), written inside it.
    private volatile boolean initialized = false;
    private int maxExtractedCharacters = 100000;
    private CompositeParser parser;
    private CompositeParser parserMetadata;

    /**
     * Builds a parser from the given Tika configuration resource.
     *
     * @param config the resource holding the Tika configuration
     * @param autoDetectType if {@code true} the configuration is wrapped into an
     *        {@link AutoDetectParser}; otherwise the parser declared by the configuration is
     *        used directly
     * @return the configured parser, or {@code null} if the configuration could not be loaded
     *         (the failure is logged, not propagated)
     */
    private static CompositeParser configureParser(Resource config, boolean autoDetectType) {
        // Declared as CompositeParser because the non-auto-detect branch does not yield an
        // AutoDetectParser.
        CompositeParser parser = null;
        InputStream stream = null;
        try {
            stream = config.getInputStream();
            TikaConfig cfg = new TikaConfig(stream);
            if (autoDetectType) {
                parser = new AutoDetectParser(cfg);
            } else {
                parser = (CompositeParser) cfg.getParser();
            }
        } catch (Exception e) {
            // Deliberately broad: any failure here results in a null parser for the caller
            // to handle instead of breaking the caller.
            logger.error("Error initializing text extraction service. Service will be disabled. Cause: {}", e.getMessage(), e);
        } finally {
            IOUtils.closeQuietly(stream);
        }
        return parser;
    }

    /**
     * Runs the given parser on the stream and returns the text collected by the handler.
     *
     * @param parser the parser to use
     * @param stream the document content
     * @param metadata the document metadata; passed to the parser, which may update it
     * @param characterLimit the write limit handed to the {@link WriteOutContentHandler}
     * @return the extracted text; if the write limit was hit, the text collected up to that point
     * @throws IOException if the parser reports an I/O failure
     * @throws SAXException if the parser reports a SAX failure other than the write limit signal
     * @throws TikaException if the parser reports a parsing failure
     */
    private static String doParse(CompositeParser parser, InputStream stream, Metadata metadata, int characterLimit) throws IOException, SAXException, TikaException {
        long startTime = System.currentTimeMillis();
        if (logger.isDebugEnabled()) {
            logger.debug("Start text extraction using metadata: {}", metadata);
        }
        WriteOutContentHandler handler = new WriteOutContentHandler(characterLimit);
        try {
            parser.parse(stream, new BodyContentHandler(handler), metadata, new ParseContext());
        } catch (SAXException e) {
            if (!handler.isWriteLimitReached(e)) {
                throw e;
            }
            // The handler aborted parsing because the limit was hit: this is the expected way
            // of truncating the output, so keep what was collected instead of failing.
            if (characterLimit > 0) {
                logger.info("Document content length exceeded the configured limit. Extracted first {} characters.", characterLimit);
            }
        }
        String extractedText = handler.toString();
        if (logger.isDebugEnabled()) {
            logger.debug("Text extraction finished in {} ms. Extracted {} characters.", System.currentTimeMillis() - startTime, extractedText.length());
            logger.debug("Extracted metadata: {}", metadata);
        }
        return extractedText;
    }

    /**
     * Checks whether a parser is registered for the media type of the given document.
     *
     * <p>When the text parser is an {@link AutoDetectParser} its detector is asked first;
     * otherwise, or if detection yields nothing, the {@code Content-Type} metadata entry is used.
     *
     * @param stream the document content, handed to the detector
     * @param metadata the document metadata
     * @return {@code true} if the service is enabled and a parser exists for the media type
     * @throws IOException if the detector fails to read the stream
     */
    public boolean canHandle(InputStream stream, Metadata metadata) throws IOException {
        ensureInitialized();
        if (!isEnabled()) {
            return false;
        }
        MediaType contentMediaType = null;
        if (parser instanceof AutoDetectParser) {
            contentMediaType = ((AutoDetectParser) parser).getDetector().detect(stream, metadata);
        }
        if (contentMediaType == null) {
            String contentType = metadata.get(CONTENT_TYPE);
            if (contentType != null) {
                contentMediaType = new MediaType(StringUtils.substringBefore(contentType, "/"), StringUtils.substringAfter(contentType, "/"));
            }
        }
        return contentMediaType != null && parser.getParsers().containsKey(contentMediaType);
    }

    /**
     * Runs {@link #initialize()} at most once per instance; the flag is re-checked under the
     * instance lock so that concurrent first callers do not initialize twice.
     */
    private void ensureInitialized() {
        if (!initialized) {
            synchronized (this) {
                if (!initialized) {
                    initialize();
                    initialized = true;
                }
            }
        }
    }

    /**
     * Runs the metadata parser on the document with a write limit of {@code 0}; the text
     * returned by the handler is discarded and only {@code metadata} is of interest.
     *
     * <p>Does nothing if the service is disabled or no metadata parser could be configured.
     *
     * @param stream the document content
     * @param metadata the metadata object passed to the parser
     * @throws IOException if the parser reports an I/O failure
     * @throws SAXException if the parser reports a SAX failure other than the write limit signal
     * @throws TikaException if the parser reports a parsing failure
     */
    public void extractMetadata(InputStream stream, Metadata metadata) throws IOException, SAXException, TikaException {
        ensureInitialized();
        if (!isEnabled()) {
            if (logger.isDebugEnabled()) {
                logger.debug("Text extraction service is disabled. Skipping metadata extraction.");
            }
            return;
        }
        if (parserMetadata == null) {
            // The metadata configuration failed to load; this was already reported as an
            // error during initialization.
            if (logger.isDebugEnabled()) {
                logger.debug("Metadata extraction parser is not available. Skipping metadata extraction.");
            }
            return;
        }
        doParse(parserMetadata, stream, metadata, 0);
    }

    /**
     * Creates the text and metadata parsers from the configured resources. Disables the service
     * if either resource is missing or the text parser ends up without any registered parsers.
     */
    private void initialize() {
        if (!enabled) {
            logger.info("Text extraction service is disabled");
            return;
        }
        logger.info("Starting the text extraction service...");
        if (!config.exists() || !configMetadata.exists()) {
            logger.error("Text extraction configuration cannot be found. Disabling the service.");
            enabled = false;
            return;
        }
        parser = configureParser(config, autoDetectType);
        if (parser == null || parser.getParsers().isEmpty()) {
            logger.error("No parsers have been found for text extraction service in the configuration '{}'. Disabling service.", config.getDescription());
            enabled = false;
            parser = null;
            return;
        }
        logger.info("Initialized text extraction parser using {}", config);
        if (config.equals(configMetadata)) {
            // Same configuration resource: no need to build a second parser.
            parserMetadata = parser;
            logger.info("Using same parser for metadata");
            return;
        }
        parserMetadata = configureParser(configMetadata, autoDetectType);
        if (parserMetadata != null) {
            logger.info("Initialized metadata extraction parser using {}", configMetadata);
        } else {
            logger.error("Metadata extraction parser could not be initialized using {}. Metadata extraction will be skipped.", configMetadata);
        }
    }

    /**
     * @return {@code true} unless the service was switched off via {@link #setEnabled(boolean)}
     *         or disabled itself during initialization
     */
    public boolean isEnabled() {
        return enabled;
    }

    /**
     * Extracts text using the limit set via {@link #setMaxExtractedCharacters(int)}.
     *
     * @param stream the document content
     * @param metadata the document metadata
     * @return the extracted text, or {@code null} if the service is disabled
     * @throws IOException if the parser reports an I/O failure
     * @throws SAXException if the parser reports a SAX failure other than the write limit signal
     * @throws TikaException if the parser reports a parsing failure
     */
    public String parse(InputStream stream, Metadata metadata) throws IOException, SAXException, TikaException {
        return parse(stream, metadata, maxExtractedCharacters);
    }

    /**
     * Extracts text from the document using the given write limit.
     *
     * @param stream the document content
     * @param metadata the document metadata
     * @param characterLimit the write limit handed to the content handler
     * @return the extracted text (truncated if the limit was hit), or {@code null} if the
     *         service is disabled
     * @throws IOException if the parser reports an I/O failure
     * @throws SAXException if the parser reports a SAX failure other than the write limit signal
     * @throws TikaException if the parser reports a parsing failure
     */
    public String parse(InputStream stream, Metadata metadata, int characterLimit) throws IOException, SAXException, TikaException {
        ensureInitialized();
        if (!isEnabled()) {
            if (logger.isDebugEnabled()) {
                logger.debug("Text extraction service is disabled. Returning null.");
            }
            return null;
        }
        return doParse(parser, stream, metadata, characterLimit);
    }

    /**
     * Extracts text from a document for which only the content type is known.
     *
     * @param stream the document content
     * @param contentType the value stored under the {@code Content-Type} metadata key
     * @return the extracted text, or {@code null} if the service is disabled
     * @throws IOException if the parser reports an I/O failure
     * @throws SAXException if the parser reports a SAX failure other than the write limit signal
     * @throws TikaException if the parser reports a parsing failure
     */
    public String parse(InputStream stream, String contentType) throws IOException, SAXException, TikaException {
        // Initialization is triggered by the delegate parse(...) call.
        Metadata metadata = new Metadata();
        metadata.set(CONTENT_TYPE, contentType);
        return parse(stream, metadata);
    }

    /**
     * @param autoDetectType whether the parsers should be wrapped into an
     *        {@link AutoDetectParser}; only read during initialization
     */
    public void setAutoDetectType(boolean autoDetectType) {
        this.autoDetectType = autoDetectType;
    }

    /**
     * @param config the Tika configuration resource for the text parser; only read during
     *        initialization
     */
    public void setConfig(Resource config) {
        this.config = config;
    }

    /**
     * @param configMetadata the Tika configuration resource for the metadata parser; only read
     *        during initialization
     */
    public void setConfigMetadata(Resource configMetadata) {
        this.configMetadata = configMetadata;
    }

    /**
     * @param enabled whether the service should be active
     */
    public void setEnabled(boolean enabled) {
        this.enabled = enabled;
    }

    /**
     * @param maxExtractedCharacters the write limit used by {@link #parse(InputStream, Metadata)}
     *        and {@link #parse(InputStream, String)}
     */
    public void setMaxExtractedCharacters(int maxExtractedCharacters) {
        this.maxExtractedCharacters = maxExtractedCharacters;
    }
}

