package org.jahia.services.textextraction;

import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.lang.StringUtils;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.WriteOutContentHandler;
import org.jahia.services.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.core.io.Resource;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/jahia/services/textextraction/TextExtractionService.class */
/**
 * Service that extracts text and metadata from documents using Apache Tika parsers.
 *
 * <p>Two Tika configurations are used: {@code config} for text extraction and
 * {@code configMetadata} for metadata extraction. The parsers are created lazily on first use
 * (see {@link #ensureInitialized()}). If the configuration is missing or yields no parsers, the
 * service disables itself and {@link #isEnabled()} returns {@code false}.
 */
public class TextExtractionService {
    private static final Logger logger = LoggerFactory.getLogger(TextExtractionService.class);

    /** Metadata key holding the declared content type of a document. */
    private static final String CONTENT_TYPE = "Content-Type";

    private Resource config;
    private Resource configMetadata;
    private CompositeParser parser;
    private CompositeParser parserMetadata;
    private boolean autoDetectType = true;
    private boolean enabled = true;
    // Written last inside the synchronized block of ensureInitialized(); the volatile write
    // publishes the parser fields and the enabled flag set by initialize().
    private volatile boolean initialized = false;
    private int maxExtractedCharacters = 100000;

    /**
     * Builds a parser from the given Tika configuration resource.
     *
     * @param resource the Tika configuration to read
     * @param z {@code true} to wrap the configuration in an {@link AutoDetectParser},
     *        {@code false} to use the parser defined by the configuration directly
     * @return the configured parser, or {@code null} if the configuration could not be loaded
     *         (the error is logged)
     */
    private static CompositeParser configureParser(Resource resource, boolean z) {
        CompositeParser configured = null;
        InputStream inputStream = null;
        try {
            inputStream = resource.getInputStream();
            TikaConfig tikaConfig = new TikaConfig(inputStream);
            configured = z ? new AutoDetectParser(tikaConfig) : (CompositeParser) tikaConfig.getParser();
        } catch (Exception e) {
            logger.error("Error initializing text extraction service. Service will be disabled. Cause: " + e.getMessage(), e);
        } finally {
            // Always release the configuration stream, including when TikaConfig fails to load.
            IOUtils.closeQuietly(inputStream);
        }
        return configured;
    }

    /**
     * Runs the given parser on the stream and returns the text collected by the content handler.
     *
     * @param compositeParser the parser to use
     * @param inputStream the document content
     * @param metadata document metadata; passed to the parser, which may read and update it
     * @param i the write limit handed to {@link WriteOutContentHandler}
     * @return the text collected before the end of the document or before the write limit was hit
     * @throws IOException if the stream cannot be read
     * @throws SAXException if parsing fails for a reason other than reaching the write limit
     * @throws TikaException if the parser fails
     */
    private static String doParse(CompositeParser compositeParser, InputStream inputStream, Metadata metadata, int i) throws IOException, SAXException, TikaException {
        long startTime = System.currentTimeMillis();
        if (logger.isDebugEnabled()) {
            logger.debug("Start text extraction using metadata: " + metadata);
        }
        WriteOutContentHandler handler = new WriteOutContentHandler(i);
        try {
            compositeParser.parse(inputStream, new BodyContentHandler(handler), metadata, new ParseContext());
        } catch (SAXException e) {
            // Reaching the write limit is signalled through a SAXException; it is not a failure.
            if (!handler.isWriteLimitReached(e)) {
                throw e;
            }
            if (i > 0) {
                logger.info("Document content length exceeded the configured limit. Extracted first " + i + " characters.");
            }
        }
        String extracted = handler.toString();
        if (logger.isDebugEnabled()) {
            logger.debug("Text extraction finished in " + (System.currentTimeMillis() - startTime) + " ms. Extracted " + extracted.length() + " characters.");
            logger.debug("Extracted metadata: " + metadata);
            if (logger.isTraceEnabled()) {
                logger.trace("Extracted text:\n" + extracted);
            }
        }
        return extracted;
    }

    /**
     * Checks whether a parser is registered for the media type of the given document.
     *
     * <p>When the text parser is an {@link AutoDetectParser}, its detector is asked first.
     * Otherwise, or if detection yields nothing, the {@code Content-Type} metadata entry is used;
     * any parameters after a {@code ;} (for example {@code charset}) are ignored.
     *
     * @param inputStream the document content, used for type detection
     * @param metadata document metadata, used for detection and as a fallback content type source
     * @return {@code true} if the service is enabled and a parser exists for the media type
     * @throws IOException if the stream cannot be read during detection
     */
    public boolean canHandle(InputStream inputStream, Metadata metadata) throws IOException {
        ensureInitialized();
        if (!isEnabled()) {
            return false;
        }
        MediaType mediaType = null;
        if (this.parser instanceof AutoDetectParser) {
            mediaType = ((AutoDetectParser) this.parser).getDetector().detect(inputStream, metadata);
        }
        if (mediaType == null) {
            String contentType = metadata.get(CONTENT_TYPE);
            if (contentType == null) {
                return false;
            }
            // Drop parameters such as "; charset=UTF-8" so the lookup key is the bare type/subtype.
            String baseType = StringUtils.substringBefore(contentType, ";").trim();
            // Category.PATH_DELIMITER is used as the type/subtype separator, as in the original code.
            String type = StringUtils.substringBefore(baseType, Category.PATH_DELIMITER);
            String subtype = StringUtils.substringAfter(baseType, Category.PATH_DELIMITER);
            if (type.length() == 0 || subtype.length() == 0) {
                return false;
            }
            mediaType = new MediaType(type, subtype);
        }
        return this.parser.getParsers().containsKey(mediaType);
    }

    /**
     * Initializes the service on first use. The unsynchronized check of the volatile flag keeps
     * the common path lock-free; the flag is re-checked under the lock before initializing.
     */
    private void ensureInitialized() {
        if (this.initialized) {
            return;
        }
        synchronized (this) {
            if (!this.initialized) {
                initialize();
                this.initialized = true;
            }
        }
    }

    /**
     * Extracts metadata from the document into the supplied {@link Metadata} object.
     *
     * <p>Does nothing when the service is disabled or no metadata parser could be configured.
     *
     * @param inputStream the document content
     * @param metadata receives the extracted metadata
     * @throws IOException if the stream cannot be read
     * @throws SAXException if parsing fails
     * @throws TikaException if the parser fails
     */
    public void extractMetadata(InputStream inputStream, Metadata metadata) throws IOException, SAXException, TikaException {
        ensureInitialized();
        if (!isEnabled()) {
            logger.debug("Text extraction service is disabled. Skipping metadata extraction.");
            return;
        }
        if (this.parserMetadata == null) {
            logger.debug("No metadata extraction parser is available. Skipping metadata extraction.");
            return;
        }
        doParse(this.parserMetadata, inputStream, metadata, 0);
    }

    /**
     * Creates the text and metadata parsers from the configured resources. Disables the service
     * if the configuration is missing or produces no parsers.
     */
    private void initialize() {
        if (!this.enabled) {
            logger.info("Text extraction service is disabled");
            return;
        }
        logger.info("Starting the text extraction service...");
        if (this.config == null || this.configMetadata == null
                || !this.config.exists() || !this.configMetadata.exists()) {
            logger.error("Text extraction configuration cannot be found. Disabling the service.");
            this.enabled = false;
            return;
        }

        CompositeParser textParser = configureParser(this.config, this.autoDetectType);
        if (textParser == null || textParser.getParsers().isEmpty()) {
            logger.error("No parsers have been found for text extraction service in the configuration '" + this.config.getDescription() + "'. Disabling service.");
            this.enabled = false;
            this.parser = null;
            return;
        }
        this.parser = textParser;
        logger.info("Initialized text extraction parser using " + this.config);

        if (this.config.equals(this.configMetadata)) {
            this.parserMetadata = this.parser;
            logger.info("Using same parser for metadata");
            return;
        }
        this.parserMetadata = configureParser(this.configMetadata, this.autoDetectType);
        if (this.parserMetadata == null) {
            // Text extraction still works; only metadata extraction is unavailable.
            logger.error("Unable to initialize metadata extraction parser using " + this.configMetadata + ". Metadata extraction will be skipped.");
        } else {
            logger.info("Initialized metadata extraction parser using " + this.configMetadata);
        }
    }

    /**
     * @return {@code true} if the service is enabled; may become {@code false} after a failed
     *         initialization
     */
    public boolean isEnabled() {
        return this.enabled;
    }

    /**
     * Extracts text from the document using the configured maximum number of characters.
     *
     * @param inputStream the document content
     * @param metadata document metadata
     * @return the extracted text, or {@code null} if the service is disabled
     * @throws IOException if the stream cannot be read
     * @throws SAXException if parsing fails
     * @throws TikaException if the parser fails
     */
    public String parse(InputStream inputStream, Metadata metadata) throws IOException, SAXException, TikaException {
        return parse(inputStream, metadata, this.maxExtractedCharacters);
    }

    /**
     * Extracts text from the document, limited by the given write limit.
     *
     * @param inputStream the document content
     * @param metadata document metadata
     * @param i the write limit handed to the content handler
     * @return the extracted text, or {@code null} if the service is disabled
     * @throws IOException if the stream cannot be read
     * @throws SAXException if parsing fails
     * @throws TikaException if the parser fails
     */
    public String parse(InputStream inputStream, Metadata metadata, int i) throws IOException, SAXException, TikaException {
        ensureInitialized();
        if (!isEnabled()) {
            logger.debug("Text extraction service is disabled. Returning null.");
            return null;
        }
        return doParse(this.parser, inputStream, metadata, i);
    }

    /**
     * Extracts text from a document whose content type is known.
     *
     * @param inputStream the document content
     * @param str the content type, stored under the {@code Content-Type} metadata key
     * @return the extracted text, or {@code null} if the service is disabled
     * @throws IOException if the stream cannot be read
     * @throws SAXException if parsing fails
     * @throws TikaException if the parser fails
     */
    public String parse(InputStream inputStream, String str) throws IOException, SAXException, TikaException {
        Metadata metadata = new Metadata();
        metadata.set(CONTENT_TYPE, str);
        return parse(inputStream, metadata);
    }

    /** @param z whether the text and metadata parsers should auto-detect the document type */
    public void setAutoDetectType(boolean z) {
        this.autoDetectType = z;
    }

    /** @param resource the Tika configuration used for text extraction */
    public void setConfig(Resource resource) {
        this.config = resource;
    }

    /** @param resource the Tika configuration used for metadata extraction */
    public void setConfigMetadata(Resource resource) {
        this.configMetadata = resource;
    }

    /** @param z {@code false} to disable the service; only read during initialization and by {@link #isEnabled()} */
    public void setEnabled(boolean z) {
        this.enabled = z;
    }

    /** @param i the default write limit used by {@link #parse(InputStream, Metadata)} */
    public void setMaxExtractedCharacters(int i) {
        this.maxExtractedCharacters = i;
    }
}
