/*
 * Decompiled with CFR 0.152.
 */
package org.nuxeo.ecm.core.convert.plugins.text.extractors;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.lang.reflect.Field;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.encryption.PDEncryptionDictionary;
import org.apache.pdfbox.pdmodel.encryption.PDStandardEncryption;
import org.apache.pdfbox.util.PDFOperator;
import org.apache.pdfbox.util.PDFStreamEngine;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.pdfbox.util.operator.OperatorProcessor;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.api.impl.blob.FileBlob;
import org.nuxeo.ecm.core.api.impl.blob.StringBlob;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
import org.nuxeo.ecm.core.convert.extension.Converter;
import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;

/**
 * Converter that extracts the text content of a PDF blob using PDFBox.
 *
 * <p>The extracted text is returned as a UTF-8 {@code text/plain} blob. When the
 * document uses standard encryption that forbids content extraction (and was not
 * decrypted with the owner password), an empty text blob is returned instead.
 */
public class PDF2TextConverter
implements Converter {
    private static final Log log = LogFactory.getLog(PDF2TextConverter.class);

    /**
     * Extracts the text of the PDF held by {@code blobHolder}.
     *
     * @param blobHolder holder whose main blob is the PDF to read
     * @param parameters conversion parameters; not used by this converter
     * @return a holder wrapping the extracted text, or an empty text blob when the
     *         PDF's permissions do not allow content extraction
     * @throws ConversionException if anything goes wrong while loading the document,
     *         extracting its text or building the result blob
     */
    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
        InputStream pdfStream = null;
        PDDocument document = null;
        File tempFile = null;
        try {
            pdfStream = blobHolder.getBlob().getStream();
            document = PDDocument.load(pdfStream);
            if (!PDF2TextConverter.isExtractionAllowed(document)) {
                return new SimpleCachableBlobHolder((Blob)new StringBlob(""));
            }
            String text = new PatchedPDFTextStripper().getText(document);
            // Replace non-breaking spaces with regular spaces.
            text = text.replace("\u00a0", " ");
            tempFile = File.createTempFile("pdfboplugin", ".txt");
            // The output stream is fully closed before the file is read back.
            PDF2TextConverter.writeUtf8(tempFile, text);
            InputStream textStream = new FileInputStream(tempFile);
            try {
                // NOTE(review): FileBlob is presumed to consume the stream eagerly in its
                // constructor; the temp file is deleted right after this method returns,
                // which already relied on that. Confirm against the FileBlob implementation.
                return new SimpleCachableBlobHolder((Blob)new FileBlob(textStream, "text/plain", "UTF-8"));
            }
            finally {
                PDF2TextConverter.closeQuietly(textStream, "temporary text file stream");
            }
        }
        catch (Exception e) {
            throw new ConversionException("Error dring text extraction with PDFBox", e);
        }
        finally {
            if (document != null) {
                try {
                    document.close();
                }
                catch (Exception e) {
                    log.error("Error while closing PDFBox document", e);
                }
            }
            PDF2TextConverter.closeQuietly(pdfStream, "PDF input stream");
            if (tempFile != null && !tempFile.delete()) {
                log.warn("Could not delete temporary file " + tempFile.getAbsolutePath());
            }
        }
    }

    /**
     * No-op: this converter needs no configuration from its descriptor.
     *
     * @param descriptor the converter descriptor; ignored
     */
    public void init(ConverterDescriptor descriptor) {
    }

    /**
     * Tells whether text may be extracted from {@code document}.
     *
     * <p>Extraction is refused only when the document carries a standard encryption
     * dictionary, was not decrypted with the owner password, and that dictionary
     * reports that content extraction is not permitted.
     */
    private static boolean isExtractionAllowed(PDDocument document) throws IOException {
        PDEncryptionDictionary encDictionary = document.getEncryptionDictionary();
        if (encDictionary instanceof PDStandardEncryption && !document.wasDecryptedWithOwnerPassword()) {
            return ((PDStandardEncryption)encDictionary).canExtractContent();
        }
        return true;
    }

    /** Writes {@code text} to {@code target} encoded as UTF-8, closing the file afterwards. */
    private static void writeUtf8(File target, String text) throws IOException {
        OutputStream out = new FileOutputStream(target);
        try {
            out.write(text.getBytes("UTF-8"));
        }
        finally {
            out.close();
        }
    }

    /** Closes {@code stream} if non-null; a failure to close is logged, never propagated. */
    private static void closeQuietly(InputStream stream, String description) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        }
        catch (IOException e) {
            log.error("Error while closing " + description, e);
        }
    }

    /**
     * Text stripper whose operator processing logs failures instead of propagating
     * them, so that an exception raised while handling one operator does not abort
     * the whole extraction.
     *
     * <p>It reads the {@code operators} and {@code unsupportedOperators} fields
     * declared on {@link PDFStreamEngine} through reflection.
     */
    public static class PatchedPDFTextStripper
    extends PDFTextStripper {
        /**
         * First stack frames of the failures already logged, shared by all instances.
         * Every access is guarded by synchronizing on the set itself. May contain
         * {@code null}, which stands for failures that carried no stack trace.
         */
        static final Set<StackTraceElement> loggedStacks = new HashSet<StackTraceElement>();

        /**
         * Reads the value of a field declared on {@link PDFStreamEngine}, bypassing
         * access checks.
         *
         * @param name the name of the field to read
         * @return the field's current value for this instance
         * @throws Error if the field cannot be found or read
         */
        protected Object unrestrictedAccess(String name) {
            try {
                Field f = PDFStreamEngine.class.getDeclaredField(name);
                f.setAccessible(true);
                return f.get(this);
            }
            catch (Exception e) {
                throw new Error("Cannot get access to PDFStreamEngine fields", e);
            }
        }

        /** Returns the engine's {@code unsupportedOperators} field, read reflectively. */
        @SuppressWarnings("unchecked")
        protected Set<String> unsupportedOperators() {
            // Unchecked: the element type cannot be verified through reflection.
            return (Set<String>)this.unrestrictedAccess("unsupportedOperators");
        }

        /** Returns the engine's {@code operators} field, read reflectively. */
        @SuppressWarnings("unchecked")
        protected Map<String, OperatorProcessor> operators() {
            // Unchecked: the key/value types cannot be verified through reflection.
            return (Map<String, OperatorProcessor>)this.unrestrictedAccess("operators");
        }

        /**
         * Dispatches {@code operator} to its registered processor, if any.
         *
         * <p>An operation with no processor is logged once at info level and then
         * remembered in the unsupported set. Any exception raised during processing
         * is caught and logged at warn level, at most once per distinct top stack
         * frame, and is never rethrown.
         *
         * @param operator  the operator to process
         * @param arguments the operator's arguments
         * @throws IOException declared for compatibility with the overridden method
         */
        protected void processOperator(PDFOperator operator, List<COSBase> arguments) throws IOException {
            try {
                String operation = operator.getOperation();
                OperatorProcessor processor = this.operators().get(operation);
                if (processor != null) {
                    processor.setContext(this);
                    processor.process(operator, arguments);
                } else if (!this.unsupportedOperators().contains(operation)) {
                    log.info("unsupported/disabled operation: " + operation);
                    this.unsupportedOperators().add(operation);
                }
            }
            catch (Exception e) {
                // A throwable is allowed to carry a zero-length stack trace; indexing [0]
                // unconditionally would then throw from inside this handler. Such
                // failures are grouped under the null key instead.
                StackTraceElement[] trace = e.getStackTrace();
                StackTraceElement root = trace.length > 0 ? trace[0] : null;
                synchronized (loggedStacks) {
                    // add() returns false when this frame was already recorded.
                    if (!loggedStacks.add(root)) {
                        return;
                    }
                }
                log.warn("Caught error in pdfbox during extraction (stack logged only once)", e);
            }
        }
    }
}

