package org.nuxeo.ecm.platform.transform.plugin.xml;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.io.Writer;
import java.util.List;
import java.util.Map;
import org.dom4j.DocumentException;
import org.dom4j.io.DOMWriter;
import org.dom4j.io.SAXReader;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.impl.blob.FileBlob;
import org.nuxeo.ecm.platform.transform.document.TransformDocumentImpl;
import org.nuxeo.ecm.platform.transform.interfaces.TransformDocument;
import org.nuxeo.ecm.platform.transform.plugin.AbstractPlugin;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.traversal.DocumentTraversal;
import org.w3c.dom.traversal.NodeFilter;
import org.w3c.dom.traversal.TreeWalker;

/* loaded from: input_file:org/nuxeo/ecm/platform/transform/plugin/xml/Xml2TextPluginImpl.class */
/**
 * Transform plugin that extracts the text content of XML sources.
 *
 * <p>Each source blob is parsed with dom4j, converted to a W3C DOM document and
 * walked depth-first; the value of every node that carries one is written out,
 * followed by a single space. The result is wrapped in a {@code text/plain} blob.
 */
public class Xml2TextPluginImpl extends AbstractPlugin {

    /**
     * Node filter handed to the tree walker. DOM node types are positive
     * constants, so in practice every node is accepted.
     */
    public class AllElementsFilter implements NodeFilter {
        AllElementsFilter() {
        }

        /**
         * @param node the node being considered by the walker
         * @return {@link NodeFilter#FILTER_ACCEPT} when the node type is positive,
         *         {@link NodeFilter#FILTER_SKIP} otherwise
         */
        public short acceptNode(Node node) {
            return node.getNodeType() > 0 ? NodeFilter.FILTER_ACCEPT : NodeFilter.FILTER_SKIP;
        }
    }

    /**
     * Runs the parent transformation, then appends one extracted-text result per
     * source document to the list returned by the parent.
     *
     * @param map transformation options, forwarded unchanged to the parent
     * @param transformDocumentArr the XML sources
     * @return the parent's result list with the extracted-text documents added
     * @throws Exception if the parent transformation or the extraction fails
     */
    public List<TransformDocument> transform(Map<String, Serializable> map, TransformDocument... transformDocumentArr) throws Exception {
        List<TransformDocument> results = super.transform(map, transformDocumentArr);
        extractFromXml(results, transformDocumentArr);
        return results;
    }

    /**
     * Extracts the text of each source and appends it to {@code list} as a new
     * {@link TransformDocumentImpl} carrying the extracted blob's MIME type.
     *
     * @param list the list receiving one result per source
     * @param transformDocumentArr the XML sources
     * @throws Exception if a source cannot be read or parsed
     */
    public void extractFromXml(List<TransformDocument> list, TransformDocument... transformDocumentArr) throws Exception {
        for (TransformDocument source : transformDocumentArr) {
            InputStream in = source.getBlob().getStream();
            try {
                Blob text = extractFromXmlSource(in);
                list.add(new TransformDocumentImpl(text, text.getMimeType()));
            } finally {
                // This method obtained the stream, so it is responsible for releasing it.
                if (in != null) {
                    in.close();
                }
            }
        }
    }

    /**
     * Parses the XML read from {@code inputStream} and returns its text content
     * as a {@code text/plain} blob. The stream is not closed here.
     *
     * @param inputStream the XML source
     * @return a blob holding the extracted text
     * @throws DocumentException if the XML cannot be parsed
     * @throws IOException if writing the extracted text fails
     * @throws Exception if the parser rejects a feature or the DOM traversal fails
     */
    private Blob extractFromXmlSource(InputStream inputStream) throws DocumentException, Exception, IOException {
        SAXReader reader = new SAXReader();
        reader.setMergeAdjacentText(true);
        reader.setFeature("http://xml.org/sax/features/validation", false);
        reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        // Sources may be untrusted: do not resolve external entities declared in
        // an internal DTD subset (XXE), which would pull external content into the output.
        reader.setFeature("http://xml.org/sax/features/external-general-entities", false);
        reader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);

        Document dom = new DOMWriter().write(reader.read(inputStream));

        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        // NOTE(review): the writer uses the platform default charset and the blob
        // records no encoding - confirm what consumers decode with before pinning one.
        OutputStreamWriter out = new OutputStreamWriter(buffer);
        extractTextFromDocument(dom, out);
        out.close();
        return new FileBlob(new ByteArrayInputStream(buffer.toByteArray()), "text/plain");
    }

    /**
     * Writes the value of every node below the document element to
     * {@code writer}, each followed by a space. Nothing is written when the
     * document has no document element.
     *
     * @param document a DOM document; it is cast to {@link DocumentTraversal}
     * @param writer the destination for the extracted text
     * @throws Exception if the traversal or the write fails
     */
    public void extractTextFromDocument(Document document, Writer writer) throws Exception {
        // Start from the document element rather than the last child: a trailing
        // comment or processing instruction would otherwise become the walk root
        // and no text would be produced.
        Node root = document.getDocumentElement();
        if (root == null) {
            return;
        }
        DocumentTraversal traversal = (DocumentTraversal) document;
        TreeWalker walker = traversal.createTreeWalker(root, NodeFilter.SHOW_ALL, new AllElementsFilter(), true);
        walk(walker, writer);
    }

    /**
     * Depth-first walk over the children of the walker's current node. The
     * walker is repositioned on the starting node before returning, so the
     * caller's sibling iteration can continue from where it was.
     *
     * @param treeWalker the walker, positioned on the node whose children are visited
     * @param writer the destination for node values
     * @throws DOMException if the walker cannot be repositioned or a value cannot be read
     * @throws IOException if writing fails
     */
    private void walk(TreeWalker treeWalker, Writer writer) throws DOMException, IOException {
        Node parent = treeWalker.getCurrentNode();
        for (Node child = treeWalker.firstChild(); child != null; child = treeWalker.nextSibling()) {
            String value = child.getNodeValue();
            if (value != null) {
                writer.write(value + " ");
            }
            walk(treeWalker, writer);
        }
        treeWalker.setCurrentNode(parent);
    }

    /**
     * Manual smoke test: extracts the text of a small inline XML sample and
     * prints the resulting byte count and text to standard output.
     *
     * @param strArr unused
     */
    public static void main(String[] strArr) {
        try {
            Blob extracted = new Xml2TextPluginImpl().extractFromXmlSource(new ByteArrayInputStream("<?xml version=\"1.0\" encoding=\"UTF-8\"?><body><p>This Document is for testing ]: Axe</p>joe<p>t &quot; is a&quot;.  t &quot;work&quot;. &quot;Ana&quot; are mere.  Kaine .</p></body>".getBytes()));
            System.out.println("ext   " + extracted.getByteArray().length);
            System.out.println("ext text: " + new String(extracted.getByteArray()));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
