package org.apache.tika.parser.microsoft.xml;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import javax.xml.namespace.QName;
import org.apache.batik.constants.XMLConstants;
import org.apache.batik.util.CSSConstants;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.poi.openxml4j.opc.ContentTypes;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.TeeContentHandler;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.DefaultHandler;

/* JADX WARN: Classes with same name are omitted:
  input_file:WEB-INF/lib/tika-parsers-standard-package-2.4.0.jar:org/apache/tika/parser/microsoft/xml/WordMLParser.class
 */
/* loaded from: input_file:WEB-INF/lib/tika-parser-microsoft-module-2.4.0.jar:org/apache/tika/parser/microsoft/xml/WordMLParser.class */
public class WordMLParser extends AbstractXML2003Parser {
    /** Maps lower-cased WordML element local names to the XHTML element emitted in their place; assigned in the static initializer. */
    private static final Map<String, String> WORDML_TO_XHTML;

    /**
     * Qualified names of the elements that {@code WordMLHandler} consults when deciding whether
     * character data should be withheld from the downstream handler.
     */
    // NOTE(review): ContentTypes.EXTENSION_PICT looks like a decompiler-substituted constant; presumably it resolves to "pict" — confirm.
    private static final Set<QName> IGNORE_CHARACTERS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
            new QName("http://schemas.microsoft.com/office/word/2003/wordml", "hlink"),
            new QName("http://schemas.microsoft.com/office/word/2003/wordml", ContentTypes.EXTENSION_PICT),
            new QName("http://schemas.microsoft.com/office/word/2003/wordml", "binData"),
            new QName("urn:schemas-microsoft-com:office:office", "DocumentProperties"))));

    /** Media type written into the metadata by {@link #setContentType(Metadata)}. */
    private static final MediaType MEDIA_TYPE = MediaType.application("vnd.ms-wordml");

    /** The single-element set returned by {@link #getSupportedTypes(ParseContext)}. */
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MEDIA_TYPE);

    /* JADX WARN: Classes with same name are omitted:
      input_file:WEB-INF/lib/tika-parsers-standard-package-2.4.0.jar:org/apache/tika/parser/microsoft/xml/WordMLParser$PictHandler.class
     */
    /* loaded from: input_file:WEB-INF/lib/tika-parser-microsoft-module-2.4.0.jar:org/apache/tika/parser/microsoft/xml/WordMLParser$PictHandler.class */
    /**
     * SAX handler that collects base64 {@code binData} payloads from a WordML document and hands
     * the decoded bytes to the {@link EmbeddedDocumentExtractor}.
     *
     * <p>Binary data that appears inside a {@code pict} element is processed when the {@code pict}
     * element closes (after an {@code img} element has been emitted); binary data outside a
     * {@code pict} element is processed as soon as its {@code binData} element closes.
     */
    private static class PictHandler extends DefaultHandler {
        private static final String WORD_ML_URL = "http://schemas.microsoft.com/office/word/2003/wordml";
        private static final String MS_OFFICE_URN = "urn:schemas-microsoft-com:office:office";
        private static final String MS_VML_URN = "urn:schemas-microsoft-com:vml";
        private static final String XHTML_URL = "http://www.w3.org/1999/xhtml";
        private static final String BIN_DATA = "binData";

        final Metadata parentMetadata;
        final ContentHandler handler;
        final EmbeddedDocumentExtractor embeddedDocumentExtractor;
        /** Accumulates the base64 text found between the start and end of a binData element. */
        final StringBuilder buffer = new StringBuilder();
        final Base64 base64 = new Base64();
        /** Decoded payload of the most recent binData element; null once it has been handled. */
        byte[] rawBytes = null;
        boolean inPict = false;
        boolean inBin = false;
        /** Value of the binData "name" attribute with the first "wordml://" occurrence removed. */
        String pictName = null;
        /** Title of the VML imagedata element, with the extension of its "src" appended when present. */
        String pictSource = null;

        /**
         * @param contentHandler            handler that receives the generated {@code img} events and pict text
         * @param metadata                  metadata of the containing document; embedded-stream failures are recorded here
         * @param embeddedDocumentExtractor extractor that receives the decoded binary payloads
         */
        public PictHandler(ContentHandler contentHandler, Metadata metadata, EmbeddedDocumentExtractor embeddedDocumentExtractor) {
            this.handler = contentHandler;
            this.parentMetadata = metadata;
            this.embeddedDocumentExtractor = embeddedDocumentExtractor;
        }

        /**
         * Tracks entry into {@code pict}/{@code binData} elements and captures the picture name
         * and original source name from their attributes.
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            if (WORD_ML_URL.equals(uri)) {
                // NOTE(review): ContentTypes.EXTENSION_PICT looks like a decompiler-substituted constant; presumably "pict" — confirm.
                if (ContentTypes.EXTENSION_PICT.equals(localName)) {
                    this.inPict = true;
                } else if (BIN_DATA.equals(localName)) {
                    this.inBin = true;
                    this.pictName = attributes.getValue(WORD_ML_URL, "name");
                    if (this.pictName != null) {
                        this.pictName = this.pictName.replaceFirst("wordml://", "");
                    }
                }
                return;
            }
            if (!MS_VML_URN.equals(uri) || !"imagedata".equals(localName)) {
                return;
            }
            // The source attribute is looked up in the empty namespace.
            // NOTE(review): CSSConstants.CSS_SRC_PROPERTY looks decompiler-substituted; presumably "src" — confirm.
            String src = attributes.getValue("", CSSConstants.CSS_SRC_PROPERTY);
            String title = attributes.getValue(MS_OFFICE_URN, "title");
            if (title == null || title.equals("")) {
                return;
            }
            if (src != null) {
                // Keep the title as the file name and borrow the extension (including the dot) from src.
                int dot = src.lastIndexOf(".");
                if (dot > -1 && dot + 1 < src.length()) {
                    title = title + src.substring(dot);
                }
            }
            this.pictSource = title;
        }

        /**
         * Buffers text while inside {@code binData}; otherwise forwards text found inside
         * {@code pict} to the wrapped handler.
         */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (this.inBin) {
                this.buffer.append(ch, start, length);
            } else if (this.inPict) {
                this.handler.characters(ch, start, length);
            }
        }

        /**
         * On the end of {@code pict}, emits an {@code img} element and processes any pending
         * payload; on the end of {@code binData}, decodes the buffered base64 text.
         */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (!WORD_ML_URL.equals(uri)) {
                return;
            }
            if (ContentTypes.EXTENSION_PICT.equals(localName)) {
                this.inPict = false;
                AttributesImpl imgAttributes = new AttributesImpl();
                if (this.pictName != null) {
                    // NOTE(review): XMLConstants.XLINK_HREF_ATTRIBUTE looks decompiler-substituted; presumably "href" — confirm.
                    imgAttributes.addAttribute(XHTML_URL, XMLConstants.XLINK_HREF_ATTRIBUTE, XMLConstants.XLINK_HREF_ATTRIBUTE, "cdata", this.pictName);
                }
                this.handler.startElement(XHTML_URL, "img", "img", imgAttributes);
                this.handler.endElement(XHTML_URL, "img", "img");
                handleEmbedded(false);
                return;
            }
            if (BIN_DATA.equals(localName)) {
                this.inBin = false;
                boolean decoded = false;
                try {
                    this.rawBytes = this.base64.decode(this.buffer.toString());
                    decoded = true;
                } catch (IllegalArgumentException e) {
                    EmbeddedDocumentUtil.recordEmbeddedStreamException(e, this.parentMetadata);
                } finally {
                    // Always clear the buffer so the next binData element starts empty.
                    this.buffer.setLength(0);
                }
                // Inside a pict element the payload is deferred until the pict element closes.
                if (decoded && !this.inPict) {
                    handleEmbedded(true);
                }
            }
        }

        /**
         * Passes the pending decoded bytes, if any, to the embedded document extractor and then
         * clears the per-picture state.
         *
         * @param outputHtml forwarded unchanged as the last argument of {@code parseEmbedded}
         * @throws SAXException if the embedded extractor or the wrapped handler fails
         */
        private void handleEmbedded(boolean outputHtml) throws SAXException {
            if (this.rawBytes != null) {
                // try-with-resources closes the stream on every path, including failures.
                try (TikaInputStream stream = TikaInputStream.get(this.rawBytes)) {
                    Metadata metadata = new Metadata();
                    if (this.pictName != null) {
                        metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, this.pictName);
                    }
                    if (this.pictSource != null) {
                        metadata.set(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, this.pictSource);
                    }
                    if (this.embeddedDocumentExtractor.shouldParseEmbedded(metadata)) {
                        this.embeddedDocumentExtractor.parseEmbedded(stream, this.handler, metadata, outputHtml);
                    }
                } catch (IOException e) {
                    // Best effort: keep parsing the container, but record the failure instead of
                    // dropping it, the same way base64 decoding failures are recorded.
                    EmbeddedDocumentUtil.recordEmbeddedStreamException(e, this.parentMetadata);
                }
            }
            this.pictName = null;
            this.pictSource = null;
            this.rawBytes = null;
        }
    }

    /* JADX WARN: Classes with same name are omitted:
      input_file:WEB-INF/lib/tika-parsers-standard-package-2.4.0.jar:org/apache/tika/parser/microsoft/xml/WordMLParser$WordMLHandler.class
     */
    /* loaded from: input_file:WEB-INF/lib/tika-parser-microsoft-module-2.4.0.jar:org/apache/tika/parser/microsoft/xml/WordMLParser$WordMLHandler.class */
    /**
     * SAX handler that translates the WordML body into XHTML events: elements listed in
     * {@code WORDML_TO_XHTML} are re-emitted under their XHTML names and character data inside
     * the body is forwarded, except while inside an element listed in {@code IGNORE_CHARACTERS}.
     */
    private static class WordMLHandler extends DefaultHandler {
        private static final String WORD_ML_URL = "http://schemas.microsoft.com/office/word/2003/wordml";
        private static final String XHTML_URL = "http://www.w3.org/1999/xhtml";

        private final ContentHandler handler;
        /**
         * Number of currently open elements whose character data must be suppressed. A counter is
         * used instead of a flag so that closing a nested ignored element (for example
         * {@code binData} inside {@code pict}) does not re-enable output for the rest of its parent.
         */
        private int ignoreDepth = 0;
        private boolean inBody = false;
        private boolean inP;

        /**
         * @param contentHandler handler that receives the generated XHTML events
         */
        public WordMLHandler(ContentHandler contentHandler) {
            this.handler = contentHandler;
        }

        /**
         * Emits the XHTML start tag mapped to a WordML element, if any, and notes entry into the
         * body and into ignored elements.
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            String lowerName = localName.toLowerCase(Locale.US);
            if (WORD_ML_URL.equals(uri)) {
                if ("body".equals(lowerName)) {
                    this.inBody = true;
                    return;
                }
                String xhtmlName = WordMLParser.WORDML_TO_XHTML.get(lowerName);
                if (xhtmlName != null) {
                    if ("p".equals(lowerName)) {
                        // A paragraph opened while another is still open: close the open one first.
                        if (this.inP) {
                            this.handler.endElement(XHTML_URL, "p", "p");
                        }
                        this.inP = true;
                    }
                    this.handler.startElement(XHTML_URL, xhtmlName, xhtmlName, AbstractXML2003Parser.EMPTY_ATTRS);
                    if (xhtmlName.equals("table")) {
                        this.handler.startElement(XHTML_URL, "tbody", "tbody", AbstractXML2003Parser.EMPTY_ATTRS);
                    }
                }
                // NOTE(review): CompressorStreamFactory.BROTLI looks like a decompiler-substituted constant;
                // presumably it resolves to "br" (a line break element) — confirm.
                if (CompressorStreamFactory.BROTLI.equals(lowerName)) {
                    this.handler.characters(AbstractXML2003Parser.NEWLINE, 0, 1);
                }
            }
            if (isIgnored(uri, localName)) {
                this.ignoreDepth++;
            }
        }

        /** Forwards character data only inside the body and outside every ignored element. */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (this.ignoreDepth > 0 || !this.inBody) {
                return;
            }
            this.handler.characters(ch, start, length);
        }

        /**
         * Emits the XHTML end tag mapped to a WordML element, if any, and notes exit from ignored
         * elements.
         */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (WORD_ML_URL.equals(uri)) {
                String xhtmlName = WordMLParser.WORDML_TO_XHTML.get(localName.toLowerCase(Locale.US));
                if (xhtmlName != null) {
                    if (xhtmlName.equals("table")) {
                        this.handler.endElement(XHTML_URL, "tbody", "tbody");
                    }
                    boolean paragraph = "p".equals(xhtmlName);
                    // The paragraph was already closed by a nested one: reopen so the end tag stays balanced.
                    if (paragraph && !this.inP) {
                        this.handler.startElement(XHTML_URL, "p", "p", AbstractXML2003Parser.EMPTY_ATTRS);
                    }
                    this.handler.endElement(XHTML_URL, xhtmlName, xhtmlName);
                    if (paragraph) {
                        this.inP = false;
                    }
                }
            }
            if (this.ignoreDepth > 0 && isIgnored(uri, localName)) {
                this.ignoreDepth--;
            }
        }

        /**
         * Tells whether character data inside the given element must be suppressed. The
         * case-preserved name is tried first because the ignore set holds mixed-case names
         * ("binData", "DocumentProperties") that a lower-cased lookup can never match; the
         * lower-cased name is tried as well so names that matched before still match.
         */
        private static boolean isIgnored(String uri, String localName) {
            return WordMLParser.IGNORE_CHARACTERS.contains(new QName(uri, localName))
                    || WordMLParser.IGNORE_CHARACTERS.contains(new QName(uri, localName.toLowerCase(Locale.US)));
        }
    }

    /**
     * Returns the media types this parser handles.
     *
     * @param parseContext not consulted by this implementation
     * @return the shared singleton set holding the {@code application/vnd.ms-wordml} type
     */
    @Override // org.apache.tika.parser.Parser
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return WordMLParser.SUPPORTED_TYPES;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Builds the handler that receives the SAX events of the WordML document: a tee that fans
     * every event out to the superclass handler, a {@code WordMLHandler}, a
     * {@code HyperlinkHandler} created for the WordML namespace URI, and a {@code PictHandler}.
     *
     * @param contentHandler downstream handler that the WordML-specific handlers write to
     * @param metadata       document metadata, passed to the superclass handler and the picture handler
     * @param parseContext   context used to look up the embedded document extractor
     * @return the combined handler
     */
    @Override // org.apache.tika.parser.microsoft.xml.AbstractXML2003Parser
    public ContentHandler getContentHandler(ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) {
        // Created in the same order in which the original single expression evaluated them.
        final ContentHandler baseHandler = super.getContentHandler(contentHandler, metadata, parseContext);
        final ContentHandler bodyHandler = new WordMLHandler(contentHandler);
        final ContentHandler linkHandler = new HyperlinkHandler(contentHandler, "http://schemas.microsoft.com/office/word/2003/wordml");
        final EmbeddedDocumentExtractor extractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(parseContext);
        final ContentHandler pictureHandler = new PictHandler(contentHandler, metadata, extractor);
        return new TeeContentHandler(baseHandler, bodyHandler, linkHandler, pictureHandler);
    }

    /**
     * Stores this parser's media type under the {@code Content-Type} metadata key.
     *
     * @param metadata metadata object to update
     */
    @Override // org.apache.tika.parser.microsoft.xml.AbstractXML2003Parser
    public void setContentType(Metadata metadata) {
        final String contentType = MEDIA_TYPE.toString();
        metadata.set("Content-Type", contentType);
    }

    // Builds the read-only lookup from lower-cased WordML element names to XHTML element names.
    static {
        Map<String, String> wordMlToXhtml = new HashMap<>();
        wordMlToXhtml.put("p", "p");
        wordMlToXhtml.put("tbl", "table");
        wordMlToXhtml.put("tr", "tr");
        wordMlToXhtml.put("tc", "td");
        WORDML_TO_XHTML = Collections.unmodifiableMap(wordMlToXhtml);
    }
}
