/*
 * Decompiled with CFR 0.152.
 */
package org.opencms.search.extractors;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.Writer;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.opencms.search.extractors.CmsExtractionResult;
import org.opencms.search.extractors.I_CmsExtractionResult;
import org.opencms.search.extractors.I_CmsTextExtractor;
import org.opencms.util.CmsFileUtil;
import org.opencms.util.CmsStringUtil;
import org.xml.sax.ContentHandler;

/**
 * Base implementation of {@link I_CmsTextExtractor} that wires the different
 * {@code extractText} overloads together and offers helpers for parser based
 * extraction and for cleaning up extracted text.
 *
 * <p><b>Important:</b> {@link #extractText(byte[], String)} and
 * {@link #extractText(InputStream, String)} delegate to each other. A concrete
 * subclass has to override at least one of the two; if neither is overridden,
 * any call to one of the public {@code extractText} methods recurses without end.
 */
public abstract class A_CmsTextExtractor
implements I_CmsTextExtractor {

    /**
     * Metadata entries copied into the extraction result, as
     * {@code {metadata name, content item key}} pairs. The order of this table is
     * the order in which values are appended to the content and stored as items.
     */
    private static final String[][] METADATA_ITEMS = {
        {"title", "title"},
        {"Keywords", "keywords"},
        {"subject", "subject"},
        {"Author", "author"},
        {"creator", "creator"},
        {"Category", "category"},
        {"Comments", "comments"},
        {"Company", "company"},
        {"Manager", "manager"},
        {"producer", "producer"}
    };

    /**
     * Extracts text from the given bytes by wrapping them in a stream and calling
     * {@link #extractText(InputStream)}.
     *
     * @param content the raw document bytes
     * @return the extraction result
     * @throws Exception if the extraction fails
     */
    @Override
    public I_CmsExtractionResult extractText(byte[] content) throws Exception {
        return this.extractText(new ByteArrayInputStream(content));
    }

    /**
     * Extracts text from the given bytes by wrapping them in a stream and calling
     * {@link #extractText(InputStream, String)}.
     *
     * <p>That method in turn calls back into this one, so a subclass must override
     * at least one of the two (see the class comment).
     *
     * @param content the raw document bytes
     * @param encoding the encoding passed through to the stream based method
     * @return the extraction result
     * @throws Exception if the extraction fails
     */
    @Override
    public I_CmsExtractionResult extractText(byte[] content, String encoding) throws Exception {
        return this.extractText(new ByteArrayInputStream(content), encoding);
    }

    /**
     * Extracts text from the given stream without an encoding, i.e. calls
     * {@link #extractText(InputStream, String)} with a {@code null} encoding.
     *
     * @param in the document stream
     * @return the extraction result
     * @throws Exception if the extraction fails
     */
    @Override
    public I_CmsExtractionResult extractText(InputStream in) throws Exception {
        // the cast is required: a bare null would be ambiguous between the
        // (InputStream, String) and (InputStream, Parser) overloads
        return this.extractText(in, (String)null);
    }

    /**
     * Extracts text from the given stream by reading it completely into memory and
     * calling {@link #extractText(byte[], String)}.
     *
     * <p>That method in turn calls back into this one, so a subclass must override
     * at least one of the two (see the class comment).
     *
     * @param in the document stream
     * @param encoding the encoding passed through to the byte array based method
     * @return the extraction result
     * @throws Exception if reading the stream or the extraction fails
     */
    @Override
    public I_CmsExtractionResult extractText(InputStream in, String encoding) throws Exception {
        byte[] text = CmsFileUtil.readFully(in);
        return this.extractText(text, encoding);
    }

    /**
     * Adds a single item to the extraction output if it has a value.
     *
     * <p>When {@code CmsStringUtil.isNotEmpty(itemValue)} holds, the value is stored
     * in {@code contentItems} under {@code itemKey} and appended to {@code content},
     * preceded by a line feed. Otherwise nothing is changed.
     *
     * @param itemValue the value to add, may be empty
     * @param itemKey the key used to store the value in {@code contentItems}
     * @param content the combined content the value is appended to
     * @param contentItems the map of individual content items
     */
    protected void combineContentItem(String itemValue, String itemKey, StringBuffer content, Map<String, String> contentItems) {
        if (CmsStringUtil.isNotEmpty(itemValue)) {
            contentItems.put(itemKey, itemValue);
            content.append('\n');
            content.append(itemValue);
        }
    }

    /**
     * Runs the given parser on the stream and builds an extraction result from the
     * parsed body text and a fixed set of metadata entries.
     *
     * <p>The body text is stored under the key {@code "__raw"} (if not empty) and
     * forms the start of the combined content; each non-empty metadata value listed
     * in {@link #METADATA_ITEMS} is then added through
     * {@link #combineContentItem(String, String, StringBuffer, Map)}.
     *
     * <p>The stream is closed by this method, both after a successful parse and
     * when parsing fails.
     *
     * @param in the document stream, closed by this method
     * @param parser the parser used to read the document
     * @return the extraction result with combined content and individual items
     * @throws Exception if parsing the document or closing the stream fails
     */
    protected CmsExtractionResult extractText(InputStream in, Parser parser) throws Exception {
        // declared as LinkedHashMap (not Map) so the same CmsExtractionResult
        // constructor is selected as before
        LinkedHashMap<String, String> contentItems = new LinkedHashMap<String, String>();
        Writer writer = new StringWriter();
        ContentHandler handler = new BodyContentHandler(writer);
        Metadata meta = new Metadata();
        ParseContext context = new ParseContext();

        // try-with-resources guarantees the stream is released even if the parser throws
        try (InputStream stream = in) {
            parser.parse(stream, handler, meta, context);
        }

        String result = writer.toString();
        StringBuffer content = new StringBuffer(result);
        if (CmsStringUtil.isNotEmpty(result)) {
            contentItems.put("__raw", result);
        }
        for (String[] item : METADATA_ITEMS) {
            this.combineContentItem(meta.get(item[0]), item[1], content, contentItems);
        }
        return new CmsExtractionResult(content.toString(), contentItems);
    }

    /**
     * Replaces unwanted characters in the given text with line feeds.
     *
     * <p>Each {@code char} is classified with {@link Character#getType(char)}:
     * <ul>
     * <li>letters, decimal digits, space separators, punctuation (dash, start, end,
     *     connector, other, initial quote, final quote) and currency symbols are
     *     kept unchanged;</li>
     * <li>a line separator is always replaced by a line feed;</li>
     * <li>every other character is dropped, and a run of such characters produces
     *     at most one line feed (none if the run directly follows a line feed
     *     written by this method).</li>
     * </ul>
     *
     * <p>The text is processed per UTF-16 code unit, so surrogate halves fall into
     * the "other" group.
     *
     * @param content the text to clean up
     * @return the cleaned text, or the empty string if
     *         {@code CmsStringUtil.isEmptyOrWhitespaceOnly(content)} is true
     */
    protected String removeControlChars(String content) {
        if (CmsStringUtil.isEmptyOrWhitespaceOnly(content)) {
            return "";
        }
        char[] chars = content.toCharArray();
        StringBuilder result = new StringBuilder(chars.length);
        // true while the last thing written was a line feed standing in for unwanted input
        boolean wasUnwanted = false;
        for (char ch : chars) {
            switch (Character.getType(ch)) {
                case Character.UPPERCASE_LETTER:
                case Character.LOWERCASE_LETTER:
                case Character.TITLECASE_LETTER:
                case Character.MODIFIER_LETTER:
                case Character.OTHER_LETTER:
                case Character.DECIMAL_DIGIT_NUMBER:
                case Character.SPACE_SEPARATOR:
                case Character.DASH_PUNCTUATION:
                case Character.START_PUNCTUATION:
                case Character.END_PUNCTUATION:
                case Character.CONNECTOR_PUNCTUATION:
                case Character.OTHER_PUNCTUATION:
                case Character.CURRENCY_SYMBOL:
                case Character.INITIAL_QUOTE_PUNCTUATION:
                case Character.FINAL_QUOTE_PUNCTUATION:
                    // wanted character: copy as is
                    result.append(ch);
                    wasUnwanted = false;
                    break;
                case Character.LINE_SEPARATOR:
                    // explicit line break: always emitted, even right after another line feed
                    result.append('\n');
                    wasUnwanted = true;
                    break;
                default:
                    // anything else: collapse a whole run into a single line feed
                    if (!wasUnwanted) {
                        result.append('\n');
                        wasUnwanted = true;
                    }
                    break;
            }
        }
        return result.toString();
    }
}

