package com.atlassian.bonnie.search.extractor;

import com.atlassian.bonnie.search.SearchableAttachment;

import java.io.InputStream;
import java.io.IOException;

import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Attachment content extractor for textual attachments: the raw bytes of the attachment are
 * decoded as UTF-8 and returned as the extracted text.
 */
public class DefaultTextContentExtractor extends BaseAttachmentContentExtractor
{
    private static final Logger log = LoggerFactory.getLogger(DefaultTextContentExtractor.class);

    /**
     * Extract text from mime types like 'text/*', 'application/xml*' and 'application/*+xml'.
     * <p>
     * Matching is case-insensitive and ignores any parameters following the media type
     * (e.g. <code>application/atom+xml; charset=utf-8</code> is accepted).
     *
     * @param fileName    name of the attachment; not consulted by this implementation
     * @param contentType content type of the attachment, may be <code>null</code>
     * @return <code>true</code> if the content type denotes plain or XML-based text,
     *         <code>false</code> otherwise (including when <code>contentType</code> is <code>null</code>)
     */
    protected boolean shouldExtractFrom(String fileName, String contentType)
    {
        if (contentType == null)
        {
            return false;
        }

        // Strip parameters (everything from the first ';') so that the "+xml" suffix is tested
        // against the subtype itself rather than against a trailing "charset=..." parameter.
        int paramStart = contentType.indexOf(';');
        String mediaType = (paramStart >= 0 ? contentType.substring(0, paramStart) : contentType).trim();

        return hasPrefixIgnoreCase(mediaType, "text/")
                || hasPrefixIgnoreCase(mediaType, "application/xml")
                || (hasPrefixIgnoreCase(mediaType, "application/") && hasSuffixIgnoreCase(mediaType, "+xml"));
    }

    /**
     * Reads the whole stream and decodes it as UTF-8.
     *
     * @param is         stream positioned at the start of the attachment data
     * @param attachment the attachment being processed; only used in the error log message
     * @return the decoded text, or <code>null</code> if reading the stream failed
     */
    protected String extractText(InputStream is, SearchableAttachment attachment)
    {
        try
        {
            return IOUtils.toString(is, "UTF-8");
        }
        catch (IOException e)
        {
            log.error("Couldn't extract text from attachment: " + attachment, e);
            return null;
        }
    }

    /** Case-insensitive equivalent of {@link String#startsWith(String)}. */
    private static boolean hasPrefixIgnoreCase(String value, String prefix)
    {
        return value.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /** Case-insensitive equivalent of {@link String#endsWith(String)}. */
    private static boolean hasSuffixIgnoreCase(String value, String suffix)
    {
        // A negative offset (value shorter than suffix) makes regionMatches return false.
        return value.regionMatches(true, value.length() - suffix.length(), suffix, 0, suffix.length());
    }
}
