package com.uttesh.exude.stopping;

import com.uttesh.exude.common.Constants;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeSet;
import java.util.regex.Pattern;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;

/* loaded from: input_file:com/uttesh/exude/stopping/TrushDuplicates.class */
/**
 * Word-level filtering helpers: splits text into tokens, normalises them, passes them
 * through {@link StoppingParser}, and collects the results with or without duplicates.
 *
 * <p>The working buffers {@link #tempSet}, {@link #tempList} and {@link #_tempSet} are
 * {@code static} and therefore shared by every instance. {@link #reset()} replaces those
 * three buffers only; the per-instance {@code filteredSet} and {@code resultList} are never
 * cleared by this class, so they keep growing across calls on the same instance.
 *
 * <p>Nothing in this class is synchronized.
 */
public class TrushDuplicates {
    /** Accumulated result of {@link #filterData(String)}; ordered case-insensitively. */
    Set<String> filteredSet = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
    /** Accumulated result of {@link #filterDataKeepDuplicate(String)}. */
    List<String> resultList = new ArrayList<>();
    /** Shared working buffer filled by {@link #filterData(String)}. */
    public static Set<String> tempSet = new LinkedHashSet<>();
    /** Shared working buffer filled by {@link #filterDataKeepDuplicate(String)}. */
    public static List<String> tempList = new ArrayList<>();
    /** Shared working buffer rebuilt by {@link #filterDuplicate(Set)}. */
    public static Set<String> _tempSet = new HashSet<>();
    /** Lazily created shared instance, see {@link #getInstance()}. */
    public static TrushDuplicates instance = null;

    /**
     * Returns the shared instance, creating it on first use.
     * The lazy initialisation is not synchronized.
     *
     * @return the shared {@code TrushDuplicates} instance
     */
    public static TrushDuplicates getInstance() {
        if (instance == null) {
            instance = new TrushDuplicates();
        }
        return instance;
    }

    /** Protected so that instances are obtained via {@link #getInstance()} or by subclasses. */
    protected TrushDuplicates() {
    }

    /**
     * Returns the current filtered set without reading anything from {@code str}.
     *
     * @param str a path; it is only used to construct a {@link File}
     * @return the instance's accumulated filtered set
     * @throws IOException declared for interface compatibility; not thrown by this body
     */
    public Set<String> filter(String str) throws IOException {
        try {
            // The File is constructed but never read. A null path raises
            // NullPointerException here, which is reported below, as before.
            new File(str);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return this.filteredSet;
    }

    /**
     * Splits {@code str} on {@link Constants#SPACE}.
     * {@link String#split(String)} never returns {@code null}, so no fallback is needed.
     */
    private String[] separateWords(String str) {
        return str.split(Constants.SPACE);
    }

    /** Collapses whitespace runs matched by the project pattern to one space and lower-cases. */
    private static String normalize(String token) {
        return token.replaceAll(Constants.MULTIPLE_SPACE_TAB_NEW_LINE, " ").toLowerCase();
    }

    /** Returns {@code true} when {@code value} is non-null and not blank after trimming. */
    private static boolean hasText(String value) {
        return value != null && value.trim().length() > 0;
    }

    /**
     * Tokenises {@code str}, runs every normalised token through
     * {@link StoppingParser#filterStoppingWordsKeepDuplicates}, and appends the non-blank
     * results to the shared {@link #tempList}. The whole of {@code tempList} is then
     * appended to this instance's result list.
     *
     * <p>Any exception (including a {@code null} input) is printed and swallowed; the
     * result list accumulated so far is still returned.
     *
     * @param str text to filter
     * @return the instance's accumulated result list
     */
    public List<String> filterDataKeepDuplicate(String str) {
        try {
            StoppingParser stoppingParser = StoppingParser.getInstance();
            for (String word : separateWords(str)) {
                String[] parts = word.split(Constants.SPACE);
                if (parts.length > 0) {
                    for (String part : parts) {
                        String filtered = stoppingParser.filterStoppingWordsKeepDuplicates(normalize(part));
                        if (hasText(filtered)) {
                            tempList.add(filtered);
                        }
                    }
                } else {
                    // split() produced no pieces: fall back to the unsplit word.
                    String filtered = stoppingParser.filterStoppingWordsKeepDuplicates(normalize(word));
                    if (hasText(filtered)) {
                        tempList.add(filtered);
                    }
                }
            }
            if (!tempList.isEmpty()) {
                this.resultList.addAll(tempList);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return this.resultList;
    }

    /**
     * Tokenises {@code str}, normalises every token, and adds the non-blank ones to the
     * shared {@link #tempSet}, which is then merged into this instance's filtered set.
     *
     * <p>Any exception (including a {@code null} input) is printed and swallowed; the
     * filtered set accumulated so far is still returned.
     *
     * @param str text to filter
     * @return the instance's accumulated filtered set
     * @throws IOException declared for interface compatibility; not thrown by this body
     */
    public Set<String> filterData(String str) throws IOException {
        try {
            StoppingParser stoppingParser = StoppingParser.getInstance();
            for (String word : separateWords(str)) {
                String[] parts = word.split(Constants.SPACE);
                if (parts.length > 0) {
                    for (String part : parts) {
                        String token = normalize(part);
                        // NOTE(review): the result of filterStoppingWords is not used here, so
                        // the token is added regardless -- confirm whether that is intended.
                        stoppingParser.filterStoppingWords(token);
                        if (hasText(token)) {
                            tempSet.add(token);
                        }
                    }
                } else {
                    // split() produced no pieces: fall back to the unsplit word.
                    String token = normalize(word);
                    stoppingParser.filterStoppingWords(token);
                    if (hasText(token)) {
                        tempSet.add(token);
                    }
                }
            }
            if (!tempSet.isEmpty()) {
                this.filteredSet.addAll(tempSet);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return this.filteredSet;
    }

    /**
     * Extracts the text of the document at {@code str} via {@link #getData(String)},
     * removes duplicate space-separated tokens (first occurrence wins, double quotes are
     * stripped), and writes the remaining tokens, each followed by a space, to the file
     * {@code str2} using {@link Constants#UTF_8}.
     *
     * <p>Nothing is written when {@code getData} returns {@code null}.
     *
     * @param str  file path or URL of the input document
     * @param str2 path of the output file
     * @throws IOException if the output file cannot be opened or written
     */
    public void filterDuplicates(String str, String str2) throws IOException {
        BodyContentHandler data = getData(str);
        if (data == null) {
            return;
        }
        Set<String> unique = new LinkedHashSet<>();
        StringTokenizer tokenizer = new StringTokenizer(data.toString().trim(), " ");
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            if (token.trim().length() > 0) {
                unique.add(token.replace("\"", ""));
            }
        }
        // try-with-resources closes the stream and writer even if a write fails.
        try (FileOutputStream fileOut = new FileOutputStream(str2);
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fileOut, Constants.UTF_8))) {
            for (String token : unique) {
                // Stripping quotes above can leave an empty token; skip those.
                if (token.trim().length() > 0) {
                    writer.write(token + " ");
                }
            }
        }
    }

    /**
     * Removes duplicate space-separated tokens from {@code str}, keeping first occurrences
     * in their original order.
     *
     * @param str text to de-duplicate; may be {@code null}
     * @return the unique tokens, each followed by a single space; {@code ""} for {@code null}
     * @throws IOException declared for interface compatibility; not thrown by this body
     */
    public String filterDuplicates(String str) throws IOException {
        if (str == null) {
            return "";
        }
        Set<String> unique = new LinkedHashSet<>();
        StringTokenizer tokenizer = new StringTokenizer(str.trim(), " ");
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            if (token.trim().length() > 0) {
                unique.add(token);
            }
        }
        StringBuilder sb = new StringBuilder();
        for (String token : unique) {
            sb.append(token).append(' ');
        }
        return sb.toString();
    }

    /**
     * Removes duplicate tokens from {@code str}, splitting on {@link Constants#SPACE} and
     * stripping double quotes from each token. First occurrences are kept in order.
     *
     * @param str text to de-duplicate
     * @return {@code null} when {@code str} is {@code null} or blank; {@code str} itself when
     *         splitting yields no tokens; otherwise the unique tokens, each followed by a space
     * @throws IOException declared for interface compatibility; not thrown by this body
     */
    public String filterDuplicatesInText(String str) throws IOException {
        if (str == null || str.trim().length() <= 0) {
            return null;
        }
        String[] tokens = str.split(Constants.SPACE);
        if (tokens.length == 0) {
            return str;
        }
        Set<String> unique = new LinkedHashSet<>();
        for (String token : tokens) {
            if (token.trim().length() > 0) {
                unique.add(token.replace("\"", ""));
            }
        }
        StringBuilder sb = new StringBuilder();
        for (String token : unique) {
            // Stripping quotes above can leave an empty token; skip those.
            if (token.trim().length() > 0) {
                sb.append(token).append(' ');
            }
        }
        return sb.toString();
    }

    /**
     * Parses the document at {@code str} with Tika's {@link AutoDetectParser} and returns
     * the handler holding its body text (write limit 10485760 characters).
     *
     * <p>{@code str} is treated as a URL when it matches
     * {@link Constants#URL_REGULAR_EXPRESSION}, otherwise as a local file path. The input
     * stream is closed here because the parser does not close it.
     *
     * @param str URL or file path of the document
     * @return the populated handler; an empty handler when the URL host is unknown;
     *         {@code null} when any other exception occurs (the stack trace is printed)
     */
    public static BodyContentHandler getData(String str) {
        try {
            AutoDetectParser parser = new AutoDetectParser();
            BodyContentHandler handler = new BodyContentHandler(10485760);
            Metadata metadata = new Metadata();
            if (Pattern.matches(Constants.URL_REGULAR_EXPRESSION, str)) {
                try (InputStream in = new URL(str).openConnection().getInputStream()) {
                    parser.parse(in, handler, metadata, new ParseContext());
                } catch (UnknownHostException e) {
                    // Unreachable host is reported but not fatal: the empty handler is returned.
                    System.err.println("UnkonwnHost : " + e.getMessage());
                }
            } else {
                try (InputStream in = new FileInputStream(new File(str))) {
                    parser.parse(in, handler, metadata, new ParseContext());
                }
            }
            return handler;
        } catch (Exception e2) {
            e2.printStackTrace();
            return null;
        }
    }

    /**
     * Rebuilds the shared {@link #_tempSet} from {@code set}: every entry is split on
     * {@link Constants#SPACE}, each piece is normalised, and non-blank pieces are added.
     * An entry that splits into nothing is added unchanged.
     *
     * @param set entries to split and de-duplicate
     */
    public static void filterDuplicate(Set<String> set) {
        _tempSet = new HashSet<>();
        for (String entry : set) {
            String[] words = entry.split(Constants.SPACE);
            if (words.length == 0) {
                _tempSet.add(entry);
                continue;
            }
            for (String word : words) {
                String[] parts = word.split(Constants.SPACE);
                if (parts.length > 0) {
                    for (String part : parts) {
                        String token = normalize(part);
                        if (hasText(token)) {
                            _tempSet.add(token);
                        }
                    }
                } else {
                    String token = normalize(word);
                    if (hasText(token)) {
                        _tempSet.add(token);
                    }
                }
            }
        }
    }

    /**
     * Returns the shared set built by {@link #filterDuplicate(Set)}.
     * Note that this is {@link #_tempSet}, not {@link #tempSet}.
     *
     * @return the live shared set (not a copy)
     */
    public static Set<String> getTempSet() {
        return _tempSet;
    }

    /**
     * Returns the shared list filled by {@link #filterDataKeepDuplicate(String)}.
     *
     * @return the live shared list (not a copy)
     */
    public static List<String> getTempList() {
        return tempList;
    }

    /**
     * Replaces the three shared static buffers with fresh empty collections.
     * The per-instance {@code filteredSet} and {@code resultList} are left untouched.
     */
    public void reset() {
        tempSet = new LinkedHashSet<>();
        _tempSet = new HashSet<>();
        tempList = new ArrayList<>();
    }
}
