package javatools.parsers;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javatools.administrative.Announce;
import javatools.administrative.D;
import javatools.administrative.NonsharedParameters;
import javatools.datatypes.FinalMap;
import javatools.filehandlers.FileLines;

/* loaded from: input_file:javatools/parsers/NameML.class */
public class NameML {
    // Classpath prefix that getConfigFileStream prepends to a resource name when CONFIG_DIR is null.
    static final String PARSINGRESOURCES_PATH = "/javatools/resources/parsing/";
    // NOTE(review): no use of this constant is visible in this part of the file; presumably a generic tag for "any name" — confirm.
    public static final String ANYNAME = "NAME";
    // Regex fragment: one uppercase letter (Unicode category Lu).
    public static final String U = "\\p{Lu}";
    // Regex fragment: one lowercase letter (Unicode category Ll).
    public static final String L = "\\p{Ll}";
    // Regex fragment: one letter of any case (Unicode category L).
    public static final String A = "\\p{L}";
    // Regex fragment: a blank, i.e. one or more whitespace/underscore characters (possessive, non-capturing).
    public static final String B = "(?:[\\s_]++)";
    // Regex fragment: word boundary.
    public static final String BD = "\\b";
    // Regex fragment: one or more comma/whitespace/underscore characters (possessive).
    public static final String BC = "[,\\s_]++";
    // Regex fragment: one digit.
    public static final String DG = "\\d";
    // Regex fragment: a literal hyphen.
    public static final String H = "-";
    // Regex alternation operator.
    public static final String or = "|";
    // Alternation of family-name particles ("al", "de", "del", "de la", "von der", "van", "d'", ...), first letter in either case.
    public static final String familyNamePrefix = "(?:[aA]l|[dD][ea]|[dD]el|[dD]e las|[bB]in|[dD]e la|[dD]e los|[dD]i|[zZ]u[mr]|[aA]m|[vV][oa]n de[rnm]|[vV][oa][nm]|[dD]o|[dD]')";
    // Alternation of tokens that may follow a family name: honours (CBE, OBE, ...), degrees (PhD, M.D.) and generation markers (Jr, Sr, junior, senior, hijo, hija).
    public static final String familyNameSuffix = "(?:CBE|DBE|GBE|[jJ]r\\.?|[jJ]unior|hijo|hija|P[hH]\\.?[dD]\\.?|KBE|MBE|M\\.?D\\.|OBE|[sS]enior|[sS]r\\.?)";
    // Per-language title sets, filled by init() from the "titles.<languageId>" resource.
    // PersonNameML uses them to decide that the name following a title is a given name.
    protected static Set<String> titlesForGivenNamesFr;
    protected static Set<String> titlesForGivenNamesIt;
    // Per-language stop-word sets, filled by init() from the "stopwords.<languageId>" resource and queried by isStopWord.
    protected static Set<String> stopWordDE;
    protected static Set<String> stopWordFR;
    protected static Set<String> stopWordES;
    protected static Set<String> stopWordEN;
    protected static Set<String> stopWordIT;
    // Regex: an uppercase letter followed by word characters, whitespace or dots.
    public static final String teamName = "\\b\\p{Lu}[\\w\\s\\.]+\\b";
    // Regex: prepositions allowed between the parts of a multi-word name (see safeNamesPattern).
    public static final String prep = "(?:on|of|for)";
    // Regex: anything that starts with an uppercase letter.
    public static final String laxName = "\\b\\p{Lu}.*\\b";
    // Regex: an uppercase letter followed by at least two further units, each a letter/digit or a hyphen plus uppercase letter/digit.
    public static final String safeName = "\\b\\p{Lu}(-[\\p{Lu}\\d]|[\\p{Lu}\\p{Ll}\\d]){2,}\\b";
    // The string this name was constructed from.
    protected String original;
    // Cache for the result of normalize(); null until first computed.
    protected String normalized;
    // Regex: prefixes that attach directly to a family name (al-, Mc, Di, De, Mac, O'), optionally followed by a blank.
    public static final String directFamilyNamePrefix = "\\b(?:(?:al-|Mc|Di|De|Mac|O')(?:[\\s_]++)?)";
    // Regex: one capitalised word (an uppercase letter followed by lowercase letters).
    public static final String personNameComponent = "\\p{Lu}\\p{Ll}+";
    // NOTE(review): as written this matches exactly ONE character between single quotes; a quantifier after [^'] may be missing — confirm.
    public static final String nickName = "(?:'[^']')";
    // Per-language person-name patterns; null until init() assigns them.
    public static Pattern laxPersonNamePatternEn;
    public static Pattern laxPersonNamePatternDe;
    public static Pattern laxPersonNamePatternEs;
    public static Pattern laxPersonNamePatternFr;
    public static Pattern laxPersonNamePatternIt;
    public static Pattern safePersonNamePatternEn;
    public static Pattern safePersonNamePatternDe;
    public static Pattern safePersonNamePatternEs;
    public static Pattern safePersonNamePatternFr;
    public static Pattern safePersonNamePatternIt;
    // Directory holding the resource files; when null, getConfigFileStream reads from the classpath instead. Set by init(String).
    static File CONFIG_DIR = null;
    // Set to true at the end of init() so that later init() calls return immediately.
    protected static boolean hasBeenInitialized = false;
    // Regex: a run of the letters X, I and V (roman numeral), delimited by word boundaries.
    public static String roman = "\\b(?:[XIV]++)\\b";
    // Regex: the word "of".
    public static String of = "\\bof\\b";
    // Precompiled form of the familyNamePrefix regex; used by isFamilyNamePrefix.
    public static final Pattern familyNamePrefixPattern = Pattern.compile("(?:[aA]l|[dD][ea]|[dD]el|[dD]e las|[bB]in|[dD]e la|[dD]e los|[dD]i|[zZ]u[mr]|[aA]m|[vV][oa]n de[rnm]|[vV][oa][nm]|[dD]o|[dD]')");
    // Regex: articles that introduce an attribute after a given name (the, der, die, il, la, le).
    public static String attributePrefix = "(?:the|der|die|il|la|le)";
    // Compiled once from the initial value of attributePrefix; it does not follow later reassignments of that field.
    public static Pattern attributePrefixPattern = Pattern.compile(attributePrefix);
    // Precompiled form of the familyNameSuffix regex; used by isPersonNameSuffix and PersonNameML.
    public static final Pattern familyNameSuffixPattern = Pattern.compile("(?:CBE|DBE|GBE|[jJ]r\\.?|[jJ]unior|hijo|hija|P[hH]\\.?[dD]\\.?|KBE|MBE|M\\.?D\\.|OBE|[sS]enior|[sS]r\\.?)");
    // Per-language title patterns built by init() via createTitlePattern; null before init() or if the titles file could not be read.
    public static Pattern titlePatternEn = null;
    public static Pattern titlePatternDe = null;
    public static Pattern titlePatternFr = null;
    public static Pattern titlePatternEs = null;
    public static Pattern titlePatternIt = null;
    // Remaining per-language title sets (see titlesForGivenNamesFr/It); filled by init().
    protected static Set<String> titlesForGivenNamesEn = null;
    protected static Set<String> titlesForGivenNamesDe = null;
    protected static Set<String> titlesForGivenNamesEs = null;
    // Regex: legal-form suffixes of company names (Co, & Co, Corp, Inc, Ltd, Limited, LLC, AG, GmbH, S.A., ...).
    // NOTE(review): the alternative "\\bR[cC]orporation\\b" only matches "Rcorporation"/"RCorporation"; the leading R looks like a typo — confirm.
    public static final String companyNameSuffix = "(?:[cC][oO]\\.|[cC][oO]\\b|&(?:[\\s_]++)?[cC][oO]\\.|&(?:[\\s_]++)?[cC][oO]\\b|\\b[cC][oO][rR][pP]\\.|\\b[cC][oO][rR][pP]\\b|\\bR[cC]orporation\\b|\\b[iI][nN][cC]\\.|\\b[iI][nN][cC]\\b|\\b[iI]ncorporated\\b|\\b[iI]ncorporation\\b|\\b[iI]ncorp\\.?|\\b[iI]ncorp\\b|\\b[lL][tT][dD]\\.|\\b[lL][tT][dD]\\b|\\b[lL]imited\\b|\\bp\\.l\\.c\\.\\b|\\bPty\\.\\b|\\bLLC\\b|\\bAG\\b|\\bGmbH\\b|\\bKG\\b|\\bOHG\\b|\\bS\\.R\\.L\\.\\b|\\bS\\.p\\.A\\.\\b|\\bS\\.A\\.\\b)";
    // Precompiled companyNameSuffix; used by isCompanyNameSuffix.
    public static final Pattern companyNameSuffixPattern = Pattern.compile(companyNameSuffix);
    // Precompiled form of the teamName regex.
    public static final Pattern teamNamePattern = Pattern.compile("\\b\\p{Lu}[\\w\\s\\.]+\\b");
    // Precompiled form of the laxName regex; used by couldBeName.
    public static final Pattern laxNamePattern = Pattern.compile("\\b\\p{Lu}.*\\b");
    // Precompiled form of the safeName regex; used by isName.
    public static final Pattern safeNamePattern = Pattern.compile("\\b\\p{Lu}(-[\\p{Lu}\\d]|[\\p{Lu}\\p{Ll}\\d]){2,}\\b");
    // One safe name followed by any number of further safe names, each separated by a blank and optionally a preposition (on/of/for); used by isNames.
    public static final Pattern safeNamesPattern = Pattern.compile("\\b\\p{Lu}(-[\\p{Lu}\\d]|[\\p{Lu}\\p{Ll}\\d]){2,}\\b" + optMul("(?:[\\s_]++)" + opt("(?:on|of|for)(?:[\\s_]++)") + "\\b\\p{Lu}(-[\\p{Lu}\\d]|[\\p{Lu}\\p{Ll}\\d]){2,}\\b"));
    // Same as safeNamesPattern but without the optional preposition between names.
    public static final Pattern safeNamesPatternNoPrep = Pattern.compile("\\b\\p{Lu}(-[\\p{Lu}\\d]|[\\p{Lu}\\p{Ll}\\d]){2,}\\b" + optMul("(?:[\\s_]++)\\b\\p{Lu}(-[\\p{Lu}\\d]|[\\p{Lu}\\p{Ll}\\d]){2,}\\b"));
    // NOTE(review): the blank fragment "(?:[\\s_]++)" sits INSIDE the character class here, so '(', '?', ':', '+' and ')' are accepted as literal characters — probably unintended; confirm.
    public static final Pattern laxAbbreviationPattern = Pattern.compile("\\b\\p{Lu}[\\p{Lu}\\d(?:[\\s_]++)-]++\\b");
    // An uppercase letter followed by uppercase letters, digits, hyphens or dots.
    public static final Pattern safeAbbreviationPattern = Pattern.compile("\\b\\p{Lu}[\\p{Lu}\\d-\\.]++\\b");
    // Group 1: a lax name; group 2: a company suffix. The Pattern operand is concatenated via its toString(), i.e. its regex source.
    public static final Pattern laxCompanyPattern = Pattern.compile("(" + laxNamePattern + ")[,\\s_]++(" + companyNameSuffix + ")");
    // Group 1: safe names, optionally joined by '&' to further safe names; the company suffix follows in a later group.
    public static final Pattern safeCompanyPattern = Pattern.compile("(" + safeNamesPatternNoPrep + opt(opt("(?:[\\s_]++)") + "&" + opt("(?:[\\s_]++)") + safeNamesPatternNoPrep) + ")[,\\s_]++(" + companyNameSuffix + ")");
    // One given-name token: a capitalised word, an abbreviation ending in a dot, or a single uppercase initial.
    public static final String givenNameComponent = or(or("\\p{Lu}\\p{Ll}+\\b", "\\p{Lu}\\p{Ll}*+\\."), "\\p{Lu}\\b");
    // One given name: one or more given-name tokens joined by hyphens.
    public static final String givenName = "\\b" + mulHyp(givenNameComponent);
    // One or more given names separated by blanks.
    public static final String givenNames = mul(givenName);
    // A family name: hyphen-joined capitalised words, each optionally preceded by a direct prefix (Mc, Mac, O', ...).
    public static final String familyName = "\\b" + mulHyp(opt("\\b(?:(?:al-|Mc|Di|De|Mac|O')(?:[\\s_]++)?)") + "\\p{Lu}\\p{Ll}+") + "\\b";
    public static Map<String, String> usStates = new FinalMap("AL", "Alabama", "AK", "Alaska", "AS", "American Samoa", "AZ", "Arizona", "AR", "Arkansas", "CA", "California", "CALIF", "California", "CO", "Colorado", "CT", "Connecticut", "DE", "Delaware", "DC", "District of Columbia", "FM", "Federated States of Micronesia", "FL", "Florida", "GA", "Georgia", "GU", "Guam", "HI", "Hawaii", "ID", "Idaho", "IL", "Illinois", "IN", "Indiana", "IA", "Iowa", "KS", "Kansas", "KY", "Kentucky", "LA", "Louisiana", "ME", "Maine", "MH", "Marshall Islands", "MD", "Maryland", "MA", "Massachusetts", "MI", "Michigan", "MN", "Minnesota", "MS", "Mississippi", "MO", "Missouri", "MT", "Montana", "NE", "Nebraska", "NV", "Nevada", "NH", "New Hampshire", "NJ", "New Jersey", "NM", "New Mexico", "NY", "New York", "NC", "North Carolina", "ND", "North Dakota", "MP", "Northern Mariana Islands", "OH", "Ohio", "OK", "Oklahoma", "OR", "Oregon", "PW", "Palau", "PA", "Pennsylvania", "PR", "Puerto Rico", "RI", "Rhode Island", "SC", "South Carolina", "SD", "South Dakota", "TN", "Tennessee", "TX", "Texas", "UT", "Utah", "VT", "Vermont", "VI", "Virgin Islands", "VA", "Virginia", "WA", "Washington", "WV", "West Virginia", "WI", "Wisconsin", "WY", "Wyoming");
    public static Map<String, String> languageCodes = new FinalMap("aa", "Afar", "ab", "Abkhazian", "ae", "Avestan", "af", "Afrikaans", "ak", "Akan", "am", "Amharic", "an", "Aragonese", "ar", "Arabic", "as", "Assamese", "av", "Avaric", "ay", "Aymara", "az", "Azerbaijani", "ba", "Bashkir", "be", "Belarusian", "bg", "Bulgarian", "bh", "Bihari", "bi", "Bislama", "bm", "Bambara", "bn", "Bengali", "bo", "Tibetan", "br", "Breton", "bs", "Bosnian", "ca", "Catalan", "ce", "Chechen", "ch", "Chamorro", "co", "Corsican", "cr", "Cree", "cs", "Czech", "cu", "Church", "cv", "Chuvash", "cy", "Welsh", "da", "Danish", "de", "German", "dv", "Divehi", "dz", "Dzongkha", "ee", "Ewe", "el", "Greek", "en", "English", "eo", "Esperanto", "es", "Spanish", "et", "Estonian", "eu", "Basque", "fa", "Persian", "ff", "Fulah", "fi", "Finnish", "fj", "Fijian", "fo", "Faroese", "fr", "French", "fy", "Western Frisian", "ga", "Irish", "gd", "Scottish", "gl", "Galician", "gn", "Guaran�", "gu", "Gujarati", "gv", "Manx", "ha", "Hausa", "he", "Hebrew", "hi", "Hindi", "ho", "Hiri", "hr", "Croatian", "ht", "Haitian", "hu", "Hungarian", "hy", "Armenian", "hz", "Herero", "ia", "Interlingua", "id", "Indonesian", "ie", "Interlingue", "ig", "Igbo", "ii", "Sichuan", "ik", "Inupiaq", "io", "Ido", "is", "Icelandic", "it", "Italian", "iu", "Inuktitut", "ja", "Japanese", "jv", "Javanese", "ka", "Georgian", "kg", "Kongo", "ki", "Kikuyu", "kj", "Kwanyama", "kk", "Kazakh", "kl", "Kalaallisut", "km", "Khmer", "kn", "Kannada", "ko", "Korean", "kr", "Kanuri", "ks", "Kashmiri", "ku", "Kurdish", "kv", "Komi", "kw", "Cornish", "ky", "Kirghiz", "la", "Latin", "lb", "Luxembourgish", "lg", "Ganda", "li", "Limburgish", "ln", "Lingala", "lo", "Lao", "lt", "Lithuanian", "lu", "Luba-Katanga", "lv", "Latvian", "mg", "Malagasy", "mh", "Marshallese", "mi", "Maori", "mk", "Macedonian", "ml", "Malayalam", "mn", "Mongolian", "mo", "Moldavian", "mr", "Marathi", "ms", "Malay", "mt", "Maltese", "my", "Burmese", "na", "Nauru", "nb", 
"Norwegian", "nd", "North", "ne", "Nepali", "ng", "Ndonga", "nl", "Dutch", "nn", "Norwegian", "no", "Norwegian", "nr", "South", "nv", "Navajo", "ny", "Chichewa", "oc", "Occitan", "oj", "Ojibwa", "om", "Oromo", "or", "Oriya", "os", "Ossetian", "pa", "Panjabi", "pi", "Pali", "pl", "Polish", "ps", "Pashto", "pt", "Portuguese", "qu", "Quechua", "rm", "Raeto-Romance", "rn", "Kirundi", "ro", "Romanian", "ru", "Russian", "rw", "Kinyarwanda", "ry", "Rusyn", "sa", "Sanskrit", "sc", "Sardinian", "sd", "Sindhi", "se", "Northern", "sg", "Sango", "sh", "Serbo-Croatian", "si", "Sinhalese", "sk", "Slovak", "sl", "Slovenian", "sm", "Samoan", "sn", "Shona", "so", "Somali", "sq", "Albanian", "sr", "Serbian", "ss", "Swati", "st", "Sotho", "su", "Sundanese", "sv", "Swedish", "sw", "Swahili", "ta", "Tamil", "te", "Telugu", "tg", "Tajik", "th", "Thai", "ti", "Tigrinya", "tk", "Turkmen", "tl", "Tagalog", "tn", "Tswana", "to", "Tonga", "tr", "Turkish", "ts", "Tsonga", "tt", "Tatar", "tw", "Twi", "ty", "Tahitian", "ug", "Uighur", "uk", "Ukrainian", "ur", "Urdu", "uz", "Uzbek", "ve", "Venda", "vi", "Vietnamese", "vo", "Volapük", "wa", "Walloon", "wo", "Wolof", "xh", "Xhosa", "yi", "Yiddish", "yo", "Yoruba", "za", "Zhuang", "zh", "Chinese", "zu", "Zulu");
    public static Map<String, String> nationality2country = new FinalMap("African", "Africa", "Antarctic", "Antarctica", "Americana", "Americas", "Asian", "Asia", "Middle Eastern", "Middle East", "Australasian", "Australasia", "Australian", "Australia", "Eurasian", "Eurasia", "European", "Europe", "North American", "North America", "Oceanian", "Oceania", "South American", "South America", "Afghan", "Afghanistan", "Albanian", "Albania", "Algerian", "Algeria", "American Samoan", "American Samoa", "Andorran", "Andorra", "Angolan", "Angola", "Anguillan", "Anguilla", "Antiguan", "Antigua and Barbuda", "Argentine", "Argentina", "Argentinean", "Argentina", "Argentinian", "Argentina", "Armenian", "Armenia", "Aruban", "Aruba", "Austrian", "Austria", "Azerbaijani", "Azerbaijan", "Azeri", "Azerbaijan", "Bahamian", "Bahamas", "Bahraini", "Bahrain", "Bangladeshi", "Bangladesh", "Barbadian", "Barbados", "Bajan", "Barbados", "Belarusian", "Belarus", "Belgian", "Belgium", "Belizean", "Belize", "Beninese", "Benin", "Bermudian", "Bermuda", "Bermudan", "Bermuda", "Bhutanese", "Bhutan", "Bolivian", "Bolivia", "Bosnian", "Bosnia and Herzegovina", "Bosniak", "Bosnia and Herzegovina", "Herzegovinian", "Bosnia and Herzegovina", "Botswanan", "Botswana", "Brazilian", "Brazil", "British Virgin Island", "British Virgin Islands", "Bruneian", "Brunei", "Bulgarian", "Bulgaria", "Burkinabe", "Burkina Fasoa", "Burmese", "Burmab", "Burundian", "Burundi", "Cambodian", "Cambodia", "Cameroonian", "Cameroon", "Canadian", "Canada", "Cape Verdean", "Cape Verde", "Caymanian", "Cayman Islands", "Central African", "Central African Republic", "Chadian", "Chad", "Chilean", "Chile", "Chinese", "People's Republic of China", "See Taiwan", "Republic of China", "Christmas Island", "Christmas Island", "Cocos Island", "Cocos (Keeling) Islands", "Colombian", "Colombia", "Comorian", "Comoros", "Congolese", "Democratic Republic of the Congo", "Cook Island", "Cook Islands", "Costa Rican", "Costa Rica", "Ivorian", "Côte 
d'Ivoire", "Croatian", "Croatia", "Cuban", "Cuba", "Cypriot", "Cyprus", "Czech", "Czech Republic", "Danish", "Denmark", "Djiboutian", "Djibouti", "Dominicand", "Dominica", "Dominicane", "Dominican Republic", "Timorese", "East Timor", "Ecuadorian", "Ecuador", "Egyptian", "Egypt", "Salvadoran", "El Salvador", "English", "England", "Equatorial Guinean", "Equatorial Guinea", "Eritrean", "Eritrea", "Estonian", "Estonia", "Ethiopian", "Ethiopia", "Falkland Island", "Falkland Islands", "Faroese", "Faroe Islands", "Fijian", "Fiji", "Finnish", "Finland", "French", "France", "French Guianese", "French Guiana", "French Polynesian", "French Polynesia", "Gabonese", "Gabon", "Gambian", "Gambia", "Georgian", "Georgia", "German", "Germany", "Ghanaian", "Ghana", "Gibraltar", "Gibraltar", "Greek", "Greece", "Greenlandic", "Greenland", "Grenadian", "Grenada", "Guadeloupe", "Guadeloupe", "Guamanian", "Guam", "Guatemalan", "Guatemala", "Guinean", "Guinea", "Guyanese", "Guyana", "Haitian", "Haiti", "Honduran", "Honduras", "Hong Kong", "Hong Kong", "Hungarian", "Hungary", "Icelandic", "Iceland", "Indian", "India", "Indonesian", "Indonesia", "Iranian", "Iran", "Iraqi", "Iraq", "Manx", "Isle of Man", "Israeli", "Israel", "Italian", "Italy", "Jamaican", "Jamaica", "Japanese", "Japan", "Jordanian", "Jordan", "Kazakhstaniz", "Kazakhstan", "Kenyan", "Kenya", "I-Kiribati", "Kiribati", "North Korean", "North Korea", "South Korean", "South Korea", "Kosovar", "Kosovo", "Kuwaiti", "Kuwait", "Kyrgyzstani", "Kyrgyzstan", "Laotian", "Laos", "Latvian", "Latvia", "Lebanese", "Lebanon", "Basotho", "Lesotho", "Liberian", "Liberia", "Libyan", "Libya", "Liechtenstein", "Liechtenstein", "Lithuanian", "Lithuania", "Luxembourg", "Luxembourg", "Macanese", "Macau", "Macedonian", "Republic of Macedonia", "Malagasy", "Madagascar", "Malawian", "Malawi", "Malaysian", "Malaysia", "Maldivian", "Maldives", "Malian", "Mali", "Maltese", "Malta", "Marshallese", "Marshall Islands", "Martiniquais", "Martinique", 
"Mauritanian", "Mauritania", "Mauritian", "Mauritius", "Mahoran", "Mayotte", "Mexican", "Mexico", "Micronesian", "Micronesia", "Moldovan", "Moldova", "Monégasque", "Monaco", "Mongolian", "Mongolia", "Montenegrin", "Montenegro", "Montserratian", "Montserrat", "Moroccan", "Morocco", "Mozambican", "Mozambique", "Namibian", "Namibia", "Nauruan", "Nauru", "Nepali", "Nepal", "Dutch", "Netherlands", "Dutch Antillean", "Netherlands Antilles", "New Caledonian", "New Caledonia", "New Zealand", "New Zealand", "Nicaraguan", "Nicaragua", "Niuean", "Niue", "Nigerien", "Niger", "Nigerian", "Nigeria", "Norwegian", "Norway", "Northern Irish", "Northern Ireland", "Northern Marianan", "Northern Marianas", "Omani", "Oman", "Pakistani", "Pakistan", "Palestinian", "Palestinian territories", "Palauan", "Palau", "Panamanian", "Panama", "Papua New Guinean", "Papua New Guinea", "Paraguayan", "Paraguay", "Peruvian", "Peru", "Philippine", "Philippines", "Filipino", "Philippines", "Pitcairn Island", "Pitcairn Island", "Polish", "Poland", "Portuguese", "Portugal", "Puerto Rican", "Puerto Rico", "Qatari", "Qatar", "Irish", "Republic of Ireland", "Réunionese", "Réunion", "Romanian", "Romania", "Russian", "Russia", "Rwandan", "Rwanda", "St. Helenian", "St. Helena", "Kittitian", "St. Kitts and Nevis", "St. Lucian", "St. Lucia", "Saint-Pierrais", "Saint-Pierre and Miquelon", "St. Vincentian", "St. 
Vincent and the Grenadines", "Samoan", "Samoa", "Sammarinese", "San Marino", "São Toméan", "São Tomé and Príncipe", "Saudi", "Saudi Arabia", "Scottish", "Scotland", "Senegalese", "Senegal", "Serbian", "Serbia", "Seychellois", "Seychelles", "Sierra Leonean", "Sierra Leone", "Singaporean", "Singapore", "Slovak", "Slovakia", "Slovene", "Slovenia", "Slovenian", "Slovenia", "Solomon Island", "Solomon Islands", "Somali", "Somalia", "Somaliland", "Somaliland", "South African", "South Africa", "Spanish", "Spain", "Sri Lankan", "Sri Lanka", "Sudanese", "Sudan", "Surinamese", "Surinam", "Swazi", "Swaziland", "Swedish", "Sweden", "Swiss", "Switzerland", "Syrian", "Syria", "Taiwanese", "Taiwan", "Tajikistani", "Tajikistan", "Tanzanian", "Tanzania", "Thai", "Thailand", "Togolese", "Togo", "Tongan", "Tonga", "Trinidadian", "Trinidad and Tobago", "Tunisian", "Tunisia", "Turkish", "Turkey", "Turkmen", "Turkmenistan", "Tuvaluan", "Tuvalu", "Ugandan", "Uganda", "Ukrainian", "Ukraine", "Emirati", "United Arab Emirates", "British", "United Kingdom", "American", "United States of America", "Uruguayan", "Uruguay", "Uzbekistani", "Uzbekistan", "Uzbek", "Uzbekistan", "Vanuatuan", "Vanuatu", "Venezuelan", "Venezuela", "Vietnamese", "Vietnam", "Virgin Island", "Virgin Islands", "Welsh", "Wales", "Wallisian", "Wallis and Futuna", "Sahrawi", "Western Sahara", "Yemeni", "Yemen", "Zambian", "Zambia", "Zimbabwean", "Zimbabwe");

    /* loaded from: input_file:javatools/parsers/NameML$AbbreviationML.class */
    /**
     * A name that is an abbreviation; its normalized form is the base
     * normalization converted to upper case.
     */
    public static class AbbreviationML extends NameML {
        /**
         * Creates an abbreviation from the given string.
         *
         * @param str the abbreviation as it appears in the text; must not be null
         * @throws NullPointerException if {@code str} is null
         */
        public AbbreviationML(String str) {
            super(str);
            // The previous code matched str against laxAbbreviationPattern and
            // discarded the result; its only observable effect was failing fast
            // on null, which is kept here without paying for a regex match.
            if (str == null) {
                throw new NullPointerException("str");
            }
        }

        /**
         * Returns the upper-cased base normalization, computed once and cached.
         *
         * @return the normalized abbreviation
         */
        @Override // javatools.parsers.NameML
        public String normalize() {
            if (this.normalized == null) {
                // Locale.ROOT keeps the result independent of the JVM default
                // locale (e.g. Turkish would otherwise map 'i' to a dotted capital).
                this.normalized = super.normalize().toUpperCase(java.util.Locale.ROOT);
            }
            return this.normalized;
        }

        /**
         * Returns a multi-line, human-readable description of this abbreviation.
         *
         * @return the description text
         */
        @Override // javatools.parsers.NameML
        public String describe() {
            return "Abbreviation\n  Original: " + this.original + "\n  Normalized: " + normalize();
        }
    }

    /* loaded from: input_file:javatools/parsers/NameML$CompanyNameML.class */
    /**
     * A company name, split into the name proper and its legal-form suffix
     * according to {@code laxCompanyPattern}.
     */
    public static class CompanyNameML extends NameML {
        /** Name part (pattern group 1); null if the input did not match. */
        protected String name;
        /** Suffix part (pattern group 2); null if the input did not match. */
        protected String suffix;

        /**
         * Parses the given string as a company name.
         *
         * @param str the company name as it appears in the text
         */
        public CompanyNameML(String str) {
            super(str);
            Matcher parsed = laxCompanyPattern.matcher(str);
            if (!parsed.matches()) {
                // Not a recognisable company name: both parts stay null.
                return;
            }
            this.name = parsed.group(1);
            this.suffix = parsed.group(2);
        }

        /** @return the name part, or null if the input did not match */
        public String name() {
            return name;
        }

        /** @return the suffix part, or null if the input did not match */
        public String suffix() {
            return suffix;
        }

        /**
         * The normalized form of a company name is its name part.
         *
         * @return the name part, or null if the input did not match
         */
        @Override // javatools.parsers.NameML
        public String normalize() {
            return name;
        }

        /** @return a multi-line, human-readable description of this company name */
        @Override // javatools.parsers.NameML
        public String describe() {
            StringBuilder text = new StringBuilder("CompanyName\n  Original: ");
            text.append(this.original);
            text.append("\n  Name: ").append(this.name);
            text.append("\n  Suffix: ").append(this.suffix);
            text.append("\n  Normalized: ").append(normalize());
            return text.toString();
        }
    }

    /* loaded from: input_file:javatools/parsers/NameML$PersonNameML.class */
    /**
     * A person name decomposed into titles, given names, nickname, family name
     * (with prefix and suffix), attribute, roman numeral and city, using the
     * language-specific lax person-name pattern.
     */
    public static class PersonNameML extends NameML {
        protected String myTitles;
        protected String myGivenNames;
        protected String myFamilyNamePrefix;
        protected String myAttributePrefix;
        protected String myFamilyName;
        protected String myAttribute;
        protected String myFamilyNameSuffix;
        protected String myRoman;
        protected String myCity;
        protected String myNickname;

        /**
         * Extracts capture group {@code i} from the matcher.
         *
         * @param matcher a matcher that has already matched
         * @param i       the group index
         * @return null for an absent or empty group; otherwise the group text
         *         with one trailing blank character removed, or else one leading
         *         blank character removed, or else unchanged
         */
        protected static String getComponent(Matcher matcher, int i) {
            String text = matcher.group(i);
            if (text == null || text.length() == 0) {
                return null;
            }
            if (text.matches(".+(?:[\\s_]++)")) {
                return text.substring(0, text.length() - 1);
            }
            if (text.matches("(?:[\\s_]++).+")) {
                return text.substring(1);
            }
            return text;
        }

        /** Picks the lax person-name pattern for the language, or rejects the language. */
        private static Pattern laxPatternFor(Language language) {
            if (language.equals(Language.ENGLISH)) {
                return laxPersonNamePatternEn;
            }
            if (language.equals(Language.FRENCH)) {
                return laxPersonNamePatternFr;
            }
            if (language.equals(Language.SPANISH)) {
                return laxPersonNamePatternEs;
            }
            if (language.equals(Language.GERMAN)) {
                return laxPersonNamePatternDe;
            }
            if (language.equals(Language.ITALIAN)) {
                return laxPersonNamePatternIt;
            }
            throw new IllegalArgumentException("Unsupported language");
        }

        /** Picks the set of titles that announce a given name for the language, or rejects the language. */
        private static Set<String> givenNameTitlesFor(Language language) {
            if (language.equals(Language.ENGLISH)) {
                return titlesForGivenNamesEn;
            }
            if (language.equals(Language.FRENCH)) {
                return titlesForGivenNamesFr;
            }
            if (language.equals(Language.SPANISH)) {
                return titlesForGivenNamesEs;
            }
            if (language.equals(Language.GERMAN)) {
                return titlesForGivenNamesDe;
            }
            if (language.equals(Language.ITALIAN)) {
                return titlesForGivenNamesIt;
            }
            throw new IllegalArgumentException("Unsupported language");
        }

        /** Re-labels what was parsed as the family name as the given name(s). */
        private void moveFamilyNameToGivenNames() {
            this.myGivenNames = this.myFamilyName;
            this.myFamilyName = null;
        }

        /**
         * Parses a person name. If the string does not match the language's lax
         * person-name pattern, all components stay null.
         *
         * @param str      the name as it appears in the text (underscores count as spaces)
         * @param language the language whose patterns and title list are used
         * @throws IllegalArgumentException if the language is not English,
         *                                  French, Spanish, German or Italian
         */
        public PersonNameML(String str, Language language) {
            super(str);
            String spaced = str.replace('_', ' ');
            Matcher parsed = laxPatternFor(language).matcher(spaced);
            if (!parsed.matches()) {
                return;
            }

            this.myTitles = getComponent(parsed, 1);
            this.myGivenNames = getComponent(parsed, 2);
            this.myNickname = getComponent(parsed, 3);
            this.myFamilyName = getComponent(parsed, 6);
            this.myFamilyNamePrefix = getComponent(parsed, 5);

            // An attribute prefix (group 4) means the trailing name is an
            // attribute rather than a family name.
            String attributeIntro = getComponent(parsed, 4);
            if (attributeIntro != null) {
                this.myAttributePrefix = attributeIntro;
                this.myAttribute = this.myFamilyName;
                this.myFamilyName = null;
            }

            this.myFamilyNameSuffix = getComponent(parsed, 7);
            this.myRoman = getComponent(parsed, 8);
            this.myCity = getComponent(parsed, 9);
            // Group 10 is a second place where a nickname may appear.
            if (this.myNickname == null) {
                this.myNickname = getComponent(parsed, 10);
            }

            Set<String> givenNameTitles = givenNameTitlesFor(language);

            // Title from the given-name list and no given name: the single name is a given name.
            if (this.myGivenNames == null && this.myTitles != null
                    && givenNameTitles.contains(this.myTitles.toLowerCase())) {
                moveFamilyNameToGivenNames();
            }
            // Roman numeral and no given name: the single name is a given name.
            if (this.myGivenNames == null && this.myRoman != null) {
                moveFamilyNameToGivenNames();
            }

            // If the "family name" is really a suffix (Jr, PhD, ...), the true
            // family name is the last of the given names.
            boolean familyNameIsSuffix = this.myFamilyName != null
                    && this.myGivenNames != null
                    && familyNameSuffixPattern.matcher(this.myFamilyName).matches();
            if (familyNameIsSuffix) {
                String[] givenParts = this.myGivenNames.split("(?:[\\s_]++)");
                this.myFamilyNameSuffix = this.myFamilyName;
                this.myFamilyName = givenParts[givenParts.length - 1];
                if (givenParts.length == 1) {
                    this.myGivenNames = null;
                } else {
                    int keep = this.myGivenNames.length() - this.myFamilyName.length();
                    this.myGivenNames = this.myGivenNames.substring(0, keep);
                }
            }
        }

        /** @return the first of the given names (text before the first space), or null if there are none */
        public String givenName() {
            if (this.myGivenNames == null) {
                return null;
            }
            int firstSpace = this.myGivenNames.indexOf(' ');
            if (firstSpace == -1) {
                return this.myGivenNames;
            }
            return this.myGivenNames.substring(0, firstSpace);
        }

        /** @return the attribute, or null */
        public String attribute() {
            return myAttribute;
        }

        /** @return the attribute prefix, or null */
        public String attributePrefix() {
            return myAttributePrefix;
        }

        /** @return the city, or null */
        public String city() {
            return myCity;
        }

        /** @return the nickname, or null */
        public String nickname() {
            return myNickname;
        }

        /** @return the family name, or null */
        public String familyName() {
            return myFamilyName;
        }

        /** @return the family name prefix, or null */
        public String familyNamePrefix() {
            return myFamilyNamePrefix;
        }

        /** @return the family name suffix, or null */
        public String familyNameSuffix() {
            return myFamilyNameSuffix;
        }

        /** @return all given names, or null */
        public String givenNames() {
            return myGivenNames;
        }

        /** @return the roman numeral, or null */
        public String roman() {
            return myRoman;
        }

        /** @return the titles, or null */
        public String titles() {
            return myTitles;
        }

        /**
         * Builds the normalized name. With a family name the result is
         * "given-names family-name", with ", Jr." or ", Sr." appended when the
         * suffix starts with j/J or s/S. Without a family name it is the given
         * names followed by the roman numeral and the attribute when present, or
         * the original string if there are no given names either.
         *
         * @return the normalized name
         */
        @Override // javatools.parsers.NameML
        public String normalize() {
            String given = givenNames();

            if (this.myFamilyName != null) {
                String family = this.myFamilyName;
                if (this.myFamilyNameSuffix != null) {
                    if (this.myFamilyNameSuffix.matches("[jJ].*")) {
                        family = family + ", Jr.";
                    } else if (this.myFamilyNameSuffix.matches("[sS].*")) {
                        family = family + ", Sr.";
                    }
                }
                return given == null ? family : given + ' ' + family;
            }

            if (given == null) {
                return original();
            }
            if (this.myRoman != null) {
                given = given + ' ' + this.myRoman;
            }
            if (this.myAttribute != null) {
                given = given + ' ' + this.myAttribute;
            }
            return given;
        }

        /** @return a multi-line, human-readable listing of all parsed components */
        @Override // javatools.parsers.NameML
        public String describe() {
            StringBuilder text = new StringBuilder("PersonName\n  Original: ");
            text.append(this.original);
            text.append("\n  Titles: ").append(titles());
            text.append("\n  Given Name: ").append(givenName());
            text.append("\n  Given Names: ").append(givenNames());
            text.append("\n  Nickname: ").append(nickname());
            text.append("\n  Family Name Prefix: ").append(familyNamePrefix());
            text.append("\n  Attribute Prefix: ").append(attributePrefix());
            text.append("\n  Family Name: ").append(familyName());
            text.append("\n  Attribute: ").append(attribute());
            text.append("\n  Family Name Suffix: ").append(familyNameSuffix());
            text.append("\n  Roman: ").append(roman());
            text.append("\n  City: ").append(city());
            text.append("\n  Normalized: ").append(normalize());
            return text.toString();
        }
    }

    /**
     * Initializes the name parser. The parameter object is accepted for
     * interface compatibility but is not consulted; this simply delegates to
     * the no-argument {@code init()}.
     *
     * @param nonsharedParameters unused
     */
    public static final void init(NonsharedParameters nonsharedParameters) {
        NameML.init();
    }

    /**
     * Points the parser at an on-disk configuration directory and initializes
     * it. Resource files are read from the {@code parsing/} sub-directory of
     * the given base directory.
     *
     * @param str base directory; a trailing separator is optional
     */
    public static final void init(String str) {
        String base = str;
        // Plain concatenation turned "conf" into "confparsing/". Insert the
        // missing separator; inputs that already end in one (and the empty or
        // null input) resolve exactly as before.
        boolean missingSeparator = str != null && str.length() > 0
                && !str.endsWith("/") && !str.endsWith(File.separator);
        if (missingSeparator) {
            base = str + "/";
        }
        CONFIG_DIR = new File(base + "parsing/");
        init();
    }

    /**
     * Loads the per-language titles, stop words and person-name patterns into
     * the static fields of this class. Does nothing if a previous call already
     * completed.
     */
    public static final void init() {
        if (hasBeenInitialized) {
            return;
        }
        // Title patterns come first: the person-name patterns below are built from them.
        titlePatternEn = createTitlePattern(Language.ENGLISH);
        titlePatternDe = createTitlePattern(Language.GERMAN);
        titlePatternFr = createTitlePattern(Language.FRENCH);
        titlePatternEs = createTitlePattern(Language.SPANISH);
        titlePatternIt = createTitlePattern(Language.ITALIAN);
        // Title sets, read from the "titles.<languageId>" resource of each language.
        titlesForGivenNamesEn = readTextFileLinesSet("titles." + Language.ENGLISH.getId());
        titlesForGivenNamesDe = readTextFileLinesSet("titles." + Language.GERMAN.getId());
        titlesForGivenNamesEs = readTextFileLinesSet("titles." + Language.SPANISH.getId());
        titlesForGivenNamesFr = readTextFileLinesSet("titles." + Language.FRENCH.getId());
        titlesForGivenNamesIt = readTextFileLinesSet("titles." + Language.ITALIAN.getId());
        // Stop-word sets, read from the "stopwords.<languageId>" resource of each language.
        stopWordDE = readTextFileLinesSet("stopwords." + Language.GERMAN.getId());
        stopWordFR = readTextFileLinesSet("stopwords." + Language.FRENCH.getId());
        stopWordES = readTextFileLinesSet("stopwords." + Language.SPANISH.getId());
        stopWordEN = readTextFileLinesSet("stopwords." + Language.ENGLISH.getId());
        stopWordIT = readTextFileLinesSet("stopwords." + Language.ITALIAN.getId());
        // Person-name patterns, one lax and one safe variant per language.
        laxPersonNamePatternEn = createLaxPersonNamePattern(titlePatternEn);
        laxPersonNamePatternDe = createLaxPersonNamePattern(titlePatternDe);
        laxPersonNamePatternEs = createLaxPersonNamePattern(titlePatternEs);
        laxPersonNamePatternFr = createLaxPersonNamePattern(titlePatternFr);
        laxPersonNamePatternIt = createLaxPersonNamePattern(titlePatternIt);
        safePersonNamePatternEn = createSafePersonNamePattern(titlePatternEn);
        safePersonNamePatternDe = createSafePersonNamePattern(titlePatternDe);
        safePersonNamePatternEs = createSafePersonNamePattern(titlePatternEs);
        safePersonNamePatternFr = createSafePersonNamePattern(titlePatternFr);
        safePersonNamePatternIt = createSafePersonNamePattern(titlePatternIt);
        // Only marked done after everything above succeeded.
        hasBeenInitialized = true;
    }

    /**
     * Builds a regex for one or more occurrences of {@code str} separated by
     * blanks (whitespace or underscores).
     *
     * @param str the regex fragment to repeat
     * @return the combined regex
     */
    public static String mul(String str) {
        StringBuilder regex = new StringBuilder("(?:");
        regex.append(str).append("(?:[\\s_]++))*").append(str);
        return regex.toString();
    }

    /**
     * Builds a regex for one or more occurrences of {@code str} joined by
     * hyphens.
     *
     * @param str the regex fragment to repeat
     * @return the combined regex
     */
    public static String mulHyp(String str) {
        StringBuilder regex = new StringBuilder("(?:");
        regex.append(str).append("-)*").append(str);
        return regex.toString();
    }

    /**
     * Wraps a regex fragment so that it becomes optional.
     *
     * @param str the regex fragment
     * @return the fragment in a non-capturing group followed by {@code ?}
     */
    public static String opt(String str) {
        return new StringBuilder("(?:").append(str).append(")?").toString();
    }

    /**
     * Wraps a regex fragment so that it may occur any number of times,
     * including zero.
     *
     * @param str the regex fragment
     * @return the fragment in a non-capturing group followed by {@code *}
     */
    public static String optMul(String str) {
        return new StringBuilder("(?:").append(str).append(")*").toString();
    }

    /**
     * Builds a regex matching either of two fragments.
     *
     * @param str  the first alternative
     * @param str2 the second alternative
     * @return both alternatives in one non-capturing group
     */
    public static String or(String str, String str2) {
        StringBuilder regex = new StringBuilder("(?:");
        regex.append(str).append("|").append(str2).append(")");
        return regex.toString();
    }

    /**
     * Wraps a regex fragment in a capturing group.
     *
     * @param str the regex fragment
     * @return the fragment enclosed in parentheses
     */
    public static String c(String str) {
        return new StringBuilder("(").append(str).append(")").toString();
    }

    /**
     * Tells whether the whole string is a family name prefix.
     *
     * @param str the candidate string
     * @return true if {@code str} fully matches {@code familyNamePrefixPattern}
     */
    public static boolean isFamilyNamePrefix(String str) {
        Matcher prefixMatcher = familyNamePrefixPattern.matcher(str);
        return prefixMatcher.matches();
    }

    /**
     * Tells whether the whole string is an attribute prefix. The current value
     * of the {@code attributePrefix} regex is used on every call.
     *
     * @param str the candidate string
     * @return true if {@code str} fully matches {@code attributePrefix}
     */
    public static boolean isAttributePrefix(String str) {
        String candidate = str;
        return candidate.matches(attributePrefix);
    }

    /**
     * Tells whether the whole string is a person name suffix.
     *
     * @param str the candidate string
     * @return true if {@code str} fully matches {@code familyNameSuffixPattern}
     */
    public static boolean isPersonNameSuffix(String str) {
        Matcher suffixMatcher = familyNameSuffixPattern.matcher(str);
        return suffixMatcher.matches();
    }

    /**
     * Builds the title pattern of a language from its "titles.&lt;id&gt;"
     * resource. Every non-empty line that does not start with "##" becomes one
     * alternative; lines are inserted verbatim, so they are treated as regex.
     *
     * @param language the language whose titles file is read
     * @return the compiled pattern, or null if the file could not be read
     */
    private static Pattern createTitlePattern(Language language) {
        List<String> lines;
        try {
            lines = readTextFileLines("titles." + language.getId(), "UTF-8");
        } catch (IOException e) {
            return null;
        }

        StringBuilder regex = new StringBuilder("\\b(?:");
        String separator = "";
        for (String rawLine : lines) {
            String title = rawLine.trim();
            if (title.length() == 0 || title.startsWith("##")) {
                continue;
            }
            regex.append(separator).append(title);
            separator = "|";
        }
        regex.append(")");
        return Pattern.compile(regex.toString());
    }

    /**
     * Tells whether the whole string is a title in the given language.
     *
     * @param str      the candidate string
     * @param language English, French, German, Spanish or Italian
     * @return true if {@code str} fully matches the language's title pattern
     * @throws IllegalArgumentException for any other language
     */
    public static boolean isTitle(String str, Language language) {
        Pattern titles;
        if (language.equals(Language.ENGLISH)) {
            titles = titlePatternEn;
        } else if (language.equals(Language.FRENCH)) {
            titles = titlePatternFr;
        } else if (language.equals(Language.GERMAN)) {
            titles = titlePatternDe;
        } else if (language.equals(Language.SPANISH)) {
            titles = titlePatternEs;
        } else if (language.equals(Language.ITALIAN)) {
            titles = titlePatternIt;
        } else {
            throw new IllegalArgumentException("Unsupported Language");
        }
        return titles.matcher(str).matches();
    }

    /**
     * Tells whether the whole string is a company name suffix.
     *
     * @param str the candidate string
     * @return true if {@code str} fully matches {@code companyNameSuffixPattern}
     */
    public static boolean isCompanyNameSuffix(String str) {
        Matcher suffixMatcher = companyNameSuffixPattern.matcher(str);
        return suffixMatcher.matches();
    }

    /**
     * Tells whether the whole string is a single safe name.
     *
     * @param str the candidate string
     * @return true if {@code str} fully matches {@code safeNamePattern}
     */
    public static boolean isName(String str) {
        Matcher nameMatcher = safeNamePattern.matcher(str);
        return nameMatcher.matches();
    }

    /**
     * Tells whether the whole string is a sequence of safe names.
     *
     * @param str the candidate string
     * @return true if {@code str} fully matches {@code safeNamesPattern}
     */
    public static boolean isNames(String str) {
        Matcher namesMatcher = safeNamesPattern.matcher(str);
        return namesMatcher.matches();
    }

    /**
     * Tells whether the given string, taken as a whole, matches the more
     * permissive {@code laxNamePattern}.
     *
     * @param str the candidate name; must not be null
     * @return true if the entire string matches the lax name pattern
     */
    public static boolean couldBeName(String str) {
        final Matcher nameMatcher = laxNamePattern.matcher(str);
        return nameMatcher.matches();
    }

    /**
     * Tells whether the given word is contained in the stop-word set of the
     * given language.
     *
     * <p>A null word is always reported as a stop word. Any language other than
     * ENGLISH, FRENCH, GERMAN, ITALIAN or SPANISH (including null) falls back to
     * the English stop-word set.
     *
     * @param str      the word to look up; may be null
     * @param language the language whose stop-word set is consulted
     * @return true if the word is null or is in the selected stop-word set
     */
    public static boolean isStopWord(String str, Language language) {
        if (str == null) {
            return true;
        }
        final Set<String> stopWords;
        if (language == Language.ENGLISH) {
            stopWords = stopWordEN;
        } else if (language == Language.FRENCH) {
            stopWords = stopWordFR;
        } else if (language == Language.GERMAN) {
            stopWords = stopWordDE;
        } else if (language == Language.ITALIAN) {
            stopWords = stopWordIT;
        } else if (language == Language.SPANISH) {
            stopWords = stopWordES;
        } else {
            // Unknown language: use the English list as the default.
            stopWords = stopWordEN;
        }
        return stopWords.contains(str);
    }

    /**
     * Returns the original, unmodified name string.
     *
     * @return the value of the {@code original} field
     */
    @Override
    public String toString() {
        return original;
    }

    /**
     * Returns the normalized form of the original name, computing and caching
     * it in the {@code normalized} field on first use.
     *
     * <p>Normalization first collapses every run of whitespace and underscores
     * into a single underscore, then deletes every character that is neither a
     * letter, a digit, nor an underscore.
     *
     * @return the cached normalized name
     */
    public String normalize() {
        if (this.normalized != null) {
            return this.normalized;
        }
        final String underscored = this.original.replaceAll("(?:[\\s_]++)", "_");
        this.normalized = underscored.replaceAll("([\\P{L}&&[^\\d]&&[^_]])", "");
        return this.normalized;
    }

    /**
     * Creates a name that wraps the given string without analysing it.
     *
     * @param str the original name string, stored as-is
     */
    protected NameML(String str) {
        original = str;
    }

    /**
     * Produces a multi-line, human-readable description listing the original
     * and the normalized form of this name.
     *
     * @return the description text
     */
    public String describe() {
        StringBuilder text = new StringBuilder("Name\n  Original: ");
        text.append(this.original);
        text.append("\n  Normalized: ");
        text.append(normalize());
        return text.toString();
    }

    /**
     * Returns the original name string this object was created with.
     *
     * @return the value of the {@code original} field
     */
    public String original() {
        return original;
    }

    /**
     * Reads all lines of a configuration file obtained through
     * {@link #getConfigFileStream(String)}.
     *
     * <p>The underlying stream is always closed, including when reading or the
     * charset lookup fails.
     *
     * @param str  the name of the configuration file
     * @param str2 the charset name used for decoding; null means {@code "UTF-8"}
     * @return the lines of the file, in order, without line terminators
     * @throws FileNotFoundException if no stream could be obtained for the file
     * @throws IOException           if the charset is unsupported or reading fails
     */
    public static List<String> readTextFileLines(String str, String str2) throws IOException {
        final String charsetName = (str2 == null) ? "UTF-8" : str2;
        final InputStream stream = getConfigFileStream(str);
        if (stream == null) {
            // getConfigFileStream yields null when a classpath resource is missing;
            // report that as a regular I/O failure instead of a NullPointerException.
            throw new FileNotFoundException("Configuration resource not found: " + str);
        }
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(stream, charsetName));
            List<String> lines = new ArrayList<String>();
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
            return lines;
        } finally {
            if (reader != null) {
                reader.close();
            } else {
                // The reader could not be created (e.g. unsupported charset).
                stream.close();
            }
        }
    }

    /**
     * Opens a configuration file as a stream.
     *
     * <p>When {@code CONFIG_DIR} is set, the file is opened from that directory;
     * otherwise it is looked up as a class resource under
     * {@code PARSINGRESOURCES_PATH}.
     *
     * @param str the name of the configuration file
     * @return the opened stream; in the class-resource case this is whatever
     *         {@code Class.getResourceAsStream} returns, which is null when the
     *         resource does not exist
     * @throws FileNotFoundException if {@code CONFIG_DIR} is set and the file
     *                               cannot be opened
     */
    public static InputStream getConfigFileStream(String str) throws FileNotFoundException {
        if (CONFIG_DIR == null) {
            return NameML.class.getResourceAsStream(PARSINGRESOURCES_PATH + str);
        }
        return new FileInputStream(new File(CONFIG_DIR, str));
    }

    /**
     * Reads a UTF-8 configuration file into a set of its trimmed lines.
     *
     * <p>Empty lines and lines starting with {@code ##} are skipped.
     *
     * @param str the name of the configuration file
     * @return the set of entries, or null if the file could not be read (the
     *         stack trace is printed in that case)
     */
    public static Set<String> readTextFileLinesSet(String str) {
        final List<String> rawLines;
        try {
            rawLines = readTextFileLines(str, "UTF-8");
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
        // Sized from the line count and the 0.75 load factor.
        Set<String> entries = new HashSet<String>((int) (rawLines.size() / 0.75d));
        for (String rawLine : rawLines) {
            String entry = rawLine.trim();
            if (entry.length() == 0 || entry.startsWith("##")) {
                continue;
            }
            entries.add(entry);
        }
        return entries;
    }

    /**
     * Tells whether the given string, taken as a whole, matches
     * {@code safeAbbreviationPattern}.
     *
     * @param str the candidate abbreviation; must not be null
     * @return true if the entire string matches the safe abbreviation pattern
     */
    public static boolean isAbbreviation(String str) {
        final Matcher abbreviationMatcher = safeAbbreviationPattern.matcher(str);
        return abbreviationMatcher.matches();
    }

    /**
     * Tells whether the given string, taken as a whole, matches the more
     * permissive {@code laxAbbreviationPattern}.
     *
     * @param str the candidate abbreviation; must not be null
     * @return true if the entire string matches the lax abbreviation pattern
     */
    public static boolean couldBeAbbreviation(String str) {
        final Matcher abbreviationMatcher = laxAbbreviationPattern.matcher(str);
        return abbreviationMatcher.matches();
    }

    /**
     * Tells whether the given string, taken as a whole, matches
     * {@code safeCompanyPattern}.
     *
     * @param str the candidate company name; must not be null
     * @return true if the entire string matches the safe company pattern
     */
    public static boolean isCompanyName(String str) {
        final Matcher companyMatcher = safeCompanyPattern.matcher(str);
        return companyMatcher.matches();
    }

    /**
     * Tells whether the given string, taken as a whole, matches the more
     * permissive {@code laxCompanyPattern}.
     *
     * @param str the candidate company name; must not be null
     * @return true if the entire string matches the lax company pattern
     */
    public static boolean couldBeCompanyName(String str) {
        final Matcher companyMatcher = laxCompanyPattern.matcher(str);
        return companyMatcher.matches();
    }

    /**
     * Builds the permissive person-name pattern for one language.
     *
     * <p>The regex is the concatenation, in this order, of: repeated titles,
     * repeated given names, an optional quoted single character, an optional
     * attribute prefix, an optional family-name prefix, the family name, an
     * optional family-name suffix, an optional roman numeral, an optional
     * "of &lt;Capitalized word&gt;" part and a final optional quoted single
     * character. The pieces are wrapped with the helpers {@code c}, {@code opt}
     * and {@code optMul} defined elsewhere in this class.
     *
     * @param pattern the title pattern of the language; must not be null
     * @return the compiled lax person-name pattern
     */
    private static Pattern createLaxPersonNamePattern(Pattern pattern) {
        // Local copies of the literal fragments; they carry the same text as the
        // class constants B, familyNamePrefix and familyNameSuffix.
        final String gap = "(?:[\\s_]++)";
        final String quotedChar = "(?:'[^']')";
        final String familyPrefix = "(?:[aA]l|[dD][ea]|[dD]el|[dD]e las|[bB]in|[dD]e la|[dD]e los|[dD]i|[zZ]u[mr]|[aA]m|[vV][oa]n de[rnm]|[vV][oa][nm]|[dD]o|[dD]')";
        final String familySuffix = "(?:CBE|DBE|GBE|[jJ]r\\.?|[jJ]unior|hijo|hija|P[hH]\\.?[dD]\\.?|KBE|MBE|M\\.?D\\.|OBE|[sS]enior|[sS]r\\.?)";

        final String[] pieces = {
            c(optMul(pattern.pattern() + gap)),
            c(optMul(givenName + gap)),
            opt(c(quotedChar) + gap),
            opt(c(attributePrefix) + gap),
            opt(c(familyPrefix) + gap),
            c(familyName),
            opt("[,\\s_]++" + c(familySuffix)),
            opt(gap + c(roman)),
            opt(gap + of + gap + c("\\p{Lu}\\p{Ll}+")),
            opt(gap + c(quotedChar))
        };

        StringBuilder regex = new StringBuilder();
        for (String piece : pieces) {
            regex.append(piece);
        }
        return Pattern.compile(regex.toString());
    }

    /**
     * Builds the strict person-name pattern for one language.
     *
     * <p>The regex is an alternation of seven forms, in this order:
     * <ol>
     *   <li>title, given names, family name, optional suffix</li>
     *   <li>title, family name, optional suffix</li>
     *   <li>given name, roman numeral</li>
     *   <li>given names, family name, mandatory suffix</li>
     *   <li>family name, mandatory suffix</li>
     *   <li>given name, one initial, family name, optional suffix</li>
     *   <li>given name, two initials, family name, optional suffix</li>
     * </ol>
     * In every form the family name may be preceded by an optional family-name
     * prefix.
     *
     * @param pattern the title pattern of the language; must not be null
     * @return the compiled safe person-name pattern
     */
    private static Pattern createSafePersonNamePattern(Pattern pattern) {
        // Literal fragments shared by the alternatives.
        final String gap = "(?:[\\s_]++)";
        final String initial = "\\p{Lu}\\.";
        final String suffix = "[,\\s_]++(?:CBE|DBE|GBE|[jJ]r\\.?|[jJ]unior|hijo|hija|P[hH]\\.?[dD]\\.?|KBE|MBE|M\\.?D\\.|OBE|[sS]enior|[sS]r\\.?)";

        final String title = pattern.pattern();
        final String optionalPrefix = opt("(?:[aA]l|[dD][ea]|[dD]el|[dD]e las|[bB]in|[dD]e la|[dD]e los|[dD]i|[zZ]u[mr]|[aA]m|[vV][oa]n de[rnm]|[vV][oa][nm]|[dD]o|[dD]')(?:[\\s_]++)");
        final String optionalSuffix = opt(suffix);
        // Family name together with its optional prefix.
        final String surname = optionalPrefix + familyName;

        final String[] alternatives = {
            title + gap + givenNames + gap + surname + optionalSuffix,
            title + gap + surname + optionalSuffix,
            givenName + gap + roman,
            givenNames + gap + surname + suffix,
            surname + suffix,
            givenName + gap + initial + gap + surname + optionalSuffix,
            givenName + gap + initial + gap + initial + gap + surname + optionalSuffix
        };

        StringBuilder regex = new StringBuilder();
        for (int i = 0; i < alternatives.length; i++) {
            if (i > 0) {
                regex.append('|');
            }
            regex.append(alternatives[i]);
        }
        return Pattern.compile(regex.toString());
    }

    /**
     * Tells whether the given string could be a person name in the given
     * language, using the permissive per-language pattern.
     *
     * <p>Anything recognised by {@link #isCompanyName(String)} is rejected
     * before the language is even looked at.
     *
     * @param str      the candidate name; must not be null
     * @param language one of ENGLISH, GERMAN, SPANISH, FRENCH or ITALIAN
     * @return true if the string is not a company name and matches, as a whole,
     *         the lax person-name pattern of the language
     * @throws IllegalArgumentException if the language is none of the above
     */
    public static boolean couldBePersonName(String str, Language language) {
        if (isCompanyName(str)) {
            return false;
        }
        final Pattern laxPattern;
        if (language.equals(Language.ENGLISH)) {
            laxPattern = laxPersonNamePatternEn;
        } else if (language.equals(Language.GERMAN)) {
            laxPattern = laxPersonNamePatternDe;
        } else if (language.equals(Language.SPANISH)) {
            laxPattern = laxPersonNamePatternEs;
        } else if (language.equals(Language.FRENCH)) {
            laxPattern = laxPersonNamePatternFr;
        } else if (language.equals(Language.ITALIAN)) {
            laxPattern = laxPersonNamePatternIt;
        } else {
            throw new IllegalArgumentException("Unsupported language.");
        }
        return laxPattern.matcher(str).matches();
    }

    /**
     * Tells whether the given string, taken as a whole, matches the strict
     * person-name pattern of the given language.
     *
     * @param str      the candidate name; must not be null
     * @param language one of ENGLISH, GERMAN, SPANISH, FRENCH or ITALIAN
     * @return true if the entire string matches the safe person-name pattern
     * @throws IllegalArgumentException if the language is none of the above
     */
    public static boolean isPersonName(String str, Language language) {
        final Pattern safePattern;
        if (language.equals(Language.ENGLISH)) {
            safePattern = safePersonNamePatternEn;
        } else if (language.equals(Language.GERMAN)) {
            safePattern = safePersonNamePatternDe;
        } else if (language.equals(Language.SPANISH)) {
            safePattern = safePersonNamePatternEs;
        } else if (language.equals(Language.FRENCH)) {
            safePattern = safePersonNamePatternFr;
        } else if (language.equals(Language.ITALIAN)) {
            safePattern = safePersonNamePatternIt;
        } else {
            throw new IllegalArgumentException("Unsupported language.");
        }
        return safePattern.matcher(str).matches();
    }

    /**
     * Tells whether the given string, with underscores replaced by spaces, is
     * one of the values of the {@code usStates} map.
     *
     * @param str the candidate state name; must not be null
     * @return true if the space-separated form is a value of {@code usStates}
     */
    public static boolean isUSState(String str) {
        final String spaced = str.replace('_', ' ');
        return usStates.values().contains(spaced);
    }

    /**
     * Tells whether the given string is a key of the {@code usStates} map.
     *
     * <p>A trailing dot is removed first and the lookup is done on the
     * upper-cased string. Upper-casing uses {@code Locale.ROOT} so that the
     * result does not depend on the JVM's default locale (under a Turkish
     * locale "mi" would otherwise become "M\u0130" and never match).
     *
     * @param str the candidate abbreviation; must not be null
     * @return true if the cleaned, upper-cased string is a key of {@code usStates}
     */
    public static boolean isUSStateAbbreviation(String str) {
        if (str.endsWith(".")) {
            str = Char17.cutLast(str);
        }
        return usStates.containsKey(str.toUpperCase(java.util.Locale.ROOT));
    }

    /**
     * Looks up the {@code usStates} entry for the given abbreviation.
     *
     * <p>A trailing dot is removed first and the lookup is done on the
     * upper-cased string. Upper-casing uses {@code Locale.ROOT} so that the
     * result does not depend on the JVM's default locale (under a Turkish
     * locale "mi" would otherwise become "M\u0130" and never match).
     *
     * @param str the abbreviation; must not be null
     * @return whatever {@code usStates.get} yields for the cleaned, upper-cased
     *         abbreviation
     */
    public static String unabbreviateUSState(String str) {
        if (str.endsWith(".")) {
            str = Char17.cutLast(str);
        }
        return usStates.get(str.toUpperCase(java.util.Locale.ROOT));
    }

    /**
     * Tells whether the given string, after being passed through
     * {@code Char17.upCaseFirst}, is one of the values of the
     * {@code languageCodes} map.
     *
     * @param str the candidate language name
     * @return true if the adjusted string is a value of {@code languageCodes}
     */
    public static boolean isLanguage(String str) {
        final String capitalized = Char17.upCaseFirst(str);
        return languageCodes.values().contains(capitalized);
    }

    /**
     * Tells whether the lower-cased form of the given string is a key of the
     * {@code languageCodes} map.
     *
     * <p>Lower-casing uses {@code Locale.ROOT} so that the result does not
     * depend on the JVM's default locale (under a Turkish locale "IT" would
     * otherwise become "\u0131t" and never match).
     *
     * @param str the candidate language code; must not be null
     * @return true if the lower-cased string is a key of {@code languageCodes}
     */
    public static boolean isLanguageCode(String str) {
        return languageCodes.containsKey(str.toLowerCase(java.util.Locale.ROOT));
    }

    /**
     * Looks up the {@code languageCodes} entry for the given code.
     *
     * <p>The lookup is done on the lower-cased code. Lower-casing uses
     * {@code Locale.ROOT} so that the result does not depend on the JVM's
     * default locale (under a Turkish locale "IT" would otherwise become
     * "\u0131t" and never match).
     *
     * @param str the language code; must not be null
     * @return whatever {@code languageCodes.get} yields for the lower-cased code
     */
    public static String languageForCode(String str) {
        return languageCodes.get(str.toLowerCase(java.util.Locale.ROOT));
    }

    /**
     * Tells whether the given string is one of the values of the
     * {@code nationality2country} map.
     *
     * @param str the candidate nation name, compared as given
     * @return true if the string is a value of {@code nationality2country}
     */
    public static boolean isNation(String str) {
        final boolean knownNation = nationality2country.values().contains(str);
        return knownNation;
    }

    /**
     * Tells whether the given string is a key of the
     * {@code nationality2country} map.
     *
     * @param str the candidate nationality, compared as given
     * @return true if the string is a key of {@code nationality2country}
     */
    public static boolean isNationality(String str) {
        final boolean knownNationality = nationality2country.containsKey(str);
        return knownNationality;
    }

    /**
     * Looks up the {@code nationality2country} entry for the given nationality.
     *
     * @param str the nationality, used as the lookup key as given
     * @return whatever {@code nationality2country.get} yields for the key
     */
    public static String nationForNationality(String str) {
        final String nation = nationality2country.get(str);
        return nation;
    }

    /**
     * Creates the most specific name object for the given string.
     *
     * <p>The checks run in this order: company name, possible person name (for
     * the given language), abbreviation. The first one that succeeds decides the
     * type; if none does, a plain {@code NameML} is returned.
     *
     * @param str      the name string to wrap
     * @param language the language used for the person-name check
     * @return a {@code CompanyNameML}, {@code PersonNameML}, {@code AbbreviationML}
     *         or plain {@code NameML}
     */
    public static NameML of(String str, Language language) {
        if (isCompanyName(str)) {
            return new CompanyNameML(str);
        }
        if (couldBePersonName(str, language)) {
            return new PersonNameML(str, language);
        }
        if (isAbbreviation(str)) {
            return new AbbreviationML(str);
        }
        return new NameML(str);
    }

    /**
     * Test driver: after calling {@code init()}, parses every line of
     * {@code ./testdata/NameParserTest.txt} as an English name and every line of
     * {@code ./testdata/NameParserTestDe.txt} as a German name, printing each
     * name's description.
     *
     * @param strArr command-line arguments (not used)
     * @throws Exception if initialisation or reading a test file fails
     */
    public static void main(String[] strArr) throws Exception {
        init();

        Announce.doing("Testing for English");
        for (Iterator<String> english = new FileLines("./testdata/NameParserTest.txt").iterator(); english.hasNext(); ) {
            NameML parsed = of(english.next(), Language.ENGLISH);
            D.p(parsed.describe());
        }
        Announce.done();

        Announce.doing("Testing for German");
        for (Iterator<String> german = new FileLines("./testdata/NameParserTestDe.txt").iterator(); german.hasNext(); ) {
            NameML parsed = of(german.next(), Language.GERMAN);
            D.p(parsed.describe());
        }
        Announce.done();
    }
}
