001package ca.uhn.fhir.util;
002
003/*-
004 * #%L
005 * HAPI FHIR - Core Library
006 * %%
007 * Copyright (C) 2014 - 2022 Smile CDR, Inc.
008 * %%
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *      http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 * #L%
021 */
022
023import ca.uhn.fhir.context.phonetic.ApacheEncoder;
024import ca.uhn.fhir.context.phonetic.IPhoneticEncoder;
025import ca.uhn.fhir.context.phonetic.NumericEncoder;
026import ca.uhn.fhir.context.phonetic.PhoneticEncoderEnum;
027import org.apache.commons.codec.language.Caverphone1;
028import org.apache.commons.codec.language.Caverphone2;
029import org.apache.commons.codec.language.ColognePhonetic;
030import org.apache.commons.codec.language.DoubleMetaphone;
031import org.apache.commons.codec.language.MatchRatingApproachEncoder;
032import org.apache.commons.codec.language.Metaphone;
033import org.apache.commons.codec.language.Nysiis;
034import org.apache.commons.codec.language.RefinedSoundex;
035import org.apache.commons.codec.language.Soundex;
036import org.apache.commons.lang3.EnumUtils;
037import org.slf4j.Logger;
038import org.slf4j.LoggerFactory;
039
040public final class PhoneticEncoderUtil {
041
042        // embedded class only for parameter returns
043        private static class ParsedValues {
044                private final Integer maxCodeLength;
045                private final String encoderString;
046
047                public ParsedValues(String theString, Integer theMaxCode) {
048                        maxCodeLength = theMaxCode;
049                        encoderString = theString;
050                }
051
052                public Integer getMaxCodeLength() {
053                        return maxCodeLength;
054                }
055
056                public String getEncoderString() {
057                        return encoderString;
058                }
059        }
060
061        private static final Logger ourLog = LoggerFactory.getLogger(PhoneticEncoderUtil.class);
062
063        private PhoneticEncoderUtil() {
064        }
065
066        /**
067         * Creates the phonetic encoder wrapper from
068         * an input string.
069         *
070         * <p>
071         * String must be in the format of...
072         *      </p>
073         *
074         * PhoneticEncoderEnum(MAX_LENGTH)
075         *
076         * @return The IPhoneticEncoder
077         */
078        public static IPhoneticEncoder getEncoder(String theString) {
079                ParsedValues values = parseIntValue(theString);
080                String encoderType = values.getEncoderString();
081                Integer encoderMaxString = values.getMaxCodeLength();
082
083                IPhoneticEncoder encoder = getEncoderFromString(encoderType, encoderMaxString);
084                if (encoder != null) {
085                        return encoder;
086                }
087                else {
088                        ourLog.warn("Invalid phonetic param string " + theString);
089                        return null;
090                }
091        }
092
093        private static ParsedValues parseIntValue(String theString) {
094                String encoderType = null;
095                Integer encoderMaxString = null;
096
097                int braceIndex = theString.indexOf("(");
098                if (braceIndex != -1) {
099                        int len = theString.length();
100                        if (theString.charAt(len - 1) == ')') {
101                                encoderType = theString.substring(0, braceIndex);
102                                String num = theString.substring(braceIndex + 1, len - 1);
103                                try {
104                                        encoderMaxString = Integer.parseInt(num);
105                                } catch (NumberFormatException ex) {
106                                        // invalid number parse error
107                                }
108
109                                if (encoderMaxString == null
110                                                || encoderMaxString < 0) {
111                                        // parse error
112                                        ourLog.error("Invalid encoder max character length: " + num);
113                                        encoderType = null;
114                                }
115                        }
116                        // else - parse error
117                }
118                else {
119                        encoderType = theString;
120                }
121
122                return new ParsedValues(encoderType, encoderMaxString);
123        }
124
125        private static IPhoneticEncoder getEncoderFromString(String theName, Integer theMax) {
126                IPhoneticEncoder encoder = null;
127                PhoneticEncoderEnum enumVal = EnumUtils.getEnum(PhoneticEncoderEnum.class, theName);
128
129                if (enumVal != null) {
130                        switch (enumVal) {
131                                case CAVERPHONE1:
132                                        Caverphone1 caverphone1 = new Caverphone1();
133                                        encoder = new ApacheEncoder(theName, caverphone1);
134                                        break;
135                                case CAVERPHONE2:
136                                        Caverphone2 caverphone2 = new Caverphone2();
137                                        encoder = new ApacheEncoder(theName, caverphone2);
138                                        break;
139                                case COLOGNE:
140                                        ColognePhonetic colognePhonetic = new ColognePhonetic();
141                                        encoder = new ApacheEncoder(theName, colognePhonetic);
142                                        break;
143                                case DOUBLE_METAPHONE:
144                                        DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
145                                        if (theMax != null) {
146                                                doubleMetaphone.setMaxCodeLen(theMax);
147                                        }
148                                        encoder = new ApacheEncoder(theName, doubleMetaphone);
149                                        break;
150                                case MATCH_RATING_APPROACH:
151                                        MatchRatingApproachEncoder matchRatingApproachEncoder = new MatchRatingApproachEncoder();
152                                        encoder = new ApacheEncoder(theName, matchRatingApproachEncoder);
153                                        break;
154                                case METAPHONE:
155                                        Metaphone metaphone = new Metaphone();
156                                        if (theMax != null) {
157                                                metaphone.setMaxCodeLen(theMax);
158                                        }
159                                        encoder = new ApacheEncoder(theName, metaphone);
160                                        break;
161                                case NYSIIS:
162                                        Nysiis nysiis = new Nysiis();
163                                        encoder = new ApacheEncoder(theName, nysiis);
164                                        break;
165                                case NYSIIS_LONG:
166                                        Nysiis nysiis1_long = new Nysiis(false);
167                                        encoder = new ApacheEncoder(theName, nysiis1_long);
168                                        break;
169                                case REFINED_SOUNDEX:
170                                        RefinedSoundex refinedSoundex = new RefinedSoundex();
171                                        encoder = new ApacheEncoder(theName, refinedSoundex);
172                                        break;
173                                case SOUNDEX:
174                                        Soundex soundex = new Soundex();
175                                        // soundex has deprecated setting the max size
176                                        encoder = new ApacheEncoder(theName, soundex);
177                                        break;
178                                case NUMERIC:
179                                        encoder = new NumericEncoder();
180                                        break;
181                                default:
182                                        // we don't ever expect to be here
183                                        // this log message is purely for devs who update this
184                                        // enum, but not this method
185                                        ourLog.error("Unhandled PhoneticParamEnum value " + enumVal.name());
186                                        break;
187                        }
188                }
189                return encoder;
190        }
191}