001package ca.uhn.fhir.util; 002 003/*- 004 * #%L 005 * HAPI FHIR - Core Library 006 * %% 007 * Copyright (C) 2014 - 2022 Smile CDR, Inc. 008 * %% 009 * Licensed under the Apache License, Version 2.0 (the "License"); 010 * you may not use this file except in compliance with the License. 011 * You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, software 016 * distributed under the License is distributed on an "AS IS" BASIS, 017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 018 * See the License for the specific language governing permissions and 019 * limitations under the License. 020 * #L% 021 */ 022 023import ca.uhn.fhir.context.phonetic.ApacheEncoder; 024import ca.uhn.fhir.context.phonetic.IPhoneticEncoder; 025import ca.uhn.fhir.context.phonetic.NumericEncoder; 026import ca.uhn.fhir.context.phonetic.PhoneticEncoderEnum; 027import org.apache.commons.codec.language.Caverphone1; 028import org.apache.commons.codec.language.Caverphone2; 029import org.apache.commons.codec.language.ColognePhonetic; 030import org.apache.commons.codec.language.DoubleMetaphone; 031import org.apache.commons.codec.language.MatchRatingApproachEncoder; 032import org.apache.commons.codec.language.Metaphone; 033import org.apache.commons.codec.language.Nysiis; 034import org.apache.commons.codec.language.RefinedSoundex; 035import org.apache.commons.codec.language.Soundex; 036import org.apache.commons.lang3.EnumUtils; 037import org.slf4j.Logger; 038import org.slf4j.LoggerFactory; 039 040public final class PhoneticEncoderUtil { 041 042 // embedded class only for parameter returns 043 private static class ParsedValues { 044 private final Integer maxCodeLength; 045 private final String encoderString; 046 047 public ParsedValues(String theString, Integer theMaxCode) { 048 maxCodeLength = theMaxCode; 049 encoderString = theString; 050 } 051 052 public Integer getMaxCodeLength() { 053 return maxCodeLength; 054 } 055 056 public String getEncoderString() { 057 return encoderString; 058 } 059 } 060 061 private static final Logger ourLog = LoggerFactory.getLogger(PhoneticEncoderUtil.class); 062 063 private PhoneticEncoderUtil() { 064 } 065 066 /** 067 * Creates the phonetic encoder wrapper from 068 * an input string. 069 * 070 * <p> 071 * String must be in the format of... 072 * </p> 073 * 074 * PhoneticEncoderEnum(MAX_LENGTH) 075 * 076 * @return The IPhoneticEncoder 077 */ 078 public static IPhoneticEncoder getEncoder(String theString) { 079 ParsedValues values = parseIntValue(theString); 080 String encoderType = values.getEncoderString(); 081 Integer encoderMaxString = values.getMaxCodeLength(); 082 083 IPhoneticEncoder encoder = getEncoderFromString(encoderType, encoderMaxString); 084 if (encoder != null) { 085 return encoder; 086 } 087 else { 088 ourLog.warn("Invalid phonetic param string " + theString); 089 return null; 090 } 091 } 092 093 private static ParsedValues parseIntValue(String theString) { 094 String encoderType = null; 095 Integer encoderMaxString = null; 096 097 int braceIndex = theString.indexOf("("); 098 if (braceIndex != -1) { 099 int len = theString.length(); 100 if (theString.charAt(len - 1) == ')') { 101 encoderType = theString.substring(0, braceIndex); 102 String num = theString.substring(braceIndex + 1, len - 1); 103 try { 104 encoderMaxString = Integer.parseInt(num); 105 } catch (NumberFormatException ex) { 106 // invalid number parse error 107 } 108 109 if (encoderMaxString == null 110 || encoderMaxString < 0) { 111 // parse error 112 ourLog.error("Invalid encoder max character length: " + num); 113 encoderType = null; 114 } 115 } 116 // else - parse error 117 } 118 else { 119 encoderType = theString; 120 } 121 122 return new ParsedValues(encoderType, encoderMaxString); 123 } 124 125 private static IPhoneticEncoder getEncoderFromString(String theName, Integer theMax) { 126 IPhoneticEncoder encoder = null; 127 PhoneticEncoderEnum enumVal = EnumUtils.getEnum(PhoneticEncoderEnum.class, theName); 128 129 if (enumVal != null) { 130 switch (enumVal) { 131 case CAVERPHONE1: 132 Caverphone1 caverphone1 = new Caverphone1(); 133 encoder = new ApacheEncoder(theName, caverphone1); 134 break; 135 case CAVERPHONE2: 136 Caverphone2 caverphone2 = new Caverphone2(); 137 encoder = new ApacheEncoder(theName, caverphone2); 138 break; 139 case COLOGNE: 140 ColognePhonetic colognePhonetic = new ColognePhonetic(); 141 encoder = new ApacheEncoder(theName, colognePhonetic); 142 break; 143 case DOUBLE_METAPHONE: 144 DoubleMetaphone doubleMetaphone = new DoubleMetaphone(); 145 if (theMax != null) { 146 doubleMetaphone.setMaxCodeLen(theMax); 147 } 148 encoder = new ApacheEncoder(theName, doubleMetaphone); 149 break; 150 case MATCH_RATING_APPROACH: 151 MatchRatingApproachEncoder matchRatingApproachEncoder = new MatchRatingApproachEncoder(); 152 encoder = new ApacheEncoder(theName, matchRatingApproachEncoder); 153 break; 154 case METAPHONE: 155 Metaphone metaphone = new Metaphone(); 156 if (theMax != null) { 157 metaphone.setMaxCodeLen(theMax); 158 } 159 encoder = new ApacheEncoder(theName, metaphone); 160 break; 161 case NYSIIS: 162 Nysiis nysiis = new Nysiis(); 163 encoder = new ApacheEncoder(theName, nysiis); 164 break; 165 case NYSIIS_LONG: 166 Nysiis nysiis1_long = new Nysiis(false); 167 encoder = new ApacheEncoder(theName, nysiis1_long); 168 break; 169 case REFINED_SOUNDEX: 170 RefinedSoundex refinedSoundex = new RefinedSoundex(); 171 encoder = new ApacheEncoder(theName, refinedSoundex); 172 break; 173 case SOUNDEX: 174 Soundex soundex = new Soundex(); 175 // soundex has deprecated setting the max size 176 encoder = new ApacheEncoder(theName, soundex); 177 break; 178 case NUMERIC: 179 encoder = new NumericEncoder(); 180 break; 181 default: 182 // we don't ever expect to be here 183 // this log message is purely for devs who update this 184 // enum, but not this method 185 ourLog.error("Unhandled PhoneticParamEnum value " + enumVal.name()); 186 break; 187 } 188 } 189 return encoder; 190 } 191}