package ca.uhn.fhir.rest.server.interceptor.s13n.standardizers;

/*-
 * #%L
 * HAPI FHIR - Server Framework
 * %%
 * Copyright (C) 2014 - 2022 Smile CDR, Inc.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.CaseUtils;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * Standardizes a first name by capitalizing the characters that follow a separator (e.g. -, ') and removing
 * noise characters.
 */
public class FirstNameStandardizer extends TextStandardizer {

	/** Literal separators that split a name token into independently standardized parts. */
	private final Set<String> myDelimiters = new HashSet<>();

	/**
	 * Creates a standardizer and registers its delimiters through {@link #initializeDelimiters()}.
	 */
	public FirstNameStandardizer() {
		super();

		initializeDelimiters();
	}

	/**
	 * Registers the default delimiters: hyphen and apostrophe.
	 * <p>
	 * This method is invoked from the constructor, so an override runs before the overriding class' own
	 * field initializers and constructor body have executed.
	 */
	protected void initializeDelimiters() {
		addDelimiters("-", "'");
	}

	/**
	 * Registers additional delimiters. Each delimiter is treated as a literal string, not as a regular
	 * expression.
	 *
	 * @param theDelimiters delimiters to add; {@code null} and empty entries are ignored
	 * @return this instance, to allow call chaining
	 */
	protected FirstNameStandardizer addDelimiters(String... theDelimiters) {
		for (String delimiter : theDelimiters) {
			// An empty delimiter is "contained" in every token and would make the mutual recursion between
			// standardizeNameToken and standardizeDelimitedToken never terminate, so it is not registered.
			if (delimiter == null || delimiter.isEmpty()) {
				continue;
			}
			myDelimiters.add(delimiter);
		}
		return this;
	}

	/**
	 * Standardizes a full first-name string. The input is first passed through {@code replaceTranslates},
	 * then split on runs of whitespace; every token is standardized with
	 * {@link #standardizeNameToken(String)}, empty results are dropped and the remaining tokens are joined
	 * with a single space.
	 *
	 * @param theString the raw name
	 * @return the standardized name
	 */
	public String standardize(String theString) {
		theString = replaceTranslates(theString);

		return Arrays.stream(theString.split("\\s+"))
			.map(this::standardizeNameToken)
			.filter(s -> !StringUtils.isEmpty(s))
			.collect(Collectors.joining(" "));
	}

	/**
	 * Upper-cases the first character of the given string and leaves the rest untouched.
	 *
	 * @param theString the string to capitalize; must not be {@code null}
	 * @return the capitalized string, or the input itself when it is empty
	 */
	protected String capitalize(String theString) {
		if (theString.length() == 0) {
			return theString;
		}
		if (theString.length() == 1) {
			// Locale.ROOT keeps the result independent of the JVM default locale, in line with the
			// locale-independent Character.toUpperCase used for longer input below.
			return theString.toUpperCase(Locale.ROOT);
		}

		StringBuilder buf = new StringBuilder(theString.length());
		buf.append(Character.toUpperCase(theString.charAt(0)));
		buf.append(theString.substring(1));
		return buf.toString();
	}

	/**
	 * Standardizes a single whitespace-free token.
	 * <p>
	 * If the token contains any registered delimiter it is processed by
	 * {@link #standardizeDelimitedToken(String, String)} once for each such delimiter and the result is
	 * returned as is. Otherwise the token is passed through {@code removeNoise} and then
	 * {@link CaseUtils#toCamelCase(String, boolean, char...)} with the first letter capitalized.
	 *
	 * @param theToken the token to standardize; must not be {@code null}
	 * @return the standardized token
	 */
	protected String standardizeNameToken(String theToken) {
		if (theToken.isEmpty()) {
			return theToken;
		}

		boolean isDelimitedToken = false;
		for (String d : myDelimiters) {
			if (theToken.contains(d)) {
				isDelimitedToken = true;
				theToken = standardizeDelimitedToken(theToken, d);
			}
		}

		if (isDelimitedToken) {
			// The parts were already cleaned and cased by the recursive calls made while splitting.
			return theToken;
		}

		theToken = removeNoise(theToken);
		theToken = CaseUtils.toCamelCase(theToken, true);
		return theToken;
	}

	/**
	 * Standardizes a token that contains the given delimiter: the token is split on the delimiter, every
	 * part is standardized with {@link #standardizeNameToken(String)}, and the non-empty parts are joined
	 * back with the delimiter. A delimiter at the very start or end of the token is preserved.
	 *
	 * @param theToken the token to standardize
	 * @param d        the delimiter contained in the token
	 * @return the standardized token; the token itself when it consists solely of the delimiter
	 */
	protected String standardizeDelimitedToken(String theToken, String d) {
		boolean isTokenTheDelimiter = theToken.equals(d);
		if (isTokenTheDelimiter) {
			return theToken;
		}

		String splitToken = checkForRegexp(d);
		String[] splits = theToken.split(splitToken);
		for (int i = 0; i < splits.length; i++) {
			splits[i] = standardizeNameToken(splits[i]);
		}

		String retVal = join(splits, d);
		// join() skips empty parts, so leading and trailing delimiters have to be restored explicitly.
		if (theToken.startsWith(d)) {
			retVal = d.concat(retVal);
		}
		if (theToken.endsWith(d)) {
			retVal = retVal.concat(d);
		}
		return retVal;
	}

	/**
	 * Joins the given parts with the delimiter, skipping {@code null} and empty parts so that no doubled
	 * delimiters are produced.
	 *
	 * @param theSplits    the parts to join
	 * @param theDelimiter the separator placed between consecutive non-empty parts
	 * @return the joined string; empty when there is no non-empty part
	 */
	protected String join(String[] theSplits, String theDelimiter) {
		StringBuilder buf = new StringBuilder();
		for (String s : theSplits) {
			if (s == null || s.isEmpty()) {
				continue;
			}
			if (buf.length() != 0) {
				buf.append(theDelimiter);
			}
			buf.append(s);
		}
		return buf.toString();
	}

	/**
	 * Converts a literal delimiter into a regular expression that matches exactly that delimiter, so it
	 * can be passed safely to {@link String#split(String)}.
	 *
	 * @param theExpression the literal delimiter
	 * @return a regular expression matching {@code theExpression} literally
	 */
	protected String checkForRegexp(String theExpression) {
		// Pattern.quote neutralizes every regex metacharacter, not just a hand-picked few.
		return Pattern.quote(theExpression);
	}

	/**
	 * Tells whether the given string is one of the registered delimiters.
	 *
	 * @param theString the string to check
	 * @return {@code true} if the string equals a registered delimiter
	 */
	protected boolean isDelimiter(String theString) {
		return myDelimiters.contains(theString);
	}
}