001package ca.uhn.fhir.rest.server.interceptor.s13n.standardizers;
002
003/*-
004 * #%L
005 * HAPI FHIR - Server Framework
006 * %%
007 * Copyright (C) 2014 - 2022 Smile CDR, Inc.
008 * %%
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *      http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 * #L%
021 */
022
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.CaseUtils;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
030
031/**
 * Standardizes a first name by capitalizing the first character of each part delimited by a separator (e.g. -, ') and removing noise characters.
033 */
034public class FirstNameStandardizer extends TextStandardizer {
035
036        private Set<String> myDelimiters = new HashSet<>();
037
038        public FirstNameStandardizer() {
039                super();
040
041                initializeDelimiters();
042        }
043
044        protected void initializeDelimiters() {
045                addDelimiters("-", "'");
046        }
047
048        protected FirstNameStandardizer addDelimiters(String... theDelimiters) {
049                myDelimiters.addAll(Arrays.asList(theDelimiters));
050                return this;
051        }
052
053        public String standardize(String theString) {
054                theString = replaceTranslates(theString);
055
056                return Arrays.stream(theString.split("\\s+"))
057                        .map(this::standardizeNameToken)
058                        .filter(s -> !StringUtils.isEmpty(s))
059                        .collect(Collectors.joining(" "));
060        }
061
062        protected String capitalize(String theString) {
063                if (theString.length() == 0) {
064                        return theString;
065                }
066                if (theString.length() == 1) {
067                        return theString.toUpperCase();
068                }
069
070                StringBuilder buf = new StringBuilder(theString.length());
071                buf.append(Character.toUpperCase(theString.charAt(0)));
072                buf.append(theString.substring(1));
073                return buf.toString();
074        }
075
076        protected String standardizeNameToken(String theToken) {
077                if (theToken.isEmpty()) {
078                        return theToken;
079                }
080
081                boolean isDelimitedToken = false;
082                for (String d : myDelimiters) {
083                        if (theToken.contains(d)) {
084                                isDelimitedToken = true;
085                                theToken = standardizeDelimitedToken(theToken, d);
086                        }
087                }
088
089                if (isDelimitedToken) {
090                        return theToken;
091                }
092
093                theToken = removeNoise(theToken);
094                theToken = CaseUtils.toCamelCase(theToken, true);
095                return theToken;
096        }
097
098        protected String standardizeDelimitedToken(String theToken, String d) {
099                boolean isTokenTheDelimiter = theToken.equals(d);
100                if (isTokenTheDelimiter) {
101                        return theToken;
102                }
103
104                String splitToken = checkForRegexp(d);
105                String[] splits = theToken.split(splitToken);
106                for (int i = 0; i < splits.length; i++) {
107                        splits[i] = standardizeNameToken(splits[i]);
108                }
109
110                String retVal = join(splits, d);
111                if (theToken.startsWith(d)) {
112                        retVal = d.concat(retVal);
113                }
114                if (theToken.endsWith(d)) {
115                        retVal = retVal.concat(d);
116                }
117                return retVal;
118        }
119
120        protected String join(String[] theSplits, String theDelimiter) {
121                StringBuilder buf = new StringBuilder();
122                for (int i = 0; i < theSplits.length; i++) {
123                        String s = theSplits[i];
124                        if (s == null || s.isEmpty()) {
125                                continue;
126                        }
127                        if (buf.length() != 0) {
128                                buf.append(theDelimiter);
129                        }
130                        buf.append(s);
131
132                }
133                return buf.toString();
134        }
135
136        protected String checkForRegexp(String theExpression) {
137                if (theExpression.equals(".") || theExpression.equals("|")
138                        || theExpression.equals("(") || theExpression.equals(")")) {
139                        return "\\".concat(theExpression);
140                }
141                return theExpression;
142        }
143
144        protected boolean isDelimiter(String theString) {
145                return myDelimiters.contains(theString);
146        }
147}