package ca.uhn.fhir.util;

import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.context.RuntimeResourceDefinition;
import ca.uhn.fhir.i18n.Msg;
import ca.uhn.fhir.model.primitive.IdDt;
import ca.uhn.fhir.parser.DataFormatException;
import ca.uhn.fhir.rest.api.Constants;
import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
import com.google.common.escape.Escaper;
import com.google.common.net.PercentEscaper;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.message.BasicNameValuePair;
import org.hl7.fhir.instance.model.api.IPrimitiveType;

import javax.annotation.Nonnull;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static org.apache.commons.lang3.StringUtils.defaultIfBlank;
import static org.apache.commons.lang3.StringUtils.defaultString;
import static org.apache.commons.lang3.StringUtils.endsWith;
import static org.apache.commons.lang3.StringUtils.isBlank;

/*
 * #%L
 * HAPI FHIR - Core Library
 * %%
 * Copyright (C) 2014 - 2022 Smile CDR, Inc.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

/**
 * Static helpers for building, escaping, sanitizing and parsing URLs and
 * query strings.
 */
public class UrlUtil {
	private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(UrlUtil.class);

	private static final String URL_FORM_PARAMETER_OTHER_SAFE_CHARS = "-_.*";
	private static final Escaper PARAMETER_ESCAPER = new PercentEscaper(URL_FORM_PARAMETER_OTHER_SAFE_CHARS, false);

	/**
	 * Matches every character that is NOT allowed to remain in a sanitized base URL.
	 * Compiled once so that {@link #sanitizeBaseUrl(String)} does not recompile it per call.
	 */
	private static final Pattern BASE_URL_DISALLOWED_CHARS = Pattern.compile("[^a-zA-Z0-9:/._-]");

	/**
	 * Removes every character from the given base URL other than ASCII letters,
	 * digits and the characters <code>: / . _ -</code>
	 *
	 * @param theBaseUrl The base URL to clean. Must not be <code>null</code>.
	 * @return The input with all other characters removed
	 */
	public static String sanitizeBaseUrl(String theBaseUrl) {
		return BASE_URL_DISALLOWED_CHARS.matcher(theBaseUrl).replaceAll("");
	}

	/**
	 * Simple mutable holder for the pieces of a resource URL, as
	 * populated by {@link UrlUtil#parseUrl(String)}
	 */
	public static class UrlParts {
		private String myParams;
		private String myResourceId;
		private String myResourceType;
		private String myVersionId;

		/**
		 * @return The raw query string (the part after the "?"), or <code>null</code> if none was set
		 */
		public String getParams() {
			return myParams;
		}

		public void setParams(String theParams) {
			myParams = theParams;
		}

		/**
		 * @return The resource ID portion of the URL, or <code>null</code> if none was set
		 */
		public String getResourceId() {
			return myResourceId;
		}

		public void setResourceId(String theResourceId) {
			myResourceId = theResourceId;
		}

		/**
		 * @return The resource type portion of the URL, or <code>null</code> if none was set
		 */
		public String getResourceType() {
			return myResourceType;
		}

		public void setResourceType(String theResourceType) {
			myResourceType = theResourceType;
		}

		/**
		 * @return The version ID portion of the URL, or <code>null</code> if none was set
		 */
		public String getVersionId() {
			return myVersionId;
		}

		public void setVersionId(String theVersionId) {
			myVersionId = theVersionId;
		}
	}

	/**
	 * Resolve a relative URL - THIS METHOD WILL NOT FAIL but will log a warning and return theEndpoint if the input is invalid.
	 *
	 * @param theBase     The absolute base URL to resolve against. If <code>null</code>, theEndpoint is returned unchanged.
	 * @param theEndpoint The URL to resolve. If <code>null</code>, <code>null</code> is returned. If it is already
	 *                    absolute (see {@link #isAbsolute(String)}) it is returned unchanged.
	 * @return The resolved URL, or theEndpoint if it could not be resolved
	 */
	public static String constructAbsoluteUrl(String theBase, String theEndpoint) {
		if (theEndpoint == null) {
			return null;
		}
		if (isAbsolute(theEndpoint) || theBase == null) {
			return theEndpoint;
		}

		try {
			return new URL(new URL(theBase), theEndpoint).toString();
		} catch (MalformedURLException e) {
			// The trailing Throwable argument is logged as the exception, not as a placeholder value
			ourLog.warn("Failed to resolve relative URL[{}] against absolute base[{}]", theEndpoint, theBase, e);
			return theEndpoint;
		}
	}

	/**
	 * Shortens an extension URL to its final path segment when it shares everything up to
	 * (and including the position of) the last "/" with the parent extension URL. In every
	 * other case the extension URL is returned unchanged.
	 *
	 * @param theParentExtensionUrl The URL of the parent extension. If <code>null</code>, theExtensionUrl is returned unchanged.
	 * @param theExtensionUrl       The URL of the child extension. If <code>null</code> (and the parent is not), <code>null</code> is returned.
	 * @return The part of theExtensionUrl after the last "/", or theExtensionUrl itself if the two URLs do not share a prefix
	 */
	public static String constructRelativeUrl(String theParentExtensionUrl, String theExtensionUrl) {
		if (theParentExtensionUrl == null) {
			return theExtensionUrl;
		}
		if (theExtensionUrl == null) {
			return null;
		}

		int parentLastSlashIdx = theParentExtensionUrl.lastIndexOf('/');
		int childLastSlashIdx = theExtensionUrl.lastIndexOf('/');

		// Both URLs need a slash, and it has to be at the same position in each
		if (parentLastSlashIdx == -1 || parentLastSlashIdx != childLastSlashIdx) {
			return theExtensionUrl;
		}

		// Everything before that slash has to be identical
		if (!theParentExtensionUrl.regionMatches(0, theExtensionUrl, 0, parentLastSlashIdx)) {
			return theExtensionUrl;
		}

		// The child is known to contain a slash at this index, so the substring is always in range
		return theExtensionUrl.substring(parentLastSlashIdx + 1);
	}

	/**
	 * URL encode a value according to RFC 3986
	 * <p>
	 * This method is intended to be applied to an individual parameter
	 * name or value. For example, if you are creating the URL
	 * <code>http://example.com/fhir/Patient?key=føø</code>
	 * it would be appropriate to pass the string "føø" to this method,
	 * but not appropriate to pass the entire URL since characters
	 * such as "/" and "?" would also be escaped.
	 * </p>
	 *
	 * @param theUnescaped The raw value, may be <code>null</code>
	 * @return The escaped value, or <code>null</code> if the input was <code>null</code>
	 */
	public static String escapeUrlParam(String theUnescaped) {
		if (theUnescaped == null) {
			return null;
		}
		return PARAMETER_ESCAPER.escape(theUnescaped);
	}

	/**
	 * Applies the same encoding as {@link #escapeUrlParam(String)} but against all
	 * values in a collection
	 *
	 * @param theUnescaped The raw values. <code>null</code> elements are passed through as <code>null</code>.
	 * @return A new list holding the escaped values in iteration order
	 */
	public static List<String> escapeUrlParams(@Nonnull Collection<String> theUnescaped) {
		return theUnescaped
			.stream()
			.map(UrlUtil::escapeUrlParam)
			.collect(Collectors.toList());
	}

	/**
	 * Tests whether a value starts with <code>http://</code> or <code>https://</code> (case insensitive)
	 *
	 * @param theValue The value to test, may be <code>null</code>
	 * @return <code>true</code> if the value has one of those prefixes, <code>false</code> otherwise (including for <code>null</code>)
	 */
	public static boolean isAbsolute(String theValue) {
		if (theValue == null) {
			return false;
		}
		// Locale.ROOT keeps the case folding independent of the JVM default locale
		String value = theValue.toLowerCase(Locale.ROOT);
		return value.startsWith("http://") || value.startsWith("https://");
	}

	/**
	 * Tests whether {@link #sanitizeUrlPart(CharSequence)} would modify the given string, i.e.
	 * whether it contains a quote, an angle bracket, or any character below the space character
	 * (which includes newline and carriage return).
	 *
	 * @param theString The string to test, may be <code>null</code>
	 * @return <code>true</code> if at least one such character is present
	 */
	public static boolean isNeedsSanitization(CharSequence theString) {
		if (theString == null) {
			return false;
		}
		for (int i = 0; i < theString.length(); i++) {
			char nextChar = theString.charAt(i);
			switch (nextChar) {
				case '\'':
				case '"':
				case '<':
				case '>':
				case '\n':
				case '\r':
					return true;
				default:
					if (nextChar < ' ') {
						return true;
					}
					break;
			}
		}
		return false;
	}

	/**
	 * Performs a cheap structural check that a URL is at least 8 characters long and
	 * starts with <code>http://</code> or <code>https://</code> (case insensitive). Nothing
	 * after the prefix is inspected.
	 *
	 * @param theUrl The URL to test, may be <code>null</code>
	 * @return <code>true</code> if the URL passes the check
	 */
	public static boolean isValid(String theUrl) {
		if (theUrl == null || theUrl.length() < 8) {
			return false;
		}

		String url = theUrl.toLowerCase(Locale.ROOT);
		return url.startsWith("http://") || url.startsWith("https://");
	}

	/**
	 * Extracts the resource name from a URL such as <code>Observation?param=foo</code>,
	 * <code>Observation/?param=foo</code> or <code>http://example.com/fhir/Observation?param=foo</code>
	 * and looks up its definition. The resource name is the last path segment before the "?".
	 * If the URL contains no "?", the last path segment of the whole URL is used.
	 *
	 * @param theCtx The context used to look up the resource definition
	 * @param theUrl The URL to inspect. Must not be <code>null</code>.
	 * @return The value returned by {@link FhirContext#getResourceDefinition(String)} for the extracted name
	 * @throws DataFormatException As declared - presumably raised by the context lookup when the name is not a known resource type
	 */
	public static RuntimeResourceDefinition parseUrlResourceType(FhirContext theCtx, String theUrl) throws DataFormatException {
		String url = theUrl;
		int paramIndex = url.indexOf('?');

		// Change pattern of "Observation/?param=foo" into "Observation?param=foo"
		if (paramIndex > 0 && url.charAt(paramIndex - 1) == '/') {
			url = url.substring(0, paramIndex - 1) + url.substring(paramIndex);
			paramIndex--;
		}

		// With no "?" present the whole string is the path; substring(0, -1) would throw
		String resourceName = (paramIndex == -1) ? url : url.substring(0, paramIndex);

		int lastSlashIdx = resourceName.lastIndexOf('/');
		if (lastSlashIdx != -1) {
			resourceName = resourceName.substring(lastSlashIdx + 1);
		}
		return theCtx.getResourceDefinition(resourceName);
	}

	/**
	 * Parses a query string of the form <code>a=b&amp;c=d</code> (with or without a leading "?")
	 * into a map of parameter name to values. Names and values are URL-decoded.
	 *
	 * @param theQueryString The query string, may be <code>null</code> (treated as empty)
	 * @return A new map where each key holds its values in order of appearance
	 */
	public static Map<String, String[]> parseQueryString(String theQueryString) {
		Map<String, List<String>> map = new HashMap<>();
		parseQueryString(theQueryString, map);
		return toQueryStringMap(map);
	}

	/**
	 * Parses one query string and appends its decoded name/value pairs to the given map.
	 */
	private static void parseQueryString(String theQueryString, Map<String, List<String>> theTarget) {
		String query = defaultString(theQueryString);
		if (query.startsWith("?")) {
			query = query.substring(1);
		}

		StringTokenizer tok = new StringTokenizer(query, "&");
		while (tok.hasMoreTokens()) {
			String nextToken = tok.nextToken();
			if (isBlank(nextToken)) {
				continue;
			}

			// A token without "=" is a name with an empty value
			int equalsIndex = nextToken.indexOf('=');
			String nextKey;
			String nextValue;
			if (equalsIndex == -1) {
				nextKey = nextToken;
				nextValue = "";
			} else {
				nextKey = nextToken.substring(0, equalsIndex);
				nextValue = nextToken.substring(equalsIndex + 1);
			}

			theTarget
				.computeIfAbsent(unescape(nextKey), k -> new ArrayList<>())
				.add(unescape(nextValue));
		}
	}

	/**
	 * Applies the same parsing as {@link #parseQueryString(String)} to several query strings
	 * and merges the results into a single map.
	 *
	 * @param theQueryString The query strings to parse, in order
	 * @return A new map where each key holds the values collected from all of the inputs
	 */
	public static Map<String, String[]> parseQueryStrings(String... theQueryString) {
		Map<String, List<String>> map = new HashMap<>();
		for (String next : theQueryString) {
			parseQueryString(next, map);
		}
		return toQueryStringMap(map);
	}

	/**
	 * Normalizes canonical URLs for comparison. Trailing "/" is stripped,
	 * and any version identifiers or fragment hash is removed
	 *
	 * @param theUrl The canonical URL to normalize
	 * @return The normalized URL. If the input is not a syntactically valid URI, it is used
	 * as-is for the stripping steps.
	 */
	public static String normalizeCanonicalUrlForComparison(String theUrl) {
		String retVal;
		try {
			retVal = new URI(theUrl).normalize().toString();
		} catch (URISyntaxException e) {
			// Not parseable as a URI, so fall back to the plain string handling below
			retVal = theUrl;
		}
		while (endsWith(retVal, "/")) {
			retVal = retVal.substring(0, retVal.length() - 1);
		}
		int hashOrPipeIndex = StringUtils.indexOfAny(retVal, '#', '|');
		if (hashOrPipeIndex != -1) {
			retVal = retVal.substring(0, hashOrPipeIndex);
		}
		return retVal;
	}

	/**
	 * Parse a URL in one of the following forms:
	 * <ul>
	 * <li>[Resource Type]?[Search Params]
	 * <li>[Resource Type]/[Resource ID]
	 * <li>[Resource Type]/[Resource ID]/_history/[Version ID]
	 * </ul>
	 *
	 * @param theUrl The URL to parse. Must not be <code>null</code>.
	 * @return The parts found in the URL. Parts that are not present are left <code>null</code>.
	 * @throws InvalidRequestException If a path segment follows the resource ID and is not the history token
	 */
	public static UrlParts parseUrl(String theUrl) {
		String url = theUrl;
		UrlParts retVal = new UrlParts();

		// Absolute URLs: split off the query string and let IdDt pick the path apart
		if (url.startsWith("http")) {
			int qmIdx = url.indexOf('?');
			if (qmIdx != -1) {
				retVal.setParams(defaultIfBlank(url.substring(qmIdx + 1), null));
				url = url.substring(0, qmIdx);
			}

			IdDt id = new IdDt(url);
			retVal.setResourceType(id.getResourceType());
			retVal.setResourceId(id.getIdPart());
			retVal.setVersionId(id.getVersionIdPart());
			return retVal;
		}

		// Skip a single leading slash when it is directly followed by a letter
		int parsingStart = 0;
		if (url.length() > 2 && url.charAt(0) == '/' && Character.isLetter(url.charAt(1))) {
			parsingStart = 1;
		}

		int nextStart = parsingStart;
		boolean nextIsHistory = false;

		for (int idx = parsingStart; idx < url.length(); idx++) {
			char nextChar = url.charAt(idx);
			boolean atEnd = (idx + 1) == url.length();
			if (nextChar == '?' || nextChar == '/' || atEnd) {
				// At the end of the string the current character belongs to the segment, unless it is the "?"
				int endIdx = (atEnd && nextChar != '?') ? idx + 1 : idx;
				String nextSubstring = url.substring(nextStart, endIdx);

				// Segments are assigned in order: type, ID, history token, version
				if (retVal.getResourceType() == null) {
					retVal.setResourceType(nextSubstring);
				} else if (retVal.getResourceId() == null) {
					retVal.setResourceId(nextSubstring);
				} else if (nextIsHistory) {
					retVal.setVersionId(nextSubstring);
				} else if (nextSubstring.equals(Constants.URL_TOKEN_HISTORY)) {
					nextIsHistory = true;
				} else {
					throw new InvalidRequestException(Msg.code(1742) + "Invalid FHIR resource URL: " + url);
				}

				// Everything after the "?" is the query string, so the path is finished
				if (nextChar == '?') {
					if (url.length() > idx + 1) {
						retVal.setParams(url.substring(idx + 1));
					}
					break;
				}
				nextStart = idx + 1;
			}
		}

		return retVal;
	}

	/**
	 * This method specifically HTML-encodes the &quot; and
	 * &lt; characters in order to prevent injection attacks
	 *
	 * @param theString The value to sanitize, may be <code>null</code>
	 * @return The sanitized string value, or <code>null</code> if the input was <code>null</code>
	 * @see #sanitizeUrlPart(CharSequence)
	 */
	public static String sanitizeUrlPart(IPrimitiveType<?> theString) {
		String retVal = null;
		if (theString != null) {
			retVal = sanitizeUrlPart(theString.getValueAsString());
		}
		return retVal;
	}

	/**
	 * This method specifically HTML-encodes the &quot; and
	 * &lt; characters in order to prevent injection attacks.
	 * <p>
	 * The following characters are escaped:
	 * <ul>
	 * <li>{@code '} becomes {@code &apos;}</li>
	 * <li>{@code "} becomes {@code &quot;}</li>
	 * <li>{@code <} becomes {@code &lt;}</li>
	 * <li>{@code >} becomes {@code &gt;}</li>
	 * <li>\n (newline) becomes {@code &#10;}</li>
	 * <li>\r (carriage return) becomes {@code &#13;}</li>
	 * </ul>
	 * Any other character below the space character is removed.
	 *
	 * @param theString The value to sanitize, may be <code>null</code>
	 * @return The sanitized value, or <code>null</code> if the input was <code>null</code>
	 */
	public static String sanitizeUrlPart(CharSequence theString) {
		if (theString == null) {
			return null;
		}

		// Fast path: most values contain nothing that needs escaping
		if (!isNeedsSanitization(theString)) {
			return theString.toString();
		}

		// Ok, we're sanitizing
		StringBuilder buffer = new StringBuilder(theString.length() + 10);
		for (int j = 0; j < theString.length(); j++) {

			char nextChar = theString.charAt(j);
			switch (nextChar) {
				/*
				 * NB: If you add a constant here, you also need to add it
				 * to isNeedsSanitization()!!
				 */
				case '\'':
					buffer.append("&apos;");
					break;
				case '"':
					buffer.append("&quot;");
					break;
				case '<':
					buffer.append("&lt;");
					break;
				case '>':
					buffer.append("&gt;");
					break;
				case '\n':
					buffer.append("&#10;");
					break;
				case '\r':
					buffer.append("&#13;");
					break;
				default:
					// Remaining control characters are dropped rather than escaped
					if (nextChar >= ' ') {
						buffer.append(nextChar);
					}
					break;
			}

		} // for build escaped string

		return buffer.toString();
	}

	/**
	 * Applies the same logic as {@link #sanitizeUrlPart(CharSequence)} but against an array, returning an array with the
	 * same strings as the input but with sanitization applied
	 *
	 * @param theParameterValues The values to sanitize, may be <code>null</code>
	 * @return A new array of the same length, or <code>null</code> if the input was <code>null</code>
	 */
	public static String[] sanitizeUrlPart(String[] theParameterValues) {
		String[] retVal = null;
		if (theParameterValues != null) {
			retVal = new String[theParameterValues.length];
			for (int i = 0; i < theParameterValues.length; i++) {
				retVal[i] = sanitizeUrlPart(theParameterValues[i]);
			}
		}
		return retVal;
	}

	/**
	 * Converts a map of value lists into a new map of value arrays.
	 */
	private static Map<String, String[]> toQueryStringMap(Map<String, List<String>> theSource) {
		Map<String, String[]> retVal = new HashMap<>();
		for (Entry<String, List<String>> nextEntry : theSource.entrySet()) {
			retVal.put(nextEntry.getKey(), nextEntry.getValue().toArray(new String[0]));
		}
		return retVal;
	}

	/**
	 * URL-decodes a string using UTF-8. Strings containing neither "%" nor "+" are
	 * returned as-is without being passed through the decoder.
	 *
	 * @param theString The escaped value, may be <code>null</code>
	 * @return The decoded value, or <code>null</code> if the input was <code>null</code>
	 */
	public static String unescape(String theString) {
		if (theString == null) {
			return null;
		}
		if (theString.indexOf('%') == -1 && theString.indexOf('+') == -1) {
			return theString;
		}
		try {
			// Yes it would be nice to not use a string "UTF-8" but the equivalent
			// method that takes Charset is JDK10+ only... sigh....
			return URLDecoder.decode(theString, "UTF-8");
		} catch (UnsupportedEncodingException e) {
			throw new Error(Msg.code(1743) + "UTF-8 not supported, this shouldn't happen", e);
		}
	}

	/**
	 * Splits the query part of a match URL (e.g. <code>Patient?identifier=foo|bar</code>) into
	 * name/value pairs. Everything up to and including the first "?" is discarded. Literal
	 * "+" and "|" characters, and the "&gt;", "&lt;", "&gt;=" and "&lt;=" sequences directly
	 * following an "=", are percent-encoded before parsing so that they end up in the parsed values.
	 *
	 * @param theMatchUrl The match URL. Must not be <code>null</code>.
	 * @return The parsed parameters, in order of appearance
	 * @throws InvalidRequestException If the URL contains a space
	 */
	public static List<NameValuePair> translateMatchUrl(String theMatchUrl) {
		String matchUrl = theMatchUrl;
		int questionMarkIndex = matchUrl.indexOf('?');
		if (questionMarkIndex != -1) {
			matchUrl = matchUrl.substring(questionMarkIndex + 1);
		}

		final String[] searchList = new String[]{
			"+",
			"|",
			"=>=",
			"=<=",
			"=>",
			"=<"
		};
		final String[] replacementList = new String[]{
			"%2B",
			"%7C",
			"=%3E%3D",
			"=%3C%3D",
			"=%3E",
			"=%3C"
		};
		matchUrl = StringUtils.replaceEach(matchUrl, searchList, replacementList);
		if (matchUrl.contains(" ")) {
			throw new InvalidRequestException(Msg.code(1744) + "Failed to parse match URL[" + theMatchUrl + "] - URL is invalid (must not contain spaces)");
		}

		List<NameValuePair> parameters = URLEncodedUtils.parse(matchUrl, Constants.CHARSET_UTF8, '&');

		// One issue that has happened before is people putting a "+" sign into an email address in a match URL
		// and having that turn into a " ". Since spaces are never appropriate for email addresses, let's just
		// assume they really meant "+".
		for (int i = 0; i < parameters.size(); i++) {
			NameValuePair next = parameters.get(i);
			// A parameter given without "=" may carry no value at all, so guard before inspecting it
			String nextValue = next.getValue();
			if ("email".equals(next.getName()) && nextValue != null && nextValue.contains(" ")) {
				parameters.set(i, new BasicNameValuePair(next.getName(), nextValue.replace(' ', '+')));
			}
		}

		return parameters;
	}
}