001package ca.uhn.fhir.jpa.model.search; 002 003/*- 004 * #%L 005 * HAPI FHIR JPA Model 006 * %% 007 * Copyright (C) 2014 - 2022 Smile CDR, Inc. 008 * %% 009 * Licensed under the Apache License, Version 2.0 (the "License"); 010 * you may not use this file except in compliance with the License. 011 * You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, software 016 * distributed under the License is distributed on an "AS IS" BASIS, 017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 018 * See the License for the specific language governing permissions and 019 * limitations under the License. 020 * #L% 021 */ 022 023import org.hibernate.search.engine.backend.document.DocumentElement; 024import org.hibernate.search.engine.backend.document.model.dsl.IndexSchemaElement; 025import org.hibernate.search.engine.backend.document.model.dsl.IndexSchemaObjectField; 026import org.hibernate.search.engine.backend.types.Aggregable; 027import org.hibernate.search.engine.backend.types.ObjectStructure; 028import org.hibernate.search.engine.backend.types.Projectable; 029import org.hibernate.search.engine.backend.types.dsl.IndexFieldTypeFactory; 030import org.hibernate.search.engine.backend.types.dsl.StringIndexFieldTypeOptionsStep; 031import org.hibernate.search.mapper.pojo.bridge.PropertyBridge; 032import org.hibernate.search.mapper.pojo.bridge.binding.PropertyBindingContext; 033import org.hibernate.search.mapper.pojo.bridge.mapping.programmatic.PropertyBinder; 034import org.hibernate.search.mapper.pojo.bridge.runtime.PropertyBridgeWriteContext; 035import org.slf4j.Logger; 036import org.slf4j.LoggerFactory; 037 038import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_EXACT; 039import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_NORMALIZED; 040import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_TEXT; 041 042/** 043 * Allows hibernate search to index 044 * <p> 045 * CodeableConcept.text 046 * Coding.display 047 * Identifier.type.text 048 */ 049public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBridge<ExtendedLuceneIndexData> { 050 051 public static final String SEARCH_PARAM_TEXT_PREFIX = "text-"; 052 private static final Logger ourLog = LoggerFactory.getLogger(SearchParamTextPropertyBinder.class); 053 054 @Override 055 public void bind(PropertyBindingContext thePropertyBindingContext) { 056 // TODO Is it safe to use object identity of the Map to track dirty? 057 // N.B. GGG I would hazard that it is not, we could potentially use Version of the resource. 058 thePropertyBindingContext.dependencies().use("mySearchParamStrings"); 059 060 defineIndexingTemplate(thePropertyBindingContext); 061 062 thePropertyBindingContext.bridge(ExtendedLuceneIndexData.class, this); 063 } 064 065 private void defineIndexingTemplate(PropertyBindingContext thePropertyBindingContext) { 066 IndexSchemaElement indexSchemaElement = thePropertyBindingContext.indexSchemaElement(); 067 068 //In order to support dynamic fields, we have to use field templates. We _must_ define the template at bootstrap time and cannot 069 //create them adhoc. https://docs.jboss.org/hibernate/search/6.0/reference/en-US/html_single/#mapper-orm-bridge-index-field-dsl-dynamic 070 //I _think_ im doing the right thing here by indicating that everything matching this template uses this analyzer. 071 IndexFieldTypeFactory indexFieldTypeFactory = thePropertyBindingContext.typeFactory(); 072 // TODO mb Once Ken finishes extracting a common base, we can share these constants with HapiElasticsearchAnalysisConfigurer and HapiLuceneAnalysisConfigurer 073 StringIndexFieldTypeOptionsStep<?> standardAnalyzer = indexFieldTypeFactory.asString() 074 .analyzer("standardAnalyzer") 075 .projectable(Projectable.NO); 076 077 StringIndexFieldTypeOptionsStep<?> exactAnalyzer = 078 indexFieldTypeFactory.asString() 079 .analyzer("exactAnalyzer") // default max-length is 256. Is that enough for code system uris? 080 .projectable(Projectable.NO); 081 082 StringIndexFieldTypeOptionsStep<?> normStringAnalyzer = indexFieldTypeFactory.asString() 083 .analyzer("normStringAnalyzer") 084 .projectable(Projectable.NO); 085 086 // TODO JB: may have to add normalizer to support case insensitive searches depending on token flags 087 StringIndexFieldTypeOptionsStep<?> keywordFieldType = indexFieldTypeFactory.asString() 088 .projectable(Projectable.NO) 089 .aggregable(Aggregable.YES); 090 091 092 // the old style for _text and _contains 093 indexSchemaElement 094 .fieldTemplate("SearchParamText", standardAnalyzer) 095 .matchingPathGlob(SEARCH_PARAM_TEXT_PREFIX + "*"); 096 097 // The following section is a bit ugly. We need to enforce order and dependency or the object matches will be too big. 098 { 099 IndexSchemaObjectField spfield = indexSchemaElement.objectField("sp", ObjectStructure.FLATTENED); 100 spfield.toReference(); 101 102 // TODO MB: the lucene/elastic independent api is hurting a bit here. 103 // For lucene, we need a separate field for each analyzer. So we'll add string (for :exact), and text (for :text). 104 // They aren't marked stored, so there's no space cost beyond the index for each. 105 // But for elastic, I'd rather have a single field defined, with multi-field sub-fields. The index cost is the same, 106 // but elastic will actually store all fields in the source document. 107 // Something like this. But we'll need two index writers (lucene vs hibernate). 108// ElasticsearchNativeIndexFieldTypeMappingStep nativeStep = indexFieldTypeFactory.extension(ElasticsearchExtension.get()).asNative(); 109// nativeStep.mapping() 110 111 // So triplicate the storage for now. :-( 112 String stringPathGlob = "*.string"; 113 spfield.objectFieldTemplate("stringIndex", ObjectStructure.FLATTENED).matchingPathGlob(stringPathGlob); 114 spfield.fieldTemplate("string-norm", normStringAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_NORMALIZED).multiValued(); 115 spfield.fieldTemplate("string-exact", exactAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_EXACT).multiValued(); 116 spfield.fieldTemplate("string-text", standardAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_TEXT).multiValued(); 117 118 // token 119 // Ideally, we'd store a single code-system string and use a custom tokenizer to 120 // generate "system|" "|code" and "system|code" tokens to support all three. 121 // But the standard tokenizers aren't that flexible. As second best, it would be nice to use elastic multi-fields 122 // to apply three different tokenizers to a single value. 123 // Instead, just be simple and expand into three full fields for now 124 String tokenPathGlob = "*.token"; 125 spfield.objectFieldTemplate("tokenIndex", ObjectStructure.FLATTENED).matchingPathGlob(tokenPathGlob); 126 spfield.fieldTemplate("token-code", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code").multiValued(); 127 spfield.fieldTemplate("token-code-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code-system").multiValued(); 128 spfield.fieldTemplate("token-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".system").multiValued(); 129 130 // reference 131 spfield.fieldTemplate("reference-value", keywordFieldType).matchingPathGlob("*.reference.value").multiValued(); 132 133 // last, since the globs are matched in declaration order, and * matches even nested nodes. 134 spfield.objectFieldTemplate("spObject", ObjectStructure.FLATTENED).matchingPathGlob("*"); 135 } 136 } 137 138 @Override 139 public void write(DocumentElement theDocument, ExtendedLuceneIndexData theIndexData, PropertyBridgeWriteContext thePropertyBridgeWriteContext) { 140 if (theIndexData != null) { 141 ourLog.trace("Writing index data for {}", theIndexData); 142 theIndexData.writeIndexElements(theDocument); 143 } 144 } 145 146}