001package ca.uhn.fhir.jpa.model.search;
002
003/*-
004 * #%L
005 * HAPI FHIR JPA Model
006 * %%
007 * Copyright (C) 2014 - 2022 Smile CDR, Inc.
008 * %%
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *      http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 * #L%
021 */
022
023import org.hibernate.search.engine.backend.document.DocumentElement;
024import org.hibernate.search.engine.backend.document.model.dsl.IndexSchemaElement;
025import org.hibernate.search.engine.backend.document.model.dsl.IndexSchemaObjectField;
026import org.hibernate.search.engine.backend.types.Aggregable;
027import org.hibernate.search.engine.backend.types.ObjectStructure;
028import org.hibernate.search.engine.backend.types.Projectable;
029import org.hibernate.search.engine.backend.types.dsl.IndexFieldTypeFactory;
030import org.hibernate.search.engine.backend.types.dsl.StringIndexFieldTypeOptionsStep;
031import org.hibernate.search.mapper.pojo.bridge.PropertyBridge;
032import org.hibernate.search.mapper.pojo.bridge.binding.PropertyBindingContext;
033import org.hibernate.search.mapper.pojo.bridge.mapping.programmatic.PropertyBinder;
034import org.hibernate.search.mapper.pojo.bridge.runtime.PropertyBridgeWriteContext;
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037
038import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_EXACT;
039import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_NORMALIZED;
040import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING_TEXT;
041
/**
 * Allows Hibernate Search to index the following elements:
 * <ul>
 * <li>CodeableConcept.text</li>
 * <li>Coding.display</li>
 * <li>Identifier.type.text</li>
 * </ul>
 */
049public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBridge<ExtendedLuceneIndexData> {
050
051        public static final String SEARCH_PARAM_TEXT_PREFIX = "text-";
052        private static final Logger ourLog = LoggerFactory.getLogger(SearchParamTextPropertyBinder.class);
053
054        @Override
055        public void bind(PropertyBindingContext thePropertyBindingContext) {
056                // TODO Is it safe to use object identity of the Map to track dirty?
057                // N.B. GGG I would hazard that it is not, we could potentially use Version of the resource.
058                thePropertyBindingContext.dependencies().use("mySearchParamStrings");
059
060                defineIndexingTemplate(thePropertyBindingContext);
061
062                thePropertyBindingContext.bridge(ExtendedLuceneIndexData.class, this);
063        }
064
065        private void defineIndexingTemplate(PropertyBindingContext thePropertyBindingContext) {
066                IndexSchemaElement indexSchemaElement = thePropertyBindingContext.indexSchemaElement();
067
068                //In order to support dynamic fields, we have to use field templates. We _must_ define the template at bootstrap time and cannot
069                //create them adhoc. https://docs.jboss.org/hibernate/search/6.0/reference/en-US/html_single/#mapper-orm-bridge-index-field-dsl-dynamic
070                //I _think_ im doing the right thing here by indicating that everything matching this template uses this analyzer.
071                IndexFieldTypeFactory indexFieldTypeFactory = thePropertyBindingContext.typeFactory();
072                // TODO mb Once Ken finishes extracting a common base, we can share these constants with HapiElasticsearchAnalysisConfigurer and HapiLuceneAnalysisConfigurer
073                StringIndexFieldTypeOptionsStep<?> standardAnalyzer = indexFieldTypeFactory.asString()
074                        .analyzer("standardAnalyzer")
075                        .projectable(Projectable.NO);
076
077                StringIndexFieldTypeOptionsStep<?> exactAnalyzer =
078                        indexFieldTypeFactory.asString()
079                                .analyzer("exactAnalyzer") // default max-length is 256.  Is that enough for code system uris?
080                                .projectable(Projectable.NO);
081
082                StringIndexFieldTypeOptionsStep<?> normStringAnalyzer = indexFieldTypeFactory.asString()
083                        .analyzer("normStringAnalyzer")
084                        .projectable(Projectable.NO);
085
086                // TODO JB: may have to add normalizer to support case insensitive searches depending on token flags
087                StringIndexFieldTypeOptionsStep<?> keywordFieldType = indexFieldTypeFactory.asString()
088                        .projectable(Projectable.NO)
089                        .aggregable(Aggregable.YES);
090
091
092                // the old style for _text and _contains
093                indexSchemaElement
094                        .fieldTemplate("SearchParamText", standardAnalyzer)
095                        .matchingPathGlob(SEARCH_PARAM_TEXT_PREFIX + "*");
096
097                // The following section is a bit ugly.  We need to enforce order and dependency or the object matches will be too big.
098                {
099                        IndexSchemaObjectField spfield = indexSchemaElement.objectField("sp", ObjectStructure.FLATTENED);
100                        spfield.toReference();
101
102                        // TODO MB: the lucene/elastic independent api is hurting a bit here.
103                        // For lucene, we need a separate field for each analyzer.  So we'll add string (for :exact), and text (for :text).
104                        // They aren't marked stored, so there's no space cost beyond the index for each.
105                        // But for elastic, I'd rather have a single field defined, with multi-field sub-fields.  The index cost is the same,
106                        // but elastic will actually store all fields in the source document.
107                        // Something like this.  But we'll need two index writers (lucene vs hibernate).
108//                      ElasticsearchNativeIndexFieldTypeMappingStep nativeStep = indexFieldTypeFactory.extension(ElasticsearchExtension.get()).asNative();
109//                      nativeStep.mapping()
110
111                        // So triplicate the storage for now. :-(
112                        String stringPathGlob = "*.string";
113                        spfield.objectFieldTemplate("stringIndex", ObjectStructure.FLATTENED).matchingPathGlob(stringPathGlob);
114                        spfield.fieldTemplate("string-norm", normStringAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_NORMALIZED).multiValued();
115                        spfield.fieldTemplate("string-exact", exactAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_EXACT).multiValued();
116                        spfield.fieldTemplate("string-text", standardAnalyzer).matchingPathGlob(stringPathGlob + "." + IDX_STRING_TEXT).multiValued();
117
118                        // token
119                        // Ideally, we'd store a single code-system string and use a custom tokenizer to
120                        // generate "system|" "|code" and "system|code" tokens to support all three.
121                        // But the standard tokenizers aren't that flexible.  As second best, it would be nice to use elastic multi-fields
122                        // to apply three different tokenizers to a single value.
123                        // Instead, just be simple and expand into three full fields for now
124                        String tokenPathGlob = "*.token";
125                        spfield.objectFieldTemplate("tokenIndex", ObjectStructure.FLATTENED).matchingPathGlob(tokenPathGlob);
126                        spfield.fieldTemplate("token-code", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code").multiValued();
127                        spfield.fieldTemplate("token-code-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code-system").multiValued();
128                        spfield.fieldTemplate("token-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".system").multiValued();
129
130                        // reference
131                        spfield.fieldTemplate("reference-value", keywordFieldType).matchingPathGlob("*.reference.value").multiValued();
132
133                        // last, since the globs are matched in declaration order, and * matches even nested nodes.
134                        spfield.objectFieldTemplate("spObject", ObjectStructure.FLATTENED).matchingPathGlob("*");
135                }
136        }
137
138        @Override
139        public void write(DocumentElement theDocument, ExtendedLuceneIndexData theIndexData, PropertyBridgeWriteContext thePropertyBridgeWriteContext) {
140                if (theIndexData != null) {
141                        ourLog.trace("Writing index data for {}", theIndexData);
142                        theIndexData.writeIndexElements(theDocument);
143                }
144        }
145
146}