001package ca.uhn.fhir.util.rdf;
002
003/*-
004 * #%L
005 * HAPI FHIR - Core Library
006 * %%
007 * Copyright (C) 2014 - 2019 University Health Network
008 * %%
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *      http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 * #L%
021 */
022
023import java.util.HashSet;
024import java.util.LinkedList;
025
026import org.apache.jena.graph.Triple;
027import org.apache.jena.riot.system.StreamRDF;
028import org.apache.jena.sparql.core.Quad;
029
030/**
031 * Wraps another {@link StreamRDF} and attempts to remove duplicate
032 * triples and quads. To maintain streaming, duplicates are only
033 * removed within a sliding window of configurable size. Default
034 * size is 10000 triples and quads.
035 */
036public class StreamRDFDedup implements StreamRDF {
037        private final StreamRDF wrapped;
038        private final int windowSize;
039        private final HashSet<Object> tripleAndQuadCache;
040        private final LinkedList<Object> tripleAndQuadList = new LinkedList<Object>();
041
042        public StreamRDFDedup(StreamRDF wrapped) {
043                this(wrapped, 10000);
044        }
045
046        public StreamRDFDedup(StreamRDF wrapped, int windowSize) {
047                this.wrapped = wrapped;
048                this.windowSize = windowSize;
049                // Initial capacity big enough to avoid rehashing
050                this.tripleAndQuadCache = new HashSet<Object>(windowSize * 3 / 2);
051        }
052
053        @Override
054        public void start() {
055                wrapped.start();
056        }
057
058        @Override
059        public void triple(Triple triple) {
060                if (!seen(triple)) {
061                        wrapped.triple(triple);
062                }
063        }
064
065        @Override
066        public void quad(Quad quad) {
067                if (!seen(quad)) {
068                        wrapped.quad(quad);
069                }
070        }
071
072        @Override
073        public void base(String base) {
074                wrapped.base(base);
075        }
076
077        @Override
078        public void prefix(String prefix, String iri) {
079                wrapped.prefix(prefix, iri);
080        }
081
082        @Override
083        public void finish() {
084                wrapped.finish();
085        }
086
087        private boolean seen(Object tuple) {
088                if (tripleAndQuadCache.contains(tuple)) {
089                        return true;
090                }
091                tripleAndQuadCache.add(tuple);
092                tripleAndQuadList.add(tuple);
093                if (tripleAndQuadList.size() > windowSize) {
094                        forgetOldest();
095                }
096                return false;
097        }
098
099        private void forgetOldest() {
100                tripleAndQuadCache.remove(tripleAndQuadList.removeFirst());
101        }
102}