001package ca.uhn.fhir.util.rdf; 002 003/*- 004 * #%L 005 * HAPI FHIR - Core Library 006 * %% 007 * Copyright (C) 2014 - 2019 University Health Network 008 * %% 009 * Licensed under the Apache License, Version 2.0 (the "License"); 010 * you may not use this file except in compliance with the License. 011 * You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, software 016 * distributed under the License is distributed on an "AS IS" BASIS, 017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 018 * See the License for the specific language governing permissions and 019 * limitations under the License. 020 * #L% 021 */ 022 023import java.util.HashSet; 024import java.util.LinkedList; 025 026import org.apache.jena.graph.Triple; 027import org.apache.jena.riot.system.StreamRDF; 028import org.apache.jena.sparql.core.Quad; 029 030/** 031 * Wraps another {@link StreamRDF} and attempts to remove duplicate 032 * triples and quads. To maintain streaming, duplicates are only 033 * removed within a sliding window of configurable size. Default 034 * size is 10000 triples and quads. 035 */ 036public class StreamRDFDedup implements StreamRDF { 037 private final StreamRDF wrapped; 038 private final int windowSize; 039 private final HashSet<Object> tripleAndQuadCache; 040 private final LinkedList<Object> tripleAndQuadList = new LinkedList<Object>(); 041 042 public StreamRDFDedup(StreamRDF wrapped) { 043 this(wrapped, 10000); 044 } 045 046 public StreamRDFDedup(StreamRDF wrapped, int windowSize) { 047 this.wrapped = wrapped; 048 this.windowSize = windowSize; 049 // Initial capacity big enough to avoid rehashing 050 this.tripleAndQuadCache = new HashSet<Object>(windowSize * 3 / 2); 051 } 052 053 @Override 054 public void start() { 055 wrapped.start(); 056 } 057 058 @Override 059 public void triple(Triple triple) { 060 if (!seen(triple)) { 061 wrapped.triple(triple); 062 } 063 } 064 065 @Override 066 public void quad(Quad quad) { 067 if (!seen(quad)) { 068 wrapped.quad(quad); 069 } 070 } 071 072 @Override 073 public void base(String base) { 074 wrapped.base(base); 075 } 076 077 @Override 078 public void prefix(String prefix, String iri) { 079 wrapped.prefix(prefix, iri); 080 } 081 082 @Override 083 public void finish() { 084 wrapped.finish(); 085 } 086 087 private boolean seen(Object tuple) { 088 if (tripleAndQuadCache.contains(tuple)) { 089 return true; 090 } 091 tripleAndQuadCache.add(tuple); 092 tripleAndQuadList.add(tuple); 093 if (tripleAndQuadList.size() > windowSize) { 094 forgetOldest(); 095 } 096 return false; 097 } 098 099 private void forgetOldest() { 100 tripleAndQuadCache.remove(tripleAndQuadList.removeFirst()); 101 } 102}