/**
 * Java Web Archive Toolkit - Software to read and validate ARC, WARC
 * and GZip files. (http://jwat.org/)
 * Copyright 2011-2012 Netarkivet.dk (http://netarkivet.dk/)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.jwat.warc;

import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.jwat.gzip.GzipConstants;
import org.jwat.gzip.GzipEntry;
import org.jwat.gzip.GzipWriter;

/**
 * WARC Writer implementation for writing GZip compressed files.
 * Each WARC record is written as its own GZip entry.
 * Use WarcWriterFactory to get an instance of this class.
 *
 * @author nicl
 */
public class WarcWriterCompressed extends WarcWriter {

    /** GZip Writer used, <code>null</code> once this writer has been closed. */
    protected GzipWriter writer;

    /** Current GZip entry, <code>null</code> when no record is open. */
    protected GzipEntry entry;

    /**
     * Construct an unbuffered WARC writer used to write compressed records.
     * @param out outputstream to write to
     * @throws IllegalArgumentException if out is null
     */
    WarcWriterCompressed(OutputStream out) {
        if (out == null) {
            throw new IllegalArgumentException(
                    "The 'out' parameter is null!");
        }
        writer = new GzipWriter(out);
        init();
    }

    /**
     * Construct a buffered WARC writer used to write compressed records.
     * @param out outputstream to stream to
     * @param buffer_size outputstream buffer size
     * @throws IllegalArgumentException if out is null or buffer_size <= 0
     */
    WarcWriterCompressed(OutputStream out, int buffer_size) {
        if (out == null) {
            throw new IllegalArgumentException(
                    "The 'out' parameter is null!");
        }
        if (buffer_size <= 0) {
            throw new IllegalArgumentException(
                    "The 'buffer_size' parameter is less than or equal to zero!");
        }
        writer = new GzipWriter(new BufferedOutputStream(out, buffer_size));
        init();
    }

    /**
     * Tells whether this writer produces compressed output.
     * @return always <code>true</code> for this implementation
     */
    @Override
    public boolean isCompressed() {
        return true;
    }

    /**
     * Close the current record, if one is open, and release the GZip writer
     * together with the output stream it wraps.
     * Calling this method more than once has no further effect in this class.
     * @throws IOException if an I/O error occurs while closing
     */
    @Override
    public void close() throws IOException {
        try {
            if (entry != null) {
                closeRecord();
            }
            if (out != null) {
                // "out" is only the stream of the last GZip entry, not the
                // underlying stream handed to the constructor.
                out.flush();
                out.close();
                out = null;
            }
        } finally {
            // The GZip writer owns the destination stream (and the
            // BufferedOutputStream created by the buffered constructor), so
            // it must be closed here or buffered data is never released.
            if (writer != null) {
                writer.close();
                writer = null;
            }
        }
    }

    /**
     * Close the current record and the GZip entry it was written to.
     * Does nothing if no GZip entry is currently open.
     * @throws IllegalStateException if no record has been written yet
     * @throws IOException if an I/O error occurs while closing the record
     */
    @Override
    public void closeRecord() throws IOException {
        if (state == S_INIT) {
            throw new IllegalStateException("Please write a record before closing it!");
        }
        if (entry != null) {
            closeRecord_impl();
            state = S_RECORD_CLOSED;
            entry.close();
            entry = null;
        }
    }

    /**
     * Write a raw WARC header to a new GZip entry.
     * In this class "out" is the GZip output stream of the current GZip entry.
     * A record whose payload has been written is closed first.
     * State is changed to S_HEADER_WRITTEN, the header field is cleared,
     * headerContentLength is set to the supplied value and
     * payloadWrittenTotal is set to 0.
     * @param header_bytes raw WARC header bytes to write
     * @param contentLength expected payload length, may be <code>null</code>
     * @throws IllegalArgumentException if header_bytes is null or
     * contentLength is negative
     * @throws IllegalStateException if a header has already been written
     * without a payload or this writer has been closed
     * @throws IOException if an I/O error occurs while writing
     * @see org.jwat.warc.WarcWriter#writeRawHeader(byte[], java.lang.Long)
     */
    @Override
    public void writeRawHeader(byte[] header_bytes, Long contentLength) throws IOException {
        if (header_bytes == null) {
            throw new IllegalArgumentException(
                    "The 'header_bytes' parameter is null!");
        }
        if (contentLength != null && contentLength < 0) {
            throw new IllegalArgumentException(
                    "The 'contentLength' parameter is negative!");
        }
        if (state == S_HEADER_WRITTEN) {
            throw new IllegalStateException("Headers written back to back!");
        } else if (state == S_PAYLOAD_WRITTEN) {
            closeRecord();
        }
        openEntry();
        out.write(header_bytes);
        state = S_HEADER_WRITTEN;
        header = null;
        headerContentLength = contentLength;
        payloadWrittenTotal = 0;
    }

    /**
     * Write the header of a WARC record to a new GZip entry.
     * In this class "out" is the GZip output stream of the current GZip entry.
     * A record whose payload has been written is closed first.
     * The actual header writing and state bookkeeping is delegated to
     * writeHeader_impl.
     * @param record WARC record whose header is to be written
     * @return the value returned by writeHeader_impl for the record
     * @throws IllegalArgumentException if record is null
     * @throws IllegalStateException if a header has already been written
     * without a payload or this writer has been closed
     * @throws IOException if an I/O error occurs while writing
     * @see org.jwat.warc.WarcWriter#writeHeader(org.jwat.warc.WarcRecord)
     */
    @Override
    public byte[] writeHeader(WarcRecord record) throws IOException {
        if (record == null) {
            throw new IllegalArgumentException(
                    "The 'record' parameter is null!");
        }
        if (state == S_HEADER_WRITTEN) {
            throw new IllegalStateException("Headers written back to back!");
        } else if (state == S_PAYLOAD_WRITTEN) {
            closeRecord();
        }
        openEntry();
        return writeHeader_impl(record);
    }

    /**
     * Stream payload data into the current GZip entry.
     * @param in input stream supplying the payload
     * @return the value returned by the superclass implementation
     * @throws IllegalStateException if no header has been written
     * @throws IOException if an I/O error occurs while writing
     * @see org.jwat.warc.WarcWriter#streamPayload(java.io.InputStream)
     */
    @Override
    public long streamPayload(InputStream in) throws IOException {
        if (entry == null) {
            throw new IllegalStateException("Write a header before writing payload!");
        }
        return super.streamPayload(in);
    }

    /**
     * Write payload data into the current GZip entry.
     * @param b payload bytes
     * @return the value returned by the superclass implementation
     * @throws IllegalStateException if no header has been written
     * @throws IOException if an I/O error occurs while writing
     * @see org.jwat.warc.WarcWriter#writePayload(byte[])
     */
    @Override
    public long writePayload(byte[] b) throws IOException {
        if (entry == null) {
            throw new IllegalStateException("Write a header before writing payload!");
        }
        return super.writePayload(b);
    }

    /**
     * Write part of a byte array as payload data into the current GZip entry.
     * @param b array containing the payload bytes
     * @param offset offset of the first byte to write
     * @param len number of bytes to write
     * @return the value returned by the superclass implementation
     * @throws IllegalStateException if no header has been written
     * @throws IOException if an I/O error occurs while writing
     * @see org.jwat.warc.WarcWriter#writePayload(byte[], int, int)
     */
    @Override
    public long writePayload(byte[] b, int offset, int len) throws IOException {
        if (entry == null) {
            throw new IllegalStateException("Write a header before writing payload!");
        }
        return super.writePayload(b, offset, len);
    }

    /**
     * Start a new GZip entry for the next record and point "out" at its
     * output stream. Shared by both header writing methods.
     * @throws IllegalStateException if this writer has been closed
     * @throws IOException if an I/O error occurs while writing the entry header
     */
    private void openEntry() throws IOException {
        if (writer == null) {
            throw new IllegalStateException("Writer has been closed!");
        }
        entry = new GzipEntry();
        entry.magic = GzipConstants.GZIP_MAGIC;
        entry.cm = GzipConstants.CM_DEFLATE;
        entry.flg = 0;
        // Modification time is stored in seconds, hence the division.
        entry.mtime = System.currentTimeMillis() / 1000;
        entry.xfl = 0;
        entry.os = GzipConstants.OS_UNKNOWN;
        writer.writeEntryHeader(entry);
        out = entry.getOutputStream();
    }

}