001/**
002 * Java Web Archive Toolkit - Software to read and validate ARC, WARC
003 * and GZip files. (http://jwat.org/)
004 * Copyright 2011-2012 Netarkivet.dk (http://netarkivet.dk/)
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.jwat.warc;
019
020import java.io.BufferedOutputStream;
021import java.io.IOException;
022import java.io.InputStream;
023import java.io.OutputStream;
024
025import org.jwat.gzip.GzipConstants;
026import org.jwat.gzip.GzipEntry;
027import org.jwat.gzip.GzipWriter;
028
029/**
030 * WARC Writer implementation for writing GZip compressed files.
031 * Use WarcWriterFactory to get an instance of this class.
032 *
033 * @author nicl
034 */
035public class WarcWriterCompressed extends WarcWriter {
036
037    /** GZip Writer used. */
038    protected GzipWriter writer;
039
040    /** Current GZip entry. */
041    protected GzipEntry entry;
042
043    /**
044     * Construct an unbuffered WARC writer used to write compressed records.
045     * @param out outputstream to write to
046     */
047    WarcWriterCompressed(OutputStream out) {
048        if (out == null) {
049            throw new IllegalArgumentException(
050                    "The 'out' parameter is null!");
051        }
052        writer = new GzipWriter(out);
053        init();
054    }
055
056    /**
057     * Construct a buffered WARC writer used to write compressed records.
058     * @param out outputstream to stream to
059     * @param buffer_size outputstream buffer size
060     * @throws IllegalArgumentException if out is null or buffer_size <= 0
061     */
062    WarcWriterCompressed(OutputStream out, int buffer_size) {
063        if (out == null) {
064            throw new IllegalArgumentException(
065                    "The 'out' parameter is null!");
066        }
067        if (buffer_size <= 0) {
068            throw new IllegalArgumentException(
069                    "The 'buffer_size' parameter is less than or equal to zero!");
070        }
071        writer = new GzipWriter(new BufferedOutputStream(out, buffer_size));
072        init();
073    }
074
075    @Override
076    public boolean isCompressed() {
077        return true;
078    }
079
080    @Override
081    public void close() throws IOException {
082        if (entry != null) {
083            closeRecord();
084        }
085        if (out != null) {
086            out.flush();
087            out.close();
088            out = null;
089        }
090    }
091
092    @Override
093    public void closeRecord() throws IOException {
094        if (state == S_INIT) {
095            throw new IllegalStateException("Please write a record before closing it!");
096        }
097        if (entry != null) {
098            closeRecord_impl();
099            state = S_RECORD_CLOSED;
100            entry.close();
101            entry = null;
102        }
103    }
104
105    /*
106     * In this class "out" is the GZip output stream of the current GZip entry.
107     * @see org.jwat.warc.WarcWriter#writeHeader(byte[], java.lang.Long)
108     */
109    @Override
110    public void writeRawHeader(byte[] header_bytes, Long contentLength) throws IOException {
111        if (header_bytes == null) {
112            throw new IllegalArgumentException(
113                    "The 'header_bytes' parameter is null!");
114        }
115        if (contentLength != null && contentLength < 0) {
116            throw new IllegalArgumentException(
117                    "The 'contentLength' parameter is negative!");
118        }
119        if (state == S_HEADER_WRITTEN) {
120            throw new IllegalStateException("Headers written back to back!");
121        } else if (state == S_PAYLOAD_WRITTEN) {
122            closeRecord();
123        }
124        entry = new GzipEntry();
125        entry.magic = GzipConstants.GZIP_MAGIC;
126        entry.cm = GzipConstants.CM_DEFLATE;
127        entry.flg = 0;
128        entry.mtime = System.currentTimeMillis() / 1000;
129        entry.xfl = 0;
130        entry.os = GzipConstants.OS_UNKNOWN;
131        writer.writeEntryHeader(entry);
132        out = entry.getOutputStream();
133        out.write(header_bytes);
134        state = S_HEADER_WRITTEN;
135        header = null;
136        headerContentLength = contentLength;
137        payloadWrittenTotal = 0;
138    }
139
140    /*
141     * In this class "out" is the GZip output stream of the current GZip entry.
142     * state changed to S_HEADER_WRITTEN
143     * Sets the header and headerContentLength fields.
144     * payloadWrittenTotal is set to 0
145     * @see org.jwat.warc.WarcWriter#writeHeader(org.jwat.warc.WarcRecord)
146     */
147    @Override
148    public byte[] writeHeader(WarcRecord record) throws IOException {
149        if (record == null) {
150            throw new IllegalArgumentException(
151                    "The 'record' parameter is null!");
152        }
153        if (state == S_HEADER_WRITTEN) {
154            throw new IllegalStateException("Headers written back to back!");
155        } else if (state == S_PAYLOAD_WRITTEN) {
156            closeRecord();
157        }
158        entry = new GzipEntry();
159        entry.magic = GzipConstants.GZIP_MAGIC;
160        entry.cm = GzipConstants.CM_DEFLATE;
161        entry.flg = 0;
162        entry.mtime = System.currentTimeMillis() / 1000;
163        entry.xfl = 0;
164        entry.os = GzipConstants.OS_UNKNOWN;
165        writer.writeEntryHeader(entry);
166        out = entry.getOutputStream();
167        return writeHeader_impl(record);
168    }
169
170    /*
171     * state changed to S_PAYLOAD_WRITTEN;
172     * @see org.jwat.warc.WarcWriter#streamPayload(java.io.InputStream)
173     */
174    @Override
175    public long streamPayload(InputStream in) throws IOException {
176        if (entry == null) {
177            throw new IllegalStateException("Write a header before writing payload!");
178        }
179        return super.streamPayload(in);
180    }
181
182    /*
183     * state changed to S_PAYLOAD_WRITTEN
184     * @see org.jwat.warc.WarcWriter#writePayload(byte[])
185     */
186    @Override
187    public long writePayload(byte[] b) throws IOException {
188        if (entry == null) {
189            throw new IllegalStateException("Write a header before writing payload!");
190        }
191        return super.writePayload(b);
192    }
193
194    /*
195     * state changed to S_PAYLOAD_WRITTEN
196     * @see org.jwat.warc.WarcWriter#writePayload(byte[], int, int)
197     */
198    @Override
199    public long writePayload(byte[] b, int offset, int len) throws IOException {
200        if (entry == null) {
201            throw new IllegalStateException("Write a header before writing payload!");
202        }
203        return super.writePayload(b, offset, len);
204    }
205
206}