001/**
002 * Java Web Archive Toolkit - Software to read and validate ARC, WARC
003 * and GZip files. (http://jwat.org/)
004 * Copyright 2011-2012 Netarkivet.dk (http://netarkivet.dk/)
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.jwat.warc;
019
020import java.io.BufferedInputStream;
021import java.io.IOException;
022import java.io.InputStream;
023
024import org.jwat.common.ByteCountingPushBackInputStream;
025
026/**
027 * WARC Reader implementation for reading uncompressed files.
028 * Use WarcReaderFactory to get an instance of this class.
029 *
030 * @author nicl
031 */
032public class WarcReaderUncompressed extends WarcReader {
033
034    /** Buffer size used by <code>PushbackInputStream</code>. */
035    public static final int PUSHBACK_BUFFER_SIZE = 32;
036
037    /** WARC file <code>InputStream</code>. */
038    protected ByteCountingPushBackInputStream in;
039
040    /** Start offset of current or next valid record. */
041    protected long startOffset = 0;
042
043    /**
044     * This constructor is used to get random access to records.
045     * The records are then accessed using the getNextRecordFrom methods
046     * using a supplied input stream for each record.
047     */
048    public WarcReaderUncompressed() {
049        init();
050    }
051
052    /**
053     * Construct reader using the supplied input stream.
054     * This method is primarily for sequential access to records.
055     * @param in WARC file input stream
056     */
057    public WarcReaderUncompressed(ByteCountingPushBackInputStream in) {
058        if (in == null) {
059            throw new IllegalArgumentException(
060                    "The inputstream 'in' is null");
061        }
062        this.in = in;
063        init();
064    }
065
066    @Override
067    public boolean isCompressed() {
068        return false;
069    }
070
071    @Override
072    public void close() {
073        if (currentRecord != null) {
074            try {
075                currentRecord.close();
076            } catch (IOException e) { /* ignore */ }
077            currentRecord = null;
078        }
079        if (in != null) {
080            consumed = in.getConsumed();
081            try {
082                in.close();
083            } catch (IOException e) { /* ignore */ }
084            in = null;
085        }
086    }
087
088    @Override
089    protected void recordClosed() {
090        if (currentRecord != null) {
091            consumed += currentRecord.consumed;
092        } else {
093            throw new IllegalStateException("'currentRecord' is null, this should never happen!");
094        }
095    }
096
097    @Override
098    public long getStartOffset() {
099        return startOffset;
100    }
101
102    @Override
103    public long getOffset() {
104        if (in != null) {
105            return in.getConsumed();
106        } else {
107            return consumed;
108        }
109    }
110
111    @Override
112    public long getConsumed() {
113        if (in != null) {
114            return in.getConsumed();
115        } else {
116            return consumed;
117        }
118    }
119
120    @Override
121    public WarcRecord getNextRecord() throws IOException {
122        if (currentRecord != null) {
123            currentRecord.close();
124        }
125        if (in == null) {
126            throw new IllegalStateException(
127                    "This reader has been initialized with an incompatible constructor, 'in' is null");
128        }
129        currentRecord = WarcRecord.parseRecord(in, this);
130        if (currentRecord != null) {
131            startOffset = currentRecord.getStartOffset();
132        }
133        return currentRecord;
134    }
135
136    @Override
137    public WarcRecord getNextRecordFrom(InputStream rin, long offset)
138                                                        throws IOException {
139        if (currentRecord != null) {
140            currentRecord.close();
141        }
142        if (in != null) {
143            throw new IllegalStateException(
144                    "This reader has been initialized with an incompatible constructor, 'in' is not null");
145        }
146        if (rin == null) {
147            throw new IllegalArgumentException(
148                    "The inputstream 'rin' is null");
149        }
150        if (offset < -1) {
151            throw new IllegalArgumentException(
152                    "The 'offset' is less than -1: " + offset);
153        }
154        ByteCountingPushBackInputStream pbin =
155                new ByteCountingPushBackInputStream(rin, PUSHBACK_BUFFER_SIZE);
156        currentRecord = WarcRecord.parseRecord(pbin, this);
157        if (currentRecord != null) {
158            startOffset = offset;
159            currentRecord.header.startOffset = offset;
160        }
161        return currentRecord;
162    }
163
164    @Override
165    public WarcRecord getNextRecordFrom(InputStream rin, long offset,
166                                        int buffer_size) throws IOException {
167        if (currentRecord != null) {
168            currentRecord.close();
169        }
170        if (in != null) {
171            throw new IllegalStateException(
172                    "This reader has been initialized with an incompatible constructor, 'in' is not null");
173        }
174        if (rin == null) {
175            throw new IllegalArgumentException(
176                    "The inputstream 'rin' is null");
177        }
178        if (offset < -1) {
179            throw new IllegalArgumentException(
180                    "The 'offset' is less than -1: " + offset);
181        }
182        if (buffer_size <= 0) {
183            throw new IllegalArgumentException(
184                    "The 'buffer_size' is less than or equal to zero: "
185                    + buffer_size);
186        }
187        ByteCountingPushBackInputStream pbin =
188                new ByteCountingPushBackInputStream(
189                        new BufferedInputStream(rin, buffer_size),
190                        PUSHBACK_BUFFER_SIZE);
191        currentRecord = WarcRecord.parseRecord(pbin, this);
192        if (currentRecord != null) {
193            startOffset = offset;
194            currentRecord.header.startOffset = offset;
195        }
196        return currentRecord;
197    }
198
199}