001/**
002 * Java Web Archive Toolkit - Software to read and validate ARC, WARC
003 * and GZip files. (http://jwat.org/)
004 * Copyright 2011-2012 Netarkivet.dk (http://netarkivet.dk/)
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.jwat.warc;
019
020import org.jwat.common.Diagnosis;
021import org.jwat.common.DiagnosisType;
022import org.jwat.common.Diagnostics;
023import org.jwat.common.UriProfile;
024
025import java.io.ByteArrayOutputStream;
026import java.io.Closeable;
027import java.io.IOException;
028import java.io.InputStream;
029import java.io.OutputStream;
030import java.text.DateFormat;
031
032/**
033 * Base class for WARC writer implementations.
034 *
035 * @author nicl
036 */
037public abstract class WarcWriter implements Closeable {
038
039    /** State after writer has been constructed and before records have been written. */
040    protected static final int S_INIT = 0;
041
042    /** State after header has been written. */
043    protected static final int S_HEADER_WRITTEN = 1;
044
045    /** State after payload has been written. */
046    protected static final int S_PAYLOAD_WRITTEN = 2;
047
048    /** State after record has been closed. */
049    protected static final int S_RECORD_CLOSED = 3;
050
051    /*
052     * Settings.
053     */
054
055    /** WARC-Target-URI profile. */
056    protected UriProfile warcTargetUriProfile;
057
058    /** URI profile. */
059    protected UriProfile uriProfile;
060
061    /** Block Digesting enabled/disabled. */
062    //protected boolean bDigestBlock = false;
063
064    /** WARC <code>DateFormat</code> as specified by the WARC ISO standard. */
065    protected DateFormat warcDateFormat;
066
067    /** WARC field parser used. */
068    protected WarcFieldParsers fieldParsers;
069
070    /** Buffer used by streamPayload() to copy from one stream to another. */
071    protected byte[] stream_copy_buffer;
072
073    /** Configuration for throwing exception on content-length mismatch.
074     *  (Default is true) */
075    protected boolean bExceptionOnContentLengthMismatch;
076
077    /*
078     * State.
079     */
080
081    /** Writer level errors and warnings or when writing byte headers. */
082    public final Diagnostics<Diagnosis> diagnostics = new Diagnostics<Diagnosis>();
083
084    /** Current state of writer. */
085    protected int state = S_INIT;
086
087    /** Outputstream used to write WARC records. */
088    protected OutputStream out;
089
090    /** Current WARC header written. */
091    protected WarcHeader header;
092
093    /** Content-Length from the WARC header. */
094    protected Long headerContentLength;
095
096    /** Total bytes written for current record payload. */
097    protected long payloadWrittenTotal;
098
099    /**
100     * Method used to initialize a readers internal state.
101     * Must be called by all constructors.
102     */
103    protected void init() {
104        warcTargetUriProfile = UriProfile.RFC3986;
105        uriProfile = UriProfile.RFC3986;
106        warcDateFormat = WarcDateParser.getDateFormat();
107        fieldParsers = new WarcFieldParsers();
108        stream_copy_buffer = new byte[8192];
109        bExceptionOnContentLengthMismatch = true;
110    }
111
112    /**
113     * Is this writer producing compressed output.
114     * @return boolean indicating whether compressed output is produced
115     */
116    public abstract boolean isCompressed();
117
118    /**
119     * Set the URI profile used to validate WARC-Target URIs.
120     * If null, the uriProfile is set to RCF3986.
121     * @param uriProfile URI profile to use
122     */
123    public void setWarcTargetUriProfile(UriProfile uriProfile) {
124        if (uriProfile == null) {
125            uriProfile = UriProfile.RFC3986;
126        }
127        this.warcTargetUriProfile = uriProfile;
128    }
129
130    /**
131     * Get the URI profile used to validate WARC-Target URIs.
132     * @return the URI profile used to validate WARC-Target URIs
133     */
134    public UriProfile getWarcTargetUriProfile() {
135        return warcTargetUriProfile;
136    }
137
138    /**
139     * Set the URI profile used to validate URIs.
140     * If null, the uriProfile is set to RCF3986.
141     * @param uriProfile URI profile to use
142     */
143    public void setUriProfile(UriProfile uriProfile) {
144        if (uriProfile == null) {
145            uriProfile = UriProfile.RFC3986;
146        }
147        this.uriProfile = uriProfile;
148    }
149
150    /**
151     * Get the URI profile used to validate URIs.
152     * @return the URI profile used to validate URIs
153     */
154    public UriProfile getUriProfile() {
155        return uriProfile;
156    }
157
158    /**
159     * Does this writer throw an exception if the content-length does not match
160     * the payload amount written.
161     * @return boolean indicating if an exception is thrown or not
162     */
163    public boolean exceptionOnContentLengthMismatch() {
164        return bExceptionOnContentLengthMismatch;
165    }
166
167    /**
168     * Tell the writer what to do in case of mismatch between content-length
169     * and amount payload written.
170     * @param enabled boolean indicating exception throwing on/off
171     */
172    public void setExceptionOnContentLengthMismatch(boolean enabled) {
173        bExceptionOnContentLengthMismatch = enabled;
174    }
175
176    /**
177     * Is this writer set to block digest payload.
178     * @return boolean indicating payload block digesting
179     */
180    /*
181    public boolean digestBlock() {
182        return bDigestBlock;
183    }
184    */
185
186    /**
187     * Set the writers payload block digest mode
188     * @param enabled boolean indicating digest on/off
189     */
190    /*
191    public void setDigestBlock(boolean enabled) {
192        bDigestBlock = enabled;
193    }
194    */
195
196    /**
197     * Close WARC writer and free its resources.
198     * @throws IOException if an i/o exception occurs while closing the writer
199     */
200    public abstract void close() throws IOException;
201
202    /**
203     * Close the WARC record in an implementation specific way.
204     * @throws IOException if an i/o exception occurs while closing the record
205     */
206    public abstract void closeRecord() throws IOException;
207
208    /**
209     * Closes the WARC record by writing two newlines and comparing the amount of
210     * payload data streamed with the content-length supplied with the header.
211     * @throws IOException if an i/o exception occurs while closing the record
212     */
213    protected void closeRecord_impl() throws IOException {
214        Diagnosis diagnosis = null;
215        out.write(WarcConstants.endMark);
216        out.flush();
217        if (headerContentLength == null) {
218            diagnosis = new Diagnosis(
219                    DiagnosisType.ERROR_EXPECTED,
220                    "'" + WarcConstants.FN_CONTENT_LENGTH + "' header",
221                    "Mandatory!");
222        } else {
223            if (headerContentLength != payloadWrittenTotal) {
224                diagnosis = new Diagnosis(
225                        DiagnosisType.INVALID_EXPECTED,
226                        "'" + WarcConstants.FN_CONTENT_LENGTH + "' header",
227                        Long.toString(payloadWrittenTotal),
228                        headerContentLength.toString());
229            }
230        }
231        if (diagnosis != null) {
232            if (header != null) {
233                header.diagnostics.addError(diagnosis);
234            } else {
235                diagnostics.addError(diagnosis);
236            }
237            if (bExceptionOnContentLengthMismatch) {
238                throw new IllegalStateException("Payload size does not match content-length!");
239            }
240        }
241        header = null;
242        headerContentLength = null;
243    }
244
245    /**
246     * Write a raw WARC header to the WARC output stream. Closes any previously
247     * written record that has not been closed prior to this call.
248     * Errors and warnings are reported on the writers diagnostics object.
249     * @param header_bytes raw WARC header to output
250     * @param contentLength the expected content-length to be written and validated
251     * @throws IOException if an i/o exception occurs while writing header data
252     */
253    public void writeRawHeader(byte[] header_bytes, Long contentLength) throws IOException {
254        if (header_bytes == null) {
255            throw new IllegalArgumentException(
256                    "The 'header_bytes' parameter is null!");
257        }
258        if (contentLength != null && contentLength < 0) {
259            throw new IllegalArgumentException(
260                    "The 'contentLength' parameter is negative!");
261        }
262        if (state == S_HEADER_WRITTEN) {
263            throw new IllegalStateException("Headers written back to back!");
264        } else if (state == S_PAYLOAD_WRITTEN) {
265            closeRecord_impl();
266        }
267        out.write(header_bytes);
268        state = S_HEADER_WRITTEN;
269        header = null;
270        headerContentLength = contentLength;
271        payloadWrittenTotal = 0;
272    }
273
274    /**
275     * Write a WARC header to the WARC output stream.
276     * Errors and warnings are reported on the records diagnostics object.
277     * @param record WARC record to output
278     * @return byte array version of header as it was written
279     * @throws IOException if an i/o exception occurs while writing header data
280     */
281    public abstract byte[] writeHeader(WarcRecord record) throws IOException;
282
283    /**
284     * Write a WARC header to the WARC output stream.
285     * The WARC header is not required to be valid.
286     * Errors and warnings are reported on the records diagnostics object.
287     * @param record WARC record to output
288     * @return byte array version of header as it was written
289     * @throws IOException if an i/o exception occurs while writing header data
290     */
291    protected byte[] writeHeader_impl(WarcRecord record) throws IOException {
292        header = record.header;
293        headerContentLength = header.contentLength;
294        if (headerContentLength == null && header.contentLengthStr != null) {
295            try {
296                headerContentLength = Long.parseLong(header.contentLengthStr);
297            } catch (NumberFormatException e) {
298                // TODO Add warning...
299            }
300        }
301        ByteArrayOutputStream outBuf = new ByteArrayOutputStream();
302        /*
303         * Version Line
304         */
305        byte[] magicVersion = (WarcConstants.WARC_MAGIC_HEADER + header.major + "." + header.minor + "\r\n").getBytes();
306        outBuf.write(magicVersion);
307        /*
308         * Warc-Type
309         */
310        String warcTypeStr = null;
311        if (header.warcTypeIdx != null) {
312            if (header.warcTypeIdx > 0
313                && header.warcTypeIdx < WarcConstants.RT_IDX_STRINGS.length) {
314                warcTypeStr = WarcConstants.RT_IDX_STRINGS[header.warcTypeIdx];
315            } else {
316                // Warning...
317            }
318        }
319        if (warcTypeStr == null) {
320            warcTypeStr = header.warcTypeStr;
321        }
322        if (warcTypeStr != null) {
323            outBuf.write(WarcConstants.FN_WARC_TYPE.getBytes());
324            outBuf.write(": ".getBytes());
325            outBuf.write(warcTypeStr.getBytes());
326            outBuf.write("\r\n".getBytes());
327        }
328        /*
329         * Warc-Record-Id
330         */
331        String warcRecordIdStr = null;
332        if (header.warcRecordIdUri != null) {
333            warcRecordIdStr = header.warcRecordIdUri.toString();
334        } else if (header.warcRecordIdStr != null) {
335            warcRecordIdStr = header.warcRecordIdStr;
336            // Warning...
337        }
338        if (warcRecordIdStr != null) {
339            outBuf.write(WarcConstants.FN_WARC_RECORD_ID.getBytes());
340            outBuf.write(": <".getBytes());
341            outBuf.write(warcRecordIdStr.getBytes());
342            outBuf.write(">\r\n".getBytes());
343        }
344        /*
345         * Warc-Date
346         */
347        String warcDateStr = null;
348        if (header.warcDate != null) {
349            warcDateStr = warcDateFormat.format(header.warcDate);
350        } else if (header.warcDateStr != null) {
351            warcDateStr = header.warcDateStr;
352            // Warning...
353        }
354        if (warcDateStr != null) {
355            outBuf.write(WarcConstants.FN_WARC_DATE.getBytes());
356            outBuf.write(": ".getBytes());
357            outBuf.write(warcDateStr.getBytes());
358            outBuf.write("\r\n".getBytes());
359        }
360        /*
361         * Content-Length
362         */
363        String contentLengthStr = null;
364        if (header.contentLength != null) {
365            contentLengthStr = header.contentLength.toString();
366        } else if (header.contentLengthStr != null) {
367            contentLengthStr = header.contentLengthStr;
368            // Warning...
369        }
370        if (contentLengthStr != null) {
371            outBuf.write(WarcConstants.FN_CONTENT_LENGTH.getBytes());
372            outBuf.write(": ".getBytes());
373            outBuf.write(contentLengthStr.getBytes());
374            outBuf.write("\r\n".getBytes());
375        }
376        /*
377         * Content-Type
378         */
379        String contentTypeStr = null;
380        if (header.contentType != null) {
381            contentTypeStr = header.contentType.toString();
382        } else if (header.contentTypeStr != null) {
383            contentTypeStr = header.contentTypeStr;
384            // Warning...
385        }
386        if (contentTypeStr != null) {
387            outBuf.write(WarcConstants.FN_CONTENT_TYPE.getBytes());
388            outBuf.write(": ".getBytes());
389            outBuf.write(contentTypeStr.getBytes());
390            outBuf.write("\r\n".getBytes());
391        }
392        /*
393         * Warc-Concurrent-To
394         */
395        WarcConcurrentTo warcConcurrentTo;
396        String warcConcurrentToStr;
397        if (header.warcConcurrentToList != null) {
398            for (int i=0; i<header.warcConcurrentToList.size(); ++i) {
399                warcConcurrentTo = header.warcConcurrentToList.get(i);
400                warcConcurrentToStr = null;
401                if (warcConcurrentTo.warcConcurrentToUri != null) {
402                    warcConcurrentToStr = warcConcurrentTo.warcConcurrentToUri.toString();
403                } else if (warcConcurrentTo.warcConcurrentToStr != null) {
404                    warcConcurrentToStr = warcConcurrentTo.warcConcurrentToStr;
405                    // Warning...
406                }
407                if (warcConcurrentToStr != null) {
408                    outBuf.write(WarcConstants.FN_WARC_CONCURRENT_TO.getBytes());
409                    outBuf.write(": <".getBytes());
410                    outBuf.write(warcConcurrentToStr.getBytes());
411                    outBuf.write(">\r\n".getBytes());
412                }
413            }
414        }
415        /*
416         * Warc-Block-Digest
417         */
418        String warcBlockDigestStr = null;
419        if (header.warcBlockDigest != null) {
420            warcBlockDigestStr = header.warcBlockDigest.toString();
421        } else if (header.warcBlockDigestStr != null) {
422            warcBlockDigestStr = header.warcBlockDigestStr;
423            // Warning...
424        }
425        if (warcBlockDigestStr != null) {
426            outBuf.write(WarcConstants.FN_WARC_BLOCK_DIGEST.getBytes());
427            outBuf.write(": ".getBytes());
428            outBuf.write(warcBlockDigestStr.getBytes());
429            outBuf.write("\r\n".getBytes());
430        }
431        /*
432         * Warc-Payload-Digest
433         */
434        String warcPayloadDigestStr = null;
435        if (header.warcPayloadDigest != null) {
436            warcPayloadDigestStr = header.warcPayloadDigest.toString();
437        } else if (header.warcPayloadDigestStr != null) {
438            warcPayloadDigestStr = header.warcPayloadDigestStr;
439            // Warning...
440        }
441        if (warcPayloadDigestStr != null) {
442            outBuf.write(WarcConstants.FN_WARC_PAYLOAD_DIGEST.getBytes());
443            outBuf.write(": ".getBytes());
444            outBuf.write(warcPayloadDigestStr.getBytes());
445            outBuf.write("\r\n".getBytes());
446        }
447        /*
448         * Warc-Ip-Address
449         */
450        String warcIpAddress = null;
451        if (header.warcInetAddress != null) {
452            warcIpAddress = header.warcInetAddress.getHostAddress();
453        } else if (header.warcIpAddress != null) {
454            warcIpAddress = header.warcIpAddress;
455            // Warning...
456        }
457        if (warcIpAddress != null) {
458            outBuf.write(WarcConstants.FN_WARC_IP_ADDRESS.getBytes());
459            outBuf.write(": ".getBytes());
460            outBuf.write(warcIpAddress.getBytes());
461            outBuf.write("\r\n".getBytes());
462        }
463        /*
464         * Warc-Refers-To
465         */
466        String warcRefersToUriStr = null;
467        if (header.warcRefersToUri != null) {
468            warcRefersToUriStr = header.warcRefersToUri.toString();
469        } else if (header.warcRefersToStr != null) {
470            warcRefersToUriStr = header.warcRefersToStr;
471            // Warning...
472        }
473        if (warcRefersToUriStr != null) {
474            outBuf.write(WarcConstants.FN_WARC_REFERS_TO.getBytes());
475            outBuf.write(": <".getBytes());
476            outBuf.write(warcRefersToUriStr.getBytes());
477            outBuf.write(">\r\n".getBytes());
478        }
479        /*
480         * Warc-Target-Uri
481         */
482        String warcTargetUriStr = null;
483        if (header.warcTargetUriUri != null) {
484            warcTargetUriStr = header.warcTargetUriUri.toString();
485        } else if (header.warcTargetUriStr != null) {
486            warcTargetUriStr = header.warcTargetUriStr;
487            // Warning...
488        }
489        if (warcTargetUriStr != null) {
490            outBuf.write(WarcConstants.FN_WARC_TARGET_URI.getBytes());
491            outBuf.write(": ".getBytes());
492            outBuf.write(warcTargetUriStr.getBytes());
493            outBuf.write("\r\n".getBytes());
494        }
495        /*
496         * Warc-Truncated
497         */
498        String warcTruncatedStr = null;
499        if (header.warcTruncatedIdx != null) {
500            if (header.warcTruncatedIdx > 0
501                    && header.warcTruncatedIdx < WarcConstants.TT_IDX_STRINGS.length) {
502                warcTruncatedStr = WarcConstants.TT_IDX_STRINGS[header.warcTruncatedIdx];
503            } else {
504                // Warning...
505            }
506        }
507        if (warcTruncatedStr == null) {
508            warcTruncatedStr = header.warcTruncatedStr;
509        }
510        if (warcTruncatedStr != null) {
511            outBuf.write(WarcConstants.FN_WARC_TRUNCATED.getBytes());
512            outBuf.write(": ".getBytes());
513            outBuf.write(warcTruncatedStr.getBytes());
514            outBuf.write("\r\n".getBytes());
515        }
516        /*
517         * Warc-Warcinfo-Id
518         */
519        String warcWarcInfoIdStr = null;
520        if (header.warcWarcinfoIdUri != null) {
521            warcWarcInfoIdStr = header.warcWarcinfoIdUri.toString();
522        } else if (header.warcWarcinfoIdStr != null) {
523            warcWarcInfoIdStr = header.warcWarcinfoIdStr;
524            // Warning...
525        }
526        if (warcWarcInfoIdStr != null) {
527            outBuf.write(WarcConstants.FN_WARC_WARCINFO_ID.getBytes());
528            outBuf.write(": <".getBytes());
529            outBuf.write(warcWarcInfoIdStr.getBytes());
530            outBuf.write(">\r\n".getBytes());
531        }
532        /*
533         * Warc-Filename
534         */
535        if (header.warcFilename != null) {
536            outBuf.write(WarcConstants.FN_WARC_FILENAME.getBytes());
537            outBuf.write(": ".getBytes());
538            outBuf.write(header.warcFilename.getBytes());
539            outBuf.write("\r\n".getBytes());
540        }
541        /*
542         * Warc-Profile
543         */
544        String warcProfileStr = null;
545        if (header.warcProfileUri != null) {
546            warcProfileStr = header.warcProfileUri.toString();
547        } else if (header.warcProfileIdx != null) {
548            if (header.warcProfileIdx > 0
549                    && header.warcProfileIdx < WarcConstants.P_IDX_STRINGS.length) {
550                warcProfileStr = WarcConstants.P_IDX_STRINGS[header.warcProfileIdx];
551            } else {
552                // Warning...
553            }
554        }
555        if (warcProfileStr == null) {
556            warcProfileStr = header.warcProfileStr;
557            // Warning...
558        }
559        if (warcProfileStr != null) {
560            outBuf.write(WarcConstants.FN_WARC_PROFILE.getBytes());
561            outBuf.write(": ".getBytes());
562            outBuf.write(warcProfileStr.getBytes());
563            outBuf.write("\r\n".getBytes());
564        }
565        /*
566         * Warc-Identified-Payload-Type
567         */
568        String warcIdentifiedPayloadTypeStr = null;
569        if (header.warcIdentifiedPayloadType != null) {
570            warcIdentifiedPayloadTypeStr = header.warcIdentifiedPayloadType.toString();
571        } else if (header.warcIdentifiedPayloadTypeStr != null) {
572            warcIdentifiedPayloadTypeStr = header.warcIdentifiedPayloadTypeStr;
573            // Warning...
574        }
575        if (warcIdentifiedPayloadTypeStr != null) {
576            outBuf.write(WarcConstants.FN_WARC_IDENTIFIED_PAYLOAD_TYPE.getBytes());
577            outBuf.write(": ".getBytes());
578            outBuf.write(warcIdentifiedPayloadTypeStr.getBytes());
579            outBuf.write("\r\n".getBytes());
580        }
581        /*
582         * Warc-Segment-Number
583         */
584        String warcSegmentNumberStr = null;
585        if (header.warcSegmentNumber != null) {
586            warcSegmentNumberStr = header.warcSegmentNumber.toString();
587        } else if (header.warcSegmentNumberStr != null) {
588            warcSegmentNumberStr = header.warcSegmentNumberStr;
589            // Warning...
590        }
591        if (warcSegmentNumberStr != null) {
592            outBuf.write(WarcConstants.FN_WARC_SEGMENT_NUMBER.getBytes());
593            outBuf.write(": ".getBytes());
594            outBuf.write(warcSegmentNumberStr.getBytes());
595            outBuf.write("\r\n".getBytes());
596        }
597        /*
598         * Warc-Segment-Origin-Id
599         */
600        String warcSegmentOriginIdStr = null;
601        if (header.warcSegmentOriginIdUrl != null) {
602            warcSegmentOriginIdStr = header.warcSegmentOriginIdUrl.toString();
603        } else if (header.warcSegmentOriginIdStr != null) {
604            warcSegmentOriginIdStr = header.warcSegmentOriginIdStr;
605            // Warning...
606        }
607        if (warcSegmentOriginIdStr != null) {
608            outBuf.write(WarcConstants.FN_WARC_SEGMENT_ORIGIN_ID.getBytes());
609            outBuf.write(": <".getBytes());
610            outBuf.write(warcSegmentOriginIdStr.getBytes());
611            outBuf.write(">\r\n".getBytes());
612        }
613        /*
614         * Warc-Segment-Total-Length
615         */
616        String warcSegmentTotalLengthStr = null;
617        if (header.warcSegmentTotalLength != null) {
618            warcSegmentTotalLengthStr = header.warcSegmentTotalLength.toString();
619        } else if (header.warcSegmentTotalLengthStr != null) {
620            warcSegmentTotalLengthStr = header.warcSegmentTotalLengthStr;
621            // Warning...
622        }
623        if (warcSegmentTotalLengthStr != null) {
624            outBuf.write(WarcConstants.FN_WARC_SEGMENT_TOTAL_LENGTH.getBytes());
625            outBuf.write(": ".getBytes());
626            outBuf.write(warcSegmentTotalLengthStr.getBytes());
627            outBuf.write("\r\n".getBytes());
628        }
629        /*
630         * WARC-Refers-To-Target-URI
631         */
632        String warcRefersToTargetUriStr = null;
633        if (header.warcRefersToTargetUriUri != null) {
634            warcRefersToTargetUriStr = header.warcRefersToTargetUriUri.toString();
635        } else if (header.warcRefersToTargetUriStr != null) {
636            warcRefersToTargetUriStr = header.warcRefersToTargetUriStr;
637        }
638        if (warcRefersToTargetUriStr != null) {
639            outBuf.write(WarcConstants.FN_WARC_REFERS_TO_TARGET_URI.getBytes());
640            outBuf.write(": ".getBytes());
641            outBuf.write(warcRefersToTargetUriStr.getBytes());
642            outBuf.write("\r\n".getBytes());
643        }
644        /*
645         * Warc-Refers-To-Date
646         */
647        String warcRefersToDateStr = null;
648        if (header.warcRefersToDate != null) {
649            warcRefersToDateStr = warcDateFormat.format(header.warcRefersToDate);
650        } else if (header.warcRefersToDateStr != null) {
651            warcRefersToDateStr = header.warcRefersToDateStr;
652            // Warning...
653        }
654        if (warcRefersToDateStr != null) {
655            outBuf.write(WarcConstants.FN_WARC_REFERS_TO_DATE.getBytes());
656            outBuf.write(": ".getBytes());
657            outBuf.write(warcRefersToDateStr.getBytes());
658            outBuf.write("\r\n".getBytes());
659        }
660        /*
661         * End Of Header
662         */
663        outBuf.write("\r\n".getBytes());
664        byte[] headerBytes = outBuf.toByteArray();
665        out.write(headerBytes);
666        state = S_HEADER_WRITTEN;
667        payloadWrittenTotal = 0;
668        return headerBytes;
669    }
670
671    /**
672     * Stream the content of an input stream to the payload content.
673     * @param in input stream containing payload data
674     * @return number of bytes written during method invocation
675     * @throws IOException if an i/o exception occurs while writing payload data
676     */
677    public long streamPayload(InputStream in) throws IOException {
678        if (in == null) {
679            throw new IllegalArgumentException(
680                    "The 'in' parameter is null!");
681        }
682        if (state != S_HEADER_WRITTEN && state != S_PAYLOAD_WRITTEN) {
683            throw new IllegalStateException("Write a header before writing payload!");
684        }
685        long written = 0;
686        int read = 0;
687        while (read != -1) {
688            out.write(stream_copy_buffer, 0, read);
689            written += read;
690            read = in.read(stream_copy_buffer);
691        }
692        state = S_PAYLOAD_WRITTEN;
693        payloadWrittenTotal += written;
694        return written;
695    }
696
697    /**
698     * Append the content of a byte array to the payload content.
699     * @param b byte array with data to be written
700     * @return number of bytes written during method invocation
701     * @throws IOException if an i/o exception occurs while writing payload data
702     */
703    public long writePayload(byte[] b) throws IOException {
704        if (state != S_HEADER_WRITTEN && state != S_PAYLOAD_WRITTEN) {
705            throw new IllegalStateException("Write a header before writing payload!");
706        }
707        out.write(b);
708        state = S_PAYLOAD_WRITTEN;
709        payloadWrittenTotal += b.length;
710        return b.length;
711    }
712
713    /**
714     * Append the partial content of a byte array to the payload content.
715     * @param b byte array with partial data to be written
716     * @param offset offset to data to be written
717     * @param len length of data to be written
718     * @return number of bytes written during method invocation
719     * @throws IOException if an i/o exception occurs while writing payload data
720     */
721    public long writePayload(byte[] b, int offset, int len) throws IOException {
722        if (state != S_HEADER_WRITTEN && state != S_PAYLOAD_WRITTEN) {
723            throw new IllegalStateException("Write a header before writing payload!");
724        }
725        out.write(b, offset, len);
726        state = S_PAYLOAD_WRITTEN;
727        payloadWrittenTotal += len;
728        return len;
729    }
730
731}