/**
 * Java Web Archive Toolkit - Software to read and validate ARC, WARC
 * and GZip files. (http://jwat.org/)
 * Copyright 2011-2012 Netarkivet.dk (http://netarkivet.dk/)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.jwat.warc;

import org.jwat.common.Diagnosis;
import org.jwat.common.DiagnosisType;
import org.jwat.common.Diagnostics;
import org.jwat.common.UriProfile;

import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.DateFormat;

/**
 * Base class for WARC writer implementations.
 *
 * <p>The writer tracks a small state machine ({@link #S_INIT},
 * {@link #S_HEADER_WRITTEN}, {@link #S_PAYLOAD_WRITTEN},
 * {@link #S_RECORD_CLOSED}) and counts the payload bytes written for the
 * current record so they can be compared with the Content-Length announced
 * in the header when the record is closed.
 *
 * @author nicl
 */
public abstract class WarcWriter implements Closeable {

    /** State after writer has been constructed and before records have been written. */
    protected static final int S_INIT = 0;

    /** State after header has been written. */
    protected static final int S_HEADER_WRITTEN = 1;

    /** State after payload has been written. */
    protected static final int S_PAYLOAD_WRITTEN = 2;

    /** State after record has been closed. */
    protected static final int S_RECORD_CLOSED = 3;

    /** Name of the character encoding used when serializing header text. */
    private static final String HEADER_ENCODING = "UTF-8";

    /*
     * Settings.
     */

    /** WARC-Target-URI profile. */
    protected UriProfile warcTargetUriProfile;

    /** URI profile. */
    protected UriProfile uriProfile;

    /** Block Digesting enabled/disabled. */
    //protected boolean bDigestBlock = false;

    /** WARC <code>DateFormat</code> as specified by the WARC ISO standard. */
    protected DateFormat warcDateFormat;

    /** WARC field parser used. */
    protected WarcFieldParsers fieldParsers;

    /** Buffer used by streamPayload() to copy from one stream to another. */
    protected byte[] stream_copy_buffer;

    /** Configuration for throwing exception on content-length mismatch.
     *  (Default is true) */
    protected boolean bExceptionOnContentLengthMismatch;

    /*
     * State.
     */

    /** Writer level errors and warnings or when writing byte headers. */
    public final Diagnostics<Diagnosis> diagnostics = new Diagnostics<Diagnosis>();

    /** Current state of writer. */
    protected int state = S_INIT;

    /** Outputstream used to write WARC records. */
    protected OutputStream out;

    /** Current WARC header written. */
    protected WarcHeader header;

    /** Content-Length from the WARC header. */
    protected Long headerContentLength;

    /** Total bytes written for current record payload. */
    protected long payloadWrittenTotal;

    /**
     * Method used to initialize a writer's internal state.
     * Must be called by all constructors.
     */
    protected void init() {
        warcTargetUriProfile = UriProfile.RFC3986;
        uriProfile = UriProfile.RFC3986;
        warcDateFormat = WarcDateParser.getDateFormat();
        fieldParsers = new WarcFieldParsers();
        stream_copy_buffer = new byte[8192];
        bExceptionOnContentLengthMismatch = true;
    }

    /**
     * Is this writer producing compressed output.
     * @return boolean indicating whether compressed output is produced
     */
    public abstract boolean isCompressed();

    /**
     * Set the URI profile used to validate WARC-Target URIs.
     * If null, the uriProfile is set to RFC3986.
     * @param uriProfile URI profile to use
     */
    public void setWarcTargetUriProfile(UriProfile uriProfile) {
        if (uriProfile == null) {
            uriProfile = UriProfile.RFC3986;
        }
        this.warcTargetUriProfile = uriProfile;
    }

    /**
     * Get the URI profile used to validate WARC-Target URIs.
     * @return the URI profile used to validate WARC-Target URIs
     */
    public UriProfile getWarcTargetUriProfile() {
        return warcTargetUriProfile;
    }

    /**
     * Set the URI profile used to validate URIs.
     * If null, the uriProfile is set to RFC3986.
     * @param uriProfile URI profile to use
     */
    public void setUriProfile(UriProfile uriProfile) {
        if (uriProfile == null) {
            uriProfile = UriProfile.RFC3986;
        }
        this.uriProfile = uriProfile;
    }

    /**
     * Get the URI profile used to validate URIs.
     * @return the URI profile used to validate URIs
     */
    public UriProfile getUriProfile() {
        return uriProfile;
    }

    /**
     * Does this writer throw an exception if the content-length does not match
     * the payload amount written.
     * @return boolean indicating if an exception is thrown or not
     */
    public boolean exceptionOnContentLengthMismatch() {
        return bExceptionOnContentLengthMismatch;
    }

    /**
     * Tell the writer what to do in case of mismatch between content-length
     * and amount payload written.
     * @param enabled boolean indicating exception throwing on/off
     */
    public void setExceptionOnContentLengthMismatch(boolean enabled) {
        bExceptionOnContentLengthMismatch = enabled;
    }

    /**
     * Is this writer set to block digest payload.
     * @return boolean indicating payload block digesting
     */
    /*
    public boolean digestBlock() {
        return bDigestBlock;
    }
    */

    /**
     * Set the writers payload block digest mode
     * @param enabled boolean indicating digest on/off
     */
    /*
    public void setDigestBlock(boolean enabled) {
        bDigestBlock = enabled;
    }
    */

    /**
     * Close WARC writer and free its resources.
     * @throws IOException if an i/o exception occurs while closing the writer
     */
    public abstract void close() throws IOException;

    /**
     * Close the WARC record in an implementation specific way.
     * @throws IOException if an i/o exception occurs while closing the record
     */
    public abstract void closeRecord() throws IOException;

    /**
     * Closes the WARC record by writing the end-of-record mark and comparing
     * the amount of payload data written with the content-length supplied
     * with the header.
     * <p>
     * A missing or mismatching content-length is recorded as an error on the
     * current header's diagnostics when a header object is present, otherwise
     * on the writer's own diagnostics. If exceptions on mismatch are enabled
     * an {@link IllegalStateException} is then thrown; in that case the
     * current header and content-length are left untouched.
     * @throws IOException if an i/o exception occurs while closing the record
     * @throws IllegalStateException if the content-length is missing or does
     * not match the payload written and exceptions on mismatch are enabled
     */
    protected void closeRecord_impl() throws IOException {
        out.write(WarcConstants.endMark);
        out.flush();
        Diagnosis diagnosis = null;
        if (headerContentLength == null) {
            diagnosis = new Diagnosis(
                    DiagnosisType.ERROR_EXPECTED,
                    "'" + WarcConstants.FN_CONTENT_LENGTH + "' header",
                    "Mandatory!");
        } else if (headerContentLength.longValue() != payloadWrittenTotal) {
            diagnosis = new Diagnosis(
                    DiagnosisType.INVALID_EXPECTED,
                    "'" + WarcConstants.FN_CONTENT_LENGTH + "' header",
                    Long.toString(payloadWrittenTotal),
                    headerContentLength.toString());
        }
        if (diagnosis != null) {
            // Raw headers have no header object, so fall back to the writer.
            if (header != null) {
                header.diagnostics.addError(diagnosis);
            } else {
                diagnostics.addError(diagnosis);
            }
            if (bExceptionOnContentLengthMismatch) {
                throw new IllegalStateException("Payload size does not match content-length!");
            }
        }
        header = null;
        headerContentLength = null;
    }

    /**
     * Write a raw WARC header to the WARC output stream. Closes any previously
     * written record that has not been closed prior to this call.
     * Errors and warnings are reported on the writers diagnostics object.
     * @param header_bytes raw WARC header to output
     * @param contentLength the expected content-length to be written and validated
     * @throws IOException if an i/o exception occurs while writing header data
     * @throws IllegalArgumentException if <code>header_bytes</code> is null or
     * <code>contentLength</code> is negative
     * @throws IllegalStateException if a header has already been written for
     * the current record
     */
    public void writeRawHeader(byte[] header_bytes, Long contentLength) throws IOException {
        if (header_bytes == null) {
            throw new IllegalArgumentException(
                    "The 'header_bytes' parameter is null!");
        }
        if (contentLength != null && contentLength < 0) {
            throw new IllegalArgumentException(
                    "The 'contentLength' parameter is negative!");
        }
        if (state == S_HEADER_WRITTEN) {
            throw new IllegalStateException("Headers written back to back!");
        } else if (state == S_PAYLOAD_WRITTEN) {
            closeRecord_impl();
        }
        out.write(header_bytes);
        state = S_HEADER_WRITTEN;
        header = null;
        headerContentLength = contentLength;
        payloadWrittenTotal = 0;
    }

    /**
     * Write a WARC header to the WARC output stream.
     * Errors and warnings are reported on the records diagnostics object.
     * @param record WARC record to output
     * @return byte array version of header as it was written
     * @throws IOException if an i/o exception occurs while writing header data
     */
    public abstract byte[] writeHeader(WarcRecord record) throws IOException;

    /**
     * Write a WARC header to the WARC output stream.
     * The WARC header is not required to be valid.
     * <p>
     * For every field the validated/typed value on the header is preferred;
     * when it is absent the raw string value is written instead. Fields with
     * neither value are omitted. Header text is encoded as UTF-8.
     * @param record WARC record to output
     * @return byte array version of header as it was written
     * @throws IOException if an i/o exception occurs while writing header data
     */
    protected byte[] writeHeader_impl(WarcRecord record) throws IOException {
        header = record.header;
        headerContentLength = header.contentLength;
        if (headerContentLength == null && header.contentLengthStr != null) {
            try {
                headerContentLength = Long.parseLong(header.contentLengthStr);
            } catch (NumberFormatException ignored) {
                // Deliberately left unset: closeRecord_impl() reports a
                // null content-length as a missing Content-Length header.
            }
        }
        ByteArrayOutputStream outBuf = new ByteArrayOutputStream();

        // Version line.
        outBuf.write(encode(WarcConstants.WARC_MAGIC_HEADER
                + header.major + "." + header.minor + "\r\n"));

        // WARC-Type.
        String warcTypeStr = lookupIndexed(header.warcTypeIdx, WarcConstants.RT_IDX_STRINGS);
        if (warcTypeStr == null) {
            warcTypeStr = header.warcTypeStr;
        }
        writeField(outBuf, WarcConstants.FN_WARC_TYPE, warcTypeStr);

        // WARC-Record-ID.
        writeBracketedField(outBuf, WarcConstants.FN_WARC_RECORD_ID,
                preferParsed(header.warcRecordIdUri, header.warcRecordIdStr));

        // WARC-Date.
        String warcDateStr = (header.warcDate != null)
                ? warcDateFormat.format(header.warcDate)
                : header.warcDateStr;
        writeField(outBuf, WarcConstants.FN_WARC_DATE, warcDateStr);

        // Content-Length.
        writeField(outBuf, WarcConstants.FN_CONTENT_LENGTH,
                preferParsed(header.contentLength, header.contentLengthStr));

        // Content-Type.
        writeField(outBuf, WarcConstants.FN_CONTENT_TYPE,
                preferParsed(header.contentType, header.contentTypeStr));

        // WARC-Concurrent-To (may occur multiple times).
        if (header.warcConcurrentToList != null) {
            WarcConcurrentTo warcConcurrentTo;
            for (int i = 0; i < header.warcConcurrentToList.size(); ++i) {
                warcConcurrentTo = header.warcConcurrentToList.get(i);
                writeBracketedField(outBuf, WarcConstants.FN_WARC_CONCURRENT_TO,
                        preferParsed(warcConcurrentTo.warcConcurrentToUri,
                                warcConcurrentTo.warcConcurrentToStr));
            }
        }

        // WARC-Block-Digest.
        writeField(outBuf, WarcConstants.FN_WARC_BLOCK_DIGEST,
                preferParsed(header.warcBlockDigest, header.warcBlockDigestStr));

        // WARC-Payload-Digest.
        writeField(outBuf, WarcConstants.FN_WARC_PAYLOAD_DIGEST,
                preferParsed(header.warcPayloadDigest, header.warcPayloadDigestStr));

        // WARC-IP-Address.
        String warcIpAddress = (header.warcInetAddress != null)
                ? header.warcInetAddress.getHostAddress()
                : header.warcIpAddress;
        writeField(outBuf, WarcConstants.FN_WARC_IP_ADDRESS, warcIpAddress);

        // WARC-Refers-To.
        writeBracketedField(outBuf, WarcConstants.FN_WARC_REFERS_TO,
                preferParsed(header.warcRefersToUri, header.warcRefersToStr));

        // WARC-Target-URI (written without angle brackets).
        writeField(outBuf, WarcConstants.FN_WARC_TARGET_URI,
                preferParsed(header.warcTargetUriUri, header.warcTargetUriStr));

        // WARC-Truncated.
        String warcTruncatedStr = lookupIndexed(header.warcTruncatedIdx, WarcConstants.TT_IDX_STRINGS);
        if (warcTruncatedStr == null) {
            warcTruncatedStr = header.warcTruncatedStr;
        }
        writeField(outBuf, WarcConstants.FN_WARC_TRUNCATED, warcTruncatedStr);

        // WARC-Warcinfo-ID.
        writeBracketedField(outBuf, WarcConstants.FN_WARC_WARCINFO_ID,
                preferParsed(header.warcWarcinfoIdUri, header.warcWarcinfoIdStr));

        // WARC-Filename.
        writeField(outBuf, WarcConstants.FN_WARC_FILENAME, header.warcFilename);

        // WARC-Profile: URI first, then known profile index, then raw string.
        String warcProfileStr = (header.warcProfileUri != null)
                ? header.warcProfileUri.toString()
                : lookupIndexed(header.warcProfileIdx, WarcConstants.P_IDX_STRINGS);
        if (warcProfileStr == null) {
            warcProfileStr = header.warcProfileStr;
        }
        writeField(outBuf, WarcConstants.FN_WARC_PROFILE, warcProfileStr);

        // WARC-Identified-Payload-Type.
        writeField(outBuf, WarcConstants.FN_WARC_IDENTIFIED_PAYLOAD_TYPE,
                preferParsed(header.warcIdentifiedPayloadType, header.warcIdentifiedPayloadTypeStr));

        // WARC-Segment-Number.
        writeField(outBuf, WarcConstants.FN_WARC_SEGMENT_NUMBER,
                preferParsed(header.warcSegmentNumber, header.warcSegmentNumberStr));

        // WARC-Segment-Origin-ID.
        writeBracketedField(outBuf, WarcConstants.FN_WARC_SEGMENT_ORIGIN_ID,
                preferParsed(header.warcSegmentOriginIdUrl, header.warcSegmentOriginIdStr));

        // WARC-Segment-Total-Length.
        writeField(outBuf, WarcConstants.FN_WARC_SEGMENT_TOTAL_LENGTH,
                preferParsed(header.warcSegmentTotalLength, header.warcSegmentTotalLengthStr));

        // WARC-Refers-To-Target-URI (written without angle brackets).
        writeField(outBuf, WarcConstants.FN_WARC_REFERS_TO_TARGET_URI,
                preferParsed(header.warcRefersToTargetUriUri, header.warcRefersToTargetUriStr));

        // WARC-Refers-To-Date.
        String warcRefersToDateStr = (header.warcRefersToDate != null)
                ? warcDateFormat.format(header.warcRefersToDate)
                : header.warcRefersToDateStr;
        writeField(outBuf, WarcConstants.FN_WARC_REFERS_TO_DATE, warcRefersToDateStr);

        // End of header.
        outBuf.write(encode("\r\n"));
        byte[] headerBytes = outBuf.toByteArray();
        out.write(headerBytes);
        state = S_HEADER_WRITTEN;
        payloadWrittenTotal = 0;
        return headerBytes;
    }

    /** Encodes header text using an explicit encoding instead of the platform default. */
    private static byte[] encode(String text) throws IOException {
        return text.getBytes(HEADER_ENCODING);
    }

    /** Returns the string form of the parsed value, or the raw string when no parsed value exists. */
    private static String preferParsed(Object parsed, String raw) {
        return (parsed != null) ? parsed.toString() : raw;
    }

    /** Returns table[idx] when idx is non-null and within 1..table.length-1, otherwise null. */
    private static String lookupIndexed(Integer idx, String[] table) {
        if (idx != null && idx > 0 && idx < table.length) {
            return table[idx];
        }
        return null;
    }

    /** Appends "name: value CRLF" to the buffer; does nothing when value is null. */
    private static void writeField(ByteArrayOutputStream buf, String name, String value) throws IOException {
        if (value != null) {
            buf.write(encode(name));
            buf.write(encode(": "));
            buf.write(encode(value));
            buf.write(encode("\r\n"));
        }
    }

    /** Appends "name: &lt;value&gt; CRLF" to the buffer; does nothing when value is null. */
    private static void writeBracketedField(ByteArrayOutputStream buf, String name, String value) throws IOException {
        if (value != null) {
            buf.write(encode(name));
            buf.write(encode(": <"));
            buf.write(encode(value));
            buf.write(encode(">\r\n"));
        }
    }

    /**
     * Stream the content of an input stream to the payload content.
     * The stream is read until end-of-stream; it is not closed by this method.
     * @param in input stream containing payload data
     * @return number of bytes written during method invocation
     * @throws IOException if an i/o exception occurs while writing payload data
     * @throws IllegalArgumentException if <code>in</code> is null
     * @throws IllegalStateException if no header has been written for the record
     */
    public long streamPayload(InputStream in) throws IOException {
        if (in == null) {
            throw new IllegalArgumentException(
                    "The 'in' parameter is null!");
        }
        if (state != S_HEADER_WRITTEN && state != S_PAYLOAD_WRITTEN) {
            throw new IllegalStateException("Write a header before writing payload!");
        }
        long written = 0;
        int read;
        while ((read = in.read(stream_copy_buffer)) != -1) {
            out.write(stream_copy_buffer, 0, read);
            written += read;
        }
        state = S_PAYLOAD_WRITTEN;
        payloadWrittenTotal += written;
        return written;
    }

    /**
     * Append the content of a byte array to the payload content.
     * @param b byte array with data to be written
     * @return number of bytes written during method invocation
     * @throws IOException if an i/o exception occurs while writing payload data
     * @throws IllegalArgumentException if <code>b</code> is null
     * @throws IllegalStateException if no header has been written for the record
     */
    public long writePayload(byte[] b) throws IOException {
        if (b == null) {
            throw new IllegalArgumentException(
                    "The 'b' parameter is null!");
        }
        if (state != S_HEADER_WRITTEN && state != S_PAYLOAD_WRITTEN) {
            throw new IllegalStateException("Write a header before writing payload!");
        }
        out.write(b);
        state = S_PAYLOAD_WRITTEN;
        payloadWrittenTotal += b.length;
        return b.length;
    }

    /**
     * Append the partial content of a byte array to the payload content.
     * @param b byte array with partial data to be written
     * @param offset offset to data to be written
     * @param len length of data to be written
     * @return number of bytes written during method invocation
     * @throws IOException if an i/o exception occurs while writing payload data
     * @throws IllegalArgumentException if <code>b</code> is null
     * @throws IllegalStateException if no header has been written for the record
     */
    public long writePayload(byte[] b, int offset, int len) throws IOException {
        if (b == null) {
            throw new IllegalArgumentException(
                    "The 'b' parameter is null!");
        }
        if (state != S_HEADER_WRITTEN && state != S_PAYLOAD_WRITTEN) {
            throw new IllegalStateException("Write a header before writing payload!");
        }
        out.write(b, offset, len);
        state = S_PAYLOAD_WRITTEN;
        payloadWrittenTotal += len;
        return len;
    }

}