public class WarcConstants extends Object
| Modifier and Type | Field and Description |
|---|---|
static String |
CONTENT_TYPE_FORMAT
Content-type format string as specified in RFC2616.
|
static String |
CONTENT_TYPE_METADATA
Suggested content-type for metadata records and others.
|
static String |
CT_APP_WARC_FIELDS
Suggested content-type/media-type for metadata records and others.
|
protected static byte[] |
endMark
End mark used after each record consisting of two newlines.
|
static int |
FDT_CONTENTTYPE
WARC ContentType field datatype identifier.
|
static int |
FDT_DATE
WARC Date field datatype identifier.
|
static int |
FDT_DIGEST
WARC Digest field datatype identifier.
|
static String[] |
FDT_IDX_STRINGS
WARC field datatype id to field datatype name mapping table.
|
static int |
FDT_INETADDRESS
WARC InetAddress field datatype identifier.
|
static int |
FDT_INTEGER
WARC Integer field datatype identifier.
|
static int |
FDT_LONG
WARC Long field datatype identifier.
|
static int |
FDT_STRING
WARC String field datatype identifier.
|
static int |
FDT_URI
WARC URI field datatype identifier.
|
static int[][] |
field_policy
A (Warc-Types x Warc-Header-Fields) matrix used for policy validation.
|
static Map<String,Integer> |
fieldNameIdxMap
Map used to identify known warc field names.
|
static boolean[] |
fieldNamesRepeatableLookup
Lookup table of Warc fields that can have multiple occurrences.
|
static String |
FN_CONTENT_LENGTH
Content-length field name.
|
static String |
FN_CONTENT_TYPE
Content-type field name.
|
static int |
FN_IDX_CONTENT_LENGTH
Warc reader content-length field name id.
|
static int |
FN_IDX_CONTENT_TYPE
Warc reader content-type field name id.
|
static int[] |
FN_IDX_DT
Array to lookup WARC field datatypes.
|
static String[] |
FN_IDX_STRINGS
WARC field name id to field name mapping table.
|
static int |
FN_IDX_WARC_BLOCK_DIGEST
Warc reader warc-block-digest field name id.
|
static int |
FN_IDX_WARC_CONCURRENT_TO
Warc reader warc-concurrent-to field name id.
|
static int |
FN_IDX_WARC_DATE
Warc reader warc-date field name id.
|
static int |
FN_IDX_WARC_FILENAME
Warc reader warc-filename field name id.
|
static int |
FN_IDX_WARC_IDENTIFIED_PAYLOAD_TYPE
Warc reader warc-identified-payload-type field name id.
|
static int |
FN_IDX_WARC_IP_ADDRESS
Warc reader warc-ip-address field name id.
|
static int |
FN_IDX_WARC_PAYLOAD_DIGEST
Warc reader warc-payload-digest field name id.
|
static int |
FN_IDX_WARC_PROFILE
Warc reader warc-profile field name id.
|
static int |
FN_IDX_WARC_RECORD_ID
Warc reader warc-record-id field name id.
|
static int |
FN_IDX_WARC_REFERS_TO
Warc reader warc-refers-to field name id.
|
static int |
FN_IDX_WARC_REFERS_TO_DATE
WARC-Refers-To-Date field name id.
|
static int |
FN_IDX_WARC_REFERS_TO_TARGET_URI
WARC-Refers-To-Target-URI field name id.
|
static int |
FN_IDX_WARC_SEGMENT_NUMBER
Warc reader warc-segment-number field name id.
|
static int |
FN_IDX_WARC_SEGMENT_ORIGIN_ID
Warc reader warc-segment-origin-id field name id.
|
static int |
FN_IDX_WARC_SEGMENT_TOTAL_LENGTH
Warc reader warc-segment-total-length field name id.
|
static int |
FN_IDX_WARC_TARGET_URI
Warc reader warc-target-uri field name id.
|
static int |
FN_IDX_WARC_TRUNCATED
Warc reader warc-truncated field name id.
|
static int |
FN_IDX_WARC_TYPE
Warc reader warc-type field name id.
|
static int |
FN_IDX_WARC_WARCINFO_ID
Warc reader warc-warcinfo-id field name id.
|
static int |
FN_INDEX_OF_LAST
Index of last WARC field (zero-indexed).
|
static int |
FN_NUMBER
Number of WARC fields.
|
static String |
FN_WARC_BLOCK_DIGEST
Warc-block-digest field name.
|
static String |
FN_WARC_CONCURRENT_TO
Warc-concurrent-to field name.
|
static String |
FN_WARC_DATE
Warc-date field name.
|
static String |
FN_WARC_FILENAME
Warc-filename field name.
|
static String |
FN_WARC_IDENTIFIED_PAYLOAD_TYPE
Warc-identified-payload-type field name.
|
static String |
FN_WARC_IP_ADDRESS
Warc-ip-address field name.
|
static String |
FN_WARC_PAYLOAD_DIGEST
Warc-payload-digest field name.
|
static String |
FN_WARC_PROFILE
Warc-profile field name.
|
static String |
FN_WARC_RECORD_ID
Warc-record-id field name.
|
static String |
FN_WARC_REFERS_TO
Warc-refers-to field name.
|
static String |
FN_WARC_REFERS_TO_DATE
WARC-Refers-To-Date field name.
|
static String |
FN_WARC_REFERS_TO_TARGET_URI
WARC-Refers-To-Target-URI field name.
|
static String |
FN_WARC_SEGMENT_NUMBER
Warc-segment-number field name.
|
static String |
FN_WARC_SEGMENT_ORIGIN_ID
Warc-segment-origin-id field name.
|
static String |
FN_WARC_SEGMENT_TOTAL_LENGTH
Warc-segment-total-length field name.
|
static String |
FN_WARC_TARGET_URI
Warc-target-uri field name.
|
static String |
FN_WARC_TRUNCATED
Warc-truncated field name.
|
static String |
FN_WARC_TYPE
Warc-type field name.
|
static String |
FN_WARC_WARCINFO_ID
Warc-warcinfo-id field name.
|
static String |
MEDIA_TYPE_METADATA
Suggested media-type for metadata records and others.
|
static String[] |
P_IDX_STRINGS
WARC profile id to profile name mapping table.
|
static int |
POLICY_IGNORE
Warc header can be ignored.
|
static int |
POLICY_MANDATORY
Warc header is mandatory (equal to shall).
|
static int |
POLICY_MAY
Warc header can be present.
|
static int |
POLICY_MAY_NOT
Warc header should not be present.
|
static int |
POLICY_SHALL
Warc header must be present.
|
static int |
POLICY_SHALL_NOT
Warc header must not be present.
|
static String |
PROFILE_IDENTICAL_PAYLOAD_DIGEST
Revisit WARC-Profile id for identical payload digest.
|
static int |
PROFILE_IDX_IDENTICAL_PAYLOAD_DIGEST
Warc reader id for identical payload digest profile.
|
static int |
PROFILE_IDX_SERVER_NOT_MODIFIED
Warc reader id for server not modified profile.
|
static int |
PROFILE_IDX_UNKNOWN
Warc reader id for unknown profile.
|
static String |
PROFILE_SERVER_NOT_MODIFIED
Revisit WARC-Profile id for server not modified.
|
static Map<String,Integer> |
profileIdxMap
Profile lookup map used to identify WARC-Profile values.
|
static Map<String,Integer> |
recordTypeIdxMap
WARC-Type lookup map.
|
static String |
RT_CONTINUATION
WARC-Type continuation id.
|
static String |
RT_CONVERSION
WARC-Type conversion id.
|
static int |
RT_IDX_CONTINUATION
Warc reader continuation warc record type id.
|
static int |
RT_IDX_CONVERSION
Warc reader conversion warc record type id.
|
static int |
RT_IDX_METADATA
Warc reader metadata warc record type id.
|
static int |
RT_IDX_REQUEST
Warc reader request warc record type id.
|
static int |
RT_IDX_RESOURCE
Warc reader resource warc record type id.
|
static int |
RT_IDX_RESPONSE
Warc reader response warc record type id.
|
static int |
RT_IDX_REVISIT
Warc reader revisit warc record type id.
|
static String[] |
RT_IDX_STRINGS
WARC type id to record type name mapping table.
|
static int |
RT_IDX_UNKNOWN
Warc reader unknown warc record type id.
|
static int |
RT_IDX_WARCINFO
Warc reader warcinfo warc record type id.
|
static int |
RT_INDEX_OF_LAST
Index of last WARC type (zero-indexed).
|
static String |
RT_METADATA
WARC-Type metadata id.
|
static int |
RT_NUMBER
Number of WARC types.
|
static String |
RT_REQUEST
WARC-Type request id.
|
static String |
RT_RESOURCE
WARC-Type resource id.
|
static String |
RT_RESPONSE
WARC-Type response id.
|
static String |
RT_REVISIT
WARC-Type revisit id.
|
static String |
RT_WARCINFO
WARC-Type warcinfo id.
|
static Map<String,Integer> |
truncatedTypeIdxMap
Lookup map for known truncation reason ids.
|
static String |
TT_DISCONNECT
WARC-Truncated disconnect id.
|
static int |
TT_IDX_DISCONNECT
Warc reader disconnect reason id.
|
static int |
TT_IDX_FUTURE_REASON
Warc reader future reason id.
|
static int |
TT_IDX_LENGTH
Warc reader length reason id.
|
static String[] |
TT_IDX_STRINGS
WARC truncation reason id to truncation reason name mapping table.
|
static int |
TT_IDX_TIME
Warc reader time reason id.
|
static int |
TT_IDX_UNSPECIFIED
Warc reader unspecified reason id.
|
static String |
TT_LENGTH
WARC-Truncated length id.
|
static String |
TT_TIME
WARC-Truncated time id.
|
static String |
TT_UNSPECIFIED
WARC-Truncated unspecified id.
|
static String |
WARC_DATE_FORMAT
WARC date format string as specified by the WARC ISO standard.
|
static String |
WARC_DIGEST_FORMAT
WARC digest format string as specified by the WARC ISO standard.
|
static String |
WARC_MAGIC_HEADER
A WARC header block starts with this string including trailing version
information.
|
static String |
WARC_MIME_TYPE
WARC mime type.
|
static int |
WARC_RECORD_TRAILING_NEWLINES
Trailing newlines after each record as per the WARC ISO standard.
|
| Modifier | Constructor and Description |
|---|---|
protected |
WarcConstants()
This utility class does not require instantiation.
|
public static final String WARC_MAGIC_HEADER
protected static byte[] endMark
public static final String WARC_DATE_FORMAT
public static final String WARC_DIGEST_FORMAT
public static final String CONTENT_TYPE_FORMAT
public static final String WARC_MIME_TYPE
public static final String CT_APP_WARC_FIELDS
public static final String CONTENT_TYPE_METADATA
public static final String MEDIA_TYPE_METADATA
public static final int WARC_RECORD_TRAILING_NEWLINES
public static final int FN_NUMBER
public static final int FN_INDEX_OF_LAST
public static final int RT_NUMBER
public static final int RT_INDEX_OF_LAST
public static final String FN_WARC_TYPE
public static final String FN_WARC_RECORD_ID
public static final String FN_WARC_DATE
public static final String FN_CONTENT_LENGTH
public static final String FN_CONTENT_TYPE
public static final String FN_WARC_CONCURRENT_TO
public static final String FN_WARC_BLOCK_DIGEST
public static final String FN_WARC_PAYLOAD_DIGEST
public static final String FN_WARC_IP_ADDRESS
public static final String FN_WARC_REFERS_TO
public static final String FN_WARC_TARGET_URI
public static final String FN_WARC_TRUNCATED
public static final String FN_WARC_WARCINFO_ID
public static final String FN_WARC_FILENAME
public static final String FN_WARC_PROFILE
public static final String FN_WARC_IDENTIFIED_PAYLOAD_TYPE
public static final String FN_WARC_SEGMENT_ORIGIN_ID
public static final String FN_WARC_SEGMENT_NUMBER
public static final String FN_WARC_SEGMENT_TOTAL_LENGTH
public static final String FN_WARC_REFERS_TO_TARGET_URI
public static final String FN_WARC_REFERS_TO_DATE
public static final String[] FN_IDX_STRINGS
public static final int FN_IDX_WARC_TYPE
public static final int FN_IDX_WARC_RECORD_ID
public static final int FN_IDX_WARC_DATE
public static final int FN_IDX_CONTENT_LENGTH
public static final int FN_IDX_CONTENT_TYPE
public static final int FN_IDX_WARC_CONCURRENT_TO
public static final int FN_IDX_WARC_BLOCK_DIGEST
public static final int FN_IDX_WARC_PAYLOAD_DIGEST
public static final int FN_IDX_WARC_IP_ADDRESS
public static final int FN_IDX_WARC_REFERS_TO
public static final int FN_IDX_WARC_TARGET_URI
public static final int FN_IDX_WARC_TRUNCATED
public static final int FN_IDX_WARC_WARCINFO_ID
public static final int FN_IDX_WARC_FILENAME
public static final int FN_IDX_WARC_PROFILE
public static final int FN_IDX_WARC_IDENTIFIED_PAYLOAD_TYPE
public static final int FN_IDX_WARC_SEGMENT_ORIGIN_ID
public static final int FN_IDX_WARC_SEGMENT_NUMBER
public static final int FN_IDX_WARC_SEGMENT_TOTAL_LENGTH
public static final int FN_IDX_WARC_REFERS_TO_TARGET_URI
public static final int FN_IDX_WARC_REFERS_TO_DATE
public static final Map<String,Integer> fieldNameIdxMap
public static final int FDT_STRING
public static final int FDT_INTEGER
public static final int FDT_LONG
public static final int FDT_DIGEST
public static final int FDT_CONTENTTYPE
public static final int FDT_DATE
public static final int FDT_INETADDRESS
public static final int FDT_URI
public static final String[] FDT_IDX_STRINGS
public static final int[] FN_IDX_DT
public static final boolean[] fieldNamesRepeatableLookup
public static final String RT_WARCINFO
public static final String RT_RESPONSE
public static final String RT_RESOURCE
public static final String RT_REQUEST
public static final String RT_METADATA
public static final String RT_REVISIT
public static final String RT_CONVERSION
public static final String RT_CONTINUATION
public static final String[] RT_IDX_STRINGS
public static final int RT_IDX_UNKNOWN
public static final int RT_IDX_WARCINFO
public static final int RT_IDX_RESPONSE
public static final int RT_IDX_RESOURCE
public static final int RT_IDX_REQUEST
public static final int RT_IDX_METADATA
public static final int RT_IDX_REVISIT
public static final int RT_IDX_CONVERSION
public static final int RT_IDX_CONTINUATION
public static final Map<String,Integer> recordTypeIdxMap
public static final String TT_LENGTH
public static final String TT_TIME
public static final String TT_DISCONNECT
public static final String TT_UNSPECIFIED
public static final String[] TT_IDX_STRINGS
public static final int TT_IDX_FUTURE_REASON
public static final int TT_IDX_LENGTH
public static final int TT_IDX_TIME
public static final int TT_IDX_DISCONNECT
public static final int TT_IDX_UNSPECIFIED
public static final Map<String,Integer> truncatedTypeIdxMap
public static final String PROFILE_IDENTICAL_PAYLOAD_DIGEST
public static final String PROFILE_SERVER_NOT_MODIFIED
public static final String[] P_IDX_STRINGS
public static final int PROFILE_IDX_UNKNOWN
public static final int PROFILE_IDX_IDENTICAL_PAYLOAD_DIGEST
public static final int PROFILE_IDX_SERVER_NOT_MODIFIED
public static final Map<String,Integer> profileIdxMap
public static final int POLICY_IGNORE
public static final int POLICY_MANDATORY
public static final int POLICY_SHALL
public static final int POLICY_SHALL_NOT
public static final int POLICY_MAY
public static final int POLICY_MAY_NOT
public static final int[][] field_policy
protected WarcConstants()
Copyright © 2011–2015. All rights reserved.