/**
 * Java Web Archive Toolkit - Software to read and validate ARC, WARC
 * and GZip files. (http://jwat.org/)
 * Copyright 2011-2012 Netarkivet.dk (http://netarkivet.dk/)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.jwat.warc;

import java.net.InetAddress;
import java.net.UnknownHostException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;

/**
 * Default WARC file naming implementation used for writing to multiple files.
 * (prefix-date-sequenceNr-hostname.extension)
 *
 * <p>All components except the sequence number are fixed at construction
 * time; only the sequence number and the optional ".gz" suffix vary between
 * calls to {@link #getFilename(int, boolean)}.</p>
 *
 * @author nicl
 */
public class WarcFileNamingDefault implements WarcFileNaming {

    /**
     * <code>DateFormat</code> to the following format 'yyyyMMddHHmmss'.
     * Pinned to {@link Locale#ROOT} so the digits and calendar do not depend
     * on the default locale. The time zone is left at the JVM default.
     */
    protected DateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss", Locale.ROOT);

    /** Prefix component. */
    protected String filePrefix;

    /** Date component. */
    protected Date date;

    /** Date component converted into a human readable string. */
    protected String dateStr;

    /** Host name component. */
    protected String hostname;

    /** Extension component (including leading "."). */
    protected String extension;

    /**
     * Construct file naming instance.
     * @param filePrefix prefix or null, will default to "JWAT"
     * @param date date or null, if you want current date
     * @param hostname host name or null, if you want to use default local host name
     *        (lower-cased; "unknown" if the local host name can not be resolved)
     * @param extension extension or null, will default to ".warc"
     */
    public WarcFileNamingDefault(String filePrefix, Date date, String hostname, String extension) {
        if (filePrefix != null) {
            this.filePrefix = filePrefix;
        } else {
            this.filePrefix = "JWAT";
        }
        if (date != null) {
            this.date = date;
        } else {
            this.date = new Date();
        }
        if (hostname != null) {
            this.hostname = hostname;
        } else {
            try {
                // Locale.ROOT keeps the lower-casing locale independent
                // (e.g. no dotless 'i' under a Turkish default locale).
                this.hostname = InetAddress.getLocalHost().getHostName().toLowerCase(Locale.ROOT);
            } catch (UnknownHostException e) {
                // Best effort: a missing host name must not prevent writing files.
                this.hostname = "unknown";
            }
        }
        if (extension != null) {
            this.extension = extension;
        } else {
            this.extension = ".warc";
        }
        // The date string is computed once so every file of a run shares it.
        dateStr = dateFormat.format(this.date);
    }

    /**
     * Tells whether this naming scheme can produce more than one filename.
     * @return always <code>true</code>
     */
    @Override
    public boolean supportMultipleFiles() {
        return true;
    }

    /**
     * Build the filename for the given sequence number in the form
     * prefix-date-sequenceNr-hostname + extension.
     * @param sequenceNr sequence number, zero padded to at least 5 digits
     * @param bCompressed if true, ".gz" is appended to the filename
     * @return the resulting filename
     */
    @Override
    public String getFilename(int sequenceNr, boolean bCompressed) {
        // Locale.ROOT guarantees ASCII digits regardless of the default locale.
        String filename = filePrefix + "-" + dateStr + "-"
                + String.format(Locale.ROOT, "%05d", sequenceNr)
                + "-" + hostname + extension;
        if (bCompressed) {
            filename += ".gz";
        }
        return filename;
    }

}