001/**
002 * Java Web Archive Toolkit - Software to read and validate ARC, WARC
003 * and GZip files. (http://jwat.org/)
004 * Copyright 2011-2012 Netarkivet.dk (http://netarkivet.dk/)
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.jwat.warc;
019
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
025
026/**
027 * Default WARC file naming implementation used for writing to multiple files.
028 * (prefix-date-sequenceNr-hostname.extension)
029 *
030 * @author nicl
031 */
032public class WarcFileNamingDefault implements WarcFileNaming {
033
034    /** <code>DateFormat</code> to the following format 'yyyyMMddHHmmss'. */
035    protected DateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss");
036
037    /** Prefix component. */
038    protected String filePrefix;
039
040    /** Date component. */
041    protected Date date;
042
043    /** Date component converted into a human readable string. */
044    protected String dateStr;
045
046    /** Host name component. */
047    protected String hostname;
048
049    /** Extension component (including leading "."). */
050    protected String extension;
051
052    /**
053     * Construct file naming instance.
054     * @param filePrefix prefix or null, will default to "JWAT"
055     * @param date date or null, if you want current date
056     * @param hostname host name or null, if you want to use default local host name
057     * @param extension extension or null, will default to ".warc"
058     */
059    public WarcFileNamingDefault(String filePrefix, Date date, String hostname, String extension) {
060        if (filePrefix != null) {
061            this.filePrefix = filePrefix;
062        } else {
063            this.filePrefix = "JWAT";
064        }
065        if (date != null) {
066            this.date = date;
067        } else {
068            this.date = new Date();
069        }
070        if (hostname != null ) {
071            this.hostname = hostname;
072        } else {
073            try {
074                this.hostname = InetAddress.getLocalHost().getHostName().toLowerCase();
075            } catch (UnknownHostException e) {
076                this.hostname = "unknown";
077            }
078        }
079        if (extension != null) {
080            this.extension = extension;
081        } else {
082            this.extension = ".warc";
083        }
084        dateStr = dateFormat.format(this.date);
085    }
086
087    @Override
088    public boolean supportMultipleFiles() {
089        return true;
090    }
091
092    @Override
093    public String getFilename(int sequenceNr, boolean bCompressed) {
094        String filename = filePrefix + "-" + dateStr + "-" + String.format("%05d", sequenceNr++) + "-" + hostname + extension;
095        if (bCompressed) {
096            filename += ".gz";
097        }
098        return filename;
099    }
100
101}