001/**
002 * Java Web Archive Toolkit - Software to read and validate ARC, WARC
003 * and GZip files. (http://jwat.org/)
004 * Copyright 2011-2012 Netarkivet.dk (http://netarkivet.dk/)
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.jwat.warc;
019
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;
024
025/**
026 * WARC-Date parser and format validator. The format "yyyy-MM-dd'T'HH:mm:ss'Z'"
027 * is specified in the WARC ISO standard.
028 *
029 * @author lbihanic, selghissassi, nicl
030 */
031public final class WarcDateParser {
032
033    /** WARC <code>DateFormat</code> as specified in the WARC ISO standard. */
034    private final DateFormat dateFormat;
035
036    /** Basic <code>DateFormat</code> is not thread safe. */
037    private static final ThreadLocal<WarcDateParser> DateParserTL =
038        new ThreadLocal<WarcDateParser>() {
039        @Override
040        public WarcDateParser initialValue() {
041            return new WarcDateParser();
042        }
043    };
044
045    /**
046     * Creates a new <code>DateParser</code>.
047     */
048    private WarcDateParser() {
049        dateFormat = new SimpleDateFormat(WarcConstants.WARC_DATE_FORMAT);
050        dateFormat.setLenient(false);
051        dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
052    }
053
054    /**
055     * Parses a date.
056     * @param dateStr date to parse
057     * @return the formatted date or null if unable to parse date
058     */
059    private Date parseDate(String dateStr) {
060        Date date = null;
061        try {
062            // We subtract 4 from the format because of the ' characters.
063            // These characters are to specify constants in the format string.
064            if ((dateStr != null) && dateStr.length()
065                            == WarcConstants.WARC_DATE_FORMAT.length() - 4) {
066                // Support upper/lower-case.
067                date = dateFormat.parse(dateStr.toUpperCase());
068            }
069        } catch (Exception e) { /* Ignore */ }
070        return date;
071    }
072
073    /**
074     * Parses the date using the format "yyyy-MM-ddTHH:mm:ssZ".
075     * @param dateStr the date to parse
076     * @return the formatted date or <code>null</code> based on whether the date
077     * to parse is compliant with the format "yyyy-MM-ddTHH:mm:ssZ" or not
078     */
079    public static Date getDate(String dateStr) {
080        Date date = DateParserTL.get().parseDate(dateStr);
081        boolean isValid = (date == null) ? false
082                                         : (date.getTime() > 0);
083        return isValid ? date : null;
084    }
085
086    /**
087     * Return a <code>DateFormat</code> object which can be used to string
088     * format WARC dates.
089     * @return <code>DateFormat</code> object which can be used to string
090     * format WARC dates.
091     */
092    public static DateFormat getDateFormat() {
093        return DateParserTL.get().dateFormat;
094    }
095
096}
097