#!/usr/bin/perl
# Attempt to figure out when a domain was registered.
# Copyright 2006, Kees Cook <kees@outflux.net>, http://outflux.net/
# 
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# http://www.gnu.org/copyleft/gpl.html
use strict;
use warnings;
use Net::Whois::Raw; # qw( $TIMEOUT );
use Date::Manip;
use Data::Dumper;

sub date_extract($)
{
    my ($str) = @_;

    # Look for common DD-Mmm-YY(YY) first
    if ($str=~/(\d\d-\D{3}-\d{2,4})/) {
        my $date = ParseDate($1);
        return $date if ($date);
    }
    # Look for common YYYY-MM-DD next
    if ($str=~/(\d{4}-\d\d-\d\d)/) {
        my $date = ParseDate($1);
        return $date if ($date);
    }

    # Fallback to aggressive word-splitting
    my @forward = split(/\b/,$str);
    while (@forward) {
        my @backward=@forward;
        while (@backward) {
            my $str=join("",@backward);

            # skip garbage that may be parsed as "empty"
            if ($str=~/[a-z0-9]/i) {
                warn "\t\tParsing: '$str'\n";
                my $date = ParseDate($str);
                return $date if ($date);
            }
            pop(@backward);
        }
        shift(@forward);
    }
    return 0;
}

sub date_from_servers($)
{
    my ($servers)=@_;
    my $date=0;

    foreach my $response (@{ $servers }) {
        # in the hash: level, text, srv
        print STDERR Dumper($response);

        # All the prefixes I've found (including some JP UTF8 sequences)
        while ($response->{'text'} =~ /^\s*(?:\[\$BEPO?G\/7nF|\(B\]|(?:(?:domain|record)\s*){0,2}(?:(?:activat|creat|register)ed|(?:activa|crea|registra)tion)(?:\s+(?:on|date))?)[:\.\s]*(?!.*registrar)(.*)/img) {
            my $str=$1;
            warn "\tMatched: '$str'\n";
            $date = date_extract($str);
            return $date if ($date);
        }
    }
    return $date;
}

while (defined($ARGV[0])) {
    my $domain=shift @ARGV;
    my $date=0;

    eval {
        # Try first level only, then go deeper
        my ($text,$srv) = get_whois($domain, undef, "QRY_FIRST");
        $date = date_from_servers([{ level => '0', 'text' => $text, 'srv' => $srv }]);
        if (!$date) {
            my $servers = get_whois($domain, undef, "QRY_ALL");
            # Drop the first level, since we already failed to scan it
            shift(@{ $servers }) if (scalar(@{ $servers })>0);
            $date = date_from_servers($servers);
        }
    };
    if ($@) {
        $date=0;
        warn "Lookup exploded: $@\n";
    }

    if ($date) {
        print "'$domain' created on: ".UnixDate($date,"%Y-%m-%d")."\n";
    }
    else {
        print "Nothing known about '$domain'\n";
    }
}