use strict;
use warnings;
use Data::Dumper;
use DateTime::Format::Strptime;
use Time::Local;
use List::Util 'sum';
# Flush the currently selected output handle (STDOUT unless changed) after
# every write, so the "\r<count>" progress counter printed while parsing
# shows up promptly.
$| = 1;

# Date parsers, one per timestamp layout.
# NOTE(review): nothing in the visible code calls these objects; the date
# helpers at the bottom of the file use Time::Local instead. Confirm whether
# they are still needed.
my $validity_strptime = DateTime::Format::Strptime->new(
  pattern => '%Y-%m-%d %H:%M:%S',
);
my $lifetime_strptime = DateTime::Format::Strptime->new(
  pattern => '%Y%m%d',
);
my $epoch_strptime = DateTime::Format::Strptime->new(
  pattern => '%s',
);

# Tab-separated input file, one record per line.
my $FILE = "../results-certs-invalid-050816.txt";

# Histograms. Every one is filled as $hash{"valid"|"invalid"}{observed value}:
#   %validity - days between the timestamps in fields 8 and 9
#   %host     - average number of hosts per timeline entry carrying a host list
#   %lifetime - days between the first and the last date in the timeline
#   %issuer   - field 6
#   %as       - fractional AS share, averaged over the timeline entries
#   %version  - field 1
#   %ca       - field 2
#   %key      - field 18
#   %pkey     - field 28
#   %keyalgo  - field 19
#   %keylen   - field 20
my (
  %validity,
  %host,
  %lifetime,
  %issuer,
  %as,
  %version,
  %ca,
  %key,
  %pkey,
  %keyalgo,
  %keylen,
);

# Number of input lines read so far (drives the progress counter).
my $c = 0;

# Read the tab-separated results file line by line and fill the histograms
# declared above. Records whose timeline field is the literal "[]" are not
# counted. Fields used (0-based, after splitting on tab/CR/LF):
#    1 -> %version      2 -> %ca          6 -> %issuer
#    8, 9 -> the two timestamps whose distance is stored in %validity
#   18 -> %key         19 -> %keyalgo    20 -> %keylen     28 -> %pkey
#   30 -> timeline     31 -> the string "True" marks the record as valid
# Dies if the input file cannot be opened.
open(my $in, '<', $FILE) or die("Cannot open '$FILE' for reading: $!");
while (my $line = <$in>) {
  $c ++;
  if ($c % 1000 == 0) { print "\r$c"; }
  my @f = split(/[\t\r\n]/, $line);

  # A blank or truncated line has no timeline field; counting it would add a
  # bogus "invalid" record to every histogram.
  if (! defined $f[30]) {
    warn("Skipping line $.: no timeline field (found " . scalar(@f) . " fields)");
    next;
  }

  my $timeline = $f[30];
  next if $timeline eq "[]";

  # DETERMINE VALID
  # split() drops trailing empty fields, so field 31 may be missing; that
  # counts as "invalid", exactly like any value other than "True".
  my $valid = ((defined $f[31] && $f[31] eq "True") ? "valid" : "invalid");

  # WALK THE TIMELINE
  # Each entry looks like "(YYYYMMDD,<payload>)". One pass collects the first
  # and last date (for the lifetime) and the per-entry AS shares.
  my $start;
  my $end;
  my $count = 0;   # timeline entries whose payload is a "[...]" host list
  my %ases = ();   # AS => sum over entries of that AS's share of the hosts
  my $hosts = 0;   # total number of hosts over all counted entries
  while ($timeline =~ /\[?\(([0-9]{8}),([^\)]+)\),?/g) {
    # Copy the captures now: the match below overwrites $1.
    my ($date, $entry) = ($1, $2);
    if (! defined $start) { $start = $date; }
    $end = $date;

    if ($entry =~ m|^\[([^\]]+)\]$|) {
      $count ++;
      my $these_ases = get_ases($1);
      my $total = sum values %{ $these_ases };
      foreach my $as (keys %{ $these_ases }) {
        $ases{$as} += $these_ases->{$as}/$total;
        $hosts += $these_ases->{$as};
      }
    }
  }

  # DETERMINE LIFETIME
  # Inclusive number of days between the first and the last timeline date.
  if (defined $start) {
    my $lifetime = int((lifetime_date($end) - lifetime_date($start)) / (60*60*24)) + 1;
    $lifetime{$valid}{$lifetime} ++;
  } else {
    # Without this guard an unparsable timeline would be recorded as a
    # one-day lifetime.
    warn("Line $.: no dated entries found in timeline '$timeline'");
  }

  # DETERMINE VALIDITY
  my $validity = int((validity_date($f[9]) - validity_date($f[8])) / (60*60*24)) + 1;
  $validity{$valid}{$validity} ++;

  # GET ISSUER
  $issuer{$valid}{$f[6]} ++;

  # GET ASES
  # Average the per-entry shares so that each record contributes a total
  # weight of 1 spread over its ASes. %ases is only non-empty when
  # $count > 0, so the division is safe.
  foreach my $as (keys %ases) {
    $as{$valid}{$as} += $ases{$as}/$count;
  }
  if ($count > 0) {
    $host{$valid}{$hosts/$count} ++;
  }

  # GET KEYS
  $pkey{$valid}{$f[28]} ++;
  $key{$valid}{$f[18]} ++;
  $keyalgo{$valid}{$f[19]} ++;
  $keylen{$valid}{$f[20]} ++;

  # GET VERSION AND CA
  $version{$valid}{$f[1]} ++;
  $ca{$valid}{$f[2]} ++;
}
close($in);

# Write every report. Each row is [ dumper, histogram, output path prefix,
# extra arguments... ]; rows are executed from top to bottom, and each dumper
# writes one file per class found in the histogram ("<prefix>.<class>").
{
  my @reports = (
    [ \&dump_cdf,      \%host,     "output/hosts.txt" ],
    [ \&dump_cdf,      \%validity, "output/validity.txt" ],
    [ \&dump_cdf,      \%lifetime, "output/lifetime.txt" ],
    [ \&dump_dist_cdf, \%issuer,   "output/issuer.txt" ],
    [ \&dump_dist_cdf, \%as,       "output/as.txt" ],
    [ \&dump_top,      \%issuer,   "output/top-issuer.txt", 100 ],
    [ \&dump_top,      \%as,       "output/top-as.txt", -1 ],
    [ \&dump_dist_cdf, \%key,      "output/key.txt" ],
    [ \&dump_dist_cdf, \%pkey,     "output/pkey.txt" ],
    [ \&dump_cdf,      \%keylen,   "output/keylen.txt" ],
    [ \&dump_pdf,      \%keyalgo,  "output/keyalgo.txt" ],
    [ \&dump_pdf,      \%version,  "output/version.txt" ],
    [ \&dump_pdf,      \%ca,       "output/ca.txt" ],
  );

  foreach my $report (@reports) {
    my ($dumper, @args) = @{ $report };
    $dumper->(@args);
  }
}

# Write the entries of every class, ordered by descending value, to
# "$file.<class>" as "<name>\t<value formatted with %.6f>" lines.
#
#   $h    - hash ref of the form { class => { name => number } }
#   $file - output path prefix; one file per class is written
#   $n    - maximum number of entries per file; zero, negative or undefined
#           means "write all entries"
#
# Dies if an output file cannot be opened or closed. The file is closed
# before returning, so its content is complete on disk at that point.
sub dump_top {
  my ($h, $file, $n) = @_;
  $n = -1 unless defined $n;

  foreach my $e (keys %{ $h }) {
    my $counts = $h->{$e};
    my $path = "$file.$e";

    # Break ties by name: hash order alone would make the ranking, and
    # therefore the top-N cut, differ from run to run.
    my @l = sort { $counts->{$b} <=> $counts->{$a} or $a cmp $b } keys %{ $counts };
    if ($n > 0 && @l > $n) { @l = @l[0 .. $n - 1]; }

    open(my $out, '>', $path) or die("Cannot open '$path' for writing: $!");
    foreach my $f (@l) {
      printf {$out} "%s\t%.6f\n", $f, $counts->{$f};
    }
    close($out) or die("Cannot close '$path': $!");
  }
}

# Tally the AS labels found in a comma-separated list of "<ip>/<as>" entries.
#
#   $s - string such as "1.2.3.4/64512,5.6.7.8/None"; the part before the
#        slash may contain digits and dots, the part after it digits and the
#        letters of "None"
#
# Returns a hash ref mapping each AS label to the number of entries carrying
# it. The counts are raw, not normalised; the caller divides by their sum.
# Dies on the first entry that does not have the expected shape.
sub get_ases {
  my ($s) = @_;

  my %seen_count;
  for my $entry (split(/,/, $s)) {
    die("Unrecongized IPAS '$entry'")
      unless $entry =~ m|^[0-9\.]+\/([0-9None]+)$|;
    $seen_count{$1} ++;
  }

  return \%seen_count;
}

# Write the cumulative distribution of every class to "$file.<class>".
#
#   $h    - hash ref of the form { class => { numeric value => weight } }
#   $file - output path prefix; one file per class is written
#
# Each output line is "<value>\t<cumulative fraction formatted with %.6f>",
# with values in ascending numeric order and the fraction being the running
# sum of weights divided by the class total.
#
# Dies if an output file cannot be opened or closed. The file is closed
# before returning, so its content is complete on disk at that point.
sub dump_cdf {
  my ($h, $file) = @_;

  foreach my $e (keys %{ $h }) {
    my $dist = $h->{$e};
    my $total = sum_values($dist);
    my $path = "$file.$e";

    open(my $out, '>', $path) or die("Cannot open '$path' for writing: $!");
    my $running = 0;
    foreach my $f (sort {$a <=> $b} keys %{ $dist }) {
      $running += $dist->{$f};
      printf {$out} "%s\t%.6f\n", $f, $running/$total;
    }
    close($out) or die("Cannot close '$path': $!");
  }
}

# Write the relative frequency of every key of every class to "$file.<class>".
#
#   $h    - hash ref of the form { class => { key => weight } }
#   $file - output path prefix; one file per class is written
#
# Each output line is "<key>\t<weight / class total, formatted with %.6f>",
# with keys in ascending string order.
#
# Dies if an output file cannot be opened or closed. The file is closed
# before returning, so its content is complete on disk at that point.
sub dump_pdf {
  my ($h, $file) = @_;

  foreach my $e (keys %{ $h }) {
    my $dist = $h->{$e};
    my $total = sum_values($dist);
    my $path = "$file.$e";

    open(my $out, '>', $path) or die("Cannot open '$path' for writing: $!");
    foreach my $f (sort keys %{ $dist }) {
      printf {$out} "%s\t%.6f\n", $f, $dist->{$f}/$total;
    }
    close($out) or die("Cannot close '$path': $!");
  }
}

# Write a concentration curve for every class to "$file.<class>": keys are
# ranked by descending weight, and line i holds
# "<i / number of keys>\t<sum of the i largest weights / class total>",
# both formatted with %.6f.
#
#   $h    - hash ref of the form { class => { key => weight } }
#   $file - output path prefix; one file per class is written
#
# Keys with equal weight may be visited in any order; that does not change
# the output, because only the running sum is printed.
#
# Dies if an output file cannot be opened or closed. The file is closed
# before returning, so its content is complete on disk at that point.
sub dump_dist_cdf {
  my ($h, $file) = @_;

  foreach my $e (keys %{ $h }) {
    my $dist = $h->{$e};
    my $total = sum_values($dist);
    my $path = "$file.$e";

    my @l = sort {$dist->{$b} <=> $dist->{$a}} keys %{ $dist };
    my $tn = scalar @l;

    open(my $out, '>', $path) or die("Cannot open '$path' for writing: $!");
    my $running = 0;
    my $n = 0;
    foreach my $f (@l) {
      $running += $dist->{$f};
      $n++;
      printf {$out} "%.6f\t%.6f\n", $n/$tn, $running/$total;
    }
    close($out) or die("Cannot close '$path': $!");
  }
}

# Return the sum of all values stored in the hash that $h refers to.
# An empty hash yields 0.
sub sum_values {
  my ($h) = @_;

  my $total = 0;
  $total += $_ for values %{ $h };
  return $total;
}

# Convert a "YYYY-MM-DD HH:MM:SS" timestamp, taken as UTC, to epoch seconds.
#
#   $date - the timestamp string (the parameter is positional)
#
# Returns the epoch value. If the string is undefined, does not have the
# expected layout, has a year below 1000, or names an impossible calendar
# date, a warning is emitted and 0 is returned.
sub validity_date {
  my ($date) = @_;
  $date = '' unless defined $date;

  if ($date =~ m|^([0-9]{4})-([0-9]{2})-([0-9]{2}) ([0-9]{2}):([0-9]{2}):([0-9]{2})$|) {
    my ($year, $mon, $mday, $hour, $min, $sec) = ($1, $2, $3, $4, $5, $6);

    # Time::Local takes a year literally only when it is above 999. Smaller
    # values are guessed at (0..99 as a two-digit year in a rolling century,
    # 100..999 as an offset from 1900), so the four-digit year is passed
    # as is; subtracting 1900 turned 1970 into 2070 and 3000 into 1100.
    if ($year > 999) {
      # timegm croaks on out-of-range fields such as February 31st; report
      # those like any other unusable date instead of aborting the run.
      local $@;
      my $epoch = eval { timegm($sec, $min, $hour, $mday, $mon - 1, $year) };
      return $epoch if defined $epoch;
    }
  }

  warn("Unrecognized date '$date'");
  return 0;
}

# Convert a date that starts with "YYYYMMDD" to epoch seconds for midnight
# UTC of that day. Anything following the eight digits is ignored.
#
#   $date - the date string (the parameter is positional)
#
# Returns the epoch value. If the string is undefined, does not start with
# eight digits, has a year below 1000, or names an impossible calendar date,
# a warning is emitted and 0 is returned.
sub lifetime_date {
  my ($date) = @_;
  $date = '' unless defined $date;

  if ($date =~ m|^([0-9]{4})([0-9]{2})([0-9]{2})|) {
    my ($year, $mon, $mday) = ($1, $2, $3);

    # Time::Local takes a year literally only when it is above 999. Smaller
    # values are guessed at (0..99 as a two-digit year in a rolling century,
    # 100..999 as an offset from 1900), so the four-digit year is passed
    # as is; subtracting 1900 turned 1970 into 2070 and 3000 into 1100.
    if ($year > 999) {
      # timegm croaks on out-of-range fields such as month 13; report those
      # like any other unusable date instead of aborting the run.
      local $@;
      my $epoch = eval { timegm(0, 0, 0, $mday, $mon - 1, $year) };
      return $epoch if defined $epoch;
    }
  }

  warn("Unrecognized date '$date'");
  return 0;
}
