#!/pkg/bin/perl
#
# Reads rendered blog archive pages (HTML) from the files named on the
# command line, or from STDIN, extracts the comments found in them and
# stores each one as an MT::Comment through the Movable Type API.
use strict;
use warnings;

use constant MTDIR => "/home/mt/MT";
use lib MTDIR . '/lib';
use MT;

# NOTE(review): $blog_id is not referenced anywhere else in this file; the
# blog id actually stored on a comment is taken from the loaded entry.
my $blog_id = 1;

# Bootstrap Movable Type.  The object itself is not used afterwards, but a
# failed bootstrap should stop the import here with MT's own error message
# rather than surface later as an unrelated failure.
my $mt = MT->new or die MT->errstr;

use Date::Parse qw(str2time);
use Date::Format qw(time2str);
use Data::Dumper;
use HTML::Entities;

# Id of the entry whose archive page is currently being read; set by the
# main loop from the page's rdf:about permalink.
my $entry_id;

# True while the reader is between the "Comments" heading and the
# "Post a comment" heading of a page.
my $in_comments = 0;

# Fields of the comment currently being assembled
# (id, text, author, author_string, link, email, time, entry_id).
my %comment;

# Number of comments saved so far in this run.
my $comments = 0;

# flush()
#
# Store the comment currently held in the file-level %comment hash as an
# MT::Comment, then empty %comment so the parser can start on the next one.
#
# Reads:   %comment, $entry_id
# Updates: %comment (emptied once a pending comment has been handled),
#          $comments (incremented for every comment saved)
#
# Returns 1 in every non-fatal case.  When no comment is pending (no id in
# %comment) it returns immediately and leaves %comment untouched.
#
# Dies when
#   - the comment id already exists in MT under a different author,
#   - no entry id is known, or the entry cannot be loaded,
#   - saving the new comment fails.
sub flush {
  # No id means no comment anchor has been seen since the last flush.
  return 1 unless $comment{id};

  # Undo the markup wrapped around the body: drop the first line-leading
  # <p>, turn "<br />" line breaks back into bare newlines, drop the closing
  # </p>, and trim surrounding whitespace.
  $comment{text} = '' unless defined $comment{text};
  $comment{text} =~ s!^<p>!!sm;
  $comment{text} =~ s!<br />\n!\n!smg;
  $comment{text} =~ s!\s*</p>\s*$!!sm;
  $comment{text} =~ s!^\s*!!;
  $comment{text} =~ s!\s*$!!s;

  $comment{entry_id} = $entry_id;

  # Progress/debug output: show exactly what was parsed.
  print Data::Dumper->Dump([\%comment], [qw(comment)]);

  # A comment with this id may already be present, e.g. from an earlier run.
  # Same author: treat it as already imported and skip.  Different author:
  # the ids do not line up, so stop rather than guess.
  my $existing = MT::Comment->load({ id => $comment{id} });
  if ($existing) {
    print "comment id already exists\n";
    my $parsed_author = defined $comment{author}  ? $comment{author}  : '';
    my $stored_author = defined $existing->author ? $existing->author : '';
    die "not the same comment?" unless $parsed_author eq $stored_author;
    %comment = ();
    return 1;
  }

  # The entry is only needed for a new comment; fail with a usable message
  # instead of a method call on an undefined value.
  defined $entry_id
    or die "comment $comment{id}: no entry id has been parsed yet";
  my $entry = MT::Entry->load({ id => $entry_id })
    or die "comment $comment{id}: entry $entry_id not found";

  my $new_comment = MT::Comment->new();
  $new_comment->id($comment{id});          # keep the id used in the archive page
  $new_comment->blog_id($entry->blog_id);
  $new_comment->entry_id($entry->id);
  $new_comment->author($comment{author});
  $new_comment->url($comment{link});
  $new_comment->text($comment{text});
  $new_comment->email($comment{email}) if $comment{email};
  $new_comment->created_on(time2str("%Y%m%d%H%M%S", $comment{time}));
  $new_comment->save or die $new_comment->errstr;

  $comments++;
  print "COMMENTS LOADED: $comments\n";

  %comment = ();
  return 1;
}

# Main parse loop.  Walks the input line by line and drives flush():
#
#   rdf:about="...NNN.html"          remember the entry id of this page
#   <h2>...Comments</h2>             start of the comment section
#   <a id="cNNN"></a>                start of one comment (NNN = comment id)
#   ...                              body lines, collected into $comment{text}
#   <p class="posted">Posted by...   author/date footer; the comment is saved
#   <h2>Post a comment</h2>          end of the comment section
while (<>) {

  # The entry id is the numeric file name in the page's rdf:about permalink
  # (leading zeros dropped).  Dots are escaped so they only match a literal
  # dot.
  $entry_id = $1
    if m!rdf:about="http://www\.askbjoernhansen\.com/archives/\d{4}/\d{2}/\d\d/0*(\d+)\.html"!;

  # Ignore everything up to the "Comments" heading.
  unless ($in_comments) {
    $in_comments = 1 if m!^<h2><a id="comments"></a>Comments</h2>!;
    next;
  }

  # The comment form marks the end of the comment section.
  if (m!^<h2>Post a comment</h2>!) {
    flush();
    $in_comments = 0;
    next;
  }

  # Footer line: carries the author (plain text, or a link holding the name
  # plus a URL or an entity-encoded mailto address) and the posting date.
  if (my ($author, $date) = ($_ =~ m!<p class="posted">Posted by: (.*) at (.*)</p>!)) {
    if ($author =~ m/^<a /) {
      ($comment{link})   = ($author =~ m!title="(http://[^\"]+)"!);
      ($comment{author}) = ($author =~ m!>\s*(.*?)\s*</a>!);
      ($comment{email})  = ($author =~ m!<a href="mailto&#58;([^\"]+)">!);
      $comment{email} = decode_entities($comment{email}) if $comment{email};
    }
    else {
      $author =~ s/^\s*//;
      $author =~ s/\s*$//;
      $comment{author} = $author;
    }

    $comment{author_string} = $author;
    $comment{time} = str2time($date);

    # str2time returns undef for a date it cannot parse.  Refuse to store a
    # pending comment with a bogus timestamp; re-running after a fix is
    # safe because flush() skips comments that already exist.
    die "cannot parse comment date '$date'"
      if $comment{id} && !defined $comment{time};

    flush();
    next;
  }

  # Anchor line: a new comment begins here.
  if (m!^<a id="c(\d+)"></a>!) {
    my $id = $1;    # copy the capture before calling anything else

    defined $entry_id
      or die "comment c$id found before any entry id was seen";

    # Save whatever is still pending.  flush() leaves %comment alone when
    # no id was pending, so start from a clean record explicitly: fields or
    # text left over from a footer without an anchor must not end up in
    # this comment.
    flush();
    %comment = (id => $id);
    next;
  }

  # Anything else inside the comment section is body text.
  $comment{text} .= $_;

}
continue {
  # <> may run through several files.  Parser state must not carry over
  # from one page into the next (for instance when a page has no
  # "Post a comment" heading), so reset it at the end of every file.  An
  # unfinished comment is dropped, just as it is at the end of all input.
  if (eof) {
    $in_comments = 0;
    %comment     = ();
    undef $entry_id;
  }
}
