#!/usr/bin/perl

# Blogsum
# Copyright (c) 2009 Jason Dixon
# All rights reserved.
#
# wp2blogsum.pl - copy posts and comments out of a WordPress XML export and
# into the "articles" and "comments" tables of an SQLite database.
#
# Usage: wp2blogsum.pl <wordpress.xml> <blogsum.db>
#
# Every channel item whose wp:post_type is "post" becomes one row in
# "articles"; every wp:comment below that item becomes one row in "comments",
# tied to the article through the rowid SQLite assigned to the article insert.

use strict;
use warnings;
use DBI;
use XML::Simple;

die "Usage: wp2blogsum.pl <wordpress.xml> <blogsum.db>\n\n" unless (@ARGV == 2);
my ($wpxml, $database) = @ARGV;

my $xs  = XML::Simple->new();
my $ref = $xs->XMLin($wpxml);

my $dbh = DBI->connect("DBI:SQLite:dbname=$database", '', '', { RaiseError => 1 })
    || die $DBI::errstr;

# Both inserts are positional: a NULL for the first column followed by seven
# bound values.  The table schemas are not defined in this script.
my $insert_article = $dbh->prepare("INSERT INTO articles VALUES (NULL, ?, ?, ?, ?, ?, ?, ?)");
my $insert_comment = $dbh->prepare("INSERT INTO comments VALUES (NULL, ?, ?, ?, ?, ?, ?, ?)");

# Comment elements, in the order they are bound after the article id.
my @comment_fields = (
    'wp:comment_date',
    'wp:comment_author',
    'wp:comment_author_email',
    'wp:comment_author_url',
    'wp:comment_content',
    'wp:comment_approved',
);

for my $item (_as_list($ref->{'channel'}->{'item'})) {

    next unless (ref($item) eq 'HASH');

    # Only real posts are imported; every other post type is skipped.
    my $post_type = _text($item->{'wp:post_type'});
    next unless (defined($post_type) && $post_type eq 'post');

    my $title  = _text($item->{'title'});
    my $date   = _text($item->{'wp:post_date'});
    my $uri    = _text($item->{'wp:post_name'});
    my $author = _text($item->{'dc:creator'});

    # Anything other than "publish" is stored as disabled.
    my $status  = _text($item->{'wp:status'});
    my $enabled = (defined($status) && $status eq 'publish') ? 1 : 0;

    my $content = _text($item->{'content:encoded'});
    $content = '' unless defined $content;
    $content =~ s/&nbsp;//g;                    # remove &nbsp;
    $content = _add_paragraph_tags($content);

    # Only category elements that parsed to a hash with a "nicename"
    # attribute contribute a tag; plain-text category elements are ignored.
    my @tags;
    for my $category (_as_list($item->{'category'})) {
        next unless (ref($category) eq 'HASH');
        next unless ($category->{'nicename'});
        push(@tags, $category->{'content'});
    }

    $insert_article->execute($date, $title, $uri, $content, join(',', @tags), $enabled, $author)
        || die $dbh->errstr;
    my $article_id = $dbh->func('last_insert_rowid');

    for my $comment (_as_list($item->{'wp:comment'})) {
        next unless (ref($comment) eq 'HASH');
        $insert_comment->execute($article_id, map { _text($comment->{$_}) } @comment_fields)
            || die $dbh->errstr;
    }
}

$dbh->disconnect;

# Return the element(s) stored under one XML::Simple key as a flat list.
# XML::Simple only builds an array reference when an element is repeated; an
# element that occurs once comes back as a bare hash reference or string, and
# a missing element as undef.  Dereferencing those as arrays is fatal, so all
# three shapes are normalised here.
sub _as_list {
    my ($node) = @_;
    return () unless defined $node;
    return @{$node} if (ref($node) eq 'ARRAY');
    return ($node);
}

# Return the text of a parsed element, safe to bind as an SQL value.
#   plain string          -> returned unchanged
#   undef (element absent) -> undef, which DBI binds as NULL
#   hash with "content"   -> that text (element carried attributes)
#   any other reference   -> '' (XML::Simple's form of an empty element is {},
#                            which would otherwise be stored as "HASH(0x...)")
sub _text {
    my ($value) = @_;
    return $value unless ref($value);
    if (ref($value) eq 'HASH' && defined $value->{'content'} && !ref($value->{'content'})) {
        return $value->{'content'};
    }
    return '';
}

# Wrap each line of a post body in <p>...</p> and rename the <!--more-->
# marker to <!--readmore-->.  Content that already carries block markup is
# returned untouched.
sub _add_paragraph_tags {
    my ($content) = @_;

    # NOTE(review): the three tag literals tested here were lost from the copy
    # of this script that was reviewed.  <p> is implied by the wrapping done
    # below; <blockquote> and <ol> are a best guess -- confirm against the
    # upstream script before relying on them.
    return $content
        if (($content =~ /<p>/) || ($content =~ /<blockquote>/) || ($content =~ /<ol>/));

    $content =~ s/<\!\-\-more\-\->/<\!\-\-readmore\-\->/mg;     # convert more to readmore
    $content =~ s/^/<p>/mg;                                     # add <p> to beginning of line
    $content =~ s/\r\n/<\/p>\r\n/mg;                            # add </p> to end of line
    $content =~ s/$/<\/p>/mg;                                   # add </p> to end of story (no \r\n)
    $content =~ s/^<p><\/p>$//mg;                               # remove <p></p> (empty lines)
    $content =~ s/^<p>(<\!\-\-\w+\-\->)<\/p>/$1/mg;             # remove <p></p> (comment lines)
    $content =~ s/^<\/p>$//mg;                                  # remove extra </p> from end of story
    $content =~ s/<p><ul>/<ul>/mg;                              # remove <p> before <ul>
    $content =~ s/<ul><\/p>/<ul>/mg;                            # remove </p> after <ul>
    $content =~ s/<p><\/ul>/<\/ul>/mg;                          # remove <p> before </ul>
    $content =~ s/<\/ul><\/p>/<\/ul>/mg;                        # remove </p> after </ul>
    $content =~ s/<p><li>/<li>/mg;                              # remove <p> before <li>
    $content =~ s/<li><\/p>/<li>/mg;                            # remove </p> after <li>
    $content =~ s/<p><\/li>/<\/li>/mg;                          # remove <p> before </li>
    $content =~ s/<\/li><\/p>/<\/li>/mg;                        # remove </p> after </li>

    return $content;
}