#!/usr/bin/perl

# Blogsum
# Copyright (c) 2009 Jason Dixon
# All rights reserved.
#
# wp2blogsum.pl - copy posts and their comments from a WordPress XML export
# into the "articles" and "comments" tables of an SQLite database.
#
# Usage: wp2blogsum.pl <wordpress.xml> <blogsum.db>
#
# Provenance: examples/wp2blogsum.pl as added by commit acb38afc
# ("blogsum", Dimitri Sokolyuk, 2012-09-05).

use strict;
use warnings;
use DBI;
use XML::Simple;

die "Usage: wp2blogsum.pl <wordpress.xml> <blogsum.db>\n\n" unless (@ARGV == 2);

my ($wpxml, $database) = @ARGV;

# ForceArray: XML::Simple collapses an element that occurs only once into a
# plain value, so an export with a single post, category or comment would
# otherwise not be an array and the loops below would die.
# SuppressEmpty: empty elements default to {} which would be stored in the
# database as the literal text "HASH(0x...)"; ask for '' instead.
my $xs  = XML::Simple->new();
my $ref = $xs->XMLin(
    $wpxml,
    ForceArray    => [ 'item', 'category', 'wp:comment' ],
    SuppressEmpty => '',
);

# RaiseError makes every failing DBI call die, so no per-call checks are
# needed after this point.
my $dbh = DBI->connect("DBI:SQLite:dbname=$database", '', '', { RaiseError => 1 })
    or die $DBI::errstr;

my $article_sth = $dbh->prepare("INSERT INTO articles VALUES (NULL, ?, ?, ?, ?, ?, ?, ?)");
my $comment_sth = $dbh->prepare("INSERT INTO comments VALUES (NULL, ?, ?, ?, ?, ?, ?, ?)");

# Column order of the bound comment values (after the article id).
my @comment_fields = (
    'wp:comment_date',
    'wp:comment_author',
    'wp:comment_author_email',
    'wp:comment_author_url',
    'wp:comment_content',
    'wp:comment_approved',
);

# Run the whole import in one transaction: a failure half way through leaves
# the database untouched (so the script can simply be re-run), and SQLite does
# not have to commit after every single row.
my $imported = eval {
    $dbh->begin_work;

    ITEM:
    for my $item (@{ $ref->{'channel'}->{'item'} || [] }) {
        my $post_type = $item->{'wp:post_type'};
        next ITEM unless (defined $post_type && $post_type eq 'post');

        my $title   = $item->{'title'};
        my $date    = $item->{'wp:post_date'};
        my $uri     = $item->{'wp:post_name'};
        my $author  = $item->{'dc:creator'};
        my $status  = $item->{'wp:status'};
        my $enabled = (defined $status && $status eq 'publish') ? 1 : 0;
        my $content = add_paragraph_markup($item->{'content:encoded'});

        # Only categories that carry a nicename attribute and a text value
        # become tags; bare-string categories are ignored.
        my @tags;
        for my $category (@{ $item->{'category'} || [] }) {
            next unless (ref($category) eq 'HASH' && $category->{'nicename'});
            next unless (defined $category->{'content'});
            push(@tags, $category->{'content'});
        }

        $article_sth->execute($date, $title, $uri, $content, join(',', @tags), $enabled, $author);
        my $article_id = $dbh->func('last_insert_rowid');

        for my $comment (@{ $item->{'wp:comment'} || [] }) {
            next unless (ref($comment) eq 'HASH');
            $comment_sth->execute($article_id, @{$comment}{@comment_fields});
        }
    }

    $dbh->commit;
    1;
};

unless ($imported) {
    my $error = $@ || 'unknown error';
    eval { $dbh->rollback };
    $dbh->disconnect;
    die "Import failed, nothing was saved: $error";
}

$dbh->disconnect;

# Convert the body of a WordPress post to the markup stored in the database.
#
# Takes the raw 'content:encoded' value and returns a string. "&nbsp;"
# entities are always removed. Content that already contains <p>, <br> or
# <br /> is otherwise returned as is; anything else is treated as plain text
# and every line is wrapped in <p>...</p>.
# An undefined value or a reference (no usable text) yields ''.
sub add_paragraph_markup {
    my ($content) = @_;

    return '' if (!defined $content || ref $content);

    $content =~ s/&nbsp;//g;                          # remove &nbsp;

    return $content
        if ($content =~ m{<p>} || $content =~ m{<br>} || $content =~ m{<br />});

    # The order of these substitutions matters: the later ones clean up the
    # tags that the earlier, deliberately naive, ones insert.
    $content =~ s{<!--more-->}{<!--readmore-->}g;     # convert more to readmore
    $content =~ s{^}{<p>}mg;                          # add <p> to beginning of line
    $content =~ s{\r\n}{</p>\r\n}g;                   # add </p> to end of line
    $content =~ s/$/<\/p>/mg;                         # add </p> to end of story (no \r\n)
    $content =~ s{^<p></p>$}{}mg;                     # remove <p></p> (empty lines)
    $content =~ s{^<p>(<!--\w+-->)</p>}{$1}mg;        # remove <p></p> (comment lines)
    $content =~ s{^</p>$}{}mg;                        # remove extra </p> from end of story
    $content =~ s{<p><ul>}{<ul>}g;                    # remove <p> before <ul>
    $content =~ s{<ul></p>}{<ul>}g;                   # remove </p> after <ul>
    $content =~ s{<p></ul>}{</ul>}g;                  # remove <p> before </ul>
    $content =~ s{</ul></p>}{</ul>}g;                 # remove </p> after </ul>
    $content =~ s{<p><li>}{<li>}g;                    # remove <p> before <li>
    $content =~ s{<li></p>}{<li>}g;                   # remove </p> after <li>
    $content =~ s{<p></li>}{</li>}g;                  # remove <p> before </li>
    $content =~ s{</li></p>}{</li>}g;                 # remove </p> after </li>

    return $content;
}