いい加減しつこいですが、もうちょっとだけ改善しました。
今回の変更点は、以下です。
今回から、パッチを添付します: Mixi.patch-20070108.tar.gz
WWW::Mixiのパッチ
今回の変更点は、以下です。
- コメントのイメージの扱いが間違っていたので修正
- コミュニティのアンケート(のコメント)に対応(アンケートに答えていない場合の挙動は未確認)
- コミュニティのトピックのコメントに、コミュニティ名を追加
今回から、パッチを添付します: Mixi.patch-20070108.tar.gz
WWW::Mixiのパッチ
*** /usr/local/lib/perl5/site_perl/5.8.8/WWW/Mixi.pm.ORIG Sat Jan 6 21:54:02 2007 --- /usr/local/lib/perl5/site_perl/5.8.8/WWW/Mixi.pm Mon Jan 8 15:56:01 2007 *************** *** 1422,1451 **** my $base = $res->base->as_string; my $content = $res->content; my @items = (); my $re_date = '<td rowspan="3" width="110" bgcolor="#ffd8b0" align="center" valign="top" nowrap>(\d{4})年(\d{2})月(\d{2})日<br>(\d{1,2}):(\d{2})</td>'; my $re_subj = '<td bgcolor="#fff4e0"> (.+?)</td>'; my $re_desc = '</table>(.+?)</td>'; my $re_c_date = '<td rowspan="2" width="110" bgcolor="#f2ddb7" align="center" nowrap>\n(\d{4})年(\d{2})月(\d{2})日<br>\n(\d{1,2}):(\d{2})'; my $re_c_desc = '<td class="h120">(.+?)\n</td>'; my $re_link = '<a href="?(.+?)"?>(.*?)<\/a>'; ! if ($content =~ s/<!-- TOPIC: start -->.*?${re_date}.*?${re_subj}.*?${re_link}(.*?)${re_desc}(.*?)$//is) { ! my ($time, $subj, $link, $name, $imgs, $desc, $comm) = (sprintf('%04d/%02d/%02d %02d:%02d', $1,$2,$3,$4,$5), $6, $7, $8, $9, $10, $11); ($desc, $subj) = map { s/[\r\n]+//g; s/<br>/\n/g; $_ = $self->rewrite($_); } ($desc, $subj); ! my $item = { 'time' => $time, 'description' => $desc, 'subject' => $subj, 'link' => $res->request->uri->as_string, 'images' => [], 'comments' => [] , 'name' => $name, 'name_link' => $self->absolute_url($link, $base)}; foreach my $image ($imgs =~ /<td width=130[^<>]*>(.*?)<\/td>/g) { next unless ($image =~ /<a [^<>]*'show_picture.pl\?img_src=(.*?)'[^<>]*><img src=([^ ]*) border=0>/); push(@{$item->{'images'}}, {'link' => $self->absolute_url($1, $base), 'thumb_link' => $self->absolute_url($2, $base)}); } ! while ($comm =~ s/.*?${re_c_date}.*?${re_link}.*?${re_c_desc}.*?<\/table>//is){ ! my ($time, $link, $name, $desc) = (sprintf('%04d/%02d/%02d %02d:%02d', $1,$2,$3,$4,$5), $6, $7, $8); ! ($name, $desc) = map { s/[\r\n]+//g; s/<br>/\n/g; $_ = $self->rewrite($_); } ($name, $desc); ! 
push(@{$item->{'comments'}}, {'time' => $time, 'link' => $self->absolute_url($link, $base), 'name' => $name, 'description' => $desc}); } push(@items, $item); } return @items; } sub parse_view_diary { my $self = shift; my $res = (@_) ? shift : $self->response(); --- 1422,1491 ---- my $base = $res->base->as_string; my $content = $res->content; my @items = (); + my $re_comm = '<b>(\[.*?\]) トピック</b>'; my $re_date = '<td rowspan="3" width="110" bgcolor="#ffd8b0" align="center" valign="top" nowrap>(\d{4})年(\d{2})月(\d{2})日<br>(\d{1,2}):(\d{2})</td>'; my $re_subj = '<td bgcolor="#fff4e0"> (.+?)</td>'; my $re_desc = '</table>(.+?)</td>'; my $re_c_date = '<td rowspan="2" width="110" bgcolor="#f2ddb7" align="center" nowrap>\n(\d{4})年(\d{2})月(\d{2})日<br>\n(\d{1,2}):(\d{2})'; + my $re_c_no = '<b>(.*?[0-9]+?)</b>'; my $re_c_desc = '<td class="h120">(.+?)\n</td>'; my $re_link = '<a href="?(.+?)"?>(.*?)<\/a>'; ! if ($content =~ s/<!-- TOPIC: start -->.*?${re_comm}.*?${re_date}.*?${re_subj}.*?${re_link}(.*?)${re_desc}(.*?)$//is) { ! my ($community, $time, $subj, $link, $name, $imgs, $desc, $comm) = ($1, sprintf('%04d/%02d/%02d %02d:%02d', $2,$3,$4,$5,$6), $7, $8, $9, $10, $11, $12); ($desc, $subj) = map { s/[\r\n]+//g; s/<br>/\n/g; $_ = $self->rewrite($_); } ($desc, $subj); ! my $item = { 'time' => $time, 'description' => $desc, 'subject' => $subj, 'link' => $res->request->uri->as_string, 'images' => [], 'comments' => [] , 'name' => $name, 'name_link' => $self->absolute_url($link, $base), 'community_name' => $community}; foreach my $image ($imgs =~ /<td width=130[^<>]*>(.*?)<\/td>/g) { next unless ($image =~ /<a [^<>]*'show_picture.pl\?img_src=(.*?)'[^<>]*><img src=([^ ]*) border=0>/); push(@{$item->{'images'}}, {'link' => $self->absolute_url($1, $base), 'thumb_link' => $self->absolute_url($2, $base)}); } ! while ($comm =~ s/.*?${re_c_date}.*?${re_c_no}.*?${re_link}.*?${re_c_desc}.*?<\/table>//is){ ! 
my ($time, $number, $link, $name, $desc) = (sprintf('%04d/%02d/%02d %02d:%02d', $1,$2,$3,$4,$5), $6, $7, $8, $9); ! # ($name, $desc) = map { s/[\r\n]+//g; s/<br>/\n/g; $_ = $self->rewrite($_); } ($name, $desc); ! my $comment = {'time' => $time, 'link' => $self->absolute_url($link, $base), 'name' => $name, 'description' => $desc, 'number' => $number, 'images' => []}; ! push(@{$item->{'comments'}}, $comment); ! foreach my $image ($desc =~ /<td width="?130"?[^<>]*>(.*?)<\/td>/g) { ! next unless ($image =~ /<img src=([^ ]*) border="?0"?>/); ! push(@{$comment->{'images'}}, {'link' => $self->absolute_url($1, $base), 'thumb_link' => $self->absolute_url($1, $base)}); ! } } push(@items, $item); } return @items; } + sub parse_view_enquete { + my $self = shift; + my $res = (@_) ? shift : $self->response(); + return unless ($res and $res->is_success); + my $base = $res->base->as_string; + my $content = $res->content; + my @items = (); + my $re_comm = '<b>(\[.*?\]).*?</b>'; + my $re_subj = '<td bgcolor=#ffffff width=530>(.+?)</td>'; + my $re_desc = '</table>(.+?)</td>'; + my $re_c_date = '<td rowspan="2" width="110" bgcolor="#f2ddb7" align="center" nowrap>\n(\d{4})年(\d{2})月(\d{2})日<br>(\d{1,2}):(\d{2})'; + my $re_c_no = '<b>(.*?[0-9]+?)</b>'; + my $re_c_desc = '<td class="h120">(.+?)</td>'; + my $re_link = '<a href="?(.+?)"?>(.*?)</a>'; + if ($content =~ s/<td align=center colspan=2>.*?${re_comm}.*?${re_subj}.*?${re_desc}.*?${re_link}(.*?)$//is) { + my ($community, $subj, $desc, $link, $name, $comm) = ($1, $2, $3, $4, $5, $6); + # ($desc, $subj) = map { s/[\r\n]+//g; s/<br>/\n/g; $_ = $self->rewrite($_); } ($desc, $subj); + my $item = { 'description' => $desc, 'subject' => $subj, 'link' => $res->request->uri->as_string, 'comments' => [] , 'name' => $name, 'name_link' => $self->absolute_url($link, $base), 'community_name' => $community}; + while ($comm =~ s/.*?${re_c_date}.*?${re_c_no}.*?${re_link}.*?${re_c_desc}.*?<\/table>//is){ + my ($time, $number, $link, $name, $desc) = 
(sprintf('%04d/%02d/%02d %02d:%02d', $1,$2,$3,$4,$5), $6, $7, $8, $9); + # ($name, $desc) = map { s/[\r\n]+//g; s/<br>/\n/g; $_ = $self->rewrite($_); } ($name, $desc); + my $comment = {'time' => $time, 'link' => $self->absolute_url($link, $base), 'name' => $name, 'description' => $desc, 'number' => $number, 'images' => []}; + push(@{$item->{'comments'}}, $comment); + foreach my $image ($desc =~ /<td width="?130"?[^<>]*>(.*?)<\/td>/g) { + next unless ($image =~ /<img src=([^ ]*) border="?0"?>/); + push(@{$comment->{'images'}}, {'link' => $self->absolute_url($1, $base), 'thumb_link' => $self->absolute_url($1, $base)}); + } + } + push(@items, $item); + } + return @items; + } + sub parse_view_diary { my $self = shift; my $res = (@_) ? shift : $self->response(); *************** *** 1473,1479 **** push(@{$item->{'comments'}}, {'time' => $time, 'link' => $self->absolute_url($link, $base), 'name' => $name, 'description' => $desc}); } push(@items, $item); ! } return @items; } --- 1513,1519 ---- push(@{$item->{'comments'}}, {'time' => $time, 'link' => $self->absolute_url($link, $base), 'name' => $name, 'description' => $desc}); } push(@items, $item); ! } return @items; } *************** *** 1582,1588 **** my $label_time = "(?:\Q日 付\E|\Q日 付\E)"; my $label_name = "(?:\Q差出人\E|\Q宛 先\E)"; my $label_subj = "(?:\Q件 名\E|\Q件 名\E)"; ! 
my $time = sprintf('%04d/%02d/%02d %02d:%02d', $1, $2, $3, $4, $5) if ($content =~ /<$td>$s<font(?:$attr)*>$label_time<\/font>$s:$s(\d{4})年(\d{2})月(\d{2})日$s(\d{2}):(\d{2})<\/td>/is); my $subj = $self->rewrite($1) if ($content =~ /<$td>$s<font(?:$attr)*>$label_subj<\/font>$s:$s($str)<\/td>/is); my $desc = $self->rewrite($1) if ($content =~ /<td(?:$attr)*CLASS=h120(?:$attr)*>$s($str)<\/td>/is); my $image = $self->absolute_url($1, $base) if ($content =~ /<$td><a(?:$attr)*><img(?:$attr)*src=["']?([^"'\s<>]+)["'](?:$attr)*><\/a><\/td>/is); --- 1622,1628 ---- my $label_time = "(?:\Q日 付\E|\Q日 付\E)"; my $label_name = "(?:\Q差出人\E|\Q宛 先\E)"; my $label_subj = "(?:\Q件 名\E|\Q件 名\E)"; ! my $time = sprintf('%04d/%02d/%02d %02d:%02d', $1, $2, $3, $4, $5) if ($content =~ /<$td>$s<font(?:$attr)*>$label_time<\/font>$s:$s(\d{4})年(\d{2})月(\d{2})日$s(\d{2})時(\d{2})分$s<\/td>/is); my $subj = $self->rewrite($1) if ($content =~ /<$td>$s<font(?:$attr)*>$label_subj<\/font>$s:$s($str)<\/td>/is); my $desc = $self->rewrite($1) if ($content =~ /<td(?:$attr)*CLASS=h120(?:$attr)*>$s($str)<\/td>/is); my $image = $self->absolute_url($1, $base) if ($content =~ /<$td><a(?:$attr)*><img(?:$attr)*src=["']?([^"'\s<>]+)["'](?:$attr)*><\/a><\/td>/is); *************** *** 2246,2251 **** --- 2286,2298 ---- my $url = shift or return; $self->set_response($url, @_) or return undef; return $self->parse_view_bbs(); + } + + sub get_view_enquete { + my $self = shift; + my $url = shift or return; + $self->set_response($url, @_) or return undef; + return $self->parse_view_enquete(); } sub get_view_community {
CustomFeed::Mixiのパッチ
*** /usr/local/lib/perl5/site_perl/5.8.8/Plagger/Plugin/CustomFeed/Mixi.pm.ORIG Fri Jan 5 23:14:14 2007 --- /usr/local/lib/perl5/site_perl/5.8.8/Plagger/Plugin/CustomFeed/Mixi.pm Mon Jan 8 13:13:19 2007 *************** *** 48,53 **** --- 48,62 ---- get_list => 'parse_show_calendar', get_detail => 'get_view_event', }, + Bbs => { + start_url => 'http://mixi.jp/new_bbs.pl', + title => 'コミュニティ最新書き込み', + get_list => 'parse_new_bbs', + get_detail => 'dummy', + get_detail_bbs => 'get_view_bbs', + get_detail_event => 'get_view_event', + get_detail_enquete => 'get_view_enquete', + }, }; sub plugin_id { *************** *** 134,148 **** my $i = 0; my $blocked = 0; for my $msg (@msgs) { next if $type eq 'FriendDiary' and not $msg->{image}; # external blog last if $i++ >= $items; my $entry = Plagger::Entry->new; ! $entry->title( decode('euc-jp', $msg->{subject}) ); $entry->link($msg->{link}); $entry->author( decode('euc-jp', $msg->{name}) ); $entry->date( Plagger::Date->parse($format, $msg->{time}) ); if ($self->conf->{show_icon} && !$blocked && defined $MAP->{$type}->{icon_re}) { my $owner_id = ($msg->{link} =~ $MAP->{$type}->{icon_re})[0]; --- 143,165 ---- my $i = 0; my $blocked = 0; + my $item; for my $msg (@msgs) { next if $type eq 'FriendDiary' and not $msg->{image}; # external blog last if $i++ >= $items; my $entry = Plagger::Entry->new; ! my $title = ""; ! if ($msg->{community_name}) { ! $title = decode('euc-jp', $msg->{subject}) . " "; ! } ! $entry->title( $title . 
decode('euc-jp', $msg->{subject}) ); $entry->link($msg->{link}); $entry->author( decode('euc-jp', $msg->{name}) ); $entry->date( Plagger::Date->parse($format, $msg->{time}) ); + if ($entry->date) { + $entry->date->set_time_zone('Asia/Tokyo'); + } if ($self->conf->{show_icon} && !$blocked && defined $MAP->{$type}->{icon_re}) { my $owner_id = ($msg->{link} =~ $MAP->{$type}->{icon_re})[0]; *************** *** 170,182 **** } } if ($self->conf->{fetch_body} && !$blocked && $msg->{link} =~ /view_/ && defined $MAP->{$type}->{get_detail}) { $context->log(info => "Fetch body from $msg->{link}"); ! my $item = $self->cache->get_callback( "item-$msg->{link}", sub { Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 ); ! my $meth = $MAP->{$type}->{get_detail}; my($item) = $self->{mixi}->$meth($msg->{link}); if ($meth eq 'get_view_diary') { --- 187,203 ---- } } + $item = 0; + if ($self->conf->{fetch_body} && !$blocked && $msg->{link} =~ /view_/ && defined $MAP->{$type}->{get_detail}) { $context->log(info => "Fetch body from $msg->{link}"); ! $item = $self->cache->get_callback( "item-$msg->{link}", sub { Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 ); ! # my $meth = $MAP->{$type}->{get_detail}; ! my $meth = get_meth($MAP->{$type}, $msg->{link}); ! my($item) = $self->{mixi}->$meth($msg->{link}); if ($meth eq 'get_view_diary') { *************** *** 189,195 **** if ($item) { my $body = decode('euc-jp', $item->{description}); $body =~ s!(\r\n?|\n)!<br />!g; ! for my $image (@{ $item->{images} }) { $body .= qq(<div><a href="$image->{link}"><img src="$image->{thumb_link}" style="border:0" /></a></div>); my $enclosure = Plagger::Enclosure->new; $enclosure->url( URI->new($image->{thumb_link}) ); --- 210,217 ---- if ($item) { my $body = decode('euc-jp', $item->{description}); $body =~ s!(\r\n?|\n)!<br />!g; ! my $image; ! 
for $image (@{ $item->{images} }) { $body .= qq(<div><a href="$image->{link}"><img src="$image->{thumb_link}" style="border:0" /></a></div>); my $enclosure = Plagger::Enclosure->new; $enclosure->url( URI->new($image->{thumb_link}) ); *************** *** 197,205 **** --- 219,238 ---- $enclosure->is_inline(1); $entry->add_enclosure($enclosure); } + if ($image = $item->{image}) { + $body .= qq(<div><a href="$image"><img src="$image" style="border:0" /></a></div>); + my $enclosure = Plagger::Enclosure->new; + $enclosure->url( URI->new($image) ); + $enclosure->auto_set_type; + $enclosure->is_inline(1); + $entry->add_enclosure($enclosure); + } $entry->body($body); $entry->date( Plagger::Date->parse($format, $item->{time}) ); + if ($entry->date) { + $entry->date->set_time_zone('Asia/Tokyo'); + } } else { $context->log(warn => "Fetch body failed. You might be blocked?"); $blocked++; *************** *** 207,212 **** --- 240,248 ---- } $feed->add_entry($entry); + if ($self->conf->{fetch_body} && $item) { + add_comments($feed, $item); + } } $context->update->add($feed); *************** *** 221,226 **** --- 257,305 ---- } return \@images; + } + sub get_meth { + my($type, $link) = @_; + + my $meth = ""; + if ($link =~ /view_bbs/) { + $meth = $type->{get_detail_bbs}; + } elsif ($link =~ /view_event/) { + $meth = $type->{get_detail_event}; + } elsif ($link =~ /view_enquete/) { + $meth = $type->{get_detail_enquete}; + } else { + $meth = $type->{get_detail}; + } + return $meth; + } + + sub add_comments { + my ($feed, $item) = @_; + my $format = DateTime::Format::Strptime->new(pattern => '%Y/%m/%d %H:%M'); + if ($item->{'comments'}) { + for my $comment (@{$item->{'comments'}}) { + my $entry = Plagger::Entry->new; + $entry->title(decode('euc-jp', $item->{'community_name'}) . " " . decode('euc-jp', $item->{'subject'}) . ": " . 
$comment->{'number'}); + $entry->link($item->{'link'}); + $entry->author( decode('euc-jp', $comment->{'name'}) ); + $entry->date( Plagger::Date->parse($format, $comment->{'time'}) ); + if ($entry->date) { + $entry->date->set_time_zone('Asia/Tokyo'); + } + my $body = decode('euc-jp', $comment->{'description'}); + for my $image (@{ $comment->{images} }) { + $body .= qq(<div><a href="$image->{link}"><img src="$image->{thumb_link}" style="border:0" /></a></div>); + my $enclosure = Plagger::Enclosure->new; + $enclosure->url( URI->new($image->{thumb_link}) ); + $enclosure->auto_set_type; + $enclosure->is_inline(1); + $entry->add_enclosure($enclosure); + } + $entry->body( $body ); + $feed->add_entry($entry); + } + } } 1;
カテゴリ: Plagger
トラックバック(0)
このブログ記事を参照しているブログ一覧: mixiのコミュニティ新着の取得(その4)
このブログ記事に対するトラックバックURL: https://www.wizard-limit.net/cgi-bin/mt/mt-tb.cgi/1123
ごめんなさ~い!
このページに添付してあるパッチは、webに上げるtextに変換後のファイルでした(汗)
もうちょっと更新して、ちゃんとpatchの形式にしたものを別途エントリにしますのでご容赦ください。