簡易マイミク更新チェッカー

手動で更新チェックは面倒なので。
nokogiri でごりごり html を解析している。
ruby1.8 でも ruby1.9 でも nokogiri さえ入っていれば動くはず。
使い方としてはまぁ見ての通りで、最初に yamlpath (~/.config/mixi-diary.yml) のところに

email: foo@example.com
password: xxxxxxxx

こんな YAML を用意しておけば、後はこのスクリプトを動かす度に新規の日記を適当なフォーマットで出力します。
なんらかの操作に失敗すると、その場でさっさと自決するという適当なエラーハンドリングですみません。

#!/usr/bin/env ruby
# vim:set fileencoding=utf-8:
require 'net/http'
require 'cgi'
require 'yaml'
require 'kconv'
require 'rubygems'
require 'nokogiri'
# Ask net/http for its 1.2 API (only matters on old rubies such as 1.8).
Net::HTTP.version_1_2

# Site to log in to and to scrape.
host = 'mixi.jp'
# YAML file holding 'email', 'password' and the time of the last run ('last').
yamlpath = File.expand_path('~/.config/mixi-diary.yml')
# Trailing "(N)" of a diary title; N is used as the comment count.
comm_re = /\((\d+)\)$/
# Year, month, day, hour, minute of an entry. The date fields must be
# separated by at least one non-digit (e.g. the Japanese year/month/day
# markers, "-" or "/"): adjacent greedy \d+ groups without a separator
# cannot split a date into its fields.
date_re = /(\d+)\D+(\d+)\D+(\d+)\D+(\d+):(\d+)/

# load config
# The file is expected to be a YAML mapping with 'email' and 'password'.
# 'last' (time of the previous run) is written back by this script itself.
#
# The file is the user's own and contains a Time object once the script has
# run. YAML.load_file on Psych 4+ is a safe load that rejects Time, so use
# unsafe_load_file where it exists; older rubies only provide load_file.
config = if YAML.respond_to?(:unsafe_load_file)
  YAML.unsafe_load_file(yamlpath)
else
  YAML.load_file(yamlpath)
end
# An empty file loads as false/nil; report that instead of failing with a
# NoMethodError on the key lookups below.
if not config.is_a?(Hash)
  puts "ERROR: #{yamlpath} must be a YAML mapping with email and password"
  exit 1
end
# Hash#[] returns nil for a missing key, so one nil check covers both
# "key absent" and "key present but empty".
if config['email'].nil?
  puts "ERROR: You must specify email in #{yamlpath}"
  exit 1
end
if config['password'].nil?
  puts "ERROR: You must specify password in #{yamlpath}"
  exit 1
end
if config['last'].nil?
  # first use: the epoch is older than any entry, so everything is listed
  config['last'] = Time.at(0)
end

# build query
# Form-encoded body for the login POST. Values go through to_s before being
# escaped: YAML loads e.g. an all-digit password as an Integer, which
# CGI.escape cannot handle.
params = {
  'next_url' => '/home.pl',
  'email'    => config['email'],
  'password' => config['password']
}
query = params.map { |key, value| "#{key}=#{CGI.escape(value.to_s)}" }.join('&')

# Log in, download the friends' new-diary list and print every entry whose
# timestamp is not older than config['last'].
# Any failed step prints a message and ends the script with exit status 1.
Net::HTTP.start(host) { |h|
  # login and get session-id
  res = h.post('/login.pl', query)
  if res.code.to_i != 200
    puts res.message
    exit 1
  end
  if not res.key?('Set-Cookie')
    puts 'ERROR: incorrect email or password. login failed'
    exit 1
  end

  # Build the Cookie header from the name=value part of every Set-Cookie
  # header. get_fields returns the headers one by one; splitting the joined
  # header value on ',' would also cut inside "expires=Wed, 01-Jan-..." dates
  # and leak date fragments into the Cookie header.
  cookie = res.get_fields('Set-Cookie').map { |l| l.split(';').first.strip }.join('; ')

  # get new friend diary
  res = h.get('/new_friend_diary.pl', 'Cookie' => cookie)
  if res.code.to_i != 200
    puts res.message
    exit 1
  end

  # extract entries from html
  html = Nokogiri(res.body)
  entries = html.xpath('//ul[@class="entryList01"]/li/dl')
  entries.each { |entry|
    # <dt> carries the timestamp text of the entry
    stamp = entry.at('dt').text.toutf8.strip
    date_match = date_re.match(stamp)
    if date_match.nil?
      puts "ERROR: datetime parse error: #{stamp}"
      exit 1
    end
    posted_at = Time.local(*date_match.captures)
    # Stop at the first entry older than the previous run.
    # NOTE(review): this assumes the page lists newest entries first - confirm.
    break if posted_at < config['last']

    # <dd> carries the link (href + title) and, as bare text, the author
    dd = entry.at('dd')
    link = dd.at('a')
    href = link['href']
    title = link.text.toutf8.strip
    # retrieve comment count from title
    comments = 0
    comm_match = comm_re.match(title)
    if comm_match
      comments = comm_match[1]
      title = title.sub(comm_re, '').strip
    end
    author = dd.xpath('text()').to_s.strip.toutf8
    author = author[1..-2] # remove parens

    puts "[#{title}] #{comments} comments"
    puts "  by #{author} @ #{posted_at}"
    puts "http://#{host}/#{href}"
    puts ''
  }
}

# save config
# Remember when this check ran; the next run stops at entries older than this.
config['last'] = Time.now
# The block form closes (and thereby flushes) the file even if dumping raises,
# instead of leaving an open handle behind.
File.open(yamlpath, 'w') { |f| YAML.dump(config, f) }