NKFを再定義@windows

jrubyのNKFはJavaのjava.nio.charset.Charsetを使っているから、
WAVE DASH (U+301C, "〜")を含む文字列を変換しようとすると
変換に失敗しinvalid encoding (ArgumentError)が発生する。


ちょっとこれは困るのでNativeのNKFを使うようにFFIとnkf32.dllを
使って再定義してみた。windows限定だけど。

FFI
http://wiki.github.com/ffi/ffi
nkf32.dll
http://www.vector.co.jp/soft/win95/util/se295331.html

rubygem を使ってFFIをインストールした後

jruby -S gem install ffi

パスが通っているところにnkf32.dllを配置して
以下のコードをnkf32.rbとかで保存して、nkfの代わりに
ロードすればOK

require "nkf"
# Namespace for Windows-only native bindings.
module Win32
  # Ruby wrapper around the native nkf32.dll, reached through FFI.
  #
  # Every public method is exposed via +module_function+, so it can be called
  # as <tt>Win32::NKF32.convert(...)</tt>.
  module NKF32
    # Raw FFI declarations for the functions exported by nkf32.dll.
    # Each +attach_function+ is preceded by the C prototype it mirrors.
    module API
      # Default output buffer sizes handed to the corresponding DLL calls.
      VERSION_BUFFER_SIZE = 32
      USAGE_BUFFER_SIZE = 2048
      GUESS_BUFFER_SIZE = 32
      # The default conversion output buffer is this many times the input's
      # byte length.
      CONVERT_BUFFER_SIZE_TIMES = 3
      require "ffi"
      extend FFI::Library
      ffi_lib 'nkf32'
      ffi_convention :stdcall

      #void CALLBACK GetNkfVersion(LPSTR verStr);
      attach_function :GetNkfVersion, [ :pointer ], :void
      #BOOL WINAPI GetNkfVersionSafeA(LPSTR verStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/);
      attach_function :GetNkfVersionSafeA, [ :pointer, :uint, :pointer ], :bool
      #int CALLBACK SetNkfOption(LPCSTR optStr);
      attach_function :SetNkfOption, [ :string ], :int
      #void CALLBACK NkfConvert(LPSTR outStr, LPCSTR inStr);
      attach_function :NkfConvert, [ :pointer, :string ], :void
      #BOOL WINAPI NkfConvertSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/, LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/);
      attach_function :NkfConvertSafe, [ :pointer, :uint, :pointer, :string, :uint ], :bool
      #int CALLBACK NkfGetKanjiCode(VOID);
      attach_function :NkfGetKanjiCode, [ ], :int
      # 0: Shift_JIS, 1: EUC, 2: ISO-2022-JP, 3: UTF-8, 4: UTF-16LE, 5: UTF-16BE
      #BOOL WINAPI GetNkfGuessA(LPSTR outStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/);
      attach_function :GetNkfGuessA, [ :pointer, :uint, :pointer ], :bool
      #BOOL WINAPI NkfUsage(LPSTR outStr,DWORD nBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/);
      attach_function :NkfUsage, [ :pointer, :uint, :pointer ], :bool
    end

    # Raised when an nkf32.dll call reports failure.
    class NKF32FailedException < StandardError; end

    # Shared plumbing for the "safe" DLL calls that fill an output buffer.
    #
    # Allocates an output buffer of +buffer_size+ chars plus a uint slot for
    # the returned length, yields both pointers to the block (which performs
    # the actual DLL call and returns its BOOL result), and reads back the
    # string the DLL wrote.
    #
    # Raises NKF32FailedException when the block returns a falsy value.
    def call_with_output_buffer(buffer_size)
      p_out_str = FFI::MemoryPointer.new(:char, buffer_size)
      p_returned = FFI::MemoryPointer.new(:uint)
      raise NKF32FailedException unless yield(p_out_str, p_returned)
      # The slot is allocated as :uint, so read it back unsigned.
      p_out_str.get_string(0, p_returned.read_uint)
    end
    module_function :call_with_output_buffer
    private_class_method :call_with_output_buffer

    # Returns the string written by GetNkfVersionSafeA.
    #
    # buffer_size:: size of the output buffer (default API::VERSION_BUFFER_SIZE).
    # Raises NKF32FailedException when the DLL call returns false.
    def version(buffer_size = API::VERSION_BUFFER_SIZE)
      call_with_output_buffer(buffer_size) do |p_out_str, p_returned|
        API.GetNkfVersionSafeA(p_out_str, p_out_str.size, p_returned)
      end
    end
    module_function :version

    # Returns the string written by NkfUsage.
    #
    # buffer_size:: size of the output buffer (default API::USAGE_BUFFER_SIZE).
    # Raises NKF32FailedException when the DLL call returns false.
    def usage(buffer_size = API::USAGE_BUFFER_SIZE)
      call_with_output_buffer(buffer_size) do |p_out_str, p_returned|
        API.NkfUsage(p_out_str, p_out_str.size, p_returned)
      end
    end
    module_function :usage

    # Passes +opt_str+ to SetNkfOption and returns +opt_str+.
    def option=(opt_str)
      ret = API.SetNkfOption(opt_str)
      # NOTE(review): SetNkfOption returns an int, and every Integer is truthy
      # in Ruby, so this guard never raises. The DLL's failure code is not
      # visible here -- confirm it before tightening the check.
      raise NKF32FailedException unless ret
      opt_str
    end
    module_function :option=

    # Runs NkfConvertSafe on +in_str+ and returns the bytes it produced.
    #
    # in_str::      input string.
    # buffer_size:: output buffer size in bytes. Defaults to the input's byte
    #               length times API::CONVERT_BUFFER_SIZE_TIMES, with a floor
    #               of 1 so that empty input does not request a zero-length
    #               buffer.
    #
    # Lengths are measured with String#bytesize because the DLL prototype
    # takes them in bytes; String#size counts characters for multibyte
    # strings and would truncate the input.
    #
    # Raises NKF32FailedException when the DLL call returns false.
    def convert(in_str, buffer_size = [in_str.bytesize * API::CONVERT_BUFFER_SIZE_TIMES, 1].max)
      call_with_output_buffer(buffer_size) do |p_out_str, p_returned|
        API.NkfConvertSafe(p_out_str, p_out_str.size, p_returned, in_str, in_str.bytesize)
      end
    end
    module_function :convert

    # Returns the string written by GetNkfGuessA.
    # NOTE(review): presumably the encoding name detected by the preceding
    # conversion -- confirm against the nkf32.dll documentation.
    #
    # buffer_size:: size of the output buffer (default API::GUESS_BUFFER_SIZE).
    # Raises NKF32FailedException when the DLL call returns false.
    def guess(buffer_size = API::GUESS_BUFFER_SIZE)
      call_with_output_buffer(buffer_size) do |p_out_str, p_returned|
        API.GetNkfGuessA(p_out_str, p_out_str.size, p_returned)
      end
    end
    module_function :guess

    # Maps an encoding name string to the matching ::NKF constant.
    # Names not listed below map to ::NKF::UNKNOWN.
    def code(name)
      case name
      when "ASCII"       then ::NKF::ASCII
      when "ISO-2022-JP" then ::NKF::JIS
      when "Shift_JIS"   then ::NKF::SJIS
      when "EUC-JP"      then ::NKF::EUC
      when "UTF-8"       then ::NKF::UTF8
      when "UTF-16"      then ::NKF::UTF16
      when "BINARY"      then ::NKF::BINARY
      else                    ::NKF::UNKNOWN
      end
    end
    module_function :code
  end
end

# Defines NKF.nkf and NKF.guess (plus the guess1/guess2 aliases) on top of
# Win32::NKF32, so that callers of the NKF module go through nkf32.dll.
module NKF
  # Sets +opt_str+ as the converter option, then converts +in_str+.
  # Returns the converted string.
  def nkf(opt_str, in_str)
    backend = Win32::NKF32
    backend.option = opt_str
    backend.convert(in_str)
  end

  # Converts +in_str+ with the "-g" option, then asks the backend for its
  # guess and translates that name into an NKF encoding constant.
  def guess(in_str)
    backend = Win32::NKF32
    backend.option = "-g"
    backend.convert(in_str) # result discarded; the guess is fetched separately
    detected_name = backend.guess
    backend.code(detected_name)
  end

  # guess1 and guess2 are plain aliases of guess.
  alias_method :guess1, :guess
  alias_method :guess2, :guess

  module_function :nkf, :guess, :guess1, :guess2
end

試しに次のデータをutf8.txtに保存し、

こんにちは、JRuby!
①
表
〜
〜 <- WAVE DASH (U+301C, "〜") のつもり

以下を実行

>jruby -rnkf32 -e "puts NKF.nkf('-s', IO.read('utf8.txt'))"
こんにちは、JRuby!
①
表
〜
〜

>jruby -rnkf -e "puts NKF.nkf('-s', IO.read('utf8.txt'))"
-e:1: invalid encoding (ArgumentError)

とりあえず、OKっぽいのでよしとする。
ただ、utf-16の文字をguessするとASCIIになる。。。たぶん、nkf32.dllの問題だと。付属(?)のnkf.exeでもASCIIだったし。
でも、rubyのNKFは推定できるんだよね。。。バージョンの違いかね。まあ使わないからいいけど