Skip to content

Instantly share code, notes, and snippets.

@fuba
Forked from gyuque/hannya.rb
Created May 19, 2011 18:09

Revisions

  1. fuba revised this gist May 19, 2011. 1 changed file with 3 additions and 3 deletions.
    6 changes: 3 additions & 3 deletions tenji.rb
    Original file line number Diff line number Diff line change
    @@ -2,19 +2,19 @@

    # http://www.unicode.org/reports/tr44/tr44-6.html
    # http://www.unicode.org/Public/UNIDATA/Scripts.txt
    chars = [10240,10241,10242,10243,10244,10245,10246,10247,10248,10249,10250,10251,10252,10253,10254,10255,10256,10257,10258,10259,10260,10261,10262,10263,10264,10265,10266,10267,10268,10269,10270,10271,10272,10273,10274,10275,10276,10277,10278,10279,10280,10281,10282,10283,10284,10285,10286,10287,10288,10289,10290,10291,10292,10293,10294,10295,10296,10297,10298,10299,10300,10301,10302,10303,10304,10305,10306,10307,10308,10309,10310,10311,10312,10313,10314,10315,10316,10317,10318,10319,10320,10321,10322,10323,10324,10325,10326,10327,10328,10329,10330,10331,10332,10333,10334,10335,10336,10337,10338,10339,10340,10341,10342,10343,10344,10345,10346,10347,10348,10349,10350,10351,10352,10353,10354,10355,10356,10357,10358,10359,10360,10361,10362,10363,10364,10365,10366,10367,10368,10369,10370,10371,10372,10373,10374,10375,10376,10377,10378,10379,10380,10381,10382,10383,10384,10385,10386,10387,10388,10389,10390,10391,10392,10393,10394,10395,10396,10397,10398,10399,10400,10401,10402,10403,10404,10405,10406,10407,10408,10409,10410,10411,10412,10413,10414,10415,10416,10417,10418,10419,10420,10421,10422,10423,10424,10425,10426,10427,10428,10429,10430,10431,10432,10433,10434,10435,10436,10437,10438,10439,10440,10441,10442,10443,10444,10445,10446,10447,10448,10449,10450,10451,10452,10453,10454,10455,10456,10457,10458,10459,10460,10461,10462,10463,10464,10465,10466,10467,10468,10469,10470,10471,10472,10473,10474,10475,10476,10477,10478,10479,10480,10481,10482,10483,10484,10485,10486,10487,10488,10489,10490,10491,10492,10493,10494,10495]
    CHARS = [10240,10241,10242,10243,10244,10245,10246,10247,10248,10249,10250,10251,10252,10253,10254,10255,10256,10257,10258,10259,10260,10261,10262,10263,10264,10265,10266,10267,10268,10269,10270,10271,10272,10273,10274,10275,10276,10277,10278,10279,10280,10281,10282,10283,10284,10285,10286,10287,10288,10289,10290,10291,10292,10293,10294,10295,10296,10297,10298,10299,10300,10301,10302,10303,10304,10305,10306,10307,10308,10309,10310,10311,10312,10313,10314,10315,10316,10317,10318,10319,10320,10321,10322,10323,10324,10325,10326,10327,10328,10329,10330,10331,10332,10333,10334,10335,10336,10337,10338,10339,10340,10341,10342,10343,10344,10345,10346,10347,10348,10349,10350,10351,10352,10353,10354,10355,10356,10357,10358,10359,10360,10361,10362,10363,10364,10365,10366,10367,10368,10369,10370,10371,10372,10373,10374,10375,10376,10377,10378,10379,10380,10381,10382,10383,10384,10385,10386,10387,10388,10389,10390,10391,10392,10393,10394,10395,10396,10397,10398,10399,10400,10401,10402,10403,10404,10405,10406,10407,10408,10409,10410,10411,10412,10413,10414,10415,10416,10417,10418,10419,10420,10421,10422,10423,10424,10425,10426,10427,10428,10429,10430,10431,10432,10433,10434,10435,10436,10437,10438,10439,10440,10441,10442,10443,10444,10445,10446,10447,10448,10449,10450,10451,10452,10453,10454,10455,10456,10457,10458,10459,10460,10461,10462,10463,10464,10465,10466,10467,10468,10469,10470,10471,10472,10473,10474,10475,10476,10477,10478,10479,10480,10481,10482,10483,10484,10485,10486,10487,10488,10489,10490,10491,10492,10493,10494,10495]

    def tenjihash(src)
    s = []
    bits = Digest::SHA1.digest(src).unpack("b*").join('')

    bit_len = (Math.log(chars.length) / Math.log(2)).to_i
    bit_len = (Math.log(CHARS.length) / Math.log(2)).to_i
    hash_len = (160/bit_len).to_i

    regex = Regexp.new("^[01]{"+bit_len.to_s+"}")
    hash_len.times{
    bits.sub!(regex, '')
    s << [chars[$&.to_i(2)]].pack("U")
    s << [CHARS[$&.to_i(2)]].pack("U")
    }

    s.join('')
  2. fuba revised this gist May 19, 2011. 1 changed file with 3 additions and 3 deletions.
    6 changes: 3 additions & 3 deletions tenji.rb
    Original file line number Diff line number Diff line change
    @@ -2,19 +2,19 @@

    # http://www.unicode.org/reports/tr44/tr44-6.html
    # http://www.unicode.org/Public/UNIDATA/Scripts.txt
    @@chars = [10240,10241,10242,10243,10244,10245,10246,10247,10248,10249,10250,10251,10252,10253,10254,10255,10256,10257,10258,10259,10260,10261,10262,10263,10264,10265,10266,10267,10268,10269,10270,10271,10272,10273,10274,10275,10276,10277,10278,10279,10280,10281,10282,10283,10284,10285,10286,10287,10288,10289,10290,10291,10292,10293,10294,10295,10296,10297,10298,10299,10300,10301,10302,10303,10304,10305,10306,10307,10308,10309,10310,10311,10312,10313,10314,10315,10316,10317,10318,10319,10320,10321,10322,10323,10324,10325,10326,10327,10328,10329,10330,10331,10332,10333,10334,10335,10336,10337,10338,10339,10340,10341,10342,10343,10344,10345,10346,10347,10348,10349,10350,10351,10352,10353,10354,10355,10356,10357,10358,10359,10360,10361,10362,10363,10364,10365,10366,10367,10368,10369,10370,10371,10372,10373,10374,10375,10376,10377,10378,10379,10380,10381,10382,10383,10384,10385,10386,10387,10388,10389,10390,10391,10392,10393,10394,10395,10396,10397,10398,10399,10400,10401,10402,10403,10404,10405,10406,10407,10408,10409,10410,10411,10412,10413,10414,10415,10416,10417,10418,10419,10420,10421,10422,10423,10424,10425,10426,10427,10428,10429,10430,10431,10432,10433,10434,10435,10436,10437,10438,10439,10440,10441,10442,10443,10444,10445,10446,10447,10448,10449,10450,10451,10452,10453,10454,10455,10456,10457,10458,10459,10460,10461,10462,10463,10464,10465,10466,10467,10468,10469,10470,10471,10472,10473,10474,10475,10476,10477,10478,10479,10480,10481,10482,10483,10484,10485,10486,10487,10488,10489,10490,10491,10492,10493,10494,10495]
    chars = [10240,10241,10242,10243,10244,10245,10246,10247,10248,10249,10250,10251,10252,10253,10254,10255,10256,10257,10258,10259,10260,10261,10262,10263,10264,10265,10266,10267,10268,10269,10270,10271,10272,10273,10274,10275,10276,10277,10278,10279,10280,10281,10282,10283,10284,10285,10286,10287,10288,10289,10290,10291,10292,10293,10294,10295,10296,10297,10298,10299,10300,10301,10302,10303,10304,10305,10306,10307,10308,10309,10310,10311,10312,10313,10314,10315,10316,10317,10318,10319,10320,10321,10322,10323,10324,10325,10326,10327,10328,10329,10330,10331,10332,10333,10334,10335,10336,10337,10338,10339,10340,10341,10342,10343,10344,10345,10346,10347,10348,10349,10350,10351,10352,10353,10354,10355,10356,10357,10358,10359,10360,10361,10362,10363,10364,10365,10366,10367,10368,10369,10370,10371,10372,10373,10374,10375,10376,10377,10378,10379,10380,10381,10382,10383,10384,10385,10386,10387,10388,10389,10390,10391,10392,10393,10394,10395,10396,10397,10398,10399,10400,10401,10402,10403,10404,10405,10406,10407,10408,10409,10410,10411,10412,10413,10414,10415,10416,10417,10418,10419,10420,10421,10422,10423,10424,10425,10426,10427,10428,10429,10430,10431,10432,10433,10434,10435,10436,10437,10438,10439,10440,10441,10442,10443,10444,10445,10446,10447,10448,10449,10450,10451,10452,10453,10454,10455,10456,10457,10458,10459,10460,10461,10462,10463,10464,10465,10466,10467,10468,10469,10470,10471,10472,10473,10474,10475,10476,10477,10478,10479,10480,10481,10482,10483,10484,10485,10486,10487,10488,10489,10490,10491,10492,10493,10494,10495]

    def tenjihash(src)
    s = []
    bits = Digest::SHA1.digest(src).unpack("b*").join('')

    bit_len = (Math.log(@@chars.length) / Math.log(2)).to_i
    bit_len = (Math.log(chars.length) / Math.log(2)).to_i
    hash_len = (160/bit_len).to_i

    regex = Regexp.new("^[01]{"+bit_len.to_s+"}")
    hash_len.times{
    bits.sub!(regex, '')
    s << [@@chars[$&.to_i(2)]].pack("U")
    s << [chars[$&.to_i(2)]].pack("U")
    }

    s.join('')
  3. fuba revised this gist May 19, 2011. 2 changed files with 21 additions and 18 deletions.
    18 changes: 0 additions & 18 deletions hannya.rb
    Original file line number Diff line number Diff line change
    @@ -1,18 +0,0 @@
    require 'digest/sha1'
    # Lookup table of 32 kanji; a 5-bit value (0..31) selects one entry.
    HANNYA_CHARS = %w[
      観 菩 薩 行 深 般 若 波
      羅 蜜 多 時 照 見 五 蘊
      皆 空 度 苦 厄 舎 利 阿
      不 異 即 是 想 心 経 如
    ]

    # Renders the SHA-1 digest of +src+ as 32 characters from HANNYA_CHARS.
    #
    # The 20-byte digest is expanded to a 160-character bit string (each byte
    # least-significant bit first, per the "b*" unpack directive), cut into
    # 32 consecutive 5-bit groups, and every group is read as a base-2 number
    # that indexes the lookup table.
    #
    # @param src [String] data to hash
    # @return [String] 32 characters taken from HANNYA_CHARS
    def hannya_sha1(src)
      bit_string = Digest::SHA1.digest(src).unpack("b*").first

      # 160 bits / 5 bits per group = exactly 32 groups, nothing left over.
      groups = bit_string.scan(/[01]{5}/)
      groups.map { |group| HANNYA_CHARS[group.to_i(2)] }.join
    end
    21 changes: 21 additions & 0 deletions tenji.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,21 @@
    require 'digest/sha1'

    # http://www.unicode.org/reports/tr44/tr44-6.html
    # http://www.unicode.org/Public/UNIDATA/Scripts.txt
    # Code points of the 256 Unicode Braille Patterns, U+2800 (10240) through
    # U+28FF (10495), in ascending order.
    #
    # This is a constant rather than a toplevel class variable: assigning or
    # reading `@@chars` at toplevel raises RuntimeError on Ruby 3.0 and later,
    # and a plain local variable would not be visible inside the method body.
    CHARS = (10240..10495).to_a.freeze

    # Renders the SHA-1 digest of +src+ as a string of Braille pattern characters.
    #
    # The 20-byte digest is expanded to a 160-character bit string (each byte
    # least-significant bit first, per the "b*" unpack directive). The string is
    # cut into groups of floor(log2(CHARS.length)) bits, and each group is read
    # as a base-2 index into CHARS. With the 256-entry table that is 8 bits per
    # character, so the result is 20 characters long.
    #
    # @param src [String] data to hash
    # @return [String] UTF-8 string of 160 / bit_len characters
    def tenjihash(src)
      bits = Digest::SHA1.digest(src).unpack("b*").first

      # floor(log2(n)) via integer arithmetic; avoids float rounding in
      # Math.log(n) / Math.log(2).
      bit_len = CHARS.length.bit_length - 1
      hash_len = 160 / bit_len

      # Only whole groups are consumed; any trailing bits that do not fill a
      # complete group are ignored, as before.
      code_points = bits.scan(/[01]{#{bit_len}}/).first(hash_len).map do |group|
        CHARS[group.to_i(2)]
      end

      code_points.pack("U*")
    end
  4. @gyuque gyuque created this gist May 13, 2011.
    18 changes: 18 additions & 0 deletions hannya.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,18 @@
    require 'digest/sha1'
    # Lookup table of 32 kanji; a 5-bit value (0..31) selects one entry.
    HANNYA_CHARS = %w[
      観 菩 薩 行 深 般 若 波
      羅 蜜 多 時 照 見 五 蘊
      皆 空 度 苦 厄 舎 利 阿
      不 異 即 是 想 心 経 如
    ]

    # Renders the SHA-1 digest of +src+ as 32 characters from HANNYA_CHARS.
    #
    # The 20-byte digest is expanded to a 160-character bit string (each byte
    # least-significant bit first, per the "b*" unpack directive), cut into
    # 32 consecutive 5-bit groups, and every group is read as a base-2 number
    # that indexes the lookup table.
    #
    # @param src [String] data to hash
    # @return [String] 32 characters taken from HANNYA_CHARS
    def hannya_sha1(src)
      bit_string = Digest::SHA1.digest(src).unpack("b*").first

      # 160 bits / 5 bits per group = exactly 32 groups, nothing left over.
      groups = bit_string.scan(/[01]{5}/)
      groups.map { |group| HANNYA_CHARS[group.to_i(2)] }.join
    end