Created
June 1, 2019 02:13
-
-
Save jewel12/4714f3beed0c65aadecc98a6feb9285c to your computer and use it in GitHub Desktop.
(変な)おもしろコード見せ合い会
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'json'

# Transition table, filled from newline-delimited JSON read on standard input.
# Every input line is expected to be an object with the keys 'l' (left token),
# 'r' (right token) and 'prob'. probs[left][right] stores prob scaled by 1000
# and truncated to an Integer, which gen later uses as a sampling weight.
probs = Hash.new { |h, k| h[k] = {} }

$stdin.each_line do |line|
  line = line.strip
  # A blank line (e.g. a trailing newline in the export) carries no record.
  next if line.empty?

  # JSON.parse rather than JSON.load: the input is external data, and
  # JSON.load is the permissive variant intended for trusted sources.
  record = JSON.parse(line)
  probs[record['l']][record['r']] = (record['prob'] * 1000).to_i
end
# Picks one key of +weights+ at random with probability proportional to its
# weight. Returns nil when no key has a positive weight.
def pick_weighted(weights)
  total = weights.sum { |_token, weight| weight }
  return nil unless total.positive?

  # Walk the cumulative weights instead of materialising one array slot per
  # weight unit, so the cost does not depend on how large the weights are.
  target = rand(total)
  weights.each do |token, weight|
    return token if target < weight

    target -= weight
  end
  nil
end

# Generates a token sequence by a random walk over a first-order transition
# table, starting at '@@@START@@@'.
#
# probs     - Hash of { left_token => { right_token => weight } }.
# max_steps - upper bound on the number of sampling attempts (default 10000).
#
# The walk stops when '@@@END@@@' is drawn, when the current token has no
# successor with a positive weight, or when max_steps is exhausted. A drawn
# token that has no successors of its own is discarded and another draw is
# made from the same state. probs is never modified, even when it is a Hash
# with a default block.
#
# Returns the Array of generated tokens, without the START/END markers.
def gen(probs, max_steps = 10_000)
  start_token = '@@@START@@@'
  end_token = '@@@END@@@'

  successors = probs.key?(start_token) ? probs[start_token] : {}
  generated = []

  max_steps.times do
    candidate = pick_weighted(successors)
    # END never has successors, so it must be recognised before the dead-end
    # check below; otherwise it is skipped like any other dead end and the
    # walk can only stop by running out of steps.
    break if candidate.nil? || candidate == end_token

    next_successors = probs.key?(candidate) ? probs[candidate] : nil
    next if next_successors.nil? || next_successors.empty?

    generated << candidate
    successors = next_successors
  end

  generated
end
# Endlessly generate a candidate program, evaluate it, and print it only when
# evaluation finished without raising.
loop do
  begin
    # '___ENT___' is the placeholder the token data uses for line breaks.
    code = gen(probs).map { |g| g.gsub('___ENT___', "\n") }.join(' ')
    # This executes arbitrary generated code, so run it only in an environment
    # that you do not mind breaking.
    eval(code)
    puts "-----------------------------"
    puts code
  rescue SignalException
    # Interrupt (Ctrl-C) and other signals must still be able to stop this
    # otherwise endless loop.
    raise
  rescue Exception # rubocop:disable Lint/RescueException
    # Deliberately broad: generated code is usually invalid and may raise
    # anything, including SyntaxError or SystemExit. Discard it and try again.
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- pairs(c): splits source text into adjacent-token pairs (l, r).
-- The first pair has l = '@@@START@@@' and the last pair has r = '@@@END@@@'.
CREATE TEMPORARY FUNCTION
  pairs(c STRING)
  RETURNS ARRAY<STRUCT<l STRING,
  r STRING>>
  LANGUAGE js AS """
  // Replace each run of line breaks with the ___ENT___ placeholder, collapse
  // runs of spaces into one space, then tokenise on single spaces.
  const tokens = c
    .replace(RegExp('\\n+', 'g'), "___ENT___")
    .replace(RegExp(' +', 'g'), " ")
    .split(' ');
  const bigrams = [];
  let prev = '@@@START@@@';
  for (const tok of tokens) {
    bigrams.push({'l': prev, 'r': tok});
    prev = tok;
  }
  // Close the sequence so the end of a file is itself a transition.
  bigrams.push({'l': prev, 'r': '@@@END@@@'});
  return bigrams;
""";
-- Computes, for every adjacent token pair (l, r) found in the sampled .rb
-- files, the conditional probability of r following l.
WITH
  -- One array of (l, r) pairs per non-null Ruby file in the sample.
  token_pairs AS (
  SELECT
    pairs(c.content) ps
  FROM
    `bigquery-public-data.github_repos.sample_contents` c
  JOIN
    `bigquery-public-data.github_repos.sample_files` f
  ON
    c.id = f.id
  WHERE
    ENDS_WITH(f.path, '.rb')
    AND c.content IS NOT NULL ),
  -- Occurrence count per (l, r), keeping only pairs seen at least 5 times.
  token_freqs AS (
  SELECT
    l,
    r,
    freq
  FROM (
    SELECT
      token.l l,
      token.r r,
      COUNT(1) freq
    FROM
      token_pairs,
      UNNEST (token_pairs.ps) AS token
    GROUP BY
      l,
      r )
  WHERE
    freq >= 5 ),
  -- Total occurrences of each left token across its retained pairs.
  -- SUM(freq), not COUNT(1): COUNT(1) would give the number of distinct
  -- successors, and freq divided by that is not a probability (it can
  -- exceed 1).
  left_freqs AS (
  SELECT
    l,
    SUM(freq) freq
  FROM
    token_freqs
  GROUP BY
    l )
-- prob sums to 1 over the retained successors of each l.
SELECT
  tf.l,
  tf.r,
  tf.freq / lf.freq prob
FROM
  token_freqs tf
JOIN
  left_freqs lf
ON
  tf.l = lf.l
ORDER BY
  tf.l
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment