Created
June 12, 2016 05:42
-
-
Save kurubushi--rm/dc26a7b175fbcf862cd7620aed6b1d66 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from __future__ import print_function\n", | |
"from IPython.core.magic import (register_line_magic, register_cell_magic)\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"\n", | |
"from sympy import *\n", | |
"init_printing()\n", | |
"\n", | |
"import io\n", | |
"import re\n", | |
"import itertools" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## サンプル作成用関数" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"@register_cell_magic\n", | |
"def csv2df(line, cell):\n", | |
" '''\n", | |
" コンマ区切りテキストからデータフレームをつくる\n", | |
" '''\n", | |
" sio = io.StringIO(cell)\n", | |
" return pd.read_csv(sio)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def make_logic_table(n):\n", | |
" '''\n", | |
" 論理変数の雛形をつくる\n", | |
" '''\n", | |
" A2Z = [chr(i+ord('A')) for i in range(26)]\n", | |
" print (','.join(A2Z[:n]), ',') #アルファベット大文字のリストから前からn個\n", | |
" l = ['0','1']\n", | |
" # デカルト積 繰り返しを許す: 1,1 がある、順序が違えば別と見なす: 1,2 と 2,1 は別\n", | |
" for element in itertools.product(l, repeat=n):\n", | |
" print (','.join(element), ',')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def random_case_table(n,m):\n", | |
" '''\n", | |
" 乱数で事例テーブルをつくる \n", | |
" '''\n", | |
" A2Z = [chr(i+ord('A')) for i in range(26)] #アルファベット大文字26文字のリスト(次の行で前からm個を列名に使う)\n", | |
" case_df = pd.DataFrame(np.random.randint(0,5,(n,m)), columns=A2Z[:m]) #randint(a,b,(c,d)) a以上b未満の整数乱数をc×dの行列に\n", | |
" case_df[chr(m+ord('A'))] = np.random.randint(0,5,(n,1))\n", | |
" return case_df" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# ブール代数アプローチ用の関数" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def binarize_rep (df, col, str_list):\n", | |
" '''\n", | |
" コラムの中身を置き換えて二値化して論理変数にする\n", | |
" \n", | |
" df: 事例表のデータフレーム\n", | |
" col : データフレームのうち二値化して論理変数とするコラム\n", | |
" str_list: 置換前の文字列(1つめが0に、2つめが1に置換される)\n", | |
" \n", | |
" 返り値:コラムを置換したデータフレーム\n", | |
" \n", | |
" 例 binarize_rep(df,'ネオ・コーポラティズムか',['Yes','No'])\n", | |
" '''\n", | |
" for old, new in zip(str_list, range(len(str_list))):\n", | |
" df[col] = df[col].str.replace(old, str(new))\n", | |
" df[col] = df[col].astype(int)\n", | |
" return df\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def binarize(df, cond_list, threshold_list):\n", | |
" '''\n", | |
" 閾値を使って独立変数を二値化して論理変数にする\n", | |
" \n", | |
" df: 事例表のデータフレーム\n", | |
" cond_list : データフレームのうち二値化して論理変数とするコラム(列)のリスト\n", | |
" threshold_list: 二値化の閾値のリスト(それぞれのコラムについて、この値以上なら1、未満なら0にする)\n", | |
" \n", | |
" 返り値:独立変数を二値化して論理変数にした事例表のデータフレーム\n", | |
" \n", | |
" '''\n", | |
" for col, threshold in zip(cond_list, threshold_list):\n", | |
" df[col] = df[col].apply( lambda x: 1 if x >= float(threshold) else 0 )\n", | |
" return df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def mark_contains(df, check_col, mark_string_list, mark_list):\n", | |
" '''\n", | |
" 特定の文字列を含んでいるかどうかで論理変数を決めてコラムとして追加する\n", | |
" \n", | |
" df: 事例表のデータフレーム\n", | |
" check_col: 文字列が含まれるかどうかチェックする対象のコラム名\n", | |
" mark_string_list: 含んでいるかどうかチェックする文字列のリスト\n", | |
" 例: ['飢饉','凶作','干ばつ','地震','流行','米価暴騰','乱','変','大獄','長州征討','大政奉還']\n", | |
" mark_list: 文字列が含まれていた時に1をセットするカラム名(mark_string_listと同じ長さ)\n", | |
" 例:['E','E','E','D','D','E','P','P','P','P','P']\n", | |
" \n", | |
" 返り値:論理変数を格納するmark_listのカラムを追加した事例表のデータフレーム\n", | |
" \n", | |
" '''\n", | |
" df_n = df.dropna(subset=[check_col]) #nanが含まれるとうまく動かないので除いてる\n", | |
" already_col = [] #一度出てきたカラムは上書きして0に戻さないように記録する\n", | |
" \n", | |
" for k,v in zip(mark_list, mark_string_list):\n", | |
" #print (k,v)\n", | |
" if k in already_col:\n", | |
" df_n[k] = df_n.apply( lambda x: 1 if x[k] or (v in x[check_col]) else 0 , axis=1)\n", | |
" else:\n", | |
" df_n[k] = df_n[check_col].apply( lambda x: 1 if v in x else 0 )\n", | |
" already_col.append(k)\n", | |
"\n", | |
" return df_n\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def CT2TT(df, cond_list, result_col, cutpoint):\n", | |
" '''\n", | |
" 事例表から真理表をつくる\n", | |
" \n", | |
" df : 事例表のデータフレーム\n", | |
" cond_list : データフレームのうち論理変数が入ってるコラム(列)のリスト\n", | |
" result_col : 出力変数が入っているコラム(列)\n", | |
" cutpoint : 現象が生起したとみなせる結果の件数の下限値\n", | |
"\n", | |
" 返り値:真理表のデータフレーム(論理変数〜現象が生起したとみなせる結果の事例数〜当該の論理変数を満たす事例数)\n", | |
" '''\n", | |
" result = ','.join(cond_list) +','+ result_col +'_cases,all_cases\\n'\n", | |
" query_root_string = '&'.join([col+'=={}' for col in cond_list]) #'A=={}&B=={}&C=={}'\n", | |
" query_root_cutpoint_string = query_root_string + '&' + result_col + '>=' + str(cutpoint) #'A=={}&B=={}&C=={}&D>=5'\n", | |
" for element in itertools.product([1,0], repeat=len(cond_list)):\n", | |
" query_string = query_root_string.format(*element)\n", | |
" query_cutpoint_string = query_root_cutpoint_string.format(*element)\n", | |
" result += ','.join(map(str, element)) +','\\\n", | |
" + str(df.query(query_cutpoint_string).count()[result_col])+','\\\n", | |
" + str(df.query(query_string).count()[result_col]) + '\\n'\n", | |
" sio = io.StringIO(result)\n", | |
" return_df = pd.read_csv(sio)\n", | |
" return_df[result_col] = return_df['{}_cases'.format(result_col)] / return_df['all_cases']\n", | |
" return return_df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def table2bool(df, cond_list, result_col, cutpoint):\n", | |
" '''\n", | |
" 真理表からブール式をつくる\n", | |
" \n", | |
" df : 真理表のデータフレーム\n", | |
" cond_list : データフレームのうち論理変数が入ってるコラム(列)のリスト\n", | |
" result_col : 出力変数が入っているコラム(列)\n", | |
" cutpoint : 現象が生起したとみなせる結果の件数の下限値\n", | |
" \n", | |
" 例: table2bool(df, ['L','C','G'],'E', 1)\n", | |
" =>CGL+CGl+Cgl+GLc\n", | |
" '''\n", | |
" true_case_df = df[df[result_col].apply(lambda x: float(x)) >= cutpoint]\n", | |
" #return true_case_df\n", | |
" wholeform = []\n", | |
" for idx in true_case_df.index:\n", | |
" nowitem = []\n", | |
" for column in cond_list:\n", | |
" #print (column, true_case_df.ix[idx][column], end='')\n", | |
" TF = int(true_case_df.ix[idx][column])\n", | |
" if TF:\n", | |
" nowitem.append(column.upper())\n", | |
" else:\n", | |
" nowitem.append(column.lower())\n", | |
" #print (nowitem, end='\\n')\n", | |
" wholeform.append('*'.join(nowitem))\n", | |
" return sympify('+'.join(wholeform))\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def bool2logic(b):\n", | |
" '''\n", | |
" ブール代数式を論理式に変換する\n", | |
" '''\n", | |
" s = srepr(b).replace('Mul','And').replace('Add','Or')\n", | |
" s = re.sub(r\"Symbol\\('([A-Za-z])'\\)\", r'\\1', s)\n", | |
" s = re.sub(r'([ \\(])([a-z])', (lambda x: x.group(1) + 'Not(' + x.group(2).upper() +')' ), s)\n", | |
" return sympify(s) " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def logic2bool(l):\n", | |
" '''\n", | |
" 論理式をブール代数式に変換する\n", | |
" '''\n", | |
" s = srepr(l).replace('And','Mul').replace('Or','Add')\n", | |
" s = re.sub(r\"Symbol\\('([A-Z])'\\)\", r'\\1', s)\n", | |
" s = re.sub(r'Not\\(([A-Z]+)\\)', (lambda x: x.group(1).lower()) , s)\n", | |
" return sympify(s)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def complement_bool(b):\n", | |
" '''\n", | |
" ブール代数式から補集合のブール式を得る\n", | |
" '''\n", | |
" return logic2bool(to_dnf(Not(bool2logic(b))))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def simplify_bool(b):\n", | |
" '''\n", | |
" ブール代数式を積和標準形の形に簡単化する\n", | |
" '''\n", | |
" return logic2bool(simplify_logic(bool2logic(b),form='dnf'))\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def minimize(df, cond_list, result_col, cutpoint, dontcare_symbol=''):\n", | |
" '''\n", | |
" 真理表から縮約した論理式をつくる\n", | |
" \n", | |
" df : 真理表のデータフレーム\n", | |
" cond_list : データフレームのうち論理変数が入ってるコラム(列)のリスト\n", | |
" result_col : 出力変数が入っているコラム(列)\n", | |
" cutpoint : 現象が生起したとみなせる結果の件数の下限値\n", | |
" dontcare_symbol: don't care条件として用いる行の出力変数の値\n", | |
" \n", | |
" 例: 真理表のデータフレームtrue_tableについて、\n", | |
" 論理変数L,C,Gと出力変数Eを指定、\n", | |
" Eが1以上の行を真理条件とし、Eが?の行をdon't care条件として\n", | |
" 真理表から縮約した論理式を得る\n", | |
" \n", | |
" minimize(true_table, ['L','C','G'], 'E', 1,'?')\n", | |
" =>(C∧¬L)∨(G∧L)\n", | |
" '''\n", | |
" if dontcare_symbol:\n", | |
" #don't care条件がある場合\n", | |
" dontcare_df = df[df[result_col].str.contains(dontcare_symbol) ]\n", | |
" dontcares = dontcare_df[cond_list].values.tolist()\n", | |
" df = df[~df[result_col].str.contains(dontcare_symbol) ] #dontcareシンボルが含まれない行だけを対象にする\n", | |
" else:\n", | |
" #don't care条件がない場合\n", | |
" dontcares = []\n", | |
" true_case_df = df[df[result_col].apply(lambda x: float(x)) >= cutpoint]\n", | |
" minterms = true_case_df[cond_list].values.tolist()\n", | |
" return SOPform(cond_list, minterms, dontcares)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def minimize_bool(df, cond_list, result_col, cutpoint,dontcare_symbol=''):\n", | |
" '''\n", | |
" dontcare_symbol: don't care条件として用いる行の出力変数の値\n", | |
" \n", | |
" 例: 真理表のデータフレームtrue_tableについて、\n", | |
" 論理変数L,C,Gと出力変数Eを指定、\n", | |
" Eが1以上の行を真理条件とし、Eが?の行をdon't care条件として\n", | |
" 真理表から縮約したブール式を得る\n", | |
" \n", | |
" minimize_bool(true_table, ['L','C','G'], 'E', 1,'?')\n", | |
" =>Cl+GL\n", | |
" '''\n", | |
" return logic2bool(minimize(df, cond_list, result_col, cutpoint,dontcare_symbol))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.4.3" | |
}, | |
"widgets": { | |
"state": {}, | |
"version": "1.1.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment