Skip to content

Instantly share code, notes, and snippets.

@MagallanesFito
Created March 22, 2019 06:37
Show Gist options
  • Save MagallanesFito/155d00ac8ab2a8242f3789f29e3640a7 to your computer and use it in GitHub Desktop.
Save MagallanesFito/155d00ac8ab2a8242f3789f29e3640a7 to your computer and use it in GitHub Desktop.
Simple Naive Bayes implementation
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#Operaciones sobre diccionarios
import operator
from collections import Counter
from random import randint
def load_data(filename):
    """Load a comma-separated file into a dict of column name -> list of values.

    The first non-blank line is the header. Every following non-blank line is
    split on "," and its fields are appended, by position, to the matching
    column. Values are kept as strings; trailing whitespace (including the
    newline) is stripped from the last field of each line only.

    The first column of the file is dropped before returning.
    NOTE(review): presumably that column is a row identifier -- confirm
    against the data file.

    Args:
        filename: Path of the file to read.

    Returns:
        Dict mapping each remaining header to its list of string values, in
        file order. An empty file yields an empty dict.

    Raises:
        IndexError: If a data row has fewer fields than the header.
    """
    df = {}
    # `with` guarantees the handle is closed even if a row fails to parse.
    with open(filename, 'r') as file:
        for line in file:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # data and would otherwise fail the positional lookup below.
            if not line.strip():
                continue
            fields = line.split(",")
            fields[-1] = fields[-1].rstrip()
            if not df:
                # First meaningful line: create one empty column per header.
                for colname in fields:
                    df[colname] = []
            else:
                for i, header in enumerate(df):
                    df[header].append(fields[i])
    if df:
        # Discard the first column; guarded so an empty file does not crash.
        df.pop(next(iter(df)))
    return df
def getProbability(column,at,class_,df,goal_class):
    """Estimate P(column == at | goal_class == class_) by relative frequency.

    Args:
        column: Name of the categorical feature column in ``df``.
        at: Feature value whose conditional probability is wanted.
        class_: Class label being conditioned on.
        df: Dict mapping column names to lists of values, indexed by row.
        goal_class: Name of the column in ``df`` that holds the class labels.

    Returns:
        The fraction of rows labelled ``class_`` whose ``column`` value equals
        ``at``, or ``0.0`` when no row carries that label.
    """
    values = df[column]
    count_class = 0
    times = 0
    # Single pass: count the rows of the class and, among them, the matches.
    for i, label in enumerate(df[goal_class]):
        if label == class_:
            count_class += 1
            if values[i] == at:
                times += 1
    if count_class == 0:
        # The label never occurs, so the relative frequency is undefined;
        # report zero instead of dividing by zero.
        return 0.0
    return times / count_class
def getMarginal(class_,goal_class,df):
    """Return the prior probability of a class, e.g. P("yes") or P("no").

    Args:
        class_: Class label whose prior is wanted.
        goal_class: Name of the column in ``df`` that holds the class labels.
        df: Dict mapping column names to lists of values.

    Returns:
        The fraction of entries in ``df[goal_class]`` equal to ``class_``, or
        ``0.0`` when that column is empty.
    """
    labels = df[goal_class]
    total = len(labels)
    if total == 0:
        # No observations: avoid dividing by zero.
        return 0.0
    times = sum(1 for label in labels if label == class_)
    return times / total
def train(df):
    """Build the probability table used for prediction.

    The last column of ``df`` (in key order) is taken as the class column;
    every other column is treated as a categorical feature.

    The returned table ``p`` holds two kinds of entries in one dict:

    * ``p[class_]`` -- prior probability of ``class_``.
    * ``p[column][value][class_]`` -- P(column == value | class_), estimated
      by relative frequency (no smoothing).

    Because both kinds share one dict, a feature column whose name equals a
    class label overwrites that label's prior.

    Args:
        df: Dict mapping column names to equally long lists of hashable
            values.

    Returns:
        Tuple ``(p, feature_columns, classes)`` where ``feature_columns`` is
        the list of column names except the last and ``classes`` is the set
        of distinct class labels.

    Raises:
        IndexError: If ``df`` has no columns.
    """
    columns = list(df)
    goal_class = columns[-1]
    features = columns[:-1]
    labels = df[goal_class]

    # Count each class once instead of rescanning the labels per lookup.
    class_counts = Counter(labels)
    classes = set(class_counts)
    total = len(labels)

    p = {}
    # Marginal (prior) probability of every class.
    for class_ in classes:
        p[class_] = class_counts[class_] / total

    # Conditional probabilities; categorical features only.
    for column in features:
        # One pass per column counts every (value, class) co-occurrence.
        joint = Counter(zip(df[column], labels))
        p[column] = {}
        for at in set(df[column]):
            p[column][at] = {
                class_: joint[(at, class_)] / class_counts[class_]
                for class_ in classes
            }
    return (p, features, classes)
def predict(p_cond,example,columns,classes):
    """Predict the class of one example from a trained probability table.

    For every class the score is the class prior ``p_cond[class_]`` multiplied
    by ``p_cond[column][value][class_]`` for each feature. Only the winning
    class label is returned, not its probability.

    An attribute value missing from the table (never seen for that column, or
    never paired with that class) contributes a likelihood of zero instead of
    raising ``KeyError``.

    Args:
        p_cond: Table with ``p_cond[class_]`` priors and
            ``p_cond[column][value][class_]`` conditional probabilities.
        example: Sequence of attribute values, positionally aligned with
            ``columns``.
        columns: Feature column names, in the order used by ``example``.
        classes: Iterable of candidate class labels.

    Returns:
        The class label with the highest score. On ties, the first such
        label in the iteration order of ``classes`` wins.

    Raises:
        IndexError: If ``example`` has fewer values than ``columns``.
        ValueError: If ``classes`` is empty.
    """
    scores = {}
    for class_ in classes:
        score = 1
        for i, column in enumerate(columns):
            # Unseen value or unseen (value, class) pair -> zero likelihood.
            score *= p_cond[column].get(example[i], {}).get(class_, 0.0)
        score *= p_cond[class_]
        scores[class_] = score
    return max(scores, key=scores.get)
def generateRandomExample(df,cols):
    """Build a random example by drawing one value from each given column.

    Each column is sampled independently, so the result does not necessarily
    match any single row of ``df``.

    Args:
        df: Dict mapping column names to non-empty lists of values.
        cols: Column names to sample, in the order wanted in the result.

    Returns:
        List with one randomly chosen value per column in ``cols``.

    Raises:
        ValueError: If a sampled column is empty.
    """
    example = []
    for column in cols:
        values = df[column]
        # randint's upper bound is inclusive, so the last valid index is
        # len(values) - 1.
        example.append(values[randint(0, len(values) - 1)])
    return example
# Demo: train on data.csv and print the predicted class of one example.
# Guarded so importing this module does not read the file or print anything.
if __name__ == "__main__":
    data_frame = load_data("data.csv")
    p_cond,cols,classes = train(data_frame)
    # One attribute value per feature column, in the same order as `cols`.
    example = ["Sol","Baja","Alta","Fuerte"]
    # To classify a randomly drawn example instead:
    # example = generateRandomExample(data_frame,cols)
    predicted_class = predict(p_cond,example,cols,classes)
    print(predicted_class)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment