Last active
August 29, 2015 14:02
-
-
Save ianpreston/40ab702af079fb04a60f to your computer and use it in GitHub Desktop.
Naive Bayes Classifier in Haskell. Does it work? No idea.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- | A single labelled example: a class label plus one boolean flag per
-- feature.
data FeatureVec = FeatureVec
  { -- | Class label of the example.
    cls :: Int
  , -- | Feature flags; 'True' means the feature is present in the example.
    feat :: [Bool]
  }
  deriving (Show)
-- | Running per-class tally built up from that class's feature vectors.
data ValueVec = ValueVec
  { -- | Sum of all entries in 'vals' (recomputed on every update by
    -- 'appendFeatureVec').
    cnt :: Int
    -- TODO: Need a better name for vals
  , -- | One tally per feature: how many folded-in feature vectors had that
    -- feature set.
    vals :: [Int]
  }
  deriving (Show)
-- | Per-feature weights for one class, as produced by 'buildProbVec'.
type ProbVec = [Float]
--
-- buildValueVec
--
-- Turns a list of feature vectors for a given class into one 'value vector'
-- for that class, representing the sum of the features.
--
-- | Bump a tally by one when the flag is set; otherwise return it unchanged.
shiznit :: Bool -> Int -> Int
shiznit isSet tally
  | isSet = tally + 1
  | otherwise = tally
-- | Fold one feature vector into a running tally.
--
-- Every tally whose matching feature flag is 'True' is incremented (via
-- 'shiznit'). The resulting @cnt@ is recomputed as the sum of the updated
-- tallies, so neither the incoming @cnt@ nor the feature vector's class
-- label is consulted.
--
-- Flags and tallies are paired positionally; if the two lists differ in
-- length the result is as long as the shorter one.
appendFeatureVec :: FeatureVec -> ValueVec -> ValueVec
appendFeatureVec (FeatureVec _ flags) (ValueVec _ tallies) =
  ValueVec { cnt = sum updated, vals = updated }
  where
    -- zipWith replaces the original zip + map (uncurry ...) round trip.
    updated = zipWith shiznit flags tallies
-- | Fold a list of feature vectors into a starting tally, left to right,
-- using 'appendFeatureVec' for each step.
--
-- An empty list returns the starting tally unchanged (the previous
-- definition had no equation for @[]@ and failed with a pattern-match
-- error).
buildValueVec :: [FeatureVec] -> ValueVec -> ValueVec
buildValueVec [] vv = vv
buildValueVec (x : xs) vv = buildValueVec xs (appendFeatureVec x vv)
--
-- train
--
-- Takes a list of value vectors (transformed from feature vectors by
-- buildValueVec), one for each class, and returns the final trained model,
-- as one ProbVec per class.
--
-- | Scale each tally of a 'ValueVec' by a caller-supplied total.
--
-- The first argument is the denominator; the @cnt@ stored inside the
-- 'ValueVec' itself is not used. When the denominator is zero every weight
-- is reported as @0.0@ rather than dividing by zero (which would produce
-- NaN or Infinity entries).
buildProbVec :: Int -> ValueVec -> ProbVec
buildProbVec total (ValueVec _ tallies)
  | total == 0 = map (const 0.0) tallies
  | otherwise = map (\tally -> fromIntegral tally / denominator) tallies
  where
    -- Converted once instead of once per element.
    denominator = fromIntegral total
-- | Turn per-class tallies into per-class weight vectors.
--
-- Every class is scaled by the same denominator: the sum of the @cnt@
-- fields of all the supplied value vectors. The output has one 'ProbVec'
-- per input, in the same order.
train :: [ValueVec] -> [ProbVec]
train vectors = [buildProbVec grandTotal vv | vv <- vectors]
  where
    grandTotal = sum [cnt vv | vv <- vectors]
--
-- classify
--
-- Uses the trained model (two ProbVec instances) to classify a given
-- feature vector.
--
-- | Keep a weight when its flag is set; otherwise contribute @0.0@.
dopeShiznit :: Bool -> Float -> Float
dopeShiznit isSet weight = if isSet then weight else 0.0
-- | Pick class 0 or class 1 for a feature vector.
--
-- Each class is scored by summing its weights over the positions where the
-- feature flag is 'True'. The first argument is the weight vector for class
-- 0, the second for class 1. Class 0 wins ties.
classify :: ProbVec -> ProbVec -> [Bool] -> Int
classify pv0 pv1 fv
  | score pv0 >= score pv1 = 0
  | otherwise = 1
  where
    -- Sum of the weights whose corresponding feature flag is set.
    score pv = sum (zipWith dopeShiznit fv pv)
--
-- Test code: trains on a small hand-written data set and classifies one
-- feature vector.
--
-- | Demo driver: builds tallies for two hand-written classes, trains the
-- model, prints the intermediate values and classifies one feature vector.
main :: IO ()
main = do
  let vectors1 =
        [ FeatureVec { cls = 1, feat = [True, False, True] }
        , FeatureVec { cls = 1, feat = [False, False, True] }
        , FeatureVec { cls = 1, feat = [False, True, True] }
        , FeatureVec { cls = 1, feat = [True, True, True] }
        ]
      vectors0 =
        [ FeatureVec { cls = 0, feat = [True, True, False] }
        , FeatureVec { cls = 0, feat = [False, True, False] }
        , FeatureVec { cls = 0, feat = [False, True, False] }
        , FeatureVec { cls = 0, feat = [False, True, False] }
        ]
      -- Starting tally: three features, nothing counted yet.
      emptyTally = ValueVec { cnt = 0, vals = [0, 0, 0] }
      vv1 = buildValueVec vectors1 emptyTally
      vv0 = buildValueVec vectors0 emptyTally
  print vv1
  print vv0
  -- 'train' returns one ProbVec per input in input order, so class 1 comes
  -- first here. Pattern matching replaces the partial head/tail calls.
  case train [vv1, vv0] of
    [pv1, pv0] -> do
      putStrLn "Probability vector for class 1:"
      print pv1
      putStrLn "Probability vector for class 0:"
      print pv0
      let toClassify = [True, True, False]
      putStrLn "Classified as:"
      print $ classify pv0 pv1 toClassify
    _ -> error "main: train did not return exactly two probability vectors"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment