|
|
@@ -0,0 +1,311 @@ |
|
|
-- |
|
|
-- PostgreSQL database dump |
|
|
-- |
|
|
|
|
|
-- Dumped from database version 9.1.1 |
|
|
-- Dumped by pg_dump version 9.1.1 |
|
|
-- Started on 2011-11-02 16:53:36 EET |
|
|
|
|
|
SET statement_timeout = 0; |
|
|
SET client_encoding = 'UTF8'; |
|
|
SET standard_conforming_strings = on; |
|
|
SET check_function_bodies = false; |
|
|
SET client_min_messages = warning; |
|
|
|
|
|
-- |
|
|
-- TOC entry 8 (class 2615 OID 28036) |
|
|
-- Name: bayes; Type: SCHEMA; Schema: -; Owner: - |
|
|
-- |
|
|
|
|
|
CREATE SCHEMA bayes; |
|
|
|
|
|
|
|
|
SET search_path = bayes, pg_catalog; |
|
|
|
|
|
-- |
|
|
-- TOC entry 223 (class 1255 OID 28111) |
|
|
-- Dependencies: 607 8 |
|
|
-- Name: classify(integer, integer[]); Type: FUNCTION; Schema: bayes; Owner: - |
|
|
-- |
|
|
|
|
|
CREATE FUNCTION classify(iset_id integer, ifeature_ids integer[]) RETURNS integer |
|
|
LANGUAGE plpgsql COST 1 |
|
|
AS $$ |
|
|
DECLARE |
|
|
features_count integer := 0; |
|
|
r record; |
|
|
p float := 0; |
|
|
mult float := 0; |
|
|
max_prob float := -1; |
|
|
max_prob_cat integer := 0; |
|
|
index integer := 1; |
|
|
fcount integer := 0; |
|
|
BEGIN |
|
|
features_count = array_upper(ifeature_ids, 1); |
|
|
|
|
|
|
|
|
FOR r IN |
|
|
SELECT category_id, count(*) AS cntr |
|
|
FROM bayes.feature_category |
|
|
WHERE set_id = iset_id |
|
|
GROUP BY category_id |
|
|
LOOP |
|
|
p = 1; |
|
|
|
|
|
WHILE index <= features_count |
|
|
LOOP |
|
|
IF (r.cntr = 0) THEN |
|
|
mult = 0.001; |
|
|
ELSE |
|
|
|
|
|
SELECT cnt |
|
|
FROM bayes.feature_category |
|
|
WHERE feature_id = ifeature_ids[index] |
|
|
AND set_id = iset_id |
|
|
AND category_id = r.category_id INTO fcount; |
|
|
|
|
|
IF (fcount IS NULL) THEN |
|
|
fcount = 0; |
|
|
END IF; |
|
|
|
|
|
mult = fcount / r.cntr; |
|
|
END IF; |
|
|
|
|
|
p = p * mult; |
|
|
index = index + 1; |
|
|
END LOOP; |
|
|
|
|
|
--RAISE NOTICE 'cat % like %', r.category_id, p; |
|
|
|
|
|
IF p > max_prob THEN |
|
|
max_prob_cat = r.category_id; |
|
|
max_prob = p; |
|
|
END IF; |
|
|
|
|
|
END LOOP; |
|
|
|
|
|
RETURN max_prob_cat; |
|
|
END; |
|
|
$$; |
|
|
|
|
|
|
|
|
-- |
|
|
-- TOC entry 224 (class 1255 OID 28110) |
|
|
-- Dependencies: 607 8 |
|
|
-- Name: tokenize(character varying[], boolean); Type: FUNCTION; Schema: bayes; Owner: - |
|
|
-- |
|
|
|
|
|
CREATE FUNCTION tokenize(ifeatures character varying[], ido_insert boolean) RETURNS integer[] |
|
|
LANGUAGE plpgsql COST 1 |
|
|
AS $$ |
|
|
DECLARE |
|
|
w varchar; |
|
|
fid integer := 0; |
|
|
index integer := 1; |
|
|
count integer := 0; |
|
|
outa int[]; |
|
|
BEGIN |
|
|
count = array_upper(ifeatures, 1); |
|
|
WHILE index <= count |
|
|
LOOP |
|
|
RAISE NOTICE '%', ifeatures[index]; |
|
|
SELECT id FROM bayes.feature WHERE val = ifeatures[index] INTO fid; |
|
|
IF (fid IS NOT NULL) THEN |
|
|
outa = array_append(outa, fid); |
|
|
ELSE |
|
|
IF ido_insert THEN |
|
|
INSERT INTO bayes.feature (val) VALUES (ifeatures[index]); |
|
|
outa = array_append(outa, lastval()::int); |
|
|
END IF; |
|
|
END IF; |
|
|
index := index + 1; |
|
|
END LOOP; |
|
|
RETURN outa; |
|
|
END; |
|
|
$$; |
|
|
|
|
|
|
|
|
-- |
|
|
-- TOC entry 222 (class 1255 OID 28092) |
|
|
-- Dependencies: 607 8 |
|
|
-- Name: train(integer, integer, integer[]); Type: FUNCTION; Schema: bayes; Owner: - |
|
|
-- |
|
|
|
|
|
CREATE FUNCTION train(iset_id integer, icategory_id integer, ifeature_ids integer[]) RETURNS void |
|
|
LANGUAGE plpgsql COST 1 |
|
|
AS $$ |
|
|
DECLARE |
|
|
w integer; |
|
|
fid integer := 0; |
|
|
index integer := 1; |
|
|
count integer := 0; |
|
|
BEGIN |
|
|
count = array_upper(ifeature_ids, 1); |
|
|
WHILE index <= count |
|
|
LOOP |
|
|
SELECT id |
|
|
FROM bayes.feature_category |
|
|
WHERE set_id = iset_id |
|
|
AND category_id = icategory_id |
|
|
AND feature_id = ifeature_ids[index] INTO fid; |
|
|
|
|
|
IF (fid IS NOT NULL) THEN |
|
|
|
|
|
UPDATE bayes.feature_category |
|
|
SET cnt = cnt + 1 |
|
|
WHERE id = fid; |
|
|
|
|
|
ELSE |
|
|
|
|
|
INSERT INTO bayes.feature_category |
|
|
(set_id, category_id, feature_id) |
|
|
VALUES |
|
|
(iset_id, icategory_id, ifeature_ids[index]); |
|
|
|
|
|
END IF; |
|
|
index := index + 1; |
|
|
END LOOP; |
|
|
END; |
|
|
$$; |
|
|
|
|
|
|
|
|
SET default_tablespace = ''; |
|
|
|
|
|
SET default_with_oids = false; |
|
|
|
|
|
-- |
|
|
-- TOC entry 206 (class 1259 OID 28114) |
|
|
-- Dependencies: 8 |
|
|
-- Name: feature; Type: TABLE; Schema: bayes; Owner: -; Tablespace: |
|
|
-- |
|
|
|
|
|
CREATE TABLE feature ( |
|
|
id integer NOT NULL, |
|
|
val character varying(50) |
|
|
); |
|
|
|
|
|
|
|
|
-- |
|
|
-- TOC entry 208 (class 1259 OID 28124) |
|
|
-- Dependencies: 1975 8 |
|
|
-- Name: feature_category; Type: TABLE; Schema: bayes; Owner: -; Tablespace: |
|
|
-- |
|
|
|
|
|
CREATE TABLE feature_category ( |
|
|
id integer NOT NULL, |
|
|
set_id integer, |
|
|
category_id integer, |
|
|
feature_id integer, |
|
|
cnt integer DEFAULT 1 |
|
|
); |
|
|
|
|
|
|
|
|
-- |
|
|
-- TOC entry 207 (class 1259 OID 28122) |
|
|
-- Dependencies: 208 8 |
|
|
-- Name: feature_category_id_seq; Type: SEQUENCE; Schema: bayes; Owner: - |
|
|
-- |
|
|
|
|
|
CREATE SEQUENCE feature_category_id_seq |
|
|
START WITH 1 |
|
|
INCREMENT BY 1 |
|
|
NO MINVALUE |
|
|
NO MAXVALUE |
|
|
CACHE 1; |
|
|
|
|
|
|
|
|
-- |
|
|
-- TOC entry 1986 (class 0 OID 0) |
|
|
-- Dependencies: 207 |
|
|
-- Name: feature_category_id_seq; Type: SEQUENCE OWNED BY; Schema: bayes; Owner: - |
|
|
-- |
|
|
|
|
|
ALTER SEQUENCE feature_category_id_seq OWNED BY feature_category.id; |
|
|
|
|
|
|
|
|
-- |
|
|
-- TOC entry 205 (class 1259 OID 28112) |
|
|
-- Dependencies: 206 8 |
|
|
-- Name: feature_id_seq; Type: SEQUENCE; Schema: bayes; Owner: - |
|
|
-- |
|
|
|
|
|
CREATE SEQUENCE feature_id_seq |
|
|
START WITH 1 |
|
|
INCREMENT BY 1 |
|
|
NO MINVALUE |
|
|
NO MAXVALUE |
|
|
CACHE 1; |
|
|
|
|
|
|
|
|
-- |
|
|
-- TOC entry 1987 (class 0 OID 0) |
|
|
-- Dependencies: 205 |
|
|
-- Name: feature_id_seq; Type: SEQUENCE OWNED BY; Schema: bayes; Owner: - |
|
|
-- |
|
|
|
|
|
ALTER SEQUENCE feature_id_seq OWNED BY feature.id; |
|
|
|
|
|
|
|
|
-- |
|
|
-- TOC entry 1973 (class 2604 OID 28117) |
|
|
-- Dependencies: 205 206 206 |
|
|
-- Name: id; Type: DEFAULT; Schema: bayes; Owner: - |
|
|
-- |
|
|
|
|
|
ALTER TABLE feature ALTER COLUMN id SET DEFAULT nextval('feature_id_seq'::regclass); |
|
|
|
|
|
|
|
|
-- |
|
|
-- TOC entry 1974 (class 2604 OID 28127) |
|
|
-- Dependencies: 207 208 208 |
|
|
-- Name: id; Type: DEFAULT; Schema: bayes; Owner: - |
|
|
-- |
|
|
|
|
|
ALTER TABLE feature_category ALTER COLUMN id SET DEFAULT nextval('feature_category_id_seq'::regclass); |
|
|
|
|
|
|
|
|
-- |
|
|
-- TOC entry 1981 (class 2606 OID 28130) |
|
|
-- Dependencies: 208 208 |
|
|
-- Name: feature_category_pkey; Type: CONSTRAINT; Schema: bayes; Owner: -; Tablespace: |
|
|
-- |
|
|
|
|
|
ALTER TABLE ONLY feature_category |
|
|
ADD CONSTRAINT feature_category_pkey PRIMARY KEY (id); |
|
|
|
|
|
|
|
|
-- |
|
|
-- TOC entry 1983 (class 2606 OID 28132) |
|
|
-- Dependencies: 208 208 208 208 |
|
|
-- Name: feature_category_set_id_category_id_feature_id_key; Type: CONSTRAINT; Schema: bayes; Owner: -; Tablespace: |
|
|
-- |
|
|
|
|
|
ALTER TABLE ONLY feature_category |
|
|
ADD CONSTRAINT feature_category_set_id_category_id_feature_id_key UNIQUE (set_id, category_id, feature_id); |
|
|
|
|
|
|
|
|
-- |
|
|
-- TOC entry 1977 (class 2606 OID 28119) |
|
|
-- Dependencies: 206 206 |
|
|
-- Name: feature_pkey; Type: CONSTRAINT; Schema: bayes; Owner: -; Tablespace: |
|
|
-- |
|
|
|
|
|
ALTER TABLE ONLY feature |
|
|
ADD CONSTRAINT feature_pkey PRIMARY KEY (id); |
|
|
|
|
|
|
|
|
-- |
|
|
-- TOC entry 1979 (class 2606 OID 28121) |
|
|
-- Dependencies: 206 206 |
|
|
-- Name: feature_val_key; Type: CONSTRAINT; Schema: bayes; Owner: -; Tablespace: |
|
|
-- |
|
|
|
|
|
ALTER TABLE ONLY feature |
|
|
ADD CONSTRAINT feature_val_key UNIQUE (val); |
|
|
|
|
|
|
|
|
-- Completed on 2011-11-02 16:53:37 EET |
|
|
|
|
|
-- |
|
|
-- PostgreSQL database dump complete |
|
|
-- |