Skip to content

Instantly share code, notes, and snippets.

@golgote
Forked from aquilax/gist:1333830
Last active August 29, 2015 14:09

Revisions

  1. @aquilax aquilax created this gist Nov 2, 2011.
    311 changes: 311 additions & 0 deletions gistfile1.sql
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,311 @@
    --
    -- PostgreSQL database dump
    --

    -- Dumped from database version 9.1.1
    -- Dumped by pg_dump version 9.1.1
    -- Started on 2011-11-02 16:53:36 EET

    SET statement_timeout = 0;
    SET client_encoding = 'UTF8';
    SET standard_conforming_strings = on;
    SET check_function_bodies = false;
    SET client_min_messages = warning;

    --
    -- TOC entry 8 (class 2615 OID 28036)
    -- Name: bayes; Type: SCHEMA; Schema: -; Owner: -
    --

    CREATE SCHEMA bayes;


    SET search_path = bayes, pg_catalog;

    --
    -- TOC entry 223 (class 1255 OID 28111)
    -- Dependencies: 607 8
    -- Name: classify(integer, integer[]); Type: FUNCTION; Schema: bayes; Owner: -
    --

    CREATE FUNCTION classify(iset_id integer, ifeature_ids integer[]) RETURNS integer
    LANGUAGE plpgsql COST 1
    AS $$
    DECLARE
    features_count integer := 0;
    r record;
    p float := 0;
    mult float := 0;
    max_prob float := -1;
    max_prob_cat integer := 0;
    index integer := 1;
    fcount integer := 0;
    BEGIN
    features_count = array_upper(ifeature_ids, 1);


    FOR r IN
    SELECT category_id, count(*) AS cntr
    FROM bayes.feature_category
    WHERE set_id = iset_id
    GROUP BY category_id
    LOOP
    p = 1;

    WHILE index <= features_count
    LOOP
    IF (r.cntr = 0) THEN
    mult = 0.001;
    ELSE

    SELECT cnt
    FROM bayes.feature_category
    WHERE feature_id = ifeature_ids[index]
    AND set_id = iset_id
    AND category_id = r.category_id INTO fcount;

    IF (fcount IS NULL) THEN
    fcount = 0;
    END IF;

    mult = fcount / r.cntr;
    END IF;

    p = p * mult;
    index = index + 1;
    END LOOP;

    --RAISE NOTICE 'cat % like %', r.category_id, p;

    IF p > max_prob THEN
    max_prob_cat = r.category_id;
    max_prob = p;
    END IF;

    END LOOP;

    RETURN max_prob_cat;
    END;
    $$;


    --
    -- TOC entry 224 (class 1255 OID 28110)
    -- Dependencies: 607 8
    -- Name: tokenize(character varying[], boolean); Type: FUNCTION; Schema: bayes; Owner: -
    --

    CREATE FUNCTION tokenize(ifeatures character varying[], ido_insert boolean) RETURNS integer[]
    LANGUAGE plpgsql COST 1
    AS $$
    DECLARE
    w varchar;
    fid integer := 0;
    index integer := 1;
    count integer := 0;
    outa int[];
    BEGIN
    count = array_upper(ifeatures, 1);
    WHILE index <= count
    LOOP
    RAISE NOTICE '%', ifeatures[index];
    SELECT id FROM bayes.feature WHERE val = ifeatures[index] INTO fid;
    IF (fid IS NOT NULL) THEN
    outa = array_append(outa, fid);
    ELSE
    IF ido_insert THEN
    INSERT INTO bayes.feature (val) VALUES (ifeatures[index]);
    outa = array_append(outa, lastval()::int);
    END IF;
    END IF;
    index := index + 1;
    END LOOP;
    RETURN outa;
    END;
    $$;


    --
    -- TOC entry 222 (class 1255 OID 28092)
    -- Dependencies: 607 8
    -- Name: train(integer, integer, integer[]); Type: FUNCTION; Schema: bayes; Owner: -
    --

    CREATE FUNCTION train(iset_id integer, icategory_id integer, ifeature_ids integer[]) RETURNS void
    LANGUAGE plpgsql COST 1
    AS $$
    DECLARE
    w integer;
    fid integer := 0;
    index integer := 1;
    count integer := 0;
    BEGIN
    count = array_upper(ifeature_ids, 1);
    WHILE index <= count
    LOOP
    SELECT id
    FROM bayes.feature_category
    WHERE set_id = iset_id
    AND category_id = icategory_id
    AND feature_id = ifeature_ids[index] INTO fid;

    IF (fid IS NOT NULL) THEN

    UPDATE bayes.feature_category
    SET cnt = cnt + 1
    WHERE id = fid;

    ELSE

    INSERT INTO bayes.feature_category
    (set_id, category_id, feature_id)
    VALUES
    (iset_id, icategory_id, ifeature_ids[index]);

    END IF;
    index := index + 1;
    END LOOP;
    END;
    $$;


    SET default_tablespace = '';

    SET default_with_oids = false;

    --
    -- TOC entry 206 (class 1259 OID 28114)
    -- Dependencies: 8
    -- Name: feature; Type: TABLE; Schema: bayes; Owner: -; Tablespace:
    --

    CREATE TABLE feature (
    id integer NOT NULL,
    val character varying(50)
    );


    --
    -- TOC entry 208 (class 1259 OID 28124)
    -- Dependencies: 1975 8
    -- Name: feature_category; Type: TABLE; Schema: bayes; Owner: -; Tablespace:
    --

    CREATE TABLE feature_category (
    id integer NOT NULL,
    set_id integer,
    category_id integer,
    feature_id integer,
    cnt integer DEFAULT 1
    );


    --
    -- TOC entry 207 (class 1259 OID 28122)
    -- Dependencies: 208 8
    -- Name: feature_category_id_seq; Type: SEQUENCE; Schema: bayes; Owner: -
    --

    CREATE SEQUENCE feature_category_id_seq
    START WITH 1
    INCREMENT BY 1
    NO MINVALUE
    NO MAXVALUE
    CACHE 1;


    --
    -- TOC entry 1986 (class 0 OID 0)
    -- Dependencies: 207
    -- Name: feature_category_id_seq; Type: SEQUENCE OWNED BY; Schema: bayes; Owner: -
    --

    ALTER SEQUENCE feature_category_id_seq OWNED BY feature_category.id;


    --
    -- TOC entry 205 (class 1259 OID 28112)
    -- Dependencies: 206 8
    -- Name: feature_id_seq; Type: SEQUENCE; Schema: bayes; Owner: -
    --

    CREATE SEQUENCE feature_id_seq
    START WITH 1
    INCREMENT BY 1
    NO MINVALUE
    NO MAXVALUE
    CACHE 1;


    --
    -- TOC entry 1987 (class 0 OID 0)
    -- Dependencies: 205
    -- Name: feature_id_seq; Type: SEQUENCE OWNED BY; Schema: bayes; Owner: -
    --

    ALTER SEQUENCE feature_id_seq OWNED BY feature.id;


    --
    -- TOC entry 1973 (class 2604 OID 28117)
    -- Dependencies: 205 206 206
    -- Name: id; Type: DEFAULT; Schema: bayes; Owner: -
    --

    ALTER TABLE feature ALTER COLUMN id SET DEFAULT nextval('feature_id_seq'::regclass);


    --
    -- TOC entry 1974 (class 2604 OID 28127)
    -- Dependencies: 207 208 208
    -- Name: id; Type: DEFAULT; Schema: bayes; Owner: -
    --

    ALTER TABLE feature_category ALTER COLUMN id SET DEFAULT nextval('feature_category_id_seq'::regclass);


    --
    -- TOC entry 1981 (class 2606 OID 28130)
    -- Dependencies: 208 208
    -- Name: feature_category_pkey; Type: CONSTRAINT; Schema: bayes; Owner: -; Tablespace:
    --

    ALTER TABLE ONLY feature_category
    ADD CONSTRAINT feature_category_pkey PRIMARY KEY (id);


    --
    -- TOC entry 1983 (class 2606 OID 28132)
    -- Dependencies: 208 208 208 208
    -- Name: feature_category_set_id_category_id_feature_id_key; Type: CONSTRAINT; Schema: bayes; Owner: -; Tablespace:
    --

    ALTER TABLE ONLY feature_category
    ADD CONSTRAINT feature_category_set_id_category_id_feature_id_key UNIQUE (set_id, category_id, feature_id);


    --
    -- TOC entry 1977 (class 2606 OID 28119)
    -- Dependencies: 206 206
    -- Name: feature_pkey; Type: CONSTRAINT; Schema: bayes; Owner: -; Tablespace:
    --

    ALTER TABLE ONLY feature
    ADD CONSTRAINT feature_pkey PRIMARY KEY (id);


    --
    -- TOC entry 1979 (class 2606 OID 28121)
    -- Dependencies: 206 206
    -- Name: feature_val_key; Type: CONSTRAINT; Schema: bayes; Owner: -; Tablespace:
    --

    ALTER TABLE ONLY feature
    ADD CONSTRAINT feature_val_key UNIQUE (val);


    -- Completed on 2011-11-02 16:53:37 EET

    --
    -- PostgreSQL database dump complete
    --