#A Collection of NLP notes
##N-grams
###Calculating unigram probabilities:
P( wi ) = count( wi ) / count( total number of words )
In English...
#!/usr/bin/env python3 | |
# coding: utf-8 | |
# | |
# Copyright © Neoway Business Solutions | |
# | |
# @project: Diário Oficial | |
# @author: Manoel Vilela | |
# @email: [email protected] | |
# |
#A Collection of NLP notes
##N-grams
###Calculating unigram probabilities:
P( wi ) = count( wi ) / count( total number of words )
In English...
#updated 5/7/17 | |
import tkinter as tk | |
import re | |
import sqlite3 | |
from collections import Counter | |
from string import punctuation | |
from math import sqrt | |
from time import sleep ##for delay realism | |
global B |
import os.path | |
import collections | |
from operator import itemgetter | |
WORDFILE = '/usr/share/dict/words' | |
class Autocorrect(object): | |
""" | |
Very simplistic implementation of autocorrect using ngrams. | |
""" |
# List unique values in a DataFrame column | |
# h/t @makmanalp for the updated syntax! | |
df['Column Name'].unique() | |
# Convert Series datatype to numeric (will error if column has non-numeric values) | |
# h/t @makmanalp | |
pd.to_numeric(df['Column Name']) | |
# Convert Series datatype to numeric, changing non-numeric values to NaN | |
# h/t @makmanalp for the updated syntax! |
Regex for matching ALL Japanese common & uncommon Kanji (4e00 – 9fcf) ~ The Big Kahuna! | |
([一-龯]) | |
Regex for matching Hiragana or Katakana | |
([ぁ-んァ-ン]) | |
Regex for matching Non-Hiragana or Non-Katakana | |
([^ぁ-んァ-ン]) | |
Regex for matching Hiragana or Katakana or basic punctuation (、。’) |
#!/usr/bin/env python | |
""" | |
Example of using Keras to implement a 1D convolutional neural network (CNN) for timeseries prediction. | |
""" | |
from __future__ import print_function, division | |
import numpy as np | |
from keras.layers import Convolution1D, Dense, MaxPooling1D, Flatten | |
from keras.models import Sequential |