Created
October 28, 2017 14:59
-
-
Save yfgeek/2f07921322d4d65fbe05bef0eb897fb9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8 | |
# python version:2.7 | |
import sys

from jpype import *
import pandas as pd

# Python 2 only: re-expose sys.setdefaultencoding and make UTF-8 the default codec.
reload(sys)
sys.setdefaultencoding('utf-8')

import pymysql
from pybloomfilter import BloomFilter
class Filter(object):
    """Copy rows of the ``list`` table into the ``filter`` table, skipping repeated names.

    A Bloom filter keyed on the ``name`` column decides whether a row's name
    has been seen before; only rows with an unseen name are inserted.
    NOTE(review): Bloom filters are probabilistic, so a small fraction of
    genuinely new names may be treated as already seen and skipped.
    """

    def __init__(self):
        """Open the MySQL connection, create a cursor and create the Bloom filter."""
        # Keyword arguments are used because PyMySQL 1.0+ rejects positional
        # connection parameters; older releases accept the same keywords.
        self.db = pymysql.connect(
            host="localhost",
            user="root",
            password="",
            database="uob",
            use_unicode=True,
            charset="utf8",
        )
        self.cursor = self.db.cursor()
        # Create the Bloom filter (capacity 1000, error rate 0.001, file 'uob.bloom').
        self.bfilter = BloomFilter(1000, 0.001, 'uob.bloom')

    def boolomFilter(self):
        """Insert every ``list`` row whose name is not yet in the Bloom filter into ``filter``.

        Each insert is committed on its own. When an insert fails, the
        transaction is rolled back, the error is reported on stderr and
        processing continues with the next row.
        """
        self.cursor.execute("SELECT id,name,url FROM list;")
        # Placeholders let the driver escape the values, so names or URLs that
        # contain quotes can neither break nor inject into the statement.
        insert_sql = "INSERT INTO filter (list_id,name,url) VALUES (%s, %s, %s);"
        for list_id, name, url in self.cursor.fetchall() or ():
            if name in self.bfilter:
                continue
            self.bfilter.add(name)
            params = (list_id, name, url)
            print("%s %r" % (insert_sql, params))
            try:
                self.cursor.execute(insert_sql, params)
                self.db.commit()
            except Exception as exc:
                # Best effort: undo the failed insert and keep going.
                self.db.rollback()
                sys.stderr.write("error: %r\n" % (exc,))
if __name__ == "__main__":
    # Script entry point: build a Filter and run one de-duplication pass.
    Filter().boolomFilter()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment