Last active
November 1, 2015 22:12
Revisions
-
scazon revised this gist
Nov 1, 2015 . 1 changed file with 3 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,3 +1,6 @@ import urllib.request import json def isOffensive(word): """ Determines whether a word string is considered offensive. -
scazon revised this gist
Nov 1, 2015 . No changes.There are no files selected for viewing
-
scazon revised this gist
Nov 1, 2015 . No changes.There are no files selected for viewing
-
scazon created this gist
Nov 1, 2015 .There are no files selected for viewing
def isOffensive(word, *, fetch=None):
    """Return True if Wiktionary files ``word`` under an offensive-looking category.

    Queries the English Wiktionary API for the categories of ``word``
    (following redirects), page of results by page of results, and checks
    every category title against a fixed list of substrings such as
    "offensive", "vulgarities" or "slang".

    Args:
        word: The term to look up. It is percent-encoded before being placed
            in the query string, so spaces and non-ASCII text are accepted.
        fetch: Optional callable taking the full request URL and returning the
            decoded JSON response as a dict. Defaults to an HTTP GET through
            ``urllib.request``; supplying another callable allows caching or
            offline use.

    Returns:
        True as soon as one category title contains a banned substring,
        otherwise False once all result pages have been examined. A word with
        no Wiktionary entry (no categories in the response) yields False.

    Side effects:
        Prints every category title examined, then "SKIPPING" or
        "Not offensive" depending on the outcome.

    Errors raised by the fetcher (network or HTTP failures, invalid JSON) are
    not caught and propagate to the caller.
    """
    # Imported locally so the function works even when the enclosing module
    # does not import these names itself.
    import json
    import urllib.parse
    import urllib.request

    wikiUrl = "http://en.wiktionary.org/w/api.php?format=json&action=query&prop=categories&redirects=1&titles="
    # A category title containing any of these substrings marks the word as offensive.
    bannedCats = ("offensive", "swear", "vulgarities", "slurs", "derogatory", "slang")

    def _fetch_json(url):
        """Perform the HTTP request and decode the JSON body."""
        # read() instead of readall(): HTTPResponse has no readall() on
        # Python 3.5+. The with-block closes the connection afterwards.
        with urllib.request.urlopen(urllib.request.Request(url)) as response:
            return json.loads(response.read().decode("utf-8"))

    if fetch is None:
        fetch = _fetch_json

    # Escape the word so spaces, '&', '#', and non-ASCII characters cannot
    # break or alter the query string.
    base_url = wikiUrl + urllib.parse.quote(word, safe="")

    qContinue = ""
    while True:
        # Keep requesting category lists until no further result pages exist.
        url = base_url
        if qContinue:
            # The continue token may contain pipes (|), which must be escaped.
            url += "&clcontinue=" + urllib.parse.quote(qContinue)

        wikiQuery = fetch(url)

        # An absent "continue" entry means this was the last page of results.
        qContinue = wikiQuery.get("continue", {}).get("clcontinue", "")

        # The key between "pages" and "categories" is a page id that is not
        # known in advance, e.g. ["query"]["pages"]["3076"]["categories"], so
        # take the first page object. A word without an entry has no
        # "categories" key; treat that as an empty list.
        pages = wikiQuery.get("query", {}).get("pages", {})
        first_page = next(iter(pages.values()), {})

        for category in first_page.get("categories", []):
            title = category["title"]
            print(title)
            lowered = title.lower()
            if any(banned in lowered for banned in bannedCats):
                print("SKIPPING")
                return True

        if not qContinue:
            print("Not offensive")
            return False