scazon · November 1, 2015 22:12 · Nov 1, 2015 · Nov 1, 2015 · Nov 1, 2015 · Nov 1, 2015
diff --git a/isOffensive.py b/isOffensive.py
@@ -1,3 +1,6 @@
+import urllib.request
+import json
+
 def isOffensive(word):
 	"""
 	Determines whether a word string is considered offensive.

diff --git a/isOffensive.py b/isOffensive.py
@@ -0,0 +1,54 @@
+def isOffensive(word):
+	"""
+	Determines whether a word string is considered offensive.
+
+	Searches the word on Wiktionary, then iterates through all the 'Categories' the word belongs to.
+	If a category name contains a word related to 'offensive', the function returns True.
+	If every page of categories is exhausted without a match, or the word has no
+	categories at all (e.g. no Wiktionary entry), the function returns False.
+
+	word -- the term to look up; it is percent-encoded before being put in the URL
+	Errors raised by urllib while making the request are not caught here.
+	Side effect: prints each category title, then "SKIPPING" or "Not offensive".
+	"""
+	#Imported locally so the function works without relying on module-level imports
+	import json
+	import urllib.parse
+	import urllib.request
+
+	wikiUrl = "http://en.wiktionary.org/w/api.php"
+	bannedCats = ["offensive", "swear", "vulgarities", "slurs", "derogatory", "slang"]
+		#if the category title contains any of these words, return True
+	baseParams = {
+		"format": "json",
+		"action": "query",
+		"prop": "categories",
+		"redirects": "1",
+		"titles": word,
+	}
+	qContinue = {}
+	while True:
+		#Continue making HTTP requests for the word category lists until no more pages of categories exist
+		params = dict(baseParams)
+		params.update(qContinue)
+			#every value of the previous "continue" object is sent back with the next request
+		url = wikiUrl + "?" + urllib.parse.urlencode(params)
+			#urlencode() escapes the word itself and the pipes (|) found in continue values
+		with urllib.request.urlopen(url) as response:
+			#read() is used because HTTPResponse has no readall() on newer Python 3 versions
+			wikiQuery = json.loads(response.read().decode('utf-8'))
+		#when "continue" doesn't exist, i.e. no more pages, this becomes an empty dict
+		qContinue = wikiQuery.get("continue", {})
+		pages = wikiQuery.get("query", {}).get("pages", {})
+			#the keys of "pages" are page ids, e.g. ["query"]["pages"]["3076"]
+		for page in pages.values():
+			#a page may come back without a "categories" key (e.g. a missing entry)
+			for category in page.get("categories", []):
+				title = category["title"]
+				print(title)
+				if any(x in title.lower() for x in bannedCats):
+					print("SKIPPING")
+					return True
+		if not qContinue:
+			print("Not offensive")
+			return False