Last active
January 6, 2018 12:32
-
-
Save prehensile/7d5eb1e79bd4476dee90ab51efb34bc8 to your computer and use it in GitHub Desktop.
Oulipo S+7 constraint, implemented in Python 3 & NLTK
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# an implementation of Oulipo S+7
# see https://en.wikipedia.org/wiki/Oulipo
from nltk import pos_tag, word_tokenize | |
from nltk.corpus import wordnet | |
text = "I am sitting in a room different from the one you are in now. I am recording the sound of my speaking voice and I am going to play it back into the room again and again until the resonant frequencies of the room reinforce themselves so that any semblance of my speech, with perhaps the exception of rhythm, is destroyed. What you will hear, then, are the natural resonant frequencies of the room articulated by speech. I regard this activity not so much as a demonstration of a physical fact, but more as a way to smooth out any irregularities my speech might have."

# Penn Treebank noun tags (NN, NNS, NNP, NNPS) all share this prefix.
# A plain "contains 'n'" test would also match IN (preposition) and VBN.
NOUN_TAG_PREFIX = "NN"


def tag_text(source):
    """Tokenise and POS-tag *source*.

    If the NLTK tokeniser / tagger data is missing (LookupError), download
    it and retry once.

    Returns the list of (word, tag) pairs produced by ``pos_tag``.
    """
    try:
        return pos_tag(word_tokenize(source))
    except LookupError:
        # download nltk tokeniser & tagger if missing
        # NOTE(review): newer NLTK releases may use different resource
        # names for these models -- confirm against the installed version.
        import nltk
        nltk.download('punkt')
        nltk.download('averaged_perceptron_tagger')
    return pos_tag(word_tokenize(source))


def load_nouns():
    """Return an alphabetised, duplicate-free list of single-word nouns.

    The list is built from the first dotted component of every WordNet
    noun synset name; names containing an underscore (multi-word entries)
    are skipped. The WordNet corpus is downloaded if it is missing.
    """
    def collect():
        # A set removes the repeats that occur when one word heads several
        # synsets; without it "+7" could land on the very same word.
        return {
            synset.name().split(".")[0]
            for synset in wordnet.all_synsets('n')
        }

    try:
        names = collect()
    except LookupError:
        # check we've got the wordnet corpus, download if missing
        import nltk
        nltk.download('wordnet')
        names = collect()
    return sorted(name for name in names if "_" not in name)


def shift_nouns(tagged_words, nouns, offset=7):
    """Apply the Oulipo S+N constraint to a POS-tagged token sequence.

    Each token whose tag starts with ``NN`` and whose text appears in
    *nouns* is replaced by the entry *offset* places further along
    *nouns*; every other token is passed through unchanged.

    Args:
        tagged_words: iterable of (word, tag) pairs.
        nouns: sorted list of candidate nouns (the "dictionary").
        offset: how many entries to move forward (7 for classic S+7).

    Returns:
        A list of output words, one per input token.
    """
    # Map each noun to its first position once, instead of scanning the
    # whole list twice ("in" + "index") for every token.
    position = {}
    for i, noun in enumerate(nouns):
        position.setdefault(noun, i)
    last = len(nouns) - 1

    words_out = []
    for word, tag in tagged_words:
        i = position.get(word) if tag.startswith(NOUN_TAG_PREFIX) else None
        if i is None:
            words_out.append(word)
        else:
            # Clamp to the valid index range: the last valid index is
            # len(nouns) - 1, not len(nouns).
            words_out.append(nouns[max(0, min(i + offset, last))])
    return words_out


def main():
    """Tag the sample text, apply S+7 and print the mangled quotation."""
    # tokenise & POS-tag quotation
    tags = tag_text(text)
    # construct an alphabetised list of nouns from wordnet
    nouns = load_nouns()
    # step through tokenised quotation, replace nouns (Oulipo S+7)
    # https://en.wikipedia.org/wiki/Oulipo
    words_out = shift_nouns(tags, nouns)
    # print mangled quotation
    print(" ".join(words_out))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment