# Assumes reviews_df with a parsed 'parse' column and a combined 'category' column,
# and `import scattertext as st` (see the data-loading snippets below).
four_square_corpus_phrases = (st.CorpusFromParsedDocuments(reviews_df, category_col='category', parsed_col='parse',
                                                           feats_from_spacy_doc=st.PhraseMachinePhrases())
                              .build().compact(st.ClassPercentageCompactor(term_count=1)))
four_square_axes = st.FourSquareAxes(four_square_corpus_phrases,
                                     left_categories=['Accept, Positive'],
                                     right_categories=['Accept, Negative'],
                                     top_categories=['Reject, Positive'],
                                     bottom_categories=['Reject, Negative'],
                                     labels={'a': 'Positive',
                                             'b': 'Review that was Contrary to Acceptance Decision',
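The combined 'category' values such as 'Accept, Positive' are not constructed in the previews shown here. A plausible way to build them, assuming the data frame already has a 'decision' column and a binned rating column like the 'rating_bin' used further down, is sketched below; the column names and the join format are assumptions, not the gist's code.

# Hypothetical sketch: combine the acceptance decision with the binned review sentiment
# into the 'category' labels that the four-square plots expect.
reviews_df['category'] = reviews_df['decision'] + ', ' + reviews_df['rating_bin']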
four_square_corpus = (st.CorpusFromParsedDocuments(reviews_df, category_col='category', parsed_col='parse')
                      .build()
                      .get_unigram_corpus()
                      .compact(st.ClassPercentageCompactor(term_count=1)))
four_square_axes = st.FourSquareAxes(four_square_corpus,
                                     left_categories=['Accept, Positive'],
                                     right_categories=['Accept, Negative'],
                                     top_categories=['Reject, Positive'],
                                     bottom_categories=['Reject, Negative'],
                                     labels={'a': 'Positive',
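Both four-square previews are cut off before the chart is rendered. A hedged sketch of the likely next step follows; the function name and parameters are assumptions (check the scattertext documentation for the exact entry point), and the output file name is illustrative.

# Assumed rendering call (not confirmed by the preview): turn the four-square axes
# definition into an HTML visualization and write it to disk.
html = st.produce_four_square_axes_explorer(four_square_axes=four_square_axes,
                                            metadata=reviews_df['title'])
open('four_square_axes.html', 'wb').write(html.encode('utf-8'))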
import pandas as pd
import scattertext as st

reviews_df = pd.read_csv('https://github.com/JasonKessler/ICLR18ReviewVis/raw/master/iclr2018_reviews.csv.bz2')
reviews_df['parse'] = reviews_df['review'].apply(st.whitespace_nlp_with_sentences)
corpus = (st.CorpusFromParsedDocuments(reviews_df, category_col='decision', parsed_col='parse')
          .build().remove_categories(['Workshop']))
html = st.produce_scattertext_explorer(corpus,
                                       category='Accept', not_categories=['Reject'],
                                       transform=st.Scalers.dense_rank,
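produce_scattertext_explorer returns the visualization as an HTML string. Once the (truncated) call above completes, the usual way to view the result is to write that string to a file and open it in a browser; the file name below is illustrative.

# Write the explorer HTML to disk; open the resulting file in a browser to view the plot.
open('iclr18_accept_vs_reject.html', 'wb').write(html.encode('utf-8'))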
import pandas as pd
import scattertext as st
import spacy

reviews_df = pd.read_csv('https://github.com/JasonKessler/ICLR18ReviewVis/raw/master/iclr2018_reviews.csv.bz2')
reviews_df['parse'] = reviews_df['review'].apply(spacy.load('en', parser=False))
# Create Corpus based on accept/reject/workshop decision
full_corpus = st.CorpusFromParsedDocuments(
    reviews_df, category_col='decision', parsed_col='parse').build()
# A two-category corpus to use for plotting, with unigrams which only occur in bigrams removed.
# Terms used in <5 documents are removed as well.
corpus = st.CompactTerms(full_corpus.remove_categories(['Workshop']),
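The CompactTerms call is cut off in this preview. A compaction pattern that appears elsewhere in these gists, and which prunes rare terms in a similar spirit, is sketched below; the term_count value is illustrative, not taken from the original.

# Alternative sketch using the compaction pattern from the four-square snippets above:
# keep unigrams and drop terms that fall below a per-class frequency threshold.
corpus = (full_corpus.remove_categories(['Workshop'])
          .get_unigram_corpus()
          .compact(st.ClassPercentageCompactor(term_count=5)))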
import pandas as pd
import scattertext as st
import spacy

reviews_df = pd.read_csv('https://github.com/JasonKessler/ICLR18ReviewVis/raw/master/iclr2018_reviews.csv.bz2')
reviews_df['parse'] = reviews_df['review'].apply(spacy.load('en', parser=False))
full_corpus = st.CorpusFromParsedDocuments(reviews_df, category_col='decision', parsed_col='parse').build()
corpus = full_corpus.remove_categories(['Workshop'])
# Estimate term priors from all decision categories (including Workshop), then align them
# to the two-category Accept/Reject corpus used for plotting.
priors = (st.PriorFactory(full_corpus, term_ranker=st.OncePerDocFrequencyRanker)
          .use_all_categories().align_to_target(corpus).get_priors())
html = st.produce_frequency_explorer(
    corpus,
    category='Accept',
    not_categories=['Reject'],
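The produce_frequency_explorer call is truncated here. In scattertext, priors from a PriorFactory are typically consumed by a term scorer; the sketch below shows one common completion, assuming the log-odds-ratio scorer with an informative Dirichlet prior. The scale value of 10 and the output file name are illustrative, not taken from the original.

# Hedged sketch of the full call (not the original code): score terms with a log-odds
# ratio under the informative Dirichlet prior built above, then write the HTML to disk.
html = st.produce_frequency_explorer(
    corpus,
    category='Accept',
    not_categories=['Reject'],
    term_scorer=st.LogOddsRatioInformativeDirichletPrior(priors, 10),
    metadata=reviews_df['title'])
open('iclr18_priors.html', 'wb').write(html.encode('utf-8'))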
import bisect

class ECDFPurePy(object):
    '''
    Empirical cumulative distribution function (ECDF) with linear interpolation.
    '''
    def __init__(self, raw_list, min_possible, max_possible, resolution=1000):
        '''
        raw_list: sorted list (or generator) of numbers
        '''
        self.resolution_ = resolution
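The class body is cut off after the first line of the constructor, so the original implementation is not shown. Below is a minimal, self-contained sketch of the underlying idea: an ECDF with linear interpolation between neighboring sample points, using bisect as imported above. It ignores the min_possible/max_possible/resolution grid the class appears to precompute and is an illustration, not the gist's code.

def ecdf_interpolated(sorted_values, x):
    # Fraction of values <= x, interpolated linearly between neighboring sample points.
    n = len(sorted_values)
    i = bisect.bisect_right(sorted_values, x)
    if i == 0:
        return 0.0
    if i == n:
        return 1.0
    lo, hi = sorted_values[i - 1], sorted_values[i]
    frac = 0.0 if hi == lo else (x - lo) / (hi - lo)
    return (i + frac) / n

# Example: ecdf_interpolated([1, 2, 4, 8], 3) -> 0.625, halfway between 0.5 and 0.75.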
import pandas as pd
import scattertext as st
import spacy

reviews_df = pd.read_csv('https://github.com/JasonKessler/ICLR18ReviewVis/raw/master/iclr2018_reviews.csv.bz2')
reviews_df['parse'] = reviews_df['review'].apply(spacy.load('en'))
corpus = (st.CorpusFromParsedDocuments(reviews_df, category_col='rating_bin', parsed_col='parse')
          .build().remove_categories(['Neutral']))
html = st.produce_scattertext_explorer(corpus,
                                       category='Positive', not_categories=['Negative'],
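The 'rating_bin' column is not derived in the previews shown here. The raw CSV (below) stores ratings as strings such as '3: Clear rejection', so one plausible binning is to parse the leading integer and map it to Positive/Negative/Neutral. The thresholds and helper name below are assumptions for illustration.

# Hypothetical sketch (thresholds are assumptions, not from the gist): ICLR 2018 reviews
# are rated on a 1-10 scale, so bin them around the middle of the scale.
def bin_rating(rating_str):
    score = int(rating_str.split(':')[0])
    if score >= 6:
        return 'Positive'
    if score <= 4:
        return 'Negative'
    return 'Neutral'

reviews_df['rating_bin'] = reviews_df['rating'].apply(bin_rating)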
title,authors,decision_raw,forum,confidence,rating,review
Improving Discriminator-Generator Balance in Generative Adversarial Networks,['Simen Selseng and Björn Gambäck'],Reject,SyBPtQfAZ,4: The reviewer is confident but not absolutely certain that the evaluation is correct,3: Clear rejection,"The paper proposes a variety of modifications to improve GAN training and evaluates them using a variant of the Generative Adversarial Metric.
The first proposed approach, Static Reusable Noise, proposes sampling a fixed set of latent noise vectors instead of producing them via online sampling. It is motivated by the observation that the generator encounters different noise samples at each iteration of training while for real data the discriminator sees only a fixed number of samples. This does not seem to be a particularly convincing argument. One could argue likewise that this makes the discriminator's job easier as it only has to track the finite amount of samples the generator can produce instead of the full distri
import time

import pandas as pd
import requests

url = 'https://openreview.net/notes?invitation=ICLR.cc%2F2018%2FConference%2F-%2FBlind_Submission&offset=0&limit=1000'
df = pd.DataFrame(requests.get(url).json()['notes'])  # Each row in this data frame is a paper.
forum_content = []
for i, forum_id in list(enumerate(df.forum)):  # Each forum_id identifies a paper's forum of reviews, comments, and the acceptance decision.
    forum_content.append(requests.get('https://openreview.net/notes?forum={}&trash=true'.format(forum_id)).json())
    time.sleep(.3)
df['forumContent'] = pd.Series(forum_content)
df['title'] = df.content.apply(lambda x: x['title'])
df['authors'] = df.content.apply(lambda x: x['authors'])
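Each entry in forumContent is the JSON returned by the OpenReview notes endpoint for one paper's forum, and the gist presumably flattens those notes into the per-review rows seen in the CSV above. The sketch below shows one way that flattening might look; the 'notes', 'content', 'rating', 'confidence', and 'review' keys are assumptions inferred from the CSV columns, not confirmed by the preview.

# Hypothetical sketch: pull official review notes out of each paper's forum.
review_rows = []
for paper, forum in zip(df.itertuples(), df.forumContent):
    for note in forum.get('notes', []):
        content = note.get('content', {})
        if 'review' in content and 'rating' in content:  # skip comments and decision notes
            review_rows.append({'title': paper.title,
                                'forum': paper.forum,
                                'confidence': content.get('confidence'),
                                'rating': content.get('rating'),
                                'review': content.get('review')})
reviews_df = pd.DataFrame(review_rows)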