Skip to content

Instantly share code, notes, and snippets.

@hakxcore
Created July 21, 2022 05:56
Show Gist options
  • Save hakxcore/092abb3476acfac2492d3229c1f5809f to your computer and use it in GitHub Desktop.
Save hakxcore/092abb3476acfac2492d3229c1f5809f to your computer and use it in GitHub Desktop.
NLTK.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "NLTK.ipynb",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/hakxcore/092abb3476acfac2492d3229c1f5809f/nltk.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UcIcD13vA-GL"
},
"source": [
"**Importing NLTK Library**"
]
},
{
"cell_type": "code",
"metadata": {
"id": "Yi5BB4LaAj-l"
},
"source": [
"import nltk"
],
"execution_count": 1,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "UUxuu_4OBSCv",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "84d29aa9-32a8-41a0-894b-6d1ee5218299"
},
"source": [
"nltk.download('punkt')"
],
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Unzipping tokenizers/punkt.zip.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 2
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Dtlcx68xAHQa"
},
"source": [
"from nltk import sent_tokenize"
],
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "dbNBY1CPAtkI"
},
"source": [
"text = \"GOOD MORNING all. Hope you will like this video. Thank You.\""
],
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "EiySbKvnBA_6"
},
"source": [
"sentence_tokens = sent_tokenize(text)\n"
],
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "MP69RopMAxj9",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "31f64bd7-4be0-46e8-cdcd-f51dd12d1be7"
},
"source": [
"print(sentence_tokens)\n"
],
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['GOOD MORNING all.', 'Hope you will like this video.', 'Thank You.']\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "h8QjvvQcBlr8",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "4f464b98-ae59-480a-e014-fb760c6f941b"
},
"source": [
"for sentence in sentence_tokens:\n",
" print(sentence)"
],
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"GOOD MORNING all.\n",
"Hope you will like this video.\n",
"Thank You.\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "SaTLjuVkC6lo"
},
"source": [
"Word Tokenization"
]
},
{
"cell_type": "code",
"metadata": {
"id": "dtpcHtHsC9dQ"
},
"source": [
"from nltk.tokenize import word_tokenize"
],
"execution_count": 8,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "dkiIXFLnDAP5"
},
"source": [
"sentence = \"Let's understand this concept in detail!\""
],
"execution_count": 9,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "7UdVxk5TDGNr",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "221cadc8-d4d3-4854-dae4-58da97a4727a"
},
"source": [
"word_tokens = word_tokenize(sentence)\n",
"print(word_tokens)"
],
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['Let', \"'s\", 'understand', 'this', 'concept', 'in', 'detail', '!']\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "hU6nZAsjDYV_"
},
"source": [
"from nltk.tokenize import TreebankWordTokenizer, WordPunctTokenizer, WhitespaceTokenizer\n",
"tree_tokenizer = TreebankWordTokenizer()\n",
"word_punct_tokenizer = WordPunctTokenizer()\n",
"white_space_tokenizer = WhitespaceTokenizer()"
],
"execution_count": 11,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "vtJuAjrTDdvO",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "eb77c13e-b6c2-407a-e5bd-391f62fda6cc"
},
"source": [
"word_tokens = tree_tokenizer.tokenize(sentence)\n",
"print(word_tokens)"
],
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['Let', \"'s\", 'understand', 'this', 'concept', 'in', 'detail', '!']\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "3gR0b2w2DhIB",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "5b1fa415-0050-4a89-e8a1-609accc0285e"
},
"source": [
"word_tokens = word_punct_tokenizer.tokenize(sentence)\n",
"print(word_tokens)"
],
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['Let', \"'\", 's', 'understand', 'this', 'concept', 'in', 'detail', '!']\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "-ixsNWXNDld7",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "02a1405b-4e5f-4fbf-a6ba-98bbed8144be"
},
"source": [
"word_tokens = white_space_tokenizer.tokenize(sentence)\n",
"print(word_tokens)"
],
"execution_count": 14,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[\"Let's\", 'understand', 'this', 'concept', 'in', 'detail!']\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "wup0YgPrFMBa"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "B42UVJnSFKK7"
},
"source": [
"Stemming"
]
},
{
"cell_type": "code",
"metadata": {
"id": "zux2s57QFOzv"
},
"source": [
"from nltk.stem import PorterStemmer, LancasterStemmer"
],
"execution_count": 15,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "NL5qiH9oFT6s",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e8c030d4-a477-4388-8aab-44d12c095140"
},
"source": [
"porter_stemmer = PorterStemmer()\n",
"print(porter_stemmer.stem('observing'))\n",
"print(porter_stemmer.stem('observs'))\n",
"print(porter_stemmer.stem('observe'))"
],
"execution_count": 16,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"observ\n",
"observ\n",
"observ\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"porter_stemmer = PorterStemmer()\n",
"print(porter_stemmer.stem('running'))\n",
"print(porter_stemmer.stem('observs'))\n",
"print(porter_stemmer.stem('observe'))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "kaxqJ6PhNm7u",
"outputId": "b8751eac-4e8f-4d56-e928-1c80afc0e5e2"
},
"execution_count": 17,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"run\n",
"observ\n",
"observ\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "nCXzgZjFFXC7",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "2174e022-0e07-4efa-e614-97d580a6ba3d"
},
"source": [
"lancaster_stemmer = LancasterStemmer()\n",
"print(lancaster_stemmer.stem('observing'))\n",
"print(lancaster_stemmer.stem('observs'))\n",
"print(lancaster_stemmer.stem('observe'))"
],
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"observ\n",
"observ\n",
"observ\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"lancaster_stemmer = LancasterStemmer()\n",
"print(lancaster_stemmer.stem('drives'))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "DsowvWrnNyiv",
"outputId": "17d8e10d-89a4-416a-e0b9-25c1313362d3"
},
"execution_count": 19,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"driv\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "4qMpWrItFo8L"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "R6mx2PWIFp3i"
},
"source": [
"Lemmatization"
]
},
{
"cell_type": "code",
"metadata": {
"id": "PF--ePtcFwK7"
},
"source": [
"from nltk.stem import WordNetLemmatizer"
],
"execution_count": 20,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "mFsSxM1oGJmp",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "32c0f797-5917-4c57-eb14-5086c3fe36e4"
},
"source": [
"nltk.download('wordnet')\n"
],
"execution_count": 21,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package wordnet to /root/nltk_data...\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 21
}
]
},
{
"cell_type": "code",
"source": [
"nltk.download('omw-1.4')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TyeIovskOorI",
"outputId": "b7626d78-2417-405a-a723-7beeedb2bc7b"
},
"execution_count": 22,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package omw-1.4 to /root/nltk_data...\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 22
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "IQezx9MEF4Vf",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "8421a954-57b6-4c50-c707-f94fadd1b9d7"
},
"source": [
"lemmatizer = WordNetLemmatizer()\n",
"print(lemmatizer.lemmatize(\"running\"))\n",
"print(lemmatizer.lemmatize(\"runs\"))"
],
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"running\n",
"run\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "MwKPf_NwBeZH"
},
"source": [
"**Lemmatizer: returns the verb, noun, adverb, and adjective forms of a word**"
]
},
{
"cell_type": "code",
"metadata": {
"id": "I-ZBwYcFGQZN"
},
"source": [
"def lemmatize(word):\n",
"    \"\"\"Print the WordNet lemma of `word` for each part of speech.\n",
"\n",
"    The word is lemmatized four times -- as a verb, noun, adverb and\n",
"    adjective -- and each result is printed on its own labelled line.\n",
"    Nothing is returned.\n",
"    \"\"\"\n",
"    wordnet_lemmatizer = WordNetLemmatizer()\n",
"    # (printed label, WordNet POS code) pairs, in reporting order.\n",
"    labelled_pos = (\n",
"        (\"verb form: \", \"v\"),\n",
"        (\"noun form: \", \"n\"),\n",
"        (\"adverb form: \", \"r\"),\n",
"        (\"adjective form: \", \"a\"),\n",
"    )\n",
"    for label, pos_code in labelled_pos:\n",
"        print(label + wordnet_lemmatizer.lemmatize(word, pos=pos_code))"
],
"execution_count": 24,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "mUHVRBpKGUvY",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e469878b-2f56-48d4-c761-de0fd81df3f6"
},
"source": [
"lemmatize(\"ears\")"
],
"execution_count": 25,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"verb form: ears\n",
"noun form: ear\n",
"adverb form: ears\n",
"adjective form: ears\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "xcNK5IKBGbGl",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "59027377-72cf-41d1-8bb8-15cb4583571b"
},
"source": [
"lemmatize(\"running\")"
],
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"verb form: run\n",
"noun form: running\n",
"adverb form: running\n",
"adjective form: running\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "t2n7L32FIxnZ"
},
"source": [
"The following code snippet shows the comparison between stemming and lemmatization."
]
},
{
"cell_type": "code",
"metadata": {
"id": "rbDvt4WWIzks"
},
"source": [
"from nltk.stem import PorterStemmer\n",
"from nltk.stem import WordNetLemmatizer"
],
"execution_count": 27,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "ZJqe8tl6I5B2"
},
"source": [
"# Shared instances reused by the stemming-vs-lemmatization comparison cells below.\n",
"stemmer = PorterStemmer()\n",
"lemmatizer = WordNetLemmatizer()"
],
"execution_count": 28,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "BY-Go7noI8FL",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "1455e624-5c24-4cd2-90b6-af0a6bf913c6"
},
"source": [
"print(stemmer.stem(\"deactivating\"))\n",
"print(stemmer.stem(\"deactivated\"))\n",
"print(stemmer.stem(\"deactivates\"))"
],
"execution_count": 29,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"deactiv\n",
"deactiv\n",
"deactiv\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "ung1veEJI_U4",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "d8545863-4e15-4c94-d31f-e5a135bb63fd"
},
"source": [
"print(lemmatizer.lemmatize(\"deactivating\", pos=\"v\"))\n",
"print(lemmatizer.lemmatize(\"deactivating\", pos=\"r\"))\n",
"print(lemmatizer.lemmatize(\"deactivating\", pos=\"n\"))"
],
"execution_count": 30,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"deactivate\n",
"deactivating\n",
"deactivating\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Z9AmVUhfJCyT",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "fa47810b-2fc9-4408-c79f-436af1aa37e1"
},
"source": [
"# Stem a handful of sample words, one result per line, so the output can be\n",
"# compared with the lemmatizer's output for the same words.\n",
"for sample_word in ('stones', 'speaking', 'bedroom', 'jokes', 'lisa', 'purple'):\n",
"    print(stemmer.stem(sample_word))"
],
"execution_count": 31,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"stone\n",
"speak\n",
"bedroom\n",
"joke\n",
"lisa\n",
"purpl\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "W6CA-NDtJFsR",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "55511b62-bdc2-4d88-de89-e7d640f0b784"
},
"source": [
"# Lemmatize the same sample words (default part of speech), one result per\n",
"# line, for a side-by-side comparison with the stemmer output.\n",
"for sample_word in ('stones', 'speaking', 'bedroom', 'jokes', 'lisa', 'purple'):\n",
"    print(lemmatizer.lemmatize(sample_word))"
],
"execution_count": 32,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"stone\n",
"speaking\n",
"bedroom\n",
"joke\n",
"lisa\n",
"purple\n"
]
}
]
},
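{
"cell_type": "markdown",
"metadata": {
"id": "WoAzLzz4r3J_"
},
"source": [
"Conclusion: use lemmatization when accuracy matters (it returns real dictionary words such as 'purple' and 'deactivate'); use stemming when speed matters (it can produce non-words such as 'purpl' and 'deactiv')."
]
},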
{
"cell_type": "markdown",
"metadata": {
"id": "jQ6K3CmAJLcw"
},
"source": [
"Part-Of-Speech (POS) Tagging"
]
},
{
"cell_type": "code",
"metadata": {
"id": "v0LP1UuwJMko",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "6074ceaf-775c-46a4-f7e9-5ccd2801f45c"
},
"source": [
"from nltk import word_tokenize, pos_tag\n",
"nltk.download('averaged_perceptron_tagger')"
],
"execution_count": 33,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package averaged_perceptron_tagger to\n",
"[nltk_data] /root/nltk_data...\n",
"[nltk_data] Unzipping taggers/averaged_perceptron_tagger.zip.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 33
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "hzMFNbHpJjw6"
},
"source": [
"sentence = \"I like many books.\""
],
"execution_count": 34,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "9zIalIxyJpqD",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "a6858379-d85b-4df2-aad2-b0910699e171"
},
"source": [
"sentence_tokens = word_tokenize(sentence)\n",
"print(sentence_tokens)"
],
"execution_count": 35,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['I', 'like', 'many', 'books', '.']\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "wWbPy0ysJtIc",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "2e1eaacb-e70c-42fe-8a6f-7f67cd3dd6bc"
},
"source": [
"pos_tag(sentence_tokens)"
],
"execution_count": 36,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[('I', 'PRP'), ('like', 'VBP'), ('many', 'JJ'), ('books', 'NNS'), ('.', '.')]"
]
},
"metadata": {},
"execution_count": 36
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "scWTkbFbSUku"
},
"source": [
"**Chunking: grouping words into phrases**"
]
},
{
"cell_type": "code",
"metadata": {
"id": "OakHkMcaQ7n5"
},
"source": [
"import nltk"
],
"execution_count": 37,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "t6CW1GPY6Kcw",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "ed1ef58a-a971-4bb6-d2be-8c256896049b"
},
"source": [
"nltk.download('averaged_perceptron_tagger')"
],
"execution_count": 38,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package averaged_perceptron_tagger to\n",
"[nltk_data] /root/nltk_data...\n",
"[nltk_data] Package averaged_perceptron_tagger is already up-to-\n",
"[nltk_data] date!\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 38
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "pEFFaoHl5_iK",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "1f680878-76b9-492d-fcf4-6abe83862733"
},
"source": [
"nltk.download('punkt')"
],
"execution_count": 39,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 39
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "FKyGctDmRCX8",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e6d2f260-ca43-4894-c9fc-90fab20a6ecc"
},
"source": [
"text = \"The clean data is important for application development.\"\n",
"tokens = nltk.word_tokenize(text)\n",
"print(tokens)\n",
"tagged = nltk.pos_tag(tokens)"
],
"execution_count": 40,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['The', 'clean', 'data', 'is', 'important', 'for', 'application', 'development', '.']\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "x00MmqRbRHpF",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "8d2fde33-f4d9-49f4-dcb1-08a43ebbe13b"
},
"source": [
"print(tagged)\n"
],
"execution_count": 41,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[('The', 'DT'), ('clean', 'JJ'), ('data', 'NN'), ('is', 'VBZ'), ('important', 'JJ'), ('for', 'IN'), ('application', 'NN'), ('development', 'NN'), ('.', '.')]\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "YZnw0ZSHRNWj"
},
"source": [
"grammar = \"NP: {<DT>?<JJ>*<NN>}\"\n",
"cp =nltk.RegexpParser(grammar)\n"
],
"execution_count": 42,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "qfMDg-eYRTfa",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "eb17a098-f129-4078-9c2d-28de8dc3c18f"
},
"source": [
"result = cp.parse(tagged)\n",
"print(result)\n"
],
"execution_count": 43,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(S\n",
" (NP The/DT clean/JJ data/NN)\n",
" is/VBZ\n",
" important/JJ\n",
" for/IN\n",
" (NP application/NN)\n",
" (NP development/NN)\n",
" ./.)\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_a_0bDRAA4GA"
},
"source": [
"**Parse tree**"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "fWlcWdUlSH7T"
},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "giFS1t4ySpqi"
},
"source": [
"Stop word Removal"
]
},
{
"cell_type": "code",
"metadata": {
"id": "U7t4cn6qStgA",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e038467e-7b5f-413a-b4ea-d63efe71d9ce"
},
"source": [
"from nltk.corpus import stopwords\n",
"from nltk.tokenize import word_tokenize\n",
"nltk.download('stopwords')"
],
"execution_count": 44,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
"[nltk_data] Unzipping corpora/stopwords.zip.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 44
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "H6SBIPDzSxpf",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "b6275d98-a6f8-4289-fde8-35e1decc6f8a"
},
"source": [
"print(stopwords.words('english'))"
],
"execution_count": 45,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', \"you're\", \"you've\", \"you'll\", \"you'd\", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', \"she's\", 'her', 'hers', 'herself', 'it', \"it's\", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', \"that'll\", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', \"don't\", 'should', \"should've\", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', \"aren't\", 'couldn', \"couldn't\", 'didn', \"didn't\", 'doesn', \"doesn't\", 'hadn', \"hadn't\", 'hasn', \"hasn't\", 'haven', \"haven't\", 'isn', \"isn't\", 'ma', 'mightn', \"mightn't\", 'mustn', \"mustn't\", 'needn', \"needn't\", 'shan', \"shan't\", 'shouldn', \"shouldn't\", 'wasn', \"wasn't\", 'weren', \"weren't\", 'won', \"won't\", 'wouldn', \"wouldn't\"]\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "WpQUUHQ5TAqH"
},
"source": [
"sentence = \"Data structure understanding is must for a computer engineer. Coding plays important role there.\""
],
"execution_count": 46,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "C_Oww7HVTF3a",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "4e86985c-7411-4234-81b4-5bbefb33c961"
},
"source": [
"word_tokens = word_tokenize(sentence)\n",
"print(word_tokens)"
],
"execution_count": 47,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['Data', 'structure', 'understanding', 'is', 'must', 'for', 'a', 'computer', 'engineer', '.', 'Coding', 'plays', 'important', 'role', 'there', '.']\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "oI-IJMo2TKIG"
},
"source": [
"# Load the stop-word list once, as a set for fast membership tests, instead\n",
"# of re-reading the corpus for every token.\n",
"english_stopwords = set(stopwords.words('english'))\n",
"# The NLTK list is all lower-case, so compare case-insensitively; otherwise\n",
"# capitalised stop words such as 'The' would slip through.\n",
"clean_tokens = [\n",
"    token for token in word_tokens\n",
"    if token.lower() not in english_stopwords\n",
"]"
],
"execution_count": 48,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "RABaBqsLTOoM",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "308029cc-ac27-4eaf-b5cf-8c097f4ca705"
},
"source": [
"print(clean_tokens)"
],
"execution_count": 49,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['Data', 'structure', 'understanding', 'must', 'computer', 'engineer', '.', 'Coding', 'plays', 'important', 'role', '.']\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "9tGQHvbTT1Eu"
},
"source": [
"Named Entity Recognition (first attempt; not used. Refer to the next NER section further below.)"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ZLViTaaWUKWU"
},
"source": [
"from nltk import word_tokenize, pos_tag, ne_chunk"
],
"execution_count": 50,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "5-LP7Jx9Ue41",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "6f331965-2cb4-47e5-8fb2-d089d070d58a"
},
"source": [
"nltk.download('maxent_ne_chunker')"
],
"execution_count": 51,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package maxent_ne_chunker to\n",
"[nltk_data] /root/nltk_data...\n",
"[nltk_data] Unzipping chunkers/maxent_ne_chunker.zip.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 51
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "N4Xbgs0vUv4_",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "2c8780fc-47a5-4ea6-dcc9-3e20336297f6"
},
"source": [
"nltk.download('words')"
],
"execution_count": 52,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package words to /root/nltk_data...\n",
"[nltk_data] Unzipping corpora/words.zip.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 52
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "MVzIZBueUOV4"
},
"source": [
"sentence = \"UNITED STATES Data structure INFOSYS Accenture understanding is must for a computer engineer. Coding plays important role there.\""
],
"execution_count": 53,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "-QwmSykLUZYT",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "d5671a7a-09da-48fe-c5a3-1052748b31e2"
},
"source": [
"print (ne_chunk(pos_tag(word_tokenize(sentence))))"
],
"execution_count": 54,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(S\n",
" (GPE UNITED/NNP)\n",
" (ORGANIZATION STATES/NNPS Data/NNP)\n",
" structure/NN\n",
" (ORGANIZATION INFOSYS/NNP)\n",
" Accenture/NNP\n",
" understanding/NN\n",
" is/VBZ\n",
" must/MD\n",
" for/IN\n",
" a/DT\n",
" computer/NN\n",
" engineer/NN\n",
" ./.\n",
" Coding/NNP\n",
" plays/VBZ\n",
" important/JJ\n",
" role/NN\n",
" there/RB\n",
" ./.)\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "O9sGdgldWZya"
},
"source": [
"WORDNET"
]
},
{
"cell_type": "code",
"metadata": {
"id": "zsPRIzIZYx8o"
},
"source": [
"from nltk.corpus import wordnet"
],
"execution_count": 55,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "aXJ6uXs7_4_s",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "c793678b-de3b-4cbb-f728-f4b8785cdd95"
},
"source": [
"nltk.download('wordnet')"
],
"execution_count": 56,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package wordnet to /root/nltk_data...\n",
"[nltk_data] Package wordnet is already up-to-date!\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 56
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "PK4QLOEFY04F",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "926fd833-4271-42ec-8fc9-48fb148ae47b"
},
"source": [
"wordnet.synsets(\"gun\")"
],
"execution_count": 57,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[Synset('gun.n.01'),\n",
" Synset('artillery.n.01'),\n",
" Synset('gunman.n.02'),\n",
" Synset('gunman.n.01'),\n",
" Synset('grease-gun.n.01'),\n",
" Synset('accelerator.n.01'),\n",
" Synset('gun.n.07'),\n",
" Synset('gun.v.01')]"
]
},
"metadata": {},
"execution_count": 57
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "mheokKiPALTZ",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "43640de8-639a-45e5-8260-3907bdb9c382"
},
"source": [
"wordnet.synsets(\"flower\")"
],
"execution_count": 58,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[Synset('flower.n.01'),\n",
" Synset('flower.n.02'),\n",
" Synset('flower.n.03'),\n",
" Synset('bloom.v.01')]"
]
},
"metadata": {},
"execution_count": 58
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "wQd6k10yY6Xf",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "2d7a1641-0940-4007-b0e7-2bf4b3f1a8a5"
},
"source": [
"syn = wordnet.synset('flower.n.01')\n",
"syn.lemma_names()"
],
"execution_count": 59,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['flower']"
]
},
"metadata": {},
"execution_count": 59
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "s2ku3tzXY9Uc",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"outputId": "42896c9e-9064-45fa-c579-a5533f27dacc"
},
"source": [
"syn.definition()"
],
"execution_count": 60,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'a plant cultivated for its blooms or blossoms'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 60
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "KGa2gwhpZAlD",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "2b3a75ae-032a-4020-f0e0-5fad08d93fa0"
},
"source": [
"wordnet.synset(\"flower.n.01\").examples()"
],
"execution_count": 61,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[]"
]
},
"metadata": {},
"execution_count": 61
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "g8OnqkeSED-n",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "65371b0e-e457-41ff-ecf9-531fa778f081"
},
"source": [
"# Collect every lemma name from every synset of 'long' (duplicates are kept).\n",
"# A comprehension keeps the loop variables local, so the `syn` synset bound\n",
"# in an earlier cell is not overwritten.\n",
"synonyms = [\n",
"    lemma.name()\n",
"    for synset in wordnet.synsets('long')\n",
"    for lemma in synset.lemmas()\n",
"]\n",
"print(synonyms)"
],
"execution_count": 62,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['hanker', 'long', 'yearn', 'long', 'long', 'long', 'retentive', 'recollective', 'long', 'tenacious', 'long', 'long', 'long', 'farseeing', 'farsighted', 'foresighted', 'foresightful', 'prospicient', 'long', 'longsighted', 'long', 'long', 'long']\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "R216YeipEKgl",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "eb8ffab1-18d0-423b-f440-f74864b4db82"
},
"source": [
"# For every lemma of every synset of 'like' that has at least one antonym,\n",
"# record the name of its first antonym (duplicates are kept).\n",
"# A comprehension keeps the loop variables local, so the `syn` synset bound\n",
"# in an earlier cell is not overwritten.\n",
"antonyms = [\n",
"    lemma.antonyms()[0].name()\n",
"    for synset in wordnet.synsets(\"like\")\n",
"    for lemma in synset.lemmas()\n",
"    if lemma.antonyms()\n",
"]\n",
"print(antonyms)"
],
"execution_count": 63,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['dislike', 'unlike', 'unlike', 'unalike']\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "U260zG3Nl3i4"
},
"source": [
"**Named Entity Parse Tree**"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nTCtebX1jd2_"
},
"source": [
"**Named Entity Recognition (NER)**"
]
},
{
"cell_type": "code",
"metadata": {
"id": "eowmIQAM6lAO"
},
"source": [
"from nltk import word_tokenize, pos_tag, ne_chunk"
],
"execution_count": 64,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "zC_p3trd72Pw",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "a9367b95-06a9-4437-849d-1ab47fe6882b"
},
"source": [
"nltk.download('maxent_ne_chunker')"
],
"execution_count": 65,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package maxent_ne_chunker to\n",
"[nltk_data] /root/nltk_data...\n",
"[nltk_data] Package maxent_ne_chunker is already up-to-date!\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 65
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "gKYXp-6D8ATN",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "ca92cdb4-dba4-446d-80df-5c2a59704010"
},
"source": [
"nltk.download('words')"
],
"execution_count": 66,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package words to /root/nltk_data...\n",
"[nltk_data] Package words is already up-to-date!\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 66
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "oRzaAmzf6tz8"
},
"source": [
"text = \"Johney works at Intel.\" # str"
],
"execution_count": 67,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "5VbHWj7Y7B2t",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "a8063c60-2426-43e9-f8d7-5dd695115277"
},
"source": [
"tokens = word_tokenize(text)\n",
"print (tokens) "
],
"execution_count": 68,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['Johney', 'works', 'at', 'Intel', '.']\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Dl3GiGZH67Is",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "ab1d6a9b-a587-4457-fa86-8f3ef1bded36"
},
"source": [
"tagged_tokens1 = pos_tag(tokens)\n",
"print (tagged_tokens1 )"
],
"execution_count": 69,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[('Johney', 'NNP'), ('works', 'VBZ'), ('at', 'IN'), ('Intel', 'NNP'), ('.', '.')]\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "w3jejHfS6xRM"
},
"source": [
"ner_tree = ne_chunk(tagged_tokens1)\n"
],
"execution_count": 70,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "LEdaSyTE8GKV",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "eafcfb62-ef1b-4fb2-b796-d260d10575ab"
},
"source": [
"print(ner_tree)"
],
"execution_count": 71,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(S (PERSON Johney/NNP) works/VBZ at/IN (ORGANIZATION Intel/NNP) ./.)\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "emj20fQ3jxE8"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "_q3FNMa0jxCI"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "soUnj1Ejjw_F"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "RjRK_FDCjw79"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "5xRNHQPyjw5N"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "yIzdgn_9jw2W"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "98Dmry_PjwzJ"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "yZXM-yzNjwwH"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Vc70JUHXjwsz"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "MKzn705ajwpo"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "3XKBppzmjwlj"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "sO1V9ZUyjwck"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "sGbmwQFajwYc"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "gOHOYtLu8fPn"
},
"source": [
"Further reading: <https://nlpforhackers.io/introduction-nltk/>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Abd4gOCNGPwD"
},
"source": [
"**Exercise: Implement Extractive Text Summarization**"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "eE8BoHIOHl0N"
},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "JIvfm_GJF8l4"
},
"source": [
"Explore further with the spaCy package: https://gist.github.com/LahiruTjay"
]
},
{
"cell_type": "code",
"metadata": {
"id": "VIMHlXoAKiPG"
},
"source": [
"from nltk.parse.corenlp import CoreNLPDependencyParser\n",
"\n",
"\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "axtJNviyKngt"
},
"source": [
"parser = CoreNLPDependencyParser()\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "yUxlRX9DKs_K",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 860
},
"outputId": "1869fb93-1ee7-4f7b-bd75-a83c7afa7290"
},
"source": [
"parse = next(parser.raw_parse(\"I put the book in the box on the table.\"))"
],
"execution_count": null,
"outputs": [
{
"output_type": "error",
"ename": "ConnectionError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mConnectionRefusedError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connection.py\u001b[0m in \u001b[0;36m_new_conn\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 158\u001b[0m conn = connection.create_connection(\n\u001b[0;32m--> 159\u001b[0;31m (self._dns_host, self.port), self.timeout, **extra_kw)\n\u001b[0m\u001b[1;32m 160\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/util/connection.py\u001b[0m in \u001b[0;36mcreate_connection\u001b[0;34m(address, timeout, source_address, socket_options)\u001b[0m\n\u001b[1;32m 79\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0merr\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 80\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 81\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/util/connection.py\u001b[0m in \u001b[0;36mcreate_connection\u001b[0;34m(address, timeout, source_address, socket_options)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0msock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource_address\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 70\u001b[0;31m \u001b[0msock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msa\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 71\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0msock\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mConnectionRefusedError\u001b[0m: [Errno 111] Connection refused",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mNewConnectionError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 599\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 600\u001b[0;31m chunked=chunked)\n\u001b[0m\u001b[1;32m 601\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 353\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 354\u001b[0;31m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mhttplib_request_kw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 355\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m 1263\u001b[0m \u001b[0;34m\"\"\"Send a complete request to the server.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1264\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_send_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36m_send_request\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m 1309\u001b[0m \u001b[0mbody\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'body'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1310\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mendheaders\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1311\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mendheaders\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m 1258\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mCannotSendHeader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1259\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_send_output\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage_body\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1260\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36m_send_output\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m 1037\u001b[0m \u001b[0;32mdel\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_buffer\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1038\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1039\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m 975\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauto_open\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 976\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 977\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connection.py\u001b[0m in \u001b[0;36mconnect\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 181\u001b[0;31m \u001b[0mconn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_new_conn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 182\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prepare_conn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connection.py\u001b[0m in \u001b[0;36m_new_conn\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 167\u001b[0m raise NewConnectionError(\n\u001b[0;32m--> 168\u001b[0;31m self, \"Failed to establish a new connection: %s\" % e)\n\u001b[0m\u001b[1;32m 169\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNewConnectionError\u001b[0m: <urllib3.connection.HTTPConnection object at 0x7fd71eca65f8>: Failed to establish a new connection: [Errno 111] Connection refused",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mMaxRetryError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 448\u001b[0m \u001b[0mretries\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_retries\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 449\u001b[0;31m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 450\u001b[0m )\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 637\u001b[0m retries = retries.increment(method, url, error=e, _pool=self,\n\u001b[0;32m--> 638\u001b[0;31m _stacktrace=sys.exc_info()[2])\n\u001b[0m\u001b[1;32m 639\u001b[0m \u001b[0mretries\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/util/retry.py\u001b[0m in \u001b[0;36mincrement\u001b[0;34m(self, method, url, response, error, _pool, _stacktrace)\u001b[0m\n\u001b[1;32m 398\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnew_retry\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_exhausted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 399\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mMaxRetryError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_pool\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merror\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mResponseError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcause\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 400\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mMaxRetryError\u001b[0m: HTTPConnectionPool(host='localhost', port=9000): Max retries exceeded with url: /?properties=%7B%22outputFormat%22%3A+%22json%22%2C+%22annotators%22%3A+%22tokenize%2Cpos%2Clemma%2Cssplit%2Cdepparse%22%2C+%22ssplit.ssplit.eolonly%22%3A+%22true%22%2C+%22tokenize.whitespace%22%3A+%22false%22%7D (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fd71eca65f8>: Failed to establish a new connection: [Errno 111] Connection refused',))",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mConnectionError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-328-456a0466293b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mparse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraw_parse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"I put the book in the box on the table.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/nltk/parse/corenlp.py\u001b[0m in \u001b[0;36mraw_parse\u001b[0;34m(self, sentence, properties, *args, **kwargs)\u001b[0m\n\u001b[1;32m 226\u001b[0m \u001b[0mproperties\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdefault_properties\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 227\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 228\u001b[0;31m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 229\u001b[0m )\n\u001b[1;32m 230\u001b[0m )\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/nltk/parse/corenlp.py\u001b[0m in \u001b[0;36mraw_parse_sents\u001b[0;34m(self, sentences, verbose, properties, *args, **kwargs)\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0;32myield\u001b[0m \u001b[0miter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtree\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 289\u001b[0m \"\"\"\n\u001b[0;32m--> 290\u001b[0;31m \u001b[0mparsed_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapi_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msentences\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mproperties\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdefault_properties\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 291\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mparsed_sent\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mparsed_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sentences'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[0mtree\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmake_tree\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparsed_sent\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/nltk/parse/corenlp.py\u001b[0m in \u001b[0;36mapi_call\u001b[0;34m(self, data, properties)\u001b[0m\n\u001b[1;32m 246\u001b[0m },\n\u001b[1;32m 247\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoding\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 248\u001b[0;31m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m60\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 249\u001b[0m )\n\u001b[1;32m 250\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/requests/sessions.py\u001b[0m in \u001b[0;36mpost\u001b[0;34m(self, url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m 576\u001b[0m \"\"\"\n\u001b[1;32m 577\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 578\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'POST'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 579\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 580\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/requests/sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 528\u001b[0m }\n\u001b[1;32m 529\u001b[0m \u001b[0msend_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 530\u001b[0;31m \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 531\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 532\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[0;31m# Send the request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 643\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 644\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 645\u001b[0m \u001b[0;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 514\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mSSLError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 515\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 516\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mConnectionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 517\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 518\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mClosedPoolError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mConnectionError\u001b[0m: HTTPConnectionPool(host='localhost', port=9000): Max retries exceeded with url: /?properties=%7B%22outputFormat%22%3A+%22json%22%2C+%22annotators%22%3A+%22tokenize%2Cpos%2Clemma%2Cssplit%2Cdepparse%22%2C+%22ssplit.ssplit.eolonly%22%3A+%22true%22%2C+%22tokenize.whitespace%22%3A+%22false%22%7D (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fd71eca65f8>: Failed to establish a new connection: [Errno 111] Connection refused',))"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "EGGtaCX8K_Ym"
},
"source": [
"from nltk.parse.corenlp import CoreNLPServer"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "lvvY6dzjS8js"
},
"source": [
"from nltk.parse.corenlp import CoreNLPDependencyParser\n",
"\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "qcVPnfZ-TIce"
},
"source": [
"parser = CoreNLPDependencyParser()\n"
],
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment