Created
July 21, 2022 05:56
-
-
Save hakxcore/092abb3476acfac2492d3229c1f5809f to your computer and use it in GitHub Desktop.
NLTK.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "NLTK.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/hakxcore/092abb3476acfac2492d3229c1f5809f/nltk.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "UcIcD13vA-GL" | |
}, | |
"source": [ | |
"**Importing NLTK Library**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Yi5BB4LaAj-l" | |
}, | |
"source": [ | |
"import nltk" | |
], | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "UUxuu_4OBSCv", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "84d29aa9-32a8-41a0-894b-6d1ee5218299" | |
}, | |
"source": [ | |
"nltk.download('punkt')" | |
], | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"[nltk_data] Downloading package punkt to /root/nltk_data...\n", | |
"[nltk_data] Unzipping tokenizers/punkt.zip.\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 2 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Dtlcx68xAHQa" | |
}, | |
"source": [ | |
"from nltk import sent_tokenize" | |
], | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "dbNBY1CPAtkI" | |
}, | |
"source": [ | |
"text = \"GOOD MORNING all. Hope you will like this video. Thank You.\"" | |
], | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "EiySbKvnBA_6" | |
}, | |
"source": [ | |
"sentence_tokens = sent_tokenize(text)\n" | |
], | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "MP69RopMAxj9", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "31f64bd7-4be0-46e8-cdcd-f51dd12d1be7" | |
}, | |
"source": [ | |
"print(sentence_tokens)\n" | |
], | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"['GOOD MORNING all.', 'Hope you will like this video.', 'Thank You.']\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "h8QjvvQcBlr8", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "4f464b98-ae59-480a-e014-fb760c6f941b" | |
}, | |
"source": [ | |
"for sentence in sentence_tokens:\n", | |
" print(sentence)" | |
], | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"GOOD MORNING all.\n", | |
"Hope you will like this video.\n", | |
"Thank You.\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "SaTLjuVkC6lo" | |
}, | |
"source": [ | |
"Word Tokenization" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "dtpcHtHsC9dQ" | |
}, | |
"source": [ | |
"from nltk.tokenize import word_tokenize" | |
], | |
"execution_count": 8, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "dkiIXFLnDAP5" | |
}, | |
"source": [ | |
"sentence = \"Let's understand this concept in detail!\"" | |
], | |
"execution_count": 9, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "7UdVxk5TDGNr", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "221cadc8-d4d3-4854-dae4-58da97a4727a" | |
}, | |
"source": [ | |
"word_tokens = word_tokenize(sentence)\n", | |
"print(word_tokens)" | |
], | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"['Let', \"'s\", 'understand', 'this', 'concept', 'in', 'detail', '!']\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "hU6nZAsjDYV_" | |
}, | |
"source": [ | |
"from nltk.tokenize import TreebankWordTokenizer, WordPunctTokenizer, WhitespaceTokenizer\n", | |
"tree_tokenizer = TreebankWordTokenizer()\n", | |
"word_punct_tokenizer = WordPunctTokenizer()\n", | |
"white_space_tokenizer = WhitespaceTokenizer()" | |
], | |
"execution_count": 11, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "vtJuAjrTDdvO", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "eb77c13e-b6c2-407a-e5bd-391f62fda6cc" | |
}, | |
"source": [ | |
"word_tokens = tree_tokenizer.tokenize(sentence)\n", | |
"print(word_tokens)" | |
], | |
"execution_count": 12, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"['Let', \"'s\", 'understand', 'this', 'concept', 'in', 'detail', '!']\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "3gR0b2w2DhIB", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "5b1fa415-0050-4a89-e8a1-609accc0285e" | |
}, | |
"source": [ | |
"word_tokens = word_punct_tokenizer.tokenize(sentence)\n", | |
"print(word_tokens)" | |
], | |
"execution_count": 13, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"['Let', \"'\", 's', 'understand', 'this', 'concept', 'in', 'detail', '!']\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "-ixsNWXNDld7", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "02a1405b-4e5f-4fbf-a6ba-98bbed8144be" | |
}, | |
"source": [ | |
"word_tokens = white_space_tokenizer.tokenize(sentence)\n", | |
"print(word_tokens)" | |
], | |
"execution_count": 14, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"[\"Let's\", 'understand', 'this', 'concept', 'in', 'detail!']\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "wup0YgPrFMBa" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "B42UVJnSFKK7" | |
}, | |
"source": [ | |
"Stemming" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "zux2s57QFOzv" | |
}, | |
"source": [ | |
"from nltk.stem import PorterStemmer, LancasterStemmer" | |
], | |
"execution_count": 15, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "NL5qiH9oFT6s", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "e8c030d4-a477-4388-8aab-44d12c095140" | |
}, | |
"source": [ | |
"porter_stemmer = PorterStemmer()\n", | |
"print(porter_stemmer.stem('observing'))\n", | |
"print(porter_stemmer.stem('observs'))\n", | |
"print(porter_stemmer.stem('observe'))" | |
], | |
"execution_count": 16, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"observ\n", | |
"observ\n", | |
"observ\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"porter_stemmer = PorterStemmer()\n", | |
"print(porter_stemmer.stem('running'))\n", | |
"print(porter_stemmer.stem('observs'))\n", | |
"print(porter_stemmer.stem('observe'))" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "kaxqJ6PhNm7u", | |
"outputId": "b8751eac-4e8f-4d56-e928-1c80afc0e5e2" | |
}, | |
"execution_count": 17, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"run\n", | |
"observ\n", | |
"observ\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "nCXzgZjFFXC7", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "2174e022-0e07-4efa-e614-97d580a6ba3d" | |
}, | |
"source": [ | |
"lancaster_stemmer = LancasterStemmer()\n", | |
"print(lancaster_stemmer.stem('observing'))\n", | |
"print(lancaster_stemmer.stem('observs'))\n", | |
"print(lancaster_stemmer.stem('observe'))" | |
], | |
"execution_count": 18, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"observ\n", | |
"observ\n", | |
"observ\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"lancaster_stemmer = LancasterStemmer()\n", | |
"print(lancaster_stemmer.stem('drives'))" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "DsowvWrnNyiv", | |
"outputId": "17d8e10d-89a4-416a-e0b9-25c1313362d3" | |
}, | |
"execution_count": 19, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"driv\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "4qMpWrItFo8L" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "R6mx2PWIFp3i" | |
}, | |
"source": [ | |
"Lemmatization" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "PF--ePtcFwK7" | |
}, | |
"source": [ | |
"from nltk.stem import WordNetLemmatizer" | |
], | |
"execution_count": 20, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "mFsSxM1oGJmp", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "32c0f797-5917-4c57-eb14-5086c3fe36e4" | |
}, | |
"source": [ | |
"nltk.download('wordnet')\n" | |
], | |
"execution_count": 21, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"[nltk_data] Downloading package wordnet to /root/nltk_data...\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 21 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"nltk.download('omw-1.4')" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "TyeIovskOorI", | |
"outputId": "b7626d78-2417-405a-a723-7beeedb2bc7b" | |
}, | |
"execution_count": 22, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"[nltk_data] Downloading package omw-1.4 to /root/nltk_data...\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 22 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "IQezx9MEF4Vf", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "8421a954-57b6-4c50-c707-f94fadd1b9d7" | |
}, | |
"source": [ | |
"lemmatizer = WordNetLemmatizer()\n", | |
"print(lemmatizer.lemmatize(\"running\"))\n", | |
"print(lemmatizer.lemmatize(\"runs\"))" | |
], | |
"execution_count": 23, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"running\n", | |
"run\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "MwKPf_NwBeZH" | |
}, | |
"source": [ | |
"**Lemmatizer: returns the verb, noun, adverb, and adjective forms of a word**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "I-ZBwYcFGQZN" | |
}, | |
"source": [ | |
"def lemmatize(word):\n", | |
" lemmatizer = WordNetLemmatizer()\n", | |
" print(\"verb form: \" +lemmatizer.lemmatize(word, pos=\"v\"))\n", | |
" print(\"noun form: \" + lemmatizer.lemmatize(word, pos=\"n\"))\n", | |
" print(\"adverb form: \" + lemmatizer.lemmatize(word, pos=\"r\"))\n", | |
" print(\"adjective form: \" + lemmatizer.lemmatize(word, pos=\"a\"))" | |
], | |
"execution_count": 24, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "mUHVRBpKGUvY", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "e469878b-2f56-48d4-c761-de0fd81df3f6" | |
}, | |
"source": [ | |
"lemmatize(\"ears\")" | |
], | |
"execution_count": 25, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"verb form: ears\n", | |
"noun form: ear\n", | |
"adverb form: ears\n", | |
"adjective form: ears\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "xcNK5IKBGbGl", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "59027377-72cf-41d1-8bb8-15cb4583571b" | |
}, | |
"source": [ | |
"lemmatize(\"running\")" | |
], | |
"execution_count": 26, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"verb form: run\n", | |
"noun form: running\n", | |
"adverb form: running\n", | |
"adjective form: running\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "t2n7L32FIxnZ" | |
}, | |
"source": [ | |
"The following code snippet shows the comparison between stemming and lemmatization." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "rbDvt4WWIzks" | |
}, | |
"source": [ | |
"from nltk.stem import PorterStemmer\n", | |
"from nltk.stem import WordNetLemmatizer" | |
], | |
"execution_count": 27, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ZJqe8tl6I5B2" | |
}, | |
"source": [ | |
"stemmer = PorterStemmer();\n", | |
"lemmatizer = WordNetLemmatizer()" | |
], | |
"execution_count": 28, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "BY-Go7noI8FL", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "1455e624-5c24-4cd2-90b6-af0a6bf913c6" | |
}, | |
"source": [ | |
"print(stemmer.stem(\"deactivating\"))\n", | |
"print(stemmer.stem(\"deactivated\"))\n", | |
"print(stemmer.stem(\"deactivates\"))" | |
], | |
"execution_count": 29, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"deactiv\n", | |
"deactiv\n", | |
"deactiv\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ung1veEJI_U4", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "d8545863-4e15-4c94-d31f-e5a135bb63fd" | |
}, | |
"source": [ | |
"print(lemmatizer.lemmatize(\"deactivating\", pos=\"v\"))\n", | |
"print(lemmatizer.lemmatize(\"deactivating\", pos=\"r\"))\n", | |
"print(lemmatizer.lemmatize(\"deactivating\", pos=\"n\"))" | |
], | |
"execution_count": 30, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"deactivate\n", | |
"deactivating\n", | |
"deactivating\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Z9AmVUhfJCyT", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "fa47810b-2fc9-4408-c79f-436af1aa37e1" | |
}, | |
"source": [ | |
"print(stemmer.stem('stones')) \n", | |
"print(stemmer.stem('speaking')) \n", | |
"print(stemmer.stem('bedroom')) \n", | |
"print(stemmer.stem('jokes')) \n", | |
"print(stemmer.stem('lisa')) \n", | |
"print(stemmer.stem('purple'))" | |
], | |
"execution_count": 31, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"stone\n", | |
"speak\n", | |
"bedroom\n", | |
"joke\n", | |
"lisa\n", | |
"purpl\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "W6CA-NDtJFsR", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "55511b62-bdc2-4d88-de89-e7d640f0b784" | |
}, | |
"source": [ | |
"print(lemmatizer.lemmatize('stones')) \n", | |
"print(lemmatizer.lemmatize('speaking'))\n", | |
"print(lemmatizer.lemmatize('bedroom'))\n", | |
"print(lemmatizer.lemmatize('jokes'))\n", | |
"print(lemmatizer.lemmatize('lisa'))\n", | |
"print(lemmatizer.lemmatize('purple'))" | |
], | |
"execution_count": 32, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"stone\n", | |
"speaking\n", | |
"bedroom\n", | |
"joke\n", | |
"lisa\n", | |
"purple\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "WoAzLzz4r3J_" | |
}, | |
"source": [ | |
"Conclusion: use lemmatization when accuracy matters (it returns valid dictionary words); use stemming when speed matters (it is faster but may produce non-words such as 'purpl')." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "jQ6K3CmAJLcw" | |
}, | |
"source": [ | |
"Part-Of-Speech (POS) Tagging" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "v0LP1UuwJMko", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "6074ceaf-775c-46a4-f7e9-5ccd2801f45c" | |
}, | |
"source": [ | |
"from nltk import word_tokenize, pos_tag\n", | |
"nltk.download('averaged_perceptron_tagger')" | |
], | |
"execution_count": 33, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"[nltk_data] Downloading package averaged_perceptron_tagger to\n", | |
"[nltk_data] /root/nltk_data...\n", | |
"[nltk_data] Unzipping taggers/averaged_perceptron_tagger.zip.\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 33 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "hzMFNbHpJjw6" | |
}, | |
"source": [ | |
"sentence = \"I like many books.\"" | |
], | |
"execution_count": 34, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "9zIalIxyJpqD", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "a6858379-d85b-4df2-aad2-b0910699e171" | |
}, | |
"source": [ | |
"sentence_tokens = word_tokenize(sentence)\n", | |
"print(sentence_tokens)" | |
], | |
"execution_count": 35, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"['I', 'like', 'many', 'books', '.']\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "wWbPy0ysJtIc", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "2e1eaacb-e70c-42fe-8a6f-7f67cd3dd6bc" | |
}, | |
"source": [ | |
"pos_tag(sentence_tokens)" | |
], | |
"execution_count": 36, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"[('I', 'PRP'), ('like', 'VBP'), ('many', 'JJ'), ('books', 'NNS'), ('.', '.')]" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 36 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "scWTkbFbSUku" | |
}, | |
"source": [ | |
"**Chunking: grouping words into phrases**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "OakHkMcaQ7n5" | |
}, | |
"source": [ | |
"import nltk" | |
], | |
"execution_count": 37, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "t6CW1GPY6Kcw", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "ed1ef58a-a971-4bb6-d2be-8c256896049b" | |
}, | |
"source": [ | |
"nltk.download('averaged_perceptron_tagger')" | |
], | |
"execution_count": 38, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"[nltk_data] Downloading package averaged_perceptron_tagger to\n", | |
"[nltk_data] /root/nltk_data...\n", | |
"[nltk_data] Package averaged_perceptron_tagger is already up-to-\n", | |
"[nltk_data] date!\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 38 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "pEFFaoHl5_iK", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "1f680878-76b9-492d-fcf4-6abe83862733" | |
}, | |
"source": [ | |
"nltk.download('punkt')" | |
], | |
"execution_count": 39, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"[nltk_data] Downloading package punkt to /root/nltk_data...\n", | |
"[nltk_data] Package punkt is already up-to-date!\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 39 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "FKyGctDmRCX8", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "e6d2f260-ca43-4894-c9fc-90fab20a6ecc" | |
}, | |
"source": [ | |
"text = \"The clean data is important for application development.\"\n", | |
"tokens = nltk.word_tokenize(text)\n", | |
"print(tokens)\n", | |
"tagged = nltk.pos_tag(tokens)" | |
], | |
"execution_count": 40, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"['The', 'clean', 'data', 'is', 'important', 'for', 'application', 'development', '.']\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "x00MmqRbRHpF", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "8d2fde33-f4d9-49f4-dcb1-08a43ebbe13b" | |
}, | |
"source": [ | |
"print(tagged)\n" | |
], | |
"execution_count": 41, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"[('The', 'DT'), ('clean', 'JJ'), ('data', 'NN'), ('is', 'VBZ'), ('important', 'JJ'), ('for', 'IN'), ('application', 'NN'), ('development', 'NN'), ('.', '.')]\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "YZnw0ZSHRNWj" | |
}, | |
"source": [ | |
"grammar = \"NP: {<DT>?<JJ>*<NN>}\"\n", | |
"cp =nltk.RegexpParser(grammar)\n" | |
], | |
"execution_count": 42, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "qfMDg-eYRTfa", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "eb17a098-f129-4078-9c2d-28de8dc3c18f" | |
}, | |
"source": [ | |
"result = cp.parse(tagged)\n", | |
"print(result)\n" | |
], | |
"execution_count": 43, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"(S\n", | |
" (NP The/DT clean/JJ data/NN)\n", | |
" is/VBZ\n", | |
" important/JJ\n", | |
" for/IN\n", | |
" (NP application/NN)\n", | |
" (NP development/NN)\n", | |
" ./.)\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "_a_0bDRAA4GA" | |
}, | |
"source": [ | |
"**Parse tree**" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "fWlcWdUlSH7T" | |
}, | |
"source": [ | |
"" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "giFS1t4ySpqi" | |
}, | |
"source": [ | |
"Stop Word Removal" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "U7t4cn6qStgA", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "e038467e-7b5f-413a-b4ea-d63efe71d9ce" | |
}, | |
"source": [ | |
"from nltk.corpus import stopwords\n", | |
"from nltk.tokenize import word_tokenize\n", | |
"nltk.download('stopwords')" | |
], | |
"execution_count": 44, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"[nltk_data] Downloading package stopwords to /root/nltk_data...\n", | |
"[nltk_data] Unzipping corpora/stopwords.zip.\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 44 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "H6SBIPDzSxpf", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "b6275d98-a6f8-4289-fde8-35e1decc6f8a" | |
}, | |
"source": [ | |
"print(stopwords.words('english'))" | |
], | |
"execution_count": 45, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', \"you're\", \"you've\", \"you'll\", \"you'd\", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', \"she's\", 'her', 'hers', 'herself', 'it', \"it's\", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', \"that'll\", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', \"don't\", 'should', \"should've\", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', \"aren't\", 'couldn', \"couldn't\", 'didn', \"didn't\", 'doesn', \"doesn't\", 'hadn', \"hadn't\", 'hasn', \"hasn't\", 'haven', \"haven't\", 'isn', \"isn't\", 'ma', 'mightn', \"mightn't\", 'mustn', \"mustn't\", 'needn', \"needn't\", 'shan', \"shan't\", 'shouldn', \"shouldn't\", 'wasn', \"wasn't\", 'weren', \"weren't\", 'won', \"won't\", 'wouldn', \"wouldn't\"]\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "WpQUUHQ5TAqH" | |
}, | |
"source": [ | |
"sentence = \"Data structure understanding is must for a computer engineer. Coding plays important role there.\"" | |
], | |
"execution_count": 46, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "C_Oww7HVTF3a", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "4e86985c-7411-4234-81b4-5bbefb33c961" | |
}, | |
"source": [ | |
"word_tokens = word_tokenize(sentence)\n", | |
"print(word_tokens)" | |
], | |
"execution_count": 47, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"['Data', 'structure', 'understanding', 'is', 'must', 'for', 'a', 'computer', 'engineer', '.', 'Coding', 'plays', 'important', 'role', 'there', '.']\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "oI-IJMo2TKIG" | |
}, | |
"source": [ | |
"clean_tokens = word_tokens[:] \n", | |
"for token in word_tokens:\n", | |
" if token in stopwords.words('english'):\n", | |
" clean_tokens.remove(token)" | |
], | |
"execution_count": 48, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "RABaBqsLTOoM", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "308029cc-ac27-4eaf-b5cf-8c097f4ca705" | |
}, | |
"source": [ | |
"print(clean_tokens)" | |
], | |
"execution_count": 49, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"['Data', 'structure', 'understanding', 'must', 'computer', 'engineer', '.', 'Coding', 'plays', 'important', 'role', '.']\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "9tGQHvbTT1Eu" | |
}, | |
"source": [ | |
"Named Entity Recognition (not used; refer to the next section)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ZLViTaaWUKWU" | |
}, | |
"source": [ | |
"from nltk import word_tokenize, pos_tag, ne_chunk" | |
], | |
"execution_count": 50, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "5-LP7Jx9Ue41", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "6f331965-2cb4-47e5-8fb2-d089d070d58a" | |
}, | |
"source": [ | |
"nltk.download('maxent_ne_chunker')" | |
], | |
"execution_count": 51, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"[nltk_data] Downloading package maxent_ne_chunker to\n", | |
"[nltk_data] /root/nltk_data...\n", | |
"[nltk_data] Unzipping chunkers/maxent_ne_chunker.zip.\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 51 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "N4Xbgs0vUv4_", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "2c8780fc-47a5-4ea6-dcc9-3e20336297f6" | |
}, | |
"source": [ | |
"nltk.download('words')" | |
], | |
"execution_count": 52, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"[nltk_data] Downloading package words to /root/nltk_data...\n", | |
"[nltk_data] Unzipping corpora/words.zip.\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 52 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "MVzIZBueUOV4" | |
}, | |
"source": [ | |
"sentence = \"UNITED STATES Data structure INFOSYS Accenture understanding is must for a computer engineer. Coding plays important role there.\"" | |
], | |
"execution_count": 53, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "-QwmSykLUZYT", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "d5671a7a-09da-48fe-c5a3-1052748b31e2" | |
}, | |
"source": [ | |
"print (ne_chunk(pos_tag(word_tokenize(sentence))))" | |
], | |
"execution_count": 54, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"(S\n", | |
" (GPE UNITED/NNP)\n", | |
" (ORGANIZATION STATES/NNPS Data/NNP)\n", | |
" structure/NN\n", | |
" (ORGANIZATION INFOSYS/NNP)\n", | |
" Accenture/NNP\n", | |
" understanding/NN\n", | |
" is/VBZ\n", | |
" must/MD\n", | |
" for/IN\n", | |
" a/DT\n", | |
" computer/NN\n", | |
" engineer/NN\n", | |
" ./.\n", | |
" Coding/NNP\n", | |
" plays/VBZ\n", | |
" important/JJ\n", | |
" role/NN\n", | |
" there/RB\n", | |
" ./.)\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "O9sGdgldWZya" | |
}, | |
"source": [ | |
"WORDNET" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "zsPRIzIZYx8o" | |
}, | |
"source": [ | |
"from nltk.corpus import wordnet" | |
], | |
"execution_count": 55, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "aXJ6uXs7_4_s", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "c793678b-de3b-4cbb-f728-f4b8785cdd95" | |
}, | |
"source": [ | |
"nltk.download('wordnet')" | |
], | |
"execution_count": 56, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"[nltk_data] Downloading package wordnet to /root/nltk_data...\n", | |
"[nltk_data] Package wordnet is already up-to-date!\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 56 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "PK4QLOEFY04F", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "926fd833-4271-42ec-8fc9-48fb148ae47b" | |
}, | |
"source": [ | |
"wordnet.synsets(\"gun\")" | |
], | |
"execution_count": 57, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"[Synset('gun.n.01'),\n", | |
" Synset('artillery.n.01'),\n", | |
" Synset('gunman.n.02'),\n", | |
" Synset('gunman.n.01'),\n", | |
" Synset('grease-gun.n.01'),\n", | |
" Synset('accelerator.n.01'),\n", | |
" Synset('gun.n.07'),\n", | |
" Synset('gun.v.01')]" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 57 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "mheokKiPALTZ", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "43640de8-639a-45e5-8260-3907bdb9c382" | |
}, | |
"source": [ | |
"wordnet.synsets(\"flower\")" | |
], | |
"execution_count": 58, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"[Synset('flower.n.01'),\n", | |
" Synset('flower.n.02'),\n", | |
" Synset('flower.n.03'),\n", | |
" Synset('bloom.v.01')]" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 58 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "wQd6k10yY6Xf", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "2d7a1641-0940-4007-b0e7-2bf4b3f1a8a5" | |
}, | |
"source": [ | |
"syn = wordnet.synset('flower.n.01')\n", | |
"syn.lemma_names()" | |
], | |
"execution_count": 59, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"['flower']" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 59 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "s2ku3tzXY9Uc", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 35 | |
}, | |
"outputId": "42896c9e-9064-45fa-c579-a5533f27dacc" | |
}, | |
"source": [ | |
"syn.definition()" | |
], | |
"execution_count": 60, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"'a plant cultivated for its blooms or blossoms'" | |
], | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "string" | |
} | |
}, | |
"metadata": {}, | |
"execution_count": 60 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "KGa2gwhpZAlD", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "2b3a75ae-032a-4020-f0e0-5fad08d93fa0" | |
}, | |
"source": [ | |
"wordnet.synset(\"flower.n.01\").examples()" | |
], | |
"execution_count": 61, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"[]" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 61 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "g8OnqkeSED-n", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "65371b0e-e457-41ff-ecf9-531fa778f081" | |
}, | |
"source": [ | |
"synonyms = []\n", | |
"for syn in wordnet.synsets('long'):\n", | |
" for lemma in syn.lemmas():\n", | |
" synonyms.append(lemma.name())\n", | |
"print(synonyms)" | |
], | |
"execution_count": 62, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"['hanker', 'long', 'yearn', 'long', 'long', 'long', 'retentive', 'recollective', 'long', 'tenacious', 'long', 'long', 'long', 'farseeing', 'farsighted', 'foresighted', 'foresightful', 'prospicient', 'long', 'longsighted', 'long', 'long', 'long']\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "R216YeipEKgl", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "eb8ffab1-18d0-423b-f440-f74864b4db82" | |
}, | |
"source": [ | |
"antonyms = []\n", | |
"for syn in wordnet.synsets(\"like\"):\n", | |
" for l in syn.lemmas():\n", | |
" if l.antonyms():\n", | |
" antonyms.append(l.antonyms()[0].name())\n", | |
"print(antonyms)" | |
], | |
"execution_count": 63, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"['dislike', 'unlike', 'unlike', 'unalike']\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "U260zG3Nl3i4" | |
}, | |
"source": [ | |
"**Named Entity Parse Tree**" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "nTCtebX1jd2_" | |
}, | |
"source": [ | |
"**Named Entity Recognition (NER)**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "eowmIQAM6lAO" | |
}, | |
"source": [ | |
"from nltk import word_tokenize, pos_tag, ne_chunk" | |
], | |
"execution_count": 64, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "zC_p3trd72Pw", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "a9367b95-06a9-4437-849d-1ab47fe6882b" | |
}, | |
"source": [ | |
"nltk.download('maxent_ne_chunker')" | |
], | |
"execution_count": 65, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"[nltk_data] Downloading package maxent_ne_chunker to\n", | |
"[nltk_data] /root/nltk_data...\n", | |
"[nltk_data] Package maxent_ne_chunker is already up-to-date!\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 65 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "gKYXp-6D8ATN", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "ca92cdb4-dba4-446d-80df-5c2a59704010" | |
}, | |
"source": [ | |
"nltk.download('words')" | |
], | |
"execution_count": 66, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"[nltk_data] Downloading package words to /root/nltk_data...\n", | |
"[nltk_data] Package words is already up-to-date!\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 66 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "oRzaAmzf6tz8" | |
}, | |
"source": [ | |
"text = \"Johney works at Intel.\" # str" | |
], | |
"execution_count": 67, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "5VbHWj7Y7B2t", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "a8063c60-2426-43e9-f8d7-5dd695115277" | |
}, | |
"source": [ | |
"tokens = word_tokenize(text)\n", | |
"print (tokens) " | |
], | |
"execution_count": 68, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"['Johney', 'works', 'at', 'Intel', '.']\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Dl3GiGZH67Is", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "ab1d6a9b-a587-4457-fa86-8f3ef1bded36" | |
}, | |
"source": [ | |
"tagged_tokens1 = pos_tag(tokens)\n", | |
"print (tagged_tokens1 )" | |
], | |
"execution_count": 69, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"[('Johney', 'NNP'), ('works', 'VBZ'), ('at', 'IN'), ('Intel', 'NNP'), ('.', '.')]\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "w3jejHfS6xRM" | |
}, | |
"source": [ | |
"ner_tree = ne_chunk(tagged_tokens1)\n" | |
], | |
"execution_count": 70, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "LEdaSyTE8GKV", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "eafcfb62-ef1b-4fb2-b796-d260d10575ab" | |
}, | |
"source": [ | |
"print(ner_tree)" | |
], | |
"execution_count": 71, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"(S (PERSON Johney/NNP) works/VBZ at/IN (ORGANIZATION Intel/NNP) ./.)\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "emj20fQ3jxE8" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "_q3FNMa0jxCI" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "soUnj1Ejjw_F" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "RjRK_FDCjw79" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "5xRNHQPyjw5N" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "yIzdgn_9jw2W" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "98Dmry_PjwzJ" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "yZXM-yzNjwwH" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Vc70JUHXjwsz" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "MKzn705ajwpo" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "3XKBppzmjwlj" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "sO1V9ZUyjwck" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "sGbmwQFajwYc" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "gOHOYtLu8fPn" | |
}, | |
"source": [ | |
"Further reading (introduction to NLTK): https://nlpforhackers.io/introduction-nltk/" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "Abd4gOCNGPwD" | |
}, | |
"source": [ | |
"**Exercise: Implement Extractive Text Summarization**" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "eE8BoHIOHl0N" | |
}, | |
"source": [ | |
"" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "JIvfm_GJF8l4" | |
}, | |
"source": [ | |
"Explore with the spaCy package: https://gist.github.com/LahiruTjay" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "VIMHlXoAKiPG" | |
}, | |
"source": [ | |
"from nltk.parse.corenlp import CoreNLPDependencyParser\n", | |
"\n", | |
"\n" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "axtJNviyKngt" | |
}, | |
"source": [ | |
"parser = CoreNLPDependencyParser()\n" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "yUxlRX9DKs_K", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 860 | |
}, | |
"outputId": "1869fb93-1ee7-4f7b-bd75-a83c7afa7290" | |
}, | |
"source": [ | |
"parse = next(parser.raw_parse(\"I put the book in the box on the table.\"))" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "error", | |
"ename": "ConnectionError", | |
"evalue": "ignored", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mConnectionRefusedError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connection.py\u001b[0m in \u001b[0;36m_new_conn\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 158\u001b[0m conn = connection.create_connection(\n\u001b[0;32m--> 159\u001b[0;31m (self._dns_host, self.port), self.timeout, **extra_kw)\n\u001b[0m\u001b[1;32m 160\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/util/connection.py\u001b[0m in \u001b[0;36mcreate_connection\u001b[0;34m(address, timeout, source_address, socket_options)\u001b[0m\n\u001b[1;32m 79\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0merr\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 80\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 81\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/util/connection.py\u001b[0m in \u001b[0;36mcreate_connection\u001b[0;34m(address, timeout, source_address, socket_options)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0msock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource_address\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 70\u001b[0;31m \u001b[0msock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msa\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 71\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0msock\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mConnectionRefusedError\u001b[0m: [Errno 111] Connection refused", | |
"\nDuring handling of the above exception, another exception occurred:\n", | |
"\u001b[0;31mNewConnectionError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 599\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 600\u001b[0;31m chunked=chunked)\n\u001b[0m\u001b[1;32m 601\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 353\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 354\u001b[0;31m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mhttplib_request_kw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 355\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m 1263\u001b[0m \u001b[0;34m\"\"\"Send a complete request to the server.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1264\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_send_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36m_send_request\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m 1309\u001b[0m \u001b[0mbody\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'body'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1310\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mendheaders\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1311\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mendheaders\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m 1258\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mCannotSendHeader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1259\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_send_output\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage_body\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1260\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36m_send_output\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m 1037\u001b[0m \u001b[0;32mdel\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_buffer\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1038\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1039\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m 975\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauto_open\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 976\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 977\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connection.py\u001b[0m in \u001b[0;36mconnect\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 181\u001b[0;31m \u001b[0mconn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_new_conn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 182\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prepare_conn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connection.py\u001b[0m in \u001b[0;36m_new_conn\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 167\u001b[0m raise NewConnectionError(\n\u001b[0;32m--> 168\u001b[0;31m self, \"Failed to establish a new connection: %s\" % e)\n\u001b[0m\u001b[1;32m 169\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mNewConnectionError\u001b[0m: <urllib3.connection.HTTPConnection object at 0x7fd71eca65f8>: Failed to establish a new connection: [Errno 111] Connection refused", | |
"\nDuring handling of the above exception, another exception occurred:\n", | |
"\u001b[0;31mMaxRetryError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 448\u001b[0m \u001b[0mretries\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_retries\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 449\u001b[0;31m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 450\u001b[0m )\n", | |
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 637\u001b[0m retries = retries.increment(method, url, error=e, _pool=self,\n\u001b[0;32m--> 638\u001b[0;31m _stacktrace=sys.exc_info()[2])\n\u001b[0m\u001b[1;32m 639\u001b[0m \u001b[0mretries\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/util/retry.py\u001b[0m in \u001b[0;36mincrement\u001b[0;34m(self, method, url, response, error, _pool, _stacktrace)\u001b[0m\n\u001b[1;32m 398\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnew_retry\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_exhausted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 399\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mMaxRetryError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_pool\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merror\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mResponseError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcause\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 400\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mMaxRetryError\u001b[0m: HTTPConnectionPool(host='localhost', port=9000): Max retries exceeded with url: /?properties=%7B%22outputFormat%22%3A+%22json%22%2C+%22annotators%22%3A+%22tokenize%2Cpos%2Clemma%2Cssplit%2Cdepparse%22%2C+%22ssplit.ssplit.eolonly%22%3A+%22true%22%2C+%22tokenize.whitespace%22%3A+%22false%22%7D (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fd71eca65f8>: Failed to establish a new connection: [Errno 111] Connection refused',))", | |
"\nDuring handling of the above exception, another exception occurred:\n", | |
"\u001b[0;31mConnectionError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-328-456a0466293b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mparse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraw_parse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"I put the book in the box on the table.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/nltk/parse/corenlp.py\u001b[0m in \u001b[0;36mraw_parse\u001b[0;34m(self, sentence, properties, *args, **kwargs)\u001b[0m\n\u001b[1;32m 226\u001b[0m \u001b[0mproperties\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdefault_properties\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 227\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 228\u001b[0;31m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 229\u001b[0m )\n\u001b[1;32m 230\u001b[0m )\n", | |
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/nltk/parse/corenlp.py\u001b[0m in \u001b[0;36mraw_parse_sents\u001b[0;34m(self, sentences, verbose, properties, *args, **kwargs)\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0;32myield\u001b[0m \u001b[0miter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtree\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 289\u001b[0m \"\"\"\n\u001b[0;32m--> 290\u001b[0;31m \u001b[0mparsed_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapi_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msentences\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mproperties\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdefault_properties\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 291\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mparsed_sent\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mparsed_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sentences'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[0mtree\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmake_tree\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparsed_sent\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/nltk/parse/corenlp.py\u001b[0m in \u001b[0;36mapi_call\u001b[0;34m(self, data, properties)\u001b[0m\n\u001b[1;32m 246\u001b[0m },\n\u001b[1;32m 247\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoding\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 248\u001b[0;31m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m60\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 249\u001b[0m )\n\u001b[1;32m 250\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/requests/sessions.py\u001b[0m in \u001b[0;36mpost\u001b[0;34m(self, url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m 576\u001b[0m \"\"\"\n\u001b[1;32m 577\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 578\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'POST'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 579\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 580\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/requests/sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 528\u001b[0m }\n\u001b[1;32m 529\u001b[0m \u001b[0msend_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 530\u001b[0;31m \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 531\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 532\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[0;31m# Send the request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 643\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 644\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 645\u001b[0m \u001b[0;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 514\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mSSLError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 515\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 516\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mConnectionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 517\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 518\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mClosedPoolError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mConnectionError\u001b[0m: HTTPConnectionPool(host='localhost', port=9000): Max retries exceeded with url: /?properties=%7B%22outputFormat%22%3A+%22json%22%2C+%22annotators%22%3A+%22tokenize%2Cpos%2Clemma%2Cssplit%2Cdepparse%22%2C+%22ssplit.ssplit.eolonly%22%3A+%22true%22%2C+%22tokenize.whitespace%22%3A+%22false%22%7D (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fd71eca65f8>: Failed to establish a new connection: [Errno 111] Connection refused',))" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "EGGtaCX8K_Ym" | |
}, | |
"source": [ | |
"from nltk.parse.corenlp import CoreNLPServer" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "lvvY6dzjS8js" | |
}, | |
"source": [ | |
"from nltk.parse.corenlp import CoreNLPDependencyParser\n", | |
"\n" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "qcVPnfZ-TIce" | |
}, | |
"source": [ | |
"parser = CoreNLPDependencyParser()\n" | |
], | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment