hakxcore · July 21, 2022 05:56
diff --git a/nltk.ipynb b/nltk.ipynb
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "NLTK.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/hakxcore/092abb3476acfac2492d3229c1f5809f/nltk.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "UcIcD13vA-GL"
      },
      "source": [
        "**Importing NLTK Library**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Yi5BB4LaAj-l"
      },
      "source": [
        "import nltk"
      ],
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "UUxuu_4OBSCv",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "84d29aa9-32a8-41a0-894b-6d1ee5218299"
      },
      "source": [
        "nltk.download('punkt')"
      ],
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
            "[nltk_data]   Unzipping tokenizers/punkt.zip.\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 2
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Dtlcx68xAHQa"
      },
      "source": [
        "from nltk import sent_tokenize"
      ],
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "dbNBY1CPAtkI"
      },
      "source": [
        "text = \"GOOD MORNING all. Hope you will like this video. Thank You.\""
      ],
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "EiySbKvnBA_6"
      },
      "source": [
        "sentence_tokens = sent_tokenize(text)\n"
      ],
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "MP69RopMAxj9",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "31f64bd7-4be0-46e8-cdcd-f51dd12d1be7"
      },
      "source": [
        "print(sentence_tokens)\n"
      ],
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['GOOD MORNING all.', 'Hope you will like this video.', 'Thank You.']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "h8QjvvQcBlr8",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "4f464b98-ae59-480a-e014-fb760c6f941b"
      },
      "source": [
        "for sentence in sentence_tokens:\n",
        "    print(sentence)"
      ],
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "GOOD MORNING all.\n",
            "Hope you will like this video.\n",
            "Thank You.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "SaTLjuVkC6lo"
      },
      "source": [
        "Word Tokenization"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "dtpcHtHsC9dQ"
      },
      "source": [
        "from nltk.tokenize import word_tokenize"
      ],
      "execution_count": 8,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "dkiIXFLnDAP5"
      },
      "source": [
        "sentence = \"Let's understand this concept in detail!\""
      ],
      "execution_count": 9,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "7UdVxk5TDGNr",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "221cadc8-d4d3-4854-dae4-58da97a4727a"
      },
      "source": [
        "word_tokens = word_tokenize(sentence)\n",
        "print(word_tokens)"
      ],
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['Let', \"'s\", 'understand', 'this', 'concept', 'in', 'detail', '!']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "hU6nZAsjDYV_"
      },
      "source": [
        "from nltk.tokenize import TreebankWordTokenizer, WordPunctTokenizer, WhitespaceTokenizer\n",
        "tree_tokenizer = TreebankWordTokenizer()\n",
        "word_punct_tokenizer = WordPunctTokenizer()\n",
        "white_space_tokenizer = WhitespaceTokenizer()"
      ],
      "execution_count": 11,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "vtJuAjrTDdvO",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "eb77c13e-b6c2-407a-e5bd-391f62fda6cc"
      },
      "source": [
        "word_tokens = tree_tokenizer.tokenize(sentence)\n",
        "print(word_tokens)"
      ],
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['Let', \"'s\", 'understand', 'this', 'concept', 'in', 'detail', '!']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "3gR0b2w2DhIB",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "5b1fa415-0050-4a89-e8a1-609accc0285e"
      },
      "source": [
        "word_tokens = word_punct_tokenizer.tokenize(sentence)\n",
        "print(word_tokens)"
      ],
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['Let', \"'\", 's', 'understand', 'this', 'concept', 'in', 'detail', '!']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "-ixsNWXNDld7",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "02a1405b-4e5f-4fbf-a6ba-98bbed8144be"
      },
      "source": [
        "word_tokens = white_space_tokenizer.tokenize(sentence)\n",
        "print(word_tokens)"
      ],
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[\"Let's\", 'understand', 'this', 'concept', 'in', 'detail!']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "wup0YgPrFMBa"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "B42UVJnSFKK7"
      },
      "source": [
        "Stemming"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zux2s57QFOzv"
      },
      "source": [
        "from nltk.stem import PorterStemmer, LancasterStemmer"
      ],
      "execution_count": 15,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "NL5qiH9oFT6s",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "e8c030d4-a477-4388-8aab-44d12c095140"
      },
      "source": [
        "porter_stemmer = PorterStemmer()\n",
        "print(porter_stemmer.stem('observing'))\n",
        "print(porter_stemmer.stem('observs'))\n",
        "print(porter_stemmer.stem('observe'))"
      ],
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "observ\n",
            "observ\n",
            "observ\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "porter_stemmer = PorterStemmer()\n",
        "print(porter_stemmer.stem('running'))\n",
        "print(porter_stemmer.stem('observs'))\n",
        "print(porter_stemmer.stem('observe'))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "kaxqJ6PhNm7u",
        "outputId": "b8751eac-4e8f-4d56-e928-1c80afc0e5e2"
      },
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "run\n",
            "observ\n",
            "observ\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "nCXzgZjFFXC7",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "2174e022-0e07-4efa-e614-97d580a6ba3d"
      },
      "source": [
        "lancaster_stemmer = LancasterStemmer()\n",
        "print(lancaster_stemmer.stem('observing'))\n",
        "print(lancaster_stemmer.stem('observs'))\n",
        "print(lancaster_stemmer.stem('observe'))"
      ],
      "execution_count": 18,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "observ\n",
            "observ\n",
            "observ\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "lancaster_stemmer = LancasterStemmer()\n",
        "print(lancaster_stemmer.stem('drives'))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "DsowvWrnNyiv",
        "outputId": "17d8e10d-89a4-416a-e0b9-25c1313362d3"
      },
      "execution_count": 19,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "driv\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "4qMpWrItFo8L"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "R6mx2PWIFp3i"
      },
      "source": [
        "Lemmatization"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PF--ePtcFwK7"
      },
      "source": [
        "from nltk.stem import WordNetLemmatizer"
      ],
      "execution_count": 20,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "mFsSxM1oGJmp",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "32c0f797-5917-4c57-eb14-5086c3fe36e4"
      },
      "source": [
        "nltk.download('wordnet')\n"
      ],
      "execution_count": 21,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[nltk_data] Downloading package wordnet to /root/nltk_data...\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 21
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "nltk.download('omw-1.4')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "TyeIovskOorI",
        "outputId": "b7626d78-2417-405a-a723-7beeedb2bc7b"
      },
      "execution_count": 22,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[nltk_data] Downloading package omw-1.4 to /root/nltk_data...\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 22
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "IQezx9MEF4Vf",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "8421a954-57b6-4c50-c707-f94fadd1b9d7"
      },
      "source": [
        "lemmatizer = WordNetLemmatizer()\n",
        "print(lemmatizer.lemmatize(\"running\"))\n",
        "print(lemmatizer.lemmatize(\"runs\"))"
      ],
      "execution_count": 23,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "running\n",
            "run\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "MwKPf_NwBeZH"
      },
      "source": [
        "**Lemmatizer- Returns verb, noun, Adverb, Adjective form**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "I-ZBwYcFGQZN"
      },
      "source": [
        "def lemmatize(word):\n",
        "    lemmatizer = WordNetLemmatizer()\n",
        "    print(\"verb form: \" +lemmatizer.lemmatize(word, pos=\"v\"))\n",
        "    print(\"noun form: \" + lemmatizer.lemmatize(word, pos=\"n\"))\n",
        "    print(\"adverb form: \" + lemmatizer.lemmatize(word, pos=\"r\"))\n",
        "    print(\"adjective form: \" + lemmatizer.lemmatize(word, pos=\"a\"))"
      ],
      "execution_count": 24,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "mUHVRBpKGUvY",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "e469878b-2f56-48d4-c761-de0fd81df3f6"
      },
      "source": [
        "lemmatize(\"ears\")"
      ],
      "execution_count": 25,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "verb form: ears\n",
            "noun form: ear\n",
            "adverb form: ears\n",
            "adjective form: ears\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "xcNK5IKBGbGl",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "59027377-72cf-41d1-8bb8-15cb4583571b"
      },
      "source": [
        "lemmatize(\"running\")"
      ],
      "execution_count": 26,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "verb form: run\n",
            "noun form: running\n",
            "adverb form: running\n",
            "adjective form: running\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "t2n7L32FIxnZ"
      },
      "source": [
        "The following code snippet shows the comparison between stemming and lemmatization."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "rbDvt4WWIzks"
      },
      "source": [
        "from nltk.stem import PorterStemmer\n",
        "from nltk.stem import WordNetLemmatizer"
      ],
      "execution_count": 27,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ZJqe8tl6I5B2"
      },
      "source": [
        "stemmer = PorterStemmer();\n",
        "lemmatizer = WordNetLemmatizer()"
      ],
      "execution_count": 28,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "BY-Go7noI8FL",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "1455e624-5c24-4cd2-90b6-af0a6bf913c6"
      },
      "source": [
        "print(stemmer.stem(\"deactivating\"))\n",
        "print(stemmer.stem(\"deactivated\"))\n",
        "print(stemmer.stem(\"deactivates\"))"
      ],
      "execution_count": 29,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "deactiv\n",
            "deactiv\n",
            "deactiv\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ung1veEJI_U4",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "d8545863-4e15-4c94-d31f-e5a135bb63fd"
      },
      "source": [
        "print(lemmatizer.lemmatize(\"deactivating\", pos=\"v\"))\n",
        "print(lemmatizer.lemmatize(\"deactivating\", pos=\"r\"))\n",
        "print(lemmatizer.lemmatize(\"deactivating\", pos=\"n\"))"
      ],
      "execution_count": 30,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "deactivate\n",
            "deactivating\n",
            "deactivating\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Z9AmVUhfJCyT",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "fa47810b-2fc9-4408-c79f-436af1aa37e1"
      },
      "source": [
        "print(stemmer.stem('stones')) \n",
        "print(stemmer.stem('speaking')) \n",
        "print(stemmer.stem('bedroom')) \n",
        "print(stemmer.stem('jokes')) \n",
        "print(stemmer.stem('lisa')) \n",
        "print(stemmer.stem('purple'))"
      ],
      "execution_count": 31,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "stone\n",
            "speak\n",
            "bedroom\n",
            "joke\n",
            "lisa\n",
            "purpl\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "W6CA-NDtJFsR",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "55511b62-bdc2-4d88-de89-e7d640f0b784"
      },
      "source": [
        "print(lemmatizer.lemmatize('stones')) \n",
        "print(lemmatizer.lemmatize('speaking'))\n",
        "print(lemmatizer.lemmatize('bedroom'))\n",
        "print(lemmatizer.lemmatize('jokes'))\n",
        "print(lemmatizer.lemmatize('lisa'))\n",
        "print(lemmatizer.lemmatize('purple'))"
      ],
      "execution_count": 32,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "stone\n",
            "speaking\n",
            "bedroom\n",
            "joke\n",
            "lisa\n",
            "purple\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "WoAzLzz4r3J_"
      },
      "source": [
        "Conclusion:- When efficiency needed use lemmatization, if speed is required then stemming to be used."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "jQ6K3CmAJLcw"
      },
      "source": [
        "Part-Of-Speech (POS) Tagging"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "v0LP1UuwJMko",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "6074ceaf-775c-46a4-f7e9-5ccd2801f45c"
      },
      "source": [
        "from nltk import word_tokenize, pos_tag\n",
        "nltk.download('averaged_perceptron_tagger')"
      ],
      "execution_count": 33,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[nltk_data] Downloading package averaged_perceptron_tagger to\n",
            "[nltk_data]     /root/nltk_data...\n",
            "[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 33
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "hzMFNbHpJjw6"
      },
      "source": [
        "sentence = \"I like many books.\""
      ],
      "execution_count": 34,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "9zIalIxyJpqD",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "a6858379-d85b-4df2-aad2-b0910699e171"
      },
      "source": [
        "sentence_tokens = word_tokenize(sentence)\n",
        "print(sentence_tokens)"
      ],
      "execution_count": 35,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['I', 'like', 'many', 'books', '.']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "wWbPy0ysJtIc",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "2e1eaacb-e70c-42fe-8a6f-7f67cd3dd6bc"
      },
      "source": [
        "pos_tag(sentence_tokens)"
      ],
      "execution_count": 36,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[('I', 'PRP'), ('like', 'VBP'), ('many', 'JJ'), ('books', 'NNS'), ('.', '.')]"
            ]
          },
          "metadata": {},
          "execution_count": 36
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "scWTkbFbSUku"
      },
      "source": [
        "**Chunking-making word phrases**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "OakHkMcaQ7n5"
      },
      "source": [
        "import nltk"
      ],
      "execution_count": 37,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "t6CW1GPY6Kcw",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "ed1ef58a-a971-4bb6-d2be-8c256896049b"
      },
      "source": [
        "nltk.download('averaged_perceptron_tagger')"
      ],
      "execution_count": 38,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[nltk_data] Downloading package averaged_perceptron_tagger to\n",
            "[nltk_data]     /root/nltk_data...\n",
            "[nltk_data]   Package averaged_perceptron_tagger is already up-to-\n",
            "[nltk_data]       date!\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 38
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "pEFFaoHl5_iK",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "1f680878-76b9-492d-fcf4-6abe83862733"
      },
      "source": [
        "nltk.download('punkt')"
      ],
      "execution_count": 39,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
            "[nltk_data]   Package punkt is already up-to-date!\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 39
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "FKyGctDmRCX8",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "e6d2f260-ca43-4894-c9fc-90fab20a6ecc"
      },
      "source": [
        "text = \"The clean data is important for application development.\"\n",
        "tokens = nltk.word_tokenize(text)\n",
        "print(tokens)\n",
        "tagged = nltk.pos_tag(tokens)"
      ],
      "execution_count": 40,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['The', 'clean', 'data', 'is', 'important', 'for', 'application', 'development', '.']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "x00MmqRbRHpF",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "8d2fde33-f4d9-49f4-dcb1-08a43ebbe13b"
      },
      "source": [
        "print(tagged)\n"
      ],
      "execution_count": 41,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[('The', 'DT'), ('clean', 'JJ'), ('data', 'NN'), ('is', 'VBZ'), ('important', 'JJ'), ('for', 'IN'), ('application', 'NN'), ('development', 'NN'), ('.', '.')]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "YZnw0ZSHRNWj"
      },
      "source": [
        "grammar = \"NP: {<DT>?<JJ>*<NN>}\"\n",
        "cp  =nltk.RegexpParser(grammar)\n"
      ],
      "execution_count": 42,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qfMDg-eYRTfa",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "eb17a098-f129-4078-9c2d-28de8dc3c18f"
      },
      "source": [
        "result = cp.parse(tagged)\n",
        "print(result)\n"
      ],
      "execution_count": 43,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(S\n",
            "  (NP The/DT clean/JJ data/NN)\n",
            "  is/VBZ\n",
            "  important/JJ\n",
            "  for/IN\n",
            "  (NP application/NN)\n",
            "  (NP development/NN)\n",
            "  ./.)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "_a_0bDRAA4GA"
      },
      "source": [
        "**Parse** **tree**"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "fWlcWdUlSH7T"
      },
      "source": [
        ""
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "giFS1t4ySpqi"
      },
      "source": [
        "Stop word Removal"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "U7t4cn6qStgA",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "e038467e-7b5f-413a-b4ea-d63efe71d9ce"
      },
      "source": [
        "from nltk.corpus import stopwords\n",
        "from nltk.tokenize import word_tokenize\n",
        "nltk.download('stopwords')"
      ],
      "execution_count": 44,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
            "[nltk_data]   Unzipping corpora/stopwords.zip.\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 44
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "H6SBIPDzSxpf",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "b6275d98-a6f8-4289-fde8-35e1decc6f8a"
      },
      "source": [
        "print(stopwords.words('english'))"
      ],
      "execution_count": 45,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', \"you're\", \"you've\", \"you'll\", \"you'd\", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', \"she's\", 'her', 'hers', 'herself', 'it', \"it's\", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', \"that'll\", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', \"don't\", 'should', \"should've\", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', \"aren't\", 'couldn', \"couldn't\", 'didn', \"didn't\", 'doesn', \"doesn't\", 'hadn', \"hadn't\", 'hasn', \"hasn't\", 'haven', \"haven't\", 'isn', \"isn't\", 'ma', 'mightn', \"mightn't\", 'mustn', \"mustn't\", 'needn', \"needn't\", 'shan', \"shan't\", 'shouldn', \"shouldn't\", 'wasn', \"wasn't\", 'weren', \"weren't\", 'won', \"won't\", 'wouldn', \"wouldn't\"]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "WpQUUHQ5TAqH"
      },
      "source": [
        "sentence = \"Data structure understanding is must for a computer engineer. Coding plays important role there.\""
      ],
      "execution_count": 46,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "C_Oww7HVTF3a",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "4e86985c-7411-4234-81b4-5bbefb33c961"
      },
      "source": [
        "word_tokens = word_tokenize(sentence)\n",
        "print(word_tokens)"
      ],
      "execution_count": 47,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['Data', 'structure', 'understanding', 'is', 'must', 'for', 'a', 'computer', 'engineer', '.', 'Coding', 'plays', 'important', 'role', 'there', '.']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "oI-IJMo2TKIG"
      },
      "source": [
        "clean_tokens = word_tokens[:] \n",
        "for token in word_tokens:\n",
        "    if token in stopwords.words('english'):\n",
        "        clean_tokens.remove(token)"
      ],
      "execution_count": 48,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "RABaBqsLTOoM",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "308029cc-ac27-4eaf-b5cf-8c097f4ca705"
      },
      "source": [
        "print(clean_tokens)"
      ],
      "execution_count": 49,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['Data', 'structure', 'understanding', 'must', 'computer', 'engineer', '.', 'Coding', 'plays', 'important', 'role', '.']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "9tGQHvbTT1Eu"
      },
      "source": [
        "Named Entity Recognition-not used...next refere"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ZLViTaaWUKWU"
      },
      "source": [
        "from nltk import word_tokenize, pos_tag, ne_chunk"
      ],
      "execution_count": 50,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5-LP7Jx9Ue41",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "6f331965-2cb4-47e5-8fb2-d089d070d58a"
      },
      "source": [
        "nltk.download('maxent_ne_chunker')"
      ],
      "execution_count": 51,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[nltk_data] Downloading package maxent_ne_chunker to\n",
            "[nltk_data]     /root/nltk_data...\n",
            "[nltk_data]   Unzipping chunkers/maxent_ne_chunker.zip.\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 51
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "N4Xbgs0vUv4_",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "2c8780fc-47a5-4ea6-dcc9-3e20336297f6"
      },
      "source": [
        "nltk.download('words')"
      ],
      "execution_count": 52,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[nltk_data] Downloading package words to /root/nltk_data...\n",
            "[nltk_data]   Unzipping corpora/words.zip.\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 52
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "MVzIZBueUOV4"
      },
      "source": [
        "sentence = \"UNITED STATES Data structure INFOSYS Accenture understanding is must for a computer engineer. Coding plays important role there.\""
      ],
      "execution_count": 53,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "-QwmSykLUZYT",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "d5671a7a-09da-48fe-c5a3-1052748b31e2"
      },
      "source": [
        "print (ne_chunk(pos_tag(word_tokenize(sentence))))"
      ],
      "execution_count": 54,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(S\n",
            "  (GPE UNITED/NNP)\n",
            "  (ORGANIZATION STATES/NNPS Data/NNP)\n",
            "  structure/NN\n",
            "  (ORGANIZATION INFOSYS/NNP)\n",
            "  Accenture/NNP\n",
            "  understanding/NN\n",
            "  is/VBZ\n",
            "  must/MD\n",
            "  for/IN\n",
            "  a/DT\n",
            "  computer/NN\n",
            "  engineer/NN\n",
            "  ./.\n",
            "  Coding/NNP\n",
            "  plays/VBZ\n",
            "  important/JJ\n",
            "  role/NN\n",
            "  there/RB\n",
            "  ./.)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "O9sGdgldWZya"
      },
      "source": [
        "WORDNET"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zsPRIzIZYx8o"
      },
      "source": [
        "from nltk.corpus import wordnet"
      ],
      "execution_count": 55,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "aXJ6uXs7_4_s",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "c793678b-de3b-4cbb-f728-f4b8785cdd95"
      },
      "source": [
        "nltk.download('wordnet')"
      ],
      "execution_count": 56,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[nltk_data] Downloading package wordnet to /root/nltk_data...\n",
            "[nltk_data]   Package wordnet is already up-to-date!\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 56
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PK4QLOEFY04F",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "926fd833-4271-42ec-8fc9-48fb148ae47b"
      },
      "source": [
        "wordnet.synsets(\"gun\")"
      ],
      "execution_count": 57,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[Synset('gun.n.01'),\n",
              " Synset('artillery.n.01'),\n",
              " Synset('gunman.n.02'),\n",
              " Synset('gunman.n.01'),\n",
              " Synset('grease-gun.n.01'),\n",
              " Synset('accelerator.n.01'),\n",
              " Synset('gun.n.07'),\n",
              " Synset('gun.v.01')]"
            ]
          },
          "metadata": {},
          "execution_count": 57
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "mheokKiPALTZ",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "43640de8-639a-45e5-8260-3907bdb9c382"
      },
      "source": [
        "wordnet.synsets(\"flower\")"
      ],
      "execution_count": 58,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[Synset('flower.n.01'),\n",
              " Synset('flower.n.02'),\n",
              " Synset('flower.n.03'),\n",
              " Synset('bloom.v.01')]"
            ]
          },
          "metadata": {},
          "execution_count": 58
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "wQd6k10yY6Xf",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "2d7a1641-0940-4007-b0e7-2bf4b3f1a8a5"
      },
      "source": [
        "syn = wordnet.synset('flower.n.01')\n",
        "syn.lemma_names()"
      ],
      "execution_count": 59,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "['flower']"
            ]
          },
          "metadata": {},
          "execution_count": 59
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "s2ku3tzXY9Uc",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "42896c9e-9064-45fa-c579-a5533f27dacc"
      },
      "source": [
        "syn.definition()"
      ],
      "execution_count": 60,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'a plant cultivated for its blooms or blossoms'"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 60
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "KGa2gwhpZAlD",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "2b3a75ae-032a-4020-f0e0-5fad08d93fa0"
      },
      "source": [
        "wordnet.synset(\"flower.n.01\").examples()"
      ],
      "execution_count": 61,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[]"
            ]
          },
          "metadata": {},
          "execution_count": 61
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "g8OnqkeSED-n",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "65371b0e-e457-41ff-ecf9-531fa778f081"
      },
      "source": [
        "synonyms = []\n",
        "for syn in wordnet.synsets('long'):\n",
        "    for lemma in syn.lemmas():\n",
        "        synonyms.append(lemma.name())\n",
        "print(synonyms)"
      ],
      "execution_count": 62,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['hanker', 'long', 'yearn', 'long', 'long', 'long', 'retentive', 'recollective', 'long', 'tenacious', 'long', 'long', 'long', 'farseeing', 'farsighted', 'foresighted', 'foresightful', 'prospicient', 'long', 'longsighted', 'long', 'long', 'long']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "R216YeipEKgl",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "eb8ffab1-18d0-423b-f440-f74864b4db82"
      },
      "source": [
        "antonyms = []\n",
        "for syn in wordnet.synsets(\"like\"):\n",
        "    for l in syn.lemmas():\n",
        "        if l.antonyms():\n",
        "            antonyms.append(l.antonyms()[0].name())\n",
        "print(antonyms)"
      ],
      "execution_count": 63,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['dislike', 'unlike', 'unlike', 'unalike']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "U260zG3Nl3i4"
      },
      "source": [
        "                        **Named Entity PARSe TREE**"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "nTCtebX1jd2_"
      },
      "source": [
        "**Named Entity Recognition(NER)"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "eowmIQAM6lAO"
      },
      "source": [
        "from nltk import word_tokenize, pos_tag, ne_chunk"
      ],
      "execution_count": 64,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zC_p3trd72Pw",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "a9367b95-06a9-4437-849d-1ab47fe6882b"
      },
      "source": [
        "nltk.download('maxent_ne_chunker')"
      ],
      "execution_count": 65,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[nltk_data] Downloading package maxent_ne_chunker to\n",
            "[nltk_data]     /root/nltk_data...\n",
            "[nltk_data]   Package maxent_ne_chunker is already up-to-date!\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 65
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "gKYXp-6D8ATN",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "ca92cdb4-dba4-446d-80df-5c2a59704010"
      },
      "source": [
        "nltk.download('words')"
      ],
      "execution_count": 66,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[nltk_data] Downloading package words to /root/nltk_data...\n",
            "[nltk_data]   Package words is already up-to-date!\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 66
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "oRzaAmzf6tz8"
      },
      "source": [
        "text = \"Johney works at Intel.\"  # str"
      ],
      "execution_count": 67,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5VbHWj7Y7B2t",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "a8063c60-2426-43e9-f8d7-5dd695115277"
      },
      "source": [
        "tokens = word_tokenize(text)\n",
        "print (tokens)     "
      ],
      "execution_count": 68,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['Johney', 'works', 'at', 'Intel', '.']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Dl3GiGZH67Is",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "ab1d6a9b-a587-4457-fa86-8f3ef1bded36"
      },
      "source": [
        "tagged_tokens1 = pos_tag(tokens)\n",
        "print (tagged_tokens1 )"
      ],
      "execution_count": 69,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[('Johney', 'NNP'), ('works', 'VBZ'), ('at', 'IN'), ('Intel', 'NNP'), ('.', '.')]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "w3jejHfS6xRM"
      },
      "source": [
        "ner_tree = ne_chunk(tagged_tokens1)\n"
      ],
      "execution_count": 70,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "LEdaSyTE8GKV",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "eafcfb62-ef1b-4fb2-b796-d260d10575ab"
      },
      "source": [
        "print(ner_tree)"
      ],
      "execution_count": 71,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(S (PERSON Johney/NNP) works/VBZ at/IN (ORGANIZATION Intel/NNP) ./.)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "emj20fQ3jxE8"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "_q3FNMa0jxCI"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "soUnj1Ejjw_F"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "RjRK_FDCjw79"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5xRNHQPyjw5N"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "yIzdgn_9jw2W"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "98Dmry_PjwzJ"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "yZXM-yzNjwwH"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Vc70JUHXjwsz"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "MKzn705ajwpo"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "3XKBppzmjwlj"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "sO1V9ZUyjwck"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "sGbmwQFajwYc"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "gOHOYtLu8fPn"
      },
      "source": [
        "https://nlpforhackers.io/introduction-nltk/"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Abd4gOCNGPwD"
      },
      "source": [
        "**Exercise:- Implement Extractive Text Summarization **"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "eE8BoHIOHl0N"
      },
      "source": [
        ""
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "JIvfm_GJF8l4"
      },
      "source": [
        "explore with spacy package: https://gist.github.com/LahiruTjay"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "VIMHlXoAKiPG"
      },
      "source": [
        "from nltk.parse.corenlp import CoreNLPDependencyParser\n",
        "\n",
        "\n"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "axtJNviyKngt"
      },
      "source": [
        "parser = CoreNLPDependencyParser()\n"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "yUxlRX9DKs_K",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 860
        },
        "outputId": "1869fb93-1ee7-4f7b-bd75-a83c7afa7290"
      },
      "source": [
        "parse = next(parser.raw_parse(\"I put the book in the box on the table.\"))"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "error",
          "ename": "ConnectionError",
          "evalue": "ignored",
          "traceback": [
            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
            "\u001b[0;31mConnectionRefusedError\u001b[0m                    Traceback (most recent call last)",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connection.py\u001b[0m in \u001b[0;36m_new_conn\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    158\u001b[0m             conn = connection.create_connection(\n\u001b[0;32m--> 159\u001b[0;31m                 (self._dns_host, self.port), self.timeout, **extra_kw)\n\u001b[0m\u001b[1;32m    160\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/util/connection.py\u001b[0m in \u001b[0;36mcreate_connection\u001b[0;34m(address, timeout, source_address, socket_options)\u001b[0m\n\u001b[1;32m     79\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0merr\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 80\u001b[0;31m         \u001b[0;32mraise\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     81\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/util/connection.py\u001b[0m in \u001b[0;36mcreate_connection\u001b[0;34m(address, timeout, source_address, socket_options)\u001b[0m\n\u001b[1;32m     69\u001b[0m                 \u001b[0msock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msource_address\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 70\u001b[0;31m             \u001b[0msock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msa\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     71\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0msock\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;31mConnectionRefusedError\u001b[0m: [Errno 111] Connection refused",
            "\nDuring handling of the above exception, another exception occurred:\n",
            "\u001b[0;31mNewConnectionError\u001b[0m                        Traceback (most recent call last)",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m    599\u001b[0m                                                   \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 600\u001b[0;31m                                                   chunked=chunked)\n\u001b[0m\u001b[1;32m    601\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m    353\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 354\u001b[0;31m             \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mhttplib_request_kw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    355\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m   1263\u001b[0m         \u001b[0;34m\"\"\"Send a complete request to the server.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1264\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_send_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36m_send_request\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m   1309\u001b[0m             \u001b[0mbody\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'body'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1310\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mendheaders\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1311\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mendheaders\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m   1258\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mCannotSendHeader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1259\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_send_output\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage_body\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1260\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36m_send_output\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m   1037\u001b[0m         \u001b[0;32mdel\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_buffer\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1038\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1039\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m    975\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauto_open\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 976\u001b[0;31m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    977\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connection.py\u001b[0m in \u001b[0;36mconnect\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    180\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 181\u001b[0;31m         \u001b[0mconn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_new_conn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    182\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prepare_conn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connection.py\u001b[0m in \u001b[0;36m_new_conn\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    167\u001b[0m             raise NewConnectionError(\n\u001b[0;32m--> 168\u001b[0;31m                 self, \"Failed to establish a new connection: %s\" % e)\n\u001b[0m\u001b[1;32m    169\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;31mNewConnectionError\u001b[0m: <urllib3.connection.HTTPConnection object at 0x7fd71eca65f8>: Failed to establish a new connection: [Errno 111] Connection refused",
            "\nDuring handling of the above exception, another exception occurred:\n",
            "\u001b[0;31mMaxRetryError\u001b[0m                             Traceback (most recent call last)",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m    448\u001b[0m                     \u001b[0mretries\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_retries\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 449\u001b[0;31m                     \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    450\u001b[0m                 )\n",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m    637\u001b[0m             retries = retries.increment(method, url, error=e, _pool=self,\n\u001b[0;32m--> 638\u001b[0;31m                                         _stacktrace=sys.exc_info()[2])\n\u001b[0m\u001b[1;32m    639\u001b[0m             \u001b[0mretries\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/urllib3/util/retry.py\u001b[0m in \u001b[0;36mincrement\u001b[0;34m(self, method, url, response, error, _pool, _stacktrace)\u001b[0m\n\u001b[1;32m    398\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mnew_retry\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_exhausted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 399\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mMaxRetryError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_pool\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merror\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mResponseError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcause\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    400\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;31mMaxRetryError\u001b[0m: HTTPConnectionPool(host='localhost', port=9000): Max retries exceeded with url: /?properties=%7B%22outputFormat%22%3A+%22json%22%2C+%22annotators%22%3A+%22tokenize%2Cpos%2Clemma%2Cssplit%2Cdepparse%22%2C+%22ssplit.ssplit.eolonly%22%3A+%22true%22%2C+%22tokenize.whitespace%22%3A+%22false%22%7D (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fd71eca65f8>: Failed to establish a new connection: [Errno 111] Connection refused',))",
            "\nDuring handling of the above exception, another exception occurred:\n",
            "\u001b[0;31mConnectionError\u001b[0m                           Traceback (most recent call last)",
            "\u001b[0;32m<ipython-input-328-456a0466293b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mparse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraw_parse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"I put the book in the box on the table.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/nltk/parse/corenlp.py\u001b[0m in \u001b[0;36mraw_parse\u001b[0;34m(self, sentence, properties, *args, **kwargs)\u001b[0m\n\u001b[1;32m    226\u001b[0m                 \u001b[0mproperties\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdefault_properties\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    227\u001b[0m                 \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 228\u001b[0;31m                 \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    229\u001b[0m             )\n\u001b[1;32m    230\u001b[0m         )\n",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/nltk/parse/corenlp.py\u001b[0m in \u001b[0;36mraw_parse_sents\u001b[0;34m(self, sentences, verbose, properties, *args, **kwargs)\u001b[0m\n\u001b[1;32m    288\u001b[0m                 \u001b[0;32myield\u001b[0m \u001b[0miter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtree\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    289\u001b[0m         \"\"\"\n\u001b[0;32m--> 290\u001b[0;31m         \u001b[0mparsed_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapi_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\n'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msentences\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mproperties\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdefault_properties\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    291\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0mparsed_sent\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mparsed_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sentences'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    292\u001b[0m             \u001b[0mtree\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmake_tree\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparsed_sent\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/nltk/parse/corenlp.py\u001b[0m in \u001b[0;36mapi_call\u001b[0;34m(self, data, properties)\u001b[0m\n\u001b[1;32m    246\u001b[0m             },\n\u001b[1;32m    247\u001b[0m             \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoding\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 248\u001b[0;31m             \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m60\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    249\u001b[0m         )\n\u001b[1;32m    250\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/requests/sessions.py\u001b[0m in \u001b[0;36mpost\u001b[0;34m(self, url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m    576\u001b[0m         \"\"\"\n\u001b[1;32m    577\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 578\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'POST'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    579\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    580\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/requests/sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m    528\u001b[0m         }\n\u001b[1;32m    529\u001b[0m         \u001b[0msend_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 530\u001b[0;31m         \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    531\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    532\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m    641\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    642\u001b[0m         \u001b[0;31m# Send the request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 643\u001b[0;31m         \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    644\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    645\u001b[0m         \u001b[0;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m    514\u001b[0m                 \u001b[0;32mraise\u001b[0m \u001b[0mSSLError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    515\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 516\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mConnectionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    517\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    518\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mClosedPoolError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;31mConnectionError\u001b[0m: HTTPConnectionPool(host='localhost', port=9000): Max retries exceeded with url: /?properties=%7B%22outputFormat%22%3A+%22json%22%2C+%22annotators%22%3A+%22tokenize%2Cpos%2Clemma%2Cssplit%2Cdepparse%22%2C+%22ssplit.ssplit.eolonly%22%3A+%22true%22%2C+%22tokenize.whitespace%22%3A+%22false%22%7D (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fd71eca65f8>: Failed to establish a new connection: [Errno 111] Connection refused',))"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "EGGtaCX8K_Ym"
      },
      "source": [
        "from nltk.parse.corenlp import CoreNLPServer"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "lvvY6dzjS8js"
      },
      "source": [
        "from nltk.parse.corenlp import CoreNLPDependencyParser\n",
        "\n"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qcVPnfZ-TIce"
      },
      "source": [
        "parser = CoreNLPDependencyParser()\n"
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
 }