Created
November 18, 2019 16:23
-
-
Save Manikanta-Munnangi/943666ad279c97549654718a14657c80 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# spacy import convention\n", | |
"import spacy\n", | |
"\n", | |
"# load the english model into nlp object.\n", | |
"nlp=spacy.load('en_core_web_md') " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([-2.3205e-01, 4.7468e-01, -3.8264e-01, 2.2248e-03, -1.0493e-01,\n", | |
" 1.1612e-01, -5.0251e-02, 1.2927e-01, 8.7639e-02, 2.6391e+00,\n", | |
" -3.7071e-01, -2.9460e-01, -1.2722e-01, -3.7028e-02, -1.3964e-01,\n", | |
" -9.8518e-02, -2.2704e-01, 1.2254e+00, -4.3827e-01, -4.2383e-01,\n", | |
" 4.9285e-01, -2.3314e-01, 9.7892e-02, -2.7542e-01, -2.6583e-01,\n", | |
" 1.4518e-01, -1.0652e-02, 1.1067e-01, 1.6126e-01, -4.2688e-01,\n", | |
" -3.0968e-01, 1.2774e-01, 9.5535e-02, -5.0221e-02, 2.6677e-01,\n", | |
" 1.4821e-01, 1.7805e-01, 8.8508e-02, -3.6138e-01, -2.1068e-01,\n", | |
" -2.6420e-01, -3.0030e-01, 1.7674e-01, -2.0741e-01, 2.3360e-01,\n", | |
" 2.6663e-02, -2.7939e-01, -1.4522e-02, -3.0973e-02, 4.3729e-02,\n", | |
" -3.0191e-01, 9.3855e-02, -2.0135e-01, -1.4267e-01, 3.3124e-01,\n", | |
" 9.8036e-02, 3.6001e-02, -5.7850e-02, 1.2101e-01, 4.1569e-02,\n", | |
" -3.8834e-02, -6.6843e-02, -2.4738e-01, 1.4838e-01, 4.1011e-01,\n", | |
" -3.0283e-01, 9.4704e-02, 3.2583e-01, 3.1955e-01, -4.3357e-02,\n", | |
" 2.0606e-01, -9.6981e-02, 4.5202e-01, -2.4532e-01, 2.6683e-01,\n", | |
" 2.6141e-01, 1.2174e-01, -3.9941e-01, -1.2916e-01, 2.2634e-01,\n", | |
" 1.2218e-01, 1.6536e-01, -5.2466e-02, -8.6235e-02, 1.2232e-02,\n", | |
" -3.9760e-01, -2.5987e-01, -6.4113e-01, 2.4669e-01, 2.9575e-02,\n", | |
" -2.9733e-01, -2.5994e-01, -6.1239e-01, 3.2332e-01, 2.2375e-01,\n", | |
" 2.1916e-01, -1.0905e-02, -7.2927e-02, -3.2219e-01, -6.5715e-02,\n", | |
" -1.7247e-01, 4.0714e-02, 1.6625e-01, -1.8120e-01, -1.8531e-01,\n", | |
" -1.1575e+00, -4.7285e-02, 2.4866e-02, 4.0405e-03, -6.1920e-02,\n", | |
" 8.7754e-02, -4.1669e-01, 8.6682e-02, -3.7720e-01, 1.6166e-01,\n", | |
" -1.2879e-01, -1.6494e-01, -1.1212e-02, -1.4810e-01, 9.9342e-02,\n", | |
" 1.5603e-01, -2.8030e-01, -9.5092e-02, 7.7952e-02, 8.8172e-02,\n", | |
" 2.2930e-01, -1.0321e-01, -3.8966e-01, 1.9519e-01, -8.7815e-02,\n", | |
" -1.5861e-01, 1.1627e-01, 8.8138e-02, 1.1262e-01, 1.8212e-01,\n", | |
" 1.8005e-02, -5.5187e-02, -3.8818e-02, 1.6536e-01, -2.5814e-01,\n", | |
" -1.8516e+00, -2.9996e-01, 3.3106e-02, 3.2293e-01, -1.6417e-01,\n", | |
" -2.7445e-01, 5.1582e-02, 3.4203e-01, -4.3025e-01, -4.5816e-02,\n", | |
" 2.3542e-01, 1.8271e-01, -8.9827e-02, -2.0280e-02, -1.0056e-02,\n", | |
" -7.5604e-02, 1.5922e-02, 1.5616e-01, -3.8949e-01, -5.8165e-02,\n", | |
" -4.3763e-01, 2.4587e-01, -2.3169e-01, -1.4508e-01, 3.5845e-01,\n", | |
" 1.2437e-01, 2.2588e-01, -1.8963e-02, 7.9287e-02, 1.6775e-01,\n", | |
" -1.2729e-01, -3.2950e-01, 3.1048e-01, -1.6959e-01, 5.7082e-02,\n", | |
" -9.8536e-02, -1.1715e-02, 3.9690e-01, 1.0493e-01, 1.9083e-01,\n", | |
" 1.3871e-01, -1.8307e-02, -7.8323e-02, -4.5149e-02, 6.6471e-02,\n", | |
" 1.7835e-01, -4.3998e-02, -1.9136e-01, -8.8387e-02, 4.2414e-01,\n", | |
" 1.1562e-01, 8.0458e-02, -1.0350e-01, -1.8200e-01, -2.0045e-01,\n", | |
" 1.9755e-01, 3.8457e-02, -1.1081e-01, 2.2978e-01, 3.5781e-01,\n", | |
" -1.6376e-01, -2.3062e-01, -2.4412e-01, -7.3929e-02, -1.2747e-01,\n", | |
" 1.4730e-01, 2.5954e-01, 1.8571e-01, 2.7923e-01, 1.8186e-01,\n", | |
" -1.4550e-01, -2.5523e-01, -2.3418e-01, -2.3684e-01, 5.7909e-02,\n", | |
" 1.3913e-01, -1.4280e-01, 9.8092e-02, -2.4884e-01, -2.2587e-01,\n", | |
" 2.2812e-01, 2.3718e-01, -6.6049e-02, 6.3126e-02, -3.4434e-03,\n", | |
" 2.6542e-01, -4.3094e-02, 9.1002e-02, -2.9563e-02, 1.3626e-01,\n", | |
" -2.2368e-01, 1.4869e-01, 1.7428e-02, 2.6551e-01, -2.0984e-01,\n", | |
" -1.6786e-01, 2.1192e-01, 1.2735e-01, 1.6441e-01, 3.3131e-01,\n", | |
" 1.0661e-01, -2.1155e-01, 2.8474e-02, -9.9419e-02, 3.4635e-01,\n", | |
" -4.0166e-01, -1.9083e-01, -2.8156e-01, -8.1996e-02, 2.4322e-01,\n", | |
" 3.0341e-01, -1.4984e-01, -2.9952e-01, -2.8089e-01, -8.2551e-02,\n", | |
" -3.5457e-01, 8.3108e-02, 7.3193e-02, 5.8555e-02, 4.7347e-02,\n", | |
" 3.3200e-01, 1.5465e-01, -6.5075e-02, 6.3738e-03, 2.6690e-01,\n", | |
" -3.3819e-01, -2.1204e-01, 2.2368e-01, 6.2783e-01, 7.0440e-01,\n", | |
" -2.2196e-01, -1.0377e-01, 6.9900e-02, -1.3201e-01, -2.6255e-01,\n", | |
" -1.9671e-02, -1.1906e-01, 3.2839e-02, -3.1207e-02, 2.5083e-01,\n", | |
" -1.4702e-01, 4.4411e-01, -2.1465e-01, 4.5018e-02, -1.4012e-01,\n", | |
" 4.6586e-02, 2.4790e-01, -1.3205e-01, 1.4456e-01, -1.8638e-01,\n", | |
" -8.6773e-02, 1.3312e-01, 1.8741e-03, 4.4091e-02, 2.8882e-01,\n", | |
" -9.0016e-02, -1.8108e-01, 3.3178e-01, 3.1545e-01, 3.7972e-01],\n", | |
" dtype=float32)" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# access the vector form of a word through the vocab\n", | |
"nlp.vocab[\"how\"].vector" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", | |
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)" | |
] | |
}, | |
"execution_count": 30, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# check another word that doesn't have a vector form (its vector is all zeros)\n", | |
"nlp.vocab[\"jupyter\"].vector" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"- `So before preprocessing, you need to make sure the word has a vector form.`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(True, False)" | |
] | |
}, | |
"execution_count": 33, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# check for the existence of a vector with the .has_vector attribute\n", | |
"nlp.vocab[\"how\"].has_vector, nlp.vocab[\"jupyter\"].has_vector" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# two sample sentences\n", | |
"text=\"how are you\"\n", | |
"text1=\"how you doing\"\n", | |
"\n", | |
"# calling nlp on each text returns a Doc container\n", | |
"doc=nlp(text)\n", | |
"doc1=nlp(text1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.9106663802758767" | |
] | |
}, | |
"execution_count": 35, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# similarity between sentences\n", | |
"doc.similarity(doc1)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.9" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment