Created
July 11, 2016 20:05
-
-
Save kokes/9db343467a47453b1eb9e42806a8df91 to your computer and use it in GitHub Desktop.
Shlukuje metadata z registru smluv
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import xmltodict\n", | |
"import pandas as pd\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Read the dump as bytes so the XML parser decodes it using the encoding\n", | |
"# declared in the document rather than the platform's default text encoding.\n", | |
"with open('dump_2016_07.xml', 'rb') as f:\n", | |
"    d = xmltodict.parse(f)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"z = d['dump']['zaznam']\n", | |
"# xmltodict returns a single mapping instead of a list when the dump holds\n", | |
"# exactly one <zaznam> element; normalise so later cells can always iterate.\n", | |
"if not isinstance(z, list):\n", | |
"    z = [z]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"450" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(z)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"OrderedDict([('identifikator',\n", | |
" OrderedDict([('idSmlouvy', '9'), ('idVerze', '9')])),\n", | |
" ('odkaz', 'https://smlouvy.gov.cz/smlouva/9'),\n", | |
" ('casZverejneni', '2016-07-01T01:17:36+02:00'),\n", | |
" ('smlouva',\n", | |
" OrderedDict([('subjekt',\n", | |
" OrderedDict([('datovaSchranka', 'c5kbvkw'),\n", | |
" ('nazev', 'Liberecký kraj'),\n", | |
" ('ico', '70891508'),\n", | |
" ('adresa',\n", | |
" 'U Jezu 642/2a, 46001 Liberec, CZ')])),\n", | |
" ('smluvniStrana',\n", | |
" OrderedDict([('datovaSchranka', 'sxk8tap'),\n", | |
" ('nazev', 'GORDIC spol. s r.o.'),\n", | |
" ('ico', '47903783'),\n", | |
" ('adresa',\n", | |
" 'Erbenova 2108/4, 58601, Jihlava, Jihlava'),\n", | |
" ('prijemce', '1')])),\n", | |
" ('predmet',\n", | |
" 'rozšíření jádra IS Ginis o funkcionalitu zveřejňování smluv'),\n", | |
" ('datumUzavreni', '2016-06-30'),\n", | |
" ('cisloSmlouvy', '12-OBJ/893/2016'),\n", | |
" ('schvalil', 'Ing. Pavel Tvrzník'),\n", | |
" ('hodnotaBezDph', '80000')])),\n", | |
" ('prilohy',\n", | |
" OrderedDict([('priloha',\n", | |
" OrderedDict([('nazevSouboru', '893_Gordic.pdf'),\n", | |
" ('hash',\n", | |
" OrderedDict([('@algoritmus',\n", | |
" 'sha256'),\n", | |
" ('#text',\n", | |
" 'a407edf283dd009c4a0f580b44c1baf191563d44629c927331802f6263c0d299')])),\n", | |
" ('odkaz',\n", | |
" 'https://smlouvy.gov.cz/smlouva/soubor/5/893_Gordic.pdf')]))])),\n", | |
" ('platnyZaznam', '1')])" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"z[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"OrderedDict([('datovaSchranka', 'c5kbvkw'),\n", | |
" ('nazev', 'Liberecký kraj'),\n", | |
" ('ico', '70891508'),\n", | |
" ('adresa', 'U Jezu 642/2a, 46001 Liberec, CZ')])" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"z[0]['smlouva']['subjekt']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Flatten every record into one row: publishing subject (name, ICO), the two\n", | |
"# optional value fields and the contract subject. Missing values become NaN.\n", | |
"# The rows are collected first and the frame is built once, because growing a\n", | |
"# DataFrame through res.loc[j] = ... re-allocates it on every insert.\n", | |
"rows = [\n", | |
"    (\n", | |
"        sml['smlouva']['subjekt']['nazev'],\n", | |
"        sml['smlouva']['subjekt']['ico'],\n", | |
"        sml['smlouva'].get('hodnotaBezDph', np.nan),\n", | |
"        sml['smlouva'].get('hodnotaVcetneDph', np.nan),\n", | |
"        sml['smlouva']['predmet'],\n", | |
"    )\n", | |
"    for sml in z\n", | |
"]\n", | |
"res = pd.DataFrame(rows, columns=['nazev', 'ico', 'bezdph', 'sdph', 'predmet'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>nazev</th>\n", | |
" <th>ico</th>\n", | |
" <th>bezdph</th>\n", | |
" <th>sdph</th>\n", | |
" <th>predmet</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Liberecký kraj</td>\n", | |
" <td>70891508</td>\n", | |
" <td>80000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>rozšíření jádra IS Ginis o funkcionalitu zveře...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Zdravotnická záchranná služba Olomouckého kraj...</td>\n", | |
" <td>00849103</td>\n", | |
" <td>91308</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Podnájmní smlouva</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Zdravotnická záchranná služba Olomouckého kraj...</td>\n", | |
" <td>00849103</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Podnájemní smlouva</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" nazev ico bezdph sdph \\\n", | |
"0 Liberecký kraj 70891508 80000 NaN \n", | |
"1 Zdravotnická záchranná služba Olomouckého kraj... 00849103 91308 NaN \n", | |
"2 Zdravotnická záchranná služba Olomouckého kraj... 00849103 NaN NaN \n", | |
"\n", | |
" predmet \n", | |
"0 rozšíření jádra IS Ginis o funkcionalitu zveře... \n", | |
"1 Podnájmní smlouva \n", | |
"2 Podnájemní smlouva " | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"res.head(3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Empty summary table with one row per distinct ICO found in res.\n", | |
"uni = pd.DataFrame(columns=['nazev', 'smluv', 'bezcen'], index=res.ico.unique())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 58, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# For each ICO store: the name from its first row, the number of its rows,\n", | |
"# and the number of rows where both value columns are missing.\n", | |
"price_cols = ['bezdph', 'sdph']\n", | |
"for ico in res.ico.unique():\n", | |
"    own = res.loc[res.ico == ico]\n", | |
"    no_price = own[price_cols].isnull().all(axis=1)\n", | |
"    uni.loc[ico] = [own.nazev.iloc[0], len(own), no_price.sum()]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 67, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"uni.sort_values(by='smluv', ascending=False).to_csv('bezcen.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 69, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"nazev Liberecký krajZdravotnická záchranná služba Ol...\n", | |
"smluv 450\n", | |
"bezcen 119\n", | |
"dtype: object" | |
] | |
}, | |
"execution_count": 69, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"uni.sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
}, | |
"widgets": { | |
"state": {}, | |
"version": "1.1.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment