Last active
December 22, 2016 18:51
-
-
Save georgf/60f11698dec0cfab91c7d112af255db1 to your computer and use it in GitHub Desktop.
Check moztelemetry types
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Check moztelemetry types" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/hadoop/anaconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.\n", | |
" warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/histogram-whitelists.json). Assuming all histograms are acceptable.\n", | |
"Populating the interactive namespace from numpy and matplotlib\n" | |
] | |
} | |
], | |
"source": [ | |
"import ujson as json\n", | |
"import matplotlib.pyplot as plt\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import plotly.plotly as py\n", | |
"import datetime as dt\n", | |
"from uuid import UUID\n", | |
"\n", | |
"from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history\n", | |
"\n", | |
"%pylab inline" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"We get all the pings on Nightly, after bug 1293222 landed." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def extract(pings):\n", | |
" subset = get_pings_properties(pings, [\"meta/clientId\",\n", | |
" \"meta/documentId\",\n", | |
" \"payload/processes/parent\",\n", | |
" \"payload/processes/parent/events\"])\n", | |
" return subset\n", | |
"\n", | |
"pings = get_pings(sc,\n", | |
" app=\"Firefox\",\n", | |
" channel=\"nightly\",\n", | |
" doc_type=\"main\",\n", | |
" submission_date=\"20161216\",\n", | |
" build_id=\"20161210030206\",\n", | |
" fraction=1.0)\n", | |
"extracts = extract(pings)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"ping = extracts.filter(lambda p: p[\"meta/documentId\"] == \"0aa23e4c-b779-f544-9c90-5193a4a44c59\")\\\n", | |
" .first()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"On my client I can clearly see that `payload/processes/parent/events` is `[]`.\n", | |
"\n", | |
"Extracting it here, it became a `dict` type instead of the expected `list`:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{}" | |
] | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ping[\"payload/processes/parent/events\"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{u'events': {}, u'gc': {}, u'keyedScalars': {}, u'scalars': {}}" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tmp = ping[\"payload/processes/parent\"]\n", | |
"tmp[\"gc\"] = {} # Readability ...\n", | |
"tmp[\"scalars\"] = {} # Readability ...\n", | |
"tmp" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Skipping `get_pings_properties()`, the result is the same:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"ping = pings.filter(lambda p: p[\"meta\"][\"documentId\"] == \"0aa23e4c-b779-f544-9c90-5193a4a44c59\")\\\n", | |
" .first()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{}" | |
] | |
}, | |
"execution_count": 28, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ping[\"payload\"][\"processes\"][\"parent\"][\"events\"]" | |
] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"kernelspec": { | |
"display_name": "Python [default]", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# ## Check moztelemetry types | |
# In[1]: | |
import ujson as json | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import numpy as np | |
import plotly.plotly as py | |
import datetime as dt | |
from uuid import UUID | |
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history | |
get_ipython().magic(u'pylab inline') | |
# We get all the pings on Nightly, after bug 1293222 landed. | |
# In[15]: | |
def extract(pings):
    """Reduce each ping to the few properties this check looks at.

    `pings` is handed to `get_pings_properties` together with a fixed list of
    property paths, and whatever that call returns is passed straight back.

    The list asks for both the whole parent-process section and its "events"
    entry, so the same value can be inspected via either path afterwards.
    """
    wanted_paths = [
        "meta/clientId",
        "meta/documentId",
        "payload/processes/parent",
        "payload/processes/parent/events",
    ]
    return get_pings_properties(pings, wanted_paths)
# Fetch the unsampled (fraction=1.0) set of "main" pings for a single Firefox
# nightly build and a single submission date.
# NOTE(review): `sc` is not defined in this file -- presumably the SparkContext
# supplied by the notebook environment; confirm before running elsewhere.
pings = get_pings(
    sc,
    app="Firefox",
    channel="nightly",
    doc_type="main",
    submission_date="20161216",
    build_id="20161210030206",
    fraction=1.0,
)

# Narrow every ping down to the properties under investigation.
extracts = extract(pings)
# In[24]:
# Select the one extracted ping with this document id and keep the first match.
ping = (
    extracts
    .filter(lambda props: props["meta/documentId"] == "0aa23e4c-b779-f544-9c90-5193a4a44c59")
    .first()
)
# On my client I can clearly see that `payload/processes/parent/events` is `[]`. | |
# | |
# Extracting it here, it became a `dict` type instead of the expected `list`: | |
# In[25]:
# Look at the extracted "events" value on its own.
ping["payload/processes/parent/events"]

# In[26]:
# Show the whole parent-process section. "gc" and "scalars" are blanked first,
# purely so the displayed value stays short and readable.
# `tmp` is the value stored under this key in `ping`, not a copy, so the
# blanking below changes it in place.
tmp = ping["payload/processes/parent"]
tmp.update({"gc": {}, "scalars": {}})
tmp
# Skipping `get_pings_properties()`, the result is the same: | |
# In[27]:
# Select the ping with this document id directly from the raw `pings`
# collection (nested-key access, no `get_pings_properties()` step involved)
# and keep the first match.
ping = (
    pings
    .filter(lambda raw: raw["meta"]["documentId"] == "0aa23e4c-b779-f544-9c90-5193a4a44c59")
    .first()
)

# In[28]:
# Walk down the nested keys to the parent-process "events" value.
ping["payload"]["processes"]["parent"]["events"]
What about the new pipeline? This should not be an issue.
FYI: it is correct in the new pipeline
"processes": {
"parent": {
"scalars": {
"browser.engagement.max_concurrent_tab_count": 2,
"browser.engagement.max_concurrent_window_count": 2,
"browser.engagement.unfiltered_uri_count": 14,
"browser.engagement.tab_open_event_count": 1,
"browser.engagement.unique_domains_count": 1,
"browser.engagement.window_open_event_count": 1,
"browser.engagement.total_uri_count": 14
},
"keyedScalars": {
},
"events": [
],
"gc": {
"random": [
],
"worst": [
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I verified that the value is
{}
in the ping as stored on S3. Going to check the raw landfill data next. cc @trink and @whd