ISS correct missing devnames
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pymongo import MongoClient"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Database connections"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stuart's local stuff\n",
    "client=MongoClient('mongodb://localhost:27017/')\n",
    "database = client[\"iss-metadatastore\"]\n",
    "\n",
    "# ISS Parameters\n",
    "#client=MongoClient('mongodb://localhost:27017/')\n",
    "#database = client[\"\"]"
   ]
  },
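  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Quick sanity check that we are pointed at the right database - list its collections and make sure `run_start` and `event_descriptor` show up. A rough sketch, not needed for the rest of the notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sanity check: list the collections in the database.\n",
    "# We expect 'run_start' and 'event_descriptor' (among others) to be there.\n",
    "print(database.list_collection_names())"
   ]
  },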
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## What time period do we care about?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "since='2018-11-20 18:00'\n",
    "until='2018-11-21 12:00'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now some time mangling to turn these datetimes (interpreted as US/Eastern local time) into seconds since the Unix epoch, which is how the `time` field is stored in the documents."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pytz\n",
    "from datetime import datetime\n",
    "zone = pytz.timezone('US/Eastern')\n",
    "epoch = pytz.UTC.localize(datetime(1970, 1, 1))\n",
    "\n",
    "fmt = '%Y-%m-%d %H:%M'\n",
    "since_val = datetime.strptime(since, fmt)\n",
    "until_val = datetime.strptime(until, fmt)\n",
    "\n",
    "since_val = zone.localize(since_val, is_dst=None)\n",
    "until_val = zone.localize(until_val, is_dst=None)\n",
    "\n",
    "#'$gte' == since\n",
    "#'$lte' == until\n",
    "\n",
    "since_seconds = (since_val - epoch).total_seconds()\n",
    "until_seconds = (until_val - epoch).total_seconds()"
   ]
  },
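  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a sanity check: for timezone-aware datetimes on Python 3, `timestamp()` should give the same number as the epoch arithmetic above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cross-check the epoch arithmetic above: for timezone-aware datetimes,\n",
    "# datetime.timestamp() returns the same POSIX seconds.\n",
    "assert since_val.timestamp() == since_seconds\n",
    "assert until_val.timestamp() == until_seconds\n",
    "print(since_seconds, until_seconds)"
   ]
  },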
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Let's get a list of the `run_start` uids for this period"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "We found 1293 documents\n"
     ]
    }
   ],
   "source": [
    "run_start_collection = database[\"run_start\"]\n",
    "\n",
    "run_start_uids=[]\n",
    "\n",
    "query = {}\n",
    "query[\"$and\"] = [\n",
    "    {\n",
    "        u\"time\": {\n",
    "            u\"$lte\": until_seconds\n",
    "        }\n",
    "    },\n",
    "    {\n",
    "        u\"time\": {\n",
    "            u\"$gte\": since_seconds\n",
    "        }\n",
    "    }\n",
    "]\n",
    "\n",
    "projection = {}\n",
    "projection[\"uid\"] = 1.0\n",
    "\n",
    "cursor = run_start_collection.find(query, projection=projection)\n",
    "for doc in cursor:\n",
    "    run_start_uids.append(doc['uid'])\n",
    "\n",
    "print(f\"We found {len(run_start_uids)} documents\")"
   ]
  },
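  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Aside: `distinct` can pull the same uids in a single call - a shorter equivalent of the loop above (left commented out so it doesn't replace the list we just built)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Equivalent one-liner: ask Mongo for the distinct 'uid' values matching the same query.\n",
    "# run_start_uids = run_start_collection.distinct('uid', query)\n",
    "# print(f\"We found {len(run_start_uids)} documents\")"
   ]
  },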
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "These times gave Chanaka 1293 documents."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Right, so now that we have a list of `run_start` uids, we need to get all the descriptors relating to them."
   ]
  },
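  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Descriptors point back at their run via a `run_start` field, so counting how many descriptors these runs produced looks roughly like this:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# How many event_descriptor documents belong to the run_starts found above?\n",
    "descriptor_collection = database['event_descriptor']\n",
    "print(descriptor_collection.count_documents({'run_start': {'$in': run_start_uids}}))"
   ]
  },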
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### List of devices that we need to check / update"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pairs of (data key in the descriptor, expected devname)\n",
    "devices = []\n",
    "devices.append(['pba1_adc7','i0'])\n",
    "devices.append(['pb9_enc1','hhm_theta'])\n",
    "devices.append(['pba2_adc6','ir'])\n",
    "devices.append(['pba1_adc1','it'])\n",
    "devices.append(['pba1_adc6','iff'])\n",
    "devices.append(['pba2_adc7','pba2_adc7'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Utility function to construct the Mongo query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_query(device, devname):\n",
    "    # Match descriptors from our run_starts where this device's devname\n",
    "    # exists but is not the value we expect.\n",
    "    query = {}\n",
    "    query[\"run_start\"] = {'$in': run_start_uids}\n",
    "    query[\"$and\"] = [\n",
    "        {\n",
    "            f\"data_keys.{device}.devname\": {\n",
    "                u\"$exists\": True\n",
    "            }\n",
    "        },\n",
    "        {\n",
    "            f\"data_keys.{device}.devname\": {\n",
    "                u\"$ne\": devname\n",
    "            }\n",
    "        }\n",
    "    ]\n",
    "    return query"
   ]
  },
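  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For example, for the first device this builds a query restricted to the run_starts above, where the `devname` exists but is not `'i0'`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Peek at the query generated for ('pba1_adc7', 'i0').\n",
    "# The 'run_start' clause is just the (long) list of uids, so only show the '$and' part here.\n",
    "example = generate_query('pba1_adc7', 'i0')\n",
    "print(example['$and'])"
   ]
  },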
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How many documents have a blank or incorrect `devname`?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "There are 0 documents missing the devname 'i0'\n",
      "There are 0 documents missing the devname 'hhm_theta'\n",
      "There are 0 documents missing the devname 'ir'\n",
      "There are 0 documents missing the devname 'it'\n",
      "There are 0 documents missing the devname 'iff'\n",
      "There are 0 documents missing the devname 'pba2_adc7'\n"
     ]
    }
   ],
   "source": [
    "descriptor_collection = database['event_descriptor']\n",
    "\n",
    "for device, devname in devices:\n",
    "    query = generate_query(device, devname)\n",
    "\n",
    "    print(f\"There are {descriptor_collection.count_documents(query)} documents missing the devname '{devname}'\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Let's update them and re-check\n",
    "The actual update command is commented out for safety - uncomment it to run in *anger*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "There are 0 documents missing the devname 'i0'\n",
      "There are 0 documents missing the devname 'hhm_theta'\n",
      "There are 0 documents missing the devname 'ir'\n",
      "There are 0 documents missing the devname 'it'\n",
      "There are 0 documents missing the devname 'iff'\n",
      "There are 0 documents missing the devname 'pba2_adc7'\n"
     ]
    }
   ],
   "source": [
    "for device, devname in devices:\n",
    "    query = generate_query(device, devname)\n",
    "    # Uncomment to actually apply the fix to every matching descriptor:\n",
    "    #descriptor_collection.update_many(query, {'$set' : {f'data_keys.{device}.devname' : devname}}, upsert=False)\n",
    "    print(f\"There are {descriptor_collection.count_documents(query)} documents missing the devname '{devname}'\")"
   ]
  },
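  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A rough spot-check after running the update: grab one descriptor for the first device and see what `devname` it carries now (assuming at least one matching descriptor exists)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Spot-check: fetch one descriptor for the first device and print the devname it carries.\n",
    "device, devname = devices[0]\n",
    "doc = descriptor_collection.find_one(\n",
    "    {'run_start': {'$in': run_start_uids},\n",
    "     f'data_keys.{device}.devname': {'$exists': True}},\n",
    "    projection={f'data_keys.{device}.devname': 1})\n",
    "if doc is not None:\n",
    "    print(f\"{device} devname is {doc['data_keys'][device]['devname']!r} (expected {devname!r})\")\n",
    "else:\n",
    "    print(f\"No descriptors found for {device}\")"
   ]
  },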
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}