February 1, 2017 19:29
diff --git a/fnc.ipynb b/fnc.ipynb
 {
  "cells": [
    {
      "metadata": {
        "trusted": true,
        "collapsed": true
      },
      "cell_type": "code",
      "source": "'''\nauthor: brian mcmahan\n\nparser and pandas data manager for the fake news challenge's first dataset: fnc1\n\nthanks to napsternxg for converting it to json first\n'''\nimport urllib\nimport pandas as pd\nimport ujson as json",
      "execution_count": 121,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": true
      },
      "cell_type": "code",
      "source": "fpath = 'https://raw.githubusercontent.com/napsternxg/fnc-1/8836b0b51b0826435f59152013cced8950267595/train_combined.json'\nfnc1 = Dataset.from_jsonurl(fpath)",
      "execution_count": 111,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "print(fnc1)",
      "execution_count": 112,
      "outputs": [
        {
          "output_type": "stream",
          "text": "[FNC-I][][1683 datums]\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "print(fnc1.mode)\nprint(fnc1[0])\nfnc1[1:15]",
      "execution_count": 113,
      "outputs": [
        {
          "output_type": "stream",
          "text": "list\n[Story0][36 stances]\n",
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": "[[Story4][1 stances],\n [Story5][56 stances],\n [Story6][1 stances],\n [Story7][102 stances],\n [Story8][45 stances],\n [Story9][2 stances],\n [Story10][46 stances],\n [Story11][71 stances],\n [Story13][15 stances],\n [Story14][1 stances],\n [Story15][26 stances],\n [Story16][1 stances],\n [Story17][52 stances],\n [Story18][1 stances]]"
          },
          "metadata": {},
          "execution_count": 113
        }
      ]
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "fnc1[100].agree",
      "execution_count": 114,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": "                                            headline  label\n0  'Nasa Confirms Earth Will Experience 6 Days of...  agree\n1  ’6 Days Darkness in December 2014′ 100% Fake; ...  agree",
            "text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>headline</th>\n      <th>label</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>'Nasa Confirms Earth Will Experience 6 Days of...</td>\n      <td>agree</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>’6 Days Darkness in December 2014′ 100% Fake; ...</td>\n      <td>agree</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {},
          "execution_count": 114
        }
      ]
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "fnc1[104].discuss",
      "execution_count": 115,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": "                                            headline    label\n0  Heartbroken girl spends week in KFC after gett...  discuss\n1  Comfort eating? Chinese woman, 26, spends an e...  discuss",
            "text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>headline</th>\n      <th>label</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Heartbroken girl spends week in KFC after gett...</td>\n      <td>discuss</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Comfort eating? Chinese woman, 26, spends an e...</td>\n      <td>discuss</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {},
          "execution_count": 115
        }
      ]
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "fnc1[155].disagree",
      "execution_count": 119,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": "                                            headline     label\n2  Batmobile Stolen From \"Batman v Superman: Dawn...  disagree",
            "text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>headline</th>\n      <th>label</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2</th>\n      <td>Batmobile Stolen From \"Batman v Superman: Dawn...</td>\n      <td>disagree</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {},
          "execution_count": 119
        }
      ]
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "class Story(object):\n    '''An article body plus the headlines that take a stance on it.\n\n    Attributes:\n        body_id: the article's 'Body ID' value.\n        article_body: the article text.\n        stances: pandas DataFrame with 'headline' and 'label' columns.\n    '''\n\n    def __init__(self, body_id, article_body, stances):\n        self.body_id = body_id\n        self.article_body = article_body\n        self.stances = stances\n\n    @classmethod\n    def from_raw(cls, body_info, stances):\n        '''Build a Story from a raw body record and a list of headline/label dicts.'''\n        # Naming the columns keeps 'label' present even when `stances` is empty,\n        # so the stance properties return an empty frame instead of raising.\n        frame = pd.DataFrame(stances, columns=['headline', 'label'])\n        return cls(body_info['Body ID'], body_info['articleBody'], frame)\n\n    def _with_label(self, label):\n        '''Return the rows of `stances` whose label equals `label`.'''\n        return self.stances[self.stances.label == label]\n\n    @property\n    def unrelated(self):\n        '''Stances labelled 'unrelated'.'''\n        return self._with_label('unrelated')\n\n    @property\n    def agree(self):\n        '''Stances labelled 'agree'.'''\n        return self._with_label('agree')\n\n    @property\n    def disagree(self):\n        '''Stances labelled 'disagree'.'''\n        return self._with_label('disagree')\n\n    @property\n    def discuss(self):\n        '''Stances labelled 'discuss'.'''\n        return self._with_label('discuss')\n\n    def __str__(self):\n        return \"[Story{}][{} stances]\".format(self.body_id, len(self.stances))\n\n    def __repr__(self):\n        return str(self)\n\n\nclass Dataset(object):\n    '''Collection of Story objects keyed by Body ID.\n\n    Attributes:\n        data: dict mapping Body ID to Story.\n        split: split name shown by __str__; starts empty.\n        mode: 'list' (default) reads integer subscripts as positions,\n            'dict' reads them as Body ID keys.\n    '''\n\n    def __init__(self, data):\n        self.data = data\n        self.split = \"\"\n        self.mode = \"list\"\n        # Body IDs in the iteration order of `data`; backs positional access.\n        self._index = [d.body_id for d in data.values()]\n\n    @classmethod\n    def from_dict(cls, data):\n        '''Build a Dataset from a dict holding 'bodies' and 'stances' lists.'''\n        # Start every body with an empty list so bodies without stances still get a Story.\n        stances = {d['Body ID']: [] for d in data['bodies']}\n        for stance in data['stances']:\n            stances[stance['Body ID']].append({'headline': stance['Headline'],\n                                               'label': stance['Stance']})\n        bodies = {d['Body ID']: Story.from_raw(body_info=d, stances=stances[d['Body ID']])\n                  for d in data['bodies']}\n        return cls(bodies)\n\n    @classmethod\n    def from_jsonfile(cls, jsonfile):\n        '''Build a Dataset from the JSON file at path `jsonfile`.'''\n        with open(jsonfile) as fp:\n            return cls.from_dict(json.load(fp))\n\n    @classmethod\n    def from_jsonurl(cls, jsonurl):\n        '''Download JSON from `jsonurl` and build a Dataset from it.'''\n        # A bare `import urllib` does not guarantee the `request` submodule is loaded.\n        import urllib.request\n        # Read the `jsonurl` argument (not a notebook global) and close the response.\n        with urllib.request.urlopen(jsonurl) as response:\n            return cls.from_dict(json.load(response))\n\n    def __getitem__(self, k):\n        '''Look up one Story, or a list of them for a slice.\n\n        An int is a position in `_index` when mode is 'list' and a key of `data`\n        when mode is 'dict'. Slices are only supported in 'list' mode.\n\n        Raises:\n            AttributeError: if `k` is neither an int nor a slice, or `k` is an\n                int and mode is neither 'list' nor 'dict'.\n            AssertionError: if `k` is a slice and mode is not 'list'.\n        '''\n        if isinstance(k, int) and self.mode == \"list\":\n            return self.data[self._index[k]]\n        elif isinstance(k, int) and self.mode == \"dict\":\n            return self.data[k]\n        elif isinstance(k, slice):\n            assert self.mode == \"list\"\n            return [self.data[i] for i in self._index[k]]\n        else:\n            raise AttributeError(\"bad index or key\")\n\n    def __len__(self):\n        return len(self.data)\n\n    def __str__(self):\n        return \"[FNC-I][{}][{} datums]\".format(self.split, len(self.data))\n\n    def __repr__(self):\n        return str(self)\n",
      "execution_count": 110,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "name": "conda-env-tf-py",
      "display_name": "Python [conda env:tf]",
      "language": "python"
    },
    "anaconda-cloud": {},
    "language_info": {
      "mimetype": "text/x-python",
      "nbconvert_exporter": "python",
      "name": "python",
      "pygments_lexer": "ipython3",
      "version": "3.5.2",
      "file_extension": ".py",
      "codemirror_mode": {
        "version": 3,
        "name": "ipython"
      }
    },
    "gist": {
      "id": "",
      "data": {
        "description": "FNC1 - Data Handling Resources",
        "public": true
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 1
 }
	{
	"cells": [
	{
	"metadata": {
	"trusted": true,
	"collapsed": true
	},
	"cell_type": "code",
	"source": "'''\nauthor: brian mcmahan\n\nparser and pandas data manager for the fake news challenge's first dataset: fnc1\n\nthanks to napsternxg for converting it to json first\n'''\nimport urllib\nimport pandas as pd\nimport ujson as json",
	"execution_count": 121,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": true
	},
	"cell_type": "code",
	"source": "fpath = 'https://raw.githubusercontent.com/napsternxg/fnc-1/8836b0b51b0826435f59152013cced8950267595/train_combined.json'\nfnc1 = Dataset.from_jsonurl(fpath)",
	"execution_count": 111,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "print(fnc1)",
	"execution_count": 112,
	"outputs": [
	{
	"output_type": "stream",
	"text": "[FNC-I][][1683 datums]\n",
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "print(fnc1.mode)\nprint(fnc1[0])\nfnc1[1:15]",
	"execution_count": 113,
	"outputs": [
	{
	"output_type": "stream",
	"text": "list\n[Story0][36 stances]\n",
	"name": "stdout"
	},
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "[[Story4][1 stances],\n [Story5][56 stances],\n [Story6][1 stances],\n [Story7][102 stances],\n [Story8][45 stances],\n [Story9][2 stances],\n [Story10][46 stances],\n [Story11][71 stances],\n [Story13][15 stances],\n [Story14][1 stances],\n [Story15][26 stances],\n [Story16][1 stances],\n [Story17][52 stances],\n [Story18][1 stances]]"
	},
	"metadata": {},
	"execution_count": 113
	}
	]
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "fnc1[100].agree",
	"execution_count": 114,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": " headline label\n0 'Nasa Confirms Earth Will Experience 6 Days of... agree\n1 ’6 Days Darkness in December 2014′ 100% Fake; ... agree",
	"text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>headline</th>\n <th>label</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>'Nasa Confirms Earth Will Experience 6 Days of...</td>\n <td>agree</td>\n </tr>\n <tr>\n <th>1</th>\n <td>’6 Days Darkness in December 2014′ 100% Fake; ...</td>\n <td>agree</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {},
	"execution_count": 114
	}
	]
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "fnc1[104].discuss",
	"execution_count": 115,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": " headline label\n0 Heartbroken girl spends week in KFC after gett... discuss\n1 Comfort eating? Chinese woman, 26, spends an e... discuss",
	"text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>headline</th>\n <th>label</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Heartbroken girl spends week in KFC after gett...</td>\n <td>discuss</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Comfort eating? Chinese woman, 26, spends an e...</td>\n <td>discuss</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {},
	"execution_count": 115
	}
	]
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "fnc1[155].disagree",
	"execution_count": 119,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": " headline label\n2 Batmobile Stolen From \"Batman v Superman: Dawn... disagree",
	"text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>headline</th>\n <th>label</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2</th>\n <td>Batmobile Stolen From \"Batman v Superman: Dawn...</td>\n <td>disagree</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {},
	"execution_count": 119
	}
	]
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "class Story(object):\n    '''An article body plus the headlines that take a stance on it.\n\n    Attributes:\n        body_id: the article's 'Body ID' value.\n        article_body: the article text.\n        stances: pandas DataFrame with 'headline' and 'label' columns.\n    '''\n\n    def __init__(self, body_id, article_body, stances):\n        self.body_id = body_id\n        self.article_body = article_body\n        self.stances = stances\n\n    @classmethod\n    def from_raw(cls, body_info, stances):\n        '''Build a Story from a raw body record and a list of headline/label dicts.'''\n        # Naming the columns keeps 'label' present even when `stances` is empty,\n        # so the stance properties return an empty frame instead of raising.\n        frame = pd.DataFrame(stances, columns=['headline', 'label'])\n        return cls(body_info['Body ID'], body_info['articleBody'], frame)\n\n    def _with_label(self, label):\n        '''Return the rows of `stances` whose label equals `label`.'''\n        return self.stances[self.stances.label == label]\n\n    @property\n    def unrelated(self):\n        '''Stances labelled 'unrelated'.'''\n        return self._with_label('unrelated')\n\n    @property\n    def agree(self):\n        '''Stances labelled 'agree'.'''\n        return self._with_label('agree')\n\n    @property\n    def disagree(self):\n        '''Stances labelled 'disagree'.'''\n        return self._with_label('disagree')\n\n    @property\n    def discuss(self):\n        '''Stances labelled 'discuss'.'''\n        return self._with_label('discuss')\n\n    def __str__(self):\n        return \"[Story{}][{} stances]\".format(self.body_id, len(self.stances))\n\n    def __repr__(self):\n        return str(self)\n\n\nclass Dataset(object):\n    '''Collection of Story objects keyed by Body ID.\n\n    Attributes:\n        data: dict mapping Body ID to Story.\n        split: split name shown by __str__; starts empty.\n        mode: 'list' (default) reads integer subscripts as positions,\n            'dict' reads them as Body ID keys.\n    '''\n\n    def __init__(self, data):\n        self.data = data\n        self.split = \"\"\n        self.mode = \"list\"\n        # Body IDs in the iteration order of `data`; backs positional access.\n        self._index = [d.body_id for d in data.values()]\n\n    @classmethod\n    def from_dict(cls, data):\n        '''Build a Dataset from a dict holding 'bodies' and 'stances' lists.'''\n        # Start every body with an empty list so bodies without stances still get a Story.\n        stances = {d['Body ID']: [] for d in data['bodies']}\n        for stance in data['stances']:\n            stances[stance['Body ID']].append({'headline': stance['Headline'],\n                                               'label': stance['Stance']})\n        bodies = {d['Body ID']: Story.from_raw(body_info=d, stances=stances[d['Body ID']])\n                  for d in data['bodies']}\n        return cls(bodies)\n\n    @classmethod\n    def from_jsonfile(cls, jsonfile):\n        '''Build a Dataset from the JSON file at path `jsonfile`.'''\n        with open(jsonfile) as fp:\n            return cls.from_dict(json.load(fp))\n\n    @classmethod\n    def from_jsonurl(cls, jsonurl):\n        '''Download JSON from `jsonurl` and build a Dataset from it.'''\n        # A bare `import urllib` does not guarantee the `request` submodule is loaded.\n        import urllib.request\n        # Read the `jsonurl` argument (not a notebook global) and close the response.\n        with urllib.request.urlopen(jsonurl) as response:\n            return cls.from_dict(json.load(response))\n\n    def __getitem__(self, k):\n        '''Look up one Story, or a list of them for a slice.\n\n        An int is a position in `_index` when mode is 'list' and a key of `data`\n        when mode is 'dict'. Slices are only supported in 'list' mode.\n\n        Raises:\n            AttributeError: if `k` is neither an int nor a slice, or `k` is an\n                int and mode is neither 'list' nor 'dict'.\n            AssertionError: if `k` is a slice and mode is not 'list'.\n        '''\n        if isinstance(k, int) and self.mode == \"list\":\n            return self.data[self._index[k]]\n        elif isinstance(k, int) and self.mode == \"dict\":\n            return self.data[k]\n        elif isinstance(k, slice):\n            assert self.mode == \"list\"\n            return [self.data[i] for i in self._index[k]]\n        else:\n            raise AttributeError(\"bad index or key\")\n\n    def __len__(self):\n        return len(self.data)\n\n    def __str__(self):\n        return \"[FNC-I][{}][{} datums]\".format(self.split, len(self.data))\n\n    def __repr__(self):\n        return str(self)\n",
	"execution_count": 110,
	"outputs": []
	}
	],
	"metadata": {
	"kernelspec": {
	"name": "conda-env-tf-py",
	"display_name": "Python [conda env:tf]",
	"language": "python"
	},
	"anaconda-cloud": {},
	"language_info": {
	"mimetype": "text/x-python",
	"nbconvert_exporter": "python",
	"name": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.2",
	"file_extension": ".py",
	"codemirror_mode": {
	"version": 3,
	"name": "ipython"
	}
	},
	"gist": {
	"id": "",
	"data": {
	"description": "FNC1 - Data Handling Resources",
	"public": true
	}
	}
	},
	"nbformat": 4,
	"nbformat_minor": 1
	}