Last active
July 27, 2020 08:52
-
-
Save DahlitzFlorian/0db14b9dfbf4fefaafa58f0a4d6d63f0 to your computer and use it in GitHub Desktop.
Abstractive-based Text Summarization Using T5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Untitled2.ipynb", | |
"provenance": [], | |
"collapsed_sections": [] | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"6c0c65a095e64277b92e369a59b2b613": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_6a0ab95152804558beb004fc706cb6a1", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_a35f4ae732b14095b68e28b36fa83a86", | |
"IPY_MODEL_8adb88feb0284b59a01fa33916bd9075" | |
] | |
} | |
}, | |
"6a0ab95152804558beb004fc706cb6a1": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"a35f4ae732b14095b68e28b36fa83a86": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_62566d2322f24e288d771872a65fb406", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 1197, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 1197, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_eb508479abd44565963b03f40fb850e1" | |
} | |
}, | |
"8adb88feb0284b59a01fa33916bd9075": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_fe2cb24aa73c4ad480a66beee4c0712c", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 1.20k/1.20k [00:00<00:00, 3.47kB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_3aa70289854948a4a32414fba2859407" | |
} | |
}, | |
"62566d2322f24e288d771872a65fb406": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"eb508479abd44565963b03f40fb850e1": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"fe2cb24aa73c4ad480a66beee4c0712c": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"3aa70289854948a4a32414fba2859407": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"e3e74fcc9cc24c7a8121f16d3b5739c4": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_181b60d13342427db075b89d1c4e41ae", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_79d2db87a96147c89f0c7cfee9434586", | |
"IPY_MODEL_5f2d69cdcb9044d08a4b3612c5e2fdab" | |
] | |
} | |
}, | |
"181b60d13342427db075b89d1c4e41ae": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"79d2db87a96147c89f0c7cfee9434586": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_51563a0ecf894795a1c6ae5395627550", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 242065649, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 242065649, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_258787ecfa794cc195715e287896bc02" | |
} | |
}, | |
"5f2d69cdcb9044d08a4b3612c5e2fdab": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_fa79ef6c138148a5912d4828f0efb7d1", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 242M/242M [00:05<00:00, 47.8MB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_4b260518d4494b1680717f5689b06244" | |
} | |
}, | |
"51563a0ecf894795a1c6ae5395627550": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"258787ecfa794cc195715e287896bc02": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"fa79ef6c138148a5912d4828f0efb7d1": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"4b260518d4494b1680717f5689b06244": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"1c16f39f7b604be6aa808860ceb34b10": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_29aac4695f33405b8060137447c628e8", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_f0694ea533554cdf9e2d9a4ba7620c14", | |
"IPY_MODEL_cdc9b13c9c7749cabcce961d5ff52f6a" | |
] | |
} | |
}, | |
"29aac4695f33405b8060137447c628e8": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"f0694ea533554cdf9e2d9a4ba7620c14": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_333213a4b69b4a93b98d0d9175ca550e", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 791656, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 791656, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_26e8097b43f84472b073624e90d94358" | |
} | |
}, | |
"cdc9b13c9c7749cabcce961d5ff52f6a": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_b89ae8937da64dc3b652c7919f4695d4", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 792k/792k [00:03<00:00, 242kB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_29385323a0dc46de957ec53ce95eb156" | |
} | |
}, | |
"333213a4b69b4a93b98d0d9175ca550e": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"26e8097b43f84472b073624e90d94358": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"b89ae8937da64dc3b652c7919f4695d4": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"29385323a0dc46de957ec53ce95eb156": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
} | |
} | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "wWvP3oKfeJL1", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"# Abstractive Text Summarization using Google's T5" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "tvMUfShXeFHx", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Install dependencies" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "m7dHfJphBy3s", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 816 | |
}, | |
"outputId": "492411d9-df16-4442-9efa-4a9fca51a890" | |
}, | |
"source": [ | |
"!pip install transformers==2.8.0\n", | |
"!pip install torch==1.4.0" | |
], | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Collecting transformers==2.8.0\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/a3/78/92cedda05552398352ed9784908b834ee32a0bd071a9b32de287327370b7/transformers-2.8.0-py3-none-any.whl (563kB)\n", | |
"\u001b[K |████████████████████████████████| 573kB 2.7MB/s \n", | |
"\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from transformers==2.8.0) (2.23.0)\n", | |
"Collecting sentencepiece\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/d4/a4/d0a884c4300004a78cca907a6ff9a5e9fe4f090f5d95ab341c53d28cbc58/sentencepiece-0.1.91-cp36-cp36m-manylinux1_x86_64.whl (1.1MB)\n", | |
"\u001b[K |████████████████████████████████| 1.1MB 43.1MB/s \n", | |
"\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from transformers==2.8.0) (4.41.1)\n", | |
"Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers==2.8.0) (3.0.12)\n", | |
"Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from transformers==2.8.0) (1.18.5)\n", | |
"Collecting tokenizers==0.5.2\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/d1/3f/73c881ea4723e43c1e9acf317cf407fab3a278daab3a69c98dcac511c04f/tokenizers-0.5.2-cp36-cp36m-manylinux1_x86_64.whl (3.7MB)\n", | |
"\u001b[K |████████████████████████████████| 3.7MB 36.3MB/s \n", | |
"\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers==2.8.0) (2019.12.20)\n", | |
"Collecting sacremoses\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)\n", | |
"\u001b[K |████████████████████████████████| 890kB 45.6MB/s \n", | |
"\u001b[?25hRequirement already satisfied: boto3 in /usr/local/lib/python3.6/dist-packages (from transformers==2.8.0) (1.14.24)\n", | |
"Requirement already satisfied: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers==2.8.0) (0.7)\n", | |
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers==2.8.0) (3.0.4)\n", | |
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->transformers==2.8.0) (2020.6.20)\n", | |
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers==2.8.0) (1.24.3)\n", | |
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers==2.8.0) (2.10)\n", | |
"Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers==2.8.0) (1.15.0)\n", | |
"Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers==2.8.0) (7.1.2)\n", | |
"Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers==2.8.0) (0.16.0)\n", | |
"Requirement already satisfied: botocore<1.18.0,>=1.17.24 in /usr/local/lib/python3.6/dist-packages (from boto3->transformers==2.8.0) (1.17.24)\n", | |
"Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /usr/local/lib/python3.6/dist-packages (from boto3->transformers==2.8.0) (0.3.3)\n", | |
"Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->transformers==2.8.0) (0.10.0)\n", | |
"Requirement already satisfied: docutils<0.16,>=0.10 in /usr/local/lib/python3.6/dist-packages (from botocore<1.18.0,>=1.17.24->boto3->transformers==2.8.0) (0.15.2)\n", | |
"Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /usr/local/lib/python3.6/dist-packages (from botocore<1.18.0,>=1.17.24->boto3->transformers==2.8.0) (2.8.1)\n", | |
"Building wheels for collected packages: sacremoses\n", | |
" Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for sacremoses: filename=sacremoses-0.0.43-cp36-none-any.whl size=893260 sha256=99bcdca21b2f098208cb3a206e6fcf2cbaff3029a9527d896fd73012d26fb36b\n", | |
" Stored in directory: /root/.cache/pip/wheels/29/3c/fd/7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45\n", | |
"Successfully built sacremoses\n", | |
"Installing collected packages: sentencepiece, tokenizers, sacremoses, transformers\n", | |
"Successfully installed sacremoses-0.0.43 sentencepiece-0.1.91 tokenizers-0.5.2 transformers-2.8.0\n", | |
"Collecting torch==1.4.0\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/24/19/4804aea17cd136f1705a5e98a00618cb8f6ccc375ad8bfa437408e09d058/torch-1.4.0-cp36-cp36m-manylinux1_x86_64.whl (753.4MB)\n", | |
"\u001b[K |████████████████████████████████| 753.4MB 17kB/s \n", | |
"\u001b[31mERROR: torchvision 0.6.1+cu101 has requirement torch==1.5.1, but you'll have torch 1.4.0 which is incompatible.\u001b[0m\n", | |
"\u001b[?25hInstalling collected packages: torch\n", | |
" Found existing installation: torch 1.5.1+cu101\n", | |
" Uninstalling torch-1.5.1+cu101:\n", | |
" Successfully uninstalled torch-1.5.1+cu101\n", | |
"Successfully installed torch-1.4.0\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "yUwLbv3ieSEE", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Import modules and packages" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "0mF3A049eZ1z", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"import torch\n", | |
"\n", | |
"from transformers import T5ForConditionalGeneration\n", | |
"from transformers import T5Tokenizer" | |
], | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "7E4TUBYpeqWY", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Define the model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "uSRxw1JPetob", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 164, | |
"referenced_widgets": [ | |
"6c0c65a095e64277b92e369a59b2b613", | |
"6a0ab95152804558beb004fc706cb6a1", | |
"a35f4ae732b14095b68e28b36fa83a86", | |
"8adb88feb0284b59a01fa33916bd9075", | |
"62566d2322f24e288d771872a65fb406", | |
"eb508479abd44565963b03f40fb850e1", | |
"fe2cb24aa73c4ad480a66beee4c0712c", | |
"3aa70289854948a4a32414fba2859407", | |
"e3e74fcc9cc24c7a8121f16d3b5739c4", | |
"181b60d13342427db075b89d1c4e41ae", | |
"79d2db87a96147c89f0c7cfee9434586", | |
"5f2d69cdcb9044d08a4b3612c5e2fdab", | |
"51563a0ecf894795a1c6ae5395627550", | |
"258787ecfa794cc195715e287896bc02", | |
"fa79ef6c138148a5912d4828f0efb7d1", | |
"4b260518d4494b1680717f5689b06244", | |
"1c16f39f7b604be6aa808860ceb34b10", | |
"29aac4695f33405b8060137447c628e8", | |
"f0694ea533554cdf9e2d9a4ba7620c14", | |
"cdc9b13c9c7749cabcce961d5ff52f6a", | |
"333213a4b69b4a93b98d0d9175ca550e", | |
"26e8097b43f84472b073624e90d94358", | |
"b89ae8937da64dc3b652c7919f4695d4", | |
"29385323a0dc46de957ec53ce95eb156" | |
] | |
}, | |
"outputId": "58c9062d-cc24-4c87-cc12-3ea8ef974397" | |
}, | |
"source": [ | |
"model = T5ForConditionalGeneration.from_pretrained('t5-small')\n", | |
"tokenizer = T5Tokenizer.from_pretrained('t5-small')\n", | |
"device = torch.device('cpu')" | |
], | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "6c0c65a095e64277b92e369a59b2b613", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1197.0, style=ProgressStyle(description…" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "e3e74fcc9cc24c7a8121f16d3b5739c4", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=242065649.0, style=ProgressStyle(descri…" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "1c16f39f7b604be6aa808860ceb34b10", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, style=ProgressStyle(descripti…" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "CNDjw5u-ewgl", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Get text to be summarized" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "C-XgZUQgEyia", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 221 | |
}, | |
"outputId": "17fb15e9-a681-4f2c-d3e1-8d4f52a1de96" | |
}, | |
"source": [ | |
"!mkdir /content/data\n", | |
"%cd /content/data\n", | |
"!wget -O article.txt https://pastebin.com/raw/JumkZCTB" | |
], | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/content/data\n", | |
"--2020-07-27 08:44:01-- https://pastebin.com/raw/JumkZCTB\n", | |
"Resolving pastebin.com (pastebin.com)... 104.23.98.190, 104.23.99.190, 2606:4700:10::6817:63be, ...\n", | |
"Connecting to pastebin.com (pastebin.com)|104.23.98.190|:443... connected.\n", | |
"HTTP request sent, awaiting response... 200 OK\n", | |
"Length: unspecified [text/plain]\n", | |
"Saving to: ‘article.txt’\n", | |
"\n", | |
"article.txt [ <=> ] 2.80K --.-KB/s in 0s \n", | |
"\n", | |
"2020-07-27 08:44:02 (34.0 MB/s) - ‘article.txt’ saved [2869]\n", | |
"\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "IZdJo-cmFf6J", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"with open(\"/content/data/article.txt\") as f:\n", | |
" text = f.read()" | |
], | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "zWFqAwNGe1p5", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Preprocessing" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "QOYTzU8Ke3qE", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 71 | |
}, | |
"outputId": "c3a82fbb-ef2b-45f3-dd31-e100f8f71ede" | |
}, | |
"source": [ | |
"preprocess_text = text.strip().replace(\"\\n\", \"\")\n", | |
"t5_prepared_Text = \"summarize: \" + preprocess_text\n", | |
"print(\"original text preprocessed:\\n\", preprocess_text)" | |
], | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"original text preprocessed:\n", | |
" A double for Aubameyang as Arsenal stuns Manchester City in FA Cup semifinalPAUL GITTINGS JULY 18, 2020(CNN)Pierre-Emerick Aubameyang scored in each half as Arsenal stunned holder Manchester City 2-0 at Wembley on Saturday to reach a record 21st FA Cup final.The Gabonese striker struck in the 19th and 71st minutes as Arsenal coach Mikel Arteta gained bragging rights over Pepe Guardiola, his old mentor at City.Manchester City, aiming for a domestic cup double having won the English League Cup before the lockdown, could not make the most of the lion's share of possession as Arsenal defended stoutly and looked ever dangerous in counterattack.The opening goal came in this fashion as the Gunners launched a sweet passing move from inside their own half and Nicolas Pepe found Aubameyang, who finished neatly at the far post, all the more creditable having missed another chance moments before.The same pattern continued with City unable to capitalize on its pressure before Arsenal's Shkodran Mustafi forced a fine save from City keeper Ederson just before the break.Breakaway second goalCity, who had its European football ban lifted by a ruling earlier this week, had a second-half penalty appeal turned down by referee John Moss and the video assistant referee for a Mustafi challenge on Raheem Sterling, while Arsenal's rock-solid Emiliano Martinez saved well from City's Riyad Mahrez.The near misses came back to haunt the cupholder as Arsenal broke forward and Aubameyang raced onto Kieran Tierney's pass to shoot home his second past Ederson.David Luiz, who made a disastrous appearance for Arsenal in the first game of the restart against City, then epitomized his side's improvement under Arteta by brilliantly blocking a further Sterling effort, while Aymeric Laporte's thunderbolt effort went just wide.At the end it was a smiling Arteta, formerly an assistant manager to Guardiola at City, who was celebrating in a cavernous and empty Wembley, normally filled to capacity for such a big match.Luiz paid tribute to Arteta's influence since taking over at the North London club late last year, with a 2-1 victory over new Premier League champion Liverpool earlier this week further sign of progress.\"We have an amazing coach but we can't go from 0 to 100. We are improving. The spirit was great and I'm happy for the team because they deserve it,\" Luiz told BT Sport.\"We have a final to play this season and we will try and win a title for this club because this club deserves to win a trophy,\" the Brazilian added.The cup final is set for August 1 and Arsenal will face the winner of the second semifinal on Sunday between Chelsea and Manchester United.City will now focus on its Champions League dream with the second leg of its last 16 tie against new Spanish champion Real Madrid next month.\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "ICWuZSoke896", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Tokenize" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "MYS4Sb3Je-hd", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "6d567bff-9ffb-4cd0-b518-6352fa55a419" | |
}, | |
"source": [ | |
"tokenized_text = tokenizer.encode(t5_prepared_Text, return_tensors=\"pt\").to(device)" | |
], | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Token indices sequence length is longer than the specified maximum sequence length for this model (700 > 512). Running this sequence through the model will result in indexing errors\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "dTYu-hGGfBcf", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Summarize" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "IXFujSBGfC6z", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 105 | |
}, | |
"outputId": "844e7a5f-61b3-4a1e-d779-de50d96dd9c3" | |
}, | |
"source": [ | |
"summary_ids = model.generate(\n", | |
" tokenized_text,\n", | |
" num_beams=4,\n", | |
" no_repeat_ngram_size=2,\n", | |
" min_length=30,\n", | |
" max_length=100,\n", | |
" early_stopping=True\n", | |
")\n", | |
"\n", | |
"output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)\n", | |
"\n", | |
"print(\"\\n\\nSummarized text:\\n\", output)" | |
], | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"\n", | |
"Summarized text:\n", | |
" arsenal beat holder Manchester City 2-0 at wembley to reach a record 21st FA Cup final. Pierre-Emerick Aubameyang scored in each half as arsenal coach Mikel Arteta gained bragging rights over pepe Guardiola, his old mentor at city.\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "8qPRaUq6A6v_", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": 8, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment