Created
May 26, 2024 17:56
-
-
Save kleinlennart/17721697b03b9e4c3e00417e189c20a8 to your computer and use it in GitHub Desktop.
zotero_remove_eprint_extra.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"authorship_tag": "ABX9TyMN2Bb29302uSN0wMr39SHs", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/kleinlennart/17721697b03b9e4c3e00417e189c20a8/zotero_remove_eprint_extra.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# https://github.com/urschrei/pyzotero\n", | |
"!pip install pyzotero" | |
], | |
"metadata": { | |
"id": "9744xJ9I7ft1" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "99s3W0fI3c-A" | |
}, | |
"outputs": [], | |
"source": [ | |
"import json\n", | |
"import os\n", | |
"import re\n", | |
"\n", | |
"from pyzotero import zotero" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Zotero API Setup\n", | |
"\n", | |
"**library_id**\n", | |
"\n", | |
"- Got to https://www.zotero.org/settings/keys (i.e., `Home > Settings > Feeds/API`)\n", | |
"- \"Your userID for use in API calls is XXXXXXX\"\n", | |
"\n", | |
"**api_key**\n", | |
"\n", | |
"- Create a new API key here: https://www.zotero.org/settings/keys/new\n", | |
"- Make sure to also tick \"Allow write access\"" | |
], | |
"metadata": { | |
"id": "cOu-5nKS-Bu8" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"library_id = \"INSERT-YOUR-LIBRARY-ID\"\n", | |
"api_key = \"INSERT-YOUR-API-KEY\"\n", | |
"\n", | |
"library_type = \"user\" # or \"group\"\n", | |
"\n", | |
"zot = zotero.Zotero(library_id, library_type, api_key)" | |
], | |
"metadata": { | |
"id": "FZSiMHL-6-aP" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Getting Data" | |
], | |
"metadata": { | |
"id": "XlhRncUP-EnU" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"## Test API\n", | |
"# items = zot.top(limit=5)\n", | |
"# items" | |
], | |
"metadata": { | |
"id": "HU66UOmx9Dnn" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"## Get all items (and save to json)\n", | |
"all_items = zot.everything(zot.top())\n", | |
"\n", | |
"# with open(\"data/all_items.json\", \"w\") as file:\n", | |
"# json.dump(all_items, file)" | |
], | |
"metadata": { | |
"id": "oCcyTPIj7BoQ" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"## Reload data if need be\n", | |
"# with open(\"../data/all_items.json\", \"r\") as file:\n", | |
"# all_items = json.load(file)" | |
], | |
"metadata": { | |
"id": "Tz9MAiBz9LO5" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"len(all_items)" | |
], | |
"metadata": { | |
"id": "QlTQGK8t8LfH" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Find and remove '_eprint'" | |
], | |
"metadata": { | |
"id": "Sfr-DVu29vbE" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"## Find all items with \"_eprint\" in extra field\n", | |
"eprint_elements = [\n", | |
" item for item in all_items if \"_eprint:\" in item.get(\"data\", {}).get(\"extra\", \"\")\n", | |
"]\n", | |
"len(eprint_elements)" | |
], | |
"metadata": { | |
"id": "qiGaHOtB7F1S" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"## Remove _eprint data\n", | |
"pattern = r\"_eprint:[^\\n]*\\n\"\n", | |
"\n", | |
"for item in eprint_elements:\n", | |
" item['data']['extra'] = re.sub(pattern, \"\", item['data']['extra'])" | |
], | |
"metadata": { | |
"id": "f2gadiz89spv" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Update library with API" | |
], | |
"metadata": { | |
"id": "sw8q8iIR93tQ" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"## First only try updating one itme\n", | |
"# CAUTION: uncomment to avoid accidental API write runs\n", | |
"# single_item = eprint_elements[0]\n", | |
"# single_item[\"data\"][\"extra\"]\n", | |
"# single_item[\"key\"] # search in Zotero\n", | |
"\n", | |
"# zot.update_item(single_item)" | |
], | |
"metadata": { | |
"id": "u8ZXm6KE7O8H" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# zot.update_items(eprint_elements)" | |
], | |
"metadata": { | |
"id": "M1ssbB89-Qq7" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment