Created
April 29, 2015 10:28
-
-
Save peterjc/5ebbf446d799f3aaa639 to your computer and use it in GitHub Desktop.
Rough script for populating Galaxy .shed.yml files with metadata already in Tool Shed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Walks the specified folders looking for .shed.yml files
# with at least owner and name given.
#
# Matches the owner/name with the remote Tool Shed, and
# takes any missing metadata from the remote Tool Shed.
#
# Pre-existing local data should otherwise be preserved.
#
# Does the yaml dump with some hackery because I couldn't
# work out how to make the library use the layout I wanted.
import yaml | |
import os | |
import sys | |
from bioblend import toolshed | |
# The Test Tool Shed is queried because, at the time of writing, it
# reports categories whereas the main Tool Shed does not. Bug?
shed_url = "https://testtoolshed.g2.bx.psu.edu/"
tsi = toolshed.ToolShedInstance(shed_url)

# Fetch the full repository and category listings once, up front;
# everything below works from these two in-memory lists.
repos = tsi.repositories.get_repositories()
cats = tsi.repositories.get_categories()
assert repos
assert cats

# The .shed.yml fields this script knows about, in the order in
# which they are written out.
keys = (
    "name",
    "owner",
    "homepage_url",
    "remote_repository_url",
    "description",
    "long_description",
    "categories",
    "type",
)
def shed_repo_config(shed_yaml_path):
    """Load the contents of a ``.shed.yml`` file.

    Returns the parsed YAML document, or an empty dict when the file
    does not exist or contains no document at all (e.g. it is empty).
    """
    if not os.path.exists(shed_yaml_path):
        return {}
    with open(shed_yaml_path, "r") as f:
        # safe_load only builds plain Python objects, so a malicious or
        # broken file cannot trigger arbitrary object construction. A bare
        # yaml.load(f) is also rejected outright by recent PyYAML releases,
        # which require an explicit Loader argument.
        data = yaml.safe_load(f)
    # An empty file parses to None; report it the same way as a missing file
    # so callers always get something they can index by key.
    if data is None:
        return {}
    return data
def save_changes(shed_yaml_path, new_data):
    """Append the fields in ``new_data`` to the file ``shed_yaml_path``.

    Fields are written in the order of the module-level ``keys`` tuple;
    entries of ``new_data`` whose key is not listed in ``keys`` are ignored.
    A list value is written as a YAML block sequence, any other value is
    passed through ``yaml.dump`` and written as ``key: value``.

    The file is opened in append mode: existing content is left untouched
    and the new fields always end up at the bottom of the file.
    """
    # Appending to a file whose last line has no newline would glue the
    # first new key onto that line, so check whether one must be added.
    needs_newline = False
    if os.path.isfile(shed_yaml_path):
        with open(shed_yaml_path, "rb") as f:
            existing = f.read()
        needs_newline = bool(existing) and not existing.endswith(b"\n")

    # Quick and dirty - append mode, does not give ideal order
    with open(shed_yaml_path, "a") as f:
        if needs_newline:
            f.write("\n")
        for k in keys:
            if k not in new_data:
                continue
            v = new_data[k]
            if isinstance(v, list):
                # NOTE(review): list items are written verbatim, without
                # any YAML quoting or escaping.
                f.write("%s:\n- %s\n" % (k, "\n- ".join(v)))
                continue
            v = yaml.dump(v).rstrip()
            if v.endswith("\n..."):
                # Dumping a bare scalar appears to add a "..." document
                # end marker on its own line; drop it.
                v = v[:-4].rstrip()
            # Strip redundant quote chars...
            # NOTE(review): quotes added by yaml.dump may not always be
            # redundant (e.g. a value containing ": ") - confirm.
            if v.count("'") == 2 and v[0] == "'" and v[-1] == "'":
                v = v[1:-1]
            # Only a leftover marker line is an error; a value that merely
            # contains an ellipsis ("Fast aligner...") is legitimate.
            assert "..." not in v.splitlines(), "%r --> %r" % (new_data[k], v)
            f.write("%s: %s\n" % (k, v))
def find_remote(repos, name, owner):
    """Return the first entry of ``repos`` matching ``name`` and ``owner``.

    Each entry is indexed by its "name" and "owner" keys. Raises
    ``KeyError`` when no entry matches both values.
    """
    candidates = (
        entry for entry in repos
        if entry["name"] == name and entry["owner"] == owner
    )
    match = next(candidates, None)
    if match is None:
        raise KeyError("Tool Shed missing %s/%s" % (owner, name))
    return match
def sync_changes(local, remote):
    """Work out which fields should be added to the local ``.shed.yml``.

    ``local`` is the parsed local configuration and ``remote`` the matching
    Tool Shed record; both must carry the same "name" and "owner".

    Every field listed in the module-level ``keys`` that is absent locally
    but present remotely is taken (as a string) from the remote record.
    Categories are handled separately: the remote "category_ids" are
    translated to names via the module-level ``cats`` list, and the sorted
    names are recorded when the local file has no categories or when the
    local set of categories differs from the remote one.

    Returns a dict of the fields to write; it is empty when there is
    nothing to change.
    """
    updated = dict()
    # Take the identifiers from the arguments rather than relying on
    # module-level variables that happen to be set by the calling loop.
    name = local["name"]
    owner = local["owner"]
    assert name == remote["name"]
    assert owner == remote["owner"]
    for k in keys:
        if k not in local and k in remote:
            print("Recording %s/%s field %s" % (owner, name, k))
            updated[k] = str(remote[k])
    remote_cat_ids = remote.get("category_ids", [])
    if not remote_cat_ids:
        print("Tool Shed did not tell us the categories for %s/%s" % (owner, name))
    else:
        # Translate category IDs into their human readable names.
        remote_categories = sorted(
            str(x["name"]) for x in cats if x["id"] in remote_cat_ids
        )
        local_categories = local.get("categories", [])
        if remote_categories and not local_categories:
            print("Recording %s/%s field categories" % (owner, name))
            updated["categories"] = remote_categories
        elif set(remote_categories) != set(local_categories):
            # Order is ignored when comparing; the remote list wins.
            print("REPLACING %s/%s field categories" % (owner, name))
            updated["categories"] = remote_categories
    return updated
# Main loop: walk every folder named on the command line and bring each
# .shed.yml found there up to date with the remote Tool Shed record.
for root in sys.argv[1:]:
    print("Walking %s" % root)
    for (dirpath, dirnames, filenames) in os.walk(root):
        if ".shed.yml" not in filenames:
            continue
        # dirpath as yielded by os.walk already begins with root; joining
        # root in again would duplicate it whenever root is a relative path.
        shed_yml = os.path.join(dirpath, ".shed.yml")
        assert os.path.isfile(shed_yml), shed_yml
        print(shed_yml)
        local = shed_repo_config(shed_yml)
        assert local is not None, shed_yml
        try:
            name = local["name"]
            owner = local["owner"]
        except KeyError:
            print("Missing owner and/or name in %s" % shed_yml)
            continue
        try:
            remote = find_remote(repos, name, owner)
        except KeyError as e:
            # Not in remote tool shed?
            print(str(e))
            continue
        updated = sync_changes(local, remote)
        if updated:
            save_changes(shed_yml, updated)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment