Last active
February 21, 2025 15:24
-
-
Save slint/0a0489cf49d69eb2f0418b90b5089f66 to your computer and use it in GitHub Desktop.
Instructions to import the CSV version of the ORCID public data produced by https://github.com/slint/orcid-data-toolkit
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Replace the contents of name_metadata (PostgreSQL) with a fresh CSV dump:
-- load the data into a side table, build the constraints there, then swap the
-- two tables by renaming them inside one short transaction.
-- The "Time:" comments record the durations observed when this was last run.

-- Create a new table with the new schema (excluding constraints and indexes)
-- so the bulk load below does not pay for index maintenance row by row.
-- NOTE(review): LIKE without INCLUDING DEFAULTS does not copy column defaults,
-- and LIKE does not copy foreign keys -- confirm name_metadata relies on neither.
CREATE TABLE name_metadata_new
(LIKE name_metadata EXCLUDING CONSTRAINTS EXCLUDING INDEXES);
-- Time: 0.337s

-- Copy the CSV data into the new table, running the following in a shell
-- ("$DB_URI" is quoted so the shell does not word-split or glob the URI):
-- export DB_URI="CHANGE_ME"
-- gzip -dc orcid-names-2024.csv.gz | pv --line-mode --average-rate --timer | psql "$DB_URI" -c 'COPY name_metadata_new (created, updated, id, json, version_id, pid) FROM STDIN (FORMAT CSV);'
-- Time: 210.000s (3 minutes 30 seconds)

-- Rename the old indexes/constraints to free their names for the new table
ALTER INDEX pk_name_metadata RENAME TO pk_name_metadata_old;
ALTER INDEX uq_name_metadata_pid RENAME TO uq_name_metadata_pid_old;
-- Time: 0.042s

-- Build the primary key and unique constraint on the already-loaded data
ALTER TABLE name_metadata_new ADD CONSTRAINT pk_name_metadata PRIMARY KEY (id);
-- Time: 53.613s (53 seconds)
ALTER TABLE name_metadata_new ADD CONSTRAINT uq_name_metadata_pid UNIQUE (pid);
-- Time: 112.739s (1 minute 52 seconds)

-- Collect planner statistics for the freshly loaded table before it goes live,
-- instead of waiting for autovacuum to get to it
ANALYZE name_metadata_new;

-- Rename the tables in a transaction, so the name "name_metadata" never
-- disappears from the point of view of other sessions
BEGIN;
ALTER TABLE name_metadata RENAME TO name_metadata_old;
ALTER TABLE name_metadata_new RENAME TO name_metadata;
COMMIT;
-- Time: 0.013s

-- Drop the old table (optional)
-- NOTE(review): this is irreversible and runs if the script is executed as a
-- whole; skip it to keep name_metadata_old as a rollback copy. Without CASCADE
-- the statement is refused if other objects still depend on the old table.
DROP TABLE name_metadata_old;

-- Reindex names to OpenSearch (~6h)
-- invenio rdm rebuild-all-indices -o names
-- Time: ~1h for the command + 5h for bulk indexing
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment