Last active
March 7, 2022 15:15
-
-
Save callahantiff/b72339a8d6fb7de31e1912039947605a to your computer and use it in GitHub Desktop.
PatientSimilarity: Exploring the Impact of different Entities and Domains for Rare Disease Phenotyping.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- RARE DISEASE - RANDOM PATIENTS: Query is designed to retrieve on 10,000 random patients | |
-- The query seraches only among patients having >9 visits | |
-- Lasted edited on: 05/04/2018 | |
SELECT v.person_id, count(v.visit_occurrence_id) AS count, p_dat.cond, p_dat.cond_num | |
FROM CHCO_DeID_Apr2018.visit_occurrence v | |
RIGHT JOIN | |
(SELECT person_id, 'Rand' AS cond, 99 AS cond_num FROM CHCO_DeID_Apr2018.person | |
WHERE person_id NOT IN | |
(SELECT v.person_id | |
FROM CHCO_DeID_Apr2018.visit_occurrence v | |
RIGHT JOIN | |
(SELECT * FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Cases2) AS p_dat | |
ON v.person_id = p_dat.person_id | |
GROUP BY v.person_id | |
HAVING count(v.visit_occurrence_id) > 9) | |
ORDER BY RAND()) AS p_dat | |
ON v.person_id = p_dat.person_id | |
WHERE v.person_id IS NOT null AND v.visit_occurrence_id IS NOT null | |
GROUP BY v.person_id, p_dat.cond, p_dat.cond_num | |
HAVING count > 9 | |
ORDER BY count | |
LIMIT 11000; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- CONDITIONS: Query is designed to retrieve condition information on 10,000 random patients | |
-- The query returns all concepts and raw source values | |
-- Lasted edited on: 05/24/2018 | |
SELECT * FROM | |
(SELECT m.person_id, | |
m.condition_concept_id AS omop_cond_id, | |
c1.concept_name AS source_cond_label, | |
m.condition_source_value AS condition_source_value, | |
m.condition_start_date AS cond_date, | |
m.visit_occurrence_id AS visit_id, | |
v.visit_start_date AS visit_date | |
FROM CHCO_DeID_Apr2018.condition_occurrence m | |
JOIN CHCO_DeID_Apr2018.concept c1 | |
ON c1.concept_id = m.condition_concept_id | |
JOIN (SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Random2) AS p2 | |
ON m.person_id = p2.person_id | |
JOIN CHCO_DeID_Apr2018.visit_occurrence v | |
ON m.visit_occurrence_id = v.visit_occurrence_id | |
WHERE | |
c1.domain_id = 'Condition' | |
AND c1.vocabulary_id = 'SNOMED' | |
AND m.condition_concept_id != 0 | |
order by m.person_id, m.condition_start_date) | |
UNION ALL | |
(SELECT m.person_id, | |
m.condition_concept_id AS omop_cond_id, | |
'NONE' as source_cond_label, | |
m.condition_source_value AS condition_source_value, | |
m.condition_start_date AS cond_date, | |
m.visit_occurrence_id AS visit_id, | |
v.visit_start_date AS visit_date | |
FROM CHCO_DeID_Apr2018.condition_occurrence m | |
JOIN CHCO_DeID_Apr2018.visit_occurrence v | |
ON m.visit_occurrence_id = v.visit_occurrence_id | |
JOIN (SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Random2) AS p2 | |
ON m.person_id = p2.person_id | |
where m.condition_concept_id != 0 | |
order by m.person_id, m.condition_start_date) | |
ORDER BY person_id, visit_date; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- DEMOGRAPHICS: Query is designed to retrieve demographic information on 10,000 random patients | |
-- Lasted edited on: 05/04/2018 | |
SELECT | |
p.person_id, | |
p.birth_datetime, | |
c1.concept_name AS gender, | |
c2.concept_name AS ethnicity, | |
c3.concept_name AS race, | |
p.care_site_id, | |
p.location_id | |
FROM CHCO_DeID_Apr2018.person p | |
JOIN CHCO_DeID_Apr2018.concept c1 | |
ON c1.concept_id = p.gender_concept_id | |
JOIN CHCO_DeID_Apr2018.concept c2 | |
ON c2.concept_id = p.ethnicity_concept_id | |
JOIN CHCO_DeID_Apr2018.concept c3 | |
ON c3.concept_id = p.race_concept_id | |
WHERE person_id IN | |
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Random2) | |
GROUP BY p.person_id, p.birth_datetime, gender, ethnicity, race, p.care_site_id, p.location_id; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- MEASUREMENTS: Query is designed to retrieve lab information on 10,000 random patients | |
-- The query returns all concepts and raw source values | |
-- Lasted edited on: 05/24/2018 | |
SELECT * FROM | |
(SELECT m.person_id AS pat_id, | |
m.measurement_concept_id AS omop_cond_id, --OMOP concept id | |
c1.concept_name as source_cond_label, | |
m.measurement_source_value AS measurement_source_value, | |
m.visit_occurrence_id AS visit_id, | |
v.visit_start_date as visit_date | |
FROM CHCO_DeID_Apr2018.measurement m | |
JOIN CHCO_DeID_Apr2018.concept c1 | |
ON c1.concept_id = m.measurement_concept_id | |
join CHCO_DeID_Apr2018.visit_occurrence v | |
on v.visit_occurrence_id = m.visit_occurrence_id | |
WHERE c1.domain_id = 'Measurement' | |
AND c1.vocabulary_id = 'LOINC' | |
AND m.person_id IN | |
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Random2) | |
AND m.measurement_concept_id != 0 | |
order by visit_date) | |
UNION ALL | |
(SELECT m.person_id AS pat_id, | |
m.measurement_concept_id AS omop_cond_id, --OMOP concept id | |
'NONE' as source_cond_label, | |
m.measurement_source_value AS measurement_source_value, | |
m.visit_occurrence_id AS visit_id, | |
v.visit_start_date as visit_date | |
FROM CHCO_DeID_Apr2018.measurement m | |
join CHCO_DeID_Apr2018.visit_occurrence v | |
on v.visit_occurrence_id = m.visit_occurrence_id | |
WHERE m.person_id IN | |
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Random2) | |
AND m.measurement_concept_id != 0 | |
order by visit_date); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- MEDICATIONS: Query is designed to retrieve medication information on 10,000 random patients | |
-- The query returns all concepts and raw source values | |
-- Lasted edited on: 05/24/2018 | |
SELECT * FROM | |
(SELECT m.person_id AS pat_id, | |
m.drug_concept_id AS omop_cond_id, --OMOP concept id | |
c1.concept_name AS source_cond_label, | |
m.drug_source_value AS drug_source_value, | |
m.visit_occurrence_id AS visit_id, | |
v.visit_start_date as visit_date | |
FROM CHCO_DeID_Apr2018.drug_exposure m | |
JOIN CHCO_DeID_Apr2018.concept c1 | |
ON c1.concept_id = m.drug_concept_id | |
JOIN CHCO_DeID_Apr2018.visit_occurrence v | |
ON m.visit_occurrence_id = v.visit_occurrence_id | |
WHERE c1.domain_id = 'Drug' | |
AND c1.vocabulary_id = 'RxNorm' | |
AND m.person_id IN | |
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Random2) | |
AND m.drug_concept_id != 0 | |
order by visit_date) | |
UNION ALL | |
(SELECT m.person_id AS pat_id, | |
m.drug_concept_id AS omop_cond_id, --OMOP concept id | |
'NONE' AS source_cond_label, | |
m.drug_source_value AS drug_source_value, | |
m.visit_occurrence_id AS visit_id, | |
v.visit_start_date as visit_date | |
FROM CHCO_DeID_Apr2018.drug_exposure m | |
JOIN CHCO_DeID_Apr2018.visit_occurrence v | |
ON m.visit_occurrence_id = v.visit_occurrence_id | |
WHERE m.person_id IN | |
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Random2) | |
AND m.drug_concept_id != 0 | |
order by visit_date); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- RARE DISEASE CASES: Query is designed to retrieve test-cases having specific concept codes indicative of CF, SC, CH, and PKU | |
-- The query seraches only among patients having >9 visits | |
-- Lasted edited on: 05/04/2018 | |
SELECT | |
v.person_id, | |
count(v.visit_occurrence_id) AS count, | |
p_dat.cond, | |
p_dat.cond_num, | |
p_dat.cond_start, | |
p_dat.age_at_dx | |
FROM CHCO_DeID_Apr2018.visit_occurrence v | |
JOIN CHCO_DeID_Apr2018.person d | |
ON d.person_id = v.person_id | |
RIGHT JOIN | |
(SELECT * FROM | |
(SELECT co.person_id,'PKU' AS cond, 0 AS cond_num, | |
min(co.condition_start_date) AS cond_start, | |
DATE_DIFF(min(co.condition_start_date), CAST(DATE(p.birth_datetime) AS DATE), DAY)/365.25 AS age_at_dx | |
FROM CHCO_DeID_Apr2018.condition_occurrence co | |
JOIN CHCO_DeID_Apr2018.concept c1 | |
ON c1.concept_id = co.condition_concept_id | |
JOIN CHCO_DeID_Apr2018.person p | |
ON p.person_id = co.person_id | |
WHERE | |
c1.vocabulary_id = 'SNOMED' | |
AND c1.domain_id = 'Condition' | |
AND co.condition_concept_id IN (432872) | |
GROUP BY co.person_id, cond, cond_num, p.birth_datetime | |
ORDER BY co.person_id) | |
UNION ALL | |
(SELECT co.person_id,'CH' AS cond, 1 AS cond_num, | |
min(co.condition_start_date) AS cond_start, | |
DATE_DIFF(min(co.condition_start_date), CAST(DATE(p.birth_datetime) AS DATE), DAY)/365.25 AS age_at_dx | |
FROM CHCO_DeID_Apr2018.condition_occurrence co | |
JOIN CHCO_DeID_Apr2018.concept c1 | |
ON c1.concept_id = co.condition_concept_id | |
JOIN CHCO_DeID_Apr2018.person p | |
ON p.person_id = co.person_id | |
WHERE | |
c1.vocabulary_id = 'SNOMED' | |
AND c1.domain_id = 'Condition' | |
AND co.condition_concept_id IN (4314093, 4130017, 4081998) | |
GROUP BY co.person_id, cond, cond_num, p.birth_datetime | |
ORDER BY co.person_id) | |
UNION ALL | |
(SELECT co.person_id,'SC' AS cond, 3 AS cond_num, | |
min(co.condition_start_date) AS cond_start, | |
DATE_DIFF(min(co.condition_start_date), CAST(DATE(p.birth_datetime) AS DATE), DAY)/365.25 AS age_at_dx | |
FROM CHCO_DeID_Apr2018.condition_occurrence co | |
JOIN CHCO_DeID_Apr2018.concept c1 | |
ON c1.concept_id = co.condition_concept_id | |
JOIN CHCO_DeID_Apr2018.person p | |
ON p.person_id = co.person_id | |
WHERE | |
c1.vocabulary_id = 'SNOMED' | |
AND c1.domain_id = 'Condition' | |
AND co.condition_concept_id IN (30683, 22281, 443726, 26942, 196943, 443721, 254062, 25518, 321263, 443738, 40485018) | |
GROUP BY co.person_id, cond, cond_num, p.birth_datetime | |
ORDER BY co.person_id) | |
UNION ALL | |
(SELECT co.person_id,'CF' AS cond, 4 AS cond_num, | |
min(co.condition_start_date) AS cond_start, | |
DATE_DIFF(min(co.condition_start_date), CAST(DATE(p.birth_datetime) AS DATE), DAY)/365.25 AS age_at_dx | |
FROM CHCO_DeID_Apr2018.condition_occurrence co | |
JOIN CHCO_DeID_Apr2018.concept c1 | |
ON c1.concept_id = co.condition_concept_id | |
JOIN CHCO_DeID_Apr2018.person p | |
ON p.person_id = co.person_id | |
WHERE | |
c1.vocabulary_id = 'SNOMED' | |
AND c1.domain_id = 'Condition' | |
AND co.condition_concept_id IN (254320, 194325, 441267, 434615, 193174, 40479565) | |
GROUP BY co.person_id, cond, cond_num, p.birth_datetime | |
ORDER BY co.person_id)) AS p_dat | |
ON v.person_id = p_dat.person_id | |
GROUP BY v.person_id, p_dat.cond, p_dat.cond_num, p_dat.cond_start, p_dat.age_at_dx | |
HAVING count > 9 | |
ORDER BY count; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- CONDITIONS: Query is designed to retrieve condition information on Rare Disease Cases | |
-- The query returns all concepts and raw source values | |
-- Lasted edited on: 05/24/2018 | |
SELECT * FROM | |
(SELECT m.person_id, | |
m.condition_concept_id AS omop_cond_id, | |
c1.concept_name AS source_cond_label, | |
m.condition_source_value AS condition_source_value, | |
p2.cond_start, | |
m.condition_start_date AS cond_date, | |
m.visit_occurrence_id AS visit_id, | |
v.visit_start_date AS visit_date | |
FROM CHCO_DeID_Apr2018.condition_occurrence m | |
JOIN CHCO_DeID_Apr2018.concept c1 | |
ON c1.concept_id = m.condition_concept_id | |
JOIN (SELECT person_id, cond_start FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Cases2) AS p2 | |
ON m.person_id = p2.person_id | |
JOIN CHCO_DeID_Apr2018.visit_occurrence v | |
ON m.visit_occurrence_id = v.visit_occurrence_id | |
WHERE | |
c1.domain_id = 'Condition' | |
AND c1.vocabulary_id = 'SNOMED' | |
order by m.person_id, m.condition_start_date) | |
UNION ALL | |
(SELECT m.person_id, | |
m.condition_concept_id AS omop_cond_id, | |
'NONE' as source_cond_label, | |
m.condition_source_value AS condition_source_value, | |
p2.cond_start, | |
m.condition_start_date AS cond_date, | |
m.visit_occurrence_id AS visit_id, | |
v.visit_start_date AS visit_date | |
FROM CHCO_DeID_Apr2018.condition_occurrence m | |
JOIN CHCO_DeID_Apr2018.visit_occurrence v | |
ON m.visit_occurrence_id = v.visit_occurrence_id | |
JOIN (SELECT person_id, cond_start FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Cases2) AS p2 | |
ON m.person_id = p2.person_id | |
order by m.person_id, m.condition_start_date) | |
ORDER BY person_id, visit_date; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- DEMOGRAPHICS: Query is designed to retirve demographic information on Rare Disease Patients | |
-- Lasted edited on: 05/04/2018 | |
SELECT | |
p.person_id, | |
p.birth_datetime, | |
c1.concept_name AS gender, | |
c2.concept_name AS ethnicity, | |
c3.concept_name AS race, | |
p.care_site_id, | |
p.location_id | |
FROM CHCO_DeID_Apr2018.person p | |
JOIN CHCO_DeID_Apr2018.concept c1 | |
ON c1.concept_id = p.gender_concept_id | |
JOIN CHCO_DeID_Apr2018.concept c2 | |
ON c2.concept_id = p.ethnicity_concept_id | |
JOIN CHCO_DeID_Apr2018.concept c3 | |
ON c3.concept_id = p.race_concept_id | |
WHERE | |
person_id IN (SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Cases2) | |
GROUP BY p.person_id, p.birth_datetime, gender, ethnicity, race, p.care_site_id, p.location_id; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- MEDICATIONS: Query is designed to retrieve lab information on Rare Disease Cases | |
-- The query returns all concepts and raw source values | |
-- Lasted edited on: 05/24/2018 | |
SELECT * FROM | |
(SELECT m.person_id AS pat_id, | |
m.measurement_concept_id AS omop_cond_id, --OMOP concept id | |
c1.concept_name as source_cond_label, | |
m.measurement_source_value AS measurement_source_value, | |
m.visit_occurrence_id AS visit_id, | |
v.visit_start_date as visit_date | |
FROM CHCO_DeID_Apr2018.measurement m | |
JOIN CHCO_DeID_Apr2018.concept c1 | |
ON c1.concept_id = m.measurement_concept_id | |
join CHCO_DeID_Apr2018.visit_occurrence v | |
on v.visit_occurrence_id = m.visit_occurrence_id | |
WHERE c1.domain_id = 'Measurement' | |
AND c1.vocabulary_id = 'LOINC' | |
AND m.person_id IN | |
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Cases2) | |
order by visit_date) | |
UNION ALL | |
(SELECT m.person_id AS pat_id, | |
m.measurement_concept_id AS omop_cond_id, --OMOP concept id | |
'NONE' as source_cond_label, | |
m.measurement_source_value AS measurement_source_value, | |
m.visit_occurrence_id AS visit_id, | |
v.visit_start_date as visit_date | |
FROM CHCO_DeID_Apr2018.measurement m | |
join CHCO_DeID_Apr2018.visit_occurrence v | |
on v.visit_occurrence_id = m.visit_occurrence_id | |
WHERE m.person_id IN | |
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Cases2) | |
order by visit_date); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- MEDICATIONS: Query is designed to retrieve medication information on Rare Disease Cases | |
-- The query returns all concepts and raw source values | |
-- Lasted edited on: 05/24/2018 | |
SELECT * FROM | |
(SELECT m.person_id AS pat_id, | |
m.drug_concept_id AS omop_cond_id, --OMOP concept id | |
c1.concept_name AS source_cond_label, | |
m.drug_source_value AS drug_source_value, | |
m.visit_occurrence_id AS visit_id, | |
v.visit_start_date as visit_date | |
FROM CHCO_DeID_Apr2018.drug_exposure m | |
JOIN CHCO_DeID_Apr2018.concept c1 | |
ON c1.concept_id = m.drug_concept_id | |
JOIN CHCO_DeID_Apr2018.visit_occurrence v | |
ON m.visit_occurrence_id = v.visit_occurrence_id | |
WHERE c1.domain_id = 'Drug' | |
AND c1.vocabulary_id = 'RxNorm' | |
AND m.person_id IN | |
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Cases2) | |
order by visit_date) | |
UNION ALL | |
(SELECT m.person_id AS pat_id, | |
m.drug_concept_id AS omop_cond_id, --OMOP concept id | |
'NONE' AS source_cond_label, | |
m.drug_source_value AS drug_source_value, | |
m.visit_occurrence_id AS visit_id, | |
v.visit_start_date as visit_date | |
FROM CHCO_DeID_Apr2018.drug_exposure m | |
JOIN CHCO_DeID_Apr2018.visit_occurrence v | |
ON m.visit_occurrence_id = v.visit_occurrence_id | |
WHERE | |
m.person_id IN | |
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Cases2) | |
order by visit_date); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment