Created
June 24, 2021 15:27
-
-
Save guillemcanal/053939cdcca17b9c4953582fc9b14184 to your computer and use it in GitHub Desktop.
Extract Disney/Pixar movies from Wikidata
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Return a list of movies produced by Disney and Pixar, one row per film.
#
# Output columns:
#   ?item                  film entity IRI
#   ?moviesType            comma-separated English labels of the film's "instance of" (P31) values
#   ?wikidataID            entity ID, i.e. ?item with the entity namespace stripped
#   ?name                  an English label of the film
#   ?firstPublicationDate  earliest publication date (P577) whose place of publication (P291) is wd:Q30
#   ?imdbID, ?imdbUrl      IMDb ID (P345) and the IMDb title URL built from it
#   ?wikipediaUrl          English Wikipedia article about the film, when one exists
#   ?duration              a duration (P2047) value, when one exists
#
# NOTE(review): the bd:/wikibase: prefixes suggest this targets the Wikidata Query Service;
# every prefix the query uses is declared here so it does not depend on endpoint defaults.
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wds: <http://www.wikidata.org/entity/statement/>
PREFIX wdv: <http://www.wikidata.org/value/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX schema: <http://schema.org/>

# GROUP BY ?item already yields one row per film, so no DISTINCT is needed.
# The pattern variables below are named differently from the projected aliases because
# SPARQL 1.1 does not allow "(expr AS ?v)" when ?v is already in scope in the WHERE clause.
SELECT
  ?item
  (GROUP_CONCAT(DISTINCT ?movieType; separator=", ") AS ?moviesType)
  (SAMPLE(?itemId) AS ?wikidataID)
  (SAMPLE(?label) AS ?name)
  (MIN(?publicationDate) AS ?firstPublicationDate)
  (SAMPLE(?imdb) AS ?imdbID)
  (SAMPLE(?imdbLink) AS ?imdbUrl)
  (SAMPLE(?article) AS ?wikipediaUrl)
  (SAMPLE(?runtime) AS ?duration)
WHERE {
  # Produced by "Disney" or "Pixar"
  # TODO: there are multiple Disney production companies; there is maybe a better way
  # to target all of them than this hand-maintained list.
  VALUES ?productionCompany { wd:Q7414 wd:Q191224 wd:Q1047410 wd:Q1323594 wd:Q127552 }
  ?item wdt:P272 ?productionCompany .

  # Match instances of "film" or of any of its subclasses
  ?item wdt:P31/wdt:P279* wd:Q11424 .

  # Exclude "animated short film" and "short film".
  # The test is on the item itself: a film that carries one of these types is dropped
  # even when it also has another, non-excluded "instance of" value.
  FILTER NOT EXISTS {
    ?item wdt:P31 ?excludedType .
    FILTER(?excludedType IN (wd:Q17517379, wd:Q24862))
  }

  # English label of every "instance of" value (concatenated into ?moviesType)
  ?item wdt:P31 ?instanceOf .
  ?instanceOf rdfs:label ?movieType .
  FILTER(LANG(?movieType) = "en")

  # Title of the movie in English. Labels are read directly through rdfs:label, so the
  # wikibase:label service is not needed; films without an English label are not returned.
  ?item rdfs:label ?label .
  FILTER(LANG(?label) = "en")

  # Publication date in the US (it may match multiple publication dates, ex: Dumbo (wd:Q40895);
  # MIN() in the projection keeps the earliest one)
  ?item p:P577 ?publicationStatement .
  ?publicationStatement pq:P291 wd:Q30 ;
                        ps:P577 ?publicationDate .

  # IMDb ID (required: films without one are not returned)
  ?item wdt:P345 ?imdb .

  # Wikidata ID: the entity IRI without its namespace
  BIND(STRAFTER(STR(?item), "http://www.wikidata.org/entity/") AS ?itemId)

  # IMDb title page built from the ID
  BIND(IRI(CONCAT("https://www.imdb.com/title/", ?imdb, "/")) AS ?imdbLink)

  # Associated Wikipedia article written in English, if any
  OPTIONAL {
    ?article schema:about ?item ;
             schema:inLanguage "en" ;
             schema:isPartOf <https://en.wikipedia.org/> .
  }

  # Duration of the film, if any
  # NOTE(review): the unit of the P2047 value is not checked here - confirm it is uniform.
  OPTIONAL { ?item wdt:P2047 ?runtime . }
}
GROUP BY ?item
# Sort on the aggregated date; the raw ?publicationDate is not available after grouping.
ORDER BY ASC(?firstPublicationDate)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment