SPARQL example query
1: Select all taxa from the UniProt taxonomy
SELECT ?taxon FROM <http://sparql.uniprot.org/taxonomy> WHERE { ?taxon a up:Taxon . }Use1: Select all taxa from the UniProt taxonomy
PREFIX up: <http://purl.uniprot.org/core/> SELECT ?taxon FROM <http://sparql.uniprot.org/taxonomy> WHERE { ?taxon a up:Taxon . }Use2: Select all bacterial taxa and their scientific name from the UniProt taxonomy
SELECT ?taxon ?name WHERE { ?taxon a up:Taxon . ?taxon up:scientificName ?name . # Taxon subclasses are materialized, do not use rdfs:subClassOf+ ?taxon rdfs:subClassOf taxon:2 . }Use2: Select all bacterial taxa and their scientific name from the UniProt taxonomy
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX taxon: <http://purl.uniprot.org/taxonomy/> PREFIX up: <http://purl.uniprot.org/core/> SELECT ?taxon ?name WHERE { ?taxon a up:Taxon . ?taxon up:scientificName ?name . # Taxon subclasses are materialized, do not use rdfs:subClassOf+ ?taxon rdfs:subClassOf taxon:2 . }Use3: Select all UniProt entries, and their organism and amino acid sequences (including isoforms), for E. coli K12 and all its strains
SELECT ?protein ?organism ?isoform ?sequence WHERE { ?protein a up:Protein . ?protein up:organism ?organism . # Taxon subclasses are materialized, do not use rdfs:subClassOf+ ?organism rdfs:subClassOf taxon:83333 . ?protein up:sequence ?isoform . ?isoform rdf:value ?sequence . }Use3: Select all UniProt entries, and their organism and amino acid sequences (including isoforms), for E. coli K12 and all its strains
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX taxon: <http://purl.uniprot.org/taxonomy/> PREFIX up: <http://purl.uniprot.org/core/> SELECT ?protein ?organism ?isoform ?sequence WHERE { ?protein a up:Protein . ?protein up:organism ?organism . # Taxon subclasses are materialized, do not use rdfs:subClassOf+ ?organism rdfs:subClassOf taxon:83333 . ?protein up:sequence ?isoform . ?isoform rdf:value ?sequence . }Use4: Select the UniProt entry with the mnemonic 'A4_HUMAN'
SELECT ?protein WHERE { ?protein a up:Protein . ?protein up:mnemonic 'A4_HUMAN' }Use4: Select the UniProt entry with the mnemonic 'A4_HUMAN'
PREFIX up: <http://purl.uniprot.org/core/> SELECT ?protein WHERE { ?protein a up:Protein . ?protein up:mnemonic 'A4_HUMAN' }Use5: Select a mapping of UniProt to PDB entries using the UniProt cross-references to the PDB database
SELECT ?protein ?db WHERE { ?protein a up:Protein . ?protein rdfs:seeAlso ?db . ?db up:database <http://purl.uniprot.org/database/PDB> }Use5: Select a mapping of UniProt to PDB entries using the UniProt cross-references to the PDB database
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX up: <http://purl.uniprot.org/core/> SELECT ?protein ?db WHERE { ?protein a up:Protein . ?protein rdfs:seeAlso ?db . ?db up:database <http://purl.uniprot.org/database/PDB> }Use6: Select all cross-references to external databases of the category '3D structure databases' of UniProt entries that are classified with the keyword 'Acetoin biosynthesis (KW-0005)'
SELECT DISTINCT ?link WHERE { ?protein a up:Protein . ?protein up:classifiedWith keywords:5 . ?protein rdfs:seeAlso ?link . ?link up:database ?db . ?db up:category '3D structure databases' }Use6: Select all cross-references to external databases of the category '3D structure databases' of UniProt entries that are classified with the keyword 'Acetoin biosynthesis (KW-0005)'
PREFIX keywords: <http://purl.uniprot.org/keywords/> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX up: <http://purl.uniprot.org/core/> SELECT DISTINCT ?link WHERE { ?protein a up:Protein . ?protein up:classifiedWith keywords:5 . ?protein rdfs:seeAlso ?link . ?link up:database ?db . ?db up:category '3D structure databases' }Use7: Select reviewed UniProt entries (Swiss-Prot), and their recommended protein name, that have a preferred gene name that contains the text 'DNA'
SELECT ?protein ?name WHERE { ?protein a up:Protein . ?protein up:reviewed true . ?protein up:recommendedName ?recommended . ?recommended up:fullName ?name . ?protein up:encodedBy ?gene . ?gene skos:prefLabel ?text . FILTER CONTAINS(?text, 'DNA') }Use7: Select reviewed UniProt entries (Swiss-Prot), and their recommended protein name, that have a preferred gene name that contains the text 'DNA'
PREFIX skos: <http://www.w3.org/2004/02/skos/core#> PREFIX up: <http://purl.uniprot.org/core/> SELECT ?protein ?name WHERE { ?protein a up:Protein . ?protein up:reviewed true . ?protein up:recommendedName ?recommended . ?recommended up:fullName ?name . ?protein up:encodedBy ?gene . ?gene skos:prefLabel ?text . FILTER CONTAINS(?text, 'DNA') }Use8: Select the preferred gene name and disease annotation of all human UniProt entries that are known to be involved in a disease
SELECT ?name ?text WHERE { ?protein a up:Protein . ?protein up:organism taxon:9606 . ?protein up:encodedBy ?gene . ?gene skos:prefLabel ?name . ?protein up:annotation ?annotation . ?annotation a up:Disease_Annotation . ?annotation rdfs:comment ?text }Use8: Select the preferred gene name and disease annotation of all human UniProt entries that are known to be involved in a disease
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX skos: <http://www.w3.org/2004/02/skos/core#> PREFIX taxon: <http://purl.uniprot.org/taxonomy/> PREFIX up: <http://purl.uniprot.org/core/> SELECT ?name ?text WHERE { ?protein a up:Protein . ?protein up:organism taxon:9606 . ?protein up:encodedBy ?gene . ?gene skos:prefLabel ?name . ?protein up:annotation ?annotation . ?annotation a up:Disease_Annotation . ?annotation rdfs:comment ?text }Use9: Select all human UniProt entries with a sequence variant that leads to a 'loss of function'
SELECT ?protein ?text WHERE { ?protein a up:Protein . ?protein up:organism taxon:9606 . ?protein up:annotation ?annotation . ?annotation a up:Natural_Variant_Annotation . ?annotation rdfs:comment ?text . FILTER (CONTAINS(?text, 'loss of function')) }Use9: Select all human UniProt entries with a sequence variant that leads to a 'loss of function'
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX taxon: <http://purl.uniprot.org/taxonomy/> PREFIX up: <http://purl.uniprot.org/core/> SELECT ?protein ?text WHERE { ?protein a up:Protein . ?protein up:organism taxon:9606 . ?protein up:annotation ?annotation . ?annotation a up:Natural_Variant_Annotation . ?annotation rdfs:comment ?text . FILTER (CONTAINS(?text, 'loss of function')) }Use10: Select all human UniProt entries with a sequence variant that leads to a tyrosine to phenylalanine substitution
SELECT ?protein ?annotation ?begin ?text WHERE { ?protein a up:Protein ; up:organism taxon:9606 ; up:annotation ?annotation . ?annotation a up:Natural_Variant_Annotation ; rdfs:comment ?text ; up:substitution ?substitution ; up:range/faldo:begin [ faldo:position ?begin ; faldo:reference ?sequence ] . ?sequence rdf:value ?value . BIND (substr(?value, ?begin, 1) as ?original) . FILTER(?original = 'Y' && ?substitution = 'F') . }Use10: Select all human UniProt entries with a sequence variant that leads to a tyrosine to phenylalanine substitution
PREFIX faldo: <http://biohackathon.org/resource/faldo#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX taxon: <http://purl.uniprot.org/taxonomy/> PREFIX up: <http://purl.uniprot.org/core/> SELECT ?protein ?annotation ?begin ?text WHERE { ?protein a up:Protein ; up:organism taxon:9606 ; up:annotation ?annotation . ?annotation a up:Natural_Variant_Annotation ; rdfs:comment ?text ; up:substitution ?substitution ; up:range/faldo:begin [ faldo:position ?begin ; faldo:reference ?sequence ] . ?sequence rdf:value ?value . BIND (substr(?value, ?begin, 1) as ?original) . FILTER(?original = 'Y' && ?substitution = 'F') . }Use11: Select all UniProt entries with annotated transmembrane regions and the regions' begin and end coordinates on the canonical sequence
SELECT ?protein ?begin ?end WHERE { ?protein a up:Protein . ?protein up:annotation ?annotation . ?annotation a up:Transmembrane_Annotation . ?annotation up:range ?range . ?range faldo:begin/faldo:position ?begin . ?range faldo:end/faldo:position ?end }Use11: Select all UniProt entries with annotated transmembrane regions and the regions' begin and end coordinates on the canonical sequence
PREFIX faldo: <http://biohackathon.org/resource/faldo#> PREFIX up: <http://purl.uniprot.org/core/> SELECT ?protein ?begin ?end WHERE { ?protein a up:Protein . ?protein up:annotation ?annotation . ?annotation a up:Transmembrane_Annotation . ?annotation up:range ?range . ?range faldo:begin/faldo:position ?begin . ?range faldo:end/faldo:position ?end }Use12: Select all UniProt entries that were integrated on the 30th of November 2010
SELECT ?protein WHERE { ?protein a up:Protein . ?protein up:created '2010-11-30'^^xsd:date }Use12: Select all UniProt entries that were integrated on the 30th of November 2010
PREFIX up: <http://purl.uniprot.org/core/> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> SELECT ?protein WHERE { ?protein a up:Protein . ?protein up:created '2010-11-30'^^xsd:date }Use13: Was any UniProt entry integrated on the 9th of January 2013
ASK WHERE { ?protein a up:Protein . ?protein up:created '2013-01-09'^^xsd:date }Use13: Was any UniProt entry integrated on the 9th of January 2013
PREFIX up: <http://purl.uniprot.org/core/> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> ASK WHERE { ?protein a up:Protein . ?protein up:created '2013-01-09'^^xsd:date }Use