SPARQL example query
15: Select all triples that relate to the EMBL CDS entry AAO89367.1:
# Return every triple the endpoint uses to describe the EMBL CDS entry AAO89367.1.
PREFIX emblcds: <http://purl.uniprot.org/embl-cds/>

DESCRIBE emblcds:AAO89367.1

# 16: Select all triples that relate to the taxon that describes Homo sapiens in the named graph for taxonomy
# Describe the resource for taxon 9606, reading only from the taxonomy named graph.
DESCRIBE <http://purl.uniprot.org/taxonomy/9606>
FROM <http://sparql.uniprot.org/taxonomy>

# 17: Select the average number of cross-references to the PDB database of UniProt entries that have at least one cross-reference to the PDB database
# Average, over all proteins that have at least one PDB cross-reference,
# of the number of distinct PDB cross-references per protein.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT (AVG(?linksToPdbPerEntry) AS ?avgLinksToPdbPerEntry)
WHERE {
  # Sub-select: one row per protein with its count of distinct PDB links.
  # No ORDER BY here: the rows only feed AVG, so sorting them is wasted work.
  SELECT ?protein (COUNT(DISTINCT ?db) AS ?linksToPdbPerEntry)
  WHERE {
    ?protein a up:Protein .
    ?protein rdfs:seeAlso ?db .
    ?db up:database <http://purl.uniprot.org/database/PDB> .
  }
  GROUP BY ?protein
}

# 18: Select the number of UniProt entries for each of the EC (Enzyme Commission) top level categories
# Count proteins per top-level EC class listed in the VALUES block.
PREFIX ec: <http://purl.uniprot.org/enzyme/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?ecClass (COUNT(?protein) AS ?size)
WHERE {
  # The seven top-level classes to report on.
  VALUES (?ecClass) {
    (ec:1.-.-.-) (ec:2.-.-.-) (ec:3.-.-.-) (ec:4.-.-.-)
    (ec:5.-.-.-) (ec:6.-.-.-) (ec:7.-.-.-)
  }
  # The enzyme may be attached to the entry itself, to a domain or to a component.
  ?protein ( up:enzyme | up:domain/up:enzyme | up:component/up:enzyme ) ?enzyme .
  # Enzyme subclasses are materialized, do not use rdfs:subClassOf+
  # (this comment must sit on its own line: a '#' comment runs to end of line).
  ?enzyme rdfs:subClassOf ?ecClass .
}
GROUP BY ?ecClass
ORDER BY ?ecClass

# 19: Find all Natural Variant Annotations if associated via an evidence tag to an article with a pubmed identifier
# List natural-variant annotations whose reified statement carries an
# attribution sourced from a journal citation.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?accession ?annotation_acc ?pubmed
WHERE {
  ?protein rdf:type up:Protein .
  ?protein up:annotation ?annotation .
  ?annotation rdf:type up:Natural_Variant_Annotation .

  # The reified statement whose object is the annotation carries the attribution.
  ?reifiedStatement rdf:object ?annotation .
  ?reifiedStatement up:attribution ?evidenceTag .
  ?evidenceTag up:source ?source .
  ?source rdf:type up:Journal_Citation .

  # Strip fixed-length IRI prefixes to leave the bare identifiers.
  # NOTE(review): offsets 33/36/35 presumably skip the uniprot/, annotation/
  # and citations/ namespaces under http://purl.uniprot.org/ - confirm.
  BIND(SUBSTR(STR(?protein), 33) AS ?accession)
  BIND(
    IF(CONTAINS(STR(?annotation), "#SIP"),
       SUBSTR(STR(?annotation), 33),
       SUBSTR(STR(?annotation), 36))
    AS ?annotation_acc
  )
  BIND(SUBSTR(STR(?source), 35) AS ?pubmed)
}

# 20: Find how often an article in pubmed was used in an evidence tag in a human protein (ordered by most used to least)
# Per journal citation, count the attributions that point at it from
# annotations of proteins whose organism is taxon 9606; most used first.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

# The result column is spelled ?attribitions; the spelling is kept so that
# existing consumers of the result set keep working.
SELECT ?source (COUNT(?attribution) AS ?attribitions)
WHERE {
  ?protein a up:Protein .
  ?protein up:organism taxon:9606 .
  ?protein up:annotation ?annotation .

  ?reifiedStatement rdf:object ?annotation .
  ?reifiedStatement up:attribution ?attribution .

  ?attribution up:source ?source .
  ?source a up:Journal_Citation .
}
GROUP BY ?source
ORDER BY DESC(?attribitions)

# 21: Find where disease related proteins are known to be located in the cell
# For proteins that have both a disease annotation and a subcellular-location
# annotation, list the disease label and the cellular component with its label.
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?disease ?location_inside_cell ?cellcmpt
WHERE {
  ?protein up:annotation ?diseaseAnnotation .
  ?protein up:annotation ?subcellAnnotation .

  ?diseaseAnnotation up:disease/skos:prefLabel ?disease .

  ?subcellAnnotation up:locatedIn/up:cellularComponent ?cellcmpt .
  ?cellcmpt skos:prefLabel ?location_inside_cell .
}

# 22: For two accessions find the GO term labels and group them into GO process,function and component
# For each listed accession, concatenate the labels of the GO terms it is
# classified with, split into cellular component, biological process and
# molecular function columns.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX GO: <http://purl.obolibrary.org/obo/GO_>

SELECT
  (CONCAT(SUBSTR(STR(?protein), 33)) AS ?uniprot)
  (GROUP_CONCAT(?celtype; separator=";") AS ?celtypes)
  (GROUP_CONCAT(?biotype; separator=";") AS ?biotypes)
  (GROUP_CONCAT(?moltype; separator=";") AS ?moltypes)
WHERE {
  VALUES (?ac) { ("Q6GZX4") ("Q96375") }
  # Build the entry IRI from the accession string.
  BIND (IRI(CONCAT("http://purl.uniprot.org/uniprot/", ?ac)) AS ?protein)
  ?protein a up:Protein .
  ?protein up:classifiedWith ?goTerm .

  # Each comment below sits on its own line: a '#' comment runs to end of
  # line, so it must never share a line with query text that follows it.

  # Determine if the type is biological_process
  OPTIONAL {
    ?goTerm rdfs:subClassOf GO:0008150 .
    ?goTerm rdfs:label ?biotype .
  }
  # Determine if the type is cellular_component
  OPTIONAL {
    ?goTerm rdfs:subClassOf GO:0005575 .
    ?goTerm rdfs:label ?celtype .
  }
  # Determine if the type is molecular_function
  OPTIONAL {
    ?goTerm rdfs:subClassOf GO:0003674 .
    ?goTerm rdfs:label ?moltype .
  }
  # Filter out the uniprot keywords
  FILTER(bound(?biotype) || bound(?celtype) || bound(?moltype))
}
GROUP BY ?protein

# 23: Number of reviewed entries (Swiss-Prot) that are related to kinase activity
# Count distinct reviewed proteins of organism taxon 9606 that are classified
# with GO:0016301 directly or with a term that is a subclass of it.
# Note that the organism restriction is part of the query.
PREFIX GO: <http://purl.obolibrary.org/obo/GO_>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT (COUNT(DISTINCT ?protein) AS ?pc)
WHERE {
  ?protein rdf:type up:Protein .
  ?protein up:reviewed true .
  ?protein up:organism taxon:9606 .
  ?protein up:classifiedWith|(up:classifiedWith/rdfs:subClassOf) GO:0016301 .
}

# 24: Find the release number of the uniprot data that is currently being queried
# Read the pav:version value(s) recorded in the endpoint's VoID description graph.
PREFIX pav: <http://purl.org/pav/>

SELECT ?version
FROM <https://sparql.uniprot.org/.well-known/void>
WHERE {
  # Any subject will do; only the version value is of interest.
  [] pav:version ?version
}

# 25: Find any uniprot entry which has a name 'HLA class I histocompatibility antigen, B-73 alpha chain'
# Find proteins whose recommended or alternative structured name has, under any
# sub-property of up:structuredNameType, exactly the literal given below.
# NOTE(review): the catalogue text introducing this query quotes the name as
# '... B-73 alpha chain' while the literal here is '... B alpha chain' - confirm
# which of the two is intended.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?anyKindOfName
WHERE {
  ?protein a up:Protein ;
           (up:recommendedName|up:alternativeName) ?structuredName .

  ?structuredName ?anyKindOfName "HLA class I histocompatibility antigen, B alpha chain" .

  # Keep only predicates that are declared kinds of structured name.
  ?anyKindOfName rdfs:subPropertyOf up:structuredNameType .
}

# 26: Find any uniprot entry, or an uniprot entries domain or component which has a name 'HLA class I histocompatibility antigen, B-73 alpha chain'
# Like a plain name lookup, but the structured name may belong to the entry
# itself or to one of its domains or components.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?anyKindOfName
WHERE {
  ?protein a up:Protein ;
           # Either a name directly on the entry, or a name reached through
           # up:domain / up:component.
           (up:recommendedName|up:alternativeName)|((up:domain|up:component)/(up:recommendedName|up:alternativeName)) ?structuredName .

  ?structuredName ?anyKindOfName "HLA class I histocompatibility antigen, B-73 alpha chain" .
  ?anyKindOfName rdfs:subPropertyOf up:structuredNameType .
}

# 27: Find all names associated with uniprot entry P05067, and if the name is associated with the entry it's domains or its components
# List every structured-name value of entry P05067. ?partType is bound to
# up:domain or up:component when the name belongs to such a part, and is left
# unbound when the name belongs to the entry itself.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?anyKindOfName ?names ?partType
WHERE {
  BIND(<http://purl.uniprot.org/uniprot/P05067> AS ?protein)
  ?protein a up:Protein .

  {
    # Names attached directly to the entry.
    ?protein (up:recommendedName|up:alternativeName) ?structuredName .
  }
  UNION
  {
    # Names attached to a domain or a component of the entry.
    VALUES ?partType { up:domain up:component }
    ?protein ?partType ?part .
    ?part (up:recommendedName|up:alternativeName) ?structuredName .
  }

  ?structuredName ?anyKindOfName ?names .
  ?anyKindOfName rdfs:subPropertyOf up:structuredNameType .
}

# 28: Get the list of uniprot entries for the chromosome of proteome UP000000625
# List reviewed proteins assigned to the Chromosome component of proteome UP000000625.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?proteome
WHERE {
  # The single proteome component of interest.
  VALUES ?proteome { <http://purl.uniprot.org/proteomes/UP000000625#Chromosome> }

  ?protein a up:Protein .
  ?protein up:reviewed true .
  ?protein up:proteome ?proteome .
}

# 29: Use ALLIE a service for Abbreviation / Long Form in Japanese and English to search in UniProt using Japanese.
# Federated lookup: ask the remote ALLIE endpoint for the English labels of the
# resource that carries the given Japanese label, then find proteins (or their
# domains/components) that have such a label as a structured name.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?englishLabelStr
WHERE {
  SERVICE <http://data.allie.dbcls.jp/sparql> {
    ?term rdfs:label "アミロイド前駆体タンパク質"@ja , ?englishLabel .
    FILTER(lang(?englishLabel) = "en")
  }
  # Drop the language tag so the value can be matched as a plain string below.
  BIND (STR(?englishLabel) AS ?englishLabelStr)

  ?protein a up:Protein .
  {
    ?protein (up:recommendedName|up:alternativeName) ?structuredName .
  }
  UNION
  {
    VALUES ?partType { up:domain up:component }
    ?protein ?partType ?part .
    ?part (up:recommendedName|up:alternativeName) ?structuredName .
  }

  ?structuredName ?anyKindOfName ?englishLabelStr .
  ?anyKindOfName rdfs:subPropertyOf up:structuredNameType .
}

# 30: Find UniProt entries with merged loci in Bordetella avium
# For proteins of organism taxon 360910, list the locus names of their encoding
# genes, keeping only proteins that have more than one locus name.
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein (GROUP_CONCAT(?locusName; separator=',') AS ?locusNames)
WHERE {
  ?protein a up:Protein .
  ?protein up:organism taxon:360910 .
  ?protein up:encodedBy ?encodingGene .
  ?encodingGene up:locusName ?locusName .
}
GROUP BY ?protein
HAVING (COUNT(?locusName) > 1)

# 31: Find UniParc records whose sequence point to the most database entries
# Rank sequences in the UniParc graph by how many entries they are a sequence for.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?sequence ?entries
WHERE {
  # Sub-select: count entries per sequence inside the uniparc named graph.
  SELECT ?sequence (COUNT(?dbEntry) AS ?entries)
  WHERE {
    GRAPH <http://sparql.uniprot.org/uniparc> {
      ?sequence up:sequenceFor ?dbEntry .
    }
  }
  GROUP BY ?sequence
}
ORDER BY DESC(?entries)

# 32: Find UniProtKB entries with more than 1 Topological domain annotation
# List proteins that have more than one topological-domain annotation, together
# with the comments of those annotations joined by ", ".
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein (GROUP_CONCAT(?comment; separator=", ") AS ?comments)
WHERE {
  ?protein a up:Protein .
  ?protein up:annotation ?annotation .

  ?annotation rdf:type up:Topological_Domain_Annotation .
  ?annotation rdfs:comment ?comment .
}
GROUP BY ?protein
HAVING (COUNT(?annotation) > 1)

# 33: Find longest comment text associated with a UniProtKB Natural Variant Annotation
# List natural-variant annotations with their comment, longest comment first.
# There is no LIMIT, so every matching annotation is returned.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?annotation ?comment
WHERE {
  ?annotation a up:Natural_Variant_Annotation .
  ?annotation rdfs:comment ?comment .
}
ORDER BY DESC(STRLEN(?comment))

# 34: Find the co-occurence count of Topological Domain comment text in UniProtKB entries
# Count how often two topological-domain comment texts occur together on the
# same protein. Comments are cut at the first ';' before they are compared.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?comment1 ?comment2 (COUNT(?comment1) AS ?count1)
WHERE {
  ?protein a up:Protein .
  ?protein up:annotation ?annotation1 .
  ?protein up:annotation ?annotation2 .

  ?annotation1 rdf:type up:Topological_Domain_Annotation .
  ?annotation1 rdfs:comment ?rawComment1 .

  ?annotation2 rdf:type up:Topological_Domain_Annotation .
  ?annotation2 rdfs:comment ?rawComment2 .

  # Keep only the text before the first ';' when there is one.
  BIND(IF(contains(?rawComment1, ';'), STRBEFORE(?rawComment1, ';'), ?rawComment1) AS ?comment1)
  BIND(IF(contains(?rawComment2, ';'), STRBEFORE(?rawComment2, ';'), ?rawComment2) AS ?comment2)

  # Never pair an annotation with itself.
  FILTER(?annotation1 != ?annotation2)
}
GROUP BY ?comment1 ?comment2
ORDER BY DESC(?count1)

# 35: Find the similar proteins for UniProtKB entry P05067 sorted by UniRef cluster identity
# List the other entries that share a UniRef cluster with entry P05067, with
# the cluster identity, highest identity first.
PREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?similar ?identity
FROM <http://sparql.uniprot.org/uniref>
FROM <http://sparql.uniprot.org/uniprot>
WHERE {
  # Accession corrected to P05067 (the digits were transposed as P05607), so
  # the query targets the entry this example is documented for.
  BIND (uniprotkb:P05067 AS ?protein)

  # Clusters that have a member whose sequence belongs to the target entry.
  ?cluster up:member ?member ;
           up:member/up:sequenceFor ?protein ;
           up:identity ?identity .
  ?member up:sequenceFor ?similar .

  # Do not report the target entry as similar to itself.
  FILTER(!sameTerm(?similar, ?protein))
}
ORDER BY DESC(?identity)

# 36: Find the orthologous proteins for UniProtKB entry P05067 using the OrthoDB database
# For entry P05067, report its organism name, preferred gene name, optional
# function comment, and - via the remote OrthoDB endpoint - the ortholog groups
# it is a member of, with the difference between its sequence length and the
# group's median protein length.
PREFIX orthodb: <http://purl.orthodb.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>
PREFIX up: <http://purl.uniprot.org/core/>

# The spelling of ?prefferedGeneName is kept: it is a result column name.
SELECT
  ?protein
  ?orthoGroup
  ?scientificName
  ?functionComment
  ?prefferedGeneName
  ((STRLEN(?value) - ?medianLength) AS ?deviationFromMedianLength)
WHERE {
  # Bind ?protein to the target entry. Previously ?protein appeared only inside
  # the OPTIONAL block, so that block matched the function annotations of every
  # protein in the store instead of those of P05067.
  VALUES ?protein { uniprotkb:P05067 }

  ?protein a up:Protein ;
           up:organism/up:scientificName ?scientificName ;
           rdfs:seeAlso ?orthoGroup ;
           up:encodedBy/skos:prefLabel ?prefferedGeneName ;
           up:sequence/rdf:value ?value .

  OPTIONAL {
    ?protein up:annotation ?functionAnnotation .
    ?functionAnnotation a up:Function_Annotation ;
                        rdfs:comment ?functionComment .
  }

  SERVICE <https://sparql.orthodb.org/sparql> {
    ?orthoGroup orthodb:ogMedianProteinLength ?medianLength .
    ?orthoGroup orthodb:hasMember ?xref .
    # The constant IRI is kept inside the remote pattern so the request sent
    # to the remote endpoint is unchanged.
    ?xref orthodb:xref/orthodb:xrefResource uniprotkb:P05067 .
  }
}

# 37: Find the human protein which contains an Epitope VSTQ, where T is a phosporylated threonine
# Find proteins of organism taxon 9606 with a modified-residue annotation whose
# comment contains "Phosphothreonine" and whose begin position is the third
# residue of a "VSTQ" stretch in the protein sequence.
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?comment ?begin ?end
WHERE {
  ?protein a up:Protein .
  ?protein up:organism taxon:9606 .
  ?protein up:sequence ?sequence .
  ?protein up:annotation ?annotation .

  ?annotation a up:Modified_Residue_Annotation .
  ?annotation rdfs:comment ?comment .
  ?annotation up:range ?range .

  # Both ends of the range must be positioned on this protein's own sequence.
  ?range faldo:begin ?startPoint .
  ?startPoint faldo:position ?begin .
  ?startPoint faldo:reference ?sequence .
  ?range faldo:end ?endPoint .
  ?endPoint faldo:position ?end .
  ?endPoint faldo:reference ?sequence .

  ?sequence rdf:value ?aaSequence .

  # The 4-residue window starts two positions before ?begin, which places the
  # annotated residue on the 'T' of "VSTQ".
  FILTER (
    SUBSTR(?aaSequence, ?begin - 2, 4) = "VSTQ"
    && CONTAINS(?comment, "Phosphothreonine")
  )
}

# 38: For the human entry P05067 (Amyloid-beta precursor protein) find the gene start ends in WikiData
# Federated lookup against Wikidata: derive the bare accession of P05067 and
# fetch begin/end values with chromosome and English assembly label for the
# item linked from the matching Wikidata entry.
# NOTE(review): P644/P645 are presumably genomic start/end and P352 the UniProt
# accession property - confirm against Wikidata.
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>

SELECT ?protein ?begin ?end ?chromosome ?assembly
WHERE {
  {
    # Strip the uniprotkb: namespace from the IRI to get the accession string.
    # This stays in its own group so it is evaluated before the SERVICE call.
    BIND(uniprotkb:P05067 AS ?proteinIRI)
    BIND (SUBSTR(STR(?proteinIRI), STRLEN(STR(uniprotkb:)) + 1) AS ?protein)
  }
  SERVICE <https://query.wikidata.org/sparql> {
    ?wikidataProtein wdt:P352 ?protein .
    ?wikidataProtein wdt:P702 ?wikidataGene .

    # Start statement with its chromosome and assembly qualifiers.
    ?wikidataGene p:P644 ?startStatement .
    ?startStatement ps:P644 ?begin .
    ?startStatement pq:P1057/wdt:P1813 ?chromosome .
    ?startStatement pq:P659/rdfs:label ?assembly .

    # End statement; reusing ?chromosome and ?assembly forces both statements
    # to refer to the same chromosome and assembly.
    ?wikidataGene p:P645 ?endStatement .
    ?endStatement ps:P645 ?end .
    ?endStatement pq:P1057/wdt:P1813 ?chromosome .
    ?endStatement pq:P659/rdfs:label ?assembly .

    FILTER(lang(?assembly) = "en")
  }
}

# 39: Retrieve entries and Catalytic activities in the reviewed (Swiss-Prot) section that have experimental evidences,
# List reviewed proteins and the reactions they catalyze where the reified
# "annotation up:catalyticActivity activity" statement carries an attribution
# whose evidence is ECO_0000269.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?rhea
WHERE {
  # ECO 269 is experimental evidence
  # (this comment must sit on its own line: a '#' comment runs to end of line).
  BIND (<http://purl.obolibrary.org/obo/ECO_0000269> AS ?evidence)

  GRAPH <http://sparql.uniprot.org/uniprot> {
    ?protein up:reviewed true ;
             up:annotation ?a ;
             up:attribution ?attribution .

    ?a a up:Catalytic_Activity_Annotation ;
       up:catalyticActivity ?ca .

    ?ca up:catalyzedReaction ?rhea .

    # Reified statement linking the annotation to its catalytic activity.
    [] rdf:subject ?a ;
       rdf:predicate up:catalyticActivity ;
       rdf:object ?ca ;
       up:attribution ?attribution .

    ?attribution up:evidence ?evidence .
  }
}

# 40: Retrieve human enzymes that metabolize sphingolipids and are annotated in ChEMBL
# Federated lookup: ask the remote Rhea endpoint for reactions involving a
# compound whose ChEBI class is a (transitive) subclass of CHEBI:26739, then
# list proteins of organism taxon 9606 that catalyze such a reaction and have
# a ChEMBL cross-reference.
PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT DISTINCT ?protein ?chemblEntry
WHERE {
  SERVICE <https://sparql.rhea-db.org/sparql> {
    ?rhea rdfs:subClassOf rh:Reaction .
    ?rhea rh:side/rh:contains/rh:compound/rh:chebi/rdfs:subClassOf+ CHEBI:26739 .
  }

  # Join the remote reactions to local catalytic activities.
  ?catalyticActivity up:catalyzedReaction ?rhea .

  ?protein up:annotation/up:catalyticActivity ?catalyticActivity .
  ?protein up:organism taxon:9606 .
  ?protein rdfs:seeAlso ?chemblEntry .

  ?chemblEntry up:database <http://purl.uniprot.org/database/ChEMBL> .
}