SPARQL example query
26: Find any UniProt entry, or a UniProt entry's domain or component, which has the name 'HLA class I histocompatibility antigen, B-73 alpha chain'
# Match a structured name attached to the entry itself or, via up:domain /
# up:component, to one of its parts, and report which name property carries
# the literal.
SELECT
  ?protein
  ?anyKindOfName
WHERE {
  ?protein a up:Protein .
  ?protein (up:recommendedName|up:alternativeName)|((up:domain|up:component)/(up:recommendedName|up:alternativeName)) ?structuredName .
  ?structuredName ?anyKindOfName "HLA class I histocompatibility antigen, B-73 alpha chain" .
  # Keep only predicates declared as sub-properties of up:structuredNameType.
  ?anyKindOfName rdfs:subPropertyOf up:structuredNameType .
}
Use27: Find all names associated with uniprot entry P05067, and if the name is associated with the entry it's domains or its components
# List every structured-name value for one fixed entry. ?partType is bound
# only for names reached through a domain or component (second UNION branch).
SELECT
  ?protein
  ?anyKindOfName
  ?names
  ?partType
WHERE {
  BIND(<http://purl.uniprot.org/uniprot/P05067> AS ?protein)
  ?protein a up:Protein .
  {
    # Names attached directly to the entry.
    ?protein (up:recommendedName|up:alternativeName) ?structuredName .
  }
  UNION
  {
    # Names attached to a domain or component of the entry.
    VALUES(?partType){(up:domain) (up:component)}
    ?protein ?partType ?part .
    ?part (up:recommendedName|up:alternativeName) ?structuredName .
  }
  ?structuredName ?anyKindOfName ?names .
  ?anyKindOfName rdfs:subPropertyOf up:structuredNameType .
}
Use28: Get the list of uniprot entries for the chromosome of proteome UP000000625
# Reviewed entries whose up:proteome value is the single IRI listed in VALUES.
SELECT
  ?protein
  ?proteome
WHERE {
  ?protein a up:Protein ;
           up:reviewed true ;
           up:proteome ?proteome .
  VALUES (?proteome) {(<http://purl.uniprot.org/proteomes/UP000000625#Chromosome>)}
}
Use29: Use ALLIE a service for Abbreviation / Long Form in Japanese and English to search in UniProt using Japanese.
# Look up the English label(s) sharing a resource with a Japanese label in the
# remote service, then match the plain-string form against structured names of
# entries, their domains, or their components.
SELECT
  ?protein
  ?englishLabelStr
WHERE {
  SERVICE<http://data.allie.dbcls.jp/sparql>{
    ?x rdfs:label "アミロイド前駆体タンパク質"@ja ;
       rdfs:label ?englishLabel .
    FILTER(lang(?englishLabel) = "en")
  }
  # STR() drops the language tag so the value can be compared as a plain string.
  BIND (STR(?englishLabel) AS ?englishLabelStr)
  ?protein a up:Protein .
  {
    ?protein (up:recommendedName|up:alternativeName) ?structuredName .
  }
  UNION
  {
    VALUES(?partType){(up:domain) (up:component)}
    ?protein ?partType ?part .
    ?part (up:recommendedName|up:alternativeName) ?structuredName .
  }
  ?structuredName ?anyKindOfName ?englishLabelStr .
  ?anyKindOfName rdfs:subPropertyOf up:structuredNameType .
}
Use30: Find UniProt entries with merged loci in Bordetella avium
# Per protein of organism taxon:360910, concatenate the locus names of its
# genes and keep only proteins with more than one locus name.
SELECT
  ?protein
  (GROUP_CONCAT(?locusName; separator=',') AS ?locusNames)
WHERE {
  ?protein a up:Protein ;
           up:organism taxon:360910 ;
           up:encodedBy ?gene .
  ?gene up:locusName ?locusName .
}
GROUP BY ?protein
HAVING (COUNT(?locusName) > 1)
Use31: Find UniParc records whose sequence point to the most database entries
# Count, inside the uniparc named graph, how many entries each sequence is a
# up:sequenceFor, then sort by that count in descending order.
SELECT
  ?sequence
  ?entries
WHERE {
  SELECT
    ?sequence
    (COUNT(?entry) AS ?entries)
  WHERE {
    GRAPH <http://sparql.uniprot.org/uniparc> {
      ?sequence up:sequenceFor ?entry .
    }
  }
  GROUP BY ?sequence
}
ORDER BY DESC(?entries)
Use32: Find UniProtKB entries with more than 1 Topological domain annotation
# Group topological-domain annotations by protein, joining their comments, and
# keep proteins that have more than one such annotation.
SELECT
  ?protein
  (GROUP_CONCAT(?comment; separator=", ") AS ?comments)
WHERE {
  ?protein a up:Protein ;
           up:annotation ?annotation .
  ?annotation rdf:type up:Topological_Domain_Annotation ;
              rdfs:comment ?comment .
}
GROUP BY ?protein
HAVING (COUNT(?annotation) > 1)
Use33: Find longest comment text associated with a UniProtKB Natural Variant Annotation
# All natural-variant annotations with their comments, longest comment first.
# No LIMIT is applied, so every annotation is returned.
SELECT
  ?annotation
  ?comment
WHERE {
  ?annotation a up:Natural_Variant_Annotation ;
              rdfs:comment ?comment .
}
ORDER BY DESC(STRLEN(?comment))
Use34: Find the co-occurence count of Topological Domain comment text in UniProtKB entries
# Count how often two topological-domain comment texts occur on distinct
# annotations of the same protein. Each comment is cut at its first ';' when
# one is present.
SELECT
  ?comment1
  ?comment2
  (COUNT(?comment1) AS ?count1)
WHERE {
  ?protein a up:Protein ;
           up:annotation ?annotation1 , ?annotation2 .
  ?annotation1 rdf:type up:Topological_Domain_Annotation ;
               rdfs:comment ?rawComment1 .
  ?annotation2 rdf:type up:Topological_Domain_Annotation ;
               rdfs:comment ?rawComment2 .
  BIND(IF(contains(?rawComment1, ';'), STRBEFORE(?rawComment1,';'), ?rawComment1) AS ?comment1)
  BIND(IF(contains(?rawComment2, ';'), STRBEFORE(?rawComment2,';'), ?rawComment2) AS ?comment2)
  # Do not pair an annotation with itself.
  FILTER(?annotation1 != ?annotation2)
}
GROUP BY ?comment1 ?comment2
ORDER BY DESC(COUNT(?comment1))
Use35: Find the similar proteins for UniProtKB entry P05067 sorted by UniRef cluser identity
# For one entry, list the other proteins that share a cluster with it, together
# with the cluster's identity value, highest identity first.
SELECT
  ?similar
  ?identity
FROM <http://sparql.uniprot.org/uniref>
FROM <http://sparql.uniprot.org/uniprot>
WHERE {
  # The example title names entry P05067; the accession previously written
  # here (P05607) had two digits transposed and pointed at a different entry.
  BIND (uniprotkb:P05067 AS ?protein)
  ?cluster up:member ?member ;
           up:member/up:sequenceFor ?protein ;
           up:identity ?identity .
  ?member up:sequenceFor ?similar .
  # Exclude the query entry itself from the results.
  FILTER(!sameTerm(?similar, ?protein))
}
ORDER BY DESC(?identity)
Use36: Find the orthologous proteins for UniProtKB entry P05067 using the OrthoDB database
# For entry P05067: its organism name, cross-referenced group, preferred gene
# label, optional function comments, and the difference between its sequence
# length and the group's median protein length reported by the remote service.
SELECT
  ?protein
  ?orthoGroup
  ?scientificName
  ?functionComment
  ?prefferedGeneName
  ((STRLEN(?value) - ?medianLength) as ?deviationFromMedianLength)
WHERE {
  # ?protein used to appear only inside the OPTIONAL block, so it was never
  # tied to P05067 and the OPTIONAL matched function annotations of every
  # protein. Binding it here keeps the projected column and correlates the
  # OPTIONAL with the entry being described.
  # NOTE(review): if ?protein was meant to range over the orthologs themselves,
  # the SERVICE pattern needs to change instead - confirm the intent.
  BIND(uniprotkb:P05067 AS ?protein)
  ?protein a up:Protein ;
           up:organism/up:scientificName ?scientificName ;
           rdfs:seeAlso ?orthoGroup ;
           up:encodedBy/skos:prefLabel ?prefferedGeneName ;
           up:sequence/rdf:value ?value .
  OPTIONAL {
    ?protein up:annotation ?functionAnnotation .
    ?functionAnnotation a up:Function_Annotation ;
                        rdfs:comment ?functionComment .
  }
  SERVICE<http://sparql.orthodb.org/sparql>{
    ?orthoGroup orthodb:ogMedianProteinLength ?medianLength .
    ?orthoGroup orthodb:hasMember ?xref .
    # Left as a constant IRI, exactly as in the original remote pattern.
    ?xref orthodb:xref/orthodb:xrefResource uniprotkb:P05067 .
  }
}
Use37: Find the human protein which contains an Epitope VSTQ, where T is a phosporylated threonine
# Modified-residue annotations on taxon:9606 proteins whose comment contains
# "Phosphothreonine" and where the four residues starting two positions before
# the annotated begin position read "VSTQ".
SELECT
  ?protein
  ?comment
  ?begin
  ?end
WHERE {
  ?protein a up:Protein ;
           up:organism taxon:9606 ;
           up:sequence ?sequence ;
           up:annotation ?annotation .
  ?annotation a up:Modified_Residue_Annotation ;
              rdfs:comment ?comment ;
              up:range ?range .
  # Both range ends must refer to the same sequence as the protein.
  ?range faldo:begin [ faldo:position ?begin ; faldo:reference ?sequence ] ;
         faldo:end [ faldo:position ?end ; faldo:reference ?sequence ] .
  ?sequence rdf:value ?aaSequence .
  FILTER (SUBSTR(?aaSequence, ?begin -2 , 4) = "VSTQ")
  FILTER (CONTAINS(?comment, "Phosphothreonine"))
}
Use38: For the human entry P05067 (Amyloid-beta precursor protein) find the gene start ends in WikiData
# Turn the entry IRI into its bare accession string, then ask the remote
# service for begin/end values that share a chromosome and an English assembly
# label.
SELECT
  ?protein
  ?begin
  ?end
  ?chromosome
  ?assembly
WHERE {
  {
    BIND(uniprotkb:P05067 AS ?proteinIRI)
    # Keep what follows the uniprotkb: namespace in the IRI string.
    BIND (SUBSTR(STR(?proteinIRI), STRLEN(STR(uniprotkb:))+1) AS ?protein)
  }
  SERVICE <https://query.wikidata.org/sparql> {
    ?wp wdt:P352 ?protein ;
        wdt:P702 ?wg .
    ?wg p:P644 ?wgss .
    ?wgss ps:P644 ?begin ;
          pq:P1057/wdt:P1813 ?chromosome ;
          pq:P659/rdfs:label ?assembly .
    ?wg p:P645 ?wgse .
    ?wgse ps:P645 ?end ;
          pq:P1057/wdt:P1813 ?chromosome ;
          pq:P659/rdfs:label ?assembly .
    FILTER(lang(?assembly) = "en")
  }
}
Use39: Retrieve entries and Catalytic activities in the reviewed (Swiss-Prot) section that have experimental evidences,
# Reviewed taxon:9606 entries classified with keywords:1185 whose
# catalytic-activity statement is attributed to the given evidence code.
# The query is laid out over several lines on purpose: a SPARQL '#' comment
# runs to the end of its line, so on a single line the comment below would
# swallow the rest of the query.
SELECT
  ?protein
  ?rhea
WHERE {
  # ECO 269 is experimental evidence
  BIND (<http://purl.obolibrary.org/obo/ECO_0000269> as ?evidence)
  ?protein up:reviewed true ;
           up:organism taxon:9606 ;
           up:classifiedWith keywords:1185 ;
           up:annotation ?a ;
           up:attribution ?attribution .
  ?a a up:Catalytic_Activity_Annotation ;
     up:catalyticActivity ?ca .
  ?ca up:catalyzedReaction ?rhea .
  # Reified statement (?a up:catalyticActivity ?ca) carrying the attribution.
  [] rdf:subject ?a ;
     rdf:predicate up:catalyticActivity ;
     rdf:object ?ca ;
     up:attribution ?attribution .
  ?attribution up:evidence ?evidence .
}
Use40: Retrieve human enzymes that metabolize sphingolipids and are annotated in ChEMBL
# Fetch reactions from the remote service whose participants' ChEBI classes
# are transitive subclasses of CHEBI:26739, then keep taxon:9606 proteins that
# catalyze them and have a cross-reference into the ChEMBL database.
SELECT DISTINCT
  ?protein
  ?chemblEntry
WHERE {
  SERVICE <https://sparql.rhea-db.org/sparql> {
    ?rhea rdfs:subClassOf rh:Reaction ;
          rh:side/rh:contains/rh:compound/rh:chebi/rdfs:subClassOf+ CHEBI:26739 .
  }
  ?ca up:catalyzedReaction ?rhea .
  ?protein up:annotation/up:catalyticActivity ?ca ;
           up:organism taxon:9606 ;
           rdfs:seeAlso ?chemblEntry .
  ?chemblEntry up:database <http://purl.uniprot.org/database/ChEMBL> .
}
Use41: Retrieve entries with sequences that are composed of fragments
# Proteins with a sequence, after removing every solution whose sequence has
# any up:fragment value.
# NOTE(review): as written, MINUS keeps sequences WITHOUT up:fragment, which
# looks like the opposite of the example title ("composed of fragments") -
# confirm which one is intended before relying on the results.
SELECT DISTINCT
  ?protein
WHERE {
  ?protein a up:Protein ;
           up:sequence ?sequence .
  MINUS {
    ?sequence up:fragment []
  }
}
Use42: Connect patents cited in UniProtKB with those in the patent database at EPO via publication number.
# Take patent citations whose matched IRI contains 'EP', cut the publication
# number out of the IRI string (from character 35 onward), and look it up in
# the remote patent service.
SELECT
  ?citation
  ?patent
  ?application
  ?applicationNo
WHERE {
  ?citation a up:Patent_Citation ;
            skos:exactMatch ?patent .
  FILTER(CONTAINS(STR(?patent), 'EP'))
  BIND(SUBSTR(STR(?patent), 35) AS ?applicationNo)
  SERVICE<https://data.epo.org/linked-data/query>{
    ?application patent:publicationNumber ?applicationNo
  }
}
Use43: Connect patents cited in UniProtKB with those in the patent database at EPO via publication number, whose grant date is more than twenty years in the past.
PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
PREFIX up:<http://purl.uniprot.org/core/>
PREFIX patent: <http://data.epo.org/linked-data/def/patent/>
# Patent citations joined to the remote patent service by publication number,
# restricted to grant years earlier than (current year - 20) and sorted by
# grant year.
SELECT
  ?grantDate
  ?patent
  ?application
  ?applicationNo
WHERE {
  ?citation a up:Patent_Citation ;
            skos:exactMatch ?patent .
  # Slice the IRI string: publication number from character 35 onward and the
  # two characters before it (?countryCode is bound but not used further).
  BIND(SUBSTR(STR(?patent), 35) AS ?applicationNo)
  BIND(SUBSTR(STR(?patent), 33, 2) AS ?countryCode)
  SERVICE<https://data.epo.org/linked-data/query>{
    ?publication patent:publicationNumber ?applicationNo ;
                 patent:application ?application .
    ?application patent:grantDate ?grantDate .
  }
  BIND((year(now()) - 20) AS ?thisYearMinusTwenty)
  BIND(year(?grantDate) AS ?grantYear)
  FILTER(?grantYear < ?thisYearMinusTwenty)
}
ORDER BY ?grantYear
Use44: Find the Rhea and Interpro combinations in UniProtKB entries.
# For reviewed entries, pair each catalytic-activity value with each
# cross-reference that belongs to the InterPro database, ordered by ?rhea.
SELECT
  ?interpro
  ?rhea
FROM <http://sparql.uniprot.org/uniprot>
WHERE {
  ?protein up:reviewed true .
  ?protein up:annotation ?annotation .
  ?annotation up:catalyticActivity ?rhea .
  ?protein rdfs:seeAlso ?interpro .
  ?interpro up:database <http://purl.uniprot.org/database/InterPro> .
}
ORDER BY ?rhea
Use45: Retrieve drugs that target human enzymes involved in sterol metabolism (federated query with WikiData and Rhea).
# Three-step federated query: reactions from the Rhea service, the taxon:9606
# proteins that catalyze them, then chemicals and treated conditions from the
# Wikidata service.
# The query must stay on separate lines: a SPARQL '#' comment runs to the end
# of its line, so on a single line the first comment would swallow every
# pattern after it.
SELECT DISTINCT
  ?protein
  ?proteinFullName
  ?wikiChemical
  ?wikiChemicalLabel
  ?medicalConditionTreatedLabel
WHERE {
  # ChEBI: retrieve members of the ChEBI class ChEBI:15889 (sterol)
  # Rhea: retrieve the reactions involving these ChEBI as participants
  SERVICE <https://sparql.rhea-db.org/sparql> {
    ?reaction rdfs:subClassOf rh:Reaction ;
              rh:status rh:Approved ;
              rh:side ?reactionSide .
    ?reactionSide rh:contains ?participant .
    ?participant rh:compound ?compound
    {
      ?compound rh:chebi ?chebi .
      ?chebi (rdfs:subClassOf)+ CHEBI:15889
    }
    UNION
    {
      ?compound rh:chebi ?chebi .
      ?chebi2 rdfs:subClassOf ?chebiRestriction .
      ?chebiRestriction a owl:Restriction ;
                        owl:onProperty chebihash:has_major_microspecies_at_pH_7_3 ;
                        owl:someValuesFrom ?chebi .
      ?chebi2 (rdfs:subClassOf)+ CHEBI:15889
    }
  }
  # UniProt: retrieve the human (taxid:9606) enzymes catalyzing these Rhea reactions
  ?ca up:catalyzedReaction ?reaction .
  ?a up:catalyticActivity ?ca .
  ?proteinIRI up:annotation ?a ;
              up:organism taxon:9606 ;
              up:recommendedName ?proteinRecName .
  ?proteinRecName up:fullName ?proteinFullName .
  # Find drugs in wikidata that interact with the UniProt Proteins
  BIND (SUBSTR(STR(?proteinIRI), STRLEN(STR(uniprotkb:))+1) AS ?protein)
  SERVICE<https://query.wikidata.org/sparql> {
    ?wp wdt:P352 ?protein .
    ?wikiChemical wdt:P129 ?wp . # Physically interacts with
    ?wikiChemical rdfs:label ?wikiChemicalLabel .
    ?wikiChemical wdt:P2175 ?wmc . # Medical condition treated
    ?wmc rdfs:label ?medicalConditionTreatedLabel .
    FILTER(lang(?medicalConditionTreatedLabel) = 'en')
    FILTER(lang(?wikiChemicalLabel) = 'en')
  }
}
Use46: Retrieve images of 'Anas' (Ducks) in the European Environmental Agency databases (federated query).
# Take the direct subclasses of taxon:8835 from the taxonomy graph, derive an
# identifier string from each taxon IRI (the text after 'onomy/'), and use it
# to fetch matching species records and their image links from the remote
# service.
SELECT
  ?taxon
  ?ncbiTaxid
  ?eunisTaxon
  ?eunisname
  ?image
WHERE {
  GRAPH<http://sparql.uniprot.org/taxonomy>{
    ?taxon a up:Taxon .
    ?taxon rdfs:subClassOf taxon:8835 .
    BIND(strafter(str(?taxon), 'onomy/') AS ?ncbiTaxid)
  }
  SERVICE <https://semantic.eea.europa.eu/sparql> {
    ?eunisTaxon a eunisSpecies:SpeciesSynonym ;
                eunisSpecies:binomialName ?eunisname ;
                eunisSpecies:sameSpeciesNCBI ?ncbiTaxid ;
                <http://xmlns.com/foaf/0.1/depiction> ?image .
  }
}
Use49: Retrieve the UniProt proteins, their catalyzed Rhea reactions, their encoding genes (Ensembl) and the anatomic entities where the genes are expressed (UBERON anatomic entites from Bgee expression data resource).
# Reactions involving CHEBI:16113 (from the Rhea service), the taxon:9606
# proteins that catalyze them, the genes those proteins are transcribed from,
# and the anatomic entities in which those genes are expressed (from the Bgee
# service).
# The query must stay on separate lines: a SPARQL '#' comment runs to the end
# of its line, so on a single line the first comment would swallow every
# pattern after it.
SELECT DISTINCT
  ?protein
  ?ensemblGene
  ?reaction
  ?anatomicEntityLabel
  ?anatomicEntity
WHERE {
  # federated query to Rhea endpoint
  {
    SELECT DISTINCT
      ?reaction
    WHERE {
      SERVICE <https://sparql.rhea-db.org/sparql> {
        ?reaction rdfs:subClassOf rh:Reaction .
        ?reaction rh:equation ?reactionEquation .
        ?reaction rh:side ?reactionSide .
        ?reactionSide rh:contains ?participant .
        ?participant rh:compound ?compound .
        # compound constraint (CHEBI:16113 == cholesterol)
        ?compound rh:chebi CHEBI:16113 .
      }
    }
  }
  # taxonomy constraint (taxon:9606 == Homo sapiens)
  ?protein up:organism taxon:9606 .
  ?protein up:annotation ?a .
  ?a a up:Catalytic_Activity_Annotation .
  ?a up:catalyticActivity ?ca .
  ?ca up:catalyzedReaction ?reaction .
  ?protein rdfs:seeAlso / up:transcribedFrom ?ensemblGene .
  # federated query to Bgee (expression data)
  SERVICE <http://biosoda.expasy.org/rdf4j-server/repositories/bgeelight> {
    ?gene genex:isExpressedIn ?anatomicEntity .
    ?gene lscr:xrefEnsemblGene ?ensemblGene .
    ?anatomicEntity rdfs:label ?anatomicEntityLabel .
  }
}
Use52: Retrieve drugs that target human enzymes involved in sterol metabolism (federated query with Rhea and ChEMBL via IDSM/Elixir czech republic).
# Three-step federated query: reactions from the Rhea service, the taxon:9606
# proteins that catalyze them, then activities and molecules from the IDSM
# service whose development phase is greater than 3.
# The query must stay on separate lines: a SPARQL '#' comment runs to the end
# of its line, so on a single line the first comment would swallow every
# pattern after it.
# NOTE(review): ?chemlbMoleculePrefLabel looks like a misspelling of "chembl";
# it is left as is because it is a projected column name.
SELECT DISTINCT
  ?protein
  ?proteinFullName
  ?activityType
  ?standardActivityValue
  ?standardActivityUnit
  ?chemblMolecule
  ?chemlbMoleculePrefLabel
WHERE {
  # ChEBI: retrieve members of the ChEBI class ChEBI:15889 (sterol)
  # Rhea: retrieve the reactions involving these ChEBI as participants
  SERVICE <https://sparql.rhea-db.org/sparql> {
    ?reaction rdfs:subClassOf rh:Reaction ;
              rh:status rh:Approved ;
              rh:side ?reactionSide .
    ?reactionSide rh:contains ?participant .
    ?participant rh:compound ?compound
    {
      ?compound rh:chebi ?chebi .
      ?chebi (rdfs:subClassOf)+ CHEBI:15889
    }
    UNION
    {
      ?compound rh:chebi ?chebi .
      ?chebi2 rdfs:subClassOf ?chebiRestriction .
      ?chebiRestriction a owl:Restriction ;
                        owl:onProperty chebihash:has_major_microspecies_at_pH_7_3 ;
                        owl:someValuesFrom ?chebi .
      ?chebi2 (rdfs:subClassOf)+ CHEBI:15889
    }
  }
  # UniProt: retrieve the human (taxid:9606) enzymes catalyzing these Rhea reactions
  ?ca up:catalyzedReaction ?reaction .
  ?a up:catalyticActivity ?ca .
  ?protein up:annotation ?a ;
           up:organism taxon:9606 ;
           up:recommendedName ?proteinRecName .
  ?proteinRecName up:fullName ?proteinFullName .
  # ChEMBL: retrieve the corresponding targets and with drugs in clinical phase 4
  # Via https://idsm.elixir-czech.cz/sparql/
  SERVICE <https://idsm.elixir-czech.cz/sparql/endpoint/idsm> {
    ?activity a cco:Activity ;
              cco:hasMolecule ?chemblMolecule ;
              cco:hasAssay ?assay ;
              cco:standardType ?activityType ;
              cco:standardValue ?standardActivityValue ;
              cco:standardUnits ?standardActivityUnit .
    ?chemblMolecule cco:highestDevelopmentPhase ?highestDevelopmentPhase ;
                    rdfs:label ?chemblMoleculeLabel ;
                    skos:prefLabel ?chemlbMoleculePrefLabel .
    FILTER (?highestDevelopmentPhase > 3)
    ?assay cco:hasTarget ?target .
    ?target cco:hasTargetComponent/cco:targetCmptXref ?protein .
    ?protein a cco:UniprotRef .
  }
}
Use53: Find mouse homologs in OMABrowser of human enzymes that catalyze reactions involving Sterols (CHEBI:15889). 
Federating with Rhea-DB and OMABrowser.
# Reactions whose compounds fall under CHEBI:15889 (from the Rhea service),
# the taxon:9606 proteins that catalyze them, and proteins of taxon:10090 that
# sit in the same cluster in the OMA service.
# The query must stay on separate lines: a SPARQL '#' comment runs to the end
# of its line, so on a single line the comment near the end would swallow the
# last pattern and both closing braces.
SELECT DISTINCT
  ?chebi
  ?reaction
  ?humanProtein
  ?mouseProtein
  ?cluster
WHERE {
  SERVICE <https://sparql.rhea-db.org/sparql> {
    ?reaction rdfs:subClassOf rh:Reaction .
    ?reaction rh:side/rh:contains/rh:compound ?compound .
    ?compound rh:chebi ?chebi .
    ?chebi rdfs:subClassOf* CHEBI:15889
  }
  ?humanProtein up:organism taxon:9606 .
  ?humanProtein up:annotation ?a .
  ?a a up:Catalytic_Activity_Annotation .
  ?a up:catalyticActivity ?ca .
  ?ca up:catalyzedReaction ?reaction .
  SERVICE <https://sparql.omabrowser.org/sparql> {
    ?cluster a orth:ParalogsCluster .
    ?cluster orth:hasHomologousMember ?node1 , ?node2 .
    ?node1 orth:hasHomologousMember* ?orthoProtein1 .
    ?node2 orth:hasHomologousMember* ?orthoProtein2 .
    ?orthoProtein1 lscr:xrefUniprot ?mouseProtein .
    ?orthoProtein2 lscr:xrefUniprot ?humanProtein .
    # inTaxon mouse
    ?orthoProtein1 orth:organism/<http://purl.obolibrary.org/obo/RO_0002162> taxon:10090 .
  }
}
Use54: Proteins with annotated binding sites for ligands similar to heme
# Run a structure-similarity search in the remote ChEBI service, then list
# proteins with a binding-site annotation whose ligand is a subclass of one of
# the returned compounds, highest similarity score first.
# The query must stay on separate lines: a SPARQL '#' comment runs to the end
# of its line, so on a single line the comment inside the SERVICE block would
# swallow the rest of the query.
SELECT
  ?protein
  ?mnemonic
  ?proteinName
  ?ligandSimilarityScore
  ?ligand
WHERE {
  SERVICE <https://idsm.elixir-czech.cz/sparql/endpoint/chebi> {
    ?ssc sachem:compound ?ligand ;
         sachem:score ?ligandSimilarityScore ;
         sachem:similaritySearch ?sss .
    # Smiles of Heme
    ?sss sachem:query "CC1=C(CCC([O-])=O)C2=[N+]3C1=Cc1c(C)c(C=C)c4C=C5C(C)=C(C=C)C6=[N+]5[Fe-]3(n14)n1c(=C6)c(C)c(CCC([O-])=O)c1=C2" ;
         sachem:cutoff "8e-1"^^xsd:double ;
         sachem:aromaticityMode sachem:aromaticityDetect ;
         sachem:similarityRadius 1 ;
         sachem:tautomerMode sachem:ignoreTautomers .
  }
  ?protein up:mnemonic ?mnemonic ;
           up:recommendedName/up:fullName ?proteinName ;
           up:annotation ?annotation .
  ?annotation a up:Binding_Site_Annotation ;
              up:ligand/rdfs:subClassOf ?ligand .
}
ORDER BY DESC(?ligandSimilarityScore)
Use55: Number of proteins with annotated binding sites for metals or metal sulfur clusters (and experimental evidence for the binding)
# Count distinct proteins per ligand, where the ligand is a transitive subclass
# of one of the two listed ChEBI classes and the protein-to-annotation
# statement is attributed to one of the two listed evidence codes.
SELECT
  ?ligand
  ?ligandName
  (COUNT(DISTINCT ?protein) as ?entries)
WHERE {
  ?protein up:annotation ?annotation .
  VALUES ?evs { obo:ECO_0000269 obo:ECO_0007744 } .
  VALUES ?chebids { CHEBI:25213 CHEBI:25214 } .
  # Reified statement (?protein up:annotation ?annotation) carrying the evidence.
  ?st rdf:subject ?protein ;
      rdf:predicate up:annotation ;
      rdf:object ?annotation ;
      up:attribution/up:evidence ?evs .
  ?annotation up:ligand/rdfs:subClassOf ?ligand .
  ?ligand rdfs:subClassOf+ ?chebids ;
          rdfs:label ?ligandName .
}
GROUP BY ?ligand ?ligandName
ORDER BY DESC(?entries)
Use