44: Find the Rhea and InterPro combinations in UniProtKB entries.

# Pairs every InterPro cross-reference of a reviewed UniProtKB entry with every
# Rhea reaction annotated on that same entry, sorted by reaction.
# NOTE(review): the up: and rdfs: prefixes are not declared here; presumably the
# endpoint predefines them - confirm before running this elsewhere.
SELECT
  ?interpro
  ?rhea
FROM <http://sparql.uniprot.org/uniprot>
WHERE {
  ?protein up:reviewed true .
  ?protein up:annotation ?annotation .
  # Step through the catalytic-activity node to the reaction it catalyzes
  # (the same up:catalyticActivity -> up:catalyzedReaction path used by the
  # other Rhea queries in this collection); stopping at up:catalyticActivity
  # would return the intermediate node instead of the reaction.
  ?annotation up:catalyticActivity/up:catalyzedReaction ?rhea .
  ?protein rdfs:seeAlso ?interpro .
  # Keep only cross-references that belong to the InterPro database.
  ?interpro up:database <http://purl.uniprot.org/database/InterPro> .
}
ORDER BY ?rhea

45: Retrieve drugs that target human enzymes involved in sterol metabolism (federated query with WikiData and Rhea).

# Drugs (from Wikidata) that physically interact with human enzymes catalyzing
# approved Rhea reactions that have a sterol (CHEBI:15889) participant.
# NOTE(review): prefixes (up:, rh:, CHEBI:, chebihash:, taxon:, uniprotkb:, wdt:,
# owl:, rdfs:) are not declared here; presumably the endpoint predefines them.
SELECT DISTINCT
  ?protein
  ?proteinFullName
  ?wikiChemical
  ?wikiChemicalLabel
  ?medicalConditionTreatedLabel
WHERE {
  # ChEBI: retrieve members of the ChEBI class ChEBI:15889 (sterol)
  # Rhea: retrieve the reactions involving these ChEBI as participants
  SERVICE <https://sparql.rhea-db.org/sparql> {
    ?reaction rdfs:subClassOf rh:Reaction ;
              rh:status rh:Approved ;
              rh:side ?reactionSide .
    ?reactionSide rh:contains ?participant .
    ?participant rh:compound ?compound .
    {
      # The participant's ChEBI class is itself a (transitive) subclass of sterol.
      ?compound rh:chebi ?chebi .
      ?chebi (rdfs:subClassOf)+ CHEBI:15889
    } UNION {
      # The participant's ChEBI class is the major microspecies at pH 7.3 of a
      # class (?chebi2) that is a (transitive) subclass of sterol.
      ?compound rh:chebi ?chebi .
      ?chebi2 rdfs:subClassOf ?chebiRestriction .
      ?chebiRestriction a owl:Restriction ;
                        owl:onProperty chebihash:has_major_microspecies_at_pH_7_3 ;
                        owl:someValuesFrom ?chebi .
      ?chebi2 (rdfs:subClassOf)+ CHEBI:15889
    }
  }

  # UniProt: retrieve the human (taxid:9606) enzymes catalyzing these Rhea reactions
  ?ca up:catalyzedReaction ?reaction .
  ?a up:catalyticActivity ?ca .
  ?proteinIRI up:annotation ?a ;
              up:organism taxon:9606 ;
              up:recommendedName ?proteinRecName .
  ?proteinRecName up:fullName ?proteinFullName .

  # Find drugs in wikidata that interact with the UniProt Proteins.
  # Strip the uniprotkb: namespace from the IRI so that ?protein holds the part
  # after the namespace, which is what wdt:P352 is matched against below.
  BIND (SUBSTR(STR(?proteinIRI), STRLEN(STR(uniprotkb:))+1) AS ?protein)
  SERVICE <https://query.wikidata.org/sparql> {
    ?wp wdt:P352 ?protein .
    ?wikiChemical wdt:P129 ?wp . # Physically interacts with
    ?wikiChemical rdfs:label ?wikiChemicalLabel .
    ?wikiChemical wdt:P2175 ?wmc . # Medical condition treated
    ?wmc rdfs:label ?medicalConditionTreatedLabel .
    # Keep English labels only.
    FILTER(lang(?medicalConditionTreatedLabel) = 'en')
    FILTER(lang(?wikiChemicalLabel) = 'en')
  }
}

46: Retrieve images of 'Anas' (Ducks) from the European Environmental Agency databases (federated query).

# Images of taxa directly below taxon:8835 ('Anas'), looked up at the European
# Environment Agency endpoint by NCBI taxonomy identifier.
# NOTE(review): prefixes (up:, taxon:, rdfs:, eunisSpecies:) are not declared
# here; presumably the endpoint predefines them.
SELECT
  ?taxon
  ?ncbiTaxid
  ?eunisTaxon
  ?eunisName
  ?image
WHERE {
  GRAPH <http://sparql.uniprot.org/taxonomy> {
    ?taxon a up:Taxon .
    # Taxon subclasses are materialized, do not use rdfs:subClassOf+
    ?taxon rdfs:subClassOf taxon:8835 .
    # Everything after 'onomy/' in the taxon IRI is taken as the NCBI taxid.
    BIND(strafter(str(?taxon), 'onomy/') AS ?ncbiTaxid)
  }
  SERVICE <https://semantic.eea.europa.eu/sparql> {
    ?eunisTaxon a eunisSpecies:SpeciesSynonym ;
                eunisSpecies:binomialName ?eunisName ;
                eunisSpecies:sameSpeciesNCBI ?ncbiTaxid ;
                <http://xmlns.com/foaf/0.1/depiction> ?image .
  }
}

49: Retrieve the UniProt proteins, their catalyzed Rhea reactions, their encoding genes (Ensembl) and the anatomic entities where the genes are expressed (UBERON anatomic entities from the Bgee expression data resource).

# Human proteins that catalyze Rhea reactions involving cholesterol
# (CHEBI:16113), the Ensembl genes they are transcribed from, and the anatomic
# entities in which Bgee reports those genes as expressed.
# NOTE(review): prefixes (up:, rh:, CHEBI:, taxon:, rdfs:, genex:, lscr:) are
# not declared here; presumably the endpoint predefines them.
SELECT DISTINCT
  ?protein
  ?ensemblGene
  ?reaction
  ?anatomicEntityLabel
  ?anatomicEntity
WHERE {
  # federated query to Rhea endpoint
  # The sub-select projects only the distinct reactions out of the remote call.
  {
    SELECT DISTINCT ?reaction
    WHERE {
      SERVICE <https://sparql.rhea-db.org/sparql> {
        ?reaction rdfs:subClassOf rh:Reaction .
        ?reaction rh:equation ?reactionEquation .
        ?reaction rh:side ?reactionSide .
        ?reactionSide rh:contains ?participant .
        ?participant rh:compound ?compound .
        # compound constraint (CHEBI:16113 == cholesterol)
        ?compound rh:chebi CHEBI:16113 .
      }
    }
  }

  # taxonomy constraint (taxon:9606 == Homo sapiens)
  ?protein up:organism taxon:9606 .
  ?protein up:annotation ?a .
  ?a a up:Catalytic_Activity_Annotation .
  ?a up:catalyticActivity ?ca .
  ?ca up:catalyzedReaction ?reaction .
  ?protein rdfs:seeAlso / up:transcribedFrom ?ensemblGene .

  # federated query to Bgee (expression data)
  SERVICE <http://biosoda.expasy.org/rdf4j-server/repositories/bgeelight> {
    ?gene genex:isExpressedIn ?anatomicEntity .
    ?gene lscr:xrefEnsemblGene ?ensemblGene .
    ?anatomicEntity rdfs:label ?anatomicEntityLabel .
  }
}

52: Retrieve drugs that target human enzymes involved in sterol metabolism (federated query with Rhea and ChEMBL via IDSM/ELIXIR Czech Republic).

# ChEMBL activities (fetched through the IDSM endpoint) of molecules whose
# highest development phase is above 3, measured against human enzymes that
# catalyze approved Rhea reactions with a sterol (CHEBI:15889) participant.
# NOTE(review): prefixes (up:, rh:, CHEBI:, chebihash:, taxon:, cco:, skos:,
# owl:, rdfs:) are not declared here; presumably the endpoint predefines them.
# NOTE(review): the output column ?chemlbMoleculePrefLabel is spelled "chemlb";
# it is kept as is so that consumers of the result columns keep working.
SELECT DISTINCT
  ?protein
  ?proteinFullName
  ?activityType
  ?standardActivityValue
  ?standardActivityUnit
  ?chemblMolecule
  ?chemlbMoleculePrefLabel
WHERE {
  # ChEBI: retrieve members of the ChEBI class ChEBI:15889 (sterol)
  # Rhea: retrieve the reactions involving these ChEBI as participants
  SERVICE <https://sparql.rhea-db.org/sparql> {
    ?reaction rdfs:subClassOf rh:Reaction ;
              rh:status rh:Approved ;
              rh:side ?reactionSide .
    ?reactionSide rh:contains ?participant .
    ?participant rh:compound ?compound .
    {
      # The participant's ChEBI class is itself a (transitive) subclass of sterol.
      ?compound rh:chebi ?chebi .
      ?chebi (rdfs:subClassOf)+ CHEBI:15889
    } UNION {
      # The participant's ChEBI class is the major microspecies at pH 7.3 of a
      # class (?chebi2) that is a (transitive) subclass of sterol.
      ?compound rh:chebi ?chebi .
      ?chebi2 rdfs:subClassOf ?chebiRestriction .
      ?chebiRestriction a owl:Restriction ;
                        owl:onProperty chebihash:has_major_microspecies_at_pH_7_3 ;
                        owl:someValuesFrom ?chebi .
      ?chebi2 (rdfs:subClassOf)+ CHEBI:15889
    }
  }

  # UniProt: retrieve the human (taxid:9606) enzymes catalyzing these Rhea reactions
  ?ca up:catalyzedReaction ?reaction .
  ?a up:catalyticActivity ?ca .
  ?protein up:annotation ?a ;
           up:organism taxon:9606 ;
           up:recommendedName ?proteinRecName .
  ?proteinRecName up:fullName ?proteinFullName .

  # ChEMBL: retrieve the corresponding targets and the molecules tested on them
  # that have reached clinical phase 4
  # Via https://idsm.elixir-czech.cz/sparql/
  SERVICE <https://idsm.elixir-czech.cz/sparql/endpoint/idsm> {
    ?activity a cco:Activity ;
              cco:hasMolecule ?chemblMolecule ;
              cco:hasAssay ?assay ;
              cco:standardType ?activityType ;
              cco:standardValue ?standardActivityValue ;
              cco:standardUnits ?standardActivityUnit .
    # ?chemblMoleculeLabel is not projected, but the rdfs:label pattern still
    # restricts the results to molecules that have a label.
    ?chemblMolecule cco:highestDevelopmentPhase ?highestDevelopmentPhase ;
                    rdfs:label ?chemblMoleculeLabel ;
                    skos:prefLabel ?chemlbMoleculePrefLabel .
    FILTER (?highestDevelopmentPhase > 3)
    ?assay cco:hasTarget ?target .
    ?target cco:hasTargetComponent/cco:targetCmptXref ?protein .
    ?protein a cco:UniprotRef .
  }
}

53: Find mouse homologs in OMABrowser of human enzymes that catalyze reactions involving Sterols (CHEBI:15889). Federating with Rhea-DB and OMABrowser.

# Mouse proteins that share an OMA cluster with human enzymes catalyzing Rhea
# reactions whose participants are sterols (CHEBI:15889 or any subclass).
# NOTE(review): prefixes (up:, rh:, CHEBI:, taxon:, rdfs:, orth:, lscr:) are
# not declared here; presumably the endpoint predefines them.
SELECT DISTINCT
  ?chebi
  ?reaction
  ?humanProtein
  ?mouseProtein
  ?cluster
WHERE {
  SERVICE <https://sparql.rhea-db.org/sparql> {
    ?reaction rdfs:subClassOf rh:Reaction .
    ?reaction rh:side/rh:contains/rh:compound ?compound .
    ?compound rh:chebi ?chebi .
    # Zero-or-more path: CHEBI:15889 itself matches as well as its subclasses.
    ?chebi rdfs:subClassOf* CHEBI:15889
  }

  ?humanProtein up:organism taxon:9606 .
  ?humanProtein up:annotation ?a .
  ?a a up:Catalytic_Activity_Annotation .
  ?a up:catalyticActivity ?ca .
  ?ca up:catalyzedReaction ?reaction .

  SERVICE <https://sparql.omabrowser.org/sparql> {
    ?cluster a orth:ParalogsCluster .
    # Two members of the same cluster; each may be a protein or a nested
    # group, hence the zero-or-more paths below.
    ?cluster orth:hasHomologousMember ?node1 , ?node2 .
    ?node1 orth:hasHomologousMember* ?orthoProtein1 .
    ?node2 orth:hasHomologousMember* ?orthoProtein2 .
    ?orthoProtein1 lscr:xrefUniprot ?mouseProtein .
    ?orthoProtein2 lscr:xrefUniprot ?humanProtein .
    # inTaxon mouse
    ?orthoProtein1 orth:organism/<http://purl.obolibrary.org/obo/RO_0002162> taxon:10090 .
  }
}

54: Proteins with annotated binding sites for ligands similar to heme

# Proteins with a binding-site annotation whose ligand class is returned by a
# Sachem similarity search against the heme SMILES, most similar first.
# NOTE(review): prefixes (up:, rdfs:, sachem:, xsd:) are not declared here;
# presumably the endpoint predefines them.
SELECT
  ?protein
  ?mnemonic
  ?proteinName
  ?ligandSimilarityScore
  ?ligand
WHERE {
  SERVICE <https://idsm.elixir-czech.cz/sparql/endpoint/chebi> {
    ?ssc sachem:compound ?ligand ;
         sachem:score ?ligandSimilarityScore ;
         sachem:similaritySearch ?sss .
    # Smiles of Heme
    ?sss sachem:query "CC1=C(CCC([O-])=O)C2=[N+]3C1=Cc1c(C)c(C=C)c4C=C5C(C)=C(C=C)C6=[N+]5[Fe-]3(n14)n1c(=C6)c(C)c(CCC([O-])=O)c1=C2" ;
         sachem:cutoff "8e-1"^^xsd:double ;
         sachem:aromaticityMode sachem:aromaticityDetect ;
         sachem:similarityRadius 1 ;
         sachem:tautomerMode sachem:ignoreTautomers .
  }

  ?protein up:mnemonic ?mnemonic ;
           up:recommendedName/up:fullName ?proteinName ;
           up:annotation ?annotation .
  # The annotated ligand is linked to the matched compound via rdfs:subClassOf.
  ?annotation a up:Binding_Site_Annotation ;
              up:ligand/rdfs:subClassOf ?ligand .
}
ORDER BY DESC(?ligandSimilarityScore)

55: Number of proteins with annotated binding sites for metals or metal sulfur clusters (and experimental evidence for the binding)

# For every ligand class below CHEBI:25213 or CHEBI:25214, counts the distinct
# proteins that have an annotation with that ligand and an attribution carrying
# one of the listed evidence codes; largest counts first.
# NOTE(review): prefixes (up:, rdf:, rdfs:, obo:, CHEBI:) are not declared here;
# presumably the endpoint predefines them.
SELECT
  ?ligand
  ?ligandName
  (COUNT(DISTINCT ?protein) as ?entries)
WHERE {
  ?protein up:annotation ?annotation .

  # Accepted evidence codes and the two parent ChEBI classes to search below.
  VALUES ?evs { obo:ECO_0000269 obo:ECO_0007744 } .
  VALUES ?chebids { CHEBI:25213 CHEBI:25214 } .

  # The evidence hangs off the reified (protein, up:annotation, annotation)
  # statement rather than off the annotation itself.
  ?st rdf:subject ?protein ;
      rdf:predicate up:annotation ;
      rdf:object ?annotation ;
      up:attribution/up:evidence ?evs .

  ?annotation up:ligand/rdfs:subClassOf ?ligand .
  ?ligand rdfs:subClassOf+ ?chebids ;
          rdfs:label ?ligandName .
}
GROUP BY ?ligand ?ligandName
ORDER BY DESC(?entries)

56: Select Enzymes that have Ligands that have a known allosteric effect

# Proteins with a binding-site annotation whose ligand carries a comment that
# mentions "allosteric" (case-insensitive).
# NOTE(review): prefixes (up:, rdfs:) are not declared here; presumably the
# endpoint predefines them.
SELECT
  ?protein
  ?ligandName
  ?ligandNote
  ?chebi
WHERE {
  ?protein up:annotation ?annotation .
  ?annotation a up:Binding_Site_Annotation .
  ?annotation up:ligand ?ligand .
  ?ligand rdfs:comment ?ligandNote ;
          rdfs:subClassOf ?chebi ;
          rdfs:label ?ligandName .
  # The "i" flag makes the match case-insensitive.
  FILTER(REGEX(?ligandNote, "allosteric", "i"))
}

57: Map PDB identifiers plus chains to UniProt

# Maps (PDB identifier, chain code) pairs to the UniProt entries that
# cross-reference that PDB structure with a chain mapping containing the chain.
# NOTE(review): prefixes (up:, rdfs:) are not declared here; presumably the
# endpoint predefines them.
SELECT
  ?pdbId
  ?chain
  ?pdbChain
  ?uniprot
WHERE {
  # A space separated list of pairs of PDB identifiers and the chain code.
  VALUES (?pdbId ?pdbChain) { ('6VXC' 'A') ('1BG3' 'B') }

  # Make an IRI out of the pdbId
  BIND(iri(concat('http://rdf.wwpdb.org/pdb/', ?pdbId)) AS ?pdb)

  # Map to UniProt entries
  ?uniprot rdfs:seeAlso ?pdb .
  ?pdb up:database <http://purl.uniprot.org/database/PDB> ;
       up:chainSequenceMapping ?chainSm .
  ?chainSm up:chain ?chainsPlusRange .

  # Extract the list of chains from the text representation
  # (everything before the first '=').
  BIND(STRBEFORE(?chainsPlusRange, '=') AS ?chain)

  # Filter those that match.
  # NOTE(review): CONTAINS is a substring test, so a chain code that is a
  # substring of another chain code in the list would match too.
  FILTER(CONTAINS(?chain, ?pdbChain))
}

58: Map UniProt to HGNC identifiers and Symbols

# Maps UniProt primary accessions to their HGNC cross-references and the
# rdfs:comment stored on each cross-reference (returned as ?hgncSymbol).
# NOTE(review): prefixes (up:, rdfs:, uniprotkb:) are not declared here;
# presumably the endpoint predefines them.
SELECT
  ?uniprot
  ?hgnc
  ?hgncSymbol
WHERE {
  # A space separated list of UniProt primary accessions.
  VALUES (?acc) { ('P05067') ('P00750') }

  # Build the entry IRI by appending the accession to the uniprotkb: namespace.
  BIND(iri(concat(str(uniprotkb:), ?acc)) AS ?uniprot)

  ?uniprot rdfs:seeAlso ?hgnc .
  ?hgnc up:database <http://purl.uniprot.org/database/HGNC> ;
        rdfs:comment ?hgncSymbol .
}

59: Count all isoforms for a given proteome

# Counts the distinct sequences of reviewed human (taxon:9606) entries that
# belong to proteome UP000005640.
# NOTE(review): only proteome: is declared here; up:, taxon: and skos: are
# presumably predefined by the endpoint.
PREFIX proteome: <http://purl.uniprot.org/proteomes/>

SELECT
  (COUNT(DISTINCT ?sequence) AS ?allIsoforms)
WHERE {
  ?protein up:reviewed true .
  ?protein up:organism taxon:9606 .
  ?protein up:sequence ?sequence .
  # up:proteome points at a node that the proteome lists via skos:narrower;
  # the inverse path (^) walks from that node back up to the proteome.
  ?protein up:proteome/^skos:narrower proteome:UP000005640 .
}