SPARQL example query
64_diseases_related_to_mutation_in_active_site: Find (Human Genetic) diseases, that are related to a natural variant on the active site of an enzyme.
# Diseases related to a natural variant that covers the active site of a
# human (taxon:9606) enzyme.
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>
SELECT DISTINCT
  ?disease
WHERE {
  ?protein a up:Protein ;
    up:organism taxon:9606 ;
    up:annotation ?disease_annotation, ?active_site_annotation, ?natural_variant_annotation .

  # The protein counts as an enzyme when it has an enzyme classification
  # or at least one catalytic activity annotation.
  {
    ?protein up:enzyme [] .
  } UNION {
    ?protein up:annotation/a up:Catalytic_Activity_Annotation .
  }

  ?disease_annotation a up:Disease_Annotation ;
    up:disease ?disease .

  # Position of the active site on the sequence.
  ?active_site_annotation a up:Active_Site_Annotation ;
    up:range ?active_site_range .
  ?active_site_range faldo:begin ?active_site_begin .
  ?active_site_begin faldo:position ?active_site_position ;
    faldo:reference ?sequence .

  # Natural variant related to the same disease, located on the same sequence.
  ?natural_variant_annotation a up:Natural_Variant_Annotation ;
    up:range ?natural_variant_range ;
    skos:related ?disease .
  ?natural_variant_range faldo:begin ?natural_variant_begin ;
    faldo:end ?natural_variant_end .
  ?natural_variant_begin faldo:position ?natural_variant_begin_position .
  ?natural_variant_end faldo:position ?natural_variant_end_position ;
    faldo:reference ?sequence .

  # The active site must lie inside the variant range:
  # begin <= active site <= end.
  # (The previous ">=" on the begin position only required the variant to
  # start at or after the active site, which is not an overlap test.)
  FILTER(?natural_variant_begin_position <= ?active_site_position && ?active_site_position <= ?natural_variant_end_position)
}
65_distinct_extinct_organisms_in_uniprotkb: How many distinct extinct organisms are represented in UniProtKB
xxxxxxxxxx
# For every taxon that has entries classified with keyword 952
# (presumably the "extinct organism" keyword, going by the query title):
# one sample scientific name and the number of distinct entries.
PREFIX keywords: <http://purl.uniprot.org/keywords/>
PREFIX up: <http://purl.uniprot.org/core/>
SELECT
  ?taxon
  (SAMPLE(?name) AS ?anName)
  (COUNT(DISTINCT ?protein) AS ?entriesPerExtinctTaxon)
WHERE {
  # Entries carrying the keyword, and the organism they belong to.
  ?protein up:classifiedWith keywords:952 .
  ?protein up:organism ?taxon .

  # Taxon type and name are read from the taxonomy named graph.
  GRAPH <http://sparql.uniprot.org/taxonomy> {
    ?taxon a up:Taxon .
    ?taxon up:scientificName ?name .
  }
}
GROUP BY ?taxon
ORDER BY ?taxon
66_distinct_rhea_transport_in_reviewed_uniprot: Count number of unique Rhea transport reactions annotated in reviewed UniProtKB entries.
xxxxxxxxxx
# Number of distinct Rhea transport reactions used in catalytic activity
# annotations of reviewed UniProtKB entries.
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX up: <http://purl.uniprot.org/core/>
SELECT
  (COUNT(DISTINCT ?rhea) AS ?distinctRheaTransportInUniProt)
WHERE {
  # Reactions flagged as transport in the Rhea named graph.
  GRAPH <https://sparql.rhea-db.org/rhea> {
    ?rhea rh:isTransport true .
  }
  # Restrict to reviewed entries; without this triple the count also
  # includes reactions annotated only on unreviewed entries.
  ?protein up:reviewed true ;
    up:annotation ?ann .
  ?ann up:catalyticActivity ?ca .
  ?ca up:catalyzedReaction ?rhea .
}
67_draft_human_metabolome: Generate a draft human metabolome
xxxxxxxxxx
# Draft human metabolome: for every taxon:9606 entry with a catalyzed Rhea
# reaction, list the ChEBI compounds taking part in that reaction together
# with their SMILES and InChIKey.
PREFIX chebislash: <http://purl.obolibrary.org/obo/chebi/>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?uniprot ?mnemonic ?rhea ?chebi ?smiles ?inchiKey
WHERE {
  ?uniprot up:annotation/up:catalyticActivity/up:catalyzedReaction ?rhea .
  ?uniprot up:organism taxon:9606 .
  ?uniprot up:mnemonic ?mnemonic .

  # Reaction participants are resolved at the remote Rhea endpoint.
  SERVICE <https://sparql.rhea-db.org/sparql> {
    ?rhea rh:side/rh:contains/rh:compound ?participant .
    # A participant reaches its ChEBI class directly, via a reactive part,
    # or via its underlying ChEBI.
    ?participant (rh:chebi|(rh:reactivePart/rh:chebi)|(rh:underlyingChebi/rh:chebi)) ?chebi .
  }

  # Structure identifiers of the ChEBI compound.
  ?chebi chebislash:smiles ?smiles .
  ?chebi chebislash:inchikey ?inchiKey .
}
70_enzymes_interacting_with_molecules_similar_to_dopamine: Look for enzymes catalyzing reactions with molecules similar to dopamine.
xxxxxxxxxx
# Reviewed entries that catalyze a Rhea reaction involving a compound
# structurally similar to dopamine.
PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX sachem: <http://bioinfo.uochb.cas.cz/rdf/v1.0/sachem#>
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?protein ?rhea ?chebi
WHERE {
  # Similarity search at the remote IDSM endpoint; the query string is the
  # SMILES of dopamine (CHEBI:18243).
  SERVICE <https://idsm.elixir-czech.cz/sparql/endpoint/chebi> {
    ?chebi sachem:similarCompoundSearch [ sachem:query "NCCc1ccc(O)c(O)c1" ] .
  }

  # Rhea reactions with a participant mapped to one of the similar compounds.
  GRAPH <https://sparql.rhea-db.org/rhea> {
    ?rhea rh:side/rh:contains/rh:compound ?participant .
    ?participant (rh:chebi|(rh:reactivePart/rh:chebi)|(rh:underlyingChebi/rh:chebi)) ?chebi .
  }

  # Reviewed entries annotated with that reaction.
  ?protein up:reviewed true .
  ?protein up:annotation ?activityAnnotation .
  ?activityAnnotation up:catalyticActivity/up:catalyzedReaction ?rhea .
}
71_enzymes_interacting_with_molecules_similar_to_dopamine_with_variants_related_to_disease: Look for enzymes catalyzing reactions with molecules similar to dopamine, with natural variants related to a disease.
xxxxxxxxxx
# Reviewed entries that catalyze a Rhea reaction involving a compound
# structurally similar to dopamine, and that have a natural variant related
# to a disease the entry is annotated with.
PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX sachem: <http://bioinfo.uochb.cas.cz/rdf/v1.0/sachem#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?protein ?rhea ?chebi ?disease
WHERE {
  # Similarity search at the remote IDSM endpoint; the query string is the
  # SMILES of dopamine (CHEBI:18243).
  SERVICE <https://idsm.elixir-czech.cz/sparql/endpoint/chebi> {
    ?chebi sachem:similarCompoundSearch [ sachem:query "NCCc1ccc(O)c(O)c1" ] .
  }

  # Rhea reactions with a participant mapped to one of the similar compounds.
  GRAPH <https://sparql.rhea-db.org/rhea> {
    ?rhea rh:side/rh:contains/rh:compound ?participant .
    ?participant (rh:chebi|(rh:reactivePart/rh:chebi)|(rh:underlyingChebi/rh:chebi)) ?chebi .
  }

  ?protein up:reviewed true .
  ?protein up:annotation ?activityAnnotation .
  ?protein up:annotation ?variantAnnotation .
  ?protein up:annotation ?diseaseAnnotation .

  ?activityAnnotation up:catalyticActivity/up:catalyzedReaction ?rhea .

  # The variant and the disease annotation must point at the same disease.
  ?variantAnnotation a up:Natural_Variant_Annotation .
  ?variantAnnotation skos:related ?disease .
  ?diseaseAnnotation a up:Disease_Annotation .
  ?diseaseAnnotation up:disease ?disease .
}
72_enzymes_mapping_to_PDB: Select a mapping of UniProtKB enzymes to PDB entries using the UniProtKB cross-references to the PDB database.
xxxxxxxxxx
# Mapping of UniProtKB enzymes to PDB entries through the entries'
# cross-references to the PDB database.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>
# DISTINCT: the enzyme test below yields one solution per enzyme class and
# per catalytic activity annotation (and per UNION branch), which would
# otherwise repeat the same (?protein, ?db) pair.
SELECT DISTINCT
  ?protein
  ?db
WHERE {
  ?protein a up:Protein ;
    rdfs:seeAlso ?db .
  ?db up:database <http://purl.uniprot.org/database/PDB> .

  # The protein counts as an enzyme when it has an enzyme classification
  # or at least one catalytic activity annotation.
  {
    ?protein up:enzyme ?enzyme .
  } UNION {
    ?protein up:annotation/rdf:type up:Catalytic_Activity_Annotation .
  }
}
73_enzymes_related_to_protein: Select Enzyme Classification (EC) classes that have a protein associated to them
xxxxxxxxxx
# Enzyme classes that have at least one protein associated with them,
# whichever way the association is recorded.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>
SELECT DISTINCT ?enzyme
WHERE {
  ?protein a up:Protein .
  {
    # Directly on the protein.
    ?protein up:enzyme ?enzyme .
  } UNION {
    # Through a catalytic activity annotation.
    ?protein up:annotation ?activityAnnotation .
    ?activityAnnotation a up:Catalytic_Activity_Annotation ;
      up:catalyticActivity ?activity .
    ?activity up:enzymeClass ?enzyme
  } UNION {
    # On a component of the protein.
    ?protein up:component/up:enzyme ?enzyme .
  } UNION {
    # On a domain of the protein.
    ?protein up:domain/up:enzyme ?enzyme .
  }
}
74_enzymes_with_at_least_two_transmembrane_domains: Find Hydrolases (enzymes that catalyse the hydrolysis of various bonds) that have at least two transmembrane domains
xxxxxxxxxx
# Proteins whose enzyme class is a subclass of EC 3.-.-.- and that carry at
# least two distinct transmembrane annotations.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?protein
WHERE {
  # Enzyme class taken either directly from the protein or from one of its
  # catalytic activities ("/" binds tighter than "|"; parentheses make the
  # grouping explicit).
  ?protein (up:enzyme|(up:annotation/up:catalyticActivity/up:enzymeClass)) ?enzymeClass .
  ?enzymeClass rdfs:subClassOf <http://purl.uniprot.org/enzyme/3.-.-.-> .

  ?protein up:annotation ?tmAnnotation .
  ?tmAnnotation a up:Transmembrane_Annotation .
}
GROUP BY ?protein
HAVING (COUNT(DISTINCT ?tmAnnotation) >= 2)
75_enzymes_with_at_least_two_transmembrane_domains_PDB_xray: Find enzymes that have at least two transmembrane domains, whose 3D structure is elucidated through X-Ray analysis
xxxxxxxxxx
# Enzymes with at least two distinct transmembrane annotations and a PDB
# cross-reference whose method is X-ray crystallography.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?protein
WHERE {
  ?protein a up:Protein .

  # The protein counts as an enzyme when it has an enzyme classification
  # or at least one catalytic activity annotation.
  {
    ?protein up:enzyme ?enzyme .
  } UNION {
    ?protein up:annotation/a up:Catalytic_Activity_Annotation .
  }

  # PDB cross-reference determined by X-ray crystallography.
  ?protein rdfs:seeAlso ?pdbEntry .
  ?pdbEntry up:database <http://purl.uniprot.org/database/PDB> .
  ?pdbEntry up:method up:X-Ray_Crystallography .

  # Transmembrane annotations, counted per protein below.
  ?protein up:annotation ?tmAnnotation .
  ?tmAnnotation a up:Transmembrane_Annotation .
}
GROUP BY ?protein
HAVING (COUNT(DISTINCT ?tmAnnotation) >= 2)
76_enzymes_with_mutagenesis_affecting_active_site: Find enzymes, where the active site is a region affected by mutagenesis and show the comment regarding mutagenesis effect.
xxxxxxxxxx
# Proteins whose active site lies inside a region covered by a mutagenesis
# annotation, together with the positions involved and the mutagenesis
# comment.
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>
SELECT
  ?protein
  ?mutagenesisBeginPosition
  ?activeSiteBeginPosition
  ?mutagenesisRangeEndPosition
  ?mutagenesisComment
WHERE {
  ?protein up:annotation ?siteAnnotation .
  ?protein up:annotation ?mutAnnotation .

  # Active site: begin position and the sequence it refers to.
  ?siteAnnotation a up:Active_Site_Annotation .
  ?siteAnnotation up:range ?siteRange .
  ?siteRange faldo:begin ?siteBegin .
  ?siteBegin faldo:position ?activeSiteBeginPosition .
  ?siteBegin faldo:reference ?seq .

  # Mutagenesis: comment plus begin/end positions on the same sequence.
  ?mutAnnotation a up:Mutagenesis_Annotation .
  ?mutAnnotation up:range ?mutRange .
  ?mutAnnotation rdfs:comment ?mutagenesisComment .
  ?mutRange faldo:begin ?mutBegin .
  ?mutRange faldo:end ?mutEnd .
  ?mutBegin faldo:position ?mutagenesisBeginPosition .
  ?mutBegin faldo:reference ?seq .
  ?mutEnd faldo:position ?mutagenesisRangeEndPosition .

  # Keep only active sites inside the mutagenesis range (inclusive).
  FILTER(?activeSiteBeginPosition >= ?mutagenesisBeginPosition && ?activeSiteBeginPosition <= ?mutagenesisRangeEndPosition)
}
78_genetic_disease_related_proteins: List all UniProtKB proteins annotated to be related to a genetic disease.
xxxxxxxxxx
# Proteins annotated with a disease, with the disease comment and, when
# present, the disease's MIM cross-reference.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?uniprot ?disease ?diseaseComment ?mim
WHERE {
  # Protein-to-disease links live in the uniprot named graph.
  GRAPH <http://sparql.uniprot.org/uniprot> {
    ?uniprot a up:Protein .
    ?uniprot up:annotation ?annotation .
    ?annotation up:disease ?disease .
  }
  # Disease descriptions live in the diseases named graph.
  GRAPH <http://sparql.uniprot.org/diseases> {
    ?disease a up:Disease .
    ?disease rdfs:comment ?diseaseComment .
    # The MIM cross-reference is optional; ?mim stays unbound without one.
    OPTIONAL {
      ?disease rdfs:seeAlso ?mim .
      ?mim up:database <http://purl.uniprot.org/database/MIM> .
    }
  }
}
79_mnemonic_also_known_as_id: List all UniProtKB protein ID (mnemonic) for current UniProtKB entries.
xxxxxxxxxx
# Every protein in the uniprot named graph together with its mnemonic.
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?uniprot ?mnemonic
WHERE {
  GRAPH <http://sparql.uniprot.org/uniprot> {
    ?uniprot a up:Protein .
    ?uniprot up:mnemonic ?mnemonic .
  }
}
80_obsolete_mnemonic_also_known_as_id: List all UniProtKB protein IDs (mnemonics) that were used in the past for current UniProtKB entries.
xxxxxxxxxx
# Every protein in the uniprot named graph together with each of its old
# mnemonics.
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?uniprot ?obsoleteMnemonic
WHERE {
  GRAPH <http://sparql.uniprot.org/uniprot> {
    ?uniprot a up:Protein .
    ?uniprot up:oldMnemonic ?obsoleteMnemonic .
  }
}
81_rhea_reactions_annotated_as_experimental_and_only_small_molecules: Find all Rhea (only small molecule) that are used in UniProtKB where the annotation has a paper and is tagged having experimental evidence.
xxxxxxxxxx
# Rhea reactions whose participants are all small molecules and that are
# used in a catalytic activity whose attribution has evidence ECO:0000269
# and a citation as source.
PREFIX ECO: <http://purl.obolibrary.org/obo/ECO_>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?rhea ?catalyzedReaction ?source
WHERE {
  {
    # Sub-select: reactions for which no participant falls outside
    # rh:SmallMolecule.
    SELECT DISTINCT ?rhea
    WHERE {
      GRAPH <https://sparql.rhea-db.org/rhea> {
        ?rhea rdfs:subClassOf rh:Reaction ;
          rh:side/rh:contains/rh:compound ?participant .
        ?compoundKind rdfs:subClassOf rh:Compound .
      }
      ?participant rdfs:subClassOf ?compoundKind .
      # 0 for a small molecule, 1 for any other compound kind; a reaction
      # qualifies when the per-reaction sum is 0.
      BIND(IF(?compoundKind = rh:SmallMolecule, 0, 1) AS ?notSmall)
    }
    GROUP BY ?rhea
    HAVING (SUM(?notSmall) = 0)
  }

  ?catalyzedReaction up:catalyzedReaction ?rhea .

  # Reified statement whose object is the catalytic activity; its
  # attribution carries the evidence code and the source.
  ?statement rdf:object ?catalyzedReaction .
  ?statement up:attribution ?attribution .
  ?attribution up:evidence ECO:0000269 .
  ?attribution up:source ?source .
  ?source a up:Citation .
}
82_rhea_reactions_associated_with_ec_in_uniprotkb: List Rhea reactions associated with an EC (enzyme classification).
xxxxxxxxxx
# Rhea reactions paired with the enzyme class recorded on the same
# catalytic activity.
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?rhea ?EC
WHERE {
  ?activity up:catalyzedReaction ?rhea .
  ?activity up:enzymeClass ?EC .
}
83_rhea_reactions_not_associated_with_ec_in_uniprotkb: List Rhea reactions that are not associated with an EC (enzyme classification).
xxxxxxxxxx
# Rhea reactions taken from catalytic activities that have no enzyme class.
PREFIX up: <http://purl.uniprot.org/core/>
# ?EC occurs only inside MINUS, so it is unbound in every result row; it
# stays in the projection to keep the result columns unchanged.
SELECT ?rhea ?EC
WHERE {
  ?activity up:catalyzedReaction ?rhea .
  # Drop every catalytic activity that has an enzyme class.
  MINUS {
    ?activity up:enzymeClass ?EC .
  }
}
84_taxonomy_hierarchy: Find all taxonomic records that describe species of the genus Homo.
xxxxxxxxxx
# Taxon records of rank species that are subclasses of the genus
# taxon:9605.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?species ?genus
WHERE {
  # Fix the genus up front so it is also returned as a column.
  BIND(taxon:9605 AS ?genus)

  ?genus a up:Taxon .
  ?genus up:rank up:Genus .

  ?species a up:Taxon .
  ?species up:rank up:Species .
  ?species rdfs:subClassOf ?genus .
}
85_taxonomy_host: Find taxon records that are known to have part of their life cycle in other organisms (e.g. parasite, symbiont, infection)
xxxxxxxxxx
# Every subject with an up:host link, paired with its host. The ?virus
# column name is kept as is, although the pattern matches any subject
# that has a host.
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?virus ?host
WHERE { ?virus up:host ?host . }
86_taxonomy_rank_and_scientific_name: Retrieve the rank and the scientific name of a taxonomic record. Not all taxonomic records have a rank associated with them.
xxxxxxxxxx
# Scientific name of every taxon record, plus its rank when it has one.
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?taxon ?scientificName ?rank
WHERE {
  ?taxon a up:Taxon .
  ?taxon up:scientificName ?scientificName .
  # OPTIONAL keeps taxa without a rank; ?rank is unbound for them.
  OPTIONAL { ?taxon up:rank ?rank }
}
87_taxonomy_with_at_least_one_swissprot: Find taxon records for which one reviewed UniProtKB/Swiss-Prot entry exists. We might expect species, strains, subspecies and isolates in the taxon list.
xxxxxxxxxx
# Taxon records that have at least one reviewed UniProtKB entry, together
# with the top-level domain they fall under.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>
SELECT DISTINCT
  ?taxid
  ?scientificName
  ?domain
  ?domainName
WHERE {
  ?uniprot a up:Protein .
  # reviewed entries
  ?uniprot up:reviewed true .
  ?uniprot up:organism ?taxid .
  ?taxid up:scientificName ?scientificName .

  # Candidate top-level domains.
  VALUES ?domain { taxon:2 # bacteria
                   taxon:2157 # archaea
                   taxon:2759 # eukaryota
                   taxon:10239 #viruses
                 } .
  ?taxid rdfs:subClassOf ?domain .

  # ?domainName was projected but never bound, so the column was always
  # empty; bind it from the domain's own scientific name.
  ?domain up:scientificName ?domainName .
}
88_uniparc_linked_to_active_uniprot: Show for a given UniParc accession which active UniProtKB entries have the same amino acid sequence
xxxxxxxxxx
# UniProtKB entries that the UniParc record UPI000002DB1C is a sequence
# for, limited to entries typed up:Protein in the uniprot named graph.
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?uniparc ?uniprot
WHERE {
  GRAPH <http://sparql.uniprot.org/uniparc> {
    # Fix the UniParc record of interest; it is also returned as a column.
    BIND(<http://purl.uniprot.org/uniparc/UPI000002DB1C> AS ?uniparc)
    ?uniparc up:sequenceFor ?uniprot .
  }
  # Keep only targets that exist as proteins in the uniprot graph.
  GRAPH <http://sparql.uniprot.org/uniprot> {
    ?uniprot a up:Protein .
  }
}
89_uniparc_triples_directly_associated: Predicates and objects, for a given UniParc accession as a subject
xxxxxxxxxx
# All predicate/object pairs whose subject is the UniParc record
# UPI000012A0AD.
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?predicate ?object
WHERE { <http://purl.uniprot.org/uniparc/UPI000012A0AD> ?predicate ?object }
90_uniprot_affected_by_metabolic_diseases_using_MeSH: Proteins involved in metabolic diseases in UniProtKB. Using the MeSH concept as a root to find metabolic diseases in UniProt.
xxxxxxxxxx
# Proteins annotated with a disease whose MeSH cross-reference sits at or
# below the MeSH descriptor for 'Metabolic Diseases'.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?disease ?protein
WHERE {
  # Remote MeSH endpoint: collect every descriptor that reaches, through
  # zero or more broaderDescriptor steps, the descriptor whose preferred
  # concept is M0013493 (used here as the root for 'Metabolic Diseases').
  SERVICE <https://id.nlm.nih.gov/mesh/sparql> {
    GRAPH <http://id.nlm.nih.gov/mesh> {
      ?mesh <http://id.nlm.nih.gov/mesh/vocab#broaderDescriptor>* ?root .
      ?root <http://id.nlm.nih.gov/mesh/vocab#preferredConcept> <http://id.nlm.nih.gov/mesh/M0013493> .
    }
  }

  # UniProt diseases cross-referenced to one of those MeSH descriptors.
  GRAPH <http://sparql.uniprot.org/diseases> {
    ?disease a up:Disease .
    ?disease rdfs:seeAlso ?mesh .
    ?mesh up:database <http://purl.uniprot.org/database/MeSH> .
  }

  # Proteins annotated with such a disease.
  GRAPH <http://sparql.uniprot.org/uniprot> {
    ?protein up:annotation/up:disease ?disease .
  }
}
91_uniprot_alternative_protein_full_name: Alternative protein full names for UniProtKB entries
xxxxxxxxxx
# Full names taken from the alternative names of each protein.
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?protein ?fullName
WHERE {
  ?protein a up:Protein .
  ?protein up:alternativeName ?alternativeName .
  ?alternativeName up:fullName ?fullName .
}
92_uniprot_bioregistry_iri_translation: Translate the global unique identifier for a UniProtKB record into other options using the bioregistry translating endpoint.
xxxxxxxxxx
# Other identifiers for the UniProtKB record P00750, obtained as owl:sameAs
# links from the remote Bioregistry endpoint.
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?protein ?otherIdentifier
WHERE {
  # Fix the record of interest; it is also returned as a column.
  BIND(uniprotkb:P00750 AS ?protein)
  ?protein a up:Protein .
  # The translation itself happens at the Bioregistry endpoint.
  SERVICE <https://bioregistry.io/sparql> {
    ?protein owl:sameAs ?otherIdentifier .
  }
}