SPARQL example query
82_rhea_reactions_associated_with_ec_in_uniprotkb: List Rhea reactions associated with an EC (enzyme classification).
# Pairs each catalysed reaction (?rhea) with the enzyme class (?EC) recorded
# on the same catalytic-activity node.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?rhea ?EC
WHERE {
  ?activity up:catalyzedReaction ?rhea .
  ?activity up:enzymeClass       ?EC .
}
83_rhea_reactions_not_associated_with_ec_in_uniprotkb: List Rhea reactions that are not associated with an EC (enzyme classification).
xxxxxxxxxx
# Reactions catalysed by an activity node that carries no up:enzymeClass.
# NOTE: ?EC only occurs inside the MINUS group, so it is projected but is
# never bound in any result row.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?rhea ?EC
WHERE {
  ?activity up:catalyzedReaction ?rhea .

  # Drop every activity node that has an enzyme class attached.
  MINUS {
    ?activity up:enzymeClass ?EC .
  }
}
84_taxonomy_hierarchy: Find all taxonomic records that describe species of the genus Homo.
xxxxxxxxxx
# Species-rank taxa that are rdfs:subClassOf taxon:9605; taxon:9605 itself
# must be a genus-rank up:Taxon for any row to be returned.
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up:    <http://purl.uniprot.org/core/>

SELECT ?species ?genus
WHERE {
  # Pin the genus to a single, fixed taxon.
  BIND(taxon:9605 AS ?genus)

  ?species a               up:Taxon .
  ?species up:rank         up:Species .
  ?species rdfs:subClassOf ?genus .

  ?genus   a               up:Taxon .
  ?genus   up:rank         up:Genus .
}
85_taxonomy_host: Find taxon records that are known to have part of their life cycle in other organisms (e.g. parasite, symbiont, infection)
xxxxxxxxxx
# Every (subject, host) pair linked by up:host.
# The subject variable is called ?virus, but the pattern itself places no
# restriction on what kind of taxon the subject is.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?virus ?host
WHERE {
  ?virus up:host ?host .
}
86_taxonomy_rank_and_scientific_name: Retrieve the rank and the scientific name of a taxonomic record. Not all taxonomic records have a rank associated with them.
xxxxxxxxxx
# Scientific name of every up:Taxon, plus its rank when one is recorded.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?taxon ?scientificName ?rank
WHERE {
  ?taxon a                 up:Taxon .
  ?taxon up:scientificName ?scientificName .

  # Rank is optional so that rank-less taxa are still returned (?rank unbound).
  OPTIONAL { ?taxon up:rank ?rank }
}
87_taxonomy_with_at_least_one_swissprot: Find taxon records for which at least one reviewed UniProtKB/Swiss-Prot entry exists. We might expect species, strains, subspecies and isolates in the taxon list.
xxxxxxxxxx
# Distinct taxa that are the organism of at least one reviewed up:Protein,
# together with the top-level domain (one of four fixed taxa) they fall under.
#
# Fix: ?domainName was projected but never bound, so that column was always
# empty. It is now filled from the domain's up:scientificName. The lookup is
# OPTIONAL so that no previously returned row can be lost.
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up:    <http://purl.uniprot.org/core/>

SELECT DISTINCT
  ?taxid
  ?scientificName
  ?domain
  ?domainName
WHERE {
  ?uniprot a up:Protein .
  # reviewed entries
  ?uniprot up:reviewed true .
  ?uniprot up:organism ?taxid .
  ?taxid up:scientificName ?scientificName .

  VALUES ?domain {
    taxon:2      # bacteria
    taxon:2157   # archaea
    taxon:2759   # eukaryota
    taxon:10239  # viruses
  } .
  ?taxid rdfs:subClassOf ?domain .

  # Human-readable name of the matched domain.
  OPTIONAL { ?domain up:scientificName ?domainName . }
}
88_uniparc_linked_to_active_uniprot: Show, for a given UniParc accession, which active UniProtKB entries have the same amino acid sequence
xxxxxxxxxx
# For one fixed UniParc record, list the entries it is up:sequenceFor that are
# also typed up:Protein in the uniprot named graph.
PREFIX up:      <http://purl.uniprot.org/core/>
PREFIX uniparc: <http://purl.uniprot.org/uniparc/>

SELECT ?uniparc ?uniprot
WHERE {
  # UniParc side: which entries share this record's sequence.
  GRAPH <http://sparql.uniprot.org/uniparc> {
    BIND(uniparc:UPI000002DB1C AS ?uniparc)
    ?uniparc up:sequenceFor ?uniprot .
  }

  # UniProtKB side: keep only entries present as up:Protein in this graph.
  GRAPH <http://sparql.uniprot.org/uniprot> {
    ?uniprot a up:Protein .
  }
}
89_uniparc_triples_directly_associated: Predicates and objects, for a given UniParc accession as a subject
xxxxxxxxxx
# Dump every predicate/object pair whose subject is one fixed UniParc record.
PREFIX up:      <http://purl.uniprot.org/core/>
PREFIX uniparc: <http://purl.uniprot.org/uniparc/>

SELECT ?predicate ?object
WHERE {
  uniparc:UPI000012A0AD ?predicate ?object .
}
90_uniprot_affected_by_metabolic_diseases_using_MeSH: Proteins involved in metabolic diseases in UniProtKB. Using the MeSH concept as a root to find metabolic diseases in UniProt.
xxxxxxxxxx
# Federated query: ask the MeSH endpoint for every descriptor at or below the
# descriptor whose preferred concept is M0013493 ('Metabolic Diseases'), then
# join those descriptors to UniProt diseases and on to annotated proteins.
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up:    <http://purl.uniprot.org/core/>
PREFIX mesh:  <http://id.nlm.nih.gov/mesh/>
PREFIX meshv: <http://id.nlm.nih.gov/mesh/vocab#>

SELECT ?disease ?protein
WHERE {
  # Step 1 (remote): descriptors reachable via zero or more broaderDescriptor
  # hops from the root descriptor.
  SERVICE <https://id.nlm.nih.gov/mesh/sparql> {
    GRAPH <http://id.nlm.nih.gov/mesh> {
      ?mesh    meshv:broaderDescriptor* ?broader .
      ?broader meshv:preferredConcept   mesh:M0013493 .
    }
  }

  # Step 2: UniProt diseases cross-referenced to one of those descriptors.
  GRAPH <http://sparql.uniprot.org/diseases> {
    ?disease a            up:Disease .
    ?disease rdfs:seeAlso ?mesh .
    ?mesh    up:database  <http://purl.uniprot.org/database/MeSH> .
  }

  # Step 3: proteins carrying an annotation that points at such a disease.
  GRAPH <http://sparql.uniprot.org/uniprot> {
    ?protein up:annotation/up:disease ?disease .
  }
}
91_uniprot_alternative_protein_full_name: Alternative protein full names for UniProtKB entries
xxxxxxxxxx
# Full names attached to the alternative-name nodes of every up:Protein.
#
# Fix: the intermediate node is reached through up:alternativeName but was
# called ?recommendedName, which read as if the query returned recommended
# names. It is now ?alternativeName; the projected columns are unchanged.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein
       ?fullName
WHERE {
  ?protein a up:Protein ;
           up:alternativeName ?alternativeName .
  ?alternativeName up:fullName ?fullName .
}
92_uniprot_bioregistry_iri_translation: Translate the global unique identifier for a UniProtKB record into other options using the bioregistry translating endpoint.
xxxxxxxxxx
# Take one fixed UniProtKB IRI, confirm it is a up:Protein locally, then ask
# the Bioregistry endpoint which other IRIs it declares owl:sameAs.
PREFIX owl:       <http://www.w3.org/2002/07/owl#>
PREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>
PREFIX up:        <http://purl.uniprot.org/core/>

SELECT ?protein ?otherIdentifier
WHERE {
  BIND(uniprotkb:P00750 AS ?protein)
  ?protein a up:Protein .

  # Remote lookup of equivalent identifiers.
  SERVICE <https://bioregistry.io/sparql> {
    ?protein owl:sameAs ?otherIdentifier .
  }
}
93_uniprot_created_modified_updated: List the creation date, last-modified date, and version for UniProtKB proteins.
xxxxxxxxxx
# For every up:Protein, return its up:created, up:modified and up:version
# values. All three are required; a protein missing any of them is skipped.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?created ?modified ?version
WHERE {
  ?protein a           up:Protein .
  ?protein up:created  ?created .
  ?protein up:modified ?modified .
  ?protein up:version  ?version .
}
94_uniprot_encoding_gene: List UniProtKB proteins with their associated named gene
xxxxxxxxxx
# Every up:Protein paired with each gene node it is up:encodedBy.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?gene
WHERE {
  ?protein a            up:Protein .
  ?protein up:encodedBy ?gene .
}
96_uniprot_encoding_gene_name: List UniProtKB proteins with their associated gene and the gene's preferred name
xxxxxxxxxx
# Every up:Protein with each encoding gene node and that gene's skos:prefLabel.
#
# Fix: the preferred label was matched but not projected, so the result never
# showed the gene name this query exists to list. ?recommendedGeneName is now
# appended to the SELECT list; the existing columns keep their position.
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX up:   <http://purl.uniprot.org/core/>

SELECT ?protein
       ?gene
       ?recommendedGeneName
WHERE {
  ?protein a up:Protein ;
           up:encodedBy ?gene .
  ?gene skos:prefLabel ?recommendedGeneName .
}
97_uniprot_encoding_gene_org_name: List UniProtKB proteins with their associated gene and the gene's ORF label
xxxxxxxxxx
# Every up:Protein with each encoding gene node and that gene's up:orfName.
#
# Fix: ?protein was matched but not projected, so rows could not be tied back
# to a protein. It is now the first column, matching the other encoding-gene
# queries in this collection; ?gene and ?orfName are still returned by name.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein
       ?gene
       ?orfName
WHERE {
  ?protein a up:Protein ;
           up:encodedBy ?gene .
  ?gene up:orfName ?orfName .
}
98_uniprot_entries_with_more_than_two_geneid_crossrefences: Find GeneIDs cross-linked to more than one human or mouse UniProtKB entry
xxxxxxxxxx
# GeneID cross-references that are linked (rdfs:seeAlso) from more than one
# entry of the same organism, restricted to taxon:9606 and taxon:10090.
# The linked entries are returned as one comma-separated string per row.
#
# Fix: the taxon: prefix pointed at <http://purl.uniprot.org/taxon/>, while
# every other query in this collection uses <http://purl.uniprot.org/taxonomy/>.
# With the wrong namespace the two VALUES IRIs match no up:organism value.
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up:    <http://purl.uniprot.org/core/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>

SELECT
  ?geneid
  ?organism
  (GROUP_CONCAT(?protein; separator=', ') AS ?entries)
WHERE {
  VALUES ?organism { taxon:9606 taxon:10090 }

  ?geneid  up:database  <http://purl.uniprot.org/database/GeneID> .
  ?protein rdfs:seeAlso ?geneid ;
           up:organism  ?organism .
}
GROUP BY ?geneid ?organism
# Keep only GeneIDs shared by at least two entries.
HAVING (COUNT(?protein) > 1)
ORDER BY ?organism ?geneid
99_uniprot_identifiers_org_translation: Translate a selection of UniProtKB accession numbers into other options using the identifiers.org translating endpoint.
xxxxxxxxxx
# For a fixed list of UniProtKB IRIs that are up:Protein locally, ask the
# identifiers.org endpoint which other IRIs it declares owl:sameAs.
PREFIX owl:       <http://www.w3.org/2002/07/owl#>
PREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>
PREFIX up:        <http://purl.uniprot.org/core/>

SELECT ?protein ?otherIdentifier
WHERE {
  VALUES ?protein {
    uniprotkb:P00750
    uniprotkb:P05067
  }
  ?protein a up:Protein .

  # Remote lookup of equivalent identifiers.
  SERVICE <https://sparql.api.identifiers.org/sparql> {
    ?protein owl:sameAs ?otherIdentifier .
  }
}
drosophila_enzymes_derived_from_at_least_two_mRNAs: Find Drosophila proteins, that depend on more than one transcript (requires cross-references to Ensembl family of resources).
xxxxxxxxxx
# Reviewed up:Protein entries of organism taxon:7227 that are cross-referenced
# (rdfs:seeAlso) to two or more up:Transcript_Resource nodes.
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up:    <http://purl.uniprot.org/core/>

SELECT ?protein
WHERE {
  ?protein    up:organism  taxon:7227 .
  ?protein    a            up:Protein .
  ?protein    up:reviewed  true .
  ?protein    rdfs:seeAlso ?transcript .
  ?transcript a            up:Transcript_Resource .
}
GROUP BY ?protein
HAVING (COUNT(?transcript) >= 2)
drosophila_proteins_derived_from_at_least_two_mRNAs: Find Drosophila enzymes, that depend on more than one transcript (requires cross-references to Ensembl family of resources).
xxxxxxxxxx
# Reviewed up:Protein entries of organism taxon:7227 that look like enzymes
# (they have a catalytic-activity annotation OR an up:enzyme value) and are
# cross-referenced (rdfs:seeAlso) to two or more up:Transcript_Resource nodes.
#
# Fix: the UNION yields one solution per matching annotation / enzyme value,
# so the same transcript could appear in several rows of a group. A protein
# with a single transcript but, say, one annotation and one up:enzyme value
# then passed COUNT(?transcript) >= 2. Counting DISTINCT transcripts removes
# that inflation.
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up:    <http://purl.uniprot.org/core/>

SELECT ?protein
WHERE {
  ?protein up:organism taxon:7227 ;
           a up:Protein ;
           up:reviewed true ;
           rdfs:seeAlso ?transcript .

  # Enzyme test: either branch is enough.
  {
    ?protein up:annotation/a up:Catalytic_Activity_Annotation .
  } UNION {
    ?protein up:enzyme ?enzyme .
  }

  ?transcript a up:Transcript_Resource .
}
GROUP BY ?protein
HAVING (COUNT(DISTINCT ?transcript) >= 2)
enzyme_with_threonine_as_active_site: Find enzymes with a Tyrosine (Y) as an active site
xxxxxxxxxx
# Proteins with an active-site annotation whose range begins on a 'Y' residue.
# The residue is read from the referenced sequence string at the FALDO begin
# position, taking a one-character SUBSTR (SPARQL SUBSTR is 1-based).
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX up:    <http://purl.uniprot.org/core/>

SELECT ?protein
WHERE {
  ?protein up:annotation ?site .
  ?site    a             up:Active_Site_Annotation .
  ?site    up:range      ?span .

  # Start of the annotated range: numeric position plus the sequence it is on.
  ?span    faldo:begin     ?start .
  ?start   faldo:position  ?pos .
  ?start   faldo:reference ?seq .
  ?seq     rdf:value       ?residues .

  FILTER(SUBSTR(?residues, ?pos, 1) = 'Y')
}
uniprot_encoding_gene_name_alternative_name: List UniProtKB proteins with their associated gene and the gene's names that are used in the field, but not recommended for use by UniProtKB
xxxxxxxxxx
# Every up:Protein with each encoding gene node and each skos:altLabel of
# that gene. Genes without an altLabel produce no row.
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX up:   <http://purl.uniprot.org/core/>

SELECT ?protein ?gene ?altGeneName
WHERE {
  ?protein a             up:Protein .
  ?protein up:encodedBy  ?gene .
  ?gene    skos:altLabel ?altGeneName .
}