SPARQL example query
104_uniprot_recomended_protein_full_name: The recommended protein full names for UniProtKB entries
# Lists each UniProtKB protein together with the full name attached to its
# recommended-name node.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?fullName
WHERE {
  # Every resource typed as a protein ...
  ?protein a up:Protein .
  # ... that points to a recommended-name node ...
  ?protein up:recommendedName ?recName .
  # ... from which the full name is read.
  ?recName up:fullName ?fullName .
}
105_uniprot_recomended_protein_short_name: The recommended protein short names for UniProtKB entries
xxxxxxxxxx
# Lists each UniProtKB protein together with the short name(s) attached to its
# recommended-name node.
#
# NOTE: the result column is named ?shortName because the value comes from
# up:shortName; it was previously mislabelled ?fullName (copied from the
# full-name query), which made the output header misleading.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein
       ?shortName
WHERE {
  ?protein a up:Protein ;
           up:recommendedName ?recommendedName .
  # Read the short name, not the full name, from the recommended-name node.
  ?recommendedName up:shortName ?shortName .
}
106_uniprot_reviewed_or_not: List all UniProtKB proteins and if they are reviewed (Swiss-Prot) or unreviewed (TrEMBL)
xxxxxxxxxx
# Lists every UniProtKB protein with the value of its up:reviewed flag
# (per the query description: reviewed = Swiss-Prot, unreviewed = TrEMBL).
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?reviewed
WHERE {
  ?protein a up:Protein ;
           up:reviewed ?reviewed .
}
107_uniprot_sequences_and_mark_which_is_cannonical_for_human: List all human UniProtKB entries and their sequences, marking whether each listed sequence is the canonical sequence of the matching entry.
xxxxxxxxxx
# Lists human UniProtKB entries with each of their sequences and a boolean
# ?isCanonical telling whether that sequence is the entry's canonical one.
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?entry ?sequence ?isCanonical
WHERE {
  # We don't want to look into the UniParc graph, which would confuse matters.
  GRAPH <http://sparql.uniprot.org/uniprot> {
    # We need the UniProt entries that are human.
    ?entry a up:Protein ;
           up:organism taxon:9606 ;
           up:sequence ?sequence .

    # If the sequence is a "Simple_Sequence" it is likely to be the
    # canonical sequence ...
    OPTIONAL {
      ?sequence a up:Simple_Sequence .
      BIND(true AS ?likelyIsCanonical)
    }

    # ... unless we are dealing with an external isoform,
    # see https://www.uniprot.org/help/canonical_and_isoforms
    OPTIONAL {
      FILTER(?likelyIsCanonical)
      ?sequence a up:External_Sequence .
      BIND(true AS ?isComplicated)
    }

    # If it is an external isoform, its id would not match the entry's
    # primary accession.
    #
    # Both flags are only bound when their OPTIONAL matched. IF() raises an
    # error on an unbound condition, which would leave ?isCanonical unbound,
    # so each flag is defaulted to false with COALESCE before use.
    # NOTE(review): SUBSTR(..., 34) presumably skips the isoform IRI
    # namespace so that the text before '-' is the accession -- confirm.
    BIND(
      IF(
        COALESCE(?isComplicated, false),
        STRENDS(STR(?entry), STRBEFORE(SUBSTR(STR(?sequence), 34), '-')),
        COALESCE(?likelyIsCanonical, false)
      ) AS ?isCanonical
    )
  }
}
108_uniprot_signature_match_start_end: List all InterPro member database signature match start and end for a specific UniProtKB protein.
xxxxxxxxxx
# For one UniProtKB protein (P05067), lists every InterPro member database
# signature cross-reference with the start and end position of its match on
# the protein's UniParc sequence.
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?interproMemberDatabaseXref ?matchStart ?matchEnd
WHERE {
  GRAPH <http://sparql.uniprot.org/uniprot> {
    VALUES ?protein { <http://purl.uniprot.org/uniprot/P05067> } .
    ?protein rdfs:seeAlso ?sa .
  }
  GRAPH <http://sparql.uniprot.org/uniparc> {
    ?uniparc up:sequenceFor ?protein ;
             rdfs:seeAlso ?interproMemberDatabaseXref .
    # The match must hang off the SAME cross-reference that is projected.
    # A differently spelled variable was used here before, which left the
    # reported xref unrelated to the reported match positions.
    ?interproMemberDatabaseXref up:signatureSequenceMatch ?sam .
    ?sam faldo:begin ?sab ;
         faldo:end ?sae .
    # Both ends of the match must be positions on this UniParc sequence.
    ?sab faldo:position ?matchStart ;
         faldo:reference ?uniparc .
    ?sae faldo:position ?matchEnd ;
         faldo:reference ?uniparc .
  }
}
109_uniprot_transporter_in_liver: Find human transporter proteins in reviewed UniProtKB/Swiss-Prot, that are expressed in the liver (Uses Bgee and UBERON).
xxxxxxxxxx
# Finds reviewed human UniProtKB proteins that catalyze a Rhea transport
# reaction and that Bgee reports as expressed in the liver (UBERON:0002107).
PREFIX genex: <http://purl.org/genex#>
PREFIX lscr: <http://purl.org/lscr#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX orth: <http://purl.org/net/orth#>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
# UBERON terms use OBO PURLs of the form .../obo/UBERON_<id>; the former
# '.../obo/uo#' namespace did not produce a valid UBERON IRI.
PREFIX uberon: <http://purl.obolibrary.org/obo/UBERON_>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT
  ?rhea
  ?protein
  ?anat
WHERE
{
  # Rhea reactions flagged as transport reactions.
  GRAPH <https://sparql.rhea-db.org/rhea> {
    ?rhea rh:isTransport true .
  }
  # Reviewed (Swiss-Prot) human proteins annotated with such a reaction.
  ?protein up:reviewed true ;
           up:organism taxon:9606 ;
           up:annotation ?ann .
  ?ann up:catalyticActivity ?ca .
  ?ca up:catalyzedReaction ?rhea .
  # Anatomical entity: liver.
  BIND(uberon:0002107 AS ?anat)
  # Ask Bgee for human genes expressed in that anatomical entity and
  # cross-referenced to the UniProt protein.
  SERVICE <https://www.bgee.org/sparql> {
    ?seq genex:isExpressedIn ?anat .
    ?seq lscr:xrefUniprot ?protein .
    ?seq orth:organism ?organism .
    ?organism obo:RO_0002162 taxon:9606 .
  }
}
110_uniprot_unamed_plasmids: Sometimes it is known that a gene encoding a UniProtKB protein is located on a plasmid or an organelle, but the name of the plasmid is unknown.
xxxxxxxxxx
# Lists proteins together with the plasmid or organelle they are encoded in,
# plus that plasmid's/organelle's label when one is available.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?plasmidOrOrganelle ?label
WHERE {
  ?protein a up:Protein .
  ?protein up:encodedIn ?plasmidOrOrganelle .
  # The label is optional so that rows without a label are still returned.
  OPTIONAL { ?plasmidOrOrganelle rdfs:label ?label . }
}
111_uniprot_make_chain_sequence_fasta: Given a UniProt chain identifier, generate the matching sequence as FASTA.
xxxxxxxxxx
# Builds a FASTA record for one chain annotation: a '>' header line holding
# the chain id, followed by the sub-string of the referenced sequence between
# the chain's begin and end positions (inclusive).
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX faldo: <http://biohackathon.org/resource/faldo#>

SELECT
  (CONCAT('>', ?chainSeqId, '\n', (SUBSTR(?iupacAA, ?begin, (?end-?begin+1)))) AS ?chainFasta)
WHERE {
  # Chain identifier to export, and the annotation IRI derived from it.
  BIND("PRO_0000268053" AS ?chainSeqId)
  BIND(IRI(CONCAT("http://purl.uniprot.org/annotation/", ?chainSeqId)) AS ?annId)

  ?annId up:range ?range .

  # Begin point: gives the start position and the sequence it refers to.
  ?range faldo:begin [
           faldo:reference ?reference ;
           faldo:position ?begin
         ] .
  # End point: only the position is needed.
  ?range faldo:end [ faldo:position ?end ] .

  # The sequence string is stored as rdf:value on the referenced sequence.
  ?reference rdf:value ?iupacAA .
}
62_diseases_involving_enzymes: Find diseases that are thought to be related to enzymes
xxxxxxxxxx
# Lists diseases (with their preferred label) that are annotated on proteins
# which are enzymes.
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX up: <http://purl.uniprot.org/core/>

# DISTINCT: without it the same disease is repeated once for every matching
# protein / enzyme class / annotation combination. The sibling disease
# queries in this collection also return distinct diseases.
SELECT DISTINCT
  ?disease ?diseaseLabel
WHERE {
  # A protein counts as an enzyme if it has an enzyme class, either directly
  # (up:enzyme) or through one of its catalytic-activity annotations.
  ?protein up:enzyme|up:annotation/up:catalyticActivity/up:enzymeClass ?enzyme ;
           up:annotation ?diseaseAnnotation .
  ?diseaseAnnotation a up:Disease_Annotation ;
                     up:disease ?disease .
  ?disease skos:prefLabel ?diseaseLabel .
}
63_diseases_involving_enzymes_located_in_mitochondrion: Find (Human genetic) diseases that are thought to be related to Enzymes, known to be located in the Mitochondrion
xxxxxxxxxx
# Distinct diseases annotated on human enzymes that also carry a subcellular
# location annotation whose cellular component is (part of) location 173
# (the mitochondrion, per the query description).
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT DISTINCT ?disease
WHERE {
  # Human protein carrying both a disease and a location annotation.
  ?protein a up:Protein ;
           up:organism taxon:9606 ;
           up:annotation ?diseaseNote , ?locationNote .

  # Enzyme test: an EC assignment or a catalytic-activity annotation.
  { ?protein up:enzyme [] . }
  UNION
  { ?protein up:annotation/a up:Catalytic_Activity_Annotation . }

  ?diseaseNote a up:Disease_Annotation ;
               up:disease ?disease .

  ?locationNote a up:Subcellular_Location_Annotation ;
                up:locatedIn ?place .
  ?place up:cellularComponent ?compartment .
  # Zero or more up:partOf steps up to the target location.
  ?compartment up:partOf* <http://purl.uniprot.org/locations/173> .
}
64_diseases_related_to_mutation_in_active_site: Find (Human Genetic) diseases, that are related to a natural variant on the active site of an enzyme.
xxxxxxxxxx
# Distinct diseases linked to a natural variant whose range covers the
# active site of a human enzyme.
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT
  DISTINCT
  ?disease
WHERE {
  ?protein a up:Protein ;
           up:organism taxon:9606 ;
           up:annotation ?disease_annotation, ?active_site_annotation, ?natural_variant_annotation .
  # Enzyme test: an EC assignment or a catalytic-activity annotation.
  {
    ?protein up:enzyme [] .
  } UNION {
    ?protein up:annotation/a up:Catalytic_Activity_Annotation .
  }
  ?disease_annotation a up:Disease_Annotation ;
                      up:disease ?disease .
  # Position of the active site on the sequence.
  ?active_site_annotation a up:Active_Site_Annotation ;
                          up:range ?active_site_range .
  ?active_site_range faldo:begin ?active_site_begin .
  ?active_site_begin faldo:position ?active_site_position ;
                     faldo:reference ?sequence .
  # Natural variant related to the same disease, on the same sequence.
  ?natural_variant_annotation a up:Natural_Variant_Annotation ;
                              up:range ?natural_variant_range ;
                              skos:related ?disease .
  ?natural_variant_range faldo:begin ?natural_variant_begin ;
                         faldo:end ?natural_variant_end .
  ?natural_variant_begin faldo:position ?natural_variant_begin_position .
  ?natural_variant_end faldo:position ?natural_variant_end_position ;
                       faldo:reference ?sequence .
  # The active site must lie inside the variant range:
  #   variant begin <= active site <= variant end.
  # The first comparison used to be '>=', which only tested that the variant
  # started at or after the active site instead of testing containment.
  FILTER(?natural_variant_begin_position <= ?active_site_position && ?active_site_position <= ?natural_variant_end_position)
}
65_distinct_extinct_organisms_in_uniprotkb: How many distinct extinct organisms are represented in UniProtKB
xxxxxxxxxx
# Per taxon: one sample scientific name and the number of distinct proteins
# classified with keyword 952 (extinct organism, per the query description).
PREFIX keywords: <http://purl.uniprot.org/keywords/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?taxon
       (SAMPLE(?name) AS ?anName)
       (COUNT(DISTINCT ?protein) AS ?entriesPerExtinctTaxon)
WHERE {
  # Taxon names are read from the taxonomy graph.
  GRAPH <http://sparql.uniprot.org/taxonomy> {
    ?taxon a up:Taxon .
    ?taxon up:scientificName ?name .
  }
  # Proteins of that taxon carrying the keyword.
  ?protein up:organism ?taxon .
  ?protein up:classifiedWith keywords:952 .
}
GROUP BY ?taxon
ORDER BY ?taxon
66_distinct_rhea_transport_in_reviewed_uniprot: Count number of unique Rhea transport reactions annotated in reviewed UniProtKB entries.
xxxxxxxxxx
# Counts the distinct Rhea transport reactions that appear as a catalyzed
# reaction in some protein annotation.
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT (COUNT(DISTINCT ?rhea) AS ?distinctRheaTransportInUniProt)
WHERE {
  # Reactions flagged as transport in the Rhea graph.
  GRAPH <https://sparql.rhea-db.org/rhea> {
    ?rhea rh:isTransport true .
  }
  # Protein -> annotation -> catalytic activity -> reaction.
  ?protein up:annotation ?note .
  ?note up:catalyticActivity ?activity .
  ?activity up:catalyzedReaction ?rhea .
}
67_draft_human_metabolome: Generate a draft human metabolome
xxxxxxxxxx
# For human proteins with a catalyzed Rhea reaction, lists the ChEBI
# compounds taking part in that reaction with their SMILES and InChIKey.
PREFIX chebislash: <http://purl.obolibrary.org/obo/chebi/>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?uniprot ?mnemonic ?rhea ?chebi ?smiles ?inchiKey
WHERE {
  # Human entry, its mnemonic, and each reaction it catalyzes.
  ?uniprot up:annotation/up:catalyticActivity/up:catalyzedReaction ?rhea ;
           up:organism taxon:9606 ;
           up:mnemonic ?mnemonic .

  # Resolve the reaction's participants to ChEBI through the Rhea endpoint.
  SERVICE <https://sparql.rhea-db.org/sparql> {
    ?rhea rh:side/rh:contains/rh:compound ?compound .
    # A compound reaches its ChEBI either directly, via a reactive part,
    # or via an underlying ChEBI.
    ?compound (rh:chebi|(rh:reactivePart/rh:chebi)|(rh:underlyingChebi/rh:chebi)) ?chebi .
  }

  # Structure descriptors of the ChEBI compound.
  ?chebi chebislash:smiles ?smiles .
  ?chebi chebislash:inchikey ?inchiKey .
}
70_enzymes_interacting_with_molecules_similar_to_dopamine: Look for enzymes catalyzing reactions with molecules similar to dopamine.
xxxxxxxxxx
# Reviewed proteins that catalyze a Rhea reaction involving a compound which
# the IDSM similarity search reports as similar to dopamine.
PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX sachem: <http://bioinfo.uochb.cas.cz/rdf/v1.0/sachem#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?rhea ?chebi
WHERE {
  # Similarity search seeded with the SMILES of dopamine (CHEBI:18243).
  SERVICE <https://idsm.elixir-czech.cz/sparql/endpoint/chebi> {
    ?chebi sachem:similarCompoundSearch [ sachem:query "NCCc1ccc(O)c(O)c1" ] .
  }

  # Rhea reactions in which one of those compounds takes part.
  GRAPH <https://sparql.rhea-db.org/rhea> {
    ?rhea rh:side/rh:contains/rh:compound ?participant .
    ?participant (rh:chebi|(rh:reactivePart/rh:chebi)|(rh:underlyingChebi/rh:chebi)) ?chebi .
  }

  # Reviewed proteins annotated as catalyzing such a reaction.
  ?protein up:reviewed true .
  ?protein up:annotation ?activityNote .
  ?activityNote up:catalyticActivity/up:catalyzedReaction ?rhea .
}
71_enzymes_interacting_with_molecules_similar_to_dopamine_with_variants_related_to_disease: Look for enzymes catalyzing reactions with molecules similar to dopamine, with natural variants related to a disease.
xxxxxxxxxx
# Reviewed proteins that catalyze a Rhea reaction involving a dopamine-like
# compound and that have a natural variant related to one of their diseases.
PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX sachem: <http://bioinfo.uochb.cas.cz/rdf/v1.0/sachem#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?rhea ?chebi ?disease
WHERE {
  # Similarity search seeded with the SMILES of dopamine (CHEBI:18243).
  SERVICE <https://idsm.elixir-czech.cz/sparql/endpoint/chebi> {
    ?chebi sachem:similarCompoundSearch [ sachem:query "NCCc1ccc(O)c(O)c1" ] .
  }

  # Rhea reactions in which one of those compounds takes part.
  GRAPH <https://sparql.rhea-db.org/rhea> {
    ?rhea rh:side/rh:contains/rh:compound ?participant .
    ?participant (rh:chebi|(rh:reactivePart/rh:chebi)|(rh:underlyingChebi/rh:chebi)) ?chebi .
  }

  # Reviewed protein carrying the three annotations needed below.
  ?protein up:reviewed true ;
           up:annotation ?activityNote , ?variantNote , ?diseaseNote .

  ?activityNote up:catalyticActivity/up:catalyzedReaction ?rhea .

  # The variant and the disease annotation must name the same disease.
  ?variantNote a up:Natural_Variant_Annotation ;
               skos:related ?disease .
  ?diseaseNote a up:Disease_Annotation ;
               up:disease ?disease .
}
72_enzymes_mapping_to_PDB: Select a mapping of UniProtKB enzymes to PDB entries using the UniProtKB cross-references to the PDB database.
xxxxxxxxxx
# Maps enzymes to their PDB cross-references.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?db
WHERE {
  # Protein with a cross-reference that belongs to the PDB database.
  ?protein a up:Protein ;
           rdfs:seeAlso ?db .
  ?db up:database <http://purl.uniprot.org/database/PDB> .

  # Enzyme test: an EC assignment or a catalytic-activity annotation.
  { ?protein up:enzyme ?enzyme . }
  UNION
  { ?protein up:annotation/rdf:type up:Catalytic_Activity_Annotation . }
}
73_enzymes_related_to_protein: Select Enzyme Classification (EC) classes that have a protein associated to them
xxxxxxxxxx
# Distinct enzyme classes that are attached to at least one protein, looking
# at four places where an enzyme class can be recorded.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT DISTINCT ?enzyme
WHERE {
  ?protein a up:Protein .
  {
    # 1. Directly on the protein.
    ?protein up:enzyme ?enzyme .
  }
  UNION
  {
    # 2. On a catalytic activity of a catalytic-activity annotation.
    ?protein up:annotation ?activityNote .
    ?activityNote a up:Catalytic_Activity_Annotation ;
                  up:catalyticActivity ?activity .
    ?activity up:enzymeClass ?enzyme
  }
  UNION
  {
    # 3. On a component of the protein.
    ?protein up:component/up:enzyme ?enzyme .
  }
  UNION
  {
    # 4. On a domain of the protein.
    ?protein up:domain/up:enzyme ?enzyme .
  }
}
74_enzymes_with_at_least_two_transmembrane_domains: Find Hydrolases (enzymes that catalyse the hydrolysis of various bonds) that have at least two transmembrane domains
xxxxxxxxxx
# Proteins whose enzyme class is a subclass of EC 3.-.-.- (hydrolases, per
# the query description) and that have two or more transmembrane annotations.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein
WHERE {
  # Enzyme class taken either directly or from a catalytic activity.
  ?protein up:enzyme|up:annotation/up:catalyticActivity/up:enzymeClass ?enzymeClass .
  ?enzymeClass rdfs:subClassOf <http://purl.uniprot.org/enzyme/3.-.-.-> .

  # Each transmembrane annotation of the protein yields one row per group.
  ?protein up:annotation ?tmNote .
  ?tmNote a up:Transmembrane_Annotation .
}
GROUP BY ?protein
HAVING (COUNT(DISTINCT ?tmNote) >= 2)
75_enzymes_with_at_least_two_transmembrane_domains_PDB_xray: Find enzymes that have at least two transmembrane domains, whose 3D structure is elucidated through X-Ray analysis
xxxxxxxxxx
# Enzymes with two or more transmembrane annotations that have a PDB
# cross-reference whose method is X-ray crystallography.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein
WHERE {
  ?protein a up:Protein ;
           up:annotation ?tmNote ;
           rdfs:seeAlso ?structureXref .

  # Enzyme test: an EC assignment or a catalytic-activity annotation.
  { ?protein up:enzyme ?enzyme . }
  UNION
  { ?protein up:annotation/a up:Catalytic_Activity_Annotation . }

  # The cross-reference must be a PDB entry solved by X-ray crystallography.
  ?structureXref up:database <http://purl.uniprot.org/database/PDB> ;
                 up:method up:X-Ray_Crystallography .

  ?tmNote a up:Transmembrane_Annotation .
}
GROUP BY ?protein
HAVING (COUNT(DISTINCT ?tmNote) >= 2)
76_enzymes_with_mutagenesis_affecting_active_site: Find enzymes, where the active site is a region affected by mutagenesis and show the comment regarding mutagenesis effect.
xxxxxxxxxx
# Proteins where a mutagenesis annotation's range covers the begin position
# of an active-site annotation on the same sequence; also returns the
# mutagenesis comment.
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein
       ?mutagenesisBeginPosition
       ?activeSiteBeginPosition
       ?mutagenesisRangeEndPosition
       ?mutagenesisComment
WHERE {
  ?protein up:annotation ?siteNote , ?mutNote .

  # Active site: begin position and the sequence it is located on.
  ?siteNote a up:Active_Site_Annotation ;
            up:range ?siteRange .
  ?siteRange faldo:begin ?siteStart .
  ?siteStart faldo:position ?activeSiteBeginPosition ;
             faldo:reference ?sequence .

  # Mutagenesis: comment plus begin/end positions on the same sequence.
  ?mutNote a up:Mutagenesis_Annotation ;
           up:range ?mutRange ;
           rdfs:comment ?mutagenesisComment .
  ?mutRange faldo:begin ?mutStart ;
            faldo:end ?mutStop .
  ?mutStart faldo:position ?mutagenesisBeginPosition ;
            faldo:reference ?sequence .
  ?mutStop faldo:position ?mutagenesisRangeEndPosition .

  # Containment: mutagenesis begin <= active site begin <= mutagenesis end.
  FILTER(?mutagenesisBeginPosition <= ?activeSiteBeginPosition && ?activeSiteBeginPosition <= ?mutagenesisRangeEndPosition)
}
78_genetic_disease_related_proteins: List all UniProtKB proteins annotated to be related to a genetic disease.
xxxxxxxxxx
# Proteins with a disease annotation, the disease's comment, and the
# disease's MIM cross-reference when it has one.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?uniprot ?disease ?diseaseComment ?mim
WHERE {
  # Protein -> annotation -> disease, from the UniProtKB graph.
  GRAPH <http://sparql.uniprot.org/uniprot> {
    ?uniprot a up:Protein .
    ?uniprot up:annotation ?diseaseNote .
    ?diseaseNote up:disease ?disease .
  }
  # Disease details, from the diseases graph.
  GRAPH <http://sparql.uniprot.org/diseases> {
    ?disease a up:Disease .
    ?disease rdfs:comment ?diseaseComment .
    # Only cross-references belonging to the MIM database are kept.
    OPTIONAL {
      ?disease rdfs:seeAlso ?mim .
      ?mim up:database <http://purl.uniprot.org/database/MIM> .
    }
  }
}
79_mnemonic_also_known_as_id: List all UniProtKB protein ID (mnemonic) for current UniProtKB entries.
xxxxxxxxxx
# Lists each protein in the UniProtKB graph with its mnemonic (protein ID).
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?uniprot ?mnemonic
WHERE {
  GRAPH <http://sparql.uniprot.org/uniprot> {
    ?uniprot a up:Protein .
    ?uniprot up:mnemonic ?mnemonic .
  }
}
80_obsolete_mnemonic_also_known_as_id: List all UniProtKB protein IDs (mnemonics) that were used in the past for current UniProtKB entries.
xxxxxxxxxx
# Lists each protein in the UniProtKB graph with every old mnemonic
# (up:oldMnemonic) recorded for it.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?uniprot ?obsoleteMnemonic
WHERE {
  GRAPH <http://sparql.uniprot.org/uniprot> {
    ?uniprot a up:Protein .
    ?uniprot up:oldMnemonic ?obsoleteMnemonic .
  }
}
81_rhea_reactions_annotated_as_experimental_and_only_small_molecules: Find all Rhea reactions (involving only small molecules) that are used in UniProtKB where the annotation has a paper and is tagged as having experimental evidence.
xxxxxxxxxx
# Rhea reactions whose participants are all small molecules and that are
# used in a catalyzed-reaction statement attributed to a citation with
# evidence code ECO:0000269 (experimental evidence, per the description).
PREFIX ECO: <http://purl.obolibrary.org/obo/ECO_>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?rhea ?catalyzedReaction ?source
WHERE {
  # Step 1: reactions made up of small molecules only.
  {
    SELECT DISTINCT ?rhea
    WHERE {
      GRAPH <https://sparql.rhea-db.org/rhea> {
        ?rhea rdfs:subClassOf rh:Reaction .
        ?rhea rh:side/rh:contains/rh:compound ?participant .
        ?compoundClass rdfs:subClassOf rh:Compound .
      }
      ?participant rdfs:subClassOf ?compoundClass .
      # Score 0 for a small molecule and 1 for anything else, so that a
      # reaction qualifies only when its scores sum to zero.
      BIND(IF(?compoundClass = rh:SmallMolecule, 0, 1) AS ?nonSmall)
    }
    GROUP BY ?rhea
    HAVING (SUM(?nonSmall) = 0)
  }

  # Step 2: where the reaction is used as a catalyzed reaction.
  ?catalyzedReaction up:catalyzedReaction ?rhea .

  # Step 3: a reified statement about that usage, attributed to a citation
  # carrying the experimental evidence code.
  ?statement rdf:object ?catalyzedReaction ;
             up:attribution ?attribution .
  ?attribution up:evidence ECO:0000269 ;
               up:source ?source .
  ?source a up:Citation .
}
82_rhea_reactions_associated_with_ec_in_uniprotkb: List Rhea reactions associated with an EC (enzyme classification).
xxxxxxxxxx
# Pairs each Rhea reaction with the enzyme class recorded on the same
# catalytic-activity resource.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?rhea ?EC
WHERE {
  ?activity up:catalyzedReaction ?rhea .
  ?activity up:enzymeClass ?EC .
}