SPARQL example query
104_uniprot_recomended_protein_full_name: The recommended protein full names for UniProtKB entries
# Recommended full names of UniProtKB entries.
# One row per (protein, full name) pair reached through up:recommendedName.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?fullName
WHERE {
  ?protein a up:Protein .
  ?protein up:recommendedName ?recName .
  ?recName up:fullName ?fullName .
}
Use
105_uniprot_recomended_protein_short_name: The recommended protein short names for UniProtKB entries
# Recommended short names of UniProtKB entries.
# NOTE: the projected variable is named ?fullName, but it is bound to
# up:shortName values; the name is kept so the result columns do not change.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?fullName
WHERE {
  ?protein a up:Protein .
  ?protein up:recommendedName ?recName .
  ?recName up:shortName ?fullName .
}
Use
106_uniprot_reviewed_or_not: List all UniProtKB proteins and if they are reviewed (Swiss-Prot) or unreviewed (TrEMBL)
# Every UniProtKB protein together with the value of its up:reviewed property.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?reviewed
WHERE {
  ?protein a up:Protein ;
           up:reviewed ?reviewed .
}
Use
107_uniprot_sequences_and_mark_which_is_cannonical_for_human: List all human UniProtKB entries and their sequences, marking if the sequence listed is the cannonical sequence of the matching entry.
# Human UniProtKB entries with their sequences, flagging in ?isCanonical
# whether the listed sequence is the canonical sequence of the entry.
#
# The query must span several lines: a '#' comment runs to the end of the
# line, so on a single line the first comment would swallow the whole query.
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?entry ?sequence ?isCanonical
WHERE {
  # We don't want to look into the UniParc graph, which would
  # confuse matters.
  GRAPH <http://sparql.uniprot.org/uniprot> {
    # We need the UniProt entries that are human.
    ?entry a up:Protein ;
           up:organism taxon:9606 ;
           up:sequence ?sequence .

    # If the sequence is a "Simple_Sequence" it is likely to be the
    # canonical sequence ...
    OPTIONAL {
      ?sequence a up:Simple_Sequence .
      BIND(true AS ?likelyIsCanonical)
    }

    # ... unless we are dealing with an external isoform;
    # see https://www.uniprot.org/help/canonical_and_isoforms
    OPTIONAL {
      FILTER(?likelyIsCanonical)
      ?sequence a up:External_Sequence .
      BIND(true AS ?isComplicated)
    }

    # If it is an external isoform, its id would not match the entry's
    # primary accession. SUBSTR(..., 34) drops the first 33 characters of
    # the sequence IRI and STRBEFORE(..., '-') keeps what precedes the
    # first '-'; the entry IRI must end with that string.
    BIND(IF(?isComplicated,
            STRENDS(STR(?entry), STRBEFORE(SUBSTR(STR(?sequence), 34),'-')),
            ?likelyIsCanonical) AS ?isCanonical)
  }
}
Use
108_uniprot_signature_match_start_end: List all InterPro member database signature match start and end for a specific UniProtKB protein.
# Start and end positions of signature matches for one UniProtKB protein
# (P05067), reported per cross-referenced member-database signature.
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

# DISTINCT collapses the duplicate rows produced by the unprojected ?sa join
# below (one copy of every match per rdfs:seeAlso value of the protein).
SELECT DISTINCT ?protein ?interproMemberDatabaseXref ?matchStart ?matchEnd
WHERE {
  GRAPH <http://sparql.uniprot.org/uniprot> {
    VALUES ?protein { <http://purl.uniprot.org/uniprot/P05067> } .
    # ?sa is not used further; the pattern only requires the protein to
    # have at least one rdfs:seeAlso value.
    ?protein rdfs:seeAlso ?sa .
  }
  GRAPH <http://sparql.uniprot.org/uniparc> {
    ?uniparc up:sequenceFor ?protein ;
             rdfs:seeAlso ?interproMemberDatabaseXref .
    # The match must hang off the SAME cross-reference that is projected.
    # Using a second, unconnected variable here would pair every
    # cross-reference with every match on the sequence.
    ?interproMemberDatabaseXref up:signatureSequenceMatch ?sam .
    ?sam faldo:begin ?sab ;
         faldo:end ?sae .
    # Both endpoints must be positioned on this UniParc sequence.
    ?sab faldo:position ?matchStart ;
         faldo:reference ?uniparc .
    ?sae faldo:position ?matchEnd ;
         faldo:reference ?uniparc .
  }
}
Use
109_uniprot_transporter_in_liver: Find human transporter proteins in reviewed UniProtKB/Swiss-Prot, that are expressed in the liver (Uses Bgee and UBERON).
# Human proteins in reviewed UniProtKB/Swiss-Prot that catalyse a Rhea
# transport reaction and are expressed in the liver according to Bgee.
PREFIX genex: <http://purl.org/genex#>
PREFIX lscr: <http://purl.org/lscr#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX orth: <http://purl.org/net/orth#>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?rhea ?protein ?anat
WHERE {
  # Rhea reactions flagged as transport reactions.
  GRAPH <https://sparql.rhea-db.org/rhea> {
    ?rhea rh:isTransport true .
  }

  # Reviewed human entries annotated with one of those reactions.
  # The description asks for reviewed entries, so up:reviewed is required.
  ?protein up:reviewed true ;
           up:organism taxon:9606 ;
           up:annotation ?ann .
  ?ann up:catalyticActivity ?ca .
  ?ca up:catalyzedReaction ?rhea .

  # Liver. OBO term IRIs have the form .../obo/UBERON_0002107; the previous
  # prefix <http://purl.obolibrary.org/obo/uo#> expanded to .../obo/uo#0002107.
  # NOTE(review): confirm this IRI form against the Bgee endpoint.
  BIND(obo:UBERON_0002107 AS ?anat)

  # Expression data from Bgee, restricted to human (RO_0002162 -> taxon 9606).
  SERVICE <https://www.bgee.org/sparql> {
    ?seq genex:isExpressedIn ?anat .
    ?seq lscr:xrefUniprot ?protein .
    ?seq orth:organism ?organism .
    ?organism obo:RO_0002162 taxon:9606 .
  }
}
Use
110_uniprot_unamed_plasmids: Sometimes it is known that a gene encoding a protein UniProtKB is located on a plasmid or an organelle, but the name of the plasmid is unknown.
# Proteins and the plasmid/organelle they are encoded in (up:encodedIn).
# ?label stays unbound when that resource has no rdfs:label.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?protein ?plasmidOrOrganelle ?label
WHERE {
  ?protein a up:Protein .
  ?protein up:encodedIn ?plasmidOrOrganelle .
  OPTIONAL {
    ?plasmidOrOrganelle rdfs:label ?label .
  }
}
Use
111_uniprot_make_chain_sequence_fasta: Given an UniProt chain identifier, generate the matching sequence as a FASTA
# Build a FASTA record for one chain annotation: a '>' header with the chain
# id, a newline, then the slice of the referenced sequence from ?begin to
# ?end inclusive (length ?end-?begin+1).
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT (CONCAT('>', ?chainSeqId, '\n', (SUBSTR(?iupacAA, ?begin, (?end-?begin+1)))) AS ?chainFasta)
WHERE {
  # The chain identifier and the annotation IRI derived from it.
  BIND("PRO_0000268053" AS ?chainSeqId)
  BIND(IRI(CONCAT("http://purl.uniprot.org/annotation/", ?chainSeqId)) AS ?annId)

  ?annId up:range ?range .

  # Range endpoints, written with explicit variables instead of blank nodes.
  ?range faldo:begin ?rangeStart ;
         faldo:end ?rangeStop .
  ?rangeStart faldo:reference ?reference ;
              faldo:position ?begin .
  ?rangeStop faldo:position ?end .

  # The sequence string the positions refer to.
  ?reference rdf:value ?iupacAA .
}
Use
62_diseases_involving_enzymes: Find diseases that are thought to be related to enzymes
# Diseases (with preferred label) annotated on proteins that are linked to an
# enzyme class, either directly via up:enzyme or via a catalytic activity.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT ?disease ?diseaseLabel
WHERE {
  # '/' binds tighter than '|', made explicit here with parentheses.
  ?protein (up:enzyme|(up:annotation/up:catalyticActivity/up:enzymeClass)) ?enzyme .
  ?protein up:annotation ?diseaseAnnotation .
  ?diseaseAnnotation a up:Disease_Annotation .
  ?diseaseAnnotation up:disease ?disease .
  ?disease skos:prefLabel ?diseaseLabel .
}
Use
63_diseases_involving_enzymes_located_in_mitochondrion: Find (Human genetic) diseases that are thought to be related to Enzymes, known to be located in the Mitochondrion
# Distinct diseases annotated on human enzymes whose subcellular location is
# location 173 or any part of it (the Mitochondrion, per the query description).
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>

SELECT DISTINCT ?disease
WHERE {
  ?protein a up:Protein .
  ?protein up:organism taxon:9606 .
  ?protein up:annotation ?disease_annotation .
  ?protein up:annotation ?subcellularLocation .

  # "Enzyme": has an up:enzyme value or a catalytic activity annotation.
  {
    ?protein up:enzyme [] .
  }
  UNION
  {
    ?protein up:annotation/a up:Catalytic_Activity_Annotation .
  }

  ?disease_annotation a up:Disease_Annotation ;
                      up:disease ?disease .

  ?subcellularLocation a up:Subcellular_Location_Annotation ;
                       up:locatedIn ?location .
  ?location up:cellularComponent ?component .
  # Zero or more up:partOf steps up to the target location.
  ?component up:partOf* <http://purl.uniprot.org/locations/173> .
}
Use
64_diseases_related_to_mutation_in_active_site: Find (Human Genetic) diseases, that are related to a natural variant on the active site of an enzyme.
# Distinct diseases of human enzymes for which a disease-related natural
# variant covers the position of an active site on the same sequence.
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT DISTINCT ?disease
WHERE {
  ?protein a up:Protein ;
           up:organism taxon:9606 ;
           up:annotation ?disease_annotation ,
                         ?active_site_annotation ,
                         ?natural_variant_annotation .

  # "Enzyme": has an up:enzyme value or a catalytic activity annotation.
  { ?protein up:enzyme [] . }
  UNION
  { ?protein up:annotation/a up:Catalytic_Activity_Annotation . }

  ?disease_annotation a up:Disease_Annotation ;
                      up:disease ?disease .

  # Position of the active site on ?sequence.
  ?active_site_annotation a up:Active_Site_Annotation ;
                          up:range ?active_site_range .
  ?active_site_range faldo:begin ?active_site_begin .
  ?active_site_begin faldo:position ?active_site_position ;
                     faldo:reference ?sequence .

  # Natural variant related to the same disease, located on the same sequence.
  ?natural_variant_annotation a up:Natural_Variant_Annotation ;
                              up:range ?natural_variant_range ;
                              skos:related ?disease .
  ?natural_variant_range faldo:begin ?natural_variant_begin ;
                         faldo:end ?natural_variant_end .
  ?natural_variant_begin faldo:position ?natural_variant_begin_position .
  ?natural_variant_end faldo:position ?natural_variant_end_position ;
                       faldo:reference ?sequence .

  # Containment test: variant begin <= active site <= variant end.
  # The first comparison was previously '>=', which selected variants that
  # START AT OR AFTER the active site instead of variants that cover it.
  FILTER(?natural_variant_begin_position <= ?active_site_position
         && ?active_site_position <= ?natural_variant_end_position)
}
Use
65_distinct_extinct_organisms_in_uniprotkb: How many distinct extinct organisms are represented in UniProtKB
# Per taxon: one sample scientific name and the number of distinct proteins
# classified with keyword 952 (presumably the "extinct organism" keyword, per
# the query description). Ordered by taxon.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX keywords: <http://purl.uniprot.org/keywords/>

SELECT ?taxon
       (SAMPLE(?name) AS ?anName)
       (COUNT(DISTINCT ?protein) AS ?entriesPerExtinctTaxon)
WHERE {
  # Taxon names come from the taxonomy graph.
  GRAPH <http://sparql.uniprot.org/taxonomy> {
    ?taxon a up:Taxon .
    ?taxon up:scientificName ?name .
  }
  ?protein up:organism ?taxon .
  ?protein up:classifiedWith keywords:952 .
}
GROUP BY ?taxon
ORDER BY ?taxon
Use
66_distinct_rhea_transport_in_reviewed_uniprot: Count number of unique Rhea transport reactions annotated in reviewed UniProtKB entries.
# Number of distinct Rhea transport reactions annotated on reviewed
# UniProtKB entries.
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT (COUNT(DISTINCT ?rhea) AS ?distinctRheaTransportInUniProt)
WHERE {
  # Rhea reactions flagged as transport reactions.
  GRAPH <https://sparql.rhea-db.org/rhea> {
    ?rhea rh:isTransport true .
  }
  # The description counts reactions in REVIEWED entries only; without
  # up:reviewed the count would also include unreviewed entries.
  ?protein up:reviewed true ;
           up:annotation ?ann .
  ?ann up:catalyticActivity ?ca .
  ?ca up:catalyzedReaction ?rhea .
}
Use
67_draft_human_metabolome: Generate a draft human metabolome
# For human entries: the catalysed Rhea reactions, the ChEBI compounds taking
# part in them (looked up on the Rhea endpoint), and each compound's SMILES
# and InChIKey.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX chebislash: <http://purl.obolibrary.org/obo/chebi/>

SELECT ?uniprot ?mnemonic ?rhea ?chebi ?smiles ?inchiKey
WHERE {
  ?uniprot up:annotation/up:catalyticActivity/up:catalyzedReaction ?rhea .
  ?uniprot up:organism taxon:9606 .
  ?uniprot up:mnemonic ?mnemonic .

  SERVICE <https://sparql.rhea-db.org/sparql> {
    ?rhea rh:side/rh:contains/rh:compound ?compound .
    # A compound reaches its ChEBI either directly, via a reactive part,
    # or via an underlying ChEBI.
    ?compound (rh:chebi|(rh:reactivePart/rh:chebi)|(rh:underlyingChebi/rh:chebi)) ?chebi .
  }

  ?chebi chebislash:smiles ?smiles .
  ?chebi chebislash:inchikey ?inchiKey .
}
Use
70_enzymes_interacting_with_molecules_similar_to_dopamine: Look for enzymes catalyzing reactions with molecules similar to dopamine.
# Reviewed proteins catalysing Rhea reactions that involve a compound
# structurally similar to dopamine.
#
# The query must span several lines: a '#' comment runs to the end of the
# line, so on a single line the comment inside WHERE would swallow the
# rest of the query.
PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX sachem: <http://bioinfo.uochb.cas.cz/rdf/v1.0/sachem#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?rhea ?chebi
WHERE {
  # Use the SMILES of dopamine (CHEBI:18243) for the similarity search.
  SERVICE <https://idsm.elixir-czech.cz/sparql/endpoint/chebi> {
    ?chebi sachem:similarCompoundSearch [
      sachem:query "NCCc1ccc(O)c(O)c1"
    ] .
  }

  # Rhea reactions in which one of those compounds takes part.
  GRAPH <https://sparql.rhea-db.org/rhea> {
    ?rhea rh:side/rh:contains/rh:compound ?compound .
    ?compound (rh:chebi|(rh:reactivePart/rh:chebi)|(rh:underlyingChebi/rh:chebi)) ?chebi .
  }

  # Reviewed entries annotated with such a reaction.
  ?protein up:reviewed true ;
           up:annotation ?caa .
  ?caa up:catalyticActivity/up:catalyzedReaction ?rhea .
}
Use
71_enzymes_interacting_with_molecules_similar_to_dopamine_with_variants_related_to_disease: Look for enzymes catalyzing reactions with molecules similar to dopamine, with natural variants related to a disease.
# Reviewed proteins catalysing Rhea reactions that involve a compound
# structurally similar to dopamine, and that have a natural variant related
# to a disease annotated on the same protein.
#
# The query must span several lines: a '#' comment runs to the end of the
# line, so on a single line the comment inside WHERE would swallow the
# rest of the query.
PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX sachem: <http://bioinfo.uochb.cas.cz/rdf/v1.0/sachem#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?rhea ?chebi ?disease
WHERE {
  # Use the SMILES of dopamine (CHEBI:18243) for the similarity search.
  SERVICE <https://idsm.elixir-czech.cz/sparql/endpoint/chebi> {
    ?chebi sachem:similarCompoundSearch [
      sachem:query "NCCc1ccc(O)c(O)c1"
    ] .
  }

  # Rhea reactions in which one of those compounds takes part.
  GRAPH <https://sparql.rhea-db.org/rhea> {
    ?rhea rh:side/rh:contains/rh:compound ?compound .
    ?compound (rh:chebi|(rh:reactivePart/rh:chebi)|(rh:underlyingChebi/rh:chebi)) ?chebi .
  }

  # Reviewed entries carrying the catalytic, variant and disease annotations.
  ?protein up:reviewed true ;
           up:annotation ?caa ,
                         ?natural_variant_annotation ,
                         ?disease_annotation .
  ?caa up:catalyticActivity/up:catalyzedReaction ?rhea .

  # The variant and the disease annotation must point to the same ?disease.
  ?natural_variant_annotation a up:Natural_Variant_Annotation ;
                              skos:related ?disease .
  ?disease_annotation a up:Disease_Annotation ;
                      up:disease ?disease .
}
Use
72_enzymes_mapping_to_PDB: Select a mapping of UniProtKB enzymes to PDB entries using the UniProtKB cross-references to the PDB database.
# Enzymes paired with their cross-references into the PDB database.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT ?protein ?db
WHERE {
  ?protein a up:Protein ;
           rdfs:seeAlso ?db .
  # Keep only cross-references whose database is PDB.
  ?db up:database <http://purl.uniprot.org/database/PDB> .

  # "Enzyme": has an up:enzyme value or a catalytic activity annotation.
  {
    ?protein up:enzyme ?enzyme .
  }
  UNION
  {
    ?protein up:annotation/rdf:type up:Catalytic_Activity_Annotation .
  }
}
Use
73_enzymes_related_to_protein: Select Enzyme Classification (EC) classes that have a protein associated to them
# Distinct enzyme classes linked to at least one protein, through any of
# four routes: the protein itself, a catalytic activity annotation, a
# component, or a domain.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT DISTINCT ?enzyme
WHERE {
  ?protein a up:Protein .
  {
    # Directly on the protein.
    ?protein up:enzyme ?enzyme .
  }
  UNION
  {
    # Through a catalytic activity annotation.
    ?protein up:annotation ?caa .
    ?caa a up:Catalytic_Activity_Annotation ;
         up:catalyticActivity ?ca .
    ?ca up:enzymeClass ?enzyme
  }
  UNION
  {
    # Through a component of the protein.
    ?protein up:component/up:enzyme ?enzyme .
  }
  UNION
  {
    # Through a domain of the protein.
    ?protein up:domain/up:enzyme ?enzyme .
  }
}
Use
74_enzymes_with_at_least_two_transmembrane_domains: Find Hydrolases (enzymes that catalyse the hydrolysis of various bonds) that have at least two transmembrane domains
# Proteins whose enzyme class is a subclass of EC 3.-.-.- (hydrolases, per
# the query description) and that carry at least two distinct transmembrane
# annotations.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?protein
WHERE {
  # Enzyme class either directly or through a catalytic activity.
  ?protein (up:enzyme|(up:annotation/up:catalyticActivity/up:enzymeClass)) ?enzymeClass .
  ?protein up:annotation ?transMembraneAnnotation .
  ?enzymeClass rdfs:subClassOf <http://purl.uniprot.org/enzyme/3.-.-.-> .
  ?transMembraneAnnotation a up:Transmembrane_Annotation .
}
GROUP BY ?protein
# Count distinct annotations so repeated join rows are not counted twice.
HAVING (COUNT(DISTINCT ?transMembraneAnnotation) >= 2)
Use
75_enzymes_with_at_least_two_transmembrane_domains_PDB_xray: Find enzymes that have at least two transmembrane domains, whose 3D structure is elucidated through X-Ray analysis
# Enzymes with at least two distinct transmembrane annotations and a PDB
# cross-reference whose method is X-ray crystallography.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?protein
WHERE {
  ?protein a up:Protein .
  ?protein up:annotation ?transmembrane_annotation .
  ?protein rdfs:seeAlso ?wwPDB .

  # "Enzyme": has an up:enzyme value or a catalytic activity annotation.
  {
    ?protein up:enzyme ?enzyme .
  }
  UNION
  {
    ?protein up:annotation/a up:Catalytic_Activity_Annotation .
  }

  # The cross-reference must be a PDB entry determined by X-ray.
  ?wwPDB up:database <http://purl.uniprot.org/database/PDB> .
  ?wwPDB up:method up:X-Ray_Crystallography .

  ?transmembrane_annotation a up:Transmembrane_Annotation .
}
GROUP BY ?protein
# Count distinct annotations so repeated join rows are not counted twice.
HAVING (COUNT(DISTINCT ?transmembrane_annotation) >= 2)
Use
76_enzymes_with_mutagenesis_affecting_active_site: Find enzymes, where the active site is a region affected by mutagenesis and show the comment regarding mutagenesis effect.
# Proteins with an active site whose begin position falls inside the range
# of a mutagenesis annotation on the same sequence, together with the
# mutagenesis comment.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX faldo: <http://biohackathon.org/resource/faldo#>

SELECT ?protein
       ?mutagenesisBeginPosition
       ?activeSiteBeginPosition
       ?mutagenesisRangeEndPosition
       ?mutagenesisComment
WHERE {
  ?protein up:annotation ?activeSiteAnnotation .
  ?protein up:annotation ?mutagenesisAnnotation .

  # Mutagenesis annotation: range and free-text comment.
  ?mutagenesisAnnotation a up:Mutagenesis_Annotation .
  ?mutagenesisAnnotation up:range ?mutagenesisRange .
  ?mutagenesisAnnotation rdfs:comment ?mutagenesisComment .

  # Active site begin position on ?sequence.
  ?activeSiteAnnotation a up:Active_Site_Annotation .
  ?activeSiteAnnotation up:range ?activeSiteRange .
  ?activeSiteRange faldo:begin ?activeSiteBegin .
  ?activeSiteBegin faldo:position ?activeSiteBeginPosition .
  ?activeSiteBegin faldo:reference ?sequence .

  # Mutagenesis range endpoints; the begin is tied to the same ?sequence.
  ?mutagenesisRange faldo:begin ?mutagenesisRangeBegin .
  ?mutagenesisRange faldo:end ?mutagenesisRangeEnd .
  ?mutagenesisRangeBegin faldo:position ?mutagenesisBeginPosition .
  ?mutagenesisRangeBegin faldo:reference ?sequence .
  ?mutagenesisRangeEnd faldo:position ?mutagenesisRangeEndPosition .

  # Containment test: mutagenesis begin <= active site <= mutagenesis end.
  FILTER(?mutagenesisBeginPosition <= ?activeSiteBeginPosition && ?activeSiteBeginPosition <= ?mutagenesisRangeEndPosition)
}
Use
78_genetic_disease_related_proteins: List all UniProtKB proteins annotated to be related to a genetic disease.
# Proteins with a disease annotation, the disease's comment, and (when
# present) its cross-reference into the MIM database.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?uniprot ?disease ?diseaseComment ?mim
WHERE {
  # Protein-to-disease links live in the uniprot graph.
  GRAPH <http://sparql.uniprot.org/uniprot> {
    ?uniprot a up:Protein .
    ?uniprot up:annotation ?diseaseAnnotation .
    ?diseaseAnnotation up:disease ?disease .
  }
  # Disease descriptions live in the diseases graph.
  GRAPH <http://sparql.uniprot.org/diseases> {
    ?disease a up:Disease .
    ?disease rdfs:comment ?diseaseComment .
    # ?mim stays unbound when the disease has no MIM cross-reference.
    OPTIONAL {
      ?disease rdfs:seeAlso ?mim .
      ?mim up:database <http://purl.uniprot.org/database/MIM> .
    }
  }
}
Use
79_mnemonic_also_known_as_id: List all UniProtKB protein ID (mnemonic) for current UniProtKB entries.
# Every protein in the uniprot graph together with its up:mnemonic value.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?uniprot ?mnemonic
WHERE {
  GRAPH <http://sparql.uniprot.org/uniprot> {
    ?uniprot a up:Protein .
    ?uniprot up:mnemonic ?mnemonic .
  }
}
Use
80_obsolete_mnemonic_also_known_as_id: List all UniProtKB protein ID (mnemonic) that where used in the past for current UniProtKB entries.
# Every protein in the uniprot graph together with each of its
# up:oldMnemonic values.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?uniprot ?obsoleteMnemonic
WHERE {
  GRAPH <http://sparql.uniprot.org/uniprot> {
    ?uniprot a up:Protein .
    ?uniprot up:oldMnemonic ?obsoleteMnemonic .
  }
}
Use
81_rhea_reactions_annotated_as_experimental_and_only_small_molecules: Find all Rhea (only small molecule) that are used in UniProtKB where the annotation has a paper and is tagged having experimental evidence.
# Rhea reactions whose participants are all small molecules, paired with the
# UniProtKB catalytic activities that use them and the citations attributed
# with evidence ECO:0000269.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ECO: <http://purl.obolibrary.org/obo/ECO_>

SELECT ?rhea ?catalyzedReaction ?source
WHERE {
  {
    # Inner query: keep a reaction only if none of its compounds falls in a
    # compound class other than rh:SmallMolecule (flag 1), i.e. the flags
    # for that reaction sum to zero.
    SELECT DISTINCT ?rhea
    WHERE {
      GRAPH <https://sparql.rhea-db.org/rhea> {
        ?rhea rdfs:subClassOf rh:Reaction .
        ?rhea rh:side/rh:contains/rh:compound ?compound2 .
        ?uc rdfs:subClassOf rh:Compound .
      }
      ?compound2 rdfs:subClassOf ?uc .
      BIND(IF(?uc = rh:SmallMolecule, 0, 1) AS ?c)
    }
    GROUP BY ?rhea
    HAVING (SUM(?c) = 0)
  }

  ?catalyzedReaction up:catalyzedReaction ?rhea .

  # Reified statement about the catalytic activity, with its attribution.
  ?reif rdf:object ?catalyzedReaction .
  ?reif up:attribution ?attr .

  # The attribution must carry evidence ECO:0000269 and a citation source.
  ?attr up:evidence ECO:0000269 .
  ?attr up:source ?source .
  ?source a up:Citation .
}
Use
82_rhea_reactions_associated_with_ec_in_uniprotkb: List Rhea reactions associated with an EC (enzyme classification).
# Rhea reactions paired with the enzyme class recorded on the same
# catalytic activity.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?rhea ?EC
WHERE {
  ?CatalyticActivity up:catalyzedReaction ?rhea .
  ?CatalyticActivity up:enzymeClass ?EC .
}
Use