104_uniprot_recomended_protein_full_name: The recommended protein full names for UniProtKB entries

# Returns every UniProtKB protein together with the full name attached to
# its recommended name.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?fullName
WHERE {
  ?protein a up:Protein ;
           up:recommendedName ?recommendedName .
  ?recommendedName up:fullName ?fullName .
}

105_uniprot_recomended_protein_short_name: The recommended protein short names for UniProtKB entries

# Returns every UniProtKB protein together with the short name(s) attached to
# its recommended name.
# NOTE: the result column is still called ?fullName although it carries
# up:shortName values; the name is kept so existing consumers of the result
# set are not broken.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?fullName
WHERE {
  ?protein a up:Protein ;
           up:recommendedName ?recommendedName .
  ?recommendedName up:shortName ?fullName .
}

106_uniprot_reviewed_or_not: List all UniProtKB proteins and if they are reviewed (Swiss-Prot) or unreviewed (TrEMBL)

# Lists every UniProtKB protein with the value of its up:reviewed flag
# (per the query title: true = Swiss-Prot, false = TrEMBL).
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?reviewed
WHERE {
  ?protein a up:Protein ;
           up:reviewed ?reviewed .
}

107_uniprot_sequences_and_mark_which_is_cannonical_for_human: List all human UniProtKB entries and their sequences, marking whether each listed sequence is the canonical sequence of the matching entry.

# Lists human UniProtKB entries with each of their sequences and a boolean
# ?isCanonical telling whether that sequence is considered the canonical one.
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?entry ?sequence ?isCanonical
WHERE {
  # We don't want to look into the UniParc graph, which would
  # confuse matters.
  GRAPH <http://sparql.uniprot.org/uniprot> {
    # We need the UniProt entries that are human.
    ?entry a up:Protein ;
           up:organism taxon:9606 ;
           up:sequence ?sequence .

    # If the sequence is a "Simple_Sequence" it is likely to be the
    # canonical sequence ...
    OPTIONAL {
      ?sequence a up:Simple_Sequence .
      BIND(true AS ?likelyIsCanonical)
    }

    # ... unless we are dealing with an external isoform,
    # see https://www.uniprot.org/help/canonical_and_isoforms
    OPTIONAL {
      FILTER(?likelyIsCanonical)
      ?sequence a up:External_Sequence .
      BIND(true AS ?isComplicated)
    }

    # If it is an external isoform its id would not match the entry's
    # primary accession. SUBSTR(..., 34) skips the first 33 characters of
    # the sequence IRI (presumably the isoform namespace - confirm) and
    # STRBEFORE(..., '-') keeps the part in front of the isoform suffix.
    BIND(
      IF(?isComplicated,
         STRENDS(STR(?entry), STRBEFORE(SUBSTR(STR(?sequence), 34), '-')),
         ?likelyIsCanonical)
      AS ?isCanonical
    )
  }
}

108_uniprot_signature_match_start_end: List all InterPro member database signature match start and end for a specific UniProtKB protein.

# For the UniProtKB entry P05067, lists the member-database cross-references
# found on its UniParc sequence together with the start and end positions of
# their signature matches on that sequence.
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?interproMemberDatabaseXref ?matchStart ?matchEnd
WHERE {
  GRAPH <http://sparql.uniprot.org/uniprot> {
    VALUES ?protein { <http://purl.uniprot.org/uniprot/P05067> }
    ?protein rdfs:seeAlso ?sa .
  }
  GRAPH <http://sparql.uniprot.org/uniparc> {
    ?uniparc up:sequenceFor ?protein ;
             rdfs:seeAlso ?interproMemberDatabaseXref .
    # The signature match must hang off the SAME cross-reference that is
    # projected; the original used a differently named variable here, which
    # left the projected xref unrelated to the reported positions.
    ?interproMemberDatabaseXref up:signatureSequenceMatch ?sam .
    ?sam faldo:begin ?sab ;
         faldo:end ?sae .
    # Both ends must be positioned on this UniParc sequence.
    ?sab faldo:position ?matchStart ;
         faldo:reference ?uniparc .
    ?sae faldo:position ?matchEnd ;
         faldo:reference ?uniparc .
  }
}

109_uniprot_transporter_in_liver: Find human transporter proteins in reviewed UniProtKB/Swiss-Prot, that are expressed in the liver (Uses Bgee and UBERON).

# Finds human, reviewed UniProtKB proteins that catalyze a Rhea transport
# reaction and that Bgee reports as expressed in the anatomical entity
# UBERON:0002107 (the liver, per the query title).
PREFIX genex: <http://purl.org/genex#>
PREFIX lscr: <http://purl.org/lscr#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX orth: <http://purl.org/net/orth#>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
# UBERON terms are identified by IRIs of the form .../obo/UBERON_0002107;
# the previous ".../obo/uo#" namespace produced an IRI that is not a UBERON term.
PREFIX uberon: <http://purl.obolibrary.org/obo/UBERON_>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?rhea ?protein ?anat
WHERE {
  # Transport reactions as flagged by Rhea.
  GRAPH <https://sparql.rhea-db.org/rhea> {
    ?rhea rh:isTransport true .
  }

  # Human Swiss-Prot entries catalyzing one of those reactions.
  ?protein up:reviewed true ;
           up:organism taxon:9606 ;
           up:annotation ?ann .
  ?ann up:catalyticActivity ?ca .
  ?ca up:catalyzedReaction ?rhea .

  BIND(uberon:0002107 AS ?anat)

  # Expression evidence is fetched from the Bgee endpoint.
  SERVICE <https://www.bgee.org/sparql/> {
    ?seq genex:isExpressedIn ?anat ;
         lscr:xrefUniprot ?protein ;
         orth:organism ?organism .
    ?organism obo:RO_0002162 taxon:9606 .
  }
}

110_uniprot_unamed_plasmids: Sometimes it is known that the gene encoding a UniProtKB protein is located on a plasmid or an organelle, but the name of the plasmid is unknown.

# Lists proteins with the plasmid or organelle they are encoded in.
# The label is optional, so plasmids/organelles without a known name are
# still returned (with ?label unbound).
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?plasmidOrOrganelle ?label
WHERE {
  ?protein a up:Protein ;
           up:encodedIn ?plasmidOrOrganelle .
  OPTIONAL {
    ?plasmidOrOrganelle rdfs:label ?label .
  }
}

111_uniprot_make_chain_sequence_fasta: Given a UniProt chain identifier, generate the matching sequence as a FASTA record

# Builds a FASTA record for the chain annotation PRO_0000268053: a header
# line ">" + chain id, a newline, then the slice of the referenced sequence
# between the annotation's begin and end positions (inclusive).
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX faldo: <http://biohackathon.org/resource/faldo#>

SELECT
  (CONCAT('>', ?chainSeqId, '\n', (SUBSTR(?iupacAA, ?begin, (?end-?begin+1)))) AS ?chainFasta)
WHERE {
  BIND("PRO_0000268053" AS ?chainSeqId)
  # Turn the bare identifier into the annotation IRI.
  BIND(IRI(CONCAT("http://purl.uniprot.org/annotation/", ?chainSeqId)) AS ?annId)
  ?annId up:range ?range .
  ?range faldo:begin [ faldo:reference ?reference ;
                       faldo:position ?begin ] ;
         faldo:end [ faldo:position ?end ] .
  # The sequence text itself is stored as rdf:value of the referenced sequence.
  ?reference rdf:value ?iupacAA .
}

112_count_human_transporters: Count the number of Human Transporter Proteins

# Counts distinct human UniProtKB proteins that carry a catalytic activity
# annotation whose catalyzed reaction is flagged as a transport reaction in Rhea.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX faldo: <http://biohackathon.org/resource/faldo#>

SELECT (COUNT(DISTINCT ?protein) AS ?humanTransportEnzymes)
WHERE {
  GRAPH <http://sparql.uniprot.org/uniprot> {
    ?protein up:organism taxon:9606 ;
             up:annotation ?a .
    ?a a up:Catalytic_Activity_Annotation ;
       up:catalyticActivity ?ca .
    ?ca up:catalyzedReaction ?rhea .
  }
  GRAPH <https://sparql.rhea-db.org/rhea> {
    ?rhea rh:isTransport true .
  }
}

113_UniProtKB_Swiss-Prot_entries_annotated_with_CC-CA_Rhea_involving_lipids.ttl: UniProtKB/Swiss-Prot entries annotated with CC-CA/Rhea involving lipids

# Counts all (reviewed protein, Rhea reaction) pairs where the reaction
# involves a compound whose ChEBI class is a subclass of CHEBI:18059
# (lipids, per the query title).
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>

SELECT (COUNT(*) as ?cnt)
WHERE {
  {
    SELECT ?protein ?rhea
    WHERE {
      # The qualifying reactions are computed remotely on the Rhea endpoint.
      SERVICE <https://sparql.rhea-db.org/sparql> {
        SELECT distinct ?rhea
        WHERE {
          ?rhea rdfs:subClassOf rh:Reaction ;
                rh:status rh:Approved ;
                rh:side ?reactionSide .
          ?reactionSide rh:contains ?participant .
          ?participant rh:compound ?compound .
          ?compound rh:chebi ?chebi .
          ?chebi rdfs:subClassOf+ CHEBI:18059 .
        }
      }
    }
  }
  # Join the reactions to reviewed entries via their catalytic activity annotations.
  ?protein a up:Protein ;
           up:reviewed true ;
           up:annotation ?a .
  ?a a up:Catalytic_Activity_Annotation ;
     up:catalyticActivity ?ca .
  ?ca up:catalyzedReaction ?rhea .
}

114_Number_of_EC_numbers_described_at_protein_domain_and_component_levels: Number of EC numbers described at protein, domain and component levels

# Counts distinct reviewed proteins that have an EC number attached at the
# protein level, at a domain, or at a component.
# NOTE(review): the title speaks of counting EC numbers, while the query
# counts proteins (result column ?proteinCount) - confirm which is intended.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX ec: <http://purl.uniprot.org/enzyme/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT (count(distinct ?protein) as ?proteinCount)
WHERE {
  ?protein up:reviewed true .
  # One alternative path replaces three OPTIONALs + FILTER(BOUND(...)):
  # a protein qualifies as soon as any of the three levels has an EC number.
  ?protein ( up:enzyme | up:domain/up:enzyme | up:component/up:enzyme ) ?ecNumber .
}

115_Number_of_complete_EC_numbers_described_at_protein_level_at_domain_levels_at_component_level: Number of _complete_ EC numbers described at protein level, at domain levels, at component level

# Counts, over reviewed proteins, the distinct complete EC numbers found at
# the protein level, at domain level and at component level (three columns).
# An EC number is treated as complete when its IRI contains no '-'
# placeholder, the same convention the sibling "EC numbers not linked" query
# uses; without this restriction partial classes were counted as well.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX ec: <http://purl.uniprot.org/enzyme/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT
  (count(distinct ?ecProt) as ?ecNumberInProteinCount)
  (count(distinct ?ecDomain) as ?ecNumberInDomainCount)
  (count(distinct ?ecComponent) as ?ecNumberInComponentCount)
WHERE {
  ?protein up:reviewed true .
  OPTIONAL {
    ?protein up:enzyme ?ecProt .
    FILTER (!CONTAINS(STR(?ecProt), '-'))
  }
  OPTIONAL {
    ?protein up:domain/up:enzyme ?ecDomain .
    FILTER (!CONTAINS(STR(?ecDomain), '-'))
  }
  OPTIONAL {
    ?protein up:component/up:enzyme ?ecComponent .
    FILTER (!CONTAINS(STR(?ecComponent), '-'))
  }
}

116_biosodafrontend_rabit_mouse_orthologs: Rabbit's proteins encoded by genes that are orthologous to Mouse's HBB-Y gene and their cross reference links to Uniprot

# Finds OMA protein pairs from one orthologs cluster where the first protein
# is the Mouse protein labelled 'HBB-Y' and the second is a Rabbit protein,
# and returns both proteins with their UniProt cross-references.
PREFIX lscr: <http://purl.org/lscr#>
PREFIX orth: <http://purl.org/net/orth#>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT DISTINCT ?PROTEIN_1 ?PROTEIN_2 ?UNIPROT_XREF_1 ?UNIPROT_XREF_2
WHERE {
  # Resolve the two taxa by common name on the local endpoint.
  ?taxon_1 up:commonName 'Mouse' .
  ?taxon_2 up:commonName 'Rabbit' .

  SERVICE <https://sparql.omabrowser.org/sparql/> {
    ?cluster a orth:OrthologsCluster ;
             orth:hasHomologousMember ?node1 ;
             orth:hasHomologousMember ?node2 .
    # Proteins may sit arbitrarily deep below the two cluster members.
    ?node2 orth:hasHomologousMember* ?PROTEIN_2 .
    ?node1 orth:hasHomologousMember* ?PROTEIN_1 .

    ?PROTEIN_1 a orth:Protein ;
               orth:organism/obo:RO_0002162 ?taxon_1 ;
               rdfs:label 'HBB-Y' ;
               lscr:xrefUniprot ?UNIPROT_XREF_1 .

    ?PROTEIN_2 a orth:Protein ;
               orth:organism/obo:RO_0002162 ?taxon_2 ;
               lscr:xrefUniprot ?UNIPROT_XREF_2 .

    # The two proteins must come from different branches of the cluster.
    FILTER ( ?node1 != ?node2 )
  }
}

117_biosodafrontend_glioblastoma_orthologs_rat: Which are the proteins associated with glioblastoma and the orthologs expressed in the rat brain?

# Finds human proteins whose disease annotation text mentions "glioblastoma",
# their rat orthologs according to OMA, and keeps those whose gene Bgee
# reports as expressed in an anatomical entity labelled "brain".
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX orth: <http://purl.org/net/orth#>
PREFIX sio: <http://semanticscience.org/resource/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX lscr: <http://purl.org/lscr#>
PREFIX genex: <http://purl.org/genex#>

SELECT DISTINCT ?protein ?orthologous_protein ?gene ?annotation_text
WHERE {
  # Step 1: human proteins with a glioblastoma-related disease annotation.
  {
    SELECT ?protein ?annotation_text
    WHERE {
      ?protein a up:Protein ;
               up:organism taxon:9606 ;
               up:annotation ?annotation .
      ?annotation a up:Disease_Annotation ;
                  rdfs:comment ?annotation_text .
      FILTER CONTAINS (?annotation_text, "glioblastoma")
    }
  }

  # Step 2: rat orthologs of those proteins, looked up in OMA.
  SERVICE <https://sparql.omabrowser.org/sparql/> {
    SELECT ?orthologous_protein ?protein ?gene
    WHERE {
      ?protein_OMA a orth:Protein .
      ?orthologous_protein a orth:Protein .
      ?cluster a orth:OrthologsCluster ;
               orth:hasHomologousMember ?node1 ;
               orth:hasHomologousMember ?node2 .
      ?node2 orth:hasHomologousMember* ?protein_OMA .
      ?node1 orth:hasHomologousMember* ?orthologous_protein .
      # taxon:10116 is Rattus norvegicus.
      ?orthologous_protein orth:organism/obo:RO_0002162 taxon:10116 ;
                           sio:SIO_010079 ?gene .
      ?protein_OMA lscr:xrefUniprot ?protein .
      FILTER(?node1 != ?node2)
    }
  }

  # Step 3: keep genes Bgee reports as expressed in the rat brain.
  SERVICE <https://www.bgee.org/sparql/> {
    ?gene genex:isExpressedIn ?a .
    ?a rdfs:label "brain" .
    ?gene orth:organism ?s .
    ?s obo:RO_0002162 taxon:10116 .
  }
}

118_biosodafrontend_rat_brain_human_cancer: What are the Homo sapiens genes associated with cancer and their orthologs expressed in the Rattus norvegicus brain?

# Finds Homo sapiens proteins whose disease annotation text mentions "cancer",
# their Rattus norvegicus orthologs according to OMA, and returns the
# ortholog genes that Bgee reports as expressed in an entity labelled 'brain'.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX orth: <http://purl.org/net/orth#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX lscr: <http://purl.org/lscr#>
PREFIX genex: <http://purl.org/genex#>
PREFIX sio: <http://semanticscience.org/resource/>

SELECT ?gene ?orthologous_protein2
WHERE {
  # Step 1: human proteins with a cancer-related disease annotation.
  {
    SELECT ?protein1
    WHERE {
      ?protein1 a up:Protein ;
                up:organism/up:scientificName 'Homo sapiens' ;
                up:annotation ?annotation .
      ?annotation a up:Disease_Annotation ;
                  rdfs:comment ?annotation_text .
      FILTER CONTAINS (?annotation_text, "cancer")
    }
  }

  # Step 2: rat orthologs of those proteins, looked up in OMA.
  SERVICE <https://sparql.omabrowser.org/sparql/> {
    SELECT ?orthologous_protein2 ?protein1 ?gene
    WHERE {
      ?protein_OMA a orth:Protein .
      ?orthologous_protein2 a orth:Protein .
      ?cluster a orth:OrthologsCluster ;
               orth:hasHomologousMember ?node1 ;
               orth:hasHomologousMember ?node2 .
      ?node2 orth:hasHomologousMember* ?protein_OMA .
      ?node1 orth:hasHomologousMember* ?orthologous_protein2 .
      ?orthologous_protein2 orth:organism/obo:RO_0002162/up:scientificName 'Rattus norvegicus' ;
                            sio:SIO_010079 ?gene .
      ?protein_OMA lscr:xrefUniprot ?protein1 .
      FILTER(?node1 != ?node2)
    }
  }

  # Step 3: keep genes Bgee reports as expressed in the rat (taxon:10116) brain.
  SERVICE <https://www.bgee.org/sparql/> {
    ?gene genex:isExpressedIn ?anatEntity .
    ?anatEntity rdfs:label 'brain' .
    ?gene orth:organism ?org .
    ?org obo:RO_0002162 taxon:10116 .
  }
}

119_uniref_distinct_ec_in_seed: Distinct Enzymes in UniRef50 seed entries

# Counts the distinct enzyme classes (at protein, domain or component level)
# of proteins that are the seed of a UniRef cluster with identity 0.5
# (UniRef50, per the query title).
PREFIX up: <http://purl.uniprot.org/core/>

SELECT (COUNT(DISTINCT ?enzyme) AS ?distinctEnzymesInUniRef50Seed)
WHERE {
  GRAPH <http://sparql.uniprot.org/uniprot> {
    ?protein ( up:enzyme | up:domain/up:enzyme | up:component/up:enzyme ) ?enzyme .
  }
  GRAPH <http://sparql.uniprot.org/uniref> {
    ?protein up:seedFor ?cluster .
    ?cluster up:identity 0.5 .
  }
}

120_for_taxon_find_reference_proteomes: For an NCBI taxon identifier given as a number, find its reference proteome(s) if they exist

# For each numeric NCBI taxon identifier in the VALUES list, returns the
# taxon IRI and, when one exists, its reference proteome(s). Taxa without a
# reference proteome are still returned with ?reference_proteome unbound.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>

SELECT ?taxon ?reference_proteome
WHERE {
  VALUES (?taxid) {
    (623) # Shigella flexneri
    (633) # Yersinia pseudotuberculosis
  }
  # Convert the number to a correct IRI. CONCAT only accepts string
  # literals, so the integer has to be converted with STR() first; passing
  # the raw integer is a type error that leaves ?taxon unbound.
  BIND(IRI(CONCAT(STR(taxon:), STR(?taxid))) AS ?taxon)
  # Keeps only identifiers that resolve to a taxon with a scientific name.
  ?taxon up:scientificName ?taxonName .
  OPTIONAL {
    ?reference_proteome a up:Reference_Proteome ;
                        up:organism ?taxon .
  }
}

121_proteins_and_diseases_linked: List all UniProtKB proteins and the diseases they are annotated to be related to.

# Lists every protein together with each disease referenced by one of its
# disease annotations.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?disease
WHERE {
  ?protein a up:Protein ;
           up:annotation ?annotation .
  ?annotation a up:Disease_Annotation ;
              up:disease ?disease .
  ?disease a up:Disease .
}

122_enzymes_transporting_lipids_in_reviewed_human: List all human UniProtKB/Swiss-Prot entries facilitating the transport of lipids.

# Lists reviewed human proteins that catalyze a Rhea transport reaction
# involving a lipid, together with the ChEBI class of that lipid.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>

SELECT ?protein ?chebi
WHERE {
  # Transport reactions and the ChEBI identifiers of their participants.
  GRAPH <https://sparql.rhea-db.org/rhea> {
    ?rhea rh:isTransport true ;
          rh:side/rh:contains/rh:compound ?compound .
    ?compound (rh:chebi|(rh:reactivePart/rh:chebi)|rh:underlyingChebi) ?chebi .
  }

  # CHEBI:18059 is the class for all Lipids
  ?chebi rdfs:subClassOf* CHEBI:18059 .

  # Select human reviewed entries from Swiss-Prot
  ?protein up:reviewed true ;
           up:organism taxon:9606 .

  # Link protein to catalytic activity, then to Rhea reaction.
  # The same variable must be used on both triples: the original bound
  # ?catalytic_activity_obj here but matched ?catalytic_activity below,
  # which disconnected the protein from the reaction.
  ?protein up:annotation ?annotation .
  ?annotation up:catalyticActivity ?catalytic_activity .
  ?catalytic_activity up:catalyzedReaction ?rhea .
}

123_list_EC_numbers_not_linked_to_UniProt_entries: list = EC numbers not linked to UniProt entries

# Counts the complete, non-obsolete EC numbers that no reviewed protein
# references at protein, domain or component level.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX ec: <http://purl.uniprot.org/enzyme/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT (count(distinct ?ecNumber) as ?ecCount)
WHERE {
  ?ecNumber rdfs:subClassOf up:Enzyme .
  FILTER (NOT EXISTS { ?ecNumber up:obsolete true . })
  # Skip incomplete EC numbers (those containing a '-' placeholder).
  # ?ecNumber is an IRI, and regex() is only defined on string literals,
  # so it is converted with STR() first; without it the filter raises a
  # type error and rejects every row on spec-conformant engines.
  FILTER (!regex(STR(?ecNumber), '-'))
  FILTER (NOT EXISTS {
    ?protein up:enzyme ?ecNumber .
    ?protein up:reviewed true .
  })
  FILTER (NOT EXISTS {
    ?protein up:domain/up:enzyme ?ecNumber .
    ?protein up:reviewed true .
  })
  FILTER (NOT EXISTS {
    ?protein up:reviewed true .
    ?protein up:component/up:enzyme ?ecNumber .
  })
}

124_list_protein_components_ec: list = protein - components -ec

# Lists the distinct enzyme classes attached to a domain of a reviewed
# protein that has a mnemonic.
# NOTE(review): the title mentions components, but the query walks
# up:domain, not up:component - confirm which level is intended.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX ec: <http://purl.uniprot.org/enzyme/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT distinct ?enzyme
WHERE {
  ?protein up:reviewed true ;
           up:mnemonic ?mnemonic ;
           up:domain ?domain .
  ?domain up:enzyme ?enzyme .
}

125_CORDIS_EURIO_cited_project_results_in_uniprot: Use the CORDIS EURIO Knowledge Graph in combination with UniProt to find the count of outputs(DOIs) of their funded projects cited in UniProt.

# For each CORDIS project, counts the distinct result DOIs that are cited as
# journal citations in UniProt and the distinct entries citing them.
# Results are ordered by numeric project identifier, highest first.
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX eurio: <http://data.europa.eu/s66#>
PREFIX patent: <http://purl.uniprot.org/EPO/>
PREFIX pubmed: <http://purl.uniprot.org/pubmed/>

SELECT
  (IRI(CONCAT('https://cordis.europa.eu/project/id/', SAMPLE(?identifier))) AS ?id)
  (SAMPLE(?project_title) AS ?project_titles)
  (COUNT(DISTINCT ?doi_string) AS ?doisInSwissProt)
  (COUNT(DISTINCT ?protein) AS ?entriesInSwissProt)
WHERE {
  # Projects and the DOIs of their results come from the CORDIS endpoint.
  SERVICE <https://cordis.europa.eu/datalab/sparql> {
    ?project a eurio:Project ;
             eurio:identifier ?identifier ;
             eurio:title ?project_title ;
             eurio:hasResult ?result .
    ?result eurio:doi ?doi .
  }
  # EURIO uses literals of type xsd:anyURI to represent DOIs
  # without the leading "doi:" indicator.
  BIND(CONCAT("doi:", STR(?doi)) AS ?doi_string)
  ?citation a up:Journal_Citation ;
            dcterms:identifier ?doi_string .
  ?protein up:citation ?citation .
  BIND(xsd:int(?identifier) AS ?intid)
}
GROUP BY ?project
# ?intid is not a grouping key, so it must be aggregated before it can be
# used for ordering; a bare ?intid is out of scope after GROUP BY.
ORDER BY DESC(MAX(?intid))

62_diseases_involving_enzymes: Find diseases that are thought to be related to enzymes

# Lists diseases (with preferred label) annotated on proteins that are
# enzymes, i.e. that have an enzyme class directly or through a catalytic
# activity annotation.
# DISTINCT collapses the duplicate rows that would otherwise appear once per
# matching protein/enzyme combination, matching the sibling disease queries.
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT DISTINCT ?disease ?diseaseLabel
WHERE {
  ?protein up:enzyme|up:annotation/up:catalyticActivity/up:enzymeClass ?enzyme ;
           up:annotation ?diseaseAnnotation .
  ?diseaseAnnotation a up:Disease_Annotation ;
                     up:disease ?disease .
  ?disease skos:prefLabel ?diseaseLabel .
}

63_diseases_involving_enzymes_located_in_mitochondrion: Find (Human genetic) diseases that are thought to be related to Enzymes, known to be located in the Mitochondrion

# Lists the distinct diseases annotated on human enzymes that have a
# subcellular location whose cellular component is (part of) location 173
# (the mitochondrion, per the query title).
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT DISTINCT ?disease
WHERE {
  ?protein a up:Protein ;
           up:organism taxon:9606 ;
           up:annotation ?disease_annotation ,
                         ?subcellularLocation .

  # A protein counts as an enzyme if it has an EC number or a catalytic
  # activity annotation.
  {
    ?protein up:enzyme [] .
  } UNION {
    ?protein up:annotation/a up:Catalytic_Activity_Annotation .
  }

  ?disease_annotation a up:Disease_Annotation ;
                      up:disease ?disease .

  ?subcellularLocation a up:Subcellular_Location_Annotation ;
                       up:locatedIn ?location .
  ?location up:cellularComponent ?component .
  # up:partOf* also accepts the location itself (zero-length path).
  ?component up:partOf* <http://purl.uniprot.org/locations/173> .
}

64_diseases_related_to_mutation_in_active_site: Find (Human Genetic) diseases, that are related to a natural variant on the active site of an enzyme.

# Lists the distinct diseases related to a natural variant of a human enzyme
# where the variant's range covers the position of an active site on the
# same sequence.
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT DISTINCT ?disease
WHERE {
  ?protein a up:Protein ;
           up:organism taxon:9606 ;
           up:annotation ?disease_annotation ,
                         ?active_site_annotation ,
                         ?natural_variant_annotation .

  # A protein counts as an enzyme if it has an EC number or a catalytic
  # activity annotation.
  {
    ?protein up:enzyme [] .
  } UNION {
    ?protein up:annotation/a up:Catalytic_Activity_Annotation .
  }

  ?disease_annotation a up:Disease_Annotation ;
                      up:disease ?disease .

  # Position of the active site (only the begin of its range is read).
  ?active_site_annotation a up:Active_Site_Annotation ;
                          up:range ?active_site_range .
  ?active_site_range faldo:begin ?active_site_begin .
  ?active_site_begin faldo:position ?active_site_position ;
                     faldo:reference ?sequence .

  # Range of a natural variant related to the same disease, on the same sequence.
  ?natural_variant_annotation a up:Natural_Variant_Annotation ;
                              up:range ?natural_variant_range ;
                              skos:related ?disease .
  ?natural_variant_range faldo:begin ?natural_variant_begin ;
                         faldo:end ?natural_variant_end .
  ?natural_variant_begin faldo:position ?natural_variant_begin_position .
  ?natural_variant_end faldo:position ?natural_variant_end_position ;
                       faldo:reference ?sequence .

  # The active site must lie inside the variant: begin <= site <= end.
  # The original compared begin >= site, which instead selected variants
  # starting at or after the active site.
  FILTER(?natural_variant_begin_position <= ?active_site_position
         && ?active_site_position <= ?natural_variant_end_position)
}

65_distinct_extinct_organisms_in_uniprotkb: How many distinct extinct organisms are represented in UniProtKB

# Lists each taxon that has entries classified with keyword 952 (presumably
# the "extinct organism" keyword, going by the query title - confirm), with
# one sample scientific name and the number of distinct entries per taxon.
PREFIX keywords: <http://purl.uniprot.org/keywords/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT
  ?taxon
  (SAMPLE(?name) AS ?anName)
  (COUNT(DISTINCT ?protein) AS ?entriesPerExtinctTaxon)
WHERE {
  GRAPH <http://sparql.uniprot.org/taxonomy> {
    ?taxon a up:Taxon ;
           up:scientificName ?name .
  }
  ?protein up:organism ?taxon ;
           up:classifiedWith keywords:952 .
}
GROUP BY ?taxon
ORDER BY ?taxon