41: Retrieve UniProtKB entries with sequences that are composed of fragments

 
PREFIX up: <http://purl.uniprot.org/core/>

# Retrieve UniProtKB entries whose sequence carries a fragment statement.
SELECT DISTINCT
  ?protein
WHERE {
  ?protein a up:Protein ;
    up:sequence ?sequence .
  # Positive match: the sequence must have an up:fragment value.
  # Wrapping this pattern in MINUS would return the complement
  # (entries whose sequences are NOT flagged as fragments).
  ?sequence up:fragment [] .
}
Use

42: Connect patents cited in UniProtKB with those in the patent database at EPO via publication number.

PREFIX patent: <http://data.epo.org/linked-data/def/patent/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX up: <http://purl.uniprot.org/core/>

# Link patent citations in UniProtKB to records at the EPO endpoint,
# joining on the publication number.
SELECT
  ?citation
  ?patent
  ?application
  ?applicationNo
WHERE {
  # Patent citations together with the patent IRI they are matched to.
  ?citation a up:Patent_Citation .
  ?citation skos:exactMatch ?patent .
  # Only keep patent IRIs that contain 'EP'.
  FILTER(CONTAINS(STR(?patent), 'EP'))
  # Everything from character 35 of the IRI onwards is the number looked up remotely.
  BIND(SUBSTR(STR(?patent), 35) AS ?applicationNo)
  # Remote lookup in the EPO linked-data service.
  SERVICE <https://data.epo.org/linked-data/query> {
    ?application patent:publicationNumber ?applicationNo
  }
}
Use

43: Connect patents cited in UniProtKB with those in the patent database at EPO via publication number, whose grant date is more than twenty years in the past.

PREFIX patent: <http://data.epo.org/linked-data/def/patent/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX up: <http://purl.uniprot.org/core/>

# Link patent citations in UniProtKB to EPO publications and keep those whose
# application was granted more than twenty (calendar) years ago.
SELECT
  ?grantDate
  ?patent
  ?application
  ?applicationNo
WHERE {
  ?citation a up:Patent_Citation ;
    skos:exactMatch ?patent .
  # Characters 33-34 of the patent IRI hold the two-letter country code.
  BIND(SUBSTR(STR(?patent), 33, 2) AS ?countryCode)
  # Only EP patents can be looked up at the EPO endpoint; without this
  # restriction a non-EP number could match an unrelated EP publication.
  FILTER(?countryCode = 'EP')
  # The remainder of the IRI (from character 35) is the publication number.
  BIND(SUBSTR(STR(?patent), 35) AS ?applicationNo)
  SERVICE <https://data.epo.org/linked-data/query> {
    ?publication patent:publicationNumber ?applicationNo ;
      patent:application ?application .
    ?application patent:grantDate ?grantDate .
  }
  # Compare on calendar years: grant year must be before (current year - 20).
  BIND((year(now()) - 20) AS ?thisYearMinusTwenty)
  BIND(year(?grantDate) AS ?grantYear)
  FILTER(?grantYear < ?thisYearMinusTwenty)
}
ORDER BY ?grantYear
Use

44: Find the Rhea and InterPro combinations in UniProtKB entries.

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

# List the (InterPro, Rhea) pairs that co-occur on reviewed UniProtKB entries.
SELECT
  ?interpro
  ?rhea
FROM <http://sparql.uniprot.org/uniprot>
WHERE {
  ?protein up:reviewed true .
  ?protein up:annotation ?annotation .
  # up:catalyticActivity points at an intermediate catalytic-activity node;
  # the Rhea reaction itself is reached through up:catalyzedReaction.
  ?annotation up:catalyticActivity ?catalyticActivity .
  ?catalyticActivity up:catalyzedReaction ?rhea .
  # InterPro cross-references of the same entry.
  ?protein rdfs:seeAlso ?interpro .
  ?interpro up:database <http://purl.uniprot.org/database/InterPro> .
}
ORDER BY ?rhea
Use

45: Retrieve drugs that target human enzymes involved in sterol metabolism (federated query with WikiData and Rhea).

PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX chebihash: <http://purl.obolibrary.org/obo/chebi#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>

SELECT DISTINCT
  ?protein
  ?proteinFullName
  ?wikiChemical
  ?wikiChemicalLabel
  ?medicalConditionTreatedLabel
WHERE {
  # Step 1 (Rhea + ChEBI): approved reactions having a participant that
  # belongs to the ChEBI class CHEBI:15889 (sterol).
  SERVICE <https://sparql.rhea-db.org/sparql> {
    ?reaction rdfs:subClassOf rh:Reaction .
    ?reaction rh:status rh:Approved .
    ?reaction rh:side ?reactionSide .
    ?reactionSide rh:contains ?participant .
    ?participant rh:compound ?compound .
    {
      # Case A: the compound's ChEBI is a (transitive) subclass of sterol.
      ?compound rh:chebi ?chebi .
      ?chebi rdfs:subClassOf+ CHEBI:15889 .
    }
    UNION
    {
      # Case B: the compound's ChEBI is the major microspecies at pH 7.3
      # of some ChEBI class (?chebi2) that is a subclass of sterol.
      ?compound rh:chebi ?chebi .
      ?chebi2 rdfs:subClassOf ?chebiRestriction .
      ?chebiRestriction a owl:Restriction .
      ?chebiRestriction owl:onProperty chebihash:has_major_microspecies_at_pH_7_3 .
      ?chebiRestriction owl:someValuesFrom ?chebi .
      ?chebi2 rdfs:subClassOf+ CHEBI:15889 .
    }
  }
  # Step 2 (UniProt): human (taxon:9606) entries catalyzing these reactions.
  ?proteinIRI up:organism taxon:9606 .
  ?proteinIRI up:annotation ?a .
  ?a up:catalyticActivity ?ca .
  ?ca up:catalyzedReaction ?reaction .
  ?proteinIRI up:recommendedName ?proteinRecName .
  ?proteinRecName up:fullName ?proteinFullName .
  # Step 3 (Wikidata): drugs that interact with those proteins.
  # The join key is the accession string, i.e. the IRI minus the uniprotkb: prefix.
  BIND (SUBSTR(STR(?proteinIRI), STRLEN(STR(uniprotkb:))+1) AS ?protein)
  SERVICE <https://query.wikidata.org/sparql> {
    # Wikidata item whose wdt:P352 value equals the accession string.
    ?wp wdt:P352 ?protein .
    ?wikiChemical wdt:P129 ?wp ;       # physically interacts with
      wdt:P2175 ?wmc ;                 # medical condition treated
      rdfs:label ?wikiChemicalLabel .
    ?wmc rdfs:label ?medicalConditionTreatedLabel .
    # English labels only.
    FILTER(lang(?wikiChemicalLabel) = 'en')
    FILTER(lang(?medicalConditionTreatedLabel) = 'en')
  }
}
Use

46: Retrieve images of 'Anas' (Ducks) from the European Environmental Agency databases (federated query).

PREFIX eunisSpecies: <http://eunis.eea.europa.eu/rdf/species-schema.rdf#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

# Images of taxa below taxon:8835, fetched from the EEA endpoint by NCBI taxid.
SELECT ?taxon ?ncbiTaxid ?eunisTaxon ?eunisName ?image
WHERE {
  GRAPH <http://sparql.uniprot.org/taxonomy> {
    # Subclass links between taxa are materialized, so a single
    # rdfs:subClassOf hop is enough (do not use rdfs:subClassOf+).
    ?taxon a up:Taxon ;
      rdfs:subClassOf taxon:8835 .
    # The taxid is whatever follows 'onomy/' in the taxon IRI.
    BIND(strafter(str(?taxon), 'onomy/') AS ?ncbiTaxid)
  }
  SERVICE <https://semantic.eea.europa.eu/sparql> {
    ?eunisTaxon a eunisSpecies:SpeciesSynonym .
    ?eunisTaxon eunisSpecies:binomialName ?eunisName .
    ?eunisTaxon eunisSpecies:sameSpeciesNCBI ?ncbiTaxid .
    ?eunisTaxon <http://xmlns.com/foaf/0.1/depiction> ?image .
  }
}
Use

47: Find UniProtKB entries with a transmembrane region, with an alanine in the 15 amino acid region preceding the transmembrane

PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX up: <http://purl.uniprot.org/core/>

# Entries with a transmembrane region that have an alanine within the
# (up to) 15 residues immediately preceding that region.
SELECT ?protein ?from ?interestingRegion
WHERE {
  ?protein up:annotation ?annotation .
  ?annotation a up:Transmembrane_Annotation .
  # Get the start coordinate of the transmembrane region
  ?annotation up:range ?range .
  ?range faldo:begin ?beginI .
  ?beginI faldo:position ?begin .
  ?beginI faldo:reference ?sequence .
  # ?aas holds the IUPAC amino acid string of the sequence
  ?sequence rdf:value ?aas .
  # Window start: 15 residues before the transmembrane begin
  BIND(?begin - 15 AS ?fifteenBeforeBegin)
  # SUBSTR is 1-based, so clamp to 1 (not 0) when the region starts
  # fewer than 15 residues into the sequence.
  BIND(IF(?fifteenBeforeBegin < 1, 1, ?fifteenBeforeBegin) AS ?from)
  # Take residues ?from .. ?begin-1 only, i.e. stop right before the
  # transmembrane region (length is shorter than 15 when clamped).
  BIND(SUBSTR(?aas, ?from, ?begin - ?from) AS ?interestingRegion)
  # The preceding region needs to contain an alanine
  FILTER(CONTAINS(?interestingRegion, 'A'))
}
Use

48: Retrieve glycosylation sites and glycans on human enzymes (federated with Glyconnect)

PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX glycan: <http://purl.jp/bio/12/glyco/glycan#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

# Glycosylation sites, glycan structures and their images for human entries
# that have a catalytic activity annotation.
SELECT DISTINCT ?protein ?glycosite ?glycostructure ?glycoimage
WHERE {
  # Human (taxon:9606) entries with a catalytic activity annotation.
  ?protein up:organism taxon:9606 ;
    up:annotation ?annotation ;
    up:sequence ?isoform .
  ?annotation a up:Catalytic_Activity_Annotation .

  # Glyconnect: sites positioned on the isoform and the structures attached there.
  SERVICE <https://glyconnect.expasy.org/sparql> {
    ?glycosite faldo:reference ?isoform ;
      faldo:position ?position .
    ?specificglycosite faldo:location ?glycosite .
    ?glycoprotein glycan:glycosylated_at ?specificglycosite .
    ?glycostructure glycan:glycosylates_at ?specificglycosite ;
      foaf:depiction ?glycoimage .
  }
}
Use

49: Retrieve the UniProtKB proteins, their catalyzed Rhea reactions, their encoding genes (Ensembl) and the anatomic entities where the genes are expressed (UBERON anatomic entities from the Bgee expression data resource).

PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX genex: <http://purl.org/genex#>
PREFIX lscr: <http://purl.org/lscr#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT DISTINCT
  ?protein
  ?ensemblGene
  ?reaction
  ?anatomicEntityLabel
  ?anatomicEntity
WHERE {
  # Federated query to the Rhea endpoint, wrapped in a sub-select so only
  # the distinct reactions are joined with the UniProt data.
  {
    SELECT DISTINCT ?reaction WHERE {
      SERVICE <https://sparql.rhea-db.org/sparql> {
        ?reaction rdfs:subClassOf rh:Reaction .
        ?reaction rh:equation ?reactionEquation .
        ?reaction rh:side ?reactionSide .
        ?reactionSide rh:contains ?participant .
        ?participant rh:compound ?compound .
        # Compound constraint (CHEBI:16113 == cholesterol)
        ?compound rh:chebi CHEBI:16113 .
      }
    }
  }
  # Taxonomy constraint (taxon:9606 == Homo sapiens)
  ?protein up:organism taxon:9606 .
  ?protein up:annotation ?a .
  ?a a up:Catalytic_Activity_Annotation .
  ?a up:catalyticActivity ?ca .
  ?ca up:catalyzedReaction ?reaction .
  # Gene reached through a cross-reference that is transcribed from it.
  ?protein rdfs:seeAlso / up:transcribedFrom ?ensemblGene .
  # Federated query to Bgee (expression data); a trailing ".<digits>"
  # is removed from the gene IRI before it is used as the join key.
  BIND(IRI(REPLACE(STR(?ensemblGene), "\\.[0-9]+$", "")) AS ?ensemblGeneNoVersion)
  SERVICE <https://www.bgee.org/sparql> {
    ?gene lscr:xrefEnsemblGene ?ensemblGeneNoVersion .
    ?gene genex:isExpressedIn ?anatomicEntity .
    ?anatomicEntity rdfs:label ?anatomicEntityLabel .
  }
}
Use

50: Where are the human genes encoding enzymes metabolizing N-acyl sphingosines expressed in the human body? (federated query with Rhea and Bgee)

PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX genex: <http://purl.org/genex#>
PREFIX lscr: <http://purl.org/lscr#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

# Where are the human genes encoding enzymes that metabolize
# N-acyl sphingosines (CHEBI:52639 and its subclasses) expressed?
SELECT DISTINCT
  ?chebi
  ?reaction
  ?protein
  ?ensemblGene
  ?anatomicEntityLabel
  ?anatomicEntity
WHERE {
  # Rhea: reactions with a participant that is CHEBI:52639 or a subclass of it.
  SERVICE <https://sparql.rhea-db.org/sparql> {
    ?reaction rdfs:subClassOf rh:Reaction .
    ?reaction rh:equation ?reactionEquation .
    ?reaction rh:side ?reactionSide .
    ?reactionSide rh:contains ?participant .
    ?participant rh:compound ?compound .
    ?compound rh:chebi ?chebi .
    ?chebi rdfs:subClassOf* CHEBI:52639 .
  }
  # UniProt: human (taxon:9606) enzymes catalyzing those reactions and their genes.
  ?protein up:organism taxon:9606 .
  ?protein up:annotation ?a .
  ?a a up:Catalytic_Activity_Annotation .
  ?a up:catalyticActivity ?ca .
  ?ca up:catalyzedReaction ?reaction .
  ?protein rdfs:seeAlso / up:transcribedFrom ?ensemblGene .
  # Strip a trailing ".<version>" from the gene IRI before joining with Bgee,
  # as done in the companion Rhea/Bgee query; a versioned IRI would otherwise
  # fail to match the unversioned cross-reference on the Bgee side.
  BIND(IRI(REPLACE(STR(?ensemblGene), "\\.[0-9]+$", "")) AS ?ensemblGeneNoVersion)
  # Bgee: anatomical entities in which the gene is expressed.
  SERVICE <https://www.bgee.org/sparql> {
    ?gene lscr:xrefEnsemblGene ?ensemblGeneNoVersion .
    ?gene genex:isExpressedIn ?anatomicEntity .
    ?anatomicEntity rdfs:label ?anatomicEntityLabel .
  }
}
Use

51: Find all proteins linked to arachidonate (CHEBI:32395)

PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX up: <http://purl.uniprot.org/core/>

# Proteins catalyzing a Rhea reaction that involves arachidonate (CHEBI:32395).
SELECT DISTINCT ?uniprot ?uniprotID ?recname ?gene ?chebi ?uniprotName
WHERE {
  # Rhea: reactions with a compound mapped to the requested ChEBI, plus its name.
  SERVICE <https://sparql.rhea-db.org/sparql> {
    VALUES (?chebi) { (CHEBI:32395) }
    ?rhea rh:side/rh:contains/rh:compound ?compound .
    ?compound rh:chebi ?chebi .
    ?chebi up:name ?uniprotName .
  }
  # UniProt: entries annotated with those reactions, their mnemonic and full name.
  ?uniprot up:annotation/up:catalyticActivity/up:catalyzedReaction ?rhea ;
    up:mnemonic ?uniprotID ;
    up:recommendedName/up:fullName ?recname .
  # The gene label is optional: entries without one are still returned.
  OPTIONAL { ?uniprot up:encodedBy/skos:prefLabel ?gene . }
}
Use

52: Retrieve drugs that target human enzymes involved in sterol metabolism (federated query with Rhea and ChEMBL via IDSM/ELIXIR Czech Republic).

PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX cco: <http://rdf.ebi.ac.uk/terms/chembl#>
PREFIX chebihash: <http://purl.obolibrary.org/obo/chebi#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

# NOTE(review): the output variable ?chemlbMoleculePrefLabel looks like a typo
# for "chembl"; it is kept because consumers may depend on the column name.
SELECT DISTINCT
  ?protein
  ?proteinFullName
  ?activityType
  ?standardActivityValue
  ?standardActivityUnit
  ?chemblMolecule
  ?chemlbMoleculePrefLabel
WHERE {
  # Step 1 (Rhea + ChEBI): approved reactions having a participant that
  # belongs to the ChEBI class CHEBI:15889 (sterol).
  SERVICE <https://sparql.rhea-db.org/sparql> {
    ?reaction rdfs:subClassOf rh:Reaction .
    ?reaction rh:status rh:Approved .
    ?reaction rh:side ?reactionSide .
    ?reactionSide rh:contains ?participant .
    ?participant rh:compound ?compound .
    {
      # Case A: the compound's ChEBI is a (transitive) subclass of sterol.
      ?compound rh:chebi ?chebi .
      ?chebi rdfs:subClassOf+ CHEBI:15889 .
    }
    UNION
    {
      # Case B: the compound's ChEBI is the major microspecies at pH 7.3
      # of some ChEBI class (?chebi2) that is a subclass of sterol.
      ?compound rh:chebi ?chebi .
      ?chebi2 rdfs:subClassOf ?chebiRestriction .
      ?chebiRestriction a owl:Restriction .
      ?chebiRestriction owl:onProperty chebihash:has_major_microspecies_at_pH_7_3 .
      ?chebiRestriction owl:someValuesFrom ?chebi .
      ?chebi2 rdfs:subClassOf+ CHEBI:15889 .
    }
  }
  # Step 2 (UniProt): human (taxon:9606) enzymes catalyzing these reactions.
  ?protein up:organism taxon:9606 .
  ?protein up:annotation ?a .
  ?a up:catalyticActivity ?ca .
  ?ca up:catalyzedReaction ?reaction .
  ?protein up:recommendedName ?proteinRecName .
  ?proteinRecName up:fullName ?proteinFullName .
  # Step 3 (ChEMBL via IDSM, https://idsm.elixir-czech.cz/sparql/):
  # activities measured against targets that reference the protein, limited
  # to molecules whose highest development phase is greater than 3.
  SERVICE <https://idsm.elixir-czech.cz/sparql/endpoint/idsm> {
    ?activity a cco:Activity .
    ?activity cco:hasMolecule ?chemblMolecule .
    ?activity cco:hasAssay ?assay .
    ?activity cco:standardType ?activityType .
    ?activity cco:standardValue ?standardActivityValue .
    ?activity cco:standardUnits ?standardActivityUnit .
    ?chemblMolecule cco:highestDevelopmentPhase ?highestDevelopmentPhase .
    ?chemblMolecule rdfs:label ?chemblMoleculeLabel .
    ?chemblMolecule skos:prefLabel ?chemlbMoleculePrefLabel .
    FILTER (?highestDevelopmentPhase > 3)
    ?assay cco:hasTarget ?target .
    ?target cco:hasTargetComponent/cco:targetCmptXref ?protein .
    ?protein a cco:UniprotRef .
  }
}
Use

53: Find mouse homologs in OMABrowser of human enzymes that catalyze reactions involving sterols (CHEBI:15889). Federating with Rhea and OMABrowser.

PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX lscr: <http://purl.org/lscr#>
PREFIX orth: <http://purl.org/net/orth#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT DISTINCT ?chebi ?reaction ?humanProtein ?mouseProtein ?cluster
WHERE {
  # Rhea: reactions involving CHEBI:15889 (sterol) or any of its subclasses.
  SERVICE <https://sparql.rhea-db.org/sparql> {
    ?reaction rdfs:subClassOf rh:Reaction ;
      rh:side/rh:contains/rh:compound ?compound .
    ?compound rh:chebi ?chebi .
    ?chebi rdfs:subClassOf* CHEBI:15889 .
  }
  # UniProt: human (taxon:9606) entries catalyzing those reactions.
  ?humanProtein up:organism taxon:9606 ;
    up:annotation ?a .
  ?a a up:Catalytic_Activity_Annotation ;
    up:catalyticActivity ?ca .
  ?ca up:catalyzedReaction ?reaction .
  # OMA Browser: clusters having both the human protein and a mouse protein
  # somewhere below two of their direct members.
  SERVICE <https://sparql.omabrowser.org/sparql> {
    ?cluster a orth:ParalogsCluster .
    ?cluster orth:hasHomologousMember ?node1 .
    ?cluster orth:hasHomologousMember ?node2 .
    ?node1 orth:hasHomologousMember* ?orthoProtein1 .
    ?node2 orth:hasHomologousMember* ?orthoProtein2 .
    ?orthoProtein1 lscr:xrefUniprot ?mouseProtein .
    ?orthoProtein2 lscr:xrefUniprot ?humanProtein .
    # in taxon: mouse (taxon:10090)
    ?orthoProtein1 orth:organism/<http://purl.obolibrary.org/obo/RO_0002162> taxon:10090 .
  }
}
Use

54: Proteins with binding sites for ligands similar to heme

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sachem: <http://bioinfo.uochb.cas.cz/rdf/v1.0/sachem#>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

# Proteins with a binding site for a ligand that is structurally similar to heme.
SELECT ?protein ?mnemonic ?proteinName ?ligandSimilarityScore ?ligand
WHERE {
  # IDSM Sachem similarity search over ChEBI; the query string is the SMILES of heme.
  SERVICE <https://idsm.elixir-czech.cz/sparql/endpoint/chebi> {
    ?ssc sachem:compound ?ligand .
    ?ssc sachem:score ?ligandSimilarityScore .
    ?ssc sachem:similaritySearch ?sss .
    ?sss sachem:query "CC1=C(CCC([O-])=O)C2=[N+]3C1=Cc1c(C)c(C=C)c4C=C5C(C)=C(C=C)C6=[N+]5[Fe-]3(n14)n1c(=C6)c(C)c(CCC([O-])=O)c1=C2" .
    ?sss sachem:cutoff "8e-1"^^xsd:double .
    ?sss sachem:aromaticityMode sachem:aromaticityDetect .
    ?sss sachem:similarityRadius 1 .
    ?sss sachem:tautomerMode sachem:ignoreTautomers .
  }
  # UniProt: entries whose binding-site ligand is a subclass of a matched compound.
  ?protein up:annotation ?annotation .
  ?annotation a up:Binding_Site_Annotation .
  ?annotation up:ligand/rdfs:subClassOf ?ligand .
  ?protein up:mnemonic ?mnemonic .
  ?protein up:recommendedName/up:fullName ?proteinName .
}
ORDER BY DESC(?ligandSimilarityScore)
Use

55: Number of proteins with binding sites for metals or metal sulfur clusters (and experimental evidence for the binding)

PREFIX CHEBI: <http://purl.obolibrary.org/obo/CHEBI_>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

# Per ligand: number of distinct proteins having an annotation for that ligand
# whose attribution carries one of the listed evidence codes.
SELECT ?ligand ?ligandName (COUNT(DISTINCT ?protein) as ?entries)
WHERE {
  # Accepted evidence codes and the two ChEBI root classes of interest.
  VALUES ?evs { obo:ECO_0000269 obo:ECO_0007744 } .
  VALUES ?chebids { CHEBI:25213 CHEBI:25214 } .

  ?protein up:annotation ?annotation .

  # Reified statement (protein, up:annotation, annotation) carrying the evidence.
  ?st rdf:subject ?protein .
  ?st rdf:predicate up:annotation .
  ?st rdf:object ?annotation .
  ?st up:attribution/up:evidence ?evs .

  # The ligand class must sit (transitively) below one of the root classes.
  ?annotation up:ligand/rdfs:subClassOf ?ligand .
  ?ligand rdfs:subClassOf+ ?chebids .
  ?ligand rdfs:label ?ligandName .
}
GROUP BY ?ligand ?ligandName
ORDER BY DESC(?entries)
Use

56: Select enzymes that have ligands known to have an allosteric effect

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

# Entries with a binding-site ligand whose note mentions "allosteric".
# NOTE(review): nothing here restricts ?protein to enzymes - confirm whether
# that is intended.
SELECT ?protein ?ligandName ?ligandNote ?chebi
WHERE {
  ?protein up:annotation ?annotation .
  ?annotation a up:Binding_Site_Annotation ;
    up:ligand ?ligand .
  ?ligand rdfs:label ?ligandName .
  ?ligand rdfs:subClassOf ?chebi .
  ?ligand rdfs:comment ?ligandNote .
  # Case-insensitive match on the ligand note.
  FILTER(REGEX(?ligandNote, "allosteric", "i"))
}
Use

57: Map a selection of PDB identifiers plus chains to UniProtKB

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

# Map (PDB identifier, chain) pairs to the UniProtKB entries that reference them.
SELECT ?pdbId ?chain ?pdbChain ?uniprot
WHERE {
  # Input: space separated (PDB identifier, chain code) pairs.
  VALUES (?pdbId ?pdbChain) { ('6VXC' 'A') ('1BG3' 'B') }
  # Turn the identifier into the corresponding PDB IRI.
  BIND(iri(concat('http://rdf.wwpdb.org/pdb/', ?pdbId)) AS ?pdb)
  # UniProt entries cross-referencing that PDB record, and its chain mappings.
  ?uniprot rdfs:seeAlso ?pdb .
  ?pdb up:database <http://purl.uniprot.org/database/PDB> .
  ?pdb up:chainSequenceMapping ?chainSm .
  ?chainSm up:chain ?chainsPlusRange .
  # The chain list is the text in front of '='.
  BIND(STRBEFORE(?chainsPlusRange, '=') AS ?chain)
  # Keep mappings whose chain list contains the requested chain code
  # (substring match).
  FILTER(CONTAINS(?chain, ?pdbChain))
}
Use

58: Map a selection of UniProtKB accession numbers (ACs) to HGNC identifiers and symbols

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>
PREFIX up: <http://purl.uniprot.org/core/>

# Map UniProtKB accessions to their HGNC cross-reference and its comment,
# which is returned as the symbol.
SELECT ?uniprot ?hgnc ?hgncSymbol
WHERE {
  # Input: space separated list of UniProt primary accessions.
  VALUES (?acc) { ('P05067') ('P00750') }
  # Build the entry IRI from the uniprotkb: namespace and the accession.
  BIND(iri(concat(str(uniprotkb:), ?acc)) AS ?uniprot)
  ?uniprot rdfs:seeAlso ?hgnc .
  ?hgnc up:database <http://purl.uniprot.org/database/HGNC> .
  ?hgnc rdfs:comment ?hgncSymbol .
}
Use

59: Count all isoforms for a given proteome

PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX proteome:<http://purl.uniprot.org/proteomes/>

# Count the distinct sequences of reviewed taxon:9606 entries that belong
# to proteome UP000005640.
SELECT (COUNT(DISTINCT ?sequence) AS ?allIsoforms)
WHERE {
  ?protein up:reviewed true ;
    up:organism taxon:9606 ;
    up:sequence ?sequence ;
    # The entry's proteome component must be a skos:narrower of the proteome.
    up:proteome/^skos:narrower proteome:UP000005640 .
}
Use

60: Find human proteins that catalyze reactions where substrates or products have a cholestane skeleton

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rh: <http://rdf.rhea-db.org/>
PREFIX sachem: <http://bioinfo.uochb.cas.cz/rdf/v1.0/sachem#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT DISTINCT ?protein ?disease ?rhea ?chebi ?omim
WHERE {
  # Step 1: ChEBIs with a cholestane skeleton, found with the IDSM Sachem
  # chemical substructure search (Czech ELIXIR node).
  SERVICE <https://idsm.elixir-czech.cz/sparql/endpoint/chebi> {
    ?chebi sachem:substructureSearch [
      sachem:query "[C@]12(CCC3CCCC[C@]3(C)[C@@]1([H])CC[C@]1(C)[C@@]([H])([C@@](C)([H])CCCC(C)C)CC[C@@]21[H])[H]"
    ] .
  }
  # Step 2: UniProt catalytic activities are annotated with Rhea, so map the
  # ChEBIs found above to the Rhea reactions they take part in.
  SERVICE <https://sparql.rhea-db.org/sparql> {
    ?rhea rh:side/rh:contains/rh:compound/rdfs:subClassOf ?chebi .
  }
  # Step 3: human (taxon:9606) UniProtKB entries catalyzing those reactions.
  ?protein up:organism taxon:9606 .
  ?protein up:annotation/up:catalyticActivity/up:catalyzedReaction ?rhea .
  # Step 4: keep only entries with an annotated disease; the OMIM
  # cross-reference is optional.
  ?protein up:annotation/up:disease ?disease .
  OPTIONAL {
    ?disease rdfs:seeAlso ?omim .
    ?omim up:database <http://purl.uniprot.org/database/MIM> .
  }
}
Use

61: Select the Gene Protein Reaction sets for Human (Ensembl Gene, Human UniProtKB, Catalyzed Rhea reactions)

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

# Gene / protein / reaction triples for reviewed human (taxon:9606) entries.
SELECT ?ensemblGene ?protein ?rhea
WHERE {
  ?protein up:reviewed true .
  ?protein up:organism taxon:9606 .
  # Protein -> catalytic activity -> catalyzed reaction.
  ?protein up:annotation ?annotation .
  ?annotation up:catalyticActivity ?activity .
  ?activity up:catalyzedReaction ?rhea .
  # Protein -> Ensembl cross-reference -> gene it is transcribed from.
  ?protein rdfs:seeAlso ?transcript .
  ?transcript up:database <http://purl.uniprot.org/database/Ensembl> .
  ?transcript up:transcribedFrom ?ensemblGene .
}
Use

100_uniprot_organelles_or_plasmids: List the proteins encoded by a gene that is located in an organelle other than the nucleus, or on a plasmid rather than a chromosome. In these cases the gene location is stored with encodedIn properties. Note that if a plasmid has several names, they are listed as multiple rdfs:label properties.

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

# Proteins with an up:encodedIn location and, where present, its label(s).
SELECT ?protein ?plasmidOrOrganelle ?label
WHERE {
  ?protein a up:Protein .
  ?protein up:encodedIn ?plasmidOrOrganelle .
  # Labels are optional; a location with several labels yields one row per label.
  OPTIONAL { ?plasmidOrOrganelle rdfs:label ?label . }
}
Use

101_uniprot_potential_isoforms: List all human UniProtKB entries and their computationally mapped potential isoforms.

PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

# NOTE(review): ?isCanonical is projected but never bound in the pattern
# below, so that column is always empty - confirm whether it should be
# bound or dropped.
SELECT
  ?entry
  ?sequence
  ?isCanonical
WHERE {
  # Restrict matching to the UniProt graph; looking into the UniParc
  # graph as well would confuse matters.
  GRAPH <http://sparql.uniprot.org/uniprot> {
    # Human (taxon:9606) UniProt entries ...
    ?entry a up:Protein .
    ?entry up:organism taxon:9606 .
    # ... and their computationally mapped sequences.
    ?entry up:potentialSequence ?sequence .
  }
}
Use

102_uniprot_primary_accession: Extracting a UniProtKB primary accession from our IRIs is done with a bit of string manipulation. While UniProt primary accessions are unique within UniProtKB, they may be reused by accident or intentionally by other data sources. If we provided them as strings (not IRIs) and you used them in a query that way, you might accidentally retrieve completely wrong records.

PREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>
PREFIX up: <http://purl.uniprot.org/core/>

# Derive the primary accession of each entry from its IRI.
SELECT ?primaryAccession ?protein
WHERE {
  ?protein a up:Protein .
  # The accession is the part of the IRI that follows the uniprotkb: namespace,
  # hence the substring starting one character past the namespace length.
  BIND(substr(str(?protein), strlen(str(uniprotkb:))+1) AS ?primaryAccession)
}
Use

103_uniprot_proteome_location_of_gene: List UniProtKB proteins with the genetic replicon that they are encoded on, using the Proteome data.

PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

# Proteome components (proteome + replicon) that reviewed human entries
# are assigned to.
SELECT DISTINCT
  ?proteomeData
  ?replicon
  ?proteome
WHERE {
  # All three constraints must apply to the SAME entry. Using a different
  # variable for the reviewed flag would leave it disconnected from the
  # organism/proteome patterns, so unreviewed entries would slip through.
  # reviewed entries (UniProtKB/Swiss-Prot)
  ?protein up:reviewed true .
  # restricted to Human taxid
  ?protein up:organism taxon:9606 .
  ?protein up:proteome ?proteomeData .
  # The proteome-component IRI has the form <proteome>#<replicon>.
  BIND( strbefore( str(?proteomeData), "#" ) as ?proteome )
  BIND( strafter( str(?proteomeData), "#" ) as ?replicon )
}
Use

104_uniprot_recomended_protein_full_name: The recommended protein full names for UniProtKB entries

PREFIX up: <http://purl.uniprot.org/core/>

# Recommended full name of every UniProtKB entry.
SELECT
  ?protein
  ?fullName
WHERE {
  ?protein a up:Protein .
  # The full name hangs off the recommended-name node of the entry.
  ?protein up:recommendedName ?recommendedName .
  ?recommendedName up:fullName ?fullName .
}
Use