1: Select all taxa from the UniProt taxonomy

 
# Return every resource typed as up:Taxon, reading from the taxonomy named graph.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?taxon
FROM <http://sparql.uniprot.org/taxonomy>
WHERE {
    ?taxon a up:Taxon
}
Use

2: Select all bacterial taxa and their scientific name from the UniProt taxonomy

xxxxxxxxxx
11
 
# Return every taxon that is a subclass of taxon:2, together with its
# scientific name.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?taxon ?name
WHERE {
    # Taxon subclasses are materialized in the data, so a single
    # rdfs:subClassOf step is enough; do not use rdfs:subClassOf+.
    ?taxon rdfs:subClassOf taxon:2 ;
           a up:Taxon ;
           up:scientificName ?name
}
Use

3: Select all UniProtKB entries, and their organism and amino acid sequences (including isoforms), for E. coli K12 and all its strains

xxxxxxxxxx
14
 
# Return entries whose organism is a subclass of taxon:83333, with the
# organism, each sequence resource (isoforms included) and its
# amino acid string.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?organism ?isoform ?sequence
WHERE {
    ?protein a up:Protein ;
             up:organism ?organism ;
             up:sequence ?isoform .
    # Taxon subclasses are materialized, do not use rdfs:subClassOf+
    ?organism rdfs:subClassOf taxon:83333 .
    # The sequence text is stored as the rdf:value of the sequence resource.
    ?isoform rdf:value ?sequence
}
Use

4: Select the UniProtKB entry with the mnemonic 'A4_HUMAN'

xxxxxxxxxx
7
 
# Look up the entry whose up:mnemonic is exactly 'A4_HUMAN'.
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein
WHERE {
    ?protein a up:Protein ;
             up:mnemonic 'A4_HUMAN'
}
Use

5: Select a mapping of UniProtKB to PDB entries using the UniProtKB cross-references to the PDB database

xxxxxxxxxx
9
 
# Map each entry to its cross-references (?db) that belong to the PDB database.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?db
WHERE {
    # Cross-references hang off the entry via rdfs:seeAlso.
    ?protein a up:Protein ;
             rdfs:seeAlso ?db .
    # Keep only cross-references whose database is PDB.
    ?db up:database <http://purl.uniprot.org/database/PDB>
}
Use

6: Select all cross-references to external databases of the category '3D structure databases' of UniProtKB entries that are classified with the keyword 'Acetoin biosynthesis (KW-0005)'

xxxxxxxxxx
12
 
# Return the distinct cross-references, in databases of the category
# '3D structure databases', of entries classified with keywords:5.
PREFIX keywords: <http://purl.uniprot.org/keywords/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT DISTINCT ?link
WHERE {
    ?entry a up:Protein ;
           up:classifiedWith keywords:5 ;
           rdfs:seeAlso ?link .
    # Restrict the cross-reference's database by its category label.
    ?link up:database ?database .
    ?database up:category '3D structure databases'
}
Use

7: Select reviewed UniProtKB entries (Swiss-Prot), and their recommended protein name, that have a preferred gene name that contains the text 'DNA'

xxxxxxxxxx
13
 
# Return reviewed entries and their recommended full name, for entries
# whose preferred gene label contains the text 'DNA'.
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?name
WHERE {
    ?protein a up:Protein ;
             up:reviewed true ;
             up:recommendedName ?recommendedName ;
             up:encodedBy ?gene .
    ?recommendedName up:fullName ?name .
    ?gene skos:prefLabel ?geneLabel .
    # CONTAINS is a case-sensitive substring test.
    FILTER (CONTAINS(?geneLabel, 'DNA'))
}
Use

8: Select the preferred gene name and disease annotation of all human UniProtKB entries that are known to be involved in a disease

xxxxxxxxxx
15
 
# For entries of organism taxon:9606 that carry a disease annotation,
# return the preferred gene label (?name) and the annotation comment (?text).
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?name ?text
WHERE {
    ?entry a up:Protein ;
           up:organism taxon:9606 ;
           up:encodedBy ?gene ;
           up:annotation ?diseaseNote .
    ?gene skos:prefLabel ?name .
    ?diseaseNote a up:Disease_Annotation ;
                 rdfs:comment ?text
}
Use

9: Select all human UniProtKB entries with a sequence variant that leads to a 'loss of function'

xxxxxxxxxx
13
 
# Return entries of organism taxon:9606 that have a natural variant
# annotation whose comment contains 'loss of function', with that comment.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?text
WHERE {
    ?protein a up:Protein ;
             up:organism taxon:9606 ;
             up:annotation ?variantNote .
    ?variantNote a up:Natural_Variant_Annotation ;
                 rdfs:comment ?text .
    # Case-sensitive substring match on the free-text comment.
    FILTER CONTAINS(?text, 'loss of function')
}
Use

10: Select all human UniProtKB entries with a sequence variant that leads to a tyrosine to phenylalanine substitution

xxxxxxxxxx
21
 
# Return entries of organism taxon:9606 with a natural variant annotation
# whose substitution is 'F' at a position where the referenced sequence
# has 'Y', together with the annotation, its begin position and comment.
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?annotation ?begin ?text
WHERE {
    ?protein a up:Protein .
    ?protein up:organism taxon:9606 .
    ?protein up:annotation ?annotation .

    ?annotation a up:Natural_Variant_Annotation .
    ?annotation rdfs:comment ?text .
    ?annotation up:substitution ?replacement .

    # Walk annotation -> range -> begin point; the begin point carries both
    # the position and the sequence resource that the position refers to.
    ?annotation up:range ?range .
    ?range faldo:begin ?beginPoint .
    ?beginPoint faldo:position ?begin ;
                faldo:reference ?sequence .

    ?sequence rdf:value ?residues .

    # Take the single character of the sequence text at the begin position.
    BIND (SUBSTR(?residues, ?begin, 1) AS ?originalResidue)
    FILTER (?originalResidue = 'Y' && ?replacement = 'F')
}
Use

11: Select all UniProtKB entries with annotated transmembrane regions and the regions' begin and end coordinates on the canonical sequence

xxxxxxxxxx
12
 
# Return entries that have a transmembrane annotation, with the begin and
# end positions of each annotated range.
PREFIX faldo: <http://biohackathon.org/resource/faldo#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?begin ?end
WHERE {
    ?protein a up:Protein ;
             up:annotation ?tmNote .
    ?tmNote a up:Transmembrane_Annotation ;
            up:range ?span .
    # Each range has a begin and an end point, each holding a faldo:position.
    ?span faldo:begin/faldo:position ?begin ;
          faldo:end/faldo:position ?end
}
Use

12: Select all UniProtKB entries that were integrated on the 30th of November 2010

xxxxxxxxxx
8
 
# Return entries whose up:created date equals 2010-11-30.
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?protein
WHERE {
    # The date is matched as a typed xsd:date literal, not as a plain string.
    ?protein a up:Protein ;
             up:created '2010-11-30'^^xsd:date
}
Use

13: Was any UniProtKB entry integrated on the 9th of January 2013?

xxxxxxxxxx
8
 
# Answer true/false: does any entry have the up:created date 2013-01-09?
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

ASK {
    # The date is matched as a typed xsd:date literal, not as a plain string.
    ?protein a up:Protein ;
             up:created '2013-01-09'^^xsd:date
}
Use

14: Construct new triples of the type 'HumanProtein' from all human UniProtKB entries

xxxxxxxxxx
11
 
# Build one "?protein a up:HumanProtein" triple for every entry whose
# organism is taxon:9606. CONSTRUCT returns the triples as a result graph.
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

CONSTRUCT {
    ?protein a up:HumanProtein
}
WHERE {
    ?protein a up:Protein ;
             up:organism taxon:9606
}
Use

17: Select the average number of cross-references to the PDB database of UniProtKB entries that have at least one cross-reference to the PDB database

xxxxxxxxxx
14
 
# Average number of distinct PDB cross-references per entry, taken over the
# entries that have at least one PDB cross-reference.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT (AVG(?linksToPdbPerEntry) AS ?avgLinksToPdbPerEntry)
WHERE
{
    # Inner query: one row per entry with its count of distinct PDB
    # cross-references. Entries without a PDB cross-reference do not match
    # the pattern, so they never enter the average.
    # The inner rows are deliberately not sorted: AVG does not depend on row
    # order, so an ORDER BY here would only add work.
    SELECT ?protein (COUNT(DISTINCT ?db) AS ?linksToPdbPerEntry)
    WHERE
    {
        ?protein a up:Protein .
        ?protein rdfs:seeAlso ?db .
        ?db up:database <http://purl.uniprot.org/database/PDB> .
    }
    GROUP BY ?protein
}
Use

18: Select the number of UniProtKB entries for each of the EC (Enzyme Commission) top level categories

xxxxxxxxxx
12
 
# Number of entries per top-level EC (Enzyme Commission) class.
PREFIX ec: <http://purl.uniprot.org/enzyme/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

# DISTINCT matters: the path below is a union of three alternatives and an
# entry may be linked to several enzymes of the same top-level class, so one
# entry can produce several solution rows per class. Counting rows would
# count such an entry more than once.
SELECT ?ecClass (COUNT(DISTINCT ?protein) AS ?size)
WHERE
{
    # The seven top-level EC classes to report on.
    VALUES (?ecClass) {(ec:1.-.-.-) (ec:2.-.-.-) (ec:3.-.-.-) (ec:4.-.-.-) (ec:5.-.-.-) (ec:6.-.-.-) (ec:7.-.-.-)} .
    # An enzyme may be attached to the entry itself, or to one of its
    # domains or components.
    ?protein ( up:enzyme | up:domain/up:enzyme | up:component/up:enzyme ) ?enzyme .
    # Enzyme subclasses are materialized, do not use rdfs:subClassOf+
    ?enzyme rdfs:subClassOf ?ecClass .
}
GROUP BY ?ecClass ORDER BY ?ecClass
Use

19: Find all natural variant annotations that are associated, via an evidence tag, with an article that has a PubMed identifier

xxxxxxxxxx
19
 
# For natural variant annotations that are the rdf:object of a statement
# attributed to a journal citation, return the entry accession, the
# annotation identifier and the citation identifier.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?accession ?annotation_acc ?pubmed
WHERE {
    ?protein a up:Protein ;
             up:annotation ?annotation .
    ?annotation a up:Natural_Variant_Annotation .

    # The evidence is attached to a statement resource whose rdf:object is
    # the annotation; its attribution points at the citation source.
    ?evidenceStatement rdf:object ?annotation ;
                       up:attribution ?attribution .
    ?attribution up:source ?source .
    ?source a up:Journal_Citation .

    # Each identifier is cut out of the IRI by dropping a fixed-length prefix.
    # Dropping 32 characters removes "http://purl.uniprot.org/uniprot/".
    BIND (SUBSTR(STR(?protein), 33) AS ?accession)
    # Annotation IRIs containing "#SIP" are cut at 33, all others at 36.
    # NOTE(review): presumably "#SIP" IRIs live under the 32-character
    # uniprot/ namespace and the rest under a 35-character annotation/
    # namespace - confirm against the data.
    BIND (
        IF(
            CONTAINS(STR(?annotation), "#SIP"),
            SUBSTR(STR(?annotation), 33),
            SUBSTR(STR(?annotation), 36)
        ) AS ?annotation_acc
    )
    # NOTE(review): offset 35 presumably strips a 34-character citations/
    # namespace - confirm against the data.
    BIND (SUBSTR(STR(?source), 35) AS ?pubmed)
}
Use

20: Find how often an article in PubMed was used in an evidence tag in a human protein (ordered by most used to least)

xxxxxxxxxx
16
 
# Count, per journal citation, how many attributions on annotations of
# entries with organism taxon:9606 use it as their source; most used first.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

# The result column is spelled ?attribitions; the spelling is kept so that
# consumers of the existing column name keep working.
SELECT ?source (COUNT(?attribution) AS ?attribitions)
WHERE {
    ?protein a up:Protein .
    ?protein up:organism taxon:9606 .
    ?protein up:annotation ?annotation .

    # The evidence is attached to a statement resource whose rdf:object is
    # the annotation; its attribution points at the citation source.
    ?evidenceStatement rdf:object ?annotation .
    ?evidenceStatement up:attribution ?attribution .

    ?attribution up:source ?source .
    ?source a up:Journal_Citation .
}
GROUP BY ?source
ORDER BY DESC(COUNT(?attribution))
Use

21: Find where disease related proteins are known to be located in the cell

xxxxxxxxxx
14
 
# For proteins that have both an annotation naming a disease and an
# annotation giving a cellular component, return the protein, the disease
# label, the component label and the component itself.
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?disease ?location_inside_cell ?cellcmpt
WHERE {
    # Two annotations on the same protein: one for the disease, one for the
    # location. Nothing forces them to be different resources.
    ?protein up:annotation ?diseaseNote .
    ?protein up:annotation ?locationNote .

    ?diseaseNote up:disease/skos:prefLabel ?disease .

    ?locationNote up:locatedIn/up:cellularComponent ?cellcmpt .
    ?cellcmpt skos:prefLabel ?location_inside_cell .
}
Use

22: For two accession numbers (ACs), find the GO term labels and group them into GO process, function and component

xxxxxxxxxx
32
 
# For the two listed accessions, collect the labels of the terms each
# protein is classified with, split into three ';'-separated lists by which
# GO root the term is a subclass of.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX GO: <http://purl.obolibrary.org/obo/GO_>

SELECT
    # Dropping 32 characters removes "http://purl.uniprot.org/uniprot/".
    (CONCAT(SUBSTR(STR(?protein), 33)) AS ?uniprot)
    (GROUP_CONCAT(?componentLabel; separator=";") AS ?celtypes)
    (GROUP_CONCAT(?processLabel; separator=";") AS ?biotypes)
    (GROUP_CONCAT(?functionLabel; separator=";") AS ?moltypes)
WHERE {
    VALUES (?ac) { ("Q6GZX4") ("Q96375") }
    # Turn each accession string into the protein IRI.
    BIND (IRI(CONCAT("http://purl.uniprot.org/uniprot/",?ac)) AS ?protein)

    ?protein a up:Protein ;
             up:classifiedWith ?term .

    # Subclass of GO:0008150 (biological_process)
    OPTIONAL {
        ?term rdfs:subClassOf GO:0008150 ;
              rdfs:label ?processLabel .
    }
    # Subclass of GO:0005575 (cellular_component)
    OPTIONAL {
        ?term rdfs:subClassOf GO:0005575 ;
              rdfs:label ?componentLabel .
    }
    # Subclass of GO:0003674 (molecular_function)
    OPTIONAL {
        ?term rdfs:subClassOf GO:0003674 ;
              rdfs:label ?functionLabel .
    }

    # Drop classifications that matched none of the three branches
    # (this filters out the UniProt keywords).
    FILTER (BOUND(?processLabel) || BOUND(?componentLabel) || BOUND(?functionLabel))
}
GROUP BY ?protein
Use

23: Number of reviewed human entries (UniProtKB/Swiss-Prot) that are related to kinase activity

xxxxxxxxxx
14
 
# Count the distinct reviewed entries of organism taxon:9606 that are
# classified with GO:0016301 or with a term that is a subclass of it.
PREFIX GO: <http://purl.obolibrary.org/obo/GO_>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT (COUNT(DISTINCT(?protein)) AS ?pc)
WHERE {
    ?protein rdf:type up:Protein .
    ?protein up:reviewed true .
    ?protein up:organism taxon:9606 .
    # Either classified with the term itself, or with a term one
    # rdfs:subClassOf step below it.
    ?protein up:classifiedWith|(up:classifiedWith/rdfs:subClassOf) GO:0016301 .
}
Use

24: Find the release number of the UniProt data that is currently being queried

xxxxxxxxxx
6
 
# Read the pav:version value(s) recorded in the endpoint's VoID description
# graph.
PREFIX pav: <http://purl.org/pav/>

SELECT ?version
FROM <https://sparql.uniprot.org/.well-known/void>
WHERE {
    # Any subject will do; only the version value is returned.
    ?described pav:version ?version
}
Use

25: Find the UniProtKB entry which has the protein name 'HLA class I histocompatibility antigen, B alpha chain'

xxxxxxxxxx
10
 
# Find entries that have a recommended or alternative name carrying the
# given text, and report which kind of name property matched.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?anyKindOfName
WHERE {
    ?protein a up:Protein ;
             (up:recommendedName|up:alternativeName) ?structuredName .
    # The predicate is a variable, so any name property can match, as long
    # as it is a sub-property of up:structuredNameType.
    ?anyKindOfName rdfs:subPropertyOf up:structuredNameType .
    ?structuredName ?anyKindOfName "HLA class I histocompatibility antigen, B alpha chain" .
}
Use

26: Find UniProtKB proteins which are cleaved and contain a domain or component named 'HLA class I histocompatibility antigen, B-73 alpha chain'

xxxxxxxxxx
10
 
# Find entries where the given name is carried either by the entry itself
# or by one of its domains or components, and report which kind of name
# property matched.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?anyKindOfName
WHERE {
    # First alternative: a name directly on the entry.
    # Second alternative: a name on a domain or component of the entry.
    ?protein a up:Protein ;
             (up:recommendedName|up:alternativeName)|((up:domain|up:component)/(up:recommendedName|up:alternativeName)) ?structuredName .
    # Only predicates that are sub-properties of up:structuredNameType count.
    ?anyKindOfName rdfs:subPropertyOf up:structuredNameType .
    ?structuredName ?anyKindOfName "HLA class I histocompatibility antigen, B-73 alpha chain" .
}
Use

26: Find a UniProtKB entry, or a UniProtKB entry's domain or component, which has the name 'HLA class I histocompatibility antigen, B-73 alpha chain'

xxxxxxxxxx
10
 
# Find entries where the given name is carried either by the entry itself
# or by one of its domains or components, and report which kind of name
# property matched.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?anyKindOfName
WHERE {
    ?protein a up:Protein .
    {
        # A name directly on the entry.
        ?protein (up:recommendedName|up:alternativeName) ?structuredName .
    }
    UNION
    {
        # A name on a domain or component of the entry.
        ?protein (up:domain|up:component)/(up:recommendedName|up:alternativeName) ?structuredName .
    }
    # Only predicates that are sub-properties of up:structuredNameType count.
    ?structuredName ?anyKindOfName "HLA class I histocompatibility antigen, B-73 alpha chain" .
    ?anyKindOfName rdfs:subPropertyOf up:structuredNameType .
}
Use

27: Retrieve all protein names, including peptide names, associated with UniProtKB entry P05067.

xxxxxxxxxx
23
 
# List every name of entry P05067: names on the entry itself and names on
# its domains and components. ?partType tells which kind of part a name
# came from and is unbound for names on the entry itself.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?protein ?anyKindOfName ?names ?partType
WHERE {
    # Fix the entry under inspection.
    VALUES ?protein { <http://purl.uniprot.org/uniprot/P05067> }
    ?protein a up:Protein .
    {
        # Names attached directly to the entry.
        ?protein (up:recommendedName|up:alternativeName) ?structuredName .
    }
    UNION
    {
        # Names attached to a domain or component of the entry.
        VALUES ?partType { up:domain up:component }
        ?protein ?partType ?part .
        ?part (up:recommendedName|up:alternativeName) ?structuredName .
    }
    # Read every name value whose predicate is a sub-property of
    # up:structuredNameType.
    ?anyKindOfName rdfs:subPropertyOf up:structuredNameType .
    ?structuredName ?anyKindOfName ?names .
}
Use