Skip Header
Examples
- Find all Natural Variant Annotations if associated via an evidence tag to an article with a pubmed identifier
# Lists natural-variant annotations whose reified evidence statement is
# attributed to a journal citation. The three output values are suffixes cut
# out of the protein, annotation and citation IRIs.
SELECT ?accession ?annotation_acc ?pubmed
WHERE {
  ?protein a up:Protein .
  ?protein up:annotation ?annotation .
  ?annotation a up:Natural_Variant_Annotation .

  # The statement whose rdf:object is the annotation carries the attribution.
  ?evidenceLink rdf:object ?annotation .
  ?evidenceLink up:attribution ?attribution .
  ?attribution up:source ?citation .
  ?citation a up:Journal_Citation .

  # Keep everything from character 33 of the protein IRI onwards.
  BIND(SUBSTR(STR(?protein), 33) AS ?accession)
  # Annotation IRIs containing "#SIP" are cut at 33, all others at 36.
  BIND(
    IF(CONTAINS(STR(?annotation), "#SIP"),
       SUBSTR(STR(?annotation), 33),
       SUBSTR(STR(?annotation), 36))
    AS ?annotation_acc
  )
  # Keep everything from character 35 of the citation IRI onwards.
  BIND(SUBSTR(STR(?citation), 35) AS ?pubmed)
}
- Find how often an article in pubmed was used in an evidence tag (ordered by most used to least)
# Counts, per journal citation, the attributions found on reified evidence
# statements about protein annotations; the most used citation comes first.
# NOTE: the result column is spelled ?attribitions; the name is kept so that
# existing consumers of the result set are unaffected.
SELECT ?source (COUNT(?attribution) AS ?attribitions)
WHERE {
  ?protein a up:Protein .
  ?protein up:annotation ?annotation .
  ?evidenceLink rdf:object ?annotation .
  ?evidenceLink up:attribution ?attribution .
  ?attribution up:source ?source .
  ?source a up:Journal_Citation .
}
GROUP BY ?source
ORDER BY DESC(?attribitions)
- Find where disease related proteins are known to be located in the cell
# Pairs every disease label attached to a protein with every cellular
# component (and its preferred label) attached to the same protein.
SELECT ?protein ?disease ?location_inside_cell ?cellcmpt
WHERE {
  ?protein up:annotation ?diseaseAnnotation .
  ?protein up:annotation ?subcellAnnotation .

  # Disease annotation -> disease resource -> preferred label.
  ?diseaseAnnotation up:disease ?diseaseResource .
  ?diseaseResource skos:prefLabel ?disease .

  # Subcellular annotation -> location -> cellular component -> label.
  ?subcellAnnotation up:locatedIn ?location .
  ?location up:cellularComponent ?cellcmpt .
  ?cellcmpt skos:prefLabel ?location_inside_cell .
}
- For two accessions find the GO term labels and group them into GO process,function and component
# For each of the two listed accessions, collects the labels of the terms the
# entry is classified with into three ';'-separated lists, depending on which
# of the three GO root classes the term is a direct rdfs:subClassOf.
SELECT
  (CONCAT(SUBSTR(STR(?protein), 33)) AS ?uniprot)
  (GROUP_CONCAT(?celtype; separator=";") AS ?celtypes)
  (GROUP_CONCAT(?biotype; separator=";") AS ?biotypes)
  (GROUP_CONCAT(?moltype; separator=";") AS ?moltypes)
WHERE {
  VALUES (?acc) { ("Q6GZX4") ("Q96375") }
  # Build the entry IRI from the bare accession.
  BIND (IRI(CONCAT("http://purl.uniprot.org/uniprot/", ?acc)) AS ?protein)

  ?protein a up:Protein ;
           up:classifiedWith ?term .

  # biological_process (GO:0008150)
  OPTIONAL {
    ?term rdfs:subClassOf <http://purl.obolibrary.org/obo/GO_0008150> ;
          rdfs:label ?biotype .
  }
  # cellular_component (GO:0005575)
  OPTIONAL {
    ?term rdfs:subClassOf <http://purl.obolibrary.org/obo/GO_0005575> ;
          rdfs:label ?celtype .
  }
  # molecular_function (GO:0003674)
  OPTIONAL {
    ?term rdfs:subClassOf <http://purl.obolibrary.org/obo/GO_0003674> ;
          rdfs:label ?moltype .
  }

  # Drop classifications that matched none of the three branches
  # (for example the UniProt keywords).
  FILTER(bound(?biotype) || bound(?celtype) || bound(?moltype))
}
GROUP BY ?protein
- Number of reviewed human entries (Swiss-Prot, taxon 9606) that are related to kinase activity
# Counts distinct reviewed proteins of organism taxon:9606 that are classified
# with GO:0016301 directly, or with a term that is a direct subclass of it.
SELECT (COUNT(DISTINCT(?protein)) AS ?pc)
WHERE {
  ?protein a up:Protein .
  ?protein up:reviewed true .
  ?protein up:organism taxon:9606 .
  ?protein up:classifiedWith|(up:classifiedWith/rdfs:subClassOf) GO:0016301 .
}
- Find the release number of the uniprot data that is currently being queried
# Reads every pav:version value stored in the endpoint's VoID graph.
SELECT ?version
FROM <http://sparql.uniprot.org/.well-known/void>
WHERE {
  # The subject is irrelevant; any resource carrying a version matches.
  ?anyResource <http://purl.org/pav/2.0/version> ?version .
}
- Find any uniprot entry which has a name 'HLA class I histocompatibility antigen, B-73 alpha chain'
# Matches the literal against any sub-property of up:structuredNameType on the
# entry's recommended or alternative name, and reports which property matched.
SELECT ?protein ?anyKindOfName
WHERE {
  ?protein a up:Protein ;
           (up:recommendedName|up:alternativeName) ?nameNode .
  ?nameNode ?anyKindOfName "HLA class I histocompatibility antigen, B-73 alpha chain" .
  ?anyKindOfName rdfs:subPropertyOf up:structuredNameType .
}
- Find any uniprot entry, or a uniprot entry's domain or component, which has a name 'HLA class I histocompatibility antigen, B-73 alpha chain'
# Same name lookup as for whole entries, but the name node may also be reached
# through one up:domain or up:component step from the entry.
SELECT ?protein ?anyKindOfName
WHERE {
  ?protein a up:Protein ;
           (up:recommendedName|up:alternativeName)|((up:domain|up:component)/(up:recommendedName|up:alternativeName)) ?nameNode .
  ?nameNode ?anyKindOfName "HLA class I histocompatibility antigen, B-73 alpha chain" .
  ?anyKindOfName rdfs:subPropertyOf up:structuredNameType .
}
- Find all names associated with uniprot entry P05067, and if the name is associated with the entry, its domains or its components
# ?partType stays unbound for names attached to the entry itself and is
# up:domain or up:component for names attached to a part of the entry.
SELECT ?protein ?anyKindOfName ?names ?partType
WHERE {
  BIND(<http://purl.uniprot.org/uniprot/P05067> AS ?protein)
  ?protein a up:Protein .

  {
    # Names of the entry itself.
    ?protein (up:recommendedName|up:alternativeName) ?nameNode .
  }
  UNION
  {
    # Names of the entry's domains and components.
    VALUES ?partType { up:domain up:component }
    ?protein ?partType ?part .
    ?part (up:recommendedName|up:alternativeName) ?nameNode .
  }

  ?nameNode ?anyKindOfName ?names .
  ?anyKindOfName rdfs:subPropertyOf up:structuredNameType .
}
- Get the list of reviewed uniprot entries for the chromosome of proteome UP000000625
# Only reviewed entries are returned (up:reviewed true); ?proteome is pinned to
# the single chromosome IRI given in VALUES.
SELECT ?protein ?proteome
WHERE {
  VALUES ?proteome { <http://purl.uniprot.org/proteomes/UP000000625#Chromosome> }
  ?protein a up:Protein .
  ?protein up:reviewed true .
  ?protein up:proteome ?proteome .
}
- Use ALLIE a service for Abbreviation / Long Form in Japanese and English to search in UniProt using Japanese.
# Step 1 (remote): fetch the English labels of the resource that carries the
# given Japanese label. Step 2 (local): look for entries, domains or
# components whose structured name equals one of those labels as a plain string.
SELECT ?protein ?englishLabelStr
WHERE {
  SERVICE <http://data.allie.dbcls.jp/sparql> {
    ?term rdfs:label "アミロイド前駆体タンパク質"@ja .
    ?term rdfs:label ?englishLabel .
    FILTER(lang(?englishLabel) = "en")
  }
  # Drop the language tag so the value can match UniProt name literals.
  BIND (STR(?englishLabel) AS ?englishLabelStr)

  ?protein a up:Protein .
  {
    ?protein (up:recommendedName|up:alternativeName) ?nameNode .
  }
  UNION
  {
    VALUES ?partKind { up:domain up:component }
    ?protein ?partKind ?part .
    ?part (up:recommendedName|up:alternativeName) ?nameNode .
  }

  ?nameNode ?anyKindOfName ?englishLabelStr .
  ?anyKindOfName rdfs:subPropertyOf up:structuredNameType .
}
- Find UniProt entries with merged loci in Bordetella avium
# Keeps proteins of organism taxon:360910 for which more than one locus name
# is found across their encoding genes, and lists those names comma-separated.
SELECT ?protein (GROUP_CONCAT(?locusName; separator=',') AS ?locusNames)
WHERE {
  ?protein a up:Protein .
  ?protein up:organism taxon:360910 .
  ?protein up:encodedBy ?gene .
  ?gene up:locusName ?locusName .
}
GROUP BY ?protein
HAVING (COUNT(?locusName) > 1)
- Find UniParc records whose sequence point to the most database entries
# The inner query counts up:sequenceFor links per sequence inside the UniParc
# graph; the outer query only sorts that result, largest count first.
SELECT ?sequence ?entries
WHERE {
  {
    SELECT ?sequence (COUNT(?entry) AS ?entries)
    WHERE {
      GRAPH <http://sparql.uniprot.org/uniparc> {
        ?sequence up:sequenceFor ?entry .
      }
    }
    GROUP BY ?sequence
  }
}
ORDER BY DESC(?entries)
- Find UniProtKB entries with more than 1 Topological domain annotation
# Groups topological-domain annotations by protein, keeps groups with more
# than one annotation row and joins their comments with ", ".
SELECT ?protein (GROUP_CONCAT(?comment; separator=", ") AS ?comments)
WHERE {
  ?protein a up:Protein .
  ?protein up:annotation ?annotation .
  ?annotation a up:Topological_Domain_Annotation .
  ?annotation rdfs:comment ?comment .
}
GROUP BY ?protein
HAVING (COUNT(?annotation) > 1)
- Find longest comment text associated with a UniProtKB Natural Variant Annotation
# Returns every natural-variant annotation with its comment, sorted by comment
# length so that the longest text is the first row (no LIMIT is applied).
SELECT ?annotation ?comment
WHERE {
  ?annotation a up:Natural_Variant_Annotation .
  ?annotation rdfs:comment ?comment .
}
ORDER BY DESC(STRLEN(?comment))
- Find the co-occurrence count of Topological Domain comment text in UniProtKB entries
# For every ordered pair of distinct topological-domain annotations on the
# same protein, takes each comment up to its first ';' and counts how often
# each pair of shortened comments occurs.
SELECT ?comment1 ?comment2 (COUNT(?comment1) AS ?count1)
WHERE {
  ?protein a up:Protein .
  ?protein up:annotation ?domainA .
  ?protein up:annotation ?domainB .

  ?domainA a up:Topological_Domain_Annotation ;
           rdfs:comment ?fullText1 .
  ?domainB a up:Topological_Domain_Annotation ;
           rdfs:comment ?fullText2 .

  # Keep only the part before the first ';' when there is one.
  BIND(IF(contains(?fullText1, ';'), STRBEFORE(?fullText1, ';'), ?fullText1) AS ?comment1)
  BIND(IF(contains(?fullText2, ';'), STRBEFORE(?fullText2, ';'), ?fullText2) AS ?comment2)

  # An annotation is never paired with itself.
  FILTER(?domainA != ?domainB)
}
GROUP BY ?comment1 ?comment2
ORDER BY DESC(?count1)
- Find the similar proteins for UniProtKB entry P05067 sorted by UniRef cluster identity
# Finds every UniRef cluster that has a member sequence belonging to P05067
# and returns the entries of all members of those clusters, together with
# the cluster's identity value, highest identity first.
SELECT
  ?similar
  ?identity
FROM <http://sparql.uniprot.org/uniref>
FROM <http://sparql.uniprot.org/uniprot>
WHERE
{
  # Fixed: the accession was written as P05607 (digits transposed), which
  # queried a different entry than the P05067 named in the description.
  BIND (uniprotkb:P05067 AS ?protein)
  ?cluster up:member ?member ;
           up:member/up:sequenceFor ?protein ;
           up:identity ?identity .
  ?member up:sequenceFor ?similar .
  # The query entry itself is not reported as similar.
  FILTER(!sameTerm(?similar, ?protein))
}
ORDER BY DESC(?identity)
- Find the orthologous proteins for UniProtKB entry P05067 using the OrthoDB database
# For entry P05067, returns its organism name, preferred gene name, function
# comments (when present) and each OrthoDB group it links to, together with
# the difference between the entry's sequence length and the group's median
# protein length as reported by the OrthoDB endpoint.
SELECT
  ?protein
  ?orthoGroup
  ?scientificName
  ?functionComment
  ?prefferedGeneName
  ((STRLEN(?value) - ?medianLength) AS ?deviationFromMedianLength)
WHERE
{
  # Fixed: ?protein was projected and used inside the OPTIONAL but never tied
  # to P05067, so the OPTIONAL matched the function annotations of every
  # protein. Pinning it here restricts all patterns to the intended entry.
  VALUES ?protein { uniprotkb:P05067 }
  ?protein a up:Protein ;
           up:organism/up:scientificName ?scientificName ;
           rdfs:seeAlso ?orthoGroup ;
           up:encodedBy/skos:prefLabel ?prefferedGeneName ;
           up:sequence/rdf:value ?value .
  OPTIONAL {
    ?protein up:annotation ?functionAnnotation .
    ?functionAnnotation a up:Function_Annotation ;
                        rdfs:comment ?functionComment .
  }
  SERVICE <http://sparql.orthodb.org/sparql> {
    ?orthoGroup orthodb:ogMedianProteinLength ?medianLength .
    ?orthoGroup orthodb:hasMember ?xref .
    # The constant IRI is kept inside SERVICE so the remote pattern stays
    # selective even if the engine does not forward the ?protein binding.
    ?xref orthodb:xref/orthodb:xrefResource uniprotkb:P05067 .
  }
}
- Find the human protein which contains an Epitope VSTQ, where T is a phosphorylated threonine
# Looks at modified-residue annotations of taxon:9606 proteins whose comment
# contains "Phosphothreonine" and checks the four residues starting two
# positions before the annotated begin position against "VSTQ".
SELECT ?protein ?comment ?begin ?end
WHERE {
  ?protein a up:Protein .
  ?protein up:organism taxon:9606 .
  ?protein up:sequence ?sequence .
  ?protein up:annotation ?annotation .

  ?annotation a up:Modified_Residue_Annotation .
  ?annotation rdfs:comment ?comment .
  ?annotation up:range ?range .

  # Both ends of the range must refer to the protein's own sequence.
  ?range faldo:begin ?startPoint .
  ?range faldo:end ?endPoint .
  ?startPoint faldo:position ?begin ;
              faldo:reference ?sequence .
  ?endPoint faldo:position ?end ;
            faldo:reference ?sequence .

  ?sequence rdf:value ?aaSequence .

  # T is the third letter of VSTQ, hence the window starts at ?begin - 2.
  FILTER (SUBSTR(?aaSequence, ?begin - 2, 4) = "VSTQ")
  FILTER (CONTAINS(?comment, "Phosphothreonine"))
}
- For the human entry P05067 (Amyloid-beta precursor protein) find the gene start ends in WikiData
# Turns the entry IRI into its bare accession, then asks Wikidata for the
# item carrying that value in P352, the gene linked through P702, and the
# gene's P644 / P645 statements with matching chromosome and assembly
# qualifiers. ?protein in the result is the accession string, not the IRI.
SELECT ?protein ?begin ?end ?chromosome ?assembly
WHERE {
  {
    BIND(uniprotkb:P05067 AS ?proteinIRI)
    # Everything after the uniprotkb: namespace is the accession.
    BIND (SUBSTR(STR(?proteinIRI), STRLEN(STR(uniprotkb:)) + 1) AS ?protein)
  }
  SERVICE <https://query.wikidata.org/sparql> {
    ?wikiProtein wdt:P352 ?protein .
    ?wikiProtein wdt:P702 ?wikiGene .

    # P644 statement: value plus chromosome / assembly qualifiers.
    ?wikiGene p:P644 ?startStatement .
    ?startStatement ps:P644 ?begin ;
                    pq:P1057/wdt:P1813 ?chromosome ;
                    pq:P659/rdfs:label ?assembly .

    # P645 statement must agree on chromosome and assembly.
    ?wikiGene p:P645 ?endStatement .
    ?endStatement ps:P645 ?end ;
                  pq:P1057/wdt:P1813 ?chromosome ;
                  pq:P659/rdfs:label ?assembly .

    FILTER(lang(?assembly) = "en")
  }
}
- Retrieve entries and Catalytic activities in the reviewed (Swiss-Prot) section that have experimental evidences, restricted to human (taxon 9606) entries classified with keyword 1185
# Returns the catalyzed reaction of every catalytic-activity annotation whose
# reified (annotation, up:catalyticActivity, activity) statement has an
# attribution with evidence ECO_0000269; that same attribution must also be
# attached to the protein.
SELECT ?protein ?rhea
WHERE {
  # ECO 269 is experimental evidence
  BIND (<http://purl.obolibrary.org/obo/ECO_0000269> AS ?evidence)

  ?protein up:reviewed true .
  ?protein up:organism taxon:9606 .
  ?protein up:classifiedWith keywords:1185 .
  ?protein up:annotation ?activityAnnotation .
  ?protein up:attribution ?attribution .

  ?activityAnnotation a up:Catalytic_Activity_Annotation .
  ?activityAnnotation up:catalyticActivity ?activity .
  ?activity up:catalyzedReaction ?rhea .

  # Reified statement linking the annotation to its catalytic activity.
  ?statement rdf:subject ?activityAnnotation .
  ?statement rdf:predicate up:catalyticActivity .
  ?statement rdf:object ?activity .
  ?statement up:attribution ?attribution .

  ?attribution up:evidence ?evidence .
}
- Retrieve human enzymes that metabolize sphingolipids and are annotated in ChEMBL
# Step 1 (remote, Rhea): reactions with a participant whose ChEBI class is a
# transitive subclass of CHEBI:26739. Step 2 (local): taxon:9606 proteins
# annotated with those reactions that cross-reference the ChEMBL database.
SELECT DISTINCT ?protein ?chemblEntry
WHERE {
  SERVICE <https://sparql.rhea-db.org/sparql> {
    ?rhea rdfs:subClassOf rh:Reaction .
    ?rhea rh:side/rh:contains/rh:compound/rh:chebi/rdfs:subClassOf+ CHEBI:26739 .
  }

  ?activity up:catalyzedReaction ?rhea .

  ?protein up:annotation/up:catalyticActivity ?activity .
  ?protein up:organism taxon:9606 .
  ?protein rdfs:seeAlso ?chemblEntry .

  ?chemblEntry up:database <http://purl.uniprot.org/database/ChEMBL> .
}
- Retrieve entries with sequences that are composed of fragments
# Returns each protein that has at least one sequence carrying an up:fragment
# value. DISTINCT collapses proteins that match through several sequences or
# several fragment values.
SELECT DISTINCT
  ?protein
WHERE {
  ?protein a up:Protein ;
           up:sequence ?sequence .
  # Fixed: the original used MINUS { ?sequence up:fragment [] }, which removed
  # exactly the fragment sequences and so returned the opposite of what the
  # description asks for. Requiring the triple keeps only fragment sequences.
  ?sequence up:fragment [] .
}
- Connect patents cited in UniProtKB with those in the patent database at EPO via publication number.
# Takes patent citations whose exact-match IRI contains 'EP', cuts the
# publication number out of that IRI (character 35 onwards) and looks the
# number up at the EPO linked-data endpoint.
SELECT ?citation ?patent ?application ?applicationNo
WHERE {
  ?citation a up:Patent_Citation .
  ?citation skos:exactMatch ?patent .
  FILTER(CONTAINS(STR(?patent), 'EP'))
  BIND(SUBSTR(STR(?patent), 35) AS ?applicationNo)

  SERVICE <https://data.epo.org/linked-data/query> {
    ?application patent:publicationNumber ?applicationNo .
  }
}
- Connect patents cited in UniProtKB with those in the patent database at EPO via publication number, whose grant date is more than twenty years in the past.
# Looks up each cited patent's publication number at the EPO endpoint, follows
# it to the application and its grant date, and keeps rows whose grant YEAR is
# smaller than the current year minus twenty (the comparison is by calendar
# year, not by exact date). Rows are sorted by grant year, oldest first.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX patent: <http://data.epo.org/linked-data/def/patent/>

SELECT ?grantDate ?patent ?application ?applicationNo
WHERE {
  ?citation a up:Patent_Citation .
  ?citation skos:exactMatch ?patent .

  # Characters 35.. of the IRI are used as the publication number;
  # characters 33-34 are bound to ?countryCode, which is not used further
  # in this query.
  BIND(SUBSTR(STR(?patent), 35) AS ?applicationNo)
  BIND(SUBSTR(STR(?patent), 33, 2) AS ?countryCode)

  SERVICE <https://data.epo.org/linked-data/query> {
    ?publication patent:publicationNumber ?applicationNo .
    ?publication patent:application ?application .
    ?application patent:grantDate ?grantDate .
  }

  BIND((year(now()) - 20) AS ?thisYearMinusTwenty)
  BIND(year(?grantDate) AS ?grantYear)
  FILTER(?grantYear < ?thisYearMinusTwenty)
}
ORDER BY ?grantYear