
# scripts/python/kegg_trembl_mapping/keggToUniprot.py
# Part of slib-sml (Semantic measures library).
#!/usr/bin/python
"""Map KEGG pathways to the UniProt ids linked to their genes.

For every pathway id in ``p_ids`` the script:

1. downloads the pathway entry page from www.genome.jp with ``wget``;
2. reads the gene entries from the first non-empty line that follows a
   line containing ``"Gene "``;
3. downloads the ``get_linkdb?-t+uniprot+<gene>`` page for each gene and
   collects the UniProt ids linked from it;
4. writes ``<pathway id><TAB><comma-separated UniProt ids>`` to the file
   named by ``outputFile``.

Temporary downloads (``out_<pathway>``, ``gene_<gene>``) are created in
the current directory and removed once parsed.

NOTE(review): the copy of this script that was reviewed had lost its
indentation and every piece of HTML markup inside its string literals
(the gene-row separator and both link patterns were empty or reduced to
``.*``).  The block structure and the two patterns below are
reconstructions -- confirm them against a live KEGG page before relying
on the output.
"""
import os
import re
import subprocess

# ex human
#p_ids = ["hsa00040","hsa00920","hsa00140","hsa00290","hsa00563","hsa00670","hsa00232","hsa03022","hsa03020","hsa04130","hsa03450","hsa03430","hsa04950"]
p_ids = ["sce00562","sce00920","sce00600","sce00300","sce00410","sce00514","sce00670","sce00903","sce03022","sce04130","sce03450","sce04070","sce04140"]

outputFile = "pathways_prots"

# NOTE(review): reconstructed pattern.  Assumes a gene is rendered as
#   <a href=".../www_bget?sce:YGL180W">YGL180W</a>
# and captures the organism-qualified id ("sce:YGL180W").  Links whose
# target has no "org:" prefix, or whose text is not a plain word, are
# deliberately not matched.  Confirm against the current page markup.
GENE_LINK_RE = re.compile(r'<a href="[^"]*bget\?(\w+:\w+)">\w+</a>')

# NOTE(review): reconstructed pattern.  Assumes a UniProt hit is rendered
# as a link whose target ends in "bget?uniprot:<id>"; captures <id>.
UNIPROT_LINK_RE = re.compile(r'<a href="[^"]*bget\?uniprot:(\w+)">')


def extract_gene_ids(line):
    """Return every gene id linked from one line of a pathway page.

    The ids are returned in page order; an empty list means no gene link
    was recognised in *line*.
    """
    return GENE_LINK_RE.findall(line)


def extract_uniprot_ids(lines):
    """Return the UniProt ids linked from an iterable of page lines."""
    found = []
    for line in lines:
        found.extend(UNIPROT_LINK_RE.findall(line))
    return found


def remove_if_present(path):
    """Delete *path* if it exists; do nothing otherwise."""
    if os.path.exists(path):
        os.remove(path)


def download(url, dest):
    """Fetch *url* into the file *dest* with wget.

    Returns True only when wget exits with status 0 and *dest* exists.
    """
    # An argument list (no shell) is used because part of the URL is text
    # scraped from a downloaded page and must never be shell-interpreted.
    try:
        status = subprocess.call(["wget", "-O", dest, url])
    except OSError:
        # wget could not be started at all (e.g. it is not installed).
        return False
    # "wget -O" creates dest even when the transfer fails, so the exit
    # status has to be checked in addition to the file's existence.
    return status == 0 and os.path.exists(dest)


def read_pathway_genes(path):
    """Return the gene ids listed in the downloaded pathway page *path*.

    Only the first non-empty line after a line containing "Gene " is
    scanned; the search then re-arms for a later "Gene " line, if any.
    """
    genes = []
    expect_gene_row = False
    with open(path) as page:
        for line in page:
            line = line.strip()
            if not expect_gene_row and "Gene " in line:
                expect_gene_row = True
            elif expect_gene_row and line:
                expect_gene_row = False
                genes.extend(extract_gene_ids(line))
    return genes


def fetch_uniprot_ids(gene):
    """Download the uniprot link page for *gene* and return its ids.

    Returns an empty list when the download fails.  The temporary
    ``gene_<gene>`` file is always removed.
    """
    print("processing: " + gene)
    url = "http://www.genome.jp/dbget-bin/get_linkdb?-t+uniprot+" + gene
    print("\t" + url)
    # ":" is replaced so the temporary name is valid on every filesystem.
    page = "gene_" + gene.replace(":", "_")
    try:
        if not download(url, page):
            return []
        with open(page) as handle:
            return extract_uniprot_ids(handle)
    finally:
        remove_if_present(page)


def main():
    """Process every pathway in ``p_ids`` and write ``outputFile``."""
    with open(outputFile, "w") as out:
        for pathway_id in p_ids:
            page = "out_" + pathway_id
            url = "http://www.genome.jp/dbget-bin/www_bget?pathway+" + pathway_id
            if not download(url, page):
                # Drop the empty file a failed wget may have left behind.
                remove_if_present(page)
                print("Cannot locate file please check " + url)
                continue

            gene_list = read_pathway_genes(page)
            os.remove(page)
            print("Found " + str(len(gene_list)) + " genes")
            print(gene_list)
            print("Try to get their UniprotIDs")

            all_uniprot_ids = []
            for gene in gene_list:
                uniprot_ids = fetch_uniprot_ids(gene)
                all_uniprot_ids.extend(uniprot_ids)
                print("\t" + str(uniprot_ids))

            print("-------------------")
            print(pathway_id)
            print(all_uniprot_ids)
            print("-------------------")
            out.write(pathway_id + "\t" + ",".join(all_uniprot_ids) + "\n")
    print("Consult: " + outputFile)


if __name__ == "__main__":
    main()
# © 2015 - 2025 Weber Informatics LLC | Privacy Policy