Saturday, March 28, 2009

Access Pfam domains from UniProt accessions using Python


import xml.dom.minidom as minidom


def getPFamAnnotationByAcc(acc):
    """Fetch the Pfam domain matches annotated for a protein accession.

    Requests ``http://pfam.sbc.su.se:43210/protein/<acc>`` with the POST
    field ``output=xml`` and parses the XML reply.

    Args:
        acc: Protein accession; it is interpolated verbatim into the URL.

    Returns:
        A list with one entry per ``<match>`` element in the reply.  Each
        entry is a list of ``(attribute_name, value)`` tuples: first the
        attributes of the ``<match>`` element itself, then the attributes
        of every ``<location>`` element nested inside it.

    Raises:
        pycurl.error: if the transfer fails.
        xml.parsers.expat.ExpatError: if the reply is not well-formed XML.
    """
    # Imported locally: the module header only pulls in minidom, so without
    # these the function would fail with NameError on its first line.
    import io
    import pycurl

    # Collect the reply in memory instead of a fixed "tmp.xml" in the
    # working directory, which concurrent calls would overwrite and which
    # was never cleaned up.  pycurl hands over bytes, hence BytesIO.
    payload = io.BytesIO()
    slurpp = pycurl.Curl()
    try:
        slurpp.setopt(pycurl.URL, "http://pfam.sbc.su.se:43210/protein/%s" % acc)
        slurpp.setopt(pycurl.POSTFIELDS, "output=xml")
        slurpp.setopt(pycurl.WRITEFUNCTION, payload.write)
        slurpp.perform()
    finally:
        # Release the libcurl handle even when the transfer raises.
        slurpp.close()

    # Parse the Pfam XML reply, searching for matches.
    dom = minidom.parseString(payload.getvalue())
    res_list = []
    for match in dom.getElementsByTagName('match'):
        atts = list(match.attributes.items())
        for location in match.getElementsByTagName('location'):
            atts.extend(location.attributes.items())
        res_list.append(atts)
    return res_list

No comments:

Post a Comment