84 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			84 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| 
								 | 
							
								import xml.etree.ElementTree as ET
							 | 
						||
| 
								 | 
							
								import json
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def _optional_int(value):
								    """Return ``int(value)``, or ``None`` when *value* is ``None``."""
								    return None if value is None else int(value)


								def _graphics_to_dict(graphics):
								    """Convert a ``<graphics>`` element (or ``None``) into a plain dict.

								    The returned dict always has the same keys. Attributes that are not
								    present (e.g. ``x``/``y`` on a graphics element that only carries
								    ``coords``), or every attribute when the element itself is missing,
								    are reported as ``None`` instead of raising.
								    """
								    attrs = {} if graphics is None else graphics.attrib
								    return {
								        "name": attrs.get("name"),
								        "fgcolor": attrs.get("fgcolor"),
								        "bgcolor": attrs.get("bgcolor"),
								        "type": attrs.get("type"),
								        "x": _optional_int(attrs.get("x")),
								        "y": _optional_int(attrs.get("y")),
								        "width": _optional_int(attrs.get("width")),
								        "height": _optional_int(attrs.get("height")),
								    }


								def _entry_to_dict(entry):
								    """Convert an ``<entry>`` element into a dict, including its group label."""
								    entry_id = int(entry.attrib.get("id"))
								    # Ids of 10000 and above are labelled with their 10000-block
								    # (e.g. 43217 -> "40000"); smaller ids get no group.
								    if entry_id >= 10000:
								        entry_group = str((entry_id // 10000) * 10000)
								    else:
								        entry_group = None
								    return {
								        "id": entry_id,
								        "name": entry.attrib.get("name"),
								        "type": entry.attrib.get("type"),
								        "link": entry.attrib.get("link"),
								        "reaction": entry.attrib.get("reaction"),
								        "group": entry_group,
								        "graphics": _graphics_to_dict(entry.find("graphics")),
								    }


								def _relation_to_dict(relation):
								    """Convert a ``<relation>`` element and its ``<subtype>`` children."""
								    return {
								        "entry1": int(relation.attrib.get("entry1")),
								        "entry2": int(relation.attrib.get("entry2")),
								        "type": relation.attrib.get("type"),
								        "subtypes": [
								            {
								                "name": subtype.attrib.get("name"),
								                "value": subtype.attrib.get("value"),
								            }
								            for subtype in relation.findall("subtype")
								        ],
								    }


								def _participants(reaction, tag):
								    """List the ``id``/``name`` of every direct *tag* child of *reaction*."""
								    return [
								        {
								            "id": int(child.attrib.get("id")),
								            "name": child.attrib.get("name"),
								        }
								        for child in reaction.findall(tag)
								    ]


								def _reaction_to_dict(reaction):
								    """Convert a ``<reaction>`` element with its substrates and products."""
								    return {
								        "id": int(reaction.attrib.get("id")),
								        "name": reaction.attrib.get("name"),
								        "type": reaction.attrib.get("type"),
								        "substrates": _participants(reaction, "substrate"),
								        "products": _participants(reaction, "product"),
								    }


								def parse_kegg_xml_with_group(xml_file):
								    """Parse a pathway XML file into a JSON-serializable dict.

								    Args:
								        xml_file: Anything accepted by ``ET.parse`` (a path or a file
								            object) whose root element holds ``entry``, ``relation`` and
								            ``reaction`` children.

								    Returns:
								        A dict with the root attributes ``name``, ``org``, ``number``,
								        ``title``, ``image`` and ``link`` (``None`` when absent) plus three
								        lists: ``entries``, ``relations`` and ``reactions``. Each entry
								        carries a ``group`` value: the id rounded down to a multiple of
								        10000, as a string, for ids >= 10000, otherwise ``None``. Graphics
								        fields that are missing from the XML are ``None``.

								    Raises:
								        TypeError: If a required id attribute (``entry@id``,
								            ``relation@entry1``/``entry2``, ``reaction@id``,
								            ``substrate@id``, ``product@id``) is missing.
								        ValueError: If an integer attribute is not a valid integer.
								    """
								    root = ET.parse(xml_file).getroot()

								    pathway_info = {
								        key: root.attrib.get(key)
								        for key in ("name", "org", "number", "title", "image", "link")
								    }
								    pathway_info["entries"] = [
								        _entry_to_dict(entry) for entry in root.findall("entry")
								    ]
								    pathway_info["relations"] = [
								        _relation_to_dict(relation) for relation in root.findall("relation")
								    ]
								    pathway_info["reactions"] = [
								        _reaction_to_dict(reaction) for reaction in root.findall("reaction")
								    ]
								    return pathway_info
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Usage example: parse one pathway XML file and save the parsed
								# structure as pretty-printed UTF-8 JSON (non-ASCII text kept as-is).
								result = parse_kegg_xml_with_group("expanded_pathway43200.xml")
								with open("group43200.json", "w", encoding="utf-8") as f:
								    f.write(json.dumps(result, ensure_ascii=False, indent=2))
							 |