Forum: (Closed) how converting xml having identical tag names to json and json to csv by python
0
gravatar for prasanna55kt
3.7 years ago by
prasanna55kt0 wrote:
i wan to parse xml file to csv by python. I am following xml->json and json to csv flow .
i am not able to convert tag having same nama into csv my code taking only once skipping remaining tags having same name . Please help on the same ,below  my code IS NOT TAKING TAG NAME HAVING IDENTICAL NAME CAN ANY ONE HELP ME TO RESOVE THIS ISSUE

code 

import xml.etree.ElementTree as et
import json
import csv
import sys
import codecs
import os
class xml2json:

    def __init__(self, input_file, output_file = None, encoding='utf-8'):
        """Initialize the class with the paths to the input xml file
        and the output json file
        """

        # open the xml file for iteration
        self.context = et.iterparse(input_file, events=("start", "end"))
        self.output_file = output_file
        self.encoding = encoding

    def get_json(self, pretty=True):
        """
            Convert an XML file to json string
        """

        self.context = iter(self.context)
        event, root = self.context.next()

        return self._elem2json(root, pretty)

    def convert(self, pretty=True):
        """
            Convert xml file to a json file

          
        """

        json = self.get_json(pretty)

        # output file handle
        try:
            output = codecs.open('json_temp.json', "w", encoding=self.encoding)
        except:
            print("Failed to open the output file")
            raise

        output.write(json)
        output.close()

    def _elem2list(self, elem):
        """Convert an ElementTree element to a list"""

        block = {}

        # get the element's children
        children = elem.getchildren()

        if children:
            cur = map(self._elem2list, children)

            # create meaningful lists
            scalar = False
            try:
                if elem[0].tag != elem[1].tag:  # [{a: 1}, {b: 2}, {c: 3}] => {a: 1, b: 2, c: 3}
                    cur = dict(zip(
                        map(lambda e: e.keys()[0], cur),
                        map(lambda e: e.values()[0], cur)
                    ))
                else:
                    scalar = True
            except Exception as e:  # [{a: 1}, {a: 2}, {a: 3}] => {a: [1, 2, 3]}
                scalar = True

            if scalar:
                if len(cur) > 0:
                    cur = {elem[0].tag: [e.values()[0] for e in cur if e.values()[0] is not None]}
                else:
                    cur = {elem[0].tag: cur[0].values()[0] }

            block[elem.tag] = cur
        else:
            val = None
            if elem.text:
                val = elem.text.strip()
                val = val if len(val) > 0 else None
            elif elem.attrib:
                val = elem.attrib
                val = val if len(val) > 0 else None

            block[elem.tag] = val 

        return block

    def _elem2json(self, elem, pretty=True):
        """
        Convert an ElementTree Element (root) to json
        """
        # if the given Element is not the root element, find it
        if hasattr(elem, 'getroot'):
            elem = elem.getroot()

        return json.dumps(self._elem2list(elem), indent=(4 if pretty else None))

#ni=open('json_temp.json','wb')
converter = xml2json(sys.argv[1], encoding="utf-8")
converter.convert()

def change(row, pastkeys=()):

    result = {}
    for key in row:
        newkey = pastkeys + (key,)
        val = row[key]
        if isinstance(val, dict):
            result.update(change(val, newkey))
        elif isinstance(val, list):
            result.update(change(dict(zip(range(0, len(val)), val)), newkey))
        else:
            result[newkey] = val
    return result

# Get the JSON object, ensuring that we have a list of objects
##lines = list(sys.argv[2])

a=open('json_temp.json','r')
lines=list(a)

b= open(sys.argv[2],'w')

try:
    data = json.loads(''.join(lines))
    if isinstance(data, dict):
        data = [data]
except ValueError:
    data = [json.loads(line) for line in lines]

# change into keys
result = []
fields = set()
for row in data:
    hash = change(row)
    fields |= set(hash.keys())
    result.append(hash)

# Write as CSV
fields = sorted(fields)
out = csv.writer(b, lineterminator='\n')
out.writerow(['-'.join([str(f) for f in field]) for field in fields])
for row in result:
    out.writerow([(row.get(field, ''))for field in fields])
a.close()
os.remove('json_temp.json')
forum software error • 1.8k views
ADD COMMENTlink modified 3.7 years ago • written 3.7 years ago by prasanna55kt0

Hello prasanna55kt!

We believe that this post does not fit the main topic of this site.

Not a bioinformatics question.

For this reason we have closed your question. This allows us to keep the site focused on the topics that the community can help with.

If you disagree please tell us why in a reply below, we'll be happy to talk about it.

Cheers!

ADD REPLYlink written 3.7 years ago by Michael Dondrup46k
Please log in to add an answer.
The thread is closed. No new answers may be added.

Help
Access

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.
Powered by Biostar version 2.3.0
Traffic: 978 users visited in the last hour