Off topic:how converting xml having identical tag names to json and json to csv by python
0
0
Entering edit mode
8.3 years ago

I want to parse xml file to csv by python. I am following xml->json and json to csv flow.

I am not able to convert tag having same name into csv. My code taking only once skipping remaining tags having same name. Please help on the same below. My code is not taking tag name having identical name can anyone help me to resolve this issues

Code:

import xml.etree.ElementTree as et
import json
import csv
import sys
import codecs
import os
class xml2json:

    def __init__(self, input_file, output_file = None, encoding='utf-8'):
        """Initialize the class with the paths to the input xml file
        and the output json file
        """

        # open the xml file for iteration
        self.context = et.iterparse(input_file, events=("start", "end"))
        self.output_file = output_file
        self.encoding = encoding

    def get_json(self, pretty=True):
        """
            Convert an XML file to json string
        """

        self.context = iter(self.context)
        event, root = self.context.next()

        return self._elem2json(root, pretty)

    def convert(self, pretty=True):
        """
            Convert xml file to a json file
        """

        json = self.get_json(pretty)

        # output file handle
        try:
            output = codecs.open('json_temp.json', "w", encoding=self.encoding)
        except:
            print("Failed to open the output file")
            raise

        output.write(json)
        output.close()

    def _elem2list(self, elem):
        """Convert an ElementTree element to a list"""

        block = {}

        # get the element's children
        children = elem.getchildren()

        if children:
            cur = map(self._elem2list, children)

            # create meaningful lists
            scalar = False
            try:
                if elem[0].tag != elem[1].tag:  # [{a: 1}, {b: 2}, {c: 3}] => {a: 1, b: 2, c: 3}
                    cur = dict(zip(
                        map(lambda e: e.keys()[0], cur),
                        map(lambda e: e.values()[0], cur)
                    ))
                else:
                    scalar = True
            except Exception as e:  # [{a: 1}, {a: 2}, {a: 3}] => {a: [1, 2, 3]}
                scalar = True

            if scalar:
                if len(cur) > 0:
                    cur = {elem[0].tag: [e.values()[0] for e in cur if e.values()[0] is not None]}
                else:
                    cur = {elem[0].tag: cur[0].values()[0] }

            block[elem.tag] = cur
        else:
            val = None
            if elem.text:
                val = elem.text.strip()
                val = val if len(val) > 0 else None
            elif elem.attrib:
                val = elem.attrib
                val = val if len(val) > 0 else None

            block[elem.tag] = val 

        return block

    def _elem2json(self, elem, pretty=True):
        """
        Convert an ElementTree Element (root) to json
        """
        # if the given Element is not the root element, find it
        if hasattr(elem, 'getroot'):
            elem = elem.getroot()

        return json.dumps(self._elem2list(elem), indent=(4 if pretty else None))

#ni=open('json_temp.json','wb')
converter = xml2json(sys.argv[1], encoding="utf-8")
converter.convert()

def change(row, pastkeys=()):

    result = {}
    for key in row:
        newkey = pastkeys + (key,)
        val = row[key]
        if isinstance(val, dict):
            result.update(change(val, newkey))
        elif isinstance(val, list):
            result.update(change(dict(zip(range(0, len(val)), val)), newkey))
        else:
            result[newkey] = val
    return result

# Get the JSON object, ensuring that we have a list of objects
##lines = list(sys.argv[2])

a=open('json_temp.json','r')
lines=list(a)

b= open(sys.argv[2],'w')

try:
    data = json.loads(''.join(lines))
    if isinstance(data, dict):
        data = [data]
except ValueError:
    data = [json.loads(line) for line in lines]

# change into keys
result = []
fields = set()
for row in data:
    hash = change(row)
    fields |= set(hash.keys())
    result.append(hash)

# Write as CSV
fields = sorted(fields)
out = csv.writer(b, lineterminator='\n')
out.writerow(['-'.join([str(f) for f in field]) for field in fields])
for row in result:
    out.writerow([(row.get(field, ''))for field in fields])
a.close()
os.remove('json_temp.json')
software-error • 3.0k views
ADD COMMENT
This thread is not open. No new answers may be added
Traffic: 1932 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6