Python read CSV file into List or Dictionary example

CSV (Comma Separated Values) is the most popular data format for importing and exporting databases between various systems. Because CSV doesn't have a standardized format, there are often subtle differences between CSV files from different vendors; for example, the field separator may be a TAB instead of a comma. The Python csv module tries to take care of most of these variations using dialects and/or format parameters such as delimiter, quotechar, escapechar, etc. You can also use the csv.Sniffer().sniff() method to guess the file dialect. To get the names of all registered dialects you can use csv.list_dialects(). Some standard dialects are excel-tab, unix and excel.

In this example we read all CSV files from a given folder and then get each record either as a list or as a dictionary, depending on which reader is used. csv.DictReader automatically uses the values in the first row as the keys of the dictionary.

Python read CSV file into List or Dictionary example

Sample CSV file with US cities

ID,Name,CountryCode,District,Population
3793,"New York",USA,"New York",8008278
3794,"Los Angeles",USA,California,3694820
3795,Chicago,USA,Illinois,2896016
3796,Houston,USA,Texas,1953631
3797,Philadelphia,USA,Pennsylvania,1517550
3798,Phoenix,USA,Arizona,1321045
3799,"San Diego",USA,California,1223400
3800,Dallas,USA,Texas,1188580

Parsed CSV file in List format

['ID', 'Name', 'CountryCode', 'District', 'Population']
['3793', 'New York', 'USA', 'New York', '8008278']
['3794', 'Los Angeles', 'USA', 'California', '3694820']
['3795', 'Chicago', 'USA', 'Illinois', '2896016']
['3796', 'Houston', 'USA', 'Texas', '1953631']
['3797', 'Philadelphia', 'USA', 'Pennsylvania', '1517550']
['3798', 'Phoenix', 'USA', 'Arizona', '1321045']
['3799', 'San Diego', 'USA', 'California', '1223400']
['3800', 'Dallas', 'USA', 'Texas', '1188580']

Parsed CSV file in a Dictionary format

{'Name': 'New York', 'CountryCode': 'USA', 'Population': '8008278', 'ID': '3793', 'District': 'New York'}
{'Name': 'Los Angeles', 'CountryCode': 'USA', 'Population': '3694820', 'ID': '3794', 'District': 'California'}
{'Name': 'Chicago', 'CountryCode': 'USA', 'Population': '2896016', 'ID': '3795', 'District': 'Illinois'}
{'Name': 'Houston', 'CountryCode': 'USA', 'Population': '1953631', 'ID': '3796', 'District': 'Texas'}
{'Name': 'Philadelphia', 'CountryCode': 'USA', 'Population': '1517550', 'ID': '3797', 'District': 'Pennsylvania'}
{'Name': 'Phoenix', 'CountryCode': 'USA', 'Population': '1321045', 'ID': '3798', 'District': 'Arizona'}
{'Name': 'San Diego', 'CountryCode': 'USA', 'Population': '1223400', 'ID': '3799', 'District': 'California'}
{'Name': 'Dallas', 'CountryCode': 'USA', 'Population': '1188580', 'ID': '3800', 'District': 'Texas'}

Source code for the Python module

import os
import csv

def readMyFiles(filePath):
    """Print every record of every ``.csv`` file found directly in a folder.

    Each matching file is parsed twice with the dialect guessed from its
    first 1024 characters: first with ``csv.reader`` (each row printed as
    a list, header row included), then with ``csv.DictReader`` (each data
    row printed as a dictionary keyed by the header row).

    Sub folders and files whose name does not end in ``.csv`` are skipped.
    Files are visited in sorted name order so the output is deterministic.

    Args:
        filePath: Path of the folder to scan (not searched recursively).

    Returns:
        None. The rows are written to standard output.

    Raises:
        OSError: If the folder cannot be listed or a file cannot be opened.
    """
    for myFile in sorted(os.listdir(filePath)):
        myFilePath = os.path.join(filePath, myFile)
        # only regular files with a .csv suffix; this also skips sub folders
        if not (os.path.isfile(myFilePath) and myFilePath.endswith(".csv")):
            continue
        # newline='' lets the csv module do its own line-ending handling,
        # which keeps line breaks embedded in quoted fields intact
        with open(myFilePath, 'r', encoding='utf-8', newline='') as csvfile:
            fileDialect = _sniffDialect(csvfile)
            # first pass: every row (header included) as a list
            for row in csv.reader(csvfile, dialect=fileDialect):
                #do your processing here
                print(row)
            # rewind so the second reader starts from the header row again
            csvfile.seek(0)
            # second pass: every data row as a dictionary
            for row in csv.DictReader(csvfile, dialect=fileDialect):
                #do your processing here
                print(row)


def _sniffDialect(csvfile, sampleSize=1024):
    """Guess the dialect of an open CSV text file and rewind the file.

    Reads up to ``sampleSize`` characters for the sniffer and then seeks
    back to the start. When the sniffer cannot decide (it raises
    ``csv.Error``, for example on an empty file) the standard ``excel``
    dialect is returned instead, so one odd file does not abort the scan.
    """
    sample = csvfile.read(sampleSize)
    csvfile.seek(0)
    try:
        return csv.Sniffer().sniff(sample)
    except csv.Error:
        return csv.excel


if __name__ == '__main__':
    # show which CSV dialects the csv module currently has registered
    registeredDialects = csv.list_dialects()
    print(registeredDialects)
    # the 'data' folder sits two levels above the folder holding this script
    scriptFolder = os.path.dirname(__file__)
    relativeDataFolder = os.path.join(scriptFolder, os.pardir, os.pardir, 'data')
    dataFolder = os.path.abspath(relativeDataFolder)
    # parse and print every CSV file found in that folder
    readMyFiles(dataFolder)


Reference

No comments:

Post a Comment

NO JUNK, Please try to keep this clean and related to the topic at hand.
Comments are for users to ask questions, collaborate or improve on existing.