Python 54_ csv 패키지

Python/Python기초

Python 54_ csv 패키지

Codezoy 2020. 2. 4. 18:44

import csv

# Sample data: a list of rows, where every row is a list of string fields.
# Two of the rows carry a comma inside one of their fields.
result = [
    ['test1', 'success', 'Mon'],
    ['test2', 'failure, kind of', 'Tue'],
    ['test3', 'success, kind of', 'Wed'],
]
# Per-row names bound to the very same list objects that `result` holds.
row1, row2, row3 = result

print(result)

[['test1', 'success', 'Mon'], ['test2', 'failure, kind of', 'Tue'], ['test3', 'success, kind of', 'Wed']]

# Open the output file in write mode.
# newline='' is passed so that the csv writer controls the line endings
# itself; without it, unwanted extra lines can be written to the file.
with open('test_result.csv', mode='w', encoding='UTF-8', newline='') as f:
    # Create the csv writer object.
    writer = csv.writer(f, delimiter=',')
    for row in result:
        # Write one row at a time with the writer's writerow() method.
        writer.writerow(row)

<test_result.csv>

test1,success,Mon

test2,"failure, kind of",Tue    ← ','가 들어간 문자열은 큰따옴표(")로 묶여서 저장됨

test3,"success, kind of",Wed

csv 파일을 읽을 때 무조건 쉼표(',')만을 기준으로 필드를 나누면, 쉼표가 포함된 데이터에서 오류가 발생함

# The problem that appears when the file is read WITHOUT the csv module.
with open('test_result.csv', mode='r', encoding='UTF-8') as f:
    for line in f:
        # Deliberately naive: splitting on every comma ignores the quoting,
        # so a quoted field that itself contains a comma is cut into two
        # pieces and the quote characters stay inside the pieces.
        print(line.strip().split(','))

['test1', 'success', 'Mon']

['test2', '"failure', ' kind of"', 'Tue']

['test3', '"success', ' kind of"', 'Wed']

# 'failure, kind of'라는 하나의 문자열이

#'"failure'와 ' kind of"'라는 두개의 문자열로 쪼개짐.

# 원래 데이터에는 없어야 할 "가 문자열에 포함됨.

print('\nCSV 모듈을 사용할 때')

# The codec is spelled with its canonical name 'UTF-8', and newline='' is
# passed as the csv documentation recommends for files given to csv.reader
# (it lets the reader handle newlines embedded in quoted fields correctly).
with open('test_result.csv', mode='r', encoding='UTF-8', newline='') as f:
    # Create the csv reader object.
    reader = csv.reader(f)
    for row in reader:
        # Iterating over the reader yields one parsed row at a time.
        print(row)

csv 모듈을 사용한 mpg.csv 파일 읽기

import csv

import os

# Build the path with os.path.join so the separator matches the running OS.
# Windows OS: ..\scratch08\mpg.csv
# Linux, Mac OS: ../scratch08/mpg.csv
file_path = os.path.join('..', 'scratch08', 'mpg.csv')

# newline='' is what the csv documentation recommends for reader files.
with open(file_path, mode='r', encoding='UTF-8', newline='') as f:
    reader = csv.reader(f)
    # Skip one line: the first line holds the column names, not data.
    next(reader)
    # Consume the remaining rows while the file is still open.
    df = list(reader)

print(df[0:5])  # print the rows at index 0 through 4 of the list df

# Print only the items in columns 0, 1 and 2 of row 0 of the list df.
print(df[0][0], df[0][1], df[0][2])

[['audi', 'a4', '1.8', '1999', '4', 'auto(l5)', 'f', '18', '29', 'p', 'compact'], ['audi', 'a4', '1.8', '1999', '4', 'manual(m5)', 'f', '21', '29', 'p', 'compact'], ['audi', 'a4', '2', '2008', '4', 'manual(m6)', 'f', '20', '31', 'p', 'compact'], ['audi', 'a4', '2', '2008', '4', 'auto(av)', 'f', '21', '30', 'p', 'compact'], ['audi', 'a4', '2.8', '1999', '6', 'auto(l5)', 'f', '16', '26', 'p', 'compact']]

audi a4 1.8

# Walk over every row of df, take the item at index 2 of each row,
# convert it to a number and collect the numbers in a new list.
displ = list(map(float, (record[2] for record in df)))

print(displ)

[1.8, 1.8, 2.0, 2.0, 2.8, 2.8, 3.1, 1.8, 1.8, 2.0, 2.0, 2.8, 2.8, 3.1, 3.1, 2.8, 3.1, 4.2, 5.3, 5.3, 5.3, 5.7, 6.0, 5.7, 5.7, 6.2, 6.2, 7.0, 5.3, 5.3, 5.7, 6.5, 2.4, 2.4, 3.1, 3.5, 3.6, 2.4, 3.0, 3.3, 3.3, 3.3, 3.3, 3.3, 3.8, 3.8, 3.8, 4.0, 3.7, 3.7, 3.9, 3.9, 4.7, 4.7, 4.7, 5.2, 5.2, 3.9, 4.7, 4.7, 4.7, 5.2, 5.7, 5.9, 4.7, 4.7, 4.7, 4.7, 4.7, 4.7, 5.2, 5.2, 5.7, 5.9, 4.6, 5.4, 5.4, 4.0, 4.0, 4.0, 4.0, 4.6, 5.0, 4.2, 4.2, 4.6, 4.6, 4.6, 5.4, 5.4, 3.8, 3.8, 4.0, 4.0, 4.6, 4.6, 4.6, 4.6, 5.4, 1.6, 1.6, 1.6, 1.6, 1.6, 1.8, 1.8, 1.8, 2.0, 2.4, 2.4, 2.4, 2.4, 2.5, 2.5, 3.3, 2.0, 2.0, 2.0, 2.0, 2.7, 2.7, 2.7, 3.0, 3.7, 4.0, 4.7, 4.7, 4.7, 5.7, 6.1, 4.0, 4.2, 4.4, 4.6, 5.4, 5.4, 5.4, 4.0, 4.0, 4.6, 5.0, 2.4, 2.4, 2.5, 2.5, 3.5, 3.5, 3.0, 3.0, 3.5, 3.3, 3.3, 4.0, 5.6, 3.1, 3.8, 3.8, 3.8, 5.3, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.2, 2.2, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.7, 2.7, 3.4, 3.4, 4.0, 4.7, 2.2, 2.2, 2.4, 2.4, 3.0, 3.0, 3.5, 2.2, 2.2, 2.4, 2.4, 3.0, 3.0, 3.3, 1.8, 1.8, 1.8, 1.8, 1.8, 4.7, 5.7, 2.7, 2.7, 2.7, 3.4, 3.4, 4.0, 4.0, 2.0, 2.0, 2.0, 2.0, 2.8, 1.9, 2.0, 2.0, 2.0, 2.0, 2.5, 2.5, 2.8, 2.8, 1.9, 1.9, 2.0, 2.0, 2.5, 2.5, 1.8, 1.8, 2.0, 2.0, 2.8, 2.8, 3.6]

개선 - 사전(dict) 타입으로 데이터들을 읽어주는 DictReader 객체 사용 (인덱스 번호 대신 컬럼 이름으로 값에 접근)

file_path = os.path.join('..', 'scratch08', 'mpg.csv')

# newline='' is what the csv documentation recommends for reader files.
with open(file_path, mode='r', encoding='UTF-8', newline='') as f:
    # A reader object that returns each data row as a dict-like mapping.
    # No fieldnames are passed, so the first line of the file supplies the
    # keys and is not returned as a data row.
    reader = csv.DictReader(f, delimiter=',')
    # Consume all rows while the file is still open.
    df = list(reader)

print(df[0:5])

[OrderedDict([('manufacturer', 'audi'), ('model', 'a4'), ('displ', '1.8'), ('year', '1999'), ('cyl', '4'), ('trans', 'auto(l5)'), ('drv', 'f'), ('cty', '18'), ('hwy', '29'), ('fl', 'p'), ('class', 'compact')]), OrderedDict([('manufacturer', 'audi'), ('model', 'a4'), ('displ', '1.8'), ('year', '1999'), ('cyl', '4'), ('trans', 'manual(m5)'), ('drv', 'f'), ('cty', '21'), ('hwy', '29'), ('fl', 'p'), ('class', 'compact')]), OrderedDict([('manufacturer', 'audi'), ('model', 'a4'), ('displ', '2'), ('year', '2008'), ('cyl', '4'), ('trans', 'manual(m6)'), ('drv', 'f'), ('cty', '20'), ('hwy', '31'), ('fl', 'p'), ('class', 'compact')]), OrderedDict([('manufacturer', 'audi'), ('model', 'a4'), ('displ', '2'), ('year', '2008'), ('cyl', '4'), ('trans', 'auto(av)'), ('drv', 'f'), ('cty', '21'), ('hwy', '30'), ('fl', 'p'), ('class', 'compact')]), OrderedDict([('manufacturer', 'audi'), ('model', 'a4'), ('displ', '2.8'), ('year', '1999'), ('cyl', '6'), ('trans', 'auto(l5)'), ('drv', 'f'), ('cty', '16'), ('hwy', '26'), ('fl', 'p'), ('class', 'compact')])]

# Show the first row as a whole, then one value looked up by column name.
first_record = df[0]
print(first_record)
print(first_record['manufacturer'])

OrderedDict([('manufacturer', 'audi'), ('model', 'a4'), ('displ', '1.8'), ('year', '1999'), ('cyl', '4'), ('trans', 'auto(l5)'), ('drv', 'f'), ('cty', '18'), ('hwy', '29'), ('fl', 'p'), ('class', 'compact')])

audi

# With DictReader, every row is a mapping made of 'column name: value'
# pairs, so the column is picked by its name instead of by position.
displ = list(map(float, (record['displ'] for record in df)))

print(displ)

저작자표시 (새창열림)