Python 58_ pandas4

Python/Python기초

Python 58_ pandas4_ Database

Codezoy 2020. 2. 10. 18:55

-- 사용자 계정이 소유한 테이블에 대한 정보를 갖고 있는 테이블: user_tables

desc user_tables;

select * from user_tables;

select table_name from user_tables;

-- 사용자 계정이 소유한 테이블의 컬럼들에 대한 정보를 갖고 있는 테이블: user_tab_columns

desc user_tab_columns;

select * from user_tab_columns;

-- emp 테이블이 가지고 있는 컬럼이름들을 검색

select column_name from user_tab_columns

where table_name = 'EMP'

order by column_id;

import cx_Oracle

import pandas as pd

if __name__ == '__main__':

# 오라클 DB 서버에 접속

dsn = cx_Oracle.makedsn('localhost', 1521, 'orcl')

with cx_Oracle.connect('scott', 'tiger', dsn) as connection:

# Cursor 객체 생성

with connection.cursor() as cursor:

print(get_column_names_of('emp', cursor)) # ['empno', 'ename', 'job']

# connection.commit()

def get_column_names_of(table, cursor_):

sql1 = """select column_name from user_tab_columns

where table_name = :table_name

order by column_id"""

# sql2 = 'select * from :table_name'

cursor_.execute(sql1, table_name=table.upper()) # 끝에 세미콜론(;) 을 붙이면 안됨

# cursor_.execute(f'select * from {table}')

req_table = cursor.fetchall()

return req_table

[('EMPNO',), ('ENAME',), ('JOB',), ('MGR',), ('HIREDATE',), ('SAL',), ('COMM',), ('DEPTNO',)]

with connection.cursor() as cursor:

emp_df = select_all_from('emp', cursor)

connection.commit()

print(emp_df)

def select_all_from(table, cursor_):

# print(table.upper())

sql1 = f"select * from {table.upper()}"

# from 뒤의 컬럼명에는 '' 콜론이 오면 안되므로 데이터 바인딩을 사용할 수 없고,

formatted string을 사용해야 한다

cursor_.execute(sql1)

req_table = cursor.fetchall()

return pd.DataFrame(req_table)

0 1 2 3 4 5 6 7

0 7369 SMITH CLERK 7902.0 1980-12-17 800.0 NaN 20

1 7499 ALLEN SALESMAN 7698.0 1981-02-20 1600.0 300.0 30

2 7521 WARD SALESMAN 7698.0 1981-02-22 1250.0 500.0 30

3 7566 JONES MANAGER 7839.0 1981-04-02 2975.0 NaN 20

4 7654 MARTIN SALESMAN 7698.0 1981-09-28 1250.0 1400.0 30

5 7698 BLAKE MANAGER 7839.0 1981-05-01 2850.0 NaN 30

6 7782 CLARK MANAGER 7839.0 1981-06-09 2450.0 NaN 10

7 7788 SCOTT ANALYST 7566.0 1987-04-19 3000.0 NaN 20

8 7839 KING PRESIDENT NaN 1981-11-17 5000.0 NaN 10

9 7844 TURNER SALESMAN 7698.0 1981-09-08 1500.0 0.0 30

10 7876 ADAMS CLERK 7788.0 1987-05-23 1100.0 NaN 20

11 7900 JAMES CLERK 7698.0 1981-12-03 950.0 NaN 30

12 7902 FORD ANALYST 7566.0 1981-12-03 3000.0 NaN 20

13 7934 MILLER CLERK 7782.0 1982-01-23 1300.0 NaN 10

# 선택된 sal 값이 salgrade_df 어느 grade에 속하는 지를 찾음

# -> salgrade_df의 행 개수만큼 반복하면서 LO, HI와 비교

# -> DataFrame.iterrows() 함수 이용

for x in salgrade_df.iterrows():

print(x)

# -> DataFrame.iterrows() 함수: 데이터 프레임의 (행 이름, 행)을 반복문 안에서 리턴해준다.

for a,b in salgrade_df.iterrows():

print(a)

for a,b in salgrade_df.iterrows():

print(b)

GRADE 1

LOSAL 700

HISAL 1200

Name: 0, dtype: int64

GRADE 2

LOSAL 1201

HISAL 1400

Name: 1, dtype: int64

GRADE 3

LOSAL 1401

HISAL 2000

Name: 2, dtype: int64

GRADE 4

LOSAL 2001

HISAL 3000

Name: 3, dtype: int64

GRADE 5

LOSAL 3001

HISAL 9999

Name: 4, dtype: int64

for name, row in salgrade_df.iterrows():

if row['LOSAL'] <= sal <= row['HISAL']:

# 급여 등급을 찾은 경우, list에 추가

sal_grade.append(row['GRADE'])

break # salgrade_df 반복을 중지

print(sal_grade)

[1, 3, 2, 4, 2, 4, 4, 4, 5, 3, 1, 1, 4, 2]

emp_df['SALGRADE'] = sal_grade

print(emp_df)

EMPNO ENAME JOB MGR ... SAL COMM DEPTNO SALGRADE

0 7369 SMITH CLERK 7902.0 ... 800.0 NaN 20 1

1 7499 ALLEN SALESMAN 7698.0 ... 1600.0 300.0 30 3

2 7521 WARD SALESMAN 7698.0 ... 1250.0 500.0 30 2

3 7566 JONES MANAGER 7839.0 ... 2975.0 NaN 20 4

4 7654 MARTIN SALESMAN 7698.0 ... 1250.0 1400.0 30 2

5 7698 BLAKE MANAGER 7839.0 ... 2850.0 NaN 30 4

6 7782 CLARK MANAGER 7839.0 ... 2450.0 NaN 10 4

7 7788 SCOTT ANALYST 7566.0 ... 3000.0 NaN 20 4

8 7839 KING PRESIDENT NaN ... 5000.0 NaN 10 5

9 7844 TURNER SALESMAN 7698.0 ... 1500.0 0.0 30 3

10 7876 ADAMS CLERK 7788.0 ... 1100.0 NaN 20 1

11 7900 JAMES CLERK 7698.0 ... 950.0 NaN 30 1

12 7902 FORD ANALYST 7566.0 ... 3000.0 NaN 20 4

13 7934 MILLER CLERK 7782.0 ... 1300.0 NaN 10 2

# pandas.merge(left, right, how, on, left_on, right_on, ...)

# left, right: 조인할 데이터 프레임

# how: 조인 방식(inner, left, right)

# on: 조인할 때 기준이 되는 컬럼 이름

# left_on= 'left 데이터 프레임의 컬럼명', right_on= 'right 데이터 프레임의 컬럼명'

# emp_df, dept_df 데이터 프레임의 left, right join 결과 비교

INNER_JOIN

emp_emp1= pd.merge(df1, df2, left_on='EMPNO', right_on='MGR', how='inner')

print(emp_emp1.head())

print(emp_emp1.tail())

EMPNO_x ENAME_x JOB_x MGR_x ... SAL_y COMM_y DEPTNO_y SAL_GRADE_y

0 7566 JONES MANAGER 7839.0 ... 3000.0 NaN 20 4

1 7566 JONES MANAGER 7839.0 ... 3000.0 NaN 20 4

2 7698 BLAKE MANAGER 7839.0 ... 1600.0 300.0 30 3

3 7698 BLAKE MANAGER 7839.0 ... 1250.0 500.0 30 2

4 7698 BLAKE MANAGER 7839.0 ... 1250.0 1400.0 30 2

[5 rows x 18 columns]

EMPNO_x ENAME_x JOB_x MGR_x ... SAL_y COMM_y DEPTNO_y SAL_GRADE_y

8 7788 SCOTT ANALYST 7566.0 ... 1100.0 NaN 20 1

9 7839 KING PRESIDENT NaN ... 2975.0 NaN 20 4

10 7839 KING PRESIDENT NaN ... 2850.0 NaN 30 4

11 7839 KING PRESIDENT NaN ... 2450.0 NaN 10 4

12 7902 FORD ANALYST 7566.0 ... 800.0 NaN 20 1

[5 rows x 18 columns]

LEFT_JOIN

emp_emp2= pd.merge(df1, df2, left_on='EMPNO', right_on='MGR', how='left')

print(emp_emp2.head())

print(emp_emp2.tail())

EMPNO_x ENAME_x JOB_x MGR_x ... SAL_y COMM_y DEPTNO_y SAL_GRADE_y

0 7369 SMITH CLERK 7902.0 ... NaN NaN NaN NaN

1 7499 ALLEN SALESMAN 7698.0 ... NaN NaN NaN NaN

2 7521 WARD SALESMAN 7698.0 ... NaN NaN NaN NaN

3 7566 JONES MANAGER 7839.0 ... 3000.0 NaN 20.0 4.0

4 7566 JONES MANAGER 7839.0 ... 3000.0 NaN 20.0 4.0

[5 rows x 18 columns]

EMPNO_x ENAME_x JOB_x MGR_x ... SAL_y COMM_y DEPTNO_y SAL_GRADE_y

16 7844 TURNER SALESMAN 7698.0 ... NaN NaN NaN NaN

17 7876 ADAMS CLERK 7788.0 ... NaN NaN NaN NaN

18 7900 JAMES CLERK 7698.0 ... NaN NaN NaN NaN

19 7902 FORD ANALYST 7566.0 ... 800.0 NaN 20.0 1.0

20 7934 MILLER CLERK 7782.0 ... NaN NaN NaN NaN

[5 rows x 18 columns]

RIGHT_JOIN

emp_emp3= pd.merge(df1, df2, left_on='EMPNO', right_on='MGR', how='right')

print(emp_emp3.head())

print(emp_emp3.tail())

EMPNO_x ENAME_x JOB_x MGR_x ... SAL_y COMM_y DEPTNO_y SAL_GRADE_y

0 7566.0 JONES MANAGER 7839.0 ... 3000.0 NaN 20 4

1 7566.0 JONES MANAGER 7839.0 ... 3000.0 NaN 20 4

2 7698.0 BLAKE MANAGER 7839.0 ... 1600.0 300.0 30 3

3 7698.0 BLAKE MANAGER 7839.0 ... 1250.0 500.0 30 2

4 7698.0 BLAKE MANAGER 7839.0 ... 1250.0 1400.0 30 2

[5 rows x 18 columns]

EMPNO_x ENAME_x JOB_x MGR_x ... SAL_y COMM_y DEPTNO_y SAL_GRADE_y

9 7839.0 KING PRESIDENT NaN ... 2975.0 NaN 20 4

10 7839.0 KING PRESIDENT NaN ... 2850.0 NaN 30 4

11 7839.0 KING PRESIDENT NaN ... 2450.0 NaN 10 4

12 7902.0 FORD ANALYST 7566.0 ... 800.0 NaN 20 1

13 NaN NaN NaN NaN ... 5000.0 NaN 10 5

[5 rows x 18 columns]

저작자표시 (새창열림)