In [1]:
import numpy as np
import pandas as pd
# 시각화
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
# `display` and `HTML` belong to the public `IPython.display` module. Importing
# `display` from `IPython.core.display` has been deprecated since IPython 7.14.
from IPython.display import display, HTML

# Inject a CSS rule that sets elements with class `container` to 90% width
# (presumably to widen the notebook page in the classic Notebook UI).
display(HTML("<style>.container {width:90% !important;}</style>"))
In [2]:
# Load the yearly tables for men, one CSV per age bracket (10 through 50).
# Each file is decoded with the CP949 codec.
M_10, M_20, M_30, M_40, M_50 = (
    pd.read_csv('../csv2/M_{}_YEARLY.csv'.format(age), encoding='cp949')
    for age in (10, 20, 30, 40, 50)
)
In [3]:
# Load the yearly tables for women, one CSV per age bracket (10 through 50).
# Each file is decoded with the CP949 codec.
W_10, W_20, W_30, W_40, W_50 = (
    pd.read_csv('../csv2/W_{}_YEARLY.csv'.format(age), encoding='cp949')
    for age in (10, 20, 30, 40, 50)
)
In [4]:
# Tag every row with the age bracket (10), then reshape to wide form:
# index = `age`, one column per distinct `label`, cell values = `hits`.
# NOTE: the name M_10 is rebound to the pivoted frame, so this cell cannot be
# re-run without reloading the CSV (the `label`/`hits` columns are gone afterwards).
M_10 = M_10.assign(age=10).pivot(index='age', columns='label', values='hits')
M_10
Out[4]:
In [5]:
# Tag every row with the age bracket (20), then reshape to wide form:
# index = `age`, one column per distinct `label`, cell values = `hits`.
# NOTE: M_20 is rebound to the pivoted frame; reload the CSV before re-running.
M_20 = M_20.assign(age=20).pivot(index='age', columns='label', values='hits')
M_20
Out[5]:
In [6]:
# Restructure the remaining male tables (30, 40, 50) the same way: add a constant
# `age` column, then pivot so each frame becomes one row indexed by age with
# one column per `label` holding `hits`.
# NOTE: the names are rebound to the pivoted frames; reload the CSVs before re-running.
M_30, M_40, M_50 = (
    yearly.assign(age=age).pivot(index='age', columns='label', values='hits')
    for age, yearly in ((30, M_30), (40, M_40), (50, M_50))
)
In [7]:
# Stack the five per-age rows into one table and relabel the columns by year.
# NOTE(review): the relabelling is positional — it assumes the pivoted `label`
# columns come out in 2018, 2019, 2020 order; confirm against the CSVs.
dataM = pd.concat(
    [M_10, M_20, M_30, M_40, M_50], axis=0
).set_axis(['2018', '2019', '2020'], axis=1)
dataM  # men
Out[7]:
In [8]:
# Restructure each female table: add a constant `age` column, then pivot so the
# frame becomes one row indexed by age with one column per `label` holding `hits`.
# NOTE: the W_* names are rebound to the pivoted frames; reload the CSVs before re-running.
W_10, W_20, W_30, W_40, W_50 = (
    yearly.assign(age=age).pivot(index='age', columns='label', values='hits')
    for age, yearly in ((10, W_10), (20, W_20), (30, W_30), (40, W_40), (50, W_50))
)

# Stack the five per-age rows into one table and relabel the columns by year.
# NOTE(review): the relabelling is positional — it assumes the pivoted `label`
# columns come out in 2018, 2019, 2020 order; confirm against the CSVs.
dataW = pd.concat(
    [W_10, W_20, W_30, W_40, W_50], axis=0
).set_axis(['2018', '2019', '2020'], axis=1)
dataW  # women
Out[8]:
In [9]:
# Grouped bar chart of the male table: one group per age bracket, one bar per year.
# The y-axis values exceed 100,000, which is why the tick labels appear in the
# abbreviated form seen in the output.
dataM.plot.bar().set_title('Man')
Out[9]:
In [10]:
# Grouped bar chart of the female table: one group per age bracket, one bar per year.
dataW.plot.bar().set_title('Woman')
Out[10]:
In [11]:
# Monthly results for the 30s age bracket in 2019, one CSV per sex (M / W).
# Each file is decoded with the CP949 codec.
M_30_2019, W_30_2019 = (
    pd.read_csv('../csv2/{}_30_MONTHLY.csv'.format(sex), encoding='cp949')
    for sex in ('M', 'W')
)
In [12]:
# Inspect the male monthly table as loaded from the CSV, before it is reshaped.
M_30_2019
Out[12]:
In [13]:
# Tag every row with sex 'M', then pivot so the frame is indexed by `label`
# with a single column named 'M' holding `hits`.
# NOTE: M_30_2019 is rebound to the pivoted frame; reload the CSV before re-running.
M_30_2019 = M_30_2019.assign(sex='M').pivot(index='label', columns='sex', values='hits')
M_30_2019
Out[13]:
In [14]:
# Tag every row with sex 'W', then pivot so the frame is indexed by `label`
# with a single column named 'W' holding `hits`.
# NOTE: W_30_2019 is rebound to the pivoted frame; reload the CSV before re-running.
W_30_2019 = W_30_2019.assign(sex='W').pivot(index='label', columns='sex', values='hits')
W_30_2019
Out[14]:
In [15]:
# Put the male ('M') and female ('W') monthly columns side by side; axis=1 aligns
# the two frames on their shared `label` index.
data2019 = pd.concat([M_30_2019, W_30_2019], axis=1)

# Relabel the 12 rows with zero-padded month numbers '01'..'12'. The labels are
# generated rather than hand-typed: the previous literal list contained '24'
# where '04' was meant, which mislabelled April in the table and the chart.
# NOTE(review): assumes the rows are already in calendar order (Jan..Dec) —
# confirm against the `label` values in the CSVs.
data2019.index = ['{:02d}'.format(month) for month in range(1, 13)]
data2019
Out[15]:
In [16]:
# Grouped bar chart of the monthly table: one group per month, one bar per sex.
data2019.plot.bar().set_title('2019_30_MONTHLY')
Out[16]:
In [17]:
# Total number of complaints from men in 2020: the '2020' column of dataM
# summed over all of its rows.
sumM = sum(dataM.loc[:, '2020'])
sumM
Out[17]:
In [18]:
# Total number of complaints from women in 2020: the '2020' column of dataW
# summed over all of its rows.
sumW = sum(dataW.loc[:, '2020'])
sumW
Out[18]:
In [19]:
# Total number of complaints from men across 2018-2020: each year column is
# summed on its own, then the yearly totals are added (2020, 2019, 2018 order).
sumM3 = sum(sum(dataM[year]) for year in ('2020', '2019', '2018'))
sumM3
Out[19]:
In [20]:
# Total number of complaints from women across 2018-2020: each year column is
# summed on its own, then the yearly totals are added (2020, 2019, 2018 order).
sumW3 = sum(sum(dataW[year]) for year in ('2020', '2019', '2018'))
sumW3
Out[20]:
In [ ]:
'PROJECT > 개인' 카테고리의 다른 글
Spark 기본 학습 (0) | 2022.04.19 |
---|---|
MyInTime; 프로젝트 개발 및 배포 (0) | 2020.12.29 |
민원 데이터 2020 전체 (0) | 2020.12.29 |