AGE_YEARLY
In [1]:
import numpy as np
import pandas as pd

# 시각화
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl

from IPython.core.display import display, HTML
display(HTML("<style>.container {width:90% !important;}</style>"))
In [2]:
M_10 = pd.read_csv('../csv2/M_10_YEARLY.csv', encoding='cp949')
M_20 = pd.read_csv('../csv2/M_20_YEARLY.csv', encoding='cp949')
M_30 = pd.read_csv('../csv2/M_30_YEARLY.csv', encoding='cp949')
M_40 = pd.read_csv('../csv2/M_40_YEARLY.csv', encoding='cp949')
M_50 = pd.read_csv('../csv2/M_50_YEARLY.csv', encoding='cp949')
In [3]:
W_10 = pd.read_csv('../csv2/W_10_YEARLY.csv', encoding='cp949')
W_20 = pd.read_csv('../csv2/W_20_YEARLY.csv', encoding='cp949')
W_30 = pd.read_csv('../csv2/W_30_YEARLY.csv', encoding='cp949')
W_40 = pd.read_csv('../csv2/W_40_YEARLY.csv', encoding='cp949')
W_50 = pd.read_csv('../csv2/W_50_YEARLY.csv', encoding='cp949')
In [4]:
# M_10= pd.DataFrame(...)
M_10['age']=10
M_10=M_10.pivot(index='age', columns='label', values='hits')
M_10
Out[4]:
label 20180101 20190101 20200101
age
10 15687 21103 6015
In [5]:
M_20['age']=20
M_20=M_20.pivot(index='age', columns='label', values='hits')
M_20
Out[5]:
label 20180101 20190101 20200101
age
20 209308 266658 125934
In [6]:
M_30['age']= 30
M_30=M_30.pivot(index='age', columns='label', values='hits')  #데이터 재구조화 

M_40['age']= 40
M_40=M_40.pivot(index='age', columns='label', values='hits')

M_50['age']= 50
M_50=M_50.pivot(index='age', columns='label', values='hits')
In [7]:
dataM = pd.concat([M_10, M_20, M_30, M_40, M_50], axis=0)
dataM.columns=['2018','2019','2020']
dataM  # 남자 
Out[7]:
2018 2019 2020
age
10 15687 21103 6015
20 209308 266658 125934
30 570136 1152732 702670
40 562894 889950 505705
50 310630 423603 278065
In [8]:
W_10['age']=10
W_10=W_10.pivot(index='age', columns='label', values='hits')  

W_20['age']=20
W_20=W_20.pivot(index='age', columns='label', values='hits')

W_30['age']= 30
W_30=W_30.pivot(index='age', columns='label', values='hits') 

W_40['age']= 40
W_40=W_40.pivot(index='age', columns='label', values='hits')

W_50['age']= 50
W_50=W_50.pivot(index='age', columns='label', values='hits')

dataW = pd.concat([W_10, W_20, W_30, W_40, W_50], axis=0)
dataW.columns=['2018','2019','2020']
dataW  # 여자 
Out[8]:
2018 2019 2020
age
10 19129 12704 2102
20 172311 178709 54883
30 510445 981757 437177
40 342044 713889 382116
50 194242 290498 271743
In [9]:
dataM.plot.bar()
plt.title('Man')
# y축은 값이 100000 이상 넘어가서 저렇게 표기됨 
Out[9]:
Text(0.5, 1.0, 'Man')
In [10]:
dataW.plot.bar()
plt.title('Woman')

# dataW.plot.bar().x
Out[10]:
Text(0.5, 1.0, 'Woman')
In [11]:
# 2019년도 30대 월별 결과
M_30_2019 = pd.read_csv('../csv2/M_30_MONTHLY.csv', encoding='cp949')
W_30_2019 = pd.read_csv('../csv2/W_30_MONTHLY.csv', encoding='cp949')
In [12]:
M_30_2019
Out[12]:
hits label
0 67998 20190101
1 65284 20190201
2 65492 20190301
3 83462 20190401
4 133628 20190501
5 173849 20190601
6 110914 20190701
7 76746 20190801
8 90342 20190901
9 88465 20191001
10 95025 20191101
11 97156 20191201
In [13]:
M_30_2019['sex']= 'M'
M_30_2019=M_30_2019.pivot(index='label', columns='sex', values='hits')
M_30_2019
Out[13]:
sex M
label
20190101 67998
20190201 65284
20190301 65492
20190401 83462
20190501 133628
20190601 173849
20190701 110914
20190801 76746
20190901 90342
20191001 88465
20191101 95025
20191201 97156
In [14]:
W_30_2019['sex']= 'W'
W_30_2019=W_30_2019.pivot(index='label', columns='sex', values='hits')  
W_30_2019
Out[14]:
sex W
label
20190101 86632
20190201 51532
20190301 56352
20190401 60964
20190501 125612
20190601 143827
20190701 80557
20190801 66555
20190901 74154
20191001 70279
20191101 83229
20191201 77972
In [15]:
data2019 = pd.concat([M_30_2019, W_30_2019], axis=1)
data2019.index= ['01','02','03','24','05','06','07','08','09','10','11','12']
data2019
Out[15]:
sex M W
01 67998 86632
02 65284 51532
03 65492 56352
24 83462 60964
05 133628 125612
06 173849 143827
07 110914 80557
08 76746 66555
09 90342 74154
10 88465 70279
11 95025 83229
12 97156 77972
In [16]:
data2019.plot.bar()
plt.title('2019_30_MONTHLY')
Out[16]:
Text(0.5, 1.0, '2019_30_MONTHLY')
In [17]:
# 2020년 남자 총 민원 건수 
sumM=sum(dataM['2020'])
sumM
Out[17]:
1618389
In [18]:
# 2020년 여자 총 민원 건수 
sumW=sum(dataW['2020'])
sumW
Out[18]:
1148021
In [19]:
# 2018~2020 남자 총 민원 건수 
sumM3 = sum(dataM['2020'])+sum(dataM['2019'])+sum(dataM['2018'])
sumM3
Out[19]:
6041090
In [20]:
# 2018~2020 여자 총 민원 건수 
sumW3 = sum(dataW['2020'])+sum(dataW['2019'])+sum(dataW['2018'])
sumW3
Out[20]:
4563749
In [ ]:
 

'PROJECT > 개인' 카테고리의 다른 글

Spark 기본 학습  (0) 2022.04.19
MyInTime; 프로젝트 개발 및 배포  (0) 2020.12.29
민원 데이터 2020 전체  (0) 2020.12.29
블로그 이미지

hjc_

୧( “̮ )୨

,