240923
미니프로젝트1
유통고객 구매패턴 데이터 전처리
In [8]:
!pip install matplotlib
!pip install --upgrade matplotlib
import matplotlib.pyplot as plt
Requirement already satisfied: matplotlib in c:\users\user\anaconda3\lib\site-packages (3.9.2)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (1.4.4)
Requirement already satisfied: numpy>=1.23 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (1.26.4)
Requirement already satisfied: packaging>=20.0 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (23.2)
Requirement already satisfied: pillow>=8 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (10.3.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (2.9.0.post0)
Requirement already satisfied: six>=1.5 in c:\users\user\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)
Requirement already satisfied: matplotlib in c:\users\user\anaconda3\lib\site-packages (3.9.2)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (1.4.4)
Requirement already satisfied: numpy>=1.23 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (1.26.4)
Requirement already satisfied: packaging>=20.0 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (23.2)
Requirement already satisfied: pillow>=8 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (10.3.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\user\anaconda3\lib\site-packages (from matplotlib) (2.9.0.post0)
Requirement already satisfied: six>=1.5 in c:\users\user\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)
In [64]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [66]:
customers = pd.read_csv('customers.csv')
sales = pd.read_csv('sales.csv')
products = pd.read_csv('products.csv')
In [67]:
customers.head()
Out[67]:
CustomerIDRegisterDateGenderBirthYearAddr1Addr201234
c328222 | 2014-09-25 | F | 1960 | 강원도 | 원주시 |
c281448 | 2013-06-18 | F | 1974 | 강원도 | 원주시 |
c038336 | 2003-10-10 | F | 1968 | 강원도 | 춘천시 |
c084237 | 2007-03-09 | F | 1982 | 강원도 | 강릉시 |
c162600 | 2010-06-14 | F | 1978 | 강원도 | 속초시 |
In [68]:
sales.head()
Out[68]:
OrderIDSeqOrderDateProductIDQtyAmtCustomerID01234
107 | 2 | 2016-01-02 | p1036481 | 2 | 2100 | c150417 |
69 | 1 | 2016-01-02 | p1152861 | 1 | 1091 | c212716 |
69 | 7 | 2016-01-02 | p1013161 | 1 | 2600 | c212716 |
69 | 8 | 2016-01-02 | p1005771 | 1 | 1650 | c212716 |
69 | 11 | 2016-01-02 | p1089531 | 1 | 2600 | c212716 |
In [69]:
sales.info()
sales.describe()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70012 entries, 0 to 70011
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 OrderID 70012 non-null int64
1 Seq 70012 non-null int64
2 OrderDate 70012 non-null object
3 ProductID 70012 non-null object
4 Qty 70012 non-null int64
5 Amt 70012 non-null int64
6 CustomerID 70012 non-null object
dtypes: int64(4), object(3)
memory usage: 3.7+ MB
Out[69]:
OrderIDSeqQtyAmtcountmeanstdmin25%50%75%max
70012.000000 | 70012.000000 | 70012.000000 | 70012.000000 |
101.857996 | 4.892233 | 1.238588 | 2785.229218 |
62.229614 | 4.255965 | 1.419127 | 2734.617603 |
3.000000 | 1.000000 | -44.000000 | -67650.000000 |
49.000000 | 2.000000 | 1.000000 | 1300.000000 |
95.000000 | 4.000000 | 1.000000 | 1950.000000 |
145.000000 | 7.000000 | 1.000000 | 3227.000000 |
385.000000 | 40.000000 | 200.000000 | 109100.000000 |
In [70]:
# sales의 Qty, Amt 에 음수 있다?
# 음수 값 제거
sales = sales[(sales['Qty'] >= 0) & (sales['Amt']> 0)]
sales
Out[70]:
OrderIDSeqOrderDateProductIDQtyAmtCustomerID01234...7000770008700097001070011
107 | 2 | 2016-01-02 | p1036481 | 2 | 2100 | c150417 |
69 | 1 | 2016-01-02 | p1152861 | 1 | 1091 | c212716 |
69 | 7 | 2016-01-02 | p1013161 | 1 | 2600 | c212716 |
69 | 8 | 2016-01-02 | p1005771 | 1 | 1650 | c212716 |
69 | 11 | 2016-01-02 | p1089531 | 1 | 2600 | c212716 |
... | ... | ... | ... | ... | ... | ... |
53 | 4 | 2017-03-31 | p1072601 | 1 | 4600 | c337999 |
53 | 6 | 2017-03-31 | p1178011 | 1 | 8800 | c337999 |
55 | 6 | 2017-03-31 | p1054261 | 1 | 2091 | c088320 |
59 | 4 | 2017-03-31 | p1175481 | 1 | 1300 | c238056 |
59 | 5 | 2017-03-31 | p1013161 | 1 | 2950 | c238056 |
69323 rows × 7 columns
In [71]:
customers.head()
Out[71]:
CustomerIDRegisterDateGenderBirthYearAddr1Addr201234
c328222 | 2014-09-25 | F | 1960 | 강원도 | 원주시 |
c281448 | 2013-06-18 | F | 1974 | 강원도 | 원주시 |
c038336 | 2003-10-10 | F | 1968 | 강원도 | 춘천시 |
c084237 | 2007-03-09 | F | 1982 | 강원도 | 강릉시 |
c162600 | 2010-06-14 | F | 1978 | 강원도 | 속초시 |
In [72]:
# 날짜를 datetime 형식으로 변경
customers['RegisterDate'] = pd.to_datetime(customers['RegisterDate'])
sales['OrderDate'] = pd.to_datetime(sales['OrderDate'])
In [76]:
# 확인
customers['RegisterDate'].info()
sales['OrderDate'].info()
<class 'pandas.core.series.Series'>
RangeIndex: 2243 entries, 0 to 2242
Series name: RegisterDate
Non-Null Count Dtype
-------------- -----
2243 non-null datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 17.7 KB
<class 'pandas.core.series.Series'>
Index: 69323 entries, 0 to 70011
Series name: OrderDate
Non-Null Count Dtype
-------------- -----
69323 non-null datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 1.1 MB
1. 일별 매출액
In [112]:
# 2016-01-01 ~ 2016-03-31 기간 동안 일별 매출액
daily_sales = sales.groupby('OrderDate', as_index = False)[['Amt']].sum()
# 방법1
daily_sales = sales.loc[sales['OrderDate'] < '2016-04-01']
daily_sales
# 방법2
daily_sales = sales.loc[sales['Orderdate'].between('2016-01-01', '2016-03-31')]
daily_sales
Out[112]:
"daily_sales = sales.loc[sales['Orderdate'].between('2016-01-01', '2016-03-31')]\ndaily_sales"
In [114]:
daily_sales
Out[114]:
OrderIDSeqOrderDateProductIDQtyAmtCustomerID01234...1406914070140711407214073
107 | 2 | 2016-01-02 | p1036481 | 2 | 2100 | c150417 |
69 | 1 | 2016-01-02 | p1152861 | 1 | 1091 | c212716 |
69 | 7 | 2016-01-02 | p1013161 | 1 | 2600 | c212716 |
69 | 8 | 2016-01-02 | p1005771 | 1 | 1650 | c212716 |
69 | 11 | 2016-01-02 | p1089531 | 1 | 2600 | c212716 |
... | ... | ... | ... | ... | ... | ... |
63 | 2 | 2016-03-31 | p1175481 | 1 | 1300 | c304973 |
67 | 1 | 2016-03-31 | p1178011 | 1 | 8800 | c115575 |
67 | 2 | 2016-03-31 | p1162631 | 1 | 4600 | c115575 |
67 | 3 | 2016-03-31 | p1002841 | 1 | 11000 | c115575 |
71 | 1 | 2016-03-31 | p1178011 | 1 | 8800 | c222420 |
13954 rows × 7 columns
2. 월별 매출액
In [123]:
sales['Year'] = sales['OrderDate'].dt.year
sales['Month'] = sales['OrderDate'].dt.month
sales.head()
Out[123]:
OrderIDSeqOrderDateProductIDQtyAmtCustomerIDYearMonth01234
107 | 2 | 2016-01-02 | p1036481 | 2 | 2100 | c150417 | 2016 | 1 |
69 | 1 | 2016-01-02 | p1152861 | 1 | 1091 | c212716 | 2016 | 1 |
69 | 7 | 2016-01-02 | p1013161 | 1 | 2600 | c212716 | 2016 | 1 |
69 | 8 | 2016-01-02 | p1005771 | 1 | 1650 | c212716 | 2016 | 1 |
69 | 11 | 2016-01-02 | p1089531 | 1 | 2600 | c212716 | 2016 | 1 |
In [125]:
monthly_sales = sales.groupby(['Year', 'Month'], as_index =False)[['Amt']].sum()
monthly_sales
Out[125]:
YearMonthAmt01234567891011121314
2016 | 1 | 14369524 |
2016 | 2 | 14544225 |
2016 | 3 | 17237214 |
2016 | 4 | 15332449 |
2016 | 5 | 14085203 |
2016 | 6 | 13286267 |
2016 | 7 | 10829941 |
2016 | 8 | 10317928 |
2016 | 9 | 10716686 |
2016 | 10 | 10097162 |
2016 | 11 | 9974754 |
2016 | 12 | 13181366 |
2017 | 1 | 14952173 |
2017 | 2 | 12730328 |
2017 | 3 | 15786274 |
3. 요일별 매출액 평균
In [139]:
# datetime 형 자료에 dt.day_name() 함수
sales['Weekday'] = sales['OrderDate'].dt.day_name()
weekday_sales = sales.groupby('Weekday', as_index = False)[['Amt']].mean()
# 보기 좋게 소수점 둘째까지 표기
weekday_sales['Amt'] = round(weekday_sales['Amt'],2)
weekday_sales
Out[139]:
WeekdayAmt0123456
Friday | 2884.69 |
Monday | 2813.86 |
Saturday | 2915.48 |
Sunday | 2855.51 |
Thursday | 2812.30 |
Tuesday | 2809.22 |
Wednesday | 2852.96 |
4. 일별 고객 1인당 평균 구매액
In [158]:
# 일별 고객 당 구매액 합 -> 날짜별 Amt 평균
order_amt = sales.groupby(['OrderDate', 'CustomerID'], as_index = False)[['Amt']].sum()
order_amt
order_amt2 = order_amt.groupby('OrderDate', as_index = False)[['Amt']].mean()
order_amt2
order_amt2['Amt'] = round(order_amt2['Amt'],2)
order_amt2
Out[158]:
OrderDateAmt01234...442443444445446
2016-01-02 | 7624.76 |
2016-01-03 | 5415.44 |
2016-01-04 | 6716.14 |
2016-01-05 | 6608.37 |
2016-01-06 | 6844.85 |
... | ... |
2017-03-27 | 5720.45 |
2017-03-28 | 6382.09 |
2017-03-29 | 5868.83 |
2017-03-30 | 5679.09 |
2017-03-31 | 5972.75 |
447 rows × 2 columns
5. 일별 방문 수
In [161]:
daily_visit = sales.groupby('OrderDate', as_index = False)[['CustomerID']].count()
daily_visit
Out[161]:
OrderDateCustomerID01234...442443444445446
2016-01-02 | 169 |
2016-01-03 | 75 |
2016-01-04 | 203 |
2016-01-05 | 152 |
2016-01-06 | 143 |
... | ... |
2017-03-27 | 174 |
2017-03-28 | 197 |
2017-03-29 | 142 |
2017-03-30 | 147 |
2017-03-31 | 157 |
447 rows × 2 columns
In [163]:
# order_amt2에 daily_visit 합치기 (OrderDate 기준)
order_amt2 = pd.merge(order_amt2, daily_visit, how = 'inner', on = 'OrderDate')
order_amt2
Out[163]:
OrderDateAmtCustomerID01234...442443444445446
2016-01-02 | 7624.76 | 169 |
2016-01-03 | 5415.44 | 75 |
2016-01-04 | 6716.14 | 203 |
2016-01-05 | 6608.37 | 152 |
2016-01-06 | 6844.85 | 143 |
... | ... | ... |
2017-03-27 | 5720.45 | 174 |
2017-03-28 | 6382.09 | 197 |
2017-03-29 | 5868.83 | 142 |
2017-03-30 | 5679.09 | 147 |
2017-03-31 | 5972.75 | 157 |
447 rows × 3 columns
6. 매출 상위 top10 상품
In [167]:
sales
Out[167]:
OrderIDSeqOrderDateProductIDQtyAmtCustomerIDYearMonthDayWeekday01234...7000770008700097001070011
107 | 2 | 2016-01-02 | p1036481 | 2 | 2100 | c150417 | 2016 | 1 | 2 | Saturday |
69 | 1 | 2016-01-02 | p1152861 | 1 | 1091 | c212716 | 2016 | 1 | 2 | Saturday |
69 | 7 | 2016-01-02 | p1013161 | 1 | 2600 | c212716 | 2016 | 1 | 2 | Saturday |
69 | 8 | 2016-01-02 | p1005771 | 1 | 1650 | c212716 | 2016 | 1 | 2 | Saturday |
69 | 11 | 2016-01-02 | p1089531 | 1 | 2600 | c212716 | 2016 | 1 | 2 | Saturday |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
53 | 4 | 2017-03-31 | p1072601 | 1 | 4600 | c337999 | 2017 | 3 | 31 | Friday |
53 | 6 | 2017-03-31 | p1178011 | 1 | 8800 | c337999 | 2017 | 3 | 31 | Friday |
55 | 6 | 2017-03-31 | p1054261 | 1 | 2091 | c088320 | 2017 | 3 | 31 | Friday |
59 | 4 | 2017-03-31 | p1175481 | 1 | 1300 | c238056 | 2017 | 3 | 31 | Friday |
59 | 5 | 2017-03-31 | p1013161 | 1 | 2950 | c238056 | 2017 | 3 | 31 | Friday |
69323 rows × 11 columns
In [185]:
top_amt1 = pd.merge(sales, products, how = 'inner', on = 'ProductID')
top_amt2 = top_amt1.groupby('ProductName', as_index = False)[['Amt']].sum()
top10_amt = top_amt2.sort_values('Amt', ascending = False)
# 필요 없는 인덱스 제거
top10_amt.reset_index(drop = True, inplace = True)
top10_amt.head(10)
Out[185]:
ProductNameAmt0123456789
우유1000 | 18279576 |
사과_소 | 14255378 |
딸기_대 | 11130009 |
두부_대 | 10208347 |
딸기_소 | 7632034 |
두부_소 | 7384068 |
콩나물 | 7038823 |
유기농우유 | 6484073 |
토마토 | 6420841 |
참외 | 5528830 |
7. 카테고리별 매출 비중
In [194]:
cate_amt = pd.merge(sales, products, how = 'inner', on = 'ProductID')
cate_amt2 = cate_amt.groupby('Category', as_index = False)[['Amt']].sum()
cate_amt2['Rate'] = round(cate_amt2['Amt']/cate_amt2['Amt'].sum(), 2)
cate_amt2
Out[194]:
CategoryAmtRate01234
간식 | 13135643 | 0.07 |
과일 | 50449677 | 0.26 |
반찬류 | 32533741 | 0.16 |
유제품 | 45679626 | 0.23 |
채소 | 55642807 | 0.28 |
8. 요일별 매출 비중
In [205]:
day_amt = pd.merge(sales, products, how = 'inner', on = 'ProductID')
# OrderDate 자료형을 datetime으로 변환
day_amt['OrderDate'] = pd.to_datetime(day_amt['OrderDate'])
day_amt['Weekday'] = day_amt['OrderDate'].dt.day_name()
day_amt2 = day_amt.groupby('Weekday', as_index = False)[['Amt']].sum()
day_amt2['Rate'] = round(day_amt2['Amt']/day_amt2['Amt'].sum(),2)
day_amt2
Out[205]:
WeekdayAmtRate0123456
Friday | 31342112 | 0.16 |
Monday | 34925582 | 0.18 |
Saturday | 29035310 | 0.15 |
Sunday | 18537988 | 0.09 |
Thursday | 26998060 | 0.14 |
Tuesday | 28418076 | 0.14 |
Wednesday | 28184366 | 0.14 |
9. 고객 나이대
In [212]:
# 기준연도 2016
customers['Age'] = 2016- customers['BirthYear']
customers['AgeGroup'] = customers['Age']//10 *10
customers.head()
Out[212]:
CustomerIDRegisterDateGenderBirthYearAddr1Addr2AgeAgeGroup01234
c328222 | 2014-09-25 | F | 1960 | 강원도 | 원주시 | 56 | 50 |
c281448 | 2013-06-18 | F | 1974 | 강원도 | 원주시 | 42 | 40 |
c038336 | 2003-10-10 | F | 1968 | 강원도 | 춘천시 | 48 | 40 |
c084237 | 2007-03-09 | F | 1982 | 강원도 | 강릉시 | 34 | 30 |
c162600 | 2010-06-14 | F | 1978 | 강원도 | 속초시 | 38 | 30 |
고객 이탈 정의
- 대상 고객
- 2014 ~ 2016년 신규 가입 고객 이면서,
- 2016년 하반기에 한번 이상 방문한 고객이 대상 고객입니다.
- 위 대상 고객 중, 2017년 1~3월(3개월)동안 방문(구매)하지 않은 사람은 이탈로 간주합니다.
In [270]:
# OrderDate의 자료형 Datetime으로 변경
customers['RegisterDate'] = pd.to_datetime(customers['RegisterDate'])
sales['OrderDate'] = pd.to_datetime(sales['OrderDate'])
# 2014 ~ 2016 기간 내 신규 가입고객
cust01 = customers.loc[customers['RegisterDate'].between('2014-01-01', '2016-12-31')]
# 2016년 하반기 방문 고객
cust02 = sales.loc[sales['OrderDate'].between('2016-07-01', '2016-12-31')]
# 3개월 동안 구매한 사람
cust03 = sales.loc[sales['OrderDate'].between('2017-01-03', '2017-03-31')]
cust03['churn'] = 0
C:\Users\User\AppData\Local\Temp\ipykernel_22552\3707145440.py:13: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
cust03['churn'] = 0
In [272]:
# drop_duplicates 함수로 중복된 고객 제거
cust02 = cust02.drop_duplicates(subset = 'CustomerID', keep = 'first', inplace = False, ignore_index = True)
cust03 = cust03.drop_duplicates(subset = 'CustomerID', keep = 'first', inplace = False, ignore_index = True)
In [276]:
cust_churn0 = pd.merge(cust01, cust02, how ='inner', on = 'CustomerID')
# NaN가 이탈자를 의미하도록 'left'
cust_churn0 = pd.merge(cust_churn0, cust03, how = 'left', on = 'CustomerID')
cust_churn0
Out[276]:
CustomerIDRegisterDateGenderBirthYearAddr1Addr2AgeAgeGroupOrderID_xSeq_x...Seq_yOrderDate_yProductID_yQty_yAmt_yYear_yMonth_yDay_yWeekday_ychurn01234...541542543544545
c328222 | 2014-09-25 | F | 1960 | 강원도 | 원주시 | 56 | 50 | 99 | 1 | ... | 1.0 | 2017-01-04 | p1255281 | 1.0 | 1182.0 | 2017.0 | 1.0 | 4.0 | Wednesday | 0.0 |
c354310 | 2015-07-16 | F | 1964 | 강원도 | 원주시 | 52 | 50 | 181 | 3 | ... | NaN | NaT | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
c390828 | 2016-10-15 | F | 1984 | 강원도 | 원주시 | 32 | 30 | 49 | 14 | ... | NaN | NaT | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
c386399 | 2016-08-18 | F | 1972 | 강원도 | 원주시 | 44 | 40 | 121 | 4 | ... | NaN | NaT | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
c367116 | 2015-12-29 | F | 1970 | 강원도 | 원주시 | 46 | 40 | 169 | 3 | ... | 2.0 | 2017-02-17 | p1005891 | 1.0 | 1950.0 | 2017.0 | 2.0 | 17.0 | Friday | 0.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
c341674 | 2015-02-28 | M | 1973 | 인천광역시 | 중구 | 43 | 40 | 47 | 7 | ... | 1.0 | 2017-01-05 | p1149391 | 1.0 | 818.0 | 2017.0 | 1.0 | 5.0 | Thursday | 0.0 |
c316736 | 2014-05-17 | F | 1976 | 인천광역시 | 중구 | 40 | 40 | 193 | 3 | ... | 1.0 | 2017-02-02 | p1005771 | 1.0 | 1665.0 | 2017.0 | 2.0 | 2.0 | Thursday | 0.0 |
c310508 | 2014-03-12 | F | 1981 | 전라남도 | 해남군 | 35 | 30 | 155 | 1 | ... | 2.0 | 2017-02-21 | p1012811 | 1.0 | 3250.0 | 2017.0 | 2.0 | 21.0 | Tuesday | 0.0 |
c352708 | 2015-06-26 | F | 1980 | 충청남도 | 당진시 | 36 | 30 | 125 | 3 | ... | 4.0 | 2017-01-04 | p1235451 | 1.0 | 1227.0 | 2017.0 | 1.0 | 4.0 | Wednesday | 0.0 |
c395061 | 2016-12-08 | F | 1958 | 충청북도 | 청원군 | 58 | 50 | 79 | 1 | ... | 1.0 | 2017-01-13 | p1194801 | 1.0 | 8400.0 | 2017.0 | 1.0 | 13.0 | Friday | 0.0 |
546 rows × 29 columns
In [282]:
cust_churn0['churn'].isnull().sum()
cust_churn0.fillna(1, inplace = True)
C:\Users\User\AppData\Local\Temp\ipykernel_22552\467377577.py:2: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '1' has dtype incompatible with datetime64[ns], please explicitly cast to a compatible dtype first.
cust_churn0.fillna(1, inplace = True)
In [289]:
cust_churn0['churn'].astype(int)
cust_churn0['churn'].value_counts()
Out[289]:
churn
0.0 362
1.0 184
Name: count, dtype: int64