Sunteți pe pagina 1din 4

Customer Behavior Analytics

Zakarie Hashi
12 October, 2019

Problem Statement
This is a public dataset from IBM Watson Analytics. They want to understand customer demographics
and buying behavior and identify the most profitable customers and how they interact.

Executive Summary
I have identified two customer segments:

• Renewal offer type and employment status are correlated to success of engagement rate.
• A strategy targeting customers who are retired and customers with the Offer2 renewal type yields
positive outcomes (engagement) of 28% and 22% respectively, vs. 8% otherwise.

Analysis

Customer marketing strategy analysis


Correlation to Engagement response rate
employment_status Unemployed Employed Medical.Leave Retired
renew_offer_type Offer3 Disabled Offer1 Offer2
Urban
location_code Offer4 Rural Married Suburban
Call.Center Single
marital_status
Branch Web Divorced
sales_chanel
bin4 bin2 bin3 Agent
total_claim_amount bin1
bin2 bin1 bin3
months_since_policy_inception
bin4 Medsize Large
vehicle_size
Small bin2 bin3 bin1
number_of_policies
bin3 bin1 bin2 bin4
customer_life_time
bin4 bin2 bin1 bin3 bin1
monthly_since_last_claim Two.Door.Car
bin2 bin4 bin3 SUV
monthly_premium_auto Four.Door.Car Luxury.SUV Sports.Car Special.L1
vehicle_class Corporate.L2 Luxury.Car
Personal.L1 Special.L3
policy Corporate.L1
Personal.L3 Special.L2 Master Corporate.L3
education
Personal.L2 Bachelor College Doctor
policy_type
High.School.or.Below
Personal.Auto bin1 Special.Auto
income
Corporate.Auto bin3 bin2 Oregon
state
Nevada Arizona California
gender Washington M
F Basic
coverage Extended Premium
−0.4 −0.2 0.0 0.2 0.4
Correlation to Engagement rate

1
Customer Segmentation Marketing Strategy
Targeting customers that have been retired or those with offer2 renewal type
100%
n: 5523
prop: 92.0%
n: 2225
75% prop: 78.1% n: 204
prop: 72.3%
Percent

50%

n: 78
25% n: 624 prop: 27.7%
prop: 21.9%
n: 480
prop: 8.0%
0%

High Potential(Offer2) Highly Potential (Retired) Normal


Critical

response No Yes

Future Work
Build predictive analytics to analyze the most profitable customers.

Reproducible Code
Here is the code I used to produce these high-quality visualizations.

• load the data


• prepare the data
• feature engineering
• correlation analysis
• visualizations

If you are wondering what the bins in the first graph are, I grouped the numerical features into 4 bins
based on their quantiles.
Get the data from kaggle: https://www.kaggle.com/pankajjsh06/ibm-watson-marketing-customer-value-data

#-----------Step 0 : libraries
#
# library(data.table)

2
# library(tidyverse)
# library(tidyquant)
# library(recipes)
# library(mlbench)
#----------- step 1 : get the data
#
# data_tbl <- read.csv('marketing_customer_value.csv')
#
# ## just renaming the data
# names_data <- c("customer","state","customer_life_time","response","coverage",
# "education","effective_to_date", "employment_status","gender",
# "income","location_code","marital_status","monthly_premium_auto",
# "monthly_since_last_claim","months_since_policy_inception",
# "number_of_open_complaints","number_of_policies","policy_type",
# "policy","renew_offer_type","sales_chanel","total_claim_amount",
# "vehicle_class","vehicle_size")
# names(data_tbl) <- names_data
#
#-----------step 2 : prepare data
# recipe_obj <- recipe(~., data = data_tbl) %>%
# # remove ID
# step_rm(customer,effective_to_date) %>%
# # Convert all vars into binary or numeric
# step_discretize(all_numeric(), options = list(min_unique = 1))%>%
# # one hot encoding
# step_dummy(all_nominal(),one_hot = TRUE, naming = partial(dummy_names,sep = '__'))%>%
# prep()
#
# data_transformed_tbl <- data_tbl %>%
# recipes::bake(recipe_obj,new_data=.)
#
#
#----------- step3: analysis
# # correlation analysis
# correlate_tbl <- data_transformed_tbl %>%
# cor(y = data_transformed_tbl$response__Yes) %>%
# as_tibble(rownames = 'feature') %>%
# rename(response__Yes=V1) %>%
# separate(feature, into = c('feature','bin'),sep = '__') %>%
# filter(!is.na(response__Yes)) %>%
# filter(!str_detect(feature,'response' )) %>%
# arrange(abs(response__Yes) %>% desc ()) %>%
# mutate(feature = as_factor(feature) %>% fct_rev())
#
# # visualize the correlation
#
# correlate_tbl %>%
# p1 = ggplot(aes(response__Yes, y = feature, text=bin)) +
#
# # geometries
# geom_vline(xintercept = 0, linetype = 2, color = 'red') +
# geom_vline(xintercept = 0.15, linetype = 2, color = 'green', size = 1.1) +
# geom_point(color = '#2c3e50') +

3
# ggrepel::geom_text_repel(aes(label = bin), size = 3, color = '#2c3e50')+
# # formatting
# expand_limits(x = c(-0.4,0.4)) +
# theme_tq() +
# labs(title = 'Customer marketing strategy analysis',
# subtitle = 'Correlation to Engagement response rate',
# y = "", x = 'Correlation to Engagement rate' )
#
# # interpret the correlation - what are the bins
# bins_tbl <- recipe_obj %>% tidy(2)
#
#-----------Step 5 STRATEGY ----
# # - FOCUS ON employment_status == "Retired" and renew_offer_type == "Offer2"
#
# strategy_tbl <- data_tbl%>%
# dplyr::select(employment_status,renew_offer_type, response) %>%
# dplyr::mutate(Critical = case_when(
# employment_status =="Retired" ~"Highly Potential (Retired)",
# renew_offer_type == "Offer2" ~ "High Potential(Offer2)",
# TRUE ~ "Normal"
# )) %>%
# group_by(Critical) %>%
# count(response) %>%
# mutate(prop = n / sum(n)) %>%
# ungroup() %>%
# mutate(label_text = str_glue("n: {n}
# prop: {scales::percent(prop)}"))
#
#
#-----------Step 6 REPORT RESULTS ----
#
# p2 = strategy_tbl %>%
# ggplot(aes(Critical, prop, fill = response)) +
# geom_col() +
# geom_label(aes(label = label_text), fill = "white", color = "#2c3e50") +
# scale_fill_tq() +
# scale_y_continuous(labels = scales::percent_format()) +
# theme_tq() +
# labs(y = 'Percent',title = "Customer Segmentation Marketing Strategy",
# subtitle = str_glue("Targeting customers that have been retired or those with offer2 renewal ty
#
# )

S-ar putea să vă placă și