# Page 1 of 4 (pagination text carried over from the document viewer)

## Step 0: Clear the working console

# "\014" is the form-feed control character (octal 014). Printing it is meant
# to clear the console; presumably this relies on an RStudio-style front end
# that honours form feed -- confirm for the environment in use.
cat("\014")
###################################################################################
###################################

## Step 1: Load the libraries whose functions are called to build the scorecard
library("smbinning")
library("tcltk")
library("ROCR")
library("Hmisc")
###################################################################################
###################################

## Step 2: Read the data

# Tab-delimited input; the path is specific to the author's machine.
# stringsAsFactors = TRUE is spelled out because the default became FALSE in
# R 4.0.0, while Step 3 recodes the categorical columns through `levels<-`,
# which only has the intended effect on factor columns.
data <- read.delim(
  "C:/Users/Alexia/Desktop/scoring/data.txt",
  stringsAsFactors = TRUE
)
###################################################################################
###################################

## Step 3: Bin the variables into groups and compute the WoE and IV indicators

# a Comportament_anterior:
# WoE/IV table for the raw categories.
result0.Comportament_anterior <- smbinning.factor(
  df = data, y = "Status_Bun", x = "Comportament_anterior"
)
View(result0.Comportament_anterior$ivtable)

# Recode the categories. The assignment is positional: the i-th current level
# of the factor receives the i-th code below. Mapping noted by the author:
#   "Excelent" = _2, "Foarte Bun" = _1, "Foarte Slab" = _4, "Mediu" = _0
# The fifth level (-> _3) is not named in the original note -- TODO confirm.
levels(data$Comportament_anterior) <- c("_2", "_1", "_4", "_0", "_3")
# Reorder the levels so that they run from "_0" to "_4".
data$Comportament_anterior <- factor(
  data$Comportament_anterior,
  levels = c("_0", "_1", "_2", "_3", "_4")
)

# WoE/IV table for the recoded variable.
result.Comportament_anterior <- smbinning.factor(
  df = data, y = "Status_Bun", x = "Comportament_anterior"
)
View(result.Comportament_anterior$ivtable)

# b Ocupatie:
# WoE/IV table for the raw categories.
result0.Ocupatie <- smbinning.factor(
  df = data,
  y = "Status_Bun",
  x = "Ocupatie"
)
View(result0.Ocupatie$ivtable)

# Positional recode (i-th current level -> i-th code), then reorder the
# levels so that they run from "_0" to "_3".
levels(data[["Ocupatie"]]) <- c("_0", "_2", "_3", "_1")
data[["Ocupatie"]] <- factor(
  data[["Ocupatie"]],
  levels = c("_0", "_1", "_2", "_3")
)

# WoE/IV table for the recoded variable.
result.Ocupatie <- smbinning.factor(
  df = data,
  y = "Status_Bun",
  x = "Ocupatie"
)
View(result.Ocupatie$ivtable)

# c Rezidenta:
# WoE/IV table for the raw categories.
result0.Rezidenta <- smbinning.factor(
  df = data,
  y = "Status_Bun",
  x = "Rezidenta"
)
View(result0.Rezidenta$ivtable)

# Positional recode (i-th current level -> i-th code), then reorder the
# levels so that they run from "_0" to "_2".
levels(data[["Rezidenta"]]) <- c("_2", "_1", "_0")
data[["Rezidenta"]] <- factor(
  data[["Rezidenta"]],
  levels = c("_0", "_1", "_2")
)

# WoE/IV table for the recoded variable.
result.Rezidenta <- smbinning.factor(
  df = data,
  y = "Status_Bun",
  x = "Rezidenta"
)
View(result.Rezidenta$ivtable)
# d Sex:
# WoE/IV table for the raw categories.
result0.Sex <- smbinning.factor(df = data, y = "Status_Bun", x = "Sex")
View(result0.Sex$ivtable)

# Positional recode of the two categories. The previous code assigned the
# five-code vector used for Comportament_anterior, which renamed the first
# level to "_2"; the re-levelling to c("_0", "_1") below then turned every
# observation of that level into NA. The second level keeps the code "_1" it
# effectively had before, so the first level becomes "_0".
# NOTE(review): assumes Sex has exactly two levels; check result0.Sex to
# confirm which category should be "_0".
levels(data$Sex) <- c("_0", "_1")
data$Sex <- factor(data$Sex, levels = c("_0", "_1"))

# WoE/IV table for the recoded variable.
result.Sex <- smbinning.factor(df = data, y = "Status_Bun", x = "Sex")
View(result.Sex$ivtable)

# e Vechime_in_raport_cu_banca:
# WoE/IV table for the raw categories.
result0.Vechime_in_raport_cu_banca <- smbinning.factor(
  df = data, y = "Status_Bun", x = "Vechime_in_raport_cu_banca"
)
View(result0.Vechime_in_raport_cu_banca$ivtable)

# Positional recode (i-th current level -> i-th code), then reorder the
# levels so that they run from "_0" to "_3". The factor is rebuilt from the
# variable itself; the previous code rebuilt it from data$Sex, overwriting
# this column with the Sex codes.
levels(data$Vechime_in_raport_cu_banca) <- c("_2", "_1", "_0", "_3")
data$Vechime_in_raport_cu_banca <- factor(
  data$Vechime_in_raport_cu_banca,
  levels = c("_0", "_1", "_2", "_3")
)

# WoE/IV table for the recoded variable.
result.Vechime_in_raport_cu_banca <- smbinning.factor(
  df = data, y = "Status_Bun", x = "Vechime_in_raport_cu_banca"
)
View(result.Vechime_in_raport_cu_banca$ivtable)

# f Suma_datorata:
# Quantiles of the amount at 0%, 20%, ..., 100% (missing values ignored).
Suma_datorata20p <- quantile(
  data$Suma_datorata,
  probs = seq(0, 1, 0.2),
  na.rm = TRUE
)
Suma_datorata20p.Breaks <- as.vector(Suma_datorata20p)
# Keep only the interior breakpoints (drop the first and the last one).
Cuts.Suma_datorata20p <-
  Suma_datorata20p.Breaks[-c(1, length(Suma_datorata20p.Breaks))]
result_Suma_datorata <- smbinning.custom(
  df = data,
  y = "Status_Bun",
  x = "Suma_datorata",
  cuts = Cuts.Suma_datorata20p
)
View(result_Suma_datorata$ivtable)

# Replace the amount by its band label. findInterval() with left.open = TRUE
# counts how many of the upper bounds lie strictly below each value, so a
# value equal to a bound stays in the lower band (same as a chain of `<=`).
data$Suma_datorata <- as.factor(
  c("0-1400", "1401-3000", "3001-4000", "4001-6500", "6501+")[
    findInterval(
      data$Suma_datorata,
      c(1400, 3000, 4000, 6500),
      left.open = TRUE
    ) + 1L
  ]
)
result0.Suma_datorata <- smbinning.factor(
  df = data,
  y = "Status_Bun",
  x = "Suma_datorata"
)
View(result0.Suma_datorata$ivtable)

# Positional recode: the first three bands are merged into "_0", the fourth
# becomes "_1" and the fifth "_2".
levels(data[["Suma_datorata"]]) <- c("_0", "_0", "_0", "_1", "_2")
data[["Suma_datorata"]] <- factor(
  data[["Suma_datorata"]],
  levels = c("_0", "_1", "_2")
)

# WoE/IV table for the recoded variable.
result.Suma_datorata <- smbinning.factor(
  df = data,
  y = "Status_Bun",
  x = "Suma_datorata"
)
View(result.Suma_datorata$ivtable)

# g Varsta:
# Replace the age by its band label (the last band covers ages above 55).
data$Varsta <- as.factor(
  ifelse(data$Varsta <= 35, "18-35",
    ifelse(data$Varsta <= 45, "36-45",
      ifelse(data$Varsta <= 55, "46-55", "55+")
    )
  )
)
result0.Varsta <- smbinning.factor(df = data, y = "Status_Bun", x = "Varsta")
View(result0.Varsta$ivtable)

# Positional recode of the four age bands. The previous code assigned the
# five-code vector used for Suma_datorata; with only four bands the fifth
# code ("_2") became a level with no observations. The grouping that was
# effectively applied is kept: the first three bands -> "_0", "55+" -> "_1".
# NOTE(review): confirm that merging the first three bands is intended.
levels(data$Varsta) <- c("_0", "_0", "_0", "_1")
data$Varsta <- factor(data$Varsta, levels = c("_0", "_1"))

# WoE/IV table for the recoded variable.
result.Varsta <- smbinning.factor(df = data, y = "Status_Bun", x = "Varsta")
View(result.Varsta$ivtable)

## Step 4: Fit the logistic regression

# Binomial model with a logit link. The response is given in two-column form,
# cbind(Status_Bun, Status_Rau); the predictors are the recoded factors.
Rezultat <- glm(
  cbind(Status_Bun, Status_Rau) ~
    Comportament_anterior + Sex + Ocupatie +
    Rezidenta + Suma_datorata + Varsta,
  data = data,
  family = binomial(link = "logit")
)
summary(Rezultat)
###################################################################################
###################################

## Step 5: Compare the sign of each coefficient with the WoE of its group

# Stack the IV tables in the same order as the terms of the model formula.
# na.omit() drops every row that contains an NA/NaN value (presumably the
# "Missing" and "Total" summary rows of each table -- TODO confirm).
Rezultat1 <- na.omit(rbind(
  result.Comportament_anterior$ivtable,
  result.Sex$ivtable,
  result.Ocupatie$ivtable,
  result.Rezidenta$ivtable,
  result.Suma_datorata$ivtable,
  result.Varsta$ivtable
))
# Drop the "_0" group of every variable: it is the first factor level, which
# gets no coefficient of its own in the model.
A <- Rezultat1[Rezultat1$Cutpoint != "= '_0'", ]
# Model coefficients without the intercept.
B <- Rezultat$coefficients[-1]
# Guard against silent recycling: each remaining group must match exactly
# one coefficient, otherwise the side-by-side comparison is meaningless.
if (length(B) != nrow(A)) {
  stop(
    "Number of non-reference groups (", nrow(A),
    ") differs from the number of coefficients (", length(B), ").",
    call. = FALSE
  )
}
B <- data.frame(
  Estimates = B,
  WoE = A$WoE,
  IV = A$IV,
  row.names = names(B)
)
View(B)
###################################################################################
###################################

# Step 6: Performance analysis based on AUC, GINI and KS

# Step 6.1: Apply the model to the data. With the two-column response
# cbind(Status_Bun, Status_Rau), type = "response" gives the fitted
# probability of the first column (Status_Bun).
# newdata = data returns one prediction per row of `data` (NA where a
# predictor is missing). Without it, predict() returns only the rows kept by
# the model's na.action, and the assignment fails or misaligns whenever rows
# were dropped during fitting.
data$value <- predict(Rezultat, newdata = data, type = "response")
# Step 6.2: Probability of default for each client
data$PD <- 1 - data$value
# Step 6.3: AUC
pred_d <- prediction(data$value, data$Status_Bun)
perf_d <- performance(pred_d, "auc")
auc_d <- unlist(slot(perf_d, "y.values"))
# Step 6.4: GINI coefficient
GINI <- 2 * auc_d - 1
GINI
perf1_d <- performance(pred_d, "tpr", "fpr")
# Step 6.5: KS coefficient = largest gap between the true-positive rate
# (y.values) and the false-positive rate (x.values) along the curve.
KS <- max(slot(perf1_d, "y.values")[[1]] - slot(perf1_d, "x.values")[[1]])
KS
# Export the scored data (path is specific to the author's machine):
write.csv(data, "C:/Users/Alexia/Desktop/scoring/data_fin.csv")

# You might also like (footer text carried over from the document viewer)