Data import

Univariate analysis (not included in the report).

D <- 1:ncol(data)
options(digits=1)
Hmisc::describe(D)
## D 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       10        0       10        1      5.5    3.667     1.45     1.90 
##      .25      .50      .75      .90      .95 
##     3.25     5.50     7.75     9.10     9.55 
## 
## lowest :  1  2  3  4  5, highest:  6  7  8  9 10
##                                                   
## Value        1   2   3   4   5   6   7   8   9  10
## Frequency    1   1   1   1   1   1   1   1   1   1
## Proportion 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1
par(mfrow=c(2,5))
for (j in D) {
  hist(data[,j],col="blue", xlab = names(data)[j],main = "")
}

These histograms suggest that some of the variables are approximately normally distributed.
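
As a complement to the visual check, a formal test could be applied to each variable; a minimal sketch using shapiro.test (not part of the original analysis):

# Shapiro-Wilk normality test for every column of data (illustrative only)
normality_pv <- sapply(seq_len(ncol(data)), function(j) shapiro.test(data[, j])$p.value)
names(normality_pv) <- names(data)
round(normality_pv, 3)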

Bivariate analysis

Relationships among the predictors.

entrees <- as.data.frame(data[,-1])
cormat <- round(cor(data),2)
get_upper_tri <- function(cormat){
  cormat[lower.tri(cormat)]<- NA
  return(cormat)
}
upper_tri <- get_upper_tri(cormat)
reorder_cormat <- function(cormat){
  # Use the correlation between variables
  # as a distance measure
  dd <- as.dist((1-cormat)/2)
  hc <- hclust(dd)
  cormat <-cormat[hc$order, hc$order]
}
# Reorder the correlation matrix
cormat <- reorder_cormat(cormat)
upper_tri <- get_upper_tri(cormat)
# Melt the correlation matrix into long format
melted_cormat <- melt(upper_tri, na.rm = TRUE)
# Build the ggplot heatmap
ggheatmap <- ggplot(melted_cormat, aes(Var2, Var1, fill = value))+
  geom_tile(color = "white")+
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
                       midpoint = 0, limit = c(-1,1), space = "Lab",
                       name="Pearson\nCorrelation") +
  theme_minimal()+ # minimal theme
  theme(axis.text.x = element_text(angle = 45, vjust = 1, 
                                   size = 12, hjust = 1))+
  coord_fixed()+ 
  geom_text(aes(Var2, Var1, label = value), color = "black", size = 4) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.ticks = element_blank(),
    legend.justification = c(1, 0),
    legend.position = c(0.6, 0.7),
    legend.direction = "horizontal")+
  guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
                               title.position = "top", title.hjust = 0.5))
print(ggheatmap)

Commentary provided in the report.

Relationship between each predictor and the response variable

A <- ggplot(data = data, aes(x = T, y = TDM)) + geom_point() + geom_smooth(formula = y ~ x, method = "lm", se = FALSE)
B <- ggplot(data = data, aes(x = HR, y = TDM)) + geom_point() + geom_smooth(formula = y ~ x, method = "lm", se = FALSE)
C <- ggplot(data = data, aes(x = PR, y = TDM)) + geom_point() + geom_smooth(formula = y ~ x, method = "lm", se = FALSE)
D <- ggplot(data = data, aes(x = CL, y = TDM)) + geom_point() + geom_smooth(formula = y ~ x, method = "lm", se = FALSE)
E <- ggplot(data = data, aes(x = PD, y = TDM)) + geom_point() + geom_smooth(formula = y ~ x, method = "lm", se = FALSE)
F <- ggplot(data = data, aes(x = MA, y = TDM)) + geom_point() + geom_smooth(formula = y ~ x, method = "lm", se = FALSE)
G <- ggplot(data = data, aes(x = SI, y = TDM)) + geom_point() + geom_smooth(formula = y ~ x, method = "lm", se = FALSE)
H <- ggplot(data = data, aes(x = FM, y = TDM)) + geom_point() + geom_smooth(formula = y ~ x, method = "lm", se = FALSE)
I <- ggplot(data = data, aes(x = SH, y = TDM)) + geom_point() + geom_smooth(formula = y ~ x, method = "lm", se = FALSE)
grid.arrange(A, B, C, D, E, F, G, H, I, ncol=3, nrow = 3)

Commentary provided in the report.

cor.test(data$T, data$TDM, method = "pearson") 
## 
##  Pearson's product-moment correlation
## 
## data:  data$T and data$TDM
## t = -4, df = 53, p-value = 2e-04
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7 -0.3
## sample estimates:
##  cor 
## -0.5
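
The same test can be repeated for every predictor; a minimal sketch (only the test against T is reported above):

# Pearson correlation test between TDM and each predictor (illustrative only)
predictors <- setdiff(names(data), "TDM")
cor_pv <- sapply(predictors, function(v) cor.test(data[[v]], data$TDM, method = "pearson")$p.value)
round(cor_pv, 4)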

Rescaling the data to the [0,1] interval (min-max normalization).
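
For each column, every observation is mapped to the unit interval, which is exactly what the two sweep() calls below compute column by column:

\[ x'_{ij} \;=\; \frac{x_{ij} - \min_i x_{ij}}{\max_i x_{ij} - \min_i x_{ij}} \]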

min <- apply(data,2,min)
max <- apply(data,2,max)
data_stand <- sweep(data,2,STATS=min,FUN="-")
s <- max-min
data_stand <- sweep(data_stand, 2, s, FUN="/")

head(data_stand)
##   TDM   T  HR  PR  CL  PD  MA  SI  FM   SH
## 1 0.3 0.4 0.3 0.2 0.0 0.2 0.3 0.4 0.5 0.12
## 2 0.0 1.0 0.5 0.5 0.2 0.5 0.0 0.4 0.5 0.27
## 3 0.3 0.5 0.6 0.4 0.3 0.7 0.3 0.8 0.9 0.49
## 4 0.3 0.2 0.6 0.4 0.2 0.2 0.4 0.5 0.7 0.08
## 5 0.2 0.7 0.5 0.7 0.4 0.4 0.3 0.8 0.5 0.41
## 6 0.2 0.6 0.4 0.2 0.0 0.4 0.6 0.7 0.4 0.30

All values are indeed between 0 and 1.
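
A quick check confirms it (not part of the original code):

# Verify that every rescaled value lies in [0, 1]
all(data_stand >= 0 & data_stand <= 1)
apply(data_stand, 2, range)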

Polynomial basis of degree 3 for each predictor.

data_stand_multi <- data_stand %>%
  mutate(T_2 = T^2,   T_3 = T^3,
         HR_2 = HR^2, HR_3 = HR^3,
         PR_2 = PR^2, PR_3 = PR^3,
         CL_2 = CL^2, CL_3 = CL^3,
         PD_2 = PD^2, PD_3 = PD^3,
         MA_2 = MA^2, MA_3 = MA^3,
         SI_2 = SI^2, SI_3 = SI^3,
         FM_2 = FM^2, FM_3 = FM^3,
         SH_2 = SH^2, SH_3 = SH^3)
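
The same columns can be generated more compactly with dplyr::across; a sketch, assuming dplyr >= 1.0.0:

# Equivalent construction: squares and cubes of every predictor except the response TDM
data_stand_multi <- data_stand %>%
  mutate(across(-TDM, list(`2` = ~ .x^2, `3` = ~ .x^3), .names = "{.col}_{.fn}"))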

Multivariate analysis

Comparing the selection criteria for best-subset (exhaustive) selection.

require(MASS)
require(leaps)      # regsubsets()
require(SignifReg)  # SignifReg()
B = 30
exhau.AIC <- rep(NA,B)
exhau.R2a <- rep(NA,B)
exhau.Pv <- rep(NA,B)
forw.AIC <- rep(NA,B)
forw.R2a <- rep(NA,B)
forw.Pv <- rep(NA,B)
back.AIC <- rep(NA,B)
back.R2a <- rep(NA,B)
back.Pv <- rep(NA,B)
set.seed(123)
for(i in 1:B){
  
  # Random partition of the data: 2/3 for training and 1/3 for testing
  n <- nrow(data_stand_multi)
  tr <- sample(1:n, floor(2 * n / 3))
  train <- data_stand_multi[tr,]
  test <- data_stand_multi[-tr,]
  Xtrain <- train[,-1]
  Ytrain <- train[,1]
  Xtest <- test[,-1]
  Ytest <- test[,1]
  
  # Best-subset (exhaustive) selection
  kmax <- 27   # number of candidate predictors (9 variables plus their squares and cubes)
  grille_mod <- 1:kmax
  err_AIC_mod <- rep(NA, length(grille_mod))
  err_R2a_mod <- rep(NA, length(grille_mod))
  err_Pv_mod <- rep(NA, length(grille_mod))
  models <- regsubsets(TDM ~ ., data = train, nvmax = kmax, method = "exhaustive")
  for(k in 1:length(grille_mod))
  {
    model_k <- summary(models)$which[k,-1]
    nv <- names(which(model_k == TRUE))
    nv<- paste(nv, collapse = "+")
    nv <- paste0("TDM", "~", nv)
    nv <- as.formula(nv)
    model_lineaire <- lm(nv,data=train)
    err_AIC_mod[k] <- AIC(model_lineaire)
    err_R2a_mod[k] <- summary(model_lineaire)$adj.r.squared
    err_Pv_mod[k] <- broom::glance(model_lineaire)$p.value
  }
  
  ## AIC criterion
  k_opt_AIC <- grille_mod[which.min(err_AIC_mod)]
  model_k_opt_AIC <- summary(models)$which[k_opt_AIC,-1]
  nv <- names(which(model_k_opt_AIC == TRUE))
  nv<- paste(nv, collapse = "+")
  nv <- paste0("TDM", "~", nv)
  nv <- as.formula(nv)
  model_lineaire_k_opt_AIC <- lm(nv,data=train)
  prediction_model_lineaire_k_opt_AIC <- predict(model_lineaire_k_opt_AIC,Xtest)
  exhau.AIC[i] <- sqrt(mean((Ytest - prediction_model_lineaire_k_opt_AIC)^2))
  
  ## Adjusted R² criterion (to be maximized)
  k_opt_R2a <- grille_mod[which.max(err_R2a_mod)]
  model_k_opt_R2a <- summary(models)$which[k_opt_R2a,-1]
  nv <- names(which(model_k_opt_R2a == TRUE))
  nv<- paste(nv, collapse = "+")
  nv <- paste0("TDM", "~", nv)
  nv <- as.formula(nv)
  model_lineaire_k_opt_R2a <- lm(nv,data=train)
  prediction_model_lineaire_k_opt_R2a <- predict(model_lineaire_k_opt_R2a,Xtest)
  exhau.R2a[i] <- sqrt(mean((Ytest - prediction_model_lineaire_k_opt_R2a)^2))
  
  ## p-value criterion
  k_opt_Pv <- grille_mod[which.min(err_Pv_mod)]
  model_k_opt_Pv <- summary(models)$which[k_opt_Pv,-1]
  nv <- names(which(model_k_opt_Pv == TRUE))
  nv<- paste(nv, collapse = "+")
  nv <- paste0("TDM", "~", nv)
  nv <- as.formula(nv)
  model_lineaire_k_opt_Pv <- lm(nv,data=train)
  prediction_model_lineaire_k_opt_Pv <- predict(model_lineaire_k_opt_Pv,Xtest)
  exhau.Pv[i] <- sqrt(mean((Ytest - prediction_model_lineaire_k_opt_Pv)^2))
  
  # Forward stepwise selection
  nullmodel = lm(TDM~1, train)
  fullmodel = lm(TDM~., train)
  scope = list(lower=formula(nullmodel),upper=formula(fullmodel))
  fit.forw <- lm(TDM ~1, data = train)
  fit.back <- lm(TDM ~., data = train)
  
  ## AIC criterion
  fit_forw_AIC <-  SignifReg(fit.forw, scope = scope, direction = "forward", trace = FALSE, criterion = "AIC")
  pred_test_forw_AIC <- predict(fit_forw_AIC,Xtest)
  forw.AIC[i] <- sqrt(mean((Ytest -pred_test_forw_AIC)^2))

  ## Adjusted R² criterion
  fit_forw_R2a <-  SignifReg(fit.forw, scope = scope, direction = "forward", trace = FALSE, criterion = "r-adj")
  pred_test_forw_R2a <- predict(fit_forw_R2a,Xtest)
  forw.R2a[i] <- sqrt(mean((Ytest -pred_test_forw_R2a)^2))
  
  ## p-value criterion
  fit_forw_Pv <-  SignifReg(fit.forw, scope = scope, direction = "forward", trace = FALSE, criterion = "p-value")
  pred_test_forw_Pv <- predict(fit_forw_Pv,Xtest)
  forw.Pv[i] <- sqrt(mean((Ytest -pred_test_forw_Pv)^2))
  
  # Backward stepwise selection
  
  ## AIC criterion
  fit_back_AIC <-  SignifReg(fit.back, scope = scope, direction = "backward", trace = FALSE, criterion = "AIC")
  pred_test_back_AIC <- predict(fit_back_AIC,Xtest)
  back.AIC[i] <- sqrt(mean((Ytest -pred_test_back_AIC)^2))
  
  ## Adjusted R² criterion
  fit_back_R2a <-  SignifReg(fit.back, scope = scope, direction = "backward", trace = FALSE, criterion = "r-adj")
  pred_test_back_R2a <- predict(fit_back_R2a,Xtest)
  back.R2a[i] <- sqrt(mean((Ytest -pred_test_back_R2a)^2))
  
  ## p-value criterion
  fit_back_Pv <-  SignifReg(fit.back, scope = scope, direction = "backward", trace = FALSE, criterion = "p-value")
  pred_test_back_Pv <- predict(fit_back_Pv,Xtest)
  back.Pv[i] <- sqrt(mean((Ytest -pred_test_back_Pv)^2))

}

Comparison of the methods via their mean test error
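
For each of the B = 30 random splits, the error reported for a method is the root mean squared error on the test third, and the criteria are then compared through its average over the splits:

\[ \mathrm{RMSE}_b \;=\; \sqrt{\frac{1}{n_{\mathrm{test}}}\sum_{i \in \mathrm{test}} \bigl(y_i - \hat{y}_i\bigr)^2}, \qquad \overline{\mathrm{err}} \;=\; \frac{1}{B}\sum_{b=1}^{B}\mathrm{RMSE}_b \]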

mean(exhau.AIC)
## [1] 0.2
mean(forw.AIC)
## [1] 0.1
mean(back.AIC)
## [1] 0.2
mean(exhau.R2a)
## [1] 0.1
mean(forw.R2a)
## [1] 0.1
mean(back.R2a)
## [1] 0.2
mean(exhau.Pv)
## [1] 0.1
mean(forw.Pv)
## [1] 0.1
mean(back.Pv)
## [1] 0.1
par(mfrow=c(1,1))
err_test_1<-cbind(exhau.AIC, exhau.R2a, exhau.Pv, forw.AIC, forw.R2a, forw.Pv) 
boxplot(err_test_1, ylab = "test error", col = rainbow(6))

Comparing the 8 selection methods and their false-negative rates
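
With T taken as the only truly nonzero coefficient (see the construction of Indbeta below), the per-split false-negative rate of a method reduces to the indicator that T is absent from the selected model:

\[ \mathrm{FN}_b \;=\; \frac{1}{\lvert S\rvert}\sum_{j \in S}\mathbf{1}\{\hat{\beta}_j = 0\}, \qquad S = \{\, j : \beta_j \neq 0 \,\} = \{\mathrm{T}\} \]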

# Setup for the false-negative rate computation
X <- data_stand[,-1]
Indbeta <- matrix(0, nrow = ncol(X), ncol = 1) # indicator of the truly nonzero betas (dimension follows that of X)
rownames(Indbeta) <- colnames(X)
Indbeta <- rbind(0, Indbeta) # add the intercept
rownames(Indbeta)[1] <- "(Intercept)"
Ind <- which(rownames(Indbeta) == "T")
Indbeta[Ind] <- 1 # T is the only predictor taken to be truly relevant


# Packages needed below: glmnet (cv.glmnet), pls (pcr, plsr), spls (cv.spls, spls)
require(glmnet)
require(pls)
require(spls)

# Error vectors
B = 30
exhau.Pv <- rep(NA,B)
back.Pv <- rep(NA,B)
forw.R2a <- rep(NA,B)
Lasso <- rep(NA,B)
Ridge <- rep(NA,B)
PCR <- rep(NA,B)
PLS <- rep(NA,B)
sPLS <- rep(NA,B)
FN<-matrix(NA,nrow=B,ncol=8) # false negative rate
colnames(FN)<-c("PCR", "PLS", "sPLS","Lasso","Ridge","Back.Pv","Forw.R2", "Exhau.Pv")
set.seed(123)


for(i in 1:B){
  # Random partition of the data: 2/3 for training and 1/3 for testing
  n <- nrow(data_stand_multi)
  tr <- sample(1:n, floor(2 * n / 3))
  train <- data_stand_multi[tr,]
  test <- data_stand_multi[-tr,]
  Xtrain <- train[,-1]
  Ytrain <- train[,1]
  Xtest <- test[,-1]
  Ytest <- test[,1]
  
  # Best-subset (exhaustive) selection
  kmax <- 27
  grille_mod <- 1:kmax
  err_Pv_mod <- rep(NA, length(grille_mod))
  models <- regsubsets(TDM ~ ., data = train, nvmax = kmax, method = "exhaustive")
  for(k in 1:length(grille_mod))
  {
    model_k <- summary(models)$which[k,-1]
    nv <- names(which(model_k == TRUE))
    nv<- paste(nv, collapse = "+")
    nv <- paste0("TDM", "~", nv)
    nv <- as.formula(nv)
    model_lineaire <- lm(nv,data=train)
    err_Pv_mod[k] <- broom::glance(model_lineaire)$p.value
  }
  ## p-value criterion
  k_opt_Pv <- grille_mod[which.min(err_Pv_mod)]
  model_k_opt_Pv <- summary(models)$which[k_opt_Pv,-1]
  nv <- names(which(model_k_opt_Pv == TRUE))
  nv<- paste(nv, collapse = "+")
  nv <- paste0("TDM", "~", nv)
  nv <- as.formula(nv)
  model_lineaire_k_opt_Pv <- lm(nv,data=train)
  prediction_model_lineaire_k_opt_Pv <- predict(model_lineaire_k_opt_Pv,Xtest)
  exhau.Pv[i] <- sqrt(mean((Ytest - prediction_model_lineaire_k_opt_Pv)^2))
  betahat.PvS<-model_lineaire_k_opt_Pv$coefficients
  IndPvS<-which(rownames(Indbeta)%in%names(betahat.PvS))
  Indbetahat.PvS<-Indbeta
  Indbetahat.PvS[-IndPvS]<-0
  Indbetahat.PvS[IndPvS]<-1
  
  
  # Stepwise selection
  nullmodel = lm(TDM~1, train)
  fullmodel = lm(TDM~., train)
  scope = list(lower=formula(nullmodel),upper=formula(fullmodel))
  fit.forw <- lm(TDM ~1, data = train)
  fit.back <- lm(TDM ~., data = train)
  
  ## Forward selection with the adjusted R² criterion
  fit_forw_R2a <-  SignifReg(fit.forw, scope = scope, direction = "forward", trace = FALSE, criterion = "r-adj")
  pred_test_forw_R2a <- predict(fit_forw_R2a,Xtest)
  forw.R2a[i] <- sqrt(mean((Ytest -pred_test_forw_R2a)^2))
  betahat.R2a<-fit_forw_R2a$coefficients
  IndR2a<-which(rownames(Indbeta)%in%names(betahat.R2a))
  Indbetahat.R2a<-Indbeta
  Indbetahat.R2a[-IndR2a]<-0
  Indbetahat.R2a[IndR2a]<-1
  
  ## Backward selection with the p-value criterion
  fit_back_Pv <-  SignifReg(fit.back, scope = scope, direction = "backward", trace = FALSE, criterion = "p-value")
  pred_test_back_Pv <- predict(fit_back_Pv,Xtest)
  back.Pv[i] <- sqrt(mean((Ytest -pred_test_back_Pv)^2))
  betahat.PvB<-fit_back_Pv$coefficients
  IndPvB<-which(rownames(Indbeta)%in%names(betahat.PvB))
  Indbetahat.PvB<-Indbeta
  Indbetahat.PvB[-IndPvB]<-0
  Indbetahat.PvB[IndPvB]<-1

  
  # Lasso regression
  fit.cv.lasso <- cv.glmnet(x = as.matrix(Xtrain), y = Ytrain, family= "gaussian", alpha = 1, nfold =10, 
                            standardize = FALSE, intercept = TRUE)
  lambda.opt.cvm<-fit.cv.lasso$lambda.min
  pred_test_Lasso <- predict(fit.cv.lasso, newx=as.matrix(Xtest),s=lambda.opt.cvm,type="response")
  Lasso[i] <- sqrt(mean((Ytest-pred_test_Lasso)^2))
  betahat.lasso <- predict(fit.cv.lasso,s=lambda.opt.cvm,type="coefficients")
  Indbetahat.lasso<-betahat.lasso
  Indbetahat.lasso[Indbetahat.lasso==0]<-0
  Indbetahat.lasso[Indbetahat.lasso!=0]<-1
  
  # Ridge regression
  fit.cv.Ridge <- cv.glmnet(x = as.matrix(Xtrain), y = Ytrain, family= "gaussian", alpha = 0, nfold = 10, 
                            standardize = FALSE, intercept = TRUE)
  lambda.opt.cvm<-fit.cv.Ridge$lambda.min
  pred_test_Ridge <- predict(fit.cv.Ridge, newx=as.matrix(Xtest),s=lambda.opt.cvm,type="response")
  Ridge[i] <- sqrt(mean((Ytest-pred_test_Ridge)^2))
  betahat.Ridge <- predict(fit.cv.Ridge,s=lambda.opt.cvm,type="coefficients")
  Indbetahat.Ridge<-betahat.Ridge
  Indbetahat.Ridge[Indbetahat.Ridge==0]<-0
  Indbetahat.Ridge[Indbetahat.Ridge!=0]<-1
  
  
  # Principal component regression (PCR)
  pcr.fit <- pcr(TDM ~., data=train, scale=TRUE , validation="CV", segments=10)
  k.pcr <- which.min(pcr.fit$validation$PRESS)
  pred_test.pcr <- predict(pcr.fit,as.matrix(Xtest), ncomp=k.pcr)
  PCR[i] <- sqrt(mean((Ytest-pred_test.pcr)^2))
  betahat.pcr <- as.matrix(pcr.fit$coefficients[,,k.pcr],ncol=1) # coefficient extraction does not return the intercept; it must be added back
  betahat.pcr <- rbind(predict(pcr.fit,t(as.matrix(rep(0,27))), ncomp=k.pcr),betahat.pcr) # recover the intercept by predicting on a vector of zeros
  rownames(betahat.pcr)[1]<-"(Intercept)"
  Indbetahat.pcr<-betahat.pcr
  Indbetahat.pcr[Indbetahat.pcr==0]<-0
  Indbetahat.pcr[Indbetahat.pcr!=0]<-1
  
  # Partial least squares regression (PLSR)
  pls.fit <- plsr(TDM ~., data=train, scale=TRUE , validation="CV", segments=10)
  k.pls <- which.min(pls.fit$validation$PRESS)
  pred_test.pls<-predict(pls.fit,as.matrix(Xtest), ncomp=k.pls)
  PLS[i] <- sqrt(mean((Ytest-pred_test.pls)^2))
  betahat.pls <- as.matrix(pls.fit$coefficients[,,k.pls],ncol=1) # coefficient extraction does not return the intercept; it must be added back
  betahat.pls <- rbind(predict(pls.fit,t(as.matrix(rep(0,27))), ncomp=k.pls),betahat.pls) # recover the intercept by predicting on a vector of zeros
  rownames(betahat.pls)[1]<-"(Intercept)"
  Indbetahat.pls<-betahat.pls
  Indbetahat.pls[Indbetahat.pls==0]<-0
  Indbetahat.pls[Indbetahat.pls!=0]<-1
  
  
  # Sparse partial least squares regression (sPLSR)
  spls.cv <- cv.spls(Xtrain, Ytrain, eta = seq(0.1,0.9,0.1), K = c(1:27),plot.it= FALSE)
  spls.fit <- spls(x=Xtrain, y=Ytrain, eta = spls.cv$eta.opt, K = spls.cv$K.opt,
                   scale.x=TRUE, scale.y=FALSE, trace=FALSE)
  pred_test.spls <- predict(spls.fit,as.matrix(Xtest))
  sPLS[i] <- sqrt(mean((Ytest-pred_test.spls)^2))
  betahat.spls <- coef(spls.fit) # coefficient extraction does not return the intercept; it must be added back
  betahat.spls <- rbind(predict(spls.fit,t(as.matrix(rep(0,27)))),betahat.spls) # recover the intercept by predicting on a vector of zeros
  rownames(betahat.spls)[1]<-"(Intercept)"
  Indbetahat.spls<-betahat.spls
  Indbetahat.spls[Indbetahat.spls==0]<-0
  Indbetahat.spls[Indbetahat.spls!=0]<-1
  
  FN[i,1] <- sum(abs(Indbetahat.pcr[Ind,]-Indbeta[Ind,]))/length(Ind) 
  FN[i,2] <- sum(abs(Indbetahat.pls[Ind,]-Indbeta[Ind,]))/length(Ind) 
  FN[i,3] <- sum(abs(Indbetahat.spls[Ind,]-Indbeta[Ind,]))/length(Ind) 
  FN[i,4] <- sum(abs(Indbetahat.lasso[Ind]-Indbeta[Ind]))/length(Ind)  
  FN[i,5] <- sum(abs(Indbetahat.Ridge[Ind]-Indbeta[Ind]))/length(Ind)
  FN[i,6] <- sum(abs(Indbetahat.PvB[Ind]-Indbeta[Ind]))/length(Ind)
  FN[i,7] <- sum(abs(Indbetahat.R2a[Ind]-Indbeta[Ind]))/length(Ind) 
  FN[i,8] <- sum(abs(Indbetahat.PvS[Ind]-Indbeta[Ind]))/length(Ind)
  
  
}

Graphical comparison of the 8 methods

par(mfrow=c(1,1))
err_test_2<-cbind(exhau.Pv, forw.R2a, back.Pv, Lasso, Ridge, PCR, PLS, sPLS)
boxplot(err_test_2, ylab = "test error", col = rainbow(8))

Graphical representation of the false-negative rates

par(mfrow=c(1,1))
couleurs<-c("red","orange","purple","yellow","grey","blue","green","pink")
boxplot(FN, col = couleurs, main = "FN rate", ylab = "False-negative rate", xlab = "Method")
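
Alongside the boxplots, the per-method averages can be tabulated; a short summary not included in the original output:

# Mean test error and mean false-negative rate for each of the 8 methods
round(colMeans(err_test_2), 3)
round(colMeans(FN), 3)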