Documente Academic
Documente Profesional
Documente Cultură
)7 ."::_: ":7_K:7{pP{_Q7, =
!"# $%'& )(
*,+.-0/213547683:983
;=<?>A@B<?CD<FEG;IHKJL;AM8>BCDNPOQOAJRNPS8@
T,U.VQW 3P9X?476'Y+Z-K6 U.V\[ -K6?]_^`+ZaK+ U
<8ObN5c8CEG;IHKJL;AM8>BCDNPOQOAJRNPS8@
deV\f=g 1%4 VQh
i ;Ij=k:EG;AHbJl;IM8>BCDNPOQO=JlNPS?@
T,U.VQWnm[ +po V\[
<8qsrtIkPEG;IHKJL;AM8>BCDNPOQOAJRNPS8@
uv w=xy{z{|}v ~Qz#sK|}wP5z_v y5 lv~:
vbAsKy_~PvlD~Plbv ylzK5z_:xKxPP bQ5
b5PA
Uv y_vsvKv~Pv yx7z_v5lbv yv{v
z_FKA|}55vL :{L~P"|x~\
PRIv y_v ~Qz#xLKby{lz{P|?v:{v5PwPwAKy{)z Vvz_Ky#xbPl~:v
vwPy_v{v ~Qzx|}v
z_P5pbyF K~:z{y_:
z{L~Pv~:v|G
W V_
=xy{z{l= LxKB~Pv PyxB~:v
z_%.=|Gv|}Ky_b xbv
lv,py{b|
L y_xKy{Lvz_PK={xK~:P,%|}55v
L
lL
Ll| .7xLLPxz{LK~={v
z
{v
z|G55vL,z_:x~ xbKL~P v LKQz{L~Pz_Pvz{yxL~5
L~Pv zz{npKy
v~Pv |G55vLxzz_v ~:'z{z{PQvwAKL~bz
z_:x7z}xy_v)5l Plzz_
xK_lp
Ky_y_vz_lb\PLlL7x~v zx w:y{\v5PLv
z{%LKx~:y_v)xK:lz{5vz{:z_v|Gx~55vv~:L"vxK| ly_ vLxbv 5 l~x0Kz_vPyv}xbv l~:~:{v | z_P v lvbLy
bKb \bz?pvx7z_Py{v?L~:z{vxK"K5z{yxL~Pl~:w=bl~Qz F#y_y{by
Ky_y{vz_l~PKb5z{w:5z
55v."#ZuLv
z{z{v y_Gx\Ly{ YxKl:L7LL|~Pxbv v~=xKvP| P l~PLv}x|GRz_55v
vPI z{v Llz{vP~bvzGvw=~:vyvpb| y{ |Lv xK~:0
v a z{ .xb zv
KQKB y{vx7z{v?|}55vL8Rz_GPv
by{y_v x7z_vvy{y_Ky z{yxL~5
L~P|}\Pv pby|%Plz{l Lxb{,wPy_ Lv |eK~5lAvy{v~bz5l pbPK~:y_v 7Klv~yF|}z_Pl~\v5z_{vv Lvpz_y_lKb| ~nw:L y{ 5y_
xKvy{5LvPy_vexKlz{lL 7GbK:'Gz{|}5b5w5vz{Ll
:z{b|}lv :~Py{x~=xy_|x~Qz{nbKQ% y{vx7z{v5l |}L vz_Pvv~:{v | lvz_x~\vxbLl
b|}wP5z{vw=vypby{|xK~:
v
bv yv~PvPy_xKB~Pv
z\Lx} K|}w=v z{lz{LK~x|}b~P~:\Pv
|}v z{y_LKvGv 7xL:x7z_vz_PvwAv y{pKy_|x~: v%K#v ~:{v | lv}{v
s+-Q56
879,:
! ;=
<>,?@A:
BF !D#C2"%A:$EF& ')Q(0 FQ* +A@-G,H0.H.ID
JK-,0EM/2L13@ N
40+O@+P 5 Lv
z{LK~nK~z_v ~wAv y{pKy_|x~: v|}v
z_y{ v v LlvKv,z{P
sz{Pv:yzz{L|}v"xLvxy_~PL~P%|}v z{P5,:xK vv ~,v 7xL:x7z_v
GH0HIRQ@N
POb
S%POLT4J
xb
y_b_:xGLPv"0xKy{Lv
zKwAv y{pKy_|x~: v|Gv z{y_L
hwdziwf%kuf%ordps+dkuxqv d@fkulnm%kZdf%g4hf{%mDk+t|s+h;osume%dc;dk+~
fs+d
%%%zorm--d{?xqf%k+dR-d0x+gk+tyjd0t|csuh;di;idcj-t2m%s+d
suhwf@s8v d-mec;msq-ds+dk+ortc;dvhwfsqiwfk4fords+dk4xWt|d{ydxs
idk+lnm%kuo]fcwgdvh;dcsukuf%t|c;tc;om--d{x{{}om--d{xf%k+d
fD;-d0s+mf{t|ywkuf%k+c;mo]fss+dkzhwm@v%mWm-m%kywf%s+h;d
f%k+dD9m--d{6i;kud;tgps+tm%cwxRmDcs+h;ds+k4ftcf%cwh;t{|{?gp{to
y;tc;
x+dps4xzf%k+d9gf%g4hwd2h;t?xzx+t|ori;{t|wdxzv mDk+Wtc;vt|s+hs+h;d9{|t|~
ywkuf%k+fcwo]fDdx]orm--d{Zxd{|d0gsut|mDclf%xs+dkeyjd0gfjxdsuh;d
orm--d{x;mcwmshwf:%dZsumydRdp-d0gp-sud-;kut|cwrx+d{dgps+tm%c=
Zq;Kq3Z3K-6
hwdx+t|ori;{dlnmDk+vfk4om--d{x+d{dgps+tm%ci;kum-gpd0-;kudi;kudp~
x+dcsudtcs+h;dcs+kumW;wgsut|mDctxelf%xs9fcjdp>dgps+t%d%yw-s
x+m%ords+tod0x)m@%dk;s4xKs+mZs+h;dhwt|{{g{|to
ywt|c;Rx+dps0%kud-wgt|cw8dc-~
x+dozy;{dZidk+lnm%kuo]fcwgd%)dRorfD-dZs+h;kuddRfD;-t|s+tm%cwx#sumrs+h;t?x
x+d{dgps+tm%ci;kumWgd-wk+ds+mkud-jgpdm@%dk;s+s+tc;wh;d0xdf%k+d
;txugpwxux+d9t|csuh;dRc;dpWssuh;k+ddxwy-~x+dgps+tm%cwxh;dx+dRordpsuh-~
m-;x6o]f:zydqwx+dplnw{jtcmsuh;dk#fi;iw{|t?gfs+tm%cwx6vh;dkudlnm%kuvfk4
xs+diWvtx+dxd{|d0gsut|mDct?xiwk+mDc;ds+mm@Ddk+;ssut|c;j#x+wg4hf%xt|c
lnd0f@su;k+dRx+d{dgps+tm%c}m%hjf:t=%mDh;c=20%%
=)3WW0r9D)w%WW>
t|s+horm--d{x+d{dgps+tm%c..w@-kudi;{?f%gdordcsidk+lnm%k+~
o]f%cwgpdrtoiwk+m@DdxZf%xZs+hwdydxsorm--d{?xRfkudfD;-d0sum9suh;d
dcwx+do
yw{|dD#idf%-xfcwsuh;dc;t?g4W{|-dg{|tc;dx)dklnmDk~
o]f%cwgpd-kum%iwxydgfwx+desuh;dydxsrorm--d{xrtcsuh;d{|ty;k4fku
hjf:%dyjddcwx+dfcjx+d{dgps+tm%co
wxsc;m@vf%;orm--d{?x
suhwf@s8hwks8s+h;ddcwx+do
yw{|dDKt%wk+d]
x+h;m@vxsuh;t?xydhwf:Wtm%k
lnmDk}k+mWms+~ord0fc-~x+wf%k+d0W~dkuk+mDkZc-lnmDksu;cwf@sud{%=ormxsRdk+~
kum%k)ordpsuk+t?gxtd{?o
wg4hy;;ori;tdk6%k4fi;hwxMsuhwfcsuh;txvh;dc
hwt|{{g{|to
ywt|c;Zt?xK-mDc;d#vtsuhzx+orf%{|{W;fsufZxdsux@orf%Wt|c;8ts-t|l.~
gp;{|ss+mrkud{tf%y;{ei;t?g4efr%mWm-xsum%i;i;tc;]im%tcs6h;dR{mDxux
tcrijdklnmDk+o]f%cwgpdgf%czydxt%cwtjgfcs)tls+h;didf%t?x)ot?xuxd02
Mt%;kud}f{?xmRx+h;m@vxKs+hwfs6xd{|d0gs+tc;}orm--d{?x..+|D4p
w=Dk+d0f@s+{ekud;wgpd0xs+h;t?x8i;k+mDy;{do-d{dgsut|mDcvtsuhkudp~
iw{fDgpdordcDsf%{|{m@vxormW;d{?x#s+m]ydf%;-d0sums+h;dRdcwxdo
y;{d
oz;{|s+ti;{|ds+tordxb8cwgpdijd0fijdklnmDk+o]f%cwgpdtx9k+d0f%g4h;d02
t|ls+hwde;cWwx+dorm--d{xf{{6hW;k+szdcwxdo
y;{didk+lnm%kuo]fcwgd%
x+d{dgps+tm%cfD;;x
om--d{x
s+hjf@szv#dkud9fD;-d0yjdlnm%kudek4f@s+hwdk
suhwfc9hW;ks#ijdklnmDk+o]f%cwgpdD)hwtx6jf@s+s+dcwx s+h;dZijdklnmDk+o]f%cwgpd
g;ku%diwfDxsqs+h;dzijd0f>f%cwf{{m@vxxd{|d0gsut|mDcs+mjc;dsu;c;d
dcwx+do
yw{|d0xqyWv dt|Dhs+tc;orm--d{?xqom--d{xZf%w-dsumesuh;d
dcwx+do
yw{|d}o
w{sut|i;{d}s+tordxkudgdt%d}orm%kud8v#dt%hs0
0.36
0.355
0.35
RMS Error
b8cedf%g4heijdklnmDk+o]fcjgpdqords+kutgqv dZgpm%oriwf%k+dqdcwx+dozy;{|d}x+dp~
{dgps+tm%cs+mrsuh;dRorm--d{Kt|cs+hwdR{|ty;k4fku]s+hwfsijdklnmDk+o]xydxs
mDc
suhwf@sords+kutg%)#d0gfjxd#v dDdc;dkufs+dxm}o]fcW
;t>dkudcs
orm--d{x{t|y;k4fkutdxKwxjf{{|
gm%csuf%t|cflndvorm--d{?x)vt|s+hrdpW~
gd{{|dcs8idk+lnm%kuorf%cwgpdzm%cf%cW9idk+lnm%kuo]fcwgdordps+kut?g}%wxs
x+d{dgps+tc;8s+h;dydxsxtc;%{d#om--d{Wlnk+mDofZ{|ty;k4fkuZWtd{?;x)kudp~
o]f%k+@fyw{|Z%mWm-}ijdklnmDk+o]f%cwgpdDKcjxdo
y;{d6x+d{dgsut|mDc=:h;m@v~
d%dkqwcw;xdcwxdo
y;{dx9suhwf@sm%;s+idk+lnm%kuos+h;dydxsx+tc-~
D{|deom--d{xh;t?x
x+;D%dxsuxs+hwfs
wx+t|cw-t|dk+dcs
{|d0fkuc-~
tc;8ods+h;m-;x)f%cwRiwf%kuf%ods+dkKx+dpssut|cwDxK%dcwdk4f@s+d0x={t|ywkuf%k+tdx
gm%csuf%t|cwt|c;]fr-t%dkux+dZx+dpsm%lKDmm-W~idk+lnm%kuotc;ormW;d{?x
0.345
0.34
0.335
0.33
0.325
0.32
50
100
150
200
>W0
! #"$%
&'(&)+*
-d{dgsut|mDcevt|s+hk+di;{?f%gpdordcs6jfssudcwx#suh;dZg;ku%d}xmzo
wg4h
suhwf@sfs+d0xsx+dpset?xecwms9cwdd;dsum-dpsudkuort|c;dvh;dcs+m
xs+mDifD;-tc;orm--d{xzs+ms+hwddcwx+do
yw{|dDh;dh;t|{{?gp{t|ozy-~
tc;x+dpsgf%cydjxd0sumxs+mDihwt|{{g{|to
ywt|c;jhwtx}ordf%cwx
dcwx+do
yw{|drxd{|d0gsut|mDc-mWd0x8cwmsRc;dd0ormDk+d
s+dxsxdsuxZsuhwfc
suh;dZywf%x+dp~{|d%d{worm--d{?x6v#m%w{]hjf:%dqjxd0rs+mrxd{|d0gs#om--d{
ijfk4fordpsudk4x36cwx+do
yw{|dx+d{dgsut|mDczjxd0xMs+h;d@f{twf@s+tm%crx+dps
sum]-m]yjm%s+hiwf%kuf%ods+dkR ,Zorm--d{Kxd{|d0gs+tm%c3
=?=)3/.00021843W65M*87Kw:9D-*j
wm%kuv#f%kurx+d{dgps+tm%cx+m%ordpsut|ordx m@%dkwsuxd0fku{|zt|cxd{|d0gs+tm%c
vhwdcdcwx+do
yw{|d0xzf%k+dxo]f{{b8cwd9vf:sumiwk+d%dcszs+h;t?x
t?x
s+mtc;t|s+t?f{<t ;dedcwxdo
y;{dxzvt|s+hormDk+dorm--d{xcwxs+d0f%
m%lxs4fk+s+tc;vtsuhfcdori-sdcwx+dozy;{d%)xmDksRsuh;dorm--d{?x
tcsuh;d
{t|ywkuf%k+yWesuh;dtk8idk+lnm%kuo]fcwgd%fcwiw-sqsuh;dzydxs
orm--d{?xrtcsuh;ddcjxdo
y;{d% =
txeg4h;mDx+dcyW{|mWmDtc;
=
fsijdklnmDk+o]f%cwgpdZm%cs+hwdRh;t|{{?gp{t|ozy;t|cwxds6h;t?xsWi;t?gf%{|{
fD;;?
x >@~@
>ml}s+hwdyjd0xs9om--d{xs+mf%cdcwx+dozy;{|B
d AuD Cp@u
Dk+dd-xsudiWvt?xd
x+d{dgps+tm%cyd%tcwxqWtcwgpdd0f%g4hm%lMsuh;d =
ydxsRormW;d{?x}ijdklnmDk+o]xZv d{|{2suh;dlnmDk+of9xs+kum%c;tc;tsutf%{
dcwx+do
yw{|dfcj
t|stx)ormDk+d-<t Egp;{|slnm%kDk+dd-x+d{dgps+tm%czs+m
jcwom--d{xs+hwfsm@Ddk+;svh;dcf%w-d9s+msuh;dRdcwx+dozy;{d%
=GF3IHKJ-Jj021ZI30WL5)3*W*j
8xs+h;dcW;ozyjdk]ml8orm--d{xrtcf{t|y;k4fkutcwgpkudfDxd0xsuh;d
g4hjfcwgdx ml=wcj-t|cwgm%o
ywt|cwfs+tm%cwx ml=orm--d{x s+hjf@sm@%dk+;s
suh;dh;t{{g{|to
y;tc;x+dps
tcwgk+d0f%x+dxef%Dt|cwgfcort|cwt|orGt ;d
suh;t?xi;kum%y;{do#dzk+d0-wgd}s+h;dzcwo
ydkm%l)orm--d{x8xd{|d0g~
sut|mDcgf%cg4h;mWmDx+d}lnk+mDoy-k4f:vt|cwf]kuf%cw-mDoxufori;{dml
orm--d{xlnk+mDo suh;d{t|ywkuf%k+fcwx+d{dgps+tc;]lnkum%o s+hwfsZxufo~
iw{|dD#lf]ijfk+s+t?gp;{?fkqgm%ozy;t|cjf@s+tm%cm%l orm--d{xqm@%dk+;s4x
suh;d
iwk+mDywfy;t{tsmlsuh;mDx+dz orm--d{xqyjdt|c;9t|cf]k4fcw-mDo
yjfm%l)orm--d{xqtxq{|d0x+xs+hwf%cN
MPO>R Q lnm%S
k Os+h;d
lnkufDgs+tm%c
T UWVXTZYZ[\^]S_G`8abK[cedgf/hIij[ke]R[ml8n2oep qKrZdg`eYc/dgfs[`e][Vt
ce\<[u][\G[vwa_GT `xgo8a_GVX[]yaTz_<`e]kK{[Xabedga(abK[c-[]RaVXTZYZ[\^]
T U4[`e][Vsce\<[]]R[[VvwT VXK\G[wrKcekZaN[d@v&b8[`e][Vsce\<[(_^]ke]Ra
c-[vdgke][L\G[d {`e_<`KfV[abKTZYK]?ab/d)a8-[{UT {V
zReR(WK
yIZK
c-[]RaLVXTZYZ[\^]za{d _<`e[Y
ij[[wZ/[{_GVX[` a
Zyr
| _ab][}@[`K{T ce\<[VX]
$ m /
r $ h W rdg`eY $ h -xU{T@V
abK[ $X [/T ]R_<aT@{D$\^dg@[zz[{@rN@ +r$ r
dK`K[kKVXT `K_^dYKd)a&d][warZW
r YKdgadU{T@VvwT@\<\^dgc-T {&d)t
T Uabe[8VX[wa{_^v]
| [\<\T `2T@`K[
@e
:-Iz/)
]TPab/d)a(abe[uVX_<`K_GVskeV
T@ce]R[{}@[Y N]vwT@{[X_^]oKh oPdg`eY
abK[(VX[d `8T@ce]R[{}@[Y
N]vwT {[_G](@h oeh
NPeSe/R
wr
ij[ke][a[`/[{RUT@{Vd `evw[VX[wa{_^v]d vvwkK{&d v:
!#"%$'&)(+*-,.0/1/32)#/3$'4-$5/32)6,.798:(6/3$;/32)6<=8?>?,@$'4 /3A+BC8(
4D0E.5F$E3BC8:G)70H5F$:EI987J2K4LE3$'ML(?BN8:G)*OB-0/.E3A7HAP5Q"=;RSGL0"AP/9T
U <%?7?8:&),.V$:56/32LA+,?WXB-98:,.& E3?,Y(ARZO[=\^]_/`> 4)A798(+(P>!8E3CG)$/
&),.9*a8(+$:G)'W@M)& /b8cE3O&L,.9*aAGa7?$'Gd&)GL70/3A$'Ga"6AP/32e$:/32)JEbB-98cf
,.&LE3?,;,.&L7J2g8:,;hIi;[j/3$KAGL,.&LE3k/32)8/$'GL(P>lB-$S* ?(,H"6AP/32gm'$$S*
*LA,.70E3AB-AG8/3A$:Gon:pq@m'$$S*Y798:(AMLEr8c/3A+$:Gb8cE3=,.?(?70/39*Ts=2)A,* $?,
G)$:/OB-98G[=\^]tA,K8!4D$$:EoB-J/.E3A+7Ku_AP/OA,O0v070/3AwZx8/K*LA,f
/3A+GLm'&LA+,.2LAG)mK4D$$E3(P>l7?8:(AMLEr8/3?*gB-$S*L?(,H5yE3$'Bz"%?((@798(A+M Er8/39*
B-$S*L?(,?T
m@no1m3prqs%m/otvudwpyxdzgm@o1m8p{wY|Ym3s<o&tvwu{}~U}u1=mxCw:tm3u1tvwu1
pb}nm`p{mxdu1<3o%tSwU~m3u1gx1x1=ng&<o3}u1m3tvgs=s%83wnx1udws=s=m@|M
APR
ROC
ALL
MXE
LFT
BEP
RMS
FSC
ACC
12
l<lW^88^@55/WU:
C
n xdzg=o&om/>xd%wnC#mawp{}:udm{m3no1m3p&qgs%mao1m3s=m@x1=wnCx1wxdzgm
qm@oxao=ngs%m-p2wY|Ym@s=o
##Y=n}:qs%m/ro=ngCm@ud3m3nx
udm@|g>xd%wn%nm@u1udwu/ngm8}|Y~U}:nxd}mbwtm@ud3m3nx&udm@|Y>
xd%wn=nm3udu1wu8wp{}:udm@|x1wngwudpb}:s=%@m@|odwu1m/ob=o8x1z}Ux
nwudp{}s%=3m/|odwu1m/o`*z}:ngmb}o`qmxxdm3u&p2wY|Ym@s=or}:udm&tvwgn|
}n|+o1zg%txxdzgmxdw+wthx1zgm{o1@}:s=mgl`"zgm&m@ud3m3nxudm@|Y>
xd%wn+=n+m3udu1wutvudwpp2wY|Ym@sx1wG|gm3m3n|gowns%wnxdzgm
m3u1tvwudpb}:n3m@o4w:t"p{wY|Ym3s<o}:n|C4Mngwxwnowp{mbw:x1zm3u
p{wY|Ym@sMx1z}:x|Ymngm@o#xdzgm`x1wwt(xdzgm m@utvwu1pb}nm od3}:s=m
w@}:s<gs<}Uxdmhm@ud3m3nx(udm@|Yx1=wn =nm3ududwu/3#mhu*ox3wn^~m@ux
m/}*z+p{m3x1ud=x1w8s=wodozgm3udm&8u1m@gu1m/om@nxdom3u1tvm@x4m3u1tvwu1
pb}nm+}n|udm3u1m/om@nx*o2wudoxbm@utvwu1pb}:nmSm@utvm/>x
u1m/|Y=x1=wn^%m@s=|gob4 r[ y
h6r}n| h6G6`5
}s%sw:tzg<*zz}W~ms%wo1o-gnmg}:p{gs=m+=s%s4zgm3s=l t
=p{gudwU~m@o tvu1wp-x1w^x1zgm-s%wo1o1m@o&}:udmag
}n|g:udm@o1m@>xd%~m3s="}:n|xdzgm+m3u*m@nx-u1m/|Y>xd%wn5%n
m@u1udwu8=o86ngmwx1m3nxd=}s4|Y<od}|Y~U}:nx*}:mwtm@ud3m3nx
udm@|g>xd%wn%ns=wodo<ox1z}Ux%x%~m@o"p{wudmm3p{gz}o1=o"xdwaudm
|g>xd%wnor}:xs%wU6s%wo1o@udm@|Y=ngs%wo1o tvu1wp axdw+g g
<o}audm@|g>xd%wnlgzgm@ud}ou1m/|Y3%ng{s=wodo"tvudwpE &x1w
%/
o1}p{m{}qo1ws=Yx1mb*z}ngmW>M<o wngs=+}udm@|g>xd%wnl
no1wp{m|Ywp{}%no#x1zg<oqg<}o<o}ggudwgud=}:x1m nwx1zgm@udo%x
<o"ngwx@[wu1pb}s%=3m/|-odwu1m/o#|gw2nw:xz}W~mxdzg=oqg=}o3
}:qs%m` ozwUohx1zmm3u*m3nx#u1m/|Y>xd%wnb%n8s%wo1o[tvwum@no1m3p2
qs%mom@s%m/>xd%wnwnx1zgmxdm@ox2u1wqgs%m@pbo2}n|/+p{mxdu1<3o@
3wp{}u1m/|`x1w4x1zmqm@oxhp2wY|Ym@s=oho1m3s=m@x1m/| tvwuSm/}*zru1wqgs%m@p
}n|p{mxdu1<:Co&;xdzngwu1pb}:s==3m@|odwu1m/o3n}sm3u1tvwu1
pb}nm/o}:udm`m@ox1=p{}:x1m/|-wns=}u1mn}:sMxdm@oxom3xdonw:xo1m@|
tvwu(xdud}%n%ng}:n|r=o(x1zgm"}W~m@ud}mSwU~m@ulx#wx1ud<}:s<o(%x1z&m@}*z
u1wqgs%m@p#Swo%x1=~m`m3nxdu1=m@o=nx1zm xd}qgs%mrp2m/}:nm3ududwu"}o
udm@|gm/|MY
mhm@no1m3prqs%mo1m3s=m@x1=wnm@utvwu1p{m@|-qmxxdm3u/
no1m3prqs%mo1m3s=m@x1=wn=nobw:tU2xd%p{m@o2o1=ng%3}nx}Ux
g /5n}W~m3u*}:mhm3nom@prqgs=mo1m3s=m@x1=wnu1m/|Y3m@o
s=wodogU^wU~m@uxdzgm`qm/oxp{wY|Ym@s=o"tvwum@}*z8gudwqgs=m3pE}:n|
p{m3x1ud=r(wpb}:mrxdzg=o 3wn3u1m3x1m%txdzgm{qm/ox p{wY|Ym3shz}o
}33gud} 8Ss%wo1o&=o2g=@S}n|}ngudm@|Y>
xd%wn=nCs%wo1o`wu1udm@o1wn|gox1wudm@|Y=ng-s=wodo4x1wg @8wu
=n3u1m/}o1%ng+}33gud}xdw ^8Y%p{=s=}u1s=S%t#xdzgm8qm@ox
p{wY|Ym@sz}o#a g}n-gudm@|Y>x1=wn8%n-s%wo1owu1
udm@o1wn|go(x1w`udm@|Y=ng xdw4%/:l(m/ox[o1mx*o3wnxd}%n
g @}o1m@o@ n3u1m/}o1%n}33gud} ^p{m/}:noUW
p{wu1m 3}om/o"}:udm4u1m/|Y=x1m/|-3wudu1m/>x1s=aq^bxdzgm m3no1m3p&qgs=m@o@
"zm%p{gudwU~m@p{m3nxh=ongw:x|gud}p{}:x1<:qgYx3wno1=ox1m@nx1s=r=nY
3u1m/}o1%n}@gu*}3 g wuSu1m/|Y=ng`&g gUW3wp2
}:udm@|x1wxdzgmaqm@oxrw:t:p{wY|Ym3s<o <o %p{gudm@odo1%~mbzgm3n
xdzgm qm@oxp2wY|Ym@s=oz}W~m`o*z-wwY|m3u1tvwudp{}nm#Yx#m
10
0.2
0.4
0.6
0.8
/1":^*d`@@/@>:**d//2d
U
;[3 h r3
*U (
^3/ ;
|gwn x"}:nxxdwawU~m@udoxd}:x1m`x1zmr=p2u1wU~m3p{m3nxw:thm@nom@prqgs=m
o1m3s=m@x1=wnl
gwurwp{}:ud=o1wn(lq}:=ngwu qw^wox1=ngxdu1m@m@o
^=m3s<|go}au1m/|Y>xd%wn%ns=wodowp{}:udm@|-x1wbu*}Wx1udm3m@o@
g &xd%p{m/ox1zmqm@ngmxw:thm3nom@prqgs=mom@s%m/>xd%wnl##Yxx1zgm@nl
=p{gudwU~=ng xdzgm4m@utvwu1pb}:nmwtlp2m/|Y=w^3u1mz%za~U}:ud=}nm
p{wY|Ym@s=oo1*z}o"|Ym@3=o1=wnaxdu1m@m@o"<o"m@}o=m3ux1z}n-=p{gudwU~=ng
xdzgm m3u1tvwudp{}nm`wt(x1zm ~m3udbqm@oxp{w^|gm3s<o3
l!(#"%$'&(&
#&C/*),+.-0/21433/5&
=gudm o1zgwUo[x1zgmm@ud3m3nxudm@|Y>x1=wnb=nas=wodo[wtm@no1m3p2
qs%m o1m3s=m@>xd%wn8%x1zq}:=ng&}o ^x1zgm4tvu*}x1=wn8wt(p{wY|Ym3s<o
=nxdzgmq}:o@W~U}:ud%m/oltvudwp|YwUnx1w #tvwu(xdzgm4@p{mx1
ud<3o}W~m3u*}:m/|}udwodoxdzgm-gudwqs%m@p{o@-#x q}o
3wnxd}%n}:s=sMp{w^|gm3s<oz=*z=o"m 6g%~U}s%m@nx#x1w{ngwxq}:=ng
na}W~m3u*}:mUq}m/|2m3no1m3p&qgs%mom@s%m/>x1=wnbudm@|gm/o[s=wodo[}:n
}|g|Y%x1=wn}s^g }:x gY"zgmudm@o1gs%xdo%n2}qgs%m/o[#}:n|
r}:udmtvwu g}r~U}:s=gm#mo1m3s=m@x1m@|aqm3tvwudms=ww^%ng&}Ux
xdzgm@o1mudm@o1gs%xdo@}:n|{zg=*7
z =gudm`o1zgwUoh<oo1gqwYx1=pb}:s
"z=og <o[}:qwgx}4x1zg=ud|2wtxdzgmx1wxd}sgGqm3nmgx#m
o1m3m %x1zm3no1m3p&qgs%mo1m3s=m@x1=wnl8
%gudmrvzg<*zo1m@oxdzgm
n}:sxdm@oxo1mx*od"o1gm@oxdo %nxdzgm`ud}ngm`g=:
9Cg 2wgs=|
^=m3s<|tvuxdzgm3ub=p{gu1wU~m3p{m3nx/<
;Cm+}:udm3gu1udm3nxds%m3^m3u1
=p{m3nx1=ng;xdzo=ng3u1wo1o-~U}:s==|}Ux1=wnx1wg<*5}ngm/}:u
wYx1=pb}:sM~U}:s=gm wt tvwum/}*zgudwqgs=m3p }n|-p2m3x1ud=
=>?A@BDCFE:G'HJIKMLN?AOP@BQBQRSTKURPVXWYEHUZ[]\*G_^a`bRPc
d
@e_G'RPcfOPCgB
m3udm#m`gudm@o1m3nx"}23}om`ox1|Y8zgm3udmwYx1=p{%@%n{}n8m@nY
o1m3p&qgs=m"x1w4x1zgmwududm@>xSp{mxdu1<"z}o[%p{}>x[wn{}:nb}ggs==@}U
h
iQjkmlon8prqQsutTvwTtx]yz:t|{}DwTy~x%~x
gxD
tJDtUtTtTwTy~x74tvyDt5tTy%r{tv:x]tmvxD~xM:mv~yD2
_D
]
4
D
_a
'
QD D
rQ_f
rQ_f Q
a!
_ D
_
_r
4
Q
_4
_r
Q
Qom
o
]'
]
_r
_
'r
'!
mr'
'
_
'
_
'
'
_r
]'
'r
_
'
]'
r_r
_
_r
4
Q
m_om
_]
m_r
Qom
_Q
_]
4'
'
_]
QQ
_r
rr
r
_
Q]
_
_o
_r
_Q
_r
r '
Q_
!Q4o
_
]_r
Qo
'
'
]'4
_r
rr
rr_
r
'Q
]
Q
]_4
m_rr
rr_
m_rr
_r_
ar
]r
Qom'
m_r
m_r]
m_]
Qomr
Qr]
__m
rr4
_o]
_rr
T2ru2'!'rT'!'PT!m'Q_!'T'!m
'NDT_!'r mmm!m]:mD_ 8
TT%mrr2mmrTUm!|Tf 8'oarroPM(
''TmP'!m!! Q TTrXP!'
mmTro!(A!!'Tm'Tr
'r%oro!
]2!m'!<T'Q
Tm!']r!o']T7PmTD!m''2TT]r!o']T
(mT!%DT2o%*T!oTPN'0ro!TTQ
PJ!2'7T'_ XT
T'''Mm_!2mDT
mTouTmmDr_mDm'J'T_o|T
o#'8'T'!Mr2om!
''!!]T_(
mr!_5
or
(mT282To:rP!]
T'PTroT!r]mrMT'2__m
Q
!2 ]r!mD%A4Q
T!'<_5
2']'!
(rTmDor!!T4Q'''MrTrMr_!or
2
FTm<4T'(P'5
'
0,
!!TTm:m
P!_
2'T]%D]m:
mT
r0P'T'T!!'*]r
T
T'7(P%r!']TNPr
(m4
r!']T7Jo
T'A%__!
rT(P_
m7r2P'!o_T2 m%_P'!
m!!D_5
P8!TP
rTmm 8'o
]5'U(P
T! u|TU2'U m
X'TTmD!
4o2TT54o'r!M!_
'%P mr''!o!'!'UT7!!_%r!gr
r!'!'']'_'P'M'!__MmTr2'!
6 a " a $
[H}
U
}
py a
}
U +
5r
0']UJ'2TrTm!DM!mD(r
%mT!a
*r
2__g
8
4mrTT0T'
']U#]
r
2TomT!DNm!'2
TaT'82Pm!D
!A'm%
_!2 'm'rQ'mTr_oP!MT5_
!mmm!m]aT2
m_5
m
T'
_'*]JP
p
r(U27mUm!!
PT!D!'Y_ (D
:r
T'
8'2m!DTM
'mT
2P7m!D
8mT
roP
APrDorM_ umTmmm:m'N2__mQ
mmT
_ (
Tr*!]b:D
TmmP
2Tom
!
r
7m!D2
(DDT
5'_%'P:
oP
X0 2_Q
,]2T'
X7_
T2 m4|f'_M!]:DX
'm_2'*
'!F:D
8'o
Pr
PTr:mP'!Jm!!7oa'X_'!'
TmPgorTamDT!DT4Q'Df!Tr!%
D:T'rTa'orfJ_ (DarT'!'r
"!$#&%(')+*,''%.-/102/,3546*678*698:<;=-98>
?A@CBD@FEHG8IKJCLGNMOJKPRQSGTQULSVWJKICICXICJKY[ZJCQ\^]F_M `
a
^bdcfehgCicfjKgCkfl &m Tn
2o
TT'am_!2 %mP'!UT72To
mm!!*
'
*Pm
r!
mm U!!!m!'!'T'J]o']T'Dar
mm'!UP|!%_,r!:_]rDT8]rm
2__!
8'4o'r!!(m'T
!'Mrr2TmaTT'
rr2TmU
!!5AmN0!'7(PTT'
m!!
,%__
P'!2P|!J'2]r!o']T
'
?A@F@PDYFM GNMOJKPRQG8I"PD`M
'!o_<!'rT!om_P!
r02Q'mo
A
!__7o**T!m! m2Q'mXm]!
f
M_oT!'_TTT'!'UTTf'!' #Tr_
''TXTT'
2__mo5'
XmTmr'Tm
o'rr2mmJ'm'r2'(mT2
]o']T0PmT
Pr!'
2Q'mgoXmom.P7
T2m!!2__m8P|TrNm%
o%'r'TrTTN2'D!Ur!8%__!2r!'
Tr!
TP'!T
r(
2QNTT']o']T'rT
'_!'TT'02__XT
8'!%'T]Q!'%rQ!2
4
tO6}O Oy(F(O r,8
(OT
O (Ff8OTTOHrF8 "!$#%!'&(
8t8 HtrH Dt8N " RU
tWrpt y* )NROT(y + " R,
.-./ 0
8O{8fT T1
!d2 "3
Ot8N8
zzNTfOO 5 4,8t 6 879 t8tO:
O8 f<T8
u8f f; <>=???
tNNO
+ r88 @<BA ???tD
+fT
C<D=$?2?Wf tfDNp^Nu^tON8
0 D
RNNF.=$?tAtNOO{88 rH T" fGTT E +=T?t8 *
HfO
8 z
NNhffffN
NNrhfN
NNffffd
NNO
8 z
NNf8
NNhNfO
NNNO
Nfff
D{fTf (T8O,OfyDfp0
f 1 8r tr$
8 HR(O8RHN,0
t H} $ r N
d8NOf
OT(O8tTTf Dt8R^6(tR 0
UNNO}z8T T 62 O rWNDl 0
{T{ ftrf 8 8y8fT
t O,*
N Ht T 8Rf0ODHtN8trFO ,{f $fN O
qEklkF
"*(s*l;$*lw*le'Ela'8l*,3,
,2P23w3l*i,%e*l*e'pa32l$
lekpp*wapemppkl$eq,l$2e
s23leeblw$mpp*8b*$2,
q"FkFXF33F3kq.k9a
tt (l;'$ek$$e2,e(l
'i` *5%FlFkquap $mpw3$a
*,wp $p3t*ii2,'lzE
la3"pw$*k*il ukp$u,p*,V,2il
b2*$2,eae2*e*$qw:2p*$F*il b3
e'$('q2q,leF,epw3$a
s*t$Vsiq1le",3*
t Vl$%(qE22p*i"e*i2il2
E'$tleuis*(ewepXee,i5pa98ip
b$e$e2*ilebeZepZepZe p2qEp:
a3m',we',2P3*(l%eV,2ipb%2(lE3l2p
,ea2(Plde*$;$+eZ**i
apqll$k32 .ep3e.2'*le
; '$1le;lelle1*l,llele,2pl
l$l,e
8$ew*hblw$hllee
Ea3$2ple
Fpp8,eile
\'$2
b2:
F
5"
5q
Va
E$w3e*'kl$8a
V
Vq,leelp
e! " V2#
$ V2qq(blw$"3$e, b$a%
&&'(Pq,l)
$2w$llee
**
+'i
,
k2,e22$2- .e,p,2$. Va
/ 1 032 ; ua$l*$. a22El*
,m21si3l$1l, q2ep me",e$
2.*82ql(8a$21*a3ela' b2*$2$
el(lqz /546 2 (ai 1l(a2
23leE*$d'*le 5lEV1eda23l
qe
elt$ l*'pZ: eip:$a5 $:$:p e52$
$ipu3l*a.a23leaz,l /547 2 4798;: ul
3la
l,X,*(ewl,2+l< qeZ**i
lelle*l,llwe zsii'pq.ip.
b$p2$p eZ2$e$22ipei"*
46=*8?> V"l"232*e*w,
!@%:i9
A222e
2- u,$ sew$ke** , e2pw$.q,l
qw+be 22pVe eVe 2peepk2pt$
'lule(*2ew$C B 'l1*l
Dlq9*
EGFP H$ epepXe$
@w
ICJKML<NOKQP ,3l $ I L P z8E;1le
@ e
el.*$le,"q,l$e*qppe a39
+$3
*leplP*31
!@ 2plpel2$,,
3lee* lm32R
+$3 2-
*SUT6V&W!X5YZ![\^]_Z VG`
>
a$,l*eVe*mq,l;e
bdc%+Ve +Ve'ei$
2ew$s'l2;$& 1e*q,l|leQ7
'f
g a32l;$a *l
h 6 i:jZ
w$26F
, , tkB2lF 2-m%V.*s2lu;'
ee", $ee;e'$23 *
F3*" a # $2 Xl*euepe,\;'
eePleeaetv{<|5|u}w{|uxq tuGq)"{kks<u
of t xCw*Cq<|t w{Ox<w|Z e
C6e^C7"}"<l.^Ck^"}}.OC
(-- Uk}k^!-)z" k}CQC-zz
k^d$}k-!7z}}zz.k<<<v!nOv}
G^<}}|G G.kx<_"<!vx
z"|}v!"
xkl z
Uf<" UzC^
k
e <
An ensemble is a collection of classifiers whose predictions are combined with the goal of achieving better performance than the constituent classifiers. A large body of research now exists showing that ensemble learning often increases performance (e.g. bagging [3], boosting [21], stacking [25]).
Recently, ensemble selection [7] was proposed as a
technique for building ensembles from large collections of
diverse classifiers. Ensemble selection employs greedy forward selection to select models to add to the ensemble, a
method categorized in the literature as overproduce and
choose [20]. Compared to previous work, ensemble selection uses many more classifiers, allows optimizing to arbitrary performance metrics, and includes refinements to
prevent overfitting to the ensemble's training data—a larger
problem when selecting from more classifiers.
In this paper we analyze four previously unexplored aspects of ensemble selection. First, we evaluate ensemble
1 This is different from wrapping cross-validation around ensemble selection, which would not increase the data available for hillclimbing.
We investigate four previously unexplored aspects of ensemble selection, a procedure for building ensembles of
classifiers. First we test whether adjusting model predictions to put them on a canonical scale makes the ensembles
more effective. Second, we explore the performance of ensemble selection when different amounts of data are available for ensemble hillclimbing. Third, we quantify the benefit of ensemble selection's ability to optimize to arbitrary
metrics. Fourth, we study the performance impact of pruning the number of models available for ensemble selection.
Based on our results we present improved ensemble selection methods that double the benefit of the original method.
1 Introduction
high variance. Ensemble selection uses bagging over models to reduce this variance. Multiple ensembles are built
from random subsets of the models, and then averaged together. This is analogous to the feature bagging methods
proposed by Bay [1] and Bryll et al. [5] and used in random
forests [4].
Another technique used in the original paper is allowing
models to be added to the ensemble more than once. This
provides two benefits. First, models added multiple times
get more weight in the ensemble average. Second, when
models are added without replacement, ensemble performance deteriorates quickly after the best models have been
exhausted because poorer models must then be added. This
makes deciding when to stop adding models to the ensemble
critical because overshooting the optimal stopping point can
yield much worse performance. Selection with replacement
allows selection to continue adding copies of good models instead of being forced to add inferior models. This, in
turn, makes deciding when to stop adding models far less
critical. All of the experiments in this paper use these three
techniques.
2 Background
In this section we briefly review the ensemble selection
procedure first proposed by Caruana et al. [7]. Ensemble selection is an overproduce and select ensemble method carried to an extreme where thousands of models are trained
using many different learning methods, and overfitting is
moderated by applying several techniques.
In ensemble selection, models are trained using as many
learning methods and control parameters as can be applied
to the problem. Little or no attempt is made to optimize
the performance of the individual models; all models, no
matter what their performance, are added to the model library for the problem. The expectation is that some of the
models will yield good performance on the problem, either
in isolation or in combination with other models, for any
reasonable performance metric.
Once the model library is collected, an ensemble is built
by selecting from the library the subset of models that
yield the best performance on the target optimization metric. Models are selected for inclusion in the ensemble using
greedy forward stepwise model selection. The performance
of adding a potential model to the ensemble is estimated using a hillclimbing set containing data not used to train the
models. At each step ensemble selection adds to the ensemble the model in the library that maximizes the performance
of the ensemble to this held-aside hillclimbing data.
When there are thousands of models to select from, the
chances of overfitting increase dramatically. Caruana et al.
describe two methods to combat overfitting. The first controls how ensembles are initialized. The second performs
model bagging—analogous to feature bagging [1, 5]—to reduce the variance of the selection process.
Ensemble Initialization: Instead of starting with an
empty ensemble, Caruana et al. suggest initializing ensembles with the N models that have the best uni-model performance on the hillclimb set and performance metric.
Bagged Ensemble Selection: It is well known that feature subset selection (e.g. forward stepwise feature selection) is unstable when there are many relevant features [2].
Ensemble selection is like feature selection, where models
are features and model subsets are found by forward stepwise selection. Because of this, ensemble selection also has
3 Methodology
We use all of the learning methods and data sets used by
Caruana et al. [7], and all of the performance metrics except
CAL (a probability calibration metric) and SAR (a metric
that combines accuracy, squared error, and ROC area). In
addition, we also train models with logistic regression (LOGREG), naive Bayes (NB), and random forests (RF) [4], and
experiment with four additional data sets: MG, CALHOUS,
COD, and BACT. All of the data sets are binary classification problems. The learning methods and data sets are
described in Appendix A and B, respectively. The performance metrics we study are described in the following
subsection.
Table 1. Performance with and without model calibration. The best score in each column is bolded.
ACC FSC
LFT ROC APR BEP RMS MXE MEAN
ES-BOTH
0.920 0.888 0.967 0.982 0.972 0.964 0.932 0.944
0.946
ES-PREV
0.922 0.893 0.967 0.981 0.966 0.965 0.919 0.932
0.943
ES-NOCAL
0.919 0.897 0.967 0.982 0.970 0.965 0.912 0.925
0.942
ES-CAL
0.912 0.847 0.969 0.981 0.969 0.966 0.935 0.940
0.940
BAYESAVG-BOTH
0.893 0.814 0.964 0.978 0.963 0.956 0.918 0.934
0.928
BAYESAVG-CAL
0.889 0.820 0.962 0.977 0.960 0.955 0.912 0.925
0.925
MODSEL-BOTH
0.871 0.861 0.939 0.973 0.948 0.938 0.901 0.916
0.918
BAYESAVG-PREV
0.881 0.789 0.956 0.970 0.956 0.947 0.893 0.911
0.913
MODSEL-PREV
0.872 0.860 0.939 0.973 0.948 0.938 0.879 0.892
0.913
MODSEL-CAL
0.870 0.819 0.943 0.973 0.948 0.940 0.892 0.910
0.912
MODSEL-NOCAL
0.871 0.858 0.939 0.973 0.948 0.938 0.861 0.871
0.907
BAYESAVG-NOCAL 0.875 0.784 0.955 0.968 0.953 0.941 0.874 0.892
0.905
values fall between 0 and 1 or 0.89 and 0.90. These metrics
measure how well the positive cases are ordered before negative cases and can be viewed as a summary of model performance across all possible thresholds. The rank metrics
we use are area under the ROC curve (ROC), average precision (APR), and precision/recall break even point (BEP).
See Provost and Fawcett [19] for a discussion of ROC from
a machine learning perspective.
The probability metrics are minimized (in expectation)
when the predicted value for each case coincides with the
true conditional probability of that case being positive class.
The probability metrics are squared error (RMS) and cross-entropy (MXE).
0.14 from a neural net does not necessarily mean the same
thing as a prediction of 0.14 from a boosted tree or SVM.
Predictions from neural nets often are well-calibrated posterior probabilities, but predictions from SVMs are just normalized distances to the decision surface. Averaging predictions from models that are not on commensurate scales
may hurt ensemble performance.
In this section we evaluate the performance of ensemble selection after translating all model predictions to the
common language of well-calibrated posterior probabilities. Learning algorithms such as boosted trees and stumps,
SVMs, or naive Bayes have poorly calibrated predictions
[15]. A number of methods have been proposed for mapping predictions to posterior probabilities. In this paper we
adopt the method Platt developed for SVMs [18], but which
also works well for other learning algorithms [15]. Platt's
method transforms predictions by passing them through a
sigmoid whose parameters are learned on an independent
calibration set. In this paper, the ensemble selection hillclimb set is used for calibration as well.
Table 1 shows the performance of ensemble selection
(ES), model selection (MODSEL),² and Bayesian model
averaging (BAYESAVG) [8], with and without calibrated
models. Results are shown for four different model libraries: 1) only uncalibrated models (NOCAL), 2) only calibrated models (CAL), 3) both calibrated and uncalibrated
models (BOTH), and 4) only SVMs are calibrated, to mimic
prior experiments [7] (PREV). Each entry is the average of
five folds on each of the eleven problems. The last column
shows the mean performance over all eight metrics. Rows
are sorted by mean performance.
Comparing results for ensemble selection with and without calibration (ES-CAL and ES-NOCAL), we see that calibrating models improves RMS and MXE (significant at .05)
but hurts FSC. There is little difference for LFT, ROC, APR
2 Model selection chooses the best single model using the hillclimb set.
and BEP. For model selection we see the same trends: calibrated models yield better RMS and MXE and worse FSC.
The magnitudes of the differences suggest that most if not
all of the improvement in RMS and MXE for ensemble selection with calibrated models is due to having better models in the library rather than from ensemble selection taking
advantage of the common scale of the calibrated models.
We are not sure why calibration makes FSC performance
worse for both MODSEL and ES, but again suspect that the
differences between ES-CAL and ES-NOCAL are due to
differences in the performance of the base-level models.
Having both calibrated and uncalibrated models in the
library (ES-BOTH and MODSEL-BOTH) gives the best of
both worlds: it alleviates the problem with FSC while retaining the RMS and MXE improvements. For the rest of
the experiments in this paper we use libraries containing
both calibrated and uncalibrated models.
Unlike with ensemble selection, using calibrated models
for Bayesian model averaging improves performance on all
metrics, not just RMS and MXE (significant at .05). With
calibrated models, Bayesian averaging outperforms model
selection but is still not as good as ensemble selection.
bagging
no bagging
modsel
10000
bagging
no bagging
modsel
1000
bagging
no bagging
modsel
1000
normalized performance
normalized performance
10000
0.9
0.8
0.7
bagging
no bagging
modsel
0.5
0.4
100
1000
hillclimb data size
bagging
no bagging
modsel
1000
10000
MEAN
0.6
10000
MXE
bagging
no bagging
modsel
1000
10000
0.98
0.96
0.94
0.92
0.9
0.88
0.86
0.84
0.82
100
RMS
1
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
100
1000
BEP
normalized performance
normalized performance
normalized performance
10000
1
0.98
0.96
0.94
0.92
0.9
0.88
0.86
0.84
0.82
100
bagging
no bagging
modsel
hillclimb data size
APR
bagging
no bagging
modsel
1000
10000
1
0.98
0.96
0.94
0.92
0.9
0.88
0.86
0.84
100
ROC
1
0.98
0.96
0.94
0.92
0.9
0.88
0.86
0.84
100
LFT
normalized performance
1000
1
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
100
normalized performance
FSC
normalized performance
normalized performance
ACC
1
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.5
100
10000
1
0.95
0.9
0.85
0.8
0.75
0.7
100
bagging
no bagging
modsel
1000
10000
Figure 1. Learning curves for ensemble selection with and without bagging, and for picking the best
single model (modsel).
Table 2. Performance with and without cross-validation for ensemble selection and model selection.
ACC FSC
LFT ROC APR BEP RMS MXE MEAN
ES-BOTH-CV
0.935 0.926 0.982 0.996 0.992 0.977 0.984 0.989
0.973
MODSEL-BOTH-CV 0.907 0.923 0.971 0.985 0.968 0.963 0.945 0.961
0.953
ES-BOTH
0.920 0.888 0.967 0.982 0.972 0.964 0.932 0.944
0.946
MODSEL-BOTH
0.871 0.861 0.939 0.973 0.948 0.938 0.901 0.916
0.918
this is analogous to normal ensemble selection with a 5000
point hillclimb set. Table 2 shows the results averaged over
all the problems. Not only does cross-validation greatly improve ensemble selection performance, it also provides the
same benefit to model selection. Five-fold cross-validated
model selection actually outperforms non-cross-validated
ensemble selection by a small but noticeable amount. However, ensemble selection with embedded cross-validation
continues to outperform model selection.
selection achieves a raw accuracy score of 90%, and cross-validated ensemble selection achieves 95% accuracy, then
the percent reduction in loss is 50%—the loss has been reduced by half. The MEAN row is the average improvement
for each metric, across datasets. For comparison, the PREV
row is the performance of the original non-cross-validated
ensemble selection method (i.e. no cross-validation and
only SVMs are calibrated).
Embedding cross-validation within ensemble selection
doubles its benefit over simple model selection (from 6.90%
to 12.77%). This is somewhat of an unfair comparison; if a
cross-validated model library is available, it is just as easy
to do cross-validated model selection as it is to do cross-validated ensemble selection. The last row in Table 3 shows
ACC
2.77
2.08
7.95
5.73
6.68
13.66
15.21
21.55
2.77
4.45
2.49
7.76
4.96
2.89
FSC
5.89
3.83
9.49
7.46
7.26
16.36
14.50
25.66
-0.05
1.98
3.27
8.70
4.56
3.07
LFT
8.72
16.42
48.00
14.33
12.35
12.32
100.00
0.29
2.08
4.25
13.65
21.13
16.22
10.82
ROC
7.45
4.13
8.69
9.14
11.34
37.53
32.84
69.10
6.33
11.84
6.92
18.67
8.43
9.97
the percent loss reduction of cross-validated ensemble selection compared to cross-validated model selection. Comparing PREV and MEAN_cv, we see that after embedding
cross-validation, ensemble selection provides slightly less
benefit over model selection than un-cross-validated ensemble selection did over un-cross-validated model selection.
While training five times as many models is computationally expensive, it may be useful for domains where the
best possible performance is needed. Potentially more interesting, in domains where labeled data is scarce, cross-validated ensemble selection is attractive because a) it does
not require sacrificing part of the training data for hillclimbing, b) it maximizes the size of the hillclimbing set (which
Figure 1 shows is critical when hillclimb data is small), and
c) training the cross-validated models is much more feasible
with smaller training data.
APR
6.70
5.49
8.81
10.52
14.99
37.78
33.05
45.29
7.28
12.65
9.62
17.47
6.24
9.37
BEP
7.58
1.76
6.15
7.11
7.64
16.77
15.85
19.25
4.62
6.04
2.73
8.68
5.15
2.84
RMS
2.26
1.42
7.17
2.39
7.80
12.65
17.13
19.59
1.40
2.57
1.66
6.91
3.27
2.54
MXE
4.08
4.15
12.74
3.79
12.92
27.43
29.47
34.58
2.70
6.10
3.33
12.84
6.39
4.22
MEAN
5.68
4.91
13.63
7.56
10.12
21.81
32.26
29.41
3.39
6.23
5.46
12.77
6.90
5.71
RMS
0.969
0.935
MXE
0.968
0.936
OPTMETRIC
0.973
0.946
significantly smaller.
The scatter plots in Figure 2 plot the performance of optimizing to RMS against the performance of optimizing to
OPTMETRIC, with one graph per target metric. Again, we
can see that ensemble selection performs somewhat better
with OPTMETRIC. Always optimizing RMS is frequently
very competitive, especially when performance gets close
to a normalized score of 1. This is why the benefit of direct
metric optimization is so small for cross-validated ensemble selection. These results suggest that optimizing RMS
(or MXE) may be a good alternative if the target metric is
too expensive to use for hillclimbing.
ACC
FSC
LFT
ROC
MEAN
1
0.57
0.24
APR
0.89
BEP
0.92
RMS
1
MXE
1
0.75
0.86
0.89
0.82
0.84
Figure 2. Scatter plots of ensemble selection performance when RMS is optimized (x-axis) vs when
the target metric is optimized (y-axis). Points above the line indicate better performance by optimizing
to the target metric (e.g. accuracy) than when optimizing RMS. Each point represents a different data
set; circles are averages for a problem over 5 folds, and Xs are performances using cross-validation.
Each metric (and the mean across metrics) is plotted separately for clarity.
The graphs in Figure 3 show the average behavior across
our 11 data sets. Ensemble selection's behavior under pruning may in fact vary when each data set is considered individually. Averaging across problems could hide different
peak points. Figure 4 shows RMS performance for each of
the problems.
formance when employing varying levels of library pruning. The pruning works as follows: the models are sorted
by their performance on the target metric (with respect to
the hillclimb set), and only the top X% of the models are
used for ensemble selection. Note that this pruning is different from work on ensemble pruning [12, 22, 23, 26, 13].
This is a pre-processing method, while ensemble pruning
post-processes an existing ensemble.
Figure 3 shows the effect of pruning for each performance metric, averaged across the 11 data sets and 5
folds using non-cross-validated ensemble selection with
and without bagging. For comparison, flat lines illustrate
the performance achieved by model selection (modsel) and
non-pruned ensemble selection (es-both). The legend is
shown in the ACC graph.
The figure clearly shows that pruning usually does not
hurt ensemble selection performance, and often improves
it. For ACC, LFT, and BEP, pruned ensemble selection
(the line with boxes) seems to yield the same performance
as non-pruned ensemble selection. For the other metrics,
pruning yields superior performance. Indeed, when using
more than 50% of the models, performance decreases. Interestingly, library pruning reduces the need for bagging,
presumably by reducing the potential for overfitting.3
Although performance starts to decline at different pruning levels for the different problems, it is clear that larger
model libraries increase the risk of overfitting the hillclimb
set. Using 100% of the models is never worthwhile. At
best, using the full library can match the performance of using only a small subset. In the worst case, ensemble selection overfits. This is particularly evident for the COD data
set where model selection outperforms ensemble selection
unless pruning is employed.
While further work is needed to develop good heuristics
for automatically choosing an appropriate pruning level for
a data set, simply using the top 10–20% of the models seems to be
a good rule of thumb. An open problem is finding a better
pruning method. For example, taking into account model
diversity (see for example [11, 17]) might find better pruned
sets.
3 The
bagging line at 100% does not always match the es-both line,
even though these should be equivalent configurations. This is particularly
evident for FSC, the highest variance metric. The sorting performed before
pruning alters ensemble selection's model sampling, resulting in additional
variance.
bagging
no bagging
es-both
modsel-both
0
20
40
60
80
100
0.905
0.9
0.895
0.89
0.885
0.88
0.875
0.87
0.865
0.86
20
% models
40
20
40
60
80
100
20
0.97
0.965
0.96
0.955
0.95
0.945
20
40
60
80
100
0.93
0.92
0.91
60
80
100
20
% models
40
60
80
100
80
100
80
100
0.97
0.965
0.96
0.955
0.95
0.945
0.94
0.935
20
40
60
% models
MEAN
normalized performance
normalized performance
0.94
0.965
0.96
0.955
0.95
0.945
0.94
0.935
0.93
0.925
0.92
0.915
40
BEP
normalized performance
normalized performance
normalized performance
0.94
0.935
MXE
0.95
40
0.95
0.945
% models
0.975
RMS
20
0.96
0.955
% models
0.96
100
0.98
% models
0.9
80
0.965
APR
normalized performance
60
0.97
% models
ROC
0.99
0.988
0.986
0.984
0.982
0.98
0.978
0.976
0.974
0.972
LFT
normalized performance
FSC
normalized performance
normalized performance
ACC
0.925
0.92
0.915
0.91
0.905
0.9
0.895
0.89
0.885
0.88
0.875
0.87
60
80
100
0.955
0.95
0.945
0.94
0.935
0.93
0.925
0.92
0.915
% models
20
40
60
% models
9 Discussion
In this section we further analyze the benefit of embedding cross validation within ensemble selection and also
briefly describe other work we are doing to make ensemble selection models smaller and faster.
20
40
60
80
100
0.97
0.96
0.95
0.94
0.93
0.92
0.91
0.9
0.89
0.88
0.87
20
40
% models
0.92
0.91
0.9
0.89
0.88
40
60
80
100
0.945
0.94
0.935
0.93
0.925
0.92
0.915
0.91
0.905
20
% models
20
40
60
40
60
80
100
80
100
0.96
0.95
20
40
60
80
100
20
40
normalized performance
0.91
0.9
0.89
0.88
0.87
40
60
80
100
0.94
0.92
0.9
0.88
0.86
0.84
0.82
0.8
0.78
0.76
20
40
60
80
100
SLAC: RMS
0.92
20
60
% models
MG: RMS
normalized performance
0.98
0.97
0.96
0.95
0.94
0.93
0.92
0.91
0.9
0.89
0.88
% models
100
MEDIS: RMS
0.955
80
% models
0.965
0.945
60
HS: RMS
normalized performance
normalized performance
40
% models
0.97
% models
0.86
20
LETTER.p2: RMS
normalized performance
% models
LETTER.p1: RMS
0.96
0.95
0.94
0.93
0.92
0.91
0.9
0.89
0.88
100
normalized performance
0.93
20
80
bagging
no bagging
es-both
modsel-both
COVTYPE: RMS
normalized performance
normalized performance
COD: RMS
60
0.985
0.98
0.975
0.97
0.965
0.96
0.955
0.95
0.945
0.94
0.935
0.93
% models
0.94
0.87
CALHOUS: RMS
normalized performance
BACT: RMS
normalized performance
normalized performance
ADULT: RMS
0.98
0.975
0.97
0.965
0.96
0.955
0.95
0.945
0.94
80
100
% models
0.97
0.96
0.95
0.94
0.93
0.92
0.91
0.9
20
40
60
80
100
% models
ing the size of the hillclimb set from 1k to 5k would explain almost all of the benefit of cross-validation. These
results, however, show that on average across the eight metrics the benefits from ES-HILL and ES-AVG are roughly
equal. About half of the benefit from embedding cross-validation within ensemble selection appears to result from
the increase in the size of the hillclimb set, and the other
half appears to result from averaging the sibling models.
Increasing the size of the hillclimb set via cross-validation
(as opposed to having more data available for hillclimbing)
provides less benefit in practice because there is a mismatch
between the base-level models used to make predictions on
the hillclimbing set and the sibling-averaged models that
9
ACC
32.9%
80.5%
113.4%
FSC
37.2%
13.6%
50.8%
LFT
48.0%
54.0%
102.0%
ROC
38.8%
59.0%
97.8%
will be used in the ensemble. In other words ensemble selection is hillclimbing using slightly different models than
the ones it actually adds to the ensemble.
APR
40.8%
55.7%
96.5%
BEP
19.4%
77.4%
96.8%
RMS
55.1%
46.8%
101.9%
MXE
56.7%
51.8%
108.5%
MEAN
41.1%
54.9%
96.0%
In comparison to model selection, however, ensemble selection seems much more resistant to overfitting when data is
scarce. Further experiments varying the amount of training
data provided to the base-level models are needed to see if
ensemble selection is truly able to outperform model selection by such a significant amount on small data sets.
Counter to our and others' intuition [9], calibrating models to put all predictions on the same scale before averaging
them did not improve ensemble selection's effectiveness.
Most of calibration's improvement comes from the superior
base-level models.
Our experiments show that directly optimizing to a target metric is better than always optimizing to some predetermined metric. That said, always optimizing to RMS or
MXE was surprisingly competitive. These metrics may be
good optimization proxies if the target metric is too expensive to compute repeatedly during hillclimbing.
Finally, pruning the number of available models reduces
the risk of overfitting during hillclimbing while also yielding faster ensemble building. In our experiments pruning
rarely hurt performance and frequently improved it.
Acknowledgments
We thank Lars Backstrom for help with exploring alternative model calibration methods and the anonymous reviewers for helpful comments on paper drafts. This work
was supported by NSF Award 0412930.
References
[1] S. D. Bay. Combining nearest neighbor classifiers through
multiple feature subsets. In ICML, pages 37–45, 1998.
[2] L. Breiman. Heuristics of instability in model selection.
Technical report, Statistics Department, University of California at Berkeley, 1994.
[3] L. Breiman. Bagging predictors. Machine Learning,
24(2):123–140, 1996.
[4] L. Breiman. Random forests. Machine Learning,
45(1):5–32, 2001.
[5] R. K. Bryll, R. Gutierrez-Osuna, and F. K. H. Quek. Attribute bagging: Improving accuracy of classifier ensembles by using random feature subsets. Pattern Recognition,
36(6):1291–1302, 2003.
10 Conclusions
Embedding cross-validation inside ensemble selection
greatly increases its performance. Half of this benefit is due
to having more data for hillclimbing; the other half is due to
a bagging effect that results from the way cross-validation
is embedded within ensemble selection. Unsurprisingly, reducing the amount of hillclimbing data hurts performance
because ensemble selection can overfit this data more easily.
10
A Learning Methods
[6] C. Bucila, R. Caruana, and A. Niculescu-Mizil. Model compression: Making big, slow models practical. In Proc. of the
12th International Conf. on Knowledge Discovery and Data
Mining (KDD'06), 2006.
[7] R. Caruana, A. Niculescu-Mizil, G. Crew, and A. Ksikes.
Ensemble selection from libraries of models. In ICML,
2004.
[8] P. Domingos. Bayesian averaging of classifiers and the overfitting problem. In ICML, pages 223–230. Morgan Kaufmann, San Francisco, CA, 2000.
[9] R. P. W. Duin. The combining classifier: To train or not to
train? In ICPR (2), pages 765–770, 2002.
[10] P. Giudici. Applied Data Mining. John Wiley and Sons, New
York, 2003.
[11] L. I. Kuncheva and C. J. Whitaker. Measures of diversity in
classifier ensembles and their relationship with the ensemble
accuracy. Machine Learning, 51(2):181–207, 2003.
[12] D. D. Margineantu and T. G. Dietterich. Pruning adaptive
boosting. In ICML, pages 211–218. Morgan Kaufmann,
1997.
[13] G. Martínez-Muñoz and A. Suárez. Pruning in ordered bagging ensembles. In ICML, pages 609–616, New York, NY,
USA, 2006. ACM Press.
[14] A. Munson, C. Cardie, and R. Caruana. Optimizing to
arbitrary NLP metrics using ensemble selection. In HLT-EMNLP, pages 539–546, 2005.
[15] A. Niculescu-Mizil and R. Caruana. Predicting good probabilities with supervised learning. In ICML'05, 2005.
[16] C. Perlich, F. Provost, and J. S. Simonoff. Tree induction
vs. logistic regression: A learning-curve analysis. J. Mach.
Learn. Res., 4:211–255, 2003.
[17] A. H. Peterson and T. R. Martinez. Estimating the potential for combining learning models. In Proc. of the ICML
Workshop on Meta-Learning, pages 68–75, 2005.
[18] J. Platt. Probabilistic outputs for support vector machines
and comparison to regularized likelihood methods. In Adv.
in Large Margin Classifiers, 1999.
[19] F. J. Provost and T. Fawcett. Analysis and visualization of
classifier performance: Comparison under imprecise class
and cost distributions. In Knowledge Discovery and Data
Mining, pages 43–48, 1997.
[20] F. Roli, G. Giacinto, and G. Vernazza. Methods for designing multiple classifier systems. In Multiple Classifier Systems, pages 78–87, 2001.
[21] R. Schapire. The boosting approach to machine learning: An
overview. In MSRI Workshop on Nonlinear Estimation
and Classification, 2001.
[22] W. N. Street and Y.-H. Kim. A streaming ensemble algorithm (SEA) for large-scale classification. In KDD, pages
377–382, 2001.
[23] G. Tsoumakas, L. Angelis, and I. Vlahavas. Selective fusion of heterogeneous classifiers. Intelligent Data Analysis,
9(6):511–525, 2005.
[24] I. H. Witten and E. Frank. Data Mining: Practical Machine
Learning Tools and Techniques. Morgan Kaufmann, San
Francisco, second edition, 2005.
[25] D. H. Wolpert. Stacked generalization. Neural Networks,
5:241–259, 1992.
[26] Y. Zhang, S. Burer, and W. N. Street. Ensemble pruning via
semi-definite programming. Journal of Machine Learning
Research, 7:1315–1338, 2006.
B Data Sets
We experiment with 11 binary classification problems. ADULT, COVTYPE, HS, LETTER.P1, LETTER.P2, MEDIS, and SLAC were used by Caruana et
al. [7]. The four new data sets we use are BACT, COD,
CALHOUS, and MG. COD, BACT, and CALHOUS are
three of the datasets used in Perlich et al. [16]. MG is a
medical data set. See Table 6 for characteristics of the 11
problems.
Table 6. Description of problems
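| PROBLEM   | # ATTR | TRAIN | TEST  | % POZ |
|-----------|--------|-------|-------|-------|
| ADULT     | 14/104 | 4000  | 35222 | 25%   |
| BACT      | 11/170 | 4000  | 34262 | 69%   |
| COD       | 15/60  | 4000  | 14000 | 50%   |
| CALHOUS   | 9      | 4000  | 14640 | 52%   |
| COVTYPE   | 54     | 4000  | 25000 | 36%   |
| HS        | 200    | 4000  | 4366  | 24%   |
| LETTER.P1 | 16     | 4000  | 14000 | 3%    |
| LETTER.P2 | 16     | 4000  | 14000 | 53%   |
| MEDIS     | 63     | 4000  | 8199  | 11%   |
| MG        | 124    | 4000  | 12807 | 17%   |
| SLAC      | 59     | 4000  | 25000 | 50%   |
11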
Table 7. Scales used to compute normalized scores. Each entry shows bottom / top for the scale.
| PROBLEM   | ACC           | FSC           | LFT           | ROC           |
|-----------|---------------|---------------|---------------|---------------|
| ADULT     | 0.752 / 0.859 | 0.398 / 0.705 | 1.000 / 2.842 | 0.500 / 0.915 |
| BACT      | 0.692 / 0.780 | 0.818 / 0.855 | 1.000 / 1.345 | 0.500 / 0.794 |
| CALHOUS   | 0.517 / 0.889 | 0.681 / 0.893 | 1.000 / 1.941 | 0.500 / 0.959 |
| COD       | 0.501 / 0.784 | 0.666 / 0.796 | 1.000 / 1.808 | 0.500 / 0.866 |
| COVTYPE   | 0.639 / 0.859 | 0.531 / 0.804 | 1.000 / 2.487 | 0.500 / 0.926 |
| HS        | 0.759 / 0.949 | 0.389 / 0.894 | 1.000 / 3.656 | 0.500 / 0.985 |
| LETTER.p1 | 0.965 / 0.994 | 0.067 / 0.917 | 1.000 / 4.001 | 0.500 / 0.999 |
| LETTER.p2 | 0.533 / 0.968 | 0.696 / 0.970 | 1.000 / 1.887 | 0.500 / 0.996 |
| MEDIS     | 0.893 / 0.905 | 0.193 / 0.447 | 1.000 / 2.917 | 0.500 / 0.853 |
| MG        | 0.831 / 0.900 | 0.290 / 0.663 | 1.000 / 3.210 | 0.500 / 0.911 |
| SLAC      | 0.501 / 0.726 | 0.667 / 0.751 | 1.000 / 1.727 | 0.500 / 0.813 |
C Performance Scales
Table 7 lists the performance numbers that determine the
normalized scores. Each entry contains the baseline performance (bottom of the scale) and the best performance
achieved by any model or ensemble (top of the scale).
12
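(Table 7, continued; rows follow the table's data-set order.)
| PROBLEM   | APR           | BEP           | RMS           | MXE           |
|-----------|---------------|---------------|---------------|---------------|
| ADULT     | 0.248 / 0.808 | 0.248 / 0.708 | 0.432 / 0.312 | 0.808 / 0.442 |
| BACT      | 0.692 / 0.891 | 0.692 / 0.824 | 0.462 / 0.398 | 0.891 / 0.697 |
| CALHOUS   | 0.517 / 0.964 | 0.517 / 0.895 | 0.500 / 0.283 | 0.999 / 0.380 |
| COD       | 0.499 / 0.864 | 0.499 / 0.782 | 0.500 / 0.387 | 1.000 / 0.663 |
| COVTYPE   | 0.362 / 0.879 | 0.361 / 0.805 | 0.480 / 0.320 | 0.944 / 0.478 |
| HS        | 0.243 / 0.962 | 0.241 / 0.898 | 0.428 / 0.198 | 0.797 / 0.195 |
| LETTER.p1 | 0.036 / 0.975 | 0.035 / 0.917 | 0.184 / 0.067 | 0.219 / 0.025 |
| LETTER.p2 | 0.534 / 0.997 | 0.533 / 0.970 | 0.499 / 0.157 | 0.997 / 0.125 |
| MEDIS     | 0.108 / 0.462 | 0.107 / 0.469 | 0.309 / 0.272 | 0.491 / 0.365 |
| MG        | 0.170 / 0.740 | 0.169 / 0.686 | 0.375 / 0.278 | 0.656 / 0.373 |
| SLAC      | 0.501 / 0.816 | 0.501 / 0.727 | 0.500 / 0.420 | 1.000 / 0.755 |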