Caution
train_adagrad()
has been deprecated since the v0.5.0 release. Use the smarter general classifier instead.
AdaGradRDA
Note
The current AdaGradRDA implementation can only be applied to classification, not to regression, because it uses hinge loss as its loss function.
model building
-- Build the AdaGradRDA model table in the news20 database.
use news20;

-- "if exists" keeps this script re-runnable on engines/configurations that
-- raise an error when asked to drop a table that has not been created yet.
drop table if exists news20b_adagrad_rda_model1;

-- train_adagrad_rda emits (feature, weight) rows; the weights emitted for the
-- same feature are merged into a single weight per feature with voted_avg.
create table news20b_adagrad_rda_model1 as
select
  feature,
  voted_avg(weight) as weight
from (
  select
    train_adagrad_rda(add_bias(features), label) as (feature, weight)
  from
    news20b_train_x3
) t
group by
  feature;
prediction
-- Score every test row against the AdaGradRDA model.
-- The inner query sums weight * value per rowid; the outer query turns that
-- score into a -1/+1 label (strictly positive score => 1, otherwise -1).
create or replace view news20b_adagrad_rda_predict1 as
select
  s.rowid,
  s.total_weight,
  case when s.total_weight > 0.0 then 1 else -1 end as label
from (
  select
    t.rowid,
    -- features missing from the model yield NULL products, which sum() skips
    sum(m.weight * t.value) as total_weight
  from
    news20b_test_exploded t
    left outer join news20b_adagrad_rda_model1 m
      on (t.feature = m.feature)
  group by
    t.rowid
) s;
evaluation
-- Pair the true label of each test row with the AdaGradRDA prediction.
create or replace view news20b_adagrad_rda_submit1 as
select
  t.label as actual,
  pd.label as predicted
from
  news20b_test t
  inner join news20b_adagrad_rda_predict1 pd
    on t.rowid = pd.rowid;

-- Accuracy: number of correct predictions divided by 4996.
-- NOTE(review): 4996 is hard-coded; presumably the number of test rows -- confirm.
select
  count(*) / 4996
from
  news20b_adagrad_rda_submit1
where
  actual = predicted;
SCW1 0.9661729383506805
ADAGRAD+RDA 0.9677742193755005
AdaGrad
Note
AdaGrad is better suited to binary classification problems because the current implementation only supports logistic loss.
model building
-- Build the AdaGrad model table.
-- "if exists" keeps this script re-runnable on engines/configurations that
-- raise an error when asked to drop a table that has not been created yet.
drop table if exists news20b_adagrad_model1;

-- convert_label maps the -1/+1 labels to the 0/1 values the trainer takes.
-- The trainer emits (feature, weight) rows; voted_avg merges the weights
-- emitted for the same feature into one weight per feature.
create table news20b_adagrad_model1 as
select
  feature,
  voted_avg(weight) as weight
from (
  select
    train_adagrad_regr(add_bias(features), convert_label(label)) as (feature, weight)
  from
    news20b_train_x3
) t
group by
  feature;
Caution
adagrad
takes 0/1 for a label value and convert_label(label)
converts a label value from -1/+1 to 0/1.
prediction
-- Predict a -1/+1 label for every test row with the AdaGrad model:
-- the per-row sum of weight * value is passed through sigmoid and
-- thresholded at 0.5.
create or replace view news20b_adagrad_predict1 as
select
  t.rowid,
  case
    when sigmoid(sum(m.weight * t.value)) >= 0.5 then 1
    else -1
  end as label
from
  news20b_test_exploded t
  -- left join keeps test features that are absent from the model
  left join news20b_adagrad_model1 m
    on t.feature = m.feature
group by
  t.rowid;
evaluation
-- Pair the true label of each test row with the AdaGrad prediction.
create or replace view news20b_adagrad_submit1 as
select
  t.label as actual,
  p.label as predicted
from
  news20b_test t
  inner join news20b_adagrad_predict1 p
    on t.rowid = p.rowid;

-- Accuracy: number of correct predictions divided by 4996.
-- NOTE(review): 4996 is hard-coded; presumably the number of test rows -- confirm.
select
  count(*) / 4996
from
  news20b_adagrad_submit1
where
  actual = predicted;
0.9549639711769415 (adagrad)
AdaDelta
Caution
AdaDelta can only be applied to regression problems because the current implementation only supports logistic loss.
model building
-- Build the AdaDelta model table.
-- "if exists" keeps this script re-runnable on engines/configurations that
-- raise an error when asked to drop a table that has not been created yet.
drop table if exists news20b_adadelta_model1;

-- The label is passed through convert_label before training.
-- The trainer emits (feature, weight) rows; voted_avg merges the weights
-- emitted for the same feature into one weight per feature.
-- NOTE(review): the trainer is called by the short name "adadelta", whereas the
-- AdaGrad model uses "train_adagrad_regr" -- confirm the registered function name.
create table news20b_adadelta_model1 as
select
  feature,
  voted_avg(weight) as weight
from (
  select
    adadelta(add_bias(features), convert_label(label)) as (feature, weight)
  from
    news20b_train_x3
) t
group by
  feature;
prediction
-- Predict a -1/+1 label for every test row with the AdaDelta model:
-- the per-row sum of weight * value is passed through sigmoid and
-- thresholded at 0.5.
create or replace view news20b_adadelta_predict1 as
select
  t.rowid,
  case
    when sigmoid(sum(m.weight * t.value)) >= 0.5 then 1
    else -1
  end as label
from
  news20b_test_exploded t
  -- left join keeps test features that are absent from the model
  left join news20b_adadelta_model1 m
    on t.feature = m.feature
group by
  t.rowid;
evaluation
-- Pair the true label of each test row with the AdaDelta prediction.
create or replace view news20b_adadelta_submit1 as
select
  t.label as actual,
  p.label as predicted
from
  news20b_test t
  inner join news20b_adadelta_predict1 p
    on t.rowid = p.rowid;

-- Accuracy: number of correct predictions divided by 4996.
-- NOTE(review): 4996 is hard-coded; presumably the number of test rows -- confirm.
select
  count(*) / 4996
from
  news20b_adadelta_submit1
where
  actual = predicted;
AdaDelta often performs better than AdaGrad, although on this dataset AdaGrad's accuracy is marginally higher:
0.9549639711769415 (adagrad)
0.9545636509207366 (adadelta)