PA1
Train
-- Train a PA1 model on kdd10a_train_x3 and store one weight per feature
-- in kdd10a_pa1_model1.
-- Optional reducer tuning, left disabled as in the original recipe:
-- SET mapred.reduce.tasks=32;
-- The if exists clause keeps the script re-runnable when the table is absent.
drop table if exists kdd10a_pa1_model1;
create table kdd10a_pa1_model1 as
select
  feature,
  -- collapse the weights emitted for the same feature into a single weight
  voted_avg(weight) as weight
from (
  select
    -- train_pa1 emits (feature, weight) rows
    -- NOTE(review): add_bias presumably appends a bias feature - confirm in the Hivemall docs
    train_pa1(add_bias(features), label) as (feature, weight)
  from
    kdd10a_train_x3
) t
group by
  feature;
Predict
-- Score every test row with the PA1 model.
-- total_weight is the sum of weight * value over the features of the row
-- label is 1 when that sum is above 0.0 and -1 otherwise
create or replace view kdd10a_pa1_predict1 as
select
  tst.rowid,
  sum(mdl.weight * tst.value) as total_weight,
  case
    when sum(mdl.weight * tst.value) > 0.0 then 1
    else -1
  end as label
from
  kdd10a_test_exploded tst
  -- the outer join keeps test features that have no entry in the model
  left outer join kdd10a_pa1_model1 mdl
    on (tst.feature = mdl.feature)
group by
  tst.rowid;
Evaluate
-- Pair the label stored in kdd10a_test with the PA1 prediction for the same rowid.
create or replace view kdd10a_pa1_submit1 as
select
  tst.rowid,
  tst.label as actual,
  prd.label as predicted
from
  kdd10a_test tst
  join kdd10a_pa1_predict1 prd
    on (tst.rowid = prd.rowid);
-- PA1 accuracy: number of rows where actual equals predicted, divided by 510302.
-- NOTE(review): 510302 is presumably the row count of kdd10a_test - confirm
select
  count(1) / 510302
from
  kdd10a_pa1_submit1
where
  actual = predicted;
0.8677782959894337
CW
-- Train a CW model on kdd10a_train_x3 and store one weight per feature
-- in kdd10a_cw_model1.
-- Optional reducer tuning, left disabled as in the original recipe:
-- SET mapred.reduce.tasks=32;
-- The if exists clause keeps the script re-runnable when the table is absent.
drop table if exists kdd10a_cw_model1;
create table kdd10a_cw_model1 as
select
  feature,
  -- merge the (weight, covar) pairs emitted for the same feature into one weight
  argmin_kld(weight, covar) as weight
from (
  select
    -- train_cw emits (feature, weight, covar) rows
    -- NOTE(review): add_bias presumably appends a bias feature - confirm in the Hivemall docs
    train_cw(add_bias(features), label) as (feature, weight, covar)
  from
    kdd10a_train_x3
) t
group by
  feature;
-- Score every test row with the CW model.
-- total_weight is the sum of weight * value over the features of the row
-- label is 1 when that sum is above 0.0 and -1 otherwise
create or replace view kdd10a_cw_predict1 as
select
  tst.rowid,
  sum(mdl.weight * tst.value) as total_weight,
  case
    when sum(mdl.weight * tst.value) > 0.0 then 1
    else -1
  end as label
from
  kdd10a_test_exploded tst
  -- the outer join keeps test features that have no entry in the model
  left outer join kdd10a_cw_model1 mdl
    on (tst.feature = mdl.feature)
group by
  tst.rowid;
-- Pair the label stored in kdd10a_test with the CW prediction for the same rowid.
create or replace view kdd10a_cw_submit1 as
select
  tst.rowid,
  tst.label as actual,
  prd.label as predicted
from
  kdd10a_test tst
  join kdd10a_cw_predict1 prd
    on (tst.rowid = prd.rowid);
-- CW accuracy: number of rows where actual equals predicted, divided by 510302.
-- NOTE(review): 510302 is presumably the row count of kdd10a_test - confirm
select
  count(1) / 510302
from
  kdd10a_cw_submit1
where
  actual = predicted;
0.8678037711002504
AROW
-- Train an AROW model on kdd10a_train_x3 and store one weight per feature
-- in kdd10a_arow_model1.
-- Optional reducer tuning, left disabled as in the original recipe:
-- SET mapred.reduce.tasks=32;
-- The if exists clause keeps the script re-runnable when the table is absent.
drop table if exists kdd10a_arow_model1;
create table kdd10a_arow_model1 as
select
  feature,
  -- alternative for versions before v0.2alpha3: voted_avg(weight) as weight
  argmin_kld(weight, covar) as weight -- [hivemall v0.2alpha3 or later]
from (
  select
    -- alternative for [hivemall v0.1]: train_arow(add_bias(features),label) as (feature,weight)
    -- NOTE(review): add_bias presumably appends a bias feature - confirm in the Hivemall docs
    train_arow(add_bias(features), label) as (feature, weight, covar) -- [hivemall v0.2 or later]
  from
    kdd10a_train_x3
) t
group by
  feature;
-- Score every test row with the AROW model.
-- total_weight is the sum of weight * value over the features of the row
-- label is 1 when that sum is above 0.0 and -1 otherwise
create or replace view kdd10a_arow_predict1 as
select
  tst.rowid,
  sum(mdl.weight * tst.value) as total_weight,
  case
    when sum(mdl.weight * tst.value) > 0.0 then 1
    else -1
  end as label
from
  kdd10a_test_exploded tst
  -- the outer join keeps test features that have no entry in the model
  left outer join kdd10a_arow_model1 mdl
    on (tst.feature = mdl.feature)
group by
  tst.rowid;
-- Pair the label stored in kdd10a_test with the AROW prediction for the same rowid.
create or replace view kdd10a_arow_submit1 as
select
  tst.rowid,
  tst.label as actual,
  prd.label as predicted
from
  kdd10a_test tst
  join kdd10a_arow_predict1 prd
    on (tst.rowid = prd.rowid);
-- AROW accuracy: number of rows where actual equals predicted, divided by 510302.
-- NOTE(review): 510302 is presumably the row count of kdd10a_test - confirm
select
  count(1) / 510302
from
  kdd10a_arow_submit1
where
  actual = predicted;
0.8676038894615345
SCW
-- Train an SCW model on kdd10a_train_x3 and store one weight per feature
-- in kdd10a_scw_model1.
-- Optional reducer tuning, left disabled as in the original recipe:
-- SET mapred.reduce.tasks=32;
-- The if exists clause keeps the script re-runnable when the table is absent.
drop table if exists kdd10a_scw_model1;
create table kdd10a_scw_model1 as
select
  feature,
  -- merge the (weight, covar) pairs emitted for the same feature into one weight
  argmin_kld(weight, covar) as weight
from (
  select
    -- train_scw emits (feature, weight, covar) rows
    -- NOTE(review): add_bias presumably appends a bias feature - confirm in the Hivemall docs
    train_scw(add_bias(features), label) as (feature, weight, covar)
  from
    kdd10a_train_x3
) t
group by
  feature;
-- Score every test row with the SCW model.
-- total_weight is the sum of weight * value over the features of the row
-- label is 1 when that sum is above 0.0 and -1 otherwise
create or replace view kdd10a_scw_predict1 as
select
  tst.rowid,
  sum(mdl.weight * tst.value) as total_weight,
  case
    when sum(mdl.weight * tst.value) > 0.0 then 1
    else -1
  end as label
from
  kdd10a_test_exploded tst
  -- the outer join keeps test features that have no entry in the model
  left outer join kdd10a_scw_model1 mdl
    on (tst.feature = mdl.feature)
group by
  tst.rowid;
-- Pair the label stored in kdd10a_test with the SCW prediction for the same rowid.
create or replace view kdd10a_scw_submit1 as
select
  tst.rowid,
  tst.label as actual,
  prd.label as predicted
from
  kdd10a_test tst
  join kdd10a_scw_predict1 prd
    on (tst.rowid = prd.rowid);
-- SCW accuracy: number of rows where actual equals predicted, divided by 510302.
-- NOTE(review): 510302 is presumably the row count of kdd10a_test - confirm
select
  count(1) / 510302
from
  kdd10a_scw_submit1
where
  actual = predicted;
0.8678096499719774
Algorithm | Accuracy |
---|---|
AROW | 0.8676038894615345 |
PA1 | 0.8677782959894337 |
CW | 0.8678037711002504 |
SCW1 | 0.8678096499719774 |