Confidence Weighted (CW)

training

-- Train a Confidence Weighted (CW) classifier on news20b_train_x3 and store
-- one weight per feature in news20b_cw_model1.
-- "if exists" keeps the script re-runnable when the table has not been created yet.
drop table if exists news20b_cw_model1;
create table news20b_cw_model1 as
select 
 feature,
 -- voted_avg(weight) as weight -- [hivemall v0.1]
 -- rows emitted for the same feature are combined into a single weight
 argmin_kld(weight, covar) as weight -- [hivemall v0.2 or later]
from 
 (select 
     -- train_cw(add_bias(features), label) as (feature, weight) -- [hivemall v0.1]
     -- train_cw emits (feature, weight, covar) rows for the bias-augmented features
     train_cw(add_bias(features), label) as (feature, weight, covar) -- [hivemall v0.2 or later]
  from 
     news20b_train_x3
 ) t 
group by feature;

prediction

-- Prediction view for the CW model.
-- For every test row the score is the sum of (model weight * feature value).
-- Features without a model entry stay in the join (left outer) and contribute
-- NULL, which sum() skips. The label is 1 for a strictly positive score and
-- -1 otherwise (a NULL score also yields -1).
create or replace view news20b_cw_predict1
as
select
  scored.rowid,
  scored.total_weight,
  if(scored.total_weight > 0.0, 1, -1) as label
from (
  select
    t.rowid,
    sum(m.weight * t.value) as total_weight
  from
    news20b_test_exploded t
    LEFT OUTER JOIN news20b_cw_model1 m
      ON (t.feature = m.feature)
  group by
    t.rowid
) scored;

evaluation

-- Accuracy of the CW predictions: fraction of joined test rows whose
-- predicted label equals the actual label in news20b_test.
select
  sum(case when t.label = p.label then 1 else 0 end) / count(1) as accuracy
from
  news20b_test t
  JOIN news20b_cw_predict1 p
    on (t.rowid = p.rowid);

0.9655724579663731

Adaptive Regularization of Weight Vectors (AROW)

training

-- Train an AROW classifier on news20b_train_x3 and store one weight per
-- feature in news20b_arow_model1.
-- "if exists" keeps the script re-runnable when the table has not been created yet.
drop table if exists news20b_arow_model1;
create table news20b_arow_model1 as
select 
 feature,
 -- voted_avg(weight) as weight -- [hivemall v0.1]
 -- rows emitted for the same feature are combined into a single weight
 argmin_kld(weight, covar) as weight -- [hivemall v0.2 or later]
from 
 (select 
     -- train_arow(add_bias(features),label) as (feature,weight) -- [hivemall v0.1]
     -- train_arow emits (feature, weight, covar) rows for the bias-augmented features
     train_arow(add_bias(features),label) as (feature,weight,covar) -- [hivemall v0.2 or later]
  from 
     news20b_train_x3
 ) t 
group by feature;

prediction

-- Prediction view for the AROW model.
-- For every test row the score is the sum of (model weight * feature value).
-- Features without a model entry stay in the join (left outer) and contribute
-- NULL, which sum() skips. The label is 1 for a strictly positive score and
-- -1 otherwise (a NULL score also yields -1).
create or replace view news20b_arow_predict1
as
select
  scored.rowid,
  scored.total_weight,
  if(scored.total_weight > 0.0, 1, -1) as label
from (
  select
    t.rowid,
    sum(m.weight * t.value) as total_weight
  from
    news20b_test_exploded t
    LEFT OUTER JOIN news20b_arow_model1 m
      ON (t.feature = m.feature)
  group by
    t.rowid
) scored;

evaluation

-- Accuracy of the AROW predictions: fraction of joined test rows whose
-- predicted label equals the actual label in news20b_test.
select
  sum(case when t.label = p.label then 1 else 0 end) / count(1) as accuracy
from
  news20b_test t
  JOIN news20b_arow_predict1 p
    on (t.rowid = p.rowid);

0.9659727782225781

Soft Confidence-Weighted (SCW1)

training

-- Train a Soft Confidence-Weighted (SCW1) classifier on news20b_train_x3 and
-- store one weight per feature in news20b_scw_model1.
-- "if exists" keeps the script re-runnable when the table has not been created yet.
drop table if exists news20b_scw_model1;
create table news20b_scw_model1 as
select 
 feature,
 -- voted_avg(weight) as weight -- [hivemall v0.1]
 -- rows emitted for the same feature are combined into a single weight
 argmin_kld(weight, covar) as weight -- [hivemall v0.2 or later]
from 
 (select 
     -- train_scw(add_bias(features),label) as (feature,weight) -- [hivemall v0.1]
     -- train_scw emits (feature, weight, covar) rows for the bias-augmented features
     train_scw(add_bias(features),label) as (feature,weight,covar) -- [hivemall v0.2 or later]
  from 
     news20b_train_x3
 ) t 
group by feature;

prediction

-- Prediction view for the SCW1 model.
-- For every test row the score is the sum of (model weight * feature value).
-- Features without a model entry stay in the join (left outer) and contribute
-- NULL, which sum() skips. The label is 1 for a strictly positive score and
-- -1 otherwise (a NULL score also yields -1).
create or replace view news20b_scw_predict1
as
select
  scored.rowid,
  scored.total_weight,
  if(scored.total_weight > 0.0, 1, -1) as label
from (
  select
    t.rowid,
    sum(m.weight * t.value) as total_weight
  from
    news20b_test_exploded t
    LEFT OUTER JOIN news20b_scw_model1 m
      ON (t.feature = m.feature)
  group by
    t.rowid
) scored;

evaluation

-- Accuracy of the SCW1 predictions: fraction of joined test rows whose
-- predicted label equals the actual label in news20b_test.
WITH submit as (
  -- pair each test row with its prediction
  select 
    t.rowid, 
    t.label as actual, 
    p.label as predicted
  from 
    news20b_test t JOIN news20b_scw_predict1 p
      on (t.rowid = p.rowid)
)
select sum(if(actual = predicted, 1, 0)) / count(1) as accuracy
from submit;

0.9661729383506805

Soft Confidence-Weighted (SCW2)

training

-- Train a Soft Confidence-Weighted (SCW2) classifier on news20b_train_x3 and
-- store one weight per feature in news20b_scw2_model1.
-- "if exists" keeps the script re-runnable when the table has not been created yet.
drop table if exists news20b_scw2_model1;
create table news20b_scw2_model1 as
select 
 feature,
 -- voted_avg(weight) as weight -- [hivemall v0.1]
 -- rows emitted for the same feature are combined into a single weight
 argmin_kld(weight, covar) as weight -- [hivemall v0.2 or later]
from 
 (select 
     -- train_scw2(add_bias(features),label) as (feature,weight)    -- [hivemall v0.1]
     -- train_scw2 emits (feature, weight, covar) rows for the bias-augmented features
     train_scw2(add_bias(features),label) as (feature,weight,covar) -- [hivemall v0.2 or later]
  from 
     news20b_train_x3
 ) t 
group by feature;

prediction

-- Prediction view for the SCW2 model.
-- For every test row the score is the sum of (model weight * feature value).
-- Features without a model entry stay in the join (left outer) and contribute
-- NULL, which sum() skips. The label is 1 for a strictly positive score and
-- -1 otherwise (a NULL score also yields -1).
create or replace view news20b_scw2_predict1
as
select
  scored.rowid,
  scored.total_weight,
  if(scored.total_weight > 0.0, 1, -1) as label
from (
  select
    t.rowid,
    sum(m.weight * t.value) as total_weight
  from
    news20b_test_exploded t
    LEFT OUTER JOIN news20b_scw2_model1 m
      ON (t.feature = m.feature)
  group by
    t.rowid
) scored;

evaluation

-- Accuracy of the SCW2 predictions: fraction of joined test rows whose
-- predicted label equals the actual label in news20b_test.
select
  sum(case when t.label = p.label then 1 else 0 end) / count(1) as accuracy
from
  news20b_test t
  JOIN news20b_scw2_predict1 p
    on (t.rowid = p.rowid);

0.9579663730984788

--

Algorithm Accuracy
Perceptron 0.9459567654123299
SCW2 0.9579663730984788
PA2 0.9597678142514011
PA1 0.9601681345076061
PA 0.9603682946357086
CW 0.9655724579663731
AROW 0.9659727782225781
SCW1 0.9661729383506805

My recommendation is AROW for classification.

results matching ""

    No results matching ""