PA1a
Training
-- Train the PA1a regression model and keep one weight per feature.
-- The trainer is fed add_bias(features) and target from e2006tfidf_train_x3 and
-- emits (feature, weight) rows; rows sharing a feature are merged with avg().
-- voted_avg() is not used here: it is meant for classification, not regression.

-- Run the aggregation with 64 reducers; the setting is restored at the end.
set mapred.reduce.tasks=64;

-- IF EXISTS keeps the script re-runnable when the model table is not there yet.
drop table if exists e2006tfidf_pa1a_model;

create table e2006tfidf_pa1a_model as
select
  feature,
  avg(weight) as weight
from (
  select
    train_pa1a_regr(add_bias(features), target) as (feature, weight)
  from
    e2006tfidf_train_x3
) t
group by
  feature;

-- -1 hands the choice of reducer count back to Hive.
set mapred.reduce.tasks=-1;
Caution
Do not use voted_avg() for regression; voted_avg() is for classification.
prediction
-- PA1a prediction view: for every test row, the prediction is the sum of
-- weight * value over that row's exploded features.
-- The left join keeps test features that have no entry in the model table;
-- their product is NULL, which sum() skips.
create or replace view e2006tfidf_pa1a_predict as
select
  te.rowid,
  sum(te.value * mdl.weight) as predicted
from
  e2006tfidf_test_exploded te
  left join e2006tfidf_pa1a_model mdl
    on te.feature = mdl.feature
group by
  te.rowid;
evaluation
-- PA1a evaluation: pair each test row's target with its prediction (matched on
-- rowid), then compute the four error metrics over all matched pairs.
with scored as (
  select
    tst.target as actual,
    prd.predicted as predicted
  from
    e2006tfidf_test tst
    inner join e2006tfidf_pa1a_predict prd
      on tst.rowid = prd.rowid
)
select
  rmse(predicted, actual) as RMSE,
  mse(predicted, actual) as MSE,
  mae(predicted, actual) as MAE,
  r2(predicted, actual) as R2
from
  scored;
| rmse | mse | mae | r2 |
|------|-----|-----|----|
| 0.3797959864675519 | 0.14424499133686086 | 0.23846059576113587 | 0.5010367946980386 |
PA2a
Training
-- Train the PA2a regression model and keep one weight per feature.
-- The trainer is fed add_bias(features) and target from e2006tfidf_train_x3 and
-- emits (feature, weight) rows; rows sharing a feature are merged with avg().

-- Run the aggregation with 64 reducers; the setting is restored at the end.
set mapred.reduce.tasks=64;

-- IF EXISTS keeps the script re-runnable when the model table is not there yet.
drop table if exists e2006tfidf_pa2a_model;

create table e2006tfidf_pa2a_model as
select
  feature,
  avg(weight) as weight
from (
  select
    train_pa2a_regr(add_bias(features), target) as (feature, weight)
  from
    e2006tfidf_train_x3
) t
group by
  feature;

-- -1 hands the choice of reducer count back to Hive.
set mapred.reduce.tasks=-1;
prediction
-- PA2a prediction view: for every test row, the prediction is the sum of
-- weight * value over that row's exploded features.
-- The left join keeps test features that have no entry in the model table;
-- their product is NULL, which sum() skips.
create or replace view e2006tfidf_pa2a_predict as
select
  te.rowid,
  sum(te.value * mdl.weight) as predicted
from
  e2006tfidf_test_exploded te
  left join e2006tfidf_pa2a_model mdl
    on te.feature = mdl.feature
group by
  te.rowid;
evaluation
-- PA2a evaluation: pair each test row's target with its prediction (matched on
-- rowid), then compute the four error metrics over all matched pairs.
with scored as (
  select
    tst.target as actual,
    prd.predicted as predicted
  from
    e2006tfidf_test tst
    inner join e2006tfidf_pa2a_predict prd
      on tst.rowid = prd.rowid
)
select
  rmse(predicted, actual) as RMSE,
  mse(predicted, actual) as MSE,
  mae(predicted, actual) as MAE,
  r2(predicted, actual) as R2
from
  scored;
| rmse | mse | mae | r2 |
|------|-----|-----|----|
| 0.38538660838804495 | 0.14852283792484033 | 0.2466732002711477 | 0.48623913673053565 |
AROW
Training
-- Train the AROW regression model and keep one weight per feature.
-- The trainer is fed add_bias(features) and target from e2006tfidf_train_x3 and
-- emits (feature, weight, covar) rows; rows sharing a feature are merged with
-- argmin_kld(weight, covar) rather than a plain average.

-- Run the aggregation with 64 reducers; the setting is restored at the end.
set mapred.reduce.tasks=64;

-- IF EXISTS keeps the script re-runnable when the model table is not there yet.
drop table if exists e2006tfidf_arow_model;

create table e2006tfidf_arow_model as
select
  feature,
  argmin_kld(weight, covar) as weight
from (
  select
    train_arow_regr(add_bias(features), target) as (feature, weight, covar)
  from
    e2006tfidf_train_x3
) t
group by
  feature;

-- -1 hands the choice of reducer count back to Hive.
set mapred.reduce.tasks=-1;
prediction
-- AROW prediction view: for every test row, the prediction is the sum of
-- weight * value over that row's exploded features.
-- The left join keeps test features that have no entry in the model table;
-- their product is NULL, which sum() skips.
create or replace view e2006tfidf_arow_predict as
select
  te.rowid,
  sum(te.value * mdl.weight) as predicted
from
  e2006tfidf_test_exploded te
  left join e2006tfidf_arow_model mdl
    on te.feature = mdl.feature
group by
  te.rowid;
evaluation
-- AROW evaluation: pair each test row's target with its prediction (matched on
-- rowid), then compute the four error metrics over all matched pairs.
with scored as (
  select
    tst.target as actual,
    prd.predicted as predicted
  from
    e2006tfidf_test tst
    inner join e2006tfidf_arow_predict prd
      on tst.rowid = prd.rowid
)
select
  rmse(predicted, actual) as RMSE,
  mse(predicted, actual) as MSE,
  mae(predicted, actual) as MAE,
  r2(predicted, actual) as R2
from
  scored;
| rmse | mse | mae | r2 |
|------|-----|-----|----|
| 0.37862513029019407 | 0.14335698928726642 | 0.2368787001269389 | 0.5041085155590119 |
AROWe
AROWe is a modified version of AROW that uses Hinge loss (epsilon = 0.1)
Training
-- Train the AROWe regression model and keep one weight per feature.
-- The trainer is fed add_bias(features) and target from e2006tfidf_train_x3 and
-- emits (feature, weight, covar) rows; rows sharing a feature are merged with
-- argmin_kld(weight, covar) rather than a plain average.

-- Run the aggregation with 64 reducers, as the PA1a, PA2a and AROW training
-- steps do. Without this line the reset at the end has nothing to undo and the
-- job runs with whatever reducer count the session happens to have.
set mapred.reduce.tasks=64;

-- IF EXISTS keeps the script re-runnable when the model table is not there yet.
drop table if exists e2006tfidf_arowe_model;

create table e2006tfidf_arowe_model as
select
  feature,
  argmin_kld(weight, covar) as weight
from (
  select
    train_arowe_regr(add_bias(features), target) as (feature, weight, covar)
  from
    e2006tfidf_train_x3
) t
group by
  feature;

-- -1 hands the choice of reducer count back to Hive.
set mapred.reduce.tasks=-1;
prediction
-- AROWe prediction view: for every test row, the prediction is the sum of
-- weight * value over that row's exploded features.
-- The left join keeps test features that have no entry in the model table;
-- their product is NULL, which sum() skips.
create or replace view e2006tfidf_arowe_predict as
select
  te.rowid,
  sum(te.value * mdl.weight) as predicted
from
  e2006tfidf_test_exploded te
  left join e2006tfidf_arowe_model mdl
    on te.feature = mdl.feature
group by
  te.rowid;
evaluation
-- AROWe evaluation: pair each test row's target with its prediction (matched on
-- rowid), then compute the four error metrics over all matched pairs.
with scored as (
  select
    tst.target as actual,
    prd.predicted as predicted
  from
    e2006tfidf_test tst
    inner join e2006tfidf_arowe_predict prd
      on tst.rowid = prd.rowid
)
select
  rmse(predicted, actual) as RMSE,
  mse(predicted, actual) as MSE,
  mae(predicted, actual) as MAE,
  r2(predicted, actual) as R2
from
  scored;
| rmse | mse | mae | r2 |
|------|-----|-----|----|
| 0.37789148212861856 | 0.14280197226536404 | 0.2357339155291536 | 0.5060283955470721 |