In our regression tutorials, you can tackle realistic prediction problems by using several of Hivemall's regression features, such as:
Our train_regressor function enables you to solve regression problems with flexible, configurable options. Let us try the function below.
Note that the sample queries require the E2006-tfidf data to be prepared beforehand. See our E2006-tfidf tutorial page for further instructions.
Note
This feature is supported in Hivemall v0.5-rc.1 or later.
Training
-- Build the regression model table: one row per feature holding the average
-- of all weights that train_regressor emitted for that feature.
-- The option string requests squared loss (-loss squaredloss), the AdaGrad
-- optimizer (-opt AdaGrad) and no regularization (-reg no).
-- NOTE(review): the averaging presumably merges weights produced by parallel
-- training tasks, and e2006tfidf_train_x3 looks like an amplified copy of the
-- training set -- confirm against the E2006-tfidf data preparation page.
CREATE TABLE e2006tfidf_regression_model AS
SELECT
    feature,
    AVG(weight) AS weight
FROM (
    SELECT
        train_regressor(
            features,
            target,
            '-loss squaredloss -opt AdaGrad -reg no'
        ) AS (feature, weight)
    FROM e2006tfidf_train_x3
) trained
GROUP BY feature;
Prediction & evaluation
-- Score the test set with the trained linear model and report its RMSE.
--   scored: per test row, the sum of (model weight * feature value). The left
--           join keeps test features that are absent from the model. Their
--           products are NULL and are skipped by SUM.
--   paired: lines up each prediction with the true target of the same rowid.
-- The final select feeds (predicted, actual) pairs to the rmse aggregate.
WITH scored AS (
    SELECT
        x.rowid,
        SUM(m.weight * x.value) AS predicted
    FROM e2006tfidf_test_exploded x
    LEFT JOIN e2006tfidf_regression_model m
        ON (x.feature = m.feature)
    GROUP BY x.rowid
),
paired AS (
    SELECT
        a.target AS actual,
        s.predicted AS predicted
    FROM e2006tfidf_test a
    INNER JOIN scored s
        ON (a.rowid = s.rowid)
)
SELECT rmse(predicted, actual) FROM paired;