One-vs-the-rest is a multiclass classification method that trains one binary classifier per class, each independently of the others, to separate that class from all remaining classes. See http://en.wikipedia.org/wiki/Multiclass_classification#one_vs_all

Dataset preparation for one-vs-the-rest classifiers

-- Collect the distinct class labels that occur in the training table.
SELECT
  collect_set(label)
FROM
  news20mc_train;

[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,17,16,19,18,20]

-- Comma-separated list of the class labels, stored as a Hive variable.
-- The double quotes are part of the value, so ${possible_labels} expands to a
-- string literal wherever it is referenced.
SET hivevar:possible_labels="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,17,16,19,18,20";

The view below streams each training row through the gawk script one-vs-rest.awk:

-- One-vs-the-rest training view.
-- Each row of news20mc_train is sent to the external script one-vs-rest.awk as
-- tab-separated fields (array items joined by ","): the label list held in
-- ${possible_labels}, then rowid, label and add_bias(features).
-- The script's output is read back in the same delimited format as
-- (rowid, label, target, features).
-- NOTE(review): presumably the script emits one row per candidate label with a
-- binary target; confirm against one-vs-rest.awk. The script must also be
-- reachable by the tasks that run gawk (e.g. registered with ADD FILE) -- verify.
CREATE OR REPLACE VIEW news20_onevsrest_train
AS
SELECT TRANSFORM(${possible_labels}, rowid, label, add_bias(features))
  ROW FORMAT DELIMITED
    FIELDS TERMINATED BY "\t"
    COLLECTION ITEMS TERMINATED BY ","
    LINES TERMINATED BY "\n"
USING 'gawk -f one-vs-rest.awk'
  AS (rowid BIGINT, label INT, target INT, features ARRAY<STRING>)
  ROW FORMAT DELIMITED
    FIELDS TERMINATED BY "\t"
    COLLECTION ITEMS TERMINATED BY ","
    LINES TERMINATED BY "\n"
FROM news20mc_train;

-- Amplified and shuffled variant of news20_onevsrest_train.
-- The inner query applies amplify(3, *) to every row of the base view and names
-- the resulting columns; the outer query redistributes the rows with
-- CLUSTER BY rand().
-- NOTE(review): amplify is expected to emit each input row three times (hence
-- the _x3 suffix); confirm against the Hivemall documentation.
CREATE OR REPLACE VIEW news20_onevsrest_train_x3
AS
SELECT
  rowid,
  label,
  target,
  features
FROM (
  SELECT
    amplify(3, *) AS (rowid, label, target, features)
  FROM
    news20_onevsrest_train
) amplified
CLUSTER BY rand();

results matching ""

    No results matching ""