Function signatures of bbit_minhash

Text bbit_minhash(array<int|string> features)
Text bbit_minhash(array<int|string> features, int numHashes=128)
Text bbit_minhash(array<int|string> features, boolean discardWeight=false)
Text bbit_minhash(array<int|string> features, int numHashes=128, boolean discardWeight=false)

Create a signature for each article

-- Store one b-bit minhash signature per row of news20mc_train.
-- numHashes is written out as 128, the default listed in the function
-- signatures, matching the 128 that the kNN search query passes to
-- bbit_minhash and jaccard_similarity.
-- NOTE(review): the table name is spelled new20mc rather than news20mc. It is
-- kept unchanged because the kNN search query reads from this exact name.
create table new20mc_with_signature as
select
  train.rowid,
  bbit_minhash(train.features, 128, false) as signature
from news20mc_train train;

kNN brute-force search using b-Bit minhash

-- Number of rows the search returns.
set hivevar:topn=10;

-- Brute-force kNN: build the signature of the test row with rowid = 1, score it
-- against every stored signature, and keep the top-n rows by similarity.
-- Rows with equal similarity are returned in no particular order.
with query_article as (
  select
    bbit_minhash(features, 128, false) as signature
  from news20mc_test
  where rowid = 1
)
select
  train.rowid,
  jaccard_similarity(train.signature, query_article.signature, 128) as similarity
  -- , popcnt(train.signature, query_article.signature) as popcnt
from new20mc_with_signature train
cross join query_article
order by similarity desc
limit ${topn};
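Example output (the popcnt column appears only when the commented-out popcnt expression in the query above is enabled):

rowid similarity popcnt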
11952 0.390625 41
10748 0.359375 41
12902 0.34375 45
3087 0.328125 48
3 0.328125 37
11493 0.328125 38
3839 0.328125 41
12669 0.328125 37
13604 0.3125 41
6333 0.3125 39

results matching ""

    No results matching ""