-- \set FETCH_COUNT 100
\timing
Timing is on.
drop table if exists test;
DROP TABLE
Time: 1.163 ms
create table test (a int, b svec) DISTRIBUTED BY (a);
CREATE TABLE
Time: 67.692 ms
insert into test (select 1,gp_extract_feature_histogram('{"one","two","three","four","five","six"}','{"twe","four","five","six","one","three","two","one"}'));
INSERT 0 1
Time: 72.360 ms
insert into test (select 2,gp_extract_feature_histogram('{"one","two","three","four","five","six"}','{"the","brown","cat","ran","across","three","dogs"}'));
INSERT 0 1
Time: 4.543 ms
insert into test (select 3,gp_extract_feature_histogram('{"one","two","three","four","five","six"}','{"two","four","five","six","one","three","two","one"}'));
INSERT 0 1
Time: 3.701 ms
-- Test the equals operator (should be only 3 rows)
select a,b::float8[] cross_product_equals from (select a,b from test) foo where b = foo.b order by a;
 a | cross_product_equals 
---+----------------------
 1 | {2,1,1,1,1,1}
 2 | {0,0,1,0,0,0}
 3 | {2,2,1,1,1,1}
(3 rows)

Time: 4.044 ms
drop table if exists test2;
DROP TABLE
Time: 0.217 ms
create table test2 as select * from test DISTRIBUTED BY (a);
SELECT 3
Time: 64.780 ms
-- Test the plus operator (should be 9 rows)
select (t1.b+t2.b)::float8[] cross_product_sum from test t1, test2 t2 order by t1.a;
 cross_product_sum 
-------------------
 {4,3,2,2,2,2}
 {4,2,2,2,2,2}
 {2,1,2,1,1,1}
 {0,0,2,0,0,0}
 {2,1,2,1,1,1}
 {2,2,2,1,1,1}
 {4,4,2,2,2,2}
 {4,3,2,2,2,2}
 {2,2,2,1,1,1}
(9 rows)

Time: 7.121 ms
-- Test ORDER BY
select (t1.b+t2.b)::float8[] cross_product_sum, l2norm(t1.b+t2.b) l2norm, (t1.b+t2.b) sparse_vector from test t1, test2 t2 order by 3;
 cross_product_sum |      l2norm      |    sparse_vector    
-------------------+------------------+---------------------
 {0,0,2,0,0,0}     |                2 | {2,1,3}:{0,2,0}
 {2,1,2,1,1,1}     | 3.46410161513775 | {1,1,1,3}:{2,1,2,1}
 {2,1,2,1,1,1}     | 3.46410161513775 | {1,1,1,3}:{2,1,2,1}
 {2,2,2,1,1,1}     | 3.87298334620742 | {3,3}:{2,1}
 {2,2,2,1,1,1}     | 3.87298334620742 | {3,3}:{2,1}
 {4,2,2,2,2,2}     |                6 | {1,5}:{4,2}
 {4,3,2,2,2,2}     | 6.40312423743285 | {1,1,4}:{4,3,2}
 {4,3,2,2,2,2}     | 6.40312423743285 | {1,1,4}:{4,3,2}
 {4,4,2,2,2,2}     | 6.92820323027551 | {2,4}:{4,2}
(9 rows)

Time: 5.359 ms
 select (sum(t1.b))::float8[] as features_sum from test t1;
 features_sum  
---------------
 {4,3,3,2,2,2}
(1 row)

Time: 2.844 ms
-- Test the div operator
 select (t1.b/(select sum(b) from test))::float8[] as weights from test t1 order by a;
                        weights                        
-------------------------------------------------------
 {0.5,0.333333333333333,0.333333333333333,0.5,0.5,0.5}
 {0,0,0.333333333333333,0,0,0}
 {0.5,0.666666666666667,0.333333333333333,0.5,0.5,0.5}
(3 rows)

Time: 4.229 ms
-- Test the * operator
 select t1.b %*% (t1.b/(select sum(b) from test)) as raw_score from test t1 order by a;
     raw_score     
-------------------
  3.16666666666667
 0.333333333333333
  4.16666666666667
(3 rows)

Time: 4.098 ms
-- Test the * and l2norm operators
 select (t1.b %*% (t1.b/(select sum(b) from test))) / (l2norm(t1.b) * l2norm((select sum(b) from test))) as norm_score from test t1 order by a;
    norm_score     
-------------------
  0.15563317594128
 0.049147318718299
 0.177345110574739
(3 rows)

Time: 6.295 ms
-- Test the ^ and l1norm operators
select ('{1,2}:{20.,10.}'::svec)^('{1}:{3.}'::svec);
     ?column?      
-------------------
 {1,2}:{8000,1000}
(1 row)

Time: 1.060 ms
 select (t1.b %*% (t1.b/(select sum(b) from test))) / (l1norm(t1.b) * l1norm((select sum(b) from test))) as norm_score from test t1 order by a;
     norm_score     
--------------------
 0.0282738095238095
 0.0208333333333333
 0.0325520833333333
(3 rows)

Time: 6.715 ms
-- Test the multi-concatenation and show sizes compared with a normal array
drop table if exists corpus_proj;
DROP TABLE
Time: 0.258 ms
drop table if exists corpus_proj_array;
DROP TABLE
Time: 0.225 ms
create table corpus_proj as (select 10000 *|| ('{45,2,35,4,15,1}:{0,1,0,1,0,2}'::svec) result ) distributed randomly;
SELECT 1
Time: 91.366 ms
create table corpus_proj_array as (select result::float8[] from corpus_proj) distributed randomly;
SELECT 1
Time: 318.754 ms
-- Calculate on-disk size of sparse vector
select pg_size_pretty(pg_total_relation_size('corpus_proj'));
 pg_size_pretty 
----------------
 192 kB
(1 row)

Time: 5.393 ms
-- Calculate on-disk size of normal array
select pg_size_pretty(pg_total_relation_size('corpus_proj_array'));
 pg_size_pretty 
----------------
 288 kB
(1 row)

Time: 3.488 ms
\timing
Timing is off.
-- Calculate L1 norm from sparse vector
select l1norm(result) from corpus_proj;
 l1norm 
--------
  80000
(1 row)

-- Calculate L1 norm from float8[]
select l1norm(result) from corpus_proj_array;
 l1norm 
--------
  80000
(1 row)

-- Calculate L2 norm from sparse vector
select l2norm(result) from corpus_proj;
      l2norm      
------------------
 316.227766016838
(1 row)

-- Calculate L2 norm from float8[]
select l2norm(result) from corpus_proj_array;
      l2norm      
------------------
 316.227766016838
(1 row)

drop table corpus_proj;
DROP TABLE
drop table corpus_proj_array;
DROP TABLE
drop table test;
DROP TABLE
drop table test2;
DROP TABLE
-- Test operators between svec and float8[]
select ('{1,2,3,4}:{3,4,5,6}'::svec)           %*% ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[];
 ?column? 
----------
      260
(1 row)

select ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[] %*% ('{1,2,3,4}:{3,4,5,6}'::svec);
 ?column? 
----------
      260
(1 row)

select ('{1,2,3,4}:{3,4,5,6}'::svec)            /  ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[];
 ?column? 
----------
 {10}:{1}
(1 row)

select ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[]  /  ('{1,2,3,4}:{3,4,5,6}'::svec);
 ?column? 
----------
 {10}:{1}
(1 row)

select ('{1,2,3,4}:{3,4,5,6}'::svec)            *  ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[];
        ?column?        
------------------------
 {1,2,3,4}:{9,16,25,36}
(1 row)

select ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[]  *  ('{1,2,3,4}:{3,4,5,6}'::svec);
        ?column?        
------------------------
 {1,2,3,4}:{9,16,25,36}
(1 row)

select ('{1,2,3,4}:{3,4,5,6}'::svec)            +  ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[];
       ?column?        
-----------------------
 {1,2,3,4}:{6,8,10,12}
(1 row)

select ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[]  +  ('{1,2,3,4}:{3,4,5,6}'::svec);
       ?column?        
-----------------------
 {1,2,3,4}:{6,8,10,12}
(1 row)

select ('{1,2,3,4}:{3,4,5,6}'::svec)            -  ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[];
 ?column? 
----------
 {10}:{0}
(1 row)

select ('{1,2,3,4}:{3,4,5,6}'::svec)::float8[]  -  ('{1,2,3,4}:{3,4,5,6}'::svec);
 ?column? 
----------
 {10}:{0}
(1 row)

-- Test the pivot operator in the presence of NULL values
drop table if exists pivot_test;
DROP TABLE
create table pivot_test(a float8) distributed randomly;
CREATE TABLE
insert into pivot_test values (0),(1),(NULL),(2),(3);
INSERT 0 5
select array_agg(a) from pivot_test;
      array_agg      
---------------------
 {1,1,2,1}:{1,2,0,3}
(1 row)

select l1norm(array_agg(a)) from pivot_test;
 l1norm 
--------
      6
(1 row)

drop table if exists pivot_test;
DROP TABLE
-- Answer should be 5
select vec_median(array_agg(a)) from (select generate_series(1,9) a) foo;
 vec_median 
------------
          5
(1 row)

-- Answer should be a 10-wide vector
select array_agg(a) from (select trunc(random()*10) a,generate_series(1,100000) order by a) foo;
                                   array_agg                                   
-------------------------------------------------------------------------------
 {9946,10172,10063,9850,9874,10139,9964,9923,10131,9938}:{0,1,2,3,4,5,6,7,8,9}
(1 row)

-- Average is 4.50034, median is 5
select vec_median('{9960,9926,10053,9993,10080,10050,9938,9941,10030,10029}:{1,9,8,7,6,5,4,3,2,0}'::svec);
 vec_median 
------------
          5
(1 row)

select vec_median('{9960,9926,10053,9993,10080,10050,9938,9941,10030,10029}:{1,9,8,7,6,5,4,3,2,0}'::svec::float8[]);
 vec_median 
------------
          5
(1 row)

