This is just a personal memo about Aster SQL.
-- drop table
drop table if exists schema.table;
-- create fact table
create table schema.table (
id integer,
) distribute by hash(id) compress low;
-- create dimension table
create table schema.table (
id character varying,
) distribute by replication compress low;
-- collect statistics
analyze schema.table;
-- * multiple columns to where-in isn't allowed
select * from schema.t1 where (c1, c2) in (select d1, d2 from schema.t2);
-- > error
-- cast
select cast(c1 as integer) from schema.table;
select c1::integer from schema.table;
-- collaborative filtering
select * from cfilter (
on (select 3)
partition by 3
inputtable ('public.input_table')
outputtable ('public.output_table')
inputcolumns ('c1')
joincolumns ('j1')
droptable ('true')
);
-- canopy
select * from canopy(
on (select 1) partition by 1
inputtable('public.input_table')
loosedistance('130.0')
tightdistance('65.0'))
order by canopyid
;
-- kmeans modeling
select * from kmeans
(on (select 1) partition by 1
inputtable('public.input_table')
outputtable('public.output_table')
numberK('3'))
;
-- kmeans scoring
select * from kmeansplot (
on public.input_table partition by any
on public.kmeans_modeling_output dimension
centroidstable ('public.kmeans_modeling_output')
) order by 2,1