About Me

My photo
Software Engineer at Starburst. Maintainer at Trino. Previously at LINE, Teradata, HPE.

2017-07-17

Aster SQL Memo

This is just a personal memo about Aster SQL.

-- drop table
drop table if exists schema.table;

-- create fact table
create table schema.table (
    id integer,
) distribute by hash(id) compress low;

-- create dimension table
create table schema.table (
    id character varying,
) distribute by replication compress low;

-- collect statistics
analyze schema.table;

-- * multiple columns to where-in isn't allowed
select * from schema.t1 where (c1, c2) in (select d1, d2 from schema.t2);
-- > error

-- cast
select cast(c1 as integer) from schema.table;
select c1::integer from schema.table;

-- collaborative filtering
select * from cfilter (
on (select 3)
partition by 3
inputtable ('public.input_table')
outputtable ('public.output_table')
inputcolumns ('c1')
joincolumns ('j1')
droptable ('true')
);

-- canopy
select * from canopy(
on (select 1) partition by 1
inputtable('public.input_table')
loosedistance('130.0')
tightdistance('65.0'))
order by canopyid
;

-- kmeans modeling
select * from kmeans
(on (select 1) partition by 1
inputtable('public.input_table')
outputtable('public.output_table')
numberK('3'))
;

-- kmeans scoring
select * from kmeansplot (
on public.input_table partition by any
on public.kmeans_modeling_output dimension
centroidstable ('public.kmeans_modeling_output')
) order by 2,1