About Me

My photo
Software Engineer at Starburst. Maintainer at Trino. Previously at LINE, Teradata, HPE.

2017-10-16

(WIP)Teradata 14 SQL

Started preparing Teradata 14 SQL exam.

Teradata SQL - V14.10.6
-- Prepare
drop table t1;
create table t1(c1 int, c2 int);
insert into t1 values (1, 1);
insert into t1 values (2, 2);
-- SET
-- invalid
select * from t1
union
select * from t1
order by c1,c2
;

-- valid
select * from t1
union
select * from t1
order by 1,2
;
/* Macro */
create macro select1 as
(select 1;);

replace macro select1 as
(select 1;);

show macro select1;
help macro select1;

exec select1;
drop macro select1;

replace macro multiselect as
(select 1;
 select 2;);
exec multiselect;
-- returns 2

replace macro selectorder as
(
select * from t1 order by 1 desc
;);
exec selectorder;

replace macro plus1(num integer) as
(select :num+1;);
exec plus1(99); -- 100
exec plus1(); -- null

replace macro plus1_optional(num# integer) as
(select :num#+1;);
exec plus1_optional(1); -- 2
exec plus1_optional(); -- null

replace macro createt1 as
(drop table t1; create table t1(id int););
-- Data definition not valid unless solitary

replace macro createt1 as
(create table t1(id int););
exec createt1;

-- Parametizing an In-List
replace macro param_list(list varchar(100)) as
(
select * from (select 'teradata' as c1) t
where c1 in (:list)
;);
exec param_list('''hello'', ''teradata'''); -- No rows
exec param_list('teradata'); -- teradata

replace macro param_list2(list varchar(100)) as
(
select * from (select 'teradata' as c1) t
where position(c1 in :list) > 0
;);
exec param_list2('hello teradata'); -- teradata
exec param_list2('teradata'); -- teradata

Teradata Advanced SQL
-- CTAS
create table t2 as t1 with data;
create table t2 as t1 with no data;
create table t2 as (select * from t1) with data;
create table t2 as (select * from t1) with no data;
-- References(Foreign Key) constraints and Triggers (which reference source table) are not copied

-- index
create table t1 (c1 int, c2 int) unique primary index(c1);
create table t2 as t1 with no data unique index(c2); -- primary index(c1) and unique index(c2)
create table t2 as t1 with no data unique primary index(c1) unique index(c2); -- unique primary index(c1) and unique index(c2)
show table t2;


-- CTAS with subqueries
-- * Table attributes (FALLBACK) are not copied.  
-- * Secondary index are not copied.  
-- * First column listed becomes a NUPI unless otherwise specified.  
create table t1 (c1 int, c2 int) unique primary index(c1) unique index (c2);
create table t2 as (select * from t1) with no data; -- primary index(c1)
create table t2 as (select c2, c1 from t1) with no data; -- primary index(c2)

-- change column attribues
drop table t1;
drop table t2;
create table t1 (c1 int, c2 int) unique primary index(c1) unique index (c2);
create table t2 (c1 default 0, c2 check (c2 > 0) ) as (select * from t1) with no data;
create table t2 (c1 not null default 0) as (select c1 from t1) with no data;
insert into t2 values ();  -- (0) are inserted
Deribed Table

with d1(c1, c2) as
(select * from t1)
select * from d1;
Volatile Table

  • LOG is indicates that a transaction journal is maintained. (default)
  • NO LOG allows for better performance
  • ON COMMIT DELETE ROWS delete all rows after a commit
  • ON COMMIT PRESERVE ROWS keep table rows at transaction end
  • No secondary index arrowed
  • Statistics can not be collected
  • Up to 1000 volatile tables are allowed for a single session
  • Some Options(Permanent Journaling, Referential Integrity, CHECK constraints, Column compression, Column default values, Column titles, Named indexes )
  • CREATE/DROP INDEX, ALTER TABLE, GRANT, REVOKE are not applicable


-- Implicit transactions
-- Rows are deleted immediately after the inert for implicit transactions
-- Same for ANSI mode
create volatile table v1(id int) on commit delete rows;

bt;
insert into v1 values (1);
select * from v1; -- (1)
et;

select * from v1; -- No rows

-- Explicit transactions
create volatile table v1(id int) on commit preserve rows;

bt;
insert into v1 values (1);
select * from v1; -- (1)
et;

select * from v1; -- (1)
Recursive

with recursive all_trips(origin, destination, cost, depth) as
(
select origin, destination, cost, 0 from flights where origin='LAX'
union all
select all_trips.origin, flights.destination, all_trips.cost+flights.cost, all_trips.depth+1 from all_trips
join flights on all_trips.destination=flights.origin
and all_trips.depth<3
)
select * from all_trips order by 4
;

4. Global Temporary Table

The syntax is CREATE GLOBAL TEMPORARY
Space is charged against an allocation of temporary space
Users can materialize up to 2000 tables per session
They can survive system restart
They are tuneable CREATE INDEX and COLLECT STATS
The default mode of ON COMMIT is ON COMMIT DELETE ROWS, run this query to change it. ALTER TABLE t1 ON COMMIT PRESERVE ROWS

5. Windows Aggregate Functions

  • COUNT, SUM, MIN, MAX and AVG are standard aggregate syntax
  • RANK, PERCENT_RANK and ROW_NUMBER are extension
  • Nulls are ignored during aggregation, but do aggregate as groups
  • When more than one window aggregates is projected, the final sort is determined by the last one projected

Step Order

  1. where
  2. aggregation
  3. having
  4. olap (partiton by, order by rows)
  5. qualify [order by]
  6. RANDOM
  7. sample, top n
  8. order by
  9. format

6. Windows Aggregate Functions 2

Non-ANSI, ANSI
csum, sum over
msvg, avg over
msum, sum over
mdiff, N/A
rank(column), rank over
quantile, rank over

Cumulative SUM
sum(c1) over(partition by p1 order by o1 rows unbounded preceding)

Moving SUM
sum(c1) over(order by o1 rows 2 preceding)

Moving AVG
sum(c1) over(order by o1 rows between 2 preceding and 1 preceding)

Moving DIFF
c1 - min(c1) over(order by o1 rows between 7 preceding and 7 preceding) or mdiff(c1, 7, o1)

Remaining Window
sum(c1) over(order by o1 rows between current row and unbounded following)

Moving Window
sum(c1) over(order by o1 rows between current row and 2 following)

Reset When
sum(c1) over(order by o1 reset when c1 is null rows unbounded following)

Preceding includes the current row, Following excludes the current row 0 Preceding and 0 Following include the current row

7. RANK
rank() over(partition by p1 order by o1)
row_number function is an ANSI standard function, disregards any tied value
row_number() over(order by o1 reset when r1 is null)
dense_rank() function is an ANSI standard function
rank() over(order by o1 with ties low)
rank() over(order by o1 with ties high)
rank() over(order by o1 with ties dense)
rank() over(order by o1 with ties avg)

percent_rank() is an ANSI standard function and is expressed as an approximate numeric ratio between 0.0 and 1.0

pecentile_cont(?)
percent_cont(0.3) within group (order by c2 (dec(8,2)))pecentile_disk
percent_disc(0.3) within group (order by c2 (dec(8,2)))
cume_dist = RankHigh / Num Rows in Group
first_value(c1 ignore nulls) over(partition by p1 order by o1 rows between 3 preceding and 1 following)
last_value(c1 respect nulls) over(partition by p1 order by o1 rows between 3 preceding and 1 following)

8. Extended Grouping Functions ROLLUP, CUBE, GROUPING SETS

-- rollup
select
 c1
,c2
,sum(c3)
from t1
group by rollup(1, 2)
order by 1 desc, 2 desc
;

-- grouping
select
 case grouping(c1)
  when 1 then 'total'
  else coalesce(c1, 'value is null') /* else or when 1*/
 end
,sum(c3)
from t1
group by rollup(c1)
;

-- two colulmns into one column
select
 c1
,c2
,sum(c3)
from t1
group by rollup((1, 2))
order by 1 desc, 2 desc
;

-- rollup summarizes from right-to-left
-- cube summarizes from right-to-left and left-to-right

-- cube
select
 c1
,c2
,sum(c3)
from t1
group by cube(1, 2)
order by 1 desc, 2 desc
;

-- cube and grouping
select
 case grouping(c1)
  when 1 then 'c1 total'
  else coalesce(c1, 'value is null')
 end
,case grouping(c2)
  when 1 then 'c2 total'
  else coalesce(c2, 'value is null')
 end
,sum(c3)
from t1
group by cube(c1, c2)
;

-- combine grouping in a cube
select
 c1
,c2
,c3
,sum(c4)
from t1
group by cube((1, 2), 3)
;

-- grouping sets
select
 c1
,c2
,sum(c3)
from t1
group by grouping sets(c1, c2)
;

-- adding grand totals
select
 c1
,c2
,sum(c3)
from t1
group by grouping sets(c1, c2, ())
;

group by cube(c1, c2)
group by grouping sets((c1, c2), c1, c2, ())
group by grouping sets(c1, ()) grouping sets(c2, ())
9. QUANTILE and WIDTH_BUCKET

-- quantile
select
 c1
from t1
qualify quantile(100, c2) = 20
;

-- aggregates
select
sum(c1)
from
( select c1 from t1 qualify quantile(100, c2) = 20 ) as t
;

-- qualify and group by
select
 c1
,quantile(100, c2)
from t1
group by 1
;

-- ordergin qualilfy
select
 c2
,quantile(100, c2 asc)
from t1
;

-- percent_rank
select
 c2
,percent_rank() over(order by c2)
from t1
;

-- percent_rank derives a value between 0.0 and 1.0
-- percentile is a number between 0 and 99

-- width_bucket(column, lower, upper, partition_count)
-- number of buckets created is always partition_count+3
-- 1. 0 bucket for all values less than the lower
-- 2. n+1 for all values greater than the upper
-- 3. null backet for null values
select
 c1
,width_bucket(c1, 1, 3, 10)
from t1
;

10. Correlated Subqueries NOT IN can have issues when nulls are involved
NOT EXISTS does not share those issues due to row-at-a-time logic

11. Scalar Subqueries NCSSQ: Inner query does not reference the outer table
CSSQ: Inner query does reference the outer table

select
 c1
,(select max(c2) from t1)
from t1
;

12. Date-Time Data Types The date format for the system is defined in the “DATEFORM” setting of DBS Control

You can change or override the DATE format settings in any of the following ways:

Set/Change the system default date format in the SDF file (integerdate only)
Choose the DATEFORM at the system level in the DBS Control Record

  • Set the DATEFORM at the user level with the CREATE USER
  • Set the DATEFORM at the session level using SQL
  • Specify a format in a FORMAT phrase
  • modify general 14=0 /* integerdate (YY/MM/DD) */
  • modify general 14=1 /* ansidate (YYYY-MM-DD) */


create user dlm...
 dateform=ansidate
         =integerdate

set session dateform=ansidate
                    =integerdate

select date(format '----')

SQL Assistant may be affected from several settings. including OOL -> Options -> Data Format

Centrury-Break setting effects the system’s interpretation of dates only when FORMAT uses YY

If you use Centrury-Break setting of 40 (YY/MM/DD), ‘40/12/31’ become 1940/12/31 and ‘39/12/31’ become ‘2039/12/31’.

Time(n) n=0-6 (default is 6)
HH BYTEINT (1 byte)
MI BYTEINT (1 byte)
SS DEC(8,6) (4 bytes)
Time(0) HH:MI:SS CHAR(8)
Time(6) HH:MI:SS.nnnnnn CHAR(15)
  Time data types are imported and exported as CHARACTER data using FORMAT

-- setting timezone
modify general 16=n /* n=-12 to+13 */
modify general 17=n /* n=-59 to+59 */

create user dlm...
  timezone=local
          =null
          ='08:00'
          ='-04:30'

set time zone local
set time zone user
set time zone inteval '05:00' hour to minute

-- extract
select extract(timezone_hour from current_timestamp)
select extract(timezone_minute from current_timestamp)
13. Interval Data Types and Manipulations

select interval '05-13' year to month -- 6-01
select interval '03:12' hour to minute -- 3:12

select interval '02' year

select (date '2009-01-31' + interval '1' month) -- fail

select add_months(date '2009-01-31', 1) -- 2009-02-28

select (date '2009-01-31' - date '2010-02-15' year to month ) --  -1-01

select interval '1' year (interval month(4)) -- 12

14. Timestamps and Timezones (P.637)

Round with DATE
select round(cast('2017-10-02' as date), 'cc');
-- 2001-01-01
select round(cast('2017-10-02' as date), 'year');
-- 2018-01-01
select round(cast('2017-10-02' as date), 'month');
-- 2017-10-01
initcap

select initcap('software enginner');
-- Software Enginner
nvl & coalesce

select nvl(null, 1);
select coalesce(null, 1);
-- 1
DECODE

select decode(1, 1, '1st',
                 2, '2nd',
                 'other');

-- 1st
Tips

  • When using the TOP N function, unless there is an ORDER BY clause, the WITH TIES option is ignored
  • SHOW COLUMN is invalid, HELP COLUMN is valid
  • DATE and TIME are not ANSI starndard function
  • The FLOAT data type has 15 digits of precision, DECIMAL has 38 digits.
  • RANDOM function is a Teradata extension
  • A subquery cannot perform a SAMPLE, derived tables can perform a SAMPLE
  • Top N option is not supported in subquery
  • WITH TIES options is ignored without an ORDER BY option
  • A recursive queries without preventing infinite loop may dead-end on their own
  • Grand totals are always retrieved when using CUBE and ROLLUP
  • The format for integerdate is yy/mm/dd, ansidate is yyyy-mm-dd
  • The default for a timestamp data type is TIMESTAMP(6)
Backlog

  • Procedure


References