Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
EXPLAIN PLAN FOR SELECT playerID, playerName FROM baseballStats
+---------------------------------------------|------------|---------|
| Operator | Operator_Id|Parent_Id|
+---------------------------------------------|------------|---------|
|BROKER_REDUCE(limit:10) | 1 | 0 |
|COMBINE_SELECT | 2 | 1 |
|PLAN_START(numSegmentsForThisPlan:1) | -1 | -1 |
|SELECT(selectList:playerID, playerName) | 3 | 2 |
|TRANSFORM_PASSTHROUGH(playerID, playerName) | 4 | 3 |
|PROJECT(playerName, playerID) | 5 | 4 |
|DOC_ID_SET | 6 | 5 |
|FILTER_MATCH_ENTIRE_SEGMENT(docs:97889) | 7 | 6 |
+---------------------------------------------|------------|---------|PLAN_START(numSegmentsForThisPlan:1)minmaxTRANSFORM_PASSTHROUGHTRANSFORM_PASSTHROUGHDOC_ID_SETFILTER_SORTED_INDEXFILTER_RANGE_INDEXFILTER_INVERTED_INDEXFILTER_JSON_INDEXBROKER_REDUCE(limit:10)
└── COMBINE_SELECT
└── PLAN_START(numSegmentsForThisPlan:1)
└── SELECT(selectList:playerID, playerName)
└── TRANSFORM_PASSTHROUGH(playerID, playerName)
└── PROJECT(playerName, playerID)
└── DOC_ID_SET
└── FILTER_MATCH_ENTIRE_SEGMENT(docs:97889)SET explainPlanVerbose=true;
EXPLAIN PLAN FOR
SELECT playerID, playerName
FROM baseballStats
WHERE playerID = 'aardsda01' AND playerName = 'David Allan'
BROKER_REDUCE(limit:10)
└── COMBINE_SELECT
└── PLAN_START(numSegmentsForThisPlan:1)
└── SELECT(selectList:playerID, playerName)
└── TRANSFORM_PASSTHROUGH(playerID, playerName)
└── PROJECT(playerName, playerID)
└── DOC_ID_SET
└── FILTER_AND
├── FILTER_INVERTED_INDEX(indexLookUp:inverted_index,operator:EQ,predicate:playerID = 'aardsda01')
└── FILTER_FULL_SCAN(operator:EQ,predicate:playerName = 'David Allan')
└── PLAN_START(numSegmentsForThisPlan:1)
└── SELECT(selectList:playerID, playerName)
└── TRANSFORM_PASSTHROUGH(playerID, playerName)
└── PROJECT(playerName, playerID)
└── DOC_ID_SET
└── FILTER_EMPTYEXPLAIN PLAN FOR
SELECT playerID, count(*)
FROM baseballStats
WHERE playerID != 'aardsda01'
GROUP BY playerID
BROKER_REDUCE(limit:10)
└── COMBINE_GROUPBY_ORDERBY
└── PLAN_START(numSegmentsForThisPlan:1)
└── AGGREGATE_GROUPBY_ORDERBY(groupKeys:playerID, aggregations:count(*))
└── TRANORM_PASSTHROUGH(playerID)
└── PROJECT(playerID)
└── DOC_ID_SET
└── FILTER_INVERTED_INDEX(indexLookUp:inverted_index,operator:NOT_EQ,predicate:playerID != 'aardsda01')Query Pinot using supported syntax.
select distinctCountThetaSketch(
sketchCol,
'nominalEntries=1024',
'country'=''USA'' AND 'state'=''CA'', 'device'=''mobile'', 'SET_INTERSECT($1, $2)'
)
from table
where country = 'USA' or device = 'mobile...' EXPLAIN PLAN FOR
select
P_BRAND1, sum(LO_REVENUE)
from ssb_lineorder_1, ssb_part_1
where LO_PARTKEY = P_PARTKEY
and P_CATEGORY = 'MFGR#12'
group by P_BRAND1+-----------------------------------|-------------------------------------------------------------|
| SQL#$%0 |PLAN#$%1 |
+-----------------------------------|-------------------------------------------------------------|
|"EXPLAIN PLAN FOR |"Execution Plan |
|select |LogicalAggregate(group=[{0}], agg#0=[$SUM0($1)]) |
| P_BRAND1, sum(LO_REVENUE) | PinotLogicalExchange(distribution=[hash[0]]) |
|from ssb_lineorder_1, ssb_part_1 | LogicalAggregate(group=[{2}], agg#0=[$SUM0($1)]) |
|where LO_PARTKEY = P_PARTKEY | LogicalJoin(condition=[=($0, $3)], joinType=[inner]) |
| and P_CATEGORY = 'MFGR#12' | PinotLogicalExchange(distribution=[hash[0]]) |
|group by P_BRAND1 | LogicalProject(LO_PARTKEY=[$12], LO_REVENUE=[$14]) |
| and P_CATEGORY = 'MFGR#12' | LogicalTableScan(table=[[ssb_lineorder_1]]) |
|" | PinotLogicalExchange(distribution=[hash[1]]) |
| | LogicalProject(P_BRAND1=[$3], P_PARTKEY=[$9]) |
| | LogicalFilter(condition=[=($4, 'MFGR#12')]) |
| | LogicalTableScan(table=[[ssb_part_1]]) |
| |" |
+-----------------------------------|-------------------------------------------------------------|+-----------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------|
| SQL#$%0 |PLAN#$%1 |
+-----------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------|
|"EXPLAIN IMPLEMENTATION PLAN FOR |[0]@local:8843 MAIL_RECEIVE(BROADCAST_DISTRIBUTED) |
|select |├── [1]@local:8432 MAIL_SEND(BROADCAST_DISTRIBUTED)->{[0]@local@{8843,8843}|[0]} (Subtree Omitted) |
| P_BRAND1, sum(LO_REVENUE) |├── [1]@local:8432 MAIL_SEND(BROADCAST_DISTRIBUTED)->{[0]@local@{8843,8843}|[0]} (Subtree Omitted) |
|from ssb_lineorder_1, ssb_part_1 |└── [1]@local:8432 MAIL_SEND(BROADCAST_DISTRIBUTED)->{[0]@local@{8843,8843}|[0]} |
|where LO_PARTKEY = P_PARTKEY | └── [1]@local:8432 AGGREGATE_FINAL |
| and P_CATEGORY = 'MFGR#12' | └── [1]@local:8432 MAIL_RECEIVE(HASH_DISTRIBUTED) |
|group by P_BRAND1 | ├── [2]@local:8432 MAIL_SEND(HASH_DISTRIBUTED)->{[1]@local@{8432,8843}|[1],[1]@local@{8432,8843}|[2],[1]@local@{8432,8843}|[0]} (Subtree Omitted) |
| and P_CATEGORY = 'MFGR#12' | ├── [2]@local:8432 MAIL_SEND(HASH_DISTRIBUTED)->{[1]@local@{8432,8843}|[1],[1]@local@{8432,8843}|[2],[1]@local@{8432,8843}|[0]} (Subtree Omitted) |
|" | └── [2]@local:8432 MAIL_SEND(HASH_DISTRIBUTED)->{[1]@local@{8432,8843}|[1],[1]@local@{8432,8843}|[2],[1]@local@{8432,8843}|[0]} |
| | └── [2]@local:8432 AGGREGATE_LEAF |
| | └── [2]@local:8432 JOIN |
| | ├── [2]@local:8432 MAIL_RECEIVE(HASH_DISTRIBUTED) |
| | │ ├── [3]@local:8432 MAIL_SEND(HASH_DISTRIBUTED)->{[2]@local@{8432,8843}|[1],[2]@local@{8432,8843}|[2],[2]@local@{8432,8843}|[0]} |
| | │ │ └── [3]@local:8432 PROJECT |
| | │ │ └── [3]@local:8432 TABLE SCAN (ssb_lineorder_1) null |
| | │ ├── [3]@local:8432 MAIL_SEND(HASH_DISTRIBUTED)->{[2]@local@{8432,8843}|[1],[2]@local@{8432,8843}|[2],[2]@local@{8432,8843}|[0]} |
| | │ │ └── [3]@local:8432 PROJECT |
| | │ │ └── [3]@local:8432 TABLE SCAN (ssb_lineorder_1) null |
| | │ └── [3]@local:8432 MAIL_SEND(HASH_DISTRIBUTED)->{[2]@local@{8432,8843}|[1],[2]@local@{8432,8843}|[2],[2]@local@{8432,8843}|[0]} |
| | │ └── [3]@local:8432 PROJECT |
| | │ └── [3]@local:8432 TABLE SCAN (ssb_lineorder_1) null |
| | └── [2]@local:8432 MAIL_RECEIVE(HASH_DISTRIBUTED) |
| | ├── [4]@local:8432 MAIL_SEND(HASH_DISTRIBUTED)->{[2]@local@{8432,8843}|[1],[2]@local@{8432,8843}|[2],[2]@local@{8432,8843}|[0]} |
| | │ └── [4]@local:8432 PROJECT |
| | │ └── [4]@local:8432 FILTER |
| | │ └── [4]@local:8432 TABLE SCAN (ssb_part_1) null |
| | ├── [4]@local:8432 MAIL_SEND(HASH_DISTRIBUTED)->{[2]@local@{8432,8843}|[1],[2]@local@{8432,8843}|[2],[2]@local@{8432,8843}|[0]} |
| | │ └── [4]@local:8432 PROJECT |
| | │ └── [4]@local:8432 FILTER |
| | │ └── [4]@local:8432 TABLE SCAN (ssb_part_1) null |
| | └── [4]@local:8432 MAIL_SEND(HASH_DISTRIBUTED)->{[2]@local@{8432,8843}|[1],[2]@local@{8432,8843}|[2],[2]@local@{8432,8843}|[0]} |
| | └── [4]@local:8432 PROJECT |
| | └── [4]@local:8432 FILTER |
| | └── [4]@local:8432 TABLE SCAN (ssb_part_1) null |
+-----------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------|
Aggregate functions return a single result for a group of rows.
SELECT *
FROM ...
OPTION(minSegmentGroupTrimSize=<minSegmentGroupTrimSize>)SELECT *
FROM ...
OPTION(minServerGroupTrimSize=<minServerGroupTrimSize>)SELECT *
FROM ...
OPTION(groupTrimThreshold=<groupTrimThreshold>)SELECT SUM(colA)
FROM myTable
GROUP BY colB
HAVING SUM(colA) < 100
ORDER BY SUM(colA) DESC
LIMIT 10Learn how to write fast queries for looking up IDs in a list of values.
Pinot supports JOINs, including left, right, full, semi, anti, lateral, and equi JOINs. Use JOINs to connect two table to generate a unified view, based on a related column between the tables.
SELECT SUM(COL1) FILTER (WHERE COL2 > 300),
AVG(COL2) FILTER (WHERE COL2 < 50)
FROM MyTable WHERE COL3 > 50SELECT SUM(COL1) FILTER (WHERE COL1 IS NOT NULL)
FROM MyTable WHERE COL3 > 50SELECT ID_SET(yearID)
FROM baseballStats
WHERE teamID = 'WS1'SELECT ID_SET(playerName, 'expectedInsertions=10')
FROM baseballStats
WHERE teamID = 'WS1'SELECT ID_SET(playerName, 'expectedInsertions=100')
FROM baseballStats
WHERE teamID = 'WS1'SELECT ID_SET(playerName, 'expectedInsertions=100;fpp=0.01')
FROM baseballStats
WHERE teamID = 'WS1'SELECT yearID, count(*)
FROM baseballStats
WHERE IN_ID_SET(
yearID,
'ATowAAABAAAAAAA7ABAAAABtB24HbwdwB3EHcgdzB3QHdQd2B3cHeAd5B3oHewd8B30Hfgd/B4AHgQeCB4MHhAeFB4YHhweIB4kHigeLB4wHjQeOB48HkAeRB5IHkweUB5UHlgeXB5gHmQeaB5sHnAedB54HnwegB6EHogejB6QHpQemB6cHqAc='
) = 1
GROUP BY yearIDSELECT yearID, count(*)
FROM baseballStats
WHERE IN_ID_SET(
yearID,
'ATowAAABAAAAAAA7ABAAAABtB24HbwdwB3EHcgdzB3QHdQd2B3cHeAd5B3oHewd8B30Hfgd/B4AHgQeCB4MHhAeFB4YHhweIB4kHigeLB4wHjQeOB48HkAeRB5IHkweUB5UHlgeXB5gHmQeaB5sHnAedB54HnwegB6EHogejB6QHpQemB6cHqAc='
) = 0
GROUP BY yearIDSELECT yearID, count(*)
FROM baseballStats
WHERE IN_SUBQUERY(
yearID,
'SELECT ID_SET(yearID) FROM baseballStats WHERE teamID = ''WS1'''
) = 1
GROUP BY yearID SELECT yearID, count(*)
FROM baseballStats
WHERE IN_SUBQUERY(
yearID,
'SELECT ID_SET(yearID) FROM baseballStats WHERE teamID = ''WS1'''
) = 0
GROUP BY yearID SELECT yearID, count(*)
FROM baseballStats
WHERE IN_PARTITIONED_SUBQUERY(
yearID,
'SELECT ID_SET(yearID) FROM baseballStats WHERE teamID = ''WS1'''
) = 1
GROUP BY yearID SELECT yearID, count(*)
FROM baseballStats
WHERE IN_PARTITIONED_SUBQUERY(
yearID,
'SELECT ID_SET(yearID) FROM baseballStats WHERE teamID = ''WS1'''
) = 0
GROUP BY yearID SELECT myTable.column1,myTable.column2,myOtherTable.column1,....
FROM mytable INNER JOIN table2
ON table1.matching_column = myOtherTable.matching_column;SELECT
p.userID, t.spending_val
FROM promotion AS p JOIN transaction AS t
ON p.userID = t.userID
WHERE
p.promotion_val > 10
AND t.transaction_type IN ('CASH', 'CREDIT')
AND t.transaction_epoch >= p.promotion_start_epoch
AND t.transaction_epoch < p.promotion_end_epoch SELECT myTable.column1,table1.column2,myOtherTable.column1,....
FROM myTable LEFT JOIN myOtherTable
ON myTable.matching_column = myOtherTable.matching_column;SELECT table1.column1,table1.column2,table2.column1,....
FROM table1
RIGHT JOIN table2
ON table1.matching_column = table2.matching_column;SELECT table1.column1,table1.column2,table2.column1,....
FROM table1
FULL JOIN table2
ON table1.matching_column = table2.matching_column;SELECT *
FROM table1
CROSS JOIN table2;SELECT myTable.column1, myOtherTable.column1
FROM myOtherTable
WHERE NOT EXISTS [ join_criteria ]SELECT *
FROM table1
JOIN table2
[ON (join_condition)]
OR
SELECT column_list
FROM table1, table2....
WHERE table1.column_name =
table2.column_name; For more information about using JOINs with the multi-stage query engine, see JOINs.
lookupUDFSpec:
LOOKUP
'('
'''dimTable'''
'''dimColToLookup'''
[ '''dimJoinKey''', factJoinKey ]*
')'SELECT
playerName,
teamID,
LOOKUP('dimBaseballTeams', 'teamName', 'teamID', teamID) AS teamName,
LOOKUP('dimBaseballTeams', 'teamAddress', 'teamID', teamID) AS teamAddress
FROM baseballStats SELECT
teamID,
teamName AS nameFromLocal,
LOOKUP('dimBaseballTeams', 'teamName', 'teamID', teamID) AS nameFromLookup
FROM dimBaseballTeamsselect
customerId,
missedPayment,
LOOKUP('billing', 'city', 'customerId', customerId, 'creditHistory', creditHistory) AS lookedupCity
from billingTable myTable:
id INTEGER
jsoncolumn JSON
Table data:
101,{"name":{"first":"daffy"\,"last":"duck"}\,"score":101\,"data":["a"\,"b"\,"c"\,"d"]}
102,{"name":{"first":"donald"\,"last":"duck"}\,"score":102\,"data":["a"\,"b"\,"e"\,"f"]}
103,{"name":{"first":"mickey"\,"last":"mouse"}\,"score":103\,"data":["a"\,"b"\,"g"\,"h"]}
104,{"name":{"first":"minnie"\,"last":"mouse"}\,"score":104\,"data":["a"\,"b"\,"i"\,"j"]}
105,{"name":{"first":"goofy"\,"last":"dwag"}\,"score":104\,"data":["a"\,"b"\,"i"\,"j"]}
106,{"person":{"name":"daffy duck"\,"companies":[{"name":"n1"\,"title":"t1"}\,{"name":"n2"\,"title":"t2"}]}}
107,{"person":{"name":"scrooge mcduck"\,"companies":[{"name":"n1"\,"title":"t1"}\,{"name":"n2"\,"title":"t2"}]}}SELECT id, jsoncolumn
FROM myTable<=JSON_MATCHJSON_EXTRACT_SCALAR><>=<=SELECT id,
json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
json_extract_scalar(jsoncolumn, '$.name.first', 'STRING', 'null') first_name
json_extract_scalar(jsoncolumn, '$.data[1]', 'STRING', 'null') value
FROM myTableSELECT id,
json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
json_extract_scalar(jsoncolumn, '$.name.first', 'STRING', 'null') first_name,
json_extract_scalar(jsoncolumn, '$.data[1]', 'STRING', 'null') value
FROM myTable
WHERE JSON_MATCH(jsoncolumn, '"$.data[1]" IS NOT NULL') SELECT json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
count(*)
FROM myTable
WHERE JSON_MATCH(jsoncolumn, '"$.data[1]" IS NOT NULL')
GROUP BY json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null')
ORDER BY 2 DESC SELECT json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
sum(json_extract_scalar(jsoncolumn, '$.id', 'INT', 0)) total
FROM myTable
WHERE JSON_MATCH(jsoncolumn, '"$.name.last" IS NOT NULL')
GROUP BY json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') SELECT json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
sum(json_extract_scalar(jsoncolumn, '$.id', 'INT', 0)) total
FROM myTable
WHERE JSON_MATCH(jsoncolumn, '"$.name.last" IS NOT NULL') AND json_extract_scalar(jsoncolumn, '$.id', 'INT', 0) > 102
GROUP BY json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') SELECT json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
json_extract_scalar(jsoncolumn, '$.id', 'INT', 0) total
FROM myTable
WHERE JSON_MATCH(jsoncolumn, '"$.data[*]" = ''f''')
GROUP BY json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null')data[0] IN ('k', 'j')"data[0]" IN ('k', 'j')"data[0]" IN (''k'', ''j'')'"data[0]" IN (''k'', ''j'')' WHERE JSON_MATCH(jsoncolumn, '"data[0]" IN (''k'', ''j'')')This document contains the list of all the transformation functions supported by Pinot SQL.
SELECT time_col, SUM(status) AS occupied_slots_count
FROM (
SELECT GAPFILL(time_col,'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','2021-10-01 09:00:00.000',
'2021-10-01 12:00:00.000','30:MINUTES', FILL(status, 'FILL_PREVIOUS_VALUE'),
TIMESERIESON(lotId)), lotId, status
FROM (
SELECT DATETIMECONVERT(event_time,'1:MILLISECONDS:EPOCH',
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','30:MINUTES') AS time_col,
lotId, lastWithTime(is_occupied, event_time, 'INT') AS status
FROM parking_data
WHERE event_time >= 1633078800000 AND event_time <= 1633089600000
GROUP BY 1, 2
ORDER BY 1
LIMIT 100)
LIMIT 100)
GROUP BY 1
LIMIT 100SELECT GAPFILL(DATETIMECONVERT(event_time,'1:MILLISECONDS:EPOCH',
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','30:MINUTES'),
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','2021-10-01 09:00:00.000',
'2021-10-01 12:00:00.000','30:MINUTES', FILL(is_occupied, 'FILL_PREVIOUS_VALUE'),
TIMESERIESON(lotId)) AS time_col, lotId, is_occupied
FROM parking_data
WHERE event_time >= 1633078800000 AND event_time <= 1633089600000
ORDER BY 1
LIMIT 100SELECT GAPFILL(time_col,'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','2021-10-01 09:00:00.000',
'2021-10-01 12:00:00.000','30:MINUTES', FILL(status, 'FILL_PREVIOUS_VALUE'),
TIMESERIESON(lotId)), lotId, status
FROM (
SELECT DATETIMECONVERT(event_time,'1:MILLISECONDS:EPOCH',
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','30:MINUTES') AS time_col,
lotId, lastWithTime(is_occupied, event_time, 'INT') AS status
FROM parking_data
WHERE event_time >= 1633078800000 AND event_time <= 1633089600000
GROUP BY 1, 2
ORDER BY 1
LIMIT 100)
LIMIT 100SELECT time_col, SUM(is_occupied) AS occupied_slots_count
FROM (
SELECT GAPFILL(DATETIMECONVERT(event_time,'1:MILLISECONDS:EPOCH',
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','30:MINUTES'),
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','2021-10-01 09:00:00.000',
'2021-10-01 12:00:00.000','30:MINUTES', FILL(is_occupied, 'FILL_PREVIOUS_VALUE'),
TIMESERIESON(lotId)) AS time_col, lotId, is_occupied
FROM parking_data
WHERE event_time >= 1633078800000 AND event_time <= 1633089600000
ORDER BY 1
LIMIT 100)
GROUP BY 1
LIMIT 100Use window aggregate to compute averages, sort, rank, or count items, calculate sums, and find minimum or maximum values across window.
windowedAggCall:
windowAggFunction
OVER
window
windowAggFunction:
agg '(' [ ALL | DISTINCT ] value [, value ]* ')'
|
agg '(' '*' ')'
window:
'('
[ PARTITION BY expression [, expression ]* ]
[ ORDER BY orderItem [, orderItem ]* ]
[
RANGE numericOrIntervalExpression { PRECEDING | FOLLOWING }
| ROWS numericExpression { PRECEDING | FOLLOWING }
]
')'SELECT FUNC(column1) OVER (PARTITION BY column2 ORDER BY column3)
FROM tableName
WHERE filter_clause SELECT customer_id, payment_date, amount, SUM(amount) OVER(PARTITION BY customer_id ORDER BY payment_date) from payment;SELECT customer_id, payment_date, amount, MIN(amount) OVER(PARTITION BY customer_id) from payment;SELECT customer_id, payment_date, amount, AVG(amount) OVER(PARTITION BY customer_id) from payment;SELECT ROW_NUMBER() OVER(ORDER BY SalesYTD DESC) AS Row,
FirstName, LastName AS "Total sales YTD"
FROM Sales.vSalesPerson; SELECT customer_id, payment_date, amount, count(amount) OVER(PARTITION BY customer_id) from payment;