Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Learn how to query Apache Pinot using SQL or explore data using the web-based Pinot query console.
SET key1 = 'value1';
SET key2 = 123;
SELECT * FROM myTableSELECT * FROM myTable OPTION(key1=value1, key2=123)
SELECT * FROM myTable OPTION(key1=value1) OPTION(key2=123)
SELECT * FROM myTable OPTION(timeoutMs=30000)Table myTable:
id INTEGER
jsoncolumn JSON
Table data:
101,{"name":{"first":"daffy"\,"last":"duck"}\,"score":101\,"data":["a"\,"b"\,"c"\,"d"]}
102,{"name":{"first":"donald"\,"last":"duck"}\,"score":102\,"data":["a"\,"b"\,"e"\,"f"]}
103,{"name":{"first":"mickey"\,"last":"mouse"}\,"score":103\,"data":["a"\,"b"\,"g"\,"h"]}
104,{"name":{"first":"minnie"\,"last":"mouse"}\,"score":104\,"data":["a"\,"b"\,"i"\,"j"]}
105,{"name":{"first":"goofy"\,"last":"dwag"}\,"score":104\,"data":["a"\,"b"\,"i"\,"j"]}
106,{"person":{"name":"daffy duck"\,"companies":[{"name":"n1"\,"title":"t1"}\,{"name":"n2"\,"title":"t2"}]}}
107,{"person":{"name":"scrooge mcduck"\,"companies":[{"name":"n1"\,"title":"t1"}\,{"name":"n2"\,"title":"t2"}]}}SELECT id, jsoncolumn
FROM myTable<=JSON_MATCHJSON_EXTRACT_SCALAR><>=<=SELECT id,
json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
json_extract_scalar(jsoncolumn, '$.name.first', 'STRING', 'null') first_name
json_extract_scalar(jsoncolumn, '$.data[1]', 'STRING', 'null') value
FROM myTableSELECT id,
json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
json_extract_scalar(jsoncolumn, '$.name.first', 'STRING', 'null') first_name,
json_extract_scalar(jsoncolumn, '$.data[1]', 'STRING', 'null') value
FROM myTable
WHERE JSON_MATCH(jsoncolumn, '"$.data[1]" IS NOT NULL') SELECT json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
count(*)
FROM myTable
WHERE JSON_MATCH(jsoncolumn, '"$.data[1]" IS NOT NULL')
GROUP BY json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null')
ORDER BY 2 DESC SELECT json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
sum(json_extract_scalar(jsoncolumn, '$.id', 'INT', 0)) total
FROM myTable
WHERE JSON_MATCH(jsoncolumn, '"$.name.last" IS NOT NULL')
GROUP BY json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') SELECT json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
sum(json_extract_scalar(jsoncolumn, '$.id', 'INT', 0)) total
FROM myTable
WHERE JSON_MATCH(jsoncolumn, '"$.name.last" IS NOT NULL') AND json_extract_scalar(jsoncolumn, '$.id', 'INT', 0) > 102
GROUP BY json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') SELECT json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
json_extract_scalar(jsoncolumn, '$.id', 'INT', 0) total
FROM myTable
WHERE JSON_MATCH(jsoncolumn, '"$.data[*]" = ''f''')
GROUP BY json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null')data[0] IN ('k', 'j')"data[0]" IN ('k', 'j')"data[0]" IN (''k'', ''j'')'"data[0]" IN (''k'', ''j'')' WHERE JSON_MATCH(jsoncolumn, '"data[0]" IN (''k'', ''j'')')
Learn how to query Pinot using SQL
//default to limit 10
SELECT *
FROM myTable
SELECT *
FROM myTable
LIMIT 100SELECT "date", "timestamp"
FROM myTable SELECT COUNT(*), MAX(foo), SUM(bar)
FROM myTableSELECT MIN(foo), MAX(foo), SUM(foo), AVG(foo), bar, baz
FROM myTable
GROUP BY bar, baz
LIMIT 50SELECT MIN(foo), MAX(foo), SUM(foo), AVG(foo), bar, baz
FROM myTable
GROUP BY bar, baz
ORDER BY bar, MAX(foo) DESC
LIMIT 50SELECT COUNT(*)
FROM myTable
WHERE foo = 'foo'
AND bar BETWEEN 1 AND 20
OR (baz < 42 AND quux IN ('hello', 'goodbye') AND quuux NOT IN (42, 69))SELECT COUNT(*)
FROM myTable
WHERE foo IS NOT NULL
AND foo = 'foo'
AND bar BETWEEN 1 AND 20
OR (baz < 42 AND quux IN ('hello', 'goodbye') AND quuux NOT IN (42, 69))SELECT *
FROM myTable
WHERE quux < 5
LIMIT 50SELECT foo, bar
FROM myTable
WHERE baz > 20
ORDER BY bar DESC
LIMIT 100SELECT foo, bar
FROM myTable
WHERE baz > 20
ORDER BY bar DESC
LIMIT 50, 100SELECT COUNT(*)
FROM myTable
WHERE REGEXP_LIKE(airlineName, '^U.*')
GROUP BY airlineName LIMIT 10SELECT
CASE
WHEN price > 30 THEN 3
WHEN price > 20 THEN 2
WHEN price > 10 THEN 1
ELSE 0
END AS price_category
FROM myTableSELECT
SUM(
CASE
WHEN price > 30 THEN 30
WHEN price > 20 THEN 20
WHEN price > 10 THEN 10
ELSE 0
END) AS total_cost
FROM myTableSELECT COUNT(*)
FROM myTable
GROUP BY DATETIMECONVERT(timeColumnName, '1:MILLISECONDS:EPOCH', '1:HOURS:EPOCH', '1:HOURS')SELECT *
FROM myTable
WHERE UID = 'c8b3bce0b378fc5ce8067fc271a34892'select distinctCountThetaSketch(
sketchCol,
'nominalEntries=1024',
'country'=''USA'' AND 'state'=''CA'', 'device'=''mobile'', 'SET_INTERSECT($1, $2)'
)
from table
where country = 'USA' or device = 'mobile...' Pinot supports JOINs, including left, right, full, semi, anti, lateral, and equi JOINs. Use JOINs to connect two table to generate a unified view, based on a related column between the tables.
Operator column describes the operator that Pinot will run where as, the Operator_Id and Parent_Id columns show the parent-child relationship between operators. SELECT myTable.column1,myTable.column2,myOtherTable.column1,....
FROM mytable INNER JOIN table2
ON table1.matching_column = myOtherTable.matching_column;SELECT
p.userID, t.spending_val
FROM promotion AS p JOIN transaction AS t
ON p.userID = t.userID
WHERE
p.promotion_val > 10
AND t.transaction_type IN ('CASH', 'CREDIT')
AND t.transaction_epoch >= p.promotion_start_epoch
AND t.transaction_epoch < p.promotion_end_epoch SELECT myTable.column1,table1.column2,myOtherTable.column1,....
FROM myTable LEFT JOIN myOtherTable
ON myTable.matching_column = myOtherTable.matching_column;SELECT table1.column1,table1.column2,table2.column1,....
FROM table1
RIGHT JOIN table2
ON table1.matching_column = table2.matching_column;SELECT table1.column1,table1.column2,table2.column1,....
FROM table1
FULL JOIN table2
ON table1.matching_column = table2.matching_column;SELECT *
FROM table1
CROSS JOIN table2;SELECT myTable.column1, myOtherTable.column1
FROM myOtherTable
WHERE NOT EXISTS [ join_criteria ]SELECT *
FROM table1
JOIN table2
[ON (join_condition)]
OR
SELECT column_list
FROM table1, table2....
WHERE table1.column_name =
table2.column_name; EXPLAIN PLAN FOR SELECT playerID, playerName FROM baseballStats
+---------------------------------------------|------------|---------|
| Operator | Operator_Id|Parent_Id|
+---------------------------------------------|------------|---------|
|BROKER_REDUCE(limit:10) | 1 | 0 |
|COMBINE_SELECT | 2 | 1 |
|PLAN_START(numSegmentsForThisPlan:1) | -1 | -1 |
|SELECT(selectList:playerID, playerName) | 3 | 2 |
|TRANSFORM_PASSTHROUGH(playerID, playerName) | 4 | 3 |
|PROJECT(playerName, playerID) | 5 | 4 |
|DOC_ID_SET | 6 | 5 |
|FILTER_MATCH_ENTIRE_SEGMENT(docs:97889) | 7 | 6 |
+---------------------------------------------|------------|---------|BROKER_REDUCE(limit:10)
└── COMBINE_SELECT
└── PLAN_START(numSegmentsForThisPlan:1)
└── SELECT(selectList:playerID, playerName)
└── TRANSFORM_PASSTHROUGH(playerID, playerName)
└── PROJECT(playerName, playerID)
└── DOC_ID_SET
└── FILTER_MATCH_ENTIRE_SEGMENT(docs:97889)SET explainPlanVerbose=true;
EXPLAIN PLAN FOR
SELECT playerID, playerName
FROM baseballStats
WHERE playerID = 'aardsda01' AND playerName = 'David Allan'
BROKER_REDUCE(limit:10)
└── COMBINE_SELECT
└── PLAN_START(numSegmentsForThisPlan:1)
└── SELECT(selectList:playerID, playerName)
└── TRANSFORM_PASSTHROUGH(playerID, playerName)
└── PROJECT(playerName, playerID)
└── DOC_ID_SET
└── FILTER_AND
├── FILTER_INVERTED_INDEX(indexLookUp:inverted_index,operator:EQ,predicate:playerID = 'aardsda01')
└── FILTER_FULL_SCAN(operator:EQ,predicate:playerName = 'David Allan')
└── PLAN_START(numSegmentsForThisPlan:1)
└── SELECT(selectList:playerID, playerName)
└── TRANSFORM_PASSTHROUGH(playerID, playerName)
└── PROJECT(playerName, playerID)
└── DOC_ID_SET
└── FILTER_EMPTYEXPLAIN PLAN FOR
SELECT playerID, count(*)
FROM baseballStats
WHERE playerID != 'aardsda01'
GROUP BY playerID
BROKER_REDUCE(limit:10)
└── COMBINE_GROUPBY_ORDERBY
└── PLAN_START(numSegmentsForThisPlan:1)
└── AGGREGATE_GROUPBY_ORDERBY(groupKeys:playerID, aggregations:count(*))
└── TRANORM_PASSTHROUGH(playerID)
└── PROJECT(playerID)
└── DOC_ID_SET
└── FILTER_INVERTED_INDEX(indexLookUp:inverted_index,operator:NOT_EQ,predicate:playerID != 'aardsda01'){
"tableName": "myTable",
"tableType": "OFFLINE",
"query" : {
"disableGroovy": false
}
}<dependency>
<groupId>org.apache.pinot</groupId>
<artifactId>pinot-common</artifactId>
<version>0.5.0</version>
</dependency>include 'org.apache.pinot:pinot-common:0.5.0'//Example Scalar function
@ScalarFunction
static String mySubStr(String input, Integer beginIndex) {
return input.substring(beginIndex);
}SELECT mysubstr(playerName, 4)
FROM baseballStatsSELECT *
FROM ...
OPTION(minSegmentGroupTrimSize=<minSegmentGroupTrimSize>)SELECT *
FROM ...
OPTION(minServerGroupTrimSize=<minServerGroupTrimSize>)SELECT SUM(colA)
FROM myTable
GROUP BY colB
HAVING SUM(colA) < 100
ORDER BY SUM(colA) DESC
LIMIT 10Learn how to look up IDs in a list of values. Filtering with IdSet is only supported with the single-stage query engine (v1).
SELECT ID_SET(yearID)
FROM baseballStats
WHERE teamID = 'WS1'SELECT ID_SET(playerName, 'expectedInsertions=10')
FROM baseballStats
WHERE teamID = 'WS1'SELECT ID_SET(playerName, 'expectedInsertions=100')
FROM baseballStats
WHERE teamID = 'WS1'SELECT ID_SET(playerName, 'expectedInsertions=100;fpp=0.01')
FROM baseballStats
WHERE teamID = 'WS1'SELECT yearID, count(*)
FROM baseballStats
WHERE IN_ID_SET(
yearID,
'ATowAAABAAAAAAA7ABAAAABtB24HbwdwB3EHcgdzB3QHdQd2B3cHeAd5B3oHewd8B30Hfgd/B4AHgQeCB4MHhAeFB4YHhweIB4kHigeLB4wHjQeOB48HkAeRB5IHkweUB5UHlgeXB5gHmQeaB5sHnAedB54HnwegB6EHogejB6QHpQemB6cHqAc='
) = 1
GROUP BY yearIDSELECT yearID, count(*)
FROM baseballStats
WHERE IN_ID_SET(
yearID,
'ATowAAABAAAAAAA7ABAAAABtB24HbwdwB3EHcgdzB3QHdQd2B3cHeAd5B3oHewd8B30Hfgd/B4AHgQeCB4MHhAeFB4YHhweIB4kHigeLB4wHjQeOB48HkAeRB5IHkweUB5UHlgeXB5gHmQeaB5sHnAedB54HnwegB6EHogejB6QHpQemB6cHqAc='
) = 0
GROUP BY yearIDSELECT yearID, count(*)
FROM baseballStats
WHERE IN_SUBQUERY(
yearID,
'SELECT ID_SET(yearID) FROM baseballStats WHERE teamID = ''WS1'''
) = 1
GROUP BY yearID SELECT yearID, count(*)
FROM baseballStats
WHERE IN_SUBQUERY(
yearID,
'SELECT ID_SET(yearID) FROM baseballStats WHERE teamID = ''WS1'''
) = 0
GROUP BY yearID SELECT yearID, count(*)
FROM baseballStats
WHERE IN_PARTITIONED_SUBQUERY(
yearID,
'SELECT ID_SET(yearID) FROM baseballStats WHERE teamID = ''WS1'''
) = 1
GROUP BY yearID SELECT yearID, count(*)
FROM baseballStats
WHERE IN_PARTITIONED_SUBQUERY(
yearID,
'SELECT ID_SET(yearID) FROM baseballStats WHERE teamID = ''WS1'''
) = 0
GROUP BY yearID Lookup UDF Join is only supported with the single-stage query engine (v1). For more information about using JOINs with the multi-stage query engine, see JOINs.
lookupUDFSpec:
LOOKUP
'('
'''dimTable'''
'''dimColToLookup'''
[ '''dimJoinKey''', factJoinKey ]*
')'SELECT
playerName,
teamID,
LOOKUP('dimBaseballTeams', 'teamName', 'teamID', teamID) AS teamName,
LOOKUP('dimBaseballTeams', 'teamAddress', 'teamID', teamID) AS teamAddress
FROM baseballStats SELECT
teamID,
teamName AS nameFromLocal,
LOOKUP('dimBaseballTeams', 'teamName', 'teamID', teamID) AS nameFromLookup
FROM dimBaseballTeamsselect
customerId,
missedPayment,
LOOKUP('billing', 'city', 'customerId', customerId, 'creditHistory', creditHistory) AS lookedupCity
from billingUse window functions to compute averages, sort, rank, or count items, calculate sums, and find minimum or maximum values.
SELECT SUM(COL1) FILTER (WHERE COL2 > 300),
AVG(COL2) FILTER (WHERE COL2 < 50)
FROM MyTable WHERE COL3 > 50SELECT SUM(COL1) FILTER (WHERE COL1 IS NOT NULL)
FROM MyTable WHERE COL3 > 50SELECT FUNC(column1) OVER (PARTITION BY column2 ORDER BY column3 ROWS 2 PRECEDING)
FROM tableName
WHERE filter_clause SELECT customer_id, payment_date, amount, SUM(amount) OVER(PARTITION BY customer_id ORDER BY payment_date) from payment;SELECT customer_id, payment_date, amount, MIN(amount) OVER(PARTITION BY customer_id) from payment;SELECT customer_id, payment_date, amount, AVG(amount) OVER(PARTITION BY customer_id) from payment;SELECT ROW_NUMBER() OVER(ORDER BY SalesYTD DESC) AS Row,
FirstName, LastName AS "Total sales YTD"
FROM Sales.vSalesPerson; SELECT customer_id, payment_date, amount, count(amount) OVER(PARTITION BY customer_id) from payment;SELECT time_col, SUM(status) AS occupied_slots_count
FROM (
SELECT GAPFILL(time_col,'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','2021-10-01 09:00:00.000',
'2021-10-01 12:00:00.000','30:MINUTES', FILL(status, 'FILL_PREVIOUS_VALUE'),
TIMESERIESON(lotId)), lotId, status
FROM (
SELECT DATETIMECONVERT(event_time,'1:MILLISECONDS:EPOCH',
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','30:MINUTES') AS time_col,
lotId, lastWithTime(is_occupied, event_time, 'INT') AS status
FROM parking_data
WHERE event_time >= 1633078800000 AND event_time <= 1633089600000
GROUP BY 1, 2
ORDER BY 1
LIMIT 100)
LIMIT 100)
GROUP BY 1
LIMIT 100SELECT GAPFILL(DATETIMECONVERT(event_time,'1:MILLISECONDS:EPOCH',
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','30:MINUTES'),
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','2021-10-01 09:00:00.000',
'2021-10-01 12:00:00.000','30:MINUTES', FILL(is_occupied, 'FILL_PREVIOUS_VALUE'),
TIMESERIESON(lotId)) AS time_col, lotId, is_occupied
FROM parking_data
WHERE event_time >= 1633078800000 AND event_time <= 1633089600000
ORDER BY 1
LIMIT 100SELECT GAPFILL(time_col,'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','2021-10-01 09:00:00.000',
'2021-10-01 12:00:00.000','30:MINUTES', FILL(status, 'FILL_PREVIOUS_VALUE'),
TIMESERIESON(lotId)), lotId, status
FROM (
SELECT DATETIMECONVERT(event_time,'1:MILLISECONDS:EPOCH',
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','30:MINUTES') AS time_col,
lotId, lastWithTime(is_occupied, event_time, 'INT') AS status
FROM parking_data
WHERE event_time >= 1633078800000 AND event_time <= 1633089600000
GROUP BY 1, 2
ORDER BY 1
LIMIT 100)
LIMIT 100SELECT time_col, SUM(is_occupied) AS occupied_slots_count
FROM (
SELECT GAPFILL(DATETIMECONVERT(event_time,'1:MILLISECONDS:EPOCH',
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','30:MINUTES'),
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','2021-10-01 09:00:00.000',
'2021-10-01 12:00:00.000','30:MINUTES', FILL(is_occupied, 'FILL_PREVIOUS_VALUE'),
TIMESERIESON(lotId)) AS time_col, lotId, is_occupied
FROM parking_data
WHERE event_time >= 1633078800000 AND event_time <= 1633089600000
ORDER BY 1
LIMIT 100)
GROUP BY 1
LIMIT 100