Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Learn how to query Apache Pinot using SQL or explore data using the web-based Pinot query console.
Learn how to query Pinot using SQL
//default to limit 10
SELECT *
FROM myTable
SELECT *
FROM myTable
LIMIT 100SELECT "date", "timestamp"
FROM myTable SELECT COUNT(*), MAX(foo), SUM(bar)
FROM myTableSELECT MIN(foo), MAX(foo), SUM(foo), AVG(foo), bar, baz
FROM myTable
GROUP BY bar, baz
LIMIT 50SELECT MIN(foo), MAX(foo), SUM(foo), AVG(foo), bar, baz
FROM myTable
GROUP BY bar, baz
ORDER BY bar, MAX(foo) DESC
LIMIT 50SELECT COUNT(*)
FROM myTable
WHERE foo = 'foo'
AND bar BETWEEN 1 AND 20
OR (baz < 42 AND quux IN ('hello', 'goodbye') AND quuux NOT IN (42, 69))SELECT COUNT(*)
FROM myTable
WHERE foo IS NOT NULL
AND foo = 'foo'
AND bar BETWEEN 1 AND 20
OR (baz < 42 AND quux IN ('hello', 'goodbye') AND quuux NOT IN (42, 69))SELECT *
FROM myTable
WHERE quux < 5
LIMIT 50SELECT foo, bar
FROM myTable
WHERE baz > 20
ORDER BY bar DESC
LIMIT 100SELECT foo, bar
FROM myTable
WHERE baz > 20
ORDER BY bar DESC
LIMIT 50, 100SELECT COUNT(*)
FROM myTable
WHERE REGEXP_LIKE(airlineName, '^U.*')
GROUP BY airlineName LIMIT 10SELECT
CASE
WHEN price > 30 THEN 3
WHEN price > 20 THEN 2
WHEN price > 10 THEN 1
ELSE 0
END AS price_category
FROM myTableSELECT
SUM(
CASE
WHEN price > 30 THEN 30
WHEN price > 20 THEN 20
WHEN price > 10 THEN 10
ELSE 0
END) AS total_cost
FROM myTableSELECT COUNT(*)
FROM myTable
GROUP BY DATETIMECONVERT(timeColumnName, '1:MILLISECONDS:EPOCH', '1:HOURS:EPOCH', '1:HOURS')SELECT *
FROM myTable
WHERE UID = 'c8b3bce0b378fc5ce8067fc271a34892'
lookUp('dimTableName', 'dimColToLookUp', 'dimJoinKey1', factJoinKeyVal1, 'dimJoinKey2', factJoinKeyVal2 ... )Operator column describes the operator that Pinot will run where as, the Operator_Id and Parent_Id columns show the parent-child relationship between operators. SELECT *
FROM ...
OPTION(minSegmentGroupTrimSize=<minSegmentGroupTrimSize>)SELECT *
FROM ...
OPTION(minServerGroupTrimSize=<minServerGroupTrimSize>)SELECT SUM(colA)
FROM myTable
GROUP BY colB
ORDER BY SUM(colA) DESC
HAVING SUM(colA) < 100
LIMIT 10SELECT * FROM myTable OPTION(key1=value1, key2=123)
SELECT * FROM myTable OPTION(key1=value1) OPTION(key2=123)
SELECT * FROM myTable OPTION(timeoutMs=30000)SET key1 = 'value1';
SET key2 = 123;
SELECT * FROM myTableEXPLAIN PLAN FOR SELECT playerID, playerName FROM baseballStats
+---------------------------------------------|------------|---------|
| Operator | Operator_Id|Parent_Id|
+---------------------------------------------|------------|---------|
|BROKER_REDUCE(limit:10) | 1 | 0 |
|COMBINE_SELECT | 2 | 1 |
|PLAN_START(numSegmentsForThisPlan:1) | -1 | -1 |
|SELECT(selectList:playerID, playerName) | 3 | 2 |
|TRANSFORM_PASSTHROUGH(playerID, playerName) | 4 | 3 |
|PROJECT(playerName, playerID) | 5 | 4 |
|DOC_ID_SET | 6 | 5 |
|FILTER_MATCH_ENTIRE_SEGMENT(docs:97889) | 7 | 6 |
+---------------------------------------------|------------|---------|BROKER_REDUCE(limit:10)
└── COMBINE_SELECT
└── PLAN_START(numSegmentsForThisPlan:1)
└── SELECT(selectList:playerID, playerName)
└── TRANSFORM_PASSTHROUGH(playerID, playerName)
└── PROJECT(playerName, playerID)
└── DOC_ID_SET
└── FILTER_MATCH_ENTIRE_SEGMENT(docs:97889)SET explainPlanVerbose=true;
EXPLAIN PLAN FOR
SELECT playerID, playerName
FROM baseballStats
WHERE playerID = 'aardsda01' AND playerName = 'David Allan'
BROKER_REDUCE(limit:10)
└── COMBINE_SELECT
└── PLAN_START(numSegmentsForThisPlan:1)
└── SELECT(selectList:playerID, playerName)
└── TRANSFORM_PASSTHROUGH(playerID, playerName)
└── PROJECT(playerName, playerID)
└── DOC_ID_SET
└── FILTER_AND
├── FILTER_INVERTED_INDEX(indexLookUp:inverted_index,operator:EQ,predicate:playerID = 'aardsda01')
└── FILTER_FULL_SCAN(operator:EQ,predicate:playerName = 'David Allan')
└── PLAN_START(numSegmentsForThisPlan:1)
└── SELECT(selectList:playerID, playerName)
└── TRANSFORM_PASSTHROUGH(playerID, playerName)
└── PROJECT(playerName, playerID)
└── DOC_ID_SET
└── FILTER_EMPTYEXPLAIN PLAN FOR
SELECT playerID, count(*)
FROM baseballStats
WHERE playerID != 'aardsda01'
GROUP BY playerID
BROKER_REDUCE(limit:10)
└── COMBINE_GROUPBY_ORDERBY
└── PLAN_START(numSegmentsForThisPlan:1)
└── AGGREGATE_GROUPBY_ORDERBY(groupKeys:playerID, aggregations:count(*))
└── TRANORM_PASSTHROUGH(playerID)
└── PROJECT(playerID)
└── DOC_ID_SET
└── FILTER_INVERTED_INDEX(indexLookUp:inverted_index,operator:NOT_EQ,predicate:playerID != 'aardsda01')Learn how to write fast queries for looking up ids in a list of values.
SELECT ID_SET(yearID)
FROM baseballStats
WHERE teamID = 'WS1'SELECT ID_SET(playerName, 'expectedInsertions=10')
FROM baseballStats
WHERE teamID = 'WS1'SELECT ID_SET(playerName, 'expectedInsertions=100')
FROM baseballStats
WHERE teamID = 'WS1'SELECT ID_SET(playerName, 'expectedInsertions=100;fpp=0.01')
FROM baseballStats
WHERE teamID = 'WS1'SELECT yearID, count(*)
FROM baseballStats
WHERE IN_ID_SET(
yearID,
'ATowAAABAAAAAAA7ABAAAABtB24HbwdwB3EHcgdzB3QHdQd2B3cHeAd5B3oHewd8B30Hfgd/B4AHgQeCB4MHhAeFB4YHhweIB4kHigeLB4wHjQeOB48HkAeRB5IHkweUB5UHlgeXB5gHmQeaB5sHnAedB54HnwegB6EHogejB6QHpQemB6cHqAc='
) = 1
GROUP BY yearIDSELECT yearID, count(*)
FROM baseballStats
WHERE IN_ID_SET(
yearID,
'ATowAAABAAAAAAA7ABAAAABtB24HbwdwB3EHcgdzB3QHdQd2B3cHeAd5B3oHewd8B30Hfgd/B4AHgQeCB4MHhAeFB4YHhweIB4kHigeLB4wHjQeOB48HkAeRB5IHkweUB5UHlgeXB5gHmQeaB5sHnAedB54HnwegB6EHogejB6QHpQemB6cHqAc='
) = 0
GROUP BY yearIDSELECT yearID, count(*)
FROM baseballStats
WHERE IN_SUBQUERY(
yearID,
'SELECT ID_SET(yearID) FROM baseballStats WHERE teamID = ''WS1'''
) = 1
GROUP BY yearID SELECT yearID, count(*)
FROM baseballStats
WHERE IN_SUBQUERY(
yearID,
'SELECT ID_SET(yearID) FROM baseballStats WHERE teamID = ''WS1'''
) = 0
GROUP BY yearID SELECT yearID, count(*)
FROM baseballStats
WHERE IN_PARTITIONED_SUBQUERY(
yearID,
'SELECT ID_SET(yearID) FROM baseballStats WHERE teamID = ''WS1'''
) = 1
GROUP BY yearID SELECT yearID, count(*)
FROM baseballStats
WHERE IN_PARTITIONED_SUBQUERY(
yearID,
'SELECT ID_SET(yearID) FROM baseballStats WHERE teamID = ''WS1'''
) = 0
GROUP BY yearID {
"tableName": "myTable",
"tableType": "OFFLINE",
"queryConfig" : {
"disableGroovy": false
}
}<dependency>
<groupId>org.apache.pinot</groupId>
<artifactId>pinot-common</artifactId>
<version>0.5.0</version>
</dependency>include 'org.apache.pinot:pinot-common:0.5.0'//Example Scalar function
@ScalarFunction
static String mySubStr(String input, Integer beginIndex) {
return input.substring(beginIndex);
}SELECT mysubstr(playerName, 4)
FROM baseballStatsselect distinctCountThetaSketch(
sketchCol,
'nominalEntries=1024',
'country'=''USA'' AND 'state'=''CA'', 'device'=''mobile'', 'SET_INTERSECT($1, $2)'
)
from table
where country = 'USA' or device = 'mobile...' Table myTable:
id INTEGER
jsoncolumn JSON
Table data:
101,{"name":{"first":"daffy"\,"last":"duck"}\,"score":101\,"data":["a"\,"b"\,"c"\,"d"]}
102,{"name":{"first":"donald"\,"last":"duck"}\,"score":102\,"data":["a"\,"b"\,"e"\,"f"]}
103,{"name":{"first":"mickey"\,"last":"mouse"}\,"score":103\,"data":["a"\,"b"\,"g"\,"h"]}
104,{"name":{"first":"minnie"\,"last":"mouse"}\,"score":104\,"data":["a"\,"b"\,"i"\,"j"]}
105,{"name":{"first":"goofy"\,"last":"dwag"}\,"score":104\,"data":["a"\,"b"\,"i"\,"j"]}
106,{"person":{"name":"daffy duck"\,"companies":[{"name":"n1"\,"title":"t1"}\,{"name":"n2"\,"title":"t2"}]}}
107,{"person":{"name":"scrooge mcduck"\,"companies":[{"name":"n1"\,"title":"t1"}\,{"name":"n2"\,"title":"t2"}]}}SELECT id, jsoncolumn
FROM myTable<=JSON_MATCHJSON_EXTRACT_SCALAR><>=<=SELECT id,
json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
json_extract_scalar(jsoncolumn, '$.name.first', 'STRING', 'null') first_name
json_extract_scalar(jsoncolumn, '$.data[1]', 'STRING', 'null') value
FROM myTableSELECT id,
json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
json_extract_scalar(jsoncolumn, '$.name.first', 'STRING', 'null') first_name,
json_extract_scalar(jsoncolumn, '$.data[1]', 'STRING', 'null') value
FROM myTable
WHERE JSON_MATCH(jsoncolumn, '"$.data[1]" IS NOT NULL') SELECT json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
count(*)
FROM myTable
WHERE JSON_MATCH(jsoncolumn, '"$.data[1]" IS NOT NULL')
GROUP BY json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null')
ORDER BY 2 DESC SELECT json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
sum(json_extract_scalar(jsoncolumn, '$.id', 'INT', 0)) total
FROM myTable
WHERE JSON_MATCH(jsoncolumn, '"$.name.last" IS NOT NULL')
GROUP BY json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') SELECT json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
sum(json_extract_scalar(jsoncolumn, '$.id', 'INT', 0)) total
FROM myTable
WHERE JSON_MATCH(jsoncolumn, '"$.name.last" IS NOT NULL') AND json_extract_scalar(jsoncolumn, '$.id', 'INT', 0) > 102
GROUP BY json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') SELECT json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null') last_name,
json_extract_scalar(jsoncolumn, '$.id', 'INT', 0) total
FROM myTable
WHERE JSON_MATCH(jsoncolumn, '"$.data[*]" = ''f''')
GROUP BY json_extract_scalar(jsoncolumn, '$.name.last', 'STRING', 'null')data[0] IN ('k', 'j')"data[0]" IN ('k', 'j')"data[0]" IN (''k'', ''j'')'"data[0]" IN (''k'', ''j'')' WHERE JSON_MATCH(jsoncolumn, '"data[0]" IN (''k'', ''j'')')This document contains the list of all the transformation functions supported by Pinot SQL.
SELECT time_col, SUM(status) AS occupied_slots_count
FROM (
SELECT GAPFILL(time_col,'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','2021-10-01 09:00:00.000',
'2021-10-01 12:00:00.000','30:MINUTES', FILL(status, 'FILL_PREVIOUS_VALUE'),
TIMESERIESON(lotId)), lotId, status
FROM (
SELECT DATETIMECONVERT(event_time,'1:MILLISECONDS:EPOCH',
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','30:MINUTES') AS time_col,
lotId, lastWithTime(is_occupied, event_time, 'INT') AS status
FROM parking_data
WHERE event_time >= 1633078800000 AND event_time <= 1633089600000
GROUP BY 1, 2
ORDER BY 1
LIMIT 100)
LIMIT 100)
GROUP BY 1
LIMIT 100SELECT GAPFILL(DATETIMECONVERT(event_time,'1:MILLISECONDS:EPOCH',
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','30:MINUTES'),
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','2021-10-01 09:00:00.000',
'2021-10-01 12:00:00.000','30:MINUTES', FILL(is_occupied, 'FILL_PREVIOUS_VALUE'),
TIMESERIESON(lotId)) AS time_col, lotId, is_occupied
FROM parking_data
WHERE event_time >= 1633078800000 AND event_time <= 1633089600000
ORDER BY 1
LIMIT 100SELECT GAPFILL(time_col,'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','2021-10-01 09:00:00.000',
'2021-10-01 12:00:00.000','30:MINUTES', FILL(status, 'FILL_PREVIOUS_VALUE'),
TIMESERIESON(lotId)), lotId, status
FROM (
SELECT DATETIMECONVERT(event_time,'1:MILLISECONDS:EPOCH',
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','30:MINUTES') AS time_col,
lotId, lastWithTime(is_occupied, event_time, 'INT') AS status
FROM parking_data
WHERE event_time >= 1633078800000 AND event_time <= 1633089600000
GROUP BY 1, 2
ORDER BY 1
LIMIT 100)
LIMIT 100SELECT time_col, SUM(is_occupied) AS occupied_slots_count
FROM (
SELECT GAPFILL(DATETIMECONVERT(event_time,'1:MILLISECONDS:EPOCH',
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','30:MINUTES'),
'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS','2021-10-01 09:00:00.000',
'2021-10-01 12:00:00.000','30:MINUTES', FILL(is_occupied, 'FILL_PREVIOUS_VALUE'),
TIMESERIESON(lotId)) AS time_col, lotId, is_occupied
FROM parking_data
WHERE event_time >= 1633078800000 AND event_time <= 1633089600000
ORDER BY 1
LIMIT 100)
GROUP BY 1
LIMIT 100