Learn about tuning real-time tables.
"instanceAssignmentConfigMap": {
"CONSUMING": {
"tagPoolConfig": {
"tag": "DefaultTenant_REALTIME"
},
"replicaGroupPartitionConfig": {
"replicaGroupBased": true,
"numReplicaGroups": 2,
"numInstancesPerReplicaGroup": 2
}
},
"COMPLETED": {
"tagPoolConfig": {
"tag": "DefaultTenant_OFFLINE"
},
"replicaGroupPartitionConfig": {
"replicaGroupBased": true,
"numReplicaGroups": 2,
"numInstancesPerReplicaGroup": 4
}
}
}
...
============================================================
RealtimeProvisioningHelperCommand -tableConfigFile /Users/ssubrama/tmp/samza/realtimeTableConfig.json -numPartitions 16 -pushFrequency null -numHosts 8,6,10 -numHours 6,12,18,24 -sampleCompletedSegmentDir /Users/ssubrama/tmp/samza/TestSamzaAnalyticsFeatures_1593411480000_1593500340000_0/ -ingestionRate 100 -maxUsableHostMemory 10G -retentionHours 72
Note:
* Table retention and push frequency ignored for determining retentionHours
* See https://docs.pinot.apache.org/operators/operating-pinot/tuning/realtime
Memory used per host (Active/Mapped)
numHosts --> 6 |8 |10 |
numHours
6 --------> 5.05G/19.49G |3.37G/12.99G |3.37G/12.99G |
12 --------> 5.89G/20.33G |3.93G/13.55G |3.93G/13.55G |
18 --------> 6.73G/21.49G |4.48G/14.33G |4.48G/14.33G |
24 --------> 7.56G/22G |5.04G/14.66G |5.04G/14.66G |
Optimal segment size
numHosts --> 6 |8 |10 |
numHours
6 --------> 111.98M |111.98M |111.98M |
12 --------> 223.96M |223.96M |223.96M |
18 --------> 335.94M |335.94M |335.94M |
24 --------> 447.92M |447.92M |447.92M |
Consuming memory
numHosts --> 6 |8 |10 |
numHours
6 --------> 1.45G |987.17M |987.17M |
12 --------> 2.61G |1.74G |1.74G |
18 --------> 3.77G |2.52G |2.52G |
24 --------> 4.94G |3.29G |3.29G |
Number of segments queried per host
numHosts --> 6 |8 |10 |
numHours
6 --------> 12 |12 |12 |
12 --------> 6 |6 |6 |
18 --------> 4 |4 |4 |
24 --------> 3 |3 |3 |
"realtime.segment.flush.threshold.rows": "0"
"realtime.segment.flush.threshold.time": "6h"
"realtime.segment.flush.threshold.segment.size": "112M"
"realtime.segment.flush.threshold.rows": "0"
"realtime.segment.flush.threshold.time": "24h"
"realtime.segment.flush.threshold.segment.size": "450M"
Schedule queries to prioritize them.
Tuning Broker
Tuning Server
// Table config
{
...
"routing": {
"segmentPrunerTypes": ["time"]
},
...
}
// Table config
{
...
"tableIndexConfig": {
...
"segmentPartitionConfig": {
"columnPartitionMap": {
"memberId": {
"functionName": "Modulo",
"numPartitions": 3
}
}
},
...
},
...
"routing": {
"segmentPrunerTypes": ["partition"]
},
...
}
private val NUM_PARTITIONS = 8
def getPartitionUdf: UserDefinedFunction = {
udf((valueIn: Any) => {
(murmur2(valueIn.toString.getBytes(UTF_8)) & Integer.MAX_VALUE) % NUM_PARTITIONS
})
}
column.memberId.partitionFunction = Modulo
column.memberId.numPartitions = 3
column.memberId.partitionValues = 1
// Table config
{
...
"instanceAssignmentConfigMap": {
"OFFLINE": {
...
"replicaGroupPartitionConfig": {
"replicaGroupBased": true,
"numReplicaGroups": 3,
"numInstancesPerReplicaGroup": 4
}
}
},
...
"routing": {
"instanceSelectorType": "replicaGroup"
},
...
}
SET "useFixedReplica"=true;
// Table config
{
...
"routing": {
"useFixedReplica": true
},
...
}
pinot.broker.use.fixed.replica=true
