"streamConfigs" : {
"realtime.segment.flush.threshold.rows": "0",
"realtime.segment.flush.threshold.time": "24h",
"realtime.segment.flush.threshold.segment.size": "150M",
"streamType": "kafka",
"stream.kafka.consumer.type": "LowLevel",
"stream.kafka.topic.name": "ClickStream",
"stream.kafka.consumer.prop.auto.offset.reset" : "largest"
}"streamConfigs" : {
"realtime.segment.flush.threshold.size": "0",
"realtime.segment.flush.threshold.time": "24h",
"realtime.segment.flush.desired.size": "150M",
"streamType": "kafka",
"stream.kafka.consumer.type": "LowLevel",
"stream.kafka.topic.name": "ClickStream",
"stream.kafka.consumer.prop.auto.offset.reset" : "largest"
} "broker": "brokerTenantName",
"server": "serverTenantName",
"tagOverrideConfig" : {
"realtimeConsuming" : "serverTenantName_REALTIME"
"realtimeCompleted" : "serverTenantName_OFFLINE"
}
}{
...
"ingestionConfig": {
"batchIngestionConfig": {
"segmentIngestionType": "APPEND",
"segmentIngestionFrequency": "DAILY",
"batchConfigMaps": [
{
"inputDirURI": "s3://<my-bucket>/baseballStats/rawdata",
"includeFileNamePattern": "glob:**/*.csv",
"excludeFileNamePattern": "glob:**/*.tmp",
"inputFormat": "csv",
"outputDirURI": "s3://<my-bucket>/baseballStats/segments",
"input.fs.className": "org.apache.pinot.plugin.filesystem.S3PinotFS",
"input.fs.prop.region": "us-west-2",
"input.fs.prop.accessKey": "${AWS_ACCESS_KEY}",
"input.fs.prop.secretKey": "${AWS_SECRET_KEY}",
"push.mode": "tar"
}
],
"segmentNameSpec": {},
"pushSpec": {}
}
},
...
}"OFFLINE": {
"tableName": "pinotTable",
"tableType": "OFFLINE",
"quota": {
"maxQueriesPerSecond": 300,
"storage": "140G"
},
"routing": {
"segmentPrunerTypes": ["partition"],
"instanceSelectorType": "replicaGroup"
},
"segmentsConfig": {
"schemaName": "pinotTable",
"timeColumnName": "daysSinceEpoch",
"timeType": "DAYS",
"allowNullTimeValue": false,
"replication": "3",
"retentionTimeUnit": "DAYS",
"retentionTimeValue": "365",
"segmentPushFrequency": "DAILY",
"segmentPushType": "APPEND"
},
"tableIndexConfig": {
"invertedIndexColumns": ["foo", "bar", "moo"],
"createInvertedIndexDuringSegmentGeneration": false,
"sortedColumn": ["pk"],
"bloomFilterColumns": [],
"starTreeIndexConfigs": [],
"noDictionaryColumns": [],
"rangeIndexColumns": [],
"onHeapDictionaryColumns": [],
"varLengthDictionaryColumns": [],
"segmentPartitionConfig": {
"pk": {
"functionName": "Murmur",
"numPartitions": 32
}
},
"loadMode": "MMAP",
"columnMinMaxValueGeneratorMode": null,
"nullHandlingEnabled": false
},
"tenants": {
"broker": "myBrokerTenant",
"server": "myServerTenant"
},
"ingestionConfig": {
"filterConfig": {
"filterFunction": "Groovy({foo == \"VALUE1\"}, foo)"
},
"transformConfigs": [{
"columnName": "bar",
"transformFunction": "lower(moo)"
},
{
"columnName": "hoursSinceEpoch",
"transformFunction": "toEpochHours(millis)"
}]
},
"metadata": {
"customConfigs": {
"key": "value",
"key": "value"
}
}
}
}"REALTIME": {
"tableName": "pinotTable",
"tableType": "REALTIME",
"segmentsConfig": {
"schemaName": "pinotTable",
"timeColumnName": "daysSinceEpoch",
"timeType": "DAYS",
"allowNullTimeValue": true,
"replicasPerPartition": "3",
"retentionTimeUnit": "DAYS",
"retentionTimeValue": "5",
"segmentPushType": "APPEND",
"completionConfig": {
"completionMode": "DOWNLOAD"
}
},
"tableIndexConfig": {
"invertedIndexColumns": ["foo", "bar", "moo"],
"sortedColumn": ["column1"],
"noDictionaryColumns": ["metric1", "metric2"],
"loadMode": "MMAP",
"aggregateMetrics": true,
"nullHandlingEnabled": false,
"streamConfigs": {
"realtime.segment.flush.threshold.size": "0",
"realtime.segment.flush.threshold.time": "24h",
"realtime.segment.flush.desired.size": "150M",
"stream.kafka.broker.list": "XXXX",
"stream.kafka.consumer.factory.class.name": "XXXX",
"stream.kafka.consumer.prop.auto.offset.reset": "largest",
"stream.kafka.consumer.type": "XXXX",
"stream.kafka.decoder.class.name": "XXXX",
"stream.kafka.decoder.prop.schema.registry.rest.url": "XXXX",
"stream.kafka.decoder.prop.schema.registry.schema.name": "XXXX",
"stream.kafka.hlc.zk.connect.string": "XXXX",
"stream.kafka.topic.name": "XXXX",
"stream.kafka.zk.broker.url": "XXXX",
"streamType": "kafka"
}
},
"tenants": {
"broker": "myBrokerTenant",
"server": "myServerTenant",
"tagOverrideConfig": {}
},
"metadata": {
}
}