Configure AliCloud Object Storage Service (OSS) as Pinot deep storage
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
</configuration><?xml version="1.0"?>
<configuration>
<property>
<name>fs.defaultFS</name>
<value>oss://your-bucket-name/</value>
</property>
<property>
<name>fs.oss.accessKeyId</name>
<value>your-access-key-id</value>
</property>
<property>
<name>fs.oss.accessKeySecret</name>
<value>your-access-key-secret</value>
</property>
<property>
<name>fs.oss.impl</name>
<value>com.aliyun.emr.fs.oss.OssFileSystem</value>
</property>
<property>
<name>fs.oss.endpoint</name>
<value>your-oss-endpoint</value>
</property>
</configuration>controller.data.dir=oss://your-bucket-name/path/to/segments
controller.local.temp.dir=/path/to/local/temp/directory
controller.enable.split.commit=true
pinot.controller.storage.factory.class.oss=org.apache.pinot.plugin.filesystem.HadoopPinotFS
pinot.controller.storage.factory.oss.hadoop.conf.path=path/to/conf/directory/
pinot.controller.segment.fetcher.protocols=file,http,oss
pinot.controller.segment.fetcher.oss.class=org.apache.pinot.common.utils.fetcher.PinotFSSegmentFetcherpinot.server.instance.enable.split.commit=true
pinot.server.storage.factory.class.oss=org.apache.pinot.plugin.filesystem.HadoopPinotFS
pinot.server.storage.factory.oss.hadoop.conf.path=path/to/conf/directory/
pinot.server.segment.fetcher.protocols=file,http,oss
pinot.server.segment.fetcher.oss.class=org.apache.pinot.common.utils.fetcher.PinotFSSegmentFetcherexecutionFrameworkSpec:
name: 'standalone'
segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner'
segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner'
segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentUriPushJobRunner'
segmentMetadataPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentMetadataPushJobRunner'
jobType: SegmentCreationAndMetadataPush
inputDirURI: 'oss://your-bucket-name/input'
includeFileNamePattern: 'glob:**/*.csv'
outputDirURI: 'oss://your-bucket-name/output'
overwriteOutput: true
pinotFSSpecs:
- scheme: oss
className: org.apache.pinot.plugin.filesystem.HadoopPinotFS
configs:
hadoop.conf.path: '/path/to/hadoop/conf'
recordReaderSpec:
dataFormat: 'csv'
className: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReader'
configClassName: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReaderConfig'
tableSpec:
tableName: 'transcript'
pinotClusterSpecs:
- controllerURI: '<http://localhost:9000>'