HDFS as Deep Storage
This guide shows how to set up HDFS as deep storage for Pinot segments.
To use HDFS as deep storage, you need to include the HDFS dependency jars and plugins.

Server Setup

Configuration.

1
pinot.server.instance.enable.split.commit=true
2
pinot.server.storage.factory.class.hdfs=org.apache.pinot.plugin.filesystem.HadoopPinotFS
3
pinot.server.storage.factory.hdfs.hadoop.conf.path=/path/to/hadoop/conf/directory/
4
pinot.server.segment.fetcher.protocols=file,http,hdfs
5
pinot.server.segment.fetcher.hdfs.class=org.apache.pinot.common.utils.fetcher.PinotFSSegmentFetcher
6
pinot.server.segment.fetcher.hdfs.hadoop.kerberos.principle=<your kerberos principal>
7
pinot.server.segment.fetcher.hdfs.hadoop.kerberos.keytab=<your kerberos keytab>
8
pinot.set.instance.id.to.hostname=true
9
pinot.server.instance.dataDir=/path/in/local/filesystem/for/pinot/data/server/index
10
pinot.server.instance.segmentTarDir=/path/in/local/filesystem/for/pinot/data/server/segment
11
pinot.server.grpc.enable=true
12
pinot.server.grpc.port=8090
Copied!

Executable.

1
export HADOOP_HOME=/path/to/hadoop/home
2
export HADOOP_VERSION=2.7.1
3
export HADOOP_GUAVA_VERSION=11.0.2
4
export HADOOP_GSON_VERSION=2.2.4
5
export GC_LOG_LOCATION=/path/to/gc/log/file
6
export PINOT_VERSION=0.8.0
7
export PINOT_DISTRIBUTION_DIR=/path/to/apache-pinot-${PINOT_VERSION}-bin/
8
export SERVER_CONF_DIR=/path/to/pinot/conf/dir/
9
export ZOOKEEPER_ADDRESS=localhost:2181
10
11
12
export CLASSPATH_PREFIX="${HADOOP_HOME}/share/hadoop/hdfs/hadoop-hdfs-${HADOOP_VERSION}.jar:${HADOOP_HOME}/share/hadoop/common/lib/hadoop-annotations-${HADOOP_VERSION}.jar:${HADOOP_HOME}/share/hadoop/common/lib/hadoop-auth-${HADOOP_VERSION}.jar:${HADOOP_HOME}/share/hadoop/common/hadoop-common-${HADOOP_VERSION}.jar:${HADOOP_HOME}/share/hadoop/common/lib/guava-${HADOOP_GUAVA_VERSION}.jar:${HADOOP_HOME}/share/hadoop/common/lib/gson-${HADOOP_GSON_VERSION}.jar"
13
export JAVA_OPTS="-Xms4G -Xmx16G -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -Xloggc:${GC_LOG_LOCATION}/gc-pinot-server.log"
14
${PINOT_DISTRIBUTION_DIR}/bin/start-server.sh -zkAddress ${ZOOKEEPER_ADDRESS} -configFileName ${SERVER_CONF_DIR}/server.conf
Copied!

Controller Setup

Configuration.

1
controller.data.dir=hdfs://path/in/hdfs/for/controller/segment
2
controller.local.temp.dir=/tmp/pinot/
3
controller.zk.str=<ZOOKEEPER_HOST:ZOOKEEPER_PORT>
4
controller.enable.split.commit=true
5
controller.access.protocols.http.port=9000
6
controller.helix.cluster.name=PinotCluster
7
pinot.controller.storage.factory.class.hdfs=org.apache.pinot.plugin.filesystem.HadoopPinotFS
8
pinot.controller.storage.factory.hdfs.hadoop.conf.path=/path/to/hadoop/conf/directory/
9
pinot.controller.segment.fetcher.protocols=file,http,hdfs
10
pinot.controller.segment.fetcher.hdfs.class=org.apache.pinot.common.utils.fetcher.PinotFSSegmentFetcher
11
pinot.controller.segment.fetcher.hdfs.hadoop.kerberos.principle=<your kerberos principal>
12
pinot.controller.segment.fetcher.hdfs.hadoop.kerberos.keytab=<your kerberos keytab>
13
controller.vip.port=9000
14
controller.port=9000
15
pinot.set.instance.id.to.hostname=true
16
pinot.server.grpc.enable=true
Copied!

Executable.

1
export HADOOP_HOME=/path/to/hadoop/home
2
export HADOOP_VERSION=2.7.1
3
export HADOOP_GUAVA_VERSION=11.0.2
4
export HADOOP_GSON_VERSION=2.2.4
5
export GC_LOG_LOCATION=/path/to/gc/log/file
6
export PINOT_VERSION=0.8.0
7
export PINOT_DISTRIBUTION_DIR=/path/to/apache-pinot-${PINOT_VERSION}-bin/
8
export SERVER_CONF_DIR=/path/to/pinot/conf/dir/
9
export ZOOKEEPER_ADDRESS=localhost:2181
10
11
12
export CLASSPATH_PREFIX="${HADOOP_HOME}/share/hadoop/hdfs/hadoop-hdfs-${HADOOP_VERSION}.jar:${HADOOP_HOME}/share/hadoop/common/lib/hadoop-annotations-${HADOOP_VERSION}.jar:${HADOOP_HOME}/share/hadoop/common/lib/hadoop-auth-${HADOOP_VERSION}.jar:${HADOOP_HOME}/share/hadoop/common/hadoop-common-${HADOOP_VERSION}.jar:${HADOOP_HOME}/share/hadoop/common/lib/guava-${HADOOP_GUAVA_VERSION}.jar:${HADOOP_HOME}/share/hadoop/common/lib/gson-${HADOOP_GSON_VERSION}.jar"
13
export JAVA_OPTS="-Xms8G -Xmx12G -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -Xloggc:${GC_LOG_LOCATION}/gc-pinot-controller.log"
14
${PINOT_DISTRIBUTION_DIR}/bin/start-controller.sh -configFileName ${SERVER_CONF_DIR}/controller.conf
Copied!

Broker Setup

Configuration.

1
pinot.set.instance.id.to.hostname=true
2
pinot.server.grpc.enable=true
Copied!

Executable.

1
export HADOOP_HOME=/path/to/hadoop/home
2
export HADOOP_VERSION=2.7.1
3
export HADOOP_GUAVA_VERSION=11.0.2
4
export HADOOP_GSON_VERSION=2.2.4
5
export GC_LOG_LOCATION=/path/to/gc/log/file
6
export PINOT_VERSION=0.8.0
7
export PINOT_DISTRIBUTION_DIR=/path/to/apache-pinot-${PINOT_VERSION}-bin/
8
export SERVER_CONF_DIR=/path/to/pinot/conf/dir/
9
export ZOOKEEPER_ADDRESS=localhost:2181
10
11
12
export CLASSPATH_PREFIX="${HADOOP_HOME}/share/hadoop/hdfs/hadoop-hdfs-${HADOOP_VERSION}.jar:${HADOOP_HOME}/share/hadoop/common/lib/hadoop-annotations-${HADOOP_VERSION}.jar:${HADOOP_HOME}/share/hadoop/common/lib/hadoop-auth-${HADOOP_VERSION}.jar:${HADOOP_HOME}/share/hadoop/common/hadoop-common-${HADOOP_VERSION}.jar:${HADOOP_HOME}/share/hadoop/common/lib/guava-${HADOOP_GUAVA_VERSION}.jar:${HADOOP_HOME}/share/hadoop/common/lib/gson-${HADOOP_GSON_VERSION}.jar"
13
export JAVA_OPTS="-Xms4G -Xmx4G -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -Xloggc:${GC_LOG_LOCATION}/gc-pinot-broker.log"
14
${PINOT_DISTRIBUTION_DIR}/bin/start-broker.sh -zkAddress ${ZOOKEEPER_ADDRESS} -configFileName ${SERVER_CONF_DIR}/broker.conf
Copied!
Last modified 1mo ago