Step 1: Set up the ZooKeeper cluster

Download ZooKeeper from the official website.

For me, it’s zookeeper-3.4.13.tar.gz

unzip it with:

tar -xzvf zookeeper-3.4.13.tar.gz

mv  zookeeper-3.4.13 zookeeper

mv zookeeper /opt/

Edit conf/zoo.cfg on each node (the file has the same content on every node):

hduser@node7:~ $ cd /opt/
hduser@node7:/opt $ ls
hadoop  spark  vc  zookeeper
hduser@node7:/opt $ cd zookeeper/
hduser@node7:/opt/zookeeper $ ls
bin        dist-maven       lib          README_packaging.txt  zookeeper-3.4.13.jar.asc
build.xml  docs             LICENSE.txt  recipes               zookeeper-3.4.13.jar.md5
conf       ivysettings.xml  NOTICE.txt   src                   zookeeper-3.4.13.jar.sha1
contrib    ivy.xml          README.md    zookeeper-3.4.13.jar  zookeeper.out
hduser@node7:/opt/zookeeper $ cd conf/
hduser@node7:/opt/zookeeper/conf $ ls
configuration.xsl  log4j.properties  zoo.cfg  zoo.cfg.standalone  zoo_sample.cfg
hduser@node7:/opt/zookeeper/conf $ more zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/var/lib/zookeeper
clientPort=2181
server.1=node7:2888:3888
server.2=node8:2888:3888
server.3=node9:2888:3888

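Create the data directory (the dataDir from zoo.cfg) on each node and put a myid file in it. The myid file contains only that node's server number from zoo.cfg (1 on node7, 2 on node8, 3 on node9):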

hduser@node7:/opt/zookeeper/conf $ cd /var/lib/zookeeper/
hduser@node7:/var/lib/zookeeper $ ls
1  log  myid  tmp  version-2  zookeeper_server.pid
hduser@node7:/var/lib/zookeeper $ more myid 
1
hduser@node7:/var/lib/zookeeper $ ls -al
total 28
drwxr-x---  5 hduser hadoop 4096 Aug 13 18:16 .
drwxr-xr-x 30 root   root   4096 Aug 13 17:27 ..
-rwxr-x---  1 hduser hadoop    0 Aug 13 17:32 1
drwxr-x---  2 hduser hadoop 4096 Aug 13 17:48 log
-rw-r--r--  1 hduser hadoop    2 Aug 13 18:03 myid
drwxr-xr-x  2 hduser hadoop 4096 Aug 13 17:58 tmp
drwxr-xr-x  2 hduser hadoop 4096 Aug 20 17:47 version-2
-rw-r--r--  1 hduser hadoop    5 Aug 20 16:58 zookeeper_server.pid

Run ./bin/zkServer.sh start on each node

To validate if Zookeeper is running properly, run:

./bin/zkServer.sh status

Step 2: Set up the Kafka cluster

Modify the server.properties file on each node (broker.id must be unique for each broker, and host.name must be that node's own hostname):

hduser@node7:~/kafka/config $ more server.properties
broker.id=1
port=9092
host.name=node7
zookeeper.connect=node7:2181,node8:2181,node9:2181

Modify bin/kafka-server-start.sh by adding the following two lines near the top, before the existing KAFKA_HEAP_OPTS check:

export JMX_PORT=${JMX_PORT:-9999}
export KAFKA_HEAP_OPTS="-Xmx256M -Xms128M"
hduser@node7:~/kafka/bin $ more kafka-server-start.sh 
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

export JMX_PORT=${JMX_PORT:-9999}
export KAFKA_HEAP_OPTS="-Xmx256M -Xms128M"

if [ $# -lt 1 ];
then
	echo "USAGE: $0 [-daemon] server.properties [--override property=value]*"
	exit 1
fi
base_dir=$(dirname $0)

if [ "x$KAFKA_LOG4J_OPTS" = "x" ]; then
    export KAFKA_LOG4J_OPTS="-Dlog4j.configuration=file:$base_dir/../config/log4j.properties"
fi

if [ "x$KAFKA_HEAP_OPTS" = "x" ]; then
    export KAFKA_HEAP_OPTS="-Xmx1G -Xms1G"
fi

EXTRA_ARGS=${EXTRA_ARGS-'-name kafkaServer -loggc'}

COMMAND=$1
case $COMMAND in
  -daemon)
    EXTRA_ARGS="-daemon "$EXTRA_ARGS
    shift
    ;;
  *)
    ;;
esac

exec $base_dir/kafka-run-class.sh $EXTRA_ARGS kafka.Kafka "$@"

Update bin/kafka-run-class.sh so the JVM performance options use -client instead of -server:

KAFKA_JVM_PERFORMANCE_OPTS="-client -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:+CMSScavengeBeforeRemark -XX:+DisableExplicitGC -Djava.awt.headless=true" # change -server to -client
# JVM performance options
if [ -z "$KAFKA_JVM_PERFORMANCE_OPTS" ]; then
  KAFKA_JVM_PERFORMANCE_OPTS="-client -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+ExplicitGCInvokesConcurrent -Djava.awt.headless=true"
fi

Start Kafka on each node:

./bin/kafka-server-start.sh config/server.properties &

References