環(huán)境
- storm 1.2.1
- kafka 0.10.2.2
- eclipse
maven依賴
<dependencies>
  <dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-core</artifactId>
    <version>1.2.1</version>
    <!-- provided: the Storm cluster supplies storm-core at runtime;
         it must not be bundled into the fat jar. -->
    <scope>provided</scope>
  </dependency>
  <!-- NOTE(review): maven-resources-plugin is a build plugin, not a library;
       it normally belongs under <build><plugins>, not <dependencies>.
       Kept unchanged here to avoid altering the build. -->
  <dependency>
    <groupId>org.apache.maven.plugins</groupId>
    <artifactId>maven-resources-plugin</artifactId>
    <version>2.6</version>
  </dependency>
  <dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-kafka-client</artifactId>
    <version>1.2.1</version>
  </dependency>
  <dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <!-- Aligned with the 0.10.2.2 broker listed in the environment section
         (was 0.10.2.1, an inconsistent patch version). -->
    <version>0.10.2.2</version>
  </dependency>
</dependencies>
<build>
  <plugins>
    <plugin>
      <artifactId>maven-assembly-plugin</artifactId>
      <version>2.4</version>
      <!-- Configuration hoisted to plugin level so it also applies when the
           goal is invoked directly (mvn assembly:single). -->
      <configuration>
        <descriptorRefs>
          <!-- Fat jar with all non-provided dependencies, required by `storm jar`. -->
          <descriptorRef>jar-with-dependencies</descriptorRef>
        </descriptorRefs>
        <archive>
          <manifest>
            <!-- Was empty; set to the topology entry point defined in this post
                 (package Topology, class sk). -->
            <mainClass>Topology.sk</mainClass>
          </manifest>
        </archive>
      </configuration>
      <executions>
        <execution>
          <id>make-assembly</id>
          <phase>package</phase>
          <goals>
            <goal>single</goal>
          </goals>
        </execution>
      </executions>
    </plugin>
    <plugin>
      <artifactId>maven-compiler-plugin</artifactId>
      <version>3.1</version>
      <!-- One plugin-level configuration covers both the default-compile and
           default-testCompile executions; the previous per-execution copies of
           the identical source/target/encoding settings were redundant. -->
      <configuration>
        <source>1.8</source>
        <target>1.8</target>
        <encoding>UTF-8</encoding>
      </configuration>
    </plugin>
  </plugins>
</build>
Storm寫入到kafka:Kafka producer
// Kafka producer settings handed to the KafkaBolt.
Properties props = new Properties();
props.put("bootstrap.servers", "10.11.6.52:9092");
props.put("acks", "1"); // leader acknowledgement only
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
// Typed KafkaBolt<K, V> instead of the raw type: both key and value are Strings
// here (matching the StringSerializer settings above).
// FieldNameBasedTupleToKafkaMapper reads the tuple fields "key" and "message"
// by default, so the upstream spout must declare those field names.
KafkaBolt<String, String> bolt = new KafkaBolt<String, String>()
        .withProducerProperties(props)
        .withTopicSelector(new DefaultTopicSelector("tokafkatest"))
        .withTupleToKafkaMapper(new FieldNameBasedTupleToKafkaMapper<String, String>());
builder.setBolt("forwardToKafka", bolt, 1).shuffleGrouping("spout");
注意
- topic需要先在kafka中提前創(chuàng)建好
- bootstrap.servers只用填寫集群中部分kafka地址就行
- 這里使用FieldNameBasedTupleToKafkaMapper(),那么spout的輸出字段聲明應(yīng)該是"key", "message";或者使用FieldNameBasedTupleToKafkaMapper("AA","BB"),其中"AA"、"BB"為自定義的key字段名和message字段名
Storm從kafka中讀?。篕afka Consumer
builder.setSpout("kafka_spout", new KafkaSpout<>(KafkaSpoutConfig.builder("10.11.6.52:9092","tokafkatest").build()), 1);
注意
- 下游收到的信息含有"topic","partition","offset","key","value",其中key,value,topic是String類型,offset是Long類型。
- 默認(rèn)使用的是UNCOMMITTED_EARLIEST (默認(rèn)值) spout 會(huì)從每個(gè)partition的最后一次提交的offset開始讀取. 如果offset不存在或者過期, 則會(huì)依照 EARLIEST進(jìn)行讀取。
其他讀取類型還有:
- EARLIEST :無論之前的消費(fèi)情況如何,spout會(huì)從每個(gè)kafka partition能找到的最早的offset開始的讀取
- LATEST :無論之前的消費(fèi)情況如何,spout會(huì)從每個(gè)kafka partition當(dāng)前最新的offset開始的讀取
- UNCOMMITTED_LATEST:spout 會(huì)從每個(gè)partition的最后一次提交的offset開始讀取, 如果offset不存在或者過期, 則會(huì)依照 LATEST進(jìn)行讀取
多個(gè)topic使用
final TopologyBuilder tp = new TopologyBuilder();
// Default translator: messages from subscribed topics with no explicit match
// are emitted as ("topic", "key", "value") on stream "STREAM_1".
ByTopicRecordTranslator<String, String> byTopic = new ByTopicRecordTranslator<>(
(r) -> new Values(r.topic(), r.key(), r.value()),
new Fields("topic", "key", "value"), "STREAM_1");
// Messages from topic_2 are emitted as ("key", "value") on stream "STREAM_2".
byTopic.forTopic("topic_2", (r) -> new Values(r.key(), r.value()), new Fields("key", "value"), "STREAM_2");
// NOTE(review): byTopic is built but never passed to the spout config — presumably
// KafkaSpoutConfig.builder(...).setRecordTranslator(byTopic) was intended; confirm.
tp.setSpout("kafka_spout", new KafkaSpout<>(KafkaSpoutConfig.builder("127.0.0.1:" + port, "topic_1", "topic_2", "topic_3").build()), 1);
tp.setBolt("bolt", new myBolt()).shuffleGrouping("kafka_spout", "STREAM_1");
tp.setBolt("another", new myOtherBolt()).shuffleGrouping("kafka_spout", "STREAM_2");
通配符 Topics
通配符 topics 將消費(fèi)所有符合通配符的topics. 在下面的例子中 "topic", "topic_foo" 和 "topic_bar" 適配通配符 "topic.*", 但是 "not_my_topic" 并不適配.
final TopologyBuilder tp = new TopologyBuilder();
// Pattern subscription: the spout consumes every topic whose name matches "topic.*".
tp.setSpout("kafka_spout", new KafkaSpout<>(KafkaSpoutConfig.builder("127.0.0.1:" + port, Pattern.compile("topic.*")).build()), 1);
tp.setBolt("bolt", new myBolt()).shuffleGrouping("kafka_spout");
完整代碼
package Topology;
import java.util.Properties;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.kafka.bolt.KafkaBolt;
import org.apache.storm.kafka.bolt.mapper.FieldNameBasedTupleToKafkaMapper;
import org.apache.storm.kafka.bolt.selector.DefaultTopicSelector;
import org.apache.storm.kafka.spout.KafkaSpout;
import org.apache.storm.kafka.spout.KafkaSpoutConfig;
import org.apache.storm.topology.IRichSpout;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.trident.testing.FixedBatchSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import Bolts.printbolt;
import Bolts.printbolt2;
import Spouts.NormalSpout;
/**
 * Demo topology: tuples emitted by {@code NormalSpout} are written into Kafka
 * topic {@code tokafkatest} through a {@code KafkaBolt}, then read back with a
 * {@code KafkaSpout}; two print bolts log both sides of the round trip.
 *
 * <p>With a command-line argument, submits to a cluster using {@code args[0]}
 * as the topology name; with no arguments, runs in a {@code LocalCluster}.
 */
public class sk {
    public static void main(String[] args) throws Exception {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new NormalSpout(), 1);

        // Kafka producer settings for the KafkaBolt.
        Properties props = new Properties();
        props.put("bootstrap.servers", "10.11.6.52:9092");
        props.put("acks", "1"); // leader acknowledgement only
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        // Typed KafkaBolt<K, V> instead of the raw type: keys and values are
        // Strings, matching the StringSerializer settings above.
        // FieldNameBasedTupleToKafkaMapper reads tuple fields "key" and
        // "message" by default, so NormalSpout must declare those field names.
        KafkaBolt<String, String> bolt = new KafkaBolt<String, String>()
                .withProducerProperties(props)
                .withTopicSelector(new DefaultTopicSelector("tokafkatest"))
                .withTupleToKafkaMapper(new FieldNameBasedTupleToKafkaMapper<String, String>());
        builder.setBolt("forwardToKafka", bolt, 1).shuffleGrouping("spout");
        builder.setBolt("printforwardToKafka", new printbolt(), 1).shuffleGrouping("spout");

        // Consumer side: spout reads the same topic back; downstream tuples carry
        // "topic","partition","offset","key","value" per the notes above.
        builder.setSpout("kafka_spout", new KafkaSpout<>(KafkaSpoutConfig.builder("10.11.6.52:9092","tokafkatest").build()), 1);
        builder.setBolt("printFromKafka", new printbolt2(),1).shuffleGrouping("kafka_spout");

        Config conf = new Config();
        conf.setDebug(true);
        if (args != null && args.length > 0) {
            // Cluster mode: args[0] is the topology name.
            conf.setNumWorkers(3);
            StormSubmitter.submitTopologyWithProgressBar(args[0], conf, builder.createTopology());
        } else {
            // Local mode: run for ~1000 s, then shut down.
            conf.setMaxTaskParallelism(3);
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("kafkaboltTest", conf, builder.createTopology());
            Thread.sleep(1000000);
            cluster.shutdown();
        }
    }
}