+
+
+
+
+
+
\ No newline at end of file
diff --git a/code/TestSpark/TestSpark.iml b/code/TestSpark/TestSpark.iml
new file mode 100644
index 0000000..78b2cc5
--- /dev/null
+++ b/code/TestSpark/TestSpark.iml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/code/TestSpark/out/artifacts/TestSpark_jar/TestSpark.jar b/code/TestSpark/out/artifacts/TestSpark_jar/TestSpark.jar
new file mode 100644
index 0000000..eb7e126
Binary files /dev/null and b/code/TestSpark/out/artifacts/TestSpark_jar/TestSpark.jar differ
diff --git a/code/TestSpark/pom.xml b/code/TestSpark/pom.xml
new file mode 100644
index 0000000..a4df494
--- /dev/null
+++ b/code/TestSpark/pom.xml
@@ -0,0 +1,53 @@
+
+
+
+
+ 4.0.0
+ war
+
+ TestSpark
+ com.kfk.spark
+ TestSpark
+ 1.0-SNAPSHOT
+
+
+
+ 2.11.12
+ 2.11
+ 2.2.0
+
+
+
+
+ org.apache.spark
+ spark-core_${scala.binary.version}
+ ${spark.version}
+
+
+ org.apache.spark
+ spark-streaming_${scala.binary.version}
+ ${spark.version}
+
+
+ org.apache.spark
+ spark-sql_${scala.binary.version}
+ ${spark.version}
+
+
+ org.apache.spark
+ spark-hive_${scala.binary.version}
+ ${spark.version}
+
+
+ org.apache.spark
+ spark-streaming-kafka-0-10_${scala.binary.version}
+ ${spark.version}
+
+
+ org.apache.hadoop
+ hadoop-client
+ 2.6.0
+
+
+
+
diff --git a/code/TestSpark/src/main/resources/META-INF/MANIFEST.MF b/code/TestSpark/src/main/resources/META-INF/MANIFEST.MF
new file mode 100644
index 0000000..c4cd13b
--- /dev/null
+++ b/code/TestSpark/src/main/resources/META-INF/MANIFEST.MF
@@ -0,0 +1,3 @@
+Manifest-Version: 1.0
+Main-Class: test
+
diff --git a/code/TestSpark/src/main/scala/TestStreaming.scala b/code/TestSpark/src/main/scala/TestStreaming.scala
new file mode 100644
index 0000000..75aa35a
--- /dev/null
+++ b/code/TestSpark/src/main/scala/TestStreaming.scala
@@ -0,0 +1,23 @@
+import org.apache.spark.SparkConf
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+
+
+/**
+ * Spark Streaming word count over a TCP text socket.
+ *
+ * Usage: TestStreaming [host] [port]
+ *
+ * Both arguments are optional. When the host is omitted, or the port is
+ * omitted or not a valid integer, the original hard-coded endpoint is used,
+ * so existing launch commands keep working.
+ */
+object TestStreaming {
+
+  private val DefaultHost = "bigdata-pro01.kfk.com"
+  private val DefaultPort = 9999
+
+  def main(args: Array[String]): Unit = {
+    val host = args.headOption.getOrElse(DefaultHost)
+    val port = args.lift(1)
+      .flatMap(raw => scala.util.Try(raw.toInt).toOption)
+      .getOrElse(DefaultPort)
+
+    // Fall back to local[2] only when no master was supplied from outside
+    // (for example by spark-submit), instead of always overriding it.
+    val conf = new SparkConf()
+      .setAppName("NetworkWordCount")
+      .setIfMissing("spark.master", "local[2]")
+    val ssc = new StreamingContext(conf, Seconds(5))
+
+    val lines = ssc.socketTextStream(host, port)
+    val words = lines.flatMap(_.split(" "))
+    // Map each word to (word, 1) and sum the counts within every 5s batch.
+    val wordCounts = words.map(word => (word, 1)).reduceByKey(_ + _)
+    wordCounts.print()
+
+    ssc.start()
+    ssc.awaitTermination()
+  }
+
+}
diff --git a/code/TestSpark/src/main/scala/test.scala b/code/TestSpark/src/main/scala/test.scala
new file mode 100644
index 0000000..04cc42d
--- /dev/null
+++ b/code/TestSpark/src/main/scala/test.scala
@@ -0,0 +1,20 @@
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Batch word count: reads text from the input path, counts space-separated
+ * words and saves the (word, count) pairs as text files under the output
+ * path.
+ *
+ * Usage: test <input-path> <output-path>
+ */
+object test {
+  def main(args: Array[String]): Unit = {
+    // Fail with a usage message rather than an ArrayIndexOutOfBoundsException.
+    if (args.length < 2) {
+      System.err.println("Usage: test <input-path> <output-path>")
+      sys.exit(1)
+    }
+    val path = args(0)
+    val out = args(1)
+
+    // Spark 2.x deprecates the "yarn-cluster" master in favour of master
+    // "yarn" plus a deploy mode. Default to "yarn" only when the launcher
+    // (e.g. spark-submit --master ... --deploy-mode ...) did not set one.
+    val conf = new org.apache.spark.SparkConf()
+      .setAppName("HdfsTest")
+      .setIfMissing("spark.master", "yarn")
+    val spark = SparkSession.builder.config(conf).getOrCreate()
+
+    try {
+      spark.sparkContext
+        .textFile(path)
+        .flatMap(_.split(" "))
+        .map(word => (word, 1))
+        .reduceByKey(_ + _)
+        .saveAsTextFile(out)
+    } finally {
+      // Always release the session, even when the job fails.
+      spark.stop()
+    }
+  }
+
+}
diff --git a/code/TestSpark/src/main/webapp/WEB-INF/applicationContext.xml b/code/TestSpark/src/main/webapp/WEB-INF/applicationContext.xml
new file mode 100644
index 0000000..9410604
--- /dev/null
+++ b/code/TestSpark/src/main/webapp/WEB-INF/applicationContext.xml
@@ -0,0 +1,43 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/code/TestSpark/src/main/webapp/WEB-INF/log4j.xml b/code/TestSpark/src/main/webapp/WEB-INF/log4j.xml
new file mode 100644
index 0000000..edb3767
--- /dev/null
+++ b/code/TestSpark/src/main/webapp/WEB-INF/log4j.xml
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/code/TestSpark/src/main/webapp/WEB-INF/web.xml b/code/TestSpark/src/main/webapp/WEB-INF/web.xml
new file mode 100644
index 0000000..208b385
--- /dev/null
+++ b/code/TestSpark/src/main/webapp/WEB-INF/web.xml
@@ -0,0 +1,119 @@
+
+
+
+
+
+
+
+
+
+
+ Multipart MIME handling filter for Cocoon
+ Cocoon multipart filter
+ CocoonMultipartFilter
+ org.apache.cocoon.servlet.multipart.MultipartFilter
+
+
+
+
+ Log debug information about each request
+ Cocoon debug filter
+ CocoonDebugFilter
+ org.apache.cocoon.servlet.DebugFilter
+
+
+
+
+
+
+ CocoonMultipartFilter
+ Cocoon
+
+
+ CocoonMultipartFilter
+ DispatcherServlet
+
+
+
+
+
+
+
+
+ org.springframework.web.context.ContextLoaderListener
+
+
+
+
+ org.springframework.web.context.request.RequestContextListener
+
+
+
+
+
+
+ Cocoon blocks dispatcher
+ DispatcherServlet
+ DispatcherServlet
+ org.apache.cocoon.servletservice.DispatcherServlet
+ 1
+
+
+
+
+
+
+ DispatcherServlet
+ /*
+
+
+
+
\ No newline at end of file
diff --git a/code/TestSpark/target/classes/META-INF/MANIFEST.MF b/code/TestSpark/target/classes/META-INF/MANIFEST.MF
new file mode 100644
index 0000000..c4cd13b
--- /dev/null
+++ b/code/TestSpark/target/classes/META-INF/MANIFEST.MF
@@ -0,0 +1,3 @@
+Manifest-Version: 1.0
+Main-Class: test
+
diff --git a/code/TestSpark/target/classes/TestStreaming$$anonfun$1.class b/code/TestSpark/target/classes/TestStreaming$$anonfun$1.class
new file mode 100644
index 0000000..5b52d7a
Binary files /dev/null and b/code/TestSpark/target/classes/TestStreaming$$anonfun$1.class differ
diff --git a/code/TestSpark/target/classes/TestStreaming$$anonfun$2.class b/code/TestSpark/target/classes/TestStreaming$$anonfun$2.class
new file mode 100644
index 0000000..46565dc
Binary files /dev/null and b/code/TestSpark/target/classes/TestStreaming$$anonfun$2.class differ
diff --git a/code/TestSpark/target/classes/TestStreaming$$anonfun$3.class b/code/TestSpark/target/classes/TestStreaming$$anonfun$3.class
new file mode 100644
index 0000000..4dc03d9
Binary files /dev/null and b/code/TestSpark/target/classes/TestStreaming$$anonfun$3.class differ
diff --git a/code/TestSpark/target/classes/TestStreaming$.class b/code/TestSpark/target/classes/TestStreaming$.class
new file mode 100644
index 0000000..e9bb3ba
Binary files /dev/null and b/code/TestSpark/target/classes/TestStreaming$.class differ
diff --git a/code/TestSpark/target/classes/TestStreaming.class b/code/TestSpark/target/classes/TestStreaming.class
new file mode 100644
index 0000000..5fee3cc
Binary files /dev/null and b/code/TestSpark/target/classes/TestStreaming.class differ
diff --git a/code/TestSpark/target/classes/test$$anonfun$1.class b/code/TestSpark/target/classes/test$$anonfun$1.class
new file mode 100644
index 0000000..05fd425
Binary files /dev/null and b/code/TestSpark/target/classes/test$$anonfun$1.class differ
diff --git a/code/TestSpark/target/classes/test$$anonfun$2.class b/code/TestSpark/target/classes/test$$anonfun$2.class
new file mode 100644
index 0000000..ca0bfac
Binary files /dev/null and b/code/TestSpark/target/classes/test$$anonfun$2.class differ
diff --git a/code/TestSpark/target/classes/test$$anonfun$3.class b/code/TestSpark/target/classes/test$$anonfun$3.class
new file mode 100644
index 0000000..8f75436
Binary files /dev/null and b/code/TestSpark/target/classes/test$$anonfun$3.class differ
diff --git a/code/TestSpark/target/classes/test$.class b/code/TestSpark/target/classes/test$.class
new file mode 100644
index 0000000..f9498db
Binary files /dev/null and b/code/TestSpark/target/classes/test$.class differ
diff --git a/code/TestSpark/target/classes/test.class b/code/TestSpark/target/classes/test.class
new file mode 100644
index 0000000..4d7c83c
Binary files /dev/null and b/code/TestSpark/target/classes/test.class differ
diff --git a/code/flume-ng-sinks/flume-dataset-sink/pom.xml b/code/flume-ng-sinks/flume-dataset-sink/pom.xml
new file mode 100644
index 0000000..1e8a07b
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/pom.xml
@@ -0,0 +1,145 @@
+
+
+
+
+ 4.0.0
+
+
+ flume-ng-sinks
+ org.apache.flume
+ 1.7.0
+
+
+ org.apache.flume.flume-ng-sinks
+ flume-dataset-sink
+ Flume NG Kite Dataset Sink
+
+
+
+
+ org.apache.rat
+ apache-rat-plugin
+
+
+ org.apache.felix
+ maven-bundle-plugin
+ 2.3.7
+ true
+ true
+
+
+
+
+
+
+
+ org.apache.flume
+ flume-ng-sdk
+
+
+
+ org.apache.flume
+ flume-ng-configuration
+
+
+
+ org.apache.flume
+ flume-ng-core
+
+
+
+ org.kitesdk
+ kite-data-core
+
+
+
+ org.kitesdk
+ kite-data-hive
+
+
+
+ org.kitesdk
+ kite-data-hbase
+
+
+
+ org.apache.avro
+ avro
+
+
+
+ org.apache.hive
+ hive-exec
+ true
+
+
+
+ org.apache.hive
+ hive-metastore
+ true
+
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ ${hadoop2.version}
+ true
+
+
+
+ org.slf4j
+ slf4j-api
+
+
+
+ com.google.guava
+ guava
+
+
+
+ junit
+ junit
+ test
+
+
+
+ org.apache.hadoop
+ hadoop-minicluster
+ ${hadoop2.version}
+ test
+
+
+
+ org.slf4j
+ slf4j-log4j12
+ test
+
+
+
+ org.mockito
+ mockito-all
+ test
+
+
+
+
+
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/DatasetSink.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/DatasetSink.java
new file mode 100644
index 0000000..fa31262
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/DatasetSink.java
@@ -0,0 +1,582 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.kite;
+
+import org.apache.flume.auth.FlumeAuthenticationUtil;
+import org.apache.flume.auth.PrivilegedExecutor;
+import org.apache.flume.sink.kite.parser.EntityParserFactory;
+import org.apache.flume.sink.kite.parser.EntityParser;
+import org.apache.flume.sink.kite.policy.FailurePolicy;
+import org.apache.flume.sink.kite.policy.FailurePolicyFactory;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Throwables;
+import com.google.common.collect.Lists;
+
+import java.net.URI;
+import java.security.PrivilegedAction;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.Transaction;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.instrumentation.SinkCounter;
+import org.apache.flume.sink.AbstractSink;
+import org.kitesdk.data.Dataset;
+import org.kitesdk.data.DatasetDescriptor;
+import org.kitesdk.data.DatasetIOException;
+import org.kitesdk.data.DatasetNotFoundException;
+import org.kitesdk.data.DatasetWriter;
+import org.kitesdk.data.Datasets;
+import org.kitesdk.data.Flushable;
+import org.kitesdk.data.Syncable;
+import org.kitesdk.data.View;
+import org.kitesdk.data.spi.Registration;
+import org.kitesdk.data.URIBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.flume.sink.kite.DatasetSinkConstants.*;
+import org.kitesdk.data.Format;
+import org.kitesdk.data.Formats;
+
+/**
+ * Sink that writes events to a Kite Dataset. This sink will parse the body of
+ * each incoming event and store the resulting entity in a Kite Dataset. It
+ * determines the destination Dataset by opening a dataset URI
+ * {@code kite.dataset.uri} or opening a repository URI, {@code kite.repo.uri},
+ * and loading a Dataset by name, {@code kite.dataset.name}, and namespace,
+ * {@code kite.dataset.namespace}.
+ */
+public class DatasetSink extends AbstractSink implements Configurable {
+
+ private static final Logger LOG = LoggerFactory.getLogger(DatasetSink.class);
+
+ private Context context = null;
+ private PrivilegedExecutor privilegedExecutor;
+
+ private String datasetName = null;
+ private URI datasetUri = null;
+ private Schema datasetSchema = null;
+ private DatasetWriter writer = null;
+
+ /**
+ * The number of events to process as a single batch.
+ */
+ private long batchSize = DEFAULT_BATCH_SIZE;
+
+ /**
+ * The number of seconds to wait before rolling a writer.
+ */
+ private int rollIntervalSeconds = DEFAULT_ROLL_INTERVAL;
+
+ /**
+ * Flag that says if Flume should commit on every batch.
+ */
+ private boolean commitOnBatch = DEFAULT_FLUSHABLE_COMMIT_ON_BATCH;
+
+ /**
+ * Flag that says if Flume should sync on every batch.
+ */
+ private boolean syncOnBatch = DEFAULT_SYNCABLE_SYNC_ON_BATCH;
+
+ /**
+ * The last time the writer rolled.
+ */
+ private long lastRolledMillis = 0L;
+
+ /**
+ * The raw number of bytes parsed.
+ */
+ private long bytesParsed = 0L;
+
+ /**
+ * A class for parsing Kite entities from Flume Events.
+ */
+ private EntityParser parser = null;
+
+ /**
+ * A class implementing a failure policy for events that had a
+ * non-recoverable error during processing.
+ */
+ private FailurePolicy failurePolicy = null;
+
+ private SinkCounter counter = null;
+
+ /**
+ * The Kite entity
+ */
+ private GenericRecord entity = null;
+ // TODO: remove this after PARQUET-62 is released
+ private boolean reuseEntity = true;
+
+ /**
+ * The Flume transaction. Used to keep transactions open across calls to
+ * process.
+ */
+ private Transaction transaction = null;
+
+ /**
+ * Internal flag on if there has been a batch of records committed. This is
+ * used during rollback to know if the current writer needs to be closed.
+ */
+ private boolean committedBatch = false;
+
+ // Factories
+ private static final EntityParserFactory ENTITY_PARSER_FACTORY =
+ new EntityParserFactory();
+ private static final FailurePolicyFactory FAILURE_POLICY_FACTORY =
+ new FailurePolicyFactory();
+
+ /**
+  * Return the names of the dataset formats this sink is able to write.
+  * {@link #createWriter()} rejects any dataset whose format name is not in
+  * this list.
+  *
+  * @return The list of allowed format names.
+  */
+ protected List<String> allowedFormats() {
+   return Lists.newArrayList("avro", "parquet");
+ }
+
+ @Override
+ public void configure(Context context) {
+ this.context = context;
+
+ String principal = context.getString(AUTH_PRINCIPAL);
+ String keytab = context.getString(AUTH_KEYTAB);
+ String effectiveUser = context.getString(AUTH_PROXY_USER);
+
+ this.privilegedExecutor = FlumeAuthenticationUtil.getAuthenticator(
+ principal, keytab).proxyAs(effectiveUser);
+
+ // Get the dataset URI and name from the context
+ String datasetURI = context.getString(CONFIG_KITE_DATASET_URI);
+ if (datasetURI != null) {
+ this.datasetUri = URI.create(datasetURI);
+ this.datasetName = uriToName(datasetUri);
+ } else {
+ String repositoryURI = context.getString(CONFIG_KITE_REPO_URI);
+ Preconditions.checkNotNull(repositoryURI, "No dataset configured. Setting "
+ + CONFIG_KITE_DATASET_URI + " is required.");
+
+ this.datasetName = context.getString(CONFIG_KITE_DATASET_NAME);
+ Preconditions.checkNotNull(datasetName, "No dataset configured. Setting "
+ + CONFIG_KITE_DATASET_URI + " is required.");
+
+ String namespace = context.getString(CONFIG_KITE_DATASET_NAMESPACE,
+ DEFAULT_NAMESPACE);
+
+ this.datasetUri = new URIBuilder(repositoryURI, namespace, datasetName)
+ .build();
+ }
+ this.setName(datasetUri.toString());
+
+ if (context.getBoolean(CONFIG_SYNCABLE_SYNC_ON_BATCH,
+ DEFAULT_SYNCABLE_SYNC_ON_BATCH)) {
+ Preconditions.checkArgument(
+ context.getBoolean(CONFIG_FLUSHABLE_COMMIT_ON_BATCH,
+ DEFAULT_FLUSHABLE_COMMIT_ON_BATCH), "Configuration error: "
+ + CONFIG_FLUSHABLE_COMMIT_ON_BATCH + " must be set to true when "
+ + CONFIG_SYNCABLE_SYNC_ON_BATCH + " is set to true.");
+ }
+
+ // Create the configured failure failurePolicy
+ this.failurePolicy = FAILURE_POLICY_FACTORY.newPolicy(context);
+
+ // other configuration
+ this.batchSize = context.getLong(CONFIG_KITE_BATCH_SIZE,
+ DEFAULT_BATCH_SIZE);
+ this.rollIntervalSeconds = context.getInteger(CONFIG_KITE_ROLL_INTERVAL,
+ DEFAULT_ROLL_INTERVAL);
+
+ this.counter = new SinkCounter(datasetName);
+ }
+
+ /**
+  * Start the sink: the roll timer is reset to the current time and the
+  * sink counter is started before the superclass is told to start.
+  */
+ @Override
+ public synchronized void start() {
+   lastRolledMillis = System.currentTimeMillis();
+   counter.start();
+
+   // signal that this sink is ready to process
+   final String sinkName = getName();
+   LOG.info("Started DatasetSink " + sinkName);
+   super.start();
+ }
+
+ /**
+  * Force a roll on the next {@link #process()} call by resetting the
+  * last-rolled timestamp to zero.
+  */
+ @VisibleForTesting
+ void roll() {
+   lastRolledMillis = 0L;
+ }
+
+ /**
+ * Return the writer currently held by the sink; exposed for tests.
+ *
+ * @return The current writer, or null if no writer is open
+ */
+ @VisibleForTesting
+ DatasetWriter getWriter() {
+ return writer;
+ }
+
+ /**
+ * Replace the writer held by the sink; exposed for tests.
+ *
+ * @param writer The writer subsequent events are written to
+ */
+ @VisibleForTesting
+ void setWriter(DatasetWriter writer) {
+ this.writer = writer;
+ }
+
+ /**
+ * Replace the entity parser used by {@link #write(Event)}; exposed for
+ * tests.
+ *
+ * @param parser The parser that turns Flume events into entities
+ */
+ @VisibleForTesting
+ void setParser(EntityParser parser) {
+ this.parser = parser;
+ }
+
+ /**
+ * Replace the failure policy; exposed for tests. Note that
+ * enterTransaction assigns a new policy whenever it begins a transaction.
+ *
+ * @param failurePolicy The policy that handles non-recoverable events
+ */
+ @VisibleForTesting
+ void setFailurePolicy(FailurePolicy failurePolicy) {
+ this.failurePolicy = failurePolicy;
+ }
+
+ /**
+ * Stop the sink: stop the counter, close the current writer and commit the
+ * open transaction. If closing or committing fails, the transaction is
+ * rolled back and the failure is logged rather than rethrown.
+ */
+ @Override
+ public synchronized void stop() {
+ counter.stop();
+
+ try {
+ // Close the writer and commit the transaction, but don't create a new
+ // writer since we're stopping
+ closeWriter();
+ commitTransaction();
+ } catch (EventDeliveryException ex) {
+ rollbackTransaction();
+
+ LOG.warn("Closing the writer failed: " + ex.getLocalizedMessage());
+ LOG.debug("Exception follows.", ex);
+ // We don't propagate the exception as the transaction would have been
+ // rolled back and we can still finish stopping
+ }
+
+ // signal that this sink has stopped
+ LOG.info("Stopped dataset sink: " + getName());
+ super.stop();
+ }
+
+ /**
+ * Take up to {@code batchSize} events from the channel and write them to
+ * the dataset.
+ *
+ * The writer is rolled first (closed, transaction committed, new writer
+ * created) when {@link #shouldRoll()} says so. When commit-on-batch is
+ * enabled the writer is synced or flushed and the transaction committed
+ * after the batch; otherwise the transaction stays open until a later roll.
+ *
+ * @return {@code BACKOFF} if no event was taken from the channel,
+ * {@code READY} otherwise
+ * @throws EventDeliveryException if anything fails; the transaction is
+ * rolled back before the exception is thrown
+ */
+ @Override
+ public Status process() throws EventDeliveryException {
+ long processedEvents = 0;
+
+ try {
+ if (shouldRoll()) {
+ closeWriter();
+ commitTransaction();
+ createWriter();
+ }
+
+ // The writer shouldn't be null at this point
+ Preconditions.checkNotNull(writer,
+ "Can't process events with a null writer. This is likely a bug.");
+ Channel channel = getChannel();
+
+ // Enter the transaction boundary if we haven't already
+ enterTransaction(channel);
+
+ for (; processedEvents < batchSize; processedEvents += 1) {
+ Event event = channel.take();
+
+ if (event == null) {
+ // no events available in the channel
+ break;
+ }
+
+ write(event);
+ }
+
+ // commit transaction
+ if (commitOnBatch) {
+ // Flush/sync before committing. A failure here will result in rolling back
+ // the transaction
+ if (syncOnBatch && writer instanceof Syncable) {
+ ((Syncable) writer).sync();
+ } else if (writer instanceof Flushable) {
+ ((Flushable) writer).flush();
+ }
+ boolean committed = commitTransaction();
+ Preconditions.checkState(committed,
+ "Tried to commit a batch when there was no transaction");
+ committedBatch |= committed;
+ }
+ } catch (Throwable th) {
+ // catch-all for any unhandled Throwable so that the transaction is
+ // correctly rolled back.
+ rollbackTransaction();
+
+ // Close the writer only if an earlier batch was already committed
+ // through it; otherwise just drop the reference without closing.
+ if (commitOnBatch && committedBatch) {
+ try {
+ closeWriter();
+ } catch (EventDeliveryException ex) {
+ LOG.warn("Error closing writer there may be temp files that need to"
+ + " be manually recovered: " + ex.getLocalizedMessage());
+ LOG.debug("Exception follows.", ex);
+ }
+ } else {
+ this.writer = null;
+ }
+
+ // handle the exception: Errors and EventDeliveryExceptions are rethrown
+ // as-is, anything else is wrapped in an EventDeliveryException
+ Throwables.propagateIfInstanceOf(th, Error.class);
+ Throwables.propagateIfInstanceOf(th, EventDeliveryException.class);
+ throw new EventDeliveryException(th);
+ }
+
+ // Update the batch counters; an empty batch asks the runner to back off
+ if (processedEvents == 0) {
+ counter.incrementBatchEmptyCount();
+ return Status.BACKOFF;
+ } else if (processedEvents < batchSize) {
+ counter.incrementBatchUnderflowCount();
+ } else {
+ counter.incrementBatchCompleteCount();
+ }
+
+ counter.addToEventDrainSuccessCount(processedEvents);
+
+ return Status.READY;
+ }
+
+ /**
+  * Parse a single event with the entity parser and write the resulting
+  * entity to the dataset. Non-recoverable parse errors and Avro append
+  * errors are handed to the failure policy instead of being thrown.
+  *
+  * @param event The event to write
+  * @throws EventDeliveryException An error occurred trying to write to the
+  *                                dataset that couldn't or shouldn't be
+  *                                handled by the failure policy.
+  */
+ @VisibleForTesting
+ void write(Event event) throws EventDeliveryException {
+   try {
+     // Hand the previous entity back to the parser only when reuse is on.
+     entity = parser.parse(event, reuseEntity ? entity : null);
+     bytesParsed += event.getBody().length;
+
+     // writeEncoded would be an optimization in some cases, but HBase
+     // will not support it and partitioned Datasets need to get partition
+     // info from the entity Object. We may be able to avoid the
+     // serialization round-trip otherwise.
+     writer.write(entity);
+   } catch (NonRecoverableEventException | DataFileWriter.AppendWriteException ex) {
+     failurePolicy.handle(event, ex);
+   } catch (RuntimeException ex) {
+     Throwables.propagateIfInstanceOf(ex, EventDeliveryException.class);
+     throw new EventDeliveryException(ex);
+   }
+ }
+
+ /**
+  * Create a new writer.
+  *
+  * This method also re-loads the dataset so updates to the configuration or
+  * a dataset created after Flume starts will be loaded. The parser is
+  * re-created when the dataset schema changed, and the entity-reuse,
+  * commit-on-batch and sync-on-batch flags are re-derived from the dataset
+  * format.
+  *
+  * @throws EventDeliveryException There was an error creating the writer.
+  */
+ @VisibleForTesting
+ void createWriter() throws EventDeliveryException {
+   // reset the committed flag whenever a new writer is created
+   committedBatch = false;
+   try {
+     View<GenericRecord> view;
+
+     // Load the dataset through the executor built in configure().
+     view = privilegedExecutor.execute(
+         new PrivilegedAction<Dataset<GenericRecord>>() {
+           @Override
+           public Dataset<GenericRecord> run() {
+             return Datasets.load(datasetUri);
+           }
+         });
+
+     DatasetDescriptor descriptor = view.getDataset().getDescriptor();
+     Format format = descriptor.getFormat();
+     Preconditions.checkArgument(allowedFormats().contains(format.getName()),
+         "Unsupported format: " + format.getName());
+
+     Schema newSchema = descriptor.getSchema();
+     if (datasetSchema == null || !newSchema.equals(datasetSchema)) {
+       this.datasetSchema = newSchema;
+       // dataset schema has changed, create a new parser
+       parser = ENTITY_PARSER_FACTORY.newParser(datasetSchema, context);
+     }
+
+     // Entities are not reused for Parquet (see the PARQUET-62 TODO on the
+     // reuseEntity field).
+     this.reuseEntity = !(Formats.PARQUET.equals(format));
+
+     // TODO: Check that the format implements Flushable after CDK-863
+     // goes in. For now, just check that the Dataset is Avro format
+     this.commitOnBatch = context.getBoolean(CONFIG_FLUSHABLE_COMMIT_ON_BATCH,
+         DEFAULT_FLUSHABLE_COMMIT_ON_BATCH) && (Formats.AVRO.equals(format));
+
+     // TODO: Check that the format implements Syncable after CDK-863
+     // goes in. For now, just check that the Dataset is Avro format
+     this.syncOnBatch = context.getBoolean(CONFIG_SYNCABLE_SYNC_ON_BATCH,
+         DEFAULT_SYNCABLE_SYNC_ON_BATCH) && (Formats.AVRO.equals(format));
+
+     this.datasetName = view.getDataset().getName();
+
+     this.writer = view.newWriter();
+
+     // Reset the last rolled time and the metrics
+     this.lastRolledMillis = System.currentTimeMillis();
+     this.bytesParsed = 0L;
+   } catch (DatasetNotFoundException ex) {
+     throw new EventDeliveryException("Dataset " + datasetUri + " not found."
+         + " The dataset must be created before Flume can write to it.", ex);
+   } catch (RuntimeException ex) {
+     throw new EventDeliveryException("Error trying to open a new"
+         + " writer for dataset " + datasetUri, ex);
+   }
+ }
+
+ /**
+ * Return true if the sink should roll the writer.
+ *
+ * Currently, this is based on time since the last roll or if the current
+ * writer is null.
+ *
+ * @return True if and only if the sink should roll the writer
+ */
+ private boolean shouldRoll() {
+ long currentTimeMillis = System.currentTimeMillis();
+ long elapsedTimeSeconds = TimeUnit.MILLISECONDS.toSeconds(
+ currentTimeMillis - lastRolledMillis);
+
+ LOG.debug("Current time: {}, lastRolled: {}, diff: {} sec",
+ new Object[] {currentTimeMillis, lastRolledMillis, elapsedTimeSeconds});
+
+ return elapsedTimeSeconds >= rollIntervalSeconds || writer == null;
+ }
+
+ /**
+ * Close the current writer.
+ *
+ * This method always sets the current writer to null even if close fails.
+ * If this method throws an Exception, callers *must* rollback any active
+ * transaction to ensure that data is replayed.
+ *
+ * @throws EventDeliveryException
+ */
+ @VisibleForTesting
+ void closeWriter() throws EventDeliveryException {
+ if (writer != null) {
+ try {
+ writer.close();
+
+ long elapsedTimeSeconds = TimeUnit.MILLISECONDS.toSeconds(
+ System.currentTimeMillis() - lastRolledMillis);
+ LOG.info("Closed writer for {} after {} seconds and {} bytes parsed",
+ new Object[]{datasetUri, elapsedTimeSeconds, bytesParsed});
+ } catch (DatasetIOException ex) {
+ throw new EventDeliveryException("Check HDFS permissions/health. IO"
+ + " error trying to close the writer for dataset " + datasetUri,
+ ex);
+ } catch (RuntimeException ex) {
+ throw new EventDeliveryException("Error trying to close the writer for"
+ + " dataset " + datasetUri, ex);
+ } finally {
+ // If we failed to close the writer then we give up on it as we'll
+ // end up throwing an EventDeliveryException which will result in
+ // a transaction rollback and a replay of any events written during
+ // the current transaction. If commitOnBatch is true, you can still
+ // end up with orphaned temp files that have data to be recovered.
+ this.writer = null;
+ failurePolicy.close();
+ }
+ }
+ }
+
+ /**
+  * Enter the transaction boundary. A new transaction is begun, and a new
+  * failure policy created, only if no transaction is currently open; if
+  * one is already open this method does nothing.
+  *
+  * @param channel The Sink's channel
+  * @throws EventDeliveryException There was an error starting a new batch
+  *                                with the failure policy.
+  */
+ private void enterTransaction(Channel channel) throws EventDeliveryException {
+   // There's no synchronization around the transaction instance because the
+   // Sink API states "the Sink#process() call is guaranteed to only
+   // be accessed by a single thread". Technically other methods could be
+   // called concurrently, but the implementation of SinkRunner waits
+   // for the Thread running process() to end before calling stop()
+   if (transaction != null) {
+     return;
+   }
+
+   transaction = channel.getTransaction();
+   transaction.begin();
+   failurePolicy = FAILURE_POLICY_FACTORY.newPolicy(context);
+ }
+
+ /**
+  * Sync the failure policy, then commit and close the open transaction.
+  *
+  * If this method throws an Exception the caller *must* ensure that the
+  * transaction is rolled back. Callers can roll back the transaction by
+  * calling {@link #rollbackTransaction()}.
+  *
+  * @return True if there was an open transaction and it was committed, false
+  *         otherwise.
+  * @throws EventDeliveryException There was an error ending the batch with
+  *                                the failure policy.
+  */
+ @VisibleForTesting
+ boolean commitTransaction() throws EventDeliveryException {
+   if (transaction == null) {
+     // No open transaction, so nothing was committed.
+     return false;
+   }
+
+   failurePolicy.sync();
+   transaction.commit();
+   transaction.close();
+   transaction = null;
+   return true;
+ }
+
+ /**
+  * Roll back the open transaction, if any. A RuntimeException thrown by the
+  * rollback is logged, not rethrown; in every case the transaction is
+  * closed and the transaction field reset to null.
+  */
+ private void rollbackTransaction() {
+   if (transaction == null) {
+     return;
+   }
+
+   try {
+     // If the transaction wasn't committed before we got the exception, we
+     // need to rollback.
+     transaction.rollback();
+   } catch (RuntimeException ex) {
+     LOG.error("Transaction rollback failed: " + ex.getLocalizedMessage());
+     LOG.debug("Exception follows.", ex);
+   } finally {
+     transaction.close();
+     transaction = null;
+   }
+ }
+
+ /**
+ * Get the name of the dataset from the URI
+ *
+ * @param uri The dataset or view URI
+ * @return The dataset name
+ */
+ private static String uriToName(URI uri) {
+ return Registration.lookupDatasetUri(URI.create(
+ uri.getRawSchemeSpecificPart())).second().get("dataset");
+ }
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/DatasetSinkConstants.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/DatasetSinkConstants.java
new file mode 100644
index 0000000..af33304
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/DatasetSinkConstants.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite;
+
+import org.kitesdk.data.URIBuilder;
+
+/**
+ * Configuration keys, default values and built-in option names used by
+ * {@link DatasetSink}.
+ *
+ * All members are constants; the default values are declared final so they
+ * cannot be reassigned at runtime.
+ */
+public class DatasetSinkConstants {
+  /**
+   * URI of the Kite Dataset
+   */
+  public static final String CONFIG_KITE_DATASET_URI = "kite.dataset.uri";
+
+  /**
+   * URI of the Kite DatasetRepository.
+   */
+  public static final String CONFIG_KITE_REPO_URI = "kite.repo.uri";
+
+  /**
+   * Name of the Kite Dataset to write into.
+   */
+  public static final String CONFIG_KITE_DATASET_NAME = "kite.dataset.name";
+
+  /**
+   * Namespace of the Kite Dataset to write into.
+   */
+  public static final String CONFIG_KITE_DATASET_NAMESPACE =
+      "kite.dataset.namespace";
+  public static final String DEFAULT_NAMESPACE = URIBuilder.NAMESPACE_DEFAULT;
+
+  /**
+   * Number of records to process from the incoming channel per call to process.
+   */
+  public static final String CONFIG_KITE_BATCH_SIZE = "kite.batchSize";
+  public static final long DEFAULT_BATCH_SIZE = 100;
+
+  /**
+   * Maximum time to wait before finishing files.
+   */
+  public static final String CONFIG_KITE_ROLL_INTERVAL = "kite.rollInterval";
+  public static final int DEFAULT_ROLL_INTERVAL = 30; // seconds
+
+  /**
+   * Flag for committing the Flume transaction on each batch for Flushable
+   * datasets. When set to false, Flume will only commit the transaction when
+   * roll interval has expired. Setting this to false requires enough space
+   * in the channel to handle all events delivered during the roll interval.
+   * Defaults to true.
+   *
+   * NOTE(review): the key is spelled "commiteOnBatch"; it is kept as-is
+   * because changing it would change the configuration key users must set.
+   */
+  public static final String CONFIG_FLUSHABLE_COMMIT_ON_BATCH =
+      "kite.flushable.commiteOnBatch";
+  public static final boolean DEFAULT_FLUSHABLE_COMMIT_ON_BATCH = true;
+
+  /**
+   * Flag for syncing the DatasetWriter on each batch for Syncable
+   * datasets. Defaults to true.
+   */
+  public static final String CONFIG_SYNCABLE_SYNC_ON_BATCH =
+      "kite.syncable.syncOnBatch";
+  public static final boolean DEFAULT_SYNCABLE_SYNC_ON_BATCH = true;
+
+  /**
+   * Parser used to parse Flume Events into Kite entities.
+   */
+  public static final String CONFIG_ENTITY_PARSER = "kite.entityParser";
+
+  /**
+   * Built-in entity parsers
+   */
+  public static final String AVRO_ENTITY_PARSER = "avro";
+  public static final String DEFAULT_ENTITY_PARSER = AVRO_ENTITY_PARSER;
+  public static final String[] AVAILABLE_PARSERS = new String[] {
+    AVRO_ENTITY_PARSER
+  };
+
+  /**
+   * Policy used to handle non-recoverable failures.
+   */
+  public static final String CONFIG_FAILURE_POLICY = "kite.failurePolicy";
+
+  /**
+   * Write non-recoverable Flume events to a Kite dataset.
+   */
+  public static final String SAVE_FAILURE_POLICY = "save";
+
+  /**
+   * The URI to write non-recoverable Flume events to in the case of an error.
+   * If the dataset doesn't exist, it will be created.
+   */
+  public static final String CONFIG_KITE_ERROR_DATASET_URI =
+      "kite.error.dataset.uri";
+
+  /**
+   * Retry non-recoverable Flume events. This will lead to a never ending cycle
+   * of failure, but matches the previous default semantics of the DatasetSink.
+   */
+  public static final String RETRY_FAILURE_POLICY = "retry";
+  public static final String DEFAULT_FAILURE_POLICY = RETRY_FAILURE_POLICY;
+  public static final String[] AVAILABLE_POLICIES = new String[] {
+    RETRY_FAILURE_POLICY,
+    SAVE_FAILURE_POLICY
+  };
+
+  /**
+   * Headers where avro schema information is expected.
+   */
+  public static final String AVRO_SCHEMA_LITERAL_HEADER =
+      "flume.avro.schema.literal";
+  public static final String AVRO_SCHEMA_URL_HEADER = "flume.avro.schema.url";
+
+  /**
+   * Hadoop authentication settings
+   */
+  public static final String AUTH_PROXY_USER = "auth.proxyUser";
+  public static final String AUTH_PRINCIPAL = "auth.kerberosPrincipal";
+  public static final String AUTH_KEYTAB = "auth.kerberosKeytab";
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/NonRecoverableEventException.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/NonRecoverableEventException.java
new file mode 100644
index 0000000..4373429
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/NonRecoverableEventException.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite;
+
+
+/**
+ * Signals a non-recoverable failure while trying to deliver an event.
+ *
+ * Situations reported with this exception include:
+ *
+ * 1. The {@link org.apache.flume.sink.kite.parser.EntityParser} failed to
+ *    parse the event body.
+ * 2. The schema of the event does not match the schema of the destination
+ *    dataset.
+ * 3. The event headers carry no schema although the
+ *    {@link org.apache.flume.sink.kite.parser.AvroParser} requires one.
+ */
+public class NonRecoverableEventException extends Exception {
+
+  private static final long serialVersionUID = 3485151222482254285L;
+
+  /**
+   * Creates an exception that wraps the given cause.
+   *
+   * @param cause The underlying failure
+   */
+  public NonRecoverableEventException(Throwable cause) {
+    super(cause);
+  }
+
+  /**
+   * Creates an exception with a detail message and the given cause.
+   *
+   * @param message The detail message
+   * @param cause The underlying failure
+   */
+  public NonRecoverableEventException(String message, Throwable cause) {
+    super(message, cause);
+  }
+
+  /**
+   * Creates an exception with a detail message only.
+   *
+   * @param message The detail message
+   */
+  public NonRecoverableEventException(String message) {
+    super(message);
+  }
+
+  /**
+   * Creates an exception with neither a detail message nor a cause.
+   */
+  public NonRecoverableEventException() {
+    // the implicit super() call is all that is needed
+  }
+
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/AvroParser.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/AvroParser.java
new file mode 100644
index 0000000..7c6a723
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/AvroParser.java
@@ -0,0 +1,208 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite.parser;
+
+import com.google.common.base.Preconditions;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import com.google.common.util.concurrent.UncheckedExecutionException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URL;
+import java.util.Locale;
+import java.util.Map;
+import java.util.concurrent.ExecutionException;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.BinaryDecoder;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.io.DecoderFactory;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.sink.kite.NonRecoverableEventException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import static org.apache.flume.sink.kite.DatasetSinkConstants.*;
+
+/**
+ * An {@link EntityParser} that parses Avro serialized bytes from an event.
+ *
+ * The Avro schema used to serialize the data should be set as either a URL
+ * or literal in the flume.avro.schema.url or flume.avro.schema.literal event
+ * headers respectively. When both headers are present, the URL is used.
+ */
+public class AvroParser implements EntityParser<GenericRecord> {
+
+  // Hadoop configuration used to look up the FileSystem for hdfs:/ schema URLs.
+  static Configuration conf = new Configuration();
+
+  /**
+   * A cache of literal schemas to avoid re-parsing the schema.
+   */
+  private static final LoadingCache<String, Schema> schemasFromLiteral =
+      CacheBuilder.newBuilder()
+      .build(new CacheLoader<String, Schema>() {
+        @Override
+        public Schema load(String literal) {
+          Preconditions.checkNotNull(literal,
+              "Schema literal cannot be null without a Schema URL");
+          return new Schema.Parser().parse(literal);
+        }
+      });
+
+  /**
+   * A cache of schemas retrieved by URL to avoid re-parsing the schema.
+   */
+  private static final LoadingCache<String, Schema> schemasFromURL =
+      CacheBuilder.newBuilder()
+      .build(new CacheLoader<String, Schema>() {
+        @Override
+        public Schema load(String url) throws IOException {
+          Schema.Parser parser = new Schema.Parser();
+          InputStream is = null;
+          try {
+            // Validate the URL syntax first so that a malformed value still
+            // fails with an unchecked exception, exactly as it did before.
+            URI uri = URI.create(url);
+            if (url.toLowerCase(Locale.ENGLISH).startsWith("hdfs:/")) {
+              // Only look up a Hadoop FileSystem for HDFS locations. Other
+              // schemes (e.g. http) may have no FileSystem implementation
+              // registered, in which case the lookup itself would fail
+              // before the URL could be opened directly.
+              FileSystem fs = FileSystem.get(uri, conf);
+              is = fs.open(new Path(url));
+            } else {
+              is = new URL(url).openStream();
+            }
+            return parser.parse(is);
+          } finally {
+            if (is != null) {
+              is.close();
+            }
+          }
+        }
+      });
+
+  /**
+   * The schema of the destination dataset.
+   *
+   * Used as the reader schema during parsing.
+   */
+  private final Schema datasetSchema;
+
+  /**
+   * A cache of DatumReaders per (writer) schema.
+   */
+  private final LoadingCache<Schema, DatumReader<GenericRecord>> readers =
+      CacheBuilder.newBuilder()
+      .build(new CacheLoader<Schema, DatumReader<GenericRecord>>() {
+        @Override
+        public DatumReader<GenericRecord> load(Schema schema) {
+          // must use the target dataset's schema for reading to ensure the
+          // records are able to be stored using it
+          return new GenericDatumReader<GenericRecord>(
+              schema, datasetSchema);
+        }
+      });
+
+  /**
+   * The binary decoder to reuse for event parsing.
+   */
+  private BinaryDecoder decoder = null;
+
+  /**
+   * Create a new AvroParser given the schema of the destination dataset.
+   *
+   * @param datasetSchema The schema of the destination dataset.
+   */
+  private AvroParser(Schema datasetSchema) {
+    this.datasetSchema = datasetSchema;
+  }
+
+  /**
+   * Parse the entity from the body of the given event.
+   *
+   * @param event The event to parse.
+   * @param reuse If non-null, this may be reused and returned from this method.
+   * @return The parsed entity as a GenericRecord.
+   * @throws EventDeliveryException A recoverable error such as an error
+   *                                downloading the schema from the URL has
+   *                                occurred.
+   * @throws NonRecoverableEventException A non-recoverable error such as an
+   *                                      unparsable schema or entity has
+   *                                      occurred.
+   */
+  @Override
+  public GenericRecord parse(Event event, GenericRecord reuse)
+      throws EventDeliveryException, NonRecoverableEventException {
+    // hand the previous decoder back to the factory so it can be reused
+    decoder = DecoderFactory.get().binaryDecoder(event.getBody(), decoder);
+
+    try {
+      DatumReader<GenericRecord> reader = readers.getUnchecked(schema(event));
+      return reader.read(reuse, decoder);
+    } catch (IOException ex) {
+      throw new NonRecoverableEventException("Cannot deserialize event", ex);
+    } catch (RuntimeException ex) {
+      throw new NonRecoverableEventException("Cannot deserialize event", ex);
+    }
+  }
+
+  /**
+   * Get the schema from the event headers.
+   *
+   * The schema URL header is consulted first; the schema literal header is
+   * only used when no URL is present.
+   *
+   * @param event The Flume event
+   * @return The schema for the event
+   * @throws EventDeliveryException A recoverable error such as an error
+   *                                downloading the schema from the URL has
+   *                                occurred.
+   * @throws NonRecoverableEventException A non-recoverable error such as an
+   *                                      unparsable schema has occurred.
+   */
+  private static Schema schema(Event event) throws EventDeliveryException,
+      NonRecoverableEventException {
+    Map<String, String> headers = event.getHeaders();
+    String schemaURL = headers.get(AVRO_SCHEMA_URL_HEADER);
+    try {
+      if (schemaURL != null) {
+        return schemasFromURL.get(schemaURL);
+      } else {
+        String schemaLiteral = headers.get(AVRO_SCHEMA_LITERAL_HEADER);
+        if (schemaLiteral == null) {
+          throw new NonRecoverableEventException("No schema in event headers."
+              + " Headers must include either " + AVRO_SCHEMA_URL_HEADER
+              + " or " + AVRO_SCHEMA_LITERAL_HEADER);
+        }
+
+        return schemasFromLiteral.get(schemaLiteral);
+      }
+    } catch (ExecutionException ex) {
+      // checked failure inside a cache loader (the IOException of the URL
+      // loader): treated as recoverable
+      throw new EventDeliveryException("Cannot get schema", ex.getCause());
+    } catch (UncheckedExecutionException ex) {
+      // unchecked failure inside a cache loader: treated as non-recoverable
+      throw new NonRecoverableEventException("Cannot parse schema",
+          ex.getCause());
+    }
+  }
+
+  /**
+   * Builds {@link AvroParser} instances. The configuration context is not
+   * used by this builder.
+   */
+  public static class Builder implements EntityParser.Builder<GenericRecord> {
+
+    @Override
+    public EntityParser<GenericRecord> build(Schema datasetSchema, Context config) {
+      return new AvroParser(datasetSchema);
+    }
+
+  }
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/EntityParser.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/EntityParser.java
new file mode 100644
index 0000000..f2051a2
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/EntityParser.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite.parser;
+
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.avro.Schema;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.sink.kite.NonRecoverableEventException;
+
+/**
+ * Parses Kite entities out of Flume events.
+ *
+ * @param <E> The type of entities generated
+ */
+@NotThreadSafe
+public interface EntityParser<E> {
+
+  /**
+   * Parse a Kite entity from a Flume event
+   *
+   * @param event The event to parse
+   * @param reuse If non-null, this may be reused and returned
+   * @return The parsed entity
+   * @throws EventDeliveryException A recoverable error during parsing. Parsing
+   *                                can be safely retried.
+   * @throws NonRecoverableEventException A non-recoverable error during
+   *                                      parsing. The event must be discarded.
+   *
+   */
+  public E parse(Event event, E reuse) throws EventDeliveryException,
+      NonRecoverableEventException;
+
+  /**
+   * Knows how to build {@code EntityParser}s. Implementers must provide a
+   * no-arg constructor.
+   *
+   * @param <E> The type of entities generated
+   */
+  public static interface Builder<E> {
+
+    /**
+     * Build a new {@code EntityParser}
+     *
+     * @param datasetSchema The schema of the destination dataset
+     * @param config The Flume configuration context
+     * @return The {@code EntityParser}
+     */
+    public EntityParser<E> build(Schema datasetSchema, Context config);
+  }
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/EntityParserFactory.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/EntityParserFactory.java
new file mode 100644
index 0000000..3720ff3
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/EntityParserFactory.java
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite.parser;
+
+import java.util.Arrays;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.flume.Context;
+
+import static org.apache.flume.sink.kite.DatasetSinkConstants.*;
+
+/**
+ * Creates {@link EntityParser}s from the Flume configuration.
+ */
+public class EntityParserFactory {
+
+  /**
+   * Create the parser selected by the {@code CONFIG_ENTITY_PARSER} setting.
+   *
+   * The setting is either the name of the builtin Avro parser (also the
+   * default) or the fully qualified name of a class that implements
+   * {@link EntityParser.Builder} and has an accessible no-arg constructor.
+   *
+   * @param datasetSchema The schema of the destination dataset
+   * @param config The Flume configuration context
+   * @return The configured parser
+   * @throws IllegalArgumentException If the configured class cannot be found,
+   *                                  does not implement
+   *                                  {@code EntityParser.Builder}, or cannot
+   *                                  be instantiated.
+   */
+  @SuppressWarnings({"unchecked", "rawtypes"})
+  public EntityParser<GenericRecord> newParser(Schema datasetSchema, Context config) {
+    EntityParser<GenericRecord> parser;
+
+    String parserType = config.getString(CONFIG_ENTITY_PARSER,
+        DEFAULT_ENTITY_PARSER);
+
+    if (parserType.equals(AVRO_ENTITY_PARSER)) {
+      parser = new AvroParser.Builder().build(datasetSchema, config);
+    } else {
+
+      Class<?> c;
+      try {
+        c = Class.forName(parserType);
+      } catch (ClassNotFoundException ex) {
+        throw new IllegalArgumentException("EntityParser.Builder class "
+            + parserType + " not found. Must set " + CONFIG_ENTITY_PARSER
+            + " to a class that implements EntityParser.Builder or to a builtin"
+            + " parser: " + Arrays.toString(AVAILABLE_PARSERS), ex);
+      }
+
+      // Class.forName never returns null, so only the type needs checking
+      if (!EntityParser.Builder.class.isAssignableFrom(c)) {
+        throw new IllegalArgumentException("Class " + parserType + " does not"
+            + " implement EntityParser.Builder. Must set "
+            + CONFIG_ENTITY_PARSER + " to a class that extends"
+            + " EntityParser.Builder or to a builtin parser: "
+            + Arrays.toString(AVAILABLE_PARSERS));
+      }
+      Class<? extends EntityParser.Builder> builderClass =
+          c.asSubclass(EntityParser.Builder.class);
+
+      EntityParser.Builder<GenericRecord> builder;
+      try {
+        builder = builderClass.newInstance();
+      } catch (InstantiationException ex) {
+        throw cannotInstantiate(parserType, ex);
+      } catch (IllegalAccessException ex) {
+        throw cannotInstantiate(parserType, ex);
+      }
+
+      parser = builder.build(datasetSchema, config);
+    }
+
+    return parser;
+  }
+
+  /**
+   * Build the exception reported when the configured builder class cannot be
+   * instantiated.
+   */
+  private static IllegalArgumentException cannotInstantiate(String parserType,
+      Exception cause) {
+    return new IllegalArgumentException("Can't instantiate class "
+        + parserType + ". Must set " + CONFIG_ENTITY_PARSER + " to a class"
+        + " that extends EntityParser.Builder or to a builtin parser: "
+        + Arrays.toString(AVAILABLE_PARSERS), cause);
+  }
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/FailurePolicy.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/FailurePolicy.java
new file mode 100644
index 0000000..f6f875a
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/FailurePolicy.java
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite.policy;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.sink.kite.DatasetSink;
+import org.kitesdk.data.Syncable;
+
+/**
+ * Strategy for events whose delivery failed in a non-recoverable way.
+ *
+ * Failures of that kind include:
+ *
+ * 1. The {@link org.apache.flume.sink.kite.parser.EntityParser} could not
+ *    parse the event body.
+ * 2. The schema of the event does not match the schema of the destination
+ *    dataset.
+ * 3. The event headers carry no schema although the
+ *    {@link org.apache.flume.sink.kite.parser.AvroParser} requires one.
+ *
+ * A FailurePolicy follows the life cycle of the writer used by the
+ * {@link DatasetSink}:
+ *
+ * 1. A policy is instantiated whenever a new writer is created.
+ * 2. Each failing event is passed to
+ *    {@link #handle(org.apache.flume.Event, java.lang.Throwable)}.
+ * 3. If the {@link DatasetSink} is configured to commit on batch,
+ *    {@link #sync()} is called when the batch is committed.
+ * 4. {@link #close()} is called when the writer is closed.
+ */
+public interface FailurePolicy {
+
+  /**
+   * Deal with a single non-recoverable event.
+   *
+   * @param event The event that could not be delivered
+   * @param cause The reason delivery failed
+   * @throws EventDeliveryException The policy itself could not handle the
+   *                                event. The Flume transaction is then
+   *                                rolled back and the event is retried
+   *                                together with the rest of the batch.
+   */
+  void handle(Event event, Throwable cause) throws EventDeliveryException;
+
+  /**
+   * Make sure every event handled so far has reached stable storage.
+   *
+   * Gives the implementation a chance to sync data it has not fully handled
+   * yet. See {@link Syncable#sync()}.
+   *
+   * @throws EventDeliveryException Syncing failed. The Flume transaction is
+   *                                then rolled back and the batch is retried.
+   */
+  void sync() throws EventDeliveryException;
+
+  /**
+   * Release all resources held by this FailurePolicy.
+   *
+   * @throws EventDeliveryException Closing failed. The Flume transaction is
+   *                                then rolled back and the batch is retried.
+   */
+  void close() throws EventDeliveryException;
+
+  /**
+   * Factory for {@code FailurePolicy} instances. Implementations must offer a
+   * no-arg constructor.
+   */
+  interface Builder {
+
+    /**
+     * Create a new {@code FailurePolicy}.
+     *
+     * @param config The Flume configuration context
+     * @return The {@code FailurePolicy}
+     */
+    FailurePolicy build(Context config);
+  }
+
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/FailurePolicyFactory.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/FailurePolicyFactory.java
new file mode 100644
index 0000000..d3b1fe8
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/FailurePolicyFactory.java
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite.policy;
+
+import java.util.Arrays;
+import org.apache.flume.Context;
+
+import static org.apache.flume.sink.kite.DatasetSinkConstants.*;
+
+/**
+ * Creates {@link FailurePolicy} instances from the Flume configuration.
+ */
+public class FailurePolicyFactory {
+
+  /**
+   * Create the policy selected by the {@code CONFIG_FAILURE_POLICY} setting.
+   *
+   * The setting is either the name of a builtin policy (retry, which is the
+   * default, or save) or the fully qualified name of a class that implements
+   * {@link FailurePolicy.Builder} and has an accessible no-arg constructor.
+   *
+   * @param config The Flume configuration context
+   * @return The configured policy
+   * @throws IllegalArgumentException If the configured class cannot be found,
+   *                                  does not implement
+   *                                  {@code FailurePolicy.Builder}, or cannot
+   *                                  be instantiated.
+   */
+  public FailurePolicy newPolicy(Context config) {
+    FailurePolicy policy;
+
+    String policyType = config.getString(CONFIG_FAILURE_POLICY,
+        DEFAULT_FAILURE_POLICY);
+
+    if (policyType.equals(RETRY_FAILURE_POLICY)) {
+      policy = new RetryPolicy.Builder().build(config);
+    } else if (policyType.equals(SAVE_FAILURE_POLICY)) {
+      policy = new SavePolicy.Builder().build(config);
+    } else {
+
+      Class<?> c;
+      try {
+        c = Class.forName(policyType);
+      } catch (ClassNotFoundException ex) {
+        throw new IllegalArgumentException("FailurePolicy.Builder class "
+            + policyType + " not found. Must set " + CONFIG_FAILURE_POLICY
+            + " to a class that implements FailurePolicy.Builder or to a builtin"
+            + " policy: " + Arrays.toString(AVAILABLE_POLICIES), ex);
+      }
+
+      // Class.forName never returns null, so only the type needs checking
+      if (!FailurePolicy.Builder.class.isAssignableFrom(c)) {
+        throw new IllegalArgumentException("Class " + policyType + " does not"
+            + " implement FailurePolicy.Builder. Must set "
+            + CONFIG_FAILURE_POLICY + " to a class that extends"
+            + " FailurePolicy.Builder or to a builtin policy: "
+            + Arrays.toString(AVAILABLE_POLICIES));
+      }
+      Class<? extends FailurePolicy.Builder> builderClass =
+          c.asSubclass(FailurePolicy.Builder.class);
+
+      FailurePolicy.Builder builder;
+      try {
+        builder = builderClass.newInstance();
+      } catch (InstantiationException ex) {
+        throw cannotInstantiate(policyType, ex);
+      } catch (IllegalAccessException ex) {
+        throw cannotInstantiate(policyType, ex);
+      }
+
+      policy = builder.build(config);
+    }
+
+    return policy;
+  }
+
+  /**
+   * Build the exception reported when the configured builder class cannot be
+   * instantiated.
+   */
+  private static IllegalArgumentException cannotInstantiate(String policyType,
+      Exception cause) {
+    return new IllegalArgumentException("Can't instantiate class "
+        + policyType + ". Must set " + CONFIG_FAILURE_POLICY + " to a class"
+        + " that extends FailurePolicy.Builder or to a builtin policy: "
+        + Arrays.toString(AVAILABLE_POLICIES), cause);
+  }
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/RetryPolicy.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/RetryPolicy.java
new file mode 100644
index 0000000..9a4991c
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/RetryPolicy.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite.policy;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A failure policy that never absorbs a failure: it logs the problem and
+ * rethrows it wrapped in an {@link EventDeliveryException}, which forces a
+ * retry.
+ */
+public class RetryPolicy implements FailurePolicy {
+  private static final Logger LOG = LoggerFactory.getLogger(RetryPolicy.class);
+
+  /**
+   * Builds {@link RetryPolicy} instances. The configuration context is not
+   * used by this builder.
+   */
+  public static class Builder implements FailurePolicy.Builder {
+
+    @Override
+    public FailurePolicy build(Context config) {
+      return new RetryPolicy();
+    }
+
+  }
+
+  // instances are only handed out by the Builder
+  private RetryPolicy() {
+  }
+
+  /**
+   * Log the failure (message at error level, full exception at debug level)
+   * and rethrow it.
+   *
+   * @param event The event that could not be delivered; not inspected here
+   * @param cause The cause of the failure
+   * @throws EventDeliveryException Always, wrapping {@code cause}
+   */
+  @Override
+  public void handle(Event event, Throwable cause) throws EventDeliveryException {
+    final String reason = cause.getLocalizedMessage();
+    LOG.error("Event delivery failed: " + reason);
+    LOG.debug("Exception follows.", cause);
+
+    throw new EventDeliveryException(cause);
+  }
+
+  /**
+   * Nothing is buffered by this policy, so there is nothing to sync.
+   */
+  @Override
+  public void sync() throws EventDeliveryException {
+    // intentionally empty
+  }
+
+  /**
+   * No resources are held by this policy, so there is nothing to close.
+   */
+  @Override
+  public void close() throws EventDeliveryException {
+    // intentionally empty
+  }
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/SavePolicy.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/SavePolicy.java
new file mode 100644
index 0000000..bd537ec
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/SavePolicy.java
@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite.policy;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+import java.nio.ByteBuffer;
+import java.util.Map;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.source.avro.AvroFlumeEvent;
+import org.kitesdk.data.DatasetDescriptor;
+import org.kitesdk.data.DatasetWriter;
+import org.kitesdk.data.Datasets;
+import org.kitesdk.data.Formats;
+import org.kitesdk.data.Syncable;
+import org.kitesdk.data.View;
+
+import static org.apache.flume.sink.kite.DatasetSinkConstants.*;
+
+/**
+ * A failure policy that writes the raw Flume event to a Kite dataset.
+ *
+ * The target dataset is taken from the {@code CONFIG_KITE_ERROR_DATASET_URI}
+ * setting and is created if it does not exist yet. The writer is opened
+ * lazily, when the first failed event is handled.
+ */
+public class SavePolicy implements FailurePolicy {
+
+  private final View<AvroFlumeEvent> dataset;
+  // null until the first event is handled and again after close()
+  private DatasetWriter<AvroFlumeEvent> writer;
+  // number of events written since the writer was opened
+  private int nEventsHandled;
+
+  /**
+   * Load, or create if missing, the error dataset named in the configuration.
+   *
+   * @param context The Flume configuration context
+   * @throws IllegalArgumentException If no error dataset URI is configured
+   */
+  private SavePolicy(Context context) {
+    String uri = context.getString(CONFIG_KITE_ERROR_DATASET_URI);
+    Preconditions.checkArgument(uri != null, "Must set "
+        + CONFIG_KITE_ERROR_DATASET_URI + " when " + CONFIG_FAILURE_POLICY
+        + "=save");
+    if (Datasets.exists(uri)) {
+      dataset = Datasets.load(uri, AvroFlumeEvent.class);
+    } else {
+      DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
+          .schema(AvroFlumeEvent.class)
+          .build();
+      dataset = Datasets.create(uri, descriptor, AvroFlumeEvent.class);
+    }
+
+    nEventsHandled = 0;
+  }
+
+  /**
+   * Write the body and headers of the failed event to the error dataset.
+   *
+   * @param event The event that could not be delivered
+   * @param cause The cause of the failure; not stored
+   * @throws EventDeliveryException If opening the writer or writing the event
+   *                                fails with a runtime exception
+   */
+  @Override
+  public void handle(Event event, Throwable cause) throws EventDeliveryException {
+    try {
+      if (writer == null) {
+        writer = dataset.newWriter();
+      }
+
+      final AvroFlumeEvent avroEvent = new AvroFlumeEvent();
+      avroEvent.setBody(ByteBuffer.wrap(event.getBody()));
+      avroEvent.setHeaders(toCharSeqMap(event.getHeaders()));
+
+      writer.write(avroEvent);
+      nEventsHandled++;
+    } catch (RuntimeException ex) {
+      throw new EventDeliveryException(ex);
+    }
+  }
+
+  /**
+   * Sync the events written so far, if any.
+   *
+   * For Parquet datasets the writer is closed instead; for other formats the
+   * writer is synced when it implements {@link Syncable}.
+   *
+   * @throws EventDeliveryException If closing or syncing the writer fails
+   */
+  @Override
+  public void sync() throws EventDeliveryException {
+    if (nEventsHandled > 0) {
+      if (Formats.PARQUET.equals(
+          dataset.getDataset().getDescriptor().getFormat())) {
+        // We need to close the writer on sync if we're writing to a Parquet
+        // dataset
+        close();
+      } else if (writer instanceof Syncable) {
+        try {
+          ((Syncable) writer).sync();
+        } catch (RuntimeException ex) {
+          // report sync failures the same way handle() and close() do
+          throw new EventDeliveryException(ex);
+        }
+      }
+    }
+  }
+
+  /**
+   * Close the writer, if one is open, and reset the handled-event counter.
+   *
+   * @throws EventDeliveryException If closing the writer fails
+   */
+  @Override
+  public void close() throws EventDeliveryException {
+    // Key on the writer rather than on the event count: a writer whose first
+    // write failed has handled no events but still has to be released.
+    if (writer != null) {
+      try {
+        writer.close();
+      } catch (RuntimeException ex) {
+        throw new EventDeliveryException(ex);
+      } finally {
+        writer = null;
+        nEventsHandled = 0;
+      }
+    }
+  }
+
+  /**
+   * Helper function to convert a map of String to a map of CharSequence.
+   */
+  private static Map<CharSequence, CharSequence> toCharSeqMap(
+      Map<String, String> map) {
+    return Maps.<CharSequence, CharSequence>newHashMap(map);
+  }
+
+  /**
+   * Builds {@link SavePolicy} instances from the Flume configuration.
+   */
+  public static class Builder implements FailurePolicy.Builder {
+
+    @Override
+    public FailurePolicy build(Context config) {
+      return new SavePolicy(config);
+    }
+
+  }
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/test/java/org/apache/flume/sink/kite/TestDatasetSink.java b/code/flume-ng-sinks/flume-dataset-sink/src/test/java/org/apache/flume/sink/kite/TestDatasetSink.java
new file mode 100644
index 0000000..3709577
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/test/java/org/apache/flume/sink/kite/TestDatasetSink.java
@@ -0,0 +1,1036 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite;
+
+import com.google.common.base.Function;
+import com.google.common.base.Throwables;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.GenericRecordBuilder;
+import org.apache.avro.io.Encoder;
+import org.apache.avro.io.EncoderFactory;
+import org.apache.avro.reflect.ReflectDatumWriter;
+import org.apache.avro.util.Utf8;
+import org.apache.commons.io.FileUtils;
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.Transaction;
+import org.apache.flume.channel.MemoryChannel;
+import org.apache.flume.conf.Configurables;
+import org.apache.flume.event.SimpleEvent;
+import org.apache.flume.sink.kite.parser.EntityParser;
+import org.apache.flume.sink.kite.policy.FailurePolicy;
+import org.apache.flume.source.avro.AvroFlumeEvent;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.kitesdk.data.Dataset;
+import org.kitesdk.data.DatasetDescriptor;
+import org.kitesdk.data.DatasetReader;
+import org.kitesdk.data.DatasetWriter;
+import org.kitesdk.data.Datasets;
+import org.kitesdk.data.PartitionStrategy;
+import org.kitesdk.data.View;
+
+import javax.annotation.Nullable;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.net.URI;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.Callable;
+
+import static org.mockito.Mockito.any;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+public class TestDatasetSink {
+
+ // Locations of the local file-system repository and datasets used by the tests
+ public static final String FILE_REPO_URI = "repo:file:target/test_repo";
+ public static final String DATASET_NAME = "test";
+ public static final String FILE_DATASET_URI =
+ "dataset:file:target/test_repo/" + DATASET_NAME;
+ // Dataset that receives failed events in the save-policy tests
+ public static final String ERROR_DATASET_URI =
+ "dataset:file:target/test_repo/failed_events";
+ // File that saveSchema() fills with RECORD_SCHEMA so events can reference it by URL
+ public static final File SCHEMA_FILE = new File("target/record-schema.avsc");
+ // Schema of the target dataset: a string id and an optional msg
+ public static final Schema RECORD_SCHEMA = new Schema.Parser().parse(
+ "{\"type\":\"record\",\"name\":\"rec\",\"fields\":[" +
+ "{\"name\":\"id\",\"type\":\"string\"}," +
+ "{\"name\":\"msg\",\"type\":[\"string\",\"null\"]," +
+ "\"default\":\"default\"}]}");
+ // Same record name as RECORD_SCHEMA but with only the id field
+ public static final Schema COMPATIBLE_SCHEMA = new Schema.Parser().parse(
+ "{\"type\":\"record\",\"name\":\"rec\",\"fields\":[" +
+ "{\"name\":\"id\",\"type\":\"string\"}]}");
+ // Different record name and field; shares nothing with RECORD_SCHEMA
+ public static final Schema INCOMPATIBLE_SCHEMA = new Schema.Parser().parse(
+ "{\"type\":\"record\",\"name\":\"user\",\"fields\":[" +
+ "{\"name\":\"username\",\"type\":\"string\"}]}");
+ // RECORD_SCHEMA plus an int priority field that defaults to 0
+ public static final Schema UPDATED_SCHEMA = new Schema.Parser().parse(
+ "{\"type\":\"record\",\"name\":\"rec\",\"fields\":[" +
+ "{\"name\":\"id\",\"type\":\"string\"}," +
+ "{\"name\":\"priority\",\"type\":\"int\", \"default\": 0}," +
+ "{\"name\":\"msg\",\"type\":[\"string\",\"null\"]," +
+ "\"default\":\"default\"}]}");
+ // Descriptor used whenever a test (re)creates the target dataset
+ public static final DatasetDescriptor DESCRIPTOR = new DatasetDescriptor
+ .Builder()
+ .schema(RECORD_SCHEMA)
+ .build();
+
+ // Per-test state, rebuilt by setup()
+ Context config = null;
+ Channel in = null;
+ List expected = null;
+ // "test.build.data" is pointed at DFS_DIR for the duration of this class;
+ // the previous value is remembered so tearDownClass() can restore it
+ private static final String DFS_DIR = "target/test/dfs";
+ private static final String TEST_BUILD_DATA_KEY = "test.build.data";
+ private static String oldTestBuildDataProp = null;
+
+ /**
+  * One-time fixture: redirects the "test.build.data" system property to
+  * {@code DFS_DIR} (remembering the previous value) and writes
+  * {@code RECORD_SCHEMA} to {@code SCHEMA_FILE}.
+  *
+  * @throws IOException if the schema file cannot be written
+  */
+ @BeforeClass
+ public static void saveSchema() throws IOException {
+   oldTestBuildDataProp = System.getProperty(TEST_BUILD_DATA_KEY);
+   System.setProperty(TEST_BUILD_DATA_KEY, DFS_DIR);
+   FileWriter schema = new FileWriter(SCHEMA_FILE);
+   try {
+     schema.append(RECORD_SCHEMA.toString());
+   } finally {
+     // Close even when the write fails so the file handle is not leaked
+     schema.close();
+   }
+ }
+
+ /**
+  * One-time cleanup: deletes {@code DFS_DIR} and puts the
+  * "test.build.data" system property back to the state it had before
+  * {@code saveSchema()} ran.
+  */
+ @AfterClass
+ public static void tearDownClass() {
+   FileUtils.deleteQuietly(new File(DFS_DIR));
+   if (oldTestBuildDataProp != null) {
+     System.setProperty(TEST_BUILD_DATA_KEY, oldTestBuildDataProp);
+   } else {
+     // The property was unset before this class ran; remove our override so
+     // it does not leak into test classes that run later in the same JVM
+     System.clearProperty(TEST_BUILD_DATA_KEY);
+   }
+ }
+
+ /**
+ * Per-test fixture: recreates the target dataset, builds a fresh channel
+ * and sink configuration, and loads three expected records into the channel.
+ */
+ @Before
+ public void setup() throws EventDeliveryException {
+ // start every test from an empty dataset
+ Datasets.delete(FILE_DATASET_URI);
+ Datasets.create(FILE_DATASET_URI, DESCRIPTOR);
+
+ this.config = new Context();
+ config.put("keep-alive", "0");
+ this.in = new MemoryChannel();
+ Configurables.configure(in, config);
+
+ config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_URI, FILE_DATASET_URI);
+
+ GenericRecordBuilder builder = new GenericRecordBuilder(RECORD_SCHEMA);
+ expected = Lists.newArrayList(
+ builder.set("id", "1").set("msg", "msg1").build(),
+ builder.set("id", "2").set("msg", "msg2").build(),
+ builder.set("id", "3").set("msg", "msg3").build());
+
+ // Convert each expected record to an event. The function keeps a counter,
+ // so the schema header alternates: odd-numbered records carry the schema
+ // literal, even-numbered records carry the schema file URL.
+ putToChannel(in, Iterables.transform(expected,
+ new Function() {
+ private int i = 0;
+
+ @Override
+ public Event apply(@Nullable GenericRecord rec) {
+ this.i += 1;
+ boolean useURI = (i % 2) == 0;
+ return event(rec, RECORD_SCHEMA, SCHEMA_FILE, useURI);
+ }
+ }));
+ }
+
+ /**
+ * Per-test cleanup: deletes the target dataset.
+ */
+ @After
+ public void teardown() {
+ Datasets.delete(FILE_DATASET_URI);
+ }
+
+ /**
+ * The sink is configured with a repository URI and dataset name only (the
+ * dataset URI setting is nulled out); all expected records must be written
+ * and the channel drained.
+ */
+ @Test
+ public void testOldConfig() throws EventDeliveryException {
+ config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_URI, null);
+ config.put(DatasetSinkConstants.CONFIG_KITE_REPO_URI, FILE_REPO_URI);
+ config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_NAME, DATASET_NAME);
+
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * With the dataset URI still set by setup(), an unusable repository URI and
+ * an empty dataset name must not stop the sink from writing all expected
+ * records.
+ */
+ @Test
+ public void testDatasetUriOverridesOldConfig() throws EventDeliveryException {
+ // CONFIG_KITE_DATASET_URI is still set, otherwise this will cause an error
+ config.put(DatasetSinkConstants.CONFIG_KITE_REPO_URI, "bad uri");
+ config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_NAME, "");
+
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+  * Baseline run against the local file dataset created by setup(): after
+  * start/process/stop every expected record must be readable and the
+  * channel must be empty.
+  */
+ @Test
+ public void testFileStore()
+     throws EventDeliveryException, NonRecoverableEventException {
+   DatasetSink sink = sink(in, config);
+
+   // run the sink
+   sink.start();
+   sink.process();
+   sink.stop();
+
+   Assert.assertEquals(
+       Sets.newHashSet(expected),
+       read(Datasets.load(FILE_DATASET_URI)));
+   Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * Recreates the target dataset in Parquet format and checks that process()
+ * leaves the channel transaction open with no records readable, and that
+ * the records appear and the channel is drained only after stop().
+ */
+ @Test
+ public void testParquetDataset() throws EventDeliveryException {
+ Datasets.delete(FILE_DATASET_URI);
+ Dataset created = Datasets.create(FILE_DATASET_URI,
+ new DatasetDescriptor.Builder(DESCRIPTOR)
+ .format("parquet")
+ .build());
+
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ // the transaction should not commit during the call to process
+ assertThrows("Transaction should still be open", IllegalStateException.class,
+ new Callable() {
+ @Override
+ public Object call() throws EventDeliveryException {
+ in.getTransaction().begin();
+ return null;
+ }
+ });
+ // The records won't commit until the call to stop()
+ Assert.assertEquals("Should not have committed", 0, read(created).size());
+
+ sink.stop();
+
+ Assert.assertEquals(Sets.newHashSet(expected), read(created));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * Writes the expected records to a dataset partitioned by the id field and
+ * checks that all of them can be read back. The partitioned dataset is
+ * deleted afterwards, whether or not the assertions pass.
+ */
+ @Test
+ public void testPartitionedData() throws EventDeliveryException {
+ URI partitionedUri = URI.create("dataset:file:target/test_repo/partitioned");
+ try {
+ Datasets.create(partitionedUri, new DatasetDescriptor.Builder(DESCRIPTOR)
+ .partitionStrategy(new PartitionStrategy.Builder()
+ .identity("id", 10) // partition by id
+ .build())
+ .build());
+
+ config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_URI,
+ partitionedUri.toString());
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(partitionedUri)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ } finally {
+ if (Datasets.exists(partitionedUri)) {
+ Datasets.delete(partitionedUri);
+ }
+ }
+ }
+
+ /**
+ * Starts the sink while the target dataset does not exist: the first
+ * process() must fail with EventDeliveryException, and once the dataset has
+ * been created a second process() must deliver all expected records.
+ */
+ @Test
+ public void testStartBeforeDatasetCreated() throws EventDeliveryException {
+ // delete the dataset created by setup
+ Datasets.delete(FILE_DATASET_URI);
+
+ DatasetSink sink = sink(in, config);
+
+ // start the sink
+ sink.start();
+
+ // run the sink without a target dataset
+ try {
+ sink.process();
+ Assert.fail("Should have thrown an exception: no such dataset");
+ } catch (EventDeliveryException e) {
+ // expected
+ }
+
+ // create the target dataset
+ Datasets.create(FILE_DATASET_URI, DESCRIPTOR);
+
+ // run the sink
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals(Sets.newHashSet(expected), read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * Updates the dataset's schema to UPDATED_SCHEMA between two process()
+ * calls and checks that, after a roll, a record written with the new schema
+ * is stored alongside the earlier records, all read back in the updated
+ * schema.
+ */
+ @Test
+ public void testDatasetUpdate() throws EventDeliveryException {
+ // build a record with the updated schema, including the new priority field
+ GenericRecordBuilder updatedBuilder = new GenericRecordBuilder(UPDATED_SCHEMA);
+ GenericData.Record updatedRecord = updatedBuilder
+ .set("id", "0")
+ .set("priority", 1)
+ .set("msg", "Priority 1 message!")
+ .build();
+
+ // make a set of the expected records with the new schema
+ Set expectedAsUpdated = Sets.newHashSet();
+ for (GenericRecord record : expected) {
+ // priority is cleared so each converted record is built without an
+ // explicit priority value
+ expectedAsUpdated.add(updatedBuilder
+ .clear("priority")
+ .set("id", record.get("id"))
+ .set("msg", record.get("msg"))
+ .build());
+ }
+ expectedAsUpdated.add(updatedRecord);
+
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ // update the dataset's schema
+ DatasetDescriptor updated = new DatasetDescriptor
+ .Builder(Datasets.load(FILE_DATASET_URI).getDataset().getDescriptor())
+ .schema(UPDATED_SCHEMA)
+ .build();
+ Datasets.update(FILE_DATASET_URI, updated);
+
+ // trigger a roll on the next process call to refresh the writer
+ sink.roll();
+
+ // add the updated record to the incoming channel (it is already part of
+ // the expected set built above)
+ putToChannel(in, event(updatedRecord, UPDATED_SCHEMA, null, false));
+
+ // process events with the updated schema
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals(expectedAsUpdated, read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * Runs the sink against a dataset stored in an in-process MiniDFSCluster
+ * and checks that all expected records are written. The dataset is deleted
+ * and the cluster shut down in the finally block.
+ */
+ @Test
+ public void testMiniClusterStore() throws EventDeliveryException, IOException {
+ // setup a minicluster
+ MiniDFSCluster cluster = new MiniDFSCluster
+ .Builder(new Configuration())
+ .build();
+
+ FileSystem dfs = cluster.getFileSystem();
+ Configuration conf = dfs.getConf();
+
+ // NOTE(review): "/tmp/repo" + DATASET_NAME concatenates without a path
+ // separator (".../tmp/repotest") - confirm this is intended
+ URI hdfsUri = URI.create(
+ "dataset:" + conf.get("fs.defaultFS") + "/tmp/repo" + DATASET_NAME);
+ try {
+ // create a repository and dataset in HDFS
+ Datasets.create(hdfsUri, DESCRIPTOR);
+
+ // update the config to use the HDFS repository
+ config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_URI, hdfsUri.toString());
+
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(hdfsUri)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+
+ } finally {
+ if (Datasets.exists(hdfsUri)) {
+ Datasets.delete(hdfsUri);
+ }
+ cluster.shutdown();
+ }
+ }
+
+ /**
+ * With a batch size of 2, the three queued records are taken over two
+ * process() calls; each roll makes the records written so far readable.
+ */
+ @Test
+ public void testBatchSize() throws EventDeliveryException {
+ DatasetSink sink = sink(in, config);
+
+ // release two records per process call; the sink is reconfigured after
+ // creation so the new batch size takes effect
+ config.put("kite.batchSize", "2");
+ Configurables.configure(sink, config);
+
+ sink.start();
+ sink.process(); // process the first and second
+ sink.roll(); // roll at the next process call
+ sink.process(); // roll and process the third
+ // only the first batch is readable; the third record is not visible yet
+ Assert.assertEquals(
+ Sets.newHashSet(expected.subList(0, 2)),
+ read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ sink.roll(); // roll at the next process call
+ sink.process(); // roll, the channel is empty
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ sink.stop();
+ }
+
+ /**
+ * With a one-second roll interval, a process() call made after sleeping
+ * past the interval must make the written records readable before the sink
+ * is stopped.
+ */
+ @Test
+ public void testTimedFileRolling()
+ throws EventDeliveryException, InterruptedException {
+ // use a new roll interval
+ config.put("kite.rollInterval", "1"); // in seconds
+
+ DatasetSink sink = sink(in, config);
+
+ Dataset records = Datasets.load(FILE_DATASET_URI);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+
+ Thread.sleep(1100); // sleep longer than the roll interval
+ sink.process(); // rolling happens in the process method
+
+ Assert.assertEquals(Sets.newHashSet(expected), read(records));
+
+ // wait until the end to stop because it would close the files
+ sink.stop();
+ }
+
+ /**
+ * Adds an event serialized with COMPATIBLE_SCHEMA (id only) and checks that
+ * it is stored next to the regular records, read back as a RECORD_SCHEMA
+ * record that has only the id set explicitly.
+ */
+ @Test
+ public void testCompatibleSchemas() throws EventDeliveryException {
+ DatasetSink sink = sink(in, config);
+
+ // add a compatible record that is missing the msg field
+ GenericRecordBuilder compatBuilder = new GenericRecordBuilder(
+ COMPATIBLE_SCHEMA);
+ GenericData.Record compatibleRecord = compatBuilder.set("id", "0").build();
+
+ // add the record to the incoming channel
+ putToChannel(in, event(compatibleRecord, COMPATIBLE_SCHEMA, null, false));
+
+ // the record will be read using the real schema, so create the expected
+ // record using it, but without any data
+
+ GenericRecordBuilder builder = new GenericRecordBuilder(RECORD_SCHEMA);
+ GenericData.Record expectedRecord = builder.set("id", "0").build();
+ expected.add(expectedRecord);
+
+ // run the sink
+ sink.start();
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * Adds an event whose header and body both use INCOMPATIBLE_SCHEMA:
+ * process() must throw EventDeliveryException and every event, including
+ * the bad one, must still be in the channel afterwards.
+ */
+ @Test
+ public void testIncompatibleSchemas() throws EventDeliveryException {
+ final DatasetSink sink = sink(in, config);
+
+ GenericRecordBuilder builder = new GenericRecordBuilder(
+ INCOMPATIBLE_SCHEMA);
+ GenericData.Record rec = builder.set("username", "koala").build();
+ putToChannel(in, event(rec, INCOMPATIBLE_SCHEMA, null, false));
+
+ // run the sink
+ sink.start();
+ assertThrows("Should fail", EventDeliveryException.class,
+ new Callable() {
+ @Override
+ public Object call() throws EventDeliveryException {
+ sink.process();
+ return null;
+ }
+ });
+ sink.stop();
+
+ Assert.assertEquals("Should have rolled back",
+ expected.size() + 1, remaining(in));
+ }
+
+ /**
+ * Adds an event that carries no schema header at all: process() must throw
+ * EventDeliveryException and every event, including the bad one, must still
+ * be in the channel afterwards.
+ */
+ @Test
+ public void testMissingSchema() throws EventDeliveryException {
+ final DatasetSink sink = sink(in, config);
+
+ // valid body, but empty headers
+ Event badEvent = new SimpleEvent();
+ badEvent.setHeaders(Maps.newHashMap());
+ badEvent.setBody(serialize(expected.get(0), RECORD_SCHEMA));
+ putToChannel(in, badEvent);
+
+ // run the sink
+ sink.start();
+ assertThrows("Should fail", EventDeliveryException.class,
+ new Callable() {
+ @Override
+ public Object call() throws EventDeliveryException {
+ sink.process();
+ return null;
+ }
+ });
+ sink.stop();
+
+ Assert.assertEquals("Should have rolled back",
+ expected.size() + 1, remaining(in));
+ }
+
+ /**
+ * Same run as the plain file-store test, but with the save failure policy
+ * and an error dataset URI configured; with only good events queued, all
+ * expected records must still be written.
+ */
+ @Test
+ public void testFileStoreWithSavePolicy() throws EventDeliveryException {
+ // remove any error dataset left behind by an earlier test
+ if (Datasets.exists(ERROR_DATASET_URI)) {
+ Datasets.delete(ERROR_DATASET_URI);
+ }
+ config.put(DatasetSinkConstants.CONFIG_FAILURE_POLICY,
+ DatasetSinkConstants.SAVE_FAILURE_POLICY);
+ config.put(DatasetSinkConstants.CONFIG_KITE_ERROR_DATASET_URI,
+ ERROR_DATASET_URI);
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * With the save failure policy configured, an event without a schema header
+ * must not fail the batch: the good records are written, the channel is
+ * drained, and the bad event is stored in the error dataset as an
+ * AvroFlumeEvent.
+ */
+ @Test
+ public void testMissingSchemaWithSavePolicy() throws EventDeliveryException {
+ // remove any error dataset left behind by an earlier test
+ if (Datasets.exists(ERROR_DATASET_URI)) {
+ Datasets.delete(ERROR_DATASET_URI);
+ }
+ config.put(DatasetSinkConstants.CONFIG_FAILURE_POLICY,
+ DatasetSinkConstants.SAVE_FAILURE_POLICY);
+ config.put(DatasetSinkConstants.CONFIG_KITE_ERROR_DATASET_URI,
+ ERROR_DATASET_URI);
+ final DatasetSink sink = sink(in, config);
+
+ // valid body, but empty headers
+ Event badEvent = new SimpleEvent();
+ badEvent.setHeaders(Maps.newHashMap());
+ badEvent.setBody(serialize(expected.get(0), RECORD_SCHEMA));
+ putToChannel(in, badEvent);
+
+ // run the sink
+ sink.start();
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals("Good records should have been written",
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should not have rolled back", 0, remaining(in));
+ Assert.assertEquals("Should have saved the bad event",
+ Sets.newHashSet(AvroFlumeEvent.newBuilder()
+ .setBody(ByteBuffer.wrap(badEvent.getBody()))
+ .setHeaders(toUtf8Map(badEvent.getHeaders()))
+ .build()),
+ read(Datasets.load(ERROR_DATASET_URI, AvroFlumeEvent.class)));
+ }
+
+ /**
+ * With the save failure policy configured, an event whose header points at
+ * the valid schema file but whose body was serialized with
+ * INCOMPATIBLE_SCHEMA must be stored in the error dataset while the good
+ * records are written and the channel is drained.
+ */
+ @Test
+ public void testSerializedWithIncompatibleSchemasWithSavePolicy()
+ throws EventDeliveryException {
+ // remove any error dataset left behind by an earlier test
+ if (Datasets.exists(ERROR_DATASET_URI)) {
+ Datasets.delete(ERROR_DATASET_URI);
+ }
+ config.put(DatasetSinkConstants.CONFIG_FAILURE_POLICY,
+ DatasetSinkConstants.SAVE_FAILURE_POLICY);
+ config.put(DatasetSinkConstants.CONFIG_KITE_ERROR_DATASET_URI,
+ ERROR_DATASET_URI);
+ final DatasetSink sink = sink(in, config);
+
+ GenericRecordBuilder builder = new GenericRecordBuilder(
+ INCOMPATIBLE_SCHEMA);
+ GenericData.Record rec = builder.set("username", "koala").build();
+
+ // We pass in a valid schema in the header, but an incompatible schema
+ // was used to serialize the record
+ Event badEvent = event(rec, INCOMPATIBLE_SCHEMA, SCHEMA_FILE, true);
+ putToChannel(in, badEvent);
+
+ // run the sink
+ sink.start();
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals("Good records should have been written",
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should not have rolled back", 0, remaining(in));
+ Assert.assertEquals("Should have saved the bad event",
+ Sets.newHashSet(AvroFlumeEvent.newBuilder()
+ .setBody(ByteBuffer.wrap(badEvent.getBody()))
+ .setHeaders(toUtf8Map(badEvent.getHeaders()))
+ .build()),
+ read(Datasets.load(ERROR_DATASET_URI, AvroFlumeEvent.class)));
+ }
+
+ /**
+ * Without a save policy, an event whose header points at the valid schema
+ * file but whose body was serialized with INCOMPATIBLE_SCHEMA must make
+ * process() throw EventDeliveryException and leave every event in the
+ * channel.
+ */
+ @Test
+ public void testSerializedWithIncompatibleSchemas() throws EventDeliveryException {
+ final DatasetSink sink = sink(in, config);
+
+ GenericRecordBuilder builder = new GenericRecordBuilder(
+ INCOMPATIBLE_SCHEMA);
+ GenericData.Record rec = builder.set("username", "koala").build();
+
+ // We pass in a valid schema in the header, but an incompatible schema
+ // was used to serialize the record
+ putToChannel(in, event(rec, INCOMPATIBLE_SCHEMA, SCHEMA_FILE, true));
+
+ // run the sink
+ sink.start();
+ assertThrows("Should fail", EventDeliveryException.class,
+ new Callable() {
+ @Override
+ public Object call() throws EventDeliveryException {
+ sink.process();
+ return null;
+ }
+ });
+ sink.stop();
+
+ Assert.assertEquals("Should have rolled back",
+ expected.size() + 1, remaining(in));
+ }
+
+ /**
+ * With the default configuration, process() must commit the channel
+ * transaction while the written records stay unreadable until stop().
+ */
+ @Test
+ public void testCommitOnBatch() throws EventDeliveryException {
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ // the transaction should commit during the call to process
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ // but the data won't be visible yet
+ Assert.assertEquals(0,
+ read(Datasets.load(FILE_DATASET_URI)).size());
+
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ }
+
+ /**
+ * With commit-on-batch and sync-on-batch both disabled, process() must
+ * leave the channel transaction open and the data unreadable; stop() must
+ * then publish the records and commit the transaction.
+ */
+ @Test
+ public void testCommitOnBatchFalse() throws EventDeliveryException {
+ config.put(DatasetSinkConstants.CONFIG_FLUSHABLE_COMMIT_ON_BATCH,
+ Boolean.toString(false));
+ config.put(DatasetSinkConstants.CONFIG_SYNCABLE_SYNC_ON_BATCH,
+ Boolean.toString(false));
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ // the transaction should not commit during the call to process
+ assertThrows("Transaction should still be open", IllegalStateException.class,
+ new Callable() {
+ @Override
+ public Object call() throws EventDeliveryException {
+ in.getTransaction().begin();
+ return null;
+ }
+ });
+
+ // the data won't be visible
+ Assert.assertEquals(0,
+ read(Datasets.load(FILE_DATASET_URI)).size());
+
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ // the transaction should commit during the call to stop
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * Disabling commit-on-batch while enabling sync-on-batch is an invalid
+ * combination: configuring the sink must throw IllegalArgumentException.
+ */
+ @Test
+ public void testCommitOnBatchFalseSyncOnBatchTrue() throws EventDeliveryException {
+ config.put(DatasetSinkConstants.CONFIG_FLUSHABLE_COMMIT_ON_BATCH,
+ Boolean.toString(false));
+ config.put(DatasetSinkConstants.CONFIG_SYNCABLE_SYNC_ON_BATCH,
+ Boolean.toString(true));
+
+ try {
+ sink(in, config);
+ Assert.fail("Should have thrown IllegalArgumentException");
+ } catch (IllegalArgumentException ex) {
+ // expected
+ }
+ }
+
+ /**
+ * With batch commits disabled, manually closes the writer, commits the
+ * transaction and creates a new writer; the sink must then hold a non-null
+ * writer, the channel must be empty, and all records must be readable after
+ * stop().
+ */
+ @Test
+ public void testCloseAndCreateWriter() throws EventDeliveryException {
+ config.put(DatasetSinkConstants.CONFIG_FLUSHABLE_COMMIT_ON_BATCH,
+ Boolean.toString(false));
+ config.put(DatasetSinkConstants.CONFIG_SYNCABLE_SYNC_ON_BATCH,
+ Boolean.toString(false));
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ sink.closeWriter();
+ sink.commitTransaction();
+ sink.createWriter();
+
+ Assert.assertNotNull("Writer should not be null", sink.getWriter());
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ }
+
+ /**
+ * With batch commits disabled, manually closes the writer and commits the
+ * transaction; the sink must then report a null writer, the channel must be
+ * empty, and all records must be readable after stop().
+ */
+ @Test
+ public void testCloseWriter() throws EventDeliveryException {
+ config.put(DatasetSinkConstants.CONFIG_FLUSHABLE_COMMIT_ON_BATCH,
+ Boolean.toString(false));
+ config.put(DatasetSinkConstants.CONFIG_SYNCABLE_SYNC_ON_BATCH,
+ Boolean.toString(false));
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ sink.closeWriter();
+ sink.commitTransaction();
+
+ Assert.assertNull("Writer should be null", sink.getWriter());
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ }
+
+ /**
+ * With batch commits disabled, commits the transaction and calls
+ * createWriter() without closing the current writer first; the sink must
+ * hold a non-null writer, the channel must be empty, and no records are
+ * expected to be readable after stop().
+ */
+ @Test
+ public void testCreateWriter() throws EventDeliveryException {
+ config.put(DatasetSinkConstants.CONFIG_FLUSHABLE_COMMIT_ON_BATCH,
+ Boolean.toString(false));
+ config.put(DatasetSinkConstants.CONFIG_SYNCABLE_SYNC_ON_BATCH,
+ Boolean.toString(false));
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ sink.commitTransaction();
+ sink.createWriter();
+ Assert.assertNotNull("Writer should not be null", sink.getWriter());
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+
+ sink.stop();
+
+ // NOTE(review): presumably nothing is readable because the writer that
+ // received the records was replaced without being closed - confirm against
+ // DatasetSink.createWriter()
+ Assert.assertEquals(0, read(Datasets.load(FILE_DATASET_URI)).size());
+ }
+
+ /**
+ * Injects a mocked parser, failure policy and writer into the sink, makes
+ * the writer throw DataFileWriter.AppendWriteException, and verifies that
+ * write() hands the event to the failure policy.
+ */
+ @Test
+ public void testAppendWriteExceptionInvokesPolicy()
+ throws EventDeliveryException, NonRecoverableEventException {
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ // Mock an Event
+ Event mockEvent = mock(Event.class);
+ when(mockEvent.getBody()).thenReturn(new byte[] { 0x01 });
+
+ // Mock a GenericRecord
+ GenericRecord mockRecord = mock(GenericRecord.class);
+
+ // Mock an EntityParser
+ EntityParser mockParser = mock(EntityParser.class);
+ when(mockParser.parse(eq(mockEvent), any(GenericRecord.class)))
+ .thenReturn(mockRecord);
+ sink.setParser(mockParser);
+
+ // Mock a FailurePolicy
+ FailurePolicy mockFailurePolicy = mock(FailurePolicy.class);
+ sink.setFailurePolicy(mockFailurePolicy);
+
+ // Mock a DatasetWriter
+ DatasetWriter mockWriter = mock(DatasetWriter.class);
+ doThrow(new DataFileWriter.AppendWriteException(new IOException()))
+ .when(mockWriter).write(mockRecord);
+
+ sink.setWriter(mockWriter);
+ sink.write(mockEvent);
+
+ // Verify that the event was sent to the failure policy
+ verify(mockFailurePolicy).handle(eq(mockEvent), any(Throwable.class));
+
+ sink.stop();
+ }
+
+ /**
+ * Injects a mocked parser, failure policy and writer into the sink, makes
+ * the writer throw a plain RuntimeException, and verifies that write()
+ * throws EventDeliveryException without invoking the failure policy.
+ */
+ @Test
+ public void testRuntimeExceptionThrowsEventDeliveryException()
+ throws EventDeliveryException, NonRecoverableEventException {
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ // Mock an Event
+ Event mockEvent = mock(Event.class);
+ when(mockEvent.getBody()).thenReturn(new byte[] { 0x01 });
+
+ // Mock a GenericRecord
+ GenericRecord mockRecord = mock(GenericRecord.class);
+
+ // Mock an EntityParser
+ EntityParser mockParser = mock(EntityParser.class);
+ when(mockParser.parse(eq(mockEvent), any(GenericRecord.class)))
+ .thenReturn(mockRecord);
+ sink.setParser(mockParser);
+
+ // Mock a FailurePolicy
+ FailurePolicy mockFailurePolicy = mock(FailurePolicy.class);
+ sink.setFailurePolicy(mockFailurePolicy);
+
+ // Mock a DatasetWriter
+ DatasetWriter mockWriter = mock(DatasetWriter.class);
+ doThrow(new RuntimeException()).when(mockWriter).write(mockRecord);
+
+ sink.setWriter(mockWriter);
+
+ try {
+ sink.write(mockEvent);
+ Assert.fail("Should throw EventDeliveryException");
+ } catch (EventDeliveryException ex) {
+ // expected: the RuntimeException must surface as EventDeliveryException
+ }
+
+ // Verify that the event was not sent to the failure policy
+ verify(mockFailurePolicy, never()).handle(eq(mockEvent), any(Throwable.class));
+
+ sink.stop();
+ }
+
+ /**
+  * After a normal process() call the writer is forced to null; a second
+  * process() call must cope with that instead of failing with a
+  * NullPointerException, and the channel must end up empty.
+  */
+ @Test
+ public void testProcessHandlesNullWriter() throws EventDeliveryException,
+     NonRecoverableEventException {
+   DatasetSink sink = sink(in, config);
+
+   // run the sink
+   sink.start();
+   sink.process();
+
+   // explicitly set the writer to null
+   sink.setWriter(null);
+
+   // this should not throw an NPE
+   sink.process();
+
+   sink.stop();
+
+   Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+  * Creates a DatasetSink attached to the given channel and configured from
+  * the given context.
+  *
+  * @param in the channel the sink reads from
+  * @param config the configuration applied to the sink
+  * @return the configured sink (not started)
+  */
+ public static DatasetSink sink(Channel in, Context config) {
+   final DatasetSink configured = new DatasetSink();
+   configured.setChannel(in);
+   Configurables.configure(configured, config);
+   return configured;
+ }
+
+ /**
+ * Reads every record currently available from the given view into a set.
+ * The reader is closed before returning, including when reading fails.
+ *
+ * @param view the dataset or view to read
+ * @return a new HashSet holding the records produced by the view's reader
+ */
+ public static HashSet read(View view) {
+ DatasetReader reader = null;
+ try {
+ reader = view.newReader();
+ return Sets.newHashSet(reader.iterator());
+ } finally {
+ // reader stays null when newReader() itself failed
+ if (reader != null) {
+ reader.close();
+ }
+ }
+ }
+
+ public static int remaining(Channel ch) throws EventDeliveryException {
+ Transaction t = ch.getTransaction();
+ try {
+ t.begin();
+ int count = 0;
+ while (ch.take() != null) {
+ count += 1;
+ }
+ t.commit();
+ return count;
+ } catch (Throwable th) {
+ t.rollback();
+ Throwables.propagateIfInstanceOf(th, Error.class);
+ Throwables.propagateIfInstanceOf(th, EventDeliveryException.class);
+ throw new EventDeliveryException(th);
+ } finally {
+ t.close();
+ }
+ }
+
+ /**
+ * Varargs convenience overload: wraps the events in a list and delegates to
+ * the Iterable-based putToChannel.
+ *
+ * @param in the channel to put the events on
+ * @param records the events to put
+ * @throws EventDeliveryException if the events cannot be put on the channel
+ */
+ public static void putToChannel(Channel in, Event... records)
+ throws EventDeliveryException {
+ putToChannel(in, Arrays.asList(records));
+ }
+
+ /**
+ * Puts all given events on the channel inside a single transaction.
+ *
+ * On any failure the transaction is rolled back; Errors and
+ * EventDeliveryExceptions are rethrown unchanged and anything else is
+ * wrapped in an EventDeliveryException. The transaction is always closed.
+ *
+ * @param in the channel to put the events on
+ * @param records the events to put, iterated exactly once
+ * @throws EventDeliveryException if the events cannot be put on the channel
+ */
+ public static void putToChannel(Channel in, Iterable records)
+ throws EventDeliveryException {
+ Transaction t = in.getTransaction();
+ try {
+ t.begin();
+ for (Event record : records) {
+ in.put(record);
+ }
+ t.commit();
+ } catch (Throwable th) {
+ t.rollback();
+ // rethrow Errors and EventDeliveryExceptions as-is, wrap everything else
+ Throwables.propagateIfInstanceOf(th, Error.class);
+ Throwables.propagateIfInstanceOf(th, EventDeliveryException.class);
+ throw new EventDeliveryException(th);
+ } finally {
+ t.close();
+ }
+ }
+
+ /**
+  * Builds a Flume event whose body is the datum serialized with the given
+  * schema and whose single header tells the sink which schema to use.
+  *
+  * @param datum the record to serialize into the event body
+  * @param schema the schema used for serialization and, when
+  *        {@code useURI} is false, placed in the schema-literal header
+  * @param file the schema file whose URI goes into the schema-URL header;
+  *        only dereferenced when {@code useURI} is true
+  * @param useURI true to reference the schema by URL, false to embed it
+  * @return the populated event
+  */
+ public static Event event(
+     Object datum, Schema schema, File file, boolean useURI) {
+   Map headers = Maps.newHashMap();
+   if (!useURI) {
+     headers.put(DatasetSinkConstants.AVRO_SCHEMA_LITERAL_HEADER,
+         schema.toString());
+   } else {
+     headers.put(DatasetSinkConstants.AVRO_SCHEMA_URL_HEADER,
+         file.getAbsoluteFile().toURI().toString());
+   }
+
+   Event built = new SimpleEvent();
+   built.setBody(serialize(datum, schema));
+   built.setHeaders(headers);
+   return built;
+ }
+
+ /**
+  * Serializes a datum to Avro binary using a ReflectDatumWriter for the
+  * given schema.
+  *
+  * @param datum the object to serialize
+  * @param schema the schema the writer is created with
+  * @return the encoded bytes
+  */
+ @SuppressWarnings("unchecked")
+ public static byte[] serialize(Object datum, Schema schema) {
+   final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+   final Encoder binary = EncoderFactory.get().binaryEncoder(buffer, null);
+   final ReflectDatumWriter datumWriter = new ReflectDatumWriter(schema);
+   try {
+     datumWriter.write(datum, binary);
+     binary.flush();
+   } catch (IOException ioe) {
+     // propagate() always throws, rethrowing the IOException unchecked
+     Throwables.propagate(ioe);
+   }
+   return buffer.toByteArray();
+ }
+
+ /**
+ * A convenience method to avoid a large number of @Test(expected=...) tests.
+ *
+ * This variant uses a Callable, which is allowed to throw checked Exceptions.
+ *
+ * The thrown exception's class must equal {@code expected} exactly; a
+ * subclass of the expected class does not satisfy the assertion.
+ *
+ * @param message A String message to describe this assertion
+ * @param expected An Exception class that the Callable should throw
+ * @param callable A Callable that is expected to throw the exception
+ */
+ public static void assertThrows(
+ String message, Class extends Exception> expected, Callable callable) {
+ try {
+ callable.call();
+ // fail() raises an AssertionError, which is not an Exception and so
+ // is not swallowed by the catch below
+ Assert.fail("No exception was thrown (" + message + "), expected: " +
+ expected.getName());
+ } catch (Exception actual) {
+ Assert.assertEquals(message, expected, actual.getClass());
+ }
+ }
+
+ /**
+ * Helper function to convert a map of String to a map of Utf8.
+ *
+ * A new map is created and returned; the map passed in is not modified.
+ *
+ * @param map A Map of String to String
+ * @return The same mappings converting the {@code String}s to {@link Utf8}s
+ */
+ public static Map toUtf8Map(
+ Map map) {
+ Map utf8Map = Maps.newHashMap();
+ for (Map.Entry entry : map.entrySet()) {
+ // both key and value are wrapped in Utf8
+ utf8Map.put(new Utf8(entry.getKey()), new Utf8(entry.getValue()));
+ }
+ return utf8Map;
+ }
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/test/resources/enable-kerberos.xml b/code/flume-ng-sinks/flume-dataset-sink/src/test/resources/enable-kerberos.xml
new file mode 100644
index 0000000..85b0447
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/test/resources/enable-kerberos.xml
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+ hadoop.security.authentication
+ kerberos
+
+
+
+ hadoop.security.authorization
+ true
+
+
+
diff --git a/code/flume-ng-sinks/flume-hdfs-sink/pom.xml b/code/flume-ng-sinks/flume-hdfs-sink/pom.xml
new file mode 100644
index 0000000..bcf6556
--- /dev/null
+++ b/code/flume-ng-sinks/flume-hdfs-sink/pom.xml
@@ -0,0 +1,196 @@
+
+
+
+
+ 4.0.0
+
+
+ flume-ng-sinks
+ org.apache.flume
+ 1.7.0
+
+
+ org.apache.flume.flume-ng-sinks
+ flume-hdfs-sink
+ Flume NG HDFS Sink
+
+
+
+
+ org.apache.rat
+ apache-rat-plugin
+
+
+
+
+
+
+
+ org.apache.flume
+ flume-ng-sdk
+
+
+
+ org.apache.flume
+ flume-ng-configuration
+
+
+
+ org.apache.flume
+ flume-ng-core
+
+
+
+ org.slf4j
+ slf4j-api
+
+
+
+ com.google.guava
+ guava
+
+
+
+ junit
+ junit
+ test
+
+
+
+ org.slf4j
+ slf4j-log4j12
+ test
+
+
+
+ org.mockito
+ mockito-all
+ test
+
+
+
+ org.apache.hadoop
+ ${hadoop.common.artifact.id}
+ true
+
+
+
+ commons-lang
+ commons-lang
+
+
+
+ commons-io
+ commons-io
+
+
+
+
+
+
+
+ hadoop-1.0
+
+
+ flume.hadoop.profile
+ 1
+
+
+
+
+
+ org.apache.hadoop
+ hadoop-test
+ test
+
+
+
+
+ com.sun.jersey
+ jersey-core
+ test
+
+
+
+
+
+
+ hadoop-2
+
+
+ flume.hadoop.profile
+ 2
+
+
+
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+ true
+
+
+
+ org.apache.hadoop
+ hadoop-auth
+ true
+
+
+
+ org.apache.hadoop
+ hadoop-minicluster
+ test
+
+
+
+
+
+
+ hbase-1
+
+
+ !flume.hadoop.profile
+
+
+
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+ true
+
+
+
+ org.apache.hadoop
+ hadoop-auth
+ true
+
+
+
+ org.apache.hadoop
+ hadoop-minicluster
+ test
+
+
+
+
+
+
+
diff --git a/code/flume-ng-sinks/flume-hdfs-sink/src/main/java/org/apache/flume/sink/hdfs/AbstractHDFSWriter.java b/code/flume-ng-sinks/flume-hdfs-sink/src/main/java/org/apache/flume/sink/hdfs/AbstractHDFSWriter.java
new file mode 100644
index 0000000..2fe309f
--- /dev/null
+++ b/code/flume-ng-sinks/flume-hdfs-sink/src/main/java/org/apache/flume/sink/hdfs/AbstractHDFSWriter.java
@@ -0,0 +1,280 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.hdfs;
+
+import com.google.common.base.Preconditions;
+import org.apache.flume.Context;
+import org.apache.flume.FlumeException;
+import org.apache.flume.annotations.InterfaceAudience;
+import org.apache.flume.annotations.InterfaceStability;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public abstract class AbstractHDFSWriter implements HDFSWriter {
+
+ private static final Logger logger =
+ LoggerFactory.getLogger(AbstractHDFSWriter.class);
+
+ private FSDataOutputStream outputStream;
+ private FileSystem fs;
+ private Path destPath;
+ private Method refGetNumCurrentReplicas = null;
+ private Method refGetDefaultReplication = null;
+ private Method refHflushOrSync = null;
+ private Integer configuredMinReplicas = null;
+ private Integer numberOfCloseRetries = null;
+ private long timeBetweenCloseRetries = Long.MAX_VALUE;
+
+ static final Object[] NO_ARGS = new Object[]{};
+
+  /**
+   * Reads the writer-level settings from the sink context.
+   *
+   * <ul>
+   *   <li>{@code hdfs.minBlockReplicas}: optional, must be &gt;= 0 when set.</li>
+   *   <li>{@code hdfs.closeTries}: defaults to 1; the retry count stored is
+   *       this value minus one.</li>
+   *   <li>{@code hdfs.callTimeout}: only read when more than one retry is
+   *       configured; defaults to 10000 and is divided across the retries,
+   *       with a floor of 1000.</li>
+   * </ul>
+   *
+   * @param context the configuration to read from
+   * @throws IllegalArgumentException if hdfs.minBlockReplicas is negative
+   */
+  @Override
+  public void configure(Context context) {
+    configuredMinReplicas = context.getInteger("hdfs.minBlockReplicas");
+    if (configuredMinReplicas != null) {
+      Preconditions.checkArgument(configuredMinReplicas >= 0,
+          "hdfs.minBlockReplicas must be greater than or equal to 0");
+    }
+    numberOfCloseRetries = context.getInteger("hdfs.closeTries", 1) - 1;
+
+    if (numberOfCloseRetries > 1) {
+      try {
+        timeBetweenCloseRetries = context.getLong("hdfs.callTimeout", 10000L);
+      } catch (NumberFormatException e) {
+        // Log the raw string: re-reading the value with getLong() here would
+        // throw the same NumberFormatException from inside the handler.
+        logger.warn("hdfs.callTimeout can not be parsed to a long: " +
+            context.getString("hdfs.callTimeout"));
+      }
+      timeBetweenCloseRetries = Math.max(timeBetweenCloseRetries / numberOfCloseRetries, 1000);
+    }
+
+  }
+
+  /**
+   * Reports whether the registered stream currently has fewer replicas than
+   * desired.
+   *
+   * Contract for subclasses: Call registerCurrentStream() on open,
+   * unregisterCurrentStream() on close, and the base class takes care of the
+   * rest.
+   *
+   * @return true only when the current replica count is available (not -1) and
+   *         is below the target, where the target is hdfs.minBlockReplicas if
+   *         configured and the file system's desired replication otherwise;
+   *         false in every other case, including reflection failures
+   */
+  @Override
+  public boolean isUnderReplicated() {
+    try {
+      int currentReplicas = getNumCurrentReplicas();
+      if (currentReplicas != -1) {
+        int targetReplicas = (configuredMinReplicas != null)
+            ? configuredMinReplicas
+            : getFsDesiredReplication();
+        return currentReplicas < targetReplicas;
+      }
+    } catch (IllegalAccessException e) {
+      logger.error("Unexpected error while checking replication factor", e);
+    } catch (InvocationTargetException e) {
+      logger.error("Unexpected error while checking replication factor", e);
+    } catch (IllegalArgumentException e) {
+      logger.error("Unexpected error while checking replication factor", e);
+    }
+    return false;
+  }
+
+ /**
+  * Records the stream, file system and destination path currently being
+  * written, and resolves the reflective method handles used by the
+  * replication checks and by hflushOrSync().
+  *
+  * @param outputStream the open output stream; must not be null
+  * @param fs the file system the stream belongs to; must not be null
+  * @param destPath the path being written; must not be null
+  */
+ protected void registerCurrentStream(FSDataOutputStream outputStream,
+ FileSystem fs, Path destPath) {
+ Preconditions.checkNotNull(outputStream, "outputStream must not be null");
+ Preconditions.checkNotNull(fs, "fs must not be null");
+ Preconditions.checkNotNull(destPath, "destPath must not be null");
+
+ this.outputStream = outputStream;
+ this.fs = fs;
+ this.destPath = destPath;
+ // Either of these two lookups may yield null when the method is unavailable.
+ this.refGetNumCurrentReplicas = reflectGetNumCurrentReplicas(outputStream);
+ this.refGetDefaultReplication = reflectGetDefaultReplication(fs);
+ // Throws FlumeException when neither hflush nor sync can be found.
+ this.refHflushOrSync = reflectHflushOrSync(outputStream);
+
+ }
+
+ /**
+  * Forgets the stream, file system, path and replication method handles
+  * stored by registerCurrentStream().
+  */
+ protected void unregisterCurrentStream() {
+ this.outputStream = null;
+ this.fs = null;
+ this.destPath = null;
+ this.refGetNumCurrentReplicas = null;
+ this.refGetDefaultReplication = null;
+ // NOTE(review): refHflushOrSync is not cleared here, so it keeps the handle
+ // resolved for the previous stream - confirm this is intentional.
+ }
+
+  /**
+   * Looks up the replication factor the file system wants for the registered
+   * destination path.
+   *
+   * @return 0 when no file system or path is registered, or when the reflective
+   *         getDefaultReplication(Path) call fails; otherwise the value
+   *         reported by the file system
+   */
+  public int getFsDesiredReplication() {
+    if (fs == null || destPath == null) {
+      return 0;
+    }
+    if (refGetDefaultReplication == null) {
+      // will not work on Federated HDFS (see HADOOP-8014)
+      return fs.getDefaultReplication();
+    }
+    short pathReplication = 0;
+    try {
+      pathReplication = (Short) refGetDefaultReplication.invoke(fs, destPath);
+    } catch (IllegalAccessException e) {
+      logger.warn("Unexpected error calling getDefaultReplication(Path)", e);
+    } catch (InvocationTargetException e) {
+      logger.warn("Unexpected error calling getDefaultReplication(Path)", e);
+    }
+    return pathReplication;
+  }
+
+  /**
+   * Fetches the datanode replication count of the file that is currently open,
+   * by reflectively invoking getNumCurrentReplicas on the wrapped stream.
+   *
+   * As noted by the original authors: if the pipeline isn't started yet or is
+   * empty, the default replication factor is reported; a result of -1 means
+   * the HDFS-826 patch is not in effect.
+   *
+   * @return the replica count, or -1 when no method handle or stream is
+   *         registered, the wrapped stream is null, or the call does not
+   *         return an Integer
+   * @throws InvocationTargetException if the invoked method itself throws
+   * @throws IllegalAccessException if the method cannot be accessed
+   * @throws IllegalArgumentException if the reflective call is given bad arguments
+   */
+  public int getNumCurrentReplicas()
+      throws IllegalArgumentException, IllegalAccessException,
+          InvocationTargetException {
+    if (refGetNumCurrentReplicas == null || outputStream == null) {
+      return -1;
+    }
+    OutputStream wrapped = outputStream.getWrappedStream();
+    if (wrapped == null) {
+      return -1;
+    }
+    Object replicaCount = refGetNumCurrentReplicas.invoke(wrapped, NO_ARGS);
+    return (replicaCount instanceof Integer) ? ((Integer) replicaCount).intValue() : -1;
+  }
+
+  /**
+   * Find the 'getNumCurrentReplicas' on the passed <code>os</code> stream.
+   *
+   * The lookup is done on the class of the stream wrapped by {@code os} using
+   * getDeclaredMethod, and the method is made accessible before it is returned.
+   *
+   * @param os the stream whose wrapped stream is inspected; may be null
+   * @return Method or null.
+   */
+  private Method reflectGetNumCurrentReplicas(FSDataOutputStream os) {
+    Method m = null;
+    if (os != null) {
+      Class<? extends OutputStream> wrappedStreamClass = os.getWrappedStream()
+          .getClass();
+      try {
+        m = wrappedStreamClass.getDeclaredMethod("getNumCurrentReplicas",
+            new Class<?>[] {});
+        m.setAccessible(true);
+      } catch (NoSuchMethodException e) {
+        logger.info("FileSystem's output stream doesn't support"
+            + " getNumCurrentReplicas; --HDFS-826 not available; fsOut="
+            + wrappedStreamClass.getName() + "; err=" + e);
+      } catch (SecurityException e) {
+        logger.info("Doesn't have access to getNumCurrentReplicas on "
+            + "FileSystems's output stream --HDFS-826 not available; fsOut="
+            + wrappedStreamClass.getName(), e);
+        m = null; // could happen on setAccessible()
+      }
+    }
+    if (m != null) {
+      logger.debug("Using getNumCurrentReplicas--HDFS-826");
+    }
+    return m;
+  }
+
+  /**
+   * Find the 'getDefaultReplication' method on the passed <code>fs</code>
+   * FileSystem that takes a Path argument.
+   *
+   * @param fileSystem the file system whose runtime class is inspected; may be
+   *                   null
+   * @return Method or null.
+   */
+  private Method reflectGetDefaultReplication(FileSystem fileSystem) {
+    Method m = null;
+    if (fileSystem != null) {
+      Class<?> fsClass = fileSystem.getClass();
+      try {
+        m = fsClass.getMethod("getDefaultReplication",
+            new Class<?>[] { Path.class });
+      } catch (NoSuchMethodException e) {
+        logger.debug("FileSystem implementation doesn't support"
+            + " getDefaultReplication(Path); -- HADOOP-8014 not available; " +
+            "className = " + fsClass.getName() + "; err = " + e);
+      } catch (SecurityException e) {
+        logger.debug("No access to getDefaultReplication(Path) on "
+            + "FileSystem implementation -- HADOOP-8014 not available; " +
+            "className = " + fsClass.getName() + "; err = " + e);
+      }
+    }
+    if (m != null) {
+      logger.debug("Using FileSystem.getDefaultReplication(Path) from " +
+          "HADOOP-8014");
+    }
+    return m;
+  }
+
+  /**
+   * Resolves the flush method to use on the given stream: {@code hflush} when
+   * the stream's class has it, otherwise {@code sync}.
+   *
+   * @param os the stream whose runtime class is inspected; may be null
+   * @return the resolved Method, or null when {@code os} is null
+   * @throws FlumeException if neither hflush nor sync can be looked up
+   */
+  private Method reflectHflushOrSync(FSDataOutputStream os) {
+    Method m = null;
+    if (os != null) {
+      Class<?> fsDataOutputStreamClass = os.getClass();
+      try {
+        m = fsDataOutputStreamClass.getMethod("hflush");
+      } catch (NoSuchMethodException ex) {
+        logger.debug("HFlush not found. Will use sync() instead");
+        try {
+          m = fsDataOutputStreamClass.getMethod("sync");
+        } catch (Exception ex1) {
+          String msg = "Neither hflush nor sync were found. That seems to be " +
+              "a problem!";
+          logger.error(msg);
+          throw new FlumeException(msg, ex1);
+        }
+      }
+    }
+    return m;
+  }
+
+ /**
+ * If hflush is available in this version of HDFS, then this method calls
+ * hflush, else it calls sync.
+ * @param os - The stream to flush/sync
+ * @throws IOException
+ */
+ protected void hflushOrSync(FSDataOutputStream os) throws IOException {
+ try {
+ // At this point the refHflushOrSync cannot be null,
+ // since register method would have thrown if it was.
+ this.refHflushOrSync.invoke(os);
+ } catch (InvocationTargetException e) {
+ String msg = "Error while trying to hflushOrSync!";
+ logger.error(msg);
+ Throwable cause = e.getCause();
+ if (cause != null && cause instanceof IOException) {
+ throw (IOException)cause;
+ }
+ throw new FlumeException(msg, e);
+ } catch (Exception e) {
+ String msg = "Error while trying to hflushOrSync!";
+ logger.error(msg);
+ throw new FlumeException(msg, e);
+ }
+ }
+}
diff --git a/code/flume-ng-sinks/flume-hdfs-sink/src/main/java/org/apache/flume/sink/hdfs/AvroEventSerializer.java b/code/flume-ng-sinks/flume-hdfs-sink/src/main/java/org/apache/flume/sink/hdfs/AvroEventSerializer.java
new file mode 100644
index 0000000..3231742
--- /dev/null
+++ b/code/flume-ng-sinks/flume-hdfs-sink/src/main/java/org/apache/flume/sink/hdfs/AvroEventSerializer.java
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.hdfs;
+
+import org.apache.avro.AvroRuntimeException;
+import org.apache.avro.Schema;
+import org.apache.avro.file.CodecFactory;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.FlumeException;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.serialization.EventSerializer;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URL;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+import static org.apache.flume.serialization.AvroEventSerializerConfigurationConstants.COMPRESSION_CODEC;
+import static org.apache.flume.serialization.AvroEventSerializerConfigurationConstants.DEFAULT_COMPRESSION_CODEC;
+import static org.apache.flume.serialization.AvroEventSerializerConfigurationConstants.DEFAULT_STATIC_SCHEMA_URL;
+import static org.apache.flume.serialization.AvroEventSerializerConfigurationConstants.DEFAULT_SYNC_INTERVAL_BYTES;
+import static org.apache.flume.serialization.AvroEventSerializerConfigurationConstants.STATIC_SCHEMA_URL;
+import static org.apache.flume.serialization.AvroEventSerializerConfigurationConstants.SYNC_INTERVAL_BYTES;
+
+/**
+ *
+ * This class serializes Flume {@linkplain org.apache.flume.Event events} into Avro data files. The
+ * Flume event body is read as an Avro datum, and is then written to the
+ * {@link org.apache.flume.serialization.EventSerializer}'s output stream in Avro data file format.
+ *
+ *
+ * The Avro schema is determined by reading a Flume event header. The schema may be
+ * specified either as a literal, by setting {@link #AVRO_SCHEMA_LITERAL_HEADER} (not
+ * recommended, since the full schema must be transmitted in every event),
+ * or as a URL which the schema may be read from, by setting {@link
+ * #AVRO_SCHEMA_URL_HEADER}. Schemas read from URLs are cached by instances of this
+ * class so that the overhead of retrieval is minimized.
+ *
+ */
+public class AvroEventSerializer implements EventSerializer, Configurable {
+
+ private static final Logger logger =
+ LoggerFactory.getLogger(AvroEventSerializer.class);
+
+ public static final String AVRO_SCHEMA_LITERAL_HEADER = "flume.avro.schema.literal";
+ public static final String AVRO_SCHEMA_URL_HEADER = "flume.avro.schema.url";
+
+ private final OutputStream out;
+ private DatumWriter
+ *
+ * This can be used to send events to ElasticSearch and use clients such as
+ * Kibana, which expect Logstash-formatted indexes
+ *
+ *
+ * {
+ * "@timestamp": "2010-12-21T21:48:33.309258Z",
+ * "@tags": [ "array", "of", "tags" ],
+ * "@type": "string",
+ * "@source": "source of the event, usually a URL."
+ * "@source_host": ""
+ * "@source_path": ""
+ * "@fields":{
+ * # a set of fields for this event
+ * "user": "jordan",
+ * "command": "shutdown -r":
+ * }
+ * "@message": "the original plain-text message"
+ * }
+ *
+ *
+ * If the following headers are present, they will map to the above logstash
+ * output as long as the logstash fields are not already present.
+ *
+ *
+ *
+ * @see https
+ * ://github.com/logstash/logstash/wiki/logstash%27s-internal-message-
+ * format
+ */
+public class ElasticSearchLogStashEventSerializer implements
+    ElasticSearchEventSerializer {
+
+  /**
+   * Starts a JSON object and writes the event body followed by the event
+   * headers into it.
+   *
+   * @param event the event to serialize
+   * @return the builder holding the serialized content
+   * @throws IOException if writing to the builder fails
+   */
+  @Override
+  public XContentBuilder getContentBuilder(Event event) throws IOException {
+    XContentBuilder builder = jsonBuilder().startObject();
+    appendBody(builder, event);
+    appendHeaders(builder, event);
+    return builder;
+  }
+
+  /** Writes the event body as the "@message" field. */
+  private void appendBody(XContentBuilder builder, Event event)
+      throws IOException, UnsupportedEncodingException {
+    byte[] body = event.getBody();
+    ContentBuilderUtil.appendField(builder, "@message", body);
+  }
+
+  /**
+   * Maps the well-known headers (timestamp, source, type, host, src_path) to
+   * their "@"-prefixed fields unless the "@"-prefixed header is already
+   * present, then writes every header into the "@fields" object.
+   *
+   * @throws NumberFormatException if the "timestamp" header is not a long
+   */
+  private void appendHeaders(XContentBuilder builder, Event event)
+      throws IOException {
+    Map<String, String> headers = Maps.newHashMap(event.getHeaders());
+
+    String timestamp = headers.get("timestamp");
+    if (!StringUtils.isBlank(timestamp)
+        && StringUtils.isBlank(headers.get("@timestamp"))) {
+      long timestampMs = Long.parseLong(timestamp);
+      builder.field("@timestamp", new Date(timestampMs));
+    }
+
+    String source = headers.get("source");
+    if (!StringUtils.isBlank(source)
+        && StringUtils.isBlank(headers.get("@source"))) {
+      ContentBuilderUtil.appendField(builder, "@source",
+          source.getBytes(charset));
+    }
+
+    String type = headers.get("type");
+    if (!StringUtils.isBlank(type)
+        && StringUtils.isBlank(headers.get("@type"))) {
+      ContentBuilderUtil.appendField(builder, "@type", type.getBytes(charset));
+    }
+
+    String host = headers.get("host");
+    if (!StringUtils.isBlank(host)
+        && StringUtils.isBlank(headers.get("@source_host"))) {
+      ContentBuilderUtil.appendField(builder, "@source_host",
+          host.getBytes(charset));
+    }
+
+    String srcPath = headers.get("src_path");
+    if (!StringUtils.isBlank(srcPath)
+        && StringUtils.isBlank(headers.get("@source_path"))) {
+      ContentBuilderUtil.appendField(builder, "@source_path",
+          srcPath.getBytes(charset));
+    }
+
+    builder.startObject("@fields");
+    for (Map.Entry<String, String> header : headers.entrySet()) {
+      String value = header.getValue();
+      if (value == null) {
+        // A header without a value has nothing to encode; skip it instead of
+        // failing the whole event with a NullPointerException.
+        continue;
+      }
+      ContentBuilderUtil.appendField(builder, header.getKey(),
+          value.getBytes(charset));
+    }
+    builder.endObject();
+  }
+
+  @Override
+  public void configure(Context context) {
+    // NO-OP...
+  }
+
+  @Override
+  public void configure(ComponentConfiguration conf) {
+    // NO-OP...
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/ElasticSearchSink.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/ElasticSearchSink.java
new file mode 100644
index 0000000..ebafb9f
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/ElasticSearchSink.java
@@ -0,0 +1,428 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.BATCH_SIZE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.CLUSTER_NAME;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_CLUSTER_NAME;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_INDEX_NAME;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_INDEX_TYPE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_TTL;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.HOSTNAMES;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_NAME;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_TYPE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.SERIALIZER;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.SERIALIZER_PREFIX;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.TTL;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.TTL_REGEX;
+import org.apache.commons.lang.StringUtils;
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.CounterGroup;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.Transaction;
+import org.apache.flume.formatter.output.BucketPath;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.instrumentation.SinkCounter;
+import org.apache.flume.sink.AbstractSink;
+import org.apache.flume.sink.elasticsearch.client.ElasticSearchClient;
+import org.apache.flume.sink.elasticsearch.client.ElasticSearchClientFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Throwables;
+
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.CLIENT_PREFIX;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.CLIENT_TYPE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_CLIENT_TYPE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_INDEX_NAME_BUILDER_CLASS;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_SERIALIZER_CLASS;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_NAME_BUILDER;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_NAME_BUILDER_PREFIX;
+
+/**
+ * A sink which reads events from a channel and writes them to ElasticSearch
+ * based on the work done by https://github.com/Aconex/elasticflume.git.
+ *
+ * This sink supports batch reading of events from the channel and writing them
+ * to ElasticSearch.
+ *
+ * Indexes will be rolled daily using the format 'indexname-YYYY-MM-dd' to allow
+ * easier management of the index
+ *
+ * This sink must be configured with the mandatory parameters detailed in
+ * {@link ElasticSearchSinkConstants} It is recommended as a secondary step
+ * the ElasticSearch indexes are optimized for the specified serializer. This is
+ * not handled by the sink but is typically done by deploying a config template
+ * alongside the ElasticSearch deploy
+ *
+ * @see http
+ * ://www.elasticsearch.org/guide/reference/api/admin-indices-templates.
+ * html
+ */
+public class ElasticSearchSink extends AbstractSink implements Configurable {
+
+ private static final Logger logger = LoggerFactory
+ .getLogger(ElasticSearchSink.class);
+
+ // Used for testing
+ private boolean isLocal = false;
+ private final CounterGroup counterGroup = new CounterGroup();
+
+ private static final int defaultBatchSize = 100;
+
+ private int batchSize = defaultBatchSize;
+ private long ttlMs = DEFAULT_TTL;
+ private String clusterName = DEFAULT_CLUSTER_NAME;
+ private String indexName = DEFAULT_INDEX_NAME;
+ private String indexType = DEFAULT_INDEX_TYPE;
+ private String clientType = DEFAULT_CLIENT_TYPE;
+ private final Pattern pattern = Pattern.compile(TTL_REGEX,
+ Pattern.CASE_INSENSITIVE);
+ private Matcher matcher = pattern.matcher("");
+
+ private String[] serverAddresses = null;
+
+ private ElasticSearchClient client = null;
+ private Context elasticSearchClientContext = null;
+
+ private ElasticSearchIndexRequestBuilderFactory indexRequestFactory;
+ private ElasticSearchEventSerializer eventSerializer;
+ private IndexNameBuilder indexNameBuilder;
+ private SinkCounter sinkCounter;
+
+ /**
+ * Create an {@link ElasticSearchSink} that is not in local (same-JVM) mode;
+ * its settings are supplied later through {@link #configure(Context)}.
+ */
+ public ElasticSearchSink() {
+ this(false);
+ }
+
+ /**
+ * Create an {@link ElasticSearchSink}
+ *
+ * @param isLocal
+ * If <tt>true</tt> sink will be configured to only talk to an
+ * ElasticSearch instance hosted in the same JVM, should always be
+ * false in production
+ *
+ */
+ @VisibleForTesting
+ ElasticSearchSink(boolean isLocal) {
+ this.isLocal = isLocal;
+ }
+
+ /** Returns the server addresses parsed by configure(), or null if none were set. */
+ @VisibleForTesting
+ String[] getServerAddresses() {
+ return serverAddresses;
+ }
+
+ /** Returns the cluster name in effect (DEFAULT_CLUSTER_NAME unless configured). */
+ @VisibleForTesting
+ String getClusterName() {
+ return clusterName;
+ }
+
+ /** Returns the index name in effect (DEFAULT_INDEX_NAME unless configured). */
+ @VisibleForTesting
+ String getIndexName() {
+ return indexName;
+ }
+
+ /** Returns the index type in effect (DEFAULT_INDEX_TYPE unless configured). */
+ @VisibleForTesting
+ String getIndexType() {
+ return indexType;
+ }
+
+ /** Returns the TTL in milliseconds (DEFAULT_TTL unless configured). */
+ @VisibleForTesting
+ long getTTLMs() {
+ return ttlMs;
+ }
+
+ /**
+ * Returns the event serializer created by configure(); it is only assigned
+ * when the configured serializer class is an ElasticSearchEventSerializer
+ * and not an ElasticSearchIndexRequestBuilderFactory.
+ */
+ @VisibleForTesting
+ ElasticSearchEventSerializer getEventSerializer() {
+ return eventSerializer;
+ }
+
+ /** Returns the index name builder instantiated by configure(). */
+ @VisibleForTesting
+ IndexNameBuilder getIndexNameBuilder() {
+ return indexNameBuilder;
+ }
+
+ /**
+ * Takes up to batchSize events from the channel inside one transaction, hands
+ * each to the ElasticSearch client, executes the client when at least one
+ * event was taken, and commits.
+ *
+ * @return Status.READY when a full batch was taken; Status.BACKOFF when the
+ * channel yielded fewer events than batchSize (including none)
+ * @throws EventDeliveryException when a checked exception occurs; the
+ * transaction is rolled back first. Errors and RuntimeExceptions are
+ * rolled back and then propagated as they are.
+ */
+ @Override
+ public Status process() throws EventDeliveryException {
+ logger.debug("processing...");
+ Status status = Status.READY;
+ Channel channel = getChannel();
+ Transaction txn = channel.getTransaction();
+ try {
+ txn.begin();
+ int count;
+ for (count = 0; count < batchSize; ++count) {
+ Event event = channel.take();
+
+ // A null event means the channel has nothing more to give right now.
+ if (event == null) {
+ break;
+ }
+ // The index type may contain escape sequences resolved from the headers.
+ String realIndexType = BucketPath.escapeString(indexType, event.getHeaders());
+ client.addEvent(event, indexNameBuilder, realIndexType, ttlMs);
+ }
+
+ if (count <= 0) {
+ sinkCounter.incrementBatchEmptyCount();
+ counterGroup.incrementAndGet("channel.underflow");
+ status = Status.BACKOFF;
+ } else {
+ if (count < batchSize) {
+ sinkCounter.incrementBatchUnderflowCount();
+ status = Status.BACKOFF;
+ } else {
+ sinkCounter.incrementBatchCompleteCount();
+ }
+
+ sinkCounter.addToEventDrainAttemptCount(count);
+ client.execute();
+ }
+ // The commit happens even for an empty batch.
+ txn.commit();
+ sinkCounter.addToEventDrainSuccessCount(count);
+ counterGroup.incrementAndGet("transaction.success");
+ } catch (Throwable ex) {
+ try {
+ txn.rollback();
+ counterGroup.incrementAndGet("transaction.rollback");
+ } catch (Exception ex2) {
+ logger.error(
+ "Exception in rollback. Rollback might not have been successful.",
+ ex2);
+ }
+
+ // Unchecked failures are rethrown unchanged; checked ones are wrapped.
+ if (ex instanceof Error || ex instanceof RuntimeException) {
+ logger.error("Failed to commit transaction. Transaction rolled back.",
+ ex);
+ Throwables.propagate(ex);
+ } else {
+ logger.error("Failed to commit transaction. Transaction rolled back.",
+ ex);
+ throw new EventDeliveryException(
+ "Failed to commit transaction. Transaction rolled back.", ex);
+ }
+ } finally {
+ txn.close();
+ }
+ return status;
+ }
+
+  /**
+   * Configures the sink from the supplied Flume context.
+   *
+   * Reads the host names (unless the sink is local), index name, index type,
+   * cluster name, batch size, TTL and client type; collects the client
+   * sub-properties; instantiates and configures the serializer; and
+   * instantiates and configures the index name builder with its own
+   * sub-properties plus the resolved index name.
+   *
+   * @param context the sink configuration
+   * @throws IllegalStateException if a mandatory parameter is missing or if
+   *         the batch size or TTL is not positive
+   * @throws NumberFormatException if the batch size is not an integer
+   */
+  @Override
+  public void configure(Context context) {
+    if (!isLocal) {
+      if (StringUtils.isNotBlank(context.getString(HOSTNAMES))) {
+        serverAddresses = StringUtils.deleteWhitespace(
+            context.getString(HOSTNAMES)).split(",");
+      }
+      Preconditions.checkState(serverAddresses != null
+          && serverAddresses.length > 0, "Missing Param:" + HOSTNAMES);
+    }
+
+    if (StringUtils.isNotBlank(context.getString(INDEX_NAME))) {
+      this.indexName = context.getString(INDEX_NAME);
+    }
+
+    if (StringUtils.isNotBlank(context.getString(INDEX_TYPE))) {
+      this.indexType = context.getString(INDEX_TYPE);
+    }
+
+    if (StringUtils.isNotBlank(context.getString(CLUSTER_NAME))) {
+      this.clusterName = context.getString(CLUSTER_NAME);
+    }
+
+    if (StringUtils.isNotBlank(context.getString(BATCH_SIZE))) {
+      this.batchSize = Integer.parseInt(context.getString(BATCH_SIZE));
+    }
+
+    if (StringUtils.isNotBlank(context.getString(TTL))) {
+      this.ttlMs = parseTTL(context.getString(TTL));
+      Preconditions.checkState(ttlMs > 0, TTL
+          + " must be greater than 0 or not set.");
+    }
+
+    if (StringUtils.isNotBlank(context.getString(CLIENT_TYPE))) {
+      clientType = context.getString(CLIENT_TYPE);
+    }
+
+    elasticSearchClientContext = new Context();
+    elasticSearchClientContext.putAll(context.getSubProperties(CLIENT_PREFIX));
+
+    String serializerClazz = DEFAULT_SERIALIZER_CLASS;
+    if (StringUtils.isNotBlank(context.getString(SERIALIZER))) {
+      serializerClazz = context.getString(SERIALIZER);
+    }
+
+    Context serializerContext = new Context();
+    serializerContext.putAll(context.getSubProperties(SERIALIZER_PREFIX));
+
+    try {
+      @SuppressWarnings("unchecked")
+      Class<? extends Configurable> clazz = (Class<? extends Configurable>) Class
+          .forName(serializerClazz);
+      Configurable serializer = clazz.newInstance();
+
+      // The configured class may be either kind; only the matching field is set.
+      if (serializer instanceof ElasticSearchIndexRequestBuilderFactory) {
+        indexRequestFactory
+            = (ElasticSearchIndexRequestBuilderFactory) serializer;
+        indexRequestFactory.configure(serializerContext);
+      } else if (serializer instanceof ElasticSearchEventSerializer) {
+        eventSerializer = (ElasticSearchEventSerializer) serializer;
+        eventSerializer.configure(serializerContext);
+      } else {
+        throw new IllegalArgumentException(serializerClazz
+            + " is not an ElasticSearchEventSerializer");
+      }
+    } catch (Exception e) {
+      logger.error("Could not instantiate event serializer.", e);
+      Throwables.propagate(e);
+    }
+
+    if (sinkCounter == null) {
+      sinkCounter = new SinkCounter(getName());
+    }
+
+    String indexNameBuilderClass = DEFAULT_INDEX_NAME_BUILDER_CLASS;
+    if (StringUtils.isNotBlank(context.getString(INDEX_NAME_BUILDER))) {
+      indexNameBuilderClass = context.getString(INDEX_NAME_BUILDER);
+    }
+
+    // The builder's sub-properties must go into the builder's own context;
+    // putting them into serializerContext would leave the builder without them.
+    Context indexnameBuilderContext = new Context();
+    indexnameBuilderContext.putAll(
+        context.getSubProperties(INDEX_NAME_BUILDER_PREFIX));
+
+    try {
+      @SuppressWarnings("unchecked")
+      Class<? extends IndexNameBuilder> clazz
+          = (Class<? extends IndexNameBuilder>) Class
+              .forName(indexNameBuilderClass);
+      indexNameBuilder = clazz.newInstance();
+      // Added last so the resolved index name wins over any sub-property.
+      indexnameBuilderContext.put(INDEX_NAME, indexName);
+      indexNameBuilder.configure(indexnameBuilderContext);
+    } catch (Exception e) {
+      logger.error("Could not instantiate index name builder.", e);
+      Throwables.propagate(e);
+    }
+
+    Preconditions.checkState(StringUtils.isNotBlank(indexName),
+        "Missing Param:" + INDEX_NAME);
+    Preconditions.checkState(StringUtils.isNotBlank(indexType),
+        "Missing Param:" + INDEX_TYPE);
+    Preconditions.checkState(StringUtils.isNotBlank(clusterName),
+        "Missing Param:" + CLUSTER_NAME);
+    Preconditions.checkState(batchSize >= 1, BATCH_SIZE
+        + " must be greater than 0");
+  }
+
+  /**
+   * Starts the sink counter, creates the ElasticSearch client (a local client
+   * when the sink is in local mode, otherwise a client for the configured
+   * servers and cluster) and then starts the sink itself.
+   *
+   * A failure while creating the client is logged and counted but does not
+   * prevent the sink from being started.
+   */
+  @Override
+  public void start() {
+    ElasticSearchClientFactory clientFactory = new ElasticSearchClientFactory();
+
+    logger.info("ElasticSearch sink {} started", getName());
+    sinkCounter.start();
+    try {
+      if (isLocal) {
+        client = clientFactory.getLocalClient(
+            clientType, eventSerializer, indexRequestFactory);
+      } else {
+        client = clientFactory.getClient(clientType, serverAddresses,
+            clusterName, eventSerializer, indexRequestFactory);
+        client.configure(elasticSearchClientContext);
+      }
+      sinkCounter.incrementConnectionCreatedCount();
+    } catch (Exception ex) {
+      // Report through the logger instead of printing to stderr.
+      logger.error("ElasticSearch sink " + getName()
+          + " failed to create its client", ex);
+      sinkCounter.incrementConnectionFailedCount();
+      if (client != null) {
+        client.close();
+        sinkCounter.incrementConnectionClosedCount();
+      }
+    }
+
+    super.start();
+  }
+
+  /**
+   * Closes the ElasticSearch client if one exists, records the closed
+   * connection, stops the sink counter and then stops the sink itself.
+   */
+  @Override
+  public void stop() {
+    logger.info("ElasticSearch sink {} stopping", getName());
+    if (client != null) {
+      client.close();
+    }
+    sinkCounter.incrementConnectionClosedCount();
+    sinkCounter.stop();
+    super.stop();
+  }
+
+  /**
+   * Returns the TTL value of the ElasticSearch index in milliseconds when the
+   * TTL specifier is "ms" / "s" / "m" / "h" / "d" / "w". When no specifier is
+   * given, the number is interpreted as days. For an unknown specifier, or
+   * when the TTL string does not match at all, 0 is returned.
+   *
+   * Elasticsearch supports ttl values being provided in the format:
+   * 1d / 1w / 1ms / 1s / 1h / 1m, i.e. a number followed by a time unit such
+   * as d (days), m (minutes), h (hours), ms (milliseconds) or w (weeks). See
+   * http://www.elasticsearch.org/guide/reference/mapping/ttl-field/.
+   *
+   * @param ttl TTL value provided by user in flume configuration file for the
+   *          sink
+   * @return the ttl value in milliseconds, or 0 when it cannot be determined
+   * @throws NumberFormatException if the numeric part does not fit in a long
+   */
+  private long parseTTL(String ttl) {
+    matcher = matcher.reset(ttl);
+    // Every branch returns, so at most the first match is ever examined.
+    if (matcher.find()) {
+      String qualifier = matcher.group(2);
+      // The amount is parsed as a long (the result is a long of milliseconds),
+      // and only inside a recognised branch so unknown qualifiers still yield 0.
+      if (qualifier.equals("ms")) {
+        return Long.parseLong(matcher.group(1));
+      } else if (qualifier.equals("s")) {
+        return TimeUnit.SECONDS.toMillis(Long.parseLong(matcher.group(1)));
+      } else if (qualifier.equals("m")) {
+        return TimeUnit.MINUTES.toMillis(Long.parseLong(matcher.group(1)));
+      } else if (qualifier.equals("h")) {
+        return TimeUnit.HOURS.toMillis(Long.parseLong(matcher.group(1)));
+      } else if (qualifier.equals("d")) {
+        return TimeUnit.DAYS.toMillis(Long.parseLong(matcher.group(1)));
+      } else if (qualifier.equals("w")) {
+        return TimeUnit.DAYS.toMillis(7L * Long.parseLong(matcher.group(1)));
+      } else if (qualifier.equals("")) {
+        logger.info("TTL qualifier is empty. Defaulting to day qualifier.");
+        return TimeUnit.DAYS.toMillis(Long.parseLong(matcher.group(1)));
+      } else {
+        logger.debug("Unknown TTL qualifier provided. Setting TTL to 0.");
+        return 0;
+      }
+    }
+    logger.info("TTL not provided. Skipping the TTL config by returning 0.");
+    return 0;
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/ElasticSearchSinkConstants.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/ElasticSearchSinkConstants.java
new file mode 100644
index 0000000..da88def
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/ElasticSearchSinkConstants.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+/**
+ * Configuration keys and default values for the ElasticSearch sink.
+ */
+public class ElasticSearchSinkConstants {
+
+ /**
+ * Comma separated list of hostname:port, if the port is not present the
+ * default port '9300' will be used
+ * Example:
+ *
+ * 127.0.0.1:9200,127.0.0.2:9300
+ *
+ */
+ public static final String HOSTNAMES = "hostNames";
+
+ /**
+ * The name to index the document to, defaults to 'flume'
+ * The current date in the format 'yyyy-MM-dd' will be appended to this name,
+ * for example 'foo' will result in a daily index of 'foo-yyyy-MM-dd'
+ */
+ public static final String INDEX_NAME = "indexName";
+
+ /**
+ * The type to index the document to, defaults to 'log'
+ */
+ public static final String INDEX_TYPE = "indexType";
+
+ /**
+ * Name of the ElasticSearch cluster to connect to
+ */
+ public static final String CLUSTER_NAME = "clusterName";
+
+ /**
+ * Maximum number of events the sink should take from the channel per
+ * transaction, if available. Defaults to 100
+ */
+ public static final String BATCH_SIZE = "batchSize";
+
+ /**
+ * TTL in days, when set will cause the expired documents to be deleted
+ * automatically, if not set documents will never be automatically deleted.
+ * NOTE(review): {@link #TTL_REGEX} allows a non-digit suffix after the
+ * number, and the sink's TTL parser appears to accept the unit suffixes
+ * ms, s, m, h, d and w - confirm against the sink before relying on it.
+ */
+ public static final String TTL = "ttl";
+
+ /**
+ * The fully qualified class name of the serializer the sink should use.
+ */
+ public static final String SERIALIZER = "serializer";
+
+ /**
+ * Configuration to pass to the serializer.
+ */
+ public static final String SERIALIZER_PREFIX = SERIALIZER + ".";
+
+ /**
+ * The fully qualified class name of the index name builder the sink
+ * should use to determine name of index where the event should be sent.
+ */
+ public static final String INDEX_NAME_BUILDER = "indexNameBuilder";
+
+ /**
+ * Key prefix formed from {@link #INDEX_NAME_BUILDER} followed by a dot.
+ * Presumably used to extract the configuration passed to the index name
+ * builder, mirroring {@link #SERIALIZER_PREFIX} - TODO confirm.
+ */
+ public static final String INDEX_NAME_BUILDER_PREFIX
+ = INDEX_NAME_BUILDER + ".";
+
+ /**
+ * The client type used for sending bulks to ElasticSearch
+ */
+ public static final String CLIENT_TYPE = "client";
+
+ /**
+ * The client prefix to extract the configuration that will be passed to
+ * elasticsearch client.
+ */
+ public static final String CLIENT_PREFIX = CLIENT_TYPE + ".";
+
+ /**
+ * DEFAULTS USED BY THE SINK
+ */
+
+ // Port assumed for a host given without an explicit port (see HOSTNAMES).
+ public static final int DEFAULT_PORT = 9300;
+ // Non-positive value; presumably means "no TTL configured" - TODO confirm.
+ public static final int DEFAULT_TTL = -1;
+ public static final String DEFAULT_INDEX_NAME = "flume";
+ public static final String DEFAULT_INDEX_TYPE = "log";
+ public static final String DEFAULT_CLUSTER_NAME = "elasticsearch";
+ public static final String DEFAULT_CLIENT_TYPE = "transport";
+ // Group 1 captures the leading digits, group 2 the trailing non-digit
+ // characters (the unit qualifier, possibly empty).
+ public static final String TTL_REGEX = "^(\\d+)(\\D*)";
+ public static final String DEFAULT_SERIALIZER_CLASS = "org.apache.flume." +
+ "sink.elasticsearch.ElasticSearchLogStashEventSerializer";
+ public static final String DEFAULT_INDEX_NAME_BUILDER_CLASS =
+ "org.apache.flume.sink.elasticsearch.TimeBasedIndexNameBuilder";
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/EventSerializerIndexRequestBuilderFactory.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/EventSerializerIndexRequestBuilderFactory.java
new file mode 100644
index 0000000..d6cca50
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/EventSerializerIndexRequestBuilderFactory.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import java.io.IOException;
+
+import org.apache.commons.lang.time.FastDateFormat;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.elasticsearch.action.index.IndexRequestBuilder;
+import org.elasticsearch.common.io.BytesStream;
+
+/**
+ * Default {@link ElasticSearchIndexRequestBuilderFactory} implementation.
+ * The source of every index request is produced by the
+ * {@link ElasticSearchEventSerializer} handed to the constructor.
+ */
+public class EventSerializerIndexRequestBuilderFactory
+    extends AbstractElasticSearchIndexRequestBuilderFactory {
+
+  /** Serializer that converts a flume event into the request source. */
+  protected final ElasticSearchEventSerializer serializer;
+
+  /**
+   * Creates a factory that uses the date format shared through
+   * {@link ElasticSearchIndexRequestBuilderFactory#df}.
+   *
+   * @param serializer serializer used for every event
+   */
+  public EventSerializerIndexRequestBuilderFactory(
+      ElasticSearchEventSerializer serializer) {
+    this(serializer, ElasticSearchIndexRequestBuilderFactory.df);
+  }
+
+  /**
+   * Creates a factory with an explicit date format.
+   *
+   * @param serializer serializer used for every event
+   * @param fdf date format forwarded to the superclass
+   */
+  protected EventSerializerIndexRequestBuilderFactory(
+      ElasticSearchEventSerializer serializer, FastDateFormat fdf) {
+    super(fdf);
+    this.serializer = serializer;
+  }
+
+  /** Forwards the context to the serializer. */
+  @Override
+  public void configure(Context context) {
+    serializer.configure(context);
+  }
+
+  /** Forwards the component configuration to the serializer. */
+  @Override
+  public void configure(ComponentConfiguration config) {
+    serializer.configure(config);
+  }
+
+  /**
+   * Sets index, type and serialized source on the given request builder.
+   *
+   * @throws IOException if the serializer fails to build the content
+   */
+  @Override
+  protected void prepareIndexRequest(IndexRequestBuilder indexRequest,
+      String indexName, String indexType, Event event) throws IOException {
+    BytesStream serialized = serializer.getContentBuilder(event);
+    indexRequest.setIndex(indexName);
+    indexRequest.setType(indexType);
+    indexRequest.setSource(serialized.bytes());
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/IndexNameBuilder.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/IndexNameBuilder.java
new file mode 100644
index 0000000..1dd4415
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/IndexNameBuilder.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import org.apache.flume.Event;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.conf.ConfigurableComponent;
+
+/**
+ * Strategy that derives the target index name, and its prefix, from an
+ * event.
+ */
+public interface IndexNameBuilder extends Configurable,
+    ConfigurableComponent {
+
+  /**
+   * Returns the name of the index an index request for the event should use.
+   *
+   * @param event
+   *          Event which determines the index name
+   * @return index name of the form 'indexPrefix-indexDynamicName'
+   */
+  String getIndexName(Event event);
+
+  /**
+   * Returns the prefix of the index an index request for the event should
+   * use.
+   *
+   * @param event
+   *          Event which determines the index prefix
+   * @return index prefix name
+   */
+  String getIndexPrefix(Event event);
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/SimpleIndexNameBuilder.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/SimpleIndexNameBuilder.java
new file mode 100644
index 0000000..801cac9
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/SimpleIndexNameBuilder.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2014 Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.elasticsearch;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.apache.flume.formatter.output.BucketPath;
+
+/**
+ * {@link IndexNameBuilder} whose index name and index prefix are the same
+ * value: the configured index name passed through
+ * {@link BucketPath#escapeString} together with the event headers.
+ */
+public class SimpleIndexNameBuilder implements IndexNameBuilder {
+
+  /** Raw index name read from the context in {@link #configure(Context)}. */
+  private String indexName;
+
+  @Override
+  public String getIndexName(Event event) {
+    return resolve(event);
+  }
+
+  @Override
+  public String getIndexPrefix(Event event) {
+    return resolve(event);
+  }
+
+  /** Reads the index name from {@link ElasticSearchSinkConstants#INDEX_NAME}. */
+  @Override
+  public void configure(Context context) {
+    this.indexName = context.getString(ElasticSearchSinkConstants.INDEX_NAME);
+  }
+
+  /** Intentionally empty: nothing is read from the component configuration. */
+  @Override
+  public void configure(ComponentConfiguration conf) {
+  }
+
+  /** Applies the event headers to the configured index name. */
+  private String resolve(Event event) {
+    return BucketPath.escapeString(indexName, event.getHeaders());
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/TimeBasedIndexNameBuilder.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/TimeBasedIndexNameBuilder.java
new file mode 100644
index 0000000..c651732
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/TimeBasedIndexNameBuilder.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang.time.FastDateFormat;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.apache.flume.formatter.output.BucketPath;
+
+import java.util.TimeZone;
+
+/**
+ * Default index name builder. The index name is the configured prefix
+ * followed by '-' and the formatted event timestamp; with the default
+ * settings the result has the form "prefix-yyyy-MM-dd".
+ */
+public class TimeBasedIndexNameBuilder implements
+    IndexNameBuilder {
+
+  public static final String DATE_FORMAT = "dateFormat";
+  public static final String TIME_ZONE = "timeZone";
+
+  public static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd";
+  public static final String DEFAULT_TIME_ZONE = "Etc/UTC";
+
+  /** Format in use until {@link #configure(Context)} replaces it. */
+  private FastDateFormat fastDateFormat = FastDateFormat.getInstance(
+      DEFAULT_DATE_FORMAT, TimeZone.getTimeZone(DEFAULT_TIME_ZONE));
+
+  private String indexPrefix;
+
+  @VisibleForTesting
+  FastDateFormat getFastDateFormat() {
+    return fastDateFormat;
+  }
+
+  /**
+   * Builds the index name for an event.
+   *
+   * @param event
+   *          Event for which the name of index has to be prepared
+   * @return index name of the form 'indexPrefix-formattedTimestamp'
+   */
+  @Override
+  public String getIndexName(Event event) {
+    // Resolve the timestamp before the prefix, as the two steps can fail
+    // independently.
+    long eventTime = new TimestampedEvent(event).getTimestamp();
+    String prefix = BucketPath.escapeString(indexPrefix, event.getHeaders());
+    StringBuilder name = new StringBuilder(prefix);
+    name.append('-');
+    name.append(fastDateFormat.format(eventTime));
+    return name.toString();
+  }
+
+  /**
+   * Returns the configured prefix passed through
+   * {@link BucketPath#escapeString} with the event headers.
+   */
+  @Override
+  public String getIndexPrefix(Event event) {
+    return BucketPath.escapeString(indexPrefix, event.getHeaders());
+  }
+
+  /**
+   * Reads the date format, time zone and index prefix from the context.
+   * A blank date format or time zone is replaced by its default.
+   */
+  @Override
+  public void configure(Context context) {
+    String configuredFormat = context.getString(DATE_FORMAT);
+    String configuredZone = context.getString(TIME_ZONE);
+    String pattern = StringUtils.isBlank(configuredFormat)
+        ? DEFAULT_DATE_FORMAT : configuredFormat;
+    String zoneId = StringUtils.isBlank(configuredZone)
+        ? DEFAULT_TIME_ZONE : configuredZone;
+    fastDateFormat = FastDateFormat.getInstance(pattern,
+        TimeZone.getTimeZone(zoneId));
+    indexPrefix = context.getString(ElasticSearchSinkConstants.INDEX_NAME);
+  }
+
+  /** Intentionally empty: nothing is read from the component configuration. */
+  @Override
+  public void configure(ComponentConfiguration conf) {
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/TimestampedEvent.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/TimestampedEvent.java
new file mode 100644
index 0000000..c056839
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/TimestampedEvent.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import com.google.common.collect.Maps;
+import org.apache.commons.lang.StringUtils;
+import org.apache.flume.Event;
+import org.apache.flume.event.SimpleEvent;
+import org.joda.time.DateTimeUtils;
+
+import java.util.Map;
+
+/**
+ * {@link org.apache.flume.Event} implementation that has a timestamp.
+ * The timestamp is taken from (in order of precedence):
+ * <ol>
+ * <li>The "timestamp" header of the base event, if present</li>
+ * <li>The "@timestamp" header of the base event, if present</li>
+ * <li>The current time in millis, otherwise</li>
+ * </ol>
+ */
+final class TimestampedEvent extends SimpleEvent {
+
+  private final long timestamp;
+
+  /**
+   * Copies body and headers of the base event and resolves its timestamp.
+   * When neither header carries a non-blank value, the current time is used
+   * and also stored in the "timestamp" header of this event.
+   *
+   * @param base event to wrap
+   * @throws NumberFormatException if the header value that was found is not
+   *         a decimal long
+   */
+  TimestampedEvent(Event base) {
+    setBody(base.getBody());
+    // Work on a copy of the headers; the map of the base event is not
+    // written to.
+    Map<String, String> headers = Maps.newHashMap(base.getHeaders());
+    String timestampString = headers.get("timestamp");
+    if (StringUtils.isBlank(timestampString)) {
+      timestampString = headers.get("@timestamp");
+    }
+    if (StringUtils.isBlank(timestampString)) {
+      this.timestamp = DateTimeUtils.currentTimeMillis();
+      headers.put("timestamp", String.valueOf(timestamp));
+    } else {
+      // parseLong yields the primitive directly, without the boxing and
+      // unboxing that Long.valueOf would do.
+      this.timestamp = Long.parseLong(timestampString);
+    }
+    setHeaders(headers);
+  }
+
+  /** Returns the timestamp resolved at construction time, in milliseconds. */
+  long getTimestamp() {
+    return timestamp;
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchClient.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchClient.java
new file mode 100644
index 0000000..655e00a
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchClient.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch.client;
+
+import org.apache.flume.Event;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.sink.elasticsearch.IndexNameBuilder;
+
+/**
+ * Client abstraction for ElasticSearch: events are collected into a bulk
+ * with {@link #addEvent} and the bulk is sent with {@link #execute()}.
+ */
+public interface ElasticSearchClient extends Configurable {
+
+  /**
+   * Closes the connection this client holds to elastic search.
+   */
+  void close();
+
+  /**
+   * Adds an event to the pending bulk.
+   *
+   * @param event
+   *          Flume Event
+   * @param indexNameBuilder
+   *          Index name builder which generates name of index to feed
+   * @param indexType
+   *          Name of type of document which will be sent to the
+   *          elasticsearch cluster
+   * @param ttlMs
+   *          Time to live expressed in milliseconds. A value less than or
+   *          equal to zero is ignored
+   * @throws Exception
+   */
+  void addEvent(Event event, IndexNameBuilder indexNameBuilder,
+      String indexType, long ttlMs) throws Exception;
+
+  /**
+   * Sends the pending bulk to the elasticsearch cluster.
+   *
+   * @throws Exception
+   */
+  void execute() throws Exception;
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchClientFactory.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchClientFactory.java
new file mode 100644
index 0000000..986fb2b
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchClientFactory.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch.client;
+
+import org.apache.flume.sink.elasticsearch.ElasticSearchEventSerializer;
+import org.apache.flume.sink.elasticsearch.ElasticSearchIndexRequestBuilderFactory;
+
+/**
+ * Internal ElasticSearch client factory. Responsible for creating instance
+ * of ElasticSearch clients.
+ */
+public class ElasticSearchClientFactory {
+  public static final String TransportClient = "transport";
+  public static final String RestClient = "rest";
+
+  /**
+   * Creates a client of the requested type. For the transport type the
+   * serializer is preferred and the index builder is used only when the
+   * serializer is null; the rest type requires a serializer.
+   *
+   * @param clientType
+   *          String representation of client type (case-insensitive)
+   * @param hostNames
+   *          Array of strings that represents hostnames with ports
+   *          (hostname:port)
+   * @param clusterName
+   *          Elasticsearch cluster name used only by Transport Client
+   * @param serializer
+   *          Serializer of flume events to elasticsearch documents
+   * @param indexBuilder
+   *          Index request builder factory, used by the Transport Client when
+   *          no serializer is given
+   * @return a new client of the requested type
+   * @throws NoSuchClientTypeException
+   *           if the client type is null or unknown, or if the collaborator
+   *           required by that type is missing
+   */
+  public ElasticSearchClient getClient(String clientType, String[] hostNames,
+      String clusterName, ElasticSearchEventSerializer serializer,
+      ElasticSearchIndexRequestBuilderFactory indexBuilder) throws NoSuchClientTypeException {
+    // Constant-first comparison: a null clientType ends in the declared
+    // NoSuchClientTypeException instead of a NullPointerException.
+    if (TransportClient.equalsIgnoreCase(clientType) && serializer != null) {
+      return new ElasticSearchTransportClient(hostNames, clusterName, serializer);
+    } else if (TransportClient.equalsIgnoreCase(clientType) && indexBuilder != null) {
+      return new ElasticSearchTransportClient(hostNames, clusterName, indexBuilder);
+    } else if (RestClient.equalsIgnoreCase(clientType) && serializer != null) {
+      return new ElasticSearchRestClient(hostNames, serializer);
+    }
+    throw new NoSuchClientTypeException();
+  }
+
+  /**
+   * Used for tests only. Creates local elasticsearch instance client.
+   * Only the transport type is supported here; any other type, including
+   * rest, results in a {@link NoSuchClientTypeException}.
+   *
+   * @param clientType Name of client to use (case-insensitive)
+   * @param serializer Serializer for the event
+   * @param indexBuilder Index builder factory
+   *
+   * @return Local elastic search instance client
+   * @throws NoSuchClientTypeException
+   *           if the client type is null or not the transport type, or if
+   *           both serializer and index builder are null
+   */
+  public ElasticSearchClient getLocalClient(String clientType,
+      ElasticSearchEventSerializer serializer,
+      ElasticSearchIndexRequestBuilderFactory indexBuilder)
+      throws NoSuchClientTypeException {
+    if (TransportClient.equalsIgnoreCase(clientType) && serializer != null) {
+      return new ElasticSearchTransportClient(serializer);
+    } else if (TransportClient.equalsIgnoreCase(clientType) && indexBuilder != null) {
+      return new ElasticSearchTransportClient(indexBuilder);
+    }
+    throw new NoSuchClientTypeException();
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchRestClient.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchRestClient.java
new file mode 100644
index 0000000..e51efe2
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchRestClient.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch.client;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.gson.Gson;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.sink.elasticsearch.ElasticSearchEventSerializer;
+import org.apache.flume.sink.elasticsearch.IndexNameBuilder;
+import org.apache.http.HttpResponse;
+import org.apache.http.HttpStatus;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.entity.StringEntity;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.util.EntityUtils;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Rest ElasticSearch client which is responsible for sending bulks of events to
+ * ElasticSearch using ElasticSearch HTTP API. This is configurable, so any
+ * config params required should be taken through this.
+ */
+public class ElasticSearchRestClient implements ElasticSearchClient {
+
+  private static final String INDEX_OPERATION_NAME = "index";
+  private static final String INDEX_PARAM = "_index";
+  private static final String TYPE_PARAM = "_type";
+  private static final String TTL_PARAM = "_ttl";
+  private static final String BULK_ENDPOINT = "_bulk";
+
+  private static final Logger logger = LoggerFactory.getLogger(ElasticSearchRestClient.class);
+
+  /** Gson instances are thread-safe, so one shared instance is enough. */
+  private static final Gson GSON = new Gson();
+
+  private final ElasticSearchEventSerializer serializer;
+  private final RoundRobinList<String> serversList;
+
+  /**
+   * Pending bulk body. The field is final because it is also the monitor
+   * guarding the buffer: replacing the instance while synchronizing on it
+   * would let a concurrent addEvent append to a discarded buffer.
+   */
+  private final StringBuilder bulkBuilder;
+  private HttpClient httpClient;
+
+  /**
+   * Creates a client for the given hosts.
+   *
+   * @param hostNames hosts to send bulks to; entries that contain neither
+   *        "http://" nor "https://" are prefixed with "http://". The array
+   *        itself is not modified.
+   * @param serializer serializer that produces the document of each event
+   */
+  public ElasticSearchRestClient(String[] hostNames,
+      ElasticSearchEventSerializer serializer) {
+
+    // Normalize into a copy so the caller's array is left untouched.
+    String[] urls = new String[hostNames.length];
+    for (int i = 0; i < hostNames.length; ++i) {
+      String host = hostNames[i];
+      if (!host.contains("http://") && !host.contains("https://")) {
+        host = "http://" + host;
+      }
+      urls[i] = host;
+    }
+    this.serializer = serializer;
+
+    serversList = new RoundRobinList<String>(Arrays.asList(urls));
+    httpClient = new DefaultHttpClient();
+    bulkBuilder = new StringBuilder();
+  }
+
+  @VisibleForTesting
+  public ElasticSearchRestClient(String[] hostNames,
+      ElasticSearchEventSerializer serializer, HttpClient client) {
+    this(hostNames, serializer);
+    httpClient = client;
+  }
+
+  /** Intentionally empty: this client reads nothing from the context. */
+  @Override
+  public void configure(Context context) {
+  }
+
+  /** Intentionally empty. */
+  @Override
+  public void close() {
+  }
+
+  /**
+   * Appends one bulk "index" action for the event: a JSON action line with
+   * index, type and (only when {@code ttlMs > 0}) TTL, followed by the
+   * serialized document line.
+   */
+  @Override
+  public void addEvent(Event event, IndexNameBuilder indexNameBuilder, String indexType,
+      long ttlMs) throws Exception {
+    BytesReference content = serializer.getContentBuilder(event).bytes();
+    Map<String, Map<String, String>> parameters =
+        new HashMap<String, Map<String, String>>();
+    Map<String, String> indexParameters = new HashMap<String, String>();
+    indexParameters.put(INDEX_PARAM, indexNameBuilder.getIndexName(event));
+    indexParameters.put(TYPE_PARAM, indexType);
+    if (ttlMs > 0) {
+      indexParameters.put(TTL_PARAM, Long.toString(ttlMs));
+    }
+    parameters.put(INDEX_OPERATION_NAME, indexParameters);
+
+    synchronized (bulkBuilder) {
+      bulkBuilder.append(GSON.toJson(parameters));
+      bulkBuilder.append("\n");
+      bulkBuilder.append(content.toBytesArray().toUtf8());
+      bulkBuilder.append("\n");
+    }
+  }
+
+  /**
+   * Posts the pending bulk to the bulk endpoint and clears the buffer. Hosts
+   * are taken from the server list one after another until one answers with
+   * HTTP 200 or as many attempts as there are servers have been made.
+   *
+   * @throws EventDeliveryException if no attempt returned HTTP 200; the
+   *         message is the last response body when there was one
+   * @throws Exception if the HTTP call itself fails
+   */
+  @Override
+  public void execute() throws Exception {
+    int statusCode = 0;
+    int triesCount = 0;
+    String responseBody = null;
+    String entity;
+    synchronized (bulkBuilder) {
+      entity = bulkBuilder.toString();
+      bulkBuilder.setLength(0);
+    }
+
+    while (statusCode != HttpStatus.SC_OK && triesCount < serversList.size()) {
+      triesCount++;
+      String host = serversList.get();
+      String url = host + "/" + BULK_ENDPOINT;
+      HttpPost httpRequest = new HttpPost(url);
+      // An explicit charset is required: StringEntity otherwise falls back to
+      // ISO-8859-1 and corrupts non-Latin characters in the documents.
+      httpRequest.setEntity(new StringEntity(entity, "UTF-8"));
+      HttpResponse response = httpClient.execute(httpRequest);
+      statusCode = response.getStatusLine().getStatusCode();
+      logger.info("Status code from elasticsearch: " + statusCode);
+      // Read the body once and keep it: a response entity can generally be
+      // consumed only a single time, and it is needed again for the error.
+      responseBody = null;
+      if (response.getEntity() != null) {
+        responseBody = EntityUtils.toString(response.getEntity(), "UTF-8");
+        logger.debug("Status message from elasticsearch: " + responseBody);
+      }
+    }
+
+    if (statusCode != HttpStatus.SC_OK) {
+      // This branch is also reached with an empty server list, where no
+      // request was made and there is no response to inspect.
+      if (responseBody != null) {
+        throw new EventDeliveryException(responseBody);
+      } else {
+        throw new EventDeliveryException("Elasticsearch status code was: " + statusCode);
+      }
+    }
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchTransportClient.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchTransportClient.java
new file mode 100644
index 0000000..2cf365e
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchTransportClient.java
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch.client;
+
+import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.sink.elasticsearch.ElasticSearchEventSerializer;
+import org.apache.flume.sink.elasticsearch.IndexNameBuilder;
+import org.elasticsearch.action.bulk.BulkRequestBuilder;
+import org.elasticsearch.action.bulk.BulkResponse;
+import org.elasticsearch.action.index.IndexRequestBuilder;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.client.transport.TransportClient;
+import org.elasticsearch.common.settings.ImmutableSettings;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.transport.InetSocketTransportAddress;
+import org.elasticsearch.node.Node;
+import org.elasticsearch.node.NodeBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Arrays;
+import org.apache.flume.sink.elasticsearch.ElasticSearchIndexRequestBuilderFactory;
+
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_PORT;
+
+public class ElasticSearchTransportClient implements ElasticSearchClient {
+
+ public static final Logger logger = LoggerFactory
+ .getLogger(ElasticSearchTransportClient.class);
+
+ private InetSocketTransportAddress[] serverAddresses;
+ private ElasticSearchEventSerializer serializer;
+ private ElasticSearchIndexRequestBuilderFactory indexRequestBuilderFactory;
+ private BulkRequestBuilder bulkRequestBuilder;
+
+ private Client client;
+
+ @VisibleForTesting
+ InetSocketTransportAddress[] getServerAddresses() {
+ return serverAddresses;
+ }
+
+ @VisibleForTesting
+ void setBulkRequestBuilder(BulkRequestBuilder bulkRequestBuilder) {
+ this.bulkRequestBuilder = bulkRequestBuilder;
+ }
+
+ /**
+ * Creates a transport client for an external cluster and opens the connection immediately.
+ *
+ * @param hostNames {@code host} or {@code host:port} entries; an entry without a port uses DEFAULT_PORT
+ * @param clusterName value stored under the {@code cluster.name} client setting
+ * @param serializer serializer addEvent uses to turn each event into the document source
+ */
+ public ElasticSearchTransportClient(String[] hostNames, String clusterName,
+ ElasticSearchEventSerializer serializer) {
+ configureHostnames(hostNames);
+ this.serializer = serializer;
+ openClient(clusterName);
+ }
+
+ public ElasticSearchTransportClient(String[] hostNames, String clusterName,
+ ElasticSearchIndexRequestBuilderFactory indexBuilder) {
+ configureHostnames(hostNames);
+ this.indexRequestBuilderFactory = indexBuilder;
+ openClient(clusterName);
+ }
+
+ /**
+ * Local transport client only for testing; opens the local discovery client immediately.
+ *
+ * @param indexBuilderFactory factory addEvent uses to build each index request
+ */
+ public ElasticSearchTransportClient(ElasticSearchIndexRequestBuilderFactory indexBuilderFactory) {
+ this.indexRequestBuilderFactory = indexBuilderFactory;
+ openLocalDiscoveryClient();
+ }
+
+ /**
+ * Local transport client only for testing; opens the local discovery client immediately.
+ *
+ * @param serializer serializer addEvent uses to turn each event into the document source
+ */
+ public ElasticSearchTransportClient(ElasticSearchEventSerializer serializer) {
+ this.serializer = serializer;
+ openLocalDiscoveryClient();
+ }
+
+ /**
+ * Used for testing
+ *
+ * @param client
+ * ElasticSearch Client
+ * @param serializer
+ * Event Serializer
+ */
+ public ElasticSearchTransportClient(Client client,
+ ElasticSearchEventSerializer serializer) {
+ this.client = client;
+ this.serializer = serializer;
+ }
+
+ /** Used for testing: wraps an existing client and builds index requests through the factory. */
+ public ElasticSearchTransportClient(Client client,
+     ElasticSearchIndexRequestBuilderFactory requestBuilderFactory) throws IOException {
+   this.client = client;
+   // Store the factory; otherwise addEvent() falls back to the serializer, which is null here.
+   this.indexRequestBuilderFactory = requestBuilderFactory;
+   // Pre-existing call kept as-is; its result is discarded.
+   requestBuilderFactory.createIndexRequest(client, null, null, null);
+ }
+
+ private void configureHostnames(String[] hostNames) {
+   // Parses "host" / "host:port" entries into transport addresses (no port => DEFAULT_PORT).
+   logger.debug("Configuring ElasticSearch hostnames: {}", Arrays.toString(hostNames));
+   serverAddresses = new InetSocketTransportAddress[hostNames.length];
+   for (int i = 0; i < hostNames.length; i++) {
+     String[] hostPort = hostNames[i].trim().split(":");
+     int port = hostPort.length == 2 ? Integer.parseInt(hostPort[1].trim())
+         : DEFAULT_PORT;
+     serverAddresses[i] = new InetSocketTransportAddress(hostPort[0].trim(), port);
+   }
+ }
+
+ @Override
+ public void close() {
+ if (client != null) {
+ client.close();
+ }
+ client = null;
+ }
+
+ @Override
+ public void addEvent(Event event, IndexNameBuilder indexNameBuilder,
+     String indexType, long ttlMs) throws Exception {
+   // Lazily start a bulk request the first time an event is queued.
+   if (bulkRequestBuilder == null) {
+     bulkRequestBuilder = client.prepareBulk();
+   }
+   // A configured factory builds the request; otherwise the event is serialized here.
+   final IndexRequestBuilder request;
+   if (indexRequestBuilderFactory != null) {
+     String prefix = indexNameBuilder.getIndexPrefix(event);
+     request = indexRequestBuilderFactory.createIndexRequest(client, prefix, indexType, event);
+   } else {
+     String indexName = indexNameBuilder.getIndexName(event);
+     request = client.prepareIndex(indexName, indexType)
+         .setSource(serializer.getContentBuilder(event).bytes());
+   }
+   if (ttlMs > 0) {
+     request.setTTL(ttlMs);
+   }
+   bulkRequestBuilder.add(request);
+ }
+
+ @Override
+ public void execute() throws Exception {
+ try {
+ BulkResponse bulkResponse = bulkRequestBuilder.execute().actionGet();
+ if (bulkResponse.hasFailures()) {
+ throw new EventDeliveryException(bulkResponse.buildFailureMessage());
+ }
+ } finally {
+ bulkRequestBuilder = client.prepareBulk();
+ }
+ }
+
+ /**
+ * Opens a TransportClient to the addresses in serverAddresses, closing any client already held.
+ *
+ * @param clusterName value for the {@code cluster.name} client setting
+ */
+ private void openClient(String clusterName) {
+ logger.info("Using ElasticSearch hostnames: {} ",
+ Arrays.toString(serverAddresses));
+ Settings settings = ImmutableSettings.settingsBuilder()
+ .put("cluster.name", clusterName).build();
+
+ TransportClient transportClient = new TransportClient(settings);
+ for (InetSocketTransportAddress host : serverAddresses) {
+ transportClient.addTransportAddress(host);
+ }
+ if (client != null) {
+ client.close();
+ }
+ client = transportClient;
+ }
+
+ /*
+ * FOR TESTING ONLY...
+ *
+ * Opens a local discovery node for talking to an elasticsearch server running
+ * in the same JVM
+ */
+ private void openLocalDiscoveryClient() {
+ logger.info("Using ElasticSearch AutoDiscovery mode");
+ Node node = NodeBuilder.nodeBuilder().client(true).local(true).node();
+ if (client != null) {
+ client.close();
+ }
+ client = node.client();
+ }
+
+ @Override
+ public void configure(Context context) {
+ // Intentionally empty: this client reads nothing from the context.
+ }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/NoSuchClientTypeException.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/NoSuchClientTypeException.java
new file mode 100644
index 0000000..41fbe0d
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/NoSuchClientTypeException.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2014 Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.elasticsearch.client;
+
+/**
+ * Package-private checked exception; presumably raised for an unknown client type (TODO confirm at the throw site).
+ */
+class NoSuchClientTypeException extends Exception {
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/RoundRobinList.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/RoundRobinList.java
new file mode 100644
index 0000000..4cbbe91
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/RoundRobinList.java
@@ -0,0 +1,44 @@
+package org.apache.flume.sink.elasticsearch.client;
+
+import java.util.Collection;
+import java.util.Iterator;
+
+/*
+ * Copyright 2014 Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Cycles endlessly over a collection's elements in iteration order. */
+public class RoundRobinList<T> {
+  private Iterator<T> iterator;
+  private final Collection<T> elements;
+
+  public RoundRobinList(Collection<T> elements) {
+    this.elements = elements;
+    iterator = this.elements.iterator();
+  }
+
+  /** Returns the next element, restarting from the first once the iterator is exhausted. */
+  public synchronized T get() {
+    if (!iterator.hasNext()) {
+      iterator = elements.iterator();
+    }
+    return iterator.next();
+  }
+
+  /** Number of elements in the backing collection. */
+  public int size() {
+    return elements.size();
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/AbstractElasticSearchSinkTest.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/AbstractElasticSearchSinkTest.java
new file mode 100644
index 0000000..9fbd747
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/AbstractElasticSearchSinkTest.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.channel.MemoryChannel;
+import org.apache.flume.conf.Configurables;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.common.collect.Maps;
+import org.elasticsearch.common.settings.ImmutableSettings;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.gateway.Gateway;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.node.Node;
+import org.elasticsearch.node.NodeBuilder;
+import org.elasticsearch.node.internal.InternalNode;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.search.SearchHits;
+import org.joda.time.DateTimeUtils;
+import org.junit.After;
+import org.junit.Before;
+
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.Map;
+
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.BATCH_SIZE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.CLUSTER_NAME;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_NAME;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_TYPE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.TTL;
+import static org.junit.Assert.assertEquals;
+
+public abstract class AbstractElasticSearchSinkTest {
+
+ static final String DEFAULT_INDEX_NAME = "flume";
+ static final String DEFAULT_INDEX_TYPE = "log";
+ static final String DEFAULT_CLUSTER_NAME = "elasticsearch";
+ static final long FIXED_TIME_MILLIS = 123456789L;
+
+ Node node;
+ Client client;
+ String timestampedIndexName;
+ Map parameters;
+
+ void initDefaults() {
+ parameters = Maps.newHashMap();
+ parameters.put(INDEX_NAME, DEFAULT_INDEX_NAME);
+ parameters.put(INDEX_TYPE, DEFAULT_INDEX_TYPE);
+ parameters.put(CLUSTER_NAME, DEFAULT_CLUSTER_NAME);
+ parameters.put(BATCH_SIZE, "1");
+ parameters.put(TTL, "5");
+
+ timestampedIndexName = DEFAULT_INDEX_NAME + '-'
+ + ElasticSearchIndexRequestBuilderFactory.df.format(FIXED_TIME_MILLIS);
+ }
+
+ void createNodes() throws Exception {
+ Settings settings = ImmutableSettings
+ .settingsBuilder()
+ .put("number_of_shards", 1)
+ .put("number_of_replicas", 0)
+ .put("routing.hash.type", "simple")
+ .put("gateway.type", "none")
+ .put("path.data", "target/es-test")
+ .build();
+
+ node = NodeBuilder.nodeBuilder().settings(settings).local(true).node();
+ client = node.client();
+
+ client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute()
+ .actionGet();
+ }
+
+ void shutdownNodes() throws Exception {
+ ((InternalNode) node).injector().getInstance(Gateway.class).reset();
+ client.close();
+ node.close();
+ }
+
+ @Before
+ public void setFixedJodaTime() {
+ DateTimeUtils.setCurrentMillisFixed(FIXED_TIME_MILLIS);
+ }
+
+ @After
+ public void resetJodaTime() {
+ DateTimeUtils.setCurrentMillisSystem();
+ }
+
+ Channel bindAndStartChannel(ElasticSearchSink fixture) {
+ // Configure the channel
+ Channel channel = new MemoryChannel();
+ Configurables.configure(channel, new Context());
+
+ // Wire them together
+ fixture.setChannel(channel);
+ fixture.start();
+ return channel;
+ }
+
+ void assertMatchAllQuery(int expectedHits, Event... events) {
+ assertSearch(expectedHits, performSearch(QueryBuilders.matchAllQuery()),
+ null, events);
+ }
+
+ void assertBodyQuery(int expectedHits, Event... events) {
+ // Perform Multi Field Match
+ assertSearch(expectedHits,
+ performSearch(QueryBuilders.fieldQuery("@message", "event")),
+ null, events);
+ }
+
+ SearchResponse performSearch(QueryBuilder query) {
+ return client.prepareSearch(timestampedIndexName)
+ .setTypes(DEFAULT_INDEX_TYPE).setQuery(query).execute().actionGet();
+ }
+
+ void assertSearch(int expectedHits, SearchResponse response, Map expectedBody,
+     Event... events) {
+   SearchHits hitResponse = response.getHits();
+   assertEquals(expectedHits, hitResponse.getTotalHits());
+   // Order hits by raw source so that hit i can be checked against events[i].
+   SearchHit[] hits = hitResponse.getHits();
+   Arrays.sort(hits, new Comparator<SearchHit>() {
+     @Override
+     public int compare(SearchHit o1, SearchHit o2) {
+       return o1.getSourceAsString().compareTo(o2.getSourceAsString());
+     }
+   });
+   // Each hit's "@message" must equal expectedBody when given, else the event's own body.
+   for (int i = 0; i < events.length; i++) {
+     Event event = events[i];
+     SearchHit hit = hits[i];
+     Map<String, Object> source = hit.getSource();
+     if (expectedBody == null) {
+       assertEquals(new String(event.getBody()), source.get("@message"));
+     } else {
+       assertEquals(expectedBody, source.get("@message"));
+     }
+   }
+ }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchDynamicSerializer.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchDynamicSerializer.java
new file mode 100644
index 0000000..d4e4654
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchDynamicSerializer.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.event.EventBuilder;
+import org.elasticsearch.common.collect.Maps;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.junit.Test;
+
+import java.util.Map;
+
+import static org.apache.flume.sink.elasticsearch.ElasticSearchEventSerializer.charset;
+import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+import static org.junit.Assert.assertEquals;
+
+public class TestElasticSearchDynamicSerializer {
+
+ @Test
+ public void testRoundTrip() throws Exception {
+   ElasticSearchDynamicSerializer fixture = new ElasticSearchDynamicSerializer();
+   Context context = new Context();
+   fixture.configure(context);
+
+   String message = "test body";
+   Map<String, String> headers = Maps.newHashMap();
+   headers.put("headerNameOne", "headerValueOne");
+   headers.put("headerNameTwo", "headerValueTwo");
+   headers.put("headerNameThree", "headerValueThree");
+   Event event = EventBuilder.withBody(message.getBytes(charset));
+   event.setHeaders(headers);
+
+   // Expected document: the body plus one field per header, in the map's iteration order.
+   XContentBuilder expected = jsonBuilder().startObject();
+   expected.field("body", new String(message.getBytes(charset), charset));
+   for (String headerName : headers.keySet()) {
+     expected.field(headerName, new String(headers.get(headerName).getBytes(charset),
+         charset));
+   }
+   expected.endObject();
+
+   XContentBuilder actual = fixture.getContentBuilder(event);
+
+   assertEquals(new String(expected.bytes().array()), new String(actual
+       .bytes().array()));
+ }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchIndexRequestBuilderFactory.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchIndexRequestBuilderFactory.java
new file mode 100644
index 0000000..b62254e
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchIndexRequestBuilderFactory.java
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import com.google.common.collect.Maps;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.apache.flume.conf.sink.SinkConfiguration;
+import org.apache.flume.event.SimpleEvent;
+import org.elasticsearch.action.index.IndexRequestBuilder;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.common.io.BytesStream;
+import org.elasticsearch.common.io.FastByteArrayOutputStream;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.Map;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+public class TestElasticSearchIndexRequestBuilderFactory
+ extends AbstractElasticSearchSinkTest {
+
+ private static final Client FAKE_CLIENT = null;
+
+ private EventSerializerIndexRequestBuilderFactory factory;
+
+ private FakeEventSerializer serializer;
+
+ @Before
+ public void setupFactory() throws Exception {
+ serializer = new FakeEventSerializer();
+ factory = new EventSerializerIndexRequestBuilderFactory(serializer) {
+ @Override
+ IndexRequestBuilder prepareIndex(Client client) {
+ return new IndexRequestBuilder(FAKE_CLIENT);
+ }
+ };
+ }
+
+ @Test
+ public void shouldUseUtcAsBasisForDateFormat() {
+ assertEquals("Coordinated Universal Time",
+ factory.fastDateFormat.getTimeZone().getDisplayName());
+ }
+
+ @Test
+ public void indexNameShouldBePrefixDashFormattedTimestamp() {
+ long millis = 987654321L;
+ assertEquals("prefix-" + factory.fastDateFormat.format(millis),
+ factory.getIndexName("prefix", millis));
+ }
+
+ @Test
+ public void shouldEnsureTimestampHeaderPresentInTimestampedEvent() {
+ SimpleEvent base = new SimpleEvent();
+
+ TimestampedEvent timestampedEvent = new TimestampedEvent(base);
+ assertEquals(FIXED_TIME_MILLIS, timestampedEvent.getTimestamp());
+ assertEquals(String.valueOf(FIXED_TIME_MILLIS),
+ timestampedEvent.getHeaders().get("timestamp"));
+ }
+
+ @Test
+ public void shouldUseExistingTimestampHeaderInTimestampedEvent() {
+ SimpleEvent base = new SimpleEvent();
+ Map headersWithTimestamp = Maps.newHashMap();
+ headersWithTimestamp.put("timestamp", "-321");
+ base.setHeaders(headersWithTimestamp );
+
+ TimestampedEvent timestampedEvent = new TimestampedEvent(base);
+ assertEquals(-321L, timestampedEvent.getTimestamp());
+ assertEquals("-321", timestampedEvent.getHeaders().get("timestamp"));
+ }
+
+ @Test
+ public void shouldUseExistingAtTimestampHeaderInTimestampedEvent() {
+ SimpleEvent base = new SimpleEvent();
+ Map headersWithTimestamp = Maps.newHashMap();
+ headersWithTimestamp.put("@timestamp", "-999");
+ base.setHeaders(headersWithTimestamp );
+
+ TimestampedEvent timestampedEvent = new TimestampedEvent(base);
+ assertEquals(-999L, timestampedEvent.getTimestamp());
+ assertEquals("-999", timestampedEvent.getHeaders().get("@timestamp"));
+ assertNull(timestampedEvent.getHeaders().get("timestamp"));
+ }
+
+ @Test
+ public void shouldPreserveBodyAndNonTimestampHeadersInTimestampedEvent() {
+ SimpleEvent base = new SimpleEvent();
+ base.setBody(new byte[] {1,2,3,4});
+ Map headersWithTimestamp = Maps.newHashMap();
+ headersWithTimestamp.put("foo", "bar");
+ base.setHeaders(headersWithTimestamp );
+
+ TimestampedEvent timestampedEvent = new TimestampedEvent(base);
+ assertEquals("bar", timestampedEvent.getHeaders().get("foo"));
+ assertArrayEquals(base.getBody(), timestampedEvent.getBody());
+ }
+
+ @Test
+ public void shouldSetIndexNameTypeAndSerializedEventIntoIndexRequest()
+ throws Exception {
+
+ String indexPrefix = "qwerty";
+ String indexType = "uiop";
+ Event event = new SimpleEvent();
+
+ IndexRequestBuilder indexRequestBuilder = factory.createIndexRequest(
+ FAKE_CLIENT, indexPrefix, indexType, event);
+
+ assertEquals(indexPrefix + '-'
+ + ElasticSearchIndexRequestBuilderFactory.df.format(FIXED_TIME_MILLIS),
+ indexRequestBuilder.request().index());
+ assertEquals(indexType, indexRequestBuilder.request().type());
+ assertArrayEquals(FakeEventSerializer.FAKE_BYTES,
+ indexRequestBuilder.request().source().array());
+ }
+
+ @Test
+ public void shouldSetIndexNameFromTimestampHeaderWhenPresent()
+ throws Exception {
+ String indexPrefix = "qwerty";
+ String indexType = "uiop";
+ Event event = new SimpleEvent();
+ event.getHeaders().put("timestamp", "1213141516");
+
+ IndexRequestBuilder indexRequestBuilder = factory.createIndexRequest(
+ null, indexPrefix, indexType, event);
+
+ assertEquals(indexPrefix + '-'
+ + ElasticSearchIndexRequestBuilderFactory.df.format(1213141516L),
+ indexRequestBuilder.request().index());
+ }
+
+ @Test
+ public void shouldSetIndexNameTypeFromHeaderWhenPresent()
+ throws Exception {
+ String indexPrefix = "%{index-name}";
+ String indexType = "%{index-type}";
+ String indexValue = "testing-index-name-from-headers";
+ String typeValue = "testing-index-type-from-headers";
+
+ Event event = new SimpleEvent();
+ event.getHeaders().put("index-name", indexValue);
+ event.getHeaders().put("index-type", typeValue);
+
+ IndexRequestBuilder indexRequestBuilder = factory.createIndexRequest(
+ null, indexPrefix, indexType, event);
+
+ assertEquals(indexValue + '-'
+ + ElasticSearchIndexRequestBuilderFactory.df.format(FIXED_TIME_MILLIS),
+ indexRequestBuilder.request().index());
+ assertEquals(typeValue, indexRequestBuilder.request().type());
+ }
+
+ @Test
+ public void shouldConfigureEventSerializer() throws Exception {
+ assertFalse(serializer.configuredWithContext);
+ factory.configure(new Context());
+ assertTrue(serializer.configuredWithContext);
+
+ assertFalse(serializer.configuredWithComponentConfiguration);
+ factory.configure(new SinkConfiguration("name"));
+ assertTrue(serializer.configuredWithComponentConfiguration);
+ }
+
+ static class FakeEventSerializer implements ElasticSearchEventSerializer {
+
+ static final byte[] FAKE_BYTES = new byte[]{9, 8, 7, 6};
+ boolean configuredWithContext;
+ boolean configuredWithComponentConfiguration;
+
+ @Override
+ public BytesStream getContentBuilder(Event event) throws IOException {
+ FastByteArrayOutputStream fbaos = new FastByteArrayOutputStream(4);
+ fbaos.write(FAKE_BYTES);
+ return fbaos;
+ }
+
+ @Override
+ public void configure(Context arg0) {
+ configuredWithContext = true;
+ }
+
+ @Override
+ public void configure(ComponentConfiguration arg0) {
+ configuredWithComponentConfiguration = true;
+ }
+ }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchLogStashEventSerializer.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchLogStashEventSerializer.java
new file mode 100644
index 0000000..65b4dab
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchLogStashEventSerializer.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import com.google.gson.JsonParser;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.event.EventBuilder;
+import org.elasticsearch.common.collect.Maps;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.junit.Test;
+
+import java.util.Date;
+import java.util.Map;
+
+import static org.apache.flume.sink.elasticsearch.ElasticSearchEventSerializer.charset;
+import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+import static org.junit.Assert.assertEquals;
+
+public class TestElasticSearchLogStashEventSerializer {
+
+ @Test
+ public void testRoundTrip() throws Exception {
+ ElasticSearchLogStashEventSerializer fixture = new ElasticSearchLogStashEventSerializer();
+ Context context = new Context();
+ fixture.configure(context);
+
+ String message = "test body";
+ Map headers = Maps.newHashMap();
+ long timestamp = System.currentTimeMillis();
+ headers.put("timestamp", String.valueOf(timestamp));
+ headers.put("source", "flume_tail_src");
+ headers.put("host", "test@localhost");
+ headers.put("src_path", "/tmp/test");
+ headers.put("headerNameOne", "headerValueOne");
+ headers.put("headerNameTwo", "headerValueTwo");
+ headers.put("type", "sometype");
+ Event event = EventBuilder.withBody(message.getBytes(charset));
+ event.setHeaders(headers);
+
+ XContentBuilder expected = jsonBuilder().startObject();
+ expected.field("@message", new String(message.getBytes(), charset));
+ expected.field("@timestamp", new Date(timestamp));
+ expected.field("@source", "flume_tail_src");
+ expected.field("@type", "sometype");
+ expected.field("@source_host", "test@localhost");
+ expected.field("@source_path", "/tmp/test");
+
+ expected.startObject("@fields");
+ expected.field("timestamp", String.valueOf(timestamp));
+ expected.field("src_path", "/tmp/test");
+ expected.field("host", "test@localhost");
+ expected.field("headerNameTwo", "headerValueTwo");
+ expected.field("source", "flume_tail_src");
+ expected.field("headerNameOne", "headerValueOne");
+ expected.field("type", "sometype");
+ expected.endObject();
+
+ expected.endObject();
+
+ XContentBuilder actual = fixture.getContentBuilder(event);
+
+ JsonParser parser = new JsonParser();
+ assertEquals(parser.parse(expected.string()),parser.parse(actual.string()));
+ }
+
+ @Test
+ public void shouldHandleInvalidJSONDuringComplexParsing() throws Exception {
+ ElasticSearchLogStashEventSerializer fixture = new ElasticSearchLogStashEventSerializer();
+ Context context = new Context();
+ fixture.configure(context);
+
+ String message = "{flume: somethingnotvalid}";
+ Map headers = Maps.newHashMap();
+ long timestamp = System.currentTimeMillis();
+ headers.put("timestamp", String.valueOf(timestamp));
+ headers.put("source", "flume_tail_src");
+ headers.put("host", "test@localhost");
+ headers.put("src_path", "/tmp/test");
+ headers.put("headerNameOne", "headerValueOne");
+ headers.put("headerNameTwo", "headerValueTwo");
+ headers.put("type", "sometype");
+ Event event = EventBuilder.withBody(message.getBytes(charset));
+ event.setHeaders(headers);
+
+ XContentBuilder expected = jsonBuilder().startObject();
+ expected.field("@message", new String(message.getBytes(), charset));
+ expected.field("@timestamp", new Date(timestamp));
+ expected.field("@source", "flume_tail_src");
+ expected.field("@type", "sometype");
+ expected.field("@source_host", "test@localhost");
+ expected.field("@source_path", "/tmp/test");
+
+ expected.startObject("@fields");
+ expected.field("timestamp", String.valueOf(timestamp));
+ expected.field("src_path", "/tmp/test");
+ expected.field("host", "test@localhost");
+ expected.field("headerNameTwo", "headerValueTwo");
+ expected.field("source", "flume_tail_src");
+ expected.field("headerNameOne", "headerValueOne");
+ expected.field("type", "sometype");
+ expected.endObject();
+
+ expected.endObject();
+
+ XContentBuilder actual = fixture.getContentBuilder(event);
+
+ JsonParser parser = new JsonParser();
+ assertEquals(parser.parse(expected.string()),parser.parse(actual.string()));
+ }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchSink.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchSink.java
new file mode 100644
index 0000000..69acc06
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchSink.java
@@ -0,0 +1,505 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import org.apache.commons.lang.time.FastDateFormat;
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.Sink.Status;
+import org.apache.flume.Transaction;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.conf.Configurables;
+import org.apache.flume.event.EventBuilder;
+import org.elasticsearch.action.index.IndexRequestBuilder;
+import org.elasticsearch.client.Requests;
+import org.elasticsearch.common.UUID;
+import org.elasticsearch.common.io.BytesStream;
+import org.elasticsearch.common.io.FastByteArrayOutputStream;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.BATCH_SIZE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.CLUSTER_NAME;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.HOSTNAMES;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_NAME;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_TYPE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.SERIALIZER;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.TTL;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+public class TestElasticSearchSink extends AbstractElasticSearchSinkTest {
+
+ private ElasticSearchSink fixture;
+
+ @Before
+ public void init() throws Exception {
+ initDefaults();
+ createNodes();
+ fixture = new ElasticSearchSink(true);
+ fixture.setName("ElasticSearchSink-" + UUID.randomUUID().toString());
+ }
+
+  /**
+   * Per-test cleanup: delegates to the inherited {@code shutdownNodes()} helper.
+   */
+  @After
+  public void tearDown() throws Exception {
+    shutdownNodes();
+  }
+
+ @Test
+ public void shouldIndexOneEvent() throws Exception {
+ Configurables.configure(fixture, new Context(parameters));
+ Channel channel = bindAndStartChannel(fixture);
+
+ Transaction tx = channel.getTransaction();
+ tx.begin();
+ Event event = EventBuilder.withBody("event #1 or 1".getBytes());
+ channel.put(event);
+ tx.commit();
+ tx.close();
+
+ fixture.process();
+ fixture.stop();
+ client.admin().indices()
+ .refresh(Requests.refreshRequest(timestampedIndexName)).actionGet();
+
+ assertMatchAllQuery(1, event);
+ assertBodyQuery(1, event);
+ }
+
+ @Test
+ public void shouldIndexInvalidComplexJsonBody() throws Exception {
+ parameters.put(BATCH_SIZE, "3");
+ Configurables.configure(fixture, new Context(parameters));
+ Channel channel = bindAndStartChannel(fixture);
+
+ Transaction tx = channel.getTransaction();
+ tx.begin();
+ Event event1 = EventBuilder.withBody("TEST1 {test}".getBytes());
+ channel.put(event1);
+ Event event2 = EventBuilder.withBody("{test: TEST2 }".getBytes());
+ channel.put(event2);
+ Event event3 = EventBuilder.withBody("{\"test\":{ TEST3 {test} }}".getBytes());
+ channel.put(event3);
+ tx.commit();
+ tx.close();
+
+ fixture.process();
+ fixture.stop();
+ client.admin().indices()
+ .refresh(Requests.refreshRequest(timestampedIndexName)).actionGet();
+
+ assertMatchAllQuery(3);
+ assertSearch(1,
+ performSearch(QueryBuilders.fieldQuery("@message", "TEST1")),
+ null, event1);
+ assertSearch(1,
+ performSearch(QueryBuilders.fieldQuery("@message", "TEST2")),
+ null, event2);
+ assertSearch(1,
+ performSearch(QueryBuilders.fieldQuery("@message", "TEST3")),
+ null, event3);
+ }
+
+  /**
+   * Indexes one event whose body is a valid JSON object and verifies that the
+   * document is found both by a match-all query and by a field query on the
+   * nested {@code @message.event} field, with the body compared against the
+   * expected key/value pairs.
+   */
+  @Test
+  public void shouldIndexComplexJsonEvent() throws Exception {
+    Configurables.configure(fixture, new Context(parameters));
+    Channel channel = bindAndStartChannel(fixture);
+
+    Transaction tx = channel.getTransaction();
+    tx.begin();
+    Event event = EventBuilder.withBody(
+        "{\"event\":\"json content\",\"num\":1}".getBytes());
+    channel.put(event);
+    tx.commit();
+    tx.close();
+
+    fixture.process();
+    fixture.stop();
+    client.admin().indices()
+        .refresh(Requests.refreshRequest(timestampedIndexName)).actionGet();
+
+    // Typed map (instead of a raw Map) so the compiler checks keys and values.
+    Map<String, Object> expectedBody = new HashMap<String, Object>();
+    expectedBody.put("event", "json content");
+    expectedBody.put("num", 1);
+
+    assertSearch(1,
+        performSearch(QueryBuilders.matchAllQuery()), expectedBody, event);
+    assertSearch(1,
+        performSearch(QueryBuilders.fieldQuery("@message.event", "json")),
+        expectedBody, event);
+  }
+
+  /**
+   * Puts five events on the channel with a batch size of five, so a single
+   * call to {@code process()} is enough, and verifies that all five are indexed.
+   */
+  @Test
+  public void shouldIndexFiveEvents() throws Exception {
+    // Batch size equals the number of events: one process() call suffices.
+    parameters.put(BATCH_SIZE, "5");
+    Configurables.configure(fixture, new Context(parameters));
+    Channel channel = bindAndStartChannel(fixture);
+
+    final int numberOfEvents = 5;
+    Event[] sent = new Event[numberOfEvents];
+
+    Transaction putTx = channel.getTransaction();
+    putTx.begin();
+    for (int idx = 0; idx < numberOfEvents; idx++) {
+      String text = "event #" + idx + " of " + numberOfEvents;
+      sent[idx] = EventBuilder.withBody(text.getBytes());
+      channel.put(sent[idx]);
+    }
+    putTx.commit();
+    putTx.close();
+
+    fixture.process();
+    fixture.stop();
+    client.admin()
+        .indices()
+        .refresh(Requests.refreshRequest(timestampedIndexName))
+        .actionGet();
+
+    assertMatchAllQuery(numberOfEvents, sent);
+    assertBodyQuery(5, sent);
+  }
+
+  /**
+   * Puts five events on the channel with a batch size of two and calls
+   * {@code process()} until the sink reports BACKOFF. Exactly three calls are
+   * expected, and all five events must end up in the index.
+   */
+  @Test
+  public void shouldIndexFiveEventsOverThreeBatches() throws Exception {
+    parameters.put(BATCH_SIZE, "2");
+    Configurables.configure(fixture, new Context(parameters));
+    Channel channel = bindAndStartChannel(fixture);
+
+    final int numberOfEvents = 5;
+    Event[] sent = new Event[numberOfEvents];
+
+    Transaction putTx = channel.getTransaction();
+    putTx.begin();
+    for (int idx = 0; idx < numberOfEvents; idx++) {
+      String text = "event #" + idx + " of " + numberOfEvents;
+      sent[idx] = EventBuilder.withBody(text.getBytes());
+      channel.put(sent[idx]);
+    }
+    putTx.commit();
+    putTx.close();
+
+    // Keep processing until the sink signals BACKOFF, counting every call.
+    int processCalls = 0;
+    Status lastStatus;
+    do {
+      processCalls++;
+      lastStatus = fixture.process();
+    } while (lastStatus != Status.BACKOFF);
+    fixture.stop();
+
+    assertEquals(3, processCalls);
+
+    client.admin()
+        .indices()
+        .refresh(Requests.refreshRequest(timestampedIndexName))
+        .actionGet();
+    assertMatchAllQuery(numberOfEvents, sent);
+    assertBodyQuery(5, sent);
+  }
+
+  /**
+   * Configures a fresh sink with explicit host, cluster name, index name,
+   * index type and TTL, and verifies each value through the sink's getters.
+   * A TTL of "10" is expected to be reported as ten days in milliseconds.
+   */
+  @Test
+  public void shouldParseConfiguration() {
+    parameters.put(HOSTNAMES, "10.5.5.27");
+    parameters.put(CLUSTER_NAME, "testing-cluster-name");
+    parameters.put(INDEX_NAME, "testing-index-name");
+    parameters.put(INDEX_TYPE, "testing-index-type");
+    parameters.put(TTL, "10");
+
+    fixture = new ElasticSearchSink();
+    fixture.configure(new Context(parameters));
+
+    String[] expectedAddresses = new String[] { "10.5.5.27" };
+    long expectedTtlMs = TimeUnit.DAYS.toMillis(10);
+
+    assertEquals("testing-cluster-name", fixture.getClusterName());
+    assertEquals("testing-index-name", fixture.getIndexName());
+    assertEquals("testing-index-type", fixture.getIndexType());
+    assertEquals(expectedTtlMs, fixture.getTTLMs());
+    assertArrayEquals(expectedAddresses, fixture.getServerAddresses());
+  }
+
+  /**
+   * Removes index name, index type and cluster name from the parameters and
+   * verifies that the sink falls back to the DEFAULT_* constants while still
+   * using the configured host.
+   */
+  @Test
+  public void shouldParseConfigurationUsingDefaults() {
+    parameters.put(HOSTNAMES, "10.5.5.27");
+    parameters.remove(INDEX_NAME);
+    parameters.remove(INDEX_TYPE);
+    parameters.remove(CLUSTER_NAME);
+
+    fixture = new ElasticSearchSink();
+    fixture.configure(new Context(parameters));
+
+    String[] expectedAddresses = new String[] { "10.5.5.27" };
+
+    assertEquals(DEFAULT_INDEX_NAME, fixture.getIndexName());
+    assertEquals(DEFAULT_INDEX_TYPE, fixture.getIndexType());
+    assertEquals(DEFAULT_CLUSTER_NAME, fixture.getClusterName());
+    assertArrayEquals(expectedAddresses, fixture.getServerAddresses());
+  }
+
+  /**
+   * A comma-separated host list without ports must be split into one server
+   * address per host.
+   */
+  @Test
+  public void shouldParseMultipleHostUsingDefaultPorts() {
+    parameters.put(HOSTNAMES, "10.5.5.27,10.5.5.28,10.5.5.29");
+
+    fixture = new ElasticSearchSink();
+    fixture.configure(new Context(parameters));
+
+    assertArrayEquals(
+        new String[] { "10.5.5.27", "10.5.5.28", "10.5.5.29" },
+        fixture.getServerAddresses());
+  }
+
+  /**
+   * Whitespace around the hosts of a comma-separated host list must be
+   * stripped from the resulting server addresses.
+   */
+  @Test
+  public void shouldParseMultipleHostWithWhitespacesUsingDefaultPorts() {
+    parameters.put(HOSTNAMES, " 10.5.5.27 , 10.5.5.28 , 10.5.5.29 ");
+
+    fixture = new ElasticSearchSink();
+    fixture.configure(new Context(parameters));
+
+    assertArrayEquals(
+        new String[] { "10.5.5.27", "10.5.5.28", "10.5.5.29" },
+        fixture.getServerAddresses());
+  }
+
+  /**
+   * A comma-separated list of host:port pairs must be split into one
+   * host:port server address per entry.
+   */
+  @Test
+  public void shouldParseMultipleHostAndPorts() {
+    parameters.put(HOSTNAMES, "10.5.5.27:9300,10.5.5.28:9301,10.5.5.29:9302");
+
+    fixture = new ElasticSearchSink();
+    fixture.configure(new Context(parameters));
+
+    assertArrayEquals(
+        new String[] { "10.5.5.27:9300", "10.5.5.28:9301", "10.5.5.29:9302" },
+        fixture.getServerAddresses());
+  }
+
+  /**
+   * Whitespace around hosts, colons and ports must be removed, yielding
+   * compact host:port server addresses.
+   */
+  @Test
+  public void shouldParseMultipleHostAndPortsWithWhitespaces() {
+    String spacedHosts =
+        " 10.5.5.27 : 9300 , 10.5.5.28 : 9301 , 10.5.5.29 : 9302 ";
+    parameters.put(HOSTNAMES, spacedHosts);
+
+    fixture = new ElasticSearchSink();
+    fixture.configure(new Context(parameters));
+
+    assertArrayEquals(
+        new String[] { "10.5.5.27:9300", "10.5.5.28:9301", "10.5.5.29:9302" },
+        fixture.getServerAddresses());
+  }
+
+  /**
+   * Configures the sink with the nested custom index request builder factory
+   * as serializer, processes one event and checks the values the factory
+   * recorded in its static fields: index name, index type, event body and
+   * whether {@code configure(Context)} was invoked.
+   */
+  @Test
+  public void shouldAllowCustomElasticSearchIndexRequestBuilderFactory()
+      throws Exception {
+    String factoryClass =
+        CustomElasticSearchIndexRequestBuilderFactory.class.getName();
+    parameters.put(SERIALIZER, factoryClass);
+
+    fixture.configure(new Context(parameters));
+
+    Channel channel = bindAndStartChannel(fixture);
+    Transaction putTx = channel.getTransaction();
+    putTx.begin();
+    String body = "{ foo: \"bar\" }";
+    Event event = EventBuilder.withBody(body.getBytes());
+    channel.put(event);
+    putTx.commit();
+    putTx.close();
+
+    fixture.process();
+    fixture.stop();
+
+    // NOTE(review): the "-05_17_36_789" suffix presumably comes from a fixed
+    // test clock formatted with the factory's HH_mm_ss_SSS pattern - confirm
+    // against the base test class.
+    String expectedIndexName = fixture.getIndexName() + "-05_17_36_789";
+    assertEquals(expectedIndexName,
+        CustomElasticSearchIndexRequestBuilderFactory.actualIndexName);
+    assertEquals(fixture.getIndexType(),
+        CustomElasticSearchIndexRequestBuilderFactory.actualIndexType);
+    assertArrayEquals(event.getBody(),
+        CustomElasticSearchIndexRequestBuilderFactory.actualEventBody);
+    assertTrue(CustomElasticSearchIndexRequestBuilderFactory.hasContext);
+  }
+
+  /**
+   * Verifies TTL parsing for every supported unit suffix (ms, s, m, h, d, w)
+   * and for a bare number, which is expected to be interpreted as days.
+   * Each TTL string is mapped to the number of milliseconds the sink must
+   * report through {@code getTTLMs()}.
+   */
+  @Test
+  public void shouldParseFullyQualifiedTTLs() {
+    // Typed map: with a raw Map the String-typed for-each below would not
+    // compile and the values would need an unchecked cast.
+    Map<String, Long> testTTLMap = new HashMap<String, Long>();
+    testTTLMap.put("1ms", Long.valueOf(1));
+    testTTLMap.put("1s", Long.valueOf(1000));
+    testTTLMap.put("1m", Long.valueOf(60000));
+    testTTLMap.put("1h", Long.valueOf(3600000));
+    testTTLMap.put("1d", Long.valueOf(86400000));
+    testTTLMap.put("1w", Long.valueOf(604800000));
+    testTTLMap.put("1", Long.valueOf(86400000));
+
+    parameters.put(HOSTNAMES, "10.5.5.27");
+    parameters.put(CLUSTER_NAME, "testing-cluster-name");
+    parameters.put(INDEX_NAME, "testing-index-name");
+    parameters.put(INDEX_TYPE, "testing-index-type");
+
+    for (Map.Entry<String, Long> entry : testTTLMap.entrySet()) {
+      // A new sink per TTL so no state carries over between iterations.
+      parameters.put(TTL, entry.getKey());
+      fixture = new ElasticSearchSink();
+      fixture.configure(new Context(parameters));
+
+      String[] expected = { "10.5.5.27" };
+      assertEquals("testing-cluster-name", fixture.getClusterName());
+      assertEquals("testing-index-name", fixture.getIndexName());
+      assertEquals("testing-index-type", fixture.getIndexType());
+      assertEquals(entry.getValue().longValue(), fixture.getTTLMs());
+      assertArrayEquals(expected, fixture.getServerAddresses());
+    }
+  }
+
+  /**
+   * Index request builder factory used by the custom-serializer test. It
+   * records the arguments it receives in static fields so the test can
+   * inspect them after the sink has processed an event.
+   */
+  public static final class CustomElasticSearchIndexRequestBuilderFactory
+      extends AbstractElasticSearchIndexRequestBuilderFactory {
+
+    // Last values seen by prepareIndexRequest(); static, so they are shared
+    // across all instances and are not reset between tests.
+    static String actualIndexName;
+    static String actualIndexType;
+    static byte[] actualEventBody;
+    // Set once configure(Context) has been called.
+    static boolean hasContext;
+
+    /** Passes an HH_mm_ss_SSS formatter in the EST5EDT zone to the superclass. */
+    public CustomElasticSearchIndexRequestBuilderFactory() {
+      super(FastDateFormat.getInstance(
+          "HH_mm_ss_SSS", TimeZone.getTimeZone("EST5EDT")));
+    }
+
+    /**
+     * Records index name, index type and event body, then applies them to the
+     * given index request.
+     */
+    @Override
+    protected void prepareIndexRequest(IndexRequestBuilder indexRequest, String indexName,
+        String indexType, Event event) throws IOException {
+      actualIndexName = indexName;
+      actualIndexType = indexType;
+      actualEventBody = event.getBody();
+      indexRequest
+          .setIndex(indexName)
+          .setType(indexType)
+          .setSource(event.getBody());
+    }
+
+    /** Only remembers that a context-based configuration took place. */
+    @Override
+    public void configure(Context arg0) {
+      hasContext = true;
+    }
+
+    /** Intentionally does nothing. */
+    @Override
+    public void configure(ComponentConfiguration arg0) {
+      //no-op
+    }
+  }
+
+  /**
+   * Configuring the sink with an unsuitable serializer class must fail:
+   * a class that is not a {@code Configurable} is expected to raise a
+   * {@link ClassCastException}, and a {@code Configurable} that is not a
+   * supported serializer type an {@link IllegalArgumentException}.
+   *
+   * Each attempt is followed by an explicit failure so the test no longer
+   * passes silently when no exception is thrown at all.
+   */
+  @Test
+  public void shouldFailToConfigureWithInvalidSerializerClass()
+      throws Exception {
+
+    parameters.put(SERIALIZER, "java.lang.String");
+    try {
+      Configurables.configure(fixture, new Context(parameters));
+      // Fully qualified to avoid depending on an extra static import.
+      org.junit.Assert.fail(
+          "Expected ClassCastException for serializer java.lang.String");
+    } catch (ClassCastException e) {
+      // expected
+    }
+
+    parameters.put(SERIALIZER, FakeConfigurable.class.getName());
+    try {
+      Configurables.configure(fixture, new Context(parameters));
+      org.junit.Assert.fail(
+          "Expected IllegalArgumentException for serializer "
+              + FakeConfigurable.class.getName());
+    } catch (IllegalArgumentException e) {
+      // expected
+    }
+  }
+
+  /**
+   * The sink has no event serializer before configuration and must hold a
+   * {@code FakeEventSerializer} once configured with that class name.
+   */
+  @Test
+  public void shouldUseSpecifiedSerializer() throws Exception {
+    String serializerClass =
+        "org.apache.flume.sink.elasticsearch.FakeEventSerializer";
+    Context sinkContext = new Context();
+    sinkContext.put(SERIALIZER, serializerClass);
+
+    assertNull(fixture.getEventSerializer());
+    fixture.configure(sinkContext);
+    assertTrue(fixture.getEventSerializer() instanceof FakeEventSerializer);
+  }
+
+  /**
+   * The sink has no index name builder before configuration and must hold a
+   * {@code FakeIndexNameBuilder} once configured with that class name.
+   */
+  @Test
+  public void shouldUseSpecifiedIndexNameBuilder() throws Exception {
+    String builderClass =
+        "org.apache.flume.sink.elasticsearch.FakeIndexNameBuilder";
+    Context sinkContext = new Context();
+    sinkContext.put(ElasticSearchSinkConstants.INDEX_NAME_BUILDER, builderClass);
+
+    assertNull(fixture.getIndexNameBuilder());
+    fixture.configure(sinkContext);
+    assertTrue(fixture.getIndexNameBuilder() instanceof FakeIndexNameBuilder);
+  }
+
+  /**
+   * A {@code Configurable} that is not a serializer; used by the
+   * invalid-serializer test as a class name the sink should reject.
+   */
+  public static class FakeConfigurable implements Configurable {
+    /** Intentionally does nothing. */
+    @Override
+    public void configure(Context arg0) {
+      // no-op
+    }
+  }
+}
+
+/**
+ * Internal class. Fake event serializer used for tests. It ignores the event
+ * and always serializes to the fixed {@link #FAKE_BYTES} payload, and it
+ * records which of the two configure methods were called.
+ */
+class FakeEventSerializer implements ElasticSearchEventSerializer {
+
+  static final byte[] FAKE_BYTES = new byte[] { 9, 8, 7, 6 };
+  boolean configuredWithContext;
+  boolean configuredWithComponentConfiguration;
+
+  /** Returns a stream holding exactly {@link #FAKE_BYTES}; the event is unused. */
+  @Override
+  public BytesStream getContentBuilder(Event event) throws IOException {
+    FastByteArrayOutputStream out = new FastByteArrayOutputStream(4);
+    out.write(FAKE_BYTES);
+    return out;
+  }
+
+  /** Records that configuration via a {@code Context} happened. */
+  @Override
+  public void configure(Context arg0) {
+    configuredWithContext = true;
+  }
+
+  /** Records that configuration via a {@code ComponentConfiguration} happened. */
+  @Override
+  public void configure(ComponentConfiguration arg0) {
+    configuredWithComponentConfiguration = true;
+  }
+}
+
+/**
+ * Internal class. Fake index name builder used only for tests. Both the index
+ * name and the index prefix are the constant {@link #INDEX_NAME}, regardless
+ * of the event; configuration is ignored.
+ */
+class FakeIndexNameBuilder implements IndexNameBuilder {
+
+  static final String INDEX_NAME = "index_name";
+
+  /** Always returns {@link #INDEX_NAME}. */
+  @Override
+  public String getIndexName(Event event) {
+    return INDEX_NAME;
+  }
+
+  /** Always returns {@link #INDEX_NAME}. */
+  @Override
+  public String getIndexPrefix(Event event) {
+    return INDEX_NAME;
+  }
+
+  /** Intentionally does nothing. */
+  @Override
+  public void configure(Context context) {
+  }
+
+  /** Intentionally does nothing. */
+  @Override
+  public void configure(ComponentConfiguration conf) {
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchSinkCreation.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchSinkCreation.java
new file mode 100644
index 0000000..2a36439
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchSinkCreation.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import org.apache.flume.FlumeException;
+import org.apache.flume.Sink;
+import org.apache.flume.SinkFactory;
+import org.apache.flume.sink.DefaultSinkFactory;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestElasticSearchSinkCreation {
+
+ private SinkFactory sinkFactory;
+
+ @Before
+ public void setUp() {
+ sinkFactory = new DefaultSinkFactory();
+ }
+
+ private void verifySinkCreation(String name, String type,
+ Class> typeClass) throws FlumeException {
+ Sink sink = sinkFactory.create(name, type);
+ Assert.assertNotNull(sink);
+ Assert.assertTrue(typeClass.isInstance(sink));
+ }
+
+ @Test
+ public void testSinkCreation() {
+ verifySinkCreation("elasticsearch-sink", "elasticsearch", ElasticSearchSink.class);
+ }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TimeBasedIndexNameBuilderTest.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TimeBasedIndexNameBuilderTest.java
new file mode 100644
index 0000000..678342a
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TimeBasedIndexNameBuilderTest.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.event.SimpleEvent;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for {@code TimeBasedIndexNameBuilder}: the time zone of its date
+ * format and the "prefix-formattedTimestamp" shape of the index names.
+ */
+public class TimeBasedIndexNameBuilderTest {
+
+  private TimeBasedIndexNameBuilder indexNameBuilder;
+
+  /** Configures a new builder with the index name "prefix". */
+  @Before
+  public void setUp() throws Exception {
+    Context context = new Context();
+    context.put(ElasticSearchSinkConstants.INDEX_NAME, "prefix");
+    indexNameBuilder = new TimeBasedIndexNameBuilder();
+    indexNameBuilder.configure(context);
+  }
+
+  /**
+   * The builder's date format must be based on UTC.
+   *
+   * The display name is requested for the English locale explicitly; the
+   * no-argument getDisplayName() uses the JVM default locale and would make
+   * this assertion fail on machines with a non-English default locale.
+   */
+  @Test
+  public void shouldUseUtcAsBasisForDateFormat() {
+    assertEquals("Coordinated Universal Time",
+        indexNameBuilder.getFastDateFormat().getTimeZone()
+            .getDisplayName(java.util.Locale.ENGLISH));
+  }
+
+  /**
+   * The index name must be "prefix-" followed by the event's timestamp header
+   * formatted with the builder's own date format.
+   */
+  @Test
+  public void indexNameShouldBePrefixDashFormattedTimestamp() {
+    long time = 987654321L;
+    Event event = new SimpleEvent();
+    // Typed map (instead of a raw Map) to match the String-to-String headers.
+    Map<String, String> headers = new HashMap<String, String>();
+    headers.put("timestamp", Long.toString(time));
+    event.setHeaders(headers);
+    assertEquals("prefix-" + indexNameBuilder.getFastDateFormat().format(time),
+        indexNameBuilder.getIndexName(event));
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TimestampedEventTest.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TimestampedEventTest.java
new file mode 100644
index 0000000..bef2ac6
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TimestampedEventTest.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import com.google.common.collect.Maps;
+import org.apache.flume.event.SimpleEvent;
+import org.joda.time.DateTimeUtils;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.Map;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+/**
+ * Tests for {@code TimestampedEvent}: how the timestamp is derived from the
+ * wrapped event's headers (or from the current time when absent) and that
+ * body and other headers are preserved.
+ */
+public class TimestampedEventTest {
+  static final long FIXED_TIME_MILLIS = 123456789L;
+
+  /** Freezes the Joda-Time clock so "now" is deterministic in every test. */
+  @Before
+  public void setFixedJodaTime() {
+    DateTimeUtils.setCurrentMillisFixed(FIXED_TIME_MILLIS);
+  }
+
+  /**
+   * Restores the system clock. The fixed clock is JVM-global state, so without
+   * this reset it would leak into every test that runs afterwards.
+   */
+  @org.junit.After
+  public void resetJodaTime() {
+    DateTimeUtils.setCurrentMillisSystem();
+  }
+
+  /** Without any timestamp header the current (fixed) time is used and stored. */
+  @Test
+  public void shouldEnsureTimestampHeaderPresentInTimestampedEvent() {
+    SimpleEvent base = new SimpleEvent();
+
+    TimestampedEvent timestampedEvent = new TimestampedEvent(base);
+    assertEquals(FIXED_TIME_MILLIS, timestampedEvent.getTimestamp());
+    assertEquals(String.valueOf(FIXED_TIME_MILLIS),
+        timestampedEvent.getHeaders().get("timestamp"));
+  }
+
+  /** An existing "timestamp" header is used as-is, even when negative. */
+  @Test
+  public void shouldUseExistingTimestampHeaderInTimestampedEvent() {
+    SimpleEvent base = new SimpleEvent();
+    Map<String, String> headersWithTimestamp = Maps.newHashMap();
+    headersWithTimestamp.put("timestamp", "-321");
+    base.setHeaders(headersWithTimestamp);
+
+    TimestampedEvent timestampedEvent = new TimestampedEvent(base);
+    assertEquals(-321L, timestampedEvent.getTimestamp());
+    assertEquals("-321", timestampedEvent.getHeaders().get("timestamp"));
+  }
+
+  /**
+   * An existing "@timestamp" header is used as the timestamp and no plain
+   * "timestamp" header is added alongside it.
+   */
+  @Test
+  public void shouldUseExistingAtTimestampHeaderInTimestampedEvent() {
+    SimpleEvent base = new SimpleEvent();
+    Map<String, String> headersWithTimestamp = Maps.newHashMap();
+    headersWithTimestamp.put("@timestamp", "-999");
+    base.setHeaders(headersWithTimestamp);
+
+    TimestampedEvent timestampedEvent = new TimestampedEvent(base);
+    assertEquals(-999L, timestampedEvent.getTimestamp());
+    assertEquals("-999", timestampedEvent.getHeaders().get("@timestamp"));
+    assertNull(timestampedEvent.getHeaders().get("timestamp"));
+  }
+
+  /** Body and unrelated headers of the wrapped event are carried over. */
+  @Test
+  public void shouldPreserveBodyAndNonTimestampHeadersInTimestampedEvent() {
+    SimpleEvent base = new SimpleEvent();
+    base.setBody(new byte[] {1,2,3,4});
+    Map<String, String> headersWithTimestamp = Maps.newHashMap();
+    headersWithTimestamp.put("foo", "bar");
+    base.setHeaders(headersWithTimestamp);
+
+    TimestampedEvent timestampedEvent = new TimestampedEvent(base);
+    assertEquals("bar", timestampedEvent.getHeaders().get("foo"));
+    assertArrayEquals(base.getBody(), timestampedEvent.getBody());
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/RoundRobinListTest.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/RoundRobinListTest.java
new file mode 100644
index 0000000..0d1d092
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/RoundRobinListTest.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2014 Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.elasticsearch.client;
+
+import java.util.Arrays;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class RoundRobinListTest {
+
+ private RoundRobinList fixture;
+
+ @Before
+ public void setUp() {
+ fixture = new RoundRobinList(Arrays.asList("test1", "test2"));
+ }
+
+ @Test
+ public void shouldReturnNextElement() {
+ assertEquals("test1", fixture.get());
+ assertEquals("test2", fixture.get());
+ assertEquals("test1", fixture.get());
+ assertEquals("test2", fixture.get());
+ assertEquals("test1", fixture.get());
+ }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchClientFactory.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchClientFactory.java
new file mode 100644
index 0000000..c3f07b0
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchClientFactory.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch.client;
+
+import org.apache.flume.sink.elasticsearch.ElasticSearchEventSerializer;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mock;
+
+import static org.hamcrest.core.IsInstanceOf.instanceOf;
+import static org.junit.Assert.assertThat;
+import static org.mockito.MockitoAnnotations.initMocks;
+
+/**
+ * Tests that {@code ElasticSearchClientFactory} returns the client
+ * implementation matching the requested client type and rejects unknown types.
+ */
+public class TestElasticSearchClientFactory {
+
+  ElasticSearchClientFactory factory;
+
+  @Mock
+  ElasticSearchEventSerializer serializer;
+
+  /** Initialises the Mockito mocks and creates a new factory for every test. */
+  @Before
+  public void setUp() {
+    initMocks(this);
+    factory = new ElasticSearchClientFactory();
+  }
+
+  /** The transport client type must yield an {@code ElasticSearchTransportClient}. */
+  @Test
+  public void shouldReturnTransportClient() throws Exception {
+    String[] hostNames = new String[] { "127.0.0.1" };
+    Object created = factory.getClient(
+        ElasticSearchClientFactory.TransportClient,
+        hostNames, "test", serializer, null);
+    assertThat(created, instanceOf(ElasticSearchTransportClient.class));
+  }
+
+  /** The REST client type must yield an {@code ElasticSearchRestClient}. */
+  @Test
+  public void shouldReturnRestClient() throws NoSuchClientTypeException {
+    String[] hostNames = new String[] { "127.0.0.1" };
+    Object created = factory.getClient(
+        ElasticSearchClientFactory.RestClient,
+        hostNames, "test", serializer, null);
+    assertThat(created, instanceOf(ElasticSearchRestClient.class));
+  }
+
+  /** An unknown client type must raise {@code NoSuchClientTypeException}. */
+  @Test(expected = NoSuchClientTypeException.class)
+  public void shouldThrowNoSuchClientTypeException() throws NoSuchClientTypeException {
+    String[] hostNames = new String[] { "127.0.0.1" };
+    factory.getClient("not_existing_client", hostNames, "test", null, null);
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchRestClient.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchRestClient.java
new file mode 100644
index 0000000..9551c81
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchRestClient.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch.client;
+
+import com.google.common.base.Splitter;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParser;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.sink.elasticsearch.ElasticSearchEventSerializer;
+import org.apache.flume.sink.elasticsearch.IndexNameBuilder;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.http.HttpStatus;
+import org.apache.http.StatusLine;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.client.methods.HttpUriRequest;
+import org.apache.http.util.EntityUtils;
+import org.elasticsearch.common.bytes.BytesArray;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.io.BytesStream;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mock;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertTrue;
+import static org.mockito.Mockito.any;
+import static org.mockito.Mockito.isA;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+import static org.mockito.MockitoAnnotations.initMocks;
+
+/**
+ * Unit tests for {@link ElasticSearchRestClient}. The HTTP layer is fully
+ * mocked: each test stubs the status codes returned by {@link HttpClient} and
+ * then inspects the {@link HttpPost} requests the client issued.
+ */
+public class TestElasticSearchRestClient {
+
+  private ElasticSearchRestClient fixture;
+
+  @Mock
+  private ElasticSearchEventSerializer serializer;
+
+  @Mock
+  private IndexNameBuilder nameBuilder;
+
+  @Mock
+  private Event event;
+
+  @Mock
+  private HttpClient httpClient;
+
+  @Mock
+  private HttpResponse httpResponse;
+
+  @Mock
+  private StatusLine httpStatus;
+
+  @Mock
+  private HttpEntity httpEntity;
+
+  private static final String INDEX_NAME = "foo_index";
+  private static final String MESSAGE_CONTENT = "{\"body\":\"test\"}";
+  private static final String[] HOSTS = {"host1", "host2"};
+
+  /**
+   * Wires the serializer mock so that every event serializes to
+   * {@link #MESSAGE_CONTENT} and every index name resolves to {@link #INDEX_NAME},
+   * then builds the client under test on top of the mocked {@link HttpClient}.
+   */
+  @Before
+  public void setUp() throws IOException {
+    initMocks(this);
+    BytesReference bytesReference = mock(BytesReference.class);
+    BytesStream bytesStream = mock(BytesStream.class);
+
+    when(nameBuilder.getIndexName(any(Event.class))).thenReturn(INDEX_NAME);
+    when(bytesReference.toBytesArray()).thenReturn(new BytesArray(MESSAGE_CONTENT));
+    when(bytesStream.bytes()).thenReturn(bytesReference);
+    when(serializer.getContentBuilder(any(Event.class))).thenReturn(bytesStream);
+    fixture = new ElasticSearchRestClient(HOSTS, serializer, httpClient);
+  }
+
+  /** Without a TTL (-1) the bulk header carries only the type and index. */
+  @Test
+  public void shouldAddNewEventWithoutTTL() throws Exception {
+    ArgumentCaptor<HttpPost> argument = ArgumentCaptor.forClass(HttpPost.class);
+    stubStatusCodes(HttpStatus.SC_OK);
+
+    fixture.addEvent(event, nameBuilder, "bar_type", -1);
+    fixture.execute();
+
+    verify(httpClient).execute(isA(HttpUriRequest.class));
+    verify(httpClient).execute(argument.capture());
+
+    assertEquals("http://host1/_bulk", argument.getValue().getURI().toString());
+    assertTrue(verifyJsonEvents("{\"index\":{\"_type\":\"bar_type\", \"_index\":\"foo_index\"}}\n",
+        MESSAGE_CONTENT, EntityUtils.toString(argument.getValue().getEntity())));
+  }
+
+  /** With a positive TTL the bulk header additionally carries {@code _ttl}. */
+  @Test
+  public void shouldAddNewEventWithTTL() throws Exception {
+    ArgumentCaptor<HttpPost> argument = ArgumentCaptor.forClass(HttpPost.class);
+    stubStatusCodes(HttpStatus.SC_OK);
+
+    fixture.addEvent(event, nameBuilder, "bar_type", 123);
+    fixture.execute();
+
+    verify(httpClient).execute(isA(HttpUriRequest.class));
+    verify(httpClient).execute(argument.capture());
+
+    assertEquals("http://host1/_bulk", argument.getValue().getURI().toString());
+    assertTrue(verifyJsonEvents(
+        "{\"index\":{\"_type\":\"bar_type\",\"_index\":\"foo_index\",\"_ttl\":\"123\"}}\n",
+        MESSAGE_CONTENT, EntityUtils.toString(argument.getValue().getEntity())));
+  }
+
+  /**
+   * Compares the first two newline-separated JSON documents of a bulk payload
+   * with the expected index header and event body. The comparison is done on
+   * parsed JSON, so whitespace and key order in the expectations do not matter.
+   *
+   * @param expectedIndex expected bulk action line, as JSON text
+   * @param expectedBody expected event document, as JSON text
+   * @param actual the request entity content that was actually sent
+   * @return true when both documents match
+   */
+  private boolean verifyJsonEvents(String expectedIndex, String expectedBody, String actual) {
+    Iterator<String> lines = Splitter.on("\n").split(actual).iterator();
+    JsonParser parser = new JsonParser();
+    JsonObject actualIndex = (JsonObject) parser.parse(lines.next());
+    JsonObject actualBody = (JsonObject) parser.parse(lines.next());
+    return actualIndex.equals(parser.parse(expectedIndex))
+        && actualBody.equals(parser.parse(expectedBody));
+  }
+
+  /** When every response is a 500, execute() must fail with EventDeliveryException. */
+  @Test(expected = EventDeliveryException.class)
+  public void shouldThrowEventDeliveryException() throws Exception {
+    stubStatusCodes(HttpStatus.SC_INTERNAL_SERVER_ERROR);
+
+    fixture.addEvent(event, nameBuilder, "bar_type", 123);
+    fixture.execute();
+  }
+
+  /** A 500 followed by a 200 must produce a second request, sent to the next host. */
+  @Test
+  public void shouldRetryBulkOperation() throws Exception {
+    ArgumentCaptor<HttpPost> argument = ArgumentCaptor.forClass(HttpPost.class);
+    stubStatusCodes(HttpStatus.SC_INTERNAL_SERVER_ERROR, HttpStatus.SC_OK);
+
+    fixture.addEvent(event, nameBuilder, "bar_type", 123);
+    fixture.execute();
+
+    verify(httpClient, times(2)).execute(isA(HttpUriRequest.class));
+    verify(httpClient, times(2)).execute(argument.capture());
+
+    List<HttpPost> allValues = argument.getAllValues();
+    assertEquals("http://host1/_bulk", allValues.get(0).getURI().toString());
+    assertEquals("http://host2/_bulk", allValues.get(1).getURI().toString());
+  }
+
+  /**
+   * Makes the mocked HTTP client answer every request with the shared response
+   * mock, whose status line reports the given codes on consecutive reads (the
+   * last code repeats once the sequence is exhausted).
+   */
+  private void stubStatusCodes(Integer first, Integer... subsequent) throws IOException {
+    when(httpStatus.getStatusCode()).thenReturn(first, subsequent);
+    when(httpResponse.getStatusLine()).thenReturn(httpStatus);
+    when(httpClient.execute(any(HttpUriRequest.class))).thenReturn(httpResponse);
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchTransportClient.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchTransportClient.java
new file mode 100644
index 0000000..b7b8e74
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchTransportClient.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch.client;
+
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.sink.elasticsearch.ElasticSearchEventSerializer;
+import org.apache.flume.sink.elasticsearch.IndexNameBuilder;
+import org.elasticsearch.action.ListenableActionFuture;
+import org.elasticsearch.action.bulk.BulkRequestBuilder;
+import org.elasticsearch.action.bulk.BulkResponse;
+import org.elasticsearch.action.index.IndexRequestBuilder;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.io.BytesStream;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mock;
+
+import java.io.IOException;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Mockito.*;
+import static org.mockito.MockitoAnnotations.initMocks;
+
+/**
+ * Unit tests for {@link ElasticSearchTransportClient}. The Elasticsearch
+ * {@link Client} and its request builders are mocked, and the bulk request
+ * builder is injected through {@code setBulkRequestBuilder}.
+ */
+public class TestElasticSearchTransportClient {
+
+  private ElasticSearchTransportClient fixture;
+
+  @Mock
+  private ElasticSearchEventSerializer serializer;
+
+  @Mock
+  private IndexNameBuilder nameBuilder;
+
+  @Mock
+  private Client elasticSearchClient;
+
+  @Mock
+  private BulkRequestBuilder bulkRequestBuilder;
+
+  @Mock
+  private IndexRequestBuilder indexRequestBuilder;
+
+  @Mock
+  private Event event;
+
+  /**
+   * Stubs the serializer, index-name builder and index request builder, then
+   * creates the client under test around the mocked Elasticsearch client.
+   */
+  @Before
+  public void setUp() throws IOException {
+    initMocks(this);
+    BytesReference source = mock(BytesReference.class);
+    BytesStream stream = mock(BytesStream.class);
+
+    when(nameBuilder.getIndexName(any(Event.class))).thenReturn("foo_index");
+    when(source.toBytes()).thenReturn("{\"body\":\"test\"}".getBytes());
+    when(stream.bytes()).thenReturn(source);
+    when(serializer.getContentBuilder(any(Event.class))).thenReturn(stream);
+    when(elasticSearchClient.prepareIndex(anyString(), anyString()))
+        .thenReturn(indexRequestBuilder);
+    when(indexRequestBuilder.setSource(source)).thenReturn(indexRequestBuilder);
+
+    fixture = new ElasticSearchTransportClient(elasticSearchClient, serializer);
+    fixture.setBulkRequestBuilder(bulkRequestBuilder);
+  }
+
+  /** An event without TTL is given its source and added to the bulk request. */
+  @Test
+  public void shouldAddNewEventWithoutTTL() throws Exception {
+    fixture.addEvent(event, nameBuilder, "bar_type", -1);
+
+    BytesReference expectedSource = serializer.getContentBuilder(event).bytes();
+    verify(indexRequestBuilder).setSource(expectedSource);
+    verify(bulkRequestBuilder).add(indexRequestBuilder);
+  }
+
+  /** An event with a positive TTL gets that TTL set on its index request. */
+  @Test
+  public void shouldAddNewEventWithTTL() throws Exception {
+    fixture.addEvent(event, nameBuilder, "bar_type", 10);
+
+    verify(indexRequestBuilder).setTTL(10);
+    BytesReference expectedSource = serializer.getContentBuilder(event).bytes();
+    verify(indexRequestBuilder).setSource(expectedSource);
+  }
+
+  /** execute() must run the bulk request when the response reports no failures. */
+  @Test
+  public void shouldExecuteBulkRequestBuilder() throws Exception {
+    stubBulkResponse(false);
+
+    fixture.addEvent(event, nameBuilder, "bar_type", 10);
+    fixture.execute();
+
+    verify(bulkRequestBuilder).execute();
+  }
+
+  /** A bulk response that reports failures must surface as EventDeliveryException. */
+  @Test(expected = EventDeliveryException.class)
+  public void shouldThrowExceptionOnExecuteFailed() throws Exception {
+    stubBulkResponse(true);
+
+    fixture.addEvent(event, nameBuilder, "bar_type", 10);
+    fixture.execute();
+  }
+
+  /**
+   * Makes the mocked bulk request builder return a future whose response
+   * reports the given failure flag.
+   */
+  private void stubBulkResponse(boolean hasFailures) {
+    ListenableActionFuture action = mock(ListenableActionFuture.class);
+    BulkResponse response = mock(BulkResponse.class);
+    when(bulkRequestBuilder.execute()).thenReturn(action);
+    when(action.actionGet()).thenReturn(response);
+    when(response.hasFailures()).thenReturn(hasFailures);
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/resources/log4j.properties b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/resources/log4j.properties
new file mode 100644
index 0000000..9036aca
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/resources/log4j.properties
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+log4j.rootLogger = DEBUG, out
+
+log4j.appender.out = org.apache.log4j.ConsoleAppender
+log4j.appender.out.layout = org.apache.log4j.PatternLayout
+log4j.appender.out.layout.ConversionPattern = %d (%t) [%p - %l] %m%n
+
+log4j.logger.org.apache.flume = DEBUG
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/artifacts/flume_ng_hbase_sink_jar.xml b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/artifacts/flume_ng_hbase_sink_jar.xml
new file mode 100644
index 0000000..f3e9b44
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/artifacts/flume_ng_hbase_sink_jar.xml
@@ -0,0 +1,8 @@
+
+
+ $PROJECT_DIR$/out/artifacts/flume_ng_hbase_sink_jar
+
+
+
+
+
\ No newline at end of file
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/compiler.xml b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/compiler.xml
new file mode 100644
index 0000000..6e72b1f
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/compiler.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/encodings.xml b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/encodings.xml
new file mode 100644
index 0000000..b26911b
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/encodings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/misc.xml b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/misc.xml
new file mode 100644
index 0000000..4b661a5
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/misc.xml
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/workspace.xml b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/workspace.xml
new file mode 100644
index 0000000..dd63465
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/workspace.xml
@@ -0,0 +1,435 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/out/artifacts/flume_ng_hbase_sink_jar/flume-ng-hbase-sink.jar b/code/flume-ng-sinks/flume-ng-hbase-sink/out/artifacts/flume_ng_hbase_sink_jar/flume-ng-hbase-sink.jar
new file mode 100644
index 0000000..e1bcfa7
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/out/artifacts/flume_ng_hbase_sink_jar/flume-ng-hbase-sink.jar differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/pom.xml b/code/flume-ng-sinks/flume-ng-hbase-sink/pom.xml
new file mode 100644
index 0000000..66ffa4d
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/pom.xml
@@ -0,0 +1,255 @@
+
+
+
+ 4.0.0
+
+ flume-ng-sinks
+ org.apache.flume
+ 1.7.0
+
+ org.apache.flume.flume-ng-sinks
+ flume-ng-hbase-sink
+ Flume NG HBase Sink
+
+
+
+
+ org.apache.rat
+ apache-rat-plugin
+
+
+
+
+
+
+ org.apache.flume
+ flume-ng-sdk
+
+
+
+ org.apache.flume
+ flume-ng-core
+
+
+
+ org.apache.flume
+ flume-ng-configuration
+
+
+
+ org.slf4j
+ slf4j-api
+
+
+
+ com.google.guava
+ guava
+
+
+
+
+ org.hbase
+ asynchbase
+
+
+
+ io.netty
+ netty
+
+
+
+ org.slf4j
+ slf4j-log4j12
+ test
+
+
+
+ junit
+ junit
+ test
+
+
+
+ org.apache.hadoop
+ ${hadoop.common.artifact.id}
+ true
+
+
+
+ commons-io
+ commons-io
+ test
+
+
+
+ commons-lang
+ commons-lang
+
+
+
+ org.mockito
+ mockito-all
+ test
+
+
+
+ org.apache.flume.flume-ng-sinks
+ flume-hdfs-sink
+
+
+
+
+
+
+ hadoop-1.0
+
+
+ flume.hadoop.profile
+ 1
+
+
+
+
+ org.apache.hadoop
+ hadoop-test
+ test
+
+
+
+ com.sun.jersey
+ jersey-core
+ test
+
+
+ org.apache.hbase
+ hbase
+ true
+
+
+
+ org.apache.hbase
+ hbase
+ tests
+ test
+
+
+
+ org.apache.zookeeper
+ zookeeper
+ test
+
+
+
+
+ hadoop-2
+
+
+ flume.hadoop.profile
+ 2
+
+
+
+
+ org.apache.hadoop
+ hadoop-minicluster
+ test
+
+
+ org.apache.hbase
+ hbase
+ true
+
+
+
+ org.apache.hbase
+ hbase
+ tests
+ test
+
+
+
+ org.apache.zookeeper
+ zookeeper
+ test
+
+
+
+
+ hbase-1
+
+
+ !flume.hadoop.profile
+
+
+
+
+ org.apache.hadoop
+ hadoop-minicluster
+ test
+
+
+
+ org.apache.hbase
+ hbase-client
+ true
+
+
+
+ org.apache.hbase
+ hbase-client
+ tests
+ test
+
+
+
+
+ org.apache.hbase
+ hbase-server
+ test
+
+
+
+ org.apache.hbase
+ hbase-server
+ tests
+ test
+
+
+
+
+
+ org.apache.hbase
+ hbase-common
+ true
+
+
+ org.apache.hbase
+ hbase-testing-util
+ test
+
+
+
+ org.apache.zookeeper
+ zookeeper
+ test
+
+
+
+
+
+
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/AsyncHBaseSink.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/AsyncHBaseSink.java
new file mode 100644
index 0000000..f120f59
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/AsyncHBaseSink.java
@@ -0,0 +1,708 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.hbase;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Charsets;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Throwables;
+import com.google.common.collect.Maps;
+import com.google.common.primitives.UnsignedBytes;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import com.stumbleupon.async.Callback;
+import org.apache.flume.Channel;
+import org.apache.flume.ChannelException;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.FlumeException;
+import org.apache.flume.Transaction;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.instrumentation.SinkCounter;
+import org.apache.flume.sink.AbstractSink;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.zookeeper.ZKConfig;
+import org.hbase.async.AtomicIncrementRequest;
+import org.hbase.async.HBaseClient;
+import org.hbase.async.PutRequest;
+import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.locks.Condition;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+
+/**
+ * A simple sink which reads events from a channel and writes them to HBase.
+ * This Sink uses an asynchronous API internally and is likely to
+ * perform better.
+ * The Hbase configuration is picked up from the first hbase-site.xml
+ * encountered in the classpath. This sink supports batch reading of
+ * events from the channel, and writing them to Hbase, to minimize the number
+ * of flushes on the hbase tables. To use this sink, it has to be configured
+ * with certain mandatory parameters:
+ *
+ * table: The name of the table in Hbase to write to.
+ * columnFamily: The column family in Hbase to write to.
+ * Other optional parameters are:
+ * serializer: A class implementing
+ * {@link AsyncHbaseEventSerializer}.
+ * An instance of
+ * this class will be used to serialize events which are written to hbase.
+ * serializer.*: Passed in the configure() method to
+ * serializer
+ * as an object of {@link org.apache.flume.Context}.
+ * batchSize: This is the batch size used by the client. This is the
+ * maximum number of events the sink will commit per transaction. The default
+ * batch size is 100 events.
+ *
+ * timeout: The length of time in milliseconds the sink waits for
+ * callbacks from hbase for all events in a transaction.
+ * If no timeout is specified, the sink will wait forever.
+ *
+ * Note: Hbase does not guarantee atomic commits on multiple
+ * rows. So if a subset of events in a batch are written to disk by Hbase and
+ * Hbase fails, the flume transaction is rolled back, causing flume to write
+ * all the events in the transaction all over again, which will cause
+ * duplicates. The serializer is expected to take care of the handling of
+ * duplicates etc. HBase also does not support batch increments, so if
+ * multiple increments are returned by the serializer, then HBase failure
+ * will cause them to be re-written, when HBase comes back up.
+ */
+public class AsyncHBaseSink extends AbstractSink implements Configurable {
+
+ // Destination table name and column family, both read in configure().
+ private String tableName;
+ private byte[] columnFamily;
+ // Upper bound on the number of events taken from the channel per process() call.
+ private long batchSize;
+ private static final Logger logger = LoggerFactory.getLogger(AsyncHBaseSink.class);
+ private AsyncHbaseEventSerializer serializer;
+ private String eventSerializerType;
+ private Context serializerContext;
+ private HBaseClient client;
+ private Configuration conf;
+ private Transaction txn;
+ // process() refuses to run while this is false.
+ private volatile boolean open = false;
+ private SinkCounter sinkCounter;
+ // Callback wait limit; configure() reads it in milliseconds and stores it in nanoseconds.
+ private long timeout;
+ private String zkQuorum;
+ private String zkBaseDir;
+ private ExecutorService sinkCallbackPool;
+ private boolean isTimeoutTest;
+ private boolean isCoalesceTest;
+ private boolean enableWal = true;
+ private boolean batchIncrements = false;
+ private volatile int totalCallbacksReceived = 0;
+ private int maxConsecutiveFails;
+ // Increments coalesced per cell within one transaction. Only allocated by configure()
+ // when increment coalescing (batchIncrements) is enabled, otherwise null.
+ private Map<CellIdentifier, AtomicIncrementRequest> incrementBuffer;
+ // The HBaseClient buffers the requests until a callback is received. In the event of a
+ // timeout, there is no way to clear these buffers. If there is a major cluster issue, this
+ // buffer can become too big and cause crashes. So if we hit a fixed number of HBase write
+ // failures/timeouts, then close the HBase Client (gracefully or not) and force a GC to get rid
+ // of the buffered data.
+ private int consecutiveHBaseFailures = 0;
+ private boolean lastTxnFailed = false;
+
+ // Does not need to be thread-safe. Always called only from the sink's
+ // process method.
+ private final Comparator<byte[]> COMPARATOR = UnsignedBytes.lexicographicalComparator();
+
+ /**
+ * Creates a sink with no preset HBase configuration; configure() builds one
+ * with HBaseConfiguration.create() if no ZooKeeper quorum is configured.
+ */
+ public AsyncHBaseSink() {
+ this(null);
+ }
+
+ /**
+ * Creates a sink that uses the given HBase configuration, with both test
+ * flags switched off.
+ *
+ * @param conf HBase configuration to use, or null
+ */
+ public AsyncHBaseSink(Configuration conf) {
+ this(conf, false, false);
+ }
+
+ /**
+ * Test-only constructor that additionally sets the two test flags.
+ *
+ * @param conf HBase configuration to use, or null
+ * @param isTimeoutTest stored as-is; its effect lies outside this constructor
+ * @param isCoalesceTest when true, process() accumulates the number of
+ * callbacks received into totalCallbacksReceived
+ */
+ @VisibleForTesting
+ AsyncHBaseSink(Configuration conf, boolean isTimeoutTest,
+ boolean isCoalesceTest) {
+ this.conf = conf;
+ this.isTimeoutTest = isTimeoutTest;
+ this.isCoalesceTest = isCoalesceTest;
+ }
+
+ /**
+ * Runs one sink transaction: takes up to batchSize events from the channel,
+ * hands the serializer's puts and increments to the HBase client, flushes,
+ * and then blocks until every expected callback has arrived, a failure has
+ * been flagged, or the configured timeout has elapsed. The channel
+ * transaction is committed only when no failure was flagged.
+ *
+ * @return Status.BACKOFF if the channel ran dry before the batch was full,
+ * otherwise Status.READY
+ * @throws EventDeliveryException if the sink is not open, no HBase client
+ * could be created, or the transaction was flagged as failed
+ * (including the callback timeout)
+ */
+ @Override
+ public Status process() throws EventDeliveryException {
+ /*
+ * Reference to the boolean representing failure of the current transaction.
+ * Since each txn gets a new boolean, failure of one txn will not affect
+ * the next even if errbacks for the current txn get called while
+ * the next one is being processed.
+ *
+ */
+ if (!open) {
+ throw new EventDeliveryException("Sink was never opened. " +
+ "Please fix the configuration.");
+ }
+ // Create a client on demand if none is currently held.
+ if (client == null) {
+ client = initHBaseClient();
+ if (client == null) {
+ throw new EventDeliveryException("Could not establish connection to HBase!");
+ }
+ }
+ // Per-call state shared with the callbacks created below.
+ AtomicBoolean txnFail = new AtomicBoolean(false);
+ AtomicInteger callbacksReceived = new AtomicInteger(0);
+ AtomicInteger callbacksExpected = new AtomicInteger(0);
+ final Lock lock = new ReentrantLock();
+ final Condition condition = lock.newCondition();
+ // incrementBuffer is non-null only when increment coalescing was enabled in configure().
+ if (incrementBuffer != null) {
+ incrementBuffer.clear();
+ }
+ /*
+ * Callbacks can be reused per transaction, since they share the same
+ * locks and conditions.
+ */
+ // NOTE(review): the generic type arguments on Callback/List/Collection in this method
+ // appear to have been lost in this copy (the for-each loops over raw collections below
+ // would not compile) -- confirm against the upstream source before editing.
+ Callback putSuccessCallback =
+ new SuccessCallback(
+ lock, callbacksReceived, condition);
+ Callback putFailureCallback =
+ new FailureCallback(
+ lock, callbacksReceived, txnFail, condition);
+
+ Callback incrementSuccessCallback =
+ new SuccessCallback(
+ lock, callbacksReceived, condition);
+ Callback incrementFailureCallback =
+ new FailureCallback(
+ lock, callbacksReceived, txnFail, condition);
+
+ Status status = Status.READY;
+ Channel channel = getChannel();
+ txn = channel.getTransaction();
+ txn.begin();
+
+ // i counts the events taken; it is read again after the loop for the counters.
+ int i = 0;
+ try {
+ for (; i < batchSize; i++) {
+ Event event = channel.take();
+ if (event == null) {
+ // Channel is empty: back off, and record whether the batch was empty or just short.
+ status = Status.BACKOFF;
+ if (i == 0) {
+ sinkCounter.incrementBatchEmptyCount();
+ } else {
+ sinkCounter.incrementBatchUnderflowCount();
+ }
+ break;
+ } else {
+ serializer.setEvent(event);
+ List actions = serializer.getActions();
+ List increments = serializer.getIncrements();
+ // One callback is expected per put; increments are counted here only when they are
+ // sent individually (coalesced increments are counted after the loop).
+ callbacksExpected.addAndGet(actions.size());
+ if (!batchIncrements) {
+ callbacksExpected.addAndGet(increments.size());
+ }
+
+ for (PutRequest action : actions) {
+ action.setDurable(enableWal);
+ client.put(action).addCallbacks(putSuccessCallback, putFailureCallback);
+ }
+ for (AtomicIncrementRequest increment : increments) {
+ if (batchIncrements) {
+ // Merge increments that target the same row key and qualifier by summing amounts.
+ CellIdentifier identifier = new CellIdentifier(increment.key(),
+ increment.qualifier());
+ AtomicIncrementRequest request
+ = incrementBuffer.get(identifier);
+ if (request == null) {
+ incrementBuffer.put(identifier, increment);
+ } else {
+ request.setAmount(request.getAmount() + increment.getAmount());
+ }
+ } else {
+ client.atomicIncrement(increment).addCallbacks(
+ incrementSuccessCallback, incrementFailureCallback);
+ }
+ }
+ }
+ }
+ if (batchIncrements) {
+ // Send the coalesced increments, one request per distinct cell.
+ Collection increments = incrementBuffer.values();
+ for (AtomicIncrementRequest increment : increments) {
+ client.atomicIncrement(increment).addCallbacks(
+ incrementSuccessCallback, incrementFailureCallback);
+ }
+ callbacksExpected.addAndGet(increments.size());
+ }
+ client.flush();
+ } catch (Throwable e) {
+ // NOTE(review): both helpers are defined outside this view; presumably the second one
+ // always rethrows, otherwise execution would continue below -- confirm.
+ this.handleTransactionFailure(txn);
+ this.checkIfChannelExceptionAndThrow(e);
+ }
+ if (i == batchSize) {
+ sinkCounter.incrementBatchCompleteCount();
+ }
+ sinkCounter.addToEventDrainAttemptCount(i);
+
+ // Wait under the lock shared with the callbacks until all expected callbacks have been
+ // counted or a failure is flagged. timeout is held in nanoseconds (see configure()).
+ lock.lock();
+ long startTime = System.nanoTime();
+ long timeRemaining;
+ try {
+ while ((callbacksReceived.get() < callbacksExpected.get())
+ && !txnFail.get()) {
+ timeRemaining = timeout - (System.nanoTime() - startTime);
+ timeRemaining = (timeRemaining >= 0) ? timeRemaining : 0;
+ try {
+ // await() returns false when the waiting time elapsed: treat that as a failed txn.
+ if (!condition.await(timeRemaining, TimeUnit.NANOSECONDS)) {
+ txnFail.set(true);
+ logger.warn("HBase callbacks timed out. "
+ + "Transaction will be rolled back.");
+ }
+ } catch (Exception ex) {
+ logger.error("Exception while waiting for callbacks from HBase.");
+ this.handleTransactionFailure(txn);
+ Throwables.propagate(ex);
+ }
+ }
+ } finally {
+ lock.unlock();
+ }
+
+ if (isCoalesceTest) {
+ totalCallbacksReceived += callbacksReceived.get();
+ }
+
+ /*
+ * At this point, either the txn has failed
+ * or all callbacks received and txn is successful.
+ *
+ * This need not be in the monitor, since all callbacks for this txn
+ * have been received. So txnFail will not be modified any more(even if
+ * it is, it is set from true to true only - false happens only
+ * in the next process call).
+ *
+ */
+ if (txnFail.get()) {
+ // We enter this if condition only if the failure was due to HBase failure, so we make sure
+ // we track the consecutive failures.
+ // The counter is only incremented from the second failure in a row onwards, because
+ // lastTxnFailed is still false on the first one.
+ if (lastTxnFailed) {
+ consecutiveHBaseFailures++;
+ }
+ lastTxnFailed = true;
+ this.handleTransactionFailure(txn);
+ throw new EventDeliveryException("Could not write events to Hbase. " +
+ "Transaction failed, and rolled back.");
+ } else {
+ try {
+ // Success resets the failure tracking before the channel transaction is committed.
+ lastTxnFailed = false;
+ consecutiveHBaseFailures = 0;
+ txn.commit();
+ txn.close();
+ sinkCounter.addToEventDrainSuccessCount(i);
+ } catch (Throwable e) {
+ this.handleTransactionFailure(txn);
+ this.checkIfChannelExceptionAndThrow(e);
+ }
+ }
+
+ return status;
+ }
+
+ /**
+  * Reads the sink settings from the Flume context and instantiates the event
+  * serializer.
+  *
+  * Table name and column family are mandatory. The serializer class defaults
+  * to SimpleAsyncHbaseEventSerializer, the batch size to 100, and a
+  * non-positive timeout is replaced by the default timeout. The ZooKeeper
+  * quorum is taken from the context when present, otherwise from the HBase
+  * configuration (created on demand when none was passed to the constructor).
+  *
+  * @param context sink configuration supplied by Flume
+  * @throws NullPointerException if the table name or column family is missing
+  * @throws IllegalStateException if no ZooKeeper quorum can be determined
+  * @throws RuntimeException if the serializer cannot be loaded, instantiated
+  *     or initialised (propagated via Throwables.propagate)
+  */
+ @Override
+ public void configure(Context context) {
+   tableName = context.getString(HBaseSinkConfigurationConstants.CONFIG_TABLE);
+   String cf = context.getString(
+       HBaseSinkConfigurationConstants.CONFIG_COLUMN_FAMILY);
+   // Long.valueOf instead of the deprecated Long constructor.
+   batchSize = context.getLong(
+       HBaseSinkConfigurationConstants.CONFIG_BATCHSIZE, Long.valueOf(100L));
+   serializerContext = new Context();
+   // If not specified, will use HBase defaults.
+   eventSerializerType = context.getString(
+       HBaseSinkConfigurationConstants.CONFIG_SERIALIZER);
+   Preconditions.checkNotNull(tableName,
+       "Table name cannot be empty, please specify in configuration file");
+   Preconditions.checkNotNull(cf,
+       "Column family cannot be empty, please specify in configuration file");
+   // Check for an event serializer; fall back to the default type if none is configured.
+   if (eventSerializerType == null || eventSerializerType.isEmpty()) {
+     eventSerializerType =
+         "org.apache.flume.sink.hbase.SimpleAsyncHbaseEventSerializer";
+     logger.info("No serializer defined, Will use default");
+   }
+   // Everything under the serializer prefix is handed to the serializer untouched.
+   serializerContext.putAll(context.getSubProperties(
+       HBaseSinkConfigurationConstants.CONFIG_SERIALIZER_PREFIX));
+   columnFamily = cf.getBytes(Charsets.UTF_8);
+   try {
+     @SuppressWarnings("unchecked")
+     Class<? extends AsyncHbaseEventSerializer> clazz =
+         (Class<? extends AsyncHbaseEventSerializer>)
+             Class.forName(eventSerializerType);
+     serializer = clazz.newInstance();
+     serializer.configure(serializerContext);
+     serializer.initialize(tableName.getBytes(Charsets.UTF_8), columnFamily);
+   } catch (Exception e) {
+     logger.error("Could not instantiate event serializer.", e);
+     Throwables.propagate(e);
+   }
+
+   // Keep an existing counter if configure() is called more than once.
+   if (sinkCounter == null) {
+     sinkCounter = new SinkCounter(this.getName());
+   }
+   timeout = context.getLong(HBaseSinkConfigurationConstants.CONFIG_TIMEOUT,
+       HBaseSinkConfigurationConstants.DEFAULT_TIMEOUT);
+   if (timeout <= 0) {
+     // NOTE(review): the message says the sink will not time out, but the code falls back
+     // to DEFAULT_TIMEOUT -- confirm which behaviour is intended.
+     logger.warn("Timeout should be positive for Hbase sink. "
+         + "Sink will not timeout.");
+     timeout = HBaseSinkConfigurationConstants.DEFAULT_TIMEOUT;
+   }
+   // Convert to nanos.
+   timeout = TimeUnit.MILLISECONDS.toNanos(timeout);
+
+   zkQuorum = context.getString(
+       HBaseSinkConfigurationConstants.ZK_QUORUM, "").trim();
+   if (!zkQuorum.isEmpty()) {
+     zkBaseDir = context.getString(
+         HBaseSinkConfigurationConstants.ZK_ZNODE_PARENT,
+         HBaseSinkConfigurationConstants.DEFAULT_ZK_ZNODE_PARENT);
+   } else {
+     if (conf == null) { // In tests, we pass the conf in.
+       conf = HBaseConfiguration.create();
+     }
+     zkQuorum = ZKConfig.getZKQuorumServersString(conf);
+     zkBaseDir = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
+         HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
+   }
+   Preconditions.checkState(zkQuorum != null && !zkQuorum.isEmpty(),
+       "The Zookeeper quorum cannot be null and should be specified.");
+
+   enableWal = context.getBoolean(HBaseSinkConfigurationConstants
+       .CONFIG_ENABLE_WAL, HBaseSinkConfigurationConstants.DEFAULT_ENABLE_WAL);
+   logger.info("The write to WAL option is set to: " + String.valueOf(enableWal));
+   if (!enableWal) {
+     logger.warn("AsyncHBaseSink's enableWal configuration is set to false. " +
+         "All writes to HBase will have WAL disabled, and any data in the " +
+         "memstore of this region in the Region Server could be lost!");
+   }
+
+   batchIncrements = context.getBoolean(
+       HBaseSinkConfigurationConstants.CONFIG_COALESCE_INCREMENTS,
+       HBaseSinkConfigurationConstants.DEFAULT_COALESCE_INCREMENTS);
+
+   // The buffer is only allocated when coalescing is on; process() checks it for null.
+   if (batchIncrements) {
+     incrementBuffer = Maps.newHashMap();
+     logger.info("Increment coalescing is enabled. Increments will be " +
+         "buffered.");
+   }
+
+   maxConsecutiveFails =
+       context.getInteger(HBaseSinkConfigurationConstants.CONFIG_MAX_CONSECUTIVE_FAILS,
+           HBaseSinkConfigurationConstants.DEFAULT_MAX_CONSECUTIVE_FAILS);
+
+ }
+
+ /**
+ * Returns the current value of the {@code totalCallbacksReceived} field.
+ * Package-private and annotated {@code @VisibleForTesting}.
+ */
+ @VisibleForTesting
+ int getTotalCallbacksReceived() {
+ return totalCallbacksReceived;
+ }
+
+ /**
+ * Reports whether the {@code conf} field is currently {@code null}.
+ * {@link #stop()} resets {@code conf} to {@code null}.
+ */
+ @VisibleForTesting
+ boolean isConfNull() {
+ return conf == null;
+ }
+
+ /**
+ * Starts the sink.
+ *
+ * Rejects the call if a client already exists (i.e. {@link #stop()} was not
+ * called on a previously started instance), starts the sink counter and
+ * records a created connection, builds the HBase client through
+ * {@code initHBaseClient()} and finally delegates to {@code super.start()}.
+ */
+ @Override
+ public void start() {
+ Preconditions.checkArgument(client == null, "Please call stop "
+ + "before calling start on an old instance.");
+ sinkCounter.start();
+ // NOTE(review): the created-connection counter is bumped before the client
+ // is actually built; a failure in initHBaseClient() also bumps the failed
+ // counter.
+ sinkCounter.incrementConnectionCreatedCount();
+ client = initHBaseClient();
+ super.start();
+ }
+
+ /**
+ * Creates the callback thread pool and the HBase client, then blocks until
+ * the client reports whether the configured table and column family exist.
+ *
+ * On success the {@code open} flag is set, the client's flush interval is
+ * set to 0 and the client is returned. If the existence check fails, the
+ * client is shut down and a {@link FlumeException} is thrown. If the waiting
+ * thread is interrupted, a {@link FlumeException} is thrown as well. Both
+ * failure paths increment the connection-failed counter.
+ *
+ * @return the initialized client (also stored in the {@code client} field)
+ */
+ private HBaseClient initHBaseClient() {
+ logger.info("Initializing HBase Client");
+
+ // One cached pool is used for both executor arguments of the channel factory.
+ sinkCallbackPool = Executors.newCachedThreadPool(new ThreadFactoryBuilder()
+ .setNameFormat(this.getName() + " HBase Call Pool").build());
+ logger.info("Callback pool created");
+ client = new HBaseClient(zkQuorum, zkBaseDir,
+ new NioClientSocketChannelFactory(sinkCallbackPool, sinkCallbackPool));
+
+ // The latch is released by whichever callback fires; 'fail' records which one.
+ final CountDownLatch latch = new CountDownLatch(1);
+ final AtomicBoolean fail = new AtomicBoolean(false);
+ client.ensureTableFamilyExists(
+ tableName.getBytes(Charsets.UTF_8), columnFamily).addCallbacks(
+ // First callback: the table/column family check succeeded.
+ new Callback() {
+ @Override
+ public Object call(Object arg) throws Exception {
+ latch.countDown();
+ logger.info("table found");
+ return null;
+ }
+ },
+ // Second callback: the check failed; flag it before releasing the latch.
+ new Callback() {
+ @Override
+ public Object call(Object arg) throws Exception {
+ fail.set(true);
+ latch.countDown();
+ return null;
+ }
+ });
+
+ try {
+ logger.info("waiting on callback");
+ // NOTE(review): this wait has no timeout, so it blocks until one of the
+ // callbacks above runs.
+ latch.await();
+ logger.info("callback received");
+ } catch (InterruptedException e) {
+ // NOTE(review): the interrupt status is not restored and neither the
+ // client nor sinkCallbackPool is shut down on this path.
+ sinkCounter.incrementConnectionFailedCount();
+ throw new FlumeException(
+ "Interrupted while waiting for Hbase Callbacks", e);
+ }
+ if (fail.get()) {
+ sinkCounter.incrementConnectionFailedCount();
+ if (client != null) {
+ // shutdownHBaseClient() also resets the client field to null.
+ shutdownHBaseClient();
+ }
+ throw new FlumeException(
+ "Could not start sink. " +
+ "Table or column family does not exist in Hbase.");
+ } else {
+ open = true;
+ }
+ client.setFlushInterval((short) 0);
+ return client;
+ }
+
+ @Override
+ public void stop() {
+ serializer.cleanUp();
+ if (client != null) {
+ shutdownHBaseClient();
+ }
+ sinkCounter.incrementConnectionClosedCount();
+ sinkCounter.stop();
+
+ try {
+ if (sinkCallbackPool != null) {
+ sinkCallbackPool.shutdown();
+ if (!sinkCallbackPool.awaitTermination(5, TimeUnit.SECONDS)) {
+ sinkCallbackPool.shutdownNow();
+ }
+ }
+ } catch (InterruptedException e) {
+ logger.error("Interrupted while waiting for asynchbase sink pool to " +
+ "die", e);
+ if (sinkCallbackPool != null) {
+ sinkCallbackPool.shutdownNow();
+ }
+ }
+ sinkCallbackPool = null;
+ client = null;
+ conf = null;
+ open = false;
+ super.stop();
+ }
+
+ /**
+ * Asks the HBase client to shut down and waits up to {@code timeout}
+ * nanoseconds for the shutdown to complete.
+ *
+ * Failures are only logged: an error callback, an expired wait or any
+ * exception raised here does not propagate to the caller. The {@code client}
+ * field is always reset to {@code null} afterwards.
+ */
+ private void shutdownHBaseClient() {
+ logger.info("Shutting down HBase Client");
+ // Released by either the success callback or the error callback below.
+ final CountDownLatch waiter = new CountDownLatch(1);
+ try {
+ client.shutdown().addCallback(new Callback() {
+ @Override
+ public Object call(Object arg) throws Exception {
+ waiter.countDown();
+ return null;
+ }
+ }).addErrback(new Callback() {
+ @Override
+ public Object call(Object arg) throws Exception {
+ logger.error("Failed to shutdown HBase client cleanly! HBase cluster might be down");
+ waiter.countDown();
+ return null;
+ }
+ });
+ // The timeout field is interpreted as nanoseconds here.
+ if (!waiter.await(timeout, TimeUnit.NANOSECONDS)) {
+ logger.error("HBase connection could not be closed within timeout! HBase cluster might " +
+ "be down!");
+ }
+ } catch (Exception ex) {
+ // NOTE(review): the caught exception is not passed to the logger, so its
+ // stack trace is lost; this also covers InterruptedException from await().
+ logger.warn("Error while attempting to close connections to HBase");
+ } finally {
+ // Dereference the client to force GC to clear up any buffered requests.
+ client = null;
+ }
+ }
+
+ /**
+  * Rolls back and closes a failed transaction.
+  *
+  * When a positive {@code maxConsecutiveFails} limit is configured and the
+  * number of consecutive HBase failures has reached it, the HBase client is
+  * shut down first and the failure counter is reset.
+  *
+  * @param txn the transaction to roll back; it is always closed
+  * @throws EventDeliveryException if the rollback fails with a checked
+  *         exception; errors and runtime exceptions from the rollback are
+  *         rethrown unchanged
+  */
+ private void handleTransactionFailure(Transaction txn)
+     throws EventDeliveryException {
+   if (maxConsecutiveFails > 0 && consecutiveHBaseFailures >= maxConsecutiveFails) {
+     if (client != null) {
+       shutdownHBaseClient();
+     }
+     consecutiveHBaseFailures = 0;
+   }
+   try {
+     txn.rollback();
+   } catch (Throwable e) {
+     // Only the rollback can fail inside this try block, so report exactly
+     // that, and log it once.
+     logger.error("Failed to roll back transaction.", e);
+     if (e instanceof Error || e instanceof RuntimeException) {
+       Throwables.propagate(e);
+     } else {
+       throw new EventDeliveryException("Failed to roll back transaction.", e);
+     }
+   } finally {
+     txn.close();
+   }
+ }
+
+ /**
+ * Callback that counts a received callback and signals the supplied
+ * condition while holding the supplied lock.
+ */
+ private class SuccessCallback implements Callback {
+ private Lock lock;
+ private AtomicInteger callbacksReceived;
+ private Condition condition;
+ // Snapshot of isTimeoutTest (defined outside this class) taken at construction.
+ private final boolean isTimeoutTesting;
+
+ /**
+ * @param lck lock that must be held while signalling {@code condition}
+ * @param callbacksReceived counter incremented once per invocation
+ * @param condition condition signalled after the counter is incremented
+ */
+ public SuccessCallback(Lock lck, AtomicInteger callbacksReceived,
+ Condition condition) {
+ lock = lck;
+ this.callbacksReceived = callbacksReceived;
+ this.condition = condition;
+ isTimeoutTesting = isTimeoutTest;
+ }
+
+ /**
+ * Records the callback and always returns {@code null}. In timeout-test
+ * mode the call is first delayed by four seconds; an interrupt during that
+ * delay is ignored.
+ */
+ @Override
+ public R call(T arg) throws Exception {
+ if (isTimeoutTesting) {
+ try {
+ //tests set timeout to 10 seconds, so sleep for 4 seconds
+ TimeUnit.NANOSECONDS.sleep(TimeUnit.SECONDS.toNanos(4));
+ } catch (InterruptedException e) {
+ //ignore
+ }
+ }
+ doCall();
+ return null;
+ }
+
+ // Increments the counter, then signals the condition under the lock.
+ private void doCall() throws Exception {
+ callbacksReceived.incrementAndGet();
+ lock.lock();
+ try {
+ condition.signal();
+ } finally {
+ lock.unlock();
+ }
+ }
+ }
+
+ /**
+ * Callback that logs the failure, counts a received callback, marks the
+ * transaction as failed and signals the supplied condition while holding
+ * the supplied lock.
+ */
+ private class FailureCallback implements Callback {
+ private Lock lock;
+ private AtomicInteger callbacksReceived;
+ private AtomicBoolean txnFail;
+ private Condition condition;
+ // Snapshot of isTimeoutTest (defined outside this class) taken at construction.
+ private final boolean isTimeoutTesting;
+
+ /**
+ * @param lck lock that must be held while signalling {@code condition}
+ * @param callbacksReceived counter incremented once per invocation
+ * @param txnFail flag set to {@code true} on every invocation
+ * @param condition condition signalled after the flag is set
+ */
+ public FailureCallback(Lock lck, AtomicInteger callbacksReceived,
+ AtomicBoolean txnFail, Condition condition) {
+ this.lock = lck;
+ this.callbacksReceived = callbacksReceived;
+ this.txnFail = txnFail;
+ this.condition = condition;
+ isTimeoutTesting = isTimeoutTest;
+ }
+
+ /**
+ * Logs the argument, records the failure and always returns {@code null}.
+ * In timeout-test mode the call is first delayed by four seconds; an
+ * interrupt during that delay is ignored.
+ */
+ @Override
+ public R call(T arg) throws Exception {
+ logger.error("failure callback:", arg);
+ if (isTimeoutTesting) {
+ //tests set timeout to 10 seconds, so sleep for 4 seconds
+ try {
+ TimeUnit.NANOSECONDS.sleep(TimeUnit.SECONDS.toNanos(4));
+ } catch (InterruptedException e) {
+ //ignore
+ }
+ }
+ doCall();
+ return null;
+ }
+
+ // Increments the counter and sets the failure flag, then signals the
+ // condition under the lock.
+ private void doCall() throws Exception {
+ callbacksReceived.incrementAndGet();
+ this.txnFail.set(true);
+ lock.lock();
+ try {
+ condition.signal();
+ } finally {
+ lock.unlock();
+ }
+ }
+ }
+
+ /**
+  * Rethrows a processing failure in the form callers expect; this method
+  * never returns normally.
+  *
+  * A {@code ChannelException} and any checked exception are wrapped in an
+  * {@link EventDeliveryException}. Every other {@link Error} or
+  * {@link RuntimeException} is propagated unchanged.
+  *
+  * @param e the failure to rethrow
+  * @throws EventDeliveryException wrapping {@code e} as described above
+  */
+ private void checkIfChannelExceptionAndThrow(Throwable e)
+     throws EventDeliveryException {
+   boolean unchecked = e instanceof Error || e instanceof RuntimeException;
+   if (unchecked && !(e instanceof ChannelException)) {
+     Throwables.propagate(e);
+   }
+   throw new EventDeliveryException("Error in processing transaction.", e);
+ }
+
+ /**
+  * Identifies a cell by its row key and column qualifier. The hash code is
+  * computed once at construction; equality compares both byte arrays with
+  * {@code COMPARATOR}.
+  */
+ private class CellIdentifier {
+   private final byte[] row;
+   private final byte[] column;
+   private final int hashCode;
+
+   // Since the sink operates only on one table and one cf,
+   // we use the data from the owning sink
+   public CellIdentifier(byte[] row, byte[] column) {
+     this.row = row;
+     this.column = column;
+     // Combine additively: multiplying the two array hashes together yields 0
+     // whenever either hash is 0 and discards low-order bits, which causes
+     // avoidable collisions in hash-based collections.
+     this.hashCode = 31 * Arrays.hashCode(row) + Arrays.hashCode(column);
+   }
+
+   @Override
+   public int hashCode() {
+     return hashCode;
+   }
+
+   // Since we know that this class is used from only this class,
+   // skip the class comparison to save time
+   @Override
+   public boolean equals(Object other) {
+     if (other == null) {
+       return false;
+     }
+     CellIdentifier o = (CellIdentifier) other;
+     return COMPARATOR.compare(row, o.row) == 0
+         && COMPARATOR.compare(column, o.column) == 0;
+   }
+ }
+}
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/AsyncHbaseEventSerializer.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/AsyncHbaseEventSerializer.java
new file mode 100644
index 0000000..481fce8
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/AsyncHbaseEventSerializer.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.hbase;
+
+import java.util.List;
+
+import org.apache.flume.Event;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.conf.ConfigurableComponent;
+import org.hbase.async.AtomicIncrementRequest;
+import org.hbase.async.PutRequest;
+
+/**
+ * Interface for an event serializer which serializes the headers and body
+ * of an event to write them to hbase. This is configurable, so any config
+ * params required should be taken through this.
+ * The table and the column family passed to
+ * {@link #initialize(byte[], byte[])} are expected to be valid. An
+ * implementation of this interface is expected by the
+ * {@linkplain AsyncHBaseSink} to serialize the events.
+ */
+public interface AsyncHbaseEventSerializer extends Configurable, ConfigurableComponent {
+
+ /**
+ * Initialize the event serializer.
+ * @param table - The table the serializer should use when creating
+ * {@link org.hbase.async.PutRequest} or
+ * {@link org.hbase.async.AtomicIncrementRequest}.
+ * @param cf - The column family to be used.
+ */
+ public void initialize(byte[] table, byte[] cf);
+
+ /**
+ * Sets the event the following {@link #getActions()} and
+ * {@link #getIncrements()} calls should serialize.
+ * @param event Event to be written to HBase
+ */
+ public void setEvent(Event event);
+
+ /**
+ * Get the actions that should be written out to hbase as a result of this
+ * event. This list is written to hbase.
+ * @return List of {@link org.hbase.async.PutRequest} which
+ * are written as such to HBase.
+ */
+ public List getActions();
+
+ /**
+ * Get the increments that should be made in hbase as a result of this
+ * event. This list is written to hbase.
+ * @return List of {@link org.hbase.async.AtomicIncrementRequest} which
+ * are written as such to HBase.
+ */
+ public List getIncrements();
+
+ /**
+ * Clean up any state. This will be called when the sink is being stopped.
+ */
+ public void cleanUp();
+}
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/BatchAware.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/BatchAware.java
new file mode 100644
index 0000000..0974241
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/BatchAware.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.hbase;
+
+/**
+ * Optional interface for HBase serializers that need to know where a batch
+ * begins. The sink invokes {@link #onBatchStart()} at the beginning of each
+ * batch.
+ */
+public interface BatchAware {
+
+  /**
+   * Called by the sink at the beginning of each batch.
+   */
+  void onBatchStart();
+}
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/HBaseSink.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/HBaseSink.java
new file mode 100644
index 0000000..4c8b52b
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/HBaseSink.java
@@ -0,0 +1,558 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.hbase;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Charsets;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Throwables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.FlumeException;
+import org.apache.flume.Transaction;
+import org.apache.flume.annotations.InterfaceAudience;
+import org.apache.flume.auth.FlumeAuthenticationUtil;
+import org.apache.flume.auth.PrivilegedExecutor;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.instrumentation.SinkCounter;
+import org.apache.flume.sink.AbstractSink;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Increment;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Row;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.security.PrivilegedExceptionAction;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+
+/**
+ * A simple sink which reads events from a channel and writes them to HBase.
+ * The Hbase configuration is picked up from the first hbase-site.xml
+ * encountered in the classpath. This sink supports batch reading of
+ * events from the channel, and writing them to Hbase, to minimize the number
+ * of flushes on the hbase tables. To use this sink, it has to be configured
+ * with certain mandatory parameters:
+ * table: The name of the table in Hbase to write to.
+ * columnFamily: The column family in Hbase to write to.
+ * This sink will commit each transaction if the table's write buffer size is
+ * reached or if the number of events in the current transaction reaches the
+ * batch size, whichever comes first.
+ * Other optional parameters are:
+ * serializer: A class implementing {@link HbaseEventSerializer}.
+ * An instance of
+ * this class will be used to write out events to hbase.
+ * serializer.*: Passed in the configure() method to serializer
+ * as an object of {@link org.apache.flume.Context}.
+ * batchSize: This is the batch size used by the client. This is the
+ * maximum number of events the sink will commit per transaction. The default
+ * batch size is 100 events.
+ *
+ *
+ * Note: While this sink flushes all events in a transaction
+ * to HBase in one shot, Hbase does not guarantee atomic commits on multiple
+ * rows. So if a subset of events in a batch are written to disk by Hbase and
+ * Hbase fails, the flume transaction is rolled back, causing flume to write
+ * all the events in the transaction all over again, which will cause
+ * duplicates. The serializer is expected to take care of the handling of
+ * duplicates etc. HBase also does not support batch increments, so if
+ * multiple increments are returned by the serializer, then HBase failure
+ * will cause them to be re-written, when HBase comes back up.
+ */
+public class HBaseSink extends AbstractSink implements Configurable {
+ private String tableName;
+ private byte[] columnFamily;
+ private HTable table;
+ private long batchSize;
+ private Configuration config;
+ private static final Logger logger = LoggerFactory.getLogger(HBaseSink.class);
+ private HbaseEventSerializer serializer;
+ private String eventSerializerType;
+ private Context serializerContext;
+ private String kerberosPrincipal;
+ private String kerberosKeytab;
+ private boolean enableWal = true;
+ private boolean batchIncrements = false;
+ private Method refGetFamilyMap = null;
+ private SinkCounter sinkCounter;
+ private PrivilegedExecutor privilegedExecutor;
+
+ // Internal hooks used for unit testing.
+ private DebugIncrementsCallback debugIncrCallback = null;
+
+ /**
+ * Creates a sink backed by a configuration obtained from
+ * {@code HBaseConfiguration.create()}.
+ */
+ public HBaseSink() {
+ this(HBaseConfiguration.create());
+ }
+
+ /**
+ * Creates a sink that uses the given configuration. The object is stored as
+ * is (not copied) and is later modified by {@code configure(Context)} when
+ * ZooKeeper overrides are present.
+ *
+ * @param conf configuration used to open the table
+ */
+ public HBaseSink(Configuration conf) {
+ this.config = conf;
+ }
+
+ /**
+ * Test-only constructor that additionally installs a hook which is invoked
+ * with the increments after coalescing.
+ *
+ * @param conf configuration used to open the table
+ * @param cb callback notified after increments were coalesced
+ */
+ @VisibleForTesting
+ @InterfaceAudience.Private
+ HBaseSink(Configuration conf, DebugIncrementsCallback cb) {
+ this(conf);
+ this.debugIncrCallback = cb;
+ }
+
+ /**
+ * Starts the sink: obtains a privileged executor for the configured Kerberos
+ * principal and keytab, opens the table with auto-flush disabled and verifies
+ * that the configured column family exists.
+ *
+ * Every failure increments the connection-failed counter and is reported as
+ * a {@link FlumeException}. On success {@code super.start()} is called and
+ * the sink counter is started.
+ */
+ @Override
+ public void start() {
+ Preconditions.checkArgument(table == null, "Please call stop " +
+ "before calling start on an old instance.");
+ try {
+ privilegedExecutor =
+ FlumeAuthenticationUtil.getAuthenticator(kerberosPrincipal, kerberosKeytab);
+ } catch (Exception ex) {
+ sinkCounter.incrementConnectionFailedCount();
+ throw new FlumeException("Failed to login to HBase using "
+ + "provided credentials.", ex);
+ }
+ try {
+ table = privilegedExecutor.execute(new PrivilegedExceptionAction() {
+ @Override
+ public HTable run() throws Exception {
+ HTable table = new HTable(config, tableName);
+ table.setAutoFlush(false);
+ // Flush is controlled by us. This ensures that HBase changing
+ // their criteria for flushing does not change how we flush.
+ return table;
+ }
+ });
+ } catch (Exception e) {
+ sinkCounter.incrementConnectionFailedCount();
+ logger.error("Could not load table, " + tableName +
+ " from HBase", e);
+ throw new FlumeException("Could not load table, " + tableName +
+ " from HBase", e);
+ }
+ try {
+ if (!privilegedExecutor.execute(new PrivilegedExceptionAction() {
+ @Override
+ public Boolean run() throws IOException {
+ return table.getTableDescriptor().hasFamily(columnFamily);
+ }
+ })) {
+ throw new IOException("Table " + tableName
+ + " has no such column family " + Bytes.toString(columnFamily));
+ }
+ } catch (Exception e) {
+ //Get getTableDescriptor also throws IOException, so catch the IOException
+ //thrown above or by the getTableDescriptor() call.
+ // NOTE(review): on this path the table opened above is neither closed nor
+ // reset to null, so a later start() fails its precondition until stop()
+ // has been called.
+ sinkCounter.incrementConnectionFailedCount();
+ throw new FlumeException("Error getting column family from HBase."
+ + "Please verify that the table " + tableName + " and Column Family, "
+ + Bytes.toString(columnFamily) + " exists in HBase, and the"
+ + " current user has permissions to access that table.", e);
+ }
+
+ super.start();
+ sinkCounter.incrementConnectionCreatedCount();
+ sinkCounter.start();
+ }
+
+ /**
+  * Stops the sink: closes the table, updates and stops the sink counter and
+  * delegates to {@code super.stop()}.
+  *
+  * The cleanup runs even when closing the table fails, so the table
+  * reference is always cleared and the instance can be started again.
+  *
+  * @throws FlumeException if closing the table fails
+  */
+ @Override
+ public void stop() {
+   try {
+     if (table != null) {
+       table.close();
+     }
+   } catch (IOException e) {
+     throw new FlumeException("Error closing table.", e);
+   } finally {
+     // start() refuses to run while table is non-null, so clear it even if
+     // close() failed.
+     table = null;
+     sinkCounter.incrementConnectionClosedCount();
+     sinkCounter.stop();
+     // Mirror the super.start() call made in start().
+     super.stop();
+   }
+ }
+
+ /**
+  * Configures the sink from the given context.
+  *
+  * Reads the table name and column family (both required), the batch size
+  * (default 100), the serializer class (default
+  * {@code org.apache.flume.sink.hbase.SimpleHbaseEventSerializer}) together
+  * with its {@code serializer.*} sub-properties, the Kerberos keytab and
+  * principal, the WAL and increment-coalescing flags, and the optional
+  * ZooKeeper quorum and znode parent, which are written into the HBase
+  * configuration held by this sink.
+  *
+  * @param context the sink configuration
+  * @throws NullPointerException if the table name or column family is missing
+  * @throws FlumeException if the quorum nodes specify different client ports
+  */
+ @SuppressWarnings("unchecked")
+ @Override
+ public void configure(Context context) {
+   tableName = context.getString(HBaseSinkConfigurationConstants.CONFIG_TABLE);
+   String cf = context.getString(
+       HBaseSinkConfigurationConstants.CONFIG_COLUMN_FAMILY);
+   batchSize = context.getLong(
+       HBaseSinkConfigurationConstants.CONFIG_BATCHSIZE, Long.valueOf(100L));
+   serializerContext = new Context();
+   //If not specified, will use HBase defaults.
+   eventSerializerType = context.getString(
+       HBaseSinkConfigurationConstants.CONFIG_SERIALIZER);
+   Preconditions.checkNotNull(tableName,
+       "Table name cannot be empty, please specify in configuration file");
+   Preconditions.checkNotNull(cf,
+       "Column family cannot be empty, please specify in configuration file");
+   //Check for event serializer, if null set event serializer type
+   if (eventSerializerType == null || eventSerializerType.isEmpty()) {
+     eventSerializerType =
+         "org.apache.flume.sink.hbase.SimpleHbaseEventSerializer";
+     logger.info("No serializer defined, Will use default");
+   }
+   serializerContext.putAll(context.getSubProperties(
+       HBaseSinkConfigurationConstants.CONFIG_SERIALIZER_PREFIX));
+   columnFamily = cf.getBytes(Charsets.UTF_8);
+   try {
+     Class<? extends HbaseEventSerializer> clazz =
+         (Class<? extends HbaseEventSerializer>)
+         Class.forName(eventSerializerType);
+     serializer = clazz.newInstance();
+     serializer.configure(serializerContext);
+   } catch (Exception e) {
+     logger.error("Could not instantiate event serializer.", e);
+     Throwables.propagate(e);
+   }
+   kerberosKeytab = context.getString(HBaseSinkConfigurationConstants.CONFIG_KEYTAB);
+   kerberosPrincipal = context.getString(HBaseSinkConfigurationConstants.CONFIG_PRINCIPAL);
+
+   enableWal = context.getBoolean(HBaseSinkConfigurationConstants
+       .CONFIG_ENABLE_WAL, HBaseSinkConfigurationConstants.DEFAULT_ENABLE_WAL);
+   logger.info("The write to WAL option is set to: " + String.valueOf(enableWal));
+   if (!enableWal) {
+     logger.warn("HBase Sink's enableWal configuration is set to false. All " +
+         "writes to HBase will have WAL disabled, and any data in the " +
+         "memstore of this region in the Region Server could be lost!");
+   }
+
+   batchIncrements = context.getBoolean(
+       HBaseSinkConfigurationConstants.CONFIG_COALESCE_INCREMENTS,
+       HBaseSinkConfigurationConstants.DEFAULT_COALESCE_INCREMENTS);
+
+   if (batchIncrements) {
+     logger.info("Increment coalescing is enabled. Increments will be " +
+         "buffered.");
+     refGetFamilyMap = reflectLookupGetFamilyMap();
+   }
+
+   String zkQuorum = context.getString(HBaseSinkConfigurationConstants
+       .ZK_QUORUM);
+   Integer port = null;
+   /**
+    * HBase allows multiple nodes in the quorum, but all need to use the
+    * same client port. So get the nodes in host:port format,
+    * and ignore the ports for all nodes except the first one. If no port is
+    * specified, use default.
+    */
+   if (zkQuorum != null && !zkQuorum.isEmpty()) {
+     StringBuilder zkBuilder = new StringBuilder();
+     logger.info("Using ZK Quorum: " + zkQuorum);
+     String[] zkHosts = zkQuorum.split(",");
+     int length = zkHosts.length;
+     for (int i = 0; i < length; i++) {
+       String[] zkHostAndPort = zkHosts[i].split(":");
+       zkBuilder.append(zkHostAndPort[0].trim());
+       if (i != length - 1) {
+         zkBuilder.append(",");
+       } else {
+         zkQuorum = zkBuilder.toString();
+       }
+       // split() yields a single element when the node has no ":port" part.
+       // Indexing [1] there used to throw ArrayIndexOutOfBoundsException;
+       // such a node now simply does not contribute a port.
+       if (zkHostAndPort.length < 2) {
+         continue;
+       }
+       int nodePort = Integer.parseInt(zkHostAndPort[1].trim());
+       if (port == null) {
+         port = nodePort;
+       } else if (port.intValue() != nodePort) {
+         throw new FlumeException("All Zookeeper nodes in the quorum must " +
+             "use the same client port.");
+       }
+     }
+     if (port == null) {
+       port = HConstants.DEFAULT_ZOOKEPER_CLIENT_PORT;
+     }
+     this.config.set(HConstants.ZOOKEEPER_QUORUM, zkQuorum);
+     this.config.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, port);
+   }
+   String hbaseZnode = context.getString(
+       HBaseSinkConfigurationConstants.ZK_ZNODE_PARENT);
+   if (hbaseZnode != null && !hbaseZnode.isEmpty()) {
+     this.config.set(HConstants.ZOOKEEPER_ZNODE_PARENT, hbaseZnode);
+   }
+   sinkCounter = new SinkCounter(this.getName());
+ }
+
+ /**
+ * Returns the HBase configuration object held by this sink (the instance
+ * itself, not a copy).
+ */
+ public Configuration getConfig() {
+ return config;
+ }
+
+ /**
+  * Takes up to {@code batchSize} events from the channel inside a single
+  * transaction, lets the serializer turn them into row actions and
+  * increments, and writes them to HBase.
+  *
+  * @return {@code Status.BACKOFF} when the channel had no event at all,
+  *         {@code Status.READY} otherwise
+  * @throws EventDeliveryException if processing fails with a checked
+  *         exception; errors and runtime exceptions are rethrown unchanged.
+  *         In both cases a rollback of the transaction is attempted first.
+  */
+ @Override
+ public Status process() throws EventDeliveryException {
+   Status status = Status.READY;
+   Channel channel = getChannel();
+   Transaction txn = channel.getTransaction();
+   List<Row> actions = new LinkedList<Row>();
+   List<Increment> incs = new LinkedList<Increment>();
+   try {
+     txn.begin();
+
+     if (serializer instanceof BatchAware) {
+       ((BatchAware) serializer).onBatchStart();
+     }
+
+     long i = 0;
+     for (; i < batchSize; i++) {
+       Event event = channel.take();
+       if (event == null) {
+         // Channel drained: an empty batch backs off, a partial one is an
+         // underflow.
+         if (i == 0) {
+           status = Status.BACKOFF;
+           sinkCounter.incrementBatchEmptyCount();
+         } else {
+           sinkCounter.incrementBatchUnderflowCount();
+         }
+         break;
+       } else {
+         serializer.initialize(event, columnFamily);
+         actions.addAll(serializer.getActions());
+         incs.addAll(serializer.getIncrements());
+       }
+     }
+     if (i == batchSize) {
+       sinkCounter.incrementBatchCompleteCount();
+     }
+     sinkCounter.addToEventDrainAttemptCount(i);
+
+     putEventsAndCommit(actions, incs, txn);
+
+   } catch (Throwable e) {
+     try {
+       txn.rollback();
+     } catch (Exception e2) {
+       logger.error("Exception in rollback. Rollback might not have been " +
+           "successful.", e2);
+     }
+     // Log the failure once; it used to be logged again in each branch below.
+     logger.error("Failed to commit transaction. " +
+         "Transaction rolled back.", e);
+     if (e instanceof Error || e instanceof RuntimeException) {
+       Throwables.propagate(e);
+     } else {
+       throw new EventDeliveryException("Failed to commit transaction. " +
+           "Transaction rolled back.", e);
+     }
+   } finally {
+     txn.close();
+   }
+   return status;
+ }
+
+ /**
+ * Writes the given actions and increments to the table through the
+ * privileged executor and then commits the transaction.
+ *
+ * The WAL setting of the sink is applied to every {@code Put} and
+ * {@code Increment} before it is sent. The actions go out in one
+ * {@code table.batch} call; the increments are applied one at a time,
+ * optionally after coalescing.
+ *
+ * @param actions row actions to send as one batch
+ * @param incs increments to apply
+ * @param txn transaction committed after both writes succeeded
+ * @throws Exception if either privileged action or the commit fails
+ */
+ private void putEventsAndCommit(final List actions,
+ final List incs, Transaction txn) throws Exception {
+
+ privilegedExecutor.execute(new PrivilegedExceptionAction() {
+ @Override
+ public Void run() throws Exception {
+ for (Row r : actions) {
+ if (r instanceof Put) {
+ ((Put) r).setWriteToWAL(enableWal);
+ }
+ // Newer versions of HBase - Increment implements Row.
+ if (r instanceof Increment) {
+ ((Increment) r).setWriteToWAL(enableWal);
+ }
+ }
+ table.batch(actions);
+ return null;
+ }
+ });
+
+ privilegedExecutor.execute(new PrivilegedExceptionAction() {
+ @Override
+ public Void run() throws Exception {
+
+ List processedIncrements;
+ if (batchIncrements) {
+ processedIncrements = coalesceIncrements(incs);
+ } else {
+ processedIncrements = incs;
+ }
+
+ // Only used for unit testing.
+ if (debugIncrCallback != null) {
+ debugIncrCallback.onAfterCoalesce(processedIncrements);
+ }
+
+ for (final Increment i : processedIncrements) {
+ i.setWriteToWAL(enableWal);
+ table.increment(i);
+ }
+ return null;
+ }
+ });
+
+ txn.commit();
+ // NOTE(review): this adds the number of actions, not the number of events
+ // taken from the channel; the two differ when a serializer emits zero or
+ // several actions per event.
+ sinkCounter.addToEventDrainSuccessCount(actions.size());
+ }
+
+ /**
+  * The method getFamilyMap() is no longer available in Hbase 0.96.
+  * We must use reflection to determine which version we may use.
+  *
+  * Tries {@code getFamilyMapOfLongs} first and {@code getFamilyMap} second,
+  * and accepts the first one whose declared return type is exactly
+  * {@link Map}.
+  *
+  * @return the accessor to invoke on an {@code Increment}
+  * @throws UnsupportedOperationException if no candidate returning
+  *         {@link Map} exists
+  */
+ @VisibleForTesting
+ static Method reflectLookupGetFamilyMap() {
+   Method m = null;
+   String[] methodNames = {"getFamilyMapOfLongs", "getFamilyMap"};
+   for (String methodName : methodNames) {
+     try {
+       // Keep the lookup result in a local so that a method with the wrong
+       // return type can never be returned when no later candidate matches.
+       Method candidate = Increment.class.getMethod(methodName);
+       if (candidate.getReturnType().equals(Map.class)) {
+         logger.debug("Using Increment.{} for coalesce", methodName);
+         m = candidate;
+         break;
+       }
+     } catch (NoSuchMethodException e) {
+       logger.debug("Increment.{} does not exist. Exception follows.",
+           methodName, e);
+     } catch (SecurityException e) {
+       logger.debug("No access to Increment.{}; Exception follows.",
+           methodName, e);
+     }
+   }
+   if (m == null) {
+     throw new UnsupportedOperationException(
+         "Cannot find Increment.getFamilyMap()");
+   }
+   return m;
+ }
+
+ /**
+ * Reads the family map of an increment through the reflective accessor
+ * stored in {@code refGetFamilyMap}.
+ *
+ * @param inc the increment to inspect; must not be null
+ * @return the value returned by the accessor, cast to a map
+ * @throws NullPointerException if the accessor was never looked up or
+ * {@code inc} is null
+ */
+ @SuppressWarnings("unchecked")
+ private Map> getFamilyMap(Increment inc) {
+ Preconditions.checkNotNull(refGetFamilyMap,
+ "Increment.getFamilymap() not found");
+ Preconditions.checkNotNull(inc, "Increment required");
+ Map> familyMap = null;
+ try {
+ Object familyObj = refGetFamilyMap.invoke(inc);
+ familyMap = (Map>) familyObj;
+ } catch (IllegalAccessException e) {
+ // Reflection failures are logged and rethrown via Throwables.propagate.
+ logger.warn("Unexpected error calling getFamilyMap()", e);
+ Throwables.propagate(e);
+ } catch (InvocationTargetException e) {
+ logger.warn("Unexpected error calling getFamilyMap()", e);
+ Throwables.propagate(e);
+ }
+ return familyMap;
+ }
+
+ /**
+ * Perform "compression" on the given set of increments so that Flume sends
+ * the minimum possible number of RPC operations to HBase per batch.
+ * Counts that target the same row, family and qualifier are summed, and one
+ * new Increment is built per distinct row.
+ *
+ * @param incs Input: Increment objects to coalesce.
+ * @return List of new Increment objects after coalescing the unique counts.
+ */
+ private List coalesceIncrements(Iterable incs) {
+ Preconditions.checkNotNull(incs, "List of Increments must not be null");
+ // Aggregate all of the increment row/family/column counts.
+ // The nested map is keyed like this: {row, family, qualifier} => count.
+ // Bytes.BYTES_COMPARATOR is supplied as the key ordering of each map level.
+ Map>> counters =
+ Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
+ for (Increment inc : incs) {
+ byte[] row = inc.getRow();
+ Map> families = getFamilyMap(inc);
+ for (Map.Entry> familyEntry : families.entrySet()) {
+ byte[] family = familyEntry.getKey();
+ NavigableMap qualifiers = familyEntry.getValue();
+ for (Map.Entry qualifierEntry : qualifiers.entrySet()) {
+ byte[] qualifier = qualifierEntry.getKey();
+ Long count = qualifierEntry.getValue();
+ incrementCounter(counters, row, family, qualifier, count);
+ }
+ }
+ }
+
+ // Reconstruct list of Increments per unique row/family/qualifier.
+ List coalesced = Lists.newLinkedList();
+ for (Map.Entry>> rowEntry :
+ counters.entrySet()) {
+ byte[] row = rowEntry.getKey();
+ Map> families = rowEntry.getValue();
+ // One Increment per row carries all of that row's family/qualifier sums.
+ Increment inc = new Increment(row);
+ for (Map.Entry> familyEntry : families.entrySet()) {
+ byte[] family = familyEntry.getKey();
+ NavigableMap qualifiers = familyEntry.getValue();
+ for (Map.Entry qualifierEntry : qualifiers.entrySet()) {
+ byte[] qualifier = qualifierEntry.getKey();
+ long count = qualifierEntry.getValue();
+ inc.addColumn(family, qualifier, count);
+ }
+ }
+ coalesced.add(inc);
+ }
+
+ return coalesced;
+ }
+
+ /**
+ * Helper function for {@link #coalesceIncrements} to increment a counter
+ * value in the passed data structure. Missing row and family levels are
+ * created on demand.
+ *
+ * @param counters Nested data structure containing the counters.
+ * @param row Row key to increment.
+ * @param family Column family to increment.
+ * @param qualifier Column qualifier to increment.
+ * @param count Amount to increment by.
+ */
+ private void incrementCounter(
+ Map>> counters,
+ byte[] row, byte[] family, byte[] qualifier, Long count) {
+
+ // Look up the per-row map, creating it on first use.
+ Map> families = counters.get(row);
+ if (families == null) {
+ families = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
+ counters.put(row, families);
+ }
+
+ // Look up the per-family map, creating it on first use.
+ NavigableMap qualifiers = families.get(family);
+ if (qualifiers == null) {
+ qualifiers = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
+ families.put(family, qualifiers);
+ }
+
+ // Start the counter at 'count' or add 'count' to the running total.
+ Long existingValue = qualifiers.get(qualifier);
+ if (existingValue == null) {
+ qualifiers.put(qualifier, count);
+ } else {
+ qualifiers.put(qualifier, existingValue + count);
+ }
+ }
+
+ /**
+ * Returns the serializer instance created in {@code configure(Context)}.
+ * Package-private and annotated {@code @VisibleForTesting}.
+ */
+ @VisibleForTesting
+ @InterfaceAudience.Private
+ HbaseEventSerializer getSerializer() {
+ return serializer;
+ }
+
+ /**
+ * Hook used by unit tests to observe the increments the sink is about to
+ * apply.
+ */
+ @VisibleForTesting
+ @InterfaceAudience.Private
+ interface DebugIncrementsCallback {
+ /**
+ * Invoked with the increments that will be sent to HBase: the coalesced
+ * ones when coalescing is enabled, the unmodified ones otherwise.
+ */
+ public void onAfterCoalesce(Iterable increments);
+ }
+}
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/HBaseSinkConfigurationConstants.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/HBaseSinkConfigurationConstants.java
new file mode 100644
index 0000000..5560624
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/HBaseSinkConfigurationConstants.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.hbase;
+
+import org.apache.hadoop.hbase.HConstants;
+
+/**
+ * Constants used for configuration of HBaseSink and AsyncHBaseSink
+ *
+ */
+public class HBaseSinkConfigurationConstants {
+ /**
+ * The Hbase table which the sink should write to.
+ */
+ public static final String CONFIG_TABLE = "table";
+ /**
+ * The column family which the sink should use.
+ */
+ public static final String CONFIG_COLUMN_FAMILY = "columnFamily";
+ /**
+ * Maximum number of events the sink should take from the channel per
+ * transaction, if available.
+ */
+ public static final String CONFIG_BATCHSIZE = "batchSize";
+ /**
+ * The fully qualified class name of the serializer the sink should use.
+ */
+ public static final String CONFIG_SERIALIZER = "serializer";
+ /**
+ * Configuration to pass to the serializer.
+ */
+ public static final String CONFIG_SERIALIZER_PREFIX = CONFIG_SERIALIZER + ".";
+
+ public static final String CONFIG_TIMEOUT = "timeout";
+
+ public static final String CONFIG_ENABLE_WAL = "enableWal";
+
+ public static final boolean DEFAULT_ENABLE_WAL = true;
+
+ public static final long DEFAULT_TIMEOUT = 60000;
+
+ public static final String CONFIG_KEYTAB = "kerberosKeytab";
+
+ public static final String CONFIG_PRINCIPAL = "kerberosPrincipal";
+
+ public static final String ZK_QUORUM = "zookeeperQuorum";
+
+ public static final String ZK_ZNODE_PARENT = "znodeParent";
+
+ public static final String DEFAULT_ZK_ZNODE_PARENT =
+ HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT;
+
+ public static final String CONFIG_COALESCE_INCREMENTS = "coalesceIncrements";
+
+ public static final Boolean DEFAULT_COALESCE_INCREMENTS = false;
+
+ public static final int DEFAULT_MAX_CONSECUTIVE_FAILS = 10;
+
+ public static final String CONFIG_MAX_CONSECUTIVE_FAILS = "maxConsecutiveFails";
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/HbaseEventSerializer.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/HbaseEventSerializer.java
new file mode 100644
index 0000000..d4e3f84
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/HbaseEventSerializer.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.hbase;
+
+import java.util.List;
+
+import org.apache.flume.Event;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.conf.ConfigurableComponent;
+import org.apache.hadoop.hbase.client.Increment;
+import org.apache.hadoop.hbase.client.Row;
+
+/**
+ * Interface for an event serializer which serializes the headers and body
+ * of an event to write them to hbase. This is configurable, so any config
+ * params required should be taken through this. Only the column family is
+ * passed in. The columns should exist in the table and column family
+ * specified in the configuration for the HbaseSink.
+ */
+public interface HbaseEventSerializer extends Configurable, ConfigurableComponent {
+ /**
+ * Initialize the event serializer.
+ * @param event Event to be written to HBase
+ * @param columnFamily Column family to write to
+ */
+ public void initialize(Event event, byte[] columnFamily);
+
+ /**
+ * Get the actions that should be written out to hbase as a result of this
+ * event. This list is written to hbase using the HBase batch API.
+ * @return List of {@link org.apache.hadoop.hbase.client.Row} which
+ * are written as such to HBase.
+ *
+ * 0.92 increments do not implement Row, so this is not generic.
+ *
+ */
+ public List getActions();
+
+ public List getIncrements();
+
+ /*
+ * Clean up any state. This will be called when the sink is being stopped.
+ */
+ public void close();
+}
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/KfkAsyncHbaseEventSerializer.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/KfkAsyncHbaseEventSerializer.java
new file mode 100644
index 0000000..126b300
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/KfkAsyncHbaseEventSerializer.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.hbase;
+
+import com.google.common.base.Charsets;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.FlumeException;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.apache.flume.sink.hbase.SimpleHbaseEventSerializer.KeyType;
+import org.hbase.async.AtomicIncrementRequest;
+import org.hbase.async.PutRequest;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A simple serializer to be used with the AsyncHBaseSink
+ * that returns puts from an event, by writing the event
+ * body into it. The headers are discarded. It also updates a row in hbase
+ * which acts as an event counter.
+ *
+ * Takes optional parameters:
+ * rowPrefix: The prefix to be used. Default: default
+ * incrementRow The row to increment. Default: incRow
+ * suffix:uuid/random/timestamp.Default: uuid
+ *
+ * Mandatory parameters:
+ * cf:Column family.
+ * Components that have no defaults and will not be used if absent:
+ * payloadColumn: Which column to put payload in. If it is not present,
+ * event data will not be written.
+ * incrementColumn: Which column to increment. If this is absent, it
+ * means no column is incremented.
+ */
+public class KfkAsyncHbaseEventSerializer implements AsyncHbaseEventSerializer {
+ private byte[] table;
+ private byte[] cf;
+ private byte[] payload;
+ private byte[] payloadColumn;
+ private byte[] incrementColumn;
+ private String rowPrefix;
+ private byte[] incrementRow;
+ private KeyType keyType;
+
+ @Override
+ public void initialize(byte[] table, byte[] cf) {
+ this.table = table;
+ this.cf = cf;
+ }
+
+ @Override
+ public List getActions() {
+ List actions = new ArrayList<>();
+ if (payloadColumn != null) {
+ byte[] rowKey;
+ try {
+ /*---------------------------代码修改开始---------------------------------*/
+ //解析列字段
+ String[] columns = new String(this.payloadColumn).split(",");
+ //解析flume采集过来的每行的值
+ String[] values = new String(this.payload).split(",");
+ for(int i=0;i < columns.length;i++) {
+ byte[] colColumn = columns[i].getBytes();
+ byte[] colValue = values[i].getBytes(Charsets.UTF_8);
+
+ //数据校验:字段和值是否对应
+ if (colColumn.length != colValue.length) break;
+
+ //时间
+ String datetime = values[0].toString();
+ //用户id
+ String userid = values[1].toString();
+ //根据业务自定义Rowkey
+ rowKey = SimpleRowKeyGenerator.getKfkRowKey(userid, datetime);
+ //插入数据
+ PutRequest putRequest = new PutRequest(table, rowKey, cf,
+ colColumn, colValue);
+ actions.add(putRequest);
+ /*---------------------------代码修改结束---------------------------------*/
+ }
+ } catch (Exception e) {
+ throw new FlumeException("Could not get row key!", e);
+ }
+ }
+ return actions;
+ }
+
+ public List getIncrements() {
+ List actions = new ArrayList();
+ if (incrementColumn != null) {
+ AtomicIncrementRequest inc = new AtomicIncrementRequest(table,
+ incrementRow, cf, incrementColumn);
+ actions.add(inc);
+ }
+ return actions;
+ }
+
+ @Override
+ public void cleanUp() {
+ // TODO Auto-generated method stub
+
+ }
+
+ @Override
+ public void configure(Context context) {
+ String pCol = context.getString("payloadColumn", "pCol");
+ String iCol = context.getString("incrementColumn", "iCol");
+ rowPrefix = context.getString("rowPrefix", "default");
+ String suffix = context.getString("suffix", "uuid");
+ if (pCol != null && !pCol.isEmpty()) {
+ if (suffix.equals("timestamp")) {
+ keyType = KeyType.TS;
+ } else if (suffix.equals("random")) {
+ keyType = KeyType.RANDOM;
+ } else if (suffix.equals("nano")) {
+ keyType = KeyType.TSNANO;
+ } else {
+ keyType = KeyType.UUID;
+ }
+ payloadColumn = pCol.getBytes(Charsets.UTF_8);
+ }
+ if (iCol != null && !iCol.isEmpty()) {
+ incrementColumn = iCol.getBytes(Charsets.UTF_8);
+ }
+ incrementRow = context.getString("incrementRow", "incRow").getBytes(Charsets.UTF_8);
+ }
+
+ @Override
+ public void setEvent(Event event) {
+ this.payload = event.getBody();
+ }
+
+ @Override
+ public void configure(ComponentConfiguration conf) {
+ // TODO Auto-generated method stub
+ }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/RegexHbaseEventSerializer.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/RegexHbaseEventSerializer.java
new file mode 100644
index 0000000..8342d67
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/RegexHbaseEventSerializer.java
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.hbase;
+
+import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
+import org.apache.commons.lang.RandomStringUtils;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.FlumeException;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.apache.hadoop.hbase.client.Increment;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Row;
+
+import java.nio.charset.Charset;
+import java.util.Calendar;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * An {@link HbaseEventSerializer} which parses columns based on a supplied
+ * regular expression and column name list.
+ *
+ * Note that if the regular expression does not return the correct number of
+ * groups for a particular event, or it does not correctly match an event,
+ * the event is silently dropped.
+ *
+ * Row keys for each event consist of a timestamp concatenated with an
+ * identifier which enforces uniqueness of keys across flume agents.
+ *
+ * See static constant variables for configuration options.
+ */
+public class RegexHbaseEventSerializer implements HbaseEventSerializer {
+ // Config vars
+ /** Regular expression used to parse groups from event data. */
+ public static final String REGEX_CONFIG = "regex";
+ public static final String REGEX_DEFAULT = "(.*)";
+
+ /** Whether to ignore case when performing regex matches. */
+ public static final String IGNORE_CASE_CONFIG = "regexIgnoreCase";
+ public static final boolean INGORE_CASE_DEFAULT = false;
+
+ /** Comma separated list of column names to place matching groups in. */
+ public static final String COL_NAME_CONFIG = "colNames";
+ public static final String COLUMN_NAME_DEFAULT = "payload";
+
+ /** Index of the row key in matched regex groups */
+ public static final String ROW_KEY_INDEX_CONFIG = "rowKeyIndex";
+
+ /** Placeholder in colNames for row key */
+ public static final String ROW_KEY_NAME = "ROW_KEY";
+
+ /** Whether to deposit event headers into corresponding column qualifiers */
+ public static final String DEPOSIT_HEADERS_CONFIG = "depositHeaders";
+ public static final boolean DEPOSIT_HEADERS_DEFAULT = false;
+
+ /** What charset to use when serializing into HBase's byte arrays */
+ public static final String CHARSET_CONFIG = "charset";
+ public static final String CHARSET_DEFAULT = "UTF-8";
+
+ /* This is a nonce used in HBase row-keys, such that the same row-key
+ * never gets written more than once from within this JVM. */
+ protected static final AtomicInteger nonce = new AtomicInteger(0);
+ protected static String randomKey = RandomStringUtils.randomAlphanumeric(10);
+
+ protected byte[] cf;
+ private byte[] payload;
+ private List colNames = Lists.newArrayList();
+ private Map headers;
+ private boolean regexIgnoreCase;
+ private boolean depositHeaders;
+ private Pattern inputPattern;
+ private Charset charset;
+ private int rowKeyIndex;
+
+ @Override
+ public void configure(Context context) {
+ String regex = context.getString(REGEX_CONFIG, REGEX_DEFAULT);
+ regexIgnoreCase = context.getBoolean(IGNORE_CASE_CONFIG,
+ INGORE_CASE_DEFAULT);
+ depositHeaders = context.getBoolean(DEPOSIT_HEADERS_CONFIG,
+ DEPOSIT_HEADERS_DEFAULT);
+ inputPattern = Pattern.compile(regex, Pattern.DOTALL
+ + (regexIgnoreCase ? Pattern.CASE_INSENSITIVE : 0));
+ charset = Charset.forName(context.getString(CHARSET_CONFIG,
+ CHARSET_DEFAULT));
+
+ String colNameStr = context.getString(COL_NAME_CONFIG, COLUMN_NAME_DEFAULT);
+ String[] columnNames = colNameStr.split(",");
+ for (String s : columnNames) {
+ colNames.add(s.getBytes(charset));
+ }
+
+ //Rowkey is optional, default is -1
+ rowKeyIndex = context.getInteger(ROW_KEY_INDEX_CONFIG, -1);
+ //if row key is being used, make sure it is specified correct
+ if (rowKeyIndex >= 0) {
+ if (rowKeyIndex >= columnNames.length) {
+ throw new IllegalArgumentException(ROW_KEY_INDEX_CONFIG + " must be " +
+ "less than num columns " + columnNames.length);
+ }
+ if (!ROW_KEY_NAME.equalsIgnoreCase(columnNames[rowKeyIndex])) {
+ throw new IllegalArgumentException("Column at " + rowKeyIndex + " must be "
+ + ROW_KEY_NAME + " and is " + columnNames[rowKeyIndex]);
+ }
+ }
+ }
+
+ @Override
+ public void configure(ComponentConfiguration conf) {
+ }
+
+ @Override
+ public void initialize(Event event, byte[] columnFamily) {
+ this.headers = event.getHeaders();
+ this.payload = event.getBody();
+ this.cf = columnFamily;
+ }
+
+ /**
+ * Returns a row-key with the following format:
+ * [time in millis]-[random key]-[nonce]
+ */
+ protected byte[] getRowKey(Calendar cal) {
+ /* NOTE: This key generation strategy has the following properties:
+ *
+ * 1) Within a single JVM, the same row key will never be duplicated.
+ * 2) Amongst any two JVM's operating at different time periods (according
+ * to their respective clocks), the same row key will never be
+ * duplicated.
+ * 3) Amongst any two JVM's operating concurrently (according to their
+ * respective clocks), the odds of duplicating a row-key are non-zero
+ * but infinitesimal. This would require simultaneous collision in (a)
+ * the timestamp (b) the respective nonce and (c) the random string.
+ * The string is necessary since (a) and (b) could collide if a fleet
+ * of Flume agents are restarted in tandem.
+ *
+ * Row-key uniqueness is important because conflicting row-keys will cause
+ * data loss. */
+ String rowKey = String.format("%s-%s-%s", cal.getTimeInMillis(),
+ randomKey, nonce.getAndIncrement());
+ return rowKey.getBytes(charset);
+ }
+
+ protected byte[] getRowKey() {
+ return getRowKey(Calendar.getInstance());
+ }
+
+ @Override
+ public List getActions() throws FlumeException {
+ List actions = Lists.newArrayList();
+ byte[] rowKey;
+ Matcher m = inputPattern.matcher(new String(payload, charset));
+ if (!m.matches()) {
+ return Lists.newArrayList();
+ }
+
+ if (m.groupCount() != colNames.size()) {
+ return Lists.newArrayList();
+ }
+
+ try {
+ if (rowKeyIndex < 0) {
+ rowKey = getRowKey();
+ } else {
+ rowKey = m.group(rowKeyIndex + 1).getBytes(Charsets.UTF_8);
+ }
+ Put put = new Put(rowKey);
+
+ for (int i = 0; i < colNames.size(); i++) {
+ if (i != rowKeyIndex) {
+ put.add(cf, colNames.get(i), m.group(i + 1).getBytes(Charsets.UTF_8));
+ }
+ }
+ if (depositHeaders) {
+ for (Map.Entry entry : headers.entrySet()) {
+ put.add(cf, entry.getKey().getBytes(charset), entry.getValue().getBytes(charset));
+ }
+ }
+ actions.add(put);
+ } catch (Exception e) {
+ throw new FlumeException("Could not get row key!", e);
+ }
+ return actions;
+ }
+
+ @Override
+ public List getIncrements() {
+ return Lists.newArrayList();
+ }
+
+ @Override
+ public void close() {
+ }
+}
\ No newline at end of file
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/SimpleAsyncHbaseEventSerializer.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/SimpleAsyncHbaseEventSerializer.java
new file mode 100644
index 0000000..3f442e8
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/SimpleAsyncHbaseEventSerializer.java
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.hbase;
+
+import com.google.common.base.Charsets;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.FlumeException;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.apache.flume.sink.hbase.SimpleHbaseEventSerializer.KeyType;
+import org.hbase.async.AtomicIncrementRequest;
+import org.hbase.async.PutRequest;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A simple serializer to be used with the AsyncHBaseSink
+ * that returns puts from an event, by writing the event
+ * body into it. The headers are discarded. It also updates a row in hbase
+ * which acts as an event counter.
+ *
+ * Takes optional parameters:
+ * rowPrefix: The prefix to be used. Default: default
+ * incrementRow The row to increment. Default: incRow
+ * suffix:uuid/random/timestamp.Default: uuid
+ *
+ * Mandatory parameters:
+ * cf:Column family.
+ * Components that have no defaults and will not be used if absent:
+ * payloadColumn: Which column to put payload in. If it is not present,
+ * event data will not be written.
+ * incrementColumn: Which column to increment. If this is absent, it
+ * means no column is incremented.
+ */
+public class SimpleAsyncHbaseEventSerializer implements AsyncHbaseEventSerializer {
+ private byte[] table;
+ private byte[] cf;
+ private byte[] payload;
+ private byte[] payloadColumn;
+ private byte[] incrementColumn;
+ private String rowPrefix;
+ private byte[] incrementRow;
+ private KeyType keyType;
+
+ @Override
+ public void initialize(byte[] table, byte[] cf) {
+ this.table = table;
+ this.cf = cf;
+ }
+
+ @Override
+ public List getActions() {
+ List actions = new ArrayList();
+ if (payloadColumn != null) {
+ byte[] rowKey;
+ try {
+ switch (keyType) {
+ case TS:
+ rowKey = SimpleRowKeyGenerator.getTimestampKey(rowPrefix);
+ break;
+ case TSNANO:
+ rowKey = SimpleRowKeyGenerator.getNanoTimestampKey(rowPrefix);
+ break;
+ case RANDOM:
+ rowKey = SimpleRowKeyGenerator.getRandomKey(rowPrefix);
+ break;
+ default:
+ rowKey = SimpleRowKeyGenerator.getUUIDKey(rowPrefix);
+ break;
+ }
+ PutRequest putRequest = new PutRequest(table, rowKey, cf,
+ payloadColumn, payload);
+ actions.add(putRequest);
+ } catch (Exception e) {
+ throw new FlumeException("Could not get row key!", e);
+ }
+ }
+ return actions;
+ }
+
+ public List getIncrements() {
+ List actions = new ArrayList();
+ if (incrementColumn != null) {
+ AtomicIncrementRequest inc = new AtomicIncrementRequest(table,
+ incrementRow, cf, incrementColumn);
+ actions.add(inc);
+ }
+ return actions;
+ }
+
+ @Override
+ public void cleanUp() {
+ // TODO Auto-generated method stub
+
+ }
+
+ @Override
+ public void configure(Context context) {
+ String pCol = context.getString("payloadColumn", "pCol");
+ String iCol = context.getString("incrementColumn", "iCol");
+ rowPrefix = context.getString("rowPrefix", "default");
+ String suffix = context.getString("suffix", "uuid");
+ if (pCol != null && !pCol.isEmpty()) {
+ if (suffix.equals("timestamp")) {
+ keyType = KeyType.TS;
+ } else if (suffix.equals("random")) {
+ keyType = KeyType.RANDOM;
+ } else if (suffix.equals("nano")) {
+ keyType = KeyType.TSNANO;
+ } else {
+ keyType = KeyType.UUID;
+ }
+ payloadColumn = pCol.getBytes(Charsets.UTF_8);
+ }
+ if (iCol != null && !iCol.isEmpty()) {
+ incrementColumn = iCol.getBytes(Charsets.UTF_8);
+ }
+ incrementRow = context.getString("incrementRow", "incRow").getBytes(Charsets.UTF_8);
+ }
+
+ @Override
+ public void setEvent(Event event) {
+ this.payload = event.getBody();
+ }
+
+ @Override
+ public void configure(ComponentConfiguration conf) {
+ // TODO Auto-generated method stub
+ }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/SimpleHbaseEventSerializer.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/SimpleHbaseEventSerializer.java
new file mode 100644
index 0000000..dc89fd7
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/SimpleHbaseEventSerializer.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.flume.sink.hbase;
+
+import com.google.common.base.Charsets;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.FlumeException;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.apache.hadoop.hbase.client.Increment;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Row;
+
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * A simple serializer that returns puts from an event, by writing the event
+ * body into it. The headers are discarded. It also updates a row in hbase
+ * which acts as an event counter.
+ *
Takes optional parameters:
+ * rowPrefix: The prefix to be used. Default: default
+ * incrementRow The row to increment. Default: incRow
+ * suffix:uuid/random/timestamp.Default: uuid
+ *
Mandatory parameters:
+ * cf:Column family.
+ * Components that have no defaults and will not be used if null:
+ * payloadColumn: Which column to put payload in. If it is null,
+ * event data will not be written.
+ * incColumn: Which column to increment. Null means no column is
+ * incremented.
+ */
+public class SimpleHbaseEventSerializer implements HbaseEventSerializer {
+ private String rowPrefix;
+ private byte[] incrementRow;
+ private byte[] cf;
+ private byte[] plCol;
+ private byte[] incCol;
+ private KeyType keyType;
+ private byte[] payload;
+
+ public SimpleHbaseEventSerializer() {
+ }
+
+ @Override
+ public void configure(Context context) {
+ rowPrefix = context.getString("rowPrefix", "default");
+ incrementRow =
+ context.getString("incrementRow", "incRow").getBytes(Charsets.UTF_8);
+ String suffix = context.getString("suffix", "uuid");
+
+ String payloadColumn = context.getString("payloadColumn", "pCol");
+ String incColumn = context.getString("incrementColumn", "iCol");
+ if (payloadColumn != null && !payloadColumn.isEmpty()) {
+ if (suffix.equals("timestamp")) {
+ keyType = KeyType.TS;
+ } else if (suffix.equals("random")) {
+ keyType = KeyType.RANDOM;
+ } else if (suffix.equals("nano")) {
+ keyType = KeyType.TSNANO;
+ } else {
+ keyType = KeyType.UUID;
+ }
+ plCol = payloadColumn.getBytes(Charsets.UTF_8);
+ }
+ if (incColumn != null && !incColumn.isEmpty()) {
+ incCol = incColumn.getBytes(Charsets.UTF_8);
+ }
+ }
+
+ @Override
+ public void configure(ComponentConfiguration conf) {
+ }
+
+ @Override
+ public void initialize(Event event, byte[] cf) {
+ this.payload = event.getBody();
+ this.cf = cf;
+ }
+
+ @Override
+ public List getActions() throws FlumeException {
+ List actions = new LinkedList();
+ if (plCol != null) {
+ byte[] rowKey;
+ try {
+ if (keyType == KeyType.TS) {
+ rowKey = SimpleRowKeyGenerator.getTimestampKey(rowPrefix);
+ } else if (keyType == KeyType.RANDOM) {
+ rowKey = SimpleRowKeyGenerator.getRandomKey(rowPrefix);
+ } else if (keyType == KeyType.TSNANO) {
+ rowKey = SimpleRowKeyGenerator.getNanoTimestampKey(rowPrefix);
+ } else {
+ rowKey = SimpleRowKeyGenerator.getUUIDKey(rowPrefix);
+ }
+ Put put = new Put(rowKey);
+ put.add(cf, plCol, payload);
+ actions.add(put);
+ } catch (Exception e) {
+ throw new FlumeException("Could not get row key!", e);
+ }
+
+ }
+ return actions;
+ }
+
+ @Override
+ public List getIncrements() {
+ List incs = new LinkedList();
+ if (incCol != null) {
+ Increment inc = new Increment(incrementRow);
+ inc.addColumn(cf, incCol, 1);
+ incs.add(inc);
+ }
+ return incs;
+ }
+
+ @Override
+ public void close() {
+ }
+
+ public enum KeyType {
+ UUID,
+ RANDOM,
+ TS,
+ TSNANO;
+ }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/SimpleRowKeyGenerator.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/SimpleRowKeyGenerator.java
new file mode 100644
index 0000000..0cabd2c
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/main/java/org/apache/flume/sink/hbase/SimpleRowKeyGenerator.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.hbase;
+
+import java.io.UnsupportedEncodingException;
+import java.util.Random;
+import java.util.UUID;
+
+/**
+ * Utility class for users to generate their own keys. Any key can be used,
+ * this is just a utility that provides a set of simple keys.
+ */
+public class SimpleRowKeyGenerator {
+
+ public static byte[] getUUIDKey(String prefix) throws UnsupportedEncodingException {
+ return (prefix + UUID.randomUUID().toString()).getBytes("UTF8");
+ }
+
+ public static byte[] getRandomKey(String prefix) throws UnsupportedEncodingException {
+ return (prefix + String.valueOf(new Random().nextLong())).getBytes("UTF8");
+ }
+
+ public static byte[] getTimestampKey(String prefix) throws UnsupportedEncodingException {
+ return (prefix + String.valueOf(System.currentTimeMillis())).getBytes("UTF8");
+ }
+
+ public static byte[] getNanoTimestampKey(String prefix) throws UnsupportedEncodingException {
+ return (prefix + String.valueOf(System.nanoTime())).getBytes("UTF8");
+ }
+ public static byte[] getKfkRowKey(String userid,String datetime)throws UnsupportedEncodingException {
+ return (userid + datetime + String.valueOf(System.currentTimeMillis())).getBytes("UTF8");
+ }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/IncrementAsyncHBaseSerializer.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/IncrementAsyncHBaseSerializer.java
new file mode 100644
index 0000000..9a2be5a
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/IncrementAsyncHBaseSerializer.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.hbase;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.hbase.async.AtomicIncrementRequest;
+import org.hbase.async.PutRequest;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * An AsyncHBaseEventSerializer implementation that increments a configured
+ * column for the row whose row key is the event's body bytes.
+ */
+public class IncrementAsyncHBaseSerializer implements AsyncHbaseEventSerializer {
+  private byte[] table;
+  private byte[] cf;
+  private byte[] column;
+  private Event currentEvent;
+  // Stores the target table and column family used when building increments.
+  @Override
+  public void initialize(byte[] table, byte[] cf) {
+    this.cf = cf;
+    this.table = table;
+  }
+  // Stores the event whose body becomes the row key of the next increment.
+  @Override
+  public void setEvent(Event event) {
+    currentEvent = event;
+  }
+  // This serializer produces no put actions, only increments.
+  @Override
+  public List getActions() {
+    return Collections.emptyList();
+  }
+  // Returns a one-element list: an increment by 1 of the configured column in the event's row.
+  @Override
+  public List getIncrements() {
+    AtomicIncrementRequest plusOne =
+        new AtomicIncrementRequest(table, currentEvent.getBody(), cf, column, 1);
+    List requests = new ArrayList();
+    requests.add(plusOne);
+    return requests;
+  }
+  // Nothing to release.
+  @Override
+  public void cleanUp() {
+  }
+  // Reads the column qualifier from the "column" setting, falling back to "col".
+  @Override
+  public void configure(Context context) {
+    column = context.getString("column", "col").getBytes();
+  }
+  // No component-level configuration is used.
+  @Override
+  public void configure(ComponentConfiguration conf) {
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/IncrementHBaseSerializer.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/IncrementHBaseSerializer.java
new file mode 100644
index 0000000..b4343eb
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/IncrementHBaseSerializer.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.hbase;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
+import java.util.Collections;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.apache.hadoop.hbase.client.Increment;
+import org.apache.hadoop.hbase.client.Row;
+
+import java.util.List;
+
+/**
+ * For Increment-related unit tests.
+ */
+class IncrementHBaseSerializer implements HbaseEventSerializer, BatchAware {
+  private Event event;
+  private byte[] family;
+  private int numBatchesStarted = 0;
+  // Configuration and close hooks are no-ops in this test helper.
+  @Override public void configure(Context context) { }
+  @Override public void configure(ComponentConfiguration conf) { }
+  @Override public void close() { }
+
+  @Override
+  public void initialize(Event event, byte[] columnFamily) {
+    family = columnFamily;
+    this.event = event;
+  }
+
+  // No puts are ever produced; this class only creates Increments.
+  @Override
+  public List getActions() {
+    return Collections.emptyList();
+  }
+
+  // Each event body is read as a UTF-8 string of the form "row:qualifier".
+  @Override
+  public List getIncrements() {
+    String[] parts = new String(event.getBody(), Charsets.UTF_8).split(":");
+    byte[] rowKey = parts[0].getBytes(Charsets.UTF_8);
+    byte[] qualifierBytes = parts[1].getBytes(Charsets.UTF_8);
+    // Add 1 to that qualifier within the configured column family.
+    Increment increment = new Increment(rowKey);
+    increment.addColumn(family, qualifierBytes, 1L);
+    List result = Lists.newArrayList();
+    result.add(increment);
+    return result;
+  }
+  // Counts batch-start notifications so tests can read them back below.
+  @Override
+  public void onBatchStart() {
+    numBatchesStarted += 1;
+  }
+
+  @VisibleForTesting
+  public int getNumBatchesStarted() {
+    return numBatchesStarted;
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/MockSimpleHbaseEventSerializer.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/MockSimpleHbaseEventSerializer.java
new file mode 100644
index 0000000..9b2a850
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/MockSimpleHbaseEventSerializer.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.flume.sink.hbase;
+
+import java.util.List;
+
+import org.apache.flume.FlumeException;
+import org.apache.hadoop.hbase.client.Row;
+
+class MockSimpleHbaseEventSerializer extends SimpleHbaseEventSerializer {
+  // Test switch: while true, getActions() throws instead of delegating to the parent.
+  public static boolean throwException = false;
+
+  @Override
+  public List getActions() throws FlumeException {
+    if (!throwException) {
+      return super.getActions();
+    }
+    throw new FlumeException("Exception for testing");
+  }
+}
\ No newline at end of file
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/TestAsyncHBaseSink.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/TestAsyncHBaseSink.java
new file mode 100644
index 0000000..f8faa1e
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/TestAsyncHBaseSink.java
@@ -0,0 +1,618 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.flume.sink.hbase;
+
+import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.lang.management.OperatingSystemMXBean;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.FlumeException;
+import org.apache.flume.Transaction;
+import org.apache.flume.Sink.Status;
+import org.apache.flume.channel.MemoryChannel;
+import org.apache.flume.conf.Configurables;
+import org.apache.flume.event.EventBuilder;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.zookeeper.ZKConfig;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import com.google.common.primitives.Longs;
+import com.sun.management.UnixOperatingSystemMXBean;
+
+import org.junit.After;
+
+public class TestAsyncHBaseSink {
+ private static HBaseTestingUtility testUtility = new HBaseTestingUtility(); // mini-cluster harness, started in setUp()
+
+ private static String tableName = "TestHbaseSink"; // table the tests create; dropped in tearDownTest()
+ private static String columnFamily = "TestColumnFamily"; // the only column family the tests create
+ private static String inColumn = "iCol"; // value of serializer.incrementColumn in the shared context
+ private static String plCol = "pCol"; // value of serializer.payloadColumn in the shared context
+ private static Context ctx = new Context(); // shared sink/channel settings: filled in setUp(), tweaked by tests
+ private static String valBase = "testing hbase sink: jham"; // base text of every event body
+ private boolean deleteTable = true; // when true, tearDownTest() drops the table
+ private static OperatingSystemMXBean os; // set in setUp(); read by getOpenFileDescriptorCount()
+
+
+ @BeforeClass
+ public static void setUp() throws Exception {
+   testUtility.startMiniCluster();
+
+   // Baseline settings shared by the tests; several tests adjust entries temporarily.
+   ctx.put("table", tableName);
+   ctx.put("columnFamily", columnFamily);
+   ctx.put("serializer",
+       "org.apache.flume.sink.hbase.SimpleAsyncHbaseEventSerializer");
+   ctx.put("serializer.payloadColumn", plCol);
+   ctx.put("serializer.incrementColumn", inColumn);
+   ctx.put("keep-alive", "0");
+   ctx.put("timeout", "10000");
+
+   // Handle read later by getOpenFileDescriptorCount().
+   os = ManagementFactory.getOperatingSystemMXBean();
+ }
+
+ @AfterClass
+ public static void tearDown() throws Exception {
+ testUtility.shutdownMiniCluster(); // stops the mini cluster started in setUp()
+ }
+
+ @After
+ public void tearDownTest() throws Exception {
+   // Some tests set deleteTable to false to skip this drop.
+   if (!deleteTable) { return; }
+   testUtility.deleteTable(tableName.getBytes());
+ }
+
+ @Test
+ public void testOneEventWithDefaults() throws Exception {
+   // Private context: unlike the shared one, it sets no serializer column names.
+   Context defaultsCtx = new Context();
+   defaultsCtx.put("table", tableName);
+   defaultsCtx.put("columnFamily", columnFamily);
+   defaultsCtx.put("serializer",
+       "org.apache.flume.sink.hbase.SimpleAsyncHbaseEventSerializer");
+   defaultsCtx.put("keep-alive", "0");
+   defaultsCtx.put("timeout", "10000");
+
+   testUtility.createTable(tableName.getBytes(), columnFamily.getBytes());
+   deleteTable = true;
+   Channel memChannel = new MemoryChannel();
+   AsyncHBaseSink hbaseSink = new AsyncHBaseSink(testUtility.getConfiguration());
+   Configurables.configure(hbaseSink, defaultsCtx);
+   Configurables.configure(memChannel, defaultsCtx);
+   hbaseSink.setChannel(memChannel);
+   hbaseSink.start();
+
+   // Queue a single event inside its own channel transaction.
+   Event event = EventBuilder.withBody(Bytes.toBytes(valBase));
+   Transaction txn = memChannel.getTransaction();
+   txn.begin();
+   memChannel.put(event);
+   txn.commit();
+   txn.close();
+
+   Assert.assertFalse(hbaseSink.isConfNull());
+   hbaseSink.process();
+   hbaseSink.stop();
+   // Slot 0 holds the payload value, slot 1 the increment-column value.
+   HTable hTable = new HTable(testUtility.getConfiguration(), tableName);
+   byte[][] cells = getResults(hTable, 1);
+   Assert.assertArrayEquals(event.getBody(), cells[0]);
+   Assert.assertArrayEquals(Longs.toByteArray(1), cells[1]);
+ }
+
+ @Test
+ public void testOneEvent() throws Exception {
+   testUtility.createTable(tableName.getBytes(), columnFamily.getBytes());
+   deleteTable = true;
+   // Sink and channel both take the shared class-level context.
+   Channel memChannel = new MemoryChannel();
+   AsyncHBaseSink hbaseSink = new AsyncHBaseSink(testUtility.getConfiguration());
+   Configurables.configure(hbaseSink, ctx);
+   Configurables.configure(memChannel, ctx);
+   hbaseSink.setChannel(memChannel);
+   hbaseSink.start();
+
+   Event event = EventBuilder.withBody(Bytes.toBytes(valBase));
+   Transaction txn = memChannel.getTransaction();
+   txn.begin();
+   memChannel.put(event);
+   txn.commit();
+   txn.close();
+   Assert.assertFalse(hbaseSink.isConfNull());
+   hbaseSink.process();
+   hbaseSink.stop();
+
+   // One payload slot followed by the increment-column value.
+   byte[][] cells = getResults(new HTable(testUtility.getConfiguration(), tableName), 1);
+   Assert.assertArrayEquals(event.getBody(), cells[0]);
+   Assert.assertArrayEquals(Longs.toByteArray(1), cells[1]);
+ }
+
+ @Test
+ public void testThreeEvents() throws Exception {
+   testUtility.createTable(tableName.getBytes(), columnFamily.getBytes());
+   deleteTable = true;
+   Channel memChannel = new MemoryChannel();
+   AsyncHBaseSink hbaseSink = new AsyncHBaseSink(testUtility.getConfiguration());
+   Configurables.configure(hbaseSink, ctx);
+   Configurables.configure(memChannel, ctx);
+   hbaseSink.setChannel(memChannel);
+   hbaseSink.start();
+
+   // Three events with bodies valBase-0 .. valBase-2, committed in one transaction.
+   Transaction txn = memChannel.getTransaction();
+   txn.begin();
+   for (int n = 0; n < 3; n++) {
+     memChannel.put(EventBuilder.withBody(Bytes.toBytes(valBase + "-" + n)));
+   }
+   txn.commit();
+   txn.close();
+   Assert.assertFalse(hbaseSink.isConfNull());
+   hbaseSink.process();
+   hbaseSink.stop();
+   // Scan order is not assumed: each expected body only has to appear in some payload slot.
+   byte[][] cells = getResults(new HTable(testUtility.getConfiguration(), tableName), 3);
+   int matched = 0;
+   for (int n = 0; n < 3; n++) {
+     byte[] expected = Bytes.toBytes(valBase + "-" + n);
+     for (int slot = 0; slot < 3; slot++) {
+       if (Arrays.equals(cells[slot], expected)) {
+         matched++;
+         break;
+       }
+     }
+   }
+   Assert.assertEquals(3, matched);
+   Assert.assertArrayEquals(Longs.toByteArray(3), cells[3]);
+ }
+
+ //This will fail without FLUME-1842's timeout fix - but with FLUME-1842's testing
+ //oriented changes to the callback classes and using single threaded executor
+ //for tests.
+ @Test (expected = EventDeliveryException.class)
+ public void testTimeOut() throws Exception {
+   testUtility.createTable(tableName.getBytes(), columnFamily.getBytes());
+   deleteTable = true;
+   // Sink built with the flags (true, false); process() must throw, per the annotation.
+   AsyncHBaseSink hbaseSink = new AsyncHBaseSink(testUtility.getConfiguration(), true, false);
+   Channel memChannel = new MemoryChannel();
+   Configurables.configure(hbaseSink, ctx);
+   Configurables.configure(memChannel, ctx);
+   hbaseSink.setChannel(memChannel);
+   memChannel.start();
+   hbaseSink.start();
+   Transaction txn = memChannel.getTransaction();
+   txn.begin();
+   for (int n = 0; n < 3; n++) {
+     memChannel.put(EventBuilder.withBody(Bytes.toBytes(valBase + "-" + n)));
+   }
+   txn.commit();
+   txn.close();
+   Assert.assertFalse(hbaseSink.isConfNull());
+   hbaseSink.process();
+   Assert.fail();
+ }
+
+ @Test
+ public void testMultipleBatches() throws Exception { // three events through a sink configured with batchSize 2
+ testUtility.createTable(tableName.getBytes(), columnFamily.getBytes());
+ deleteTable = true;
+ ctx.put("batchSize", "2"); // set on the shared ctx only for the configure() call below
+ AsyncHBaseSink sink = new AsyncHBaseSink(testUtility.getConfiguration());
+ Configurables.configure(sink, ctx);
+ // Reset the shared context to a higher batchSize so later users do not see the value 2
+ ctx.put("batchSize", "100");
+ Channel channel = new MemoryChannel();
+ Configurables.configure(channel, ctx);
+ sink.setChannel(channel);
+ sink.start();
+ Transaction tx = channel.getTransaction();
+ tx.begin();
+ for (int i = 0; i < 3; i++) {
+ Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + i));
+ channel.put(e);
+ }
+ tx.commit();
+ tx.close();
+ int count = 0;
+ Status status = Status.READY;
+ while (status != Status.BACKOFF) { // keep calling process() until the sink reports BACKOFF
+ count++;
+ status = sink.process();
+ }
+ Assert.assertFalse(sink.isConfNull());
+ sink.stop();
+ Assert.assertEquals(2, count); // number of process() calls, including the one that returned BACKOFF
+ HTable table = new HTable(testUtility.getConfiguration(), tableName);
+ byte[][] results = getResults(table, 3); // three payload slots, then the increment-column value
+ byte[] out;
+ int found = 0;
+ for (int i = 0; i < 3; i++) { // every expected body must appear in some payload slot
+ for (int j = 0; j < 3; j++) {
+ if (Arrays.equals(results[j], Bytes.toBytes(valBase + "-" + i))) {
+ found++;
+ break;
+ }
+ }
+ }
+ Assert.assertEquals(3, found);
+ out = results[3]; // last slot: value read from the increment column
+ Assert.assertArrayEquals(Longs.toByteArray(3), out);
+ }
+
+ @Test
+ public void testMultipleBatchesBatchIncrementsWithCoalescing() throws Exception {
+ doTestMultipleBatchesBatchIncrements(true); // shared scenario, increment coalescing enabled
+ }
+
+ @Test
+ public void testMultipleBatchesBatchIncrementsNoCoalescing() throws Exception {
+ doTestMultipleBatchesBatchIncrements(false); // shared scenario, coalescing setting not enabled
+ }
+
+ public void doTestMultipleBatchesBatchIncrements(boolean coalesce) throws Exception { // shared body of the two batch-increment tests
+ testUtility.createTable(tableName.getBytes(), columnFamily.getBytes());
+ deleteTable = true;
+ AsyncHBaseSink sink = new AsyncHBaseSink(testUtility.getConfiguration(), false, true); // NOTE(review): flag meanings are not visible here - confirm against AsyncHBaseSink
+ if (coalesce) {
+ ctx.put(HBaseSinkConfigurationConstants.CONFIG_COALESCE_INCREMENTS, "true");
+ }
+ ctx.put("batchSize", "2");
+ ctx.put("serializer", IncrementAsyncHBaseSerializer.class.getName()); // serializer that only issues increments
+ ctx.put("serializer.column", "test"); // the same qualifier is scanned below
+ Configurables.configure(sink, ctx);
+ //Reset the context to a higher batchSize
+ ctx.put("batchSize", "100");
+ // Restore the original serializer
+ ctx.put("serializer", SimpleAsyncHbaseEventSerializer.class.getName());
+ //Restore the no coalescing behavior
+ ctx.put(HBaseSinkConfigurationConstants.CONFIG_COALESCE_INCREMENTS,
+ "false");
+ Channel channel = new MemoryChannel();
+ Configurables.configure(channel, ctx);
+ sink.setChannel(channel);
+ sink.start();
+ Transaction tx = channel.getTransaction();
+ tx.begin();
+ for (int i = 0; i < 4; i++) { // 4 distinct bodies x 3 copies each = 12 events
+ for (int j = 0; j < 3; j++) {
+ Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + i));
+ channel.put(e);
+ }
+ }
+ tx.commit();
+ tx.close();
+ int count = 0;
+ Status status = Status.READY;
+ while (status != Status.BACKOFF) { // keep calling process() until the sink reports BACKOFF
+ count++;
+ status = sink.process();
+ }
+ Assert.assertFalse(sink.isConfNull());
+ sink.stop();
+ Assert.assertEquals(7, count); // number of process() calls, including the one that returned BACKOFF
+ HTable table = new HTable(testUtility.getConfiguration(), tableName);
+ Scan scan = new Scan();
+ scan.addColumn(columnFamily.getBytes(), "test".getBytes());
+ scan.setStartRow(Bytes.toBytes(valBase));
+ ResultScanner rs = table.getScanner(scan);
+ int i = 0; // rows seen by the scan
+ try {
+ for (Result r = rs.next(); r != null; r = rs.next()) {
+ byte[] out = r.getValue(columnFamily.getBytes(), "test".getBytes());
+ Assert.assertArrayEquals(Longs.toByteArray(3), out); // each body was sent three times, so each counter must read 3
+ Assert.assertTrue(new String(r.getRow()).startsWith(valBase));
+ i++;
+ }
+ } finally {
+ rs.close();
+ }
+ Assert.assertEquals(4, i); // one row per distinct event body
+ if (coalesce) { // fewer callbacks are expected when coalescing is on
+ Assert.assertEquals(8, sink.getTotalCallbacksReceived());
+ } else {
+ Assert.assertEquals(12, sink.getTotalCallbacksReceived());
+ }
+ }
+
+ @Test
+ public void testWithoutConfigurationObject() throws Exception { // sink is built with no Configuration; ZooKeeper settings travel in the context
+ testUtility.createTable(tableName.getBytes(), columnFamily.getBytes());
+ deleteTable = true;
+ ctx.put("batchSize", "2");
+ ctx.put(HBaseSinkConfigurationConstants.ZK_QUORUM,
+ ZKConfig.getZKQuorumServersString(testUtility.getConfiguration()));
+ ctx.put(HBaseSinkConfigurationConstants.ZK_ZNODE_PARENT,
+ testUtility.getConfiguration().get(HConstants.ZOOKEEPER_ZNODE_PARENT));
+ AsyncHBaseSink sink = new AsyncHBaseSink(); // no-arg constructor: no HBase Configuration is handed in
+ Configurables.configure(sink, ctx);
+ // Reset context to values usable by other tests.
+ ctx.put(HBaseSinkConfigurationConstants.ZK_QUORUM, null);
+ ctx.put(HBaseSinkConfigurationConstants.ZK_ZNODE_PARENT, null);
+ ctx.put("batchSize", "100");
+ Channel channel = new MemoryChannel();
+ Configurables.configure(channel, ctx);
+ sink.setChannel(channel);
+ sink.start();
+ Transaction tx = channel.getTransaction();
+ tx.begin();
+ for (int i = 0; i < 3; i++) {
+ Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + i));
+ channel.put(e);
+ }
+ tx.commit();
+ tx.close();
+ int count = 0;
+ Status status = Status.READY;
+ while (status != Status.BACKOFF) { // keep calling process() until the sink reports BACKOFF
+ count++;
+ status = sink.process();
+ }
+ /*
+ * Make sure that the configuration was picked up from the context itself
+ * and not from a configuration object which was created by the sink.
+ */
+ Assert.assertTrue(sink.isConfNull());
+ sink.stop();
+ Assert.assertEquals(2, count); // number of process() calls, including the one that returned BACKOFF
+ HTable table = new HTable(testUtility.getConfiguration(), tableName);
+ byte[][] results = getResults(table, 3); // three payload slots, then the increment-column value
+ byte[] out;
+ int found = 0;
+ for (int i = 0; i < 3; i++) { // every expected body must appear in some payload slot
+ for (int j = 0; j < 3; j++) {
+ if (Arrays.equals(results[j], Bytes.toBytes(valBase + "-" + i))) {
+ found++;
+ break;
+ }
+ }
+ }
+ Assert.assertEquals(3, found);
+ out = results[3]; // last slot: value read from the increment column
+ Assert.assertArrayEquals(Longs.toByteArray(3), out);
+ }
+
+ @Test(expected = FlumeException.class)
+ public void testMissingTable() throws Exception { // no table is created; passes only if a FlumeException is thrown
+ deleteTable = false; // this test creates no table, so tearDownTest() must not drop one
+ ctx.put("batchSize", "2");
+ AsyncHBaseSink sink = new AsyncHBaseSink(testUtility.getConfiguration());
+ Configurables.configure(sink, ctx);
+ //Reset the context to a higher batchSize
+ ctx.put("batchSize", "100");
+ Channel channel = new MemoryChannel();
+ Configurables.configure(channel, ctx);
+ sink.setChannel(channel);
+ sink.start(); // NOTE(review): presumably where the FlumeException surfaces, leaving the rest unreached - confirm
+ Transaction tx = channel.getTransaction();
+ tx.begin();
+ for (int i = 0; i < 3; i++) {
+ Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + i));
+ channel.put(e);
+ }
+ tx.commit();
+ tx.close();
+ sink.process();
+ Assert.assertFalse(sink.isConfNull());
+ HTable table = new HTable(testUtility.getConfiguration(), tableName);
+ byte[][] results = getResults(table, 2);
+ byte[] out;
+ int found = 0;
+ for (int i = 0; i < 2; i++) {
+ for (int j = 0; j < 2; j++) {
+ if (Arrays.equals(results[j], Bytes.toBytes(valBase + "-" + i))) {
+ found++;
+ break;
+ }
+ }
+ }
+ Assert.assertEquals(2, found);
+ out = results[2];
+ Assert.assertArrayEquals(Longs.toByteArray(2), out);
+ sink.process();
+ sink.stop();
+ }
+
+ // The JDK exposes an open-FD count only through the Unix MXBean; -1 signals "not available".
+ private long getOpenFileDescriptorCount() {
+   if (!(os instanceof UnixOperatingSystemMXBean)) {
+     return -1;
+   }
+   UnixOperatingSystemMXBean unixOs = (UnixOperatingSystemMXBean) os;
+   return unixOs.getOpenFileDescriptorCount();
+ }
+
+ /*
+ * Before the fix for FLUME-2738, file descriptors were consistently leaked, with
+ * more than 10 FDs leaked for every single shutdown-reinitialize routine.
+ * Over the 10 shutdown-reinitialize runs below, a leak should therefore raise the
+ * FD count by well over 50, while without a leak there should not be any
+ * substantial increase.
+ * This test checks for such a leak by continuously failing transactions and
+ * shutting down and reinitializing the client every time; it fails if the FD
+ * count grows by 50 or more.
+ */
+ @Test
+ public void testFDLeakOnShutdown() throws Exception {
+ if (getOpenFileDescriptorCount() < 0) { // FD counting is unavailable on this platform: nothing to check
+ return;
+ }
+ testUtility.createTable(tableName.getBytes(), columnFamily.getBytes());
+ deleteTable = true;
+ AsyncHBaseSink sink = new AsyncHBaseSink(testUtility.getConfiguration(),
+ true, false);
+ ctx.put("maxConsecutiveFails", "1"); // NOTE(review): never reset on the shared ctx - confirm other tests tolerate it
+ Configurables.configure(sink, ctx);
+ Channel channel = new MemoryChannel();
+ Configurables.configure(channel, ctx);
+ sink.setChannel(channel);
+ channel.start();
+ sink.start();
+ Transaction tx = channel.getTransaction();
+ tx.begin();
+ for (int i = 0; i < 3; i++) {
+ Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + i));
+ channel.put(e);
+ }
+ tx.commit();
+ tx.close();
+ Assert.assertFalse(sink.isConfNull());
+ long initialFDCount = getOpenFileDescriptorCount(); // baseline taken after the sink has started
+
+ // Since the isTimeOutTest is set to true, transaction will fail
+ // with EventDeliveryException
+ for (int i = 0; i < 10; i++) {
+ try {
+ sink.process();
+ } catch (EventDeliveryException ex) { // the failure is the point of the test; deliberately ignored
+ }
+ }
+ long increaseInFD = getOpenFileDescriptorCount() - initialFDCount;
+ Assert.assertTrue("File Descriptor leak detected. FDs have increased by " +
+ increaseInFD + " from an initial FD count of " + initialFDCount,
+ increaseInFD < 50);
+ }
+
+ /**
+  * Expects an EventDeliveryException, presumably from the process() call made after
+  * the minicluster is shut down. Must run last - it shuts down the minicluster.
+  * @throws Exception
+  */
+ @Ignore("For dev builds only: " +
+     "This test takes too long, and this has to be run after all other " +
+     "tests, since it shuts down the minicluster. " +
+     "Comment out all other tests " +
+     "and uncomment this annotation to run this test.")
+ @Test(expected = EventDeliveryException.class)
+ public void testHBaseFailure() throws Exception {
+   ctx.put("batchSize", "2");
+   testUtility.createTable(tableName.getBytes(), columnFamily.getBytes());
+   deleteTable = false; // presumably because the cluster is gone by the time tearDownTest() runs
+   AsyncHBaseSink sink = new AsyncHBaseSink(testUtility.getConfiguration());
+   Configurables.configure(sink, ctx);
+   //Reset the context to a higher batchSize
+   ctx.put("batchSize", "100");
+   Channel channel = new MemoryChannel();
+   Configurables.configure(channel, ctx);
+   sink.setChannel(channel);
+   sink.start();
+   Transaction tx = channel.getTransaction();
+   tx.begin();
+   for (int i = 0; i < 3; i++) {
+     Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + i));
+     channel.put(e);
+   }
+   tx.commit();
+   tx.close();
+   sink.process(); // first call, made while the cluster is still up
+   Assert.assertFalse(sink.isConfNull());
+   HTable table = new HTable(testUtility.getConfiguration(), tableName);
+   byte[][] results = getResults(table, 2); // two payload slots, then the increment-column value
+   byte[] out;
+   int found = 0;
+   for (int i = 0; i < 2; i++) {
+     for (int j = 0; j < 2; j++) {
+       if (Arrays.equals(results[j], Bytes.toBytes(valBase + "-" + i))) {
+         found++;
+         break;
+       }
+     }
+   }
+   Assert.assertEquals(2, found);
+   out = results[2];
+   Assert.assertArrayEquals(Longs.toByteArray(2), out);
+   testUtility.shutdownMiniCluster();
+   sink.process(); // second call, made after the cluster has been shut down
+   sink.stop();
+ }
+
+ /**
+  * Scans the table and collects the payload-column values followed by whatever
+  * the increment-column scan returns. Expensive, so tread lightly: call it once
+  * per test and reuse the returned array.
+  *
+  * @param table table to scan
+  * @param numEvents number of events inserted into the table
+  * @return array of numEvents + 1 slots: payload values in scan order, then the
+  *     increment-column value (the callers expect exactly one) in the last slot
+  * @throws IOException if a scan fails
+  * @throws FlumeException if the payload scan returns more rows than numEvents
+  */
+ private byte[][] getResults(HTable table, int numEvents) throws IOException {
+   byte[][] results = new byte[numEvents + 1][];
+   int i = 0;
+   // Pass 1: payload column, starting at row key "default".
+   Scan scan = new Scan();
+   scan.addColumn(columnFamily.getBytes(), plCol.getBytes());
+   scan.setStartRow(Bytes.toBytes("default"));
+   ResultScanner rs = table.getScanner(scan);
+   try {
+     for (Result r = rs.next(); r != null; r = rs.next()) {
+       if (i >= numEvents) {
+         // No explicit close here: the finally block below closes the scanner.
+         throw new FlumeException("More results than expected in the table. " +
+             "Expected = " + numEvents + ". Found = " + i);
+       }
+       byte[] out = r.getValue(columnFamily.getBytes(), plCol.getBytes());
+       results[i++] = out;
+       System.out.println(Bytes.toStringBinary(out)); // print the content, not the array identity
+     }
+   } finally {
+     rs.close();
+   }
+   Assert.assertEquals(numEvents, i); // JUnit order: expected first, then actual
+
+   // Pass 2: increment column, starting at row key "incRow".
+   scan = new Scan();
+   scan.addColumn(columnFamily.getBytes(), inColumn.getBytes());
+   scan.setStartRow(Bytes.toBytes("incRow"));
+   rs = table.getScanner(scan);
+   try {
+     for (Result r = rs.next(); r != null; r = rs.next()) {
+       byte[] out = r.getValue(columnFamily.getBytes(), inColumn.getBytes());
+       results[i++] = out;
+       System.out.println(Bytes.toStringBinary(out));
+     }
+   } finally {
+     rs.close();
+   }
+   return results;
+ }
+}
+
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/TestHBaseSink.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/TestHBaseSink.java
new file mode 100644
index 0000000..217913b
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/TestHBaseSink.java
@@ -0,0 +1,744 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.hbase;
+
+import com.google.common.base.Charsets;
+import com.google.common.base.Throwables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.primitives.Longs;
+import org.apache.flume.Channel;
+import org.apache.flume.ChannelException;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.FlumeException;
+import org.apache.flume.Sink.Status;
+import org.apache.flume.Transaction;
+import org.apache.flume.channel.MemoryChannel;
+import org.apache.flume.conf.Configurables;
+import org.apache.flume.event.EventBuilder;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Increment;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.zookeeper.ZKConfig;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.spy;
+
+public class TestHBaseSink {
+ private static final Logger logger =
+ LoggerFactory.getLogger(TestHBaseSink.class);
+
+ private static final HBaseTestingUtility testUtility = new HBaseTestingUtility(); // mini cluster started in setUpOnce()
+ private static final String tableName = "TestHbaseSink"; // created in setUp(), deleted in tearDown()
+ private static final String columnFamily = "TestColumnFamily";
+ private static final String inColumn = "iCol"; // passed as serializer.incrementColumn
+ private static final String plCol = "pCol"; // passed as serializer.payloadColumn
+ private static final String valBase = "testing hbase sink: jham"; // base text for event bodies
+
+ private Configuration conf; // copy of the mini cluster configuration, rebuilt in setUp()
+ private Context ctx; // sink configuration, reset in setUp() and by the initContext* helpers
+
+ @BeforeClass
+ public static void setUpOnce() throws Exception {
+ testUtility.startMiniCluster(); // started once; the same cluster serves every test in this class
+ }
+
+ @AfterClass
+ public static void tearDownOnce() throws Exception {
+ testUtility.shutdownMiniCluster(); // runs once, after the last test of the class
+ }
+
+ /**
+ * Per-test setup: copies the mini cluster configuration, resets the sink
+ * {@link Context} to empty, and creates the test table with its column family.
+ */
+ @Before
+ public void setUp() throws IOException {
+ conf = new Configuration(testUtility.getConfiguration());
+ ctx = new Context();
+ testUtility.createTable(tableName.getBytes(), columnFamily.getBytes());
+ }
+
+ @After
+ public void tearDown() throws IOException {
+ testUtility.deleteTable(tableName.getBytes()); // drop the table created in setUp() so tests do not share rows
+ }
+
+ /**
+  * Rebuilds {@code ctx} with the sink settings for {@link SimpleHbaseEventSerializer}.
+  */
+ private void initContextForSimpleHbaseEventSerializer() {
+   ctx = new Context();
+   ctx.put("serializer", SimpleHbaseEventSerializer.class.getName());
+   ctx.put("serializer.payloadColumn", plCol);
+   ctx.put("serializer.incrementColumn", inColumn);
+   ctx.put("columnFamily", columnFamily);
+   ctx.put("table", tableName);
+ }
+
+ /**
+  * Rebuilds {@code ctx} with the sink settings for {@link IncrementHBaseSerializer}.
+  */
+ private void initContextForIncrementHBaseSerializer() {
+   ctx = new Context();
+   ctx.put("serializer", IncrementHBaseSerializer.class.getName());
+   ctx.put("columnFamily", columnFamily);
+   ctx.put("table", tableName);
+ }
+
+ @Test
+ public void testOneEventWithDefaults() throws Exception {
+   // Deliberately leave the payload and increment columns unconfigured.
+   ctx = new Context();
+   ctx.put("serializer", SimpleHbaseEventSerializer.class.getName());
+   ctx.put("columnFamily", columnFamily);
+   ctx.put("table", tableName);
+
+   final Channel memChannel = new MemoryChannel();
+   Configurables.configure(memChannel, new Context());
+   final HBaseSink hbaseSink = new HBaseSink(conf);
+   Configurables.configure(hbaseSink, ctx);
+   hbaseSink.setChannel(memChannel);
+   hbaseSink.start();
+   final Event event = EventBuilder.withBody(Bytes.toBytes(valBase));
+   final Transaction txn = memChannel.getTransaction();
+   txn.begin();
+   memChannel.put(event);
+   txn.commit();
+   txn.close();
+
+   hbaseSink.process();
+   hbaseSink.stop();
+
+   // Slot 0 holds the payload, slot 1 the increment counter.
+   final HTable table = new HTable(conf, tableName);
+   final byte[][] rows = getResults(table, 1);
+   Assert.assertArrayEquals(event.getBody(), rows[0]);
+   Assert.assertArrayEquals(Longs.toByteArray(1), rows[1]);
+ }
+
+ @Test
+ public void testOneEvent() throws Exception {
+   initContextForSimpleHbaseEventSerializer();
+
+   final Channel memChannel = new MemoryChannel();
+   Configurables.configure(memChannel, new Context());
+   final HBaseSink hbaseSink = new HBaseSink(conf);
+   Configurables.configure(hbaseSink, ctx);
+   hbaseSink.setChannel(memChannel);
+   hbaseSink.start();
+
+   // Stage one event in the channel inside its own transaction.
+   final Event event = EventBuilder.withBody(Bytes.toBytes(valBase));
+   final Transaction txn = memChannel.getTransaction();
+   txn.begin();
+   memChannel.put(event);
+   txn.commit();
+   txn.close();
+
+   hbaseSink.process();
+   hbaseSink.stop();
+
+   final byte[][] rows = getResults(new HTable(conf, tableName), 1);
+   Assert.assertArrayEquals(event.getBody(), rows[0]);
+   Assert.assertArrayEquals(Longs.toByteArray(1), rows[1]);
+ }
+
+ @Test
+ public void testThreeEvents() throws Exception {
+   initContextForSimpleHbaseEventSerializer();
+   ctx.put("batchSize", "3");
+
+   final Channel memChannel = new MemoryChannel();
+   Configurables.configure(memChannel, new Context());
+   final HBaseSink hbaseSink = new HBaseSink(conf);
+   Configurables.configure(hbaseSink, ctx);
+   hbaseSink.setChannel(memChannel);
+   hbaseSink.start();
+
+   final Transaction txn = memChannel.getTransaction();
+   txn.begin();
+   for (int n = 0; n < 3; n++) {
+     memChannel.put(EventBuilder.withBody(Bytes.toBytes(valBase + "-" + n)));
+   }
+   txn.commit();
+   txn.close();
+   hbaseSink.process();
+   hbaseSink.stop();
+   final byte[][] rows = getResults(new HTable(conf, tableName), 3);
+   // Row order is not asserted: count how many expected bodies appear anywhere
+   // among the three payload slots.
+   int matched = 0;
+   for (int n = 0; n < 3; n++) {
+     final byte[] expected = Bytes.toBytes(valBase + "-" + n);
+     int slot = 0;
+     while (slot < 3 && !Arrays.equals(rows[slot], expected)) {
+       slot++;
+     }
+     matched += (slot < 3) ? 1 : 0;
+   }
+   Assert.assertEquals(3, matched);
+   Assert.assertArrayEquals(Longs.toByteArray(3), rows[3]);
+ }
+
+ @Test
+ public void testMultipleBatches() throws Exception {
+ initContextForSimpleHbaseEventSerializer();
+ ctx.put("batchSize", "2");
+ HBaseSink sink = new HBaseSink(conf);
+ Configurables.configure(sink, ctx);
+ // Raise batchSize in the context after configure(); the sink is expected to keep using 2.
+ ctx.put("batchSize", "100");
+ Channel channel = new MemoryChannel();
+ Configurables.configure(channel, new Context());
+ sink.setChannel(channel);
+ sink.start();
+ Transaction tx = channel.getTransaction();
+ tx.begin();
+ for (int i = 0; i < 3; i++) {
+ Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + i));
+ channel.put(e);
+ }
+ tx.commit();
+ tx.close();
+ int count = 0; // number of process() calls made before the sink reports BACKOFF
+ while (sink.process() != Status.BACKOFF) {
+ count++;
+ }
+ sink.stop();
+ Assert.assertEquals(2, count); // three events should need exactly two such calls
+ HTable table = new HTable(conf, tableName);
+ byte[][] results = getResults(table, 3);
+ byte[] out;
+ int found = 0; // how many expected bodies appear in the payload slots, in any order
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 3; j++) {
+ if (Arrays.equals(results[j], Bytes.toBytes(valBase + "-" + i))) {
+ found++;
+ break;
+ }
+ }
+ }
+ Assert.assertEquals(3, found);
+ out = results[3]; // the slot after the payloads holds the increment value
+ Assert.assertArrayEquals(Longs.toByteArray(3), out);
+ }
+
+ @Test(expected = FlumeException.class)
+ public void testMissingTable() throws Exception {
+ logger.info("Running testMissingTable()");
+ initContextForSimpleHbaseEventSerializer();
+
+ // setUp() has already created the table, so delete it to simulate a missing table.
+ logger.info("Deleting table {}", tableName);
+ testUtility.deleteTable(tableName.getBytes());
+
+ ctx.put("batchSize", "2");
+ HBaseSink sink = new HBaseSink(conf);
+ Configurables.configure(sink, ctx);
+ // Raise batchSize in the context after the sink has been configured.
+ ctx.put("batchSize", "100");
+ Channel channel = new MemoryChannel();
+ Configurables.configure(channel, new Context());
+ sink.setChannel(channel);
+
+ logger.info("Writing data into channel");
+ Transaction tx = channel.getTransaction();
+ tx.begin();
+ for (int i = 0; i < 3; i++) {
+ Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + i));
+ channel.put(e);
+ }
+ tx.commit();
+ tx.close();
+
+ logger.info("Starting sink and processing events");
+ try {
+ logger.info("Calling sink.start()");
+ sink.start(); // Expected to throw the FlumeException this test declares.
+
+ // We never get here, but we log in case the behavior changes.
+ logger.error("Unexpected error: Calling sink.process()");
+ sink.process();
+ logger.error("Unexpected error: Calling sink.stop()");
+ sink.stop();
+ } finally {
+ // Re-create the table so tearDown() doesn't throw.
+ testUtility.createTable(tableName.getBytes(), columnFamily.getBytes());
+ }
+
+ // FIXME: start() is expected to throw above; if it does not, fail() fires, so the code below never runs.
+ Assert.fail();
+
+ HTable table = new HTable(conf, tableName);
+ byte[][] results = getResults(table, 2);
+ byte[] out;
+ int found = 0;
+ for (int i = 0; i < 2; i++) {
+ for (int j = 0; j < 2; j++) {
+ if (Arrays.equals(results[j], Bytes.toBytes(valBase + "-" + i))) {
+ found++;
+ break;
+ }
+ }
+ }
+ Assert.assertEquals(2, found);
+ out = results[2];
+ Assert.assertArrayEquals(Longs.toByteArray(2), out);
+ sink.process();
+ }
+
+ // TODO: Move this test to a different class and run it stand-alone.
+
+ /**
+  * Expects process() to fail once HBase is gone; must run last because it shuts down the minicluster.
+  *
+  * @throws Exception an {@link EventDeliveryException} is the expected outcome
+  */
+ @Ignore("For dev builds only: " +
+     "This test takes too long, and this has to be run after all other " +
+     "tests, since it shuts down the minicluster. " +
+     "Comment out all other tests " +
+     "and uncomment this annotation to run this test.")
+ @Test(expected = EventDeliveryException.class)
+ public void testHBaseFailure() throws Exception {
+   initContextForSimpleHbaseEventSerializer();
+   ctx.put("batchSize", "2");
+   HBaseSink sink = new HBaseSink(conf);
+   Configurables.configure(sink, ctx);
+   // Raise batchSize in the context after the sink has been configured.
+   ctx.put("batchSize", "100");
+   Channel channel = new MemoryChannel();
+   Configurables.configure(channel, new Context());
+   sink.setChannel(channel);
+   sink.start();
+   Transaction tx = channel.getTransaction();
+   tx.begin();
+   for (int i = 0; i < 3; i++) {
+     Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + i));
+     channel.put(e);
+   }
+   tx.commit();
+   tx.close();
+   sink.process();
+   HTable table = new HTable(conf, tableName);
+   byte[][] results = getResults(table, 2);
+   byte[] out;
+   int found = 0;
+   for (int i = 0; i < 2; i++) {
+     for (int j = 0; j < 2; j++) {
+       if (Arrays.equals(results[j], Bytes.toBytes(valBase + "-" + i))) {
+         found++;
+         break;
+       }
+     }
+   }
+   Assert.assertEquals(2, found);
+   out = results[2];
+   Assert.assertArrayEquals(Longs.toByteArray(2), out);
+   testUtility.shutdownMiniCluster();
+   sink.process();
+   sink.stop();
+ }
+
+ /**
+  * Scans the given table and collects the payload-column values followed by
+  * the increment-column value(s). Each call issues two HBase scans, so
+  * callers should invoke this once per test and reuse the returned array.
+  *
+  * @param table     table to scan
+  * @param numEvents number of events expected in the payload column
+  * @return array of {@code numEvents + 1} slots: payload values first, then
+  *         the increment counter value
+  * @throws IOException if either scan fails
+  */
+ private byte[][] getResults(HTable table, int numEvents) throws IOException {
+   byte[][] results = new byte[numEvents + 1][];
+   Scan scan = new Scan();
+   scan.addColumn(columnFamily.getBytes(), plCol.getBytes());
+   scan.setStartRow(Bytes.toBytes("default"));
+   ResultScanner rs = table.getScanner(scan);
+   byte[] out = null;
+   int i = 0;
+   try {
+     for (Result r = rs.next(); r != null; r = rs.next()) {
+       out = r.getValue(columnFamily.getBytes(), plCol.getBytes());
+       // The last slot is reserved for the increment counter.
+       if (i >= results.length - 1) {
+         // No explicit close here: the finally block closes the scanner.
+         throw new FlumeException("More results than expected in the table. " +
+             "Expected = " + numEvents + ". Found at least " + (i + 1));
+       }
+       results[i++] = out;
+       System.out.println(Bytes.toStringBinary(out));
+     }
+   } finally {
+     rs.close();
+   }
+
+   Assert.assertEquals(results.length - 1, i);
+   scan = new Scan();
+   scan.addColumn(columnFamily.getBytes(), inColumn.getBytes());
+   scan.setStartRow(Bytes.toBytes("incRow"));
+   rs = table.getScanner(scan);
+   out = null;
+   try {
+     for (Result r = rs.next(); r != null; r = rs.next()) {
+       out = r.getValue(columnFamily.getBytes(), inColumn.getBytes());
+       results[i++] = out;
+       System.out.println(Bytes.toStringBinary(out));
+     }
+   } finally {
+     rs.close();
+   }
+   return results;
+ }
+
+ @Test
+ public void testTransactionStateOnChannelException() throws Exception {
+ initContextForSimpleHbaseEventSerializer();
+ ctx.put("batchSize", "1");
+
+ HBaseSink sink = new HBaseSink(conf);
+ Configurables.configure(sink, ctx);
+ // Spy on the channel so take() can be stubbed: first to throw, later to return the event.
+ Channel channel = spy(new MemoryChannel());
+ Configurables.configure(channel, new Context());
+ sink.setChannel(channel);
+ sink.start();
+ Transaction tx = channel.getTransaction();
+ tx.begin();
+ Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + 0));
+ channel.put(e);
+ tx.commit();
+ tx.close();
+ doThrow(new ChannelException("Mock Exception")).when(channel).take(); // first process() must fail in take()
+ try {
+ sink.process();
+ Assert.fail("take() method should throw exception");
+ } catch (ChannelException ex) {
+ Assert.assertEquals("Mock Exception", ex.getMessage());
+ }
+ doReturn(e).when(channel).take(); // second process() receives the event from the stub
+ sink.process();
+ sink.stop();
+ HTable table = new HTable(conf, tableName);
+ byte[][] results = getResults(table, 1);
+ byte[] out = results[0];
+ Assert.assertArrayEquals(e.getBody(), out);
+ out = results[1];
+ Assert.assertArrayEquals(Longs.toByteArray(1), out);
+ }
+
+ /** After a serializer failure, a second process() call must still deliver the event. */
+ @Test
+ public void testTransactionStateOnSerializationException() throws Exception {
+   initContextForSimpleHbaseEventSerializer();
+   ctx.put("batchSize", "1");
+   ctx.put(HBaseSinkConfigurationConstants.CONFIG_SERIALIZER,
+       "org.apache.flume.sink.hbase.MockSimpleHbaseEventSerializer");
+
+   HBaseSink sink = new HBaseSink(conf);
+   Configurables.configure(sink, ctx);
+   // Raise batchSize in the context after the sink has been configured.
+   ctx.put("batchSize", "100");
+   Channel channel = new MemoryChannel();
+   Configurables.configure(channel, new Context());
+   sink.setChannel(channel);
+   sink.start();
+   Transaction tx = channel.getTransaction();
+   tx.begin();
+   Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + 0));
+   channel.put(e);
+   tx.commit();
+   tx.close();
+   MockSimpleHbaseEventSerializer.throwException = true;
+   try {
+     sink.process();
+     Assert.fail("FlumeException expected from serializer");
+   } catch (FlumeException ex) {
+     Assert.assertEquals("Exception for testing", ex.getMessage());
+   } finally {
+     // Always clear the static flag so a failure here cannot leak into other tests.
+     MockSimpleHbaseEventSerializer.throwException = false;
+   }
+   sink.process();
+   sink.stop();
+   byte[][] results = getResults(new HTable(conf, tableName), 1);
+   Assert.assertArrayEquals(e.getBody(), results[0]);
+   Assert.assertArrayEquals(Longs.toByteArray(1), results[1]);
+ }
+
+ /** Runs a sink built with the no-arg constructor, with the ZooKeeper settings supplied via the context. */
+ @Test
+ public void testWithoutConfigurationObject() throws Exception {
+   initContextForSimpleHbaseEventSerializer();
+   Context tmpContext = new Context(ctx.getParameters());
+   tmpContext.put("batchSize", "2");
+   tmpContext.put(HBaseSinkConfigurationConstants.ZK_QUORUM,
+       ZKConfig.getZKQuorumServersString(conf));
+   // Print the quorum actually handed to the sink: it is set on tmpContext, not on ctx.
+   System.out.print(tmpContext.getString(HBaseSinkConfigurationConstants.ZK_QUORUM));
+   tmpContext.put(HBaseSinkConfigurationConstants.ZK_ZNODE_PARENT,
+       conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
+           HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT));
+
+   HBaseSink sink = new HBaseSink();
+   Configurables.configure(sink, tmpContext);
+   Channel channel = new MemoryChannel();
+   Configurables.configure(channel, ctx);
+   sink.setChannel(channel);
+   sink.start();
+   Transaction tx = channel.getTransaction();
+   tx.begin();
+   for (int i = 0; i < 3; i++) {
+     Event e = EventBuilder.withBody(Bytes.toBytes(valBase + "-" + i));
+     channel.put(e);
+   }
+   tx.commit();
+   tx.close();
+   Status status = Status.READY;
+   while (status != Status.BACKOFF) {
+     status = sink.process();
+   }
+   sink.stop();
+   HTable table = new HTable(conf, tableName);
+   byte[][] results = getResults(table, 3);
+   int found = 0;
+   for (int i = 0; i < 3; i++) {
+     for (int j = 0; j < 3; j++) {
+       if (Arrays.equals(results[j], Bytes.toBytes(valBase + "-" + i))) {
+         found++;
+         break;
+       }
+     }
+   }
+   Assert.assertEquals(3, found);
+   Assert.assertArrayEquals(Longs.toByteArray(3), results[3]);
+ }
+
+ @Test
+ public void testZKQuorum() throws Exception {
+   initContextForSimpleHbaseEventSerializer();
+   // Every quorum entry carries the same port; hosts and port are asserted separately below.
+   final String quorumWithPorts = "zk1.flume.apache.org:3342, zk2.flume.apache.org:3342, "
+       + "zk3.flume.apache.org:3342";
+   final String znodeParent = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
+       HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
+   final Context sinkContext = new Context(ctx.getParameters());
+   sinkContext.put("batchSize", "2");
+   sinkContext.put(HBaseSinkConfigurationConstants.ZK_QUORUM, quorumWithPorts);
+   sinkContext.put(HBaseSinkConfigurationConstants.ZK_ZNODE_PARENT, znodeParent);
+   final HBaseSink hbaseSink = new HBaseSink();
+   Configurables.configure(hbaseSink, sinkContext);
+   Assert.assertEquals("zk1.flume.apache.org,zk2.flume.apache.org,zk3.flume.apache.org",
+       hbaseSink.getConfig().get(HConstants.ZOOKEEPER_QUORUM));
+   Assert.assertEquals("3342", hbaseSink.getConfig().get(HConstants.ZOOKEEPER_CLIENT_PORT));
+ }
+
+ @Test(expected = FlumeException.class)
+ public void testZKQuorumIncorrectPorts() throws Exception {
+   initContextForSimpleHbaseEventSerializer();
+   // The first entry uses port 3345 while the others use 3342.
+   final String mixedPortQuorum = "zk1.flume.apache.org:3345, zk2.flume.apache.org:3342, "
+       + "zk3.flume.apache.org:3342";
+   final String znodeParent = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
+       HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
+   final Context sinkContext = new Context(ctx.getParameters());
+   sinkContext.put("batchSize", "2");
+   sinkContext.put(HBaseSinkConfigurationConstants.ZK_QUORUM, mixedPortQuorum);
+   sinkContext.put(HBaseSinkConfigurationConstants.ZK_ZNODE_PARENT, znodeParent);
+   // configure() is expected to throw; reaching fail() means it did not.
+   Configurables.configure(new HBaseSink(), sinkContext);
+   Assert.fail();
+ }
+
+ /** With coalescing enabled, the validator callback must see the expected total per row:qualifier. */
+ @Test
+ public void testCoalesce() throws EventDeliveryException {
+   initContextForIncrementHBaseSerializer();
+   ctx.put("batchSize", "100");
+   ctx.put(HBaseSinkConfigurationConstants.CONFIG_COALESCE_INCREMENTS,
+       String.valueOf(true));
+   final Map<String, Long> expectedCounts = Maps.newHashMap();
+   expectedCounts.put("r1:c1", 10L);
+   expectedCounts.put("r1:c2", 20L);
+   expectedCounts.put("r2:c1", 7L);
+   expectedCounts.put("r2:c3", 63L);
+   HBaseSink.DebugIncrementsCallback cb = new CoalesceValidator(expectedCounts);
+
+   HBaseSink sink = new HBaseSink(testUtility.getConfiguration(), cb);
+   Configurables.configure(sink, ctx);
+   Channel channel = createAndConfigureMemoryChannel(sink);
+
+   List<Event> events = Lists.newLinkedList();
+   generateEvents(events, expectedCounts);
+   putEvents(channel, events);
+
+   sink.start();
+   sink.process(); // Calls CoalesceValidator instance.
+   sink.stop();
+ }
+
+ /** Coalescing is not enabled here, so the validator's count assertion is expected to fail. */
+ @Test(expected = AssertionError.class)
+ public void negativeTestCoalesce() throws EventDeliveryException {
+   initContextForIncrementHBaseSerializer();
+   ctx.put("batchSize", "10");
+   final Map<String, Long> expectedCounts = Maps.newHashMap();
+   expectedCounts.put("r1:c1", 10L);
+   HBaseSink.DebugIncrementsCallback cb = new CoalesceValidator(expectedCounts);
+
+   HBaseSink sink = new HBaseSink(testUtility.getConfiguration(), cb);
+   Configurables.configure(sink, ctx);
+   Channel channel = createAndConfigureMemoryChannel(sink);
+
+   List<Event> events = Lists.newLinkedList();
+   generateEvents(events, expectedCounts);
+   putEvents(channel, events);
+
+   sink.start();
+   sink.process(); // Calls CoalesceValidator instance.
+   sink.stop();
+ }
+
+ @Test
+ public void testBatchAware() throws EventDeliveryException {
+   logger.info("Running testBatchAware()");
+   initContextForIncrementHBaseSerializer();
+   final HBaseSink hbaseSink = new HBaseSink(testUtility.getConfiguration());
+   Configurables.configure(hbaseSink, ctx);
+   createAndConfigureMemoryChannel(hbaseSink);
+   // No events are queued; each process() call is still expected to start one batch.
+   final int expectedBatches = 3;
+   hbaseSink.start();
+   for (int call = 0; call < expectedBatches; call++) {
+     hbaseSink.process();
+   }
+   hbaseSink.stop();
+   final IncrementHBaseSerializer serializer = (IncrementHBaseSerializer) hbaseSink.getSerializer();
+   Assert.assertEquals(expectedBatches, serializer.getNumBatchesStarted());
+ }
+
+ /**
+  * Callback that checks every coalesced {@link Increment} against the expected
+  * per-"row:qualifier" counts; used with {@link IncrementHBaseSerializer}.
+  */
+ private static class CoalesceValidator
+     implements HBaseSink.DebugIncrementsCallback {
+
+   private final Map<String, Long> expectedCounts;
+   private final Method refGetFamilyMap;
+
+   public CoalesceValidator(Map<String, Long> expectedCounts) {
+     this.expectedCounts = expectedCounts;
+     this.refGetFamilyMap = HBaseSink.reflectLookupGetFamilyMap();
+   }
+
+   @Override
+   @SuppressWarnings("unchecked")
+   public void onAfterCoalesce(Iterable<Increment> increments) {
+     for (Increment inc : increments) {
+       byte[] row = inc.getRow();
+       Map<byte[], NavigableMap<byte[], Long>> families;
+       try {
+         // The family-map getter is obtained reflectively from HBaseSink.
+         families = (Map<byte[], NavigableMap<byte[], Long>>) refGetFamilyMap.invoke(inc);
+       } catch (Exception e) {
+         throw Throwables.propagate(e);
+       }
+       for (byte[] family : families.keySet()) {
+         NavigableMap<byte[], Long> qualifiers = families.get(family);
+         for (Map.Entry<byte[], Long> entry : qualifiers.entrySet()) {
+           byte[] qualifier = entry.getKey();
+           Long count = entry.getValue();
+           StringBuilder b = new StringBuilder(20);
+           b.append(new String(row, Charsets.UTF_8));
+           b.append(':');
+           b.append(new String(qualifier, Charsets.UTF_8));
+           String key = b.toString();
+           Assert.assertEquals("Expected counts don't match observed for " + key,
+               expectedCounts.get(key), count);
+         }
+       }
+     }
+   }
+ }
+
+ /**
+ * Add number of Events corresponding to counts to the events list.
+ * @param events Destination list.
+ * @param counts How many events to generate for each row:qualifier pair.
+ */
+ private void generateEvents(List events, Map counts) {
+ for (String key : counts.keySet()) {
+ long count = counts.get(key);
+ for (long i = 0; i < count; i++) {
+ events.add(EventBuilder.withBody(key, Charsets.UTF_8));
+ }
+ }
+ }
+
+ private Channel createAndConfigureMemoryChannel(HBaseSink sink) {
+ Channel channel = new MemoryChannel();
+ Context channelCtx = new Context();
+ channelCtx.put("capacity", String.valueOf(1000L));
+ channelCtx.put("transactionCapacity", String.valueOf(1000L));
+ Configurables.configure(channel, channelCtx);
+ sink.setChannel(channel);
+ channel.start();
+ return channel;
+ }
+
+ private void putEvents(Channel channel, Iterable<Event> events) {
+   Transaction tx = channel.getTransaction(); // one transaction covers every event
+   tx.begin();
+   for (Event event : events) {
+     channel.put(event);
+   }
+   tx.commit();
+   tx.close();
+ }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/TestHBaseSinkCreation.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/TestHBaseSinkCreation.java
new file mode 100644
index 0000000..115bc62
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/TestHBaseSinkCreation.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.hbase;
+
+import org.apache.flume.FlumeException;
+import org.apache.flume.Sink;
+import org.apache.flume.SinkFactory;
+import org.apache.flume.sink.DefaultSinkFactory;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestHBaseSinkCreation {
+
+ private SinkFactory sinkFactory;
+
+ @Before
+ public void setUp() {
+ sinkFactory = new DefaultSinkFactory();
+ }
+
+ private void verifySinkCreation(String name, String type,
+ Class> typeClass) throws FlumeException {
+ Sink sink = sinkFactory.create(name, type);
+ Assert.assertNotNull(sink);
+ Assert.assertTrue(typeClass.isInstance(sink));
+ }
+
+ @Test
+ public void testSinkCreation() {
+ verifySinkCreation("hbase-sink", "hbase", HBaseSink.class);
+ verifySinkCreation("asynchbase-sink", "asynchbase", AsyncHBaseSink.class);
+ }
+}
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/TestRegexHbaseEventSerializer.java b/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/TestRegexHbaseEventSerializer.java
new file mode 100644
index 0000000..24bcf37
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/src/test/java/org/apache/flume/sink/hbase/TestRegexHbaseEventSerializer.java
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.hbase;
+
+import com.google.common.collect.Maps;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.event.EventBuilder;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Increment;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Row;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Test;
+
+import java.nio.charset.Charset;
+import java.util.Calendar;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class TestRegexHbaseEventSerializer {
+
+ /**
+  * Ensure that when no config is specified, a catch-all regex is used
+  * with the default column name.
+  */
+ @Test
+ public void testDefaultBehavior() throws Exception {
+   RegexHbaseEventSerializer s = new RegexHbaseEventSerializer();
+   Context context = new Context();
+   s.configure(context);
+   String logMsg = "The sky is falling!";
+   Event e = EventBuilder.withBody(Bytes.toBytes(logMsg));
+   s.initialize(e, "CF".getBytes());
+   List<Row> actions = s.getActions();
+   // assertEquals reports the actual size on failure, unlike assertTrue(a == b)
+   assertEquals(1, actions.size());
+   assertTrue(actions.get(0) instanceof Put);
+   Put put = (Put) actions.get(0);
+
+   assertTrue(put.getFamilyMap().containsKey(s.cf));
+   List<KeyValue> kvPairs = put.getFamilyMap().get(s.cf);
+   assertEquals(1, kvPairs.size());
+
+   // Flatten the cells into qualifier -> value so they can be looked up by name
+   Map<String, String> resultMap = Maps.newHashMap();
+   for (KeyValue kv : kvPairs) {
+     resultMap.put(new String(kv.getQualifier()), new String(kv.getValue()));
+   }
+
+   assertTrue(resultMap.containsKey(
+       RegexHbaseEventSerializer.COLUMN_NAME_DEFAULT));
+   assertEquals("The sky is falling!",
+       resultMap.get(RegexHbaseEventSerializer.COLUMN_NAME_DEFAULT));
+ }
+ /**
+  * With {@code rowKeyIndex} set, the regex group at that index becomes the
+  * row key and only the remaining groups are written as columns.
+  */
+ @Test
+ public void testRowIndexKey() throws Exception {
+   RegexHbaseEventSerializer s = new RegexHbaseEventSerializer();
+   Context context = new Context();
+   context.put(RegexHbaseEventSerializer.REGEX_CONFIG,"^([^\t]+)\t([^\t]+)\t" + "([^\t]+)$");
+   context.put(RegexHbaseEventSerializer.COL_NAME_CONFIG, "col1,col2,ROW_KEY");
+   context.put("rowKeyIndex", "2");
+   s.configure(context);
+
+   String body = "val1\tval2\trow1";
+   Event e = EventBuilder.withBody(Bytes.toBytes(body));
+   s.initialize(e, "CF".getBytes());
+   List<Row> actions = s.getActions();
+
+   Put put = (Put) actions.get(0);
+
+   // Three groups are matched, but the one used as row key is not a column
+   List<KeyValue> kvPairs = put.getFamilyMap().get(s.cf);
+   assertEquals(2, kvPairs.size());
+
+   Map<String, String> resultMap = Maps.newHashMap();
+   for (KeyValue kv : kvPairs) {
+     resultMap.put(new String(kv.getQualifier()), new String(kv.getValue()));
+   }
+   assertEquals("val1", resultMap.get("col1"));
+   assertEquals("val2", resultMap.get("col2"));
+   assertEquals("row1", Bytes.toString(put.getRow()));
+ }
+
+ /** Test a common case where regex is used to parse apache log format. */
+ @Test
+ public void testApacheRegex() throws Exception {
+   RegexHbaseEventSerializer s = new RegexHbaseEventSerializer();
+   Context context = new Context();
+   context.put(RegexHbaseEventSerializer.REGEX_CONFIG,
+       "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) \"([^ ]+) ([^ ]+)" +
+       " ([^\"]+)\" (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\")" +
+       " ([^ \"]*|\"[^\"]*\"))?");
+   context.put(RegexHbaseEventSerializer.COL_NAME_CONFIG,
+       "host,identity,user,time,method,request,protocol,status,size," +
+       "referer,agent");
+   s.configure(context);
+   String logMsg = "33.22.11.00 - - [20/May/2011:07:01:19 +0000] " +
+       "\"GET /wp-admin/css/install.css HTTP/1.0\" 200 813 " +
+       "\"http://www.cloudera.com/wp-admin/install.php\" \"Mozilla/5.0 (comp" +
+       "atible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)\"";
+
+   Event e = EventBuilder.withBody(Bytes.toBytes(logMsg));
+   s.initialize(e, "CF".getBytes());
+   List<Row> actions = s.getActions();
+   // Check the list already fetched instead of asking the serializer again
+   assertEquals(1, actions.size());
+   assertTrue(actions.get(0) instanceof Put);
+
+   Put put = (Put) actions.get(0);
+   assertTrue(put.getFamilyMap().containsKey(s.cf));
+   // One cell per configured column name
+   List<KeyValue> kvPairs = put.getFamilyMap().get(s.cf);
+   assertEquals(11, kvPairs.size());
+
+   Map<String, String> resultMap = Maps.newHashMap();
+   for (KeyValue kv : kvPairs) {
+     resultMap.put(new String(kv.getQualifier()), new String(kv.getValue()));
+   }
+
+   assertEquals("33.22.11.00", resultMap.get("host"));
+   assertEquals("-", resultMap.get("identity"));
+   assertEquals("-", resultMap.get("user"));
+   assertEquals("[20/May/2011:07:01:19 +0000]", resultMap.get("time"));
+   assertEquals("GET", resultMap.get("method"));
+   assertEquals("/wp-admin/css/install.css", resultMap.get("request"));
+   assertEquals("HTTP/1.0", resultMap.get("protocol"));
+   assertEquals("200", resultMap.get("status"));
+   assertEquals("813", resultMap.get("size"));
+   assertEquals("\"http://www.cloudera.com/wp-admin/install.php\"",
+       resultMap.get("referer"));
+   assertEquals("\"Mozilla/5.0 (compatible; Yahoo! Slurp; " +
+       "http://help.yahoo.com/help/us/ysearch/slurp)\"",
+       resultMap.get("agent"));
+
+   List<Increment> increments = s.getIncrements();
+   assertEquals(0, increments.size());
+ }
+
+ /**
+  * Row keys are expected to look like {@code <millis>-<randomKey>-<nonce>},
+  * where the nonce keeps counting across serializer instances.
+  */
+ @Test
+ public void testRowKeyGeneration() {
+   Context emptyContext = new Context();
+   RegexHbaseEventSerializer first = new RegexHbaseEventSerializer();
+   first.configure(emptyContext);
+   RegexHbaseEventSerializer second = new RegexHbaseEventSerializer();
+   second.configure(emptyContext);
+
+   // Start the shared nonce from zero so the expected suffixes are known
+   RegexHbaseEventSerializer.nonce.set(0);
+   String keyMiddle = RegexHbaseEventSerializer.randomKey;
+
+   // The clock is mocked so the timestamp prefix is deterministic
+   Calendar clock = mock(Calendar.class);
+
+   when(clock.getTimeInMillis()).thenReturn(1L);
+   first.initialize(EventBuilder.withBody(Bytes.toBytes("body")), "CF".getBytes());
+   assertEquals("1-" + keyMiddle + "-0", new String(first.getRowKey(clock)));
+
+   when(clock.getTimeInMillis()).thenReturn(10L);
+   first.initialize(EventBuilder.withBody(Bytes.toBytes("body")), "CF".getBytes());
+   assertEquals("10-" + keyMiddle + "-1", new String(first.getRowKey(clock)));
+
+   // A different serializer instance continues the same nonce sequence
+   when(clock.getTimeInMillis()).thenReturn(100L);
+   second.initialize(EventBuilder.withBody(Bytes.toBytes("body")), "CF".getBytes());
+   assertEquals("100-" + keyMiddle + "-2", new String(second.getRowKey(clock)));
+ }
+
+ /**
+  * Test depositing of the header information: with header depositing
+  * enabled, each event header is stored as a column next to the body,
+  * using the configured charset.
+  */
+ @Test
+ public void testDepositHeaders() throws Exception {
+   Charset charset = Charset.forName("KOI8-R");
+   RegexHbaseEventSerializer s = new RegexHbaseEventSerializer();
+   Context context = new Context();
+   context.put(RegexHbaseEventSerializer.DEPOSIT_HEADERS_CONFIG,
+       "true");
+   context.put(RegexHbaseEventSerializer.CHARSET_CONFIG,
+       charset.toString());
+   s.configure(context);
+
+   String body = "body";
+   Map<String, String> headers = Maps.newHashMap();
+   headers.put("header1", "value1");
+   headers.put("заголовок2", "значение2");
+
+   Event e = EventBuilder.withBody(Bytes.toBytes(body), headers);
+   s.initialize(e, "CF".getBytes());
+   List<Row> actions = s.getActions();
+   // Check the list already fetched instead of asking the serializer again
+   assertEquals(1, actions.size());
+   assertTrue(actions.get(0) instanceof Put);
+
+   Put put = (Put) actions.get(0);
+   assertTrue(put.getFamilyMap().containsKey(s.cf));
+   // Body column plus one column per header
+   List<KeyValue> kvPairs = put.getFamilyMap().get(s.cf);
+   assertEquals(3, kvPairs.size());
+
+   // Values stay as raw bytes so their encoding can be checked below
+   Map<String, byte[]> resultMap = Maps.newHashMap();
+   for (KeyValue kv : kvPairs) {
+     resultMap.put(new String(kv.getQualifier(), charset), kv.getValue());
+   }
+
+   assertEquals(body,
+       new String(resultMap.get(RegexHbaseEventSerializer.COLUMN_NAME_DEFAULT), charset));
+   assertEquals("value1", new String(resultMap.get("header1"), charset));
+   assertArrayEquals("значение2".getBytes(charset), resultMap.get("заголовок2"));
+   // KOI8-R is a single-byte encoding, so byte count equals character count
+   assertEquals("значение2".length(), resultMap.get("заголовок2").length);
+
+   List<Increment> increments = s.getIncrements();
+   assertEquals(0, increments.size());
+ }
+}
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$1.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$1.class
new file mode 100644
index 0000000..16cbbe1
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$1.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$2.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$2.class
new file mode 100644
index 0000000..1d6c8ed
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$2.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$3.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$3.class
new file mode 100644
index 0000000..a4bb8d2
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$3.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$4.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$4.class
new file mode 100644
index 0000000..b0e527e
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$4.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$CellIdentifier.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$CellIdentifier.class
new file mode 100644
index 0000000..e757011
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$CellIdentifier.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$FailureCallback.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$FailureCallback.class
new file mode 100644
index 0000000..85a5a6e
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$FailureCallback.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$SuccessCallback.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$SuccessCallback.class
new file mode 100644
index 0000000..ca7e8d5
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink$SuccessCallback.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink.class
new file mode 100644
index 0000000..8c3b481
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHBaseSink.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHbaseEventSerializer.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHbaseEventSerializer.class
new file mode 100644
index 0000000..77c22c3
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/AsyncHbaseEventSerializer.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/BatchAware.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/BatchAware.class
new file mode 100644
index 0000000..c2d18e7
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/BatchAware.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink$1.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink$1.class
new file mode 100644
index 0000000..25a444f
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink$1.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink$2.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink$2.class
new file mode 100644
index 0000000..4d1a284
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink$2.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink$3.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink$3.class
new file mode 100644
index 0000000..ddf1f02
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink$3.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink$4.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink$4.class
new file mode 100644
index 0000000..57677c2
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink$4.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink$DebugIncrementsCallback.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink$DebugIncrementsCallback.class
new file mode 100644
index 0000000..a93f223
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink$DebugIncrementsCallback.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink.class
new file mode 100644
index 0000000..9107bc8
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSink.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSinkConfigurationConstants.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSinkConfigurationConstants.class
new file mode 100644
index 0000000..45a7d07
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HBaseSinkConfigurationConstants.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HbaseEventSerializer.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HbaseEventSerializer.class
new file mode 100644
index 0000000..8bbb189
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/HbaseEventSerializer.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/KfkAsyncHbaseEventSerializer.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/KfkAsyncHbaseEventSerializer.class
new file mode 100644
index 0000000..5bdd7ce
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/KfkAsyncHbaseEventSerializer.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/RegexHbaseEventSerializer.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/RegexHbaseEventSerializer.class
new file mode 100644
index 0000000..9575d0a
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/RegexHbaseEventSerializer.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/SimpleAsyncHbaseEventSerializer$1.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/SimpleAsyncHbaseEventSerializer$1.class
new file mode 100644
index 0000000..7b9558e
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/SimpleAsyncHbaseEventSerializer$1.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/SimpleAsyncHbaseEventSerializer.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/SimpleAsyncHbaseEventSerializer.class
new file mode 100644
index 0000000..bb34c15
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/SimpleAsyncHbaseEventSerializer.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/SimpleHbaseEventSerializer$KeyType.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/SimpleHbaseEventSerializer$KeyType.class
new file mode 100644
index 0000000..7576159
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/SimpleHbaseEventSerializer$KeyType.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/SimpleHbaseEventSerializer.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/SimpleHbaseEventSerializer.class
new file mode 100644
index 0000000..e375eca
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/SimpleHbaseEventSerializer.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/SimpleRowKeyGenerator.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/SimpleRowKeyGenerator.class
new file mode 100644
index 0000000..037185c
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/classes/org/apache/flume/sink/hbase/SimpleRowKeyGenerator.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/IncrementAsyncHBaseSerializer.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/IncrementAsyncHBaseSerializer.class
new file mode 100644
index 0000000..d8a5255
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/IncrementAsyncHBaseSerializer.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/IncrementHBaseSerializer.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/IncrementHBaseSerializer.class
new file mode 100644
index 0000000..39cde0f
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/IncrementHBaseSerializer.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/MockSimpleHbaseEventSerializer.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/MockSimpleHbaseEventSerializer.class
new file mode 100644
index 0000000..4ced24c
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/MockSimpleHbaseEventSerializer.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/TestAsyncHBaseSink.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/TestAsyncHBaseSink.class
new file mode 100644
index 0000000..dc6a15f
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/TestAsyncHBaseSink.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/TestHBaseSink$CoalesceValidator.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/TestHBaseSink$CoalesceValidator.class
new file mode 100644
index 0000000..db7933e
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/TestHBaseSink$CoalesceValidator.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/TestHBaseSink.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/TestHBaseSink.class
new file mode 100644
index 0000000..4689132
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/TestHBaseSink.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/TestHBaseSinkCreation.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/TestHBaseSinkCreation.class
new file mode 100644
index 0000000..703be15
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/TestHBaseSinkCreation.class differ
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/TestRegexHbaseEventSerializer.class b/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/TestRegexHbaseEventSerializer.class
new file mode 100644
index 0000000..cfd3f0b
Binary files /dev/null and b/code/flume-ng-sinks/flume-ng-hbase-sink/target/test-classes/org/apache/flume/sink/hbase/TestRegexHbaseEventSerializer.class differ
diff --git a/code/flume-ng-sinks/flume-ng-kafka-sink/pom.xml b/code/flume-ng-sinks/flume-ng-kafka-sink/pom.xml
new file mode 100644
index 0000000..8ad229e
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-kafka-sink/pom.xml
@@ -0,0 +1,91 @@
+
+
+
+ 4.0.0
+
+ flume-ng-sinks
+ org.apache.flume
+ 1.7.0
+
+ org.apache.flume.flume-ng-sinks
+ flume-ng-kafka-sink
+ Flume Kafka Sink
+
+
+
+
+ org.apache.rat
+ apache-rat-plugin
+
+
+ org.apache.maven.plugins
+ maven-jar-plugin
+
+
+
+ test-jar
+
+
+
+
+
+
+
+
+
+ org.apache.flume
+ flume-ng-sdk
+
+
+
+ org.apache.flume
+ flume-ng-core
+
+
+
+ org.apache.flume
+ flume-ng-configuration
+
+
+
+ org.apache.flume.flume-shared
+ flume-shared-kafka-test
+ test
+
+
+
+ org.slf4j
+ slf4j-api
+
+
+
+ junit
+ junit
+ test
+
+
+
+ org.apache.kafka
+ kafka_2.10
+ test
+
+
+ org.apache.kafka
+ kafka-clients
+ ${kafka.version}
+
+
+
+
+
diff --git a/code/flume-ng-sinks/flume-ng-kafka-sink/src/main/java/org/apache/flume/sink/kafka/KafkaSink.java b/code/flume-ng-sinks/flume-ng-kafka-sink/src/main/java/org/apache/flume/sink/kafka/KafkaSink.java
new file mode 100644
index 0000000..dd40224
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-kafka-sink/src/main/java/org/apache/flume/sink/kafka/KafkaSink.java
@@ -0,0 +1,460 @@
+/**
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+package org.apache.flume.sink.kafka;
+
+import com.google.common.base.Optional;
+import com.google.common.base.Throwables;
+import org.apache.avro.io.BinaryEncoder;
+import org.apache.avro.io.EncoderFactory;
+import org.apache.avro.specific.SpecificDatumReader;
+import org.apache.avro.specific.SpecificDatumWriter;
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.Transaction;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.conf.ConfigurationException;
+import org.apache.flume.conf.LogPrivacyUtil;
+import org.apache.flume.instrumentation.kafka.KafkaSinkCounter;
+import org.apache.flume.sink.AbstractSink;
+import org.apache.flume.source.avro.AvroFlumeEvent;
+import org.apache.kafka.clients.producer.Callback;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.ProducerConfig;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.kafka.clients.producer.RecordMetadata;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.concurrent.Future;
+
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.BOOTSTRAP_SERVERS_CONFIG;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.BATCH_SIZE;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.DEFAULT_BATCH_SIZE;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.BROKER_LIST_FLUME_KEY;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.DEFAULT_ACKS;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.DEFAULT_KEY_SERIALIZER;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.DEFAULT_TOPIC;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.DEFAULT_VALUE_SERIAIZER;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.KAFKA_PRODUCER_PREFIX;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.KEY_HEADER;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.OLD_BATCH_SIZE;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.REQUIRED_ACKS_FLUME_KEY;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.TOPIC_CONFIG;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.TOPIC_HEADER;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.KEY_SERIALIZER_KEY;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.MESSAGE_SERIALIZER_KEY;
+
+
+/**
+ * A Flume Sink that can publish messages to Kafka.
+ * This is a general implementation that can be used with any Flume agent and
+ * a channel.
+ * The message can be any event and the key is a string that we read from the
+ * header
+ * For use of partitioning, use an interceptor to generate a header with the
+ * partition key
+ *
+ * Mandatory properties are:
+ * brokerList -- can be a partial list, but at least 2 are recommended for HA
+ *
+ *
+ * however, any property starting with "kafka." will be passed along to the
+ * Kafka producer
+ * Read the Kafka producer documentation to see which configurations can be used
+ *
+ * Optional properties
+ * topic - there's a default, and also - this can be in the event header if
+ * you need to support events with
+ * different topics
+ * batchSize - how many messages to process in one batch. Larger batches
+ * improve throughput while adding latency.
+ * requiredAcks -- 0 (unsafe), 1 (accepted by at least one broker, default),
+ * -1 (accepted by all brokers)
+ * useFlumeEventFormat - preserves event headers when serializing onto Kafka
+ *
+ * header properties (per event):
+ * topic
+ * key
+ */
+public class KafkaSink extends AbstractSink implements Configurable {
+
+ private static final Logger logger = LoggerFactory.getLogger(KafkaSink.class);
+
+ // Producer configuration; passed to the KafkaProducer constructor in start()
+ private final Properties kafkaProps = new Properties();
+ // Created in start(), closed in stop()
+ private KafkaProducer<String, byte[]> producer;
+
+ private String topic;
+ private int batchSize;
+ // Futures returned by producer.send() for the batch being processed
+ private List<Future<RecordMetadata>> kafkaFutures;
+ private KafkaSinkCounter counter;
+ private boolean useAvroEventFormat;
+ // Name of the event header that may carry a per-event partition id
+ private String partitionHeader = null;
+ // Partition used for every event unless the partition header overrides it
+ private Integer staticPartitionId = null;
+ private Optional<SpecificDatumWriter<AvroFlumeEvent>> writer =
+     Optional.absent();
+ private Optional<SpecificDatumReader<AvroFlumeEvent>> reader =
+     Optional.absent();
+ private Optional<ByteArrayOutputStream> tempOutStream = Optional
+     .absent();
+
+ //Fine to use null for initial value, Avro will create new ones if this
+ // is null
+ private BinaryEncoder encoder = null;
+
+
+ /**
+  * Exposed for testing.
+  *
+  * @return the topic currently held by this sink
+  */
+ public String getTopic() {
+   return this.topic;
+ }
+
+ /**
+  * @return the batch size currently held by this sink
+  */
+ public int getBatchSize() {
+   return this.batchSize;
+ }
+
+ @Override
+ public Status process() throws EventDeliveryException {
+ Status result = Status.READY;
+ Channel channel = getChannel();
+ Transaction transaction = null;
+ Event event = null;
+ String eventTopic = null;
+ String eventKey = null;
+
+ try {
+ long processedEvents = 0;
+
+ transaction = channel.getTransaction();
+ transaction.begin();
+
+ kafkaFutures.clear();
+ long batchStartTime = System.nanoTime();
+ for (; processedEvents < batchSize; processedEvents += 1) {
+ event = channel.take();
+
+ if (event == null) {
+ // no events available in channel
+ if (processedEvents == 0) {
+ result = Status.BACKOFF;
+ counter.incrementBatchEmptyCount();
+ } else {
+ counter.incrementBatchUnderflowCount();
+ }
+ break;
+ }
+
+ byte[] eventBody = event.getBody();
+ Map headers = event.getHeaders();
+
+ eventTopic = headers.get(TOPIC_HEADER);
+ if (eventTopic == null) {
+ eventTopic = topic;
+ }
+ eventKey = headers.get(KEY_HEADER);
+ if (logger.isTraceEnabled()) {
+ if (LogPrivacyUtil.allowLogRawData()) {
+ logger.trace("{Event} " + eventTopic + " : " + eventKey + " : "
+ + new String(eventBody, "UTF-8"));
+ } else {
+ logger.trace("{Event} " + eventTopic + " : " + eventKey);
+ }
+ }
+ logger.debug("event #{}", processedEvents);
+
+ // create a message and add to buffer
+ long startTime = System.currentTimeMillis();
+
+ Integer partitionId = null;
+ try {
+ ProducerRecord record;
+ if (staticPartitionId != null) {
+ partitionId = staticPartitionId;
+ }
+ //Allow a specified header to override a static ID
+ if (partitionHeader != null) {
+ String headerVal = event.getHeaders().get(partitionHeader);
+ if (headerVal != null) {
+ partitionId = Integer.parseInt(headerVal);
+ }
+ }
+ if (partitionId != null) {
+ record = new ProducerRecord(eventTopic, partitionId, eventKey,
+ serializeEvent(event, useAvroEventFormat));
+ } else {
+ record = new ProducerRecord(eventTopic, eventKey,
+ serializeEvent(event, useAvroEventFormat));
+ }
+ kafkaFutures.add(producer.send(record, new SinkCallback(startTime)));
+ } catch (NumberFormatException ex) {
+ throw new EventDeliveryException("Non integer partition id specified", ex);
+ } catch (Exception ex) {
+ // N.B. The producer.send() method throws all sorts of RuntimeExceptions
+ // Catching Exception here to wrap them neatly in an EventDeliveryException
+ // which is what our consumers will expect
+ throw new EventDeliveryException("Could not send event", ex);
+ }
+ }
+
+ //Prevent linger.ms from holding the batch
+ producer.flush();
+
+ // publish batch and commit.
+ if (processedEvents > 0) {
+ for (Future future : kafkaFutures) {
+ future.get();
+ }
+ long endTime = System.nanoTime();
+ counter.addToKafkaEventSendTimer((endTime - batchStartTime) / (1000 * 1000));
+ counter.addToEventDrainSuccessCount(Long.valueOf(kafkaFutures.size()));
+ }
+
+ transaction.commit();
+
+ } catch (Exception ex) {
+ String errorMsg = "Failed to publish events";
+ logger.error("Failed to publish events", ex);
+ result = Status.BACKOFF;
+ if (transaction != null) {
+ try {
+ kafkaFutures.clear();
+ transaction.rollback();
+ counter.incrementRollbackCount();
+ } catch (Exception e) {
+ logger.error("Transaction rollback failed", e);
+ throw Throwables.propagate(e);
+ }
+ }
+ throw new EventDeliveryException(errorMsg, ex);
+ } finally {
+ if (transaction != null) {
+ transaction.close();
+ }
+ }
+
+ return result;
+ }
+
+ @Override
+ public synchronized void start() {
+ // Sink start-up: create the Kafka producer from kafkaProps (filled in by configure()).
+ producer = new KafkaProducer(kafkaProps);
+ counter.start(); // start the sink counter, then hand over to the superclass start()
+ super.start();
+ }
+
+ @Override
+ public synchronized void stop() {
+ producer.close(); // closes the producer created in start(); NOTE(review): NPE if start() never ran?
+ counter.stop();
+ logger.info("Kafka Sink {} stopped. Metrics: {}", getName(), counter); // logs the counter on shutdown
+ super.stop();
+ }
+
+
+ /**
+  * Configures the sink and builds the properties for the Kafka producer.
+  * Kafka producer properties are generated as follows:
+  * 1. Start from static defaults (acks, key and value serializers) that the
+  *    sink configuration can override.
+  * 2. Add every user-supplied parameter starting with "kafka.producer."; these
+  *    must be valid Kafka producer properties.
+  * 3. Add the sink's documented parameters (the bootstrap servers), which
+  *    override anything set by the previous steps.
+  *
+  * @param context sink configuration; deprecated keys in it are translated in place first
+  * @throws ConfigurationException if no bootstrap servers are configured
+  */
+ @Override
+ public void configure(Context context) {
+   // Rewrite deprecated keys first so everything below reads only the current names.
+   translateOldProps(context);
+
+   String topicStr = context.getString(TOPIC_CONFIG);
+   if (topicStr == null || topicStr.isEmpty()) {
+     topicStr = DEFAULT_TOPIC;
+     logger.warn("Topic was not specified. Using {} as the topic.", topicStr);
+   } else {
+     logger.info("Using the static topic {}. This may be overridden by event headers", topicStr);
+   }
+
+   topic = topicStr;
+
+   batchSize = context.getInteger(BATCH_SIZE, DEFAULT_BATCH_SIZE);
+
+   if (logger.isDebugEnabled()) {
+     logger.debug("Using batch size: {}", batchSize);
+   }
+
+   useAvroEventFormat = context.getBoolean(KafkaSinkConstants.AVRO_EVENT,
+       KafkaSinkConstants.DEFAULT_AVRO_EVENT);
+   // Both partition settings are optional; process() checks each of them for null before use.
+   partitionHeader = context.getString(KafkaSinkConstants.PARTITION_HEADER_NAME);
+   staticPartitionId = context.getInteger(KafkaSinkConstants.STATIC_PARTITION_CONF);
+
+   if (logger.isDebugEnabled()) {
+     logger.debug(KafkaSinkConstants.AVRO_EVENT + " set to: {}", useAvroEventFormat);
+   }
+   // Holds the futures returned by producer.send() for the batch currently being processed.
+   kafkaFutures = new LinkedList<Future<RecordMetadata>>();
+
+   String bootStrapServers = context.getString(BOOTSTRAP_SERVERS_CONFIG);
+   if (bootStrapServers == null || bootStrapServers.isEmpty()) {
+     throw new ConfigurationException("Bootstrap Servers must be specified");
+   }
+
+   setProducerProps(context, bootStrapServers);
+
+   if (logger.isDebugEnabled() && LogPrivacyUtil.allowLogPrintConfig()) {
+     logger.debug("Kafka producer properties: {}", kafkaProps);
+   }
+   // An already existing counter is kept when configure() runs again.
+   if (counter == null) {
+     counter = new KafkaSinkCounter(getName());
+   }
+ }
+
+ private void translateOldProps(Context ctx) {
+   // Copies each deprecated key to its current name (with a warning) unless the new key is set.
+   if (!ctx.containsKey(TOPIC_CONFIG) && ctx.containsKey("topic")) {
+     ctx.put(TOPIC_CONFIG, ctx.getString("topic"));
+     logger.warn("{} is deprecated. Please use the parameter {}", "topic", TOPIC_CONFIG);
+   }
+
+   // Broker list: when the new key is absent the deprecated one becomes mandatory, because
+   // the sink cannot be configured without bootstrap servers.
+   if (!ctx.containsKey(BOOTSTRAP_SERVERS_CONFIG)) {
+     String brokerList = ctx.getString(BROKER_LIST_FLUME_KEY);
+     if (brokerList == null || brokerList.isEmpty()) {
+       throw new ConfigurationException("Bootstrap Servers must be specified");
+     } else {
+       ctx.put(BOOTSTRAP_SERVERS_CONFIG, brokerList);
+       logger.warn("{} is deprecated. Please use the parameter {}",
+           BROKER_LIST_FLUME_KEY, BOOTSTRAP_SERVERS_CONFIG);
+     }
+   }
+
+   // Batch size
+   if (!ctx.containsKey(BATCH_SIZE)) {
+     String oldBatchSize = ctx.getString(OLD_BATCH_SIZE);
+     if (oldBatchSize != null && !oldBatchSize.isEmpty()) {
+       ctx.put(BATCH_SIZE, oldBatchSize);
+       logger.warn("{} is deprecated. Please use the parameter {}", OLD_BATCH_SIZE, BATCH_SIZE);
+     }
+   }
+
+   // Acks
+   if (!ctx.containsKey(KAFKA_PRODUCER_PREFIX + ProducerConfig.ACKS_CONFIG)) {
+     String requiredKey = ctx.getString(
+         KafkaSinkConstants.REQUIRED_ACKS_FLUME_KEY);
+     if (requiredKey != null && !requiredKey.isEmpty()) {
+       ctx.put(KAFKA_PRODUCER_PREFIX + ProducerConfig.ACKS_CONFIG, requiredKey);
+       logger.warn("{} is deprecated. Please use the parameter {}", REQUIRED_ACKS_FLUME_KEY,
+           KAFKA_PRODUCER_PREFIX + ProducerConfig.ACKS_CONFIG);
+     }
+   }
+   // The two old serializer keys are never copied to a new key; their presence only triggers a warning.
+   if (ctx.containsKey(KEY_SERIALIZER_KEY)) {
+     logger.warn("{} is deprecated. Flume now uses the latest Kafka producer which implements " +
+         "a different interface for serializers. Please use the parameter {}",
+         KEY_SERIALIZER_KEY, KAFKA_PRODUCER_PREFIX + ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG);
+   }
+
+   if (ctx.containsKey(MESSAGE_SERIALIZER_KEY)) {
+     logger.warn("{} is deprecated. Flume now uses the latest Kafka producer which implements " +
+         "a different interface for serializers. Please use the parameter {}",
+         MESSAGE_SERIALIZER_KEY,
+         KAFKA_PRODUCER_PREFIX + ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG);
+   }
+ }
+
+ private void setProducerProps(Context context, String bootStrapServers) {
+ kafkaProps.put(ProducerConfig.ACKS_CONFIG, DEFAULT_ACKS); // sink defaults are written first
+ // The defaults set here are replaced by any kafka.producer.* value copied in by putAll() below.
+ kafkaProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, DEFAULT_KEY_SERIALIZER);
+ kafkaProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, DEFAULT_VALUE_SERIAIZER);
+ kafkaProps.putAll(context.getSubProperties(KAFKA_PRODUCER_PREFIX));
+ kafkaProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootStrapServers); // written last, so it wins
+ }
+
+ protected Properties getKafkaProps() {
+ return kafkaProps; // the sink's own Properties instance is returned, not a copy
+ }
+
+ private byte[] serializeEvent(Event event, boolean useAvroEventFormat) throws IOException {
+   if (!useAvroEventFormat) {
+     return event.getBody(); // plain mode: the record value is the untouched event body
+   }
+   // Avro mode: headers and body are wrapped in an AvroFlumeEvent and binary-encoded.
+   // The output buffer and the datum writer are created once, on first use, then reused.
+   if (!tempOutStream.isPresent()) {
+     tempOutStream = Optional.of(new ByteArrayOutputStream());
+   }
+   if (!writer.isPresent()) {
+     writer = Optional.of(new SpecificDatumWriter(AvroFlumeEvent.class));
+   }
+   tempOutStream.get().reset();
+   AvroFlumeEvent avroEvent = new AvroFlumeEvent(toCharSeqMap(event.getHeaders()),
+       ByteBuffer.wrap(event.getBody()));
+   // The previous encoder instance is handed back to the factory as its second argument.
+   encoder = EncoderFactory.get().directBinaryEncoder(tempOutStream.get(), encoder);
+   writer.get().write(avroEvent, encoder);
+   encoder.flush();
+   return tempOutStream.get().toByteArray();
+ }
+
+ private static Map<CharSequence, CharSequence> toCharSeqMap(Map<String, String> stringMap) {
+   // Returns a new map with the same entries, typed with CharSequence keys and values;
+   // serializeEvent passes the result to the AvroFlumeEvent constructor.
+   // HashMap's copy constructor accepts the String map directly because String is a
+   // CharSequence, so no per-entry loop is needed.
+   return new HashMap<CharSequence, CharSequence>(stringMap);
+ }
+
+}
+
+class SinkCallback implements Callback {
+  private static final Logger logger = LoggerFactory.getLogger(SinkCallback.class);
+  private long startTime; // millisecond timestamp supplied by the sender, taken before send()
+  public SinkCallback(long startTime) {
+    this.startTime = startTime;
+  }
+
+  /** Debug-logs the outcome of one send; safe to call with a null metadata. */
+  public void onCompletion(RecordMetadata metadata, Exception exception) {
+    if (exception != null) {
+      logger.debug("Error sending message to Kafka {} ", exception.getMessage());
+    }
+    if (!logger.isDebugEnabled()) {
+      return; // the remaining statements only produce debug output
+    }
+    if (metadata != null) { // Kafka may invoke the callback without metadata on a failed send
+      logger.debug("Acked message partition:{} ofset:{}", metadata.partition(), metadata.offset());
+    }
+    logger.debug("Elapsed time for send: {}", System.currentTimeMillis() - startTime);
+  }
+}
+
diff --git a/code/flume-ng-sinks/flume-ng-kafka-sink/src/main/java/org/apache/flume/sink/kafka/KafkaSinkConstants.java b/code/flume-ng-sinks/flume-ng-kafka-sink/src/main/java/org/apache/flume/sink/kafka/KafkaSinkConstants.java
new file mode 100644
index 0000000..7c819f5
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-kafka-sink/src/main/java/org/apache/flume/sink/kafka/KafkaSinkConstants.java
@@ -0,0 +1,63 @@
+/**
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ limitations under the License.
+ */
+
+package org.apache.flume.sink.kafka;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+
+public class KafkaSinkConstants {
+ // Configuration keys, event-header names and default values used by the Kafka sink.
+ public static final String KAFKA_PREFIX = "kafka.";
+ public static final String KAFKA_PRODUCER_PREFIX = KAFKA_PREFIX + "producer.";
+
+ /* Current sink configuration keys */
+
+ public static final String TOPIC_CONFIG = KAFKA_PREFIX + "topic";
+ public static final String BATCH_SIZE = "flumeBatchSize";
+ public static final String BOOTSTRAP_SERVERS_CONFIG =
+ KAFKA_PREFIX + CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG;
+ // Event header names: the sink reads the record key and a per-event topic override from these.
+ public static final String KEY_HEADER = "key";
+ public static final String TOPIC_HEADER = "topic";
+ // When enabled, the sink serializes each event as an AvroFlumeEvent instead of the raw body.
+ public static final String AVRO_EVENT = "useFlumeEventFormat";
+ public static final boolean DEFAULT_AVRO_EVENT = false;
+ // Partition selection: name of an event header carrying a partition id, and a static partition id.
+ public static final String PARTITION_HEADER_NAME = "partitionIdHeader";
+ public static final String STATIC_PARTITION_CONF = "defaultPartitionId";
+ // Producer defaults that KafkaSink applies before copying in user-supplied producer properties.
+ public static final String DEFAULT_KEY_SERIALIZER =
+ "org.apache.kafka.common.serialization.StringSerializer";
+ public static final String DEFAULT_VALUE_SERIAIZER = // name is misspelled but public, so it is kept
+ "org.apache.kafka.common.serialization.ByteArraySerializer";
+
+ public static final int DEFAULT_BATCH_SIZE = 100;
+ public static final String DEFAULT_TOPIC = "default-flume-topic";
+ public static final String DEFAULT_ACKS = "1";
+
+ /* Old Properties */
+
+ /* Deprecated keys: KafkaSink.translateOldProps copies them to current keys or only warns. */
+
+ public static final String OLD_BATCH_SIZE = "batchSize";
+ public static final String MESSAGE_SERIALIZER_KEY = "serializer.class";
+ public static final String KEY_SERIALIZER_KEY = "key.serializer.class";
+ public static final String BROKER_LIST_FLUME_KEY = "brokerList";
+ public static final String REQUIRED_ACKS_FLUME_KEY = "requiredAcks";
+}
+
diff --git a/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/TestConstants.java b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/TestConstants.java
new file mode 100644
index 0000000..6d85700
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/TestConstants.java
@@ -0,0 +1,27 @@
+/**
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ limitations under the License.
+ */
+
+package org.apache.flume.sink.kafka;
+
+public class TestConstants {
+ public static final String STATIC_TOPIC = "static-topic"; // topic set through sink config in testStaticTopic
+ public static final String CUSTOM_KEY = "custom-key"; // record key sent through the "key" event header
+ public static final String CUSTOM_TOPIC = "custom-topic"; // topic sent through the "topic" event header
+ public static final String HEADER_1_VALUE = "test-avro-header"; // extra header value checked in testAvroEvent
+ public static final String HEADER_1_KEY = "header1"; // extra header name checked in testAvroEvent
+}
diff --git a/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/TestKafkaSink.java b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/TestKafkaSink.java
new file mode 100644
index 0000000..7eccf76
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/TestKafkaSink.java
@@ -0,0 +1,550 @@
+/**
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ limitations under the License.
+ */
+
+package org.apache.flume.sink.kafka;
+
+import com.google.common.base.Charsets;
+
+import kafka.admin.AdminUtils;
+import kafka.message.MessageAndMetadata;
+import kafka.utils.ZkUtils;
+
+import org.apache.avro.io.BinaryDecoder;
+import org.apache.avro.io.DecoderFactory;
+import org.apache.avro.specific.SpecificDatumReader;
+import org.apache.avro.util.Utf8;
+import org.apache.commons.lang.RandomStringUtils;
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.Sink;
+import org.apache.flume.Transaction;
+import org.apache.flume.channel.MemoryChannel;
+import org.apache.flume.conf.Configurables;
+import org.apache.flume.event.EventBuilder;
+import org.apache.flume.shared.kafka.test.KafkaPartitionTestUtil;
+import org.apache.flume.shared.kafka.test.PartitionOption;
+import org.apache.flume.shared.kafka.test.PartitionTestScenario;
+import org.apache.flume.sink.kafka.util.TestUtil;
+import org.apache.flume.source.avro.AvroFlumeEvent;
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.clients.producer.ProducerConfig;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.AVRO_EVENT;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.BATCH_SIZE;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.BOOTSTRAP_SERVERS_CONFIG;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.BROKER_LIST_FLUME_KEY;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.DEFAULT_KEY_SERIALIZER;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.DEFAULT_TOPIC;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.KAFKA_PREFIX;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.KAFKA_PRODUCER_PREFIX;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.OLD_BATCH_SIZE;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.REQUIRED_ACKS_FLUME_KEY;
+import static org.apache.flume.sink.kafka.KafkaSinkConstants.TOPIC_CONFIG;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.fail;
+
+/**
+ * Unit tests for KafkaSink, run against the Kafka and ZooKeeper endpoints exposed by TestUtil.
+ */
+public class TestKafkaSink {
+
+  private static TestUtil testUtil = TestUtil.getInstance();
+  private final Set<String> usedTopics = new HashSet<String>();
+
+  @BeforeClass
+  public static void setup() {
+    testUtil.prepare();
+    List<String> topics = new ArrayList<String>(3);
+    topics.add(DEFAULT_TOPIC);
+    topics.add(TestConstants.STATIC_TOPIC);
+    topics.add(TestConstants.CUSTOM_TOPIC);
+    testUtil.initTopicList(topics);
+  }
+
+  @AfterClass
+  public static void tearDown() {
+    testUtil.tearDown();
+  }
+
+  @Test
+  public void testKafkaProperties() {
+    // Both the deprecated "brokerList" and the current bootstrap key are set; the current one wins.
+    KafkaSink kafkaSink = new KafkaSink();
+    Context context = new Context();
+    context.put(KAFKA_PREFIX + TOPIC_CONFIG, "");
+    context.put(KAFKA_PRODUCER_PREFIX + ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
+        "override.default.serializer");
+    context.put("kafka.producer.fake.property", "kafka.property.value");
+    context.put("kafka.bootstrap.servers", "localhost:9092,localhost:9092");
+    context.put("brokerList", "real-broker-list");
+    Configurables.configure(kafkaSink, context);
+
+    Properties kafkaProps = kafkaSink.getKafkaProps();
+
+    //check that we have defaults set
+    assertEquals(DEFAULT_KEY_SERIALIZER,
+        kafkaProps.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG));
+    //check that kafka properties override the default and get correct name
+    assertEquals("override.default.serializer",
+        kafkaProps.getProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG));
+    //check that any kafka-producer property gets in
+    assertEquals("kafka.property.value",
+        kafkaProps.getProperty("fake.property"));
+    //check that documented property overrides defaults
+    assertEquals("localhost:9092,localhost:9092",
+        kafkaProps.getProperty("bootstrap.servers"));
+  }
+
+  @Test
+  public void testOldProperties() {
+    KafkaSink kafkaSink = new KafkaSink();
+    Context context = new Context();
+    context.put("topic", "test-topic");
+    context.put(OLD_BATCH_SIZE, "300");
+    context.put(BROKER_LIST_FLUME_KEY, "localhost:9092,localhost:9092");
+    context.put(REQUIRED_ACKS_FLUME_KEY, "all");
+    Configurables.configure(kafkaSink, context);
+
+    Properties kafkaProps = kafkaSink.getKafkaProps();
+    // Every deprecated key must have been translated to its current equivalent.
+    assertEquals("test-topic", kafkaSink.getTopic());
+    assertEquals(300, kafkaSink.getBatchSize());
+    assertEquals("localhost:9092,localhost:9092",
+        kafkaProps.getProperty(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG));
+    assertEquals("all", kafkaProps.getProperty(ProducerConfig.ACKS_CONFIG));
+
+  }
+
+  @Test
+  public void testDefaultTopic() {
+    Sink kafkaSink = new KafkaSink();
+    Context context = prepareDefaultContext();
+    Configurables.configure(kafkaSink, context);
+    Channel memoryChannel = new MemoryChannel();
+    Configurables.configure(memoryChannel, context);
+    kafkaSink.setChannel(memoryChannel);
+    kafkaSink.start();
+
+    String msg = "default-topic-test";
+    Transaction tx = memoryChannel.getTransaction();
+    tx.begin();
+    Event event = EventBuilder.withBody(msg.getBytes());
+    memoryChannel.put(event);
+    tx.commit();
+    tx.close();
+
+    try {
+      Sink.Status status = kafkaSink.process();
+      if (status == Sink.Status.BACKOFF) {
+        fail("Error Occurred");
+      }
+    } catch (EventDeliveryException ex) {
+      fail("Sink failed to deliver the event: " + ex);
+    }
+
+    String fetchedMsg = new String((byte[]) testUtil.getNextMessageFromConsumer(DEFAULT_TOPIC)
+        .message());
+    assertEquals(msg, fetchedMsg);
+  }
+
+  @Test
+  public void testStaticTopic() {
+    Context context = prepareDefaultContext();
+    // add the static topic
+    context.put(TOPIC_CONFIG, TestConstants.STATIC_TOPIC);
+    String msg = "static-topic-test";
+
+    try {
+      Sink.Status status = prepareAndSend(context, msg);
+      if (status == Sink.Status.BACKOFF) {
+        fail("Error Occurred");
+      }
+    } catch (EventDeliveryException ex) {
+      fail("Sink failed to deliver the event: " + ex);
+    }
+
+    String fetchedMsg = new String((byte[]) testUtil.getNextMessageFromConsumer(
+        TestConstants.STATIC_TOPIC).message());
+    assertEquals(msg, fetchedMsg);
+  }
+
+  @Test
+  public void testTopicAndKeyFromHeader() throws UnsupportedEncodingException {
+    Sink kafkaSink = new KafkaSink();
+    Context context = prepareDefaultContext();
+    Configurables.configure(kafkaSink, context);
+    Channel memoryChannel = new MemoryChannel();
+    Configurables.configure(memoryChannel, context);
+    kafkaSink.setChannel(memoryChannel);
+    kafkaSink.start();
+
+    String msg = "test-topic-and-key-from-header";
+    Map<String, String> headers = new HashMap<String, String>();
+    headers.put("topic", TestConstants.CUSTOM_TOPIC);
+    headers.put("key", TestConstants.CUSTOM_KEY);
+    Transaction tx = memoryChannel.getTransaction();
+    tx.begin();
+    Event event = EventBuilder.withBody(msg.getBytes(), headers);
+    memoryChannel.put(event);
+    tx.commit();
+    tx.close();
+
+    try {
+      Sink.Status status = kafkaSink.process();
+      if (status == Sink.Status.BACKOFF) {
+        fail("Error Occurred");
+      }
+    } catch (EventDeliveryException ex) {
+      fail("Sink failed to deliver the event: " + ex);
+    }
+
+    MessageAndMetadata fetchedMsg =
+        testUtil.getNextMessageFromConsumer(TestConstants.CUSTOM_TOPIC);
+
+    assertEquals(msg, new String((byte[]) fetchedMsg.message(), "UTF-8"));
+    assertEquals(TestConstants.CUSTOM_KEY,
+        new String((byte[]) fetchedMsg.key(), "UTF-8"));
+  }
+
+  @SuppressWarnings("rawtypes")
+  @Test
+  public void testAvroEvent() throws IOException {
+    Sink kafkaSink = new KafkaSink();
+    Context context = prepareDefaultContext();
+    context.put(AVRO_EVENT, "true");
+    Configurables.configure(kafkaSink, context);
+    Channel memoryChannel = new MemoryChannel();
+    Configurables.configure(memoryChannel, context);
+    kafkaSink.setChannel(memoryChannel);
+    kafkaSink.start();
+
+    String msg = "test-avro-event";
+
+    Map<String, String> headers = new HashMap<String, String>();
+    headers.put("topic", TestConstants.CUSTOM_TOPIC);
+    headers.put("key", TestConstants.CUSTOM_KEY);
+    headers.put(TestConstants.HEADER_1_KEY, TestConstants.HEADER_1_VALUE);
+    Transaction tx = memoryChannel.getTransaction();
+    tx.begin();
+    Event event = EventBuilder.withBody(msg.getBytes(), headers);
+    memoryChannel.put(event);
+    tx.commit();
+    tx.close();
+
+    try {
+      Sink.Status status = kafkaSink.process();
+      if (status == Sink.Status.BACKOFF) {
+        fail("Error Occurred");
+      }
+    } catch (EventDeliveryException ex) {
+      fail("Sink failed to deliver the event: " + ex);
+    }
+
+    MessageAndMetadata fetchedMsg = testUtil.getNextMessageFromConsumer(TestConstants.CUSTOM_TOPIC);
+    // Decode the Kafka record value back into the AvroFlumeEvent the sink serialized.
+    ByteArrayInputStream in = new ByteArrayInputStream((byte[]) fetchedMsg.message());
+    BinaryDecoder decoder = DecoderFactory.get().directBinaryDecoder(in, null);
+    SpecificDatumReader<AvroFlumeEvent> reader =
+        new SpecificDatumReader<AvroFlumeEvent>(AvroFlumeEvent.class);
+
+    AvroFlumeEvent avroevent = reader.read(null, decoder);
+
+    String eventBody = new String(avroevent.getBody().array(), Charsets.UTF_8);
+    Map<CharSequence, CharSequence> eventHeaders = avroevent.getHeaders();
+
+    assertEquals(msg, eventBody);
+    assertEquals(TestConstants.CUSTOM_KEY, new String((byte[]) fetchedMsg.key(), "UTF-8"));
+
+    assertEquals(TestConstants.HEADER_1_VALUE,
+        eventHeaders.get(new Utf8(TestConstants.HEADER_1_KEY)).toString());
+    assertEquals(TestConstants.CUSTOM_KEY, eventHeaders.get(new Utf8("key")).toString());
+  }
+
+  @Test
+  public void testEmptyChannel() throws UnsupportedEncodingException, EventDeliveryException {
+    Sink kafkaSink = new KafkaSink();
+    Context context = prepareDefaultContext();
+    Configurables.configure(kafkaSink, context);
+    Channel memoryChannel = new MemoryChannel();
+    Configurables.configure(memoryChannel, context);
+    kafkaSink.setChannel(memoryChannel);
+    kafkaSink.start();
+
+    Sink.Status status = kafkaSink.process();
+    if (status != Sink.Status.BACKOFF) {
+      fail("Error Occurred");
+    }
+    assertNull(testUtil.getNextMessageFromConsumer(DEFAULT_TOPIC));
+  }
+
+  @Test
+  public void testPartitionHeaderSet() throws Exception {
+    doPartitionHeader(PartitionTestScenario.PARTITION_ID_HEADER_ONLY);
+  }
+
+  @Test
+  public void testPartitionHeaderNotSet() throws Exception {
+    doPartitionHeader(PartitionTestScenario.NO_PARTITION_HEADERS);
+  }
+
+  @Test
+  public void testStaticPartitionAndHeaderSet() throws Exception {
+    doPartitionHeader(PartitionTestScenario.STATIC_HEADER_AND_PARTITION_ID);
+  }
+
+  @Test
+  public void testStaticPartitionHeaderNotSet() throws Exception {
+    doPartitionHeader(PartitionTestScenario.STATIC_HEADER_ONLY);
+  }
+
+  @Test
+  public void testPartitionHeaderMissing() throws Exception {
+    doPartitionErrors(PartitionOption.NOTSET);
+  }
+
+  @Test(expected = org.apache.flume.EventDeliveryException.class)
+  public void testPartitionHeaderOutOfRange() throws Exception {
+    doPartitionErrors(PartitionOption.VALIDBUTOUTOFRANGE);
+  }
+
+  @Test(expected = org.apache.flume.EventDeliveryException.class)
+  public void testPartitionHeaderInvalid() throws Exception {
+    doPartitionErrors(PartitionOption.NOTANUMBER);
+  }
+
+  /**
+   * This function tests three scenarios:
+   * 1. PartitionOption.VALIDBUTOUTOFRANGE: An integer partition is provided,
+   *    however it exceeds the number of partitions available on the topic.
+   *    Expected behaviour: EventDeliveryException thrown.
+   *
+   * 2. PartitionOption.NOTSET: The partition header is not actually set.
+   *    Expected behaviour: Exception is not thrown because the code avoids an NPE.
+   *
+   * 3. PartitionOption.NOTANUMBER: The partition header is set, but is not an Integer.
+   *    Expected behaviour: EventDeliveryException thrown.
+   *
+   * @param option which of the three header situations to set up on the event
+   * @throws Exception if the sink cannot process the event (expected for scenarios 1 and 3)
+   */
+  private void doPartitionErrors(PartitionOption option) throws Exception {
+    Sink kafkaSink = new KafkaSink();
+    Context context = prepareDefaultContext();
+    context.put(KafkaSinkConstants.PARTITION_HEADER_NAME, "partition-header");
+
+    Configurables.configure(kafkaSink, context);
+    Channel memoryChannel = new MemoryChannel();
+    Configurables.configure(memoryChannel, context);
+    kafkaSink.setChannel(memoryChannel);
+    kafkaSink.start();
+
+    String topic = findUnusedTopic();
+    createTopic(topic, 5);
+
+    Transaction tx = memoryChannel.getTransaction();
+    tx.begin();
+
+    Map<String, String> headers = new HashMap<String, String>();
+    headers.put("topic", topic);
+    switch (option) {
+      case VALIDBUTOUTOFRANGE:
+        headers.put("partition-header", "9");
+        break;
+      case NOTSET:
+        headers.put("wrong-header", "2");
+        break;
+      case NOTANUMBER:
+        headers.put("partition-header", "not-a-number");
+        break;
+      default:
+        break;
+    }
+
+    Event event = EventBuilder.withBody(String.valueOf(9).getBytes(), headers);
+
+    memoryChannel.put(event);
+    tx.commit();
+    tx.close();
+
+    Sink.Status status = kafkaSink.process();
+    assertEquals(Sink.Status.READY, status);
+
+    deleteTopic(topic);
+
+  }
+
+  /**
+   * This method tests both the default behavior (no partition settings configured)
+   * and the behaviour when the partitionId setting is used.
+   * Under the default behaviour, one would expect an even distribution of
+   * messages to partitions, however when partitionId is used we manually create
+   * a large skew to some partitions and then verify that this actually happened
+   * by reading messages directly using a Kafka Consumer.
+   *
+   * @param scenario which combination of partition-id header and static partition id
+   *                 is configured on the sink
+   * @throws Exception if sending or reading back the messages fails
+   */
+  private void doPartitionHeader(PartitionTestScenario scenario) throws Exception {
+    final int numPtns = 5;
+    final int numMsgs = numPtns * 10;
+    final Integer staticPtn = 3;
+
+    String topic = findUnusedTopic();
+    createTopic(topic, numPtns);
+    Context context = prepareDefaultContext();
+    context.put(BATCH_SIZE, "100");
+
+    if (scenario == PartitionTestScenario.PARTITION_ID_HEADER_ONLY ||
+        scenario == PartitionTestScenario.STATIC_HEADER_AND_PARTITION_ID) {
+      context.put(KafkaSinkConstants.PARTITION_HEADER_NAME,
+          KafkaPartitionTestUtil.PARTITION_HEADER);
+    }
+    if (scenario == PartitionTestScenario.STATIC_HEADER_AND_PARTITION_ID ||
+        scenario == PartitionTestScenario.STATIC_HEADER_ONLY) {
+      context.put(KafkaSinkConstants.STATIC_PARTITION_CONF, staticPtn.toString());
+    }
+    Sink kafkaSink = new KafkaSink();
+
+    Configurables.configure(kafkaSink, context);
+    Channel memoryChannel = new MemoryChannel();
+    Configurables.configure(memoryChannel, context);
+    kafkaSink.setChannel(memoryChannel);
+    kafkaSink.start();
+
+    //Create a map of PartitionId:List<Event> according to the desired distribution
+    Map<Integer, List<Event>> partitionMap = new HashMap<Integer, List<Event>>(numPtns);
+    for (int i = 0; i < numPtns; i++) {
+      partitionMap.put(i, new ArrayList<Event>());
+    }
+    Transaction tx = memoryChannel.getTransaction();
+    tx.begin();
+
+    List<Event> orderedEvents = KafkaPartitionTestUtil.generateSkewedMessageList(scenario, numMsgs,
+        partitionMap, numPtns, staticPtn);
+
+    for (Event event : orderedEvents) {
+      event.getHeaders().put("topic", topic);
+      memoryChannel.put(event);
+    }
+
+    tx.commit();
+    tx.close();
+
+    Sink.Status status = kafkaSink.process();
+    assertEquals(Sink.Status.READY, status);
+
+    Properties props = new Properties();
+    props.put("bootstrap.servers", testUtil.getKafkaServerUrl());
+    props.put("group.id", "group_1");
+    props.put("enable.auto.commit", "true");
+    props.put("auto.commit.interval.ms", "1000");
+    props.put("session.timeout.ms", "30000");
+    props.put("key.deserializer",
+        "org.apache.kafka.common.serialization.StringDeserializer");
+    props.put("value.deserializer",
+        "org.apache.kafka.common.serialization.ByteArrayDeserializer");
+    props.put("auto.offset.reset", "earliest");
+    Map<Integer, List<byte[]>> resultsMap =
+        KafkaPartitionTestUtil.retrieveRecordsFromPartitions(topic, numPtns, props);
+
+    KafkaPartitionTestUtil.checkResultsAgainstSkew(scenario, partitionMap, resultsMap, staticPtn,
+        numMsgs);
+
+    memoryChannel.stop();
+    kafkaSink.stop();
+    deleteTopic(topic);
+
+  }
+
+  private Context prepareDefaultContext() {
+    // Prepares a default context with Kafka Server Properties
+    Context context = new Context();
+    context.put(BOOTSTRAP_SERVERS_CONFIG, testUtil.getKafkaServerUrl());
+    context.put(BATCH_SIZE, "1");
+    return context;
+  }
+
+  private Sink.Status prepareAndSend(Context context, String msg)
+      throws EventDeliveryException {
+    Sink kafkaSink = new KafkaSink();
+    Configurables.configure(kafkaSink, context);
+    Channel memoryChannel = new MemoryChannel();
+    Configurables.configure(memoryChannel, context);
+    kafkaSink.setChannel(memoryChannel);
+    kafkaSink.start();
+
+    Transaction tx = memoryChannel.getTransaction();
+    tx.begin();
+    Event event = EventBuilder.withBody(msg.getBytes());
+    memoryChannel.put(event);
+    tx.commit();
+    tx.close();
+
+    return kafkaSink.process();
+  }
+
+  public static void createTopic(String topicName, int numPartitions) {
+    ZkUtils zkUtils = ZkUtils.apply(testUtil.getZkUrl(), 10000, 10000, false);
+    try {
+      AdminUtils.createTopic(zkUtils, topicName, numPartitions, 1, new Properties());
+    } finally {
+      zkUtils.close(); // always release the ZooKeeper client opened above
+    }
+  }
+
+  public static void deleteTopic(String topicName) {
+    ZkUtils zkUtils = ZkUtils.apply(testUtil.getZkUrl(), 10000, 10000, false);
+    try {
+      AdminUtils.deleteTopic(zkUtils, topicName);
+    } finally {
+      zkUtils.close(); // always release the ZooKeeper client opened above
+    }
+  }
+
+  public String findUnusedTopic() {
+    String newTopic = null;
+    boolean topicFound = false;
+    while (!topicFound) {
+      newTopic = RandomStringUtils.randomAlphabetic(8);
+      if (!usedTopics.contains(newTopic)) {
+        usedTopics.add(newTopic);
+        topicFound = true;
+      }
+    }
+    return newTopic;
+  }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/util/KafkaConsumer.java b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/util/KafkaConsumer.java
new file mode 100644
index 0000000..d5dfbd6
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/util/KafkaConsumer.java
@@ -0,0 +1,98 @@
+/**
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+package org.apache.flume.sink.kafka.util;
+
+import kafka.consumer.ConsumerConfig;
+import kafka.consumer.ConsumerIterator;
+import kafka.consumer.ConsumerTimeoutException;
+import kafka.consumer.KafkaStream;
+import kafka.javaapi.consumer.ConsumerConnector;
+import kafka.message.MessageAndMetadata;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+/**
+ * A Kafka Consumer implementation. This uses the current thread to fetch the
+ * next message from the queue and doesn't use a multi threaded implementation.
+ * So this implements a synchronous blocking call.
+ * To avoid infinite waiting, the consumer is configured with
+ * {@code consumer.timeout.ms=1000}, i.e. it waits only for 1 second before
+ * concluding that the message will not be available.
+ */
+public class KafkaConsumer {
+
+  private static final Logger logger = LoggerFactory.getLogger(
+      KafkaConsumer.class);
+
+  private final ConsumerConnector consumer;
+  // Streams per topic; populated by initTopicList(), null until then.
+  Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap;
+
+  /**
+   * Creates a consumer connector in group "group_1" against the ZooKeeper URL
+   * reported by the shared {@link TestUtil} instance.
+   */
+  public KafkaConsumer() {
+    consumer = kafka.consumer.Consumer.createJavaConsumerConnector(
+        createConsumerConfig(TestUtil.getInstance().getZkUrl(), "group_1"));
+  }
+
+  /**
+   * Builds the consumer configuration.
+   *
+   * @param zkUrl   value for {@code zookeeper.connect}
+   * @param groupId value for {@code group.id}
+   * @return the consumer configuration built from these settings
+   */
+  private static ConsumerConfig createConsumerConfig(String zkUrl,
+                                                     String groupId) {
+    Properties props = new Properties();
+    props.put("zookeeper.connect", zkUrl);
+    props.put("group.id", groupId);
+    props.put("zookeeper.session.timeout.ms", "1000");
+    props.put("zookeeper.sync.time.ms", "200");
+    props.put("auto.commit.interval.ms", "1000");
+    props.put("auto.offset.reset", "smallest");
+    // Bounds how long getNextMessage() blocks when no message arrives.
+    props.put("consumer.timeout.ms","1000");
+    return new ConsumerConfig(props);
+  }
+
+  /**
+   * Opens one message stream per topic. Must be called before
+   * {@link #getNextMessage(String)}, which reads from the streams created here.
+   *
+   * @param topics topics to subscribe to
+   */
+  public void initTopicList(List<String> topics) {
+    Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
+    for (String topic : topics) {
+      // we need only single threaded consumers
+      topicCountMap.put(topic, Integer.valueOf(1));
+    }
+    consumerMap = consumer.createMessageStreams(topicCountMap);
+  }
+
+  /**
+   * Fetches the next message for the given topic.
+   *
+   * @param topic a topic previously passed to {@link #initTopicList(List)}
+   * @return the next message, or {@code null} when the iterator has no
+   *         further message or a {@link ConsumerTimeoutException} is raised
+   */
+  public MessageAndMetadata getNextMessage(String topic) {
+    List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic);
+    // it has only a single stream, because there is only one consumer
+    KafkaStream<byte[], byte[]> stream = streams.get(0);
+    final ConsumerIterator<byte[], byte[]> it = stream.iterator();
+    try {
+      if (it.hasNext()) {
+        return it.next();
+      } else {
+        return null;
+      }
+    } catch (ConsumerTimeoutException e) {
+      logger.error("0 messages available to fetch for the topic " + topic);
+      return null;
+    }
+  }
+
+  /** Shuts down the underlying consumer connector. */
+  public void shutdown() {
+    consumer.shutdown();
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/util/KafkaLocal.java b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/util/KafkaLocal.java
new file mode 100644
index 0000000..6d89bd3
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/util/KafkaLocal.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kafka.util;
+
+import kafka.server.KafkaConfig;
+import kafka.server.KafkaServerStartable;
+
+import java.io.IOException;
+import java.util.Properties;
+
+/**
+ * Wraps an in-process Kafka broker so unit tests can start and stop it.
+ * Reference: https://gist.github.com/fjavieralba/7930018/
+ */
+public class KafkaLocal {
+
+  /** The wrapped broker; created by the constructor, started by {@link #start()}. */
+  public KafkaServerStartable kafka;
+  /** Not assigned anywhere in this class. */
+  public ZooKeeperLocal zookeeper;
+
+  /**
+   * Prepares (but does not start) a broker from the given properties.
+   *
+   * @param kafkaProperties broker configuration
+   */
+  public KafkaLocal(Properties kafkaProperties) throws IOException, InterruptedException {
+    // The broker is only constructed here; start() brings it up.
+    this.kafka = new KafkaServerStartable(KafkaConfig.fromProps(kafkaProperties));
+  }
+
+  /** Starts the wrapped broker. */
+  public void start() throws Exception {
+    this.kafka.startup();
+  }
+
+  /** Shuts the wrapped broker down. */
+  public void stop() {
+    this.kafka.shutdown();
+  }
+
+}
\ No newline at end of file
diff --git a/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/util/TestUtil.java b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/util/TestUtil.java
new file mode 100644
index 0000000..6405d6c
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/util/TestUtil.java
@@ -0,0 +1,175 @@
+/**
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+package org.apache.flume.sink.kafka.util;
+
+import kafka.message.MessageAndMetadata;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.net.BindException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.List;
+import java.util.Properties;
+import java.util.Random;
+
+/**
+ * A utility class for starting/stopping Kafka Server.
+ */
+public class TestUtil {
+
+ private static final Logger logger = LoggerFactory.getLogger(TestUtil.class);
+ private static TestUtil instance = new TestUtil();
+
+ private Random randPortGen = new Random(System.currentTimeMillis());
+ private KafkaLocal kafkaServer;
+ private KafkaConsumer kafkaConsumer;
+ private String hostname = "localhost";
+ private int kafkaLocalPort;
+ private int zkLocalPort;
+
+ private TestUtil() {
+ init();
+ }
+
+ public static TestUtil getInstance() {
+ return instance;
+ }
+
+ private void init() {
+ // get the localhost.
+ try {
+ hostname = InetAddress.getLocalHost().getHostName();
+ } catch (UnknownHostException e) {
+ logger.warn("Error getting the value of localhost. " +
+ "Proceeding with 'localhost'.", e);
+ }
+ }
+
+ private boolean startKafkaServer() {
+ Properties kafkaProperties = new Properties();
+ Properties zkProperties = new Properties();
+
+ logger.info("Starting kafka server.");
+ try {
+ //load properties
+ zkProperties.load(Class.class.getResourceAsStream(
+ "/zookeeper.properties"));
+
+ ZooKeeperLocal zookeeper;
+ while (true) {
+ //start local Zookeeper
+ try {
+ zkLocalPort = getNextPort();
+ // override the Zookeeper client port with the generated one.
+ zkProperties.setProperty("clientPort", Integer.toString(zkLocalPort));
+ zookeeper = new ZooKeeperLocal(zkProperties);
+ break;
+ } catch (BindException bindEx) {
+ // bind exception. port is already in use. Try a different port.
+ }
+ }
+ logger.info("ZooKeeper instance is successfully started on port " +
+ zkLocalPort);
+
+ kafkaProperties.load(Class.class.getResourceAsStream(
+ "/kafka-server.properties"));
+ // override the Zookeeper url.
+ kafkaProperties.setProperty("zookeeper.connect", getZkUrl());
+ while (true) {
+ kafkaLocalPort = getNextPort();
+ // override the Kafka server port
+ kafkaProperties.setProperty("port", Integer.toString(kafkaLocalPort));
+ kafkaServer = new KafkaLocal(kafkaProperties);
+ try {
+ kafkaServer.start();
+ break;
+ } catch (BindException bindEx) {
+ // let's try another port.
+ }
+ }
+ logger.info("Kafka Server is successfully started on port " +
+ kafkaLocalPort);
+ return true;
+
+ } catch (Exception e) {
+ logger.error("Error starting the Kafka Server.", e);
+ return false;
+ }
+ }
+
+ private KafkaConsumer getKafkaConsumer() {
+ synchronized (this) {
+ if (kafkaConsumer == null) {
+ kafkaConsumer = new KafkaConsumer();
+ }
+ }
+ return kafkaConsumer;
+ }
+
+ public void initTopicList(List topics) {
+ getKafkaConsumer().initTopicList(topics);
+ }
+
+ public MessageAndMetadata getNextMessageFromConsumer(String topic) {
+ return getKafkaConsumer().getNextMessage(topic);
+ }
+
+ public void prepare() {
+ boolean startStatus = startKafkaServer();
+ if (!startStatus) {
+ throw new RuntimeException("Error starting the server!");
+ }
+ try {
+ Thread.sleep(3 * 1000); // add this sleep time to
+ // ensure that the server is fully started before proceeding with tests.
+ } catch (InterruptedException e) {
+ // ignore
+ }
+ getKafkaConsumer();
+ logger.info("Completed the prepare phase.");
+ }
+
+ public void tearDown() {
+ logger.info("Shutting down the Kafka Consumer.");
+ getKafkaConsumer().shutdown();
+ try {
+ Thread.sleep(3 * 1000); // add this sleep time to
+ // ensure that the server is fully started before proceeding with tests.
+ } catch (InterruptedException e) {
+ // ignore
+ }
+ logger.info("Shutting down the kafka Server.");
+ kafkaServer.stop();
+ logger.info("Completed the tearDown phase.");
+ }
+
+ private synchronized int getNextPort() {
+ // generate a random port number between 49152 and 65535
+ return randPortGen.nextInt(65535 - 49152) + 49152;
+ }
+
+ public String getZkUrl() {
+ return hostname + ":" + zkLocalPort;
+ }
+
+ public String getKafkaServerUrl() {
+ return hostname + ":" + kafkaLocalPort;
+ }
+}
diff --git a/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/util/ZooKeeperLocal.java b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/util/ZooKeeperLocal.java
new file mode 100644
index 0000000..35c1e47
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/java/org/apache/flume/sink/kafka/util/ZooKeeperLocal.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kafka.util;
+
+import org.apache.zookeeper.server.ServerConfig;
+import org.apache.zookeeper.server.ZooKeeperServerMain;
+import org.apache.zookeeper.server.quorum.QuorumPeerConfig;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Properties;
+
+/**
+ * Runs a ZooKeeper server inside the test JVM on a background thread.
+ * Reference: https://gist.github.com/fjavieralba/7930018/
+ */
+public class ZooKeeperLocal {
+
+  private static final Logger logger = LoggerFactory.getLogger(ZooKeeperLocal.class);
+  private ZooKeeperServerMain zooKeeperServer;
+
+  /**
+   * Parses the given properties and launches the server on a new thread.
+   * The constructor returns right after the thread is started; an
+   * IOException raised by the server on that thread is only logged.
+   *
+   * @param zkProperties ZooKeeper configuration
+   * @throws RuntimeException wrapping any exception raised while parsing
+   *         the properties
+   */
+  public ZooKeeperLocal(Properties zkProperties) throws IOException {
+    QuorumPeerConfig parsedConfig = new QuorumPeerConfig();
+    try {
+      parsedConfig.parseProperties(zkProperties);
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+
+    final ServerConfig serverConfig = new ServerConfig();
+    serverConfig.readFrom(parsedConfig);
+    zooKeeperServer = new ZooKeeperServerMain();
+
+    Runnable serverLoop = new Runnable() {
+      @Override
+      public void run() {
+        try {
+          zooKeeperServer.runFromConfig(serverConfig);
+        } catch (IOException e) {
+          logger.error("Zookeeper startup failed.", e);
+        }
+      }
+    };
+    new Thread(serverLoop).start();
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/resources/kafka-server.properties b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/resources/kafka-server.properties
new file mode 100644
index 0000000..02a81e2
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/resources/kafka-server.properties
@@ -0,0 +1,118 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# see kafka.server.KafkaConfig for additional details and defaults
+
+############################# Server Basics #############################
+
+# The id of the broker. This must be set to a unique integer for each broker.
+broker.id=0
+
+############################# Socket Server Settings #############################
+
+# The port the socket server listens on
+port=9092
+
+# Hostname the broker will bind to. If not set, the server will bind to all interfaces
+#host.name=localhost
+
+# Hostname the broker will advertise to producers and consumers. If not set, it uses the
+# value for "host.name" if configured. Otherwise, it will use the value returned from
+# java.net.InetAddress.getCanonicalHostName().
+#advertised.host.name=
+
+# The port to publish to ZooKeeper for clients to use. If this is not set,
+# it will publish the same port that the broker binds to.
+#advertised.port=
+
+# The number of threads handling network requests
+num.network.threads=2
+
+# The number of threads doing disk I/O
+num.io.threads=8
+
+# The send buffer (SO_SNDBUF) used by the socket server
+socket.send.buffer.bytes=1048576
+
+# The receive buffer (SO_RCVBUF) used by the socket server
+socket.receive.buffer.bytes=1048576
+
+# The maximum size of a request that the socket server will accept (protection against OOM)
+socket.request.max.bytes=104857600
+
+
+############################# Log Basics #############################
+
+# A comma separated list of directories under which to store log files
+log.dirs=target/kafka-logs
+
+# The default number of log partitions per topic. More partitions allow greater
+# parallelism for consumption, but this will also result in more files across
+# the brokers.
+num.partitions=2
+
+############################# Log Flush Policy #############################
+
+# Messages are immediately written to the filesystem but by default we only fsync() to sync
+# the OS cache lazily. The following configurations control the flush of data to disk.
+# There are a few important trade-offs here:
+# 1. Durability: Unflushed data may be lost if you are not using replication.
+# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush.
+# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks.
+# The settings below allow one to configure the flush policy to flush data after a period of time or
+# every N messages (or both). This can be done globally and overridden on a per-topic basis.
+
+# The number of messages to accept before forcing a flush of data to disk
+#log.flush.interval.messages=10000
+
+# The maximum amount of time a message can sit in a log before we force a flush
+#log.flush.interval.ms=1000
+
+############################# Log Retention Policy #############################
+
+# The following configurations control the disposal of log segments. The policy can
+# be set to delete segments after a period of time, or after a given size has accumulated.
+# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens
+# from the end of the log.
+
+# The minimum age of a log file to be eligible for deletion
+log.retention.hours=168
+
+# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining
+# segments don't drop below log.retention.bytes.
+#log.retention.bytes=1073741824
+
+# The maximum size of a log segment file. When this size is reached a new log segment will be created.
+log.segment.bytes=536870912
+
+# The interval at which log segments are checked to see if they can be deleted according
+# to the retention policies
+log.retention.check.interval.ms=60000
+
+# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires.
+# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction.
+log.cleaner.enable=false
+
+############################# Zookeeper #############################
+
+# Zookeeper connection string (see zookeeper docs for details).
+# This is a comma separated list of host:port pairs, each corresponding to a zk
+# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
+# You can also append an optional chroot string to the urls to specify the
+# root directory for all kafka znodes.
+zookeeper.connect=localhost:2181
+
+# Timeout in ms for connecting to zookeeper
+zookeeper.connection.timeout.ms=1000000
diff --git a/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/resources/log4j.properties b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/resources/log4j.properties
new file mode 100644
index 0000000..b86600b
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/resources/log4j.properties
@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+kafka.logs.dir=target/logs
+
+log4j.rootLogger=INFO, stdout
+
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+log4j.appender.kafkaAppender=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.kafkaAppender.DatePattern='.'yyyy-MM-dd-HH
+log4j.appender.kafkaAppender.File=${kafka.logs.dir}/server.log
+log4j.appender.kafkaAppender.layout=org.apache.log4j.PatternLayout
+log4j.appender.kafkaAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+log4j.appender.stateChangeAppender=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.stateChangeAppender.DatePattern='.'yyyy-MM-dd-HH
+log4j.appender.stateChangeAppender.File=${kafka.logs.dir}/state-change.log
+log4j.appender.stateChangeAppender.layout=org.apache.log4j.PatternLayout
+log4j.appender.stateChangeAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+log4j.appender.requestAppender=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.requestAppender.DatePattern='.'yyyy-MM-dd-HH
+log4j.appender.requestAppender.File=${kafka.logs.dir}/kafka-request.log
+log4j.appender.requestAppender.layout=org.apache.log4j.PatternLayout
+log4j.appender.requestAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+log4j.appender.cleanerAppender=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.cleanerAppender.DatePattern='.'yyyy-MM-dd-HH
+log4j.appender.cleanerAppender.File=${kafka.logs.dir}/log-cleaner.log
+log4j.appender.cleanerAppender.layout=org.apache.log4j.PatternLayout
+log4j.appender.cleanerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+log4j.appender.controllerAppender=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.controllerAppender.DatePattern='.'yyyy-MM-dd-HH
+log4j.appender.controllerAppender.File=${kafka.logs.dir}/controller.log
+log4j.appender.controllerAppender.layout=org.apache.log4j.PatternLayout
+log4j.appender.controllerAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+# Turn on all our debugging info
+#log4j.logger.kafka.producer.async.DefaultEventHandler=DEBUG, kafkaAppender
+#log4j.logger.kafka.client.ClientUtils=DEBUG, kafkaAppender
+#log4j.logger.kafka.perf=DEBUG, kafkaAppender
+#log4j.logger.kafka.perf.ProducerPerformance$ProducerThread=DEBUG, kafkaAppender
+#log4j.logger.org.I0Itec.zkclient.ZkClient=DEBUG
+log4j.logger.kafka=INFO, kafkaAppender
+
+log4j.logger.kafka.network.RequestChannel$=WARN, requestAppender
+log4j.additivity.kafka.network.RequestChannel$=false
+
+#log4j.logger.kafka.network.Processor=TRACE, requestAppender
+#log4j.logger.kafka.server.KafkaApis=TRACE, requestAppender
+#log4j.additivity.kafka.server.KafkaApis=false
+log4j.logger.kafka.request.logger=WARN, requestAppender
+log4j.additivity.kafka.request.logger=false
+
+log4j.logger.kafka.controller=TRACE, controllerAppender
+log4j.additivity.kafka.controller=false
+
+log4j.logger.kafka.log.LogCleaner=INFO, cleanerAppender
+log4j.additivity.kafka.log.LogCleaner=false
+
+log4j.logger.state.change.logger=TRACE, stateChangeAppender
+log4j.additivity.state.change.logger=false
diff --git a/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/resources/zookeeper.properties b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/resources/zookeeper.properties
new file mode 100644
index 0000000..89e1b5e
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-kafka-sink/src/test/resources/zookeeper.properties
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# the directory where the snapshot is stored.
+dataDir=target
+# the port at which the clients will connect
+clientPort=2181
+# disable the per-ip limit on the number of connections since this is a non-production config
+maxClientCnxns=0
\ No newline at end of file
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/README.md b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/README.md
new file mode 100644
index 0000000..ede3ab7
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/README.md
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Flume Morphline Solr Sink
+
+This module contains a Flume Morphline Solr Sink that extracts search documents from Flume events, transforms them and loads them in Near Real Time into Apache Solr, typically a SolrCloud. This sink is intended to be used alongside the HdfsSink. It is designed to process not just structured data, but also arbitrary raw data, including data from many heterogeneous data sources.
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/pom.xml b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/pom.xml
new file mode 100644
index 0000000..055c2c2
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/pom.xml
@@ -0,0 +1,139 @@
+
+
+
+ 4.0.0
+
+
+ flume-ng-sinks
+ org.apache.flume
+ 1.7.0
+
+
+ org.apache.flume.flume-ng-sinks
+ flume-ng-morphline-solr-sink
+ 1.7.0
+ Flume NG Morphline Solr Sink
+
+
+ UTF-8
+ 4.3.0
+ 4.3.0
+ 1.6.1
+ 2.12.4
+
+
+
+
+
+ org.apache.flume
+ flume-ng-core
+
+
+
+ org.slf4j
+ slf4j-api
+
+
+
+ org.kitesdk
+ kite-morphlines-all
+ ${kite.version}
+
+
+ org.apache.hadoop
+ hadoop-common
+
+
+ pom
+ true
+
+
+
+ org.slf4j
+ jcl-over-slf4j
+ ${slf4j.version}
+ provided
+
+
+
+ org.apache.solr
+ solr-test-framework
+ ${solr.version}
+ test
+
+
+ org.slf4j
+ slf4j-jdk14
+
+
+
+
+
+ org.kitesdk
+ kite-morphlines-solr-core
+ ${kite.version}
+ test-jar
+ test
+
+
+
+ junit
+ junit
+ test
+
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+ ${surefire.version}
+
+ -Dtests.locale=en_us
+ true
+
+
+
+
+
+
+
+ org.apache.rat
+ apache-rat-plugin
+
+
+ test.rat
+ test
+
+ check
+
+
+
+ src/test/resources/**
+
+
+
+
+
+
+
+
+
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/BlobDeserializer.java b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/BlobDeserializer.java
new file mode 100644
index 0000000..095f889
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/BlobDeserializer.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.solr.morphline;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.commons.io.output.ByteArrayOutputStream;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.annotations.InterfaceAudience;
+import org.apache.flume.annotations.InterfaceStability;
+import org.apache.flume.conf.ConfigurationException;
+import org.apache.flume.event.EventBuilder;
+import org.apache.flume.serialization.EventDeserializer;
+import org.apache.flume.serialization.ResettableInputStream;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.Lists;
+
+/**
+ * A deserializer that reads a Binary Large Object (BLOB) per event, typically
+ * one BLOB per file; To be used in conjunction with Flume SpoolDirectorySource.
+ *
+ * Note that this approach is not suitable for very large objects because it
+ * buffers up the entire BLOB.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public class BlobDeserializer implements EventDeserializer {
+
+  private final ResettableInputStream in;
+  private final int maxBlobLength;
+  private volatile boolean isOpen;
+
+  public static final String MAX_BLOB_LENGTH_KEY = "maxBlobLength";
+  public static final int MAX_BLOB_LENGTH_DEFAULT = 100 * 1000 * 1000;
+
+  private static final int DEFAULT_BUFFER_SIZE = 1024 * 8;
+  private static final Logger LOGGER = LoggerFactory.getLogger(BlobDeserializer.class);
+
+  /**
+   * @param context configuration; {@code maxBlobLength} caps the size of a
+   *                single event body and defaults to MAX_BLOB_LENGTH_DEFAULT
+   * @param in      stream to read BLOBs from
+   * @throws ConfigurationException if {@code maxBlobLength} is not positive
+   */
+  protected BlobDeserializer(Context context, ResettableInputStream in) {
+    this.in = in;
+    this.maxBlobLength = context.getInteger(MAX_BLOB_LENGTH_KEY, MAX_BLOB_LENGTH_DEFAULT);
+    if (this.maxBlobLength <= 0) {
+      throw new ConfigurationException("Configuration parameter " + MAX_BLOB_LENGTH_KEY
+          + " must be greater than zero: " + maxBlobLength);
+    }
+    this.isOpen = true;
+  }
+
+  /**
+   * Reads a BLOB from a file and returns an event
+   * @return Event containing a BLOB of at most maxBlobLength bytes, or null
+   *         if the stream yielded no data before end of stream
+   * @throws IOException if reading from the stream fails
+   */
+  @SuppressWarnings("resource")
+  @Override
+  public Event readEvent() throws IOException {
+    ensureOpen();
+    ByteArrayOutputStream blob = null;
+    byte[] buf = new byte[Math.min(maxBlobLength, DEFAULT_BUFFER_SIZE)];
+    int blobLength = 0;
+    int n = 0;
+    // Never request more bytes than are still allowed by maxBlobLength.
+    while ((n = in.read(buf, 0, Math.min(buf.length, maxBlobLength - blobLength))) != -1) {
+      if (blob == null) {
+        // Allocated lazily so that an empty stream produces no event at all.
+        blob = new ByteArrayOutputStream(n);
+      }
+      blob.write(buf, 0, n);
+      blobLength += n;
+      if (blobLength >= maxBlobLength) {
+        LOGGER.warn("File length exceeds maxBlobLength ({}), truncating BLOB event!",
+            maxBlobLength);
+        break;
+      }
+    }
+
+    if (blob == null) {
+      return null;
+    } else {
+      return EventBuilder.withBody(blob.toByteArray());
+    }
+  }
+
+  /**
+   * Batch BLOB read
+   * @param numEvents Maximum number of events to return.
+   * @return List of events containing read BLOBs; shorter than numEvents
+   *         (possibly empty) when the stream runs out of data
+   * @throws IOException if reading from the stream fails
+   */
+  @Override
+  public List<Event> readEvents(int numEvents) throws IOException {
+    ensureOpen();
+    List<Event> events = Lists.newLinkedList();
+    for (int i = 0; i < numEvents; i++) {
+      Event event = readEvent();
+      if (event != null) {
+        events.add(event);
+      } else {
+        break;
+      }
+    }
+    return events;
+  }
+
+  /** Marks the current position of the underlying stream. */
+  @Override
+  public void mark() throws IOException {
+    ensureOpen();
+    in.mark();
+  }
+
+  /** Resets the underlying stream. */
+  @Override
+  public void reset() throws IOException {
+    ensureOpen();
+    in.reset();
+  }
+
+  /**
+   * Resets and closes the underlying stream. Calling this on an already
+   * closed deserializer is a no-op.
+   */
+  @Override
+  public void close() throws IOException {
+    if (isOpen) {
+      try {
+        reset();
+      } finally {
+        // Release the stream and mark this deserializer closed even when
+        // reset() or close() fails, so the stream is never leaked.
+        try {
+          in.close();
+        } finally {
+          isOpen = false;
+        }
+      }
+    }
+  }
+
+  /** @throws IllegalStateException if close() has already been called */
+  private void ensureOpen() {
+    if (!isOpen) {
+      throw new IllegalStateException("Serializer has been closed");
+    }
+  }
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Nested classes:
+  ///////////////////////////////////////////////////////////////////////////////
+  /** Builder implementations MUST have a public no-arg constructor */
+  public static class Builder implements EventDeserializer.Builder {
+
+    @Override
+    public BlobDeserializer build(Context context, ResettableInputStream in) {
+      return new BlobDeserializer(context, in);
+    }
+
+  }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/BlobHandler.java b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/BlobHandler.java
new file mode 100644
index 0000000..fe98746
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/BlobHandler.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.solr.morphline;
+
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import javax.servlet.http.HttpServletRequest;
+
+import org.apache.commons.io.output.ByteArrayOutputStream;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.conf.ConfigurationException;
+import org.apache.flume.conf.LogPrivacyUtil;
+import org.apache.flume.event.EventBuilder;
+import org.apache.flume.source.http.HTTPSourceHandler;
+import org.apache.tika.metadata.Metadata;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * BlobHandler for HTTPSource that returns a single event containing the request
+ * parameters as well as the Binary Large Object (BLOB) uploaded with this
+ * request.
+ *
+ * The request body is copied into the event body up to {@code maxBlobLength}
+ * bytes (configurable, default {@link #MAX_BLOB_LENGTH_DEFAULT}); anything beyond
+ * that limit is dropped and a warning is logged.
+ *
+ * Note that this approach is not suitable for very large objects because it
+ * buffers up the entire BLOB.
+ */
+public class BlobHandler implements HTTPSourceHandler {
+
+  /** Upper bound, in bytes, on how much of the request body ends up in the event. */
+  private int maxBlobLength = MAX_BLOB_LENGTH_DEFAULT;
+
+  /** Name of the configuration property that overrides the BLOB size limit. */
+  public static final String MAX_BLOB_LENGTH_KEY = "maxBlobLength";
+  /** Default BLOB size limit: 100 * 1000 * 1000 bytes. */
+  public static final int MAX_BLOB_LENGTH_DEFAULT = 100 * 1000 * 1000;
+
+  /** Copy buffer size (8 KiB); a smaller buffer is used when the limit is smaller. */
+  private static final int DEFAULT_BUFFER_SIZE = 1024 * 8;
+  private static final Logger LOGGER = LoggerFactory.getLogger(BlobHandler.class);
+
+  public BlobHandler() {
+  }
+
+  /**
+   * Reads the optional {@code maxBlobLength} setting.
+   *
+   * @param context handler configuration
+   * @throws ConfigurationException if the configured limit is zero or negative
+   */
+  @Override
+  public void configure(Context context) {
+    this.maxBlobLength = context.getInteger(MAX_BLOB_LENGTH_KEY, MAX_BLOB_LENGTH_DEFAULT);
+    if (this.maxBlobLength <= 0) {
+      throw new ConfigurationException("Configuration parameter " + MAX_BLOB_LENGTH_KEY
+          + " must be greater than zero: " + maxBlobLength);
+    }
+  }
+
+  /**
+   * Turns one HTTP request into exactly one event.
+   *
+   * The event body is the request body, cut off after {@code maxBlobLength} bytes;
+   * an empty request body yields an empty byte array. The event headers come from
+   * {@link #getHeaders(HttpServletRequest)}. The request input stream is always
+   * closed before this method returns.
+   *
+   * @param request the incoming HTTP request
+   * @return a single-element list holding the BLOB event
+   * @throws Exception if reading the request body fails
+   */
+  @SuppressWarnings("resource")
+  @Override
+  public List<Event> getEvents(HttpServletRequest request) throws Exception {
+    Map<String, String> headers = getHeaders(request);
+    InputStream in = request.getInputStream();
+    try {
+      ByteArrayOutputStream blob = null;
+      byte[] buf = new byte[Math.min(maxBlobLength, DEFAULT_BUFFER_SIZE)];
+      int blobLength = 0;
+      int n = 0;
+      // Never ask for more than the remaining budget, so blobLength cannot overshoot.
+      while ((n = in.read(buf, 0, Math.min(buf.length, maxBlobLength - blobLength))) != -1) {
+        if (blob == null) {
+          blob = new ByteArrayOutputStream(n);
+        }
+        blob.write(buf, 0, n);
+        blobLength += n;
+        if (blobLength >= maxBlobLength) {
+          // The limit is reached. Only report truncation when the request really
+          // carries more data: a body of exactly maxBlobLength bytes is complete.
+          if (in.read() != -1) {
+            LOGGER.warn("Request length exceeds maxBlobLength ({}), truncating BLOB event!",
+                maxBlobLength);
+          }
+          break;
+        }
+      }
+
+      byte[] array = blob != null ? blob.toByteArray() : new byte[0];
+      Event event = EventBuilder.withBody(array, headers);
+      if (LOGGER.isDebugEnabled() && LogPrivacyUtil.allowLogRawData()) {
+        LOGGER.debug("blobEvent: {}", event);
+      }
+      return Collections.singletonList(event);
+    } finally {
+      in.close();
+    }
+  }
+
+  /**
+   * Builds the event headers for a request.
+   *
+   * The result holds the request content type under {@code Metadata.CONTENT_TYPE}
+   * (when the request has one) plus one entry per request parameter. Parameters
+   * are added last, so a parameter with that same name replaces the content type
+   * entry. The HTTP request headers themselves are not copied; they are only
+   * written to the debug log when raw-data logging is allowed.
+   *
+   * @param request the incoming HTTP request
+   * @return a new, mutable header map
+   */
+  private Map<String, String> getHeaders(HttpServletRequest request) {
+    if (LOGGER.isDebugEnabled() && LogPrivacyUtil.allowLogRawData()) {
+      Map<String, String> requestHeaders = new HashMap<String, String>();
+      Enumeration iter = request.getHeaderNames();
+      while (iter.hasMoreElements()) {
+        String name = (String) iter.nextElement();
+        requestHeaders.put(name, request.getHeader(name));
+      }
+      LOGGER.debug("requestHeaders: {}", requestHeaders);
+    }
+    Map<String, String> headers = new HashMap<String, String>();
+    if (request.getContentType() != null) {
+      headers.put(Metadata.CONTENT_TYPE, request.getContentType());
+    }
+    Enumeration iter = request.getParameterNames();
+    while (iter.hasMoreElements()) {
+      String name = (String) iter.nextElement();
+      headers.put(name, request.getParameter(name));
+    }
+    return headers;
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/MorphlineHandler.java b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/MorphlineHandler.java
new file mode 100644
index 0000000..bb5191d
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/MorphlineHandler.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.solr.morphline;
+
+import java.io.IOException;
+
+import org.apache.flume.Event;
+import org.apache.flume.conf.Configurable;
+
+/**
+ * Interface to load Flume events into Solr.
+ *
+ * Besides the {@code configure} method inherited from {@link Configurable}, it
+ * exposes a transaction-style life cycle: begin, process events, then commit or
+ * roll back, and finally stop.
+ */
+public interface MorphlineHandler extends Configurable {
+
+ /** Begins a transaction */
+ public void beginTransaction();
+
+ /**
+ * Loads the given event into Solr
+ *
+ * @param event the Flume event to load
+ */
+ public void process(Event event);
+
+ /**
+ * Sends any outstanding documents to Solr and waits for a positive
+ * or negative ack (i.e. exception). Depending on the outcome the caller
+ * should then commit or rollback the current flume transaction
+ * correspondingly.
+ *
+ * NOTE(review): this method was documented as throwing {@link IOException} on a
+ * low-level I/O error, but the signature declares no checked exception, so an
+ * IOException cannot propagate from here directly. Presumably implementations
+ * report such failures through an unchecked exception - confirm.
+ */
+ public void commitTransaction();
+
+ /**
+ * Performs a rollback of all non-committed documents pending.
+ *
+ * Note that this is not a true rollback as in databases. Content you have previously added to
+ * Solr may have already been committed due to autoCommit, buffer full, other client performing a
+ * commit etc. So this is only a best-effort rollback.
+ *
+ * NOTE(review): this method was documented as throwing {@link IOException} on a
+ * low-level I/O error, but the signature declares no checked exception, so an
+ * IOException cannot propagate from here directly. Presumably implementations
+ * report such failures through an unchecked exception - confirm.
+ */
+ public void rollbackTransaction();
+
+ /** Releases allocated resources */
+ public void stop();
+
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/MorphlineHandlerImpl.java b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/MorphlineHandlerImpl.java
new file mode 100644
index 0000000..d877814
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/MorphlineHandlerImpl.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.solr.morphline;
+
+import java.io.File;
+import java.util.Map.Entry;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.kitesdk.morphline.api.Command;
+import org.kitesdk.morphline.api.MorphlineCompilationException;
+import org.kitesdk.morphline.api.MorphlineContext;
+import org.kitesdk.morphline.api.Record;
+import org.kitesdk.morphline.base.Compiler;
+import org.kitesdk.morphline.base.FaultTolerance;
+import org.kitesdk.morphline.base.Fields;
+import org.kitesdk.morphline.base.Metrics;
+import org.kitesdk.morphline.base.Notifications;
+import com.codahale.metrics.Meter;
+import com.codahale.metrics.MetricRegistry;
+import com.codahale.metrics.SharedMetricRegistries;
+import com.codahale.metrics.Timer;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+
+/**
+ * A {@link MorphlineHandler} that processes its events using a morphline {@link Command} chain.
+ *
+ * {@link #configure(Context)} must run before any other method: it compiles the
+ * morphline and creates the metrics that the remaining methods use.
+ */
+public class MorphlineHandlerImpl implements MorphlineHandler {
+
+  private MorphlineContext morphlineContext;
+  private Command morphline;
+  /** Optional last command of the chain; stays null unless {@link #setFinalChild} is called. */
+  private Command finalChild;
+  /** "morphlineFile@morphlineId"; names the metric registry and appears in log messages. */
+  private String morphlineFileAndId;
+
+  private Timer mappingTimer;
+  private Meter numRecords;
+  private Meter numFailedRecords;
+  private Meter numExceptionRecords;
+
+  public static final String MORPHLINE_FILE_PARAM = "morphlineFile";
+  public static final String MORPHLINE_ID_PARAM = "morphlineId";
+
+  /**
+   * Morphline variables can be passed from flume.conf to the morphline, e.g.:
+   * agent.sinks.solrSink.morphlineVariable.zkHost=127.0.0.1:2181/solr
+   */
+  public static final String MORPHLINE_VARIABLE_PARAM = "morphlineVariable";
+
+  private static final Logger LOG = LoggerFactory.getLogger(MorphlineHandlerImpl.class);
+
+  // For test injection
+  void setMorphlineContext(MorphlineContext morphlineContext) {
+    this.morphlineContext = morphlineContext;
+  }
+
+  // for interceptor
+  void setFinalChild(Command finalChild) {
+    this.finalChild = finalChild;
+  }
+
+  /**
+   * Compiles the morphline named by the configuration and sets up the metrics.
+   *
+   * A {@link MorphlineContext} is only built here when none was injected through
+   * {@link #setMorphlineContext(MorphlineContext)}. Every sub-property under
+   * {@code morphlineVariable.} is handed to the compiler as an override.
+   *
+   * @param context handler configuration; {@code morphlineFile} is mandatory
+   * @throws MorphlineCompilationException if {@code morphlineFile} is missing or blank
+   */
+  @Override
+  public void configure(Context context) {
+    String morphlineFile = context.getString(MORPHLINE_FILE_PARAM);
+    String morphlineId = context.getString(MORPHLINE_ID_PARAM);
+    if (morphlineFile == null || morphlineFile.trim().length() == 0) {
+      throw new MorphlineCompilationException("Missing parameter: " + MORPHLINE_FILE_PARAM, null);
+    }
+    morphlineFileAndId = morphlineFile + "@" + morphlineId;
+
+    if (morphlineContext == null) {
+      FaultTolerance faultTolerance = new FaultTolerance(
+          context.getBoolean(FaultTolerance.IS_PRODUCTION_MODE, false),
+          context.getBoolean(FaultTolerance.IS_IGNORING_RECOVERABLE_EXCEPTIONS, false),
+          context.getString(FaultTolerance.RECOVERABLE_EXCEPTION_CLASSES));
+
+      morphlineContext = new MorphlineContext.Builder()
+          .setExceptionHandler(faultTolerance)
+          .setMetricRegistry(SharedMetricRegistries.getOrCreate(morphlineFileAndId))
+          .build();
+    }
+
+    Config override = ConfigFactory.parseMap(
+        context.getSubProperties(MORPHLINE_VARIABLE_PARAM + "."));
+    morphline = new Compiler().compile(
+        new File(morphlineFile), morphlineId, morphlineContext, finalChild, override);
+
+    MetricRegistry registry = morphlineContext.getMetricRegistry();
+    this.mappingTimer = registry.timer(
+        MetricRegistry.name("morphline.app", Metrics.ELAPSED_TIME));
+    this.numRecords = registry.meter(
+        MetricRegistry.name("morphline.app", Metrics.NUM_RECORDS));
+    this.numFailedRecords = registry.meter(
+        MetricRegistry.name("morphline.app", "numFailedRecords"));
+    this.numExceptionRecords = registry.meter(
+        MetricRegistry.name("morphline.app", "numExceptionRecords"));
+  }
+
+  /**
+   * Converts the event into a morphline {@link Record} and runs it through the morphline.
+   *
+   * Each event header becomes a record field, and a non-empty body is stored under
+   * {@code Fields.ATTACHMENT_BODY}. When the morphline returns {@code false} the
+   * failure is counted and logged. A {@link RuntimeException} is counted and passed
+   * to the exception handler of the morphline context, which decides what happens
+   * next. The elapsed time is recorded in every case.
+   *
+   * @param event the Flume event to process
+   */
+  @Override
+  public void process(Event event) {
+    numRecords.mark();
+    Timer.Context timerContext = mappingTimer.time();
+    try {
+      Record record = new Record();
+      // Typed entry: record.put expects a String key, which a raw Entry cannot supply.
+      for (Entry<String, String> entry : event.getHeaders().entrySet()) {
+        record.put(entry.getKey(), entry.getValue());
+      }
+      byte[] bytes = event.getBody();
+      if (bytes != null && bytes.length > 0) {
+        record.put(Fields.ATTACHMENT_BODY, bytes);
+      }
+      try {
+        Notifications.notifyStartSession(morphline);
+        if (!morphline.process(record)) {
+          numFailedRecords.mark();
+          LOG.warn("Morphline {} failed to process record: {}", morphlineFileAndId, record);
+        }
+      } catch (RuntimeException t) {
+        numExceptionRecords.mark();
+        morphlineContext.getExceptionHandler().handleException(t, record);
+      }
+    } finally {
+      timerContext.stop();
+    }
+  }
+
+  /** Sends a begin-transaction notification to the morphline. */
+  @Override
+  public void beginTransaction() {
+    Notifications.notifyBeginTransaction(morphline);
+  }
+
+  /** Sends a commit-transaction notification to the morphline. */
+  @Override
+  public void commitTransaction() {
+    Notifications.notifyCommitTransaction(morphline);
+  }
+
+  /** Sends a rollback-transaction notification to the morphline. */
+  @Override
+  public void rollbackTransaction() {
+    Notifications.notifyRollbackTransaction(morphline);
+  }
+
+  /** Sends a shutdown notification to the morphline. */
+  @Override
+  public void stop() {
+    Notifications.notifyShutdown(morphline);
+  }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/MorphlineInterceptor.java b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/MorphlineInterceptor.java
new file mode 100644
index 0000000..3b94133
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/MorphlineInterceptor.java
@@ -0,0 +1,242 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.solr.morphline;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.concurrent.ConcurrentLinkedQueue;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.FlumeException;
+import org.apache.flume.event.EventBuilder;
+import org.apache.flume.interceptor.Interceptor;
+import org.kitesdk.morphline.api.Command;
+import org.kitesdk.morphline.api.Record;
+import org.kitesdk.morphline.base.Fields;
+
+import com.google.common.base.Preconditions;
+import com.google.common.io.ByteStreams;
+
+/**
+ * Flume Interceptor that executes a morphline on events that are intercepted.
+ *
+ * Currently, there is a restriction in that the morphline must not generate more than one output
+ * record for each input event.
+ *
+ * The actual work is done by pooled {@code LocalMorphlineInterceptor} instances, each
+ * owning its own compiled morphline. An instance is taken out of the pool for the
+ * duration of one {@code intercept} call and handed back afterwards; a new one is
+ * created whenever the pool is empty.
+ */
+public class MorphlineInterceptor implements Interceptor {
+
+  private final Context context;
+  private final Queue<LocalMorphlineInterceptor> pool = new ConcurrentLinkedQueue<>();
+
+  protected MorphlineInterceptor(Context context) {
+    Preconditions.checkNotNull(context);
+    this.context = context;
+    // fail fast on morphline compilation exception
+    returnToPool(new LocalMorphlineInterceptor(context));
+  }
+
+  @Override
+  public void initialize() {
+  }
+
+  /** Closes every interceptor that is currently sitting in the pool. */
+  @Override
+  public void close() {
+    LocalMorphlineInterceptor interceptor;
+    while ((interceptor = pool.poll()) != null) {
+      interceptor.close();
+    }
+  }
+
+  /**
+   * Runs the morphline on each event of the batch.
+   *
+   * @param events the events to intercept
+   * @return the transformed events; events for which the morphline produced no
+   *         record are left out
+   */
+  @Override
+  public List<Event> intercept(List<Event> events) {
+    LocalMorphlineInterceptor interceptor = borrowFromPool();
+    try {
+      return interceptor.intercept(events);
+    } finally {
+      // Hand the instance back even when the morphline throws; otherwise it would
+      // drop out of the pool and never be closed by close().
+      returnToPool(interceptor);
+    }
+  }
+
+  /**
+   * Runs the morphline on a single event.
+   *
+   * @param event the event to intercept
+   * @return the transformed event, or {@code null} if the morphline produced no record
+   */
+  @Override
+  public Event intercept(Event event) {
+    LocalMorphlineInterceptor interceptor = borrowFromPool();
+    try {
+      return interceptor.intercept(event);
+    } finally {
+      // Same reasoning as in the batch variant: never lose a borrowed instance.
+      returnToPool(interceptor);
+    }
+  }
+
+  private void returnToPool(LocalMorphlineInterceptor interceptor) {
+    pool.add(interceptor);
+  }
+
+  /** Takes an interceptor from the pool, creating a fresh one when the pool is empty. */
+  private LocalMorphlineInterceptor borrowFromPool() {
+    LocalMorphlineInterceptor interceptor = pool.poll();
+    if (interceptor == null) {
+      interceptor = new LocalMorphlineInterceptor(context);
+    }
+    return interceptor;
+  }
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Nested classes:
+  ///////////////////////////////////////////////////////////////////////////////
+  /** Builder implementations MUST have a public no-arg constructor */
+  public static class Builder implements Interceptor.Builder {
+
+    private Context context;
+
+    public Builder() {
+    }
+
+    @Override
+    public MorphlineInterceptor build() {
+      return new MorphlineInterceptor(context);
+    }
+
+    @Override
+    public void configure(Context context) {
+      this.context = context;
+    }
+
+  }
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Nested classes:
+  ///////////////////////////////////////////////////////////////////////////////
+  /**
+   * Does the real interception with one morphline and one {@link Collector} that
+   * is installed as the final child of that morphline.
+   */
+  private static final class LocalMorphlineInterceptor implements Interceptor {
+
+    private final MorphlineHandlerImpl morphline;
+    private final Collector collector;
+
+    protected LocalMorphlineInterceptor(Context context) {
+      this.morphline = new MorphlineHandlerImpl();
+      this.collector = new Collector();
+      // The final child has to be set before configure() compiles the morphline.
+      this.morphline.setFinalChild(collector);
+      this.morphline.configure(context);
+    }
+
+    @Override
+    public void initialize() {
+    }
+
+    @Override
+    public void close() {
+      morphline.stop();
+    }
+
+    @Override
+    public List<Event> intercept(List<Event> events) {
+      List<Event> results = new ArrayList<Event>(events.size());
+      for (Event event : events) {
+        Event intercepted = intercept(event);
+        if (intercepted != null) {
+          results.add(intercepted);
+        }
+      }
+      return results;
+    }
+
+    /**
+     * Feeds one event through the morphline and converts the collected record back
+     * into an event.
+     *
+     * @return the resulting event, or {@code null} when no record was collected
+     * @throws FlumeException if more than one record was collected
+     */
+    @Override
+    public Event intercept(Event event) {
+      collector.reset();
+      morphline.process(event);
+      List<Record> results = collector.getRecords();
+      if (results.size() == 0) {
+        return null;
+      }
+      if (results.size() > 1) {
+        throw new FlumeException(getClass().getName() +
+            " must not generate more than one output record per input event");
+      }
+      return toEvent(results.get(0));
+    }
+
+    /**
+     * Converts a morphline record into a Flume event.
+     *
+     * The {@code Fields.ATTACHMENT_BODY} field (a {@code byte[]} or an
+     * {@link InputStream}) becomes the event body; every other field becomes a
+     * header holding the {@code toString()} of its single value.
+     *
+     * @throws FlumeException if a field has more than one value, if the attachment
+     *         has an unsupported type, or if reading an attachment stream fails
+     */
+    private Event toEvent(Record record) {
+      Map<String, String> headers = new HashMap<String, String>();
+      Map<String, Collection<Object>> recordMap = record.getFields().asMap();
+      byte[] body = null;
+      for (Map.Entry<String, Collection<Object>> entry : recordMap.entrySet()) {
+        if (entry.getValue().size() > 1) {
+          throw new FlumeException(getClass().getName()
+              + " must not generate more than one output value per record field");
+        }
+        assert entry.getValue().size() != 0; // guava guarantees that
+        Object firstValue = entry.getValue().iterator().next();
+        if (Fields.ATTACHMENT_BODY.equals(entry.getKey())) {
+          if (firstValue instanceof byte[]) {
+            body = (byte[]) firstValue;
+          } else if (firstValue instanceof InputStream) {
+            try {
+              body = ByteStreams.toByteArray((InputStream) firstValue);
+            } catch (IOException e) {
+              throw new FlumeException(e);
+            }
+          } else {
+            throw new FlumeException(getClass().getName()
+                + " must not generate attachments that are not a byte[] or InputStream");
+          }
+        } else {
+          headers.put(entry.getKey(), firstValue.toString());
+        }
+      }
+      return EventBuilder.withBody(body, headers);
+    }
+  }
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Nested classes:
+  ///////////////////////////////////////////////////////////////////////////////
+  /** Terminal command that simply remembers every record it is asked to process. */
+  private static final class Collector implements Command {
+
+    private final List<Record> results = new ArrayList<Record>();
+
+    /** Returns the live list of collected records, not a copy. */
+    public List<Record> getRecords() {
+      return results;
+    }
+
+    public void reset() {
+      results.clear();
+    }
+
+    @Override
+    public Command getParent() {
+      return null;
+    }
+
+    @Override
+    public void notify(Record notification) {
+    }
+
+    @Override
+    public boolean process(Record record) {
+      Preconditions.checkNotNull(record);
+      results.add(record);
+      return true;
+    }
+
+  }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/MorphlineSink.java b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/MorphlineSink.java
new file mode 100644
index 0000000..0917d39
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/MorphlineSink.java
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.solr.morphline;
+
+import org.apache.flume.Channel;
+import org.apache.flume.ChannelException;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.Transaction;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.conf.ConfigurationException;
+import org.apache.flume.conf.LogPrivacyUtil;
+import org.apache.flume.instrumentation.SinkCounter;
+import org.apache.flume.sink.AbstractSink;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.kitesdk.morphline.api.Command;
+
+/**
+ * Flume sink that extracts search documents from Flume events and processes them using a morphline
+ * {@link Command} chain.
+ *
+ * Each call to {@link #process()} drains one batch from the channel inside a Flume
+ * transaction. The batch ends after {@code batchSize} events, after
+ * {@code batchDurationMillis} milliseconds, or when the channel is empty, whichever
+ * comes first.
+ */
+public class MorphlineSink extends AbstractSink implements Configurable {
+
+  private int maxBatchSize = 1000;
+  private long maxBatchDurationMillis = 1000;
+  private String handlerClass;
+  private MorphlineHandler handler;
+  private Context context;
+  private SinkCounter sinkCounter;
+
+  public static final String BATCH_SIZE = "batchSize";
+  public static final String BATCH_DURATION_MILLIS = "batchDurationMillis";
+  public static final String HANDLER_CLASS = "handlerClass";
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(MorphlineSink.class);
+
+  public MorphlineSink() {
+    this(null);
+  }
+
+  /** For testing only */
+  protected MorphlineSink(MorphlineHandler handler) {
+    this.handler = handler;
+  }
+
+  /**
+   * Reads the batch settings and the handler class name. The handler itself is
+   * only instantiated later, in {@link #start()}.
+   *
+   * @param context sink configuration; it is kept and later passed to the handler
+   */
+  @Override
+  public void configure(Context context) {
+    this.context = context;
+    maxBatchSize = context.getInteger(BATCH_SIZE, maxBatchSize);
+    maxBatchDurationMillis = context.getLong(BATCH_DURATION_MILLIS, maxBatchDurationMillis);
+    handlerClass = context.getString(HANDLER_CLASS, MorphlineHandlerImpl.class.getName());
+    if (sinkCounter == null) {
+      sinkCounter = new SinkCounter(getName());
+    }
+  }
+
+  /** Returns the maximum number of events to take per flume transaction */
+  private int getMaxBatchSize() {
+    return maxBatchSize;
+  }
+
+  /** Returns the maximum duration per flume transaction, in milliseconds */
+  private long getMaxBatchDurationMillis() {
+    return maxBatchDurationMillis;
+  }
+
+  /**
+   * Starts the sink counter and, unless a handler was injected through the
+   * constructor, instantiates and configures the handler named by
+   * {@code handlerClass}.
+   *
+   * @throws ConfigurationException if the handler class cannot be instantiated
+   */
+  @Override
+  public synchronized void start() {
+    LOGGER.info("Starting Morphline Sink {} ...", this);
+    sinkCounter.start();
+    if (handler == null) {
+      MorphlineHandler tmpHandler;
+      try {
+        tmpHandler = (MorphlineHandler) Class.forName(handlerClass).newInstance();
+      } catch (Exception e) {
+        throw new ConfigurationException(e);
+      }
+      // Publish the handler only after it has been configured successfully.
+      tmpHandler.configure(context);
+      handler = tmpHandler;
+    }
+    super.start();
+    LOGGER.info("Morphline Sink {} started.", getName());
+  }
+
+  /** Stops the handler (if any) and the sink counter, then the sink itself. */
+  @Override
+  public synchronized void stop() {
+    LOGGER.info("Morphline Sink {} stopping...", getName());
+    try {
+      if (handler != null) {
+        handler.stop();
+      }
+      sinkCounter.stop();
+      // One placeholder per argument; the former third "{}" was printed literally.
+      LOGGER.info("Morphline Sink {} stopped. Metrics: {}", getName(), sinkCounter);
+    } finally {
+      super.stop();
+    }
+  }
+
+  /**
+   * Takes one batch of events from the channel and passes them to the handler.
+   *
+   * On success the morphline transaction is committed first and the Flume
+   * transaction second. On any failure both are rolled back (the morphline one
+   * only if it had been begun and not yet committed) and the Flume transaction is
+   * closed in every case.
+   *
+   * @return {@code BACKOFF} when no event was taken or a {@link ChannelException}
+   *         occurred, {@code READY} otherwise
+   * @throws EventDeliveryException for any other failure that is not an {@link Error}
+   */
+  @Override
+  public Status process() throws EventDeliveryException {
+    int batchSize = getMaxBatchSize();
+    long batchEndTime = System.currentTimeMillis() + getMaxBatchDurationMillis();
+    Channel myChannel = getChannel();
+    Transaction txn = myChannel.getTransaction();
+    txn.begin();
+    // Starts out true so that a failing beginTransaction() triggers no morphline rollback.
+    boolean isMorphlineTransactionCommitted = true;
+    try {
+      int numEventsTaken = 0;
+      handler.beginTransaction();
+      isMorphlineTransactionCommitted = false;
+
+      // repeatedly take and process events from the Flume queue
+      for (int i = 0; i < batchSize; i++) {
+        Event event = myChannel.take();
+        if (event == null) {
+          break;
+        }
+        sinkCounter.incrementEventDrainAttemptCount();
+        numEventsTaken++;
+        if (LOGGER.isTraceEnabled() && LogPrivacyUtil.allowLogRawData()) {
+          LOGGER.trace("Flume event arrived {}", event);
+        }
+
+        handler.process(event);
+        if (System.currentTimeMillis() >= batchEndTime) {
+          break;
+        }
+      }
+
+      // update metrics
+      // NOTE(review): an empty batch is counted both as "empty" and as "underflow"
+      // because the two checks are independent - confirm that this is intended.
+      if (numEventsTaken == 0) {
+        sinkCounter.incrementBatchEmptyCount();
+      }
+      if (numEventsTaken < batchSize) {
+        sinkCounter.incrementBatchUnderflowCount();
+      } else {
+        sinkCounter.incrementBatchCompleteCount();
+      }
+      handler.commitTransaction();
+      isMorphlineTransactionCommitted = true;
+      txn.commit();
+      sinkCounter.addToEventDrainSuccessCount(numEventsTaken);
+      return numEventsTaken == 0 ? Status.BACKOFF : Status.READY;
+    } catch (Throwable t) {
+      // Ooops - need to rollback and back off
+      LOGGER.error("Morphline Sink " + getName() + ": Unable to process event from channel " +
+          myChannel.getName() + ". Exception follows.", t);
+      try {
+        if (!isMorphlineTransactionCommitted) {
+          handler.rollbackTransaction();
+        }
+      } catch (Throwable t2) {
+        LOGGER.error("Morphline Sink " + getName() +
+            ": Unable to rollback morphline transaction. Exception follows.", t2);
+      } finally {
+        try {
+          txn.rollback();
+        } catch (Throwable t4) {
+          LOGGER.error("Morphline Sink " + getName() + ": Unable to rollback Flume transaction. " +
+              "Exception follows.", t4);
+        }
+      }
+
+      if (t instanceof Error) {
+        throw (Error) t; // rethrow original exception
+      } else if (t instanceof ChannelException) {
+        return Status.BACKOFF;
+      } else {
+        throw new EventDeliveryException("Failed to send events", t); // rethrow and backoff
+      }
+    } finally {
+      txn.close();
+    }
+  }
+
+  /** Returns the sink name followed by the simple class name in parentheses. */
+  @Override
+  public String toString() {
+    int i = getClass().getName().lastIndexOf('.') + 1;
+    String shortClassName = getClass().getName().substring(i);
+    return getName() + " (" + shortClassName + ")";
+  }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/MorphlineSolrSink.java b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/MorphlineSolrSink.java
new file mode 100644
index 0000000..e403b10
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/MorphlineSolrSink.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.solr.morphline;
+
+import org.apache.flume.Context;
+
+import org.kitesdk.morphline.api.Command;
+import org.kitesdk.morphline.base.FaultTolerance;
+
+
+/**
+ * Flume sink that extracts search documents from Flume events, processes them using a morphline
+ * {@link Command} chain, and loads them into Apache Solr.
+ *
+ * It differs from {@link MorphlineSink} only in its configuration default: when no
+ * recoverable exception classes are configured, the Solr server exception class is
+ * registered as recoverable.
+ */
+public class MorphlineSolrSink extends MorphlineSink {
+
+  public MorphlineSolrSink() {
+    super();
+  }
+
+  /** For testing only */
+  protected MorphlineSolrSink(MorphlineHandler handler) {
+    super(handler);
+  }
+
+  /**
+   * Adds the Solr-specific default to the given context (the context is modified
+   * in place) and then configures the sink as usual.
+   *
+   * @param context sink configuration
+   */
+  @Override
+  public void configure(Context context) {
+    final String key = FaultTolerance.RECOVERABLE_EXCEPTION_CLASSES;
+    final boolean alreadyConfigured = context.getString(key) != null;
+    if (!alreadyConfigured) {
+      // An explicit setting always wins; the default is only filled in when absent.
+      context.put(key, "org.apache.solr.client.solrj.SolrServerException");
+    }
+    super.configure(context);
+  }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/UUIDInterceptor.java b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/UUIDInterceptor.java
new file mode 100644
index 0000000..22d5347
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/main/java/org/apache/flume/sink/solr/morphline/UUIDInterceptor.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.solr.morphline;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.interceptor.Interceptor;
+
+/**
+ * Flume Interceptor that sets a universally unique identifier on all events
+ * that are intercepted. By default this event header is named "id".
+ */
+public class UUIDInterceptor implements Interceptor {
+
+ private String headerName;
+ private boolean preserveExisting;
+ private String prefix;
+
+ public static final String HEADER_NAME = "headerName";
+ public static final String PRESERVE_EXISTING_NAME = "preserveExisting";
+ public static final String PREFIX_NAME = "prefix";
+
+ protected UUIDInterceptor(Context context) {
+ headerName = context.getString(HEADER_NAME, "id");
+ preserveExisting = context.getBoolean(PRESERVE_EXISTING_NAME, true);
+ prefix = context.getString(PREFIX_NAME, "");
+ }
+
+ @Override
+ public void initialize() {
+ }
+
+ protected String getPrefix() {
+ return prefix;
+ }
+
+ protected String generateUUID() {
+ return getPrefix() + UUID.randomUUID().toString();
+ }
+
+ protected boolean isMatch(Event event) {
+ return true;
+ }
+
+ @Override
+ public Event intercept(Event event) {
+ Map headers = event.getHeaders();
+ if (preserveExisting && headers.containsKey(headerName)) {
+ // we must preserve the existing id
+ } else if (isMatch(event)) {
+ headers.put(headerName, generateUUID());
+ }
+ return event;
+ }
+
+ @Override
+ public List intercept(List events) {
+ List results = new ArrayList(events.size());
+ for (Event event : events) {
+ event = intercept(event);
+ if (event != null) {
+ results.add(event);
+ }
+ }
+ return results;
+ }
+
+ @Override
+ public void close() {
+ }
+
+
+ ///////////////////////////////////////////////////////////////////////////////
+ // Nested classes:
+ ///////////////////////////////////////////////////////////////////////////////
+ /** Builder implementations MUST have a public no-arg constructor */
+ public static class Builder implements Interceptor.Builder {
+
+ private Context context;
+
+ public Builder() {
+ }
+
+ @Override
+ public UUIDInterceptor build() {
+ return new UUIDInterceptor(context);
+ }
+
+ @Override
+ public void configure(Context context) {
+ this.context = context;
+ }
+
+ }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/EmbeddedSource.java b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/EmbeddedSource.java
new file mode 100644
index 0000000..b30fece
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/EmbeddedSource.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.solr.morphline;
+
+import java.util.List;
+
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.EventDrivenSource;
+import org.apache.flume.Sink;
+import org.apache.flume.source.AbstractSource;
+
+/**
+ * Test-only source that pushes events through its channel processor and then
+ * immediately triggers one {@code process()} call on the given sink.
+ */
+class EmbeddedSource extends AbstractSource implements EventDrivenSource {
+
+  /** Sink that is processed once after every load; assigned once in the constructor. */
+  private final Sink sink;
+
+  /**
+   * @param sink the sink whose {@code process()} is called after each load
+   */
+  public EmbeddedSource(Sink sink) {
+    this.sink = sink;
+  }
+
+  /**
+   * Hands a single event to the channel processor, then runs the sink once.
+   *
+   * @param event the event to deliver
+   * @throws EventDeliveryException declared for failures of the sink's process() call
+   */
+  public void load(Event event) throws EventDeliveryException {
+    getChannelProcessor().processEvent(event);
+    sink.process();
+  }
+
+  /**
+   * Hands a batch of events to the channel processor, then runs the sink once.
+   *
+   * @param events the events to deliver as one batch
+   * @throws EventDeliveryException declared for failures of the sink's process() call
+   */
+  public void load(List<Event> events) throws EventDeliveryException {
+    // Typed parameter instead of a raw List, so the batch call is not unchecked.
+    getChannelProcessor().processEventBatch(events);
+    sink.process();
+  }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/FlumeHttpServletRequestWrapper.java b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/FlumeHttpServletRequestWrapper.java
new file mode 100644
index 0000000..9711a3a
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/FlumeHttpServletRequestWrapper.java
@@ -0,0 +1,321 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.solr.morphline;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
+import java.security.Principal;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.Locale;
+import java.util.Map;
+
+import javax.servlet.RequestDispatcher;
+import javax.servlet.ServletInputStream;
+import javax.servlet.http.Cookie;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpSession;
+
+/**
+ * Minimal {@link HttpServletRequest} for tests: it serves a fixed byte array as
+ * the request body and remembers the character encoding that was set. Header
+ * names and parameter names are reported as empty, the content type as
+ * {@code null}; every other accessor throws {@link UnsupportedOperationException}.
+ */
+class FlumeHttpServletRequestWrapper implements HttpServletRequest {
+
+  /** Message carried by every UnsupportedOperationException thrown below. */
+  private static final String NOT_SUPPORTED = "Not supported yet.";
+
+  /** Body stream handed out by getInputStream(); the same instance on every call. */
+  private final ServletInputStream stream;
+
+  /** Value last passed to setCharacterEncoding(); null until then. */
+  private String charset;
+
+  /**
+   * @param data the bytes that the request body stream will yield
+   */
+  public FlumeHttpServletRequestWrapper(final byte[] data) {
+    final InputStream payload = new ByteArrayInputStream(data);
+    this.stream = new ServletInputStream() {
+      @Override
+      public int read() throws IOException {
+        return payload.read();
+      }
+    };
+  }
+
+  @Override
+  public String getAuthType() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public Cookie[] getCookies() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public long getDateHeader(String name) {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getHeader(String name) {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public Enumeration getHeaders(String name) {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  /** @return an enumeration with no elements */
+  @Override
+  public Enumeration getHeaderNames() {
+    return Collections.enumeration(Collections.EMPTY_LIST);
+  }
+
+  @Override
+  public int getIntHeader(String name) {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getMethod() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getPathInfo() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getPathTranslated() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getContextPath() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getQueryString() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getRemoteUser() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public boolean isUserInRole(String role) {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public Principal getUserPrincipal() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getRequestedSessionId() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getRequestURI() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public StringBuffer getRequestURL() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getServletPath() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public HttpSession getSession(boolean create) {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public HttpSession getSession() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public boolean isRequestedSessionIdValid() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public boolean isRequestedSessionIdFromCookie() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public boolean isRequestedSessionIdFromURL() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public boolean isRequestedSessionIdFromUrl() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public Object getAttribute(String name) {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public Enumeration getAttributeNames() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  /** @return the value last given to setCharacterEncoding(), or null if never set */
+  @Override
+  public String getCharacterEncoding() {
+    return this.charset;
+  }
+
+  /** Stores the encoding name as given; no validation is performed here. */
+  @Override
+  public void setCharacterEncoding(String env) throws UnsupportedEncodingException {
+    charset = env;
+  }
+
+  @Override
+  public int getContentLength() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  /** @return always null */
+  @Override
+  public String getContentType() {
+    return null;
+  }
+
+  /** @return the stream over the bytes supplied to the constructor */
+  @Override
+  public ServletInputStream getInputStream() throws IOException {
+    return this.stream;
+  }
+
+  @Override
+  public String getParameter(String name) {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  /** @return an enumeration with no elements */
+  @Override
+  public Enumeration getParameterNames() {
+    return Collections.enumeration(Collections.EMPTY_LIST);
+  }
+
+  @Override
+  public String[] getParameterValues(String name) {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public Map getParameterMap() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getProtocol() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getScheme() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getServerName() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public int getServerPort() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public BufferedReader getReader() throws IOException {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getRemoteAddr() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getRemoteHost() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public void setAttribute(String name, Object o) {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public void removeAttribute(String name) {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public Locale getLocale() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public Enumeration getLocales() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public boolean isSecure() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public RequestDispatcher getRequestDispatcher(String path) {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getRealPath(String path) {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public int getRemotePort() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getLocalName() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public String getLocalAddr() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+
+  @Override
+  public int getLocalPort() {
+    throw new UnsupportedOperationException(NOT_SUPPORTED);
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/ResettableTestStringInputStream.java b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/ResettableTestStringInputStream.java
new file mode 100644
index 0000000..e6ee9b9
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/ResettableTestStringInputStream.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.solr.morphline;
+
+import java.io.IOException;
+
+import org.apache.flume.serialization.ResettableInputStream;
+
+/**
+ * String-backed {@link ResettableInputStream} for tests. Each character of the
+ * string is served as one unit; mark/reset are supported, while seek, tell and
+ * readChar are not.
+ */
+class ResettableTestStringInputStream extends ResettableInputStream {
+
+  /** Backing text that is read from. */
+  private String str;
+  /** Position remembered by the last mark(); starts at the beginning. */
+  private int markPos = 0;
+  /** Index of the next character to hand out. */
+  private int curPos = 0;
+
+  /**
+   * Warning: This test class does not handle character/byte conversion at all!
+   * @param str String to use for testing
+   */
+  public ResettableTestStringInputStream(String str) {
+    this.str = str;
+  }
+
+  /** Not supported by this test class; always throws. */
+  @Override
+  public int readChar() throws IOException {
+    throw new UnsupportedOperationException("This test class doesn't return " +
+        "strings!");
+  }
+
+  /** Remembers the current read position. */
+  @Override
+  public void mark() throws IOException {
+    this.markPos = this.curPos;
+  }
+
+  /** Rewinds the read position to the one saved by the last mark(). */
+  @Override
+  public void reset() throws IOException {
+    this.curPos = this.markPos;
+  }
+
+  /** Not supported by this test class; always throws. */
+  @Override
+  public void seek(long position) throws IOException {
+    throw new UnsupportedOperationException("Unimplemented in test class");
+  }
+
+  /** Not supported by this test class; always throws. */
+  @Override
+  public long tell() throws IOException {
+    throw new UnsupportedOperationException("Unimplemented in test class");
+  }
+
+  /**
+   * @return the next character of the string as an int, or -1 once the string
+   *         is exhausted
+   */
+  @Override
+  public int read() throws IOException {
+    boolean exhausted = curPos >= str.length();
+    if (exhausted) {
+      return -1;
+    }
+    char next = str.charAt(curPos);
+    curPos++;
+    return next;
+  }
+
+  /**
+   * Copies up to {@code len} characters into {@code b}, each narrowed to a byte.
+   *
+   * @param b destination buffer
+   * @param off index in {@code b} of the first byte written
+   * @param len maximum number of bytes to write
+   * @return the number of bytes written, or -1 if the string was already
+   *         exhausted when the call started
+   */
+  @Override
+  public int read(byte[] b, int off, int len) throws IOException {
+    int remaining = str.length() - curPos;
+    if (remaining <= 0) {
+      return -1;
+    }
+    // A non-positive len copies nothing and yields 0.
+    int count = Math.max(0, Math.min(len, remaining));
+    for (int i = 0; i < count; i++) {
+      b[off + i] = (byte) str.charAt(curPos++);
+    }
+    return count;
+  }
+
+  /** Nothing to release. */
+  @Override
+  public void close() throws IOException {
+    // no-op
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/TestBlobDeserializer.java b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/TestBlobDeserializer.java
new file mode 100644
index 0000000..be377ba
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/TestBlobDeserializer.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.solr.morphline;
+
+import com.google.common.base.Charsets;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.serialization.EventDeserializer;
+import org.apache.flume.serialization.EventDeserializerFactory;
+import org.apache.flume.serialization.ResettableInputStream;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Tests for {@code BlobDeserializer}: construction directly, via its builder and
+ * via {@code EventDeserializerFactory}, batch reads, and splitting of input that
+ * exceeds the configured maximum blob length.
+ */
+public class TestBlobDeserializer extends Assert {
+
+  /** Two-line input shared by the "mini" tests. */
+  private String mini;
+
+  @Before
+  public void setup() {
+    StringBuilder sb = new StringBuilder();
+    sb.append("line 1\n");
+    sb.append("line 2\n");
+    mini = sb.toString();
+  }
+
+  @Test
+  public void testSimple() throws IOException {
+    ResettableInputStream in = new ResettableTestStringInputStream(mini);
+    EventDeserializer des = new BlobDeserializer(new Context(), in);
+    validateMiniParse(des);
+  }
+
+  @Test
+  public void testSimpleViaBuilder() throws IOException {
+    ResettableInputStream in = new ResettableTestStringInputStream(mini);
+    EventDeserializer.Builder builder = new BlobDeserializer.Builder();
+    EventDeserializer des = builder.build(new Context(), in);
+    validateMiniParse(des);
+  }
+
+  @Test
+  public void testSimpleViaFactory() throws IOException {
+    ResettableInputStream in = new ResettableTestStringInputStream(mini);
+    EventDeserializer des;
+    des = EventDeserializerFactory.getInstance(BlobDeserializer.Builder.class.getName(),
+        new Context(), in);
+    validateMiniParse(des);
+  }
+
+  @Test
+  public void testBatch() throws IOException {
+    ResettableInputStream in = new ResettableTestStringInputStream(mini);
+    EventDeserializer des = new BlobDeserializer(new Context(), in);
+    // Typed list: with a raw List, events.get(0) is an Object and cannot be
+    // passed to assertEventBodyEquals(String, Event).
+    List<Event> events;
+
+    events = des.readEvents(10); // try to read more than we should have
+    assertEquals(1, events.size());
+    assertEventBodyEquals(mini, events.get(0));
+
+    des.mark();
+    des.close();
+  }
+
+  // truncation occurs at MAX_BLOB_LENGTH_KEY boundaries
+  @Test
+  public void testMaxLineLength() throws IOException {
+    String longLine = "abcdefghijklmnopqrstuvwxyz\n";
+    Context ctx = new Context();
+    ctx.put(BlobDeserializer.MAX_BLOB_LENGTH_KEY, "10");
+
+    ResettableInputStream in = new ResettableTestStringInputStream(longLine);
+    EventDeserializer des = new BlobDeserializer(ctx, in);
+
+    assertEventBodyEquals("abcdefghij", des.readEvent());
+    assertEventBodyEquals("klmnopqrst", des.readEvent());
+    assertEventBodyEquals("uvwxyz\n", des.readEvent());
+    assertNull(des.readEvent());
+  }
+
+  /** Asserts that the event body, decoded as UTF-8, equals {@code expected}. */
+  private void assertEventBodyEquals(String expected, Event event) {
+    String bodyStr = new String(event.getBody(), Charsets.UTF_8);
+    assertEquals(expected, bodyStr);
+  }
+
+  /**
+   * Reads the mini input once, resets to the mark, reads it again, and then
+   * checks that no further event is available.
+   */
+  private void validateMiniParse(EventDeserializer des) throws IOException {
+    Event evt;
+
+    des.mark();
+    evt = des.readEvent();
+    // expected value first, and an explicit charset instead of the platform default
+    assertEventBodyEquals(mini, evt);
+    des.reset(); // reset!
+
+    evt = des.readEvent();
+    assertEquals("data should be repeated, " +
+        "because we reset() the stream", mini, new String(evt.getBody(), Charsets.UTF_8));
+
+    evt = des.readEvent();
+    assertNull("Event should be null because there are no lines " +
+        "left to read", evt);
+
+    des.mark();
+    des.close();
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/TestBlobHandler.java b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/TestBlobHandler.java
new file mode 100644
index 0000000..3e7de99
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/TestBlobHandler.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.solr.morphline;
+
+import java.util.List;
+
+import javax.servlet.http.HttpServletRequest;
+
+import org.apache.flume.Event;
+import org.apache.flume.source.http.HTTPSourceHandler;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests that {@code BlobHandler} turns an HTTP request body into exactly one
+ * header-less event carrying the same bytes, including for an empty body.
+ */
+public class TestBlobHandler extends Assert {
+
+  private HTTPSourceHandler handler;
+
+  @Before
+  public void setUp() {
+    handler = new BlobHandler();
+  }
+
+  @Test
+  public void testSingleEvent() throws Exception {
+    assertSingleEventWithBody("foo");
+  }
+
+  @Test
+  public void testEmptyEvent() throws Exception {
+    assertSingleEventWithBody("");
+  }
+
+  /**
+   * Sends {@code payload} (UTF-8 encoded) through the handler and checks that a
+   * single event without headers and with an identical body comes back.
+   */
+  private void assertSingleEventWithBody(String payload) throws Exception {
+    byte[] body = payload.getBytes("UTF-8");
+    HttpServletRequest req = new FlumeHttpServletRequestWrapper(body);
+    // Typed list: a raw List would make get(0) an Object, not an Event.
+    List<Event> deserialized = handler.getEvents(req);
+    assertEquals(1, deserialized.size());
+    Event e = deserialized.get(0);
+    assertEquals(0, e.getHeaders().size());
+    assertEquals(payload, new String(e.getBody(), "UTF-8"));
+  }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/TestEnvironment.java b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/TestEnvironment.java
new file mode 100644
index 0000000..933a6b1
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/TestEnvironment.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.solr.morphline;
+
+import java.net.UnknownHostException;
+
+import org.junit.Test;
+
+import org.kitesdk.morphline.solr.EnvironmentTest;
+
+/**
+ * Print and verify some info about the environment in which the unit tests are running.
+ * The check itself lives in {@link EnvironmentTest}; this subclass only re-declares it
+ * as a test in this package.
+ */
+public class TestEnvironment extends EnvironmentTest {
+
+  /** Delegates straight to the inherited environment check. */
+  @Test
+  @Override
+  public void testEnvironment() throws UnknownHostException {
+    super.testEnvironment();
+  }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/TestMorphlineInterceptor.java b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/TestMorphlineInterceptor.java
new file mode 100644
index 0000000..8d62d38
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/TestMorphlineInterceptor.java
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.solr.morphline;
+
+import com.google.common.base.Charsets;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.io.Files;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.event.EventBuilder;
+import org.junit.Assert;
+import org.junit.Test;
+import org.kitesdk.morphline.base.Fields;
+
+import java.io.File;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class TestMorphlineInterceptor extends Assert {
+
+ private static final String RESOURCES_DIR = "target/test-classes";
+
+ @Test
+ public void testNoOperation() throws Exception {
+ Context context = new Context();
+ context.put(MorphlineHandlerImpl.MORPHLINE_FILE_PARAM,
+ RESOURCES_DIR + "/test-morphlines/noOperation.conf");
+ Event input = EventBuilder.withBody("foo", Charsets.UTF_8);
+ input.getHeaders().put("name", "nadja");
+ MorphlineInterceptor interceptor = build(context);
+ Event actual = interceptor.intercept(input);
+ interceptor.close();
+ Event expected = EventBuilder.withBody("foo".getBytes("UTF-8"),
+ ImmutableMap.of("name", "nadja"));
+ assertEqualsEvent(expected, actual);
+
+ List actualList = build(context).intercept(Collections.singletonList(input));
+ List expectedList = Collections.singletonList(expected);
+ assertEqualsEventList(expectedList, actualList);
+ }
+
+ @Test
+ public void testReadClob() throws Exception {
+ Context context = new Context();
+ context.put(MorphlineHandlerImpl.MORPHLINE_FILE_PARAM,
+ RESOURCES_DIR + "/test-morphlines/readClob.conf");
+ Event input = EventBuilder.withBody("foo", Charsets.UTF_8);
+ input.getHeaders().put("name", "nadja");
+ Event actual = build(context).intercept(input);
+ Event expected = EventBuilder.withBody(null,
+ ImmutableMap.of("name", "nadja", Fields.MESSAGE, "foo"));
+ assertEqualsEvent(expected, actual);
+
+ List actualList = build(context).intercept(Collections.singletonList(input));
+ List expectedList = Collections.singletonList(expected);
+ assertEqualsEventList(expectedList, actualList);
+ }
+
+ @Test
+ public void testGrokIfNotMatchDropEventRetain() throws Exception {
+ Context context = new Context();
+ context.put(MorphlineHandlerImpl.MORPHLINE_FILE_PARAM,
+ RESOURCES_DIR + "/test-morphlines/grokIfNotMatchDropRecord.conf");
+
+ String msg = "<164>Feb 4 10:46:14 syslog sshd[607]: Server listening on 0.0.0.0 port 22.";
+ Event input = EventBuilder.withBody(null, ImmutableMap.of(Fields.MESSAGE, msg));
+ Event actual = build(context).intercept(input);
+
+ Map expected = new HashMap();
+ expected.put(Fields.MESSAGE, msg);
+ expected.put("syslog_pri", "164");
+ expected.put("syslog_timestamp", "Feb 4 10:46:14");
+ expected.put("syslog_hostname", "syslog");
+ expected.put("syslog_program", "sshd");
+ expected.put("syslog_pid", "607");
+ expected.put("syslog_message", "Server listening on 0.0.0.0 port 22.");
+ Event expectedEvent = EventBuilder.withBody(null, expected);
+ assertEqualsEvent(expectedEvent, actual);
+ }
+
+ @Test
+ /* leading XXXXX does not match regex, thus we expect the event to be dropped */
+ public void testGrokIfNotMatchDropEventDrop() throws Exception {
+ Context context = new Context();
+ context.put(MorphlineHandlerImpl.MORPHLINE_FILE_PARAM,
+ RESOURCES_DIR + "/test-morphlines/grokIfNotMatchDropRecord.conf");
+ String msg = "Feb 4 10:46:14 syslog sshd[607]: Server listening on 0.0.0.0" +
+ " port 22.";
+ Event input = EventBuilder.withBody(null, ImmutableMap.of(Fields.MESSAGE, msg));
+ Event actual = build(context).intercept(input);
+ assertNull(actual);
+ }
+
+ @Test
+ /** morphline says route to southpole if it's an avro file, otherwise route to northpole */
+ public void testIfDetectMimeTypeRouteToSouthPole() throws Exception {
+ Context context = new Context();
+ context.put(MorphlineHandlerImpl.MORPHLINE_FILE_PARAM,
+ RESOURCES_DIR + "/test-morphlines/ifDetectMimeType.conf");
+ context.put(MorphlineHandlerImpl.MORPHLINE_VARIABLE_PARAM + ".MY.MIME_TYPE", "avro/binary");
+
+ Event input = EventBuilder.withBody(Files.toByteArray(
+ new File(RESOURCES_DIR + "/test-documents/sample-statuses-20120906-141433.avro")));
+ Event actual = build(context).intercept(input);
+
+ Map expected = new HashMap();
+ expected.put(Fields.ATTACHMENT_MIME_TYPE, "avro/binary");
+ expected.put("flume.selector.header", "goToSouthPole");
+ Event expectedEvent = EventBuilder.withBody(input.getBody(), expected);
+ assertEqualsEvent(expectedEvent, actual);
+ }
+
+ @Test
+ /** morphline says route to southpole if it's an avro file, otherwise route to northpole */
+ public void testIfDetectMimeTypeRouteToNorthPole() throws Exception {
+ Context context = new Context();
+ context.put(MorphlineHandlerImpl.MORPHLINE_FILE_PARAM,
+ RESOURCES_DIR + "/test-morphlines/ifDetectMimeType.conf");
+ context.put(MorphlineHandlerImpl.MORPHLINE_VARIABLE_PARAM + ".MY.MIME_TYPE", "avro/binary");
+
+ Event input = EventBuilder.withBody(
+ Files.toByteArray(new File(RESOURCES_DIR + "/test-documents/testPDF.pdf")));
+ Event actual = build(context).intercept(input);
+
+ Map expected = new HashMap();
+ expected.put(Fields.ATTACHMENT_MIME_TYPE, "application/pdf");
+ expected.put("flume.selector.header", "goToNorthPole");
+ Event expectedEvent = EventBuilder.withBody(input.getBody(), expected);
+ assertEqualsEvent(expectedEvent, actual);
+ }
+
+ private MorphlineInterceptor build(Context context) {
+ MorphlineInterceptor.Builder builder = new MorphlineInterceptor.Builder();
+ builder.configure(context);
+ return builder.build();
+ }
+
+ // b/c SimpleEvent doesn't implement equals() method :-(
+ private void assertEqualsEvent(Event x, Event y) {
+ assertEquals(x.getHeaders(), y.getHeaders());
+ assertArrayEquals(x.getBody(), y.getBody());
+ }
+
+ private void assertEqualsEventList(List x, List y) {
+ assertEquals(x.size(), y.size());
+ for (int i = 0; i < x.size(); i++) {
+ assertEqualsEvent(x.get(i), y.get(i));
+ }
+ }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/TestMorphlineSolrSink.java b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/TestMorphlineSolrSink.java
new file mode 100644
index 0000000..1bfae95
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/java/org/apache/flume/sink/solr/morphline/TestMorphlineSolrSink.java
@@ -0,0 +1,431 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.solr.morphline;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.flume.Channel;
+import org.apache.flume.ChannelSelector;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.channel.ChannelProcessor;
+import org.apache.flume.channel.MemoryChannel;
+import org.apache.flume.channel.ReplicatingChannelSelector;
+import org.apache.flume.conf.Configurables;
+import org.apache.flume.event.EventBuilder;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrServer;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.kitesdk.morphline.api.MorphlineContext;
+import org.kitesdk.morphline.api.Record;
+import org.kitesdk.morphline.base.FaultTolerance;
+import org.kitesdk.morphline.base.Fields;
+import org.kitesdk.morphline.solr.DocumentLoader;
+import org.kitesdk.morphline.solr.SolrLocator;
+import org.kitesdk.morphline.solr.SolrMorphlineContext;
+import org.kitesdk.morphline.solr.SolrServerDocumentLoader;
+import org.kitesdk.morphline.solr.TestEmbeddedSolrServer;
+import com.codahale.metrics.MetricRegistry;
+import com.google.common.base.Charsets;
+import com.google.common.collect.ImmutableListMultimap;
+import com.google.common.collect.ListMultimap;
+import com.google.common.io.Files;
+
+public class TestMorphlineSolrSink extends SolrTestCaseJ4 {
+
+ private EmbeddedSource source;
+ private SolrServer solrServer;
+ private MorphlineSink sink;
+ private Map