IT`~PrS#X_=rx&q*XQy!0fNqt=%u+Qd=%EvS2&&
zl&;%Vp?MM3YaNCOVgdI>s_yqM+uP@xc}w(`p)BQt16b1U!g;-OI|@;lUF9*rdP!eI$w#f`e?~;
zrryC77r)J?krMt&OKe;+wIzquHmIzqlO0ZKdgxrU-K(d2fy&p>C2CEDl7+n*+T;&B
z%qgGj7zm{sai`kqwY6Qqmj9jRse$;oCVxOz4EqsOFHq}!HaG!*r~F4yWnl#Ze&YOB
z;2z=ppY;Q>|APCG3NPUQ!}+HQ?y&u#@~im~tbYx51nZwgJZ%4kZlpQK;U541w!_tU
Kc=mD~-Tnr6JJJdO
literal 0
HcmV?d00001
diff --git a/code/DataProducer/out/production/DataProducer/META-INF/MANIFEST.MF b/code/DataProducer/out/production/DataProducer/META-INF/MANIFEST.MF
new file mode 100644
index 0000000..da86503
--- /dev/null
+++ b/code/DataProducer/out/production/DataProducer/META-INF/MANIFEST.MF
@@ -0,0 +1,3 @@
+Manifest-Version: 1.0
+Main-Class: ReadWrite
+
diff --git a/code/DataProducer/out/production/DataProducer/ReadWrite.class b/code/DataProducer/out/production/DataProducer/ReadWrite.class
new file mode 100644
index 0000000000000000000000000000000000000000..09a3f4e3b5a2d0ecf1470a517303057ef79fd2a2
GIT binary patch
literal 3235
zcmZuzdvF`&5&xZZ(n;sb10}X2Ck+Y3i7aHh2^fO%AaO`yCw>s@I1vuCvV3-w$dYZj6Ndlzx1G+^{q3D3%ds@0
z-S2ky+ud*X_wC)^{`dZ$0NjOlbbJM4L3AP|WLzgPHB1CCi4y^&b)3X04H+FlOliml
zkkiqCyoRsqSPo0bw2!X%Q(`)76f`^ZYdATUah&B+$;?R7O3QOusM;se(+V2;9%SI7;+PTCsbo5H)JPMr}~q5Cj$GxTq>vPJ|kIiYh$FoXYaIaDe$Fog{-YW>E)l9
zH@e2@Txaj#zQ)%o8z@6fr)A{y%TcMaMhPous548*sQFX
zoT5K(dYaMKRxN6@QXbQyjc1G|YpN#Qg^3B%GRLcW-8QH4Zm)c6
zv?2V7VmjXgdUI2Jcgsfi2i-;7%cX(9K3Kps*TT!^;L9#w!Z$y^GOFlEsBT(Ef`LUf1vg13#1q
z%}xY1_@<^vK2(P*>LnfFkq>kq60`FJY{hJGS44Zkz(u^O;YS8u!|MuGE*!}%Gf{tJ
zc>gv7Z{WwW;U|@(YA#=QA)O%#Xlmt7#|``xKNIq%g21lw&%hn4pAj@=7usYJVbmyXj@f5V?pqb8?W}f|w9CUn4ns)aYTLx59ovYb4
zDoBltb}#iVyZy<0-ptZ#d%e9&_LRZIk+Lq*yENeib->P5AnHqHa??EF3a4_;pdGf7
zDN5?)`;cT6UZ_?r7PgD7rjq3WAyVIIc@6MA*2(uO;6Xs<0apPT3FImi;-`!BZa#IA
zE9AcD9F!|ucyJG?-w}GSjcda-z`cm`DTwXZL5XZWNox;zbJV-__wYnjQtvE0SE0^8
z?YlxV10gi>9qD!4w$QOS_d}ybr_P|Z0Zmv|6RH_Iv5R*1@vp&iM8j^0%VFlm^}NSJ
z1gZR&(x>z*14>+-g+HVf5hx*;;7Rnp5*iYBgp4^X>n~w>A~uH=1KzuR?M19i#MO_`
z|BO#rja#l_RSByTVKvlPM3dMz$Nh9x!kWY!!l9NT);9WzSQq*dS7DW;i1iFK3vXDx
zid$#q-;EWqA@&glh0mZT?wi5hxG&6Iw^6*!6>kx7%oX2G@kUqNBI3rlMv37{eDOe7
z`|t{ja}KZIJR2Fr^PGAc@fO~}uglMu`GHcxhxl{(`5LS78s3EP%b0Wc$M^)FQhyD9
zb)MZ8{0dRptYqJAVHHDc#VST<q9@!>!0`~9J!x=+6x~k^P}^GbKquULinh&L-{w82+GUFg|Tv;cT(=dafKLdO_Z>S|ILZ0
zI)}&{+E}UnSm=%-Vljsl(LRH%1BCExGl&ZBAY>?8UzRn+Rac;w;TKj3B#ljJV*xiZ
zdMj_fj~96-ZxYP&-c{;
literal 0
HcmV?d00001
diff --git a/code/DataProducer/src/META-INF/MANIFEST.MF b/code/DataProducer/src/META-INF/MANIFEST.MF
new file mode 100644
index 0000000..da86503
--- /dev/null
+++ b/code/DataProducer/src/META-INF/MANIFEST.MF
@@ -0,0 +1,3 @@
+Manifest-Version: 1.0
+Main-Class: ReadWrite
+
diff --git a/code/DataProducer/src/ReadWrite.java b/code/DataProducer/src/ReadWrite.java
new file mode 100644
index 0000000..da7754a
--- /dev/null
+++ b/code/DataProducer/src/ReadWrite.java
@@ -0,0 +1,67 @@
+import java.io.*;
+public class ReadWrite {
+ static String readFileName;
+ static String writeFileName;
+ public static void main(String args[]){
+ readFileName = args[0];
+ writeFileName = args[1];
+ try {
+ // readInput();
+ readFileByLines(readFileName);
+ }catch(Exception e){
+ }
+ }
+
+ public static void readFileByLines(String fileName) {
+ FileInputStream fis = null;
+ InputStreamReader isr = null;
+ BufferedReader br = null;
+ String tempString = null;
+ try {
+ System.out.println("以行为单位读取文件内容,一次读一整行:");
+ fis = new FileInputStream(fileName);// FileInputStream
+ // 从文件系统中的某个文件中获取字节
+ isr = new InputStreamReader(fis,"GBK");
+ br = new BufferedReader(isr);
+ int count=0;
+ while ((tempString = br.readLine()) != null) {
+ count++;
+ // 显示行号
+ Thread.sleep(300);
+ String str = new String(tempString.getBytes("UTF8"),"GBK");
+ System.out.println("row:"+count+">>>>>>>>"+tempString);
+ method1(writeFileName,tempString);
+ //appendMethodA(writeFileName,tempString);
+ }
+ isr.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ } finally {
+ if (isr != null) {
+ try {
+ isr.close();
+ } catch (IOException e1) {
+ }
+ }
+ }
+ }
+ public static void method1(String file, String conent) {
+ BufferedWriter out = null;
+ try {
+ out = new BufferedWriter(new OutputStreamWriter(
+ new FileOutputStream(file, true)));
+ out.write("\n");
+ out.write(conent);
+ } catch (Exception e) {
+ e.printStackTrace();
+ } finally {
+ try {
+ out.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/pom.xml b/code/flume-ng-sinks/flume-dataset-sink/pom.xml
new file mode 100644
index 0000000..1e8a07b
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/pom.xml
@@ -0,0 +1,145 @@
+
+
+
+
+ 4.0.0
+
+
+ flume-ng-sinks
+ org.apache.flume
+ 1.7.0
+
+
+ org.apache.flume.flume-ng-sinks
+ flume-dataset-sink
+ Flume NG Kite Dataset Sink
+
+
+
+
+ org.apache.rat
+ apache-rat-plugin
+
+
+ org.apache.felix
+ maven-bundle-plugin
+ 2.3.7
+ true
+ true
+
+
+
+
+
+
+
+ org.apache.flume
+ flume-ng-sdk
+
+
+
+ org.apache.flume
+ flume-ng-configuration
+
+
+
+ org.apache.flume
+ flume-ng-core
+
+
+
+ org.kitesdk
+ kite-data-core
+
+
+
+ org.kitesdk
+ kite-data-hive
+
+
+
+ org.kitesdk
+ kite-data-hbase
+
+
+
+ org.apache.avro
+ avro
+
+
+
+ org.apache.hive
+ hive-exec
+ true
+
+
+
+ org.apache.hive
+ hive-metastore
+ true
+
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ ${hadoop2.version}
+ true
+
+
+
+ org.slf4j
+ slf4j-api
+
+
+
+ com.google.guava
+ guava
+
+
+
+ junit
+ junit
+ test
+
+
+
+ org.apache.hadoop
+ hadoop-minicluster
+ ${hadoop2.version}
+ test
+
+
+
+ org.slf4j
+ slf4j-log4j12
+ test
+
+
+
+ org.mockito
+ mockito-all
+ test
+
+
+
+
+
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/DatasetSink.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/DatasetSink.java
new file mode 100644
index 0000000..fa31262
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/DatasetSink.java
@@ -0,0 +1,582 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.kite;
+
+import org.apache.flume.auth.FlumeAuthenticationUtil;
+import org.apache.flume.auth.PrivilegedExecutor;
+import org.apache.flume.sink.kite.parser.EntityParserFactory;
+import org.apache.flume.sink.kite.parser.EntityParser;
+import org.apache.flume.sink.kite.policy.FailurePolicy;
+import org.apache.flume.sink.kite.policy.FailurePolicyFactory;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Throwables;
+import com.google.common.collect.Lists;
+
+import java.net.URI;
+import java.security.PrivilegedAction;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.Transaction;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.instrumentation.SinkCounter;
+import org.apache.flume.sink.AbstractSink;
+import org.kitesdk.data.Dataset;
+import org.kitesdk.data.DatasetDescriptor;
+import org.kitesdk.data.DatasetIOException;
+import org.kitesdk.data.DatasetNotFoundException;
+import org.kitesdk.data.DatasetWriter;
+import org.kitesdk.data.Datasets;
+import org.kitesdk.data.Flushable;
+import org.kitesdk.data.Syncable;
+import org.kitesdk.data.View;
+import org.kitesdk.data.spi.Registration;
+import org.kitesdk.data.URIBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.flume.sink.kite.DatasetSinkConstants.*;
+import org.kitesdk.data.Format;
+import org.kitesdk.data.Formats;
+
+/**
+ * Sink that writes events to a Kite Dataset. This sink will parse the body of
+ * each incoming event and store the resulting entity in a Kite Dataset. It
+ * determines the destination Dataset by opening a dataset URI
+ * {@code kite.dataset.uri} or opening a repository URI, {@code kite.repo.uri},
+ * and loading a Dataset by name, {@code kite.dataset.name}, and namespace,
+ * {@code kite.dataset.namespace}.
+ */
+public class DatasetSink extends AbstractSink implements Configurable {
+
+  private static final Logger LOG = LoggerFactory.getLogger(DatasetSink.class);
+
+  private Context context = null;
+  private PrivilegedExecutor privilegedExecutor;
+
+  private String datasetName = null;
+  private URI datasetUri = null;
+  private Schema datasetSchema = null;
+  private DatasetWriter<GenericRecord> writer = null;
+
+  /** The number of events to process as a single batch. */
+  private long batchSize = DEFAULT_BATCH_SIZE;
+
+  /** The number of seconds to wait before rolling a writer. */
+  private int rollIntervalSeconds = DEFAULT_ROLL_INTERVAL;
+
+  /** Flag that says if Flume should commit on every batch. */
+  private boolean commitOnBatch = DEFAULT_FLUSHABLE_COMMIT_ON_BATCH;
+
+  /** Flag that says if Flume should sync on every batch. */
+  private boolean syncOnBatch = DEFAULT_SYNCABLE_SYNC_ON_BATCH;
+
+  /** The last time the writer rolled. */
+  private long lastRolledMillis = 0L;
+
+  /** The raw number of bytes parsed. */
+  private long bytesParsed = 0L;
+
+  /** A class for parsing Kite entities from Flume Events. */
+  private EntityParser parser = null;
+
+  /**
+   * A class implementing a failure policy for events that had a
+   * non-recoverable error during processing.
+   */
+  private FailurePolicy failurePolicy = null;
+
+  private SinkCounter counter = null;
+
+  /**
+   * The Kite entity
+   */
+  private GenericRecord entity = null;
+  // TODO: remove this after PARQUET-62 is released
+  private boolean reuseEntity = true;
+
+  /**
+   * The Flume transaction. Used to keep transactions open across calls to
+   * process.
+   */
+  private Transaction transaction = null;
+
+  /**
+   * Internal flag on if there has been a batch of records committed. This is
+   * used during rollback to know if the current writer needs to be closed.
+   */
+  private boolean committedBatch = false;
+
+  // Factories
+  private static final EntityParserFactory ENTITY_PARSER_FACTORY =
+      new EntityParserFactory();
+  private static final FailurePolicyFactory FAILURE_POLICY_FACTORY =
+      new FailurePolicyFactory();
+
+  /**
+   * Return the list of allowed formats.
+   * @return The list of allowed formats.
+   */
+  protected List<String> allowedFormats() {
+    return Lists.newArrayList("avro", "parquet");
+  }
+
+  /**
+   * Reads the authentication settings, the target dataset (a dataset URI, or a
+   * repository URI plus dataset name and optional namespace), the failure
+   * policy, the batch size and the roll interval from the Flume context.
+   */
+  @Override
+  public void configure(Context context) {
+    this.context = context;
+
+    String principal = context.getString(AUTH_PRINCIPAL);
+    String keytab = context.getString(AUTH_KEYTAB);
+    String effectiveUser = context.getString(AUTH_PROXY_USER);
+
+    this.privilegedExecutor = FlumeAuthenticationUtil.getAuthenticator(
+        principal, keytab).proxyAs(effectiveUser);
+
+    // Get the dataset URI and name from the context
+    String datasetURI = context.getString(CONFIG_KITE_DATASET_URI);
+    if (datasetURI != null) {
+      this.datasetUri = URI.create(datasetURI);
+      this.datasetName = uriToName(datasetUri);
+    } else {
+      String repositoryURI = context.getString(CONFIG_KITE_REPO_URI);
+      Preconditions.checkNotNull(repositoryURI, "No dataset configured. Setting "
+          + CONFIG_KITE_DATASET_URI + " is required.");
+
+      this.datasetName = context.getString(CONFIG_KITE_DATASET_NAME);
+      Preconditions.checkNotNull(datasetName, "No dataset configured. Setting "
+          + CONFIG_KITE_DATASET_URI + " is required.");
+
+      String namespace = context.getString(CONFIG_KITE_DATASET_NAMESPACE,
+          DEFAULT_NAMESPACE);
+
+      this.datasetUri = new URIBuilder(repositoryURI, namespace, datasetName)
+          .build();
+    }
+    this.setName(datasetUri.toString());
+
+    if (context.getBoolean(CONFIG_SYNCABLE_SYNC_ON_BATCH,
+        DEFAULT_SYNCABLE_SYNC_ON_BATCH)) {
+      Preconditions.checkArgument(
+          context.getBoolean(CONFIG_FLUSHABLE_COMMIT_ON_BATCH,
+              DEFAULT_FLUSHABLE_COMMIT_ON_BATCH), "Configuration error: "
+              + CONFIG_FLUSHABLE_COMMIT_ON_BATCH + " must be set to true when "
+              + CONFIG_SYNCABLE_SYNC_ON_BATCH + " is set to true.");
+    }
+
+    // Create the configured failure policy
+    this.failurePolicy = FAILURE_POLICY_FACTORY.newPolicy(context);
+
+    // other configuration
+    this.batchSize = context.getLong(CONFIG_KITE_BATCH_SIZE,
+        DEFAULT_BATCH_SIZE);
+    this.rollIntervalSeconds = context.getInteger(CONFIG_KITE_ROLL_INTERVAL,
+        DEFAULT_ROLL_INTERVAL);
+
+    this.counter = new SinkCounter(datasetName);
+  }
+
+  /** Resets the roll timer and starts the sink counter before delegating to the superclass. */
+  @Override
+  public synchronized void start() {
+    this.lastRolledMillis = System.currentTimeMillis();
+    counter.start();
+    // signal that this sink is ready to process
+    LOG.info("Started DatasetSink " + getName());
+    super.start();
+  }
+
+  /**
+   * Causes the sink to roll at the next {@link #process()} call.
+   */
+  @VisibleForTesting
+  void roll() {
+    this.lastRolledMillis = 0L;
+  }
+
+  @VisibleForTesting
+  DatasetWriter<GenericRecord> getWriter() {
+    return writer;
+  }
+
+  @VisibleForTesting
+  void setWriter(DatasetWriter<GenericRecord> writer) {
+    this.writer = writer;
+  }
+
+  @VisibleForTesting
+  void setParser(EntityParser parser) {
+    this.parser = parser;
+  }
+
+  @VisibleForTesting
+  void setFailurePolicy(FailurePolicy failurePolicy) {
+    this.failurePolicy = failurePolicy;
+  }
+
+  /** Stops the counter, closes the writer and commits any open transaction. */
+  @Override
+  public synchronized void stop() {
+    counter.stop();
+
+    try {
+      // Close the writer and commit the transaction, but don't create a new
+      // writer since we're stopping
+      closeWriter();
+      commitTransaction();
+    } catch (EventDeliveryException ex) {
+      rollbackTransaction();
+
+      LOG.warn("Closing the writer failed: " + ex.getLocalizedMessage());
+      LOG.debug("Exception follows.", ex);
+      // We don't propagate the exception as the transaction would have been
+      // rolled back and we can still finish stopping
+    }
+
+    // signal that this sink has stopped
+    LOG.info("Stopped dataset sink: " + getName());
+    super.stop();
+  }
+
+  /**
+   * Takes up to {@code batchSize} events from the channel and writes them to
+   * the dataset, first rolling the writer when {@link #shouldRoll()} is true.
+   *
+   * @return BACKOFF when the channel had no events, READY otherwise
+   * @throws EventDeliveryException on failure, after rolling back the transaction
+   */
+  @Override
+  public Status process() throws EventDeliveryException {
+    long processedEvents = 0;
+
+    try {
+      if (shouldRoll()) {
+        closeWriter();
+        commitTransaction();
+        createWriter();
+      }
+
+      // The writer shouldn't be null at this point
+      Preconditions.checkNotNull(writer,
+          "Can't process events with a null writer. This is likely a bug.");
+      Channel channel = getChannel();
+
+      // Enter the transaction boundary if we haven't already
+      enterTransaction(channel);
+
+      for (; processedEvents < batchSize; processedEvents += 1) {
+        Event event = channel.take();
+
+        if (event == null) {
+          // no events available in the channel
+          break;
+        }
+
+        write(event);
+      }
+
+      // commit transaction
+      if (commitOnBatch) {
+        // Flush/sync before committing. A failure here will result in rolling back
+        // the transaction
+        if (syncOnBatch && writer instanceof Syncable) {
+          ((Syncable) writer).sync();
+        } else if (writer instanceof Flushable) {
+          ((Flushable) writer).flush();
+        }
+        boolean committed = commitTransaction();
+        Preconditions.checkState(committed,
+            "Tried to commit a batch when there was no transaction");
+        committedBatch |= committed;
+      }
+    } catch (Throwable th) {
+      // catch-all for any unhandled Throwable so that the transaction is
+      // correctly rolled back.
+      rollbackTransaction();
+
+      if (commitOnBatch && committedBatch) {
+        try {
+          closeWriter();
+        } catch (EventDeliveryException ex) {
+          LOG.warn("Error closing writer there may be temp files that need to"
+              + " be manually recovered: " + ex.getLocalizedMessage());
+          LOG.debug("Exception follows.", ex);
+        }
+      } else {
+        this.writer = null;
+      }
+
+      // handle the exception
+      Throwables.propagateIfInstanceOf(th, Error.class);
+      Throwables.propagateIfInstanceOf(th, EventDeliveryException.class);
+      throw new EventDeliveryException(th);
+    }
+
+    if (processedEvents == 0) {
+      counter.incrementBatchEmptyCount();
+      return Status.BACKOFF;
+    } else if (processedEvents < batchSize) {
+      counter.incrementBatchUnderflowCount();
+    } else {
+      counter.incrementBatchCompleteCount();
+    }
+
+    counter.addToEventDrainSuccessCount(processedEvents);
+
+    return Status.READY;
+  }
+
+  /**
+   * Parse the event using the entity parser and write the entity to the dataset.
+   *
+   * @param event The event to write
+   * @throws EventDeliveryException An error occurred trying to write to the
+   *                                dataset that couldn't or shouldn't be
+   *                                handled by the failure policy.
+   */
+  @VisibleForTesting
+  void write(Event event) throws EventDeliveryException {
+    try {
+      this.entity = parser.parse(event, reuseEntity ? entity : null);
+      this.bytesParsed += event.getBody().length;
+
+      // writeEncoded would be an optimization in some cases, but HBase
+      // will not support it and partitioned Datasets need to get partition
+      // info from the entity Object. We may be able to avoid the
+      // serialization round-trip otherwise.
+      writer.write(entity);
+    } catch (NonRecoverableEventException ex) {
+      failurePolicy.handle(event, ex);
+    } catch (DataFileWriter.AppendWriteException ex) {
+      failurePolicy.handle(event, ex);
+    } catch (RuntimeException ex) {
+      Throwables.propagateIfInstanceOf(ex, EventDeliveryException.class);
+      throw new EventDeliveryException(ex);
+    }
+  }
+
+  /**
+   * Create a new writer.
+   *
+   * This method also re-loads the dataset so updates to the configuration or
+   * a dataset created after Flume starts will be loaded.
+   *
+   * @throws EventDeliveryException There was an error creating the writer.
+   */
+  @VisibleForTesting
+  void createWriter() throws EventDeliveryException {
+    // reset the committed flag whenever a new writer is created
+    committedBatch = false;
+    try {
+      View<GenericRecord> view;
+
+      view = privilegedExecutor.execute(
+          new PrivilegedAction<Dataset<GenericRecord>>() {
+            @Override
+            public Dataset<GenericRecord> run() {
+              return Datasets.load(datasetUri);
+            }
+          });
+
+      DatasetDescriptor descriptor = view.getDataset().getDescriptor();
+      Format format = descriptor.getFormat();
+      Preconditions.checkArgument(allowedFormats().contains(format.getName()),
+          "Unsupported format: " + format.getName());
+
+      Schema newSchema = descriptor.getSchema();
+      if (datasetSchema == null || !newSchema.equals(datasetSchema)) {
+        this.datasetSchema = descriptor.getSchema();
+        // dataset schema has changed, create a new parser
+        parser = ENTITY_PARSER_FACTORY.newParser(datasetSchema, context);
+      }
+
+      this.reuseEntity = !(Formats.PARQUET.equals(format));
+
+      // TODO: Check that the format implements Flushable after CDK-863
+      // goes in. For now, just check that the Dataset is Avro format
+      this.commitOnBatch = context.getBoolean(CONFIG_FLUSHABLE_COMMIT_ON_BATCH,
+          DEFAULT_FLUSHABLE_COMMIT_ON_BATCH) && (Formats.AVRO.equals(format));
+
+      // TODO: Check that the format implements Syncable after CDK-863
+      // goes in. For now, just check that the Dataset is Avro format
+      this.syncOnBatch = context.getBoolean(CONFIG_SYNCABLE_SYNC_ON_BATCH,
+          DEFAULT_SYNCABLE_SYNC_ON_BATCH) && (Formats.AVRO.equals(format));
+
+      this.datasetName = view.getDataset().getName();
+
+      this.writer = view.newWriter();
+
+      // Reset the last rolled time and the metrics
+      this.lastRolledMillis = System.currentTimeMillis();
+      this.bytesParsed = 0L;
+    } catch (DatasetNotFoundException ex) {
+      throw new EventDeliveryException("Dataset " + datasetUri + " not found."
+          + " The dataset must be created before Flume can write to it.", ex);
+    } catch (RuntimeException ex) {
+      throw new EventDeliveryException("Error trying to open a new"
+          + " writer for dataset " + datasetUri, ex);
+    }
+  }
+
+  /**
+   * Return true if the sink should roll the writer.
+   *
+   * Currently, this is based on time since the last roll or if the current
+   * writer is null.
+   *
+   * @return True if and only if the sink should roll the writer
+   */
+  private boolean shouldRoll() {
+    long currentTimeMillis = System.currentTimeMillis();
+    long elapsedTimeSeconds = TimeUnit.MILLISECONDS.toSeconds(
+        currentTimeMillis - lastRolledMillis);
+
+    LOG.debug("Current time: {}, lastRolled: {}, diff: {} sec",
+        new Object[] {currentTimeMillis, lastRolledMillis, elapsedTimeSeconds});
+
+    return elapsedTimeSeconds >= rollIntervalSeconds || writer == null;
+  }
+
+  /**
+   * Close the current writer.
+   *
+   * This method always sets the current writer to null even if close fails.
+   * If this method throws an Exception, callers *must* rollback any active
+   * transaction to ensure that data is replayed.
+   *
+   * @throws EventDeliveryException
+   */
+  @VisibleForTesting
+  void closeWriter() throws EventDeliveryException {
+    if (writer != null) {
+      try {
+        writer.close();
+
+        long elapsedTimeSeconds = TimeUnit.MILLISECONDS.toSeconds(
+            System.currentTimeMillis() - lastRolledMillis);
+        LOG.info("Closed writer for {} after {} seconds and {} bytes parsed",
+            new Object[]{datasetUri, elapsedTimeSeconds, bytesParsed});
+      } catch (DatasetIOException ex) {
+        throw new EventDeliveryException("Check HDFS permissions/health. IO"
+            + " error trying to close the writer for dataset " + datasetUri,
+            ex);
+      } catch (RuntimeException ex) {
+        throw new EventDeliveryException("Error trying to close the writer for"
+            + " dataset " + datasetUri, ex);
+      } finally {
+        // If we failed to close the writer then we give up on it as we'll
+        // end up throwing an EventDeliveryException which will result in
+        // a transaction rollback and a replay of any events written during
+        // the current transaction. If commitOnBatch is true, you can still
+        // end up with orphaned temp files that have data to be recovered.
+        this.writer = null;
+        failurePolicy.close();
+      }
+    }
+  }
+
+  /**
+   * Enter the transaction boundary. This will begin a new transaction
+   * if one doesn't already exist. If we're already in a transaction boundary,
+   * then this method does nothing.
+   *
+   * @param channel The Sink's channel
+   * @throws EventDeliveryException There was an error starting a new batch
+   *                                with the failure policy.
+   */
+  private void enterTransaction(Channel channel) throws EventDeliveryException {
+    // There's no synchronization around the transaction instance because the
+    // Sink API states "the Sink#process() call is guaranteed to only
+    // be accessed by a single thread". Technically other methods could be
+    // called concurrently, but the implementation of SinkRunner waits
+    // for the Thread running process() to end before calling stop()
+    if (transaction == null) {
+      this.transaction = channel.getTransaction();
+      transaction.begin();
+      failurePolicy = FAILURE_POLICY_FACTORY.newPolicy(context);
+    }
+  }
+
+  /**
+   * Commit and close the transaction.
+   *
+   * If this method throws an Exception the caller *must* ensure that the
+   * transaction is rolled back. Callers can roll back the transaction by
+   * calling {@link #rollbackTransaction()}.
+   *
+   * @return True if there was an open transaction and it was committed, false
+   *         otherwise.
+   * @throws EventDeliveryException There was an error ending the batch with
+   *                                the failure policy.
+   */
+  @VisibleForTesting
+  boolean commitTransaction() throws EventDeliveryException {
+    if (transaction != null) {
+      failurePolicy.sync();
+      transaction.commit();
+      transaction.close();
+      this.transaction = null;
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  /**
+   * Rollback the transaction. If there is a RuntimeException during rollback,
+   * it will be logged but the transaction instance variable will still be
+   * nullified.
+   */
+  private void rollbackTransaction() {
+    if (transaction != null) {
+      try {
+        // If the transaction wasn't committed before we got the exception, we
+        // need to rollback.
+        transaction.rollback();
+      } catch (RuntimeException ex) {
+        LOG.error("Transaction rollback failed: " + ex.getLocalizedMessage());
+        LOG.debug("Exception follows.", ex);
+      } finally {
+        transaction.close();
+        this.transaction = null;
+      }
+    }
+  }
+
+  /**
+   * Get the name of the dataset from the URI
+   *
+   * @param uri The dataset or view URI
+   * @return The dataset name
+   */
+  private static String uriToName(URI uri) {
+    return Registration.lookupDatasetUri(URI.create(
+        uri.getRawSchemeSpecificPart())).second().get("dataset");
+  }
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/DatasetSinkConstants.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/DatasetSinkConstants.java
new file mode 100644
index 0000000..af33304
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/DatasetSinkConstants.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite;
+
+import org.kitesdk.data.URIBuilder;
+
+public class DatasetSinkConstants {
+  // Configuration keys, default values and event-header names for the Kite
+  // dataset sink. The DEFAULT_* values are final so they cannot be reassigned.
+
+  /** URI of the Kite Dataset. */
+  public static final String CONFIG_KITE_DATASET_URI = "kite.dataset.uri";
+
+  /** URI of the Kite DatasetRepository. */
+  public static final String CONFIG_KITE_REPO_URI = "kite.repo.uri";
+
+  /**
+   * Name of the Kite Dataset to write into.
+   */
+  public static final String CONFIG_KITE_DATASET_NAME = "kite.dataset.name";
+
+  /**
+   * Namespace of the Kite Dataset to write into.
+   */
+  public static final String CONFIG_KITE_DATASET_NAMESPACE =
+      "kite.dataset.namespace";
+  /** Default namespace, taken from {@code URIBuilder.NAMESPACE_DEFAULT}. */
+  public static final String DEFAULT_NAMESPACE = URIBuilder.NAMESPACE_DEFAULT;
+
+  /**
+   * Number of records to process from the incoming channel per call to process.
+   */
+  public static final String CONFIG_KITE_BATCH_SIZE = "kite.batchSize";
+  public static final long DEFAULT_BATCH_SIZE = 100;
+
+  /**
+   * Maximum time to wait before finishing files.
+   */
+  public static final String CONFIG_KITE_ROLL_INTERVAL = "kite.rollInterval";
+  public static final int DEFAULT_ROLL_INTERVAL = 30; // seconds
+
+  /**
+   * Flag for committing the Flume transaction on each batch for Flushable
+   * datasets. When false, Flume only commits once the roll interval has
+   * expired, so the channel must be able to hold every event delivered during
+   * that interval. Defaults to true. NOTE: the key really is spelled
+   * "commiteOnBatch"; renaming it would break configurations that use it.
+   */
+  public static final String CONFIG_FLUSHABLE_COMMIT_ON_BATCH =
+      "kite.flushable.commiteOnBatch";
+  public static final boolean DEFAULT_FLUSHABLE_COMMIT_ON_BATCH = true;
+
+  /**
+   * Flag for syncing the DatasetWriter on each batch for Syncable
+   * datasets. Defaults to true.
+   */
+  public static final String CONFIG_SYNCABLE_SYNC_ON_BATCH =
+      "kite.syncable.syncOnBatch";
+  public static final boolean DEFAULT_SYNCABLE_SYNC_ON_BATCH = true;
+
+  /**
+   * Parser used to parse Flume Events into Kite entities.
+   */
+  public static final String CONFIG_ENTITY_PARSER = "kite.entityParser";
+
+  /**
+   * Built-in entity parsers
+   */
+  public static final String AVRO_ENTITY_PARSER = "avro";
+  public static final String DEFAULT_ENTITY_PARSER = AVRO_ENTITY_PARSER;
+  public static final String[] AVAILABLE_PARSERS = new String[] {
+    AVRO_ENTITY_PARSER
+  };
+
+  /**
+   * Policy used to handle non-recoverable failures.
+   */
+  public static final String CONFIG_FAILURE_POLICY = "kite.failurePolicy";
+
+  /**
+   * Write non-recoverable Flume events to a Kite dataset.
+   */
+  public static final String SAVE_FAILURE_POLICY = "save";
+
+  /**
+   * The URI to write non-recoverable Flume events to in the case of an error.
+   * If the dataset doesn't exist, it will be created.
+   */
+  public static final String CONFIG_KITE_ERROR_DATASET_URI =
+      "kite.error.dataset.uri";
+
+  /**
+   * Retry non-recoverable Flume events. This will lead to a never ending cycle
+   * of failure, but matches the previous default semantics of the DatasetSink.
+   */
+  public static final String RETRY_FAILURE_POLICY = "retry";
+  public static final String DEFAULT_FAILURE_POLICY = RETRY_FAILURE_POLICY;
+  public static final String[] AVAILABLE_POLICIES = new String[] {
+    RETRY_FAILURE_POLICY,
+    SAVE_FAILURE_POLICY
+  };
+
+  /**
+   * Headers where avro schema information is expected.
+   */
+  public static final String AVRO_SCHEMA_LITERAL_HEADER =
+      "flume.avro.schema.literal";
+  public static final String AVRO_SCHEMA_URL_HEADER = "flume.avro.schema.url";
+
+  /**
+   * Hadoop authentication settings
+   */
+  public static final String AUTH_PROXY_USER = "auth.proxyUser";
+  public static final String AUTH_PRINCIPAL = "auth.kerberosPrincipal";
+  public static final String AUTH_KEYTAB = "auth.kerberosKeytab";
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/NonRecoverableEventException.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/NonRecoverableEventException.java
new file mode 100644
index 0000000..4373429
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/NonRecoverableEventException.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite;
+
+
+/**
+ * A non-recoverable error trying to deliver the event.
+ *
+ * Non-recoverable event delivery failures include:
+ *
+ * 1. Error parsing the event body thrown from the {@link org.apache.flume.sink.kite.parser.EntityParser}
+ * 2. A schema mismatch between the schema of an event and the schema of the
+ * destination dataset.
+ * 3. A missing schema from the Event header when using the
+ * {@link org.apache.flume.sink.kite.parser.AvroParser}.
+ */
+public class NonRecoverableEventException extends Exception {
+ // Extends Exception (not RuntimeException), so callers must catch or declare it.
+ private static final long serialVersionUID = 3485151222482254285L;
+ /** Creates an exception with no detail message and no cause. */
+ public NonRecoverableEventException() {
+ super();
+ }
+ /** Creates an exception with the given detail message. */
+ public NonRecoverableEventException(String message) {
+ super(message);
+ }
+ /** Creates an exception with the given detail message and underlying cause. */
+ public NonRecoverableEventException(String message, Throwable t) {
+ super(message, t);
+ }
+ /** Creates an exception that wraps the given underlying cause. */
+ public NonRecoverableEventException(Throwable t) {
+ super(t);
+ }
+
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/AvroParser.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/AvroParser.java
new file mode 100644
index 0000000..7c6a723
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/AvroParser.java
@@ -0,0 +1,208 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite.parser;
+
+import com.google.common.base.Preconditions;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import com.google.common.util.concurrent.UncheckedExecutionException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URL;
+import java.util.Locale;
+import java.util.Map;
+import java.util.concurrent.ExecutionException;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.BinaryDecoder;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.io.DecoderFactory;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.sink.kite.NonRecoverableEventException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import static org.apache.flume.sink.kite.DatasetSinkConstants.*;
+
+/**
+ * An {@link EntityParser} that parses Avro serialized bytes from an event.
+ *
+ * The Avro schema used to serialize the data should be set as either a URL
+ * or literal in the flume.avro.schema.url or flume.avro.schema.literal event
+ * headers respectively.
+ */
+public class AvroParser implements EntityParser<GenericRecord> {
+
+  static Configuration conf = new Configuration();
+
+  /**
+   * A cache of literal schemas to avoid re-parsing the schema.
+   */
+  private static final LoadingCache<String, Schema> schemasFromLiteral =
+      CacheBuilder.newBuilder()
+      .build(new CacheLoader<String, Schema>() {
+        @Override
+        public Schema load(String literal) {
+          Preconditions.checkNotNull(literal,
+              "Schema literal cannot be null without a Schema URL");
+          return new Schema.Parser().parse(literal);
+        }
+      });
+
+  /**
+   * A cache of schemas retrieved by URL to avoid re-parsing the schema.
+   */
+  private static final LoadingCache<String, Schema> schemasFromURL =
+      CacheBuilder.newBuilder()
+      .build(new CacheLoader<String, Schema>() {
+        @Override
+        public Schema load(String url) throws IOException {
+          Schema.Parser parser = new Schema.Parser();
+          InputStream is = null;
+          try {
+            // Look up a Hadoop FileSystem only for hdfs URLs; other URLs are
+            // opened directly and must not fail on a missing FileSystem.
+            if (url.toLowerCase(Locale.ENGLISH).startsWith("hdfs:/")) {
+              FileSystem fs = FileSystem.get(URI.create(url), conf);
+              is = fs.open(new Path(url));
+            } else {
+              is = new URL(url).openStream();
+            }
+            return parser.parse(is);
+          } finally {
+            if (is != null) {
+              is.close();
+            }
+          }
+        }
+      });
+
+  /**
+   * The schema of the destination dataset.
+   *
+   * Used as the reader schema during parsing.
+   */
+  private final Schema datasetSchema;
+
+  /**
+   * A cache of DatumReaders per schema.
+   */
+  private final LoadingCache<Schema, DatumReader<GenericRecord>> readers =
+      CacheBuilder.newBuilder()
+      .build(new CacheLoader<Schema, DatumReader<GenericRecord>>() {
+        @Override
+        public DatumReader<GenericRecord> load(Schema schema) {
+          // must use the target dataset's schema for reading to ensure the
+          // records are able to be stored using it
+          return new GenericDatumReader<GenericRecord>(schema, datasetSchema);
+        }
+      });
+
+  /**
+   * The binary decoder to reuse for event parsing.
+   */
+  private BinaryDecoder decoder = null;
+
+  /**
+   * Create a new AvroParser given the schema of the destination dataset.
+   *
+   * @param datasetSchema The schema of the destination dataset.
+   */
+  private AvroParser(Schema datasetSchema) {
+    this.datasetSchema = datasetSchema;
+  }
+
+  /**
+   * Parse the entity from the body of the given event.
+   *
+   * @param event The event to parse.
+   * @param reuse If non-null, this may be reused and returned from this method.
+   * @return The parsed entity as a GenericRecord.
+   * @throws EventDeliveryException A recoverable error such as an error
+   *                                downloading the schema from the URL has
+   *                                occurred.
+   * @throws NonRecoverableEventException A non-recoverable error such as an
+   *                                      unparsable schema or entity has
+   *                                      occurred.
+   */
+  @Override
+  public GenericRecord parse(Event event, GenericRecord reuse)
+      throws EventDeliveryException, NonRecoverableEventException {
+    decoder = DecoderFactory.get().binaryDecoder(event.getBody(), decoder);
+
+    try {
+      DatumReader<GenericRecord> reader = readers.getUnchecked(schema(event));
+      return reader.read(reuse, decoder);
+    } catch (IOException ex) {
+      throw new NonRecoverableEventException("Cannot deserialize event", ex);
+    } catch (RuntimeException ex) {
+      throw new NonRecoverableEventException("Cannot deserialize event", ex);
+    }
+  }
+
+  /**
+   * Get the schema from the event headers.
+   *
+   * @param event The Flume event
+   * @return The schema for the event
+   * @throws EventDeliveryException A recoverable error such as an error
+   *                                downloading the schema from the URL has
+   *                                occurred.
+   * @throws NonRecoverableEventException A non-recoverable error such as an
+   *                                      unparsable schema has occurred.
+   */
+  private static Schema schema(Event event) throws EventDeliveryException,
+      NonRecoverableEventException {
+    Map<String, String> headers = event.getHeaders();
+    String schemaURL = headers.get(AVRO_SCHEMA_URL_HEADER);
+    try {
+      if (schemaURL != null) {
+        return schemasFromURL.get(schemaURL);
+      } else {
+        String schemaLiteral = headers.get(AVRO_SCHEMA_LITERAL_HEADER);
+        if (schemaLiteral == null) {
+          throw new NonRecoverableEventException("No schema in event headers."
+              + " Headers must include either " + AVRO_SCHEMA_URL_HEADER
+              + " or " + AVRO_SCHEMA_LITERAL_HEADER);
+        }
+        return schemasFromLiteral.get(schemaLiteral);
+      }
+    } catch (ExecutionException ex) {
+      throw new EventDeliveryException("Cannot get schema", ex.getCause());
+    } catch (UncheckedExecutionException ex) {
+      throw new NonRecoverableEventException("Cannot parse schema",
+          ex.getCause());
+    }
+  }
+
+  public static class Builder implements EntityParser.Builder<GenericRecord> {
+
+    @Override
+    public EntityParser<GenericRecord> build(Schema datasetSchema, Context config) {
+      return new AvroParser(datasetSchema);
+    }
+
+  }
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/EntityParser.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/EntityParser.java
new file mode 100644
index 0000000..f2051a2
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/EntityParser.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite.parser;
+
+import javax.annotation.concurrent.NotThreadSafe;
+import org.apache.avro.Schema;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.sink.kite.NonRecoverableEventException;
+
+/**
+ * Parses Flume events into Kite entities of type {@code E}.
+ */
+@NotThreadSafe
+public interface EntityParser<E> {
+  /**
+   * Parse a Kite entity from a Flume event
+   *
+   * @param event The event to parse
+   * @param reuse If non-null, this may be reused and returned
+   * @return The parsed entity
+   * @throws EventDeliveryException A recoverable error during parsing. Parsing
+   *                                can be safely retried.
+   * @throws NonRecoverableEventException A non-recoverable error during
+   *                                      parsing. The event must be discarded.
+   */
+  public E parse(Event event, E reuse) throws EventDeliveryException,
+      NonRecoverableEventException;
+
+  /**
+   * Knows how to build {@code EntityParser}s. Implementers must provide a
+   * no-arg constructor.
+   *
+   * @param <E> The type of entities generated
+   */
+  public static interface Builder<E> {
+    public EntityParser<E> build(Schema datasetSchema, Context config);
+  }
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/EntityParserFactory.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/EntityParserFactory.java
new file mode 100644
index 0000000..3720ff3
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/parser/EntityParserFactory.java
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite.parser;
+
+import java.util.Arrays;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.flume.Context;
+
+import static org.apache.flume.sink.kite.DatasetSinkConstants.*;
+
+public class EntityParserFactory {
+
+  /**
+   * Creates the parser selected by CONFIG_ENTITY_PARSER: the builtin Avro
+   * parser, or one built by a custom EntityParser.Builder class.
+   */
+  public EntityParser<GenericRecord> newParser(Schema datasetSchema,
+      Context config) {
+    String parserType = config.getString(CONFIG_ENTITY_PARSER,
+        DEFAULT_ENTITY_PARSER);
+
+    if (parserType.equals(AVRO_ENTITY_PARSER)) {
+      return new AvroParser.Builder().build(datasetSchema, config);
+    }
+
+    Class<? extends EntityParser.Builder> builderClass;
+    Class<?> c;
+    try {
+      c = Class.forName(parserType);
+    } catch (ClassNotFoundException ex) {
+      throw new IllegalArgumentException("EntityParser.Builder class "
+          + parserType + " not found. Must set " + CONFIG_ENTITY_PARSER
+          + " to a class that implements EntityParser.Builder or to a builtin"
+          + " parser: " + Arrays.toString(AVAILABLE_PARSERS), ex);
+    }
+
+    if (EntityParser.Builder.class.isAssignableFrom(c)) {
+      builderClass = c.asSubclass(EntityParser.Builder.class);
+    } else {
+      throw new IllegalArgumentException("Class " + parserType + " does not"
+          + " implement EntityParser.Builder. Must set "
+          + CONFIG_ENTITY_PARSER + " to a class that extends"
+          + " EntityParser.Builder or to a builtin parser: "
+          + Arrays.toString(AVAILABLE_PARSERS));
+    }
+
+    EntityParser.Builder<GenericRecord> builder;
+    try {
+      builder = builderClass.newInstance();
+    } catch (InstantiationException ex) {
+      throw new IllegalArgumentException("Can't instantiate class "
+          + parserType + ". Must set " + CONFIG_ENTITY_PARSER + " to a class"
+          + " that extends EntityParser.Builder or to a builtin parser: "
+          + Arrays.toString(AVAILABLE_PARSERS), ex);
+    } catch (IllegalAccessException ex) {
+      throw new IllegalArgumentException("Can't instantiate class "
+          + parserType + ". Must set " + CONFIG_ENTITY_PARSER + " to a class"
+          + " that extends EntityParser.Builder or to a builtin parser: "
+          + Arrays.toString(AVAILABLE_PARSERS), ex);
+    }
+
+    return builder.build(datasetSchema, config);
+  }
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/FailurePolicy.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/FailurePolicy.java
new file mode 100644
index 0000000..f6f875a
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/FailurePolicy.java
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite.policy;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.sink.kite.DatasetSink;
+import org.kitesdk.data.Syncable;
+
+/**
+ * A strategy that decides what happens to events whose delivery failed in a
+ * non-recoverable way.
+ *
+ * Non-recoverable event delivery failures include:
+ *
+ * 1. The {@code EntityParser} throws an error while parsing the event body.
+ * 2. The schema of the event does not match the schema of the destination
+ *    dataset.
+ * 3. The Avro parser is used and the Event headers carry no schema.
+ *
+ * A FailurePolicy follows the life cycle of the writer of the
+ * {@link DatasetSink}:
+ *
+ * 1. The policy is instantiated whenever a new writer is created.
+ * 2. Each failed Event is passed to
+ *    {@link #handle(org.apache.flume.Event, java.lang.Throwable)} so the
+ *    policy can deal with it.
+ * 3. If the {@link DatasetSink} is configured to commit on batch,
+ *    {@link #sync()} is called when the batch is committed.
+ * 4. When the writer is closed, the policy's {@link #close()} method is
+ *    called as well.
+ */
+public interface FailurePolicy {
+
+  /**
+   * Deals with a single non-recoverable event.
+   *
+   * @param event The event that could not be delivered
+   * @param cause Why delivering the event failed
+   * @throws EventDeliveryException The policy itself could not handle the
+   *                                event. The Flume transaction is then
+   *                                rolled back, so the event is retried
+   *                                together with the rest of its batch.
+   */
+  void handle(Event event, Throwable cause)
+      throws EventDeliveryException;
+
+  /**
+   * Makes sure every event handled so far has reached stable storage.
+   *
+   * Implementations use this to sync data they have accepted but not yet
+   * fully handled.
+   *
+   * See {@link Syncable#sync()}.
+   *
+   * @throws EventDeliveryException The policy failed to sync its data.
+   *                                When this is thrown, the Flume
+   *                                transaction is rolled back and the batch
+   *                                is retried.
+   */
+  void sync() throws EventDeliveryException;
+
+  /**
+   * Closes this FailurePolicy, releasing every resource it holds.
+   *
+   * @throws EventDeliveryException The policy failed to close its resources.
+   *                                When this is thrown, the Flume
+   *                                transaction is rolled back and the batch
+   *                                is retried.
+   */
+  void close() throws EventDeliveryException;
+
+  /**
+   * Factory for {@code FailurePolicy} instances. Implementations need a
+   * no-arg constructor.
+   */
+  interface Builder {
+
+    /**
+     * Creates a new {@code FailurePolicy}.
+     *
+     * @param config The Flume configuration context
+     * @return The {@code FailurePolicy}
+     */
+    FailurePolicy build(Context config);
+  }
+
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/FailurePolicyFactory.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/FailurePolicyFactory.java
new file mode 100644
index 0000000..d3b1fe8
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/FailurePolicyFactory.java
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite.policy;
+
+import java.util.Arrays;
+import org.apache.flume.Context;
+
+import static org.apache.flume.sink.kite.DatasetSinkConstants.*;
+
+public class FailurePolicyFactory {
+
+  /**
+   * Creates the policy selected by CONFIG_FAILURE_POLICY: the builtin retry
+   * or save policy, or one built by a custom FailurePolicy.Builder class.
+   */
+  public FailurePolicy newPolicy(Context config) {
+    String policyType = config.getString(CONFIG_FAILURE_POLICY,
+        DEFAULT_FAILURE_POLICY);
+
+    if (policyType.equals(RETRY_FAILURE_POLICY)) {
+      return new RetryPolicy.Builder().build(config);
+    }
+    if (policyType.equals(SAVE_FAILURE_POLICY)) {
+      return new SavePolicy.Builder().build(config);
+    }
+
+    Class<? extends FailurePolicy.Builder> builderClass;
+    Class<?> c;
+    try {
+      c = Class.forName(policyType);
+    } catch (ClassNotFoundException ex) {
+      throw new IllegalArgumentException("FailurePolicy.Builder class "
+          + policyType + " not found. Must set " + CONFIG_FAILURE_POLICY
+          + " to a class that implements FailurePolicy.Builder or to a builtin"
+          + " policy: " + Arrays.toString(AVAILABLE_POLICIES), ex);
+    }
+
+    if (FailurePolicy.Builder.class.isAssignableFrom(c)) {
+      builderClass = c.asSubclass(FailurePolicy.Builder.class);
+    } else {
+      throw new IllegalArgumentException("Class " + policyType + " does not"
+          + " implement FailurePolicy.Builder. Must set "
+          + CONFIG_FAILURE_POLICY + " to a class that extends"
+          + " FailurePolicy.Builder or to a builtin policy: "
+          + Arrays.toString(AVAILABLE_POLICIES));
+    }
+
+    FailurePolicy.Builder builder;
+    try {
+      builder = builderClass.newInstance();
+    } catch (InstantiationException ex) {
+      throw new IllegalArgumentException("Can't instantiate class "
+          + policyType + ". Must set " + CONFIG_FAILURE_POLICY + " to a class"
+          + " that extends FailurePolicy.Builder or to a builtin policy: "
+          + Arrays.toString(AVAILABLE_POLICIES), ex);
+    } catch (IllegalAccessException ex) {
+      throw new IllegalArgumentException("Can't instantiate class "
+          + policyType + ". Must set " + CONFIG_FAILURE_POLICY + " to a class"
+          + " that extends FailurePolicy.Builder or to a builtin policy: "
+          + Arrays.toString(AVAILABLE_POLICIES), ex);
+    }
+
+    return builder.build(config);
+  }
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/RetryPolicy.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/RetryPolicy.java
new file mode 100644
index 0000000..9a4991c
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/RetryPolicy.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite.policy;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A failure policy that only logs the error and then throws an
+ * {@link EventDeliveryException} to force a retry.
+ */
+public class RetryPolicy implements FailurePolicy {
+  private static final Logger LOG = LoggerFactory.getLogger(RetryPolicy.class);
+
+  private RetryPolicy() {
+    // instantiated through the Builder only
+  }
+
+  @Override
+  public void handle(Event event, Throwable cause) throws EventDeliveryException {
+    // the message goes to the error log, the stack trace only to debug
+    final String reason = cause.getLocalizedMessage();
+    LOG.error("Event delivery failed: " + reason);
+    LOG.debug("Exception follows.", cause);
+    throw new EventDeliveryException(cause);
+  }
+
+  @Override
+  public void sync() throws EventDeliveryException {
+    // this policy keeps no data, so there is nothing to sync
+  }
+
+  @Override
+  public void close() throws EventDeliveryException {
+    // this policy holds no resources, so there is nothing to close
+  }
+
+  public static class Builder implements FailurePolicy.Builder {
+    @Override
+    public FailurePolicy build(Context config) {
+      return new RetryPolicy();
+    }
+  }
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/SavePolicy.java b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/SavePolicy.java
new file mode 100644
index 0000000..bd537ec
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/main/java/org/apache/flume/sink/kite/policy/SavePolicy.java
@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite.policy;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+import java.nio.ByteBuffer;
+import java.util.Map;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.source.avro.AvroFlumeEvent;
+import org.kitesdk.data.DatasetDescriptor;
+import org.kitesdk.data.DatasetWriter;
+import org.kitesdk.data.Datasets;
+import org.kitesdk.data.Formats;
+import org.kitesdk.data.Syncable;
+import org.kitesdk.data.View;
+
+import static org.apache.flume.sink.kite.DatasetSinkConstants.*;
+
+/**
+ * A failure policy that writes the raw Flume event to a Kite dataset.
+ */
+public class SavePolicy implements FailurePolicy {
+
+  private final View<AvroFlumeEvent> dataset;
+  private DatasetWriter<AvroFlumeEvent> writer;
+  private int nEventsHandled;
+
+  private SavePolicy(Context context) {
+    String uri = context.getString(CONFIG_KITE_ERROR_DATASET_URI);
+    Preconditions.checkArgument(uri != null, "Must set "
+        + CONFIG_KITE_ERROR_DATASET_URI + " when " + CONFIG_FAILURE_POLICY
+        + "=save");
+    if (Datasets.exists(uri)) {
+      dataset = Datasets.load(uri, AvroFlumeEvent.class);
+    } else {
+      DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
+          .schema(AvroFlumeEvent.class)
+          .build();
+      dataset = Datasets.create(uri, descriptor, AvroFlumeEvent.class);
+    }
+
+    nEventsHandled = 0;
+  }
+
+  @Override
+  public void handle(Event event, Throwable cause) throws EventDeliveryException {
+    try {
+      if (writer == null) {
+        writer = dataset.newWriter();
+      }
+
+      final AvroFlumeEvent avroEvent = new AvroFlumeEvent();
+      avroEvent.setBody(ByteBuffer.wrap(event.getBody()));
+      avroEvent.setHeaders(toCharSeqMap(event.getHeaders()));
+
+      writer.write(avroEvent);
+      nEventsHandled++;
+    } catch (RuntimeException ex) {
+      throw new EventDeliveryException(ex);
+    }
+  }
+
+  @Override
+  public void sync() throws EventDeliveryException {
+    if (nEventsHandled > 0) {
+      if (Formats.PARQUET.equals(
+          dataset.getDataset().getDescriptor().getFormat())) {
+        // We need to close the writer on sync if we're writing to a Parquet
+        // dataset
+        close();
+      } else {
+        if (writer instanceof Syncable) {
+          ((Syncable) writer).sync();
+        }
+      }
+    }
+  }
+
+  @Override
+  public void close() throws EventDeliveryException {
+    // Check the writer itself, not the event count: a writer whose first
+    // write failed has handled no events but still has to be closed.
+    if (writer != null) {
+      try {
+        writer.close();
+      } catch (RuntimeException ex) {
+        throw new EventDeliveryException(ex);
+      } finally {
+        writer = null;
+        nEventsHandled = 0;
+      }
+    }
+  }
+
+  /**
+   * Helper function to convert a map of String to a map of CharSequence.
+   */
+  private static Map<CharSequence, CharSequence> toCharSeqMap(
+      Map<String, String> map) {
+    return Maps.<CharSequence, CharSequence>newHashMap(map);
+  }
+
+  public static class Builder implements FailurePolicy.Builder {
+    @Override
+    public FailurePolicy build(Context config) {
+      return new SavePolicy(config);
+    }
+  }
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/test/java/org/apache/flume/sink/kite/TestDatasetSink.java b/code/flume-ng-sinks/flume-dataset-sink/src/test/java/org/apache/flume/sink/kite/TestDatasetSink.java
new file mode 100644
index 0000000..3709577
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/test/java/org/apache/flume/sink/kite/TestDatasetSink.java
@@ -0,0 +1,1036 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.kite;
+
+import com.google.common.base.Function;
+import com.google.common.base.Throwables;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.GenericRecordBuilder;
+import org.apache.avro.io.Encoder;
+import org.apache.avro.io.EncoderFactory;
+import org.apache.avro.reflect.ReflectDatumWriter;
+import org.apache.avro.util.Utf8;
+import org.apache.commons.io.FileUtils;
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.Transaction;
+import org.apache.flume.channel.MemoryChannel;
+import org.apache.flume.conf.Configurables;
+import org.apache.flume.event.SimpleEvent;
+import org.apache.flume.sink.kite.parser.EntityParser;
+import org.apache.flume.sink.kite.policy.FailurePolicy;
+import org.apache.flume.source.avro.AvroFlumeEvent;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.kitesdk.data.Dataset;
+import org.kitesdk.data.DatasetDescriptor;
+import org.kitesdk.data.DatasetReader;
+import org.kitesdk.data.DatasetWriter;
+import org.kitesdk.data.Datasets;
+import org.kitesdk.data.PartitionStrategy;
+import org.kitesdk.data.View;
+
+import javax.annotation.Nullable;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.net.URI;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.Callable;
+
+import static org.mockito.Mockito.any;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+public class TestDatasetSink {
+
+ // URIs and names of the file-based repository and datasets used by the tests
+ public static final String FILE_REPO_URI = "repo:file:target/test_repo";
+ public static final String DATASET_NAME = "test";
+ public static final String FILE_DATASET_URI =
+ "dataset:file:target/test_repo/" + DATASET_NAME;
+ public static final String ERROR_DATASET_URI =
+ "dataset:file:target/test_repo/failed_events";
+ // file that saveSchema() writes RECORD_SCHEMA to
+ public static final File SCHEMA_FILE = new File("target/record-schema.avsc");
+ // schema of the target dataset: a string id and an optional msg
+ public static final Schema RECORD_SCHEMA = new Schema.Parser().parse(
+ "{\"type\":\"record\",\"name\":\"rec\",\"fields\":[" +
+ "{\"name\":\"id\",\"type\":\"string\"}," +
+ "{\"name\":\"msg\",\"type\":[\"string\",\"null\"]," +
+ "\"default\":\"default\"}]}");
+ // same record name as RECORD_SCHEMA but with only the id field
+ public static final Schema COMPATIBLE_SCHEMA = new Schema.Parser().parse(
+ "{\"type\":\"record\",\"name\":\"rec\",\"fields\":[" +
+ "{\"name\":\"id\",\"type\":\"string\"}]}");
+ // different record name and field than RECORD_SCHEMA
+ public static final Schema INCOMPATIBLE_SCHEMA = new Schema.Parser().parse(
+ "{\"type\":\"record\",\"name\":\"user\",\"fields\":[" +
+ "{\"name\":\"username\",\"type\":\"string\"}]}");
+ // RECORD_SCHEMA plus an int priority field that defaults to 0
+ public static final Schema UPDATED_SCHEMA = new Schema.Parser().parse(
+ "{\"type\":\"record\",\"name\":\"rec\",\"fields\":[" +
+ "{\"name\":\"id\",\"type\":\"string\"}," +
+ "{\"name\":\"priority\",\"type\":\"int\", \"default\": 0}," +
+ "{\"name\":\"msg\",\"type\":[\"string\",\"null\"]," +
+ "\"default\":\"default\"}]}");
+ // descriptor used to create the target dataset
+ public static final DatasetDescriptor DESCRIPTOR = new DatasetDescriptor
+ .Builder()
+ .schema(RECORD_SCHEMA)
+ .build();
+
+ // per-test state, rebuilt by setup()
+ Context config = null;
+ Channel in = null;
+ // records that setup() puts on the channel; typed so tests can iterate it
+ List<GenericRecord> expected = null;
+ private static final String DFS_DIR = "target/test/dfs";
+ private static final String TEST_BUILD_DATA_KEY = "test.build.data";
+ // value of the TEST_BUILD_DATA_KEY system property before the tests ran
+ private static String oldTestBuildDataProp = null;
+
+ /**
+ * Points the {@code test.build.data} system property at {@link #DFS_DIR},
+ * remembering its previous value, and writes {@link #RECORD_SCHEMA} to
+ * {@link #SCHEMA_FILE}.
+ *
+ * @throws IOException if the schema file cannot be written
+ */
+ @BeforeClass
+ public static void saveSchema() throws IOException {
+ oldTestBuildDataProp = System.getProperty(TEST_BUILD_DATA_KEY);
+ System.setProperty(TEST_BUILD_DATA_KEY, DFS_DIR);
+ FileWriter schema = new FileWriter(SCHEMA_FILE);
+ try {
+ schema.append(RECORD_SCHEMA.toString());
+ } finally {
+ // release the file handle even if the write fails
+ schema.close();
+ }
+ }
+
+ /**
+ * Deletes {@link #DFS_DIR} and restores the {@code test.build.data} system
+ * property to the state it had before {@link #saveSchema()} ran.
+ */
+ @AfterClass
+ public static void tearDownClass() {
+ FileUtils.deleteQuietly(new File(DFS_DIR));
+ if (oldTestBuildDataProp != null) {
+ System.setProperty(TEST_BUILD_DATA_KEY, oldTestBuildDataProp);
+ } else {
+ // the property was unset before the tests ran, so unset it again
+ System.clearProperty(TEST_BUILD_DATA_KEY);
+ }
+ }
+
+ /**
+ * Recreates the target dataset, configures a memory channel, and puts three
+ * serialized records on the channel. Events alternate between carrying the
+ * schema as a literal header and as a URL header.
+ *
+ * @throws EventDeliveryException if the events cannot be put on the channel
+ */
+ @Before
+ public void setup() throws EventDeliveryException {
+ Datasets.delete(FILE_DATASET_URI);
+ Datasets.create(FILE_DATASET_URI, DESCRIPTOR);
+
+ this.config = new Context();
+ config.put("keep-alive", "0");
+ this.in = new MemoryChannel();
+ Configurables.configure(in, config);
+
+ config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_URI, FILE_DATASET_URI);
+
+ GenericRecordBuilder builder = new GenericRecordBuilder(RECORD_SCHEMA);
+ expected = Lists.<GenericRecord>newArrayList(
+ builder.set("id", "1").set("msg", "msg1").build(),
+ builder.set("id", "2").set("msg", "msg2").build(),
+ builder.set("id", "3").set("msg", "msg3").build());
+
+ putToChannel(in, Iterables.transform(expected,
+ new Function<GenericRecord, Event>() {
+ private int i = 0;
+
+ @Override
+ public Event apply(@Nullable GenericRecord rec) {
+ this.i += 1;
+ // every second event references the schema by URL
+ boolean useURI = (i % 2) == 0;
+ return event(rec, RECORD_SCHEMA, SCHEMA_FILE, useURI);
+ }
+ }));
+ }
+
+ /**
+ * Deletes the target dataset after each test.
+ */
+ @After
+ public void teardown() {
+ Datasets.delete(FILE_DATASET_URI);
+ }
+
+ /**
+ * Configures the sink with the repo URI and dataset name keys (the dataset
+ * URI key is set to null) and checks that all expected records are written
+ * and the channel is left empty.
+ */
+ @Test
+ public void testOldConfig() throws EventDeliveryException {
+ config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_URI, null);
+ config.put(DatasetSinkConstants.CONFIG_KITE_REPO_URI, FILE_REPO_URI);
+ config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_NAME, DATASET_NAME);
+
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * Sets an invalid repo URI and an empty dataset name while keeping the
+ * dataset URI from setup, and checks that the records are still written to
+ * {@link #FILE_DATASET_URI}.
+ */
+ @Test
+ public void testDatasetUriOverridesOldConfig() throws EventDeliveryException {
+ // CONFIG_KITE_DATASET_URI is still set, otherwise this will cause an error
+ config.put(DatasetSinkConstants.CONFIG_KITE_REPO_URI, "bad uri");
+ config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_NAME, "");
+
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * Runs the sink against the file-based dataset created in setup and checks
+ * that every expected record is written and the channel is drained.
+ */
+ @Test
+ public void testFileStore()
+ throws EventDeliveryException, NonRecoverableEventException {
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * Recreates the target dataset in Parquet format and checks that, after
+ * process(), the channel transaction is still open and no records are
+ * readable, and that the records are readable after stop().
+ */
+ @Test
+ public void testParquetDataset() throws EventDeliveryException {
+ Datasets.delete(FILE_DATASET_URI);
+ Dataset created = Datasets.create(FILE_DATASET_URI,
+ new DatasetDescriptor.Builder(DESCRIPTOR)
+ .format("parquet")
+ .build());
+
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ // the transaction should not commit during the call to process
+ assertThrows("Transaction should still be open", IllegalStateException.class,
+ new Callable() {
+ @Override
+ public Object call() throws EventDeliveryException {
+ in.getTransaction().begin();
+ return null;
+ }
+ });
+ // The records won't commit until the call to stop()
+ Assert.assertEquals("Should not have committed", 0, read(created).size());
+
+ sink.stop();
+
+ Assert.assertEquals(Sets.newHashSet(expected), read(created));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * Writes the expected records to a dataset partitioned by the id field and
+ * checks that all of them can be read back. The partitioned dataset is
+ * deleted afterwards if it exists.
+ */
+ @Test
+ public void testPartitionedData() throws EventDeliveryException {
+ URI partitionedUri = URI.create("dataset:file:target/test_repo/partitioned");
+ try {
+ Datasets.create(partitionedUri, new DatasetDescriptor.Builder(DESCRIPTOR)
+ .partitionStrategy(new PartitionStrategy.Builder()
+ .identity("id", 10) // partition by id
+ .build())
+ .build());
+
+ config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_URI,
+ partitionedUri.toString());
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(partitionedUri)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ } finally {
+ if (Datasets.exists(partitionedUri)) {
+ Datasets.delete(partitionedUri);
+ }
+ }
+ }
+
+ /**
+ * Starts the sink while the target dataset does not exist, expects the first
+ * process() call to fail with an EventDeliveryException, then creates the
+ * dataset and checks that a second process() call writes all records.
+ */
+ @Test
+ public void testStartBeforeDatasetCreated() throws EventDeliveryException {
+ // delete the dataset created by setup
+ Datasets.delete(FILE_DATASET_URI);
+
+ DatasetSink sink = sink(in, config);
+
+ // start the sink
+ sink.start();
+
+ // run the sink without a target dataset
+ try {
+ sink.process();
+ Assert.fail("Should have thrown an exception: no such dataset");
+ } catch (EventDeliveryException e) {
+ // expected
+ }
+
+ // create the target dataset
+ Datasets.create(FILE_DATASET_URI, DESCRIPTOR);
+
+ // run the sink
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals(Sets.newHashSet(expected), read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * Processes the initial records, updates the dataset's schema to
+ * {@link #UPDATED_SCHEMA}, rolls the sink, and then processes a record
+ * written with the updated schema. All records are expected to read back in
+ * the updated schema.
+ */
+ @Test
+ public void testDatasetUpdate() throws EventDeliveryException {
+ // build a record with the updated schema, including the priority field
+ GenericRecordBuilder updatedBuilder = new GenericRecordBuilder(UPDATED_SCHEMA);
+ GenericData.Record updatedRecord = updatedBuilder
+ .set("id", "0")
+ .set("priority", 1)
+ .set("msg", "Priority 1 message!")
+ .build();
+
+ // make a set of the expected records with the new schema
+ Set expectedAsUpdated = Sets.newHashSet();
+ for (GenericRecord record : expected) {
+ expectedAsUpdated.add(updatedBuilder
+ .clear("priority")
+ .set("id", record.get("id"))
+ .set("msg", record.get("msg"))
+ .build());
+ }
+ expectedAsUpdated.add(updatedRecord);
+
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ // update the dataset's schema
+ DatasetDescriptor updated = new DatasetDescriptor
+ .Builder(Datasets.load(FILE_DATASET_URI).getDataset().getDescriptor())
+ .schema(UPDATED_SCHEMA)
+ .build();
+ Datasets.update(FILE_DATASET_URI, updated);
+
+ // trigger a roll on the next process call to refresh the writer
+ sink.roll();
+
+ // add the updated record to the incoming channel
+ putToChannel(in, event(updatedRecord, UPDATED_SCHEMA, null, false));
+
+ // process events with the updated schema
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals(expectedAsUpdated, read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * Starts a MiniDFSCluster, writes the expected records to a dataset stored
+ * on it, and checks that they can be read back. The dataset is deleted and
+ * the cluster shut down in the finally block.
+ */
+ @Test
+ public void testMiniClusterStore() throws EventDeliveryException, IOException {
+ // setup a minicluster
+ MiniDFSCluster cluster = new MiniDFSCluster
+ .Builder(new Configuration())
+ .build();
+
+ FileSystem dfs = cluster.getFileSystem();
+ Configuration conf = dfs.getConf();
+
+ // NOTE(review): there is no separator between "/tmp/repo" and DATASET_NAME,
+ // so the URI path ends in "/tmp/repotest" -- confirm this is intended
+ URI hdfsUri = URI.create(
+ "dataset:" + conf.get("fs.defaultFS") + "/tmp/repo" + DATASET_NAME);
+ try {
+ // create a repository and dataset in HDFS
+ Datasets.create(hdfsUri, DESCRIPTOR);
+
+ // update the config to use the HDFS repository
+ config.put(DatasetSinkConstants.CONFIG_KITE_DATASET_URI, hdfsUri.toString());
+
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(hdfsUri)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+
+ } finally {
+ if (Datasets.exists(hdfsUri)) {
+ Datasets.delete(hdfsUri);
+ }
+ cluster.shutdown();
+ }
+ }
+
+ /**
+ * Reconfigures the sink with a batch size of two and checks which records
+ * are readable after each roll: the first two after the second process()
+ * call, and all three after the third.
+ */
+ @Test
+ public void testBatchSize() throws EventDeliveryException {
+ DatasetSink sink = sink(in, config);
+
+ // use a batch size of two records
+ config.put("kite.batchSize", "2");
+ Configurables.configure(sink, config);
+
+ sink.start();
+ sink.process(); // process the first and second
+ sink.roll(); // roll at the next process call
+ sink.process(); // roll and process the third
+ Assert.assertEquals(
+ Sets.newHashSet(expected.subList(0, 2)),
+ read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ sink.roll(); // roll at the next process call
+ sink.process(); // roll, the channel is empty
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ sink.stop();
+ }
+
+ /**
+ * Sets a one-second roll interval, processes the records, sleeps past the
+ * interval, and checks that the records are readable after the next
+ * process() call, before the sink is stopped.
+ */
+ @Test
+ public void testTimedFileRolling()
+ throws EventDeliveryException, InterruptedException {
+ // use a new roll interval
+ config.put("kite.rollInterval", "1"); // in seconds
+
+ DatasetSink sink = sink(in, config);
+
+ Dataset records = Datasets.load(FILE_DATASET_URI);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+
+ Thread.sleep(1100); // sleep longer than the roll interval
+ sink.process(); // rolling happens in the process method
+
+ Assert.assertEquals(Sets.newHashSet(expected), read(records));
+
+ // wait until the end to stop because it would close the files
+ sink.stop();
+ }
+
+ /**
+ * Adds an event serialized with {@link #COMPATIBLE_SCHEMA} (id only) and
+ * checks that it is written alongside the other records and reads back as a
+ * {@link #RECORD_SCHEMA} record built with only the id set.
+ */
+ @Test
+ public void testCompatibleSchemas() throws EventDeliveryException {
+ DatasetSink sink = sink(in, config);
+
+ // add a compatible record that is missing the msg field
+ GenericRecordBuilder compatBuilder = new GenericRecordBuilder(
+ COMPATIBLE_SCHEMA);
+ GenericData.Record compatibleRecord = compatBuilder.set("id", "0").build();
+
+ // add the record to the incoming channel
+ putToChannel(in, event(compatibleRecord, COMPATIBLE_SCHEMA, null, false));
+
+ // the record will be read using the real schema, so create the expected
+ // record using it, but without any data
+
+ GenericRecordBuilder builder = new GenericRecordBuilder(RECORD_SCHEMA);
+ GenericData.Record expectedRecord = builder.set("id", "0").build();
+ expected.add(expectedRecord);
+
+ // run the sink
+ sink.start();
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * Adds an event whose schema header and body both use
+ * {@link #INCOMPATIBLE_SCHEMA} and checks that process() throws an
+ * EventDeliveryException and that all events remain on the channel.
+ */
+ @Test
+ public void testIncompatibleSchemas() throws EventDeliveryException {
+ final DatasetSink sink = sink(in, config);
+
+ GenericRecordBuilder builder = new GenericRecordBuilder(
+ INCOMPATIBLE_SCHEMA);
+ GenericData.Record rec = builder.set("username", "koala").build();
+ putToChannel(in, event(rec, INCOMPATIBLE_SCHEMA, null, false));
+
+ // run the sink
+ sink.start();
+ assertThrows("Should fail", EventDeliveryException.class,
+ new Callable() {
+ @Override
+ public Object call() throws EventDeliveryException {
+ sink.process();
+ return null;
+ }
+ });
+ sink.stop();
+
+ Assert.assertEquals("Should have rolled back",
+ expected.size() + 1, remaining(in));
+ }
+
+ /**
+ * Adds an event with empty headers (no schema header) and checks that
+ * process() throws an EventDeliveryException and that all events remain on
+ * the channel.
+ */
+ @Test
+ public void testMissingSchema() throws EventDeliveryException {
+ final DatasetSink sink = sink(in, config);
+
+ Event badEvent = new SimpleEvent();
+ badEvent.setHeaders(Maps.newHashMap());
+ badEvent.setBody(serialize(expected.get(0), RECORD_SCHEMA));
+ putToChannel(in, badEvent);
+
+ // run the sink
+ sink.start();
+ assertThrows("Should fail", EventDeliveryException.class,
+ new Callable() {
+ @Override
+ public Object call() throws EventDeliveryException {
+ sink.process();
+ return null;
+ }
+ });
+ sink.stop();
+
+ Assert.assertEquals("Should have rolled back",
+ expected.size() + 1, remaining(in));
+ }
+
+ /**
+ * Configures the save failure policy with an error dataset URI and checks
+ * that, with only valid events on the channel, all expected records are
+ * written and the channel is drained.
+ */
+ @Test
+ public void testFileStoreWithSavePolicy() throws EventDeliveryException {
+ // start from a clean error dataset
+ if (Datasets.exists(ERROR_DATASET_URI)) {
+ Datasets.delete(ERROR_DATASET_URI);
+ }
+ config.put(DatasetSinkConstants.CONFIG_FAILURE_POLICY,
+ DatasetSinkConstants.SAVE_FAILURE_POLICY);
+ config.put(DatasetSinkConstants.CONFIG_KITE_ERROR_DATASET_URI,
+ ERROR_DATASET_URI);
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * With the save failure policy configured, adds an event with empty headers
+ * and checks that the good records are written, the channel is drained, and
+ * the bad event is readable from the error dataset as an AvroFlumeEvent.
+ */
+ @Test
+ public void testMissingSchemaWithSavePolicy() throws EventDeliveryException {
+ // start from a clean error dataset
+ if (Datasets.exists(ERROR_DATASET_URI)) {
+ Datasets.delete(ERROR_DATASET_URI);
+ }
+ config.put(DatasetSinkConstants.CONFIG_FAILURE_POLICY,
+ DatasetSinkConstants.SAVE_FAILURE_POLICY);
+ config.put(DatasetSinkConstants.CONFIG_KITE_ERROR_DATASET_URI,
+ ERROR_DATASET_URI);
+ final DatasetSink sink = sink(in, config);
+
+ Event badEvent = new SimpleEvent();
+ badEvent.setHeaders(Maps.newHashMap());
+ badEvent.setBody(serialize(expected.get(0), RECORD_SCHEMA));
+ putToChannel(in, badEvent);
+
+ // run the sink
+ sink.start();
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals("Good records should have been written",
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should not have rolled back", 0, remaining(in));
+ Assert.assertEquals("Should have saved the bad event",
+ Sets.newHashSet(AvroFlumeEvent.newBuilder()
+ .setBody(ByteBuffer.wrap(badEvent.getBody()))
+ .setHeaders(toUtf8Map(badEvent.getHeaders()))
+ .build()),
+ read(Datasets.load(ERROR_DATASET_URI, AvroFlumeEvent.class)));
+ }
+
+ /**
+ * With the save failure policy configured, adds an event whose header points
+ * at the schema file but whose body was serialized with
+ * {@link #INCOMPATIBLE_SCHEMA}. Checks that the good records are written,
+ * the channel is drained, and the bad event is readable from the error
+ * dataset.
+ */
+ @Test
+ public void testSerializedWithIncompatibleSchemasWithSavePolicy()
+ throws EventDeliveryException {
+ // start from a clean error dataset
+ if (Datasets.exists(ERROR_DATASET_URI)) {
+ Datasets.delete(ERROR_DATASET_URI);
+ }
+ config.put(DatasetSinkConstants.CONFIG_FAILURE_POLICY,
+ DatasetSinkConstants.SAVE_FAILURE_POLICY);
+ config.put(DatasetSinkConstants.CONFIG_KITE_ERROR_DATASET_URI,
+ ERROR_DATASET_URI);
+ final DatasetSink sink = sink(in, config);
+
+ GenericRecordBuilder builder = new GenericRecordBuilder(
+ INCOMPATIBLE_SCHEMA);
+ GenericData.Record rec = builder.set("username", "koala").build();
+
+ // We pass in a valid schema in the header, but an incompatible schema
+ // was used to serialize the record
+ Event badEvent = event(rec, INCOMPATIBLE_SCHEMA, SCHEMA_FILE, true);
+ putToChannel(in, badEvent);
+
+ // run the sink
+ sink.start();
+ sink.process();
+ sink.stop();
+
+ Assert.assertEquals("Good records should have been written",
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ Assert.assertEquals("Should not have rolled back", 0, remaining(in));
+ Assert.assertEquals("Should have saved the bad event",
+ Sets.newHashSet(AvroFlumeEvent.newBuilder()
+ .setBody(ByteBuffer.wrap(badEvent.getBody()))
+ .setHeaders(toUtf8Map(badEvent.getHeaders()))
+ .build()),
+ read(Datasets.load(ERROR_DATASET_URI, AvroFlumeEvent.class)));
+ }
+
+ /**
+ * Adds an event whose header points at the schema file but whose body was
+ * serialized with {@link #INCOMPATIBLE_SCHEMA}, and checks that process()
+ * throws an EventDeliveryException and all events remain on the channel.
+ */
+ @Test
+ public void testSerializedWithIncompatibleSchemas() throws EventDeliveryException {
+ final DatasetSink sink = sink(in, config);
+
+ GenericRecordBuilder builder = new GenericRecordBuilder(
+ INCOMPATIBLE_SCHEMA);
+ GenericData.Record rec = builder.set("username", "koala").build();
+
+ // We pass in a valid schema in the header, but an incompatible schema
+ // was used to serialize the record
+ putToChannel(in, event(rec, INCOMPATIBLE_SCHEMA, SCHEMA_FILE, true));
+
+ // run the sink
+ sink.start();
+ assertThrows("Should fail", EventDeliveryException.class,
+ new Callable() {
+ @Override
+ public Object call() throws EventDeliveryException {
+ sink.process();
+ return null;
+ }
+ });
+ sink.stop();
+
+ Assert.assertEquals("Should have rolled back",
+ expected.size() + 1, remaining(in));
+ }
+
+ /**
+ * With the default configuration, checks that the channel is empty after
+ * process() while no records are readable yet, and that all records are
+ * readable after stop().
+ */
+ @Test
+ public void testCommitOnBatch() throws EventDeliveryException {
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ // the transaction should commit during the call to process
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ // but the data won't be visible yet
+ Assert.assertEquals(0,
+ read(Datasets.load(FILE_DATASET_URI)).size());
+
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ }
+
+ /**
+ * Disables commit-on-batch and sync-on-batch and checks that the channel
+ * transaction is still open and no records are readable after process(), and
+ * that records are readable and the channel empty after stop().
+ */
+ @Test
+ public void testCommitOnBatchFalse() throws EventDeliveryException {
+ config.put(DatasetSinkConstants.CONFIG_FLUSHABLE_COMMIT_ON_BATCH,
+ Boolean.toString(false));
+ config.put(DatasetSinkConstants.CONFIG_SYNCABLE_SYNC_ON_BATCH,
+ Boolean.toString(false));
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ // the transaction should not commit during the call to process
+ assertThrows("Transaction should still be open", IllegalStateException.class,
+ new Callable() {
+ @Override
+ public Object call() throws EventDeliveryException {
+ in.getTransaction().begin();
+ return null;
+ }
+ });
+
+ // the data won't be visible
+ Assert.assertEquals(0,
+ read(Datasets.load(FILE_DATASET_URI)).size());
+
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ // the transaction should commit during the call to stop
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * Checks that configuring a sink with commit-on-batch disabled and
+ * sync-on-batch enabled is rejected with an IllegalArgumentException.
+ */
+ @Test
+ public void testCommitOnBatchFalseSyncOnBatchTrue() throws EventDeliveryException {
+ config.put(DatasetSinkConstants.CONFIG_FLUSHABLE_COMMIT_ON_BATCH,
+ Boolean.toString(false));
+ config.put(DatasetSinkConstants.CONFIG_SYNCABLE_SYNC_ON_BATCH,
+ Boolean.toString(true));
+
+ try {
+ sink(in, config);
+ Assert.fail("Should have thrown IllegalArgumentException");
+ } catch (IllegalArgumentException ex) {
+ // expected
+ }
+ }
+
+ /**
+ * With batch commit and sync disabled, closes the writer, commits the
+ * transaction and creates a new writer by hand. Checks that a writer exists,
+ * the channel is empty, and the records are readable after stop().
+ */
+ @Test
+ public void testCloseAndCreateWriter() throws EventDeliveryException {
+ config.put(DatasetSinkConstants.CONFIG_FLUSHABLE_COMMIT_ON_BATCH,
+ Boolean.toString(false));
+ config.put(DatasetSinkConstants.CONFIG_SYNCABLE_SYNC_ON_BATCH,
+ Boolean.toString(false));
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ sink.closeWriter();
+ sink.commitTransaction();
+ sink.createWriter();
+
+ Assert.assertNotNull("Writer should not be null", sink.getWriter());
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ }
+
+ /**
+ * With batch commit and sync disabled, closes the writer and commits the
+ * transaction by hand. Checks that the writer is null, the channel is empty,
+ * and the records are readable after stop().
+ */
+ @Test
+ public void testCloseWriter() throws EventDeliveryException {
+ config.put(DatasetSinkConstants.CONFIG_FLUSHABLE_COMMIT_ON_BATCH,
+ Boolean.toString(false));
+ config.put(DatasetSinkConstants.CONFIG_SYNCABLE_SYNC_ON_BATCH,
+ Boolean.toString(false));
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ sink.closeWriter();
+ sink.commitTransaction();
+
+ Assert.assertNull("Writer should be null", sink.getWriter());
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+
+ sink.stop();
+
+ Assert.assertEquals(
+ Sets.newHashSet(expected),
+ read(Datasets.load(FILE_DATASET_URI)));
+ }
+
+ /**
+ * With batch commit and sync disabled, commits the transaction and creates
+ * a new writer without first closing the existing one. Checks that a writer
+ * exists, the channel is empty, and that no records are readable after
+ * stop().
+ */
+ @Test
+ public void testCreateWriter() throws EventDeliveryException {
+ config.put(DatasetSinkConstants.CONFIG_FLUSHABLE_COMMIT_ON_BATCH,
+ Boolean.toString(false));
+ config.put(DatasetSinkConstants.CONFIG_SYNCABLE_SYNC_ON_BATCH,
+ Boolean.toString(false));
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ sink.commitTransaction();
+ sink.createWriter();
+ Assert.assertNotNull("Writer should not be null", sink.getWriter());
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+
+ sink.stop();
+
+ Assert.assertEquals(0, read(Datasets.load(FILE_DATASET_URI)).size());
+ }
+
+ /**
+ * Installs a mock parser, failure policy and writer, makes the writer throw
+ * a DataFileWriter.AppendWriteException, and verifies that write() passes
+ * the event to the failure policy.
+ */
+ @Test
+ public void testAppendWriteExceptionInvokesPolicy()
+ throws EventDeliveryException, NonRecoverableEventException {
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ // Mock an Event
+ Event mockEvent = mock(Event.class);
+ when(mockEvent.getBody()).thenReturn(new byte[] { 0x01 });
+
+ // Mock a GenericRecord
+ GenericRecord mockRecord = mock(GenericRecord.class);
+
+ // Mock an EntityParser
+ EntityParser mockParser = mock(EntityParser.class);
+ when(mockParser.parse(eq(mockEvent), any(GenericRecord.class)))
+ .thenReturn(mockRecord);
+ sink.setParser(mockParser);
+
+ // Mock a FailurePolicy
+ FailurePolicy mockFailurePolicy = mock(FailurePolicy.class);
+ sink.setFailurePolicy(mockFailurePolicy);
+
+ // Mock a DatasetWriter
+ DatasetWriter mockWriter = mock(DatasetWriter.class);
+ doThrow(new DataFileWriter.AppendWriteException(new IOException()))
+ .when(mockWriter).write(mockRecord);
+
+ sink.setWriter(mockWriter);
+ sink.write(mockEvent);
+
+ // Verify that the event was sent to the failure policy
+ verify(mockFailurePolicy).handle(eq(mockEvent), any(Throwable.class));
+
+ sink.stop();
+ }
+
+ /**
+ * Installs a mock parser, failure policy and writer, makes the writer throw
+ * a plain RuntimeException, and verifies that write() throws an
+ * EventDeliveryException without calling the failure policy.
+ */
+ @Test
+ public void testRuntimeExceptionThrowsEventDeliveryException()
+ throws EventDeliveryException, NonRecoverableEventException {
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ // Mock an Event
+ Event mockEvent = mock(Event.class);
+ when(mockEvent.getBody()).thenReturn(new byte[] { 0x01 });
+
+ // Mock a GenericRecord
+ GenericRecord mockRecord = mock(GenericRecord.class);
+
+ // Mock an EntityParser
+ EntityParser mockParser = mock(EntityParser.class);
+ when(mockParser.parse(eq(mockEvent), any(GenericRecord.class)))
+ .thenReturn(mockRecord);
+ sink.setParser(mockParser);
+
+ // Mock a FailurePolicy
+ FailurePolicy mockFailurePolicy = mock(FailurePolicy.class);
+ sink.setFailurePolicy(mockFailurePolicy);
+
+ // Mock a DatasetWriter
+ DatasetWriter mockWriter = mock(DatasetWriter.class);
+ doThrow(new RuntimeException()).when(mockWriter).write(mockRecord);
+
+ sink.setWriter(mockWriter);
+
+ try {
+ sink.write(mockEvent);
+ Assert.fail("Should throw EventDeliveryException");
+ } catch (EventDeliveryException ex) {
+ // expected
+
+ }
+
+ // Verify that the event was not sent to the failure policy
+ verify(mockFailurePolicy, never()).handle(eq(mockEvent), any(Throwable.class));
+
+ sink.stop();
+ }
+
+ /**
+ * Sets the sink's writer to null after a first process() call and checks
+ * that a second process() call and stop() complete, leaving the channel
+ * empty.
+ */
+ @Test
+ public void testProcessHandlesNullWriter() throws EventDeliveryException,
+ NonRecoverableEventException {
+ DatasetSink sink = sink(in, config);
+
+ // run the sink
+ sink.start();
+ sink.process();
+
+ // explicitly set the writer to null
+ sink.setWriter(null);
+
+ // this should not throw an NPE
+ sink.process();
+
+ sink.stop();
+
+ Assert.assertEquals("Should have committed", 0, remaining(in));
+ }
+
+ /**
+ * Creates a DatasetSink attached to the given channel and configured with
+ * the given context.
+ *
+ * @param in the channel the sink reads from
+ * @param config the configuration applied to the sink
+ * @return the configured sink; it has not been started
+ */
+ public static DatasetSink sink(Channel in, Context config) {
+ DatasetSink sink = new DatasetSink();
+ sink.setChannel(in);
+ Configurables.configure(sink, config);
+ return sink;
+ }
+
+ /**
+ * Reads every entity of a view into a set, closing the reader afterwards.
+ *
+ * @param view the view (or dataset) to read
+ * @param <T> the entity type of the view
+ * @return a new hash set holding all entities returned by the view's reader
+ */
+ public static <T> HashSet<T> read(View<T> view) {
+ DatasetReader<T> reader = null;
+ try {
+ reader = view.newReader();
+ return Sets.newHashSet(reader.iterator());
+ } finally {
+ // reader stays null if newReader() itself failed
+ if (reader != null) {
+ reader.close();
+ }
+ }
+ }
+
+ public static int remaining(Channel ch) throws EventDeliveryException {
+ Transaction t = ch.getTransaction();
+ try {
+ t.begin();
+ int count = 0;
+ while (ch.take() != null) {
+ count += 1;
+ }
+ t.commit();
+ return count;
+ } catch (Throwable th) {
+ t.rollback();
+ Throwables.propagateIfInstanceOf(th, Error.class);
+ Throwables.propagateIfInstanceOf(th, EventDeliveryException.class);
+ throw new EventDeliveryException(th);
+ } finally {
+ t.close();
+ }
+ }
+
+ /**
+ * Varargs convenience overload: puts the given events on the channel by
+ * delegating to the Iterable overload.
+ *
+ * @param in the channel to put the events on
+ * @param records the events to put
+ * @throws EventDeliveryException if the events cannot be put
+ */
+ public static void putToChannel(Channel in, Event... records)
+ throws EventDeliveryException {
+ putToChannel(in, Arrays.asList(records));
+ }
+
+ /**
+ * Puts all given events on the channel inside a single transaction. The
+ * transaction is rolled back if any put or the commit fails.
+ *
+ * @param in the channel to put the events on
+ * @param records the events to put
+ * @throws EventDeliveryException if a put or the commit fails; other
+ * non-Error failures are wrapped in an EventDeliveryException
+ */
+ public static void putToChannel(Channel in, Iterable<Event> records)
+ throws EventDeliveryException {
+ Transaction t = in.getTransaction();
+ try {
+ t.begin();
+ for (Event record : records) {
+ in.put(record);
+ }
+ t.commit();
+ } catch (Throwable th) {
+ t.rollback();
+ Throwables.propagateIfInstanceOf(th, Error.class);
+ Throwables.propagateIfInstanceOf(th, EventDeliveryException.class);
+ throw new EventDeliveryException(th);
+ } finally {
+ t.close();
+ }
+ }
+
+ /**
+ * Builds an event whose body is the datum serialized with the given schema
+ * and whose single header identifies a schema, either by URL or as a
+ * literal.
+ *
+ * @param datum the object to serialize into the event body
+ * @param schema the schema used to serialize the datum, and used as the
+ * literal header value when {@code useURI} is false
+ * @param file the schema file whose URI is used as the header value when
+ * {@code useURI} is true; only dereferenced in that case
+ * @param useURI whether to set the schema URL header instead of the schema
+ * literal header
+ * @return the new event
+ */
+ public static Event event(
+ Object datum, Schema schema, File file, boolean useURI) {
+ Map<String, String> headers = Maps.newHashMap();
+ if (useURI) {
+ headers.put(DatasetSinkConstants.AVRO_SCHEMA_URL_HEADER,
+ file.getAbsoluteFile().toURI().toString());
+ } else {
+ headers.put(DatasetSinkConstants.AVRO_SCHEMA_LITERAL_HEADER,
+ schema.toString());
+ }
+ Event e = new SimpleEvent();
+ e.setBody(serialize(datum, schema));
+ e.setHeaders(headers);
+ return e;
+ }
+
+ /**
+ * Serializes a datum to bytes with Avro's binary encoder and a
+ * ReflectDatumWriter for the given schema.
+ *
+ * @param datum the object to serialize
+ * @param schema the schema to write the datum with
+ * @return the encoded bytes
+ */
+ @SuppressWarnings("unchecked")
+ public static byte[] serialize(Object datum, Schema schema) {
+ final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+ final Encoder binary = EncoderFactory.get().binaryEncoder(buffer, null);
+ final ReflectDatumWriter datumWriter = new ReflectDatumWriter(schema);
+ try {
+ datumWriter.write(datum, binary);
+ binary.flush();
+ return buffer.toByteArray();
+ } catch (IOException ex) {
+ // propagate() always throws; it rethrows the IOException wrapped unchecked
+ throw Throwables.propagate(ex);
+ }
+ }
+
+ /**
+ * A convenience method to avoid a large number of @Test(expected=...) tests.
+ *
+ * This variant uses a Callable, which is allowed to throw checked Exceptions.
+ * The thrown exception's class must equal {@code expected} exactly; a
+ * subclass of the expected class fails the assertion.
+ *
+ * @param message A String message to describe this assertion
+ * @param expected An Exception class that the Callable should throw
+ * @param callable A Callable that is expected to throw the exception
+ */
+ public static void assertThrows(
+ String message, Class<? extends Exception> expected, Callable<?> callable) {
+ try {
+ callable.call();
+ // fail() throws an AssertionError, which the catch below does not catch
+ Assert.fail("No exception was thrown (" + message + "), expected: " +
+ expected.getName());
+ } catch (Exception actual) {
+ Assert.assertEquals(message, expected, actual.getClass());
+ }
+ }
+
+ /**
+ * Helper function to convert a map of String to a map of Utf8.
+ *
+ * @param map A Map of String to String
+ * @return The same mappings converting the {@code String}s to {@link Utf8}s
+ */
+ public static Map<CharSequence, CharSequence> toUtf8Map(
+ Map<String, String> map) {
+ Map<CharSequence, CharSequence> utf8Map = Maps.newHashMap();
+ for (Map.Entry<String, String> entry : map.entrySet()) {
+ utf8Map.put(new Utf8(entry.getKey()), new Utf8(entry.getValue()));
+ }
+ return utf8Map;
+ }
+}
diff --git a/code/flume-ng-sinks/flume-dataset-sink/src/test/resources/enable-kerberos.xml b/code/flume-ng-sinks/flume-dataset-sink/src/test/resources/enable-kerberos.xml
new file mode 100644
index 0000000..85b0447
--- /dev/null
+++ b/code/flume-ng-sinks/flume-dataset-sink/src/test/resources/enable-kerberos.xml
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+ hadoop.security.authentication
+ kerberos
+
+
+
+ hadoop.security.authorization
+ true
+
+
+
diff --git a/code/flume-ng-sinks/flume-hdfs-sink/pom.xml b/code/flume-ng-sinks/flume-hdfs-sink/pom.xml
new file mode 100644
index 0000000..bcf6556
--- /dev/null
+++ b/code/flume-ng-sinks/flume-hdfs-sink/pom.xml
@@ -0,0 +1,196 @@
+
+
+
+
+ 4.0.0
+
+
+ flume-ng-sinks
+ org.apache.flume
+ 1.7.0
+
+
+ org.apache.flume.flume-ng-sinks
+ flume-hdfs-sink
+ Flume NG HDFS Sink
+
+
+
+
+ org.apache.rat
+ apache-rat-plugin
+
+
+
+
+
+
+
+ org.apache.flume
+ flume-ng-sdk
+
+
+
+ org.apache.flume
+ flume-ng-configuration
+
+
+
+ org.apache.flume
+ flume-ng-core
+
+
+
+ org.slf4j
+ slf4j-api
+
+
+
+ com.google.guava
+ guava
+
+
+
+ junit
+ junit
+ test
+
+
+
+ org.slf4j
+ slf4j-log4j12
+ test
+
+
+
+ org.mockito
+ mockito-all
+ test
+
+
+
+ org.apache.hadoop
+ ${hadoop.common.artifact.id}
+ true
+
+
+
+ commons-lang
+ commons-lang
+
+
+
+ commons-io
+ commons-io
+
+
+
+
+
+
+
+ hadoop-1.0
+
+
+ flume.hadoop.profile
+ 1
+
+
+
+
+
+ org.apache.hadoop
+ hadoop-test
+ test
+
+
+
+
+ com.sun.jersey
+ jersey-core
+ test
+
+
+
+
+
+
+ hadoop-2
+
+
+ flume.hadoop.profile
+ 2
+
+
+
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+ true
+
+
+
+ org.apache.hadoop
+ hadoop-auth
+ true
+
+
+
+ org.apache.hadoop
+ hadoop-minicluster
+ test
+
+
+
+
+
+
+ hbase-1
+
+
+ !flume.hadoop.profile
+
+
+
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+ true
+
+
+
+ org.apache.hadoop
+ hadoop-auth
+ true
+
+
+
+ org.apache.hadoop
+ hadoop-minicluster
+ test
+
+
+
+
+
+
+
diff --git a/code/flume-ng-sinks/flume-hdfs-sink/src/main/java/org/apache/flume/sink/hdfs/AbstractHDFSWriter.java b/code/flume-ng-sinks/flume-hdfs-sink/src/main/java/org/apache/flume/sink/hdfs/AbstractHDFSWriter.java
new file mode 100644
index 0000000..2fe309f
--- /dev/null
+++ b/code/flume-ng-sinks/flume-hdfs-sink/src/main/java/org/apache/flume/sink/hdfs/AbstractHDFSWriter.java
@@ -0,0 +1,280 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flume.sink.hdfs;
+
+import com.google.common.base.Preconditions;
+import org.apache.flume.Context;
+import org.apache.flume.FlumeException;
+import org.apache.flume.annotations.InterfaceAudience;
+import org.apache.flume.annotations.InterfaceStability;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public abstract class AbstractHDFSWriter implements HDFSWriter {
+
+ private static final Logger logger =
+ LoggerFactory.getLogger(AbstractHDFSWriter.class);
+
+ private FSDataOutputStream outputStream;
+ private FileSystem fs;
+ private Path destPath;
+ private Method refGetNumCurrentReplicas = null;
+ private Method refGetDefaultReplication = null;
+ private Method refHflushOrSync = null;
+ private Integer configuredMinReplicas = null;
+ private Integer numberOfCloseRetries = null;
+ private long timeBetweenCloseRetries = Long.MAX_VALUE;
+
+ static final Object[] NO_ARGS = new Object[]{};
+
+  /**
+   * Reads the replication and close-retry settings of this writer.
+   *
+   * <ul>
+   * <li>{@code hdfs.minBlockReplicas}: optional; when present it must be
+   * greater than or equal to 0.</li>
+   * <li>{@code hdfs.closeTries}: defaults to 1; the number of retries kept is
+   * that value minus one.</li>
+   * <li>{@code hdfs.callTimeout}: only consulted when more than one retry is
+   * configured; it is divided by the retry count, with a floor of 1000, to
+   * obtain the pause between close retries.</li>
+   * </ul>
+   *
+   * @param context the configuration to read from
+   * @throws IllegalArgumentException if hdfs.minBlockReplicas is negative
+   */
+  @Override
+  public void configure(Context context) {
+    configuredMinReplicas = context.getInteger("hdfs.minBlockReplicas");
+    if (configuredMinReplicas != null) {
+      Preconditions.checkArgument(configuredMinReplicas >= 0,
+          "hdfs.minBlockReplicas must be greater than or equal to 0");
+    }
+    numberOfCloseRetries = context.getInteger("hdfs.closeTries", 1) - 1;
+
+    if (numberOfCloseRetries > 1) {
+      try {
+        timeBetweenCloseRetries = context.getLong("hdfs.callTimeout", 10000L);
+      } catch (NumberFormatException e) {
+        // Log the raw text of the setting. Reading it with getLong() again
+        // would parse the same bad value and throw out of this handler.
+        // The previous timeBetweenCloseRetries value is kept in this case.
+        logger.warn("hdfs.callTimeout can not be parsed to a long: " +
+            context.getString("hdfs.callTimeout"));
+      }
+      timeBetweenCloseRetries = Math.max(timeBetweenCloseRetries / numberOfCloseRetries, 1000);
+    }
+
+  }
+
+  /**
+   * Tells whether the registered stream currently has fewer replicas than
+   * desired.
+   *
+   * Contract for subclasses: Call registerCurrentStream() on open,
+   * unregisterCurrentStream() on close, and the base class takes care of the
+   * rest.
+   *
+   * @return true when the replica count reported for the stream is lower than
+   *         hdfs.minBlockReplicas (if configured) or otherwise lower than the
+   *         file system's desired replication; false when the count is
+   *         unavailable (-1) or when the reflective lookup fails (the failure
+   *         is logged)
+   */
+  @Override
+  public boolean isUnderReplicated() {
+    try {
+      int currentReplicas = getNumCurrentReplicas();
+      if (currentReplicas == -1) {
+        // replica count cannot be determined for this stream
+        return false;
+      }
+      // an explicitly configured minimum wins over the file system default
+      int wantedReplicas = (configuredMinReplicas != null)
+          ? configuredMinReplicas
+          : getFsDesiredReplication();
+      return currentReplicas < wantedReplicas;
+    } catch (IllegalAccessException e) {
+      logger.error("Unexpected error while checking replication factor", e);
+    } catch (InvocationTargetException e) {
+      logger.error("Unexpected error while checking replication factor", e);
+    } catch (IllegalArgumentException e) {
+      logger.error("Unexpected error while checking replication factor", e);
+    }
+    return false;
+  }
+
+ /**
+ * Records the stream, file system and destination path that are currently
+ * open and resolves the reflective method handles used by this class
+ * (getNumCurrentReplicas, getDefaultReplication(Path) and hflush/sync).
+ *
+ * All three arguments are validated before any field is assigned.
+ * The hflush/sync lookup throws a FlumeException when neither method exists
+ * on the stream.
+ *
+ * @param outputStream the open output stream; must not be null
+ * @param fs the file system the stream belongs to; must not be null
+ * @param destPath the path being written; must not be null
+ */
+ protected void registerCurrentStream(FSDataOutputStream outputStream,
+ FileSystem fs, Path destPath) {
+ Preconditions.checkNotNull(outputStream, "outputStream must not be null");
+ Preconditions.checkNotNull(fs, "fs must not be null");
+ Preconditions.checkNotNull(destPath, "destPath must not be null");
+
+ this.outputStream = outputStream;
+ this.fs = fs;
+ this.destPath = destPath;
+ this.refGetNumCurrentReplicas = reflectGetNumCurrentReplicas(outputStream);
+ this.refGetDefaultReplication = reflectGetDefaultReplication(fs);
+ this.refHflushOrSync = reflectHflushOrSync(outputStream);
+
+ }
+
+ /**
+ * Forgets the stream, file system, destination path and the two replication
+ * method handles set by registerCurrentStream().
+ *
+ * Note that the hflush/sync handle (refHflushOrSync) is not cleared here.
+ */
+ protected void unregisterCurrentStream() {
+ this.outputStream = null;
+ this.fs = null;
+ this.destPath = null;
+ this.refGetNumCurrentReplicas = null;
+ this.refGetDefaultReplication = null;
+ }
+
+  /**
+   * Returns the replication factor the file system wants for the registered
+   * destination path.
+   *
+   * @return the value of getDefaultReplication(Path) when that method was
+   *         found by reflection, otherwise the value of the no-argument
+   *         getDefaultReplication(); 0 when no file system or path is
+   *         registered, or when the reflective call fails (a warning is
+   *         logged)
+   */
+  public int getFsDesiredReplication() {
+    if (fs == null || destPath == null) {
+      return 0;
+    }
+    if (refGetDefaultReplication == null) {
+      // will not work on Federated HDFS (see HADOOP-8014)
+      return fs.getDefaultReplication();
+    }
+    try {
+      return (Short) refGetDefaultReplication.invoke(fs, destPath);
+    } catch (IllegalAccessException e) {
+      logger.warn("Unexpected error calling getDefaultReplication(Path)", e);
+    } catch (InvocationTargetException e) {
+      logger.warn("Unexpected error calling getDefaultReplication(Path)", e);
+    }
+    return 0;
+  }
+
+  /**
+   * This method gets the datanode replication count for the current open file.
+   *
+   * If the pipeline isn't started yet or is empty, you will get the default
+   * replication factor.
+   *
+   * <p/>If this function returns -1, it means you
+   * are not properly running with the HDFS-826 patch.
+   *
+   * @return the Integer produced by the reflected getNumCurrentReplicas call,
+   *         or -1 when no method handle or stream is registered, the wrapped
+   *         stream is null, or the call does not yield an Integer
+   * @throws InvocationTargetException
+   * @throws IllegalAccessException
+   * @throws IllegalArgumentException
+   */
+  public int getNumCurrentReplicas()
+      throws IllegalArgumentException, IllegalAccessException,
+          InvocationTargetException {
+    if (refGetNumCurrentReplicas == null || outputStream == null) {
+      return -1;
+    }
+    OutputStream wrapped = outputStream.getWrappedStream();
+    if (wrapped == null) {
+      return -1;
+    }
+    Object result = refGetNumCurrentReplicas.invoke(wrapped, NO_ARGS);
+    return (result instanceof Integer) ? ((Integer) result).intValue() : -1;
+  }
+
+  /**
+   * Find the 'getNumCurrentReplicas' on the passed <code>os</code> stream.
+   *
+   * The method is looked up on the class of the stream wrapped by
+   * <code>os</code> and made accessible, since it is fetched with
+   * getDeclaredMethod. A missing method or a security restriction is logged
+   * and results in null.
+   *
+   * @param os the output stream whose wrapped stream is inspected; may be null
+   * @return Method or null.
+   */
+  private Method reflectGetNumCurrentReplicas(FSDataOutputStream os) {
+    Method m = null;
+    if (os != null) {
+      Class<? extends OutputStream> wrappedStreamClass = os.getWrappedStream()
+          .getClass();
+      try {
+        m = wrappedStreamClass.getDeclaredMethod("getNumCurrentReplicas",
+            new Class<?>[] {});
+        m.setAccessible(true);
+      } catch (NoSuchMethodException e) {
+        logger.info("FileSystem's output stream doesn't support"
+            + " getNumCurrentReplicas; --HDFS-826 not available; fsOut="
+            + wrappedStreamClass.getName() + "; err=" + e);
+      } catch (SecurityException e) {
+        logger.info("Doesn't have access to getNumCurrentReplicas on "
+            + "FileSystems's output stream --HDFS-826 not available; fsOut="
+            + wrappedStreamClass.getName(), e);
+        m = null; // could happen on setAccessible()
+      }
+    }
+    if (m != null) {
+      logger.debug("Using getNumCurrentReplicas--HDFS-826");
+    }
+    return m;
+  }
+
+  /**
+   * Find the 'getDefaultReplication' method on the passed <code>fs</code>
+   * FileSystem that takes a Path argument.
+   *
+   * A missing method or a security restriction is logged at debug level and
+   * results in null, in which case callers fall back to the no-argument
+   * variant.
+   *
+   * @param fileSystem the file system to inspect; may be null
+   * @return Method or null.
+   */
+  private Method reflectGetDefaultReplication(FileSystem fileSystem) {
+    Method m = null;
+    if (fileSystem != null) {
+      Class<?> fsClass = fileSystem.getClass();
+      try {
+        m = fsClass.getMethod("getDefaultReplication",
+            new Class<?>[] { Path.class });
+      } catch (NoSuchMethodException e) {
+        logger.debug("FileSystem implementation doesn't support"
+            + " getDefaultReplication(Path); -- HADOOP-8014 not available; " +
+            "className = " + fsClass.getName() + "; err = " + e);
+      } catch (SecurityException e) {
+        logger.debug("No access to getDefaultReplication(Path) on "
+            + "FileSystem implementation -- HADOOP-8014 not available; " +
+            "className = " + fsClass.getName() + "; err = " + e);
+      }
+    }
+    if (m != null) {
+      logger.debug("Using FileSystem.getDefaultReplication(Path) from " +
+          "HADOOP-8014");
+    }
+    return m;
+  }
+
+  /**
+   * Finds the flush method of the passed stream: 'hflush' when it exists,
+   * otherwise 'sync'.
+   *
+   * @param os the output stream to inspect; may be null
+   * @return the hflush or sync Method, or null when <code>os</code> is null
+   * @throws FlumeException if the stream offers neither hflush nor sync
+   */
+  private Method reflectHflushOrSync(FSDataOutputStream os) {
+    Method m = null;
+    if (os != null) {
+      Class<?> fsDataOutputStreamClass = os.getClass();
+      try {
+        m = fsDataOutputStreamClass.getMethod("hflush");
+      } catch (NoSuchMethodException ex) {
+        logger.debug("HFlush not found. Will use sync() instead");
+        try {
+          m = fsDataOutputStreamClass.getMethod("sync");
+        } catch (Exception ex1) {
+          String msg = "Neither hflush not sync were found. That seems to be " +
+              "a problem!";
+          logger.error(msg);
+          throw new FlumeException(msg, ex1);
+        }
+      }
+    }
+    return m;
+  }
+
+ /**
+ * If hflush is available in this version of HDFS, then this method calls
+ * hflush, else it calls sync.
+ * @param os - The stream to flush/sync
+ * @throws IOException
+ */
+ protected void hflushOrSync(FSDataOutputStream os) throws IOException {
+ try {
+ // At this point the refHflushOrSync cannot be null,
+ // since register method would have thrown if it was.
+ this.refHflushOrSync.invoke(os);
+ } catch (InvocationTargetException e) {
+ String msg = "Error while trying to hflushOrSync!";
+ logger.error(msg);
+ Throwable cause = e.getCause();
+ if (cause != null && cause instanceof IOException) {
+ throw (IOException)cause;
+ }
+ throw new FlumeException(msg, e);
+ } catch (Exception e) {
+ String msg = "Error while trying to hflushOrSync!";
+ logger.error(msg);
+ throw new FlumeException(msg, e);
+ }
+ }
+}
diff --git a/code/flume-ng-sinks/flume-hdfs-sink/src/main/java/org/apache/flume/sink/hdfs/AvroEventSerializer.java b/code/flume-ng-sinks/flume-hdfs-sink/src/main/java/org/apache/flume/sink/hdfs/AvroEventSerializer.java
new file mode 100644
index 0000000..3231742
--- /dev/null
+++ b/code/flume-ng-sinks/flume-hdfs-sink/src/main/java/org/apache/flume/sink/hdfs/AvroEventSerializer.java
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.hdfs;
+
+import org.apache.avro.AvroRuntimeException;
+import org.apache.avro.Schema;
+import org.apache.avro.file.CodecFactory;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.io.DatumWriter;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.FlumeException;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.serialization.EventSerializer;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URL;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+import static org.apache.flume.serialization.AvroEventSerializerConfigurationConstants.COMPRESSION_CODEC;
+import static org.apache.flume.serialization.AvroEventSerializerConfigurationConstants.DEFAULT_COMPRESSION_CODEC;
+import static org.apache.flume.serialization.AvroEventSerializerConfigurationConstants.DEFAULT_STATIC_SCHEMA_URL;
+import static org.apache.flume.serialization.AvroEventSerializerConfigurationConstants.DEFAULT_SYNC_INTERVAL_BYTES;
+import static org.apache.flume.serialization.AvroEventSerializerConfigurationConstants.STATIC_SCHEMA_URL;
+import static org.apache.flume.serialization.AvroEventSerializerConfigurationConstants.SYNC_INTERVAL_BYTES;
+
+/**
+ *
+ * This class serializes Flume {@linkplain org.apache.flume.Event events} into Avro data files. The
+ * Flume event body is read as an Avro datum, and is then written to the
+ * {@link org.apache.flume.serialization.EventSerializer}'s output stream in Avro data file format.
+ *
+ *
+ * The Avro schema is determined by reading a Flume event header. The schema may be
+ * specified either as a literal, by setting {@link #AVRO_SCHEMA_LITERAL_HEADER} (not
+ * recommended, since the full schema must be transmitted in every event),
+ * or as a URL which the schema may be read from, by setting {@link
+ * #AVRO_SCHEMA_URL_HEADER}. Schemas read from URLs are cached by instances of this
+ * class so that the overhead of retrieval is minimized.
+ *
+ */
+public class AvroEventSerializer implements EventSerializer, Configurable {
+
+ private static final Logger logger =
+ LoggerFactory.getLogger(AvroEventSerializer.class);
+
+ public static final String AVRO_SCHEMA_LITERAL_HEADER = "flume.avro.schema.literal";
+ public static final String AVRO_SCHEMA_URL_HEADER = "flume.avro.schema.url";
+
+ private final OutputStream out;
+ private DatumWriter
+ *
+ * This can be used to send events to ElasticSearch and use clients such as
+ * Kibana which expect Logstash formatted indexes
+ *
+ *
+ * {
+ * "@timestamp": "2010-12-21T21:48:33.309258Z",
+ * "@tags": [ "array", "of", "tags" ],
+ * "@type": "string",
+ * "@source": "source of the event, usually a URL."
+ * "@source_host": ""
+ * "@source_path": ""
+ * "@fields":{
+ * # a set of fields for this event
+ * "user": "jordan",
+ * "command": "shutdown -r":
+ * }
+ * "@message": "the original plain-text message"
+ * }
+ *
+ *
+ * If the following headers are present, they will map to the above logstash
+ * output as long as the logstash fields are not already present.
+ *
+ * @see https
+ * ://github.com/logstash/logstash/wiki/logstash%27s-internal-message-
+ * format
+ */
+public class ElasticSearchLogStashEventSerializer implements
+    ElasticSearchEventSerializer {
+
+  /**
+   * Starts a JSON object for the event and writes the body and the headers
+   * into it.
+   *
+   * The root object opened here is not closed by this method.
+   * NOTE(review): presumably the caller or the builder closes it -- confirm.
+   *
+   * @param event the event to serialize
+   * @return the builder holding the serialized event
+   * @throws IOException if writing to the builder fails
+   */
+  @Override
+  public XContentBuilder getContentBuilder(Event event) throws IOException {
+    XContentBuilder builder = jsonBuilder().startObject();
+    appendBody(builder, event);
+    appendHeaders(builder, event);
+    return builder;
+  }
+
+  /**
+   * Writes the event body as the "@message" field.
+   */
+  private void appendBody(XContentBuilder builder, Event event)
+      throws IOException, UnsupportedEncodingException {
+    byte[] body = event.getBody();
+    ContentBuilderUtil.appendField(builder, "@message", body);
+  }
+
+  /**
+   * Writes the logstash fields derived from the event headers, followed by
+   * all headers as the "@fields" object.
+   *
+   * Each of the headers timestamp, source, type, host and src_path is mapped
+   * to @timestamp, @source, @type, @source_host and @source_path respectively,
+   * but only when the header is not blank and no header with the logstash
+   * name is already present. The timestamp header is parsed as a long and
+   * written as a Date; a non-numeric value makes Long.parseLong throw.
+   * The mapped headers are not removed, so they also appear under "@fields".
+   */
+  private void appendHeaders(XContentBuilder builder, Event event)
+      throws IOException {
+    // work on a copy of the event headers
+    Map<String, String> headers = Maps.newHashMap(event.getHeaders());
+
+    String timestamp = headers.get("timestamp");
+    if (!StringUtils.isBlank(timestamp)
+        && StringUtils.isBlank(headers.get("@timestamp"))) {
+      long timestampMs = Long.parseLong(timestamp);
+      builder.field("@timestamp", new Date(timestampMs));
+    }
+
+    String source = headers.get("source");
+    if (!StringUtils.isBlank(source)
+        && StringUtils.isBlank(headers.get("@source"))) {
+      ContentBuilderUtil.appendField(builder, "@source",
+          source.getBytes(charset));
+    }
+
+    String type = headers.get("type");
+    if (!StringUtils.isBlank(type)
+        && StringUtils.isBlank(headers.get("@type"))) {
+      ContentBuilderUtil.appendField(builder, "@type", type.getBytes(charset));
+    }
+
+    String host = headers.get("host");
+    if (!StringUtils.isBlank(host)
+        && StringUtils.isBlank(headers.get("@source_host"))) {
+      ContentBuilderUtil.appendField(builder, "@source_host",
+          host.getBytes(charset));
+    }
+
+    String srcPath = headers.get("src_path");
+    if (!StringUtils.isBlank(srcPath)
+        && StringUtils.isBlank(headers.get("@source_path"))) {
+      ContentBuilderUtil.appendField(builder, "@source_path",
+          srcPath.getBytes(charset));
+    }
+
+    builder.startObject("@fields");
+    for (Map.Entry<String, String> header : headers.entrySet()) {
+      byte[] val = header.getValue().getBytes(charset);
+      ContentBuilderUtil.appendField(builder, header.getKey(), val);
+    }
+    builder.endObject();
+  }
+
+  /** This serializer has no settings; nothing is read from the context. */
+  @Override
+  public void configure(Context context) {
+    // NO-OP...
+  }
+
+  /** This serializer has no settings; nothing is read from the configuration. */
+  @Override
+  public void configure(ComponentConfiguration conf) {
+    // NO-OP...
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/ElasticSearchSink.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/ElasticSearchSink.java
new file mode 100644
index 0000000..ebafb9f
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/ElasticSearchSink.java
@@ -0,0 +1,428 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.BATCH_SIZE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.CLUSTER_NAME;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_CLUSTER_NAME;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_INDEX_NAME;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_INDEX_TYPE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_TTL;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.HOSTNAMES;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_NAME;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_TYPE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.SERIALIZER;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.SERIALIZER_PREFIX;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.TTL;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.TTL_REGEX;
+import org.apache.commons.lang.StringUtils;
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.CounterGroup;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.Transaction;
+import org.apache.flume.formatter.output.BucketPath;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.instrumentation.SinkCounter;
+import org.apache.flume.sink.AbstractSink;
+import org.apache.flume.sink.elasticsearch.client.ElasticSearchClient;
+import org.apache.flume.sink.elasticsearch.client.ElasticSearchClientFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Throwables;
+
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.CLIENT_PREFIX;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.CLIENT_TYPE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_CLIENT_TYPE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_INDEX_NAME_BUILDER_CLASS;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_SERIALIZER_CLASS;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_NAME_BUILDER;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_NAME_BUILDER_PREFIX;
+
+/**
+ * A sink which reads events from a channel and writes them to ElasticSearch
+ * based on the work done by https://github.com/Aconex/elasticflume.git.
+ *
+ * This sink supports batch reading of events from the channel and writing them
+ * to ElasticSearch.
+ *
+ * Indexes will be rolled daily using the format 'indexname-YYYY-MM-dd' to allow
+ * easier management of the index
+ *
+ * This sink must be configured with the mandatory parameters detailed in
+ * {@link ElasticSearchSinkConstants} It is recommended as a secondary step
+ * the ElasticSearch indexes are optimized for the specified serializer. This is
+ * not handled by the sink but is typically done by deploying a config template
+ * alongside the ElasticSearch deploy
+ *
+ * @see http
+ * ://www.elasticsearch.org/guide/reference/api/admin-indices-templates.
+ * html
+ */
+public class ElasticSearchSink extends AbstractSink implements Configurable {
+
+ private static final Logger logger = LoggerFactory
+ .getLogger(ElasticSearchSink.class);
+
+ // Used for testing
+ private boolean isLocal = false;
+ private final CounterGroup counterGroup = new CounterGroup();
+
+ private static final int defaultBatchSize = 100;
+
+ private int batchSize = defaultBatchSize;
+ private long ttlMs = DEFAULT_TTL;
+ private String clusterName = DEFAULT_CLUSTER_NAME;
+ private String indexName = DEFAULT_INDEX_NAME;
+ private String indexType = DEFAULT_INDEX_TYPE;
+ private String clientType = DEFAULT_CLIENT_TYPE;
+ private final Pattern pattern = Pattern.compile(TTL_REGEX,
+ Pattern.CASE_INSENSITIVE);
+ private Matcher matcher = pattern.matcher("");
+
+ private String[] serverAddresses = null;
+
+ private ElasticSearchClient client = null;
+ private Context elasticSearchClientContext = null;
+
+ private ElasticSearchIndexRequestBuilderFactory indexRequestFactory;
+ private ElasticSearchEventSerializer eventSerializer;
+ private IndexNameBuilder indexNameBuilder;
+ private SinkCounter sinkCounter;
+
+ /**
+ * Create an {@link ElasticSearchSink} that is not restricted to a local
+ * (same JVM) ElasticSearch instance; its settings are supplied later
+ * through {@link #configure(Context)}
+ */
+ public ElasticSearchSink() {
+ this(false);
+ }
+
+ /**
+ * Create an {@link ElasticSearchSink}
+ *
+ * @param isLocal
+ * If true sink will be configured to only talk to an
+ * ElasticSearch instance hosted in the same JVM, should always be
+ * false in production
+ *
+ */
+ @VisibleForTesting
+ ElasticSearchSink(boolean isLocal) {
+ this.isLocal = isLocal;
+ }
+
+ /** Returns the configured host addresses; null until configure() has parsed them. */
+ @VisibleForTesting
+ String[] getServerAddresses() {
+ return serverAddresses;
+ }
+
+ /** Returns the cluster name in effect (the default unless overridden in configure()). */
+ @VisibleForTesting
+ String getClusterName() {
+ return clusterName;
+ }
+
+ /** Returns the index name in effect (the default unless overridden in configure()). */
+ @VisibleForTesting
+ String getIndexName() {
+ return indexName;
+ }
+
+ /** Returns the index type in effect (the default unless overridden in configure()). */
+ @VisibleForTesting
+ String getIndexType() {
+ return indexType;
+ }
+
+ /** Returns the TTL in milliseconds (the default unless overridden in configure()). */
+ @VisibleForTesting
+ long getTTLMs() {
+ return ttlMs;
+ }
+
+ /** Returns the event serializer; it is only set when the configured serializer class is an ElasticSearchEventSerializer. */
+ @VisibleForTesting
+ ElasticSearchEventSerializer getEventSerializer() {
+ return eventSerializer;
+ }
+
+ /** Returns the index name builder created by configure(). */
+ @VisibleForTesting
+ IndexNameBuilder getIndexNameBuilder() {
+ return indexNameBuilder;
+ }
+
+ @Override
+ public Status process() throws EventDeliveryException {
+ logger.debug("processing...");
+ Status status = Status.READY;
+ Channel channel = getChannel();
+ Transaction txn = channel.getTransaction();
+ try {
+ txn.begin();
+ int count;
+ for (count = 0; count < batchSize; ++count) {
+ Event event = channel.take();
+
+ if (event == null) {
+ break;
+ }
+ String realIndexType = BucketPath.escapeString(indexType, event.getHeaders());
+ client.addEvent(event, indexNameBuilder, realIndexType, ttlMs);
+ }
+
+ if (count <= 0) {
+ sinkCounter.incrementBatchEmptyCount();
+ counterGroup.incrementAndGet("channel.underflow");
+ status = Status.BACKOFF;
+ } else {
+ if (count < batchSize) {
+ sinkCounter.incrementBatchUnderflowCount();
+ status = Status.BACKOFF;
+ } else {
+ sinkCounter.incrementBatchCompleteCount();
+ }
+
+ sinkCounter.addToEventDrainAttemptCount(count);
+ client.execute();
+ }
+ txn.commit();
+ sinkCounter.addToEventDrainSuccessCount(count);
+ counterGroup.incrementAndGet("transaction.success");
+ } catch (Throwable ex) {
+ try {
+ txn.rollback();
+ counterGroup.incrementAndGet("transaction.rollback");
+ } catch (Exception ex2) {
+ logger.error(
+ "Exception in rollback. Rollback might not have been successful.",
+ ex2);
+ }
+
+ if (ex instanceof Error || ex instanceof RuntimeException) {
+ logger.error("Failed to commit transaction. Transaction rolled back.",
+ ex);
+ Throwables.propagate(ex);
+ } else {
+ logger.error("Failed to commit transaction. Transaction rolled back.",
+ ex);
+ throw new EventDeliveryException(
+ "Failed to commit transaction. Transaction rolled back.", ex);
+ }
+ } finally {
+ txn.close();
+ }
+ return status;
+ }
+
+  /**
+   * Applies the sink settings from the Flume context.
+   *
+   * Host names are mandatory unless the sink is local. Index name, index
+   * type, cluster name, batch size, TTL and client type replace their
+   * defaults only when a non-blank value is configured. The serializer and
+   * the index name builder are instantiated by class name and each receives
+   * the sub-properties found under its own prefix.
+   *
+   * @param context the sink configuration
+   * @throws IllegalStateException if a mandatory parameter is missing, the
+   *         TTL does not resolve to a value greater than 0, or the batch size
+   *         is smaller than 1
+   */
+  @Override
+  public void configure(Context context) {
+    if (!isLocal) {
+      if (StringUtils.isNotBlank(context.getString(HOSTNAMES))) {
+        serverAddresses = StringUtils.deleteWhitespace(
+            context.getString(HOSTNAMES)).split(",");
+      }
+      Preconditions.checkState(serverAddresses != null
+          && serverAddresses.length > 0, "Missing Param:" + HOSTNAMES);
+    }
+
+    if (StringUtils.isNotBlank(context.getString(INDEX_NAME))) {
+      this.indexName = context.getString(INDEX_NAME);
+    }
+
+    if (StringUtils.isNotBlank(context.getString(INDEX_TYPE))) {
+      this.indexType = context.getString(INDEX_TYPE);
+    }
+
+    if (StringUtils.isNotBlank(context.getString(CLUSTER_NAME))) {
+      this.clusterName = context.getString(CLUSTER_NAME);
+    }
+
+    if (StringUtils.isNotBlank(context.getString(BATCH_SIZE))) {
+      this.batchSize = Integer.parseInt(context.getString(BATCH_SIZE));
+    }
+
+    if (StringUtils.isNotBlank(context.getString(TTL))) {
+      this.ttlMs = parseTTL(context.getString(TTL));
+      Preconditions.checkState(ttlMs > 0, TTL
+          + " must be greater than 0 or not set.");
+    }
+
+    if (StringUtils.isNotBlank(context.getString(CLIENT_TYPE))) {
+      clientType = context.getString(CLIENT_TYPE);
+    }
+
+    elasticSearchClientContext = new Context();
+    elasticSearchClientContext.putAll(context.getSubProperties(CLIENT_PREFIX));
+
+    String serializerClazz = DEFAULT_SERIALIZER_CLASS;
+    if (StringUtils.isNotBlank(context.getString(SERIALIZER))) {
+      serializerClazz = context.getString(SERIALIZER);
+    }
+
+    Context serializerContext = new Context();
+    serializerContext.putAll(context.getSubProperties(SERIALIZER_PREFIX));
+
+    try {
+      @SuppressWarnings("unchecked")
+      Class<? extends Configurable> clazz = (Class<? extends Configurable>) Class
+          .forName(serializerClazz);
+      Configurable serializer = clazz.newInstance();
+
+      // the configured class may be either a request builder factory or a plain event serializer
+      if (serializer instanceof ElasticSearchIndexRequestBuilderFactory) {
+        indexRequestFactory
+            = (ElasticSearchIndexRequestBuilderFactory) serializer;
+        indexRequestFactory.configure(serializerContext);
+      } else if (serializer instanceof ElasticSearchEventSerializer) {
+        eventSerializer = (ElasticSearchEventSerializer) serializer;
+        eventSerializer.configure(serializerContext);
+      } else {
+        throw new IllegalArgumentException(serializerClazz
+            + " is not an ElasticSearchEventSerializer");
+      }
+    } catch (Exception e) {
+      logger.error("Could not instantiate event serializer.", e);
+      Throwables.propagate(e);
+    }
+
+    if (sinkCounter == null) {
+      sinkCounter = new SinkCounter(getName());
+    }
+
+    String indexNameBuilderClass = DEFAULT_INDEX_NAME_BUILDER_CLASS;
+    if (StringUtils.isNotBlank(context.getString(INDEX_NAME_BUILDER))) {
+      indexNameBuilderClass = context.getString(INDEX_NAME_BUILDER);
+    }
+
+    // The builder's own sub-properties must go into the context handed to the
+    // builder; copying them into serializerContext would leave the builder
+    // with nothing but the index name.
+    Context indexnameBuilderContext = new Context();
+    indexnameBuilderContext.putAll(
+        context.getSubProperties(INDEX_NAME_BUILDER_PREFIX));
+
+    try {
+      @SuppressWarnings("unchecked")
+      Class<? extends IndexNameBuilder> clazz
+          = (Class<? extends IndexNameBuilder>) Class
+              .forName(indexNameBuilderClass);
+      indexNameBuilder = clazz.newInstance();
+      indexnameBuilderContext.put(INDEX_NAME, indexName);
+      indexNameBuilder.configure(indexnameBuilderContext);
+    } catch (Exception e) {
+      logger.error("Could not instantiate index name builder.", e);
+      Throwables.propagate(e);
+    }
+
+    Preconditions.checkState(StringUtils.isNotBlank(indexName),
+        "Missing Param:" + INDEX_NAME);
+    Preconditions.checkState(StringUtils.isNotBlank(indexType),
+        "Missing Param:" + INDEX_TYPE);
+    Preconditions.checkState(StringUtils.isNotBlank(clusterName),
+        "Missing Param:" + CLUSTER_NAME);
+    Preconditions.checkState(batchSize >= 1, BATCH_SIZE
+        + " must be greater than 0");
+  }
+
+  /**
+   * Starts the sink counter and creates the ElasticSearch client: a local
+   * client when the sink is local, otherwise a client for the configured
+   * hosts and cluster, which also receives the client sub-configuration.
+   *
+   * A failure while creating the client is logged and counted but not
+   * rethrown; the sink is started regardless.
+   */
+  @Override
+  public void start() {
+    ElasticSearchClientFactory clientFactory = new ElasticSearchClientFactory();
+
+    logger.info("ElasticSearch sink {} started", getName());
+    sinkCounter.start();
+    try {
+      if (isLocal) {
+        client = clientFactory.getLocalClient(
+            clientType, eventSerializer, indexRequestFactory);
+      } else {
+        client = clientFactory.getClient(clientType, serverAddresses,
+            clusterName, eventSerializer, indexRequestFactory);
+        client.configure(elasticSearchClientContext);
+      }
+      sinkCounter.incrementConnectionCreatedCount();
+    } catch (Exception ex) {
+      // report through the logger so the failure lands in the agent log
+      logger.error("ElasticSearch sink " + getName()
+          + " failed to create its client", ex);
+      sinkCounter.incrementConnectionFailedCount();
+      if (client != null) {
+        client.close();
+        sinkCounter.incrementConnectionClosedCount();
+      }
+    }
+
+    super.start();
+  }
+
+  /**
+   * Closes the client, if one was created, then stops the sink counter and
+   * the sink itself. The closed-connection counter is incremented whether or
+   * not a client existed.
+   */
+  @Override
+  public void stop() {
+    logger.info("ElasticSearch sink {} stopping", getName());
+    if (client != null) {
+      client.close();
+    }
+    sinkCounter.incrementConnectionClosedCount();
+    sinkCounter.stop();
+    super.stop();
+  }
+
+  /**
+   * Returns TTL value of ElasticSearch index in milliseconds when TTL specifier
+   * is "ms" / "s" / "m" / "h" / "d" / "w". When the specifier is not provided
+   * the number is interpreted as days. For an unknown specifier, or when the
+   * text does not match the TTL pattern at all, 0 is returned.
+   * <p>
+   * Elasticsearch supports ttl values being provided in the format:
+   * 1d / 1w / 1ms / 1s / 1h / 1m specify a time unit like d (days), m
+   * (minutes), h (hours), ms (milliseconds) or w (weeks), milliseconds is used
+   * as default unit by Elasticsearch itself.
+   * http://www.elasticsearch.org/guide/reference/mapping/ttl-field/.
+   * <p>
+   * The amount is parsed as a long so that large values neither fail to parse
+   * nor overflow int arithmetic (notably the "weeks times 7" case).
+   *
+   * @param ttl TTL value provided by user in flume configuration file for the
+   *          sink
+   *
+   * @return the ttl value in milliseconds, or 0 when it cannot be determined
+   */
+  private long parseTTL(String ttl) {
+    matcher = matcher.reset(ttl);
+    while (matcher.find()) {
+      String qualifier = matcher.group(2);
+      if (qualifier.equals("ms")) {
+        return Long.parseLong(matcher.group(1));
+      } else if (qualifier.equals("s")) {
+        return TimeUnit.SECONDS.toMillis(Long.parseLong(matcher.group(1)));
+      } else if (qualifier.equals("m")) {
+        return TimeUnit.MINUTES.toMillis(Long.parseLong(matcher.group(1)));
+      } else if (qualifier.equals("h")) {
+        return TimeUnit.HOURS.toMillis(Long.parseLong(matcher.group(1)));
+      } else if (qualifier.equals("d")) {
+        return TimeUnit.DAYS.toMillis(Long.parseLong(matcher.group(1)));
+      } else if (qualifier.equals("w")) {
+        return TimeUnit.DAYS.toMillis(7 * Long.parseLong(matcher.group(1)));
+      } else if (qualifier.equals("")) {
+        logger.info("TTL qualifier is empty. Defaulting to day qualifier.");
+        return TimeUnit.DAYS.toMillis(Long.parseLong(matcher.group(1)));
+      } else {
+        logger.debug("Unknown TTL qualifier provided. Setting TTL to 0.");
+        return 0;
+      }
+    }
+    logger.info("TTL not provided. Skipping the TTL config by returning 0.");
+    return 0;
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/ElasticSearchSinkConstants.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/ElasticSearchSinkConstants.java
new file mode 100644
index 0000000..da88def
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/ElasticSearchSinkConstants.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+public class ElasticSearchSinkConstants {
+
+ /**
+ * Comma separated list of hostname:port, if the port is not present the
+ * default port '9300' will be used
+ * Example:
+ *
+ * 127.0.0.1:9200,127.0.0.2:9300
+ *
+ */
+ public static final String HOSTNAMES = "hostNames";
+
+ /**
+ * The name to index the document to, defaults to 'flume'
+ * The current date in the format 'yyyy-MM-dd' will be appended to this name,
+ * for example 'foo' will result in a daily index of 'foo-yyyy-MM-dd'
+ */
+ public static final String INDEX_NAME = "indexName";
+
+ /**
+ * The type to index the document to, defaults to 'log'
+ */
+ public static final String INDEX_TYPE = "indexType";
+
+ /**
+ * Name of the ElasticSearch cluster to connect to
+ */
+ public static final String CLUSTER_NAME = "clusterName";
+
+ /**
+ * Maximum number of events the sink should take from the channel per
+ * transaction, if available. Defaults to 100
+ */
+ public static final String BATCH_SIZE = "batchSize";
+
+ /**
+ * TTL in days (or with a ms/s/m/h/d/w unit suffix); when set, expired documents
+ * are deleted automatically, if not set documents are never automatically deleted
+ */
+ public static final String TTL = "ttl";
+
+ /**
+ * The fully qualified class name of the serializer the sink should use.
+ */
+ public static final String SERIALIZER = "serializer";
+
+ /**
+ * Configuration to pass to the serializer.
+ */
+ public static final String SERIALIZER_PREFIX = SERIALIZER + ".";
+
+ /**
+ * The fully qualified class name of the index name builder the sink
+ * should use to determine name of index where the event should be sent.
+ */
+ public static final String INDEX_NAME_BUILDER = "indexNameBuilder";
+
+ /**
+ * Prefix for configuration properties addressed to the index name builder
+ * (the counterpart of SERIALIZER_PREFIX for the serializer).
+ */
+ public static final String INDEX_NAME_BUILDER_PREFIX
+ = INDEX_NAME_BUILDER + ".";
+
+ /**
+ * The client type used for sending bulks to ElasticSearch
+ */
+ public static final String CLIENT_TYPE = "client";
+
+ /**
+ * The client prefix to extract the configuration that will be passed to
+ * elasticsearch client.
+ */
+ public static final String CLIENT_PREFIX = CLIENT_TYPE + ".";
+
+ /**
+ * DEFAULTS USED BY THE SINK
+ */
+
+ public static final int DEFAULT_PORT = 9300;
+ public static final int DEFAULT_TTL = -1;
+ public static final String DEFAULT_INDEX_NAME = "flume";
+ public static final String DEFAULT_INDEX_TYPE = "log";
+ public static final String DEFAULT_CLUSTER_NAME = "elasticsearch";
+ public static final String DEFAULT_CLIENT_TYPE = "transport";
+ public static final String TTL_REGEX = "^(\\d+)(\\D*)";
+ public static final String DEFAULT_SERIALIZER_CLASS = "org.apache.flume." +
+ "sink.elasticsearch.ElasticSearchLogStashEventSerializer";
+ public static final String DEFAULT_INDEX_NAME_BUILDER_CLASS =
+ "org.apache.flume.sink.elasticsearch.TimeBasedIndexNameBuilder";
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/EventSerializerIndexRequestBuilderFactory.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/EventSerializerIndexRequestBuilderFactory.java
new file mode 100644
index 0000000..d6cca50
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/EventSerializerIndexRequestBuilderFactory.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import java.io.IOException;
+
+import org.apache.commons.lang.time.FastDateFormat;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.elasticsearch.action.index.IndexRequestBuilder;
+import org.elasticsearch.common.io.BytesStream;
+
+/**
+ * Default implementation of {@link ElasticSearchIndexRequestBuilderFactory}.
+ * It serializes flume events using the
+ * {@link ElasticSearchEventSerializer} instance configured on the sink.
+ */
+public class EventSerializerIndexRequestBuilderFactory
+ extends AbstractElasticSearchIndexRequestBuilderFactory {
+
+ /** Serializer that produces the document source for each event. */
+ protected final ElasticSearchEventSerializer serializer;
+
+ /**
+ * Creates a factory that uses the date format
+ * {@code ElasticSearchIndexRequestBuilderFactory.df}.
+ *
+ * @param serializer serializer used to build the document source
+ */
+ public EventSerializerIndexRequestBuilderFactory(
+ ElasticSearchEventSerializer serializer) {
+ this(serializer, ElasticSearchIndexRequestBuilderFactory.df);
+ }
+
+ /**
+ * Creates a factory with an explicit date format, which is handed to the
+ * superclass constructor.
+ *
+ * @param serializer serializer used to build the document source
+ * @param fdf date format passed to the superclass
+ */
+ protected EventSerializerIndexRequestBuilderFactory(
+ ElasticSearchEventSerializer serializer, FastDateFormat fdf) {
+ super(fdf);
+ this.serializer = serializer;
+ }
+
+ /** Forwards the context to the wrapped serializer. */
+ @Override
+ public void configure(Context context) {
+ serializer.configure(context);
+ }
+
+ /** Forwards the component configuration to the wrapped serializer. */
+ @Override
+ public void configure(ComponentConfiguration config) {
+ serializer.configure(config);
+ }
+
+ /**
+ * Sets the index name, the index type and the serialized event as source
+ * on the given request builder.
+ *
+ * @param indexRequest request builder to populate
+ * @param indexName name of the index the document is sent to
+ * @param indexType type of the document
+ * @param event event that is serialized into the document source
+ * @throws IOException if the serializer fails to build the content
+ */
+ @Override
+ protected void prepareIndexRequest(IndexRequestBuilder indexRequest,
+ String indexName, String indexType, Event event) throws IOException {
+ BytesStream contentBuilder = serializer.getContentBuilder(event);
+ indexRequest.setIndex(indexName)
+ .setType(indexType)
+ .setSource(contentBuilder.bytes());
+ }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/IndexNameBuilder.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/IndexNameBuilder.java
new file mode 100644
index 0000000..1dd4415
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/IndexNameBuilder.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import org.apache.flume.Event;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.conf.ConfigurableComponent;
+
+public interface IndexNameBuilder extends Configurable,
+ ConfigurableComponent {
+ /**
+ * Gets the name of the index to use for an index request
+ * @param event
+ * Event which determines index name
+ * @return full index name; implementations may append a dynamic part to the
+ * prefix (e.g. 'indexPrefix-indexDynamicName') or return the prefix as is
+ */
+ public String getIndexName(Event event);
+
+ /**
+ * Gets the prefix of index to use for an index request.
+ * @param event
+ * Event which determines index name
+ * @return Index prefix name
+ */
+ public String getIndexPrefix(Event event);
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/SimpleIndexNameBuilder.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/SimpleIndexNameBuilder.java
new file mode 100644
index 0000000..801cac9
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/SimpleIndexNameBuilder.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2014 Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.elasticsearch;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.apache.flume.formatter.output.BucketPath;
+
+/**
+ * Index name builder that uses the configured index name without appending
+ * anything to it. Escape sequences in the name are expanded from the event
+ * headers, so index name and index prefix are always the same string.
+ */
+public class SimpleIndexNameBuilder implements IndexNameBuilder {
+
+  private String indexName;
+
+  /** Returns the configured index name with escapes expanded for the event. */
+  @Override
+  public String getIndexName(Event event) {
+    return expandIndexName(event);
+  }
+
+  /** Returns the same value as {@link #getIndexName(Event)}. */
+  @Override
+  public String getIndexPrefix(Event event) {
+    return expandIndexName(event);
+  }
+
+  /** Reads the index name from the {@code indexName} property. */
+  @Override
+  public void configure(Context context) {
+    this.indexName = context.getString(ElasticSearchSinkConstants.INDEX_NAME);
+  }
+
+  /** Intentionally empty: this builder takes no component configuration. */
+  @Override
+  public void configure(ComponentConfiguration conf) {
+  }
+
+  /** Expands the escape sequences of the index name using the event headers. */
+  private String expandIndexName(Event event) {
+    return BucketPath.escapeString(indexName, event.getHeaders());
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/TimeBasedIndexNameBuilder.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/TimeBasedIndexNameBuilder.java
new file mode 100644
index 0000000..c651732
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/TimeBasedIndexNameBuilder.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang.time.FastDateFormat;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.apache.flume.formatter.output.BucketPath;
+
+import java.util.TimeZone;
+
+/**
+ * Default index name builder. It prepares name of index using configured
+ * prefix and the timestamp of the event. Default format of name is
+ * "prefix-yyyy-MM-dd".
+ */
+public class TimeBasedIndexNameBuilder implements
+    IndexNameBuilder {
+
+  public static final String DATE_FORMAT = "dateFormat";
+  public static final String TIME_ZONE = "timeZone";
+
+  public static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd";
+  public static final String DEFAULT_TIME_ZONE = "Etc/UTC";
+
+  // Used until configure(Context) replaces it with the configured format.
+  private FastDateFormat fastDateFormat = FastDateFormat.getInstance(
+      DEFAULT_DATE_FORMAT, TimeZone.getTimeZone(DEFAULT_TIME_ZONE));
+
+  private String indexPrefix;
+
+  @VisibleForTesting
+  FastDateFormat getFastDateFormat() {
+    return fastDateFormat;
+  }
+
+  /**
+   * Builds the index name for an index request.
+   * @param event
+   *          Event for which the name of index has to be prepared
+   * @return index name of the form 'indexPrefix-formattedTimestamp'
+   */
+  @Override
+  public String getIndexName(Event event) {
+    // The timestamp is resolved first, then the prefix is expanded.
+    long eventMillis = new TimestampedEvent(event).getTimestamp();
+    String expandedPrefix = getIndexPrefix(event, indexPrefix);
+    StringBuilder name = new StringBuilder(expandedPrefix);
+    name.append('-');
+    name.append(fastDateFormat.format(eventMillis));
+    return name.toString();
+  }
+
+  /** Returns the configured prefix with escapes expanded from the headers. */
+  @Override
+  public String getIndexPrefix(Event event) {
+    return getIndexPrefix(event, indexPrefix);
+  }
+
+  /**
+   * Reads date format, time zone and index prefix from the context. Blank
+   * date format or time zone values fall back to the defaults.
+   */
+  @Override
+  public void configure(Context context) {
+    String configuredFormat = context.getString(DATE_FORMAT);
+    String configuredZone = context.getString(TIME_ZONE);
+    String pattern = StringUtils.isBlank(configuredFormat)
+        ? DEFAULT_DATE_FORMAT : configuredFormat;
+    String zoneId = StringUtils.isBlank(configuredZone)
+        ? DEFAULT_TIME_ZONE : configuredZone;
+    fastDateFormat = FastDateFormat.getInstance(pattern,
+        TimeZone.getTimeZone(zoneId));
+    indexPrefix = context.getString(ElasticSearchSinkConstants.INDEX_NAME);
+  }
+
+  /** Intentionally empty: this builder takes no component configuration. */
+  @Override
+  public void configure(ComponentConfiguration conf) {
+  }
+
+  /** Expands the escape sequences of the given prefix using the event headers. */
+  private static String getIndexPrefix(Event event, String prefix) {
+    return BucketPath.escapeString(prefix, event.getHeaders());
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/TimestampedEvent.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/TimestampedEvent.java
new file mode 100644
index 0000000..c056839
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/TimestampedEvent.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import com.google.common.collect.Maps;
+import org.apache.commons.lang.StringUtils;
+import org.apache.flume.Event;
+import org.apache.flume.event.SimpleEvent;
+import org.joda.time.DateTimeUtils;
+
+import java.util.Map;
+
+/**
+ * {@link org.apache.flume.Event} implementation that has a timestamp.
+ * The timestamp is taken from (in order of precedence):
+ * <ol>
+ * <li>The "timestamp" header of the base event, if present</li>
+ * <li>The "@timestamp" header of the base event, if present</li>
+ * <li>The current time in millis, otherwise</li>
+ * </ol>
+ * A header value that is present must be a decimal long; otherwise the
+ * constructor fails with a {@link NumberFormatException}.
+ */
+final class TimestampedEvent extends SimpleEvent {
+
+  private final long timestamp;
+
+  /**
+   * Copies body and headers of the base event and resolves its timestamp.
+   *
+   * @param base event to wrap; its header map is copied, not modified
+   * @throws NumberFormatException if the timestamp header is not a long
+   */
+  TimestampedEvent(Event base) {
+    setBody(base.getBody());
+    // Typed copy: with a raw Map the String assignments below do not compile.
+    Map<String, String> headers = Maps.newHashMap(base.getHeaders());
+    String timestampString = headers.get("timestamp");
+    if (StringUtils.isBlank(timestampString)) {
+      timestampString = headers.get("@timestamp");
+    }
+    if (StringUtils.isBlank(timestampString)) {
+      // No usable header: stamp the event with the current time and record it
+      // in the copied headers.
+      this.timestamp = DateTimeUtils.currentTimeMillis();
+      headers.put("timestamp", String.valueOf(timestamp));
+    } else {
+      // parseLong avoids the needless boxing of Long.valueOf.
+      this.timestamp = Long.parseLong(timestampString);
+    }
+    setHeaders(headers);
+  }
+
+  /** Returns the resolved timestamp in milliseconds. */
+  long getTimestamp() {
+    return timestamp;
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchClient.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchClient.java
new file mode 100644
index 0000000..655e00a
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchClient.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch.client;
+
+import org.apache.flume.Event;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.sink.elasticsearch.IndexNameBuilder;
+
+/**
+ * Interface for an ElasticSearch client which is responsible for sending bulks
+ * of events to ElasticSearch.
+ */
+public interface ElasticSearchClient extends Configurable {
+
+ /**
+ * Close connection to elastic search in client
+ */
+ void close();
+
+ /**
+ * Add new event to the bulk
+ *
+ * @param event
+ * Flume Event
+ * @param indexNameBuilder
+ * Index name builder which generates name of index to feed
+ * @param indexType
+ * Name of type of document which will be sent to the elasticsearch cluster
+ * @param ttlMs
+ * Time to live expressed in milliseconds. Value <= 0 is ignored
+ * @throws Exception
+ * if the event cannot be added to the bulk
+ */
+ public void addEvent(Event event, IndexNameBuilder indexNameBuilder,
+ String indexType, long ttlMs) throws Exception;
+
+ /**
+ * Sends bulk to the elasticsearch cluster
+ *
+ * @throws Exception
+ * if the bulk cannot be delivered
+ */
+ void execute() throws Exception;
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchClientFactory.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchClientFactory.java
new file mode 100644
index 0000000..986fb2b
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchClientFactory.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch.client;
+
+import org.apache.flume.sink.elasticsearch.ElasticSearchEventSerializer;
+import org.apache.flume.sink.elasticsearch.ElasticSearchIndexRequestBuilderFactory;
+
+/**
+ * Internal ElasticSearch client factory. Responsible for creating instance
+ * of ElasticSearch clients.
+ */
+public class ElasticSearchClientFactory {
+  public static final String TransportClient = "transport";
+  public static final String RestClient = "rest";
+
+  /**
+   * Creates a client of the requested type. For the transport client the
+   * serializer takes precedence over the index builder when both are given;
+   * the rest client requires a serializer.
+   *
+   * @param clientType
+   *          String representation of client type (case-insensitive)
+   * @param hostNames
+   *          Array of strings that represents hostnames with ports (hostname:port)
+   * @param clusterName
+   *          Elasticsearch cluster name used only by Transport Client
+   * @param serializer
+   *          Serializer of flume events to elasticsearch documents
+   * @param indexBuilder
+   *          Index request builder factory, used by the Transport Client when
+   *          no serializer is given
+   * @return client of the requested type
+   * @throws NoSuchClientTypeException
+   *           if the type is unknown or its required collaborator is null
+   */
+  public ElasticSearchClient getClient(String clientType, String[] hostNames,
+      String clusterName, ElasticSearchEventSerializer serializer,
+      ElasticSearchIndexRequestBuilderFactory indexBuilder) throws NoSuchClientTypeException {
+    if (clientType.equalsIgnoreCase(TransportClient)) {
+      if (serializer != null) {
+        return new ElasticSearchTransportClient(hostNames, clusterName, serializer);
+      }
+      if (indexBuilder != null) {
+        return new ElasticSearchTransportClient(hostNames, clusterName, indexBuilder);
+      }
+    } else if (clientType.equalsIgnoreCase(RestClient)) {
+      if (serializer != null) {
+        return new ElasticSearchRestClient(hostNames, serializer);
+      }
+    }
+    throw new NoSuchClientTypeException();
+  }
+
+  /**
+   * Used for tests only. Creates local elasticsearch instance client. Only
+   * the transport client type is supported here; any other type, including
+   * "rest", ends in a {@link NoSuchClientTypeException}.
+   *
+   * @param clientType Name of client to use
+   * @param serializer Serializer for the event
+   * @param indexBuilder Index builder factory
+   *
+   * @return Local elastic search instance client
+   * @throws NoSuchClientTypeException
+   *           if the type is not supported or both collaborators are null
+   */
+  public ElasticSearchClient getLocalClient(String clientType,
+      ElasticSearchEventSerializer serializer,
+      ElasticSearchIndexRequestBuilderFactory indexBuilder)
+      throws NoSuchClientTypeException {
+    if (clientType.equalsIgnoreCase(TransportClient)) {
+      if (serializer != null) {
+        return new ElasticSearchTransportClient(serializer);
+      }
+      if (indexBuilder != null) {
+        return new ElasticSearchTransportClient(indexBuilder);
+      }
+    }
+    throw new NoSuchClientTypeException();
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchRestClient.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchRestClient.java
new file mode 100644
index 0000000..e51efe2
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchRestClient.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch.client;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.gson.Gson;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.sink.elasticsearch.ElasticSearchEventSerializer;
+import org.apache.flume.sink.elasticsearch.IndexNameBuilder;
+import org.apache.http.HttpResponse;
+import org.apache.http.HttpStatus;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.entity.StringEntity;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.util.EntityUtils;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Rest ElasticSearch client which is responsible for sending bulks of events to
+ * ElasticSearch using ElasticSearch HTTP API. This is configurable, so any
+ * config params required should be taken through this.
+ *
+ * Events are accumulated by {@link #addEvent} into an in-memory bulk body and
+ * posted to the <code>_bulk</code> endpoint by {@link #execute()}.
+ */
+public class ElasticSearchRestClient implements ElasticSearchClient {
+
+  private static final String INDEX_OPERATION_NAME = "index";
+  private static final String INDEX_PARAM = "_index";
+  private static final String TYPE_PARAM = "_type";
+  private static final String TTL_PARAM = "_ttl";
+  private static final String BULK_ENDPOINT = "_bulk";
+  private static final String CHARSET = "UTF-8";
+
+  private static final Logger logger = LoggerFactory.getLogger(ElasticSearchRestClient.class);
+
+  // One shared instance instead of a new Gson per event.
+  private static final Gson GSON = new Gson();
+
+  private final ElasticSearchEventSerializer serializer;
+  private final RoundRobinList<String> serversList;
+
+  // Dedicated monitor: bulkBuilder is reassigned in execute(), so locking on
+  // the builder itself would let two threads hold different monitors.
+  private final Object bulkLock = new Object();
+
+  private StringBuilder bulkBuilder;
+  private HttpClient httpClient;
+
+  /**
+   * Creates a client for the given hosts. Host names that contain neither
+   * "http://" nor "https://" are prefixed with "http://".
+   *
+   * @param hostNames host names, optionally with scheme and port; the array
+   *          itself is not modified
+   * @param serializer serializer that turns events into document sources
+   */
+  public ElasticSearchRestClient(String[] hostNames,
+      ElasticSearchEventSerializer serializer) {
+
+    // Build the URL list in a copy so the caller's array stays untouched.
+    String[] urls = new String[hostNames.length];
+    for (int i = 0; i < hostNames.length; ++i) {
+      String host = hostNames[i];
+      boolean hasScheme = host.contains("http://") || host.contains("https://");
+      urls[i] = hasScheme ? host : "http://" + host;
+    }
+    this.serializer = serializer;
+
+    serversList = new RoundRobinList<String>(Arrays.asList(urls));
+    httpClient = new DefaultHttpClient();
+    bulkBuilder = new StringBuilder();
+  }
+
+  /**
+   * Same as the two-argument constructor, but with a caller-supplied
+   * {@link HttpClient}.
+   */
+  @VisibleForTesting
+  public ElasticSearchRestClient(String[] hostNames,
+      ElasticSearchEventSerializer serializer, HttpClient client) {
+    this(hostNames, serializer);
+    httpClient = client;
+  }
+
+  @Override
+  public void configure(Context context) {
+  }
+
+  @Override
+  public void close() {
+  }
+
+  /**
+   * Appends one index action (metadata line plus document source line) to the
+   * pending bulk body.
+   *
+   * @param ttlMs time to live in milliseconds; values <= 0 are not sent
+   * @throws Exception if the serializer fails to build the document
+   */
+  @Override
+  public void addEvent(Event event, IndexNameBuilder indexNameBuilder, String indexType,
+      long ttlMs) throws Exception {
+    BytesReference content = serializer.getContentBuilder(event).bytes();
+
+    Map<String, String> indexParameters = new HashMap<String, String>();
+    indexParameters.put(INDEX_PARAM, indexNameBuilder.getIndexName(event));
+    indexParameters.put(TYPE_PARAM, indexType);
+    if (ttlMs > 0) {
+      indexParameters.put(TTL_PARAM, Long.toString(ttlMs));
+    }
+    Map<String, Map<String, String>> parameters =
+        new HashMap<String, Map<String, String>>();
+    parameters.put(INDEX_OPERATION_NAME, indexParameters);
+
+    // Serialize outside the lock; only the append needs to be atomic.
+    String actionLine = GSON.toJson(parameters);
+    String sourceLine = content.toBytesArray().toUtf8();
+    synchronized (bulkLock) {
+      bulkBuilder.append(actionLine);
+      bulkBuilder.append("\n");
+      bulkBuilder.append(sourceLine);
+      bulkBuilder.append("\n");
+    }
+  }
+
+  /**
+   * Posts the pending bulk body to the servers in turn until one of them
+   * answers with HTTP 200 or every server has been tried once. The pending
+   * body is cleared before sending.
+   *
+   * @throws EventDeliveryException if no server answered with HTTP 200
+   * @throws Exception if the HTTP request itself fails
+   */
+  @Override
+  public void execute() throws Exception {
+    int statusCode = 0, triesCount = 0;
+    String responseBody = null;
+    String entity;
+    synchronized (bulkLock) {
+      entity = bulkBuilder.toString();
+      bulkBuilder = new StringBuilder();
+    }
+
+    while (statusCode != HttpStatus.SC_OK && triesCount < serversList.size()) {
+      triesCount++;
+      String host = serversList.get();
+      String url = host + "/" + BULK_ENDPOINT;
+      HttpPost httpRequest = new HttpPost(url);
+      // Explicit charset: without it StringEntity falls back to ISO-8859-1 and
+      // corrupts non-Latin characters in the documents.
+      httpRequest.setEntity(new StringEntity(entity, CHARSET));
+      HttpResponse response = httpClient.execute(httpRequest);
+      statusCode = response.getStatusLine().getStatusCode();
+      logger.info("Status code from elasticsearch: " + statusCode);
+      // Read the body exactly once; it is reused for the exception message
+      // below because the entity cannot be read a second time.
+      responseBody = null;
+      if (response.getEntity() != null) {
+        responseBody = EntityUtils.toString(response.getEntity(), CHARSET);
+        logger.debug("Status message from elasticsearch: " + responseBody);
+      }
+    }
+
+    if (statusCode != HttpStatus.SC_OK) {
+      // responseBody is also null when the server list is empty and no
+      // request was sent at all.
+      if (responseBody != null) {
+        throw new EventDeliveryException(responseBody);
+      } else {
+        throw new EventDeliveryException("Elasticsearch status code was: " + statusCode);
+      }
+    }
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchTransportClient.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchTransportClient.java
new file mode 100644
index 0000000..2cf365e
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/ElasticSearchTransportClient.java
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch.client;
+
+import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.sink.elasticsearch.ElasticSearchEventSerializer;
+import org.apache.flume.sink.elasticsearch.IndexNameBuilder;
+import org.elasticsearch.action.bulk.BulkRequestBuilder;
+import org.elasticsearch.action.bulk.BulkResponse;
+import org.elasticsearch.action.index.IndexRequestBuilder;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.client.transport.TransportClient;
+import org.elasticsearch.common.settings.ImmutableSettings;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.transport.InetSocketTransportAddress;
+import org.elasticsearch.node.Node;
+import org.elasticsearch.node.NodeBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Arrays;
+import org.apache.flume.sink.elasticsearch.ElasticSearchIndexRequestBuilderFactory;
+
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.DEFAULT_PORT;
+
+public class ElasticSearchTransportClient implements ElasticSearchClient {
+ // Queues index requests into one pending bulk request and sends it through an Elasticsearch Client.
+ public static final Logger logger = LoggerFactory
+ .getLogger(ElasticSearchTransportClient.class);
+
+ private InetSocketTransportAddress[] serverAddresses; // set only by configureHostnames
+ private ElasticSearchEventSerializer serializer;
+ private ElasticSearchIndexRequestBuilderFactory indexRequestBuilderFactory; // when non-null, addEvent uses it instead of serializer
+ private BulkRequestBuilder bulkRequestBuilder; // pending bulk; created on first addEvent, replaced after execute
+
+ private Client client;
+
+ @VisibleForTesting
+ InetSocketTransportAddress[] getServerAddresses() {
+ return serverAddresses;
+ }
+
+ @VisibleForTesting
+ void setBulkRequestBuilder(BulkRequestBuilder bulkRequestBuilder) {
+ this.bulkRequestBuilder = bulkRequestBuilder;
+ }
+
+ /**
+ * Transport client for external cluster
+ *
+ * @param hostNames "host" or "host:port" entries; DEFAULT_PORT is used when no port is given
+ * @param clusterName value passed as the "cluster.name" client setting
+ * @param serializer serializer addEvent uses to build each document source
+ */
+ public ElasticSearchTransportClient(String[] hostNames, String clusterName,
+ ElasticSearchEventSerializer serializer) {
+ configureHostnames(hostNames);
+ this.serializer = serializer;
+ openClient(clusterName);
+ }
+ /** Transport client for external cluster whose index requests are created by indexBuilder. */
+ public ElasticSearchTransportClient(String[] hostNames, String clusterName,
+ ElasticSearchIndexRequestBuilderFactory indexBuilder) {
+ configureHostnames(hostNames);
+ this.indexRequestBuilderFactory = indexBuilder;
+ openClient(clusterName);
+ }
+
+ /**
+ * Local transport client only for testing
+ *
+ * @param indexBuilderFactory factory addEvent uses to create each index request
+ */
+ public ElasticSearchTransportClient(ElasticSearchIndexRequestBuilderFactory indexBuilderFactory) {
+ this.indexRequestBuilderFactory = indexBuilderFactory;
+ openLocalDiscoveryClient();
+ }
+
+ /**
+ * Local transport client only for testing
+ *
+ * @param serializer serializer addEvent uses to build each document source
+ */
+ public ElasticSearchTransportClient(ElasticSearchEventSerializer serializer) {
+ this.serializer = serializer;
+ openLocalDiscoveryClient();
+ }
+
+ /**
+ * Used for testing
+ *
+ * @param client
+ * ElasticSearch Client
+ * @param serializer
+ * Event Serializer
+ */
+ public ElasticSearchTransportClient(Client client,
+ ElasticSearchEventSerializer serializer) {
+ this.client = client;
+ this.serializer = serializer;
+ }
+
+ /**
+ * Used for testing; calls createIndexRequest once with null arguments and does not keep the factory
+ */
+ public ElasticSearchTransportClient(Client client,
+ ElasticSearchIndexRequestBuilderFactory requestBuilderFactory)
+ throws IOException {
+ this.client = client;
+ requestBuilderFactory.createIndexRequest(client, null, null, null);
+ }
+ // Parses every entry into a transport address; only an exact "host:port" split overrides DEFAULT_PORT.
+ private void configureHostnames(String[] hostNames) {
+ logger.warn(Arrays.toString(hostNames));
+ serverAddresses = new InetSocketTransportAddress[hostNames.length];
+ for (int i = 0; i < hostNames.length; i++) {
+ String[] hostPort = hostNames[i].trim().split(":");
+ String host = hostPort[0].trim();
+ int port = hostPort.length == 2 ? Integer.parseInt(hostPort[1].trim())
+ : DEFAULT_PORT;
+ serverAddresses[i] = new InetSocketTransportAddress(host, port);
+ }
+ }
+ /** Closes the underlying client, if there is one, and clears the reference. */
+ @Override
+ public void close() {
+ if (client != null) {
+ client.close();
+ }
+ client = null;
+ }
+ /** Adds one index request for the event to the pending bulk request. */
+ @Override
+ public void addEvent(Event event, IndexNameBuilder indexNameBuilder,
+ String indexType, long ttlMs) throws Exception {
+ if (bulkRequestBuilder == null) {
+ bulkRequestBuilder = client.prepareBulk();
+ }
+ // Without a factory the request is built here from the serializer output.
+ IndexRequestBuilder indexRequestBuilder = null;
+ if (indexRequestBuilderFactory == null) {
+ indexRequestBuilder = client
+ .prepareIndex(indexNameBuilder.getIndexName(event), indexType)
+ .setSource(serializer.getContentBuilder(event).bytes());
+ } else {
+ indexRequestBuilder = indexRequestBuilderFactory.createIndexRequest(
+ client, indexNameBuilder.getIndexPrefix(event), indexType, event);
+ }
+ // A TTL is applied to the request only when it is positive.
+ if (ttlMs > 0) {
+ indexRequestBuilder.setTTL(ttlMs);
+ }
+ bulkRequestBuilder.add(indexRequestBuilder);
+ }
+ /** Runs the pending bulk request; throws EventDeliveryException if the response reports failures. */
+ @Override
+ public void execute() throws Exception {
+ try {
+ BulkResponse bulkResponse = bulkRequestBuilder.execute().actionGet();
+ if (bulkResponse.hasFailures()) {
+ throw new EventDeliveryException(bulkResponse.buildFailureMessage());
+ }
+ } finally {
+ bulkRequestBuilder = client.prepareBulk(); // start a fresh bulk whether or not the send succeeded
+ }
+ }
+
+ /**
+ * Open client to elasticsearch cluster
+ *
+ * @param clusterName value passed as the "cluster.name" client setting
+ */
+ private void openClient(String clusterName) {
+ logger.info("Using ElasticSearch hostnames: {} ",
+ Arrays.toString(serverAddresses));
+ Settings settings = ImmutableSettings.settingsBuilder()
+ .put("cluster.name", clusterName).build();
+
+ TransportClient transportClient = new TransportClient(settings);
+ for (InetSocketTransportAddress host : serverAddresses) {
+ transportClient.addTransportAddress(host);
+ }
+ if (client != null) {
+ client.close();
+ }
+ client = transportClient;
+ }
+
+ /*
+ * FOR TESTING ONLY...
+ *
+ * Opens a local discovery node for talking to an elasticsearch server running
+ * in the same JVM
+ */
+ private void openLocalDiscoveryClient() {
+ logger.info("Using ElasticSearch AutoDiscovery mode");
+ Node node = NodeBuilder.nodeBuilder().client(true).local(true).node();
+ if (client != null) {
+ client.close();
+ }
+ client = node.client();
+ }
+ /** Does nothing: no settings are read from the context. */
+ @Override
+ public void configure(Context context) {
+ // Empty: this client takes all of its settings through its constructors.
+ }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/NoSuchClientTypeException.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/NoSuchClientTypeException.java
new file mode 100644
index 0000000..41fbe0d
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/NoSuchClientTypeException.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2014 Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.elasticsearch.client;
+
+/**
+ * Package-private checked exception that declares no constructors, so it carries no message or cause.
+ * NOTE(review): presumably signals an unrecognised client type name - confirm at the throw site.
+ */
+class NoSuchClientTypeException extends Exception {
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/RoundRobinList.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/RoundRobinList.java
new file mode 100644
index 0000000..4cbbe91
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/main/java/org/apache/flume/sink/elasticsearch/client/RoundRobinList.java
@@ -0,0 +1,44 @@
+package org.apache.flume.sink.elasticsearch.client;
+
+import java.util.Collection;
+import java.util.Iterator;
+
+/*
+ * Copyright 2014 Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Round-robin view of a collection: {@link #get()} cycles through its elements endlessly. */
+public class RoundRobinList<T> {
+  private Iterator<T> iterator;
+  private final Collection<T> elements;
+
+  public RoundRobinList(Collection<T> elements) {
+    this.elements = elements;
+    iterator = this.elements.iterator();
+  }
+
+  /** Returns the next element, wrapping around after the last; fails on an empty collection. */
+  public synchronized T get() {
+    if (!iterator.hasNext()) {
+      // Exhausted: restart from the beginning of the backing collection.
+      iterator = elements.iterator();
+    }
+    return iterator.next();
+  }
+
+  /** Returns the number of elements in the backing collection. */
+  public int size() {
+    return elements.size();
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/AbstractElasticSearchSinkTest.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/AbstractElasticSearchSinkTest.java
new file mode 100644
index 0000000..9fbd747
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/AbstractElasticSearchSinkTest.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.channel.MemoryChannel;
+import org.apache.flume.conf.Configurables;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.common.collect.Maps;
+import org.elasticsearch.common.settings.ImmutableSettings;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.gateway.Gateway;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.node.Node;
+import org.elasticsearch.node.NodeBuilder;
+import org.elasticsearch.node.internal.InternalNode;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.search.SearchHits;
+import org.joda.time.DateTimeUtils;
+import org.junit.After;
+import org.junit.Before;
+
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.Map;
+
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.BATCH_SIZE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.CLUSTER_NAME;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_NAME;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_TYPE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.TTL;
+import static org.junit.Assert.assertEquals;
+
+/** Base fixture for sink tests: local Elasticsearch node, fixed Joda clock and search assertions. */
+public abstract class AbstractElasticSearchSinkTest {
+  static final String DEFAULT_INDEX_NAME = "flume";
+  static final String DEFAULT_INDEX_TYPE = "log";
+  static final String DEFAULT_CLUSTER_NAME = "elasticsearch";
+  static final long FIXED_TIME_MILLIS = 123456789L;
+
+  Node node;
+  Client client;
+  String timestampedIndexName;
+  Map<String, String> parameters;
+
+  /** Fills {@code parameters} with default sink settings and computes the expected index name. */
+  void initDefaults() {
+    parameters = Maps.newHashMap();
+    parameters.put(INDEX_NAME, DEFAULT_INDEX_NAME);
+    parameters.put(INDEX_TYPE, DEFAULT_INDEX_TYPE);
+    parameters.put(CLUSTER_NAME, DEFAULT_CLUSTER_NAME);
+    parameters.put(BATCH_SIZE, "1");
+    parameters.put(TTL, "5");
+
+    timestampedIndexName = DEFAULT_INDEX_NAME + '-'
+        + ElasticSearchIndexRequestBuilderFactory.df.format(FIXED_TIME_MILLIS);
+  }
+  /** Starts a local node with the test settings below and waits for green cluster health. */
+  void createNodes() throws Exception {
+    Settings settings = ImmutableSettings
+        .settingsBuilder()
+        .put("number_of_shards", 1)
+        .put("number_of_replicas", 0)
+        .put("routing.hash.type", "simple")
+        .put("gateway.type", "none")
+        .put("path.data", "target/es-test")
+        .build();
+
+    node = NodeBuilder.nodeBuilder().settings(settings).local(true).node();
+    client = node.client();
+
+    client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute()
+        .actionGet();
+  }
+  /** Resets the node's gateway, then closes the client and the node. */
+  void shutdownNodes() throws Exception {
+    ((InternalNode) node).injector().getInstance(Gateway.class).reset();
+    client.close();
+    node.close();
+  }
+  /** Pins Joda time to FIXED_TIME_MILLIS before every test. */
+  @Before
+  public void setFixedJodaTime() {
+    DateTimeUtils.setCurrentMillisFixed(FIXED_TIME_MILLIS);
+  }
+  /** Switches Joda time back to the system clock after every test. */
+  @After
+  public void resetJodaTime() {
+    DateTimeUtils.setCurrentMillisSystem();
+  }
+  /** Attaches a freshly configured MemoryChannel to the sink, starts the sink, returns the channel. */
+  Channel bindAndStartChannel(ElasticSearchSink fixture) {
+    Channel channel = new MemoryChannel();
+    Configurables.configure(channel, new Context());
+
+    // Wire the sink to the channel before starting it
+    fixture.setChannel(channel);
+    fixture.start();
+    return channel;
+  }
+  /** Runs a match-all search and checks the hits against the events' bodies. */
+  void assertMatchAllQuery(int expectedHits, Event... events) {
+    assertSearch(expectedHits, performSearch(QueryBuilders.matchAllQuery()),
+        null, events);
+  }
+  /** Searches the "@message" field for "event" and checks the hits against the events' bodies. */
+  void assertBodyQuery(int expectedHits, Event... events) {
+    // Query a single field: "@message"
+    assertSearch(expectedHits,
+        performSearch(QueryBuilders.fieldQuery("@message", "event")),
+        null, events);
+  }
+  /** Executes the query against the timestamped index, restricted to the default index type. */
+  SearchResponse performSearch(QueryBuilder query) {
+    return client.prepareSearch(timestampedIndexName)
+        .setTypes(DEFAULT_INDEX_TYPE).setQuery(query).execute().actionGet();
+  }
+  /** Checks the total hit count, then compares hits (sorted by source text) with events in order. */
+  void assertSearch(int expectedHits, SearchResponse response,
+      Map<String, Object> expectedBody, Event... events) {
+    SearchHits hitResponse = response.getHits();
+    assertEquals(expectedHits, hitResponse.getTotalHits());
+
+    SearchHit[] hits = hitResponse.getHits();
+    Arrays.sort(hits, new Comparator<SearchHit>() {
+      @Override
+      public int compare(SearchHit o1, SearchHit o2) {
+        return o1.getSourceAsString().compareTo(o2.getSourceAsString());
+      }
+    });
+
+    for (int i = 0; i < events.length; i++) {
+      Event event = events[i];
+      SearchHit hit = hits[i];
+      Map<String, Object> source = hit.getSource();
+      if (expectedBody == null) {
+        assertEquals(new String(event.getBody()), source.get("@message"));
+      } else {
+        assertEquals(expectedBody, source.get("@message"));
+      }
+    }
+  }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchDynamicSerializer.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchDynamicSerializer.java
new file mode 100644
index 0000000..d4e4654
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchDynamicSerializer.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.event.EventBuilder;
+import org.elasticsearch.common.collect.Maps;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.junit.Test;
+
+import java.util.Map;
+
+import static org.apache.flume.sink.elasticsearch.ElasticSearchEventSerializer.charset;
+import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+import static org.junit.Assert.assertEquals;
+
+/** Checks that ElasticSearchDynamicSerializer emits a "body" field plus one field per header. */
+public class TestElasticSearchDynamicSerializer {
+
+  @Test
+  public void testRoundTrip() throws Exception {
+    ElasticSearchDynamicSerializer fixture = new ElasticSearchDynamicSerializer();
+    Context context = new Context();
+    fixture.configure(context);
+
+    String message = "test body";
+    Map<String, String> headers = Maps.newHashMap();
+    headers.put("headerNameOne", "headerValueOne");
+    headers.put("headerNameTwo", "headerValueTwo");
+    headers.put("headerNameThree", "headerValueThree");
+    Event event = EventBuilder.withBody(message.getBytes(charset));
+    event.setHeaders(headers);
+
+    // Expected document built by hand; bytes are encoded and decoded with the same charset.
+    XContentBuilder expected = jsonBuilder().startObject();
+    expected.field("body", new String(message.getBytes(charset), charset));
+    for (Map.Entry<String, String> header : headers.entrySet()) {
+      expected.field(header.getKey(), new String(header.getValue().getBytes(charset), charset));
+    }
+    expected.endObject();
+
+    XContentBuilder actual = fixture.getContentBuilder(event);
+
+    assertEquals(new String(expected.bytes().array()), new String(actual
+        .bytes().array()));
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchIndexRequestBuilderFactory.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchIndexRequestBuilderFactory.java
new file mode 100644
index 0000000..b62254e
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchIndexRequestBuilderFactory.java
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import com.google.common.collect.Maps;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.apache.flume.conf.sink.SinkConfiguration;
+import org.apache.flume.event.SimpleEvent;
+import org.elasticsearch.action.index.IndexRequestBuilder;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.common.io.BytesStream;
+import org.elasticsearch.common.io.FastByteArrayOutputStream;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.Map;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+/** Tests for EventSerializerIndexRequestBuilderFactory and TimestampedEvent, run with a null Client. */
+public class TestElasticSearchIndexRequestBuilderFactory
+    extends AbstractElasticSearchSinkTest {
+  private static final Client FAKE_CLIENT = null;
+
+  private EventSerializerIndexRequestBuilderFactory factory;
+  private FakeEventSerializer serializer;
+
+  /** Creates a factory whose prepareIndex hands back a builder bound to FAKE_CLIENT. */
+  @Before
+  public void setupFactory() throws Exception {
+    serializer = new FakeEventSerializer();
+    factory = new EventSerializerIndexRequestBuilderFactory(serializer) {
+      @Override
+      IndexRequestBuilder prepareIndex(Client client) {
+        return new IndexRequestBuilder(FAKE_CLIENT);
+      }
+    };
+  }
+
+  @Test
+  public void shouldUseUtcAsBasisForDateFormat() {
+    assertEquals("Coordinated Universal Time",
+        factory.fastDateFormat.getTimeZone().getDisplayName());
+  }
+
+  @Test
+  public void indexNameShouldBePrefixDashFormattedTimestamp() {
+    long millis = 987654321L;
+    assertEquals("prefix-" + factory.fastDateFormat.format(millis),
+        factory.getIndexName("prefix", millis));
+  }
+
+  @Test
+  public void shouldEnsureTimestampHeaderPresentInTimestampedEvent() {
+    SimpleEvent base = new SimpleEvent();
+
+    TimestampedEvent timestampedEvent = new TimestampedEvent(base);
+    assertEquals(FIXED_TIME_MILLIS, timestampedEvent.getTimestamp());
+    assertEquals(String.valueOf(FIXED_TIME_MILLIS),
+        timestampedEvent.getHeaders().get("timestamp"));
+  }
+
+  @Test
+  public void shouldUseExistingTimestampHeaderInTimestampedEvent() {
+    SimpleEvent base = new SimpleEvent();
+    Map<String, String> headersWithTimestamp = Maps.newHashMap();
+    headersWithTimestamp.put("timestamp", "-321");
+    base.setHeaders(headersWithTimestamp);
+
+    TimestampedEvent timestampedEvent = new TimestampedEvent(base);
+    assertEquals(-321L, timestampedEvent.getTimestamp());
+    assertEquals("-321", timestampedEvent.getHeaders().get("timestamp"));
+  }
+
+  @Test
+  public void shouldUseExistingAtTimestampHeaderInTimestampedEvent() {
+    SimpleEvent base = new SimpleEvent();
+    Map<String, String> headersWithTimestamp = Maps.newHashMap();
+    headersWithTimestamp.put("@timestamp", "-999");
+    base.setHeaders(headersWithTimestamp);
+
+    TimestampedEvent timestampedEvent = new TimestampedEvent(base);
+    assertEquals(-999L, timestampedEvent.getTimestamp());
+    assertEquals("-999", timestampedEvent.getHeaders().get("@timestamp"));
+    assertNull(timestampedEvent.getHeaders().get("timestamp"));
+  }
+
+  @Test
+  public void shouldPreserveBodyAndNonTimestampHeadersInTimestampedEvent() {
+    SimpleEvent base = new SimpleEvent();
+    base.setBody(new byte[] {1,2,3,4});
+    Map<String, String> headersWithTimestamp = Maps.newHashMap();
+    headersWithTimestamp.put("foo", "bar");
+    base.setHeaders(headersWithTimestamp);
+
+    TimestampedEvent timestampedEvent = new TimestampedEvent(base);
+    assertEquals("bar", timestampedEvent.getHeaders().get("foo"));
+    assertArrayEquals(base.getBody(), timestampedEvent.getBody());
+  }
+
+  @Test
+  public void shouldSetIndexNameTypeAndSerializedEventIntoIndexRequest()
+      throws Exception {
+
+    String indexPrefix = "qwerty";
+    String indexType = "uiop";
+    Event event = new SimpleEvent();
+
+    IndexRequestBuilder indexRequestBuilder = factory.createIndexRequest(
+        FAKE_CLIENT, indexPrefix, indexType, event);
+
+    assertEquals(indexPrefix + '-'
+        + ElasticSearchIndexRequestBuilderFactory.df.format(FIXED_TIME_MILLIS),
+        indexRequestBuilder.request().index());
+    assertEquals(indexType, indexRequestBuilder.request().type());
+    assertArrayEquals(FakeEventSerializer.FAKE_BYTES,
+        indexRequestBuilder.request().source().array());
+  }
+
+  @Test
+  public void shouldSetIndexNameFromTimestampHeaderWhenPresent()
+      throws Exception {
+    String indexPrefix = "qwerty";
+    String indexType = "uiop";
+    Event event = new SimpleEvent();
+    event.getHeaders().put("timestamp", "1213141516");
+
+    IndexRequestBuilder indexRequestBuilder = factory.createIndexRequest(
+        FAKE_CLIENT, indexPrefix, indexType, event);
+
+    assertEquals(indexPrefix + '-'
+        + ElasticSearchIndexRequestBuilderFactory.df.format(1213141516L),
+        indexRequestBuilder.request().index());
+  }
+
+  @Test
+  public void shouldSetIndexNameTypeFromHeaderWhenPresent()
+      throws Exception {
+    String indexPrefix = "%{index-name}";
+    String indexType = "%{index-type}";
+    String indexValue = "testing-index-name-from-headers";
+    String typeValue = "testing-index-type-from-headers";
+
+    Event event = new SimpleEvent();
+    event.getHeaders().put("index-name", indexValue);
+    event.getHeaders().put("index-type", typeValue);
+
+    IndexRequestBuilder indexRequestBuilder = factory.createIndexRequest(
+        FAKE_CLIENT, indexPrefix, indexType, event);
+
+    assertEquals(indexValue + '-'
+        + ElasticSearchIndexRequestBuilderFactory.df.format(FIXED_TIME_MILLIS),
+        indexRequestBuilder.request().index());
+    assertEquals(typeValue, indexRequestBuilder.request().type());
+  }
+
+  @Test
+  public void shouldConfigureEventSerializer() throws Exception {
+    assertFalse(serializer.configuredWithContext);
+    factory.configure(new Context());
+    assertTrue(serializer.configuredWithContext);
+
+    assertFalse(serializer.configuredWithComponentConfiguration);
+    factory.configure(new SinkConfiguration("name"));
+    assertTrue(serializer.configuredWithComponentConfiguration);
+  }
+  /** Serializer stub: always writes FAKE_BYTES and records which configure overload was called. */
+  static class FakeEventSerializer implements ElasticSearchEventSerializer {
+
+    static final byte[] FAKE_BYTES = new byte[]{9, 8, 7, 6};
+    boolean configuredWithContext;
+    boolean configuredWithComponentConfiguration;
+
+    @Override
+    public BytesStream getContentBuilder(Event event) throws IOException {
+      FastByteArrayOutputStream fbaos = new FastByteArrayOutputStream(4);
+      fbaos.write(FAKE_BYTES);
+      return fbaos;
+    }
+
+    @Override
+    public void configure(Context arg0) {
+      configuredWithContext = true;
+    }
+
+    @Override
+    public void configure(ComponentConfiguration arg0) {
+      configuredWithComponentConfiguration = true;
+    }
+  }
+
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchLogStashEventSerializer.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchLogStashEventSerializer.java
new file mode 100644
index 0000000..65b4dab
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchLogStashEventSerializer.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import com.google.gson.JsonParser;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.event.EventBuilder;
+import org.elasticsearch.common.collect.Maps;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.junit.Test;
+
+import java.util.Date;
+import java.util.Map;
+
+import static org.apache.flume.sink.elasticsearch.ElasticSearchEventSerializer.charset;
+import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for ElasticSearchLogStashEventSerializer. Each case serializes an event that carries
+ * the same sample headers and compares the output, as parsed JSON, with a hand-built document.
+ */
+public class TestElasticSearchLogStashEventSerializer {
+
+  /** A plain-text body is expected verbatim in the "@message" field. */
+  @Test
+  public void testRoundTrip() throws Exception {
+    ElasticSearchLogStashEventSerializer fixture = new ElasticSearchLogStashEventSerializer();
+    Context context = new Context();
+    fixture.configure(context);
+
+    String message = "test body";
+    long timestamp = System.currentTimeMillis();
+    Event event = EventBuilder.withBody(message.getBytes(charset));
+    event.setHeaders(sampleHeaders(timestamp));
+
+    XContentBuilder expected = expectedDocument(message, timestamp);
+    XContentBuilder actual = fixture.getContentBuilder(event);
+
+    JsonParser parser = new JsonParser();
+    assertEquals(parser.parse(expected.string()), parser.parse(actual.string()));
+  }
+
+  /** A body that is not valid JSON is expected verbatim in the "@message" field as well. */
+  @Test
+  public void shouldHandleInvalidJSONDuringComplexParsing() throws Exception {
+    ElasticSearchLogStashEventSerializer fixture = new ElasticSearchLogStashEventSerializer();
+    Context context = new Context();
+    fixture.configure(context);
+
+    String message = "{flume: somethingnotvalid}";
+    long timestamp = System.currentTimeMillis();
+    Event event = EventBuilder.withBody(message.getBytes(charset));
+    event.setHeaders(sampleHeaders(timestamp));
+
+    XContentBuilder expected = expectedDocument(message, timestamp);
+    XContentBuilder actual = fixture.getContentBuilder(event);
+
+    JsonParser parser = new JsonParser();
+    assertEquals(parser.parse(expected.string()), parser.parse(actual.string()));
+  }
+
+  /**
+   * Builds the headers shared by both tests.
+   *
+   * @param timestamp epoch millis stored, as text, under the "timestamp" header
+   * @return a new mutable header map
+   */
+  private static Map<String, String> sampleHeaders(long timestamp) {
+    Map<String, String> headers = Maps.newHashMap();
+    headers.put("timestamp", String.valueOf(timestamp));
+    headers.put("source", "flume_tail_src");
+    headers.put("host", "test@localhost");
+    headers.put("src_path", "/tmp/test");
+    headers.put("headerNameOne", "headerValueOne");
+    headers.put("headerNameTwo", "headerValueTwo");
+    headers.put("type", "sometype");
+    return headers;
+  }
+
+  /**
+   * Builds the document expected for an event that carries the sample headers.
+   *
+   * @param message event body, expected under "@message"
+   * @param timestamp epoch millis used for "@timestamp" and the "timestamp" field
+   * @return the finished builder
+   */
+  private static XContentBuilder expectedDocument(String message, long timestamp)
+      throws Exception {
+    XContentBuilder expected = jsonBuilder().startObject();
+    expected.field("@message", new String(message.getBytes(charset), charset));
+    expected.field("@timestamp", new Date(timestamp));
+    expected.field("@source", "flume_tail_src");
+    expected.field("@type", "sometype");
+    expected.field("@source_host", "test@localhost");
+    expected.field("@source_path", "/tmp/test");
+
+    expected.startObject("@fields");
+    expected.field("timestamp", String.valueOf(timestamp));
+    expected.field("src_path", "/tmp/test");
+    expected.field("host", "test@localhost");
+    expected.field("headerNameTwo", "headerValueTwo");
+    expected.field("source", "flume_tail_src");
+    expected.field("headerNameOne", "headerValueOne");
+    expected.field("type", "sometype");
+    expected.endObject();
+    expected.endObject();
+    return expected;
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchSink.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchSink.java
new file mode 100644
index 0000000..69acc06
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchSink.java
@@ -0,0 +1,505 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import org.apache.commons.lang.time.FastDateFormat;
+import org.apache.flume.Channel;
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.Sink.Status;
+import org.apache.flume.Transaction;
+import org.apache.flume.conf.ComponentConfiguration;
+import org.apache.flume.conf.Configurable;
+import org.apache.flume.conf.Configurables;
+import org.apache.flume.event.EventBuilder;
+import org.elasticsearch.action.index.IndexRequestBuilder;
+import org.elasticsearch.client.Requests;
+import org.elasticsearch.common.UUID;
+import org.elasticsearch.common.io.BytesStream;
+import org.elasticsearch.common.io.FastByteArrayOutputStream;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.BATCH_SIZE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.CLUSTER_NAME;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.HOSTNAMES;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_NAME;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.INDEX_TYPE;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.SERIALIZER;
+import static org.apache.flume.sink.elasticsearch.ElasticSearchSinkConstants.TTL;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+public class TestElasticSearchSink extends AbstractElasticSearchSinkTest {
+
+ private ElasticSearchSink fixture;
+
+ @Before
+ public void init() throws Exception {
+ initDefaults();
+ createNodes();
+ fixture = new ElasticSearchSink(true);
+ fixture.setName("ElasticSearchSink-" + UUID.randomUUID().toString());
+ }
+
+ /**
+  * Runs after every test and delegates to the inherited
+  * {@code shutdownNodes()} helper.
+  */
+ @After
+ public void tearDown() throws Exception {
+   shutdownNodes();
+ }
+
+ /**
+  * Puts a single event on the channel, lets the sink process it and checks
+  * that both the match-all query and the body query report exactly one hit.
+  */
+ @Test
+ public void shouldIndexOneEvent() throws Exception {
+   Configurables.configure(fixture, new Context(parameters));
+   Channel channel = bindAndStartChannel(fixture);
+
+   Event event = EventBuilder.withBody("event #1 or 1".getBytes());
+
+   Transaction transaction = channel.getTransaction();
+   transaction.begin();
+   channel.put(event);
+   transaction.commit();
+   transaction.close();
+
+   fixture.process();
+   fixture.stop();
+
+   // Refresh the index before querying it.
+   client.admin()
+       .indices()
+       .refresh(Requests.refreshRequest(timestampedIndexName))
+       .actionGet();
+
+   assertMatchAllQuery(1, event);
+   assertBodyQuery(1, event);
+ }
+
+ /**
+  * Sends three events whose bodies are not well-formed JSON in one batch of
+  * three and checks that each of them is found by a query on the
+  * {@code @message} field.
+  */
+ @Test
+ public void shouldIndexInvalidComplexJsonBody() throws Exception {
+   parameters.put(BATCH_SIZE, "3");
+   Configurables.configure(fixture, new Context(parameters));
+   Channel channel = bindAndStartChannel(fixture);
+
+   Event first = EventBuilder.withBody("TEST1 {test}".getBytes());
+   Event second = EventBuilder.withBody("{test: TEST2 }".getBytes());
+   Event third = EventBuilder.withBody("{\"test\":{ TEST3 {test} }}".getBytes());
+
+   Transaction transaction = channel.getTransaction();
+   transaction.begin();
+   channel.put(first);
+   channel.put(second);
+   channel.put(third);
+   transaction.commit();
+   transaction.close();
+
+   fixture.process();
+   fixture.stop();
+
+   // Refresh the index before querying it.
+   client.admin()
+       .indices()
+       .refresh(Requests.refreshRequest(timestampedIndexName))
+       .actionGet();
+
+   assertMatchAllQuery(3);
+   assertSearch(1, performSearch(QueryBuilders.fieldQuery("@message", "TEST1")), null, first);
+   assertSearch(1, performSearch(QueryBuilders.fieldQuery("@message", "TEST2")), null, second);
+   assertSearch(1, performSearch(QueryBuilders.fieldQuery("@message", "TEST3")), null, third);
+ }
+
+ /**
+  * Indexes one event whose body is a valid JSON object and checks that it
+  * is returned, with the expected body map, both by a match-all query and by
+  * a query on the nested {@code @message.event} field.
+  */
+ @Test
+ public void shouldIndexComplexJsonEvent() throws Exception {
+   Configurables.configure(fixture, new Context(parameters));
+   Channel channel = bindAndStartChannel(fixture);
+
+   Event event = EventBuilder.withBody(
+       "{\"event\":\"json content\",\"num\":1}".getBytes());
+
+   // Body the search helpers are expected to find for the indexed event.
+   Map expectedBody = new HashMap();
+   expectedBody.put("event", "json content");
+   expectedBody.put("num", 1);
+
+   Transaction transaction = channel.getTransaction();
+   transaction.begin();
+   channel.put(event);
+   transaction.commit();
+   transaction.close();
+
+   fixture.process();
+   fixture.stop();
+
+   // Refresh the index before querying it.
+   client.admin()
+       .indices()
+       .refresh(Requests.refreshRequest(timestampedIndexName))
+       .actionGet();
+
+   assertSearch(1, performSearch(QueryBuilders.matchAllQuery()), expectedBody, event);
+   assertSearch(1, performSearch(QueryBuilders.fieldQuery("@message.event", "json")),
+       expectedBody, event);
+ }
+
+ /**
+  * Indexes five events with a batch size of five, so a single
+  * {@code process()} call is enough, then checks all five are searchable.
+  */
+ @Test
+ public void shouldIndexFiveEvents() throws Exception {
+   // Batch size equals the event count: one process() call drains the channel.
+   parameters.put(BATCH_SIZE, "5");
+   Configurables.configure(fixture, new Context(parameters));
+   Channel channel = bindAndStartChannel(fixture);
+
+   final int numberOfEvents = 5;
+   Event[] events = new Event[numberOfEvents];
+   for (int i = 0; i < events.length; i++) {
+     String body = "event #" + i + " of " + numberOfEvents;
+     events[i] = EventBuilder.withBody(body.getBytes());
+   }
+
+   Transaction transaction = channel.getTransaction();
+   transaction.begin();
+   for (Event event : events) {
+     channel.put(event);
+   }
+   transaction.commit();
+   transaction.close();
+
+   fixture.process();
+   fixture.stop();
+
+   // Refresh the index before querying it.
+   client.admin()
+       .indices()
+       .refresh(Requests.refreshRequest(timestampedIndexName))
+       .actionGet();
+
+   assertMatchAllQuery(numberOfEvents, events);
+   assertBodyQuery(5, events);
+ }
+
+ /**
+  * Indexes five events with a batch size of two and checks that the sink
+  * needs exactly three {@code process()} calls before it reports
+  * {@code BACKOFF}, and that all five events are searchable afterwards.
+  */
+ @Test
+ public void shouldIndexFiveEventsOverThreeBatches() throws Exception {
+   parameters.put(BATCH_SIZE, "2");
+   Configurables.configure(fixture, new Context(parameters));
+   Channel channel = bindAndStartChannel(fixture);
+
+   final int numberOfEvents = 5;
+   Event[] events = new Event[numberOfEvents];
+   for (int i = 0; i < events.length; i++) {
+     String body = "event #" + i + " of " + numberOfEvents;
+     events[i] = EventBuilder.withBody(body.getBytes());
+   }
+
+   Transaction transaction = channel.getTransaction();
+   transaction.begin();
+   for (Event event : events) {
+     channel.put(event);
+   }
+   transaction.commit();
+   transaction.close();
+
+   // Keep processing until the sink reports BACKOFF, counting the calls.
+   int processCalls = 0;
+   Status status;
+   do {
+     processCalls++;
+     status = fixture.process();
+   } while (status != Status.BACKOFF);
+   fixture.stop();
+
+   assertEquals(3, processCalls);
+
+   // Refresh the index before querying it.
+   client.admin()
+       .indices()
+       .refresh(Requests.refreshRequest(timestampedIndexName))
+       .actionGet();
+   assertMatchAllQuery(numberOfEvents, events);
+   assertBodyQuery(5, events);
+ }
+
+ /**
+  * Configures a sink with explicit host, cluster name, index name, index
+  * type and a TTL of "10", then checks each getter returns the configured
+  * value (the TTL is expected as ten days in milliseconds).
+  */
+ @Test
+ public void shouldParseConfiguration() {
+   parameters.put(HOSTNAMES, "10.5.5.27");
+   parameters.put(CLUSTER_NAME, "testing-cluster-name");
+   parameters.put(INDEX_NAME, "testing-index-name");
+   parameters.put(INDEX_TYPE, "testing-index-type");
+   parameters.put(TTL, "10");
+
+   fixture = new ElasticSearchSink();
+   fixture.configure(new Context(parameters));
+
+   long expectedTtlMillis = TimeUnit.DAYS.toMillis(10);
+   String[] expectedAddresses = { "10.5.5.27" };
+
+   assertEquals("testing-cluster-name", fixture.getClusterName());
+   assertEquals("testing-index-name", fixture.getIndexName());
+   assertEquals("testing-index-type", fixture.getIndexType());
+   assertEquals(expectedTtlMillis, fixture.getTTLMs());
+   assertArrayEquals(expectedAddresses, fixture.getServerAddresses());
+ }
+
+ /**
+  * Removes the index name, index type and cluster name from the parameters
+  * and checks the sink reports the {@code DEFAULT_*} values instead, while
+  * still using the configured host.
+  */
+ @Test
+ public void shouldParseConfigurationUsingDefaults() {
+   parameters.put(HOSTNAMES, "10.5.5.27");
+   parameters.remove(INDEX_NAME);
+   parameters.remove(INDEX_TYPE);
+   parameters.remove(CLUSTER_NAME);
+
+   fixture = new ElasticSearchSink();
+   fixture.configure(new Context(parameters));
+
+   String[] expectedAddresses = { "10.5.5.27" };
+
+   assertEquals(DEFAULT_INDEX_NAME, fixture.getIndexName());
+   assertEquals(DEFAULT_INDEX_TYPE, fixture.getIndexType());
+   assertEquals(DEFAULT_CLUSTER_NAME, fixture.getClusterName());
+   assertArrayEquals(expectedAddresses, fixture.getServerAddresses());
+ }
+
+ /**
+  * Checks that a comma-separated host list without ports is split into one
+  * server address per host.
+  */
+ @Test
+ public void shouldParseMultipleHostUsingDefaultPorts() {
+   parameters.put(HOSTNAMES, "10.5.5.27,10.5.5.28,10.5.5.29");
+
+   fixture = new ElasticSearchSink();
+   fixture.configure(new Context(parameters));
+
+   String[] expectedAddresses = { "10.5.5.27", "10.5.5.28", "10.5.5.29" };
+   String[] actualAddresses = fixture.getServerAddresses();
+
+   assertArrayEquals(expectedAddresses, actualAddresses);
+ }
+
+ /**
+  * Checks that whitespace around the hosts of a comma-separated list is
+  * not part of the resulting server addresses.
+  */
+ @Test
+ public void shouldParseMultipleHostWithWhitespacesUsingDefaultPorts() {
+   parameters.put(HOSTNAMES, " 10.5.5.27 , 10.5.5.28 , 10.5.5.29 ");
+
+   fixture = new ElasticSearchSink();
+   fixture.configure(new Context(parameters));
+
+   String[] expectedAddresses = { "10.5.5.27", "10.5.5.28", "10.5.5.29" };
+   String[] actualAddresses = fixture.getServerAddresses();
+
+   assertArrayEquals(expectedAddresses, actualAddresses);
+ }
+
+ /**
+  * Checks that a comma-separated list of {@code host:port} entries is kept
+  * as one {@code host:port} server address per entry.
+  */
+ @Test
+ public void shouldParseMultipleHostAndPorts() {
+   parameters.put(HOSTNAMES, "10.5.5.27:9300,10.5.5.28:9301,10.5.5.29:9302");
+
+   fixture = new ElasticSearchSink();
+   fixture.configure(new Context(parameters));
+
+   String[] expectedAddresses = { "10.5.5.27:9300", "10.5.5.28:9301", "10.5.5.29:9302" };
+   String[] actualAddresses = fixture.getServerAddresses();
+
+   assertArrayEquals(expectedAddresses, actualAddresses);
+ }
+
+ /**
+  * Checks that whitespace around hosts, colons and ports is not part of
+  * the resulting {@code host:port} server addresses.
+  */
+ @Test
+ public void shouldParseMultipleHostAndPortsWithWhitespaces() {
+   parameters.put(HOSTNAMES,
+       " 10.5.5.27 : 9300 , 10.5.5.28 : 9301 , 10.5.5.29 : 9302 ");
+
+   fixture = new ElasticSearchSink();
+   fixture.configure(new Context(parameters));
+
+   String[] expectedAddresses = { "10.5.5.27:9300", "10.5.5.28:9301", "10.5.5.29:9302" };
+   String[] actualAddresses = fixture.getServerAddresses();
+
+   assertArrayEquals(expectedAddresses, actualAddresses);
+ }
+
+ /**
+  * Configures the sink with
+  * {@link CustomElasticSearchIndexRequestBuilderFactory} as its serializer,
+  * processes one event and checks the factory recorded the index name, index
+  * type and event body it was handed, and that it was configured with a
+  * context.
+  */
+ @Test
+ public void shouldAllowCustomElasticSearchIndexRequestBuilderFactory()
+     throws Exception {
+   String factoryClassName = CustomElasticSearchIndexRequestBuilderFactory.class.getName();
+   parameters.put(SERIALIZER, factoryClassName);
+
+   fixture.configure(new Context(parameters));
+
+   Channel channel = bindAndStartChannel(fixture);
+   Event event = EventBuilder.withBody("{ foo: \"bar\" }".getBytes());
+
+   Transaction transaction = channel.getTransaction();
+   transaction.begin();
+   channel.put(event);
+   transaction.commit();
+   transaction.close();
+
+   fixture.process();
+   fixture.stop();
+
+   // NOTE(review): the time suffix presumably comes from a fixed clock set up
+   // by the base test class combined with the factory's "HH_mm_ss_SSS"
+   // format - confirm against AbstractElasticSearchSinkTest.
+   String expectedIndexName = fixture.getIndexName() + "-05_17_36_789";
+
+   assertEquals(expectedIndexName,
+       CustomElasticSearchIndexRequestBuilderFactory.actualIndexName);
+   assertEquals(fixture.getIndexType(),
+       CustomElasticSearchIndexRequestBuilderFactory.actualIndexType);
+   assertArrayEquals(event.getBody(),
+       CustomElasticSearchIndexRequestBuilderFactory.actualEventBody);
+   assertTrue(CustomElasticSearchIndexRequestBuilderFactory.hasContext);
+ }
+
+ /**
+  * Checks that TTL values with an explicit unit suffix (ms, s, m, h, d, w)
+  * are converted to the expected number of milliseconds, and that a TTL
+  * without a suffix is treated as a number of days.
+  */
+ @Test
+ public void shouldParseFullyQualifiedTTLs() {
+   // Configured TTL string -> expected TTL in milliseconds.
+   Map<String, Long> expectedTtlMillis = new HashMap<String, Long>();
+   expectedTtlMillis.put("1ms", 1L);
+   expectedTtlMillis.put("1s", 1000L);
+   expectedTtlMillis.put("1m", 60000L);
+   expectedTtlMillis.put("1h", 3600000L);
+   expectedTtlMillis.put("1d", 86400000L);
+   expectedTtlMillis.put("1w", 604800000L);
+   expectedTtlMillis.put("1", 86400000L);
+
+   parameters.put(HOSTNAMES, "10.5.5.27");
+   parameters.put(CLUSTER_NAME, "testing-cluster-name");
+   parameters.put(INDEX_NAME, "testing-index-name");
+   parameters.put(INDEX_TYPE, "testing-index-type");
+
+   String[] expectedAddresses = { "10.5.5.27" };
+
+   for (Map.Entry<String, Long> ttlCase : expectedTtlMillis.entrySet()) {
+     String ttl = ttlCase.getKey();
+     parameters.put(TTL, ttl);
+
+     // A fresh sink per case so no state carries over between TTL values.
+     fixture = new ElasticSearchSink();
+     fixture.configure(new Context(parameters));
+
+     assertEquals("testing-cluster-name", fixture.getClusterName());
+     assertEquals("testing-index-name", fixture.getIndexName());
+     assertEquals("testing-index-type", fixture.getIndexType());
+     assertEquals("Unexpected TTL in ms for configured value '" + ttl + "'",
+         ttlCase.getValue().longValue(), fixture.getTTLMs());
+     assertArrayEquals(expectedAddresses, fixture.getServerAddresses());
+   }
+ }
+
+ /**
+  * Index request builder factory used by
+  * {@code shouldAllowCustomElasticSearchIndexRequestBuilderFactory}. It
+  * records, in static fields, the index name, index type and event body it
+  * was asked to prepare a request for, and whether it was configured with a
+  * {@link Context}, so the test can assert on them afterwards.
+  */
+ public static final class CustomElasticSearchIndexRequestBuilderFactory
+     extends AbstractElasticSearchIndexRequestBuilderFactory {
+
+   // Static so the test can read them without a reference to the instance
+   // the sink creates; they are never reset in this class.
+   static String actualIndexName;
+   static String actualIndexType;
+   static byte[] actualEventBody;
+   static boolean hasContext;
+
+   /** Passes an {@code HH_mm_ss_SSS} date format in the EST5EDT zone to the base class. */
+   public CustomElasticSearchIndexRequestBuilderFactory() {
+     super(FastDateFormat.getInstance("HH_mm_ss_SSS", TimeZone.getTimeZone("EST5EDT")));
+   }
+
+   /**
+    * Records the arguments for later assertions and fills the request with
+    * the given index, type and the raw event body as source.
+    */
+   @Override
+   protected void prepareIndexRequest(IndexRequestBuilder indexRequest, String indexName,
+       String indexType, Event event) throws IOException {
+     actualIndexName = indexName;
+     actualIndexType = indexType;
+     actualEventBody = event.getBody();
+
+     indexRequest
+         .setIndex(indexName)
+         .setType(indexType)
+         .setSource(event.getBody());
+   }
+
+   /** Only remembers that a context-based configuration call happened. */
+   @Override
+   public void configure(Context arg0) {
+     hasContext = true;
+   }
+
+   /** Intentionally does nothing. */
+   @Override
+   public void configure(ComponentConfiguration arg0) {
+     // no-op
+   }
+ }
+
+ /**
+  * Checks that configuring the sink with a serializer class of the wrong
+  * type is rejected: a class that is not a {@code Configurable} must cause a
+  * {@link ClassCastException}, and a {@code Configurable} that is not a
+  * supported serializer must cause an {@link IllegalArgumentException}.
+  *
+  * <p>Each call is followed by an explicit {@code fail(...)} so the test
+  * cannot pass silently when no exception is thrown.
+  */
+ @Test
+ public void shouldFailToConfigureWithInvalidSerializerClass()
+     throws Exception {
+
+   parameters.put(SERIALIZER, "java.lang.String");
+   try {
+     Configurables.configure(fixture, new Context(parameters));
+     org.junit.Assert.fail(
+         "Expected ClassCastException for serializer class java.lang.String");
+   } catch (ClassCastException e) {
+     // expected
+   }
+
+   parameters.put(SERIALIZER, FakeConfigurable.class.getName());
+   try {
+     Configurables.configure(fixture, new Context(parameters));
+     org.junit.Assert.fail(
+         "Expected IllegalArgumentException for serializer class "
+             + FakeConfigurable.class.getName());
+   } catch (IllegalArgumentException e) {
+     // expected
+   }
+ }
+
+ /**
+  * Checks that the sink has no event serializer before configuration and
+  * that, once configured with {@code FakeEventSerializer} as serializer
+  * class, it exposes an instance of that class.
+  */
+ @Test
+ public void shouldUseSpecifiedSerializer() throws Exception {
+   Context serializerContext = new Context();
+   serializerContext.put(SERIALIZER,
+       "org.apache.flume.sink.elasticsearch.FakeEventSerializer");
+
+   assertNull(fixture.getEventSerializer());
+
+   fixture.configure(serializerContext);
+
+   assertTrue(fixture.getEventSerializer() instanceof FakeEventSerializer);
+ }
+
+ /**
+  * Checks that the sink has no index name builder before configuration and
+  * that, once configured with {@code FakeIndexNameBuilder} as index name
+  * builder class, it exposes an instance of that class.
+  */
+ @Test
+ public void shouldUseSpecifiedIndexNameBuilder() throws Exception {
+   Context builderContext = new Context();
+   builderContext.put(ElasticSearchSinkConstants.INDEX_NAME_BUILDER,
+       "org.apache.flume.sink.elasticsearch.FakeIndexNameBuilder");
+
+   assertNull(fixture.getIndexNameBuilder());
+
+   fixture.configure(builderContext);
+
+   assertTrue(fixture.getIndexNameBuilder() instanceof FakeIndexNameBuilder);
+ }
+
+ /**
+  * A {@link Configurable} that does nothing. Used by
+  * {@code shouldFailToConfigureWithInvalidSerializerClass} as a serializer
+  * class name for which configuration is expected to be rejected.
+  */
+ public static class FakeConfigurable implements Configurable {
+
+   /** Intentionally does nothing. */
+   @Override
+   public void configure(Context arg0) {
+     // no-op
+   }
+ }
+}
+
+/**
+ * Internal class. Fake event serializer used for tests: it ignores the
+ * event, always produces the same four bytes, and records which
+ * {@code configure} overloads were invoked.
+ */
+class FakeEventSerializer implements ElasticSearchEventSerializer {
+
+  /** The fixed content written for every event. */
+  static final byte[] FAKE_BYTES = new byte[] { 9, 8, 7, 6 };
+
+  /** Set once {@link #configure(Context)} has been called. */
+  boolean configuredWithContext;
+
+  /** Set once {@link #configure(ComponentConfiguration)} has been called. */
+  boolean configuredWithComponentConfiguration;
+
+  /**
+   * Returns a new stream containing {@link #FAKE_BYTES}; the event itself
+   * is not read.
+   */
+  @Override
+  public BytesStream getContentBuilder(Event event) throws IOException {
+    FastByteArrayOutputStream stream = new FastByteArrayOutputStream(4);
+    stream.write(FAKE_BYTES);
+    return stream;
+  }
+
+  /** Records that the context-based overload was called. */
+  @Override
+  public void configure(Context arg0) {
+    configuredWithContext = true;
+  }
+
+  /** Records that the component-configuration overload was called. */
+  @Override
+  public void configure(ComponentConfiguration arg0) {
+    configuredWithComponentConfiguration = true;
+  }
+}
+
+/**
+ * Internal class. Fake index name builder used only for tests: it returns
+ * the constant {@link #INDEX_NAME} as both index name and index prefix for
+ * every event and ignores all configuration.
+ */
+class FakeIndexNameBuilder implements IndexNameBuilder {
+
+  /** The value returned for every index name and prefix lookup. */
+  static final String INDEX_NAME = "index_name";
+
+  /** Returns {@link #INDEX_NAME} regardless of the event. */
+  @Override
+  public String getIndexName(Event event) {
+    return INDEX_NAME;
+  }
+
+  /** Returns {@link #INDEX_NAME} regardless of the event. */
+  @Override
+  public String getIndexPrefix(Event event) {
+    return INDEX_NAME;
+  }
+
+  /** Intentionally does nothing. */
+  @Override
+  public void configure(Context context) {
+    // no-op
+  }
+
+  /** Intentionally does nothing. */
+  @Override
+  public void configure(ComponentConfiguration conf) {
+    // no-op
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchSinkCreation.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchSinkCreation.java
new file mode 100644
index 0000000..2a36439
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TestElasticSearchSinkCreation.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import org.apache.flume.FlumeException;
+import org.apache.flume.Sink;
+import org.apache.flume.SinkFactory;
+import org.apache.flume.sink.DefaultSinkFactory;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestElasticSearchSinkCreation {
+
+ private SinkFactory sinkFactory;
+
+ @Before
+ public void setUp() {
+ sinkFactory = new DefaultSinkFactory();
+ }
+
+ private void verifySinkCreation(String name, String type,
+ Class> typeClass) throws FlumeException {
+ Sink sink = sinkFactory.create(name, type);
+ Assert.assertNotNull(sink);
+ Assert.assertTrue(typeClass.isInstance(sink));
+ }
+
+ @Test
+ public void testSinkCreation() {
+ verifySinkCreation("elasticsearch-sink", "elasticsearch", ElasticSearchSink.class);
+ }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TimeBasedIndexNameBuilderTest.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TimeBasedIndexNameBuilderTest.java
new file mode 100644
index 0000000..678342a
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TimeBasedIndexNameBuilderTest.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import org.apache.flume.Context;
+import org.apache.flume.Event;
+import org.apache.flume.event.SimpleEvent;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for {@link TimeBasedIndexNameBuilder} configured with the index
+ * name "prefix".
+ */
+public class TimeBasedIndexNameBuilderTest {
+
+  private TimeBasedIndexNameBuilder indexNameBuilder;
+
+  /** Creates a builder configured with "prefix" as its index name. */
+  @Before
+  public void setUp() throws Exception {
+    Context context = new Context();
+    context.put(ElasticSearchSinkConstants.INDEX_NAME, "prefix");
+    indexNameBuilder = new TimeBasedIndexNameBuilder();
+    indexNameBuilder.configure(context);
+  }
+
+  /**
+   * The builder's date format must be based on UTC. The display name is
+   * requested for {@code Locale.ENGLISH} explicitly: the no-argument
+   * {@code getDisplayName()} uses the JVM default locale, which would make
+   * the comparison with the English literal fail on non-English machines.
+   */
+  @Test
+  public void shouldUseUtcAsBasisForDateFormat() {
+    assertEquals("Coordinated Universal Time",
+        indexNameBuilder.getFastDateFormat().getTimeZone()
+            .getDisplayName(java.util.Locale.ENGLISH));
+  }
+
+  /**
+   * The index name must be the configured prefix, a dash, and the event's
+   * "timestamp" header formatted with the builder's own date format.
+   */
+  @Test
+  public void indexNameShouldBePrefixDashFormattedTimestamp() {
+    long time = 987654321L;
+    Event event = new SimpleEvent();
+    Map<String, String> headers = new HashMap<String, String>();
+    headers.put("timestamp", Long.toString(time));
+    event.setHeaders(headers);
+    assertEquals("prefix-" + indexNameBuilder.getFastDateFormat().format(time),
+        indexNameBuilder.getIndexName(event));
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TimestampedEventTest.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TimestampedEventTest.java
new file mode 100644
index 0000000..bef2ac6
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/TimestampedEventTest.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch;
+
+import com.google.common.collect.Maps;
+import org.apache.flume.event.SimpleEvent;
+import org.joda.time.DateTimeUtils;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.Map;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+/**
+ * Tests for {@link TimestampedEvent}. Joda-Time's clock is fixed to
+ * {@link #FIXED_TIME_MILLIS} for each test and restored afterwards.
+ */
+public class TimestampedEventTest {
+  static final long FIXED_TIME_MILLIS = 123456789L;
+
+  /** Freezes Joda-Time's notion of "now" so timestamps are predictable. */
+  @Before
+  public void setFixedJodaTime() {
+    DateTimeUtils.setCurrentMillisFixed(FIXED_TIME_MILLIS);
+  }
+
+  /**
+   * Restores the system clock. The fixed time is JVM-global state in
+   * Joda-Time; without this reset it would leak into every test that runs
+   * later in the same JVM.
+   */
+  @org.junit.After
+  public void resetJodaTime() {
+    DateTimeUtils.setCurrentMillisSystem();
+  }
+
+  /** Without any timestamp header, the current (fixed) time is used and added as header. */
+  @Test
+  public void shouldEnsureTimestampHeaderPresentInTimestampedEvent() {
+    SimpleEvent base = new SimpleEvent();
+
+    TimestampedEvent timestampedEvent = new TimestampedEvent(base);
+    assertEquals(FIXED_TIME_MILLIS, timestampedEvent.getTimestamp());
+    assertEquals(String.valueOf(FIXED_TIME_MILLIS),
+        timestampedEvent.getHeaders().get("timestamp"));
+  }
+
+  /** An existing "timestamp" header is used as the timestamp and kept unchanged. */
+  @Test
+  public void shouldUseExistingTimestampHeaderInTimestampedEvent() {
+    SimpleEvent base = new SimpleEvent();
+    Map<String, String> headersWithTimestamp = Maps.newHashMap();
+    headersWithTimestamp.put("timestamp", "-321");
+    base.setHeaders(headersWithTimestamp);
+
+    TimestampedEvent timestampedEvent = new TimestampedEvent(base);
+    assertEquals(-321L, timestampedEvent.getTimestamp());
+    assertEquals("-321", timestampedEvent.getHeaders().get("timestamp"));
+  }
+
+  /**
+   * An existing "@timestamp" header is used as the timestamp, is kept
+   * unchanged, and no separate "timestamp" header is added.
+   */
+  @Test
+  public void shouldUseExistingAtTimestampHeaderInTimestampedEvent() {
+    SimpleEvent base = new SimpleEvent();
+    Map<String, String> headersWithTimestamp = Maps.newHashMap();
+    headersWithTimestamp.put("@timestamp", "-999");
+    base.setHeaders(headersWithTimestamp);
+
+    TimestampedEvent timestampedEvent = new TimestampedEvent(base);
+    assertEquals(-999L, timestampedEvent.getTimestamp());
+    assertEquals("-999", timestampedEvent.getHeaders().get("@timestamp"));
+    assertNull(timestampedEvent.getHeaders().get("timestamp"));
+  }
+
+  /** Wrapping an event must keep its body and its other headers intact. */
+  @Test
+  public void shouldPreserveBodyAndNonTimestampHeadersInTimestampedEvent() {
+    SimpleEvent base = new SimpleEvent();
+    base.setBody(new byte[] {1, 2, 3, 4});
+    Map<String, String> headersWithoutTimestamp = Maps.newHashMap();
+    headersWithoutTimestamp.put("foo", "bar");
+    base.setHeaders(headersWithoutTimestamp);
+
+    TimestampedEvent timestampedEvent = new TimestampedEvent(base);
+    assertEquals("bar", timestampedEvent.getHeaders().get("foo"));
+    assertArrayEquals(base.getBody(), timestampedEvent.getBody());
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/RoundRobinListTest.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/RoundRobinListTest.java
new file mode 100644
index 0000000..0d1d092
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/RoundRobinListTest.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2014 Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flume.sink.elasticsearch.client;
+
+import java.util.Arrays;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class RoundRobinListTest {
+
+ private RoundRobinList fixture;
+
+ @Before
+ public void setUp() {
+ fixture = new RoundRobinList(Arrays.asList("test1", "test2"));
+ }
+
+ @Test
+ public void shouldReturnNextElement() {
+ assertEquals("test1", fixture.get());
+ assertEquals("test2", fixture.get());
+ assertEquals("test1", fixture.get());
+ assertEquals("test2", fixture.get());
+ assertEquals("test1", fixture.get());
+ }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchClientFactory.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchClientFactory.java
new file mode 100644
index 0000000..c3f07b0
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchClientFactory.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch.client;
+
+import org.apache.flume.sink.elasticsearch.ElasticSearchEventSerializer;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mock;
+
+import static org.hamcrest.core.IsInstanceOf.instanceOf;
+import static org.junit.Assert.assertThat;
+import static org.mockito.MockitoAnnotations.initMocks;
+
+/**
+ * Tests that {@link ElasticSearchClientFactory} returns the client
+ * implementation matching the requested client type and rejects unknown
+ * client types.
+ */
+public class TestElasticSearchClientFactory {
+
+  ElasticSearchClientFactory factory;
+
+  @Mock
+  ElasticSearchEventSerializer serializer;
+
+  /** Initialises the Mockito mocks and creates the factory under test. */
+  @Before
+  public void setUp() {
+    initMocks(this);
+    factory = new ElasticSearchClientFactory();
+  }
+
+  /** The transport client type must yield an {@link ElasticSearchTransportClient}. */
+  @Test
+  public void shouldReturnTransportClient() throws Exception {
+    String[] hostNames = { "127.0.0.1" };
+
+    Object client = factory.getClient(
+        ElasticSearchClientFactory.TransportClient, hostNames, "test", serializer, null);
+
+    assertThat(client, instanceOf(ElasticSearchTransportClient.class));
+  }
+
+  /** The REST client type must yield an {@link ElasticSearchRestClient}. */
+  @Test
+  public void shouldReturnRestClient() throws NoSuchClientTypeException {
+    String[] hostNames = { "127.0.0.1" };
+
+    Object client = factory.getClient(
+        ElasticSearchClientFactory.RestClient, hostNames, "test", serializer, null);
+
+    assertThat(client, instanceOf(ElasticSearchRestClient.class));
+  }
+
+  /** An unknown client type must be rejected with {@link NoSuchClientTypeException}. */
+  @Test(expected = NoSuchClientTypeException.class)
+  public void shouldThrowNoSuchClientTypeException() throws NoSuchClientTypeException {
+    String[] hostNames = { "127.0.0.1" };
+
+    factory.getClient("not_existing_client", hostNames, "test", null, null);
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchRestClient.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchRestClient.java
new file mode 100644
index 0000000..9551c81
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchRestClient.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch.client;
+
+import com.google.common.base.Splitter;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParser;
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.sink.elasticsearch.ElasticSearchEventSerializer;
+import org.apache.flume.sink.elasticsearch.IndexNameBuilder;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.http.HttpStatus;
+import org.apache.http.StatusLine;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.client.methods.HttpUriRequest;
+import org.apache.http.util.EntityUtils;
+import org.elasticsearch.common.bytes.BytesArray;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.io.BytesStream;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mock;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertTrue;
+import static org.mockito.Mockito.any;
+import static org.mockito.Mockito.isA;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+import static org.mockito.MockitoAnnotations.initMocks;
+
+/** Unit tests for ElasticSearchRestClient, run against a mocked HttpClient. */
+public class TestElasticSearchRestClient {
+
+  private ElasticSearchRestClient fixture;
+
+  @Mock
+  private ElasticSearchEventSerializer serializer;
+
+  @Mock
+  private IndexNameBuilder nameBuilder;
+
+  @Mock
+  private Event event;
+
+  @Mock
+  private HttpClient httpClient;
+
+  @Mock
+  private HttpResponse httpResponse;
+
+  @Mock
+  private StatusLine httpStatus;
+
+  @Mock
+  private HttpEntity httpEntity;
+
+  private static final String INDEX_NAME = "foo_index";
+  private static final String MESSAGE_CONTENT = "{\"body\":\"test\"}";
+  private static final String[] HOSTS = {"host1", "host2"};
+
+  /** Stubs the serializer and index-name builder, then builds the client under test. */
+  @Before
+  public void setUp() throws IOException {
+    initMocks(this);
+    BytesReference bytesReference = mock(BytesReference.class);
+    BytesStream bytesStream = mock(BytesStream.class);
+
+    when(nameBuilder.getIndexName(any(Event.class))).thenReturn(INDEX_NAME);
+    when(bytesReference.toBytesArray()).thenReturn(new BytesArray(MESSAGE_CONTENT));
+    when(bytesStream.bytes()).thenReturn(bytesReference);
+    when(serializer.getContentBuilder(any(Event.class))).thenReturn(bytesStream);
+    fixture = new ElasticSearchRestClient(HOSTS, serializer, httpClient);
+  }
+
+  /** Makes every request return httpResponse; its getStatusCode() answers follow the sequence. */
+  private void stubStatusCodes(Integer first, Integer... rest) throws IOException {
+    when(httpStatus.getStatusCode()).thenReturn(first, rest);
+    when(httpResponse.getStatusLine()).thenReturn(httpStatus);
+    when(httpClient.execute(any(HttpUriRequest.class))).thenReturn(httpResponse);
+  }
+
+  /** With ttl = -1 the bulk action line must carry only _type and _index. */
+  @Test
+  public void shouldAddNewEventWithoutTTL() throws Exception {
+    ArgumentCaptor<HttpPost> argument = ArgumentCaptor.forClass(HttpPost.class);
+    stubStatusCodes(HttpStatus.SC_OK);
+
+    fixture.addEvent(event, nameBuilder, "bar_type", -1);
+    fixture.execute();
+
+    verify(httpClient).execute(isA(HttpUriRequest.class));
+    verify(httpClient).execute(argument.capture());
+
+    assertEquals("http://host1/_bulk", argument.getValue().getURI().toString());
+    assertTrue(verifyJsonEvents("{\"index\":{\"_type\":\"bar_type\", \"_index\":\"foo_index\"}}\n",
+        MESSAGE_CONTENT, EntityUtils.toString(argument.getValue().getEntity())));
+  }
+
+  /** With ttl = 123 the bulk action line must additionally carry _ttl. */
+  @Test
+  public void shouldAddNewEventWithTTL() throws Exception {
+    ArgumentCaptor<HttpPost> argument = ArgumentCaptor.forClass(HttpPost.class);
+    stubStatusCodes(HttpStatus.SC_OK);
+
+    fixture.addEvent(event, nameBuilder, "bar_type", 123);
+    fixture.execute();
+
+    verify(httpClient).execute(isA(HttpUriRequest.class));
+    verify(httpClient).execute(argument.capture());
+
+    assertEquals("http://host1/_bulk", argument.getValue().getURI().toString());
+    assertTrue(verifyJsonEvents(
+        "{\"index\":{\"_type\":\"bar_type\",\"_index\":\"foo_index\",\"_ttl\":\"123\"}}\n",
+        MESSAGE_CONTENT, EntityUtils.toString(argument.getValue().getEntity())));
+  }
+
+  /** Compares the first two lines of actual, parsed as JSON, with the expected action and body. */
+  private boolean verifyJsonEvents(String expectedIndex, String expectedBody, String actual) {
+    Iterator<String> it = Splitter.on("\n").split(actual).iterator();
+    JsonParser parser = new JsonParser();
+    JsonObject[] arr = new JsonObject[2];
+    for (int i = 0; i < 2; i++) {
+      arr[i] = (JsonObject) parser.parse(it.next());
+    }
+    return arr[0].equals(parser.parse(expectedIndex)) && arr[1].equals(parser.parse(expectedBody));
+  }
+
+  /** An HTTP 500 on every attempt must surface as EventDeliveryException. */
+  @Test(expected = EventDeliveryException.class)
+  public void shouldThrowEventDeliveryException() throws Exception {
+    stubStatusCodes(HttpStatus.SC_INTERNAL_SERVER_ERROR);
+
+    fixture.addEvent(event, nameBuilder, "bar_type", 123);
+    fixture.execute();
+  }
+
+  /** After an HTTP 500 on the first attempt the bulk request must be re-sent to the next host. */
+  @Test
+  public void shouldRetryBulkOperation() throws Exception {
+    ArgumentCaptor<HttpPost> argument = ArgumentCaptor.forClass(HttpPost.class);
+    stubStatusCodes(HttpStatus.SC_INTERNAL_SERVER_ERROR, HttpStatus.SC_OK);
+
+    fixture.addEvent(event, nameBuilder, "bar_type", 123);
+    fixture.execute();
+
+    verify(httpClient, times(2)).execute(isA(HttpUriRequest.class));
+    verify(httpClient, times(2)).execute(argument.capture());
+
+    List<HttpPost> allValues = argument.getAllValues();
+    assertEquals("http://host1/_bulk", allValues.get(0).getURI().toString());
+    assertEquals("http://host2/_bulk", allValues.get(1).getURI().toString());
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchTransportClient.java b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchTransportClient.java
new file mode 100644
index 0000000..b7b8e74
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/java/org/apache/flume/sink/elasticsearch/client/TestElasticSearchTransportClient.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.flume.sink.elasticsearch.client;
+
+import org.apache.flume.Event;
+import org.apache.flume.EventDeliveryException;
+import org.apache.flume.sink.elasticsearch.ElasticSearchEventSerializer;
+import org.apache.flume.sink.elasticsearch.IndexNameBuilder;
+import org.elasticsearch.action.ListenableActionFuture;
+import org.elasticsearch.action.bulk.BulkRequestBuilder;
+import org.elasticsearch.action.bulk.BulkResponse;
+import org.elasticsearch.action.index.IndexRequestBuilder;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.io.BytesStream;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mock;
+
+import java.io.IOException;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Mockito.*;
+import static org.mockito.MockitoAnnotations.initMocks;
+
+/** Unit tests for ElasticSearchTransportClient, run against mocked Elasticsearch builders. */
+public class TestElasticSearchTransportClient {
+
+  private ElasticSearchTransportClient fixture;
+
+  @Mock
+  private ElasticSearchEventSerializer serializer;
+
+  @Mock
+  private IndexNameBuilder nameBuilder;
+
+  @Mock
+  private Client elasticSearchClient;
+
+  @Mock
+  private BulkRequestBuilder bulkRequestBuilder;
+
+  @Mock
+  private IndexRequestBuilder indexRequestBuilder;
+
+  @Mock
+  private Event event;
+
+  /** Stubs the serializer and index-request chain, then injects the mocked bulk builder. */
+  @Before
+  public void setUp() throws IOException {
+    initMocks(this);
+    BytesReference bytesReference = mock(BytesReference.class);
+    BytesStream bytesStream = mock(BytesStream.class);
+    when(nameBuilder.getIndexName(any(Event.class))).thenReturn("foo_index");
+    when(bytesReference.toBytes()).thenReturn("{\"body\":\"test\"}".getBytes());
+    when(bytesStream.bytes()).thenReturn(bytesReference);
+    when(serializer.getContentBuilder(any(Event.class))).thenReturn(bytesStream);
+    when(elasticSearchClient.prepareIndex(anyString(), anyString()))
+        .thenReturn(indexRequestBuilder);
+    when(indexRequestBuilder.setSource(bytesReference)).thenReturn(indexRequestBuilder);
+
+    fixture = new ElasticSearchTransportClient(elasticSearchClient, serializer);
+    fixture.setBulkRequestBuilder(bulkRequestBuilder);
+  }
+
+  /** Stubs bulkRequestBuilder.execute() to yield a response reporting the given failure flag. */
+  @SuppressWarnings("unchecked") // mock(Class) can only produce the raw future type
+  private void stubBulkResponse(boolean hasFailures) {
+    ListenableActionFuture<BulkResponse> action =
+        (ListenableActionFuture<BulkResponse>) mock(ListenableActionFuture.class);
+    BulkResponse response = mock(BulkResponse.class);
+    when(bulkRequestBuilder.execute()).thenReturn(action);
+    when(action.actionGet()).thenReturn(response);
+    when(response.hasFailures()).thenReturn(hasFailures);
+  }
+
+  /** With ttl = -1 the serialized source is set and the request is added to the bulk. */
+  @Test
+  public void shouldAddNewEventWithoutTTL() throws Exception {
+    fixture.addEvent(event, nameBuilder, "bar_type", -1);
+    verify(indexRequestBuilder).setSource(serializer.getContentBuilder(event).bytes());
+    verify(bulkRequestBuilder).add(indexRequestBuilder);
+  }
+
+  /** With ttl = 10 the TTL must be forwarded to the index request along with the source. */
+  @Test
+  public void shouldAddNewEventWithTTL() throws Exception {
+    fixture.addEvent(event, nameBuilder, "bar_type", 10);
+    verify(indexRequestBuilder).setTTL(10);
+    verify(indexRequestBuilder).setSource(serializer.getContentBuilder(event).bytes());
+  }
+
+  /** execute() must run the bulk request when the response reports no failures. */
+  @Test
+  public void shouldExecuteBulkRequestBuilder() throws Exception {
+    stubBulkResponse(false);
+    fixture.addEvent(event, nameBuilder, "bar_type", 10);
+    fixture.execute();
+    verify(bulkRequestBuilder).execute();
+  }
+
+  /** A bulk response that reports failures must surface as EventDeliveryException. */
+  @Test(expected = EventDeliveryException.class)
+  public void shouldThrowExceptionOnExecuteFailed() throws Exception {
+    stubBulkResponse(true);
+    fixture.addEvent(event, nameBuilder, "bar_type", 10);
+    fixture.execute();
+  }
+}
diff --git a/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/resources/log4j.properties b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/resources/log4j.properties
new file mode 100644
index 0000000..9036aca
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-elasticsearch-sink/src/test/resources/log4j.properties
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+log4j.rootLogger = DEBUG, out
+
+log4j.appender.out = org.apache.log4j.ConsoleAppender
+log4j.appender.out.layout = org.apache.log4j.PatternLayout
+log4j.appender.out.layout.ConversionPattern = %d (%t) [%p - %l] %m%n
+
+log4j.logger.org.apache.flume = DEBUG
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/artifacts/flume_ng_hbase_sink_jar.xml b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/artifacts/flume_ng_hbase_sink_jar.xml
new file mode 100644
index 0000000..f3e9b44
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/artifacts/flume_ng_hbase_sink_jar.xml
@@ -0,0 +1,8 @@
+
+
+ $PROJECT_DIR$/out/artifacts/flume_ng_hbase_sink_jar
+
+
+
+
+
\ No newline at end of file
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/compiler.xml b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/compiler.xml
new file mode 100644
index 0000000..6e72b1f
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/compiler.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/encodings.xml b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/encodings.xml
new file mode 100644
index 0000000..b26911b
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/encodings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/misc.xml b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/misc.xml
new file mode 100644
index 0000000..4b661a5
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/misc.xml
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/workspace.xml b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/workspace.xml
new file mode 100644
index 0000000..dd63465
--- /dev/null
+++ b/code/flume-ng-sinks/flume-ng-hbase-sink/.idea/workspace.xml
@@ -0,0 +1,435 @@
+
+
+
+
+
+
+
+
+
+
+