diff --git a/core/src/main/java/com/alibaba/datax/core/Engine.java b/core/src/main/java/com/alibaba/datax/core/Engine.java
index 38342532..4ba9fc18 100755
--- a/core/src/main/java/com/alibaba/datax/core/Engine.java
+++ b/core/src/main/java/com/alibaba/datax/core/Engine.java
@@ -79,16 +79,9 @@ public class Engine {
perfReportEnable = false;
}
- int priority = 0;
- try {
- priority = Integer.parseInt(System.getenv("SKYNET_PRIORITY"));
- }catch (NumberFormatException e){
- LOG.warn("prioriy set to 0, because NumberFormatException, the value is: "+System.getProperty("PROIORY"));
- }
-
Configuration jobInfoConfig = allConf.getConfiguration(CoreConstant.DATAX_JOB_JOBINFO);
//初始化PerfTrace
- PerfTrace perfTrace = PerfTrace.getInstance(isJob, instanceId, taskGroupId, priority, traceEnable);
+ PerfTrace perfTrace = PerfTrace.getInstance(isJob, instanceId, taskGroupId, traceEnable);
perfTrace.setJobInfo(jobInfoConfig,perfReportEnable,channelNumber);
container.start();
diff --git a/core/src/main/java/com/alibaba/datax/core/container/util/JobAssignUtil.java b/core/src/main/java/com/alibaba/datax/core/container/util/JobAssignUtil.java
index 31ba60a4..cbd0d2a1 100755
--- a/core/src/main/java/com/alibaba/datax/core/container/util/JobAssignUtil.java
+++ b/core/src/main/java/com/alibaba/datax/core/container/util/JobAssignUtil.java
@@ -114,7 +114,7 @@ public final class JobAssignUtil {
* 需要实现的效果通过例子来说是:
*
* a 库上有表:0, 1, 2
- * a 库上有表:3, 4
+ * b 库上有表:3, 4
* c 库上有表:5, 6, 7
*
* 如果有 4个 taskGroup
diff --git a/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java b/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java
index 26b2989f..49f5a0a1 100755
--- a/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java
+++ b/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java
@@ -27,7 +27,7 @@ import com.alibaba.datax.core.util.container.ClassLoaderSwapper;
import com.alibaba.datax.core.util.container.CoreConstant;
import com.alibaba.datax.core.util.container.LoadUtil;
import com.alibaba.datax.dataxservice.face.domain.enums.ExecuteMode;
-import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson2.JSON;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.Validate;
import org.slf4j.Logger;
diff --git a/core/src/main/java/com/alibaba/datax/core/statistics/communication/CommunicationTool.java b/core/src/main/java/com/alibaba/datax/core/statistics/communication/CommunicationTool.java
index 51a601ae..1815ea02 100755
--- a/core/src/main/java/com/alibaba/datax/core/statistics/communication/CommunicationTool.java
+++ b/core/src/main/java/com/alibaba/datax/core/statistics/communication/CommunicationTool.java
@@ -2,7 +2,7 @@ package com.alibaba.datax.core.statistics.communication;
import com.alibaba.datax.common.statistics.PerfTrace;
import com.alibaba.datax.common.util.StrUtil;
-import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson2.JSON;
import org.apache.commons.lang.Validate;
import java.text.DecimalFormat;
diff --git a/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/StdoutPluginCollector.java b/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/StdoutPluginCollector.java
index 8b2a8378..d88ad0a8 100755
--- a/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/StdoutPluginCollector.java
+++ b/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/StdoutPluginCollector.java
@@ -6,7 +6,7 @@ import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.core.statistics.communication.Communication;
import com.alibaba.datax.core.util.container.CoreConstant;
import com.alibaba.datax.core.statistics.plugin.task.util.DirtyRecord;
-import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson2.JSON;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
diff --git a/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/util/DirtyRecord.java b/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/util/DirtyRecord.java
index 1b0d5238..caa4cb5b 100755
--- a/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/util/DirtyRecord.java
+++ b/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/util/DirtyRecord.java
@@ -4,7 +4,7 @@ import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.core.util.FrameworkErrorCode;
-import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson2.JSON;
import java.math.BigDecimal;
import java.math.BigInteger;
diff --git a/core/src/main/java/com/alibaba/datax/core/taskgroup/TaskGroupContainer.java b/core/src/main/java/com/alibaba/datax/core/taskgroup/TaskGroupContainer.java
index c30c94d9..b4b45695 100755
--- a/core/src/main/java/com/alibaba/datax/core/taskgroup/TaskGroupContainer.java
+++ b/core/src/main/java/com/alibaba/datax/core/taskgroup/TaskGroupContainer.java
@@ -27,7 +27,7 @@ import com.alibaba.datax.core.util.TransformerUtil;
import com.alibaba.datax.core.util.container.CoreConstant;
import com.alibaba.datax.core.util.container.LoadUtil;
import com.alibaba.datax.dataxservice.face.domain.enums.State;
-import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson2.JSON;
import org.apache.commons.lang3.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/core/src/main/java/com/alibaba/datax/core/transport/channel/memory/MemoryChannel.java b/core/src/main/java/com/alibaba/datax/core/transport/channel/memory/MemoryChannel.java
index e49c7878..5bce085f 100755
--- a/core/src/main/java/com/alibaba/datax/core/transport/channel/memory/MemoryChannel.java
+++ b/core/src/main/java/com/alibaba/datax/core/transport/channel/memory/MemoryChannel.java
@@ -29,7 +29,7 @@ public class MemoryChannel extends Channel {
private ReentrantLock lock;
- private Condition notInsufficient, notEmpty;
+ private Condition notSufficient, notEmpty;
public MemoryChannel(final Configuration configuration) {
super(configuration);
@@ -37,7 +37,7 @@ public class MemoryChannel extends Channel {
this.bufferSize = configuration.getInt(CoreConstant.DATAX_CORE_TRANSPORT_EXCHANGER_BUFFERSIZE);
lock = new ReentrantLock();
- notInsufficient = lock.newCondition();
+ notSufficient = lock.newCondition();
notEmpty = lock.newCondition();
}
@@ -75,7 +75,7 @@ public class MemoryChannel extends Channel {
lock.lockInterruptibly();
int bytes = getRecordBytes(rs);
while (memoryBytes.get() + bytes > this.byteCapacity || rs.size() > this.queue.remainingCapacity()) {
- notInsufficient.await(200L, TimeUnit.MILLISECONDS);
+ notSufficient.await(200L, TimeUnit.MILLISECONDS);
}
this.queue.addAll(rs);
waitWriterTime += System.nanoTime() - startTime;
@@ -116,7 +116,7 @@ public class MemoryChannel extends Channel {
waitReaderTime += System.nanoTime() - startTime;
int bytes = getRecordBytes(rs);
memoryBytes.addAndGet(-bytes);
- notInsufficient.signalAll();
+ notSufficient.signalAll();
} catch (InterruptedException e) {
throw DataXException.asDataXException(
FrameworkErrorCode.RUNTIME_ERROR, e);
diff --git a/core/src/main/java/com/alibaba/datax/core/transport/record/DefaultRecord.java b/core/src/main/java/com/alibaba/datax/core/transport/record/DefaultRecord.java
index c78a2a87..1dfa02e8 100755
--- a/core/src/main/java/com/alibaba/datax/core/transport/record/DefaultRecord.java
+++ b/core/src/main/java/com/alibaba/datax/core/transport/record/DefaultRecord.java
@@ -5,7 +5,7 @@ import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.core.util.ClassSize;
import com.alibaba.datax.core.util.FrameworkErrorCode;
-import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson2.JSON;
import java.util.ArrayList;
import java.util.HashMap;
diff --git a/core/src/main/java/com/alibaba/datax/core/transport/transformer/DigestTransformer.java b/core/src/main/java/com/alibaba/datax/core/transport/transformer/DigestTransformer.java
new file mode 100644
index 00000000..d2bf1431
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/transport/transformer/DigestTransformer.java
@@ -0,0 +1,87 @@
+package com.alibaba.datax.core.transport.transformer;
+
+import com.alibaba.datax.common.element.Column;
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.element.StringColumn;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.transformer.Transformer;
+
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.lang.StringUtils;
+
+import java.util.Arrays;
+
+/**
+ * dx_digest transformer: replaces the value at the given column index with its md5/sha1 hex
+ * digest, converted to upper or lower case.
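+ * <p>
+ * Usage sketch (an assumption: the job-level transformer config follows the same
+ * columnIndex/paras convention as the built-in dx_substr transformer):
+ * <pre>
+ * "transformer": [
+ *   {"name": "dx_digest", "parameter": {"columnIndex": 2, "paras": ["md5", "toUpperCase"]}}
+ * ]
+ * </pre>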
+ *
+ * @author XuDaojie
+ * @since 2021-08-16
+ */
+public class DigestTransformer extends Transformer {
+
+ private static final String MD5 = "md5";
+ private static final String SHA1 = "sha1";
+ private static final String TO_UPPER_CASE = "toUpperCase";
+ private static final String TO_LOWER_CASE = "toLowerCase";
+
+ public DigestTransformer() {
+ setTransformerName("dx_digest");
+ }
+
+ @Override
+ public Record evaluate(Record record, Object... paras) {
+
+ int columnIndex;
+ String type;
+ String charType;
+
+ try {
+ if (paras.length != 3) {
+ throw new RuntimeException("dx_digest paras length must be 3");
+ }
+
+ columnIndex = (Integer) paras[0];
+ type = (String) paras[1];
+ charType = (String) paras[2];
+
+ if (!StringUtils.equalsIgnoreCase(MD5, type) && !StringUtils.equalsIgnoreCase(SHA1, type)) {
+ throw new RuntimeException("dx_digest paras index 1 must be md5 or sha1");
+ }
+ if (!StringUtils.equalsIgnoreCase(TO_UPPER_CASE, charType) && !StringUtils.equalsIgnoreCase(TO_LOWER_CASE, charType)) {
+ throw new RuntimeException("dx_digest paras index 2 must be toUpperCase or toLowerCase");
+ }
+ } catch (Exception e) {
+ throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_ILLEGAL_PARAMETER, "paras:" + Arrays.asList(paras) + " => " + e.getMessage());
+ }
+
+ Column column = record.getColumn(columnIndex);
+
+ try {
+ String oriValue = column.asString();
+
+ // 如果字段为空,作为空字符串处理
+ if (oriValue == null) {
+ oriValue = "";
+ }
+ String newValue;
+ if (MD5.equals(type)) {
+ newValue = DigestUtils.md5Hex(oriValue);
+ } else {
+ newValue = DigestUtils.sha1Hex(oriValue);
+ }
+
+ if (TO_UPPER_CASE.equals(charType)) {
+ newValue = newValue.toUpperCase();
+ } else {
+ newValue = newValue.toLowerCase();
+ }
+
+ record.setColumn(columnIndex, new StringColumn(newValue));
+
+ } catch (Exception e) {
+ throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_RUN_EXCEPTION, e.getMessage(), e);
+ }
+ return record;
+ }
+
+}
diff --git a/core/src/main/java/com/alibaba/datax/core/transport/transformer/FilterTransformer.java b/core/src/main/java/com/alibaba/datax/core/transport/transformer/FilterTransformer.java
index 8f6492fa..a3251715 100644
--- a/core/src/main/java/com/alibaba/datax/core/transport/transformer/FilterTransformer.java
+++ b/core/src/main/java/com/alibaba/datax/core/transport/transformer/FilterTransformer.java
@@ -61,7 +61,7 @@ public class FilterTransformer extends Transformer {
} else if (code.equalsIgnoreCase("<=")) {
return doLess(record, value, column, true);
} else {
- throw new RuntimeException("dx_filter can't suport code:" + code);
+ throw new RuntimeException("dx_filter can't support code:" + code);
}
} catch (Exception e) {
throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_RUN_EXCEPTION, e.getMessage(), e);
diff --git a/core/src/main/java/com/alibaba/datax/core/transport/transformer/GroovyTransformerStaticUtil.java b/core/src/main/java/com/alibaba/datax/core/transport/transformer/GroovyTransformerStaticUtil.java
index 4c872993..487a8be8 100644
--- a/core/src/main/java/com/alibaba/datax/core/transport/transformer/GroovyTransformerStaticUtil.java
+++ b/core/src/main/java/com/alibaba/datax/core/transport/transformer/GroovyTransformerStaticUtil.java
@@ -1,10 +1,18 @@
package com.alibaba.datax.core.transport.transformer;
+import org.apache.commons.codec.digest.DigestUtils;
+
/**
* GroovyTransformer的帮助类,供groovy代码使用,必须全是static的方法
* Created by liqiang on 16/3/4.
*/
public class GroovyTransformerStaticUtil {
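+ // Usage sketch: assuming the groovy transformer statically imports this helper class (as the
+ // class comment above implies), a dx_groovy script can call these helpers directly, e.g.:
+ //   String digest = md5(record.getColumn(0).asString());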
+ public static String md5(final String data) {
+ return DigestUtils.md5Hex(data);
+ }
+ public static String sha1(final String data) {
+ return DigestUtils.sha1Hex(data);
+ }
}
diff --git a/core/src/main/java/com/alibaba/datax/core/transport/transformer/TransformerRegistry.java b/core/src/main/java/com/alibaba/datax/core/transport/transformer/TransformerRegistry.java
index 96a0d988..3c625153 100644
--- a/core/src/main/java/com/alibaba/datax/core/transport/transformer/TransformerRegistry.java
+++ b/core/src/main/java/com/alibaba/datax/core/transport/transformer/TransformerRegistry.java
@@ -36,6 +36,7 @@ public class TransformerRegistry {
registTransformer(new ReplaceTransformer());
registTransformer(new FilterTransformer());
registTransformer(new GroovyTransformer());
+ registTransformer(new DigestTransformer());
}
public static void loadTransformerFromLocalStorage() {
diff --git a/core/src/main/java/com/alibaba/datax/core/util/ConfigParser.java b/core/src/main/java/com/alibaba/datax/core/util/ConfigParser.java
index 20039864..24f43d55 100755
--- a/core/src/main/java/com/alibaba/datax/core/util/ConfigParser.java
+++ b/core/src/main/java/com/alibaba/datax/core/util/ConfigParser.java
@@ -168,6 +168,7 @@ public final class ConfigParser {
boolean isDefaultPath = StringUtils.isBlank(pluginPath);
if (isDefaultPath) {
configuration.set("path", path);
+ configuration.set("loadType","jarLoader");
}
Configuration result = Configuration.newDefault();
diff --git a/core/src/main/java/com/alibaba/datax/core/util/container/CoreConstant.java b/core/src/main/java/com/alibaba/datax/core/util/container/CoreConstant.java
index 6a0b6205..a1ca164d 100755
--- a/core/src/main/java/com/alibaba/datax/core/util/container/CoreConstant.java
+++ b/core/src/main/java/com/alibaba/datax/core/util/container/CoreConstant.java
@@ -105,7 +105,7 @@ public class CoreConstant {
public static final String DATAX_JOB_POSTHANDLER_PLUGINNAME = "job.postHandler.pluginName";
// ----------------------------- 局部使用的变量
- public static final String JOB_WRITER = "reader";
+ public static final String JOB_WRITER = "writer";
public static final String JOB_READER = "reader";
diff --git a/core/src/main/java/com/alibaba/datax/core/util/container/JarLoader.java b/core/src/main/java/com/alibaba/datax/core/util/container/JarLoader.java
index 9fc113dc..ddf22bae 100755
--- a/core/src/main/java/com/alibaba/datax/core/util/container/JarLoader.java
+++ b/core/src/main/java/com/alibaba/datax/core/util/container/JarLoader.java
@@ -15,7 +15,7 @@ import java.util.List;
/**
* 提供Jar隔离的加载机制,会把传入的路径、及其子路径、以及路径中的jar文件加入到class path。
*/
-public class JarLoader extends URLClassLoader {
+public class JarLoader extends URLClassLoader{
public JarLoader(String[] paths) {
this(paths, JarLoader.class.getClassLoader());
}
diff --git a/core/src/main/java/com/alibaba/datax/core/util/container/LoadUtil.java b/core/src/main/java/com/alibaba/datax/core/util/container/LoadUtil.java
index 30e926c3..9a6a8302 100755
--- a/core/src/main/java/com/alibaba/datax/core/util/container/LoadUtil.java
+++ b/core/src/main/java/com/alibaba/datax/core/util/container/LoadUtil.java
@@ -49,7 +49,7 @@ public class LoadUtil {
/**
* jarLoader的缓冲
*/
- private static Map jarLoaderCenter = new HashMap();
+ private static Map jarLoaderCenter = new HashMap();
/**
* 设置pluginConfigs,方便后面插件来获取
diff --git a/core/src/main/job/job.json b/core/src/main/job/job.json
index 58206592..cc353877 100755
--- a/core/src/main/job/job.json
+++ b/core/src/main/job/job.json
@@ -2,7 +2,7 @@
"job": {
"setting": {
"speed": {
- "byte":10485760
+ "channel":1
},
"errorLimit": {
"record": 0,
diff --git a/databendwriter/doc/databendwriter-CN.md b/databendwriter/doc/databendwriter-CN.md
new file mode 100644
index 00000000..5b26ed7e
--- /dev/null
+++ b/databendwriter/doc/databendwriter-CN.md
@@ -0,0 +1,183 @@
+# DataX DatabendWriter
+[简体中文](./databendwriter-CN.md) | [English](./databendwriter.md)
+
+## 1 快速介绍
+
+Databend Writer 是一个 DataX 的插件,用于从 DataX 中写入数据到 Databend 表中。
+该插件基于[databend JDBC driver](https://github.com/databendcloud/databend-jdbc) ,它使用 [RESTful http protocol](https://databend.rs/doc/integrations/api/rest)
+在开源的 databend 和 [databend cloud](https://app.databend.com/) 上执行查询。
+
+在每个写入批次中,databend writer 将批量数据上传到内部的 S3 stage,然后执行相应的 insert SQL 将数据上传到 databend 表中。
+
+为了最佳的用户体验,如果您使用的是 databend 社区版本,您应该尝试采用 [S3](https://aws.amazon.com/s3/)/[minio](https://min.io/)/[OSS](https://www.alibabacloud.com/product/object-storage-service) 作为其底层存储层,因为
+它们支持预签名上传操作,否则您可能会在数据传输上浪费不必要的成本。
+
+您可以在[文档](https://databend.rs/doc/deploy/deploying-databend)中了解更多详细信息
+
+## 2 实现原理
+
+Databend Writer 将使用 DataX 从 DataX Reader 中获取生成的记录,并将记录批量插入到 databend 表中指定的列中。
+
+## 3 功能说明
+
+### 3.1 配置样例
+
+* 以下配置将从内存中读取一些生成的数据,并将数据上传到databend表中
+
+#### 准备工作
+```sql
+--- create table in databend
+drop table if exists datax.sample1;
+drop database if exists datax;
+create database if not exists datax;
+create table if not exists datax.sample1(a string, b int64, c date, d timestamp, e bool, f string, g variant);
+```
+
+#### 配置样例
+```json
+{
+ "job": {
+ "content": [
+ {
+ "reader": {
+ "name": "streamreader",
+ "parameter": {
+ "column" : [
+ {
+ "value": "DataX",
+ "type": "string"
+ },
+ {
+ "value": 19880808,
+ "type": "long"
+ },
+ {
+ "value": "1926-08-08 08:08:08",
+ "type": "date"
+ },
+ {
+ "value": "1988-08-08 08:08:08",
+ "type": "date"
+ },
+ {
+ "value": true,
+ "type": "bool"
+ },
+ {
+ "value": "test",
+ "type": "bytes"
+ },
+ {
+ "value": "{\"type\": \"variant\", \"value\": \"test\"}",
+ "type": "string"
+ }
+
+ ],
+ "sliceRecordCount": 10000
+ }
+ },
+ "writer": {
+ "name": "databendwriter",
+ "parameter": {
+ "writeMode": "replace",
+ "onConflictColumn": ["id"],
+ "username": "databend",
+ "password": "databend",
+ "column": ["a", "b", "c", "d", "e", "f", "g"],
+ "batchSize": 1000,
+ "preSql": [
+ ],
+ "postSql": [
+ ],
+ "connection": [
+ {
+ "jdbcUrl": "jdbc:databend://localhost:8000/datax",
+ "table": [
+ "sample1"
+ ]
+ }
+ ]
+ }
+ }
+ }
+ ],
+ "setting": {
+ "speed": {
+ "channel": 1
+ }
+ }
+ }
+}
+```
+
+### 3.2 参数说明
+* jdbcUrl
+ * 描述: JDBC 数据源 url。请参阅仓库中的详细[文档](https://github.com/databendcloud/databend-jdbc)
+ * 必选: 是
+ * 默认值: 无
+ * 示例: jdbc:databend://localhost:8000/datax
+* username
+ * 描述: JDBC 数据源用户名
+ * 必选: 是
+ * 默认值: 无
+ * 示例: databend
+* password
+ * 描述: JDBC 数据源密码
+ * 必选: 是
+ * 默认值: 无
+ * 示例: databend
+* table
+ * 描述: 表名的集合,table应该包含column参数中的所有列。
+ * 必选: 是
+ * 默认值: 无
+ * 示例: ["sample1"]
+* column
+ * 描述: 表中的列名集合,字段顺序应该与reader的record中的column类型对应
+ * 必选: 是
+ * 默认值: 无
+ * 示例: ["a", "b", "c", "d", "e", "f", "g"]
+* batchSize
+ * 描述: 每个批次的记录数
+ * 必选: 否
+ * 默认值: 1000
+ * 示例: 1000
+* preSql
+ * 描述: 在写入数据之前执行的SQL语句
+ * 必选: 否
+ * 默认值: 无
+ * 示例: ["delete from datax.sample1"]
+* postSql
+ * 描述: 在写入数据之后执行的SQL语句
+ * 必选: 否
+ * 默认值: 无
+ * 示例: ["select count(*) from datax.sample1"]
+* writeMode
+ * 描述:写入模式,支持 insert 和 replace 两种模式,默认为 insert。若为 replace,务必填写 onConflictColumn 参数
+ * 必选:否
+ * 默认值:insert
+ * 示例:"replace"
+* onConflictColumn
+ * 描述:on conflict 字段,指定 writeMode 为 replace 后,需要此参数(配置片段见本节末尾示例)
+ * 必选:否
+ * 默认值:无
+ * 示例:["id","user"]
+
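+以下是一个仅示意 writer 参数的最小片段(取自上文配置样例,假设表 sample1 中存在 id 列),展示 replace 模式与 onConflictColumn 的搭配使用:
+
+```json
+"writer": {
+  "name": "databendwriter",
+  "parameter": {
+    "writeMode": "replace",
+    "onConflictColumn": ["id"],
+    "username": "databend",
+    "password": "databend",
+    "column": ["a", "b", "c", "d", "e", "f", "g"],
+    "connection": [
+      {"jdbcUrl": "jdbc:databend://localhost:8000/datax", "table": ["sample1"]}
+    ]
+  }
+}
+```
+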
+### 3.3 类型转化
+DataX中的数据类型可以转换为databend中的相应数据类型。下表显示了两种类型之间的对应关系。
+
+| DataX 内部类型 | Databend 数据类型 |
+|------------|-----------------------------------------------------------|
+| INT | TINYINT, INT8, SMALLINT, INT16, INT, INT32, BIGINT, INT64 |
+| LONG | TINYINT, INT8, SMALLINT, INT16, INT, INT32, BIGINT, INT64 |
+| STRING | STRING, VARCHAR |
+| DOUBLE | FLOAT, DOUBLE |
+| BOOL | BOOLEAN, BOOL |
+| DATE | DATE, TIMESTAMP |
+| BYTES | STRING, VARCHAR |
+
+## 4 性能测试
+
+## 5 约束限制
+目前,复杂数据类型支持不稳定,如果您想使用复杂数据类型,例如元组,数组,请检查databend和jdbc驱动程序的进一步版本。
+
+## FAQ
\ No newline at end of file
diff --git a/databendwriter/doc/databendwriter.md b/databendwriter/doc/databendwriter.md
new file mode 100644
index 00000000..c92d6387
--- /dev/null
+++ b/databendwriter/doc/databendwriter.md
@@ -0,0 +1,176 @@
+# DataX DatabendWriter
+[简体中文](./databendwriter-CN.md) | [English](./databendwriter.md)
+
+## 1 Introduction
+Databend Writer is a DataX plugin that writes DataX records into a Databend table.
+The plugin is based on the [databend JDBC driver](https://github.com/databendcloud/databend-jdbc), which uses the [RESTful HTTP protocol](https://databend.rs/doc/integrations/api/rest)
+to execute queries on open-source Databend and [Databend Cloud](https://app.databend.com/).
+
+During each write batch, Databend Writer uploads the batch data into an internal S3 stage and then executes the corresponding insert SQL to load the data into the Databend table.
+
+For the best experience, if you are using the Databend community distribution, you should adopt [S3](https://aws.amazon.com/s3/)/[minio](https://min.io/)/[OSS](https://www.alibabacloud.com/product/object-storage-service) as its underlying storage layer, since
+they support presigned upload operations; otherwise you may incur unnecessary data-transfer costs.
+
+You can find more details in the [doc](https://databend.rs/doc/deploy/deploying-databend)
+
+## 2 Detailed Implementation
+Databend Writer uses the DataX framework to fetch records generated by a DataX Reader, and then batch-inserts the records into the designated columns of your Databend table.
+
+## 3 Features
+### 3.1 Example Configurations
+* The following configuration reads some generated data in memory and uploads it into a Databend table.
+
+#### Preparation
+```sql
+--- create table in databend
+drop table if exists datax.sample1;
+drop database if exists datax;
+create database if not exists datax;
+create table if not exists datax.sample1(a string, b int64, c date, d timestamp, e bool, f string, g variant);
+```
+
+#### Configurations
+```json
+{
+ "job": {
+ "content": [
+ {
+ "reader": {
+ "name": "streamreader",
+ "parameter": {
+ "column" : [
+ {
+ "value": "DataX",
+ "type": "string"
+ },
+ {
+ "value": 19880808,
+ "type": "long"
+ },
+ {
+ "value": "1926-08-08 08:08:08",
+ "type": "date"
+ },
+ {
+ "value": "1988-08-08 08:08:08",
+ "type": "date"
+ },
+ {
+ "value": true,
+ "type": "bool"
+ },
+ {
+ "value": "test",
+ "type": "bytes"
+ },
+ {
+ "value": "{\"type\": \"variant\", \"value\": \"test\"}",
+ "type": "string"
+ }
+
+ ],
+ "sliceRecordCount": 10000
+ }
+ },
+ "writer": {
+ "name": "databendwriter",
+ "parameter": {
+ "username": "databend",
+ "password": "databend",
+ "column": ["a", "b", "c", "d", "e", "f", "g"],
+ "batchSize": 1000,
+ "preSql": [
+ ],
+ "postSql": [
+ ],
+ "connection": [
+ {
+ "jdbcUrl": "jdbc:databend://localhost:8000/datax",
+ "table": [
+ "sample1"
+ ]
+ }
+ ]
+ }
+ }
+ }
+ ],
+ "setting": {
+ "speed": {
+ "channel": 1
+ }
+ }
+ }
+}
+```
+
+### 3.2 Configuration Description
+* jdbcUrl
+ * Description: The JDBC data source URL for Databend. See the detailed [doc](https://github.com/databendcloud/databend-jdbc) in the driver repository.
+ * Required: yes
+ * Default: none
+ * Example: jdbc:databend://localhost:8000/datax
+* username
+ * Description: Databend user name
+ * Required: yes
+ * Default: none
+ * Example: databend
+* password
+ * Description: Databend user password
+ * Required: yes
+ * Default: none
+ * Example: databend
+* table
+ * Description: A list of table names; each table should contain all of the columns specified in the `column` parameter.
+ * Required: yes
+ * Default: none
+ * Example: ["sample1"]
+* column
+ * Description: A list of column field names that should be inserted into the table. If you want to insert all column fields, use `["*"]` instead.
+ * Required: yes
+ * Default: none
+ * Example: ["a", "b", "c", "d", "e", "f", "g"]
+* batchSize
+ * Description: The number of records to be inserted in each batch.
+ * Required: no
+ * Default: 1024
+* preSql
+ * Description: A list of SQL statements that will be executed before the write operation.
+ * Required: no
+ * Default: none
+* postSql
+ * Description: A list of SQL statements that will be executed after the write operation.
+ * Required: no
+ * Default: none
+* writeMode
+ * Description: The write mode; supports `insert` and `replace`. When `replace` is used, `onConflictColumn` must also be configured (see the snippet after this list).
+ * Required: no
+ * Default: insert
+ * Example: "replace"
+* onConflictColumn
+ * Description: The list of conflict columns used by `replace` mode; required when `writeMode` is `replace`.
+ * Required: no
+ * Default: none
+ * Example: ["id","user"]
+
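+Below is a minimal writer-parameter sketch (an illustrative fragment that assumes the sample1 table above has an `id` column) showing how `writeMode: replace` pairs with `onConflictColumn`:
+
+```json
+"writer": {
+  "name": "databendwriter",
+  "parameter": {
+    "writeMode": "replace",
+    "onConflictColumn": ["id"],
+    "username": "databend",
+    "password": "databend",
+    "column": ["a", "b", "c", "d", "e", "f", "g"],
+    "connection": [
+      {"jdbcUrl": "jdbc:databend://localhost:8000/datax", "table": ["sample1"]}
+    ]
+  }
+}
+```
+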
+### 3.3 Type Convert
+Data types in DataX can be converted to the corresponding data types in Databend. The following table shows the correspondence between the two.
+
+| DataX Type | Databend Type |
+|------------|-----------------------------------------------------------|
+| INT | TINYINT, INT8, SMALLINT, INT16, INT, INT32, BIGINT, INT64 |
+| LONG | TINYINT, INT8, SMALLINT, INT16, INT, INT32, BIGINT, INT64 |
+| STRING | STRING, VARCHAR |
+| DOUBLE | FLOAT, DOUBLE |
+| BOOL | BOOLEAN, BOOL |
+| DATE | DATE, TIMESTAMP |
+| BYTES | STRING, VARCHAR |
+
+
+## 4 Performance Test
+
+
+## 5 Restrictions
+Currently, support for complex data types is not stable. If you want to use complex data types such as tuple or array, please check future releases of Databend and the JDBC driver.
+
+## FAQ
diff --git a/databendwriter/pom.xml b/databendwriter/pom.xml
new file mode 100644
index 00000000..b99ca5d8
--- /dev/null
+++ b/databendwriter/pom.xml
@@ -0,0 +1,101 @@
+
+
+
+ datax-all
+ com.alibaba.datax
+ 0.0.1-SNAPSHOT
+
+
+ 4.0.0
+ databendwriter
+ databendwriter
+ jar
+
+
+
+ com.databend
+ databend-jdbc
+ 0.1.0
+
+
+ com.alibaba.datax
+ datax-core
+ ${datax-project-version}
+
+
+ com.alibaba.datax
+ datax-common
+ ${datax-project-version}
+
+
+ org.slf4j
+ slf4j-api
+
+
+
+ ch.qos.logback
+ logback-classic
+
+
+
+ com.alibaba.datax
+ plugin-rdbms-util
+ ${datax-project-version}
+
+
+ com.google.guava
+ guava
+
+
+
+
+
+
+ junit
+ junit
+ test
+
+
+
+
+
+ src/main/java
+
+ **/*.properties
+
+
+
+
+
+
+ maven-compiler-plugin
+
+ ${jdk-version}
+ ${jdk-version}
+ ${project-sourceEncoding}
+
+
+
+
+ maven-assembly-plugin
+
+
+ src/main/assembly/package.xml
+
+ datax
+
+
+
+ dwzip
+ package
+
+ single
+
+
+
+
+
+
+
diff --git a/databendwriter/src/main/assembly/package.xml b/databendwriter/src/main/assembly/package.xml
new file mode 100755
index 00000000..8a9ba1b2
--- /dev/null
+++ b/databendwriter/src/main/assembly/package.xml
@@ -0,0 +1,34 @@
+
+
+
+ dir
+
+ false
+
+
+ src/main/resources
+
+ plugin.json
+ plugin_job_template.json
+
+ plugin/writer/databendwriter
+
+
+ target/
+
+ databendwriter-0.0.1-SNAPSHOT.jar
+
+ plugin/writer/databendwriter
+
+
+
+
+
+ false
+ plugin/writer/databendwriter/libs
+
+
+
diff --git a/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/DatabendWriter.java b/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/DatabendWriter.java
new file mode 100644
index 00000000..ddb8fc9a
--- /dev/null
+++ b/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/DatabendWriter.java
@@ -0,0 +1,241 @@
+package com.alibaba.datax.plugin.writer.databendwriter;
+
+import com.alibaba.datax.common.element.Column;
+import com.alibaba.datax.common.element.StringColumn;
+import com.alibaba.datax.common.exception.CommonErrorCode;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.RecordReceiver;
+import com.alibaba.datax.common.spi.Writer;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
+import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter;
+import com.alibaba.datax.plugin.writer.databendwriter.util.DatabendWriterUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.*;
+import java.util.List;
+import java.util.regex.Pattern;
+
+public class DatabendWriter extends Writer {
+ private static final DataBaseType DATABASE_TYPE = DataBaseType.Databend;
+
+ public static class Job
+ extends Writer.Job {
+ private static final Logger LOG = LoggerFactory.getLogger(Job.class);
+ private Configuration originalConfig;
+ private CommonRdbmsWriter.Job commonRdbmsWriterMaster;
+
+ @Override
+ public void init() throws DataXException {
+ this.originalConfig = super.getPluginJobConf();
+ this.commonRdbmsWriterMaster = new CommonRdbmsWriter.Job(DATABASE_TYPE);
+ this.commonRdbmsWriterMaster.init(this.originalConfig);
+ // placeholder currently not supported by databend driver, needs special treatment
+ DatabendWriterUtil.dealWriteMode(this.originalConfig);
+ }
+
+ @Override
+ public void preCheck() {
+ this.init();
+ this.commonRdbmsWriterMaster.writerPreCheck(this.originalConfig, DATABASE_TYPE);
+ }
+
+ @Override
+ public void prepare() {
+ this.commonRdbmsWriterMaster.prepare(this.originalConfig);
+ }
+
+ @Override
+ public List<Configuration> split(int mandatoryNumber) {
+ return this.commonRdbmsWriterMaster.split(this.originalConfig, mandatoryNumber);
+ }
+
+ @Override
+ public void post() {
+ this.commonRdbmsWriterMaster.post(this.originalConfig);
+ }
+
+ @Override
+ public void destroy() {
+ this.commonRdbmsWriterMaster.destroy(this.originalConfig);
+ }
+ }
+
+
+ public static class Task extends Writer.Task {
+ private static final Logger LOG = LoggerFactory.getLogger(Task.class);
+
+ private Configuration writerSliceConfig;
+
+ private CommonRdbmsWriter.Task commonRdbmsWriterSlave;
+
+ @Override
+ public void init() {
+ this.writerSliceConfig = super.getPluginJobConf();
+
+ this.commonRdbmsWriterSlave = new CommonRdbmsWriter.Task(DataBaseType.Databend) {
+ @Override
+ protected PreparedStatement fillPreparedStatementColumnType(PreparedStatement preparedStatement, int columnIndex, int columnSqltype, String typeName, Column column) throws SQLException {
+ try {
+ if (column.getRawData() == null) {
+ preparedStatement.setNull(columnIndex + 1, columnSqltype);
+ return preparedStatement;
+ }
+
+ java.util.Date utilDate;
+ switch (columnSqltype) {
+
+ case Types.TINYINT:
+ case Types.SMALLINT:
+ case Types.INTEGER:
+ preparedStatement.setInt(columnIndex + 1, column.asBigInteger().intValue());
+ break;
+ case Types.BIGINT:
+ preparedStatement.setLong(columnIndex + 1, column.asLong());
+ break;
+ case Types.DECIMAL:
+ preparedStatement.setBigDecimal(columnIndex + 1, column.asBigDecimal());
+ break;
+ case Types.FLOAT:
+ case Types.REAL:
+ preparedStatement.setFloat(columnIndex + 1, column.asDouble().floatValue());
+ break;
+ case Types.DOUBLE:
+ preparedStatement.setDouble(columnIndex + 1, column.asDouble());
+ break;
+ case Types.DATE:
+ java.sql.Date sqlDate = null;
+ try {
+ utilDate = column.asDate();
+ } catch (DataXException e) {
+ throw new SQLException(String.format(
+ "Date type conversion error: [%s]", column));
+ }
+
+ if (null != utilDate) {
+ sqlDate = new java.sql.Date(utilDate.getTime());
+ }
+ preparedStatement.setDate(columnIndex + 1, sqlDate);
+ break;
+
+ case Types.TIME:
+ java.sql.Time sqlTime = null;
+ try {
+ utilDate = column.asDate();
+ } catch (DataXException e) {
+ throw new SQLException(String.format(
+ "Date type conversion error: [%s]", column));
+ }
+
+ if (null != utilDate) {
+ sqlTime = new java.sql.Time(utilDate.getTime());
+ }
+ preparedStatement.setTime(columnIndex + 1, sqlTime);
+ break;
+
+ case Types.TIMESTAMP:
+ Timestamp sqlTimestamp = null;
+ if (column instanceof StringColumn && column.asString() != null) {
+ String timeStampStr = column.asString();
+ // JAVA TIMESTAMP 类型入参必须是 "2017-07-12 14:39:00.123566" 格式
+ String pattern = "^\\d+-\\d+-\\d+ \\d+:\\d+:\\d+.\\d+";
+ boolean isMatch = Pattern.matches(pattern, timeStampStr);
+ if (isMatch) {
+ sqlTimestamp = Timestamp.valueOf(timeStampStr);
+ preparedStatement.setTimestamp(columnIndex + 1, sqlTimestamp);
+ break;
+ }
+ }
+ try {
+ utilDate = column.asDate();
+ } catch (DataXException e) {
+ throw new SQLException(String.format(
+ "Date type conversion error: [%s]", column));
+ }
+
+ if (null != utilDate) {
+ sqlTimestamp = new Timestamp(
+ utilDate.getTime());
+ }
+ preparedStatement.setTimestamp(columnIndex + 1, sqlTimestamp);
+ break;
+
+ case Types.BINARY:
+ case Types.VARBINARY:
+ case Types.BLOB:
+ case Types.LONGVARBINARY:
+ preparedStatement.setBytes(columnIndex + 1, column
+ .asBytes());
+ break;
+
+ case Types.BOOLEAN:
+
+ // warn: bit(1) -> Types.BIT 可使用setBoolean
+ // warn: bit(>1) -> Types.VARBINARY 可使用setBytes
+ case Types.BIT:
+ if (this.dataBaseType == DataBaseType.MySql) {
+ Boolean asBoolean = column.asBoolean();
+ if (asBoolean != null) {
+ preparedStatement.setBoolean(columnIndex + 1, asBoolean);
+ } else {
+ preparedStatement.setNull(columnIndex + 1, Types.BIT);
+ }
+ } else {
+ preparedStatement.setString(columnIndex + 1, column.asString());
+ }
+ break;
+
+ default:
+ // cast variant / array into string is fine.
+ preparedStatement.setString(columnIndex + 1, column.asString());
+ break;
+ }
+ return preparedStatement;
+ } catch (DataXException e) {
+ // fix类型转换或者溢出失败时,将具体哪一列打印出来
+ if (e.getErrorCode() == CommonErrorCode.CONVERT_NOT_SUPPORT ||
+ e.getErrorCode() == CommonErrorCode.CONVERT_OVER_FLOW) {
+ throw DataXException
+ .asDataXException(
+ e.getErrorCode(),
+ String.format(
+ "type conversion error. columnName: [%s], columnType:[%d], columnJavaType: [%s]. please change the data type in given column field or do not sync on the column.",
+ this.resultSetMetaData.getLeft()
+ .get(columnIndex),
+ this.resultSetMetaData.getMiddle()
+ .get(columnIndex),
+ this.resultSetMetaData.getRight()
+ .get(columnIndex)));
+ } else {
+ throw e;
+ }
+ }
+ }
+
+ };
+ this.commonRdbmsWriterSlave.init(this.writerSliceConfig);
+ }
+
+ @Override
+ public void destroy() {
+ this.commonRdbmsWriterSlave.destroy(this.writerSliceConfig);
+ }
+
+ @Override
+ public void prepare() {
+ this.commonRdbmsWriterSlave.prepare(this.writerSliceConfig);
+ }
+
+ @Override
+ public void post() {
+ this.commonRdbmsWriterSlave.post(this.writerSliceConfig);
+ }
+
+ @Override
+ public void startWrite(RecordReceiver lineReceiver) {
+ this.commonRdbmsWriterSlave.startWrite(lineReceiver, this.writerSliceConfig, this.getTaskPluginCollector());
+ }
+
+ }
+}
diff --git a/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/DatabendWriterErrorCode.java b/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/DatabendWriterErrorCode.java
new file mode 100644
index 00000000..21cbf428
--- /dev/null
+++ b/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/DatabendWriterErrorCode.java
@@ -0,0 +1,33 @@
+package com.alibaba.datax.plugin.writer.databendwriter;
+
+import com.alibaba.datax.common.spi.ErrorCode;
+
+
+public enum DatabendWriterErrorCode implements ErrorCode {
+ CONF_ERROR("DatabendWriter-00", "配置错误."),
+ WRITE_DATA_ERROR("DatabendWriter-01", "写入数据时失败."),
+ ;
+
+ private final String code;
+ private final String description;
+
+ private DatabendWriterErrorCode(String code, String description) {
+ this.code = code;
+ this.description = description;
+ }
+
+ @Override
+ public String getCode() {
+ return this.code;
+ }
+
+ @Override
+ public String getDescription() {
+ return this.description;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("Code:[%s], Description:[%s].", this.code, this.description);
+ }
+}
\ No newline at end of file
diff --git a/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/util/DatabendWriterUtil.java b/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/util/DatabendWriterUtil.java
new file mode 100644
index 00000000..516a75eb
--- /dev/null
+++ b/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/util/DatabendWriterUtil.java
@@ -0,0 +1,72 @@
+package com.alibaba.datax.plugin.writer.databendwriter.util;
+
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.rdbms.writer.Constant;
+import com.alibaba.datax.plugin.rdbms.writer.Key;
+
+import com.alibaba.datax.plugin.writer.databendwriter.DatabendWriterErrorCode;
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.StringJoiner;
+
+public final class DatabendWriterUtil {
+ private static final Logger LOG = LoggerFactory.getLogger(DatabendWriterUtil.class);
+
+ private DatabendWriterUtil() {
+ }
+
+ public static void dealWriteMode(Configuration originalConfig) throws DataXException {
+ List<String> columns = originalConfig.getList(Key.COLUMN, String.class);
+ List<String> onConflictColumns = originalConfig.getList(Key.ONCONFLICT_COLUMN, String.class);
+ StringBuilder writeDataSqlTemplate = new StringBuilder();
+
+ String jdbcUrl = originalConfig.getString(String.format("%s[0].%s",
+ Constant.CONN_MARK, Key.JDBC_URL, String.class));
+
+ String writeMode = originalConfig.getString(Key.WRITE_MODE, "INSERT");
+ LOG.info("write mode is {}", writeMode);
+ if (writeMode.toLowerCase().contains("replace")) {
+ if (onConflictColumns == null || onConflictColumns.size() == 0) {
+ throw DataXException
+ .asDataXException(
+ DatabendWriterErrorCode.CONF_ERROR,
+ String.format(
+ "Replace mode must has onConflictColumn config."
+ ));
+ }
+
+ // for databend if you want to use replace mode, the writeMode should be: "writeMode": "replace"
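+ // For illustration: with column ["a","b"] and onConflictColumn ["id"], the generated template is
+ // roughly "REPLACE INTO %s (a,b) ON (id) VALUES"; the %s placeholder is filled with the table name downstream.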
+ writeDataSqlTemplate.append("REPLACE INTO %s (")
+ .append(StringUtils.join(columns, ",")).append(") ").append(onConFlictDoString(onConflictColumns))
+ .append(" VALUES");
+
+ LOG.info("Replace data [\n{}\n], which jdbcUrl like:[{}]", writeDataSqlTemplate, jdbcUrl);
+ originalConfig.set(Constant.INSERT_OR_REPLACE_TEMPLATE_MARK, writeDataSqlTemplate);
+ } else {
+ writeDataSqlTemplate.append("INSERT INTO %s");
+ StringJoiner columnString = new StringJoiner(",");
+
+ for (String column : columns) {
+ columnString.add(column);
+ }
+ writeDataSqlTemplate.append(String.format("(%s)", columnString));
+ writeDataSqlTemplate.append(" VALUES");
+
+ LOG.info("Insert data [\n{}\n], which jdbcUrl like:[{}]", writeDataSqlTemplate, jdbcUrl);
+
+ originalConfig.set(Constant.INSERT_OR_REPLACE_TEMPLATE_MARK, writeDataSqlTemplate);
+ }
+
+ }
+
+ public static String onConFlictDoString(List<String> conflictColumns) {
+ return " ON " +
+ "(" +
+ StringUtils.join(conflictColumns, ",") + ") ";
+ }
+}
diff --git a/databendwriter/src/main/resources/plugin.json b/databendwriter/src/main/resources/plugin.json
new file mode 100644
index 00000000..bab0130d
--- /dev/null
+++ b/databendwriter/src/main/resources/plugin.json
@@ -0,0 +1,6 @@
+{
+ "name": "databendwriter",
+ "class": "com.alibaba.datax.plugin.writer.databendwriter.DatabendWriter",
+ "description": "execute batch insert sql to write dataX data into databend",
+ "developer": "databend"
+}
\ No newline at end of file
diff --git a/databendwriter/src/main/resources/plugin_job_template.json b/databendwriter/src/main/resources/plugin_job_template.json
new file mode 100644
index 00000000..34d4b251
--- /dev/null
+++ b/databendwriter/src/main/resources/plugin_job_template.json
@@ -0,0 +1,19 @@
+{
+ "name": "databendwriter",
+ "parameter": {
+ "username": "username",
+ "password": "password",
+ "column": ["col1", "col2", "col3"],
+ "connection": [
+ {
+ "jdbcUrl": "jdbc:databend://:[/]",
+ "table": "table1"
+ }
+ ],
+ "preSql": [],
+ "postSql": [],
+
+ "maxBatchRows": 65536,
+ "maxBatchSize": 134217728
+ }
+}
\ No newline at end of file
diff --git a/datahubreader/pom.xml b/datahubreader/pom.xml
new file mode 100644
index 00000000..c0022b44
--- /dev/null
+++ b/datahubreader/pom.xml
@@ -0,0 +1,79 @@
+
+
+
+ datax-all
+ com.alibaba.datax
+ 0.0.1-SNAPSHOT
+
+ 4.0.0
+
+ datahubreader
+
+ 0.0.1-SNAPSHOT
+
+
+
+ com.alibaba.datax
+ datax-common
+ ${datax-project-version}
+
+
+ slf4j-log4j12
+ org.slf4j
+
+
+
+
+ org.slf4j
+ slf4j-api
+
+
+ ch.qos.logback
+ logback-classic
+
+
+ com.aliyun.datahub
+ aliyun-sdk-datahub
+ 2.21.6-public
+
+
+ junit
+ junit
+ 4.12
+ test
+
+
+
+
+
+
+
+ maven-compiler-plugin
+
+ ${jdk-version}
+ ${jdk-version}
+ ${project-sourceEncoding}
+
+
+
+
+ maven-assembly-plugin
+
+
+ src/main/assembly/package.xml
+
+ datax
+
+
+
+ dwzip
+ package
+
+ single
+
+
+
+
+
+
+
diff --git a/datahubreader/src/main/assembly/package.xml b/datahubreader/src/main/assembly/package.xml
new file mode 100644
index 00000000..d14ea981
--- /dev/null
+++ b/datahubreader/src/main/assembly/package.xml
@@ -0,0 +1,34 @@
+
+
+
+ dir
+
+ false
+
+
+ src/main/resources
+
+ plugin.json
+
+ plugin/reader/datahubreader
+
+
+ target/
+
+ datahubreader-0.0.1-SNAPSHOT.jar
+
+ plugin/reader/datahubreader
+
+
+
+
+
+ false
+ plugin/reader/datahubreader/libs
+ runtime
+
+
+
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/Constant.java b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/Constant.java
new file mode 100644
index 00000000..bee3ccd7
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/Constant.java
@@ -0,0 +1,8 @@
+package com.alibaba.datax.plugin.reader.datahubreader;
+
+public class Constant {
+
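+ // Format of the job's beginDateTime/endDateTime values, e.g. "20231001123000" for 2023-10-01 12:30:00.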
+ public static String DATETIME_FORMAT = "yyyyMMddHHmmss";
+ public static String DATE_FORMAT = "yyyyMMdd";
+
+}
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubClientHelper.java b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubClientHelper.java
new file mode 100644
index 00000000..2b7bcec4
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubClientHelper.java
@@ -0,0 +1,42 @@
+package com.alibaba.datax.plugin.reader.datahubreader;
+
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.fastjson2.JSON;
+import com.alibaba.fastjson2.TypeReference;
+import com.aliyun.datahub.client.DatahubClient;
+import com.aliyun.datahub.client.DatahubClientBuilder;
+import com.aliyun.datahub.client.auth.Account;
+import com.aliyun.datahub.client.auth.AliyunAccount;
+import com.aliyun.datahub.client.common.DatahubConfig;
+import com.aliyun.datahub.client.http.HttpConfig;
+import org.apache.commons.lang3.StringUtils;
+
+public class DatahubClientHelper {
+ public static DatahubClient getDatahubClient(Configuration jobConfig) {
+ String accessId = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_ID,
+ DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+ String accessKey = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_KEY,
+ DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+ String endpoint = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ENDPOINT,
+ DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+ Account account = new AliyunAccount(accessId, accessKey);
+ // 是否开启二进制传输,服务端2.12版本开始支持
+ boolean enableBinary = jobConfig.getBool("enableBinary", false);
+ DatahubConfig datahubConfig = new DatahubConfig(endpoint, account, enableBinary);
+ // HttpConfig可不设置,不设置时采用默认值
+ // 读写数据推荐打开网络传输 LZ4压缩
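+ // Hypothetical example: the "httpConfig" job field is a JSON string deserialized into HttpConfig,
+ // e.g. {"readTimeout": 10000, "connTimeout": 10000}; field names must match HttpConfig's properties.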
+ HttpConfig httpConfig = null;
+ String httpConfigStr = jobConfig.getString("httpConfig");
+ if (StringUtils.isNotBlank(httpConfigStr)) {
+ httpConfig = JSON.parseObject(httpConfigStr, new TypeReference<HttpConfig>() {
+ });
+ }
+
+ DatahubClientBuilder builder = DatahubClientBuilder.newBuilder().setDatahubConfig(datahubConfig);
+ if (null != httpConfig) {
+ builder.setHttpConfig(httpConfig);
+ }
+ DatahubClient datahubClient = builder.build();
+ return datahubClient;
+ }
+}
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReader.java b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReader.java
new file mode 100644
index 00000000..4792ac39
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReader.java
@@ -0,0 +1,292 @@
+package com.alibaba.datax.plugin.reader.datahubreader;
+
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import com.aliyun.datahub.client.model.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.alibaba.datax.common.element.Column;
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.element.StringColumn;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.RecordSender;
+import com.alibaba.datax.common.spi.Reader;
+import com.alibaba.datax.common.util.Configuration;
+
+
+import com.aliyun.datahub.client.DatahubClient;
+
+
+public class DatahubReader extends Reader {
+ public static class Job extends Reader.Job {
+ private static final Logger LOG = LoggerFactory.getLogger(Job.class);
+
+ private Configuration originalConfig;
+
+ private Long beginTimestampMillis;
+ private Long endTimestampMillis;
+
+ DatahubClient datahubClient;
+
+ @Override
+ public void init() {
+ LOG.info("datahub reader job init begin ...");
+ this.originalConfig = super.getPluginJobConf();
+ validateParameter(originalConfig);
+ this.datahubClient = DatahubClientHelper.getDatahubClient(this.originalConfig);
+ LOG.info("datahub reader job init end.");
+ }
+
+ private void validateParameter(Configuration conf){
+ conf.getNecessaryValue(Key.ENDPOINT,DatahubReaderErrorCode.REQUIRE_VALUE);
+ conf.getNecessaryValue(Key.ACCESSKEYID,DatahubReaderErrorCode.REQUIRE_VALUE);
+ conf.getNecessaryValue(Key.ACCESSKEYSECRET,DatahubReaderErrorCode.REQUIRE_VALUE);
+ conf.getNecessaryValue(Key.PROJECT,DatahubReaderErrorCode.REQUIRE_VALUE);
+ conf.getNecessaryValue(Key.TOPIC,DatahubReaderErrorCode.REQUIRE_VALUE);
+ conf.getNecessaryValue(Key.COLUMN,DatahubReaderErrorCode.REQUIRE_VALUE);
+ conf.getNecessaryValue(Key.BEGINDATETIME,DatahubReaderErrorCode.REQUIRE_VALUE);
+ conf.getNecessaryValue(Key.ENDDATETIME,DatahubReaderErrorCode.REQUIRE_VALUE);
+
+ int batchSize = this.originalConfig.getInt(Key.BATCHSIZE, 1024);
+ if (batchSize > 10000) {
+ throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+ "Invalid batchSize[" + batchSize + "] value (0,10000]!");
+ }
+
+ String beginDateTime = this.originalConfig.getString(Key.BEGINDATETIME);
+ if (beginDateTime != null) {
+ try {
+ beginTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(beginDateTime);
+ } catch (ParseException e) {
+ throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+ "Invalid beginDateTime[" + beginDateTime + "], format [yyyyMMddHHmmss]!");
+ }
+ }
+
+ if (beginTimestampMillis != null && beginTimestampMillis <= 0) {
+ throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+ "Invalid beginTimestampMillis[" + beginTimestampMillis + "]!");
+ }
+
+ String endDateTime = this.originalConfig.getString(Key.ENDDATETIME);
+ if (endDateTime != null) {
+ try {
+ endTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(endDateTime);
+ } catch (ParseException e) {
+ throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+ "Invalid beginDateTime[" + endDateTime + "], format [yyyyMMddHHmmss]!");
+ }
+ }
+
+ if (endTimestampMillis != null && endTimestampMillis <= 0) {
+ throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+ "Invalid endTimestampMillis[" + endTimestampMillis + "]!");
+ }
+
+ if (beginTimestampMillis != null && endTimestampMillis != null
+ && endTimestampMillis <= beginTimestampMillis) {
+ throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+ "endTimestampMillis[" + endTimestampMillis + "] must bigger than beginTimestampMillis[" + beginTimestampMillis + "]!");
+ }
+ }
+
+ @Override
+ public void prepare() {
+ // create datahub client
+ String project = originalConfig.getNecessaryValue(Key.PROJECT, DatahubReaderErrorCode.REQUIRE_VALUE);
+ String topic = originalConfig.getNecessaryValue(Key.TOPIC, DatahubReaderErrorCode.REQUIRE_VALUE);
+ RecordType recordType = null;
+ try {
+ DatahubClient client = DatahubClientHelper.getDatahubClient(this.originalConfig);
+ GetTopicResult getTopicResult = client.getTopic(project, topic);
+ recordType = getTopicResult.getRecordType();
+ } catch (Exception e) {
+ LOG.warn("get topic type error: {}", e.getMessage());
+ }
+ if (null != recordType) {
+ if (recordType == RecordType.BLOB) {
+ throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+ "DatahubReader only support 'Tuple' RecordType now, but your RecordType is 'BLOB'");
+ }
+ }
+ }
+
+ @Override
+ public void destroy() {
+ }
+
+ @Override
+ public List<Configuration> split(int adviceNumber) {
+ LOG.info("split() begin...");
+
+ List<Configuration> readerSplitConfigs = new ArrayList<Configuration>();
+
+ String project = this.originalConfig.getString(Key.PROJECT);
+ String topic = this.originalConfig.getString(Key.TOPIC);
+
+ List<ShardEntry> shardEntrys = DatahubReaderUtils.getShardsWithRetry(this.datahubClient, project, topic);
+ if (shardEntrys == null || shardEntrys.isEmpty() || shardEntrys.size() == 0) {
+ throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+ "Project [" + project + "] Topic [" + topic + "] has no shards, please check !");
+ }
+
+ for (ShardEntry shardEntry : shardEntrys) {
+ Configuration splitedConfig = this.originalConfig.clone();
+ splitedConfig.set(Key.SHARDID, shardEntry.getShardId());
+ readerSplitConfigs.add(splitedConfig);
+ }
+
+ LOG.info("split() ok and end...");
+ return readerSplitConfigs;
+ }
+
+ }
+
+ public static class Task extends Reader.Task {
+ private static final Logger LOG = LoggerFactory.getLogger(Task.class);
+
+ private Configuration taskConfig;
+
+ private String accessId;
+ private String accessKey;
+ private String endpoint;
+ private String project;
+ private String topic;
+ private String shardId;
+ private Long beginTimestampMillis;
+ private Long endTimestampMillis;
+ private int batchSize;
+ private List<String> columns;
+ private RecordSchema schema;
+ private String timeStampUnit;
+
+ DatahubClient datahubClient;
+
+ @Override
+ public void init() {
+ this.taskConfig = super.getPluginJobConf();
+
+ this.accessId = this.taskConfig.getString(Key.ACCESSKEYID);
+ this.accessKey = this.taskConfig.getString(Key.ACCESSKEYSECRET);
+ this.endpoint = this.taskConfig.getString(Key.ENDPOINT);
+ this.project = this.taskConfig.getString(Key.PROJECT);
+ this.topic = this.taskConfig.getString(Key.TOPIC);
+ this.shardId = this.taskConfig.getString(Key.SHARDID);
+ this.batchSize = this.taskConfig.getInt(Key.BATCHSIZE, 1024);
+ this.timeStampUnit = this.taskConfig.getString(Key.TIMESTAMP_UNIT, "MICROSECOND");
+ try {
+ this.beginTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(this.taskConfig.getString(Key.BEGINDATETIME));
+ } catch (ParseException e) {
+ }
+
+ try {
+ this.endTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(this.taskConfig.getString(Key.ENDDATETIME));
+ } catch (ParseException e) {
+ }
+
+ this.columns = this.taskConfig.getList(Key.COLUMN, String.class);
+
+ this.datahubClient = DatahubClientHelper.getDatahubClient(this.taskConfig);
+
+
+ this.schema = DatahubReaderUtils.getDatahubSchemaWithRetry(this.datahubClient, this.project, topic);
+
+ LOG.info("init datahub reader task finished.project:{} topic:{} batchSize:{}", project, topic, batchSize);
+ }
+
+ @Override
+ public void destroy() {
+ }
+
+ @Override
+ public void startRead(RecordSender recordSender) {
+ LOG.info("read start");
+
+ String beginCursor = DatahubReaderUtils.getCursorWithRetry(this.datahubClient, this.project,
+ this.topic, this.shardId, this.beginTimestampMillis);
+ String endCursor = DatahubReaderUtils.getCursorWithRetry(this.datahubClient, this.project,
+ this.topic, this.shardId, this.endTimestampMillis);
+
+ if (beginCursor == null) {
+ LOG.info("Shard:{} has no data!", this.shardId);
+ return;
+ } else if (endCursor == null) {
+ endCursor = DatahubReaderUtils.getLatestCursorWithRetry(this.datahubClient, this.project,
+ this.topic, this.shardId);
+ }
+
+ String curCursor = beginCursor;
+
+ boolean exit = false;
+
+ while (true) {
+
+ GetRecordsResult result = DatahubReaderUtils.getRecordsResultWithRetry(this.datahubClient, this.project, this.topic,
+ this.shardId, this.batchSize, curCursor, this.schema);
+
+ List<RecordEntry> records = result.getRecords();
+ if (records.size() > 0) {
+ for (RecordEntry record : records) {
+ if (record.getSystemTime() >= this.endTimestampMillis) {
+ exit = true;
+ break;
+ }
+
+ HashMap<String, Column> dataMap = new HashMap<String, Column>();
+ List<Field> fields = ((TupleRecordData) record.getRecordData()).getRecordSchema().getFields();
+ for (int i = 0; i < fields.size(); i++) {
+ Field field = fields.get(i);
+ Column column = DatahubReaderUtils.getColumnFromField(record, field, this.timeStampUnit);
+ dataMap.put(field.getName(), column);
+ }
+
+ Record dataxRecord = recordSender.createRecord();
+
+ if (null != this.columns && 1 == this.columns.size()) {
+ String columnsInStr = columns.get(0).toString();
+ if ("\"*\"".equals(columnsInStr) || "*".equals(columnsInStr)) {
+ for (int i = 0; i < fields.size(); i++) {
+ dataxRecord.addColumn(dataMap.get(fields.get(i).getName()));
+ }
+
+ } else {
+ if (dataMap.containsKey(columnsInStr)) {
+ dataxRecord.addColumn(dataMap.get(columnsInStr));
+ } else {
+ dataxRecord.addColumn(new StringColumn(null));
+ }
+ }
+ } else {
+ for (String col : this.columns) {
+ if (dataMap.containsKey(col)) {
+ dataxRecord.addColumn(dataMap.get(col));
+ } else {
+ dataxRecord.addColumn(new StringColumn(null));
+ }
+ }
+ }
+
+ recordSender.sendToWriter(dataxRecord);
+ }
+ } else {
+ break;
+ }
+
+ if (exit) {
+ break;
+ }
+
+ curCursor = result.getNextCursor();
+ }
+
+
+ LOG.info("end read datahub shard...");
+ }
+
+ }
+
+}
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReaderErrorCode.java b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReaderErrorCode.java
new file mode 100644
index 00000000..949a66f0
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReaderErrorCode.java
@@ -0,0 +1,35 @@
+package com.alibaba.datax.plugin.reader.datahubreader;
+
+import com.alibaba.datax.common.spi.ErrorCode;
+
+public enum DatahubReaderErrorCode implements ErrorCode {
+ BAD_CONFIG_VALUE("DatahubReader-00", "The value you configured is invalid."),
+ LOG_HUB_ERROR("DatahubReader-01","Datahub exception"),
+ REQUIRE_VALUE("DatahubReader-02","Missing parameters"),
+ EMPTY_LOGSTORE_VALUE("DatahubReader-03","There is no shard under this LogStore");
+
+
+ private final String code;
+ private final String description;
+
+ private DatahubReaderErrorCode(String code, String description) {
+ this.code = code;
+ this.description = description;
+ }
+
+ @Override
+ public String getCode() {
+ return this.code;
+ }
+
+ @Override
+ public String getDescription() {
+ return this.description;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("Code:[%s], Description:[%s]. ", this.code,
+ this.description);
+ }
+}
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReaderUtils.java b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReaderUtils.java
new file mode 100644
index 00000000..6c3455df
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReaderUtils.java
@@ -0,0 +1,200 @@
+package com.alibaba.datax.plugin.reader.datahubreader;
+
+import java.math.BigDecimal;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.List;
+import java.util.concurrent.Callable;
+
+import com.alibaba.datax.common.element.*;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.util.DataXCaseEnvUtil;
+import com.alibaba.datax.common.util.RetryUtil;
+
+import com.aliyun.datahub.client.DatahubClient;
+import com.aliyun.datahub.client.exception.InvalidParameterException;
+import com.aliyun.datahub.client.model.*;
+
+public class DatahubReaderUtils {
+
+ public static long getUnixTimeFromDateTime(String dateTime) throws ParseException {
+ try {
+ String format = Constant.DATETIME_FORMAT;
+ SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format);
+ return simpleDateFormat.parse(dateTime).getTime();
+ } catch (ParseException ignored) {
+ throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+ "Invalid DateTime[" + dateTime + "]!");
+ }
+ }
+
+ public static List<ShardEntry> getShardsWithRetry(final DatahubClient datahubClient, final String project, final String topic) {
+
+ List<ShardEntry> shards = null;
+ try {
+ shards = RetryUtil.executeWithRetry(new Callable<List<ShardEntry>>() {
+ @Override
+ public List<ShardEntry> call() throws Exception {
+ ListShardResult listShardResult = datahubClient.listShard(project, topic);
+ return listShardResult.getShards();
+ }
+ }, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
+
+ } catch (Exception e) {
+ throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+ "get Shards error, please check! detail error message: " + e.toString());
+ }
+ return shards;
+ }
+
+ public static String getCursorWithRetry(final DatahubClient datahubClient, final String project, final String topic,
+ final String shardId, final long timestamp) {
+
+ String cursor;
+ try {
+ cursor = RetryUtil.executeWithRetry(new Callable<String>() {
+ @Override
+ public String call() throws Exception {
+ try {
+ return datahubClient.getCursor(project, topic, shardId, CursorType.SYSTEM_TIME, timestamp).getCursor();
+ } catch (InvalidParameterException e) {
+ if (e.getErrorMessage().indexOf("Time in seek request is out of range") >= 0) {
+ return null;
+ } else {
+ throw e;
+ }
+
+ }
+ }
+ }, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
+
+ } catch (Exception e) {
+ throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+ "get Cursor error, please check! detail error message: " + e.toString());
+ }
+ return cursor;
+ }
+
+ public static String getLatestCursorWithRetry(final DatahubClient datahubClient, final String project, final String topic,
+ final String shardId) {
+
+ String cursor;
+ try {
+ cursor = RetryUtil.executeWithRetry(new Callable<String>() {
+ @Override
+ public String call() throws Exception {
+ return datahubClient.getCursor(project, topic, shardId, CursorType.LATEST).getCursor();
+ }
+ }, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
+
+ } catch (Exception e) {
+ throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+ "get Cursor error, please check! detail error message: " + e.toString());
+ }
+ return cursor;
+ }
+
+ public static RecordSchema getDatahubSchemaWithRetry(final DatahubClient datahubClient, final String project, final String topic) {
+
+ RecordSchema schema;
+ try {
+ schema = RetryUtil.executeWithRetry(new Callable<RecordSchema>() {
+ @Override
+ public RecordSchema call() throws Exception {
+ return datahubClient.getTopic(project, topic).getRecordSchema();
+ }
+ }, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
+
+ } catch (Exception e) {
+ throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+ "get Topic Schema error, please check! detail error message: " + e.toString());
+ }
+ return schema;
+ }
+
+ public static GetRecordsResult getRecordsResultWithRetry(final DatahubClient datahubClient, final String project,
+ final String topic, final String shardId, final int batchSize, final String cursor, final RecordSchema schema) {
+
+ GetRecordsResult result;
+ try {
+ result = RetryUtil.executeWithRetry(new Callable<GetRecordsResult>() {
+ @Override
+ public GetRecordsResult call() throws Exception {
+ return datahubClient.getRecords(project, topic, shardId, schema, cursor, batchSize);
+ }
+ }, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
+
+ } catch (Exception e) {
+ throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+ "get Record Result error, please check! detail error message: " + e.toString());
+ }
+ return result;
+
+ }
+
+ public static Column getColumnFromField(RecordEntry record, Field field, String timeStampUnit) {
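+ // Convert a single DataHub field of a TupleRecordData record into the corresponding DataX Column type.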
+ Column col = null;
+ TupleRecordData o = (TupleRecordData) record.getRecordData();
+
+ switch (field.getType()) {
+ case SMALLINT:
+ Short shortValue = ((Short) o.getField(field.getName()));
+ col = new LongColumn(shortValue == null ? null: shortValue.longValue());
+ break;
+ case INTEGER:
+ col = new LongColumn((Integer) o.getField(field.getName()));
+ break;
+ case BIGINT: {
+ col = new LongColumn((Long) o.getField(field.getName()));
+ break;
+ }
+ case TINYINT: {
+ Byte byteValue = ((Byte) o.getField(field.getName()));
+ col = new LongColumn(byteValue == null ? null : byteValue.longValue());
+ break;
+ }
+ case BOOLEAN: {
+ col = new BoolColumn((Boolean) o.getField(field.getName()));
+ break;
+ }
+ case FLOAT:
+ col = new DoubleColumn((Float) o.getField(field.getName()));
+ break;
+ case DOUBLE: {
+ col = new DoubleColumn((Double) o.getField(field.getName()));
+ break;
+ }
+ case STRING: {
+ col = new StringColumn((String) o.getField(field.getName()));
+ break;
+ }
+ case DECIMAL: {
+ BigDecimal value = (BigDecimal) o.getField(field.getName());
+ col = new DoubleColumn(value == null ? null : value.doubleValue());
+ break;
+ }
+ case TIMESTAMP: {
+ Long value = (Long) o.getField(field.getName());
+
+ if ("MILLISECOND".equals(timeStampUnit)) {
+ // MILLISECOND: 13-digit precision, pass the value to new Date() directly
+ col = new DateColumn(value == null ? null : new Date(value));
+ }
+ else if ("SECOND".equals(timeStampUnit)){
+ col = new DateColumn(value == null ? null : new Date(value * 1000));
+ }
+ else {
+ // Default is MICROSECOND, 16-digit precision, consistent with the previous logic.
+ col = new DateColumn(value == null ? null : new Date(value / 1000));
+ }
+ break;
+ }
+ default:
+ throw new RuntimeException("Unknown column type: " + field.getType());
+ }
+
+ return col;
+ }
+
+}
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubWriterErrorCode.java b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubWriterErrorCode.java
new file mode 100644
index 00000000..c8633ea8
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubWriterErrorCode.java
@@ -0,0 +1,37 @@
+package com.alibaba.datax.plugin.reader.datahubreader;
+
+import com.alibaba.datax.common.spi.ErrorCode;
+import com.alibaba.datax.common.util.MessageSource;
+
+public enum DatahubWriterErrorCode implements ErrorCode {
+ MISSING_REQUIRED_VALUE("DatahubWriter-01", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.missing_required_value")),
+ INVALID_CONFIG_VALUE("DatahubWriter-02", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.invalid_config_value")),
+ GET_TOPOIC_INFO_FAIL("DatahubWriter-03", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.get_topic_info_fail")),
+ WRITE_DATAHUB_FAIL("DatahubWriter-04", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.write_datahub_fail")),
+ SCHEMA_NOT_MATCH("DatahubWriter-05", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.schema_not_match")),
+ ;
+
+ private final String code;
+ private final String description;
+
+ private DatahubWriterErrorCode(String code, String description) {
+ this.code = code;
+ this.description = description;
+ }
+
+ @Override
+ public String getCode() {
+ return this.code;
+ }
+
+ @Override
+ public String getDescription() {
+ return this.description;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("Code:[%s], Description:[%s]. ", this.code,
+ this.description);
+ }
+}
\ No newline at end of file
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/Key.java b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/Key.java
new file mode 100644
index 00000000..3cb84b4b
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/Key.java
@@ -0,0 +1,35 @@
+package com.alibaba.datax.plugin.reader.datahubreader;
+
+public final class Key {
+
+ /**
+ * Configuration keys used by the plugin that must be provided by the plugin user.
+ */
+ public static final String ENDPOINT = "endpoint";
+
+ public static final String ACCESSKEYID = "accessId";
+
+ public static final String ACCESSKEYSECRET = "accessKey";
+
+ public static final String PROJECT = "project";
+
+ public static final String TOPIC = "topic";
+
+ public static final String BEGINDATETIME = "beginDateTime";
+
+ public static final String ENDDATETIME = "endDateTime";
+
+ public static final String BATCHSIZE = "batchSize";
+
+ public static final String COLUMN = "column";
+
+ public static final String SHARDID = "shardId";
+
+ public static final String CONFIG_KEY_ENDPOINT = "endpoint";
+ public static final String CONFIG_KEY_ACCESS_ID = "accessId";
+ public static final String CONFIG_KEY_ACCESS_KEY = "accessKey";
+
+
+ public static final String TIMESTAMP_UNIT = "timeStampUnit";
+
+}
\ No newline at end of file
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings.properties b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings.properties
new file mode 100644
index 00000000..e85c8ab3
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings.properties
@@ -0,0 +1,5 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_en_US.properties b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_en_US.properties
new file mode 100644
index 00000000..31a291e6
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_en_US.properties
@@ -0,0 +1,5 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
\ No newline at end of file
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_ja_JP.properties b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_ja_JP.properties
new file mode 100644
index 00000000..31a291e6
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_ja_JP.properties
@@ -0,0 +1,5 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
\ No newline at end of file
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_CN.properties b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_CN.properties
new file mode 100644
index 00000000..31a291e6
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_CN.properties
@@ -0,0 +1,5 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
\ No newline at end of file
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_HK.properties b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_HK.properties
new file mode 100644
index 00000000..c6a3a0e0
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_HK.properties
@@ -0,0 +1,9 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.errorcode.missing_required_value=您缺失了必須填寫的參數值.
+errorcode.invalid_config_value=您的參數配寘錯誤.
+errorcode.get_topic_info_fail=獲取shard清單失敗.
+errorcode.write_datahub_fail=寫數據失敗.
+errorcode.schema_not_match=數據格式錯誤.
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_TW.properties b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_TW.properties
new file mode 100644
index 00000000..c6a3a0e0
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_TW.properties
@@ -0,0 +1,9 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.errorcode.missing_required_value=您缺失了必須填寫的參數值.
+errorcode.invalid_config_value=您的參數配寘錯誤.
+errorcode.get_topic_info_fail=獲取shard清單失敗.
+errorcode.write_datahub_fail=寫數據失敗.
+errorcode.schema_not_match=數據格式錯誤.
diff --git a/datahubreader/src/main/resources/job_config_template.json b/datahubreader/src/main/resources/job_config_template.json
new file mode 100644
index 00000000..eaf89804
--- /dev/null
+++ b/datahubreader/src/main/resources/job_config_template.json
@@ -0,0 +1,14 @@
+{
+ "name": "datahubreader",
+ "parameter": {
+ "endpoint":"",
+ "accessId": "",
+ "accessKey": "",
+ "project": "",
+ "topic": "",
+ "beginDateTime": "20180913121019",
+ "endDateTime": "20180913121119",
+ "batchSize": 1024,
+ "column": []
+ }
+}
\ No newline at end of file
diff --git a/datahubreader/src/main/resources/plugin.json b/datahubreader/src/main/resources/plugin.json
new file mode 100644
index 00000000..47b1c86b
--- /dev/null
+++ b/datahubreader/src/main/resources/plugin.json
@@ -0,0 +1,6 @@
+{
+ "name": "datahubreader",
+ "class": "com.alibaba.datax.plugin.reader.datahubreader.DatahubReader",
+ "description": "datahub reader",
+ "developer": "alibaba"
+}
\ No newline at end of file
diff --git a/datahubwriter/pom.xml b/datahubwriter/pom.xml
new file mode 100644
index 00000000..1ee1fe9b
--- /dev/null
+++ b/datahubwriter/pom.xml
@@ -0,0 +1,79 @@
+
+
+
+ datax-all
+ com.alibaba.datax
+ 0.0.1-SNAPSHOT
+
+ 4.0.0
+
+ datahubwriter
+
+ 0.0.1-SNAPSHOT
+
+
+
+ com.alibaba.datax
+ datax-common
+ ${datax-project-version}
+
+
+ slf4j-log4j12
+ org.slf4j
+
+
+
+
+ org.slf4j
+ slf4j-api
+
+
+ ch.qos.logback
+ logback-classic
+
+
+ com.aliyun.datahub
+ aliyun-sdk-datahub
+ 2.21.6-public
+
+
+ junit
+ junit
+ 4.12
+ test
+
+
+
+
+
+
+
+ maven-compiler-plugin
+
+ ${jdk-version}
+ ${jdk-version}
+ ${project-sourceEncoding}
+
+
+
+
+ maven-assembly-plugin
+
+
+ src/main/assembly/package.xml
+
+ datax
+
+
+
+ dwzip
+ package
+
+ single
+
+
+
+
+
+
+
diff --git a/datahubwriter/src/main/assembly/package.xml b/datahubwriter/src/main/assembly/package.xml
new file mode 100644
index 00000000..aaef9f99
--- /dev/null
+++ b/datahubwriter/src/main/assembly/package.xml
@@ -0,0 +1,34 @@
+
+
+
+ dir
+
+ false
+
+
+ src/main/resources
+
+ plugin.json
+
+ plugin/writer/datahubwriter
+
+
+ target/
+
+ datahubwriter-0.0.1-SNAPSHOT.jar
+
+ plugin/writer/datahubwriter
+
+
+
+
+
+ false
+ plugin/writer/datahubwriter/libs
+ runtime
+
+
+
diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubClientHelper.java b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubClientHelper.java
new file mode 100644
index 00000000..c25d1210
--- /dev/null
+++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubClientHelper.java
@@ -0,0 +1,43 @@
+package com.alibaba.datax.plugin.writer.datahubwriter;
+
+import org.apache.commons.lang3.StringUtils;
+
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.fastjson2.JSON;
+import com.alibaba.fastjson2.TypeReference;
+import com.aliyun.datahub.client.DatahubClient;
+import com.aliyun.datahub.client.DatahubClientBuilder;
+import com.aliyun.datahub.client.auth.Account;
+import com.aliyun.datahub.client.auth.AliyunAccount;
+import com.aliyun.datahub.client.common.DatahubConfig;
+import com.aliyun.datahub.client.http.HttpConfig;
+
+public class DatahubClientHelper {
+ public static DatahubClient getDatahubClient(Configuration jobConfig) {
+ String accessId = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_ID,
+ DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+ String accessKey = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_KEY,
+ DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+ String endpoint = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ENDPOINT,
+ DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+ Account account = new AliyunAccount(accessId, accessKey);
+ // Whether to enable binary transport; the server side supports it since version 2.12
+ boolean enableBinary = jobConfig.getBool("enableBinary", false);
+ DatahubConfig datahubConfig = new DatahubConfig(endpoint, account, enableBinary);
+ // HttpConfig is optional; default values are used when it is not set.
+ // Enabling LZ4 compression for network transport is recommended when reading and writing data.
+ HttpConfig httpConfig = null;
+ String httpConfigStr = jobConfig.getString("httpConfig");
+ if (StringUtils.isNotBlank(httpConfigStr)) {
+ httpConfig = JSON.parseObject(httpConfigStr, new TypeReference<HttpConfig>() {
+ });
+ }
+
+ DatahubClientBuilder builder = DatahubClientBuilder.newBuilder().setDatahubConfig(datahubConfig);
+ if (null != httpConfig) {
+ builder.setHttpConfig(httpConfig);
+ }
+ DatahubClient datahubClient = builder.build();
+ return datahubClient;
+ }
+}
diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubWriter.java b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubWriter.java
new file mode 100644
index 00000000..cd414fc5
--- /dev/null
+++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubWriter.java
@@ -0,0 +1,355 @@
+package com.alibaba.datax.plugin.writer.datahubwriter;
+
+import com.alibaba.datax.common.element.Column;
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.RecordReceiver;
+import com.alibaba.datax.common.spi.Writer;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.common.util.DataXCaseEnvUtil;
+import com.alibaba.datax.common.util.RetryUtil;
+import com.alibaba.fastjson2.JSON;
+import com.aliyun.datahub.client.DatahubClient;
+import com.aliyun.datahub.client.model.FieldType;
+import com.aliyun.datahub.client.model.GetTopicResult;
+import com.aliyun.datahub.client.model.ListShardResult;
+import com.aliyun.datahub.client.model.PutErrorEntry;
+import com.aliyun.datahub.client.model.PutRecordsResult;
+import com.aliyun.datahub.client.model.RecordEntry;
+import com.aliyun.datahub.client.model.RecordSchema;
+import com.aliyun.datahub.client.model.RecordType;
+import com.aliyun.datahub.client.model.ShardEntry;
+import com.aliyun.datahub.client.model.ShardState;
+import com.aliyun.datahub.client.model.TupleRecordData;
+
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.Callable;
+
+public class DatahubWriter extends Writer {
+
+ /**
+ * Job methods run only once; Task methods are run in parallel by multiple Task threads started by the framework.
+ *
+ * The overall Writer execution flow is:
+ *
+ * Job: init --> prepare --> split
+ *
+ * Task: init --> prepare --> startWrite --> post --> destroy
+ * Task: init --> prepare --> startWrite --> post --> destroy
+ *
+ * Job: post --> destroy
+ *
+ */
+ public static class Job extends Writer.Job {
+ private static final Logger LOG = LoggerFactory
+ .getLogger(Job.class);
+
+ private Configuration jobConfig = null;
+
+ @Override
+ public void init() {
+ this.jobConfig = super.getPluginJobConf();
+ jobConfig.getNecessaryValue(Key.CONFIG_KEY_ENDPOINT, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+ jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_ID, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+ jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_KEY, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+ jobConfig.getNecessaryValue(Key.CONFIG_KEY_PROJECT, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+ jobConfig.getNecessaryValue(Key.CONFIG_KEY_TOPIC, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+ }
+
+ @Override
+ public void prepare() {
+ String project = jobConfig.getNecessaryValue(Key.CONFIG_KEY_PROJECT,
+ DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+ String topic = jobConfig.getNecessaryValue(Key.CONFIG_KEY_TOPIC,
+ DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+ RecordType recordType = null;
+ DatahubClient client = DatahubClientHelper.getDatahubClient(this.jobConfig);
+ try {
+ GetTopicResult getTopicResult = client.getTopic(project, topic);
+ recordType = getTopicResult.getRecordType();
+ } catch (Exception e) {
+ LOG.warn("get topic type error: {}", e.getMessage());
+ }
+ if (null != recordType) {
+ if (recordType == RecordType.BLOB) {
+ throw DataXException.asDataXException(DatahubWriterErrorCode.WRITE_DATAHUB_FAIL,
+ "DatahubWriter only support 'Tuple' RecordType now, but your RecordType is 'BLOB'");
+ }
+ }
+ }
+
+ @Override
+ public List<Configuration> split(int mandatoryNumber) {
+ List<Configuration> configs = new ArrayList<Configuration>();
+ for (int i = 0; i < mandatoryNumber; ++i) {
+ configs.add(jobConfig.clone());
+ }
+ return configs;
+ }
+
+ @Override
+ public void post() {}
+
+ @Override
+ public void destroy() {}
+
+ }
+
+ public static class Task extends Writer.Task {
+ private static final Logger LOG = LoggerFactory
+ .getLogger(Task.class);
+ private static final List<String> FATAL_ERRORS_DEFAULT = Arrays.asList(
+ "InvalidParameterM",
+ "MalformedRecord",
+ "INVALID_SHARDID",
+ "NoSuchTopic",
+ "NoSuchShard"
+ );
+
+ private Configuration taskConfig;
+ private DatahubClient client;
+ private String project;
+ private String topic;
+ private List<String> shards;
+ private int maxCommitSize;
+ private int maxRetryCount;
+ private RecordSchema schema;
+ private long retryInterval;
+ private Random random;
+ private List<String> column;
+ private List<Integer> columnIndex;
+ private boolean enableColumnConfig;
+ private List<String> fatalErrors;
+
+ @Override
+ public void init() {
+ this.taskConfig = super.getPluginJobConf();
+ project = taskConfig.getNecessaryValue(Key.CONFIG_KEY_PROJECT, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+ topic = taskConfig.getNecessaryValue(Key.CONFIG_KEY_TOPIC, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+ maxCommitSize = taskConfig.getInt(Key.CONFIG_KEY_MAX_COMMIT_SIZE, 1024*1024);
+ maxRetryCount = taskConfig.getInt(Key.CONFIG_KEY_MAX_RETRY_COUNT, 500);
+ this.retryInterval = taskConfig.getInt(Key.RETRY_INTERVAL, 650);
+ this.random = new Random();
+ this.column = this.taskConfig.getList(Key.CONFIG_KEY_COLUMN, String.class);
+ // ["*"]
+ if (null != this.column && 1 == this.column.size()) {
+ if (StringUtils.equals("*", this.column.get(0))) {
+ this.column = null;
+ }
+ }
+ this.columnIndex = new ArrayList<Integer>();
+ // keep a switch as a safety net
+ this.enableColumnConfig = this.taskConfig.getBool("enableColumnConfig", true);
+ this.fatalErrors = this.taskConfig.getList("fatalErrors", Task.FATAL_ERRORS_DEFAULT, String.class);
+ this.client = DatahubClientHelper.getDatahubClient(this.taskConfig);
+ }
+
+ @Override
+ public void prepare() {
+ final String shardIdConfig = this.taskConfig.getString(Key.CONFIG_KEY_SHARD_ID);
+ this.shards = new ArrayList<String>();
+ try {
+ RetryUtil.executeWithRetry(new Callable<Void>() {
+ @Override
+ public Void call() throws Exception {
+ ListShardResult result = client.listShard(project, topic);
+ if (StringUtils.isNotBlank(shardIdConfig)) {
+ shards.add(shardIdConfig);
+ } else {
+ for (ShardEntry shard : result.getShards()) {
+ if (shard.getState() == ShardState.ACTIVE || shard.getState() == ShardState.OPENING) {
+ shards.add(shard.getShardId());
+ }
+ }
+ }
+ schema = client.getTopic(project, topic).getRecordSchema();
+ return null;
+ }
+ }, DataXCaseEnvUtil.getRetryTimes(5), DataXCaseEnvUtil.getRetryInterval(10000L), DataXCaseEnvUtil.getRetryExponential(false));
+ } catch (Exception e) {
+ throw DataXException.asDataXException(DatahubWriterErrorCode.GET_TOPOIC_INFO_FAIL,
+ "get topic info failed", e);
+ }
+ LOG.info("datahub topic {} shard to write: {}", this.topic, JSON.toJSONString(this.shards));
+ LOG.info("datahub topic {} has schema: {}", this.topic, JSON.toJSONString(this.schema));
+
+ // Based on the schema order and the user-configured columns, compute the write order to DataHub so that column reordering is supported.
+ // All subsequent writes to DataHub use the ordinal mapping kept in columnIndex.
+ int totalSize = this.schema.getFields().size();
+ if (null != this.column && !this.column.isEmpty() && this.enableColumnConfig) {
+ for (String eachCol : this.column) {
+ int indexFound = -1;
+ for (int i = 0; i < totalSize; i++) {
+ // warn: column names are matched case-insensitively
+ if (StringUtils.equalsIgnoreCase(eachCol, this.schema.getField(i).getName())) {
+ indexFound = i;
+ break;
+ }
+ }
+ if (indexFound >= 0) {
+ this.columnIndex.add(indexFound);
+ } else {
+ throw DataXException.asDataXException(DatahubWriterErrorCode.SCHEMA_NOT_MATCH,
+ String.format("can not find column %s in datahub topic %s", eachCol, this.topic));
+ }
+ }
+ } else {
+ for (int i = 0; i < totalSize; i++) {
+ this.columnIndex.add(i);
+ }
+ }
+ }
+
+ @Override
+ public void startWrite(RecordReceiver recordReceiver) {
+ Record record;
+ List<RecordEntry> records = new ArrayList<RecordEntry>();
+ String shardId = null;
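+ // Pick the target shard: a single configured shard is reused as-is, otherwise one is chosen at random and re-rolled after every committed batch.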
+ if (1 == this.shards.size()) {
+ shardId = shards.get(0);
+ } else {
+ shardId = shards.get(this.random.nextInt(shards.size()));
+ }
+ int commitSize = 0;
+ try {
+ while ((record = recordReceiver.getFromReader()) != null) {
+ RecordEntry dhRecord = convertRecord(record, shardId);
+ if (dhRecord != null) {
+ records.add(dhRecord);
+ }
+ commitSize += record.getByteSize();
+ if (commitSize >= maxCommitSize) {
+ commit(records);
+ records.clear();
+ commitSize = 0;
+ if (1 == this.shards.size()) {
+ shardId = shards.get(0);
+ } else {
+ shardId = shards.get(this.random.nextInt(shards.size()));
+ }
+ }
+ }
+ if (commitSize > 0) {
+ commit(records);
+ }
+ } catch (Exception e) {
+ throw DataXException.asDataXException(
+ DatahubWriterErrorCode.WRITE_DATAHUB_FAIL, e);
+ }
+ }
+
+ @Override
+ public void post() {}
+
+ @Override
+ public void destroy() {}
+
+ private void commit(List<RecordEntry> records) throws InterruptedException {
+ PutRecordsResult result = client.putRecords(project, topic, records);
+ if (result.getFailedRecordCount() > 0) {
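+ // Only the failed records are retried, up to maxRetryCount attempts, sleeping retryInterval (or a random back-off) between attempts; fatal error codes abort immediately.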
+ for (int i = 0; i < maxRetryCount; ++i) {
+ boolean limitExceededMessagePrinted = false;
+ for (PutErrorEntry error : result.getPutErrorEntries()) {
+ // For LimitExceeded, log only once per batch instead of once per failed record
+ if (StringUtils.equalsIgnoreCase("LimitExceeded", error.getErrorcode())) {
+ if (!limitExceededMessagePrinted) {
+ LOG.warn("write record error, request id: {}, error code: {}, error message: {}",
+ result.getRequestId(), error.getErrorcode(), error.getMessage());
+ limitExceededMessagePrinted = true;
+ }
+ } else {
+ LOG.error("write record error, request id: {}, error code: {}, error message: {}",
+ result.getRequestId(), error.getErrorcode(), error.getMessage());
+ }
+ if (this.fatalErrors.contains(error.getErrorcode())) {
+ throw DataXException.asDataXException(
+ DatahubWriterErrorCode.WRITE_DATAHUB_FAIL,
+ error.getMessage());
+ }
+ }
+
+ if (this.retryInterval >= 0) {
+ Thread.sleep(this.retryInterval);
+ } else {
+ Thread.sleep(new Random().nextInt(700) + 300);
+ }
+
+ result = client.putRecords(project, topic, result.getFailedRecords());
+ if (result.getFailedRecordCount() == 0) {
+ return;
+ }
+ }
+ throw DataXException.asDataXException(
+ DatahubWriterErrorCode.WRITE_DATAHUB_FAIL,
+ "write datahub failed");
+ }
+ }
+
+ private RecordEntry convertRecord(Record dxRecord, String shardId) {
+ try {
+ RecordEntry dhRecord = new RecordEntry();
+ dhRecord.setShardId(shardId);
+ TupleRecordData data = new TupleRecordData(this.schema);
+ for (int i = 0; i < this.columnIndex.size(); ++i) {
+ int orderInSchema = this.columnIndex.get(i);
+ FieldType type = this.schema.getField(orderInSchema).getType();
+ Column column = dxRecord.getColumn(i);
+ switch (type) {
+ case BIGINT:
+ data.setField(orderInSchema, column.asLong());
+ break;
+ case DOUBLE:
+ data.setField(orderInSchema, column.asDouble());
+ break;
+ case STRING:
+ data.setField(orderInSchema, column.asString());
+ break;
+ case BOOLEAN:
+ data.setField(orderInSchema, column.asBoolean());
+ break;
+ case TIMESTAMP:
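+ // DataHub TIMESTAMP fields are in microseconds; Date.getTime() returns milliseconds, hence the * 1000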
+ if (null == column.asDate()) {
+ data.setField(orderInSchema, null);
+ } else {
+ data.setField(orderInSchema, column.asDate().getTime() * 1000);
+ }
+ break;
+ case DECIMAL:
+ // warn
+ data.setField(orderInSchema, column.asBigDecimal());
+ break;
+ case INTEGER:
+ data.setField(orderInSchema, column.asLong());
+ break;
+ case FLOAT:
+ data.setField(orderInSchema, column.asDouble());
+ break;
+ case TINYINT:
+ data.setField(orderInSchema, column.asLong());
+ break;
+ case SMALLINT:
+ data.setField(orderInSchema, column.asLong());
+ break;
+ default:
+ throw DataXException.asDataXException(
+ DatahubWriterErrorCode.SCHEMA_NOT_MATCH,
+ String.format("does not support type: %s", type));
+ }
+ }
+ dhRecord.setRecordData(data);
+ return dhRecord;
+ } catch (Exception e) {
+ super.getTaskPluginCollector().collectDirtyRecord(dxRecord, e, "convert recor failed");
+ }
+ return null;
+ }
+ }
+
+}
\ No newline at end of file
diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubWriterErrorCode.java b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubWriterErrorCode.java
new file mode 100644
index 00000000..ad03abd1
--- /dev/null
+++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubWriterErrorCode.java
@@ -0,0 +1,37 @@
+package com.alibaba.datax.plugin.writer.datahubwriter;
+
+import com.alibaba.datax.common.spi.ErrorCode;
+import com.alibaba.datax.common.util.MessageSource;
+
+public enum DatahubWriterErrorCode implements ErrorCode {
+ MISSING_REQUIRED_VALUE("DatahubWriter-01", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.missing_required_value")),
+ INVALID_CONFIG_VALUE("DatahubWriter-02", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.invalid_config_value")),
+ GET_TOPOIC_INFO_FAIL("DatahubWriter-03", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.get_topic_info_fail")),
+ WRITE_DATAHUB_FAIL("DatahubWriter-04", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.write_datahub_fail")),
+ SCHEMA_NOT_MATCH("DatahubWriter-05", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.schema_not_match")),
+ ;
+
+ private final String code;
+ private final String description;
+
+ private DatahubWriterErrorCode(String code, String description) {
+ this.code = code;
+ this.description = description;
+ }
+
+ @Override
+ public String getCode() {
+ return this.code;
+ }
+
+ @Override
+ public String getDescription() {
+ return this.description;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("Code:[%s], Description:[%s]. ", this.code,
+ this.description);
+ }
+}
\ No newline at end of file
diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/Key.java b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/Key.java
new file mode 100644
index 00000000..5f179234
--- /dev/null
+++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/Key.java
@@ -0,0 +1,26 @@
+package com.alibaba.datax.plugin.writer.datahubwriter;
+
+public final class Key {
+
+ /**
+ * Configuration keys used by the plugin that must be provided by the plugin user.
+ */
+ public static final String CONFIG_KEY_ENDPOINT = "endpoint";
+ public static final String CONFIG_KEY_ACCESS_ID = "accessId";
+ public static final String CONFIG_KEY_ACCESS_KEY = "accessKey";
+ public static final String CONFIG_KEY_PROJECT = "project";
+ public static final String CONFIG_KEY_TOPIC = "topic";
+ public static final String CONFIG_KEY_WRITE_MODE = "mode";
+ public static final String CONFIG_KEY_SHARD_ID = "shardId";
+ public static final String CONFIG_KEY_MAX_COMMIT_SIZE = "maxCommitSize";
+ public static final String CONFIG_KEY_MAX_RETRY_COUNT = "maxRetryCount";
+
+ public static final String CONFIG_VALUE_SEQUENCE_MODE = "sequence";
+ public static final String CONFIG_VALUE_RANDOM_MODE = "random";
+
+ public final static String MAX_RETRY_TIME = "maxRetryTime";
+
+ public final static String RETRY_INTERVAL = "retryInterval";
+
+ public final static String CONFIG_KEY_COLUMN = "column";
+}
diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings.properties b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings.properties
new file mode 100644
index 00000000..e85c8ab3
--- /dev/null
+++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings.properties
@@ -0,0 +1,5 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_en_US.properties b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_en_US.properties
new file mode 100644
index 00000000..31a291e6
--- /dev/null
+++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_en_US.properties
@@ -0,0 +1,5 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
\ No newline at end of file
diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_ja_JP.properties b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_ja_JP.properties
new file mode 100644
index 00000000..31a291e6
--- /dev/null
+++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_ja_JP.properties
@@ -0,0 +1,5 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
\ No newline at end of file
diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_CN.properties b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_CN.properties
new file mode 100644
index 00000000..31a291e6
--- /dev/null
+++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_CN.properties
@@ -0,0 +1,5 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
\ No newline at end of file
diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_HK.properties b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_HK.properties
new file mode 100644
index 00000000..c6a3a0e0
--- /dev/null
+++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_HK.properties
@@ -0,0 +1,9 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.errorcode.missing_required_value=您缺失了必須填寫的參數值.
+errorcode.invalid_config_value=您的參數配寘錯誤.
+errorcode.get_topic_info_fail=獲取shard清單失敗.
+errorcode.write_datahub_fail=寫數據失敗.
+errorcode.schema_not_match=數據格式錯誤.
diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_TW.properties b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_TW.properties
new file mode 100644
index 00000000..c6a3a0e0
--- /dev/null
+++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_TW.properties
@@ -0,0 +1,9 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.errorcode.missing_required_value=您缺失了必須填寫的參數值.
+errorcode.invalid_config_value=您的參數配寘錯誤.
+errorcode.get_topic_info_fail=獲取shard清單失敗.
+errorcode.write_datahub_fail=寫數據失敗.
+errorcode.schema_not_match=數據格式錯誤.
diff --git a/datahubwriter/src/main/resources/job_config_template.json b/datahubwriter/src/main/resources/job_config_template.json
new file mode 100644
index 00000000..8b0b41ae
--- /dev/null
+++ b/datahubwriter/src/main/resources/job_config_template.json
@@ -0,0 +1,14 @@
+{
+ "name": "datahubwriter",
+ "parameter": {
+ "endpoint":"",
+ "accessId": "",
+ "accessKey": "",
+ "project": "",
+ "topic": "",
+ "mode": "random",
+ "shardId": "",
+ "maxCommitSize": 524288,
+ "maxRetryCount": 500
+ }
+}
\ No newline at end of file
diff --git a/datahubwriter/src/main/resources/plugin.json b/datahubwriter/src/main/resources/plugin.json
new file mode 100644
index 00000000..91c17292
--- /dev/null
+++ b/datahubwriter/src/main/resources/plugin.json
@@ -0,0 +1,6 @@
+{
+ "name": "datahubwriter",
+ "class": "com.alibaba.datax.plugin.writer.datahubwriter.DatahubWriter",
+ "description": "datahub writer",
+ "developer": "alibaba"
+}
\ No newline at end of file
diff --git a/datax-example/datax-example-core/pom.xml b/datax-example/datax-example-core/pom.xml
new file mode 100644
index 00000000..6a2e9e8e
--- /dev/null
+++ b/datax-example/datax-example-core/pom.xml
@@ -0,0 +1,20 @@
+
+
+ 4.0.0
+
+ com.alibaba.datax
+ datax-example
+ 0.0.1-SNAPSHOT
+
+
+ datax-example-core
+
+
+ 8
+ 8
+ UTF-8
+
+
+
\ No newline at end of file
diff --git a/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/ExampleContainer.java b/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/ExampleContainer.java
new file mode 100644
index 00000000..a4229fd1
--- /dev/null
+++ b/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/ExampleContainer.java
@@ -0,0 +1,26 @@
+package com.alibaba.datax.example;
+
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.core.Engine;
+import com.alibaba.datax.example.util.ExampleConfigParser;
+
+/**
+ * {@code Date} 2023/8/6 11:22
+ *
+ * @author fuyouj
+ */
+
+public class ExampleContainer {
+ /**
+ * The entry point the example module exposes to callers.
+ * It is best to read datax-example/doc/README.MD before using it.
+ * @param jobPath absolute path of the job json file
+ */
+ public static void start(String jobPath) {
+
+ Configuration configuration = ExampleConfigParser.parse(jobPath);
+
+ Engine engine = new Engine();
+ engine.start(configuration);
+ }
+}
diff --git a/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/Main.java b/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/Main.java
new file mode 100644
index 00000000..56bf9f0b
--- /dev/null
+++ b/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/Main.java
@@ -0,0 +1,23 @@
+package com.alibaba.datax.example;
+
+
+import com.alibaba.datax.example.util.PathUtil;
+
+/**
+ * @author fuyouj
+ */
+public class Main {
+
+ /**
+ * 1. Add the plugin you want to debug as a dependency in the example module's pom file.
+ * You can open this module's pom file to see how streamreader and streamwriter are imported.
+ * 2. Specify your job file here.
+ */
+ public static void main(String[] args) {
+
+ String classPathJobPath = "/job/stream2stream.json";
+ String absJobPath = PathUtil.getAbsolutePathFromClassPath(classPathJobPath);
+ ExampleContainer.start(absJobPath);
+ }
+
+}
diff --git a/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/util/ExampleConfigParser.java b/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/util/ExampleConfigParser.java
new file mode 100644
index 00000000..6bbb4a23
--- /dev/null
+++ b/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/util/ExampleConfigParser.java
@@ -0,0 +1,154 @@
+package com.alibaba.datax.example.util;
+
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.core.util.ConfigParser;
+import com.alibaba.datax.core.util.FrameworkErrorCode;
+import com.alibaba.datax.core.util.container.CoreConstant;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.nio.file.Paths;
+import java.util.*;
+
+/**
+ * @author fuyouj
+ */
+public class ExampleConfigParser {
+ private static final String CORE_CONF = "/example/conf/core.json";
+
+ private static final String PLUGIN_DESC_FILE = "plugin.json";
+
+ /**
+ * Given the path of a job configuration, ConfigParser resolves all Job, Plugin and Core information and returns it as a Configuration.
+ * Unlike the core ConfigParser, the core and plugin parts here do not depend on the packaged datax.home; instead the program's compiled target directories are scanned.
+ */
+ public static Configuration parse(final String jobPath) {
+
+ Configuration configuration = ConfigParser.parseJobConfig(jobPath);
+ configuration.merge(coreConfig(),
+ false);
+
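+ // Map plugin name -> plugin type ("reader"/"writer"), so the classpath scan knows which plugin.json files are still needed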
+ Map<String, String> pluginTypeMap = new HashMap<>();
+ String readerName = configuration.getString(CoreConstant.DATAX_JOB_CONTENT_READER_NAME);
+ String writerName = configuration.getString(CoreConstant.DATAX_JOB_CONTENT_WRITER_NAME);
+ pluginTypeMap.put(readerName, "reader");
+ pluginTypeMap.put(writerName, "writer");
+ Configuration pluginsDescConfig = parsePluginsConfig(pluginTypeMap);
+ configuration.merge(pluginsDescConfig, false);
+ return configuration;
+ }
+
+ private static Configuration parsePluginsConfig(Map<String, String> pluginTypeMap) {
+
+ Configuration configuration = Configuration.newDefault();
+
+ // The initial plan was to obtain the working directory via user.dir and scan plugins from there,
+ // but user.dir is somewhat unreliable across environments, so that approach was abandoned.
+
+ for (File basePackage : runtimeBasePackages()) {
+ if (pluginTypeMap.isEmpty()) {
+ break;
+ }
+ scanPluginByPackage(basePackage, configuration, basePackage.listFiles(), pluginTypeMap);
+ }
+ if (!pluginTypeMap.isEmpty()) {
+ String failedPlugin = pluginTypeMap.keySet().toString();
+ String message = "\nplugin %s load failed :ry to analyze the reasons from the following aspects.。\n" +
+ "1: Check if the name of the plugin is spelled correctly, and verify whether DataX supports this plugin\n" +
+ "2:Verify if the tag has been added under section in the pom file of the relevant plugin.\n" +
+ " src/main/resources\n" +
+ " \n" +
+ " **/*.*\n" +
+ " \n" +
+ " true\n" +
+ " \n [Refer to the streamreader pom file] \n" +
+ "3: Check that the datax-yourPlugin-example module imported your test plugin";
+ message = String.format(message, failedPlugin);
+ throw DataXException.asDataXException(FrameworkErrorCode.PLUGIN_INIT_ERROR, message);
+ }
+ return configuration;
+ }
+
+ /**
+ * Obtain the program's compiled output directories via the classLoader.
+ *
+ * @return File[/datax-example/target/classes,xxReader/target/classes,xxWriter/target/classes]
+ */
+ private static File[] runtimeBasePackages() {
+ List<File> basePackages = new ArrayList<>();
+ ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
+ Enumeration<URL> resources = null;
+ try {
+ resources = classLoader.getResources("");
+ } catch (IOException e) {
+ throw DataXException.asDataXException(e.getMessage());
+ }
+
+ while (resources.hasMoreElements()) {
+ URL resource = resources.nextElement();
+ File file = new File(resource.getFile());
+ if (file.isDirectory()) {
+ basePackages.add(file);
+ }
+ }
+
+ return basePackages.toArray(new File[0]);
+ }
+
+ /**
+ * @param packageFile the compiled target/classes root directory; it is used as the plugin URL directory when a plugin is found, and pointing at the root directory is the safest choice
+ * @param configuration pluginConfig
+ * @param files files to scan
+ * @param needPluginTypeMap plugins that are still needed
+ */
+ private static void scanPluginByPackage(File packageFile,
+ Configuration configuration,
+ File[] files,
+ Map<String, String> needPluginTypeMap) {
+ if (files == null) {
+ return;
+ }
+ for (File file : files) {
+ if (file.isFile() && PLUGIN_DESC_FILE.equals(file.getName())) {
+ Configuration pluginDesc = Configuration.from(file);
+ String descPluginName = pluginDesc.getString("name", "");
+
+ if (needPluginTypeMap.containsKey(descPluginName)) {
+
+ String type = needPluginTypeMap.get(descPluginName);
+ configuration.merge(parseOnePlugin(packageFile.getAbsolutePath(), type, descPluginName, pluginDesc), false);
+ needPluginTypeMap.remove(descPluginName);
+
+ }
+ } else {
+ scanPluginByPackage(packageFile, configuration, file.listFiles(), needPluginTypeMap);
+ }
+ }
+ }
+
+
+ private static Configuration parseOnePlugin(String packagePath,
+ String pluginType,
+ String pluginName,
+ Configuration pluginDesc) {
+ // Set "path" so that the URLClassLoader-based loading used by JarLoader still works
+ pluginDesc.set("path", packagePath);
+ Configuration pluginConfInJob = Configuration.newDefault();
+ pluginConfInJob.set(
+ String.format("plugin.%s.%s", pluginType, pluginName),
+ pluginDesc.getInternal());
+ return pluginConfInJob;
+ }
+
+ private static Configuration coreConfig() {
+ try {
+ URL resource = ExampleConfigParser.class.getResource(CORE_CONF);
+ return Configuration.from(Paths.get(resource.toURI()).toFile());
+ } catch (Exception ignore) {
+ throw DataXException.asDataXException("Failed to load the configuration file core.json. " +
+ "Please check whether /example/conf/core.json exists!");
+ }
+ }
+}
diff --git a/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/util/PathUtil.java b/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/util/PathUtil.java
new file mode 100644
index 00000000..e197fa73
--- /dev/null
+++ b/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/util/PathUtil.java
@@ -0,0 +1,26 @@
+package com.alibaba.datax.example.util;
+
+
+import com.alibaba.datax.common.exception.DataXException;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.Paths;
+
+/**
+ * @author fuyouj
+ */
+public class PathUtil {
+ public static String getAbsolutePathFromClassPath(String path) {
+ URL resource = PathUtil.class.getResource(path);
+ try {
+ assert resource != null;
+ URI uri = resource.toURI();
+ return Paths.get(uri).toString();
+ } catch (NullPointerException | URISyntaxException e) {
+ throw DataXException.asDataXException("path error,please check whether the path is correct");
+ }
+
+ }
+}
diff --git a/datax-example/datax-example-core/src/main/resources/example/conf/core.json b/datax-example/datax-example-core/src/main/resources/example/conf/core.json
new file mode 100755
index 00000000..33281ac0
--- /dev/null
+++ b/datax-example/datax-example-core/src/main/resources/example/conf/core.json
@@ -0,0 +1,60 @@
+{
+ "entry": {
+ "jvm": "-Xms1G -Xmx1G",
+ "environment": {}
+ },
+ "common": {
+ "column": {
+ "datetimeFormat": "yyyy-MM-dd HH:mm:ss",
+ "timeFormat": "HH:mm:ss",
+ "dateFormat": "yyyy-MM-dd",
+ "extraFormats":["yyyyMMdd"],
+ "timeZone": "GMT+8",
+ "encoding": "utf-8"
+ }
+ },
+ "core": {
+ "dataXServer": {
+ "address": "http://localhost:7001/api",
+ "timeout": 10000,
+ "reportDataxLog": false,
+ "reportPerfLog": false
+ },
+ "transport": {
+ "channel": {
+ "class": "com.alibaba.datax.core.transport.channel.memory.MemoryChannel",
+ "speed": {
+ "byte": -1,
+ "record": -1
+ },
+ "flowControlInterval": 20,
+ "capacity": 512,
+ "byteCapacity": 67108864
+ },
+ "exchanger": {
+ "class": "com.alibaba.datax.core.plugin.BufferedRecordExchanger",
+ "bufferSize": 32
+ }
+ },
+ "container": {
+ "job": {
+ "reportInterval": 10000
+ },
+ "taskGroup": {
+ "channel": 5
+ },
+ "trace": {
+ "enable": "false"
+ }
+
+ },
+ "statistics": {
+ "collector": {
+ "plugin": {
+ "taskClass": "com.alibaba.datax.core.statistics.plugin.task.StdoutPluginCollector",
+ "maxDirtyNumber": 10
+ }
+ }
+ }
+ }
+}
diff --git a/datax-example/datax-example-core/src/test/java/com/alibaba/datax/example/util/PathUtilTest.java b/datax-example/datax-example-core/src/test/java/com/alibaba/datax/example/util/PathUtilTest.java
new file mode 100644
index 00000000..8985b54c
--- /dev/null
+++ b/datax-example/datax-example-core/src/test/java/com/alibaba/datax/example/util/PathUtilTest.java
@@ -0,0 +1,19 @@
+package com.alibaba.datax.example.util;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * {@code Author} FuYouJ
+ * {@code Date} 2023/8/19 21:38
+ */
+
+public class PathUtilTest {
+
+ @Test
+ public void testParseClassPathFile() {
+ String path = "/pathTest.json";
+ String absolutePathFromClassPath = PathUtil.getAbsolutePathFromClassPath(path);
+ Assert.assertNotNull(absolutePathFromClassPath);
+ }
+}
diff --git a/datax-example/datax-example-core/src/test/resources/pathTest.json b/datax-example/datax-example-core/src/test/resources/pathTest.json
new file mode 100644
index 00000000..9e26dfee
--- /dev/null
+++ b/datax-example/datax-example-core/src/test/resources/pathTest.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/datax-example/datax-example-neo4j/pom.xml b/datax-example/datax-example-neo4j/pom.xml
new file mode 100644
index 00000000..303b14a8
--- /dev/null
+++ b/datax-example/datax-example-neo4j/pom.xml
@@ -0,0 +1,43 @@
+
+
+ 4.0.0
+
+ com.alibaba.datax
+ datax-example
+ 0.0.1-SNAPSHOT
+
+
+ datax-example-neo4j
+
+
+ 8
+ 8
+ UTF-8
+ 1.17.6
+ 4.4.9
+
+
+
+ com.alibaba.datax
+ datax-example-core
+ 0.0.1-SNAPSHOT
+
+
+ org.testcontainers
+ testcontainers
+ ${test.container.version}
+
+
+ com.alibaba.datax
+ neo4jwriter
+ 0.0.1-SNAPSHOT
+
+
+ com.alibaba.datax
+ datax-example-streamreader
+ 0.0.1-SNAPSHOT
+
+
+
\ No newline at end of file
diff --git a/datax-example/datax-example-neo4j/src/test/java/com/alibaba/datax/example/neo4j/StreamReader2Neo4jWriterTest.java b/datax-example/datax-example-neo4j/src/test/java/com/alibaba/datax/example/neo4j/StreamReader2Neo4jWriterTest.java
new file mode 100644
index 00000000..9cf01253
--- /dev/null
+++ b/datax-example/datax-example-neo4j/src/test/java/com/alibaba/datax/example/neo4j/StreamReader2Neo4jWriterTest.java
@@ -0,0 +1,138 @@
+package com.alibaba.datax.example.neo4j;
+
+import com.alibaba.datax.example.ExampleContainer;
+import com.alibaba.datax.example.util.PathUtil;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.neo4j.driver.*;
+import org.neo4j.driver.types.Node;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testcontainers.containers.GenericContainer;
+import org.testcontainers.containers.Network;
+import org.testcontainers.containers.output.Slf4jLogConsumer;
+import org.testcontainers.lifecycle.Startables;
+import org.testcontainers.shaded.org.awaitility.Awaitility;
+import org.testcontainers.utility.DockerImageName;
+import org.testcontainers.utility.DockerLoggerFactory;
+
+import java.net.URI;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
+
+/**
+ * {@code Author} FuYouJ
+ * {@code Date} 2023/8/19 21:48
+ */
+
+public class StreamReader2Neo4jWriterTest {
+ private static final Logger LOGGER = LoggerFactory.getLogger(StreamReader2Neo4jWriterTest.class);
+ private static final String CONTAINER_IMAGE = "neo4j:5.9.0";
+
+ private static final String CONTAINER_HOST = "neo4j-host";
+ private static final int HTTP_PORT = 7474;
+ private static final int BOLT_PORT = 7687;
+ private static final String CONTAINER_NEO4J_USERNAME = "neo4j";
+ private static final String CONTAINER_NEO4J_PASSWORD = "Test@12343";
+ private static final URI CONTAINER_URI = URI.create("neo4j://localhost:" + BOLT_PORT);
+
+ protected static final Network NETWORK = Network.newNetwork();
+
+ private GenericContainer<?> container;
+ protected Driver neo4jDriver;
+ protected Session neo4jSession;
+ private static final int CHANNEL = 5;
+ private static final int READER_NUM = 10;
+
+ @Before
+ public void init() {
+ DockerImageName imageName = DockerImageName.parse(CONTAINER_IMAGE);
+ container =
+ new GenericContainer<>(imageName)
+ .withNetwork(NETWORK)
+ .withNetworkAliases(CONTAINER_HOST)
+ .withExposedPorts(HTTP_PORT, BOLT_PORT)
+ .withEnv(
+ "NEO4J_AUTH",
+ CONTAINER_NEO4J_USERNAME + "/" + CONTAINER_NEO4J_PASSWORD)
+ .withEnv("apoc.export.file.enabled", "true")
+ .withEnv("apoc.import.file.enabled", "true")
+ .withEnv("apoc.import.file.use_neo4j_config", "true")
+ .withEnv("NEO4J_PLUGINS", "[\"apoc\"]")
+ .withLogConsumer(
+ new Slf4jLogConsumer(
+ DockerLoggerFactory.getLogger(CONTAINER_IMAGE)));
+ container.setPortBindings(
+ Arrays.asList(
+ String.format("%s:%s", HTTP_PORT, HTTP_PORT),
+ String.format("%s:%s", BOLT_PORT, BOLT_PORT)));
+ Startables.deepStart(Stream.of(container)).join();
+ LOGGER.info("container started");
+ Awaitility.given()
+ .ignoreExceptions()
+ .await()
+ .atMost(30, TimeUnit.SECONDS)
+ .untilAsserted(this::initConnection);
+ }
+
+ // Use the Example runner to exercise the whole job against the neo4jWriter module, which makes it easy to spot problems across the entire flow
+ @Test
+ public void streamReader2Neo4j() {
+
+ deleteHistoryIfExist();
+
+ String path = "/streamreader2neo4j.json";
+ String jobPath = PathUtil.getAbsolutePathFromClassPath(path);
+
+ ExampleContainer.start(jobPath);
+
+ // Verify that the result set matches expectations, based on the channel count and the reader's mock data
+ verifyWriteResult();
+ }
+
+ private void deleteHistoryIfExist() {
+ String query = "match (n:StreamReader) return n limit 1";
+ String delete = "match (n:StreamReader) delete n";
+ if (neo4jSession.run(query).hasNext()) {
+ neo4jSession.run(delete);
+ }
+ }
+
+ private void verifyWriteResult() {
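+ // The job config uses 5 channels and the streamreader mocks 10 records per channel (sliceRecordCount), so CHANNEL * READER_NUM StreamReader nodes are expected.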
+ int total = CHANNEL * READER_NUM;
+ String query = "match (n:StreamReader) return n";
+ Result run = neo4jSession.run(query);
+ int count = 0;
+ while (run.hasNext()) {
+ Record record = run.next();
+ Node node = record.get("n").asNode();
+ if (node.hasLabel("StreamReader")) {
+ count++;
+ }
+ }
+ Assert.assertEquals(count, total);
+ }
+ @After
+ public void destroy() {
+ if (neo4jSession != null) {
+ neo4jSession.close();
+ }
+ if (neo4jDriver != null) {
+ neo4jDriver.close();
+ }
+ if (container != null) {
+ container.close();
+ }
+ }
+
+ private void initConnection() {
+ neo4jDriver =
+ GraphDatabase.driver(
+ CONTAINER_URI,
+ AuthTokens.basic(CONTAINER_NEO4J_USERNAME, CONTAINER_NEO4J_PASSWORD));
+ neo4jSession = neo4jDriver.session(SessionConfig.forDatabase("neo4j"));
+ }
+}
diff --git a/datax-example/datax-example-neo4j/src/test/resources/streamreader2neo4j.json b/datax-example/datax-example-neo4j/src/test/resources/streamreader2neo4j.json
new file mode 100644
index 00000000..3d543ce3
--- /dev/null
+++ b/datax-example/datax-example-neo4j/src/test/resources/streamreader2neo4j.json
@@ -0,0 +1,51 @@
+{
+ "job": {
+ "content": [
+ {
+ "reader": {
+ "name": "streamreader",
+ "parameter": {
+ "sliceRecordCount": 10,
+ "column": [
+ {
+ "type": "string",
+ "value": "StreamReader"
+ },
+ {
+ "type": "string",
+ "value": "1997"
+ }
+ ]
+ }
+ },
+ "writer": {
+ "name": "neo4jWriter",
+ "parameter": {
+ "uri": "bolt://localhost:7687",
+ "username":"neo4j",
+ "password":"Test@12343",
+ "database":"neo4j",
+ "cypher": "unwind $batch as row CALL apoc.cypher.doIt( 'create (n:`' + row.Label + '`{id:$id})' ,{id: row.id} ) YIELD value RETURN 1 ",
+ "batchDataVariableName": "batch",
+ "batchSize": "3",
+ "properties": [
+ {
+ "name": "Label",
+ "type": "string"
+ },
+ {
+ "name": "id",
+ "type": "STRING"
+ }
+ ]
+ }
+ }
+ }
+ ],
+ "setting": {
+ "speed": {
+ "channel": 5
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/datax-example/datax-example-streamreader/pom.xml b/datax-example/datax-example-streamreader/pom.xml
new file mode 100644
index 00000000..ea70de10
--- /dev/null
+++ b/datax-example/datax-example-streamreader/pom.xml
@@ -0,0 +1,37 @@
+
+
+ 4.0.0
+
+ com.alibaba.datax
+ datax-example
+ 0.0.1-SNAPSHOT
+
+
+ datax-example-streamreader
+
+
+ 8
+ 8
+ UTF-8
+
+
+
+ com.alibaba.datax
+ datax-example-core
+ 0.0.1-SNAPSHOT
+
+
+ com.alibaba.datax
+ streamreader
+ 0.0.1-SNAPSHOT
+
+
+ com.alibaba.datax
+ streamwriter
+ 0.0.1-SNAPSHOT
+
+
+
+
\ No newline at end of file
diff --git a/datax-example/datax-example-streamreader/src/test/java/com/alibaba/datax/example/streamreader/StreamReader2StreamWriterTest.java b/datax-example/datax-example-streamreader/src/test/java/com/alibaba/datax/example/streamreader/StreamReader2StreamWriterTest.java
new file mode 100644
index 00000000..71d083d0
--- /dev/null
+++ b/datax-example/datax-example-streamreader/src/test/java/com/alibaba/datax/example/streamreader/StreamReader2StreamWriterTest.java
@@ -0,0 +1,19 @@
+package com.alibaba.datax.example.streamreader;
+
+import com.alibaba.datax.example.ExampleContainer;
+import com.alibaba.datax.example.util.PathUtil;
+import org.junit.Test;
+
+/**
+ * {@code Author} FuYouJ
+ * {@code Date} 2023/8/14 20:16
+ */
+
+public class StreamReader2StreamWriterTest {
+ @Test
+ public void testStreamReader2StreamWriter() {
+ String path = "/stream2stream.json";
+ String jobPath = PathUtil.getAbsolutePathFromClassPath(path);
+ ExampleContainer.start(jobPath);
+ }
+}
diff --git a/datax-example/datax-example-streamreader/src/test/resources/stream2stream.json b/datax-example/datax-example-streamreader/src/test/resources/stream2stream.json
new file mode 100644
index 00000000..b2a57395
--- /dev/null
+++ b/datax-example/datax-example-streamreader/src/test/resources/stream2stream.json
@@ -0,0 +1,36 @@
+{
+ "job": {
+ "content": [
+ {
+ "reader": {
+ "name": "streamreader",
+ "parameter": {
+ "sliceRecordCount": 10,
+ "column": [
+ {
+ "type": "long",
+ "value": "10"
+ },
+ {
+ "type": "string",
+ "value": "hello,你好,世界-DataX"
+ }
+ ]
+ }
+ },
+ "writer": {
+ "name": "streamwriter",
+ "parameter": {
+ "encoding": "UTF-8",
+ "print": true
+ }
+ }
+ }
+ ],
+ "setting": {
+ "speed": {
+ "channel": 5
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/datax-example/doc/README.md b/datax-example/doc/README.md
new file mode 100644
index 00000000..15f77e87
--- /dev/null
+++ b/datax-example/doc/README.md
@@ -0,0 +1,107 @@
+## [DataX-Example]调试datax插件的模块
+
+### 为什么要开发这个模块
+
+一般使用DataX启动数据同步任务是从 datax.py 脚本开始:先获取 datax 包目录并将其设置到系统变量 datax.home 里,此后核心插件的加载、配置初始化均依赖于 datax.home,这带来了一些麻烦。以一次本地 Debug streamreader 插件为例:
+
+- maven 打包 datax 生成 datax 目录
+- 在 IDE 中 设置系统环境变量 datax.home,或者在Engine启动类中硬编码设置datax.home。
+- 修改插件 streamreader 代码
+- 再次 maven 打包,使JarLoader 能够加载到最新的 streamreader 代码。
+- 调试代码
+
+在以上步骤中,打包完全不必要且最耗时,等待打包也最煎熬。
+
+所以我编写了一个新的模块(datax-example),此模块专用于本地调试和复现 BUG。模块完成后,以上流程将被简化为两步:
+
+- 修改插件 streamreader 代码。
+- 调试代码
+
+
+
+### 目录结构
+该目录结构演示了如何使用 datax-example-core 编写测试用例,以及如何校验代码流程。
+
+
+### 实现原理
+
+- 不修改原有的 ConfigParser,而是使用仅用于 example 模块的新 ExampleConfigParser。它不依赖 datax.home,而是依赖 IDE 编译后的 target 目录。
+- 将 IDE 的 target 目录作为每个插件的类加载目录。
+
+
+
+### 如何使用
+1.修改插件的pom文件,做如下改动。以streamreader为例。
+改动前
+```xml
+<build>
+    <plugins>
+        <plugin>
+            <artifactId>maven-compiler-plugin</artifactId>
+            <configuration>
+                <source>${jdk-version}</source>
+                <target>${jdk-version}</target>
+                <encoding>${project-sourceEncoding}</encoding>
+            </configuration>
+        </plugin>
+    </plugins>
+</build>
+```
+改动后
+```xml
+<build>
+    <resources>
+        <resource>
+            <directory>src/main/resources</directory>
+            <includes>
+                <include>**/*.*</include>
+            </includes>
+            <filtering>true</filtering>
+        </resource>
+    </resources>
+    <plugins>
+        <plugin>
+            <artifactId>maven-compiler-plugin</artifactId>
+            <configuration>
+                <source>${jdk-version}</source>
+                <target>${jdk-version}</target>
+                <encoding>${project-sourceEncoding}</encoding>
+            </configuration>
+        </plugin>
+    </plugins>
+</build>
+```
+#### 在测试模块中使用
+参考datax-example/datax-example-streamreader的StreamReader2StreamWriterTest.java
+```java
+public class StreamReader2StreamWriterTest {
+ @Test
+ public void testStreamReader2StreamWriter() {
+ String path = "/stream2stream.json";
+ String jobPath = PathUtil.getAbsolutePathFromClassPath(path);
+ ExampleContainer.start(jobPath);
+ }
+}
+
+```
+参考datax-example/datax-example-neo4j的StreamReader2Neo4jWriterTest
+```java
+public class StreamReader2Neo4jWriterTest {
+ @Test
+ public void streamReader2Neo4j() {
+
+ deleteHistoryIfExist();
+
+ String path = "/streamreader2neo4j.json";
+ String jobPath = PathUtil.getAbsolutePathFromClassPath(path);
+
+ ExampleContainer.start(jobPath);
+
+ //根据channel和reader的mock数据,校验结果集是否符合预期
+ verifyWriteResult();
+ }
+}
+```
\ No newline at end of file
diff --git a/datax-example/doc/img/img01.png b/datax-example/doc/img/img01.png
new file mode 100644
index 00000000..d0431c1a
Binary files /dev/null and b/datax-example/doc/img/img01.png differ
diff --git a/datax-example/doc/img/img02.png b/datax-example/doc/img/img02.png
new file mode 100644
index 00000000..eec860d4
Binary files /dev/null and b/datax-example/doc/img/img02.png differ
diff --git a/datax-example/doc/img/img03.png b/datax-example/doc/img/img03.png
new file mode 100644
index 00000000..731f81bd
Binary files /dev/null and b/datax-example/doc/img/img03.png differ
diff --git a/datax-example/pom.xml b/datax-example/pom.xml
new file mode 100644
index 00000000..9c4c9200
--- /dev/null
+++ b/datax-example/pom.xml
@@ -0,0 +1,68 @@
+
+
+ 4.0.0
+
+ com.alibaba.datax
+ datax-all
+ 0.0.1-SNAPSHOT
+
+
+ datax-example
+ pom
+
+ datax-example-core
+ datax-example-streamreader
+ datax-example-neo4j
+
+
+
+ 8
+ 8
+ UTF-8
+ 4.13.2
+
+
+
+ com.alibaba.datax
+ datax-common
+ 0.0.1-SNAPSHOT
+
+
+ com.alibaba.datax
+ datax-core
+ 0.0.1-SNAPSHOT
+
+
+ junit
+ junit
+ ${junit4.version}
+ test
+
+
+
+
+
+
+ src/main/resources
+
+ **/*.*
+
+ true
+
+
+
+
+
+ maven-compiler-plugin
+
+ ${jdk-version}
+ ${jdk-version}
+ ${project-sourceEncoding}
+
+
+
+
+
+
\ No newline at end of file
diff --git a/dataxPluginDev.md b/dataxPluginDev.md
index 4483f270..8c7241bf 100644
--- a/dataxPluginDev.md
+++ b/dataxPluginDev.md
@@ -447,6 +447,9 @@ DataX的内部类型在实现上会选用不同的java类型:
3. 用户在插件中在`reader`/`writer`配置的`name`字段指定插件名字。框架根据插件的类型(`reader`/`writer`)和插件名称去插件的路径下扫描所有的jar,加入`classpath`。
4. 根据插件配置中定义的入口类,框架通过反射实例化对应的`Job`和`Task`对象。
+### 编写测试用例
+1. 在datax-example工程下新建插件测试模块,调用`ExampleContainer.start(jobPath)`方法来检测你的代码逻辑是否正确。[datax-example使用](https://github.com/alibaba/DataX/blob/master/datax-example/doc/README.md)
+
## 三、Last but not Least
diff --git a/dorisreader/doc/dorisreader.md b/dorisreader/doc/dorisreader.md
new file mode 100644
index 00000000..c249c178
--- /dev/null
+++ b/dorisreader/doc/dorisreader.md
@@ -0,0 +1,224 @@
+# DorisReader 插件文档
+
+___
+
+## 1 快速介绍
+
+DorisReader 插件实现了从 Doris 读取数据的功能。在底层实现上,DorisReader 通过 JDBC 连接远程 Doris 数据库,并执行相应的 SQL 语句将数据从 Doris 库中 SELECT 出来。
+
+## 2 实现原理
+
+简而言之,DorisReader通过JDBC连接器连接到远程的Doris数据库,并根据用户配置的信息生成查询SELECT
+SQL语句,然后发送到远程Doris数据库,并将该SQL执行返回结果使用DataX自定义的数据类型拼装为抽象的数据集,并传递给下游Writer处理。
+
+对于用户配置Table、Column、Where的信息,DorisReader将其拼接为SQL语句发送到Doris数据库;对于用户配置querySql信息,DorisReader直接将其发送到Doris数据库。
+
+## 3 功能说明
+
+### 3.1 配置样例
+
+* 配置一个从Doris数据库同步抽取数据到本地的作业:
+
+```
+{
+ "job": {
+ "setting": {
+ "speed": {
+ "channel": 3
+ },
+ "errorLimit": {
+ "record": 0,
+ "percentage": 0.02
+ }
+ },
+ "content": [
+ {
+ "reader": {
+ "name": "dorisreader",
+ "parameter": {
+ "username": "root",
+ "password": "root",
+ "column": [
+ "id",
+ "name"
+ ],
+ "splitPk": "db_id",
+ "connection": [
+ {
+ "table": [
+ "table"
+ ],
+ "jdbcUrl": [
+ "jdbc:Doris://127.0.0.1:9030/database"
+ ]
+ }
+ ]
+ }
+ },
+ "writer": {
+ "name": "streamwriter",
+ "parameter": {
+ "print":true
+ }
+ }
+ }
+ ]
+ }
+}
+
+```
+
+* 配置一个自定义SQL的数据库同步任务到本地内容的作业:
+
+```
+{
+ "job": {
+ "setting": {
+ "speed": {
+ "channel":1
+ }
+ },
+ "content": [
+ {
+ "reader": {
+ "name": "dorisreader",
+ "parameter": {
+ "username": "root",
+ "password": "root",
+ "connection": [
+ {
+ "querySql": [
+ "select db_id,on_line_flag from db_info where db_id < 10;",
+ "select db_id,on_line_flag from db_info where db_id >= 10;"
+
+ ],
+ "jdbcUrl": [
+ "jdbc:Doris://127.0.0.1:9030/database"
+ ]
+ }
+ ]
+ }
+ },
+ "writer": {
+ "name": "streamwriter",
+ "parameter": {
+ "print": false,
+ "encoding": "UTF-8"
+ }
+ }
+ }
+ ]
+ }
+}
+```
+
+### 3.2 参数说明
+
+* **jdbcUrl**
+
+ *
+ 描述:描述的是到对端数据库的JDBC连接信息,使用JSON的数组描述,并支持一个库填写多个连接地址。之所以使用JSON数组描述连接信息,是因为阿里集团内部支持多个IP探测,如果配置了多个,DorisReader可以依次探测ip的可连接性,直到选择一个合法的IP。如果全部连接失败,DorisReader报错。
+ 注意,jdbcUrl必须包含在connection配置单元中。对于阿里集团外部使用情况,JSON数组填写一个JDBC连接即可。
+
+ * 必选:是
+
+ * 默认值:无
+
+* **username**
+
+ * 描述:数据源的用户名
+
+ * 必选:是
+
+ * 默认值:无
+
+* **password**
+
+ * 描述:数据源指定用户名的密码
+
+ * 必选:是
+
+ * 默认值:无
+
+* **table**
+
+ *
+ 描述:所选取的需要同步的表。使用JSON的数组描述,因此支持多张表同时抽取。当配置为多张表时,用户自己需保证多张表是同一schema结构,DorisReader不予检查表是否同一逻辑表。注意,table必须包含在connection配置单元中。
+
+ * 必选:是
+
+ * 默认值:无
+
+* **column**
+
+ * 描述:所配置的表中需要同步的列名集合,使用JSON的数组描述字段信息。用户使用\*代表默认使用所有列配置,例如['\*']。
+
+ 支持列裁剪,即列可以挑选部分列进行导出。
+
+ 支持列换序,即列可以不按照表schema信息进行导出。
+
+ 支持常量配置,用户需要按照Doris SQL语法格式:
+ ["id", "\`table\`", "1", "'bazhen.csy'", "null", "to_char(a + 1)", "2.3" , "true"]
+ id为普通列名,\`table\`为包含保留字的列名,1为整形数字常量,'bazhen.csy'为字符串常量,null为空指针,to_char(a + 1)为表达式,2.3为浮点数,true为布尔值。
+
+ * 必选:是
+
+ * 默认值:无
+
+* **splitPk**
+
+  * 描述:DorisReader进行数据抽取时,如果指定splitPk,表示用户希望使用splitPk代表的字段进行数据分片,DataX因此会启动并发任务进行数据同步,这样可以大大提高数据同步的效率(组合配置示例见本节末尾)。
+
+ 推荐splitPk用户使用表主键,因为表主键通常情况下比较均匀,因此切分出来的分片也不容易出现数据热点。
+
+ 目前splitPk仅支持整形数据切分,`不支持浮点、字符串、日期等其他类型`。如果用户指定其他非支持类型,DorisReader将报错!
+
+ 如果splitPk不填写,包括不提供splitPk或者splitPk值为空,DataX视作使用单通道同步该表数据。
+
+ * 必选:否
+
+ * 默认值:空
+
+* **where**
+
+ * 描述:筛选条件,DorisReader根据指定的column、table、where条件拼接SQL,并根据这个SQL进行数据抽取。在实际业务场景中,往往会选择当天的数据进行同步,可以将where条件指定为gmt_create >
+ $bizdate 。注意:不可以将where条件指定为limit 10,limit不是SQL的合法where子句。
+
+ where条件可以有效地进行业务增量同步。如果不填写where语句,包括不提供where的key或者value,DataX均视作同步全量数据。
+
+ * 必选:否
+
+ * 默认值:无
+
+* **querySql**
+
+ *
+    描述:在有些业务场景下,where这一配置项不足以描述所筛选的条件,用户可以通过该配置项来自定义筛选SQL。当用户配置了这一项之后,DataX系统就会忽略table,column这些配置项,直接使用这个配置项的内容对数据进行筛选,例如需要进行多表join后同步数据,使用select
+ a,b from table_a join table_b on table_a.id = table_b.id
+
+ `当用户配置querySql时,DorisReader直接忽略table、column、where条件的配置`,querySql优先级大于table、column、where选项。
+
+ * 必选:否
+
+ * 默认值:无
+
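+下面给出一个组合使用 splitPk、where 以及多个 jdbcUrl 的 reader parameter 片段,仅作示意(其中库名、表名、字段名、IP 均为假设值,请按实际环境修改):
+
+```
+"parameter": {
+    "username": "root",
+    "password": "root",
+    "column": ["id", "name"],
+    "splitPk": "id",
+    "where": "gmt_create > '2023-01-01'",
+    "connection": [
+        {
+            "table": ["table_a"],
+            "jdbcUrl": [
+                "jdbc:Doris://192.168.0.1:9030/database",
+                "jdbc:Doris://192.168.0.2:9030/database"
+            ]
+        }
+    ]
+}
+```
+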
+### 3.3 类型转换
+
+目前DorisReader支持大部分Doris类型,但也存在个别类型没有支持的情况,请注意检查你的类型。
+
+下面列出DorisReader针对Doris类型的转换列表:
+
+| DataX 内部类型 | Doris 数据类型 |
+| -------- |-------------------------------------------------------|
+| Long | tinyint, smallint, int, bigint, largeint |
+| Double | float, double, decimal |
+| String | varchar, char, text, string, map, json, array, struct |
+| Date | date, datetime |
+| Boolean | Boolean |
+
+请注意:
+
+* `tinyint(1) DataX视作为整形`。
+
+
+
diff --git a/dorisreader/pom.xml b/dorisreader/pom.xml
new file mode 100755
index 00000000..15a025b6
--- /dev/null
+++ b/dorisreader/pom.xml
@@ -0,0 +1,81 @@
+
+
+ 4.0.0
+
+ com.alibaba.datax
+ datax-all
+ 0.0.1-SNAPSHOT
+
+ dorisreader
+ dorisreader
+ jar
+
+
+
+ com.alibaba.datax
+ datax-common
+ ${datax-project-version}
+
+
+ slf4j-log4j12
+ org.slf4j
+
+
+
+
+ org.slf4j
+ slf4j-api
+
+
+ ch.qos.logback
+ logback-classic
+
+
+
+ com.alibaba.datax
+ plugin-rdbms-util
+ ${datax-project-version}
+
+
+ mysql
+ mysql-connector-java
+ ${mysql.driver.version}
+
+
+
+
+
+
+
+
+
+ maven-compiler-plugin
+
+ ${jdk-version}
+ ${jdk-version}
+ ${project-sourceEncoding}
+
+
+
+
+ maven-assembly-plugin
+
+
+ src/main/assembly/package.xml
+
+ datax
+
+
+
+ dwzip
+ package
+
+ single
+
+
+
+
+
+
+
diff --git a/dorisreader/src/main/assembly/package.xml b/dorisreader/src/main/assembly/package.xml
new file mode 100755
index 00000000..724613f9
--- /dev/null
+++ b/dorisreader/src/main/assembly/package.xml
@@ -0,0 +1,35 @@
+
+
+
+ dir
+
+ false
+
+
+ src/main/resources
+
+ plugin.json
+ plugin_job_template.json
+
+ plugin/reader/dorisreader
+
+
+ target/
+
+ dorisreader-0.0.1-SNAPSHOT.jar
+
+ plugin/reader/dorisreader
+
+
+
+
+
+ false
+ plugin/reader/dorisreader/libs
+ runtime
+
+
+
diff --git a/dorisreader/src/main/java/com/alibaba/datax/plugin/reader/dorisreader/DorisReader.java b/dorisreader/src/main/java/com/alibaba/datax/plugin/reader/dorisreader/DorisReader.java
new file mode 100755
index 00000000..56a44316
--- /dev/null
+++ b/dorisreader/src/main/java/com/alibaba/datax/plugin/reader/dorisreader/DorisReader.java
@@ -0,0 +1,94 @@
+package com.alibaba.datax.plugin.reader.dorisreader;
+
+import com.alibaba.datax.common.plugin.RecordSender;
+import com.alibaba.datax.common.spi.Reader;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.rdbms.reader.CommonRdbmsReader;
+import com.alibaba.datax.plugin.rdbms.reader.Constant;
+import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+
+public class DorisReader extends Reader {
+
+ private static final DataBaseType DATABASE_TYPE = DataBaseType.Doris;
+
+ public static class Job extends Reader.Job {
+ private static final Logger LOG = LoggerFactory
+ .getLogger(Job.class);
+
+ private Configuration originalConfig = null;
+ private CommonRdbmsReader.Job commonRdbmsReaderJob;
+
+ @Override
+ public void init() {
+ this.originalConfig = super.getPluginJobConf();
+
+ Integer fetchSize = this.originalConfig.getInt(Constant.FETCH_SIZE,Integer.MIN_VALUE);
+ this.originalConfig.set(Constant.FETCH_SIZE, fetchSize);
+
+ this.commonRdbmsReaderJob = new CommonRdbmsReader.Job(DATABASE_TYPE);
+ this.commonRdbmsReaderJob.init(this.originalConfig);
+ }
+
+ @Override
+ public void preCheck(){
+ init();
+ this.commonRdbmsReaderJob.preCheck(this.originalConfig,DATABASE_TYPE);
+
+ }
+
+ @Override
+ public List<Configuration> split(int adviceNumber) {
+ return this.commonRdbmsReaderJob.split(this.originalConfig, adviceNumber);
+
+ }
+
+ @Override
+ public void post() {
+ this.commonRdbmsReaderJob.post(this.originalConfig);
+ }
+
+ @Override
+ public void destroy() {
+ this.commonRdbmsReaderJob.destroy(this.originalConfig);
+ }
+
+ }
+
+ public static class Task extends Reader.Task {
+
+ private Configuration readerSliceConfig;
+ private CommonRdbmsReader.Task commonRdbmsReaderTask;
+
+ @Override
+ public void init() {
+ this.readerSliceConfig = super.getPluginJobConf();
+ this.commonRdbmsReaderTask = new CommonRdbmsReader.Task(DATABASE_TYPE,super.getTaskGroupId(), super.getTaskId());
+ this.commonRdbmsReaderTask.init(this.readerSliceConfig);
+
+ }
+
+ @Override
+ public void startRead(RecordSender recordSender) {
+ int fetchSize = this.readerSliceConfig.getInt(Constant.FETCH_SIZE);
+
+ this.commonRdbmsReaderTask.startRead(this.readerSliceConfig, recordSender,
+ super.getTaskPluginCollector(), fetchSize);
+ }
+
+ @Override
+ public void post() {
+ this.commonRdbmsReaderTask.post(this.readerSliceConfig);
+ }
+
+ @Override
+ public void destroy() {
+ this.commonRdbmsReaderTask.destroy(this.readerSliceConfig);
+ }
+
+ }
+
+}
diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESWriterErrorCode.java b/dorisreader/src/main/java/com/alibaba/datax/plugin/reader/dorisreader/DorisReaderErrorCode.java
old mode 100644
new mode 100755
similarity index 50%
rename from elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESWriterErrorCode.java
rename to dorisreader/src/main/java/com/alibaba/datax/plugin/reader/dorisreader/DorisReaderErrorCode.java
index 59dcbd0a..f9a8c449
--- a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESWriterErrorCode.java
+++ b/dorisreader/src/main/java/com/alibaba/datax/plugin/reader/dorisreader/DorisReaderErrorCode.java
@@ -1,20 +1,14 @@
-package com.alibaba.datax.plugin.writer.elasticsearchwriter;
+package com.alibaba.datax.plugin.reader.dorisreader;
import com.alibaba.datax.common.spi.ErrorCode;
-public enum ESWriterErrorCode implements ErrorCode {
- BAD_CONFIG_VALUE("ESWriter-00", "您配置的值不合法."),
- ES_INDEX_DELETE("ESWriter-01", "删除index错误."),
- ES_INDEX_CREATE("ESWriter-02", "创建index错误."),
- ES_MAPPINGS("ESWriter-03", "mappings错误."),
- ES_INDEX_INSERT("ESWriter-04", "插入数据错误."),
- ES_ALIAS_MODIFY("ESWriter-05", "别名修改错误."),
+public enum DorisReaderErrorCode implements ErrorCode {
;
private final String code;
private final String description;
- ESWriterErrorCode(String code, String description) {
+ private DorisReaderErrorCode(String code, String description) {
this.code = code;
this.description = description;
}
@@ -34,4 +28,4 @@ public enum ESWriterErrorCode implements ErrorCode {
return String.format("Code:[%s], Description:[%s]. ", this.code,
this.description);
}
-}
\ No newline at end of file
+}
diff --git a/dorisreader/src/main/resources/plugin.json b/dorisreader/src/main/resources/plugin.json
new file mode 100755
index 00000000..981d1af8
--- /dev/null
+++ b/dorisreader/src/main/resources/plugin.json
@@ -0,0 +1,6 @@
+{
+ "name": "dorisreader",
+ "class": "com.alibaba.datax.plugin.reader.dorisreader.DorisReader",
+ "description": "useScene: prod. mechanism: Jdbc connection using the database, execute select sql, retrieve data from the ResultSet. warn: The more you know about the database, the less problems you encounter.",
+ "developer": "alibaba"
+}
\ No newline at end of file
diff --git a/dorisreader/src/main/resources/plugin_job_template.json b/dorisreader/src/main/resources/plugin_job_template.json
new file mode 100644
index 00000000..2e3d9fa8
--- /dev/null
+++ b/dorisreader/src/main/resources/plugin_job_template.json
@@ -0,0 +1,15 @@
+{
+ "name": "dorisreader",
+ "parameter": {
+ "username": "",
+ "password": "",
+ "column": [],
+ "connection": [
+ {
+ "jdbcUrl": [],
+ "table": []
+ }
+ ],
+ "where": ""
+ }
+}
\ No newline at end of file
diff --git a/doriswriter/doc/doriswriter.md b/doriswriter/doc/doriswriter.md
new file mode 100644
index 00000000..58a688b8
--- /dev/null
+++ b/doriswriter/doc/doriswriter.md
@@ -0,0 +1,181 @@
+# DorisWriter 插件文档
+
+## 1 快速介绍
+DorisWriter支持将大批量数据写入Doris中。
+
+## 2 实现原理
+DorisWriter 通过 Doris 原生支持的 Stream Load 方式导入数据:DorisWriter 会将 `reader` 读取的数据缓存在内存中,按配置拼接成 CSV 或 JSON 文本,然后批量导入至 Doris。
+
+## 3 功能说明
+
+### 3.1 配置样例
+
+这里是一份从 MySQL 读取数据后导入至 Doris 的配置文件。
+
+```
+{
+ "job": {
+ "content": [
+ {
+ "reader": {
+ "name": "mysqlreader",
+ "parameter": {
+ "column": ["emp_no", "birth_date", "first_name","last_name","gender","hire_date"],
+ "connection": [
+ {
+ "jdbcUrl": ["jdbc:mysql://localhost:3306/demo"],
+ "table": ["employees_1"]
+ }
+ ],
+ "username": "root",
+ "password": "xxxxx",
+ "where": ""
+ }
+ },
+ "writer": {
+ "name": "doriswriter",
+ "parameter": {
+ "loadUrl": ["172.16.0.13:8030"],
+ "column": ["emp_no", "birth_date", "first_name","last_name","gender","hire_date"],
+ "username": "root",
+ "password": "xxxxxx",
+ "postSql": ["select count(1) from all_employees_info"],
+ "preSql": [],
+ "flushInterval":30000,
+ "connection": [
+ {
+ "jdbcUrl": "jdbc:mysql://172.16.0.13:9030/demo",
+ "selectedDatabase": "demo",
+ "table": ["all_employees_info"]
+ }
+ ],
+ "loadProps": {
+ "format": "json",
+ "strip_outer_array": true
+ }
+ }
+ }
+ }
+ ],
+ "setting": {
+ "speed": {
+ "channel": "1"
+ }
+ }
+ }
+}
+```
+
+### 3.2 参数说明
+
+* **jdbcUrl**
+
+  - 描述:Doris 的 JDBC 连接串,用于执行 preSql 或 postSql。
+ - 必选:是
+ - 默认值:无
+
+* **loadUrl**
+
+  - 描述:Stream Load 的连接目标,格式为 "ip:port",其中 IP 是 FE 节点 IP,port 是 FE 节点的 http_port。可以以 JSON 数组的形式填写多个地址,doriswriter 会从中随机选择一个地址进行导入。
+ - 必选:是
+ - 默认值:无
+
+* **username**
+
+ - 描述:访问Doris数据库的用户名
+ - 必选:是
+ - 默认值:无
+
+* **password**
+
+ - 描述:访问Doris数据库的密码
+ - 必选:否
+ - 默认值:空
+
+* **connection.selectedDatabase**
+ - 描述:需要写入的Doris数据库名称。
+ - 必选:是
+ - 默认值:无
+
+* **connection.table**
+ - 描述:需要写入的Doris表名称。
+ - 必选:是
+ - 默认值:无
+
+* **column**
+
+ - 描述:目的表**需要写入数据**的字段,这些字段将作为生成的 Json 数据的字段名。字段之间用英文逗号分隔。例如: "column": ["id","name","age"]。
+ - 必选:是
+  - 默认值:无
+
+* **preSql**
+
+ - 描述:写入数据到目的表前,会先执行这里的标准语句。
+ - 必选:否
+ - 默认值:无
+
+* **postSql**
+
+ - 描述:写入数据到目的表后,会执行这里的标准语句。
+ - 必选:否
+ - 默认值:无
+
+
+* **maxBatchRows**
+
+ - 描述:每批次导入数据的最大行数。和 **batchSize** 共同控制每批次的导入数量。每批次数据达到两个阈值之一,即开始导入这一批次的数据。
+ - 必选:否
+ - 默认值:500000
+
+* **batchSize**
+
+ - 描述:每批次导入数据的最大数据量。和 **maxBatchRows** 共同控制每批次的导入数量。每批次数据达到两个阈值之一,即开始导入这一批次的数据。
+ - 必选:否
+ - 默认值:104857600
+
+* **maxRetries**
+
+ - 描述:每批次导入数据失败后的重试次数。
+ - 必选:否
+ - 默认值:0
+
+* **labelPrefix**
+
+  - 描述:每批次导入任务的 label 前缀。最终的 label 将由 `labelPrefix + UUID` 组成全局唯一的 label,确保数据不会重复导入。
+ - 必选:否
+ - 默认值:`datax_doris_writer_`
+
+* **loadProps**
+
+ - 描述:StreamLoad 的请求参数,详情参照StreamLoad介绍页面。[Stream load - Apache Doris](https://doris.apache.org/zh-CN/docs/data-operate/import/import-way/stream-load-manual)
+
+    其中包括导入的数据格式 format 等。导入数据格式默认为 csv,也支持 json,具体可以参照下面的类型转换部分,或参照上面的 Stream Load 官方文档。
+
+ - 必选:否
+
+ - 默认值:无
+
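+下面是一个与批量和重试相关参数的 parameter 片段,仅作示意(数值均为假设值,请根据实际数据量与集群情况调整):
+
+```json
+"maxBatchRows": 200000,
+"batchSize": 67108864,
+"flushInterval": 30000,
+"maxRetries": 3,
+"labelPrefix": "datax_doris_writer_"
+```
+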
+### 类型转换
+
+默认传入的数据均会被转为字符串,并以`\t`作为列分隔符,`\n`作为行分隔符,组成`csv`文件进行StreamLoad导入操作。
+
+默认是csv格式导入,如需更改列分隔符, 则正确配置 `loadProps` 即可:
+
+```json
+"loadProps": {
+ "column_separator": "\\x01",
+ "line_delimiter": "\\x02"
+}
+```
+
+如需更改导入格式为`json`, 则正确配置 `loadProps` 即可:
+```json
+"loadProps": {
+ "format": "json",
+ "strip_outer_array": true
+}
+```
+
+更多信息请参照 Doris 官网:[Stream load - Apache Doris](https://doris.apache.org/zh-CN/docs/data-operate/import/import-way/stream-load-manual)
\ No newline at end of file
diff --git a/doriswriter/doc/mysql2doris.json b/doriswriter/doc/mysql2doris.json
new file mode 100644
index 00000000..5810d6db
--- /dev/null
+++ b/doriswriter/doc/mysql2doris.json
@@ -0,0 +1,48 @@
+{
+ "job": {
+ "content": [
+ {
+ "reader": {
+ "name": "mysqlreader",
+ "parameter": {
+ "column": ["k1", "k2", "k3"],
+ "connection": [
+ {
+ "jdbcUrl": ["jdbc:mysql://192.168.10.10:3306/db1"],
+ "table": ["t1"]
+ }
+ ],
+ "username": "root",
+ "password": "",
+ "where": ""
+ }
+ },
+ "writer": {
+ "name": "doriswriter",
+ "parameter": {
+ "loadUrl": ["192.168.1.1:8030"],
+ "loadProps": {},
+ "database": "db1",
+ "column": ["k1", "k2", "k3"],
+ "username": "root",
+ "password": "",
+ "postSql": [],
+ "preSql": [],
+ "connection": [
+ {
+ "jdbcUrl":"jdbc:mysql://192.168.1.1:9030/",
+ "table":["xxx"],
+ "selectedDatabase":"xxxx"
+ }
+ ]
+ }
+ }
+ }
+ ],
+ "setting": {
+ "speed": {
+ "channel": "1"
+ }
+ }
+ }
+}
diff --git a/doriswriter/pom.xml b/doriswriter/pom.xml
new file mode 100644
index 00000000..aa1e6ff0
--- /dev/null
+++ b/doriswriter/pom.xml
@@ -0,0 +1,99 @@
+
+
+
+
+ datax-all
+ com.alibaba.datax
+ 0.0.1-SNAPSHOT
+
+ 4.0.0
+ doriswriter
+ doriswriter
+ jar
+
+
+ com.alibaba.datax
+ datax-common
+ ${datax-project-version}
+
+
+ slf4j-log4j12
+ org.slf4j
+
+
+
+
+ org.slf4j
+ slf4j-api
+
+
+ ch.qos.logback
+ logback-classic
+
+
+ com.alibaba.datax
+ plugin-rdbms-util
+ ${datax-project-version}
+
+
+ mysql
+ mysql-connector-java
+ ${mysql.driver.version}
+
+
+ org.apache.httpcomponents
+ httpclient
+ 4.5.13
+
+
+
+
+
+
+ maven-compiler-plugin
+
+ ${jdk-version}
+ ${jdk-version}
+ ${project-sourceEncoding}
+
+
+
+
+ maven-assembly-plugin
+
+
+ src/main/assembly/package.xml
+
+ datax
+
+
+
+ dwzip
+ package
+
+ single
+
+
+
+
+
+
+
diff --git a/doriswriter/src/main/assembly/package.xml b/doriswriter/src/main/assembly/package.xml
new file mode 100644
index 00000000..71596332
--- /dev/null
+++ b/doriswriter/src/main/assembly/package.xml
@@ -0,0 +1,52 @@
+
+
+
+
+
+ dir
+
+ false
+
+
+ src/main/resources
+
+ plugin.json
+ plugin_job_template.json
+
+ plugin/writer/doriswriter
+
+
+ target/
+
+ doriswriter-0.0.1-SNAPSHOT.jar
+
+ plugin/writer/doriswriter
+
+
+
+
+ false
+ plugin/writer/doriswriter/libs
+ runtime
+
+
+
diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DelimiterParser.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DelimiterParser.java
new file mode 100644
index 00000000..e84bd7dd
--- /dev/null
+++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DelimiterParser.java
@@ -0,0 +1,54 @@
+package com.alibaba.datax.plugin.writer.doriswriter;
+
+import com.google.common.base.Strings;
+
+import java.io.StringWriter;
+
+public class DelimiterParser {
+
+ private static final String HEX_STRING = "0123456789ABCDEF";
+
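+ // Parse a delimiter spec: empty falls back to the default, plain strings are returned as-is, and a "\x.."-style hex spec (e.g. \x01) is decoded into the raw delimiter characters.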
+ public static String parse(String sp, String dSp) throws RuntimeException {
+ if ( Strings.isNullOrEmpty(sp)) {
+ return dSp;
+ }
+ if (!sp.toUpperCase().startsWith("\\X")) {
+ return sp;
+ }
+ String hexStr = sp.substring(2);
+ // check hex str
+ if (hexStr.isEmpty()) {
+ throw new RuntimeException("Failed to parse delimiter: `Hex str is empty`");
+ }
+ if (hexStr.length() % 2 != 0) {
+ throw new RuntimeException("Failed to parse delimiter: `Hex str length error`");
+ }
+ for (char hexChar : hexStr.toUpperCase().toCharArray()) {
+ if (HEX_STRING.indexOf(hexChar) == -1) {
+ throw new RuntimeException("Failed to parse delimiter: `Hex str format error`");
+ }
+ }
+ // transform to separator
+ StringWriter writer = new StringWriter();
+ for (byte b : hexStrToBytes(hexStr)) {
+ writer.append((char) b);
+ }
+ return writer.toString();
+ }
+
+ private static byte[] hexStrToBytes(String hexStr) {
+ String upperHexStr = hexStr.toUpperCase();
+ int length = upperHexStr.length() / 2;
+ char[] hexChars = upperHexStr.toCharArray();
+ byte[] bytes = new byte[length];
+ for (int i = 0; i < length; i++) {
+ int pos = i * 2;
+ bytes[i] = (byte) (charToByte(hexChars[pos]) << 4 | charToByte(hexChars[pos + 1]));
+ }
+ return bytes;
+ }
+
+ private static byte charToByte(char c) {
+ return (byte) HEX_STRING.indexOf(c);
+ }
+}
diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisBaseCodec.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisBaseCodec.java
new file mode 100644
index 00000000..ee7ded56
--- /dev/null
+++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisBaseCodec.java
@@ -0,0 +1,23 @@
+package com.alibaba.datax.plugin.writer.doriswriter;
+
+import com.alibaba.datax.common.element.Column;
+
+public class DorisBaseCodec {
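+ // Convert a DataX column to the string form sent to Doris: BOOL as 0/1, BYTES as an unsigned big-endian integer, NULL as null, everything else via asString().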
+ protected String convertionField( Column col) {
+ if (null == col.getRawData() || Column.Type.NULL == col.getType()) {
+ return null;
+ }
+ if ( Column.Type.BOOL == col.getType()) {
+ return String.valueOf(col.asLong());
+ }
+ if ( Column.Type.BYTES == col.getType()) {
+ byte[] bts = (byte[])col.getRawData();
+ long value = 0;
+ for (int i = 0; i < bts.length; i++) {
+ value += (bts[bts.length - i - 1] & 0xffL) << (8 * i);
+ }
+ return String.valueOf(value);
+ }
+ return col.asString();
+ }
+}
diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCodec.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCodec.java
new file mode 100644
index 00000000..a2437a1c
--- /dev/null
+++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCodec.java
@@ -0,0 +1,10 @@
+package com.alibaba.datax.plugin.writer.doriswriter;
+
+import com.alibaba.datax.common.element.Record;
+
+import java.io.Serializable;
+
+public interface DorisCodec extends Serializable {
+
+ String codec( Record row);
+}
diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCodecFactory.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCodecFactory.java
new file mode 100644
index 00000000..22c4b409
--- /dev/null
+++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCodecFactory.java
@@ -0,0 +1,19 @@
+package com.alibaba.datax.plugin.writer.doriswriter;
+
+import java.util.Map;
+
+public class DorisCodecFactory {
+ public DorisCodecFactory (){
+
+ }
+ public static DorisCodec createCodec( Keys writerOptions) {
+ if ( Keys.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) {
+ Map<String, Object> props = writerOptions.getLoadProps();
+ return new DorisCsvCodec (null == props || !props.containsKey("column_separator") ? null : String.valueOf(props.get("column_separator")));
+ }
+ if ( Keys.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) {
+ return new DorisJsonCodec (writerOptions.getColumns());
+ }
+ throw new RuntimeException("Failed to create row serializer, unsupported `format` from stream load properties.");
+ }
+}
diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCsvCodec.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCsvCodec.java
new file mode 100644
index 00000000..518aa304
--- /dev/null
+++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCsvCodec.java
@@ -0,0 +1,27 @@
+package com.alibaba.datax.plugin.writer.doriswriter;
+
+import com.alibaba.datax.common.element.Record;
+
+public class DorisCsvCodec extends DorisBaseCodec implements DorisCodec {
+
+ private static final long serialVersionUID = 1L;
+
+ private final String columnSeparator;
+
+ public DorisCsvCodec ( String sp) {
+ this.columnSeparator = DelimiterParser.parse(sp, "\t");
+ }
+
+ @Override
+ public String codec( Record row) {
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < row.getColumnNumber(); i++) {
+ String value = convertionField(row.getColumn(i));
+ sb.append(null == value ? "\\N" : value);
+ if (i < row.getColumnNumber() - 1) {
+ sb.append(columnSeparator);
+ }
+ }
+ return sb.toString();
+ }
+}
diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisJsonCodec.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisJsonCodec.java
new file mode 100644
index 00000000..68abd9eb
--- /dev/null
+++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisJsonCodec.java
@@ -0,0 +1,33 @@
+package com.alibaba.datax.plugin.writer.doriswriter;
+
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.fastjson2.JSON;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class DorisJsonCodec extends DorisBaseCodec implements DorisCodec {
+
+ private static final long serialVersionUID = 1L;
+
+ private final List<String> fieldNames;
+
+ public DorisJsonCodec(List<String> fieldNames) {
+ this.fieldNames = fieldNames;
+ }
+
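+ // Serialize one record as a JSON object, mapping the configured field names to the converted column values by position.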
+ @Override
+ public String codec( Record row) {
+ if (null == fieldNames) {
+ return "";
+ }
+ Map<String, Object> rowMap = new HashMap<>(fieldNames.size());
+ int idx = 0;
+ for (String fieldName : fieldNames) {
+ rowMap.put(fieldName, convertionField(row.getColumn(idx)));
+ idx++;
+ }
+ return JSON.toJSONString(rowMap);
+ }
+}
diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisStreamLoadObserver.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisStreamLoadObserver.java
new file mode 100644
index 00000000..e1f6e0ee
--- /dev/null
+++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisStreamLoadObserver.java
@@ -0,0 +1,235 @@
+package com.alibaba.datax.plugin.writer.doriswriter;
+
+import com.alibaba.fastjson2.JSON;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpHeaders;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.methods.HttpPut;
+import org.apache.http.entity.ByteArrayEntity;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.DefaultRedirectStrategy;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.util.EntityUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+public class DorisStreamLoadObserver {
+ private static final Logger LOG = LoggerFactory.getLogger(DorisStreamLoadObserver.class);
+
+ private Keys options;
+
+ private long pos;
+ private static final String RESULT_FAILED = "Fail";
+ private static final String RESULT_LABEL_EXISTED = "Label Already Exists";
+ private static final String LAEBL_STATE_VISIBLE = "VISIBLE";
+ private static final String LAEBL_STATE_COMMITTED = "COMMITTED";
+ private static final String RESULT_LABEL_PREPARE = "PREPARE";
+ private static final String RESULT_LABEL_ABORTED = "ABORTED";
+ private static final String RESULT_LABEL_UNKNOWN = "UNKNOWN";
+
+
+ public DorisStreamLoadObserver ( Keys options){
+ this.options = options;
+ }
+
+ public void streamLoad(WriterTuple data) throws Exception {
+ String host = getLoadHost();
+ if(host == null){
+ throw new IOException ("load_url cannot be empty, or the host cannot connect.Please check your configuration.");
+ }
+ String loadUrl = new StringBuilder(host)
+ .append("/api/")
+ .append(options.getDatabase())
+ .append("/")
+ .append(options.getTable())
+ .append("/_stream_load")
+ .toString();
+ LOG.info("Start to join batch data: rows[{}] bytes[{}] label[{}].", data.getRows().size(), data.getBytes(), data.getLabel());
+ Map<String, Object> loadResult = put(loadUrl, data.getLabel(), addRows(data.getRows(), data.getBytes().intValue()));
+ LOG.info("StreamLoad response :{}",JSON.toJSONString(loadResult));
+ final String keyStatus = "Status";
+ if (null == loadResult || !loadResult.containsKey(keyStatus)) {
+ throw new IOException("Unable to flush data to Doris: unknown result status.");
+ }
+ LOG.debug("StreamLoad response:{}",JSON.toJSONString(loadResult));
+ if (RESULT_FAILED.equals(loadResult.get(keyStatus))) {
+ throw new IOException(
+ new StringBuilder("Failed to flush data to Doris.\n").append(JSON.toJSONString(loadResult)).toString()
+ );
+ } else if (RESULT_LABEL_EXISTED.equals(loadResult.get(keyStatus))) {
+ LOG.debug("StreamLoad response:{}",JSON.toJSONString(loadResult));
+ checkStreamLoadState(host, data.getLabel());
+ }
+ }
+
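+ // Poll get_load_state until the label becomes VISIBLE/COMMITTED; PREPARE keeps polling, ABORTED signals that the batch must be retried with a new label, anything else fails the load.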
+ private void checkStreamLoadState(String host, String label) throws IOException {
+ int idx = 0;
+ while(true) {
+ try {
+ TimeUnit.SECONDS.sleep(Math.min(++idx, 5));
+ } catch (InterruptedException ex) {
+ break;
+ }
+ try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
+ HttpGet httpGet = new HttpGet(new StringBuilder(host).append("/api/").append(options.getDatabase()).append("/get_load_state?label=").append(label).toString());
+ httpGet.setHeader("Authorization", getBasicAuthHeader(options.getUsername(), options.getPassword()));
+ httpGet.setHeader("Connection", "close");
+
+ try (CloseableHttpResponse resp = httpclient.execute(httpGet)) {
+ HttpEntity respEntity = getHttpEntity(resp);
+ if (respEntity == null) {
+ throw new IOException(String.format("Failed to flush data to Doris, Error " +
+ "could not get the final state of label[%s].\n", label), null);
+ }
+ Map<String, Object> result = (Map<String, Object>) JSON.parse(EntityUtils.toString(respEntity));
+ String labelState = (String)result.get("data");
+ if (null == labelState) {
+ throw new IOException(String.format("Failed to flush data to Doris, Error " +
+ "could not get the final state of label[%s]. response[%s]\n", label, EntityUtils.toString(respEntity)), null);
+ }
+ LOG.info(String.format("Checking label[%s] state[%s]\n", label, labelState));
+ switch(labelState) {
+ case LAEBL_STATE_VISIBLE:
+ case LAEBL_STATE_COMMITTED:
+ return;
+ case RESULT_LABEL_PREPARE:
+ continue;
+ case RESULT_LABEL_ABORTED:
+ throw new DorisWriterExcetion (String.format("Failed to flush data to Doris, Error " +
+ "label[%s] state[%s]\n", label, labelState), null, true);
+ case RESULT_LABEL_UNKNOWN:
+ default:
+ throw new IOException(String.format("Failed to flush data to Doris, Error " +
+ "label[%s] state[%s]\n", label, labelState), null);
+ }
+ }
+ }
+ }
+ }
+
+ private byte[] addRows(List<byte[]> rows, int totalBytes) {
+ if (Keys.StreamLoadFormat.CSV.equals(options.getStreamLoadFormat())) {
+ Map<String, Object> props = (options.getLoadProps() == null ? new HashMap<>() : options.getLoadProps());
+ byte[] lineDelimiter = DelimiterParser.parse((String)props.get("line_delimiter"), "\n").getBytes(StandardCharsets.UTF_8);
+ ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size() * lineDelimiter.length);
+ for (byte[] row : rows) {
+ bos.put(row);
+ bos.put(lineDelimiter);
+ }
+ return bos.array();
+ }
+
+ if (Keys.StreamLoadFormat.JSON.equals(options.getStreamLoadFormat())) {
+ ByteBuffer bos = ByteBuffer.allocate(totalBytes + (rows.isEmpty() ? 2 : rows.size() + 1));
+ bos.put("[".getBytes(StandardCharsets.UTF_8));
+ byte[] jsonDelimiter = ",".getBytes(StandardCharsets.UTF_8);
+ boolean isFirstElement = true;
+ for (byte[] row : rows) {
+ if (!isFirstElement) {
+ bos.put(jsonDelimiter);
+ }
+ bos.put(row);
+ isFirstElement = false;
+ }
+ bos.put("]".getBytes(StandardCharsets.UTF_8));
+ return bos.array();
+ }
+ throw new RuntimeException("Failed to join rows data, unsupported `format` from stream load properties:");
+ }
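+ // Issue one Stream Load request: PUT the batched rows to the given URL with label/format/auth headers, following the FE redirect to a BE, and return the parsed JSON response.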
+ private Map<String, Object> put(String loadUrl, String label, byte[] data) throws IOException {
+ LOG.info(String.format("Executing stream load to: '%s', size: '%s'", loadUrl, data.length));
+ final HttpClientBuilder httpClientBuilder = HttpClients.custom()
+ .setRedirectStrategy(new DefaultRedirectStrategy () {
+ @Override
+ protected boolean isRedirectable(String method) {
+ return true;
+ }
+ });
+ try ( CloseableHttpClient httpclient = httpClientBuilder.build()) {
+ HttpPut httpPut = new HttpPut(loadUrl);
+ httpPut.removeHeaders(HttpHeaders.CONTENT_LENGTH);
+ httpPut.removeHeaders(HttpHeaders.TRANSFER_ENCODING);
+ List<String> cols = options.getColumns();
+ if (null != cols && !cols.isEmpty() && Keys.StreamLoadFormat.CSV.equals(options.getStreamLoadFormat())) {
+ httpPut.setHeader("columns", String.join(",", cols.stream().map(f -> String.format("`%s`", f)).collect(Collectors.toList())));
+ }
+ if (null != options.getLoadProps()) {
+ for (Map.Entry<String, Object> entry : options.getLoadProps().entrySet()) {
+ httpPut.setHeader(entry.getKey(), String.valueOf(entry.getValue()));
+ }
+ }
+ httpPut.setHeader("Expect", "100-continue");
+ httpPut.setHeader("label", label);
+ httpPut.setHeader("two_phase_commit", "false");
+ httpPut.setHeader("Authorization", getBasicAuthHeader(options.getUsername(), options.getPassword()));
+ httpPut.setEntity(new ByteArrayEntity(data));
+ httpPut.setConfig(RequestConfig.custom().setRedirectsEnabled(true).build());
+ try ( CloseableHttpResponse resp = httpclient.execute(httpPut)) {
+ HttpEntity respEntity = getHttpEntity(resp);
+ if (respEntity == null)
+ return null;
+ return (Map<String, Object>) JSON.parse(EntityUtils.toString(respEntity));
+ }
+ }
+ }
+
+ private String getBasicAuthHeader(String username, String password) {
+ String auth = username + ":" + password;
+ byte[] encodedAuth = Base64.encodeBase64(auth.getBytes(StandardCharsets.UTF_8));
+ return new StringBuilder("Basic ").append(new String(encodedAuth)).toString();
+ }
+
+ private HttpEntity getHttpEntity(CloseableHttpResponse resp) {
+ int code = resp.getStatusLine().getStatusCode();
+ if (200 != code) {
+ LOG.warn("Request failed with code:{}", code);
+ return null;
+ }
+ HttpEntity respEntity = resp.getEntity();
+ if (null == respEntity) {
+ LOG.warn("Request failed with empty response.");
+ return null;
+ }
+ return respEntity;
+ }
+
+ private String getLoadHost() {
+ List<String> hostList = options.getLoadUrlList();
+ Collections.shuffle(hostList);
+ String host = new StringBuilder("http://").append(hostList.get((0))).toString();
+ if (checkConnection(host)){
+ return host;
+ }
+ return null;
+ }
+
+ private boolean checkConnection(String host) {
+ try {
+ URL url = new URL(host);
+ HttpURLConnection co = (HttpURLConnection) url.openConnection();
+ co.setConnectTimeout(5000);
+ co.connect();
+ co.disconnect();
+ return true;
+ } catch (Exception e1) {
+ e1.printStackTrace();
+ return false;
+ }
+ }
+}
diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisUtil.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisUtil.java
new file mode 100644
index 00000000..5f5a6f34
--- /dev/null
+++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisUtil.java
@@ -0,0 +1,105 @@
+package com.alibaba.datax.plugin.writer.doriswriter;
+
+import com.alibaba.datax.plugin.rdbms.util.DBUtil;
+import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
+import com.alibaba.datax.plugin.rdbms.util.RdbmsException;
+import com.alibaba.datax.plugin.rdbms.writer.Constant;
+import com.alibaba.druid.sql.parser.ParserException;
+import com.google.common.base.Strings;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.Statement;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * jdbc util
+ */
+public class DorisUtil {
+ private static final Logger LOG = LoggerFactory.getLogger(DorisUtil.class);
+
+ private DorisUtil() {}
+
+ public static List<String> getDorisTableColumns(Connection conn, String databaseName, String tableName) {
+ String currentSql = String.format("SELECT COLUMN_NAME FROM `information_schema`.`COLUMNS` WHERE `TABLE_SCHEMA` = '%s' AND `TABLE_NAME` = '%s' ORDER BY `ORDINAL_POSITION` ASC;", databaseName, tableName);
+ List<String> columns = new ArrayList<>();
+ ResultSet rs = null;
+ try {
+ rs = DBUtil.query(conn, currentSql);
+ while (DBUtil.asyncResultSetNext(rs)) {
+ String colName = rs.getString("COLUMN_NAME");
+ columns.add(colName);
+ }
+ return columns;
+ } catch (Exception e) {
+ throw RdbmsException.asQueryException(DataBaseType.MySql, e, currentSql, null, null);
+ } finally {
+ DBUtil.closeDBResources(rs, null, null);
+ }
+ }
+
+ public static List<String> renderPreOrPostSqls(List<String> preOrPostSqls, String tableName) {
+ if (null == preOrPostSqls) {
+ return Collections.emptyList();
+ }
+ List<String> renderedSqls = new ArrayList<>();
+ for (String sql : preOrPostSqls) {
+ if (! Strings.isNullOrEmpty(sql)) {
+ renderedSqls.add(sql.replace(Constant.TABLE_NAME_PLACEHOLDER, tableName));
+ }
+ }
+ return renderedSqls;
+ }
+
+ public static void executeSqls(Connection conn, List<String> sqls) {
+ Statement stmt = null;
+ String currentSql = null;
+ try {
+ stmt = conn.createStatement();
+ for (String sql : sqls) {
+ currentSql = sql;
+ DBUtil.executeSqlWithoutResultSet(stmt, sql);
+ }
+ } catch (Exception e) {
+ throw RdbmsException.asQueryException(DataBaseType.MySql, e, currentSql, null, null);
+ } finally {
+ DBUtil.closeDBResources(null, stmt, null);
+ }
+ }
+
+ public static void preCheckPrePareSQL( Keys options) {
+ String table = options.getTable();
+ List<String> preSqls = options.getPreSqlList();
+ List<String> renderedPreSqls = DorisUtil.renderPreOrPostSqls(preSqls, table);
+ if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) {
+ LOG.info("Begin to preCheck preSqls:[{}].", String.join(";", renderedPreSqls));
+ for (String sql : renderedPreSqls) {
+ try {
+ DBUtil.sqlValid(sql, DataBaseType.MySql);
+ } catch ( ParserException e) {
+ throw RdbmsException.asPreSQLParserException(DataBaseType.MySql,e,sql);
+ }
+ }
+ }
+ }
+
+ public static void preCheckPostSQL( Keys options) {
+ String table = options.getTable();
+ List<String> postSqls = options.getPostSqlList();
+ List<String> renderedPostSqls = DorisUtil.renderPreOrPostSqls(postSqls, table);
+ if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) {
+ LOG.info("Begin to preCheck postSqls:[{}].", String.join(";", renderedPostSqls));
+ for(String sql : renderedPostSqls) {
+ try {
+ DBUtil.sqlValid(sql, DataBaseType.MySql);
+ } catch (ParserException e){
+ throw RdbmsException.asPostSQLParserException(DataBaseType.MySql,e,sql);
+ }
+ }
+ }
+ }
+}
diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriter.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriter.java
new file mode 100644
index 00000000..b44d5440
--- /dev/null
+++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriter.java
@@ -0,0 +1,164 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package com.alibaba.datax.plugin.writer.doriswriter;
+
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.RecordReceiver;
+import com.alibaba.datax.common.spi.Writer;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.rdbms.util.DBUtil;
+import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
+import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.Connection;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * doris data writer
+ */
+public class DorisWriter extends Writer {
+
+ public static class Job extends Writer.Job {
+
+ private static final Logger LOG = LoggerFactory.getLogger(Job.class);
+ private Configuration originalConfig = null;
+ private Keys options;
+
+ @Override
+ public void init() {
+ this.originalConfig = super.getPluginJobConf();
+ options = new Keys (super.getPluginJobConf());
+ options.doPretreatment();
+ }
+
+ @Override
+ public void preCheck(){
+ this.init();
+ DorisUtil.preCheckPrePareSQL(options);
+ DorisUtil.preCheckPostSQL(options);
+ }
+
+ @Override
+ public void prepare() {
+ String username = options.getUsername();
+ String password = options.getPassword();
+ String jdbcUrl = options.getJdbcUrl();
+ List<String> renderedPreSqls = DorisUtil.renderPreOrPostSqls(options.getPreSqlList(), options.getTable());
+ if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) {
+ Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password);
+ LOG.info("Begin to execute preSqls:[{}]. context info:{}.", String.join(";", renderedPreSqls), jdbcUrl);
+ DorisUtil.executeSqls(conn, renderedPreSqls);
+ DBUtil.closeDBResources(null, null, conn);
+ }
+ }
+
+ @Override
+ public List<Configuration> split(int mandatoryNumber) {
+ List<Configuration> configurations = new ArrayList<>(mandatoryNumber);
+ for (int i = 0; i < mandatoryNumber; i++) {
+ configurations.add(originalConfig);
+ }
+ return configurations;
+ }
+
+ @Override
+ public void post() {
+ String username = options.getUsername();
+ String password = options.getPassword();
+ String jdbcUrl = options.getJdbcUrl();
+ List<String> renderedPostSqls = DorisUtil.renderPreOrPostSqls(options.getPostSqlList(), options.getTable());
+ if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) {
+ Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password);
+ LOG.info("Start to execute postSqls:[{}]. context info:{}.", String.join(";", renderedPostSqls), jdbcUrl);
+ DorisUtil.executeSqls(conn, renderedPostSqls);
+ DBUtil.closeDBResources(null, null, conn);
+ }
+ }
+
+ @Override
+ public void destroy() {
+ }
+
+ }
+
+ public static class Task extends Writer.Task {
+ private DorisWriterManager writerManager;
+ private Keys options;
+ private DorisCodec rowCodec;
+
+ @Override
+ public void init() {
+ options = new Keys (super.getPluginJobConf());
+ if (options.isWildcardColumn()) {
+ Connection conn = DBUtil.getConnection(DataBaseType.MySql, options.getJdbcUrl(), options.getUsername(), options.getPassword());
+ List<String> columns = DorisUtil.getDorisTableColumns(conn, options.getDatabase(), options.getTable());
+ options.setInfoCchemaColumns(columns);
+ }
+ writerManager = new DorisWriterManager(options);
+ rowCodec = DorisCodecFactory.createCodec(options);
+ }
+
+ @Override
+ public void prepare() {
+ }
+
+ public void startWrite(RecordReceiver recordReceiver) {
+ try {
+ Record record;
+ while ((record = recordReceiver.getFromReader()) != null) {
+ if (record.getColumnNumber() != options.getColumns().size()) {
+ throw DataXException
+ .asDataXException(
+ DBUtilErrorCode.CONF_ERROR,
+ String.format(
+ "There is an error in the column configuration information. " +
+ "This is because you have configured a task where the number of fields to be read from the source:%s " +
+ "is not equal to the number of fields to be written to the destination table:%s. " +
+ "Please check your configuration and make changes.",
+ record.getColumnNumber(),
+ options.getColumns().size()));
+ }
+ writerManager.writeRecord(rowCodec.codec(record));
+ }
+ } catch (Exception e) {
+ throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e);
+ }
+ }
+
+ @Override
+ public void post() {
+ try {
+ writerManager.close();
+ } catch (Exception e) {
+ throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e);
+ }
+ }
+
+ @Override
+ public void destroy() {}
+
+ @Override
+ public boolean supportFailOver(){
+ return false;
+ }
+ }
+}
diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriterExcetion.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriterExcetion.java
new file mode 100644
index 00000000..7797d79f
--- /dev/null
+++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriterExcetion.java
@@ -0,0 +1,29 @@
+package com.alibaba.datax.plugin.writer.doriswriter;
+
+import java.io.IOException;
+import java.util.Map;
+
+public class DorisWriterExcetion extends IOException {
+
+ private final Map<String, Object> response;
+ private boolean reCreateLabel;
+
+ public DorisWriterExcetion(String message, Map<String, Object> response) {
+ super(message);
+ this.response = response;
+ }
+
+ public DorisWriterExcetion(String message, Map<String, Object> response, boolean reCreateLabel) {
+ super(message);
+ this.response = response;
+ this.reCreateLabel = reCreateLabel;
+ }
+
+ public Map<String, Object> getFailedResponse() {
+ return response;
+ }
+
+ public boolean needReCreateLabel() {
+ return reCreateLabel;
+ }
+}
diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriterManager.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriterManager.java
new file mode 100644
index 00000000..f0ba6b52
--- /dev/null
+++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriterManager.java
@@ -0,0 +1,192 @@
+package com.alibaba.datax.plugin.writer.doriswriter;
+
+import com.google.common.base.Strings;
+import org.apache.commons.lang3.concurrent.BasicThreadFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+import java.util.concurrent.Executors;
+import java.util.concurrent.LinkedBlockingDeque;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledFuture;
+import java.util.concurrent.TimeUnit;
+
+public class DorisWriterManager {
+
+ private static final Logger LOG = LoggerFactory.getLogger(DorisWriterManager.class);
+
+ private final DorisStreamLoadObserver visitor;
+ private final Keys options;
+ private final List<byte[]> buffer = new ArrayList<>();
+ private int batchCount = 0;
+ private long batchSize = 0;
+ private volatile boolean closed = false;
+ private volatile Exception flushException;
+ private final LinkedBlockingDeque<WriterTuple> flushQueue;
+ private ScheduledExecutorService scheduler;
+ private ScheduledFuture<?> scheduledFuture;
+
+ public DorisWriterManager( Keys options) {
+ this.options = options;
+ this.visitor = new DorisStreamLoadObserver (options);
+ flushQueue = new LinkedBlockingDeque<>(options.getFlushQueueLength());
+ this.startScheduler();
+ this.startAsyncFlushing();
+ }
+
+ public void startScheduler() {
+ stopScheduler();
+ this.scheduler = Executors.newScheduledThreadPool(1, new BasicThreadFactory.Builder().namingPattern("Doris-interval-flush").daemon(true).build());
+ this.scheduledFuture = this.scheduler.schedule(() -> {
+ synchronized (DorisWriterManager.this) {
+ if (!closed) {
+ try {
+ String label = createBatchLabel();
+ LOG.info(String.format("Doris interval Sinking triggered: label[%s].", label));
+ if (batchCount == 0) {
+ startScheduler();
+ }
+ flush(label, false);
+ } catch (Exception e) {
+ flushException = e;
+ }
+ }
+ }
+ }, options.getFlushInterval(), TimeUnit.MILLISECONDS);
+ }
+
+ public void stopScheduler() {
+ if (this.scheduledFuture != null) {
+ scheduledFuture.cancel(false);
+ this.scheduler.shutdown();
+ }
+ }
+
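+ // Buffer one encoded row; once the buffered row count or byte size reaches the configured threshold, hand the batch over to the async flusher.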
+ public final synchronized void writeRecord(String record) throws IOException {
+ checkFlushException();
+ try {
+ byte[] bts = record.getBytes(StandardCharsets.UTF_8);
+ buffer.add(bts);
+ batchCount++;
+ batchSize += bts.length;
+ if (batchCount >= options.getBatchRows() || batchSize >= options.getBatchSize()) {
+ String label = createBatchLabel();
+ LOG.debug(String.format("Doris buffer Sinking triggered: rows[%d] label[%s].", batchCount, label));
+ flush(label, false);
+ }
+ } catch (Exception e) {
+ throw new IOException("Writing records to Doris failed.", e);
+ }
+ }
+
+ public synchronized void flush(String label, boolean waitUtilDone) throws Exception {
+ checkFlushException();
+ if (batchCount == 0) {
+ if (waitUtilDone) {
+ waitAsyncFlushingDone();
+ }
+ return;
+ }
+ flushQueue.put(new WriterTuple (label, batchSize, new ArrayList<>(buffer)));
+ if (waitUtilDone) {
+ // wait the last flush
+ waitAsyncFlushingDone();
+ }
+ buffer.clear();
+ batchCount = 0;
+ batchSize = 0;
+ }
+
+ public synchronized void close() {
+ if (!closed) {
+ closed = true;
+ try {
+ String label = createBatchLabel();
+ if (batchCount > 0) LOG.debug(String.format("Doris Sink is about to close: label[%s].", label));
+ flush(label, true);
+ } catch (Exception e) {
+ throw new RuntimeException("Writing records to Doris failed.", e);
+ }
+ }
+ checkFlushException();
+ }
+
+ public String createBatchLabel() {
+ StringBuilder sb = new StringBuilder();
+ if (! Strings.isNullOrEmpty(options.getLabelPrefix())) {
+ sb.append(options.getLabelPrefix());
+ }
+ return sb.append(UUID.randomUUID().toString())
+ .toString();
+ }
+
+ private void startAsyncFlushing() {
+ // start flush thread
+ Thread flushThread = new Thread(new Runnable(){
+ public void run() {
+ while(true) {
+ try {
+ asyncFlush();
+ } catch (Exception e) {
+ flushException = e;
+ }
+ }
+ }
+ });
+ flushThread.setDaemon(true);
+ flushThread.start();
+ }
+
+ private void waitAsyncFlushingDone() throws InterruptedException {
+ // wait previous flushings
+ for (int i = 0; i <= options.getFlushQueueLength(); i++) {
+ flushQueue.put(new WriterTuple ("", 0l, null));
+ }
+ checkFlushException();
+ }
+
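+ // Take one batch from the queue and stream-load it, retrying up to maxRetries and switching to a newly created label when Doris reports the old one already exists.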
+ private void asyncFlush() throws Exception {
+ WriterTuple flushData = flushQueue.take();
+ if (Strings.isNullOrEmpty(flushData.getLabel())) {
+ return;
+ }
+ stopScheduler();
+ LOG.debug(String.format("Async stream load: rows[%d] bytes[%d] label[%s].", flushData.getRows().size(), flushData.getBytes(), flushData.getLabel()));
+ for (int i = 0; i <= options.getMaxRetries(); i++) {
+ try {
+ // flush to Doris with stream load
+ visitor.streamLoad(flushData);
+ LOG.info(String.format("Async stream load finished: label[%s].", flushData.getLabel()));
+ startScheduler();
+ break;
+ } catch (Exception e) {
+ LOG.warn("Failed to flush batch data to Doris, retry times = {}", i, e);
+ if (i >= options.getMaxRetries()) {
+ throw new IOException(e);
+ }
+ if (e instanceof DorisWriterExcetion && (( DorisWriterExcetion )e).needReCreateLabel()) {
+ String newLabel = createBatchLabel();
+ LOG.warn(String.format("Batch label changed from [%s] to [%s]", flushData.getLabel(), newLabel));
+ flushData.setLabel(newLabel);
+ }
+ try {
+ Thread.sleep(1000l * Math.min(i + 1, 10));
+ } catch (InterruptedException ex) {
+ Thread.currentThread().interrupt();
+ throw new IOException("Unable to flush, interrupted while doing another attempt", e);
+ }
+ }
+ }
+ }
+
+ private void checkFlushException() {
+ if (flushException != null) {
+ throw new RuntimeException("Writing records to Doris failed.", flushException);
+ }
+ }
+}
diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/Keys.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/Keys.java
new file mode 100644
index 00000000..e460e76b
--- /dev/null
+++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/Keys.java
@@ -0,0 +1,177 @@
+package com.alibaba.datax.plugin.writer.doriswriter;
+
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+public class Keys implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+ private static final int MAX_RETRIES = 3;
+ private static final int BATCH_ROWS = 500000;
+ private static final long DEFAULT_FLUSH_INTERVAL = 30000;
+
+ private static final String LOAD_PROPS_FORMAT = "format";
+ public enum StreamLoadFormat {
+ CSV, JSON;
+ }
+
+ private static final String USERNAME = "username";
+ private static final String PASSWORD = "password";
+ private static final String DATABASE = "connection[0].selectedDatabase";
+ private static final String TABLE = "connection[0].table[0]";
+ private static final String COLUMN = "column";
+ private static final String PRE_SQL = "preSql";
+ private static final String POST_SQL = "postSql";
+ private static final String JDBC_URL = "connection[0].jdbcUrl";
+ private static final String LABEL_PREFIX = "labelPrefix";
+ private static final String MAX_BATCH_ROWS = "maxBatchRows";
+ private static final String MAX_BATCH_SIZE = "batchSize";
+ private static final String FLUSH_INTERVAL = "flushInterval";
+ private static final String LOAD_URL = "loadUrl";
+ private static final String FLUSH_QUEUE_LENGTH = "flushQueueLength";
+ private static final String LOAD_PROPS = "loadProps";
+
+ private static final String DEFAULT_LABEL_PREFIX = "datax_doris_writer_";
+
+ private static final long DEFAULT_MAX_BATCH_SIZE = 90 * 1024 * 1024; //default 90M
+
+ private final Configuration options;
+
+ private List<String> infoSchemaColumns;
+ private List<String> userSetColumns;
+ private boolean isWildcardColumn;
+
+ public Keys(Configuration options) {
+ this.options = options;
+ this.userSetColumns = options.getList(COLUMN, String.class).stream().map(str -> str.replace("`", "")).collect(Collectors.toList());
+ if (1 == options.getList(COLUMN, String.class).size() && "*".trim().equals(options.getList(COLUMN, String.class).get(0))) {
+ this.isWildcardColumn = true;
+ }
+ }
+
+ public void doPretreatment() {
+ validateRequired();
+ validateStreamLoadUrl();
+ }
+
+ public String getJdbcUrl() {
+ return options.getString(JDBC_URL);
+ }
+
+ public String getDatabase() {
+ return options.getString(DATABASE);
+ }
+
+ public String getTable() {
+ return options.getString(TABLE);
+ }
+
+ public String getUsername() {
+ return options.getString(USERNAME);
+ }
+
+ public String getPassword() {
+ return options.getString(PASSWORD);
+ }
+
+ public String getLabelPrefix() {
+ String label = options.getString(LABEL_PREFIX);
+ return null == label ? DEFAULT_LABEL_PREFIX : label;
+ }
+
+ public List<String> getLoadUrlList() {
+ return options.getList(LOAD_URL, String.class);
+ }
+
+ public List<String> getColumns() {
+ if (isWildcardColumn) {
+ return this.infoSchemaColumns;
+ }
+ return this.userSetColumns;
+ }
+
+ public boolean isWildcardColumn() {
+ return this.isWildcardColumn;
+ }
+
+ public void setInfoCchemaColumns(List<String> cols) {
+ this.infoSchemaColumns = cols;
+ }
+
+ public List<String> getPreSqlList() {
+ return options.getList(PRE_SQL, String.class);
+ }
+
+ public List<String> getPostSqlList() {
+ return options.getList(POST_SQL, String.class);
+ }
+
+ public Map<String, Object> getLoadProps() {
+ return options.getMap(LOAD_PROPS);
+ }
+
+ public int getMaxRetries() {
+ return MAX_RETRIES;
+ }
+
+ public int getBatchRows() {
+ Integer rows = options.getInt(MAX_BATCH_ROWS);
+ return null == rows ? BATCH_ROWS : rows;
+ }
+
+ public long getBatchSize() {
+ Long size = options.getLong(MAX_BATCH_SIZE);
+ return null == size ? DEFAULT_MAX_BATCH_SIZE : size;
+ }
+
+ public long getFlushInterval() {
+ Long interval = options.getLong(FLUSH_INTERVAL);
+ return null == interval ? DEFAULT_FLUSH_INTERVAL : interval;
+ }
+
+ public int getFlushQueueLength() {
+ Integer len = options.getInt(FLUSH_QUEUE_LENGTH);
+ return null == len ? 1 : len;
+ }
+
+ public StreamLoadFormat getStreamLoadFormat() {
+ Map<String, Object> loadProps = getLoadProps();
+ if (null == loadProps) {
+ return StreamLoadFormat.CSV;
+ }
+ if (loadProps.containsKey(LOAD_PROPS_FORMAT)
+ && StreamLoadFormat.JSON.name().equalsIgnoreCase(String.valueOf(loadProps.get(LOAD_PROPS_FORMAT)))) {
+ return StreamLoadFormat.JSON;
+ }
+ return StreamLoadFormat.CSV;
+ }
+
+ private void validateStreamLoadUrl() {
+ List<String> urlList = getLoadUrlList();
+ for (String host : urlList) {
+ if (host.split(":").length < 2) {
+ throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR,
+ "The format of loadUrl is not correct, please enter:[`fe_ip:fe_http_ip;fe_ip:fe_http_ip`].");
+ }
+ }
+ }
+
+ private void validateRequired() {
+ final String[] requiredOptionKeys = new String[]{
+ USERNAME,
+ DATABASE,
+ TABLE,
+ COLUMN,
+ LOAD_URL
+ };
+ for (String optionKey : requiredOptionKeys) {
+ options.getNecessaryValue(optionKey, DBUtilErrorCode.REQUIRED_VALUE);
+ }
+ }
+}
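
Keys is a thin accessor over the job parameter block: required keys are enforced in doPretreatment(), while optional ones fall back to the defaults above (3 retries, 500000 rows, 90 MB batch size, 30 s flush interval, queue length 1, CSV stream-load format). A hedged usage sketch, assuming Configuration.from(String) parses a JSON string as it does elsewhere in DataX and that this class sits in (or imports from) the doriswriter package; all job values are placeholders.

import com.alibaba.datax.common.util.Configuration;

// Hedged usage sketch for Keys; the JSON below loosely mirrors plugin_job_template.json
// and is illustrative only, not a recommended configuration.
public class KeysDemo {
    public static void main(String[] args) {
        String job = "{"
                + "\"username\": \"root\","
                + "\"password\": \"\","
                + "\"column\": [\"*\"],"
                + "\"loadUrl\": [\"127.0.0.1:8030\"],"
                + "\"connection\": [{"
                + "\"jdbcUrl\": \"jdbc:mysql://127.0.0.1:9030/demo\","
                + "\"selectedDatabase\": \"demo\","
                + "\"table\": [\"t1\"]"
                + "}]"
                + "}";
        Keys keys = new Keys(Configuration.from(job));
        keys.doPretreatment();                           // checks required keys and the loadUrl host:port format
        System.out.println(keys.isWildcardColumn());     // true, because column is ["*"]
        System.out.println(keys.getBatchRows());         // 500000: maxBatchRows not set, default applies
        System.out.println(keys.getStreamLoadFormat());  // CSV: no loadProps.format given
    }
}
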
diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/WriterTuple.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/WriterTuple.java
new file mode 100644
index 00000000..32e0b341
--- /dev/null
+++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/WriterTuple.java
@@ -0,0 +1,20 @@
+package com.alibaba.datax.plugin.writer.doriswriter;
+
+import java.util.List;
+
+public class WriterTuple {
+ private String label;
+ private Long bytes;
+ private List<byte[]> rows;
+
+ public WriterTuple(String label, Long bytes, List<byte[]> rows) {
+ this.label = label;
+ this.rows = rows;
+ this.bytes = bytes;
+ }
+
+ public String getLabel() { return label; }
+ public void setLabel(String label) { this.label = label; }
+ public Long getBytes() { return bytes; }
+ public List<byte[]> getRows() { return rows; }
+}
diff --git a/doriswriter/src/main/resources/plugin.json b/doriswriter/src/main/resources/plugin.json
new file mode 100644
index 00000000..69dc31a2
--- /dev/null
+++ b/doriswriter/src/main/resources/plugin.json
@@ -0,0 +1,6 @@
+{
+ "name": "doriswriter",
+ "class": "com.alibaba.datax.plugin.writer.doriswriter.DorisWriter",
+ "description": "apache doris writer plugin",
+ "developer": "apche doris"
+}
diff --git a/doriswriter/src/main/resources/plugin_job_template.json b/doriswriter/src/main/resources/plugin_job_template.json
new file mode 100644
index 00000000..0187e539
--- /dev/null
+++ b/doriswriter/src/main/resources/plugin_job_template.json
@@ -0,0 +1,20 @@
+{
+ "name": "doriswriter",
+ "parameter": {
+ "username": "",
+ "password": "",
+ "column": [],
+ "preSql": [],
+ "postSql": [],
+ "beLoadUrl": [],
+ "loadUrl": [],
+ "loadProps": {},
+ "connection": [
+ {
+ "jdbcUrl": "",
+ "selectedDatabase": "",
+ "table": []
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/elasticsearchwriter/pom.xml b/elasticsearchwriter/pom.xml
index a60dbd88..8699c6e5 100644
--- a/elasticsearchwriter/pom.xml
+++ b/elasticsearchwriter/pom.xml
@@ -35,12 +35,12 @@
<dependency>
<groupId>io.searchbox</groupId>
<artifactId>jest-common</artifactId>
- <version>2.4.0</version>
+ <version>6.3.1</version>
</dependency>
<dependency>
<groupId>io.searchbox</groupId>
<artifactId>jest</artifactId>
- <version>2.4.0</version>
+ <version>6.3.1</version>
</dependency>
<dependency>
<groupId>joda-time</groupId>
diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESClient.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESClient.java
deleted file mode 100644
index 34bb7e54..00000000
--- a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESClient.java
+++ /dev/null
@@ -1,236 +0,0 @@
-package com.alibaba.datax.plugin.writer.elasticsearchwriter;
-
-import com.google.gson.Gson;
-import com.google.gson.JsonElement;
-import com.google.gson.JsonObject;
-import com.google.gson.JsonParser;
-import io.searchbox.action.Action;
-import io.searchbox.client.JestClient;
-import io.searchbox.client.JestClientFactory;
-import io.searchbox.client.JestResult;
-import io.searchbox.client.config.HttpClientConfig;
-import io.searchbox.client.config.HttpClientConfig.Builder;
-import io.searchbox.core.Bulk;
-import io.searchbox.indices.CreateIndex;
-import io.searchbox.indices.DeleteIndex;
-import io.searchbox.indices.IndicesExists;
-import io.searchbox.indices.aliases.*;
-import io.searchbox.indices.mapping.PutMapping;
-import org.apache.http.HttpHost;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.TimeUnit;
-
-/**
- * Created by xiongfeng.bxf on 17/2/8.
- */
-public class ESClient {
- private static final Logger log = LoggerFactory.getLogger(ESClient.class);
-
- private JestClient jestClient;
-
- public JestClient getClient() {
- return jestClient;
- }
-
- public void createClient(String endpoint,
- String user,
- String passwd,
- boolean multiThread,
- int readTimeout,
- boolean compression,
- boolean discovery) {
-
- JestClientFactory factory = new JestClientFactory();
- Builder httpClientConfig = new HttpClientConfig
- .Builder(endpoint)
- .setPreemptiveAuth(new HttpHost(endpoint))
- .multiThreaded(multiThread)
- .connTimeout(30000)
- .readTimeout(readTimeout)
- .maxTotalConnection(200)
- .requestCompressionEnabled(compression)
- .discoveryEnabled(discovery)
- .discoveryFrequency(5l, TimeUnit.MINUTES);
-
- if (!("".equals(user) || "".equals(passwd))) {
- httpClientConfig.defaultCredentials(user, passwd);
- }
-
- factory.setHttpClientConfig(httpClientConfig.build());
-
- jestClient = factory.getObject();
- }
-
- public boolean indicesExists(String indexName) throws Exception {
- boolean isIndicesExists = false;
- JestResult rst = jestClient.execute(new IndicesExists.Builder(indexName).build());
- if (rst.isSucceeded()) {
- isIndicesExists = true;
- } else {
- switch (rst.getResponseCode()) {
- case 404:
- isIndicesExists = false;
- break;
- case 401:
- // 无权访问
- default:
- log.warn(rst.getErrorMessage());
- break;
- }
- }
- return isIndicesExists;
- }
-
- public boolean deleteIndex(String indexName) throws Exception {
- log.info("delete index " + indexName);
- if (indicesExists(indexName)) {
- JestResult rst = execute(new DeleteIndex.Builder(indexName).build());
- if (!rst.isSucceeded()) {
- return false;
- }
- } else {
- log.info("index cannot found, skip delete " + indexName);
- }
- return true;
- }
-
- public boolean createIndex(String indexName, String typeName,
- Object mappings, String settings, boolean dynamic) throws Exception {
- JestResult rst = null;
- if (!indicesExists(indexName)) {
- log.info("create index " + indexName);
- rst = jestClient.execute(
- new CreateIndex.Builder(indexName)
- .settings(settings)
- .setParameter("master_timeout", "5m")
- .build()
- );
- //index_already_exists_exception
- if (!rst.isSucceeded()) {
- if (getStatus(rst) == 400) {
- log.info(String.format("index [%s] already exists", indexName));
- return true;
- } else {
- log.error(rst.getErrorMessage());
- return false;
- }
- } else {
- log.info(String.format("create [%s] index success", indexName));
- }
- }
-
- int idx = 0;
- while (idx < 5) {
- if (indicesExists(indexName)) {
- break;
- }
- Thread.sleep(2000);
- idx ++;
- }
- if (idx >= 5) {
- return false;
- }
-
- if (dynamic) {
- log.info("ignore mappings");
- return true;
- }
- log.info("create mappings for " + indexName + " " + mappings);
- rst = jestClient.execute(new PutMapping.Builder(indexName, typeName, mappings)
- .setParameter("master_timeout", "5m").build());
- if (!rst.isSucceeded()) {
- if (getStatus(rst) == 400) {
- log.info(String.format("index [%s] mappings already exists", indexName));
- } else {
- log.error(rst.getErrorMessage());
- return false;
- }
- } else {
- log.info(String.format("index [%s] put mappings success", indexName));
- }
- return true;
- }
-
- public JestResult execute(Action clientRequest) throws Exception {
- JestResult rst = null;
- rst = jestClient.execute(clientRequest);
- if (!rst.isSucceeded()) {
- //log.warn(rst.getErrorMessage());
- }
- return rst;
- }
-
- public Integer getStatus(JestResult rst) {
- JsonObject jsonObject = rst.getJsonObject();
- if (jsonObject.has("status")) {
- return jsonObject.get("status").getAsInt();
- }
- return 600;
- }
-
- public boolean isBulkResult(JestResult rst) {
- JsonObject jsonObject = rst.getJsonObject();
- return jsonObject.has("items");
- }
-
-
- public boolean alias(String indexname, String aliasname, boolean needClean) throws IOException {
- GetAliases getAliases = new GetAliases.Builder().addIndex(aliasname).build();
- AliasMapping addAliasMapping = new AddAliasMapping.Builder(indexname, aliasname).build();
- JestResult rst = jestClient.execute(getAliases);
- log.info(rst.getJsonString());
- List list = new ArrayList();
- if (rst.isSucceeded()) {
- JsonParser jp = new JsonParser();
- JsonObject jo = (JsonObject)jp.parse(rst.getJsonString());
- for(Map.Entry entry : jo.entrySet()){
- String tindex = entry.getKey();
- if (indexname.equals(tindex)) {
- continue;
- }
- AliasMapping m = new RemoveAliasMapping.Builder(tindex, aliasname).build();
- String s = new Gson().toJson(m.getData());
- log.info(s);
- if (needClean) {
- list.add(m);
- }
- }
- }
-
- ModifyAliases modifyAliases = new ModifyAliases.Builder(addAliasMapping).addAlias(list).setParameter("master_timeout", "5m").build();
- rst = jestClient.execute(modifyAliases);
- if (!rst.isSucceeded()) {
- log.error(rst.getErrorMessage());
- return false;
- }
- return true;
- }
-
- public JestResult bulkInsert(Bulk.Builder bulk, int trySize) throws Exception {
- // es_rejected_execution_exception
- // illegal_argument_exception
- // cluster_block_exception
- JestResult rst = null;
- rst = jestClient.execute(bulk.build());
- if (!rst.isSucceeded()) {
- log.warn(rst.getErrorMessage());
- }
- return rst;
- }
-
- /**
- * 关闭JestClient客户端
- *
- */
- public void closeJestClient() {
- if (jestClient != null) {
- jestClient.shutdownClient();
- }
- }
-}
diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESColumn.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESColumn.java
deleted file mode 100644
index 8990d77c..00000000
--- a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESColumn.java
+++ /dev/null
@@ -1,65 +0,0 @@
-package com.alibaba.datax.plugin.writer.elasticsearchwriter;
-
-/**
- * Created by xiongfeng.bxf on 17/3/2.
- */
-public class ESColumn {
-
- private String name;//: "appkey",
-
- private String type;//": "TEXT",
-
- private String timezone;
-
- private String format;
-
- private Boolean array;
-
- public void setName(String name) {
- this.name = name;
- }
-
- public void setType(String type) {
- this.type = type;
- }
-
- public void setTimeZone(String timezone) {
- this.timezone = timezone;
- }
-
- public void setFormat(String format) {
- this.format = format;
- }
-
- public String getName() {
- return name;
- }
-
- public String getType() {
- return type;
- }
-
- public String getTimezone() {
- return timezone;
- }
-
- public String getFormat() {
- return format;
- }
-
- public void setTimezone(String timezone) {
- this.timezone = timezone;
- }
-
- public Boolean isArray() {
- return array;
- }
-
- public void setArray(Boolean array) {
- this.array = array;
- }
-
- public Boolean getArray() {
- return array;
- }
-}
diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESWriter.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESWriter.java
deleted file mode 100644
index eb0e9a81..00000000
--- a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESWriter.java
+++ /dev/null
@@ -1,460 +0,0 @@
-package com.alibaba.datax.plugin.writer.elasticsearchwriter;
-
-import com.alibaba.datax.common.element.Column;
-import com.alibaba.datax.common.element.Record;
-import com.alibaba.datax.common.exception.DataXException;
-import com.alibaba.datax.common.plugin.RecordReceiver;
-import com.alibaba.datax.common.spi.Writer;
-import com.alibaba.datax.common.util.Configuration;
-import com.alibaba.datax.common.util.RetryUtil;
-import com.alibaba.fastjson.JSON;
-import com.alibaba.fastjson.JSONObject;
-import com.alibaba.fastjson.TypeReference;
-import io.searchbox.client.JestResult;
-import io.searchbox.core.Bulk;
-import io.searchbox.core.BulkResult;
-import io.searchbox.core.Index;
-import org.joda.time.DateTime;
-import org.joda.time.DateTimeZone;
-import org.joda.time.format.DateTimeFormat;
-import org.joda.time.format.DateTimeFormatter;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.net.URLEncoder;
-import java.util.*;
-import java.util.concurrent.Callable;
-
-public class ESWriter extends Writer {
- private final static String WRITE_COLUMNS = "write_columns";
-
- public static class Job extends Writer.Job {
- private static final Logger log = LoggerFactory.getLogger(Job.class);
-
- private Configuration conf = null;
-
- @Override
- public void init() {
- this.conf = super.getPluginJobConf();
- }
-
- @Override
- public void prepare() {
- /**
- * 注意:此方法仅执行一次。
- * 最佳实践:如果 Job 中有需要进行数据同步之前的处理,可以在此处完成,如果没有必要则可以直接去掉。
- */
- ESClient esClient = new ESClient();
- esClient.createClient(Key.getEndpoint(conf),
- Key.getAccessID(conf),
- Key.getAccessKey(conf),
- false,
- 300000,
- false,
- false);
-
- String indexName = Key.getIndexName(conf);
- String typeName = Key.getTypeName(conf);
- boolean dynamic = Key.getDynamic(conf);
- String mappings = genMappings(typeName);
- String settings = JSONObject.toJSONString(
- Key.getSettings(conf)
- );
- log.info(String.format("index:[%s], type:[%s], mappings:[%s]", indexName, typeName, mappings));
-
- try {
- boolean isIndicesExists = esClient.indicesExists(indexName);
- if (Key.isCleanup(this.conf) && isIndicesExists) {
- esClient.deleteIndex(indexName);
- }
- // 强制创建,内部自动忽略已存在的情况
- if (!esClient.createIndex(indexName, typeName, mappings, settings, dynamic)) {
- throw new IOException("create index or mapping failed");
- }
- } catch (Exception ex) {
- throw DataXException.asDataXException(ESWriterErrorCode.ES_MAPPINGS, ex.toString());
- }
- esClient.closeJestClient();
- }
-
- private String genMappings(String typeName) {
- String mappings = null;
- Map propMap = new HashMap();
- List columnList = new ArrayList();
-
- List column = conf.getList("column");
- if (column != null) {
- for (Object col : column) {
- JSONObject jo = JSONObject.parseObject(col.toString());
- String colName = jo.getString("name");
- String colTypeStr = jo.getString("type");
- if (colTypeStr == null) {
- throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, col.toString() + " column must have type");
- }
- ESFieldType colType = ESFieldType.getESFieldType(colTypeStr);
- if (colType == null) {
- throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, col.toString() + " unsupported type");
- }
-
- ESColumn columnItem = new ESColumn();
-
- if (colName.equals(Key.PRIMARY_KEY_COLUMN_NAME)) {
- // 兼容已有版本
- colType = ESFieldType.ID;
- colTypeStr = "id";
- }
-
- columnItem.setName(colName);
- columnItem.setType(colTypeStr);
-
- if (colType == ESFieldType.ID) {
- columnList.add(columnItem);
- // 如果是id,则properties为空
- continue;
- }
-
- Boolean array = jo.getBoolean("array");
- if (array != null) {
- columnItem.setArray(array);
- }
- Map field = new HashMap();
- field.put("type", colTypeStr);
- //https://www.elastic.co/guide/en/elasticsearch/reference/5.2/breaking_50_mapping_changes.html#_literal_index_literal_property
- // https://www.elastic.co/guide/en/elasticsearch/guide/2.x/_deep_dive_on_doc_values.html#_disabling_doc_values
- field.put("doc_values", jo.getBoolean("doc_values"));
- field.put("ignore_above", jo.getInteger("ignore_above"));
- field.put("index", jo.getBoolean("index"));
-
- switch (colType) {
- case STRING:
- // 兼容string类型,ES5之前版本
- break;
- case KEYWORD:
- // https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-search-speed.html#_warm_up_global_ordinals
- field.put("eager_global_ordinals", jo.getBoolean("eager_global_ordinals"));
- case TEXT:
- field.put("analyzer", jo.getString("analyzer"));
- // 优化disk使用,也同步会提高index性能
- // https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-disk-usage.html
- field.put("norms", jo.getBoolean("norms"));
- field.put("index_options", jo.getBoolean("index_options"));
- break;
- case DATE:
- columnItem.setTimeZone(jo.getString("timezone"));
- columnItem.setFormat(jo.getString("format"));
- // 后面时间会处理为带时区的标准时间,所以不需要给ES指定格式
- /*
- if (jo.getString("format") != null) {
- field.put("format", jo.getString("format"));
- } else {
- //field.put("format", "strict_date_optional_time||epoch_millis||yyyy-MM-dd HH:mm:ss||yyyy-MM-dd");
- }
- */
- break;
- case GEO_SHAPE:
- field.put("tree", jo.getString("tree"));
- field.put("precision", jo.getString("precision"));
- default:
- break;
- }
- propMap.put(colName, field);
- columnList.add(columnItem);
- }
- }
-
- conf.set(WRITE_COLUMNS, JSON.toJSONString(columnList));
-
- log.info(JSON.toJSONString(columnList));
-
- Map rootMappings = new HashMap();
- Map typeMappings = new HashMap();
- typeMappings.put("properties", propMap);
- rootMappings.put(typeName, typeMappings);
-
- mappings = JSON.toJSONString(rootMappings);
-
- if (mappings == null || "".equals(mappings)) {
- throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, "must have mappings");
- }
-
- return mappings;
- }
-
- @Override
- public List split(int mandatoryNumber) {
- List configurations = new ArrayList(mandatoryNumber);
- for (int i = 0; i < mandatoryNumber; i++) {
- configurations.add(conf);
- }
- return configurations;
- }
-
- @Override
- public void post() {
- ESClient esClient = new ESClient();
- esClient.createClient(Key.getEndpoint(conf),
- Key.getAccessID(conf),
- Key.getAccessKey(conf),
- false,
- 300000,
- false,
- false);
- String alias = Key.getAlias(conf);
- if (!"".equals(alias)) {
- log.info(String.format("alias [%s] to [%s]", alias, Key.getIndexName(conf)));
- try {
- esClient.alias(Key.getIndexName(conf), alias, Key.isNeedCleanAlias(conf));
- } catch (IOException e) {
- throw DataXException.asDataXException(ESWriterErrorCode.ES_ALIAS_MODIFY, e);
- }
- }
- }
-
- @Override
- public void destroy() {
-
- }
- }
-
- public static class Task extends Writer.Task {
-
- private static final Logger log = LoggerFactory.getLogger(Job.class);
-
- private Configuration conf;
-
-
- ESClient esClient = null;
- private List typeList;
- private List columnList;
-
- private int trySize;
- private int batchSize;
- private String index;
- private String type;
- private String splitter;
-
- @Override
- public void init() {
- this.conf = super.getPluginJobConf();
- index = Key.getIndexName(conf);
- type = Key.getTypeName(conf);
-
- trySize = Key.getTrySize(conf);
- batchSize = Key.getBatchSize(conf);
- splitter = Key.getSplitter(conf);
- columnList = JSON.parseObject(this.conf.getString(WRITE_COLUMNS), new TypeReference>() {
- });
-
- typeList = new ArrayList();
-
- for (ESColumn col : columnList) {
- typeList.add(ESFieldType.getESFieldType(col.getType()));
- }
-
- esClient = new ESClient();
- }
-
- @Override
- public void prepare() {
- esClient.createClient(Key.getEndpoint(conf),
- Key.getAccessID(conf),
- Key.getAccessKey(conf),
- Key.isMultiThread(conf),
- Key.getTimeout(conf),
- Key.isCompression(conf),
- Key.isDiscovery(conf));
- }
-
- @Override
- public void startWrite(RecordReceiver recordReceiver) {
- List writerBuffer = new ArrayList(this.batchSize);
- Record record = null;
- long total = 0;
- while ((record = recordReceiver.getFromReader()) != null) {
- writerBuffer.add(record);
- if (writerBuffer.size() >= this.batchSize) {
- total += doBatchInsert(writerBuffer);
- writerBuffer.clear();
- }
- }
-
- if (!writerBuffer.isEmpty()) {
- total += doBatchInsert(writerBuffer);
- writerBuffer.clear();
- }
-
- String msg = String.format("task end, write size :%d", total);
- getTaskPluginCollector().collectMessage("writesize", String.valueOf(total));
- log.info(msg);
- esClient.closeJestClient();
- }
-
- private String getDateStr(ESColumn esColumn, Column column) {
- DateTime date = null;
- DateTimeZone dtz = DateTimeZone.getDefault();
- if (esColumn.getTimezone() != null) {
- // 所有时区参考 http://www.joda.org/joda-time/timezones.html
- dtz = DateTimeZone.forID(esColumn.getTimezone());
- }
- if (column.getType() != Column.Type.DATE && esColumn.getFormat() != null) {
- DateTimeFormatter formatter = DateTimeFormat.forPattern(esColumn.getFormat());
- date = formatter.withZone(dtz).parseDateTime(column.asString());
- return date.toString();
- } else if (column.getType() == Column.Type.DATE) {
- date = new DateTime(column.asLong(), dtz);
- return date.toString();
- } else {
- return column.asString();
- }
- }
-
- private long doBatchInsert(final List writerBuffer) {
- Map data = null;
- final Bulk.Builder bulkaction = new Bulk.Builder().defaultIndex(this.index).defaultType(this.type);
- for (Record record : writerBuffer) {
- data = new HashMap();
- String id = null;
- for (int i = 0; i < record.getColumnNumber(); i++) {
- Column column = record.getColumn(i);
- String columnName = columnList.get(i).getName();
- ESFieldType columnType = typeList.get(i);
- //如果是数组类型,那它传入的必是字符串类型
- if (columnList.get(i).isArray() != null && columnList.get(i).isArray()) {
- String[] dataList = column.asString().split(splitter);
- if (!columnType.equals(ESFieldType.DATE)) {
- data.put(columnName, dataList);
- } else {
- for (int pos = 0; pos < dataList.length; pos++) {
- dataList[pos] = getDateStr(columnList.get(i), column);
- }
- data.put(columnName, dataList);
- }
- } else {
- switch (columnType) {
- case ID:
- if (id != null) {
- id += record.getColumn(i).asString();
- } else {
- id = record.getColumn(i).asString();
- }
- break;
- case DATE:
- try {
- String dateStr = getDateStr(columnList.get(i), column);
- data.put(columnName, dateStr);
- } catch (Exception e) {
- getTaskPluginCollector().collectDirtyRecord(record, String.format("时间类型解析失败 [%s:%s] exception: %s", columnName, column.toString(), e.toString()));
- }
- break;
- case KEYWORD:
- case STRING:
- case TEXT:
- case IP:
- case GEO_POINT:
- data.put(columnName, column.asString());
- break;
- case BOOLEAN:
- data.put(columnName, column.asBoolean());
- break;
- case BYTE:
- case BINARY:
- data.put(columnName, column.asBytes());
- break;
- case LONG:
- data.put(columnName, column.asLong());
- break;
- case INTEGER:
- data.put(columnName, column.asBigInteger());
- break;
- case SHORT:
- data.put(columnName, column.asBigInteger());
- break;
- case FLOAT:
- case DOUBLE:
- data.put(columnName, column.asDouble());
- break;
- case NESTED:
- case OBJECT:
- case GEO_SHAPE:
- data.put(columnName, JSON.parse(column.asString()));
- break;
- default:
- getTaskPluginCollector().collectDirtyRecord(record, "类型错误:不支持的类型:" + columnType + " " + columnName);
- }
- }
- }
-
- if (id == null) {
- //id = UUID.randomUUID().toString();
- bulkaction.addAction(new Index.Builder(data).build());
- } else {
- bulkaction.addAction(new Index.Builder(data).id(id).build());
- }
- }
-
- try {
- return RetryUtil.executeWithRetry(new Callable() {
- @Override
- public Integer call() throws Exception {
- JestResult jestResult = esClient.bulkInsert(bulkaction, 1);
- if (jestResult.isSucceeded()) {
- return writerBuffer.size();
- }
-
- String msg = String.format("response code: [%d] error :[%s]", jestResult.getResponseCode(), jestResult.getErrorMessage());
- log.warn(msg);
- if (esClient.isBulkResult(jestResult)) {
- BulkResult brst = (BulkResult) jestResult;
- List failedItems = brst.getFailedItems();
- for (BulkResult.BulkResultItem item : failedItems) {
- if (item.status != 400) {
- // 400 BAD_REQUEST 如果非数据异常,请求异常,则不允许忽略
- throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, String.format("status:[%d], error: %s", item.status, item.error));
- } else {
- // 如果用户选择不忽略解析错误,则抛异常,默认为忽略
- if (!Key.isIgnoreParseError(conf)) {
- throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, String.format("status:[%d], error: %s, config not ignoreParseError so throw this error", item.status, item.error));
- }
- }
- }
-
- List items = brst.getItems();
- for (int idx = 0; idx < items.size(); ++idx) {
- BulkResult.BulkResultItem item = items.get(idx);
- if (item.error != null && !"".equals(item.error)) {
- getTaskPluginCollector().collectDirtyRecord(writerBuffer.get(idx), String.format("status:[%d], error: %s", item.status, item.error));
- }
- }
- return writerBuffer.size() - brst.getFailedItems().size();
- } else {
- Integer status = esClient.getStatus(jestResult);
- switch (status) {
- case 429: //TOO_MANY_REQUESTS
- log.warn("server response too many requests, so auto reduce speed");
- break;
- }
- throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, jestResult.getErrorMessage());
- }
- }
- }, trySize, 60000L, true);
- } catch (Exception e) {
- if (Key.isIgnoreWriteError(this.conf)) {
- log.warn(String.format("重试[%d]次写入失败,忽略该错误,继续写入!", trySize));
- } else {
- throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, e);
- }
- }
- return 0;
- }
-
- @Override
- public void post() {
- }
-
- @Override
- public void destroy() {
- esClient.closeJestClient();
- }
- }
-}
diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchClient.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchClient.java
new file mode 100644
index 00000000..08486e1f
--- /dev/null
+++ b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchClient.java
@@ -0,0 +1,314 @@
+package com.alibaba.datax.plugin.writer.elasticsearchwriter;
+
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.writer.elasticsearchwriter.jest.ClusterInfo;
+import com.alibaba.datax.plugin.writer.elasticsearchwriter.jest.ClusterInfoResult;
+import com.alibaba.datax.plugin.writer.elasticsearchwriter.jest.PutMapping7;
+import com.alibaba.fastjson2.JSON;
+import com.alibaba.fastjson2.JSONObject;
+import com.google.gson.Gson;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParser;
+import io.searchbox.action.Action;
+import io.searchbox.client.JestClient;
+import io.searchbox.client.JestClientFactory;
+import io.searchbox.client.JestResult;
+import io.searchbox.client.config.HttpClientConfig;
+import io.searchbox.client.config.HttpClientConfig.Builder;
+import io.searchbox.core.Bulk;
+import io.searchbox.indices.CreateIndex;
+import io.searchbox.indices.DeleteIndex;
+import io.searchbox.indices.IndicesExists;
+import io.searchbox.indices.aliases.*;
+import io.searchbox.indices.mapping.GetMapping;
+import io.searchbox.indices.mapping.PutMapping;
+
+import io.searchbox.indices.settings.GetSettings;
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Created by xiongfeng.bxf on 17/2/8.
+ */
+public class ElasticSearchClient {
+ private static final Logger LOGGER = LoggerFactory.getLogger(ElasticSearchClient.class);
+
+ private JestClient jestClient;
+ private Configuration conf;
+
+ public JestClient getClient() {
+ return jestClient;
+ }
+
+ public ElasticSearchClient(Configuration conf) {
+ this.conf = conf;
+ String endpoint = Key.getEndpoint(conf);
+ // ES supports writing to a cluster: the endpoint may be a comma-separated list
+ String[] endpoints = endpoint.split(",");
+ String user = Key.getUsername(conf);
+ String passwd = Key.getPassword(conf);
+ boolean multiThread = Key.isMultiThread(conf);
+ int readTimeout = Key.getTimeout(conf);
+ boolean compression = Key.isCompression(conf);
+ boolean discovery = Key.isDiscovery(conf);
+ String discoveryFilter = Key.getDiscoveryFilter(conf);
+ int totalConnection = this.conf.getInt("maxTotalConnection", 200);
+ JestClientFactory factory = new JestClientFactory();
+ Builder httpClientConfig = new HttpClientConfig
+ .Builder(Arrays.asList(endpoints))
+// .setPreemptiveAuth(new HttpHost(endpoint))
+ .multiThreaded(multiThread)
+ .connTimeout(readTimeout)
+ .readTimeout(readTimeout)
+ .maxTotalConnection(totalConnection)
+ .requestCompressionEnabled(compression)
+ .discoveryEnabled(discovery)
+ .discoveryFrequency(5L, TimeUnit.MINUTES)
+ .discoveryFilter(discoveryFilter);
+ if (!(StringUtils.isBlank(user) || StringUtils.isBlank(passwd))) {
+ // credentials are set only when both username and password are provided
+ httpClientConfig.defaultCredentials(user, passwd);
+ }
+ factory.setHttpClientConfig(httpClientConfig.build());
+ this.jestClient = factory.getObject();
+ }
+
+ public boolean indicesExists(String indexName) throws Exception {
+ boolean isIndicesExists = false;
+ JestResult rst = execute(new IndicesExists.Builder(indexName).build());
+ if (rst.isSucceeded()) {
+ isIndicesExists = true;
+ } else {
+ LOGGER.warn("IndicesExists got ResponseCode: {} ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage());
+ switch (rst.getResponseCode()) {
+ case 404:
+ isIndicesExists = false;
+ break;
+ case 401:
+ // 401: no permission to access
+ default:
+ LOGGER.warn(rst.getErrorMessage());
+ break;
+ }
+ }
+ return isIndicesExists;
+ }
+
+ public boolean deleteIndex(String indexName) throws Exception {
+ LOGGER.info("delete index {}", indexName);
+ if (indicesExists(indexName)) {
+ JestResult rst = execute(new DeleteIndex.Builder(indexName).build());
+ if (!rst.isSucceeded()) {
+ LOGGER.warn("DeleteIndex got ResponseCode: {}, ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage());
+ return false;
+ } else {
+ LOGGER.info("delete index {} success", indexName);
+ }
+ } else {
+ LOGGER.info("index cannot found, skip delete index {}", indexName);
+ }
+ return true;
+ }
+
+ public boolean isGreaterOrEqualThan7() throws Exception {
+ try {
+ ClusterInfoResult result = execute(new ClusterInfo.Builder().build());
+ LOGGER.info("ClusterInfoResult: {}", result.getJsonString());
+ return result.isGreaterOrEqualThan7();
+ }catch(Exception e) {
+ LOGGER.warn(e.getMessage());
+ return false;
+ }
+ }
+
+ /**
+ * Get the settings of an index.
+ * @param indexName index name
+ * @return the settings as a JSON string
+ */
+ public String getIndexSettings(String indexName) {
+ GetSettings.Builder builder = new GetSettings.Builder();
+ builder.addIndex(indexName);
+ GetSettings getSettings = builder.build();
+ try {
+ LOGGER.info("begin GetSettings for index: {}", indexName);
+ JestResult result = this.execute(getSettings);
+ return result.getJsonString();
+ } catch (Exception e) {
+ String message = "GetSettings for index error: " + e.getMessage();
+ LOGGER.warn(message, e);
+ throw DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_GET_SETTINGS, e.getMessage(), e);
+ }
+ }
+
+ public boolean createIndexIfNotExists(String indexName, String typeName,
+ Object mappings, String settings,
+ boolean dynamic, boolean isGreaterOrEqualThan7) throws Exception {
+ JestResult rst;
+ if (!indicesExists(indexName)) {
+ LOGGER.info("create index {}", indexName);
+ rst = execute(
+ new CreateIndex.Builder(indexName)
+ .settings(settings)
+ .setParameter("master_timeout", Key.getMasterTimeout(this.conf))
+ .build()
+ );
+ //index_already_exists_exception
+ if (!rst.isSucceeded()) {
+ LOGGER.warn("CreateIndex got ResponseCode: {}, ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage());
+ if (getStatus(rst) == 400) {
+ LOGGER.info("index {} already exists", indexName);
+ return true;
+ } else {
+ return false;
+ }
+ } else {
+ LOGGER.info("create {} index success", indexName);
+ }
+ }
+
+ if (dynamic) {
+ LOGGER.info("dynamic is true, ignore mappings");
+ return true;
+ }
+ LOGGER.info("create mappings for {} {}", indexName, mappings);
+ // For ES 7.x and later, the PUT mapping URI must not contain a type, and the mapping body must not be nested under the type name
+ if (isGreaterOrEqualThan7) {
+ rst = execute(new PutMapping7.Builder(indexName, mappings).
+ setParameter("master_timeout", Key.getMasterTimeout(this.conf)).build());
+ } else {
+ rst = execute(new PutMapping.Builder(indexName, typeName, mappings)
+ .setParameter("master_timeout", Key.getMasterTimeout(this.conf)).build());
+ }
+ if (!rst.isSucceeded()) {
+ LOGGER.error("PutMapping got ResponseCode: {}, ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage());
+ return false;
+ } else {
+ LOGGER.info("index {} put mappings success", indexName);
+ }
+ return true;
+ }
+
+ public <T extends JestResult> T execute(Action<T> clientRequest) throws IOException {
+ T rst = jestClient.execute(clientRequest);
+ if (!rst.isSucceeded()) {
+ LOGGER.warn(rst.getJsonString());
+ }
+ return rst;
+ }
+
+ public Integer getStatus(JestResult rst) {
+ JsonObject jsonObject = rst.getJsonObject();
+ if (jsonObject.has("status")) {
+ return jsonObject.get("status").getAsInt();
+ }
+ return 600;
+ }
+
+ public boolean isBulkResult(JestResult rst) {
+ JsonObject jsonObject = rst.getJsonObject();
+ return jsonObject.has("items");
+ }
+
+
+ public boolean alias(String indexname, String aliasname, boolean needClean) throws IOException {
+ GetAliases getAliases = new GetAliases.Builder().addIndex(aliasname).build();
+ AliasMapping addAliasMapping = new AddAliasMapping.Builder(indexname, aliasname).build();
+ JestResult rst = null;
+ List<AliasMapping> list = new ArrayList<AliasMapping>();
+ if (needClean) {
+ rst = execute(getAliases);
+ if (rst.isSucceeded()) {
+ JsonParser jp = new JsonParser();
+ JsonObject jo = (JsonObject) jp.parse(rst.getJsonString());
+ for (Map.Entry<String, JsonElement> entry : jo.entrySet()) {
+ String tindex = entry.getKey();
+ if (indexname.equals(tindex)) {
+ continue;
+ }
+ AliasMapping m = new RemoveAliasMapping.Builder(tindex, aliasname).build();
+ String s = new Gson().toJson(m.getData());
+ LOGGER.info(s);
+ list.add(m);
+ }
+ }
+ }
+
+ ModifyAliases modifyAliases = new ModifyAliases.Builder(addAliasMapping).addAlias(list).setParameter("master_timeout", Key.getMasterTimeout(this.conf)).build();
+ rst = execute(modifyAliases);
+ if (!rst.isSucceeded()) {
+ LOGGER.error(rst.getErrorMessage());
+ throw new IOException(rst.getErrorMessage());
+ }
+ return true;
+ }
+
+ /**
+ * Get the mapping of an index.
+ */
+ public String getIndexMapping(String indexName) {
+ GetMapping.Builder builder = new GetMapping.Builder();
+ builder.addIndex(indexName);
+ GetMapping getMapping = builder.build();
+ try {
+ LOGGER.info("begin GetMapping for index: {}", indexName);
+ JestResult result = this.execute(getMapping);
+ return result.getJsonString();
+ } catch (Exception e) {
+ String message = "GetMapping for index error: " + e.getMessage();
+ LOGGER.warn(message, e);
+ throw DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_MAPPINGS, e.getMessage(), e);
+ }
+ }
+
+ public String getMappingForIndexType(String indexName, String typeName) {
+ String indexMapping = this.getIndexMapping(indexName);
+ JSONObject indexMappingInJson = JSON.parseObject(indexMapping);
+ List<String> paths = Arrays.asList(indexName, "mappings");
+ JSONObject properties = JsonPathUtil.getJsonObject(paths, indexMappingInJson);
+ JSONObject propertiesParent = properties;
+ if (StringUtils.isNotBlank(typeName) && properties.containsKey(typeName)) {
+ propertiesParent = (JSONObject) properties.get(typeName);
+ }
+ JSONObject mapping = (JSONObject) propertiesParent.get("properties");
+ return JSON.toJSONString(mapping);
+ }
+
+ public JestResult bulkInsert(Bulk.Builder bulk) throws Exception {
+ // es_rejected_execution_exception
+ // illegal_argument_exception
+ // cluster_block_exception
+ JestResult rst = null;
+ rst = execute(bulk.build());
+ if (!rst.isSucceeded()) {
+ LOGGER.warn(rst.getErrorMessage());
+ }
+ return rst;
+ }
+
+ /**
+ * Close the JestClient.
+ *
+ */
+ public void closeJestClient() {
+ if (jestClient != null) {
+ try {
+ // jestClient.shutdownClient();
+ jestClient.close();
+ } catch (IOException e) {
+ LOGGER.warn("ignore error: ", e.getMessage());
+ }
+
+ }
+ }
+}
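
The rewritten client probes the cluster version once (isGreaterOrEqualThan7) so callers can decide whether the mapping body may still be nested under a type name. A hedged sketch of the create-index flow built on that switch; it assumes the Key accessors used by the constructor (endpoint, credentials, timeouts) fall back to defaults when omitted, and the config key names ("endpoint", "index", "type") and all values are placeholders.

import com.alibaba.datax.common.util.Configuration;

// Hedged sketch of driving ElasticSearchClient; everything cluster-specific here is assumed.
public class EsClientFlowDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = Configuration.from(
                "{\"endpoint\":\"http://127.0.0.1:9200\",\"index\":\"demo_idx\",\"type\":\"_doc\"}");
        ElasticSearchClient client = new ElasticSearchClient(conf);
        try {
            // On 7.x+ the mapping body is flat; before 7.x it is nested under the type name.
            boolean ge7 = client.isGreaterOrEqualThan7();
            String mappings = ge7
                    ? "{\"properties\":{\"name\":{\"type\":\"keyword\"}}}"
                    : "{\"_doc\":{\"properties\":{\"name\":{\"type\":\"keyword\"}}}}";
            String settings = "{\"index\":{\"number_of_shards\":1}}";
            boolean ok = client.createIndexIfNotExists("demo_idx", "_doc", mappings, settings,
                    false /* dynamic */, ge7);
            System.out.println("index ready: " + ok);
        } finally {
            client.closeJestClient();
        }
    }
}
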
diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchColumn.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchColumn.java
new file mode 100644
index 00000000..a27b15b2
--- /dev/null
+++ b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchColumn.java
@@ -0,0 +1,126 @@
+package com.alibaba.datax.plugin.writer.elasticsearchwriter;
+
+import java.util.List;
+
+/**
+ * Created by xiongfeng.bxf on 17/3/2.
+ */
+public class ElasticSearchColumn {
+
+ private String name;//: "appkey",
+
+ private String type;//": "TEXT",
+
+ private String timezone;
+
+ /**
+ * Source-side formatting, handled by DataX.
+ */
+ private String format;
+
+ /**
+ * Destination-side format, natively supported by ES.
+ */
+ private String dstFormat;
+
+ private boolean array;
+
+ /**
+ * Whether to use the destination-side (ES native) array type.
+ *
+ * Defaults to false.
+ */
+ private boolean dstArray = false;
+
+ private boolean jsonArray;
+
+ private boolean origin;
+
+ private List<String> combineFields;
+
+ private String combineFieldsValueSeparator = "-";
+
+ public String getCombineFieldsValueSeparator() {
+ return combineFieldsValueSeparator;
+ }
+
+ public void setCombineFieldsValueSeparator(String combineFieldsValueSeparator) {
+ this.combineFieldsValueSeparator = combineFieldsValueSeparator;
+ }
+
+ public List<String> getCombineFields() {
+ return combineFields;
+ }
+
+ public void setCombineFields(List<String> combineFields) {
+ this.combineFields = combineFields;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public void setType(String type) {
+ this.type = type;
+ }
+
+ public void setTimeZone(String timezone) {
+ this.timezone = timezone;
+ }
+
+ public void setFormat(String format) {
+ this.format = format;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public String getType() {
+ return type;
+ }
+
+ public boolean isOrigin() { return origin; }
+
+ public void setOrigin(boolean origin) { this.origin = origin; }
+
+ public String getTimezone() {
+ return timezone;
+ }
+
+ public String getFormat() {
+ return format;
+ }
+
+ public void setTimezone(String timezone) {
+ this.timezone = timezone;
+ }
+
+ public boolean isArray() {
+ return array;
+ }
+
+ public void setArray(boolean array) {
+ this.array = array;
+ }
+
+ public boolean isJsonArray() {return jsonArray;}
+
+ public void setJsonArray(boolean jsonArray) {this.jsonArray = jsonArray;}
+
+ public String getDstFormat() {
+ return dstFormat;
+ }
+
+ public void setDstFormat(String dstFormat) {
+ this.dstFormat = dstFormat;
+ }
+
+ public boolean isDstArray() {
+ return dstArray;
+ }
+
+ public void setDstArray(boolean dstArray) {
+ this.dstArray = dstArray;
+ }
+}
diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESFieldType.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchFieldType.java
similarity index 73%
rename from elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESFieldType.java
rename to elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchFieldType.java
index 14b09689..22c3ee6b 100644
--- a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESFieldType.java
+++ b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchFieldType.java
@@ -3,8 +3,11 @@ package com.alibaba.datax.plugin.writer.elasticsearchwriter;
/**
* Created by xiongfeng.bxf on 17/3/1.
*/
-public enum ESFieldType {
+public enum ElasticSearchFieldType {
ID,
+ PARENT,
+ ROUTING,
+ VERSION,
STRING,
TEXT,
KEYWORD,
@@ -24,20 +27,18 @@ public enum ESFieldType {
DATE_RANGE,
GEO_POINT,
GEO_SHAPE,
-
IP,
+ IP_RANGE,
COMPLETION,
TOKEN_COUNT,
-
- ARRAY,
OBJECT,
NESTED;
- public static ESFieldType getESFieldType(String type) {
+ public static ElasticSearchFieldType getESFieldType(String type) {
if (type == null) {
return null;
}
- for (ESFieldType f : ESFieldType.values()) {
+ for (ElasticSearchFieldType f : ElasticSearchFieldType.values()) {
if (f.name().compareTo(type.toUpperCase()) == 0) {
return f;
}
diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchWriter.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchWriter.java
new file mode 100644
index 00000000..2c8ed2d0
--- /dev/null
+++ b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchWriter.java
@@ -0,0 +1,1117 @@
+package com.alibaba.datax.plugin.writer.elasticsearchwriter;
+
+import com.alibaba.datax.common.element.Column;
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.RecordReceiver;
+import com.alibaba.datax.common.spi.Writer;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.common.util.DataXCaseEnvUtil;
+import com.alibaba.datax.common.util.RetryUtil;
+import com.alibaba.datax.plugin.writer.elasticsearchwriter.Key.ActionType;
+import com.alibaba.fastjson2.JSON;
+import com.alibaba.fastjson2.JSONArray;
+import com.alibaba.fastjson2.JSONObject;
+import com.alibaba.fastjson2.TypeReference;
+import com.alibaba.fastjson2.JSONWriter;
+import com.google.common.base.Joiner;
+import io.searchbox.client.JestResult;
+import io.searchbox.core.*;
+import io.searchbox.params.Parameters;
+import org.apache.commons.lang3.StringUtils;
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.*;
+import java.util.concurrent.Callable;
+
+public class ElasticSearchWriter extends Writer {
+ private final static String WRITE_COLUMNS = "write_columns";
+
+ public static class Job extends Writer.Job {
+ private static final Logger LOGGER = LoggerFactory.getLogger(Job.class);
+
+ private Configuration conf = null;
+ int retryTimes = 3;
+ long sleepTimeInMilliSecond = 10000L;
+
+ private String settingsCache;
+
+ private void setSettings(String settings) {
+ this.settingsCache = JsonUtil.mergeJsonStr(settings, this.settingsCache);
+ }
+
+ @Override
+ public void init() {
+ this.conf = super.getPluginJobConf();
+ //LOGGER.info("conf:{}", conf);
+ this.retryTimes = this.conf.getInt("retryTimes", 3);
+ this.sleepTimeInMilliSecond = this.conf.getLong("sleepTimeInMilliSecond", 10000L);
+ }
+
+ public List<String> getIncludeSettings() {
+ return this.conf.getList("includeSettingKeys", Arrays.asList("number_of_shards", "number_of_replicas"), String.class);
+ }
+
+ /**
+ * Convert the raw settings fetched from ES into the settings we need.
+ * @param originSettings raw settings
+ * @return filtered settings
+ */
+ private String convertSettings(String originSettings) {
+ if(StringUtils.isBlank(originSettings)) {
+ return null;
+ }
+ JSONObject jsonObject = JSON.parseObject(originSettings);
+ for(String key : jsonObject.keySet()) {
+ JSONObject settingsObj = jsonObject.getJSONObject(key);
+ if(settingsObj != null) {
+ JSONObject indexObj = settingsObj.getJSONObject("settings");
+ JSONObject settings = indexObj.getJSONObject("index");
+ JSONObject filterSettings = new JSONObject();
+ if(settings != null) {
+ List<String> includeSettings = getIncludeSettings();
+ if(includeSettings != null && includeSettings.size() > 0) {
+ for(String includeSetting : includeSettings) {
+ Object fieldValue = settings.get(includeSetting);
+ if(fieldValue != null) {
+ filterSettings.put(includeSetting, fieldValue);
+ }
+ }
+ return filterSettings.toJSONString();
+ }
+ }
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public void prepare() {
+ /**
+ * Note: this method is executed only once.
+ * Best practice: do any pre-sync preparation for the job here; remove it if unnecessary.
+ * For ES 7.x and later, an index no longer carries a type, so prepare() checks whether the cluster is 7.x or above
+ * and handles the index type differently in that case.
+ * See: https://www.elastic.co/guide/en/elasticsearch/reference/6.8/removal-of-types.html
+ */
+ final ElasticSearchClient esClient = new ElasticSearchClient(this.conf);
+ final String indexName = Key.getIndexName(conf);
+ ActionType actionType = Key.getActionType(conf);
+ final String typeName = Key.getTypeName(conf);
+ final boolean dynamic = Key.getDynamic(conf);
+ final String dstDynamic = Key.getDstDynamic(conf);
+ final String newSettings = JSONObject.toJSONString(Key.getSettings(conf));
+ LOGGER.info("conf settings:{}, settingsCache:{}", newSettings, this.settingsCache);
+ final Integer esVersion = Key.getESVersion(conf);
+ boolean hasId = this.hasID();
+ this.conf.set("hasId", hasId);
+ if (ActionType.UPDATE.equals(actionType) && !hasId && !hasPrimaryKeyInfo()) {
+ throw DataXException.asDataXException(ElasticSearchWriterErrorCode.UPDATE_WITH_ID, "Update mode must specify column type with id or primaryKeyInfo config");
+ }
+
+ try {
+ RetryUtil.executeWithRetry(() -> {
+ boolean isGreaterOrEqualThan7 = esClient.isGreaterOrEqualThan7();
+ if (esVersion != null && esVersion >= 7) {
+ isGreaterOrEqualThan7 = true;
+ }
+ String mappings = genMappings(dstDynamic, typeName, isGreaterOrEqualThan7);
+ conf.set("isGreaterOrEqualThan7", isGreaterOrEqualThan7);
+
+
+ LOGGER.info(String.format("index:[%s], type:[%s], mappings:[%s]", indexName, typeName, mappings));
+ boolean isIndicesExists = esClient.indicesExists(indexName);
+ if (isIndicesExists) {
+ try {
+ // log the existing mapping to help troubleshooting
+ String oldMappings = esClient.getMappingForIndexType(indexName, typeName);
+ LOGGER.info("the mappings for old index is: {}", oldMappings);
+ } catch (Exception e) {
+ LOGGER.warn("warn message: {}", e.getMessage());
+ }
+ }
+
+ if (Key.isTruncate(conf) && isIndicesExists) {
+ // back up the settings of the old index into the cache
+ try {
+ String oldOriginSettings = esClient.getIndexSettings(indexName);
+ if (StringUtils.isNotBlank(oldOriginSettings)) {
+ String includeSettings = convertSettings(oldOriginSettings);
+ LOGGER.info("merge1 settings:{}, settingsCache:{}, includeSettings:{}",
+ oldOriginSettings,
+ this.settingsCache, includeSettings);
+ this.setSettings(includeSettings);
+ }
+ } catch (Exception e) {
+ LOGGER.warn("get old settings fail, indexName:{}", indexName);
+ }
+ esClient.deleteIndex(indexName);
+ }
+
+ // update the cached settings
+ this.setSettings(newSettings);
+ LOGGER.info("merge2 settings:{}, settingsCache:{}", newSettings, this.settingsCache);
+ // force create; an already existing index is ignored internally
+ if (!esClient.createIndexIfNotExists(indexName, typeName, mappings, this.settingsCache, dynamic,
+ isGreaterOrEqualThan7)) {
+ throw DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_MAPPINGS, "");
+ }
+
+ return true;
+ }, DataXCaseEnvUtil.getRetryTimes(this.retryTimes), DataXCaseEnvUtil.getRetryInterval(this.sleepTimeInMilliSecond), DataXCaseEnvUtil.getRetryExponential(false));
+ } catch (Exception ex) {
+ throw DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_MAPPINGS, ex.getMessage(), ex);
+ } finally {
+ try {
+ esClient.closeJestClient();
+ } catch (Exception e) {
+ LOGGER.warn("ignore close jest client error: {}", e.getMessage());
+ }
+ }
+ }
+
+ private boolean hasID() {
+ List column = conf.getList("column");
+ if (column != null) {
+ for (Object col : column) {
+ JSONObject jo = JSONObject.parseObject(col.toString());
+ String colTypeStr = jo.getString("type");
+ ElasticSearchFieldType colType = ElasticSearchFieldType.getESFieldType(colTypeStr);
+ if (ElasticSearchFieldType.ID.equals(colType)) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ private boolean hasPrimaryKeyInfo() {
+ PrimaryKeyInfo primaryKeyInfo = Key.getPrimaryKeyInfo(this.conf);
+ if (null != primaryKeyInfo && null != primaryKeyInfo.getColumn() && !primaryKeyInfo.getColumn().isEmpty()) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+
+ private String genMappings(String dstDynamic, String typeName, boolean isGreaterOrEqualThan7) {
+ String mappings;
+ Map<String, Object> propMap = new HashMap<String, Object>();
+ List<ElasticSearchColumn> columnList = new ArrayList<ElasticSearchColumn>();
+ ElasticSearchColumn combineItem = null;
+
+ List column = conf.getList("column");
+ if (column != null) {
+ for (Object col : column) {
+ JSONObject jo = JSONObject.parseObject(col.toString());
+ String colName = jo.getString("name");
+ String colTypeStr = jo.getString("type");
+ if (colTypeStr == null) {
+ throw DataXException.asDataXException(ElasticSearchWriterErrorCode.BAD_CONFIG_VALUE, col.toString() + " column must have type");
+ }
+ ElasticSearchFieldType colType = ElasticSearchFieldType.getESFieldType(colTypeStr);
+ if (colType == null) {
+ throw DataXException.asDataXException(ElasticSearchWriterErrorCode.BAD_CONFIG_VALUE, col.toString() + " unsupported type");
+ }
+
+ ElasticSearchColumn columnItem = new ElasticSearchColumn();
+
+ if (Key.PRIMARY_KEY_COLUMN_NAME.equals(colName)) {
+ // keep compatibility with existing versions
+ colType = ElasticSearchFieldType.ID;
+ colTypeStr = "id";
+ }
+
+ columnItem.setName(colName);
+ columnItem.setType(colTypeStr);
+
+ JSONArray combineFields = jo.getJSONArray("combineFields");
+ if (combineFields != null && !combineFields.isEmpty() && ElasticSearchFieldType.ID.equals(ElasticSearchFieldType.getESFieldType(colTypeStr))) {
+ List<String> fields = new ArrayList<String>();
+ for (Object item : combineFields) {
+ fields.add((String) item);
+ }
+ columnItem.setCombineFields(fields);
+ combineItem = columnItem;
+ }
+
+ String combineFieldsValueSeparator = jo.getString("combineFieldsValueSeparator");
+ if (StringUtils.isNotBlank(combineFieldsValueSeparator)) {
+ columnItem.setCombineFieldsValueSeparator(combineFieldsValueSeparator);
+ }
+
+ // id, version and routing columns do not need a mapping entry
+ if (colType == ElasticSearchFieldType.ID || colType == ElasticSearchFieldType.VERSION || colType == ElasticSearchFieldType.ROUTING) {
+ columnList.add(columnItem);
+ continue;
+ }
+
+ // fields that are part of the combined id do not need a mapping entry,
+ // so the combined id definition must come first in "column"
+ if (combineItem != null && combineItem.getCombineFields().contains(colName)) {
+ columnList.add(columnItem);
+ continue;
+ }
+ columnItem.setDstArray(false);
+ Boolean array = jo.getBoolean("array");
+ if (array != null) {
+ columnItem.setArray(array);
+ Boolean dstArray = jo.getBoolean("dstArray");
+ if(dstArray!=null) {
+ columnItem.setDstArray(dstArray);
+ }
+ } else {
+ columnItem.setArray(false);
+ }
+ Boolean jsonArray = jo.getBoolean("json_array");
+ if (jsonArray != null) {
+ columnItem.setJsonArray(jsonArray);
+ } else {
+ columnItem.setJsonArray(false);
+ }
+ Map<String, Object> field = new HashMap<String, Object>();
+ field.put("type", colTypeStr);
+ //https://www.elastic.co/guide/en/elasticsearch/reference/5.2/breaking_50_mapping_changes.html#_literal_index_literal_property
+ // https://www.elastic.co/guide/en/elasticsearch/guide/2.x/_deep_dive_on_doc_values.html#_disabling_doc_values
+ field.put("doc_values", jo.getBoolean("doc_values"));
+ field.put("ignore_above", jo.getInteger("ignore_above"));
+ field.put("index", jo.getBoolean("index"));
+ switch (colType) {
+ case STRING:
+ // string type kept for compatibility with pre-ES5 versions
+ break;
+ case KEYWORD:
+ // https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-search-speed.html#_warm_up_global_ordinals
+ field.put("eager_global_ordinals", jo.getBoolean("eager_global_ordinals"));
+ break;
+ case TEXT:
+ field.put("analyzer", jo.getString("analyzer"));
+ // optimize disk usage; this also improves indexing performance
+ // https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-disk-usage.html
+ field.put("norms", jo.getBoolean("norms"));
+ field.put("index_options", jo.getBoolean("index_options"));
+ if(jo.getString("fields") != null) {
+ field.put("fields", jo.getJSONObject("fields"));
+ }
+ break;
+ case DATE:
+ if (Boolean.TRUE.equals(jo.getBoolean("origin"))) {
+ if (jo.getString("format") != null) {
+ field.put("format", jo.getString("format"));
+ }
+ // the native ES format (dstFormat) overrides the original format
+ if (jo.getString("dstFormat") != null) {
+ field.put("format", jo.getString("dstFormat"));
+ }
+ if(jo.getBoolean("origin") != null) {
+ columnItem.setOrigin(jo.getBoolean("origin"));
+ }
+ } else {
+ columnItem.setTimeZone(jo.getString("timezone"));
+ columnItem.setFormat(jo.getString("format"));
+ }
+ break;
+ case GEO_SHAPE:
+ field.put("tree", jo.getString("tree"));
+ field.put("precision", jo.getString("precision"));
+ break;
+ case OBJECT:
+ case NESTED:
+ if (jo.getString("dynamic") != null) {
+ field.put("dynamic", jo.getString("dynamic"));
+ }
+ break;
+ default:
+ break;
+ }
+ if (jo.containsKey("other_params")) {
+ field.putAll(jo.getJSONObject("other_params"));
+ }
+ propMap.put(colName, field);
+ columnList.add(columnItem);
+ }
+ }
+
+ long version = System.currentTimeMillis();
+ LOGGER.info("unified version: {}", version);
+ conf.set("version", version);
+ conf.set(WRITE_COLUMNS, JSON.toJSONString(columnList));
+
+ LOGGER.info(JSON.toJSONString(columnList));
+
+ Map<String, Object> rootMappings = new HashMap<String, Object>();
+ Map<String, Object> typeMappings = new HashMap<String, Object>();
+ typeMappings.put("properties", propMap);
+ rootMappings.put(typeName, typeMappings);
+
+ // From 7.x on, an index no longer specifies a type, so the mapping can only take the form:
+ // {
+ // "properties" : {
+ // "abc" : {
+ // "type" : "text"
+ // }
+ // }
+ // }
+ // the typeName can no longer wrap "properties"
+
+ if(StringUtils.isNotBlank(dstDynamic)) {
+ typeMappings.put("dynamic", dstDynamic);
+ }
+ if (isGreaterOrEqualThan7) {
+ mappings = JSON.toJSONString(typeMappings);
+ } else {
+ mappings = JSON.toJSONString(rootMappings);
+ }
+ if (StringUtils.isBlank(mappings)) {
+ throw DataXException.asDataXException(ElasticSearchWriterErrorCode.BAD_CONFIG_VALUE, "must have mappings");
+ }
+
+ return mappings;
+ }
+
+ @Override
+ public List split(int mandatoryNumber) {
+ List<Configuration> configurations = new ArrayList<Configuration>(mandatoryNumber);
+ for (int i = 0; i < mandatoryNumber; i++) {
+ configurations.add(this.conf.clone());
+ }
+ return configurations;
+ }
+
+ @Override
+ public void post() {
+ ElasticSearchClient esClient = new ElasticSearchClient(this.conf);
+ String alias = Key.getAlias(conf);
+ if (!"".equals(alias)) {
+ LOGGER.info(String.format("alias [%s] to [%s]", alias, Key.getIndexName(conf)));
+ try {
+ esClient.alias(Key.getIndexName(conf), alias, Key.isNeedCleanAlias(conf));
+ } catch (IOException e) {
+ throw DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_ALIAS_MODIFY, e);
+ }
+ }
+ }
+
+ @Override
+ public void destroy() {
+
+ }
+ }
+
+ public static class Task extends Writer.Task {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(Job.class);
+
+ private Configuration conf;
+
+
+ ElasticSearchClient esClient = null;
+ private List typeList;
+ private List columnList;
+ private List