From 5e54aee590f7ad5970b28c011d8c0e34d00c09ab Mon Sep 17 00:00:00 2001 From: zyyang Date: Tue, 19 Oct 2021 15:40:11 +0800 Subject: [PATCH] tdenginewriter support batch write --- core/src/main/job/opentsdb2tdengine.json | 3 +- tdenginewriter/doc/tdenginewriter.md | 151 ++++++++++++++++++ .../datax/plugin/writer/JniConnection.java | 5 - .../com/alibaba/datax/plugin/writer/Key.java | 11 ++ .../datax/plugin/writer/TDengineWriter.java | 77 ++++----- 5 files changed, 205 insertions(+), 42 deletions(-) create mode 100644 tdenginewriter/doc/tdenginewriter.md create mode 100644 tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/Key.java diff --git a/core/src/main/job/opentsdb2tdengine.json b/core/src/main/job/opentsdb2tdengine.json index dd332202..377b98c9 100644 --- a/core/src/main/job/opentsdb2tdengine.json +++ b/core/src/main/job/opentsdb2tdengine.json @@ -20,7 +20,8 @@ "port": 6030, "dbname": "test", "user": "root", - "password": "taosdata" + "password": "taosdata", + "batchSize": 1000 } } } diff --git a/tdenginewriter/doc/tdenginewriter.md b/tdenginewriter/doc/tdenginewriter.md new file mode 100644 index 00000000..8e55b189 --- /dev/null +++ b/tdenginewriter/doc/tdenginewriter.md @@ -0,0 +1,151 @@ +# DataX TDengineWriter + +## 1 快速介绍 + +TDengineWriter 插件实现了写入数据到 TDengine 的功能。 在底层实现上, TDengineWriter 通过 JNI的方式调用libtaos.so/tao.dll中的方法,连接 TDengine +数据库实例,并执行schemaless的写入。 TDengineWriter 面向ETL开发工程师,他们使用 TDengineWriter 从数仓导入数据到 TDengine。同时,TDengineWriter +亦可以作为数据迁移工具为DBA等用户提供服务。 + +## 2 实现原理 + +TDengineWriter 通过 DataX 框架获取 Reader +生成的协议数据,根据reader的类型解析数据,通过JNI方式调用libtaos.so(或taos.dll)中的方法,使用schemaless的方式写入到TDengine。 + +## 3 功能说明 + +### 3.1 配置样例 + +* 这里使用一份从OpenTSDB产生到 TDengine 导入的数据。 + +```json +{ + "job": { + "content": [ + { + "reader": { + "name": "opentsdbreader", + "parameter": { + "endpoint": "http://192.168.1.180:4242", + "column": [ + "weather_temperature" + ], + "beginDateTime": "2021-01-01 00:00:00", + "endDateTime": "2021-01-01 01:00:00" + } + }, + 
"writer": { + "name": "tdenginewriter", + "parameter": { + "host": "192.168.1.180", + "port": 6030, + "dbname": "test", + "user": "root", + "password": "taosdata" + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} +``` + +### 3.2 参数说明 + +* **host** + * 描述:TDengine实例的host。 + + * 必选:是
+ + * 默认值:无
+* **port** + * 描述:TDengine实例的port。 + * 必选:是
+ * 默认值:无
+* **dbname** + * 描述:目的数据库的名称。 + + * 必选:是
+ + * 默认值:无
+* **user** + * 描述:TDengine实例的用户名
+ * 必选:是
+ * 默认值:无
+* **password** + * 描述:TDengine实例的密码
+ * 必选:是
+ * 默认值:无
+ +### 3.3 类型转换 + +目前,由于opentsdbreader将opentsdb的数据统一读取为json字符串,TDengineWriter 在做Opentsdb到TDengine的迁移时,按照以下类型进行处理: + +| OpenTSDB数据类型 | DataX 内部类型| TDengine 数据类型 | +| -------- | ----- | -------- | +| timestamp | Date | timestamp | +| Integer(value) | Double | double | +| Float(value) | Double | double | +| String(value) | String | binary | +| Integer(tag) | String | binary | +| Float(tag) | String |binary | +| String(tag) | String |binary | + +## 4 性能报告 + +### 4.1 环境准备 + +#### 4.1.1 数据特征 + +建表语句: + +单行记录类似于: + +#### 4.1.2 机器参数 + +* 执行DataX的机器参数为: + 1. cpu: + 2. mem: + 3. net: 千兆双网卡 + 4. disc: DataX 数据不落磁盘,不统计此项 + +* TDengine数据库机器参数为: + 1. cpu: + 2. mem: + 3. net: 千兆双网卡 + 4. disc: + +#### 4.1.3 DataX jvm 参数 + + -Xms1024m -Xmx1024m -XX:+HeapDumpOnOutOfMemoryError + +### 4.2 测试报告 + +#### 4.2.1 单表测试报告 + +| 通道数| DataX速度(Rec/s)|DataX流量(MB/s)| DataX机器网卡流出流量(MB/s)|DataX机器运行负载|DB网卡进入流量(MB/s)|DB运行负载|DB TPS| +|--------| --------|--------|--------|--------|--------|--------|--------| +|1| | | | | | | | +|4| | | | | | | | +|8| | | | | | | | +|16| | | | | | | | +|32| | | | | | | | + +说明: + +1. 这里的单表,主键类型为 bigint(20),自增。 +2. batchSize 和 通道个数,对性能影响较大。 +3. 16通道,4096批量提交时,出现 full gc 2次。 + +#### 4.2.4 性能测试小结 + +1. +2. 
+ +## 5 约束限制 + +## FAQ \ No newline at end of file diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java index a8739c54..b1670633 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java @@ -18,11 +18,6 @@ public class JniConnection { } public JniConnection(Properties props) throws Exception { - if (this.conn != JNI_NULL_POINTER) { - close(); - this.conn = JNI_NULL_POINTER; - } - initImp(props.getProperty(PROPERTY_KEY_CONFIG_DIR, null)); String locale = props.getProperty(PROPERTY_KEY_LOCALE); diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/Key.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/Key.java new file mode 100644 index 00000000..b240bce4 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/Key.java @@ -0,0 +1,11 @@ +package com.alibaba.datax.plugin.writer; + +public class Key { + public static final String HOST = "host"; + public static final String PORT = "port"; + public static final String DBNAME = "dbname"; + public static final String USER = "user"; + public static final String PASSWORD = "password"; + public static final String BATCH_SIZE = "batchSize"; + +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java index 28fa4ca5..60c76522 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java @@ -10,19 +10,12 @@ import com.alibaba.datax.common.util.Configuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; 
-import java.util.Properties; +import java.util.*; public class TDengineWriter extends Writer { - private static final String HOST = "host"; - private static final String PORT = "port"; - private static final String DBNAME = "dbname"; - private static final String USER = "user"; - private static final String PASSWORD = "password"; private static final String PEER_PLUGIN_NAME = "peerPluginName"; + private static final String DEFAULT_BATCH_SIZE = "1"; public static class Job extends Writer.Job { @@ -45,7 +38,6 @@ public class TDengineWriter extends Writer { for (int i = 0; i < mandatoryNumber; i++) { writerSplitConfigs.add(this.originalConfig); } - return writerSplitConfigs; } } @@ -68,53 +60,66 @@ public class TDengineWriter extends Writer { @Override public void startWrite(RecordReceiver lineReceiver) { - - - String host = this.writerSliceConfig.getString(HOST); - int port = this.writerSliceConfig.getInt(PORT); - String dbname = this.writerSliceConfig.getString(DBNAME); - String user = this.writerSliceConfig.getString(USER); - String password = this.writerSliceConfig.getString(PASSWORD); - + Set keys = this.writerSliceConfig.getKeys(); Properties properties = new Properties(); - String cfgdir = this.writerSliceConfig.getString(JniConnection.PROPERTY_KEY_CONFIG_DIR); - if (cfgdir != null && !cfgdir.isEmpty()) { - properties.setProperty(JniConnection.PROPERTY_KEY_CONFIG_DIR, cfgdir); - } - String timezone = this.writerSliceConfig.getString(JniConnection.PROPERTY_KEY_TIME_ZONE); - if (timezone != null && !timezone.isEmpty()) { - properties.setProperty(JniConnection.PROPERTY_KEY_TIME_ZONE, timezone); - } - String locale = this.writerSliceConfig.getString(JniConnection.PROPERTY_KEY_LOCALE); - if (locale != null && !locale.isEmpty()) { - properties.setProperty(JniConnection.PROPERTY_KEY_LOCALE, locale); - } - String charset = this.writerSliceConfig.getString(JniConnection.PROPERTY_KEY_CHARSET); - if (charset != null && !charset.isEmpty()) { - 
properties.setProperty(JniConnection.PROPERTY_KEY_CHARSET, charset); + for (String key : keys) { + String value = this.writerSliceConfig.getString(key); + properties.setProperty(key, value); } String peerPluginName = this.writerSliceConfig.getString(PEER_PLUGIN_NAME); if (peerPluginName.equals("opentsdbreader")) { + // opentsdb json protocol use JNI and schemaless API to write + + String host = properties.getProperty(Key.HOST); + int port = Integer.parseInt(properties.getProperty(Key.PORT)); + String dbname = properties.getProperty(Key.DBNAME); + String user = properties.getProperty(Key.USER); + String password = properties.getProperty(Key.PASSWORD); + try { JniConnection conn = new JniConnection(properties); conn.open(host, port, dbname, user, password); LOG.info("TDengine connection established, host: " + host + ", port: " + port + ", dbname: " + dbname + ", user: " + user); - writeOpentsdb(lineReceiver, conn); + + int batchSize = Integer.parseInt(properties.getProperty(Key.BATCH_SIZE, DEFAULT_BATCH_SIZE)); + writeOpentsdb(lineReceiver, conn, batchSize); conn.close(); LOG.info("TDengine connection closed"); } catch (Exception e) { LOG.error(e.getMessage()); e.printStackTrace(); } + } else { + // other } } - private void writeOpentsdb(RecordReceiver lineReceiver, JniConnection conn) { + private void writeOpentsdb(RecordReceiver lineReceiver, JniConnection conn, int batchSize) { try { Record record; + StringBuilder sb = new StringBuilder(); + long recordIndex = 1; while ((record = lineReceiver.getFromReader()) != null) { - String jsonData = recordToString(record); + if (batchSize == 1) { + String jsonData = recordToString(record); + LOG.debug(">>> " + jsonData); + conn.insertOpentsdbJson(jsonData); + } else if (recordIndex % batchSize == 1) { + sb.append("[").append(recordToString(record)).append(","); + } else if (recordIndex % batchSize == 0) { + sb.append(recordToString(record)).append("]"); + String jsonData = sb.toString(); + LOG.debug(">>> " + jsonData); + 
conn.insertOpentsdbJson(jsonData); + sb.delete(0, sb.length()); + } else { + sb.append(recordToString(record)).append(","); + } + recordIndex++; + } + if (sb.length() != 0 && sb.charAt(0) == '[') { + String jsonData = sb.deleteCharAt(sb.length() - 1).append("]").toString(); LOG.debug(">>> " + jsonData); conn.insertOpentsdbJson(jsonData); }