From d2ab6127546004569dfd5533a344a516131a618d Mon Sep 17 00:00:00 2001 From: zyyang Date: Tue, 12 Oct 2021 15:44:24 +0800 Subject: [PATCH 01/33] add tdengine writer plugin --- .../com/alibaba/datax/core/EngineTest.java | 18 +++ job/opentsdb2stream.json | 31 +++++ job/opentsdb2tdengine.json | 34 ++++++ package.xml | 7 ++ pom.xml | 92 +++++++------- tdenginewriter/pom.xml | 75 ++++++++++++ tdenginewriter/src/main/assembly/package.xml | 34 ++++++ .../datax/plugin/writer/JniConnection.java | 83 +++++++++++++ .../datax/plugin/writer/TDengineWriter.java | 113 ++++++++++++++++++ .../writer/TDengineWriterErrorCode.java | 31 +++++ ...libaba_datax_plugin_writer_JniConnection.h | 87 ++++++++++++++ tdenginewriter/src/main/resources/plugin.json | 9 ++ .../main/resources/plugin_job_template.json | 10 ++ .../plugin/writer/JniConnectionTest.java | 19 +++ 14 files changed, 598 insertions(+), 45 deletions(-) create mode 100644 core/src/test/java/com/alibaba/datax/core/EngineTest.java create mode 100644 job/opentsdb2stream.json create mode 100644 job/opentsdb2tdengine.json create mode 100644 tdenginewriter/pom.xml create mode 100755 tdenginewriter/src/main/assembly/package.xml create mode 100644 tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java create mode 100644 tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java create mode 100644 tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriterErrorCode.java create mode 100644 tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/com_alibaba_datax_plugin_writer_JniConnection.h create mode 100755 tdenginewriter/src/main/resources/plugin.json create mode 100644 tdenginewriter/src/main/resources/plugin_job_template.json create mode 100644 tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java diff --git a/core/src/test/java/com/alibaba/datax/core/EngineTest.java b/core/src/test/java/com/alibaba/datax/core/EngineTest.java new file 
mode 100644 index 00000000..3f36263b --- /dev/null +++ b/core/src/test/java/com/alibaba/datax/core/EngineTest.java @@ -0,0 +1,18 @@ +package com.alibaba.datax.core; + + +public class EngineTest { + + public static void main(String[] args) { + System.out.println(System.getProperty("java.library.path")); +// String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "/Users/yangzy/workspace/DataX/job/opentsdb2stream.json"}; + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "/Users/yangzy/workspace/DataX/job/opentsdb2tdengine.json"}; + System.setProperty("datax.home", "/Users/yangzy/workspace/DataX/target/datax/datax"); + try { + Engine.entry(params); + } catch (Throwable e) { + e.printStackTrace(); + } + } + +} \ No newline at end of file diff --git a/job/opentsdb2stream.json b/job/opentsdb2stream.json new file mode 100644 index 00000000..1ea43204 --- /dev/null +++ b/job/opentsdb2stream.json @@ -0,0 +1,31 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "opentsdbreader", + "parameter": { + "endpoint": "http://192.168.1.180:4242", + "column": [ + "weather.temperature" + ], + "beginDateTime": "2021-01-01 00:00:00", + "endDateTime": "2021-01-01 01:00:00" + } + }, + "writer": { + "name": "streamwriter", + "parameter": { + "encoding": "UTF-8", + "print": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/job/opentsdb2tdengine.json b/job/opentsdb2tdengine.json new file mode 100644 index 00000000..cdf2277b --- /dev/null +++ b/job/opentsdb2tdengine.json @@ -0,0 +1,34 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "opentsdbreader", + "parameter": { + "endpoint": "http://192.168.1.180:4242", + "column": [ + "weather.temperature" + ], + "beginDateTime": "2021-01-01 00:00:00", + "endDateTime": "2021-01-01 01:00:00" + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "host": "192.168.56.105", + "port": 6030, + "db": "test", + "user": "root", + 
"password": "taosdata" + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/package.xml b/package.xml index 882dd23b..a93e945c 100755 --- a/package.xml +++ b/package.xml @@ -238,6 +238,13 @@ datax + + tdenginewriter/target/datax/ + + **/*.* + + datax + otswriter/target/datax/ diff --git a/pom.xml b/pom.xml index 3bd75a31..cb635ad3 100644 --- a/pom.xml +++ b/pom.xml @@ -47,66 +47,68 @@ transformer - mysqlreader - drdsreader - sqlserverreader - postgresqlreader - kingbaseesreader - oraclereader + + + + + + odpsreader - otsreader - otsstreamreader + + txtfilereader - hdfsreader + streamreader - ossreader - ftpreader - mongodbreader + + + rdbmsreader - hbase11xreader - hbase094xreader - tsdbreader + + + opentsdbreader - cassandrareader - gdbreader - oceanbasev10reader + + + - mysqlwriter - drdswriter + + odpswriter txtfilewriter - ftpwriter - hdfswriter + + streamwriter - otswriter - oraclewriter - sqlserverwriter - postgresqlwriter - kingbaseeswriter - osswriter - mongodbwriter + + + + + + + adswriter - ocswriter + rdbmswriter - hbase11xwriter - hbase094xwriter - hbase11xsqlwriter - hbase11xsqlreader - elasticsearchwriter - tsdbwriter - adbpgwriter - gdbwriter - cassandrawriter - clickhousewriter - oscarwriter - oceanbasev10writer + + + + + + + + + + + + + plugin-rdbms-util plugin-unstructured-storage-util - hbase20xsqlreader - hbase20xsqlwriter - kuduwriter + + + + tdenginewriter diff --git a/tdenginewriter/pom.xml b/tdenginewriter/pom.xml new file mode 100644 index 00000000..7cef9ac2 --- /dev/null +++ b/tdenginewriter/pom.xml @@ -0,0 +1,75 @@ + + + + datax-all + com.alibaba.datax + 0.0.1-SNAPSHOT + + 4.0.0 + + com.alibaba.datax.tdenginewriter + tdenginewriter + 1.0.0 + + + 8 + 8 + + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + + junit + junit + ${junit-version} + test + + + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} 
+ ${project-sourceEncoding} + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + + \ No newline at end of file diff --git a/tdenginewriter/src/main/assembly/package.xml b/tdenginewriter/src/main/assembly/package.xml new file mode 100755 index 00000000..f7a7d0bf --- /dev/null +++ b/tdenginewriter/src/main/assembly/package.xml @@ -0,0 +1,34 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/writer/tdenginewriter + + + target/ + + tdenginewriter-1.0.0.jar + + plugin/writer/tdenginewriter + + + + + + false + plugin/writer/tdenginewriter/libs + runtime + + + diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java new file mode 100644 index 00000000..5cbbf2ae --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java @@ -0,0 +1,83 @@ +package com.alibaba.datax.plugin.writer; + +import java.util.Properties; + +public class JniConnection { + + private static final long JNI_NULL_POINTER = 0L; + private static final String PROPERTY_KEY_CONFIG_DIR = "cfgdir"; + private static final String PROPERTY_KEY_LOCALE = "locale"; + private static final String PROPERTY_KEY_CHARSET = "charset"; + private static final String PROPERTY_KEY_TIME_ZONE = "timezone"; + + private long psql; + + static { + System.loadLibrary("taos"); + } + + public JniConnection(Properties props) { + if (this.psql != JNI_NULL_POINTER) { + close(); + this.psql = JNI_NULL_POINTER; + } + + initImp(props.getProperty(PROPERTY_KEY_CONFIG_DIR, null)); + + String locale = props.getProperty(PROPERTY_KEY_LOCALE); + if (setOptions(0, locale) < 0) { + throw new RuntimeException("Failed to set locale: " + locale + ". 
System default will be used."); + } + String charset = props.getProperty(PROPERTY_KEY_CHARSET); + if (setOptions(1, charset) < 0) { + throw new RuntimeException("Failed to set charset: " + charset + ". System default will be used."); + } + String timezone = props.getProperty(PROPERTY_KEY_TIME_ZONE); + if (setOptions(2, timezone) < 0) { + throw new RuntimeException("Failed to set timezone: " + timezone + ". System default will be used."); + } + } + + public long open(String host, int port, String dbname, String user, String password) { + if (this.psql != JNI_NULL_POINTER) { + close(); + this.psql = JNI_NULL_POINTER; + } + + this.psql = connectImp(host, port, dbname, user, password); + if (this.psql == JNI_NULL_POINTER) { + String errMsg = getErrMsgImp(0); + throw new RuntimeException(errMsg); + } + return this.psql; + } + + public void close() { + int code = this.closeConnectionImp(this.psql); + if (code != 0) { + throw new RuntimeException("JNI closeConnection failed"); + } + this.psql = JNI_NULL_POINTER; + } + + private static native void initImp(String configDir); + + private static native int setOptions(int optionIndex, String optionValue); + + private static native String getTsCharset(); + + private native long connectImp(String host, int port, String dbName, String user, String password); + + private native long executeQueryImp(byte[] sqlBytes, long connection); + + private native int getErrCodeImp(long connection, long pSql); + + private native String getErrMsgImp(long pSql); + + private native int getAffectedRowsImp(long connection, long pSql); + + private native int closeConnectionImp(long connection); + + private native long insertOpentsdbJson(String json, long pSql); + +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java new file mode 100644 index 00000000..184279e4 --- /dev/null +++ 
b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java @@ -0,0 +1,113 @@ +package com.alibaba.datax.plugin.writer; + + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.spi.Writer; +import com.alibaba.datax.common.util.Configuration; + +import java.io.BufferedWriter; +import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +public class TDengineWriter extends Writer { + + private static final String HOST = "host"; + private static final String PORT = "port"; + private static final String DBNAME = "dbname"; + private static final String USER = "user"; + private static final String PASSWORD = "password"; + + public static class Job extends Writer.Job { + + private Configuration originalConfig; + + @Override + public void init() { + this.originalConfig = super.getPluginJobConf(); + } + + @Override + public void destroy() { + + } + + @Override + public List split(int mandatoryNumber) { + List writerSplitConfigs = new ArrayList(); + for (int i = 0; i < mandatoryNumber; i++) { + writerSplitConfigs.add(this.originalConfig); + } + + return writerSplitConfigs; + } + } + + public static class Task extends Writer.Task { + private static final String NEWLINE_FLAG = System.getProperty("line.separator", "\n"); + private Configuration writerSliceConfig; + private String peerPluginName; + + @Override + public void init() { + this.writerSliceConfig = getPluginJobConf(); + this.peerPluginName = getPeerPluginName(); + } + + @Override + public void destroy() { + + } + + @Override + public void startWrite(RecordReceiver lineReceiver) { + + String host = this.writerSliceConfig.getString(HOST); + int port = this.writerSliceConfig.getInt(PORT); + String dbname = 
this.writerSliceConfig.getString(DBNAME); + String user = this.writerSliceConfig.getString(USER); + String password = this.writerSliceConfig.getString(PASSWORD); + + JniConnection connection = new JniConnection(new Properties()); + long psql = connection.open(host, port, dbname, user, password); + System.out.println("psql: " + psql); + connection.close(); + + try { + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8)); + + Record record; + while ((record = lineReceiver.getFromReader()) != null) { + writer.write(recordToString(record)); + } + writer.flush(); + + } catch (Exception e) { + throw DataXException.asDataXException(TDengineWriterErrorCode.RUNTIME_EXCEPTION, e); + } + } + + private String recordToString(Record record) { + int recordLength = record.getColumnNumber(); + if (0 == recordLength) { + return NEWLINE_FLAG; + } + + Column column; + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < recordLength; i++) { + column = record.getColumn(i); + sb.append(column.asString()).append("\t"); + } + sb.setLength(sb.length() - 1); + sb.append(NEWLINE_FLAG); + + return sb.toString(); + } + } +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriterErrorCode.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriterErrorCode.java new file mode 100644 index 00000000..02e87079 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriterErrorCode.java @@ -0,0 +1,31 @@ +package com.alibaba.datax.plugin.writer; + +import com.alibaba.datax.common.spi.ErrorCode; + +public enum TDengineWriterErrorCode implements ErrorCode { + RUNTIME_EXCEPTION("TDengineWriter-00", "运行时异常"); + + private final String code; + private final String description; + + private TDengineWriterErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return 
this.code; + } + + @Override + public String getDescription() { + return this.description; + } + + @Override + public String toString() { + return String.format("Code:[%s], Description:[%s]. ", this.code, + this.description); + } +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/com_alibaba_datax_plugin_writer_JniConnection.h b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/com_alibaba_datax_plugin_writer_JniConnection.h new file mode 100644 index 00000000..0a161b92 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/com_alibaba_datax_plugin_writer_JniConnection.h @@ -0,0 +1,87 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class com_alibaba_datax_plugin_writer_JniConnection */ + +#ifndef _Included_com_alibaba_datax_plugin_writer_JniConnection +#define _Included_com_alibaba_datax_plugin_writer_JniConnection +#ifdef __cplusplus +extern "C" { +#endif +#undef com_alibaba_datax_plugin_writer_JniConnection_JNI_NULL_POINTER +#define com_alibaba_datax_plugin_writer_JniConnection_JNI_NULL_POINTER 0LL +/* + * Class: com_alibaba_datax_plugin_writer_JniConnection + * Method: initImp + * Signature: (Ljava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_com_alibaba_datax_plugin_writer_JniConnection_initImp + (JNIEnv *, jclass, jstring); + +/* + * Class: com_alibaba_datax_plugin_writer_JniConnection + * Method: setOptions + * Signature: (ILjava/lang/String;)I + */ +JNIEXPORT jint JNICALL Java_com_alibaba_datax_plugin_writer_JniConnection_setOptions + (JNIEnv *, jclass, jint, jstring); + +/* + * Class: com_alibaba_datax_plugin_writer_JniConnection + * Method: getTsCharset + * Signature: ()Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_com_alibaba_datax_plugin_writer_JniConnection_getTsCharset + (JNIEnv *, jclass); + +/* + * Class: com_alibaba_datax_plugin_writer_JniConnection + * Method: connectImp + * Signature: 
(Ljava/lang/String;ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_com_alibaba_datax_plugin_writer_JniConnection_connectImp + (JNIEnv *, jobject, jstring, jint, jstring, jstring, jstring); + +/* + * Class: com_alibaba_datax_plugin_writer_JniConnection + * Method: executeQueryImp + * Signature: ([BJ)J + */ +JNIEXPORT jlong JNICALL Java_com_alibaba_datax_plugin_writer_JniConnection_executeQueryImp + (JNIEnv *, jobject, jbyteArray, jlong); + +/* + * Class: com_alibaba_datax_plugin_writer_JniConnection + * Method: getErrCodeImp + * Signature: (JJ)I + */ +JNIEXPORT jint JNICALL Java_com_alibaba_datax_plugin_writer_JniConnection_getErrCodeImp + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: com_alibaba_datax_plugin_writer_JniConnection + * Method: getErrMsgImp + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_com_alibaba_datax_plugin_writer_JniConnection_getErrMsgImp + (JNIEnv *, jobject, jlong); + +/* + * Class: com_alibaba_datax_plugin_writer_JniConnection + * Method: getAffectedRowsImp + * Signature: (JJ)I + */ +JNIEXPORT jint JNICALL Java_com_alibaba_datax_plugin_writer_JniConnection_getAffectedRowsImp + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: com_alibaba_datax_plugin_writer_JniConnection + * Method: closeConnectionImp + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_com_alibaba_datax_plugin_writer_JniConnection_closeConnectionImp + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/tdenginewriter/src/main/resources/plugin.json b/tdenginewriter/src/main/resources/plugin.json new file mode 100755 index 00000000..6c900a15 --- /dev/null +++ b/tdenginewriter/src/main/resources/plugin.json @@ -0,0 +1,9 @@ +{ + "name": "tdenginewriter", + "class": "com.alibaba.datax.plugin.writer.TDengineWriter", + "description": { + "useScene": "data migration to tdengine", + "mechanism": "use JNI to write data to tdengine." 
+ }, + "developer": "zyyang-taosdata" +} \ No newline at end of file diff --git a/tdenginewriter/src/main/resources/plugin_job_template.json b/tdenginewriter/src/main/resources/plugin_job_template.json new file mode 100644 index 00000000..5482b26e --- /dev/null +++ b/tdenginewriter/src/main/resources/plugin_job_template.json @@ -0,0 +1,10 @@ +{ + "name": "tdenginewriter", + "parameter": { + "host": "", + "port": 6030, + "db": "", + "user": "", + "password": "" + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java new file mode 100644 index 00000000..1c9f426f --- /dev/null +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java @@ -0,0 +1,19 @@ +package com.alibaba.datax.plugin.writer; + +import org.junit.Test; + +import java.util.Properties; + +public class JniConnectionTest { + + @Test + public void test() { + JniConnection connection = new JniConnection(new Properties()); + + long psql = connection.open("192.168.56.107", 6030, "log", "root", "taosdata"); + System.out.println("psql: " + psql); + + connection.close(); + } + +} \ No newline at end of file From 5da6e34d5c877fbf4e9d57365912cfdffca5113e Mon Sep 17 00:00:00 2001 From: zyyang Date: Tue, 12 Oct 2021 15:59:45 +0800 Subject: [PATCH 02/33] add maven plugin for junit test --- tdenginewriter/pom.xml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tdenginewriter/pom.xml b/tdenginewriter/pom.xml index 7cef9ac2..2c294b8f 100644 --- a/tdenginewriter/pom.xml +++ b/tdenginewriter/pom.xml @@ -69,6 +69,23 @@ + + + org.apache.maven.plugins + maven-surefire-plugin + 2.12.4 + + + + **/*Test.java + + + + + true + + + From ff87a6bcff21f3fac853111bdee8cf4f0ac11464 Mon Sep 17 00:00:00 2001 From: zyyang Date: Tue, 12 Oct 2021 17:47:08 +0800 Subject: [PATCH 03/33] change test case --- 
.../com/alibaba/datax/plugin/writer/JniConnection.java | 10 ++++++++-- .../alibaba/datax/plugin/writer/TDengineWriter.java | 5 ----- .../com_alibaba_datax_plugin_writer_JniConnection.h | 8 ++++++++ .../alibaba/datax/plugin/writer/JniConnectionTest.java | 6 ++++-- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java index 5cbbf2ae..6c533ed8 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java @@ -38,7 +38,7 @@ public class JniConnection { } } - public long open(String host, int port, String dbname, String user, String password) { + public void open(String host, int port, String dbname, String user, String password) { if (this.psql != JNI_NULL_POINTER) { close(); this.psql = JNI_NULL_POINTER; @@ -49,7 +49,13 @@ public class JniConnection { String errMsg = getErrMsgImp(0); throw new RuntimeException(errMsg); } - return this.psql; + } + + public long insertOpentsdbJson(String json) { + if (this.psql == JNI_NULL_POINTER) { + throw new RuntimeException("JNI connection is NULL"); + } + return insertOpentsdbJson(json, this.psql); } public void close() { diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java index 184279e4..ee080f54 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java @@ -73,11 +73,6 @@ public class TDengineWriter extends Writer { String user = this.writerSliceConfig.getString(USER); String password = this.writerSliceConfig.getString(PASSWORD); - JniConnection connection = new JniConnection(new Properties()); - long 
psql = connection.open(host, port, dbname, user, password); - System.out.println("psql: " + psql); - connection.close(); - try { BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8)); diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/com_alibaba_datax_plugin_writer_JniConnection.h b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/com_alibaba_datax_plugin_writer_JniConnection.h index 0a161b92..58cac666 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/com_alibaba_datax_plugin_writer_JniConnection.h +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/com_alibaba_datax_plugin_writer_JniConnection.h @@ -81,6 +81,14 @@ JNIEXPORT jint JNICALL Java_com_alibaba_datax_plugin_writer_JniConnection_getAff JNIEXPORT jint JNICALL Java_com_alibaba_datax_plugin_writer_JniConnection_closeConnectionImp (JNIEnv *, jobject, jlong); +/* + * Class: com_alibaba_datax_plugin_writer_JniConnection + * Method: insertOpentsdbJson + * Signature: (Ljava/lang/String;J)J + */ +JNIEXPORT jlong JNICALL Java_com_alibaba_datax_plugin_writer_JniConnection_insertOpentsdbJson + (JNIEnv *, jobject, jstring, jlong); + #ifdef __cplusplus } #endif diff --git a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java index 1c9f426f..87fa668d 100644 --- a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java @@ -10,8 +10,10 @@ public class JniConnectionTest { public void test() { JniConnection connection = new JniConnection(new Properties()); - long psql = connection.open("192.168.56.107", 6030, "log", "root", "taosdata"); - System.out.println("psql: " + psql); + connection.open("192.168.56.105", 6030, "log", "root", "taosdata"); + + String json = 
"{\"metric\":\"weather.temperature\",\"timestamp\":1609430400000,\"value\":123,\"tags\":{\"location\":\"beijing\",\"id\":123}}"; + connection.insertOpentsdbJson(json); connection.close(); } From f6520cf06b4475ba2e36690e84e7af2979781552 Mon Sep 17 00:00:00 2001 From: zyyang Date: Wed, 13 Oct 2021 12:47:31 +0800 Subject: [PATCH 04/33] change --- .../datax/plugin/writer/JniConnection.java | 31 ++++++++++++------- ...libaba_datax_plugin_writer_JniConnection.h | 10 ++++++ .../plugin/writer/JniConnectionTest.java | 2 +- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java index 6c533ed8..5512aaf9 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java @@ -5,21 +5,22 @@ import java.util.Properties; public class JniConnection { private static final long JNI_NULL_POINTER = 0L; + private static final int JNI_SUCCESSFUL = 0; private static final String PROPERTY_KEY_CONFIG_DIR = "cfgdir"; private static final String PROPERTY_KEY_LOCALE = "locale"; private static final String PROPERTY_KEY_CHARSET = "charset"; private static final String PROPERTY_KEY_TIME_ZONE = "timezone"; - private long psql; + private long conn; static { System.loadLibrary("taos"); } public JniConnection(Properties props) { - if (this.psql != JNI_NULL_POINTER) { + if (this.conn != JNI_NULL_POINTER) { close(); - this.psql = JNI_NULL_POINTER; + this.conn = JNI_NULL_POINTER; } initImp(props.getProperty(PROPERTY_KEY_CONFIG_DIR, null)); @@ -39,31 +40,35 @@ public class JniConnection { } public void open(String host, int port, String dbname, String user, String password) { - if (this.psql != JNI_NULL_POINTER) { + if (this.conn != JNI_NULL_POINTER) { close(); - this.psql = JNI_NULL_POINTER; + this.conn = JNI_NULL_POINTER; } - 
this.psql = connectImp(host, port, dbname, user, password); - if (this.psql == JNI_NULL_POINTER) { + this.conn = connectImp(host, port, dbname, user, password); + if (this.conn == JNI_NULL_POINTER) { String errMsg = getErrMsgImp(0); throw new RuntimeException(errMsg); } } - public long insertOpentsdbJson(String json) { - if (this.psql == JNI_NULL_POINTER) { + public void insertOpentsdbJson(String json) { + if (this.conn == JNI_NULL_POINTER) { throw new RuntimeException("JNI connection is NULL"); } - return insertOpentsdbJson(json, this.psql); + long code = insertOpentsdbJson(json, this.conn); + if (code != JNI_SUCCESSFUL) { + String errMsg = getErrMsgByCode(code); + throw new RuntimeException(errMsg); + } } public void close() { - int code = this.closeConnectionImp(this.psql); + int code = this.closeConnectionImp(this.conn); if (code != 0) { throw new RuntimeException("JNI closeConnection failed"); } - this.psql = JNI_NULL_POINTER; + this.conn = JNI_NULL_POINTER; } private static native void initImp(String configDir); @@ -80,6 +85,8 @@ public class JniConnection { private native String getErrMsgImp(long pSql); + private native String getErrMsgByCode(long code); + private native int getAffectedRowsImp(long connection, long pSql); private native int closeConnectionImp(long connection); diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/com_alibaba_datax_plugin_writer_JniConnection.h b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/com_alibaba_datax_plugin_writer_JniConnection.h index 58cac666..4bdf3639 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/com_alibaba_datax_plugin_writer_JniConnection.h +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/com_alibaba_datax_plugin_writer_JniConnection.h @@ -9,6 +9,8 @@ extern "C" { #endif #undef com_alibaba_datax_plugin_writer_JniConnection_JNI_NULL_POINTER #define com_alibaba_datax_plugin_writer_JniConnection_JNI_NULL_POINTER 0LL +#undef 
com_alibaba_datax_plugin_writer_JniConnection_JNI_SUCCESSFUL +#define com_alibaba_datax_plugin_writer_JniConnection_JNI_SUCCESSFUL 0L /* * Class: com_alibaba_datax_plugin_writer_JniConnection * Method: initImp @@ -65,6 +67,14 @@ JNIEXPORT jint JNICALL Java_com_alibaba_datax_plugin_writer_JniConnection_getErr JNIEXPORT jstring JNICALL Java_com_alibaba_datax_plugin_writer_JniConnection_getErrMsgImp (JNIEnv *, jobject, jlong); +/* + * Class: com_alibaba_datax_plugin_writer_JniConnection + * Method: getErrMsgByCode + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_com_alibaba_datax_plugin_writer_JniConnection_getErrMsgByCode + (JNIEnv *, jobject, jlong); + /* * Class: com_alibaba_datax_plugin_writer_JniConnection * Method: getAffectedRowsImp diff --git a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java index 87fa668d..603931ef 100644 --- a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java @@ -10,7 +10,7 @@ public class JniConnectionTest { public void test() { JniConnection connection = new JniConnection(new Properties()); - connection.open("192.168.56.105", 6030, "log", "root", "taosdata"); + connection.open("192.168.56.105", 6030, "test", "root", "taosdata"); String json = "{\"metric\":\"weather.temperature\",\"timestamp\":1609430400000,\"value\":123,\"tags\":{\"location\":\"beijing\",\"id\":123}}"; connection.insertOpentsdbJson(json); From 975096b676dc408ba311e4f74eda06876702c605 Mon Sep 17 00:00:00 2001 From: zyyang Date: Wed, 13 Oct 2021 17:02:43 +0800 Subject: [PATCH 05/33] change --- .../com/alibaba/datax/core/EngineTest.java | 5 +- job/opentsdb2tdengine.json | 2 +- .../datax/plugin/writer/JniConnection.java | 30 +++++----- .../datax/plugin/writer/TDengineWriter.java | 59 +++++++++++++++---- 
.../plugin/writer/JniConnectionTest.java | 4 +- 5 files changed, 69 insertions(+), 31 deletions(-) diff --git a/core/src/test/java/com/alibaba/datax/core/EngineTest.java b/core/src/test/java/com/alibaba/datax/core/EngineTest.java index 3f36263b..47ce268e 100644 --- a/core/src/test/java/com/alibaba/datax/core/EngineTest.java +++ b/core/src/test/java/com/alibaba/datax/core/EngineTest.java @@ -1,9 +1,12 @@ package com.alibaba.datax.core; +import org.junit.Test; + public class EngineTest { - public static void main(String[] args) { + @Test + public void test() { System.out.println(System.getProperty("java.library.path")); // String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "/Users/yangzy/workspace/DataX/job/opentsdb2stream.json"}; String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "/Users/yangzy/workspace/DataX/job/opentsdb2tdengine.json"}; diff --git a/job/opentsdb2tdengine.json b/job/opentsdb2tdengine.json index cdf2277b..92c0f8e3 100644 --- a/job/opentsdb2tdengine.json +++ b/job/opentsdb2tdengine.json @@ -7,7 +7,7 @@ "parameter": { "endpoint": "http://192.168.1.180:4242", "column": [ - "weather.temperature" + "weather_temperature" ], "beginDateTime": "2021-01-01 00:00:00", "endDateTime": "2021-01-01 01:00:00" diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java index 5512aaf9..a8739c54 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java @@ -6,10 +6,10 @@ public class JniConnection { private static final long JNI_NULL_POINTER = 0L; private static final int JNI_SUCCESSFUL = 0; - private static final String PROPERTY_KEY_CONFIG_DIR = "cfgdir"; - private static final String PROPERTY_KEY_LOCALE = "locale"; - private static final String PROPERTY_KEY_CHARSET = "charset"; - private static final 
String PROPERTY_KEY_TIME_ZONE = "timezone"; + public static final String PROPERTY_KEY_CONFIG_DIR = "cfgdir"; + public static final String PROPERTY_KEY_LOCALE = "locale"; + public static final String PROPERTY_KEY_CHARSET = "charset"; + public static final String PROPERTY_KEY_TIME_ZONE = "timezone"; private long conn; @@ -17,7 +17,7 @@ public class JniConnection { System.loadLibrary("taos"); } - public JniConnection(Properties props) { + public JniConnection(Properties props) throws Exception { if (this.conn != JNI_NULL_POINTER) { close(); this.conn = JNI_NULL_POINTER; @@ -27,19 +27,19 @@ public class JniConnection { String locale = props.getProperty(PROPERTY_KEY_LOCALE); if (setOptions(0, locale) < 0) { - throw new RuntimeException("Failed to set locale: " + locale + ". System default will be used."); + throw new Exception("Failed to set locale: " + locale + ". System default will be used."); } String charset = props.getProperty(PROPERTY_KEY_CHARSET); if (setOptions(1, charset) < 0) { - throw new RuntimeException("Failed to set charset: " + charset + ". System default will be used."); + throw new Exception("Failed to set charset: " + charset + ". System default will be used."); } String timezone = props.getProperty(PROPERTY_KEY_TIME_ZONE); if (setOptions(2, timezone) < 0) { - throw new RuntimeException("Failed to set timezone: " + timezone + ". System default will be used."); + throw new Exception("Failed to set timezone: " + timezone + ". 
System default will be used."); } } - public void open(String host, int port, String dbname, String user, String password) { + public void open(String host, int port, String dbname, String user, String password) throws Exception { if (this.conn != JNI_NULL_POINTER) { close(); this.conn = JNI_NULL_POINTER; @@ -48,25 +48,25 @@ public class JniConnection { this.conn = connectImp(host, port, dbname, user, password); if (this.conn == JNI_NULL_POINTER) { String errMsg = getErrMsgImp(0); - throw new RuntimeException(errMsg); + throw new Exception(errMsg); } } - public void insertOpentsdbJson(String json) { + public void insertOpentsdbJson(String json) throws Exception { if (this.conn == JNI_NULL_POINTER) { - throw new RuntimeException("JNI connection is NULL"); + throw new Exception("JNI connection is NULL"); } long code = insertOpentsdbJson(json, this.conn); if (code != JNI_SUCCESSFUL) { String errMsg = getErrMsgByCode(code); - throw new RuntimeException(errMsg); + throw new Exception(errMsg); } } - public void close() { + public void close() throws Exception { int code = this.closeConnectionImp(this.conn); if (code != 0) { - throw new RuntimeException("JNI closeConnection failed"); + throw new Exception("JNI closeConnection failed"); } this.conn = JNI_NULL_POINTER; } diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java index ee080f54..28fa4ca5 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java @@ -7,12 +7,12 @@ import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.RecordReceiver; import com.alibaba.datax.common.spi.Writer; import com.alibaba.datax.common.util.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.io.BufferedWriter; -import 
java.io.OutputStreamWriter; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; +import java.util.Optional; import java.util.Properties; public class TDengineWriter extends Writer { @@ -22,6 +22,7 @@ public class TDengineWriter extends Writer { private static final String DBNAME = "dbname"; private static final String USER = "user"; private static final String PASSWORD = "password"; + private static final String PEER_PLUGIN_NAME = "peerPluginName"; public static class Job extends Writer.Job { @@ -30,6 +31,7 @@ public class TDengineWriter extends Writer { @Override public void init() { this.originalConfig = super.getPluginJobConf(); + this.originalConfig.set(PEER_PLUGIN_NAME, getPeerPluginName()); } @Override @@ -49,14 +51,14 @@ public class TDengineWriter extends Writer { } public static class Task extends Writer.Task { + private static final Logger LOG = LoggerFactory.getLogger(Job.class); + private static final String NEWLINE_FLAG = System.getProperty("line.separator", "\n"); private Configuration writerSliceConfig; - private String peerPluginName; @Override public void init() { this.writerSliceConfig = getPluginJobConf(); - this.peerPluginName = getPeerPluginName(); } @Override @@ -67,22 +69,57 @@ public class TDengineWriter extends Writer { @Override public void startWrite(RecordReceiver lineReceiver) { + String host = this.writerSliceConfig.getString(HOST); int port = this.writerSliceConfig.getInt(PORT); String dbname = this.writerSliceConfig.getString(DBNAME); String user = this.writerSliceConfig.getString(USER); String password = this.writerSliceConfig.getString(PASSWORD); - try { - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8)); + Properties properties = new Properties(); + String cfgdir = this.writerSliceConfig.getString(JniConnection.PROPERTY_KEY_CONFIG_DIR); + if (cfgdir != null && !cfgdir.isEmpty()) { + 
properties.setProperty(JniConnection.PROPERTY_KEY_CONFIG_DIR, cfgdir); + } + String timezone = this.writerSliceConfig.getString(JniConnection.PROPERTY_KEY_TIME_ZONE); + if (timezone != null && !timezone.isEmpty()) { + properties.setProperty(JniConnection.PROPERTY_KEY_TIME_ZONE, timezone); + } + String locale = this.writerSliceConfig.getString(JniConnection.PROPERTY_KEY_LOCALE); + if (locale != null && !locale.isEmpty()) { + properties.setProperty(JniConnection.PROPERTY_KEY_LOCALE, locale); + } + String charset = this.writerSliceConfig.getString(JniConnection.PROPERTY_KEY_CHARSET); + if (charset != null && !charset.isEmpty()) { + properties.setProperty(JniConnection.PROPERTY_KEY_CHARSET, charset); + } + String peerPluginName = this.writerSliceConfig.getString(PEER_PLUGIN_NAME); + if (peerPluginName.equals("opentsdbreader")) { + try { + JniConnection conn = new JniConnection(properties); + conn.open(host, port, dbname, user, password); + LOG.info("TDengine connection established, host: " + host + ", port: " + port + ", dbname: " + dbname + ", user: " + user); + writeOpentsdb(lineReceiver, conn); + conn.close(); + LOG.info("TDengine connection closed"); + } catch (Exception e) { + LOG.error(e.getMessage()); + e.printStackTrace(); + } + } + } + + private void writeOpentsdb(RecordReceiver lineReceiver, JniConnection conn) { + try { Record record; while ((record = lineReceiver.getFromReader()) != null) { - writer.write(recordToString(record)); + String jsonData = recordToString(record); + LOG.debug(">>> " + jsonData); + conn.insertOpentsdbJson(jsonData); } - writer.flush(); - } catch (Exception e) { + LOG.error("TDengineWriter ERROR: " + e.getMessage()); throw DataXException.asDataXException(TDengineWriterErrorCode.RUNTIME_EXCEPTION, e); } } @@ -92,7 +129,6 @@ public class TDengineWriter extends Writer { if (0 == recordLength) { return NEWLINE_FLAG; } - Column column; StringBuilder sb = new StringBuilder(); for (int i = 0; i < recordLength; i++) { @@ -101,7 +137,6 @@ 
public class TDengineWriter extends Writer { } sb.setLength(sb.length() - 1); sb.append(NEWLINE_FLAG); - return sb.toString(); } } diff --git a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java index 603931ef..040cf34c 100644 --- a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java @@ -7,12 +7,12 @@ import java.util.Properties; public class JniConnectionTest { @Test - public void test() { + public void test() throws Exception { JniConnection connection = new JniConnection(new Properties()); connection.open("192.168.56.105", 6030, "test", "root", "taosdata"); - String json = "{\"metric\":\"weather.temperature\",\"timestamp\":1609430400000,\"value\":123,\"tags\":{\"location\":\"beijing\",\"id\":123}}"; + String json = "{\"metric\":\"weather_temperature\",\"timestamp\":1609430400000,\"value\":123,\"tags\":{\"location\":\"beijing\",\"id\":\"t123\"}}"; connection.insertOpentsdbJson(json); connection.close(); From 9121b7d3c59b0c079aabe69596ad9b01b9f3ed66 Mon Sep 17 00:00:00 2001 From: zyyang Date: Wed, 13 Oct 2021 17:12:02 +0800 Subject: [PATCH 06/33] change test cases --- {job => core/src/main/job}/opentsdb2stream.json | 0 {job => core/src/main/job}/opentsdb2tdengine.json | 0 core/src/test/java/com/alibaba/datax/core/EngineTest.java | 5 ++--- 3 files changed, 2 insertions(+), 3 deletions(-) rename {job => core/src/main/job}/opentsdb2stream.json (100%) rename {job => core/src/main/job}/opentsdb2tdengine.json (100%) diff --git a/job/opentsdb2stream.json b/core/src/main/job/opentsdb2stream.json similarity index 100% rename from job/opentsdb2stream.json rename to core/src/main/job/opentsdb2stream.json diff --git a/job/opentsdb2tdengine.json b/core/src/main/job/opentsdb2tdengine.json similarity index 100% rename from 
job/opentsdb2tdengine.json rename to core/src/main/job/opentsdb2tdengine.json diff --git a/core/src/test/java/com/alibaba/datax/core/EngineTest.java b/core/src/test/java/com/alibaba/datax/core/EngineTest.java index 47ce268e..bbc5bdc5 100644 --- a/core/src/test/java/com/alibaba/datax/core/EngineTest.java +++ b/core/src/test/java/com/alibaba/datax/core/EngineTest.java @@ -8,9 +8,8 @@ public class EngineTest { @Test public void test() { System.out.println(System.getProperty("java.library.path")); -// String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "/Users/yangzy/workspace/DataX/job/opentsdb2stream.json"}; - String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "/Users/yangzy/workspace/DataX/job/opentsdb2tdengine.json"}; - System.setProperty("datax.home", "/Users/yangzy/workspace/DataX/target/datax/datax"); + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/main/job/opentsdb2tdengine.json"}; + System.setProperty("datax.home", "../target/datax/datax"); try { Engine.entry(params); } catch (Throwable e) { From c5d64c00a9c99812737698e0edf1fcbeddf0b134 Mon Sep 17 00:00:00 2001 From: zyyang Date: Wed, 13 Oct 2021 17:21:36 +0800 Subject: [PATCH 07/33] change --- core/src/main/job/opentsdb2tdengine.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/job/opentsdb2tdengine.json b/core/src/main/job/opentsdb2tdengine.json index 92c0f8e3..dd332202 100644 --- a/core/src/main/job/opentsdb2tdengine.json +++ b/core/src/main/job/opentsdb2tdengine.json @@ -18,7 +18,7 @@ "parameter": { "host": "192.168.56.105", "port": 6030, - "db": "test", + "dbname": "test", "user": "root", "password": "taosdata" } From 5e54aee590f7ad5970b28c011d8c0e34d00c09ab Mon Sep 17 00:00:00 2001 From: zyyang Date: Tue, 19 Oct 2021 15:40:11 +0800 Subject: [PATCH 08/33] tdenginewriter support batch write --- core/src/main/job/opentsdb2tdengine.json | 3 +- tdenginewriter/doc/tdenginewriter.md | 151 ++++++++++++++++++ 
.../datax/plugin/writer/JniConnection.java | 5 - .../com/alibaba/datax/plugin/writer/Key.java | 11 ++ .../datax/plugin/writer/TDengineWriter.java | 77 ++++----- 5 files changed, 205 insertions(+), 42 deletions(-) create mode 100644 tdenginewriter/doc/tdenginewriter.md create mode 100644 tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/Key.java diff --git a/core/src/main/job/opentsdb2tdengine.json b/core/src/main/job/opentsdb2tdengine.json index dd332202..377b98c9 100644 --- a/core/src/main/job/opentsdb2tdengine.json +++ b/core/src/main/job/opentsdb2tdengine.json @@ -20,7 +20,8 @@ "port": 6030, "dbname": "test", "user": "root", - "password": "taosdata" + "password": "taosdata", + "batchSize": 1000 } } } diff --git a/tdenginewriter/doc/tdenginewriter.md b/tdenginewriter/doc/tdenginewriter.md new file mode 100644 index 00000000..8e55b189 --- /dev/null +++ b/tdenginewriter/doc/tdenginewriter.md @@ -0,0 +1,151 @@ +# DataX TDengineWriter + +## 1 快速介绍 + +TDengineWriter 插件实现了写入数据到 TDengine 的功能。 在底层实现上, TDengineWriter 通过 JNI的方式调用libtaos.so/tao.dll中的方法,连接 TDengine +数据库实例,并执行schemaless的写入。 TDengineWriter 面向ETL开发工程师,他们使用 TDengineWriter 从数仓导入数据到 TDengine。同时,TDengineWriter +亦可以作为数据迁移工具为DBA等用户提供服务。 + +## 2 实现原理 + +TDengineWriter 通过 DataX 框架获取 Reader +生成的协议数据,根据reader的类型解析数据,通过JNI方式调用libtaos.so(或taos.dll)中的方法,使用schemaless的方式写入到TDengine。 + +## 3 功能说明 + +### 3.1 配置样例 + +* 这里使用一份从OpenTSDB产生到 TDengine 导入的数据。 + +```json +{ + "job": { + "content": [ + { + "reader": { + "name": "opentsdbreader", + "parameter": { + "endpoint": "http://192.168.1.180:4242", + "column": [ + "weather_temperature" + ], + "beginDateTime": "2021-01-01 00:00:00", + "endDateTime": "2021-01-01 01:00:00" + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "host": "192.168.1.180", + "port": 6030, + "dbname": "test", + "user": "root", + "password": "taosdata" + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} +``` + +### 3.2 参数说明 + +* **host** + * 
描述:TDengine实例的host。 + + * 必选:是
+ + * 默认值:无
+* **port** + * 描述:TDengine实例的port。 + * 必选:是
+ * 默认值:无
+* **dbname** + * 描述:目的数据库的名称。 + + * 必选:是
+ + * 默认值:无
+* **username** + * 描述:TDengine实例的用户名
+ * 必选:是
+ * 默认值:无
+* **password** + * 描述:TDengine实例的密码
+ * 必选:是
+ * 默认值:无
+ +### 3.3 类型转换 + +目前,由于opentsdbreader将opentsdb的数据统一读取为json字符串,TDengineWriter 在做Opentsdb到TDengine的迁移时,按照以下类型进行处理: + +| OpenTSDB数据类型 | DataX 内部类型| TDengine 数据类型 | +| -------- | ----- | -------- | +| timestamp | Date | timestamp | +| Integer(value) | Double | double | +| Float(value) | Double | double | +| String(value) | String | binary | +| Integer(tag) | String | binary | +| Float(tag) | String |binary | +| String(tag) | String |binary | + +## 4 性能报告 + +### 4.1 环境准备 + +#### 4.1.1 数据特征 + +建表语句: + +单行记录类似于: + +#### 4.1.2 机器参数 + +* 执行DataX的机器参数为: + 1. cpu: + 2. mem: + 3. net: 千兆双网卡 + 4. disc: DataX 数据不落磁盘,不统计此项 + +* TDengine数据库机器参数为: + 1. cpu: + 2. mem: + 3. net: 千兆双网卡 + 4. disc: + +#### 4.1.3 DataX jvm 参数 + + -Xms1024m -Xmx1024m -XX:+HeapDumpOnOutOfMemoryError + +### 4.2 测试报告 + +#### 4.2.1 单表测试报告 + +| 通道数| DataX速度(Rec/s)|DataX流量(MB/s)| DataX机器网卡流出流量(MB/s)|DataX机器运行负载|DB网卡进入流量(MB/s)|DB运行负载|DB TPS| +|--------| --------|--------|--------|--------|--------|--------|--------| +|1| | | | | | | | +|4| | | | | | | | +|8| | | | | | | | +|16| | | | | | | | +|32| | | | | | | | + +说明: + +1. 这里的单表,主键类型为 bigint(20),自增。 +2. batchSize 和 通道个数,对性能影响较大。 +3. 16通道,4096批量提交时,出现 full gc 2次。 + +#### 4.2.4 性能测试小结 + +1. +2. 
+ +## 5 约束限制 + +## FAQ \ No newline at end of file diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java index a8739c54..b1670633 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java @@ -18,11 +18,6 @@ public class JniConnection { } public JniConnection(Properties props) throws Exception { - if (this.conn != JNI_NULL_POINTER) { - close(); - this.conn = JNI_NULL_POINTER; - } - initImp(props.getProperty(PROPERTY_KEY_CONFIG_DIR, null)); String locale = props.getProperty(PROPERTY_KEY_LOCALE); diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/Key.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/Key.java new file mode 100644 index 00000000..b240bce4 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/Key.java @@ -0,0 +1,11 @@ +package com.alibaba.datax.plugin.writer; + +public class Key { + public static final String HOST = "host"; + public static final String PORT = "port"; + public static final String DBNAME = "dbname"; + public static final String USER = "user"; + public static final String PASSWORD = "password"; + public static final String BATCH_SIZE = "batchSize"; + +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java index 28fa4ca5..60c76522 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java @@ -10,19 +10,12 @@ import com.alibaba.datax.common.util.Configuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; 
-import java.util.Properties; +import java.util.*; public class TDengineWriter extends Writer { - private static final String HOST = "host"; - private static final String PORT = "port"; - private static final String DBNAME = "dbname"; - private static final String USER = "user"; - private static final String PASSWORD = "password"; private static final String PEER_PLUGIN_NAME = "peerPluginName"; + private static final String DEFAULT_BATCH_SIZE = "1"; public static class Job extends Writer.Job { @@ -45,7 +38,6 @@ public class TDengineWriter extends Writer { for (int i = 0; i < mandatoryNumber; i++) { writerSplitConfigs.add(this.originalConfig); } - return writerSplitConfigs; } } @@ -68,53 +60,66 @@ public class TDengineWriter extends Writer { @Override public void startWrite(RecordReceiver lineReceiver) { - - - String host = this.writerSliceConfig.getString(HOST); - int port = this.writerSliceConfig.getInt(PORT); - String dbname = this.writerSliceConfig.getString(DBNAME); - String user = this.writerSliceConfig.getString(USER); - String password = this.writerSliceConfig.getString(PASSWORD); - + Set keys = this.writerSliceConfig.getKeys(); Properties properties = new Properties(); - String cfgdir = this.writerSliceConfig.getString(JniConnection.PROPERTY_KEY_CONFIG_DIR); - if (cfgdir != null && !cfgdir.isEmpty()) { - properties.setProperty(JniConnection.PROPERTY_KEY_CONFIG_DIR, cfgdir); - } - String timezone = this.writerSliceConfig.getString(JniConnection.PROPERTY_KEY_TIME_ZONE); - if (timezone != null && !timezone.isEmpty()) { - properties.setProperty(JniConnection.PROPERTY_KEY_TIME_ZONE, timezone); - } - String locale = this.writerSliceConfig.getString(JniConnection.PROPERTY_KEY_LOCALE); - if (locale != null && !locale.isEmpty()) { - properties.setProperty(JniConnection.PROPERTY_KEY_LOCALE, locale); - } - String charset = this.writerSliceConfig.getString(JniConnection.PROPERTY_KEY_CHARSET); - if (charset != null && !charset.isEmpty()) { - 
properties.setProperty(JniConnection.PROPERTY_KEY_CHARSET, charset); + for (String key : keys) { + String value = this.writerSliceConfig.getString(key); + properties.setProperty(key, value); } String peerPluginName = this.writerSliceConfig.getString(PEER_PLUGIN_NAME); if (peerPluginName.equals("opentsdbreader")) { + // opentsdb json protocol use JNI and schemaless API to write + + String host = properties.getProperty(Key.HOST); + int port = Integer.parseInt(properties.getProperty(Key.PORT)); + String dbname = properties.getProperty(Key.DBNAME); + String user = properties.getProperty(Key.USER); + String password = properties.getProperty(Key.PASSWORD); + try { JniConnection conn = new JniConnection(properties); conn.open(host, port, dbname, user, password); LOG.info("TDengine connection established, host: " + host + ", port: " + port + ", dbname: " + dbname + ", user: " + user); - writeOpentsdb(lineReceiver, conn); + + int batchSize = Integer.parseInt(properties.getProperty(Key.BATCH_SIZE, DEFAULT_BATCH_SIZE)); + writeOpentsdb(lineReceiver, conn, batchSize); conn.close(); LOG.info("TDengine connection closed"); } catch (Exception e) { LOG.error(e.getMessage()); e.printStackTrace(); } + } else { + // other } } - private void writeOpentsdb(RecordReceiver lineReceiver, JniConnection conn) { + private void writeOpentsdb(RecordReceiver lineReceiver, JniConnection conn, int batchSize) { try { Record record; + StringBuilder sb = new StringBuilder(); + long recordIndex = 1; while ((record = lineReceiver.getFromReader()) != null) { - String jsonData = recordToString(record); + if (batchSize == 1) { + String jsonData = recordToString(record); + LOG.debug(">>> " + jsonData); + conn.insertOpentsdbJson(jsonData); + } else if (recordIndex % batchSize == 1) { + sb.append("[").append(recordToString(record)).append(","); + } else if (recordIndex % batchSize == 0) { + sb.append(recordToString(record)).append("]"); + String jsonData = sb.toString(); + LOG.debug(">>> " + jsonData); + 
conn.insertOpentsdbJson(jsonData); + sb.delete(0, sb.length()); + } else { + sb.append(recordToString(record)).append(","); + } + recordIndex++; + } + if (sb.length() != 0 && sb.charAt(0) == '[') { + String jsonData = sb.deleteCharAt(sb.length() - 1).append("]").toString(); LOG.debug(">>> " + jsonData); conn.insertOpentsdbJson(jsonData); } From ab526ca5c4d4c846277224cb122fec581d3f368c Mon Sep 17 00:00:00 2001 From: zyyang Date: Tue, 19 Oct 2021 15:58:47 +0800 Subject: [PATCH 09/33] tdenginewriter --- .../datax/plugin/writer/TDengineWriter.java | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java index 60c76522..9dc42d9d 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java @@ -45,7 +45,6 @@ public class TDengineWriter extends Writer { public static class Task extends Writer.Task { private static final Logger LOG = LoggerFactory.getLogger(Job.class); - private static final String NEWLINE_FLAG = System.getProperty("line.separator", "\n"); private Configuration writerSliceConfig; @Override @@ -70,25 +69,30 @@ public class TDengineWriter extends Writer { String peerPluginName = this.writerSliceConfig.getString(PEER_PLUGIN_NAME); if (peerPluginName.equals("opentsdbreader")) { // opentsdb json protocol use JNI and schemaless API to write - String host = properties.getProperty(Key.HOST); int port = Integer.parseInt(properties.getProperty(Key.PORT)); String dbname = properties.getProperty(Key.DBNAME); String user = properties.getProperty(Key.USER); String password = properties.getProperty(Key.PASSWORD); + JniConnection conn = null; try { - JniConnection conn = new JniConnection(properties); + conn = new JniConnection(properties); conn.open(host, port, 
dbname, user, password); LOG.info("TDengine connection established, host: " + host + ", port: " + port + ", dbname: " + dbname + ", user: " + user); - int batchSize = Integer.parseInt(properties.getProperty(Key.BATCH_SIZE, DEFAULT_BATCH_SIZE)); writeOpentsdb(lineReceiver, conn, batchSize); - conn.close(); - LOG.info("TDengine connection closed"); } catch (Exception e) { LOG.error(e.getMessage()); e.printStackTrace(); + } finally { + try { + if (conn != null) + conn.close(); + } catch (Exception e) { + e.printStackTrace(); + } + LOG.info("TDengine connection closed"); } } else { // other @@ -132,7 +136,7 @@ public class TDengineWriter extends Writer { private String recordToString(Record record) { int recordLength = record.getColumnNumber(); if (0 == recordLength) { - return NEWLINE_FLAG; + return ""; } Column column; StringBuilder sb = new StringBuilder(); @@ -141,7 +145,6 @@ public class TDengineWriter extends Writer { sb.append(column.asString()).append("\t"); } sb.setLength(sb.length() - 1); - sb.append(NEWLINE_FLAG); return sb.toString(); } } From 7d9543105de02c68a26f088a13024994452c25ca Mon Sep 17 00:00:00 2001 From: zyyang Date: Wed, 20 Oct 2021 14:21:50 +0800 Subject: [PATCH 10/33] data handle use stratgy pattern --- core/src/main/job/stream2tdengine.json | 56 +++++++++++ .../com/alibaba/datax/core/EngineTest.java | 3 +- .../datax/plugin/writer/DataHandler.java | 10 ++ .../plugin/writer/DataHandlerFactory.java | 10 ++ .../plugin/writer/DefaultDataHandler.java | 34 +++++++ .../plugin/writer/OpentsdbDataHandler.java | 98 +++++++++++++++++++ .../datax/plugin/writer/TDengineWriter.java | 91 ++--------------- 7 files changed, 218 insertions(+), 84 deletions(-) create mode 100644 core/src/main/job/stream2tdengine.json create mode 100644 tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandler.java create mode 100644 tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandlerFactory.java create mode 100644 
tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DefaultDataHandler.java create mode 100644 tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/OpentsdbDataHandler.java diff --git a/core/src/main/job/stream2tdengine.json b/core/src/main/job/stream2tdengine.json new file mode 100644 index 00000000..6af68323 --- /dev/null +++ b/core/src/main/job/stream2tdengine.json @@ -0,0 +1,56 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "streamreader", + "parameter": { + "column": [ + { + "random": "2021-01-01 00:00:00, 2021-01-01 23:59:59", + "type": "date" + }, + { + "random": "0, 10000", + "type": "long" + }, + { + "random": "0, 10", + "type": "string" + }, + { + "random": "0, 5", + "type": "bool" + }, + { + "random": "0, 10", + "type": "double" + }, + { + "random": "0, 10", + "type": "bytes" + } + ], + "sliceRecordCount": 100 + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "host": "192.168.56.105", + "port": 6030, + "dbname": "test", + "user": "root", + "password": "taosdata", + "batchSize": 1000 + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/core/src/test/java/com/alibaba/datax/core/EngineTest.java b/core/src/test/java/com/alibaba/datax/core/EngineTest.java index bbc5bdc5..92c50a77 100644 --- a/core/src/test/java/com/alibaba/datax/core/EngineTest.java +++ b/core/src/test/java/com/alibaba/datax/core/EngineTest.java @@ -8,7 +8,8 @@ public class EngineTest { @Test public void test() { System.out.println(System.getProperty("java.library.path")); - String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/main/job/opentsdb2tdengine.json"}; +// String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/main/job/opentsdb2tdengine.json"}; + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/main/job/stream2tdengine.json"}; System.setProperty("datax.home", "../target/datax/datax"); try { Engine.entry(params); diff 
--git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandler.java new file mode 100644 index 00000000..94d1db30 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandler.java @@ -0,0 +1,10 @@ +package com.alibaba.datax.plugin.writer; + +import com.alibaba.datax.common.plugin.RecordReceiver; + +import java.util.Properties; + +public interface DataHandler { + + long handle(RecordReceiver lineReceiver, Properties properties); +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandlerFactory.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandlerFactory.java new file mode 100644 index 00000000..a488e7d5 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandlerFactory.java @@ -0,0 +1,10 @@ +package com.alibaba.datax.plugin.writer; + +public class DataHandlerFactory { + + public static DataHandler build(String peerPluginName) { + if (peerPluginName.equals("opentsdbreader")) + return new OpentsdbDataHandler(); + return new DefaultDataHandler(); + } +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DefaultDataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DefaultDataHandler.java new file mode 100644 index 00000000..a1d52d75 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DefaultDataHandler.java @@ -0,0 +1,34 @@ +package com.alibaba.datax.plugin.writer; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.plugin.RecordReceiver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Properties; + +public class DefaultDataHandler implements DataHandler { + private static final Logger LOG = LoggerFactory.getLogger(DefaultDataHandler.class); + + @Override + public long 
handle(RecordReceiver lineReceiver, Properties properties) { + long count = 0; + Record record; + while ((record = lineReceiver.getFromReader()) != null) { + + int recordLength = record.getColumnNumber(); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < recordLength; i++) { + Column column = record.getColumn(i); + sb.append(column.asString()).append("\t"); + } + sb.setLength(sb.length() - 1); + LOG.debug(sb.toString()); + + count++; + } + return count; + } + +} \ No newline at end of file diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/OpentsdbDataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/OpentsdbDataHandler.java new file mode 100644 index 00000000..599e5f3e --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/OpentsdbDataHandler.java @@ -0,0 +1,98 @@ +package com.alibaba.datax.plugin.writer; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Properties; + +public class OpentsdbDataHandler implements DataHandler { + private static final Logger LOG = LoggerFactory.getLogger(OpentsdbDataHandler.class); + private static final String DEFAULT_BATCH_SIZE = "1"; + + @Override + public long handle(RecordReceiver lineReceiver, Properties properties) { + // opentsdb json protocol use JNI and schemaless API to write + String host = properties.getProperty(Key.HOST); + int port = Integer.parseInt(properties.getProperty(Key.PORT)); + String dbname = properties.getProperty(Key.DBNAME); + String user = properties.getProperty(Key.USER); + String password = properties.getProperty(Key.PASSWORD); + + JniConnection conn = null; + long count = 0; + try { + conn = new JniConnection(properties); + conn.open(host, port, dbname, user, password); + 
LOG.info("TDengine connection established, host: " + host + ", port: " + port + ", dbname: " + dbname + ", user: " + user); + int batchSize = Integer.parseInt(properties.getProperty(Key.BATCH_SIZE, DEFAULT_BATCH_SIZE)); + count = writeOpentsdb(lineReceiver, conn, batchSize); + } catch (Exception e) { + LOG.error(e.getMessage()); + e.printStackTrace(); + } finally { + try { + if (conn != null) + conn.close(); + } catch (Exception e) { + e.printStackTrace(); + } + LOG.info("TDengine connection closed"); + } + + return count; + } + + private long writeOpentsdb(RecordReceiver lineReceiver, JniConnection conn, int batchSize) { + long recordIndex = 1; + try { + Record record; + StringBuilder sb = new StringBuilder(); + while ((record = lineReceiver.getFromReader()) != null) { + if (batchSize == 1) { + String jsonData = recordToString(record); + LOG.debug(">>> " + jsonData); + conn.insertOpentsdbJson(jsonData); + } else if (recordIndex % batchSize == 1) { + sb.append("[").append(recordToString(record)).append(","); + } else if (recordIndex % batchSize == 0) { + sb.append(recordToString(record)).append("]"); + String jsonData = sb.toString(); + LOG.debug(">>> " + jsonData); + conn.insertOpentsdbJson(jsonData); + sb.delete(0, sb.length()); + } else { + sb.append(recordToString(record)).append(","); + } + recordIndex++; + } + if (sb.length() != 0 && sb.charAt(0) == '[') { + String jsonData = sb.deleteCharAt(sb.length() - 1).append("]").toString(); + LOG.debug(">>> " + jsonData); + conn.insertOpentsdbJson(jsonData); + } + } catch (Exception e) { + LOG.error("TDengineWriter ERROR: " + e.getMessage()); + throw DataXException.asDataXException(TDengineWriterErrorCode.RUNTIME_EXCEPTION, e); + } + return recordIndex - 1; + } + + private String recordToString(Record record) { + int recordLength = record.getColumnNumber(); + if (0 == recordLength) { + return ""; + } + Column column; + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < recordLength; i++) { + column = 
record.getColumn(i); + sb.append(column.asString()).append("\t"); + } + sb.setLength(sb.length() - 1); + return sb.toString(); + } +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java index 9dc42d9d..84600802 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java @@ -1,21 +1,20 @@ package com.alibaba.datax.plugin.writer; -import com.alibaba.datax.common.element.Column; -import com.alibaba.datax.common.element.Record; -import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.RecordReceiver; import com.alibaba.datax.common.spi.Writer; import com.alibaba.datax.common.util.Configuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; +import java.util.Set; public class TDengineWriter extends Writer { private static final String PEER_PLUGIN_NAME = "peerPluginName"; - private static final String DEFAULT_BATCH_SIZE = "1"; public static class Job extends Writer.Job { @@ -67,85 +66,11 @@ public class TDengineWriter extends Writer { } String peerPluginName = this.writerSliceConfig.getString(PEER_PLUGIN_NAME); - if (peerPluginName.equals("opentsdbreader")) { - // opentsdb json protocol use JNI and schemaless API to write - String host = properties.getProperty(Key.HOST); - int port = Integer.parseInt(properties.getProperty(Key.PORT)); - String dbname = properties.getProperty(Key.DBNAME); - String user = properties.getProperty(Key.USER); - String password = properties.getProperty(Key.PASSWORD); - - JniConnection conn = null; - try { - conn = new JniConnection(properties); - conn.open(host, port, dbname, user, password); - LOG.info("TDengine connection established, host: " + host 
+ ", port: " + port + ", dbname: " + dbname + ", user: " + user); - int batchSize = Integer.parseInt(properties.getProperty(Key.BATCH_SIZE, DEFAULT_BATCH_SIZE)); - writeOpentsdb(lineReceiver, conn, batchSize); - } catch (Exception e) { - LOG.error(e.getMessage()); - e.printStackTrace(); - } finally { - try { - if (conn != null) - conn.close(); - } catch (Exception e) { - e.printStackTrace(); - } - LOG.info("TDengine connection closed"); - } - } else { - // other - } + LOG.debug("start to handle record from: " + peerPluginName); + DataHandler handler = DataHandlerFactory.build(peerPluginName); + long records = handler.handle(lineReceiver, properties); + LOG.debug("handle data finished, records: " + records); } - private void writeOpentsdb(RecordReceiver lineReceiver, JniConnection conn, int batchSize) { - try { - Record record; - StringBuilder sb = new StringBuilder(); - long recordIndex = 1; - while ((record = lineReceiver.getFromReader()) != null) { - if (batchSize == 1) { - String jsonData = recordToString(record); - LOG.debug(">>> " + jsonData); - conn.insertOpentsdbJson(jsonData); - } else if (recordIndex % batchSize == 1) { - sb.append("[").append(recordToString(record)).append(","); - } else if (recordIndex % batchSize == 0) { - sb.append(recordToString(record)).append("]"); - String jsonData = sb.toString(); - LOG.debug(">>> " + jsonData); - conn.insertOpentsdbJson(jsonData); - sb.delete(0, sb.length()); - } else { - sb.append(recordToString(record)).append(","); - } - recordIndex++; - } - if (sb.length() != 0 && sb.charAt(0) == '[') { - String jsonData = sb.deleteCharAt(sb.length() - 1).append("]").toString(); - LOG.debug(">>> " + jsonData); - conn.insertOpentsdbJson(jsonData); - } - } catch (Exception e) { - LOG.error("TDengineWriter ERROR: " + e.getMessage()); - throw DataXException.asDataXException(TDengineWriterErrorCode.RUNTIME_EXCEPTION, e); - } - } - - private String recordToString(Record record) { - int recordLength = record.getColumnNumber(); - if 
(0 == recordLength) { - return ""; - } - Column column; - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < recordLength; i++) { - column = record.getColumn(i); - sb.append(column.asString()).append("\t"); - } - sb.setLength(sb.length() - 1); - return sb.toString(); - } } } From cd6a2f73dae68b44724a422782d2c23f66c3ee67 Mon Sep 17 00:00:00 2001 From: zyyang Date: Tue, 26 Oct 2021 16:09:39 +0800 Subject: [PATCH 11/33] [TD-10725]: add tdenginereader --- core/src/main/job/mysql2tdengine.json | 46 ++++++ core/src/main/job/tdengine2tdengine.json | 38 +++++ package.xml | 41 +++-- pom.xml | 3 +- tdenginereader/doc/tdenginereader.md | 145 ++++++++++++++++++ tdenginereader/pom.xml | 90 +++++++++++ tdenginereader/src/main/assembly/package.xml | 34 ++++ .../datax/plugin/reader/TDengineReader.java | 47 ++++++ tdenginereader/src/main/resources/plugin.json | 9 ++ .../main/resources/plugin_job_template.json | 14 ++ tdenginewriter/pom.xml | 2 +- tdenginewriter/src/main/assembly/package.xml | 2 +- .../datax/plugin/writer/JniConnection.java | 18 +-- 13 files changed, 459 insertions(+), 30 deletions(-) create mode 100644 core/src/main/job/mysql2tdengine.json create mode 100644 core/src/main/job/tdengine2tdengine.json create mode 100644 tdenginereader/doc/tdenginereader.md create mode 100644 tdenginereader/pom.xml create mode 100755 tdenginereader/src/main/assembly/package.xml create mode 100644 tdenginereader/src/main/java/com/alibaba/datax/plugin/reader/TDengineReader.java create mode 100755 tdenginereader/src/main/resources/plugin.json create mode 100644 tdenginereader/src/main/resources/plugin_job_template.json diff --git a/core/src/main/job/mysql2tdengine.json b/core/src/main/job/mysql2tdengine.json new file mode 100644 index 00000000..530ee2b5 --- /dev/null +++ b/core/src/main/job/mysql2tdengine.json @@ -0,0 +1,46 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "mysqlreader", + "parameter": { + "username": "root", + "password": "root", + "column": [ + 
"id", + "name" + ], + "splitPk": "db_id", + "connection": [ + { + "table": [ + "test" + ], + "jdbcUrl": [ + "jdbc:mysql://127.0.0.1:3306/database" + ] + } + ] + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "host": "192.168.56.105", + "port": 6030, + "dbname": "test", + "user": "root", + "password": "taosdata", + "batchSize": 1000 + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/core/src/main/job/tdengine2tdengine.json b/core/src/main/job/tdengine2tdengine.json new file mode 100644 index 00000000..750ae202 --- /dev/null +++ b/core/src/main/job/tdengine2tdengine.json @@ -0,0 +1,38 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "tdenginereader", + "parameter": { + "host": "192.168.1.82", + "port": 6030, + "db": "test", + "user": "root", + "password": "taosdata", + "sql": "select * from weather", + "beginDateTime": "2021-01-01 00:00:00", + "endDateTime": "2021-01-02 00:00:00", + "splitInterval": "1h" + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "host": "192.168.56.105", + "port": 6030, + "dbname": "test", + "user": "root", + "password": "taosdata", + "batchSize": 1000 + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/package.xml b/package.xml index a93e945c..1b291a99 100755 --- a/package.xml +++ b/package.xml @@ -103,13 +103,13 @@
datax
- - otsstreamreader/target/datax/ - - **/*.* - - datax - + + otsstreamreader/target/datax/ + + **/*.* + + datax + txtfilereader/target/datax/ @@ -138,7 +138,7 @@ datax - + ftpreader/target/datax/ **/*.* @@ -180,6 +180,13 @@ datax + + tdenginereader/target/datax/ + + **/*.* + + datax + @@ -238,13 +245,6 @@ datax - - tdenginewriter/target/datax/ - - **/*.* - - datax - otswriter/target/datax/ @@ -259,7 +259,7 @@ datax - + oraclewriter/target/datax/ **/*.* @@ -273,7 +273,7 @@ datax - + postgresqlwriter/target/datax/ **/*.* @@ -399,5 +399,12 @@ datax + + tdenginewriter/target/datax/ + + **/*.* + + datax + diff --git a/pom.xml b/pom.xml index cb635ad3..2358e212 100644 --- a/pom.xml +++ b/pom.xml @@ -47,7 +47,7 @@ transformer - + mysqlreader @@ -109,6 +109,7 @@ tdenginewriter + tdenginereader diff --git a/tdenginereader/doc/tdenginereader.md b/tdenginereader/doc/tdenginereader.md new file mode 100644 index 00000000..284b8e6d --- /dev/null +++ b/tdenginereader/doc/tdenginereader.md @@ -0,0 +1,145 @@ +# DataX TDengineReader + +## 1 快速介绍 + +TDengineReader 插件实现了 TDengine 读取数据的功能。 + +## 2 实现原理 + +TDengineReader 通过TDengine的JDBC driver查询获取数据。 + +## 3 功能说明 + +### 3.1 配置样例 + +```json +{ + "job": { + "content": [ + { + "reader": { + "name": "tdenginereader", + "parameter": { + "host": "192.168.1.82", + "port": 6030, + "db": "test", + "user": "root", + "password": "taosdata", + "sql": "select * from weather", + "beginDateTime": "2021-01-01 00:00:00", + "endDateTime": "2021-01-02 00:00:00", + "splitInterval": "1h" + } + }, + "writer": { + "name": "streamwriter", + "parameter": { + "encoding": "UTF-8", + "print": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} +``` + +### 3.2 参数说明 + +* **host** + * 描述:TDengine实例的host。 + * 必选:是
+ * 默认值:无
+* **port** + * 描述:TDengine实例的port。 + * 必选:是
+ * 默认值:无
+* **db**
+ * 描述:待迁移数据所在数据库的名称。
+ * 必选:是
+ * 默认值:无
+* **user**
+ * 描述:TDengine实例的用户名
+ * 必选:是
+ * 默认值:无
+* **password** + * 描述:TDengine实例的密码
+ * 必选:是
+ * 默认值:无
+* **sql** + * 描述:用来筛选迁移数据的sql
+ * 必选:是
+ * 默认值:无
+* **beginDateTime**
+ * 描述:迁移数据的开始时间,格式为 yyyy-MM-dd HH:mm:ss
+ * 必选:是
+ * 默认值:无
+* **endDateTime**
+ * 描述:迁移数据的结束时间,格式为 yyyy-MM-dd HH:mm:ss
+ * 必选:是
+ * 默认值:无
+* **splitInterval**
+ * 描述:按照 splitInterval 切分查询的时间范围,每个 splitInterval 时间段创建一个 task
+ * 必选:否
+ * 默认值:1h
+ +### 3.3 类型转换 + + +## 4 性能报告 + +### 4.1 环境准备 + +#### 4.1.1 数据特征 + +建表语句: + +单行记录类似于: + +#### 4.1.2 机器参数 + +* 执行DataX的机器参数为: + 1. cpu: + 2. mem: + 3. net: 千兆双网卡 + 4. disc: DataX 数据不落磁盘,不统计此项 + +* TDengine数据库机器参数为: + 1. cpu: + 2. mem: + 3. net: 千兆双网卡 + 4. disc: + +#### 4.1.3 DataX jvm 参数 + + -Xms1024m -Xmx1024m -XX:+HeapDumpOnOutOfMemoryError + +### 4.2 测试报告 + +#### 4.2.1 单表测试报告 + +| 通道数| DataX速度(Rec/s)|DataX流量(MB/s)| DataX机器网卡流出流量(MB/s)|DataX机器运行负载|DB网卡进入流量(MB/s)|DB运行负载|DB TPS| +|--------| --------|--------|--------|--------|--------|--------|--------| +|1| | | | | | | | +|4| | | | | | | | +|8| | | | | | | | +|16| | | | | | | | +|32| | | | | | | | + +说明: +1. 这里的单表,主键类型为 bigint(20),自增。 +2. batchSize 和 通道个数,对性能影响较大。 + +#### 4.2.4 性能测试小结 + +1. +2. + +## 5 约束限制 + +## FAQ \ No newline at end of file diff --git a/tdenginereader/pom.xml b/tdenginereader/pom.xml new file mode 100644 index 00000000..66c64eaf --- /dev/null +++ b/tdenginereader/pom.xml @@ -0,0 +1,90 @@ + + + + datax-all + com.alibaba.datax + 0.0.1-SNAPSHOT + + 4.0.0 + + tdenginereader + + + 8 + 8 + + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + + junit + junit + ${junit-version} + test + + + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.12.4 + + + + **/*Test.java + + + + + true + + + + + + + \ No newline at end of file diff --git a/tdenginereader/src/main/assembly/package.xml b/tdenginereader/src/main/assembly/package.xml new file mode 100755 index 00000000..b52f20fb --- /dev/null +++ b/tdenginereader/src/main/assembly/package.xml @@ -0,0 +1,34 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/reader/tdenginereader + + + target/ + + tdenginereader-0.0.1-SNAPSHOT.jar + 
+ plugin/reader/tdenginereader + + + + + + false + plugin/reader/tdenginereader/libs + runtime + + + diff --git a/tdenginereader/src/main/java/com/alibaba/datax/plugin/reader/TDengineReader.java b/tdenginereader/src/main/java/com/alibaba/datax/plugin/reader/TDengineReader.java new file mode 100644 index 00000000..cec88eda --- /dev/null +++ b/tdenginereader/src/main/java/com/alibaba/datax/plugin/reader/TDengineReader.java @@ -0,0 +1,47 @@ +package com.alibaba.datax.plugin.reader; + +import com.alibaba.datax.common.plugin.RecordSender; +import com.alibaba.datax.common.spi.Reader; +import com.alibaba.datax.common.util.Configuration; + +import java.util.List; + +public class TDengineReader extends Reader { + + public static class Job extends Reader.Job { + + @Override + public void init() { + + } + + @Override + public void destroy() { + + } + + @Override + public List split(int adviceNumber) { + return null; + } + } + + public static class Task extends Reader.Task { + + @Override + public void init() { + + } + + @Override + public void destroy() { + + } + + @Override + public void startRead(RecordSender recordSender) { + + } + } + +} diff --git a/tdenginereader/src/main/resources/plugin.json b/tdenginereader/src/main/resources/plugin.json new file mode 100755 index 00000000..dc91982c --- /dev/null +++ b/tdenginereader/src/main/resources/plugin.json @@ -0,0 +1,9 @@ +{ + "name": "tdenginereader", + "class": "com.alibaba.datax.plugin.reader.TDengineReader", + "description": { + "useScene": "data migration from tdengine", + "mechanism": "use JNI to read data from tdengine." 
+ }, + "developer": "zyyang-taosdata" +} \ No newline at end of file diff --git a/tdenginereader/src/main/resources/plugin_job_template.json b/tdenginereader/src/main/resources/plugin_job_template.json new file mode 100644 index 00000000..3e09dffc --- /dev/null +++ b/tdenginereader/src/main/resources/plugin_job_template.json @@ -0,0 +1,14 @@ +{ + "name": "tdenginereader", + "parameter": { + "host": "127.0.0.1", + "port": 6030, + "db": "test", + "user": "root", + "password": "taosdata", + "sql": "select * from weather", + "beginDateTime": "2021-01-01 00:00:00", + "endDateTime": "2021-01-02 00:00:00", + "splitInterval": "1h" + } +} \ No newline at end of file diff --git a/tdenginewriter/pom.xml b/tdenginewriter/pom.xml index 2c294b8f..d658d4a2 100644 --- a/tdenginewriter/pom.xml +++ b/tdenginewriter/pom.xml @@ -11,7 +11,7 @@ com.alibaba.datax.tdenginewriter tdenginewriter - 1.0.0 + 0.0.1-SNAPSHOT 8 diff --git a/tdenginewriter/src/main/assembly/package.xml b/tdenginewriter/src/main/assembly/package.xml index f7a7d0bf..d3b75ea2 100755 --- a/tdenginewriter/src/main/assembly/package.xml +++ b/tdenginewriter/src/main/assembly/package.xml @@ -18,7 +18,7 @@ target/ - tdenginewriter-1.0.0.jar + tdenginewriter-0.0.1-SNAPSHOT.jar plugin/writer/tdenginewriter diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java index b1670633..3ce786e5 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java @@ -51,11 +51,15 @@ public class JniConnection { if (this.conn == JNI_NULL_POINTER) { throw new Exception("JNI connection is NULL"); } - long code = insertOpentsdbJson(json, this.conn); - if (code != JNI_SUCCESSFUL) { - String errMsg = getErrMsgByCode(code); + + long result = insertOpentsdbJson(json, this.conn); + int errCode = 
getErrCodeImp(this.conn, result); + if (errCode != JNI_SUCCESSFUL) { + String errMsg = getErrMsgImp(result); + freeResultSetImp(this.conn, result); throw new Exception(errMsg); } + freeResultSetImp(this.conn, result); } public void close() throws Exception { @@ -70,19 +74,13 @@ public class JniConnection { private static native int setOptions(int optionIndex, String optionValue); - private static native String getTsCharset(); - private native long connectImp(String host, int port, String dbName, String user, String password); - private native long executeQueryImp(byte[] sqlBytes, long connection); - private native int getErrCodeImp(long connection, long pSql); private native String getErrMsgImp(long pSql); - private native String getErrMsgByCode(long code); - - private native int getAffectedRowsImp(long connection, long pSql); + private native void freeResultSetImp(long connection, long pSql); private native int closeConnectionImp(long connection); From e60948b604f8887addc806160eb89984bd477333 Mon Sep 17 00:00:00 2001 From: dingbo Date: Thu, 11 Nov 2021 15:24:09 +0800 Subject: [PATCH 12/33] add option authDB to mongodb reader doc --- mongodbreader/doc/mongodbreader.md | 1 + 1 file changed, 1 insertion(+) diff --git a/mongodbreader/doc/mongodbreader.md b/mongodbreader/doc/mongodbreader.md index b61493e6..99d25731 100644 --- a/mongodbreader/doc/mongodbreader.md +++ b/mongodbreader/doc/mongodbreader.md @@ -127,6 +127,7 @@ MongoDBReader通过Datax框架从MongoDB并行的读取数据,通过主控的J * address: MongoDB的数据地址信息,因为MonogDB可能是个集群,则ip端口信息需要以Json数组的形式给出。【必填】 * userName:MongoDB的用户名。【选填】 * userPassword: MongoDB的密码。【选填】 +* authDb: MongoDB认证数据库【选填】 * collectionName: MonogoDB的集合名。【必填】 * column:MongoDB的文档列名。【必填】 * name:Column的名字。【必填】 From 3de5a8f71517268afe77415f71f6b5511d81a8b6 Mon Sep 17 00:00:00 2001 From: dingbo Date: Thu, 11 Nov 2021 17:38:14 +0800 Subject: [PATCH 13/33] add test job configure --- core/src/main/job/mongodb2tdengine.json | 75 +++++++++++++++++++++++++ 1 file changed, 75 
insertions(+) create mode 100644 core/src/main/job/mongodb2tdengine.json diff --git a/core/src/main/job/mongodb2tdengine.json b/core/src/main/job/mongodb2tdengine.json new file mode 100644 index 00000000..0667bddd --- /dev/null +++ b/core/src/main/job/mongodb2tdengine.json @@ -0,0 +1,75 @@ +{ + "job": { + "setting": { + "speed": { + "channel": 2 + } + }, + "content": [ + { + "reader": { + "name": "mongodbreader", + "parameter": { + "address": [ + "123.56.104.14:27017" + ], + "userName": "admin678", + "mechanism": "SCRAM-SHA-1", + "userPassword": "huwG86123", + "authDb": "admin", + "dbName": "test", + "collectionName": "cu_market_data", + "column": [ + { + "name": "instrumentID", + "type": "string" + }, + { + "name": "tradeTime", + "type": "date" + }, + { + "name": "lastPrice", + "type": "double" + }, + { + "name": "askPrice1", + "type": "double" + }, + { + "name": "bidPrice1", + "type": "double" + }, + { + "name": "volume", + "type": "int" + } + ] + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "host": "123.56.104.14", + "port": 6030, + "dbname": "test", + "user": "root", + "password": "taosdata", + "measurement": "market_snapshot", + "tag_set": { + "product": "cu", + "instrumentID": 0 + }, + "field_set": { + "lastPrice": 2, + "askPrice1": 3, + "bidPrice1": 4, + "volume": 5 + }, + "timestamp": 1 + } + } + } + ] + } +} \ No newline at end of file From 906cc24ba16a8410cf935a2931b5a313c4446045 Mon Sep 17 00:00:00 2001 From: zyyang Date: Fri, 12 Nov 2021 15:48:24 +0800 Subject: [PATCH 14/33] [TD-10787]: migrate mysql data to tdengine --- core/src/main/job/mysql2tdengine.json | 12 +-- core/src/main/job/opentsdb2stream.json | 31 -------- .../com/alibaba/datax/core/EngineTest.java | 2 +- tdenginewriter/pom.xml | 6 ++ .../datax/plugin/writer/CommonUtil.java | 20 +++++ .../datax/plugin/writer/DataHandler.java | 5 +- .../plugin/writer/DataHandlerFactory.java | 2 + .../plugin/writer/DefaultDataHandler.java | 5 +- 
.../datax/plugin/writer/MysqlDataHandler.java | 73 +++++++++++++++++++ .../plugin/writer/OpentsdbDataHandler.java | 4 +- .../datax/plugin/writer/TDengineWriter.java | 20 ++--- 11 files changed, 125 insertions(+), 55 deletions(-) delete mode 100644 core/src/main/job/opentsdb2stream.json create mode 100644 tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/CommonUtil.java create mode 100644 tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/MysqlDataHandler.java diff --git a/core/src/main/job/mysql2tdengine.json b/core/src/main/job/mysql2tdengine.json index 530ee2b5..7978fbf5 100644 --- a/core/src/main/job/mysql2tdengine.json +++ b/core/src/main/job/mysql2tdengine.json @@ -6,19 +6,18 @@ "name": "mysqlreader", "parameter": { "username": "root", - "password": "root", + "password": "123456", "column": [ - "id", - "name" + "*" ], - "splitPk": "db_id", + "splitPk": "f1", "connection": [ { "table": [ - "test" + "weather" ], "jdbcUrl": [ - "jdbc:mysql://127.0.0.1:3306/database" + "jdbc:mysql://192.168.56.105:3306/test?useSSL=false&useUnicode=true&characterEncoding=utf8" ] } ] @@ -32,6 +31,7 @@ "dbname": "test", "user": "root", "password": "taosdata", + "table": "weather", "batchSize": 1000 } } diff --git a/core/src/main/job/opentsdb2stream.json b/core/src/main/job/opentsdb2stream.json deleted file mode 100644 index 1ea43204..00000000 --- a/core/src/main/job/opentsdb2stream.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "job": { - "content": [ - { - "reader": { - "name": "opentsdbreader", - "parameter": { - "endpoint": "http://192.168.1.180:4242", - "column": [ - "weather.temperature" - ], - "beginDateTime": "2021-01-01 00:00:00", - "endDateTime": "2021-01-01 01:00:00" - } - }, - "writer": { - "name": "streamwriter", - "parameter": { - "encoding": "UTF-8", - "print": true - } - } - } - ], - "setting": { - "speed": { - "channel": 1 - } - } - } -} \ No newline at end of file diff --git a/core/src/test/java/com/alibaba/datax/core/EngineTest.java 
b/core/src/test/java/com/alibaba/datax/core/EngineTest.java index 92c50a77..fe4dff8b 100644 --- a/core/src/test/java/com/alibaba/datax/core/EngineTest.java +++ b/core/src/test/java/com/alibaba/datax/core/EngineTest.java @@ -9,7 +9,7 @@ public class EngineTest { public void test() { System.out.println(System.getProperty("java.library.path")); // String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/main/job/opentsdb2tdengine.json"}; - String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/main/job/stream2tdengine.json"}; + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/main/job/mysql2tdengine.json"}; System.setProperty("datax.home", "../target/datax/datax"); try { Engine.entry(params); diff --git a/tdenginewriter/pom.xml b/tdenginewriter/pom.xml index d658d4a2..d8df0c52 100644 --- a/tdenginewriter/pom.xml +++ b/tdenginewriter/pom.xml @@ -31,6 +31,12 @@ + + com.taosdata.jdbc + taos-jdbcdriver + 2.0.34 + + junit junit diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/CommonUtil.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/CommonUtil.java new file mode 100644 index 00000000..573b00f7 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/CommonUtil.java @@ -0,0 +1,20 @@ +package com.alibaba.datax.plugin.writer; + +import com.alibaba.datax.common.util.Configuration; + +import java.util.Properties; +import java.util.Set; + +public class CommonUtil { + + public static Properties toProperties(Configuration configuration) { + Set keys = configuration.getKeys(); + Properties properties = new Properties(); + for (String key : keys) { + String value = configuration.getString(key); + if (value != null) + properties.setProperty(key, value); + } + return properties; + } +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandler.java index 
94d1db30..e7a1328b 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandler.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandler.java @@ -1,10 +1,9 @@ package com.alibaba.datax.plugin.writer; import com.alibaba.datax.common.plugin.RecordReceiver; - -import java.util.Properties; +import com.alibaba.datax.common.util.Configuration; public interface DataHandler { - long handle(RecordReceiver lineReceiver, Properties properties); + long handle(RecordReceiver lineReceiver, Configuration properties); } diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandlerFactory.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandlerFactory.java index a488e7d5..26b3e475 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandlerFactory.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandlerFactory.java @@ -5,6 +5,8 @@ public class DataHandlerFactory { public static DataHandler build(String peerPluginName) { if (peerPluginName.equals("opentsdbreader")) return new OpentsdbDataHandler(); + if (peerPluginName.equals("mysqlreader")) + return new MysqlDataHandler(); return new DefaultDataHandler(); } } diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DefaultDataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DefaultDataHandler.java index a1d52d75..54d32d7b 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DefaultDataHandler.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DefaultDataHandler.java @@ -3,16 +3,15 @@ package com.alibaba.datax.plugin.writer; import com.alibaba.datax.common.element.Column; import com.alibaba.datax.common.element.Record; import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.util.Configuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import 
java.util.Properties; - public class DefaultDataHandler implements DataHandler { private static final Logger LOG = LoggerFactory.getLogger(DefaultDataHandler.class); @Override - public long handle(RecordReceiver lineReceiver, Properties properties) { + public long handle(RecordReceiver lineReceiver, Configuration configuration) { long count = 0; Record record; while ((record = lineReceiver.getFromReader()) != null) { diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/MysqlDataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/MysqlDataHandler.java new file mode 100644 index 00000000..fd52b774 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/MysqlDataHandler.java @@ -0,0 +1,73 @@ +package com.alibaba.datax.plugin.writer; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.util.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.util.Properties; + +public class MysqlDataHandler implements DataHandler { + private static final Logger LOG = LoggerFactory.getLogger(MysqlDataHandler.class); + Connection conn; + + @Override + public long handle(RecordReceiver lineReceiver, Configuration configuration) { + Properties properties = CommonUtil.toProperties(configuration); + + long count = 0; + try { + conn = getConnection(properties); + + Record record; + while ((record = lineReceiver.getFromReader()) != null) { + + int recordLength = record.getColumnNumber(); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < recordLength; i++) { + Column column = record.getColumn(i); + sb.append(column.asString()).append("\t"); + } + sb.setLength(sb.length() - 1); + LOG.debug(sb.toString()); + + count++; + } + + + } finally { + if (conn != 
null) { + try { + conn.close(); + } catch (SQLException e) { + e.printStackTrace(); + } + } + } + + return count; + } + + private Connection getConnection(Properties properties) { + String host = properties.getProperty(Key.HOST); + int port = Integer.parseInt(properties.getProperty(Key.PORT)); + String dbname = properties.getProperty(Key.DBNAME); + String user = properties.getProperty(Key.USER); + String password = properties.getProperty(Key.PASSWORD); + String url = "jdbc:TAOS://" + host + ":" + port + "/" + dbname + "?user=" + user + "&password=" + password; + Connection connection = null; + try { + connection = DriverManager.getConnection(url, properties); + LOG.info("TDengine connection established, host: " + host + ", port: " + port + ", dbname: " + dbname + ", user: " + user); + } catch (SQLException e) { + LOG.error(e.getMessage()); + e.printStackTrace(); + } + return connection; + } +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/OpentsdbDataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/OpentsdbDataHandler.java index 599e5f3e..2096909b 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/OpentsdbDataHandler.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/OpentsdbDataHandler.java @@ -4,6 +4,7 @@ import com.alibaba.datax.common.element.Column; import com.alibaba.datax.common.element.Record; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.util.Configuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -14,8 +15,9 @@ public class OpentsdbDataHandler implements DataHandler { private static final String DEFAULT_BATCH_SIZE = "1"; @Override - public long handle(RecordReceiver lineReceiver, Properties properties) { + public long handle(RecordReceiver lineReceiver, Configuration configuration) { // opentsdb json protocol use JNI and schemaless API to 
write + Properties properties = CommonUtil.toProperties(configuration); String host = properties.getProperty(Key.HOST); int port = Integer.parseInt(properties.getProperty(Key.PORT)); String dbname = properties.getProperty(Key.DBNAME); diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java index 84600802..73087acb 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java @@ -9,13 +9,19 @@ import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; -import java.util.Properties; -import java.util.Set; public class TDengineWriter extends Writer { private static final String PEER_PLUGIN_NAME = "peerPluginName"; + static { + try { + Class.forName("com.taosdata.jdbc.TSDBDriver"); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } + } + public static class Job extends Writer.Job { private Configuration originalConfig; @@ -49,6 +55,7 @@ public class TDengineWriter extends Writer { @Override public void init() { this.writerSliceConfig = getPluginJobConf(); + } @Override @@ -58,17 +65,10 @@ public class TDengineWriter extends Writer { @Override public void startWrite(RecordReceiver lineReceiver) { - Set keys = this.writerSliceConfig.getKeys(); - Properties properties = new Properties(); - for (String key : keys) { - String value = this.writerSliceConfig.getString(key); - properties.setProperty(key, value); - } - String peerPluginName = this.writerSliceConfig.getString(PEER_PLUGIN_NAME); LOG.debug("start to handle record from: " + peerPluginName); DataHandler handler = DataHandlerFactory.build(peerPluginName); - long records = handler.handle(lineReceiver, properties); + long records = handler.handle(lineReceiver, writerSliceConfig); LOG.debug("handle data finished, records: " + records); } From 
bf01999222ff86d0275c1f7112db9846e814b729 Mon Sep 17 00:00:00 2001 From: dingbo Date: Thu, 18 Nov 2021 10:28:20 +0800 Subject: [PATCH 15/33] mongodb2tdengine support --- core/src/main/job/mongodb2tdengine.json | 11 +- tdenginewriter/doc/tdenginewriter.md | 260 ++++++++++++++---- tdenginewriter/pom.xml | 11 +- .../plugin/writer/DefaultDataHandler.java | 34 --- .../{ => tdenginewriter}/DataHandler.java | 2 +- .../DataHandlerFactory.java | 2 +- .../tdenginewriter/DefaultDataHandler.java | 101 +++++++ .../tdenginewriter/JDBCBatchWriter.java | 149 ++++++++++ .../{ => tdenginewriter}/JniConnection.java | 2 +- .../writer/{ => tdenginewriter}/Key.java | 7 +- .../OpentsdbDataHandler.java | 2 +- .../writer/tdenginewriter/SchemaManager.java | 255 +++++++++++++++++ .../{ => tdenginewriter}/TDengineWriter.java | 10 +- .../TDengineWriterErrorCode.java | 5 +- ...gin_writer_tdenginewriter_JniConnection.h} | 0 tdenginewriter/src/main/resources/plugin.json | 4 +- .../JniConnectionTest.java | 2 +- .../tdenginewriter/TDengineWriterTest.java | 21 ++ 18 files changed, 775 insertions(+), 103 deletions(-) delete mode 100644 tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DefaultDataHandler.java rename tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/{ => tdenginewriter}/DataHandler.java (77%) rename tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/{ => tdenginewriter}/DataHandlerFactory.java (81%) create mode 100644 tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandler.java create mode 100644 tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java rename tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/{ => tdenginewriter}/JniConnection.java (98%) rename tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/{ => tdenginewriter}/Key.java (52%) rename tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/{ => tdenginewriter}/OpentsdbDataHandler.java 
(98%) create mode 100644 tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java rename tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/{ => tdenginewriter}/TDengineWriter.java (84%) rename tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/{ => tdenginewriter}/TDengineWriterErrorCode.java (75%) rename tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/{com_alibaba_datax_plugin_writer_JniConnection.h => tdenginewriter/com_alibaba_datax_plugin_writer_tdenginewriter_JniConnection.h} (100%) rename tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/{ => tdenginewriter}/JniConnectionTest.java (90%) create mode 100644 tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriterTest.java diff --git a/core/src/main/job/mongodb2tdengine.json b/core/src/main/job/mongodb2tdengine.json index 0667bddd..4cfc987e 100644 --- a/core/src/main/job/mongodb2tdengine.json +++ b/core/src/main/job/mongodb2tdengine.json @@ -55,18 +55,21 @@ "dbname": "test", "user": "root", "password": "taosdata", - "measurement": "market_snapshot", - "tag_set": { + "stable": "market_snapshot", + "batchSize": 35, + "tagColumn": { "product": "cu", "instrumentID": 0 }, - "field_set": { + "fieldColumn": { "lastPrice": 2, "askPrice1": 3, "bidPrice1": 4, "volume": 5 }, - "timestamp": 1 + "timestampColumn": { + "tradeTime": 1 + } } } } diff --git a/tdenginewriter/doc/tdenginewriter.md b/tdenginewriter/doc/tdenginewriter.md index 8e55b189..c9c222a2 100644 --- a/tdenginewriter/doc/tdenginewriter.md +++ b/tdenginewriter/doc/tdenginewriter.md @@ -2,20 +2,21 @@ ## 1 快速介绍 -TDengineWriter 插件实现了写入数据到 TDengine 的功能。 在底层实现上, TDengineWriter 通过 JNI的方式调用libtaos.so/tao.dll中的方法,连接 TDengine -数据库实例,并执行schemaless的写入。 TDengineWriter 面向ETL开发工程师,他们使用 TDengineWriter 从数仓导入数据到 TDengine。同时,TDengineWriter -亦可以作为数据迁移工具为DBA等用户提供服务。 +TDengineWriter插件实现了写入数据到TDengine数据库功能。可用于离线同步其它数据库的数据到TDengine。 ## 2 实现原理 -TDengineWriter 通过 DataX 
框架获取 Reader -生成的协议数据,根据reader的类型解析数据,通过JNI方式调用libtaos.so(或taos.dll)中的方法,使用schemaless的方式写入到TDengine。 +TDengineWriter 通过 DataX 框架获取 Reader生成的协议数据,根据reader的类型解析数据。目前有两种写入方式: + +1. 对于OpenTSDBReader, TDengineWriter通过JNI方式调用TDengine客户端库文件(taos.lib或taos.dll)中的方法,使用[schemaless的方式](https://www.taosdata.com/cn/documentation/insert#schemaless)写入。 + +2. 对于其它数据源,会根据配置生成SQL语句, 通过[taos-jdbcdriver](https://www.taosdata.com/cn/documentation/connector/java)批量写入。 + +这样区分的原因是OpenTSDBReader将opentsdb的数据统一读取为json字符串,Writer端接收到的数据只有1列。而其它Reader插件一般会把数据放在不同列。 ## 3 功能说明 - -### 3.1 配置样例 - -* 这里使用一份从OpenTSDB产生到 TDengine 导入的数据。 +### 3.1 从OpenTSDB到TDengine +#### 3.1.1 配置样例 ```json { @@ -54,46 +55,189 @@ TDengineWriter 通过 DataX 框架获取 Reader } ``` -### 3.2 参数说明 +#### 3.1.2 参数说明 -* **host** - * 描述:TDengine实例的host。 +| 参数 | 描述 | 是否必选 | 默认值 | +| --------- | -------------------- | -------- | -------- | +| host | TDengine实例的host | 是 | 无 | +| port | TDengine实例的port | 是 | 无 | +| user | TDengine实例的用户名 | 否 | root | +| password | TDengine实例的密码 | 否 | taosdata | +| dbname | 目的数据库的名称 | 是 | 无 | +| batchSize | 每次批量插入多少记录 | 否 | 1 | - * 必选:是
- * 默认值:无
-* **port** - * 描述:TDengine实例的port。 - * 必选:是
- * 默认值:无
-* **dbname** - * 描述:目的数据库的名称。 +#### 3.1.3 类型转换 - * 必选:是
+目前,由于OpenTSDBReader将opentsdb的数据统一读取为json字符串,TDengineWriter 在做Opentsdb到TDengine的迁移时,按照以下类型进行处理: - * 默认值:无
-* **username** - * 描述:TDengine实例的用户名
- * 必选:是
- * 默认值:无
-* **password** - * 描述:TDengine实例的密码
- * 必选:是
- * 默认值:无
- -### 3.3 类型转换 - -目前,由于opentsdbreader将opentsdb的数据统一读取为json字符串,TDengineWriter 在做Opentsdb到TDengine的迁移时,按照以下类型进行处理: - -| OpenTSDB数据类型 | DataX 内部类型| TDengine 数据类型 | -| -------- | ----- | -------- | +| OpenTSDB数据类型 | DataX 内部类型 | TDengine 数据类型 | +| ---------------- | -------------- | ----------------- | | timestamp | Date | timestamp | | Integer(value) | Double | double | -| Float(value) | Double | double | -| String(value) | String | binary | +| Float(value) | Double | double | +| String(value) | String | binary | | Integer(tag) | String | binary | -| Float(tag) | String |binary | -| String(tag) | String |binary | +| Float(tag) | String | binary | +| String(tag) | String | binary | + +### 3.2 从MongoDB到TDengine + +#### 3.2.1 配置样例 +```json +{ + "job": { + "setting": { + "speed": { + "channel": 2 + } + }, + "content": [ + { + "reader": { + "name": "mongodbreader", + "parameter": { + "address": [ + "127.0.0.1:27017" + ], + "userName": "user", + "mechanism": "SCRAM-SHA-1", + "userPassword": "password", + "authDb": "admin", + "dbName": "test", + "collectionName": "stock", + "column": [ + { + "name": "stockID", + "type": "string" + }, + { + "name": "tradeTime", + "type": "date" + }, + { + "name": "lastPrice", + "type": "double" + }, + { + "name": "askPrice1", + "type": "double" + }, + { + "name": "bidPrice1", + "type": "double" + }, + { + "name": "volume", + "type": "int" + } + ] + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "host": "localhost", + "port": 6030, + "dbname": "test", + "user": "root", + "password": "taosdata", + "stable": "stock", + "tagColumn": { + "industry": "energy", + "stockID": 0 + }, + "fieldColumn": { + "lastPrice": 2, + "askPrice1": 3, + "bidPrice1": 4, + "volume": 5 + }, + "timestampColumn": { + "tradeTime": 1 + } + } + } + } + ] + } +} +``` + +**注:本配置的writer部分同样适用于关系型数据库** + + +#### 3.2.2 参数说明 +| 参数 | 描述 | 是否必选 | 默认值 | 备注 | +| --------------- | -------------------- | ---------------- | -------- | ------------------ | +| host | 
TDengine实例的host | 是 | 无 | +| port | TDengine实例的port | 是 | 无 | +| user | TDengine实例的用户名 | 否 | root | +| password | TDengine实例的密码 | 否 | taosdata | +| dbname | 目的数据库的名称 | 是 | 无 | +| batchSize | 每次批量插入多少记录 | 否 | 1000 | +| stable | 目标超级表的名称 | 是(OpenTSDB除外) | 无 | +| tagColumn | 标签列的列名和位置 | 否 | 无 | 位置索引均从0开始 | +| fieldColumn | 字段列的列名和位置 | 否 | 无 | | +| timestampColumn | 时间戳列的列名和位置 | 否 | 无 | 时间戳列只能有一个 | + +#### 3.3.3 自动建表规则 +##### 3.3.3.1 超级表创建规则 + +如果配置了tagColumn、 fieldColumn和timestampColumn将会在插入第一条数据前,自动创建超级表。
+数据列的类型从第1条记录自动推断, 标签列默认类型为`NCHAR(64)`, 比如示例配置,可能生成以下建表语句: + +```sql +CREATE STABLE IF NOT EXISTS market_snapshot ( + tradetime TIMESTAMP, + lastprice DOUBLE, + askprice1 DOUBLE, + bidprice1 DOUBLE, + volume INT +) +TAGS( + industry NCHAR(64), + stockID NCHAR(64) +); +``` + +##### 3.3.3.2 子表创建规则 + +子表结构与超级表相同,子表表名生成规则: +1. 将标签的value 组合成为如下的字符串: `tag_value1!tag_value2!tag_value3`。 +2. 计算该字符串的 MD5 散列值 "md5_val"。 +3. "t_md5val"作为子表名。其中的 "t" 是固定的前缀。 + +#### 3.3.4 用户提前建表 + +如果你已经创建好目标超级表,那么tagColumn、 fieldColumn和timestampColumn三个字段均可省略, 插件将通过执行`describe stableName`获取表结构的信息。 +此时要求接收到的Record中Column的顺序和执行`describe stableName`返回的列顺序相同, 比如通过`describe stableName`返回以下内容: +``` + Field | Type | Length | Note | +================================================================================= + ts | TIMESTAMP | 8 | | + current | DOUBLE | 8 | | + location | BINARY | 10 | TAG | +``` +那么插件收到的数据第1列必须代表时间戳,第2列必须代表电流,第3列必须代表位置。 + +#### 3.3.5 注意事项 + +1. tagColumn、 fieldColumn和timestampColumn三个字段用于描述目标表的结构信息,这三个配置字段必须同时存在或同时省略。 +2. 如果存在以上三个配置,且目标表也已经存在,则两者必须一致。**一致性**由用户自己保证,插件不做检查。不一致可能会导致插入失败或插入数据错乱。 +3. 
插件优先使用配置文件中指定的表结构。 + +#### 3.3.6 类型转换 + +| MongoDB 数据类型 | DataX 内部类型 | TDengine 数据类型 | +| ---------------- | -------------- | ----------------- | +| int, Long | Long | BIGINT | +| double | Double | DOUBLE | +| string, array | String | NCHAR(64) | +| date | Date | TIMESTAMP | +| boolean | Boolean | BOOL | +| bytes | Bytes | BINARY | + ## 4 性能报告 @@ -127,13 +271,13 @@ TDengineWriter 通过 DataX 框架获取 Reader #### 4.2.1 单表测试报告 -| 通道数| DataX速度(Rec/s)|DataX流量(MB/s)| DataX机器网卡流出流量(MB/s)|DataX机器运行负载|DB网卡进入流量(MB/s)|DB运行负载|DB TPS| -|--------| --------|--------|--------|--------|--------|--------|--------| -|1| | | | | | | | -|4| | | | | | | | -|8| | | | | | | | -|16| | | | | | | | -|32| | | | | | | | +| 通道数 | DataX速度(Rec/s) | DataX流量(MB/s) | DataX机器网卡流出流量(MB/s) | DataX机器运行负载 | DB网卡进入流量(MB/s) | DB运行负载 | DB TPS | +| ------ | ---------------- | --------------- | --------------------------- | ----------------- | -------------------- | ---------- | ------ | +| 1 | | | | | | | | +| 4 | | | | | | | | +| 8 | | | | | | | | +| 16 | | | | | | | | +| 32 | | | | | | | | 说明: @@ -143,9 +287,23 @@ TDengineWriter 通过 DataX 框架获取 Reader #### 4.2.4 性能测试小结 -1. -2. ## 5 约束限制 -## FAQ \ No newline at end of file +## FAQ + +### 如何选取要同步的数据的范围? + +数据范围的选取在Reader插件端配置,对于不同的Reader插件配置方法往往不同。比如对于mysqlreader, 可以用sql语句指定数据范围。对于opentsdbreader, 用beginDateTime和endDateTime两个配置项指定数据范围。 + +### 如何一次导入多张源表? + +如果Reader插件支持一次读多张表,Writer插件就能一次导入多张表。如果Reader不支持一次读多张表,可以建多个job,分别导入。Writer插件只负责写数据。 + +### 1张源表导入之后对应TDengine中多少张表? + +这是由tagColumn决定的,如果所有tag列的值都相同,目标表也只有一个。源表有多少不同的tag组合,目标超表就会有多少子表。 + +### 源表和目标表的字段顺序一致吗? + +TDengine要求每个表第一列是时间戳列,后边是普通字段,最后是标签列。如果源表不是这个顺序,插件在自动建表时自动调整。 \ No newline at end of file diff --git a/tdenginewriter/pom.xml index d658d4a2..8eb94b33 100644 --- a/tdenginewriter/pom.xml +++ b/tdenginewriter/pom.xml @@ -19,6 +19,11 @@
+ + com.taosdata.jdbc + taos-jdbcdriver + 2.0.34 + com.alibaba.datax datax-common @@ -37,7 +42,11 @@ ${junit-version} test - + + org.apache.commons + commons-lang3 + ${commons-lang3-version} + diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DefaultDataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DefaultDataHandler.java deleted file mode 100644 index a1d52d75..00000000 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DefaultDataHandler.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.alibaba.datax.plugin.writer; - -import com.alibaba.datax.common.element.Column; -import com.alibaba.datax.common.element.Record; -import com.alibaba.datax.common.plugin.RecordReceiver; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Properties; - -public class DefaultDataHandler implements DataHandler { - private static final Logger LOG = LoggerFactory.getLogger(DefaultDataHandler.class); - - @Override - public long handle(RecordReceiver lineReceiver, Properties properties) { - long count = 0; - Record record; - while ((record = lineReceiver.getFromReader()) != null) { - - int recordLength = record.getColumnNumber(); - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < recordLength; i++) { - Column column = record.getColumn(i); - sb.append(column.asString()).append("\t"); - } - sb.setLength(sb.length() - 1); - LOG.debug(sb.toString()); - - count++; - } - return count; - } - -} \ No newline at end of file diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DataHandler.java similarity index 77% rename from tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandler.java rename to tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DataHandler.java index 94d1db30..686ac27b 100644 --- 
a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandler.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DataHandler.java @@ -1,4 +1,4 @@ -package com.alibaba.datax.plugin.writer; +package com.alibaba.datax.plugin.writer.tdenginewriter; import com.alibaba.datax.common.plugin.RecordReceiver; diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandlerFactory.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DataHandlerFactory.java similarity index 81% rename from tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandlerFactory.java rename to tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DataHandlerFactory.java index a488e7d5..1f740d7e 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/DataHandlerFactory.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DataHandlerFactory.java @@ -1,4 +1,4 @@ -package com.alibaba.datax.plugin.writer; +package com.alibaba.datax.plugin.writer.tdenginewriter; public class DataHandlerFactory { diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandler.java new file mode 100644 index 00000000..733f49c5 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandler.java @@ -0,0 +1,101 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.taosdata.jdbc.TSDBPreparedStatement; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.util.Properties; + +/** + * 默认DataHandler + */ +public class 
DefaultDataHandler implements DataHandler { + private static final Logger LOG = LoggerFactory.getLogger(DefaultDataHandler.class); + + static { + try { + Class.forName("com.taosdata.jdbc.TSDBDriver"); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } + } + + @Override + public long handle(RecordReceiver lineReceiver, Properties properties) { + SchemaManager schemaManager = new SchemaManager(properties); + if (!schemaManager.configValid()) { + return 0; + } + + try { + Connection conn = getTaosConnection(properties); + if (conn == null) { + return 0; + } + if (schemaManager.shouldGuessSchema()) { + LOG.info("无法从配置文件获取表结构信息,尝试从数据库获取"); + boolean success = schemaManager.getFromDB(conn); + if (!success) { + return 0; + } + } else { + + } + int batchSize = Integer.parseInt(properties.getProperty(Key.BATCH_SIZE, "1000")); + return write(lineReceiver, conn, batchSize, schemaManager); + } catch (Exception e) { + LOG.error("write failed " + e.getMessage()); + e.printStackTrace(); + } + return 0; + } + + + private Connection getTaosConnection(Properties properties) throws SQLException { + // 检查必要参数 + String host = properties.getProperty(Key.HOST); + String port = properties.getProperty(Key.PORT); + String dbname = properties.getProperty(Key.DBNAME); + String user = properties.getProperty(Key.USER); + String password = properties.getProperty(Key.PASSWORD); + if (host == null || port == null || dbname == null || user == null || password == null) { + String keys = String.join(" ", Key.HOST, Key.PORT, Key.DBNAME, Key.USER, Key.PASSWORD); + LOG.error("Required options missing, please check: " + keys); + return null; + } + String jdbcUrl = String.format("jdbc:TAOS://%s:%s/%s?user=%s&password=%s", host, port, dbname, user, password); + LOG.info("TDengine connection established, host:{} port:{} dbname:{} user:{}", host, port, dbname, user); + return DriverManager.getConnection(jdbcUrl); + } + + /** + * 使用SQL批量写入
+ * + * @return 成功写入记录数 + * @throws SQLException + */ + private long write(RecordReceiver lineReceiver, Connection conn, int batchSize, SchemaManager scm) throws SQLException { + Record record = lineReceiver.getFromReader(); + if (record == null) { + return 0; + } + if (scm.shouldCreateTable()) { + scm.createSTable(conn, record); + } + String pq = String.format("INSERT INTO ? USING %s TAGS(%s) (%s) values (%s)", scm.getStable(), scm.getTagValuesPlaceHolder(), scm.getJoinedFieldNames(), scm.getFieldValuesPlaceHolder()); + LOG.info("Prepared SQL: {}", pq); + try (TSDBPreparedStatement stmt = (TSDBPreparedStatement) conn.prepareStatement(pq)) { + JDBCBatchWriter batchWriter = new JDBCBatchWriter(stmt, scm, batchSize); + do { + batchWriter.append(record); + } while ((record = lineReceiver.getFromReader()) != null); + batchWriter.flush(); + return batchWriter.getCount(); + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java new file mode 100644 index 00000000..17023d03 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java @@ -0,0 +1,149 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.taosdata.jdbc.TSDBPreparedStatement; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * 使用JDBC原生写入接口批量写入。
+ * 有两个限制条件导致批量写入的代码逻辑过于复杂,以至于需要开发新的类来封装。
+ * 1. 用户必须提前把需要批量写入的数据搜集到ArrayList中 + * 2. 每批写入的表名必须相同。 + * 这个类的实现逻辑是: + * 1. 先把属于同一子表的Record缓存起来 + * 2. 缓存的数量达到batchSize阈值,自动执行一次批量写入 + * 3. 最后一批数据需要用户手动flush才能写入 + */ +public class JDBCBatchWriter { + public static final Logger LOG = LoggerFactory.getLogger(JDBCBatchWriter.class); + + private TSDBPreparedStatement stmt; + private SchemaManager scm; + private int batchSize; + // 缓存Record, key为tableName + Map> buf = new HashMap<>(); + // 缓存表的标签值, key为tableName + Map tableTagValues = new HashMap<>(); + private long sucCount = 0; + private final int tsColIndex; + private List fieldList; + private Map fieldIndexMap; + + public JDBCBatchWriter(TSDBPreparedStatement stmt, SchemaManager scm, int batchSize) { + this.stmt = stmt; + this.scm = scm; + this.batchSize = batchSize; + this.tsColIndex = scm.getTsColIndex(); + this.fieldList = scm.getFieldList(); + this.fieldIndexMap = scm.getFieldIndexMap(); + } + + + public void append(Record record) throws SQLException { + String[] tagValues = scm.getTagValuesFromRecord(record); + String tableName = scm.computeTableName(tagValues); + if (buf.containsKey(tableName)) { + List lis = buf.get(tableName); + lis.add(record); + if (lis.size() == batchSize) { + executeBatch(tableName); + lis.clear(); + } + } else { + List lis = new ArrayList<>(batchSize); + lis.add(record); + buf.put(tableName, lis); + tableTagValues.put(tableName, tagValues); + } + } + + /** + * 执行单表批量写入 + * + * @param tableName + * @throws SQLException + */ + private void executeBatch(String tableName) throws SQLException { + // 表名 + stmt.setTableName(tableName); + List records = buf.get(tableName); + // 标签 + String[] tagValues = tableTagValues.get(tableName); + LOG.debug("executeBatch {}", String.join(",", tagValues)); + for (int i = 0; i < tagValues.length; ++i) { + stmt.setTagNString(i, tagValues[i]); + } + // 时间戳 + ArrayList tsList = records.stream().map(r -> r.getColumn(tsColIndex).asDate().getTime()).collect(Collectors.toCollection(ArrayList::new)); + 
stmt.setTimestamp(0, tsList); + // 字段 + Record record = records.get(0); + for (int i = 0; i < fieldList.size(); ) { + String fieldName = fieldList.get(i); + int index = fieldIndexMap.get(fieldName); + Column column = record.getColumn(index); + switch (column.getType()) { + case LONG: + ArrayList lisLong = records.stream().map(r -> r.getColumn(index).asBigInteger().longValue()).collect(Collectors.toCollection(ArrayList::new)); + stmt.setLong(++i, lisLong); + break; + case DOUBLE: + ArrayList lisDouble = records.stream().map(r -> r.getColumn(index).asDouble()).collect(Collectors.toCollection(ArrayList::new)); + stmt.setDouble(++i, lisDouble); + break; + case STRING: + ArrayList lisString = records.stream().map(r -> r.getColumn(index).asString()).collect(Collectors.toCollection(ArrayList::new)); + stmt.setNString(++i, lisString, 64); + break; + case DATE: + ArrayList lisTs = records.stream().map(r -> r.getColumn(index).asBigInteger().longValue()).collect(Collectors.toCollection(ArrayList::new)); + stmt.setTimestamp(++i, lisTs); + break; + case BOOL: + ArrayList lisBool = records.stream().map(r -> r.getColumn(index).asBoolean()).collect(Collectors.toCollection(ArrayList::new)); + stmt.setBoolean(++i, lisBool); + break; + case BYTES: + ArrayList lisBytes = records.stream().map(r -> r.getColumn(index).asString()).collect(Collectors.toCollection(ArrayList::new)); + stmt.setString(++i, lisBytes, 64); + break; + default: + throw DataXException.asDataXException(TDengineWriterErrorCode.TYPE_ERROR, column.getType().toString()); + } + } + // 执行 + stmt.columnDataAddBatch(); + stmt.columnDataExecuteBatch(); + // 更新计数器 + sucCount += records.size(); + } + + /** + * 把缓存的Record全部写入 + */ + public void flush() throws SQLException { + for (String tabName : buf.keySet()) { + if (buf.get(tabName).size() > 0) { + executeBatch(tabName); + } + } + stmt.columnDataCloseBatch(); + } + + /** + * @return 成功写入的数据量 + */ + public long getCount() { + return sucCount; + } +} diff --git 
a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JniConnection.java similarity index 98% rename from tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java rename to tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JniConnection.java index 3ce786e5..0aabe32a 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/JniConnection.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JniConnection.java @@ -1,4 +1,4 @@ -package com.alibaba.datax.plugin.writer; +package com.alibaba.datax.plugin.writer.tdenginewriter; import java.util.Properties; diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/Key.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/Key.java similarity index 52% rename from tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/Key.java rename to tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/Key.java index b240bce4..090a7999 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/Key.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/Key.java @@ -1,4 +1,4 @@ -package com.alibaba.datax.plugin.writer; +package com.alibaba.datax.plugin.writer.tdenginewriter; public class Key { public static final String HOST = "host"; @@ -7,5 +7,8 @@ public class Key { public static final String USER = "user"; public static final String PASSWORD = "password"; public static final String BATCH_SIZE = "batchSize"; - + public static final String STABLE = "stable"; + public static final String TAG_COLUMN = "tagColumn"; + public static final String FIELD_COLUMN = "fieldColumn"; + public static final String TIMESTAMP_COLUMN = "timestampColumn"; } diff --git 
a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/OpentsdbDataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/OpentsdbDataHandler.java similarity index 98% rename from tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/OpentsdbDataHandler.java rename to tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/OpentsdbDataHandler.java index 599e5f3e..52f1aa7a 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/OpentsdbDataHandler.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/OpentsdbDataHandler.java @@ -1,4 +1,4 @@ -package com.alibaba.datax.plugin.writer; +package com.alibaba.datax.plugin.writer.tdenginewriter; import com.alibaba.datax.common.element.Column; import com.alibaba.datax.common.element.Record; diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java new file mode 100644 index 00000000..b3d7b7e3 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java @@ -0,0 +1,255 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import org.apache.commons.codec.digest.DigestUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.*; +import java.util.stream.Collectors; + +public class SchemaManager { + private static final Logger LOG = LoggerFactory.getLogger(SchemaManager.class); + + private String stable; // 目标超表名 + private Map fixedTagValue = new HashMap<>(); // 固定标签值 标签名 -> 标签值 + private Map tagIndexMap = new 
HashMap<>(); // 动态标签值 标签名 -> 列索引 + private Map fieldIndexMap = new HashMap<>(); // 字段名 -> 字段索引 + private String tsColName; // 时间戳列名 + private int tsColIndex = -1; // 时间戳列索引 + private List fieldList = new ArrayList<>(); + private List tagList = new ArrayList<>(); + private boolean canInferSchemaFromConfig = false; + + + public SchemaManager() { + } + + public SchemaManager(Properties properties) { + getFromConfig(properties); + } + + private String mapDataxType(Column.Type type) { + switch (type) { + case LONG: + return "BIGINT"; + case DOUBLE: + return "DOUBLE"; + case STRING: + return "NCHAR(64)"; + case DATE: + return "TIMESTAMP"; + case BOOL: + return "BOOL"; + case BYTES: + return "BINARY"; + default: + throw DataXException.asDataXException(TDengineWriterErrorCode.TYPE_ERROR, type.toString()); + } + } + + public void setStable(String stable) { + this.stable = stable; + } + + public String getStable() { + return stable; + } + + private void getFromConfig(Properties properties) { + stable = properties.getProperty(Key.STABLE); + if (stable == null) { + LOG.error("配置错误: no stable"); + return; + } + for (Object key : properties.keySet()) { + String k = (String) key; + String v = properties.getProperty(k); + + String[] ps = k.split("\\."); + if (ps.length == 1) { + continue; + } + if (k.startsWith(Key.TAG_COLUMN)) { + String tagName = ps[1]; + try { + Integer tagIndex = Integer.parseInt(v); + this.tagIndexMap.put(tagName, tagIndex); + tagList.add(tagName); + } catch (NumberFormatException e) { + fixedTagValue.put(tagName, v); + tagList.add(tagName); + } + } else if (k.startsWith(Key.FIELD_COLUMN)) { + String fieldName = ps[1]; + Integer fileIndex = Integer.parseInt(v); + fieldIndexMap.put(fieldName, fileIndex); + } else if (k.startsWith(Key.TIMESTAMP_COLUMN)) { + tsColName = ps[1]; + tsColIndex = Integer.parseInt(v); + } + } + List sortedFieldName = fieldIndexMap.entrySet().stream().sorted((x, y) -> x.getValue().compareTo(y.getValue())).map(e -> 
e.getKey()).collect(Collectors.toList()); + fieldList.addAll(sortedFieldName); // 排序的目的是保证自动建表时列的顺序和输入数据的列的顺序保持一致 + canInferSchemaFromConfig = tsColIndex > -1 && !(fixedTagValue.isEmpty() && tagIndexMap.isEmpty()) && !fieldIndexMap.isEmpty(); + LOG.info("配置文件解析结果:fixedTags=[{}] ,tags=[{}], fields=[{}], tsColName={}, tsIndex={}", String.join(",", fixedTagValue.keySet()), String.join(",", tagIndexMap.keySet()), String.join(",", fieldList), tsColName, tsColIndex); + } + + public boolean shouldGuessSchema() { + return !canInferSchemaFromConfig; + } + + public boolean shouldCreateTable() { + return canInferSchemaFromConfig; + } + + public boolean configValid() { + boolean valid = (tagList.size() > 0 && fieldList.size() > 0 && tsColIndex > -1) || (tagList.size() == 0 && fieldList.size() == 0 && tsColIndex == -1); + if (!valid) { + LOG.error("配置错误. tag_columns,field_columns,timestamp_column必须同时存在或同时省略,当前解析结果: tag_columns: {}, field_columns:{}, timestamp_column:{} tsColIndex:{}", + (fixedTagValue.size() + tagIndexMap.size()), fieldIndexMap.size(), tsColName, tsColIndex); + } + return valid; + } + + /** + * 通过执行`describe dbname.stable`命令,获取表的schema.
+ * describe命名返回有4列内容,分布是:Field,Type,Length,Note
+ * + * @return 成功返回true,如果超表不存在或其他错误则返回false + */ + public boolean getFromDB(Connection conn) { + try { + List stables = getSTables(conn); + if (!stables.contains(stable)) { + LOG.error("超级表{}不存在,无法从数据库获取表结构信息.", stable); + return false; + } + } catch (SQLException e) { + LOG.error(e.getMessage()); + e.printStackTrace(); + return false; + } + try (Statement stmt = conn.createStatement()) { + ResultSet rs = stmt.executeQuery("describe " + stable); + int colIndex = 0; + while (rs.next()) { + String name = rs.getString(1); + String type = rs.getString(2); + String note = rs.getString(4); + if ("TIMESTAMP".equals(type)) { + tsColName = name; + tsColIndex = colIndex; + } else if ("TAG".equals(note)) { + tagIndexMap.put(name, colIndex); + tagList.add(name); + } else { + fieldIndexMap.put(name, colIndex); + fieldList.add(name); + } + colIndex++; + } + LOG.info("从数据库获取的表结构概要:tags=[{}], fields=[{}], tsColName={}, tsIndex={}", String.join(",", tagIndexMap.keySet()), String.join(",", fieldList), tsColName, tsColIndex); + return true; + } catch (SQLException e) { + LOG.error(e.getMessage()); + e.printStackTrace(); + return false; + } + } + + public static List getSTables(Connection conn) throws SQLException { + List stables = new ArrayList<>(); + try (Statement stmt = conn.createStatement()) { + ResultSet rs = stmt.executeQuery("show stables"); + while (rs.next()) { + String name = rs.getString(1); + stables.add(name); + } + } + return stables; + } + + public void createSTable(Connection conn, Record record) throws SQLException { + StringBuilder sb = new StringBuilder(); + sb.append("CREATE STABLE IF NOT EXISTS ").append(stable).append("("); + sb.append(tsColName).append(" ").append("TIMESTAMP,"); + for (String fieldName : fieldList) { + sb.append(fieldName).append(' '); + Column col = record.getColumn(fieldIndexMap.get(fieldName)); + String tdType = mapDataxType(col.getType()); + sb.append(tdType).append(','); + } + sb.deleteCharAt(sb.length() - 1); + sb.append(") TAGS("); + 
for (String tagName : tagList) { + sb.append(tagName).append(" NCHAR(64),"); + } + sb.deleteCharAt(sb.length() - 1); + sb.append(")"); + String q = sb.toString(); + LOG.info("自动创建超级表:" + q); + try (Statement stmt = conn.createStatement()) { + stmt.execute(q); + } + } + + public String[] getTagValuesFromRecord(Record record) { + String[] tagValues = new String[tagList.size()]; + for (int i = 0; i < tagList.size(); ++i) { + if (fixedTagValue.containsKey(tagList.get(i))) { + tagValues[i] = fixedTagValue.get(tagList.get(i)); + } else { + int tagIndex = tagIndexMap.get(tagList.get(i)); + tagValues[i] = record.getColumn(tagIndex).asString(); + } + } + return tagValues; + } + + public Map getFieldIndexMap() { + return fieldIndexMap; + } + + public List getFieldList() { + return fieldList; + } + + public String getJoinedFieldNames() { + return tsColName + ", " + String.join(", ", fieldList); + } + + public int getTsColIndex() { + return tsColIndex; + } + + public String getTagValuesPlaceHolder() { + return tagList.stream().map(x -> "?").collect(Collectors.joining(",")); + } + + public String getFieldValuesPlaceHolder() { + return "?, " + fieldList.stream().map(x -> "?").collect(Collectors.joining(", ")); + } + + /** + * 计算子表表名 + *
    + *
  1. 将标签的value 组合成为如下的字符串: tag_value1!tag_value2!tag_value3。
  2. + *
  3. 计算该字符串的 MD5 散列值 "md5_val"。
  4. + *
  5. "t_md5val"作为子表名。其中的 "t" 是固定的前缀。
  6. + *
+ * + * @param tagValues + * @return + */ + public String computeTableName(String[] tagValues) { + String s = String.join("!", tagValues); + return "t_" + DigestUtils.md5Hex(s); + } +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriter.java similarity index 84% rename from tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java rename to tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriter.java index 84600802..70ea5737 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriter.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriter.java @@ -1,4 +1,4 @@ -package com.alibaba.datax.plugin.writer; +package com.alibaba.datax.plugin.writer.tdenginewriter; import com.alibaba.datax.common.plugin.RecordReceiver; @@ -64,7 +64,13 @@ public class TDengineWriter extends Writer { String value = this.writerSliceConfig.getString(key); properties.setProperty(key, value); } - + if (!keys.contains(Key.USER)) { + properties.setProperty(Key.USER, "root"); + } + if (!keys.contains(Key.PASSWORD)) { + properties.setProperty(Key.PASSWORD, "taosdata"); + } + LOG.debug("========================properties==========================\n" + properties.toString()); String peerPluginName = this.writerSliceConfig.getString(PEER_PLUGIN_NAME); LOG.debug("start to handle record from: " + peerPluginName); DataHandler handler = DataHandlerFactory.build(peerPluginName); diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriterErrorCode.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriterErrorCode.java similarity index 75% rename from tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriterErrorCode.java rename to 
tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriterErrorCode.java index 02e87079..994f1e89 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/TDengineWriterErrorCode.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriterErrorCode.java @@ -1,9 +1,10 @@ -package com.alibaba.datax.plugin.writer; +package com.alibaba.datax.plugin.writer.tdenginewriter; import com.alibaba.datax.common.spi.ErrorCode; public enum TDengineWriterErrorCode implements ErrorCode { - RUNTIME_EXCEPTION("TDengineWriter-00", "运行时异常"); + RUNTIME_EXCEPTION("TDengineWriter-00", "运行时异常"), + TYPE_ERROR("TDengineWriter-01", "Datax类型无法正确映射到TDengine类型"); private final String code; private final String description; diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/com_alibaba_datax_plugin_writer_JniConnection.h b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/com_alibaba_datax_plugin_writer_tdenginewriter_JniConnection.h similarity index 100% rename from tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/com_alibaba_datax_plugin_writer_JniConnection.h rename to tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/com_alibaba_datax_plugin_writer_tdenginewriter_JniConnection.h diff --git a/tdenginewriter/src/main/resources/plugin.json index 6c900a15..e54f65ff 100755 --- a/tdenginewriter/src/main/resources/plugin.json +++ b/tdenginewriter/src/main/resources/plugin.json @@ -1,9 +1,9 @@ { "name": "tdenginewriter", - "class": "com.alibaba.datax.plugin.writer.TDengineWriter", + "class": "com.alibaba.datax.plugin.writer.tdenginewriter.TDengineWriter", "description": { "useScene": "data migration to tdengine", - "mechanism": "use JNI to write data to tdengine." + "mechanism": "use JNI or taos-jdbc to write data to tdengine." 
}, "developer": "zyyang-taosdata" } \ No newline at end of file diff --git a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/JniConnectionTest.java similarity index 90% rename from tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java rename to tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/JniConnectionTest.java index 040cf34c..09c3df26 100644 --- a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/JniConnectionTest.java +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/JniConnectionTest.java @@ -1,4 +1,4 @@ -package com.alibaba.datax.plugin.writer; +package com.alibaba.datax.plugin.writer.tdenginewriter; import org.junit.Test; diff --git a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriterTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriterTest.java new file mode 100644 index 00000000..43928db9 --- /dev/null +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriterTest.java @@ -0,0 +1,21 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import org.junit.Test; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; + +public class TDengineWriterTest { + + + @Test + public void testGetSchema() throws ClassNotFoundException, SQLException { + Class.forName("com.taosdata.jdbc.TSDBDriver"); + String jdbcUrl = String.format("jdbc:TAOS://%s:%s/%s?user=%s&password=%s", "wozai.fun", "6030", "test", "root", "taosdata"); + Connection conn = DriverManager.getConnection(jdbcUrl); + SchemaManager schemaManager = new SchemaManager(); + schemaManager.setStable("test1"); + schemaManager.getFromDB(conn); + } +} From ca1851fb995a3200bd4c186539ff1930b7162e8d Mon Sep 17 00:00:00 2001 From: dingbo 
Date: Thu, 18 Nov 2021 18:06:10 +0800 Subject: [PATCH 16/33] mongodb2tdengine test and refine --- core/src/main/job/mongodb2tdengine.json | 4 +- tdenginewriter/doc/tdenginewriter.md | 11 +- .../writer/tdenginewriter/DataHandler.java | 3 +- .../tdenginewriter/DefaultDataHandler.java | 17 +-- .../tdenginewriter/JDBCBatchWriter.java | 100 ++++++++++++++++-- .../tdenginewriter/OpentsdbDataHandler.java | 3 +- .../writer/tdenginewriter/SchemaManager.java | 25 ++++- .../writer/tdenginewriter/TDengineWriter.java | 2 +- .../tdenginewriter/TDengineWriterTest.java | 10 ++ 9 files changed, 150 insertions(+), 25 deletions(-) diff --git a/core/src/main/job/mongodb2tdengine.json b/core/src/main/job/mongodb2tdengine.json index 4cfc987e..45e5a640 100644 --- a/core/src/main/job/mongodb2tdengine.json +++ b/core/src/main/job/mongodb2tdengine.json @@ -11,7 +11,7 @@ "name": "mongodbreader", "parameter": { "address": [ - "123.56.104.14:27017" + "127.0.0.1:27017" ], "userName": "admin678", "mechanism": "SCRAM-SHA-1", @@ -50,7 +50,7 @@ "writer": { "name": "tdenginewriter", "parameter": { - "host": "123.56.104.14", + "host": "127.0.0.1", "port": 6030, "dbname": "test", "user": "root", diff --git a/tdenginewriter/doc/tdenginewriter.md b/tdenginewriter/doc/tdenginewriter.md index c9c222a2..9ab64a2d 100644 --- a/tdenginewriter/doc/tdenginewriter.md +++ b/tdenginewriter/doc/tdenginewriter.md @@ -290,6 +290,9 @@ TAGS( ## 5 约束限制 +1. 本插件自动创建超级表时NCHAR类型的长度固定为64,对于包含长度大于64的字符串的数据源,将不支持。 +2. 标签列不能包含null值,如果包含会被过滤掉。 + ## FAQ ### 如何选取要同步的数据的范围? @@ -300,10 +303,14 @@ TAGS( 如果Reader插件支持一次读多张表,Writer插件就能一次导入多张表。如果Reader不支持多多张表,可以建多个job,分别导入。Writer插件只负责写数据。 -### 1张源表导入之后对应TDengine中多少张表? +### 一张源表导入之后对应TDengine中多少张表? 这是又tagColumn决定的,如果所有tag列的值都相同,目标表也只有一个。源表有多少不同的tag组合,目标超表就会有多少子表。 ### 源表和目标表的字段顺序一致吗? -TDengine要求每个表第一列是时间戳列,后边是普通字段,最后是标签列。如果源表不是这个顺序,插件在自动建表是自动调整。 \ No newline at end of file +TDengine要求每个表第一列是时间戳列,后边是普通字段,最后是标签列。如果源表不是这个顺序,插件在自动建表是自动调整。 + +### 插件如何确定各列的数据类型? 
+ +抽样收到的第一批数据自动推断各列的类型。schema是从数据来的,因此要保障“好的”数据占大多数。 \ No newline at end of file diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DataHandler.java index 686ac27b..421c2fe4 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DataHandler.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DataHandler.java @@ -1,10 +1,11 @@ package com.alibaba.datax.plugin.writer.tdenginewriter; import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.plugin.TaskPluginCollector; import java.util.Properties; public interface DataHandler { - long handle(RecordReceiver lineReceiver, Properties properties); + long handle(RecordReceiver lineReceiver, Properties properties, TaskPluginCollector collector); } diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandler.java index 733f49c5..9250910a 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandler.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandler.java @@ -2,6 +2,7 @@ package com.alibaba.datax.plugin.writer.tdenginewriter; import com.alibaba.datax.common.element.Record; import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.plugin.TaskPluginCollector; import com.taosdata.jdbc.TSDBPreparedStatement; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -16,7 +17,6 @@ import java.util.Properties; */ public class DefaultDataHandler implements DataHandler { private static final Logger LOG = LoggerFactory.getLogger(DefaultDataHandler.class); - static { try { Class.forName("com.taosdata.jdbc.TSDBDriver"); @@ -26,7 +26,7 
@@ public class DefaultDataHandler implements DataHandler { } @Override - public long handle(RecordReceiver lineReceiver, Properties properties) { + public long handle(RecordReceiver lineReceiver, Properties properties, TaskPluginCollector collector) { SchemaManager schemaManager = new SchemaManager(properties); if (!schemaManager.configValid()) { return 0; @@ -47,7 +47,11 @@ public class DefaultDataHandler implements DataHandler { } int batchSize = Integer.parseInt(properties.getProperty(Key.BATCH_SIZE, "1000")); - return write(lineReceiver, conn, batchSize, schemaManager); + if (batchSize < 5) { + LOG.error("batchSize太小,会增加自动类型推断错误的概率,建议改大后重试"); + return 0; + } + return write(lineReceiver, conn, batchSize, schemaManager, collector); } catch (Exception e) { LOG.error("write failed " + e.getMessage()); e.printStackTrace(); @@ -79,18 +83,15 @@ public class DefaultDataHandler implements DataHandler { * @return 成功写入记录数 * @throws SQLException */ - private long write(RecordReceiver lineReceiver, Connection conn, int batchSize, SchemaManager scm) throws SQLException { + private long write(RecordReceiver lineReceiver, Connection conn, int batchSize, SchemaManager scm, TaskPluginCollector collector) throws SQLException { Record record = lineReceiver.getFromReader(); if (record == null) { return 0; } - if (scm.shouldCreateTable()) { - scm.createSTable(conn, record); - } String pq = String.format("INSERT INTO ? 
USING %s TAGS(%s) (%s) values (%s)", scm.getStable(), scm.getTagValuesPlaceHolder(), scm.getJoinedFieldNames(), scm.getFieldValuesPlaceHolder()); LOG.info("Prepared SQL: {}", pq); try (TSDBPreparedStatement stmt = (TSDBPreparedStatement) conn.prepareStatement(pq)) { - JDBCBatchWriter batchWriter = new JDBCBatchWriter(stmt, scm, batchSize); + JDBCBatchWriter batchWriter = new JDBCBatchWriter(conn, stmt, scm, batchSize, collector); do { batchWriter.append(record); } while ((record = lineReceiver.getFromReader()) != null); diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java index 17023d03..21974e93 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java @@ -3,10 +3,13 @@ package com.alibaba.datax.plugin.writer.tdenginewriter; import com.alibaba.datax.common.element.Column; import com.alibaba.datax.common.element.Record; import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.AbstractTaskPlugin; +import com.alibaba.datax.common.plugin.TaskPluginCollector; import com.taosdata.jdbc.TSDBPreparedStatement; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.sql.Connection; import java.sql.SQLException; import java.util.ArrayList; import java.util.HashMap; @@ -26,10 +29,12 @@ import java.util.stream.Collectors; */ public class JDBCBatchWriter { public static final Logger LOG = LoggerFactory.getLogger(JDBCBatchWriter.class); - private TSDBPreparedStatement stmt; private SchemaManager scm; + private Connection conn; private int batchSize; + private TaskPluginCollector collector; + // 缓存Record, key为tableName Map> buf = new HashMap<>(); // 缓存表的标签值, key为tableName @@ -37,25 +42,57 @@ public class JDBCBatchWriter 
{ private long sucCount = 0; private final int tsColIndex; private List fieldList; + // 每个record至少应该包含的列数,用于检测数据 + private int minColNum = 0; private Map fieldIndexMap; + private List fieldTypes = null; - public JDBCBatchWriter(TSDBPreparedStatement stmt, SchemaManager scm, int batchSize) { + public JDBCBatchWriter(Connection conn, TSDBPreparedStatement stmt, SchemaManager scm, int batchSize, TaskPluginCollector collector) { + this.conn = conn; this.stmt = stmt; this.scm = scm; this.batchSize = batchSize; + this.collector = collector; this.tsColIndex = scm.getTsColIndex(); this.fieldList = scm.getFieldList(); this.fieldIndexMap = scm.getFieldIndexMap(); + this.minColNum = 1 + fieldList.size() + scm.getDynamicTagCount(); + } + public void initFiledTypesAndTargetTable(List records) throws SQLException { + if (fieldTypes != null) { + return; + } + guessFieldTypes(records); + if (scm.shouldCreateTable()) { + scm.createSTable(conn, fieldTypes); + } + } public void append(Record record) throws SQLException { + int columnNum = record.getColumnNumber(); + if (columnNum < minColNum) { + collector.collectDirtyRecord(record, "实际列数小于期望列数"); + return; + } String[] tagValues = scm.getTagValuesFromRecord(record); + if (tagValues == null) { + collector.collectDirtyRecord(record, "标签列包含null"); + return; + } + if (!scm.hasTimestamp(record)) { + collector.collectDirtyRecord(record, "时间戳列为null或类型错误"); + return; + } String tableName = scm.computeTableName(tagValues); if (buf.containsKey(tableName)) { List lis = buf.get(tableName); lis.add(record); if (lis.size() == batchSize) { + if (fieldTypes == null) { + initFiledTypesAndTargetTable(lis); + } executeBatch(tableName); lis.clear(); } @@ -67,6 +104,49 @@ public class JDBCBatchWriter { } } + /** + * 只有String类型比较特别,测试发现值为null的列会转成String类型。所以Column的类型为String并不代表这一列的类型真的是String。 + * + * @param records + */ + private void guessFieldTypes(List records) { + fieldTypes = new ArrayList<>(fieldList.size()); + for (int i = 0; i < 
fieldList.size(); ++i) { + int colIndex = fieldIndexMap.get(fieldList.get(i)); + boolean ok = false; + for (int j = 0; j < records.size() && !ok; ++j) { + Column column = records.get(j).getColumn(colIndex); + Column.Type type = column.getType(); + switch (type) { + case LONG: + case DOUBLE: + case DATE: + case BOOL: + case BYTES: + if (column.getRawData() != null) { + fieldTypes.add(type); + ok = true; + } + break; + case STRING: + // 只有非null且非空的String列,才会被真的当作String类型。 + String value = column.asString(); + if (value != null && !"".equals(value)) { + fieldTypes.add(type); + ok = true; + } + break; + default: + throw DataXException.asDataXException(TDengineWriterErrorCode.TYPE_ERROR, fieldTypes.get(i).toString()); + } + } + if (!ok) { + throw DataXException.asDataXException(TDengineWriterErrorCode.TYPE_ERROR, String.format("根据采样的%d条数据,无法推断第%d列的数据类型", records.size(), i + 1)); + } + } + LOG.info("Field Types: {}", fieldTypes); + } + /** * 执行单表批量写入 * @@ -87,12 +167,10 @@ public class JDBCBatchWriter { ArrayList tsList = records.stream().map(r -> r.getColumn(tsColIndex).asDate().getTime()).collect(Collectors.toCollection(ArrayList::new)); stmt.setTimestamp(0, tsList); // 字段 - Record record = records.get(0); for (int i = 0; i < fieldList.size(); ) { String fieldName = fieldList.get(i); int index = fieldIndexMap.get(fieldName); - Column column = record.getColumn(index); - switch (column.getType()) { + switch (fieldTypes.get(i)) { case LONG: ArrayList lisLong = records.stream().map(r -> r.getColumn(index).asBigInteger().longValue()).collect(Collectors.toCollection(ArrayList::new)); stmt.setLong(++i, lisLong); @@ -118,7 +196,7 @@ public class JDBCBatchWriter { stmt.setString(++i, lisBytes, 64); break; default: - throw DataXException.asDataXException(TDengineWriterErrorCode.TYPE_ERROR, column.getType().toString()); + throw DataXException.asDataXException(TDengineWriterErrorCode.TYPE_ERROR, fieldTypes.get(i).toString()); } } // 执行 @@ -132,6 +210,16 @@ public class 
JDBCBatchWriter { * 把缓存的Record全部写入 */ public void flush() throws SQLException { + if (fieldTypes == null) { + List records = new ArrayList<>(); + for (List lis : buf.values()) { + records.addAll(lis); + if (records.size() > 100) { + break; + } + } + initFiledTypesAndTargetTable(records); + } for (String tabName : buf.keySet()) { if (buf.get(tabName).size() > 0) { executeBatch(tabName); diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/OpentsdbDataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/OpentsdbDataHandler.java index 52f1aa7a..e1b8f5dd 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/OpentsdbDataHandler.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/OpentsdbDataHandler.java @@ -4,6 +4,7 @@ import com.alibaba.datax.common.element.Column; import com.alibaba.datax.common.element.Record; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.plugin.TaskPluginCollector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -14,7 +15,7 @@ public class OpentsdbDataHandler implements DataHandler { private static final String DEFAULT_BATCH_SIZE = "1"; @Override - public long handle(RecordReceiver lineReceiver, Properties properties) { + public long handle(RecordReceiver lineReceiver, Properties properties, TaskPluginCollector collector) { // opentsdb json protocol use JNI and schemaless API to write String host = properties.getProperty(Key.HOST); int port = Integer.parseInt(properties.getProperty(Key.PORT)); diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java index b3d7b7e3..21b8ef01 100644 --- 
a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java @@ -176,14 +176,15 @@ public class SchemaManager { return stables; } - public void createSTable(Connection conn, Record record) throws SQLException { + public void createSTable(Connection conn, List fieldTypes) throws SQLException { StringBuilder sb = new StringBuilder(); sb.append("CREATE STABLE IF NOT EXISTS ").append(stable).append("("); sb.append(tsColName).append(" ").append("TIMESTAMP,"); - for (String fieldName : fieldList) { + for (int i = 0; i < fieldList.size(); ++i) { + String fieldName = fieldList.get(i); + Column.Type dxType = fieldTypes.get(i); sb.append(fieldName).append(' '); - Column col = record.getColumn(fieldIndexMap.get(fieldName)); - String tdType = mapDataxType(col.getType()); + String tdType = mapDataxType(dxType); sb.append(tdType).append(','); } sb.deleteCharAt(sb.length() - 1); @@ -209,10 +210,22 @@ public class SchemaManager { int tagIndex = tagIndexMap.get(tagList.get(i)); tagValues[i] = record.getColumn(tagIndex).asString(); } + if (tagValues[i] == null) { + return null; + } } return tagValues; } + public boolean hasTimestamp(Record record) { + Column column = record.getColumn(tsColIndex); + if (column.getType() == Column.Type.DATE && column.asDate() != null) { + return true; + } else { + return false; + } + } + public Map getFieldIndexMap() { return fieldIndexMap; } @@ -252,4 +265,8 @@ public class SchemaManager { String s = String.join("!", tagValues); return "t_" + DigestUtils.md5Hex(s); } + + public int getDynamicTagCount() { + return tagIndexMap.size(); + } } diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriter.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriter.java index 70ea5737..cd223792 100644 --- 
a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriter.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriter.java @@ -74,7 +74,7 @@ public class TDengineWriter extends Writer { String peerPluginName = this.writerSliceConfig.getString(PEER_PLUGIN_NAME); LOG.debug("start to handle record from: " + peerPluginName); DataHandler handler = DataHandlerFactory.build(peerPluginName); - long records = handler.handle(lineReceiver, properties); + long records = handler.handle(lineReceiver, properties, getTaskPluginCollector()); LOG.debug("handle data finished, records: " + records); } diff --git a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriterTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriterTest.java index 43928db9..62bf7040 100644 --- a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriterTest.java +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriterTest.java @@ -5,6 +5,7 @@ import org.junit.Test; import java.sql.Connection; import java.sql.DriverManager; import java.sql.SQLException; +import java.sql.Statement; public class TDengineWriterTest { @@ -18,4 +19,13 @@ public class TDengineWriterTest { schemaManager.setStable("test1"); schemaManager.getFromDB(conn); } + + @Test + public void dropTestTable() throws ClassNotFoundException, SQLException { + Class.forName("com.taosdata.jdbc.TSDBDriver"); + String jdbcUrl = String.format("jdbc:TAOS://%s:%s/%s?user=%s&password=%s", "wozai.fun", "6030", "test", "root", "taosdata"); + Connection conn = DriverManager.getConnection(jdbcUrl); + Statement stmt = conn.createStatement(); + stmt.execute("drop table market_snapshot"); + } } From 75d4f7e101bc17bf0427acc7efcdafb4648642ff Mon Sep 17 00:00:00 2001 From: dingbo Date: Thu, 18 Nov 2021 19:08:19 +0800 Subject: [PATCH 17/33] 
mongodb2tdengine check records count before flush --- .../datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java index 21974e93..3b1f860b 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java @@ -218,7 +218,11 @@ public class JDBCBatchWriter { break; } } - initFiledTypesAndTargetTable(records); + if (records.size() > 0) { + initFiledTypesAndTargetTable(records); + } else { + return; + } } for (String tabName : buf.keySet()) { if (buf.get(tabName).size() > 0) { From 485d2d881593aaadc0ad39c95aa215e88d2f38db Mon Sep 17 00:00:00 2001 From: dingbo Date: Thu, 18 Nov 2021 20:18:40 +0800 Subject: [PATCH 18/33] mongodb2tdengine typo --- tdenginewriter/doc/tdenginewriter.md | 2 +- .../datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java | 1 - .../datax/plugin/writer/tdenginewriter/SchemaManager.java | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tdenginewriter/doc/tdenginewriter.md b/tdenginewriter/doc/tdenginewriter.md index 9ab64a2d..715080c1 100644 --- a/tdenginewriter/doc/tdenginewriter.md +++ b/tdenginewriter/doc/tdenginewriter.md @@ -197,7 +197,7 @@ CREATE STABLE IF NOT EXISTS market_snapshot ( ) TAGS( industry NCHAR(64), - stockID NCHAR(64 + stockID NCHAR(64) ); ``` diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java index 3b1f860b..279e6ed3 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java +++ 
b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java @@ -3,7 +3,6 @@ package com.alibaba.datax.plugin.writer.tdenginewriter; import com.alibaba.datax.common.element.Column; import com.alibaba.datax.common.element.Record; import com.alibaba.datax.common.exception.DataXException; -import com.alibaba.datax.common.plugin.AbstractTaskPlugin; import com.alibaba.datax.common.plugin.TaskPluginCollector; import com.taosdata.jdbc.TSDBPreparedStatement; import org.slf4j.Logger; diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java index 21b8ef01..22c8a44f 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java @@ -48,7 +48,7 @@ public class SchemaManager { case BOOL: return "BOOL"; case BYTES: - return "BINARY"; + return "BINARY(64)"; default: throw DataXException.asDataXException(TDengineWriterErrorCode.TYPE_ERROR, type.toString()); } From be78295e116f6b2c98e65bb76c3d5cf2b71f2578 Mon Sep 17 00:00:00 2001 From: dingbo Date: Thu, 18 Nov 2021 22:57:53 +0800 Subject: [PATCH 19/33] mongodb2tdengine typo --- tdenginewriter/doc/tdenginewriter.md | 4 ++-- .../datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tdenginewriter/doc/tdenginewriter.md b/tdenginewriter/doc/tdenginewriter.md index 715080c1..432b1fb2 100644 --- a/tdenginewriter/doc/tdenginewriter.md +++ b/tdenginewriter/doc/tdenginewriter.md @@ -305,7 +305,7 @@ TAGS( ### 一张源表导入之后对应TDengine中多少张表? -这是又tagColumn决定的,如果所有tag列的值都相同,目标表也只有一个。源表有多少不同的tag组合,目标超表就会有多少子表。 +这是由tagColumn决定的,如果所有tag列的值都相同,那么目标表只有一个。源表有多少不同的tag组合,目标超表就有多少子表。 ### 源表和目标表的字段顺序一致吗? 
@@ -313,4 +313,4 @@ TDengine要求每个表第一列是时间戳列,后边是普通字段,最后 ### 插件如何确定各列的数据类型? -抽样收到的第一批数据自动推断各列的类型。schema是从数据来的,因此要保障“好的”数据占大多数。 \ No newline at end of file +根据收到的第一批数据自动推断各列的类型。 \ No newline at end of file diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java index 279e6ed3..20065a70 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java @@ -41,7 +41,7 @@ public class JDBCBatchWriter { private long sucCount = 0; private final int tsColIndex; private List fieldList; - // 每个record至少应该包含的列数,用于检测数据 + // 每个record至少应该包含的列数,用于校验数据 private int minColNum = 0; private Map fieldIndexMap; private List fieldTypes = null; From e5c3fed1a939e8006911a423f7559ff5b7428a1c Mon Sep 17 00:00:00 2001 From: dingbo Date: Fri, 19 Nov 2021 14:20:37 +0800 Subject: [PATCH 20/33] i18n support for some import log messages --- .../tdenginewriter/DefaultDataHandler.java | 7 ++++-- .../tdenginewriter/JDBCBatchWriter.java | 12 ++++++--- .../plugin/writer/tdenginewriter/Msg.java | 20 +++++++++++++++ .../writer/tdenginewriter/SchemaManager.java | 13 +++++----- .../resources/tdenginewritermsg.properties | 6 +++++ .../tdenginewritermsg_en_US.properties | 6 +++++ .../tdenginewritermsg_zh_CN.properties | 6 +++++ .../writer/tdenginewriter/MessageTest.java | 25 +++++++++++++++++++ 8 files changed, 82 insertions(+), 13 deletions(-) create mode 100644 tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/Msg.java create mode 100644 tdenginewriter/src/main/resources/tdenginewritermsg.properties create mode 100644 tdenginewriter/src/main/resources/tdenginewritermsg_en_US.properties create mode 100644 tdenginewriter/src/main/resources/tdenginewritermsg_zh_CN.properties create mode 100644 
tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/MessageTest.java diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandler.java index 9250910a..91c2b7e3 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandler.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandler.java @@ -17,6 +17,7 @@ import java.util.Properties; */ public class DefaultDataHandler implements DataHandler { private static final Logger LOG = LoggerFactory.getLogger(DefaultDataHandler.class); + static { try { Class.forName("com.taosdata.jdbc.TSDBDriver"); @@ -38,7 +39,8 @@ public class DefaultDataHandler implements DataHandler { return 0; } if (schemaManager.shouldGuessSchema()) { - LOG.info("无法从配置文件获取表结构信息,尝试从数据库获取"); + // 无法从配置文件获取表结构信息,尝试从数据库获取 + LOG.info(Msg.get("try_get_schema_from_db")); boolean success = schemaManager.getFromDB(conn); if (!success) { return 0; @@ -48,7 +50,8 @@ public class DefaultDataHandler implements DataHandler { } int batchSize = Integer.parseInt(properties.getProperty(Key.BATCH_SIZE, "1000")); if (batchSize < 5) { - LOG.error("batchSize太小,会增加自动类型推断错误的概率,建议改大后重试"); + // batchSize太小,会增加自动类型推断错误的概率,建议改大后重试 + LOG.error(Msg.get("batch_size_too_small")); return 0; } return write(lineReceiver, conn, batchSize, schemaManager, collector); diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java index 20065a70..53ab9bb9 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/JDBCBatchWriter.java @@ -72,16 +72,19 @@ public 
class JDBCBatchWriter { public void append(Record record) throws SQLException { int columnNum = record.getColumnNumber(); if (columnNum < minColNum) { - collector.collectDirtyRecord(record, "实际列数小于期望列数"); + // 实际列数小于期望列数 + collector.collectDirtyRecord(record, Msg.get("column_number_error")); return; } String[] tagValues = scm.getTagValuesFromRecord(record); if (tagValues == null) { - collector.collectDirtyRecord(record, "标签列包含null"); + // 标签列包含null + collector.collectDirtyRecord(record, Msg.get("tag_value_error")); return; } if (!scm.hasTimestamp(record)) { - collector.collectDirtyRecord(record, "时间戳列为null或类型错误"); + // 时间戳列为null或类型错误 + collector.collectDirtyRecord(record, Msg.get("ts_value_error")); return; } String tableName = scm.computeTableName(tagValues); @@ -140,7 +143,8 @@ public class JDBCBatchWriter { } } if (!ok) { - throw DataXException.asDataXException(TDengineWriterErrorCode.TYPE_ERROR, String.format("根据采样的%d条数据,无法推断第%d列的数据类型", records.size(), i + 1)); + // 根据采样的%d条数据,无法推断第%d列的数据类型 + throw DataXException.asDataXException(TDengineWriterErrorCode.TYPE_ERROR, String.format(Msg.get("infer_column_type_error"), records.size(), i + 1)); } } LOG.info("Field Types: {}", fieldTypes); diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/Msg.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/Msg.java new file mode 100644 index 00000000..89730d35 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/Msg.java @@ -0,0 +1,20 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import java.util.Locale; +import java.util.ResourceBundle; + +/** + * i18n message util + */ +public class Msg { + private static ResourceBundle bundle; + + static { + bundle = ResourceBundle.getBundle("tdenginewritermsg", Locale.getDefault()); + } + + public static String get(String key) { + return bundle.getString(key); + } + +} diff --git 
a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java index 22c8a44f..d67a6585 100644 --- a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java @@ -65,7 +65,7 @@ public class SchemaManager { private void getFromConfig(Properties properties) { stable = properties.getProperty(Key.STABLE); if (stable == null) { - LOG.error("配置错误: no stable"); + LOG.error("Config error: no stable"); return; } for (Object key : properties.keySet()) { @@ -98,7 +98,7 @@ public class SchemaManager { List sortedFieldName = fieldIndexMap.entrySet().stream().sorted((x, y) -> x.getValue().compareTo(y.getValue())).map(e -> e.getKey()).collect(Collectors.toList()); fieldList.addAll(sortedFieldName); // 排序的目的是保证自动建表时列的顺序和输入数据的列的顺序保持一致 canInferSchemaFromConfig = tsColIndex > -1 && !(fixedTagValue.isEmpty() && tagIndexMap.isEmpty()) && !fieldIndexMap.isEmpty(); - LOG.info("配置文件解析结果:fixedTags=[{}] ,tags=[{}], fields=[{}], tsColName={}, tsIndex={}", String.join(",", fixedTagValue.keySet()), String.join(",", tagIndexMap.keySet()), String.join(",", fieldList), tsColName, tsColIndex); + LOG.info("Config file parsed result:fixedTags=[{}] ,tags=[{}], fields=[{}], tsColName={}, tsIndex={}", String.join(",", fixedTagValue.keySet()), String.join(",", tagIndexMap.keySet()), String.join(",", fieldList), tsColName, tsColIndex); } public boolean shouldGuessSchema() { @@ -112,8 +112,7 @@ public class SchemaManager { public boolean configValid() { boolean valid = (tagList.size() > 0 && fieldList.size() > 0 && tsColIndex > -1) || (tagList.size() == 0 && fieldList.size() == 0 && tsColIndex == -1); if (!valid) { - LOG.error("配置错误. 
tag_columns,field_columns,timestamp_column必须同时存在或同时省略,当前解析结果: tag_columns: {}, field_columns:{}, timestamp_column:{} tsColIndex:{}", - (fixedTagValue.size() + tagIndexMap.size()), fieldIndexMap.size(), tsColName, tsColIndex); + LOG.error("Config error: tagColumn, fieldColumn and timestampColumn must be present together or absent together."); } return valid; } @@ -128,7 +127,7 @@ public class SchemaManager { try { List stables = getSTables(conn); if (!stables.contains(stable)) { - LOG.error("超级表{}不存在,无法从数据库获取表结构信息.", stable); + LOG.error("super table {} not exist, fail to get schema from database.", stable); return false; } } catch (SQLException e) { @@ -155,7 +154,7 @@ public class SchemaManager { } colIndex++; } - LOG.info("从数据库获取的表结构概要:tags=[{}], fields=[{}], tsColName={}, tsIndex={}", String.join(",", tagIndexMap.keySet()), String.join(",", fieldList), tsColName, tsColIndex); + LOG.info("table info:tags=[{}], fields=[{}], tsColName={}, tsIndex={}", String.join(",", tagIndexMap.keySet()), String.join(",", fieldList), tsColName, tsColIndex); return true; } catch (SQLException e) { LOG.error(e.getMessage()); @@ -195,7 +194,7 @@ public class SchemaManager { sb.deleteCharAt(sb.length() - 1); sb.append(")"); String q = sb.toString(); - LOG.info("自动创建超级表:" + q); + LOG.info("run sql:" + q); try (Statement stmt = conn.createStatement()) { stmt.execute(q); } diff --git a/tdenginewriter/src/main/resources/tdenginewritermsg.properties b/tdenginewriter/src/main/resources/tdenginewritermsg.properties new file mode 100644 index 00000000..4aaa220b --- /dev/null +++ b/tdenginewriter/src/main/resources/tdenginewritermsg.properties @@ -0,0 +1,6 @@ +try_get_schema_fromdb=fail to get structure info of target table from configure file and will try to get it from database +batch_size_too_small='batchSize' is too small, please increase it and try again +column_number_error=number of columns is less than expected +tag_value_error=tag columns include 'null' value 
+ts_value_error=timestamp column type error or null +infer_column_type_error=fail to infer column type: sample count %d, column index %d \ No newline at end of file diff --git a/tdenginewriter/src/main/resources/tdenginewritermsg_en_US.properties b/tdenginewriter/src/main/resources/tdenginewritermsg_en_US.properties new file mode 100644 index 00000000..4aaa220b --- /dev/null +++ b/tdenginewriter/src/main/resources/tdenginewritermsg_en_US.properties @@ -0,0 +1,6 @@ +try_get_schema_fromdb=fail to get structure info of target table from configure file and will try to get it from database +batch_size_too_small='batchSize' is too small, please increase it and try again +column_number_error=number of columns is less than expected +tag_value_error=tag columns include 'null' value +ts_value_error=timestamp column type error or null +infer_column_type_error=fail to infer column type: sample count %d, column index %d \ No newline at end of file diff --git a/tdenginewriter/src/main/resources/tdenginewritermsg_zh_CN.properties b/tdenginewriter/src/main/resources/tdenginewritermsg_zh_CN.properties new file mode 100644 index 00000000..4b9552fd --- /dev/null +++ b/tdenginewriter/src/main/resources/tdenginewritermsg_zh_CN.properties @@ -0,0 +1,6 @@ +try_get_schema_fromdb=\u65e0\u6cd5\u4ece\u914d\u7f6e\u6587\u4ef6\u83b7\u53d6\u8868\u7ed3\u6784\u4fe1\u606f\uff0c\u5c1d\u8bd5\u4ece\u6570\u636e\u5e93\u83b7\u53d6 +batch_size_too_small=batchSize\u592a\u5c0f\uff0c\u4f1a\u589e\u52a0\u81ea\u52a8\u7c7b\u578b\u63a8\u65ad\u9519\u8bef\u7684\u6982\u7387\uff0c\u5efa\u8bae\u6539\u5927\u540e\u91cd\u8bd5 +column_number_error=\u5b9e\u9645\u5217\u6570\u5c0f\u4e8e\u671f\u671b\u5217\u6570 +tag_value_error=\u6807\u7b7e\u5217\u5305\u542bnull +ts_value_error=\u65f6\u95f4\u6233\u5217\u4e3anull\u6216\u7c7b\u578b\u9519\u8bef +infer_column_type_error=\u6839\u636e\u91c7\u6837\u7684%d\u6761\u6570\u636e\uff0c\u65e0\u6cd5\u63a8\u65ad\u7b2c%d\u5217\u7684\u6570\u636e\u7c7b\u578b diff --git 
a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/MessageTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/MessageTest.java new file mode 100644 index 00000000..b1b7ddd8 --- /dev/null +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/MessageTest.java @@ -0,0 +1,25 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import org.junit.Test; + +import java.util.Locale; +import java.util.ResourceBundle; + +import org.junit.Assert; + +public class MessageTest { + @Test + public void testChineseMessage() { + Locale local = new Locale("zh", "CN"); + ResourceBundle bundle = ResourceBundle.getBundle("tdenginewritermsg", local); + String msg = bundle.getString("try_get_schema_fromdb"); + Assert.assertEquals("无法从配置文件获取表结构信息,尝试从数据库获取", msg); + } + + @Test + public void testDefaultMessage() { + ResourceBundle bundle = ResourceBundle.getBundle("tdenginewritermsg", Locale.getDefault()); + String msg = bundle.getString("try_get_schema_fromdb"); + System.out.println(msg); + } +} From b47cdaf217dafa3ff84b8a289b13fc5e190901b5 Mon Sep 17 00:00:00 2001 From: dingbo Date: Fri, 19 Nov 2021 14:23:24 +0800 Subject: [PATCH 21/33] use plain user name in demo config file --- core/src/main/job/mongodb2tdengine.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/job/mongodb2tdengine.json b/core/src/main/job/mongodb2tdengine.json index 45e5a640..49e04c11 100644 --- a/core/src/main/job/mongodb2tdengine.json +++ b/core/src/main/job/mongodb2tdengine.json @@ -13,9 +13,9 @@ "address": [ "127.0.0.1:27017" ], - "userName": "admin678", + "userName": "mongouser", "mechanism": "SCRAM-SHA-1", - "userPassword": "huwG86123", + "userPassword": "mongopass", "authDb": "admin", "dbName": "test", "collectionName": "cu_market_data", From 17c39b11bac472f6a0bac1495a92aed45c602766 Mon Sep 17 00:00:00 2001 From: dingbo Date: Fri, 19 Nov 2021 16:38:55 +0800 Subject: [PATCH 
22/33] migrate datax.py to python3, and compatible with python2 --- core/src/main/bin/datax.py | 114 +++++++++++++++++++++---------------- userGuid.md | 2 +- 2 files changed, 65 insertions(+), 51 deletions(-) diff --git a/core/src/main/bin/datax.py b/core/src/main/bin/datax.py index 1099ed3a..4811ae8d 100755 --- a/core/src/main/bin/datax.py +++ b/core/src/main/bin/datax.py @@ -1,23 +1,26 @@ #!/usr/bin/env python # -*- coding:utf-8 -*- -import sys -import os -import signal -import subprocess -import time -import re -import socket -import json -from optparse import OptionParser -from optparse import OptionGroup -from string import Template import codecs +import json +import os import platform +import re +import signal +import socket +import subprocess +import sys +import time +from optparse import OptionGroup +from optparse import OptionParser +from string import Template + +ispy2 = sys.version_info.major == 2 def isWindows(): return platform.system() == 'Windows' + DATAX_HOME = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) DATAX_VERSION = 'DATAX-OPENSOURCE-3.0' @@ -52,13 +55,19 @@ def getLocalIp(): def suicide(signum, e): global child_process - print >> sys.stderr, "[Error] DataX receive unexpected signal %d, starts to suicide." % (signum) + if ispy2: + print >> sys.stderr, "[Error] DataX receive unexpected signal %d, starts to suicide." % (signum) + else: + sys.stderr.write("[Error] DataX receive unexpected signal %d, starts to suicide.\n" % (signum)) if child_process: child_process.send_signal(signal.SIGQUIT) time.sleep(1) child_process.kill() - print >> sys.stderr, "DataX Process was killed ! you did ?" + if ispy2: + print >> sys.stderr, "DataX Process was killed ! you did ?" + else: + sys.stderr.write("DataX Process was killed ! you did ?\n") sys.exit(RET_STATE["KILL"]) @@ -92,10 +101,10 @@ def getOptionParser(): 'if you have mutiple parameters: -p"-DtableName=your-table-name -DcolumnName=your-column-name".' 
'Note: you should config in you job tableName with ${tableName}.') prodEnvOptionGroup.add_option("-r", "--reader", metavar="", - action="store", dest="reader",type="string", + action="store", dest="reader", type="string", help='View job config[reader] template, eg: mysqlreader,streamreader') prodEnvOptionGroup.add_option("-w", "--writer", metavar="", - action="store", dest="writer",type="string", + action="store", dest="writer", type="string", help='View job config[writer] template, eg: mysqlwriter,streamwriter') parser.add_option_group(prodEnvOptionGroup) @@ -108,45 +117,50 @@ def getOptionParser(): parser.add_option_group(devEnvOptionGroup) return parser + def generateJobConfigTemplate(reader, writer): - readerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n" % (reader,reader,reader) - writerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n " % (writer,writer,writer) - print readerRef - print writerRef + readerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n" % ( + reader, reader, reader) + writerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n " % ( + writer, writer, writer) + print(readerRef) + print(writerRef) jobGuid = 'Please save the following configuration as a json file and use\n python {DATAX_HOME}/bin/datax.py {JSON_FILE_NAME}.json \nto run the job.\n' - print jobGuid - jobTemplate={ - "job": { - "setting": { - "speed": { - "channel": "" - } - }, - "content": [ - { - "reader": {}, - "writer": {} - } - ] - } + print(jobGuid) + jobTemplate = { + "job": { + "setting": { + "speed": { + "channel": "" + } + }, + "content": [ + { + "reader": {}, + "writer": {} + } + ] + } } - readerTemplatePath = "%s/plugin/reader/%s/plugin_job_template.json" % (DATAX_HOME,reader) - writerTemplatePath = "%s/plugin/writer/%s/plugin_job_template.json" % 
(DATAX_HOME,writer) + readerTemplatePath = "%s/plugin/reader/%s/plugin_job_template.json" % (DATAX_HOME, reader) + writerTemplatePath = "%s/plugin/writer/%s/plugin_job_template.json" % (DATAX_HOME, writer) try: - readerPar = readPluginTemplate(readerTemplatePath); - except Exception, e: - print "Read reader[%s] template error: can\'t find file %s" % (reader,readerTemplatePath) + readerPar = readPluginTemplate(readerTemplatePath) + except: + print("Read reader[%s] template error: can\'t find file %s" % (reader, readerTemplatePath)) try: - writerPar = readPluginTemplate(writerTemplatePath); - except Exception, e: - print "Read writer[%s] template error: : can\'t find file %s" % (writer,writerTemplatePath) - jobTemplate['job']['content'][0]['reader'] = readerPar; - jobTemplate['job']['content'][0]['writer'] = writerPar; - print json.dumps(jobTemplate, indent=4, sort_keys=True) + writerPar = readPluginTemplate(writerTemplatePath) + except: + print("Read writer[%s] template error: : can\'t find file %s" % (writer, writerTemplatePath)) + jobTemplate['job']['content'][0]['reader'] = readerPar + jobTemplate['job']['content'][0]['writer'] = writerPar + print(json.dumps(jobTemplate, indent=4, sort_keys=True)) + def readPluginTemplate(plugin): with open(plugin, 'r') as f: - return json.load(f) + return json.load(f) + def isUrl(path): if not path: @@ -168,7 +182,7 @@ def buildStartCommand(options, args): if options.remoteDebug: tempJVMCommand = tempJVMCommand + " " + REMOTE_DEBUG_CONFIG - print 'local ip: ', getLocalIp() + print('local ip: ', getLocalIp()) if options.loglevel: tempJVMCommand = tempJVMCommand + " " + ("-Dloglevel=%s" % (options.loglevel)) @@ -198,11 +212,11 @@ def buildStartCommand(options, args): def printCopyright(): - print ''' + print(''' DataX (%s), From Alibaba ! Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved. 
-''' % DATAX_VERSION +''' % DATAX_VERSION) sys.stdout.flush() @@ -211,7 +225,7 @@ if __name__ == "__main__": parser = getOptionParser() options, args = parser.parse_args(sys.argv[1:]) if options.reader is not None and options.writer is not None: - generateJobConfigTemplate(options.reader,options.writer) + generateJobConfigTemplate(options.reader, options.writer) sys.exit(RET_STATE['OK']) if len(args) != 1: parser.print_help() diff --git a/userGuid.md b/userGuid.md index 153c8111..16771a5e 100644 --- a/userGuid.md +++ b/userGuid.md @@ -10,7 +10,7 @@ DataX本身作为数据同步框架,将不同数据源的同步抽象为从源 - Linux - [JDK(1.8以上,推荐1.8) ](http://www.oracle.com/technetwork/cn/java/javase/downloads/index.html) -- [Python(推荐Python2.6.X) ](https://www.python.org/downloads/) +- [Python(2或3都可以) ](https://www.python.org/downloads/) - [Apache Maven 3.x](https://maven.apache.org/download.cgi) (Compile DataX) # Quick Start From 24952af5c3a3b9cfa5be4ac5b3bb2860bab6a868 Mon Sep 17 00:00:00 2001 From: dingbo Date: Tue, 23 Nov 2021 08:56:57 +0800 Subject: [PATCH 23/33] create test case --- .../datax/core/TestMysql2TDengine.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java diff --git a/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java b/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java new file mode 100644 index 00000000..aeb05071 --- /dev/null +++ b/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java @@ -0,0 +1,20 @@ +package com.alibaba.datax.core; + + +import org.junit.Test; + +public class TestMysql2TDengine { + + @Test + public void test() { + System.out.println(System.getProperty("java.library.path")); + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/main/job/mysql2tdengine.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + try { + Engine.entry(params); + } catch (Throwable e) { + e.printStackTrace(); + } + } + +} \ No 
newline at end of file From 93a3369eb6346607ad1fdca7055f6e72645b96f3 Mon Sep 17 00:00:00 2001 From: dingbo Date: Tue, 23 Nov 2021 17:34:44 +0800 Subject: [PATCH 24/33] prepare test data and add test case --- core/pom.xml | 12 +++ core/src/main/job/mysql2tdengine.json | 24 +++-- .../datax/core/TestMysql2TDengine.java | 99 ++++++++++++++++++- 3 files changed, 128 insertions(+), 7 deletions(-) diff --git a/core/pom.xml b/core/pom.xml index 174a18d3..3981cfcc 100755 --- a/core/pom.xml +++ b/core/pom.xml @@ -97,6 +97,18 @@ groovy-all 2.1.9 + + mysql + mysql-connector-java + ${mysql.driver.version} + test + + + com.taosdata.jdbc + taos-jdbcdriver + 2.0.34 + test + diff --git a/core/src/main/job/mysql2tdengine.json b/core/src/main/job/mysql2tdengine.json index 7978fbf5..c936aa36 100644 --- a/core/src/main/job/mysql2tdengine.json +++ b/core/src/main/job/mysql2tdengine.json @@ -6,18 +6,18 @@ "name": "mysqlreader", "parameter": { "username": "root", - "password": "123456", + "password": "passw0rd", "column": [ "*" ], - "splitPk": "f1", + "splitPk": "station", "connection": [ { "table": [ "weather" ], "jdbcUrl": [ - "jdbc:mysql://192.168.56.105:3306/test?useSSL=false&useUnicode=true&characterEncoding=utf8" + "jdbc:mysql://127.0.0.1:3306/test?useSSL=false&useUnicode=true&characterEncoding=utf8" ] } ] @@ -26,13 +26,25 @@ "writer": { "name": "tdenginewriter", "parameter": { - "host": "192.168.56.105", + "host": "127.0.0.1", "port": 6030, "dbname": "test", "user": "root", "password": "taosdata", - "table": "weather", - "batchSize": 1000 + "batchSize": 1000, + "stable": "weather", + "tagColumn": { + "station": 0 + }, + "fieldColumn": { + "latitude": 1, + "longtitude": 2, + "tmax": 4, + "tmin": 5 + }, + "timestampColumn":{ + "date": 3 + } } } } diff --git a/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java b/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java index aeb05071..6edc185c 100644 --- 
a/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java +++ b/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java @@ -1,10 +1,105 @@ package com.alibaba.datax.core; - +import org.junit.After; +import org.junit.Before; import org.junit.Test; +import java.sql.*; +import java.util.*; +import java.util.Date; + +/** + * 测试从mysql到TD + */ public class TestMysql2TDengine { + @Test + public void genTestData() throws ClassNotFoundException, SQLException { + Class.forName("com.mysql.jdbc.Driver"); + + Connection conn = null; + Statement stmt = null; + ResultSet rs = null; + PreparedStatement pstmt = null; + + try { + conn = DriverManager.getConnection("jdbc:mysql://localhost/mysql?" + + "user=root&password=passw0rd"); + stmt = conn.createStatement(); + stmt.execute("create database if not exists test"); + stmt.execute("use test"); + stmt.execute("drop table weather"); + stmt.execute("CREATE TABLE IF NOT EXISTS weather(station varchar(100), latitude DOUBLE, longtitude DOUBLE, `date` DATETIME, tmax INT, tmin INT)"); + pstmt = conn.prepareStatement("insert into weather(station, latitude, longtitude, `date`, tmax, tmin) values (?, ?, ?, ?, ?, ?)"); + genRandomData(pstmt); + } finally { + if (rs != null) { + try { + rs.close(); + } catch (SQLException sqlEx) { + } // ignore + + rs = null; + } + + if (stmt != null) { + try { + stmt.close(); + } catch (SQLException sqlEx) { + } // ignore + + stmt = null; + } + + if (pstmt != null) { + pstmt.close(); + } + } + + } + + private void genRandomData(PreparedStatement psmt) throws SQLException { + Random random = new Random(); + Calendar calendar = Calendar.getInstance(); + calendar.set(1990, 0, 1, 1, 0, 0); + List stations = Arrays.asList("STA", "STB", "STC"); + for (int i = 0; i < (10 * 100 * 24); i++) { + for (int j = 0; j < 3; j++) { + psmt.setString(1, stations.get(j)); + psmt.setDouble(2, random.nextDouble() * 1000); + psmt.setDouble(3, random.nextDouble() * 1000); + psmt.setTimestamp(4, new 
java.sql.Timestamp(calendar.getTime().getTime())); + psmt.setInt(5, random.nextInt(100)); + psmt.setInt(6, random.nextInt(100)); + psmt.addBatch(); + } + calendar.add(Calendar.MINUTE, 60); + if (i % 1000 == 0) { + psmt.executeBatch(); + } + } + psmt.executeBatch(); + } + + @Test + public void prepareTDengine() throws ClassNotFoundException, SQLException { + Class.forName("com.mysql.jdbc.Driver"); + + Connection conn = null; + Statement stmt = null; + + try { + conn = DriverManager.getConnection("jdbc:TAOS://127.0.0.1:6030/log?user=root&password=taosdata"); + stmt = conn.createStatement(); + stmt.execute("create database if not exists test"); + stmt.execute("drop stable if exists test.weather"); + } finally { + if (stmt != null) { + stmt.close(); + } + } + } + @Test public void test() { System.out.println(System.getProperty("java.library.path")); @@ -17,4 +112,6 @@ public class TestMysql2TDengine { } } + + } \ No newline at end of file From 9b2d33b6e1761385d4bb8528409a64e7102bd9aa Mon Sep 17 00:00:00 2001 From: dingbo Date: Tue, 23 Nov 2021 18:48:04 +0800 Subject: [PATCH 25/33] test case bug fix --- .../com/alibaba/datax/core/TestMysql2TDengine.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java b/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java index 6edc185c..026b1db2 100644 --- a/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java +++ b/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java @@ -1,12 +1,12 @@ package com.alibaba.datax.core; -import org.junit.After; -import org.junit.Before; import org.junit.Test; import java.sql.*; -import java.util.*; -import java.util.Date; +import java.util.Arrays; +import java.util.Calendar; +import java.util.List; +import java.util.Random; /** * 测试从mysql到TD @@ -78,6 +78,7 @@ public class TestMysql2TDengine { psmt.executeBatch(); } } + // 有部分重复数据,不影响测试 psmt.executeBatch(); } @@ -91,7 +92,8 
@@ public class TestMysql2TDengine { try { conn = DriverManager.getConnection("jdbc:TAOS://127.0.0.1:6030/log?user=root&password=taosdata"); stmt = conn.createStatement(); - stmt.execute("create database if not exists test"); + stmt.execute("drop database if exists test"); + stmt.execute("create database if not exists test keep 36500"); stmt.execute("drop stable if exists test.weather"); } finally { if (stmt != null) { From 5cc0fa3b1709441c42ac3e29bb09df39fe09b21b Mon Sep 17 00:00:00 2001 From: dingbo Date: Tue, 23 Nov 2021 18:48:31 +0800 Subject: [PATCH 26/33] add mysql config demo to doc --- tdenginewriter/doc/tdenginewriter.md | 96 ++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 7 deletions(-) diff --git a/tdenginewriter/doc/tdenginewriter.md b/tdenginewriter/doc/tdenginewriter.md index 432b1fb2..062fac2c 100644 --- a/tdenginewriter/doc/tdenginewriter.md +++ b/tdenginewriter/doc/tdenginewriter.md @@ -181,8 +181,8 @@ TDengineWriter 通过 DataX 框架获取 Reader生成的协议数据,根据rea | fieldColumn | 字段列的列名和位置 | 否 | 无 | | | timestampColumn | 时间戳列的列名和位置 | 否 | 无 | 时间戳列只能有一个 | -#### 3.3.3 自动建表规则 -##### 3.3.3.1 超级表创建规则 +#### 3.2.3 自动建表规则 +##### 3.2.3.1 超级表创建规则 如果配置了tagColumn、 fieldColumn和timestampColumn将会在插入第一条数据前,自动创建超级表。
数据列的类型从第1条记录自动推断, 标签列默认类型为`NCHAR(64)`, 比如示例配置,可能生成以下建表语句: @@ -201,14 +201,14 @@ TAGS( ); ``` -##### 3.3.3.2 子表创建规则 +##### 3.2.3.2 子表创建规则 子表结果与超表相同,子表表名生成规则: 1. 将标签的value 组合成为如下的字符串: `tag_value1!tag_value2!tag_value3`。 2. 计算该字符串的 MD5 散列值 "md5_val"。 3. "t_md5val"作为子表名。其中的 "t" 是固定的前缀。 -#### 3.3.4 用户提前建表 +#### 3.2.4 用户提前建表 如果你已经创建好目标超级表,那么tagColumn、 fieldColumn和timestampColumn三个字段均可省略, 插件将通过执行通过`describe stableName`获取表结构的信息。 此时要求接收到的Record中Column的顺序和执行`describe stableName`返回的列顺序相同, 比如通过`describe stableName`返回以下内容: @@ -221,13 +221,13 @@ TAGS( ``` 那么插件收到的数据第1列必须代表时间戳,第2列必须代表电流,第3列必须代表位置。 -#### 3.3.5 注意事项 +#### 3.2.5 注意事项 1. tagColumn、 fieldColumn和timestampColumn三个字段用于描述目标表的结构信息,这三个配置字段必须同时存在或同时省略。 2. 如果存在以上三个配置,且目标表也已经存在,则两者必须一致。**一致性**由用户自己保证,插件不做检查。不一致可能会导致插入失败或插入数据错乱。 3. 插件优先使用配置文件中指定的表结构。 -#### 3.3.6 类型转换 +#### 3.2.6 类型转换 | MongoDB 数据类型 | DataX 内部类型 | TDengine 数据类型 | | ---------------- | -------------- | ----------------- | @@ -238,6 +238,84 @@ TAGS( | boolean | Boolean | BOOL | | bytes | Bytes | BINARY | +### 3.3 从关系型数据库到TDengine +writer部分的配置规则和上述MongoDB的示例是一样的,这里给出一个MySQL的示例。 + +#### 3.3.1 MySQL中表结构 +```sql +CREATE TABLE IF NOT EXISTS weather( + station varchar(100), + latitude DOUBLE, + longtitude DOUBLE, + `date` DATE, + TMAX int, + TMIN int +) +``` + +#### 3.3.2 配置文件示例 + +```json +{ + "job": { + "content": [ + { + "reader": { + "name": "mysqlreader", + "parameter": { + "username": "root", + "password": "passw0rd", + "column": [ + "*" + ], + "splitPk": "station", + "connection": [ + { + "table": [ + "weather" + ], + "jdbcUrl": [ + "jdbc:mysql://127.0.0.1:3306/test?useSSL=false&useUnicode=true&characterEncoding=utf8" + ] + } + ] + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "host": "127.0.0.1", + "port": 6030, + "dbname": "test", + "user": "root", + "password": "taosdata", + "batchSize": 1000, + "stable": "weather", + "tagColumn": { + "station": 0 + }, + "fieldColumn": { + "latitude": 1, + "longtitude": 2, + "tmax": 4, + "tmin": 5 + }, + 
"timestampColumn":{ + "date": 3 + } + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} +``` + ## 4 性能报告 @@ -313,4 +391,8 @@ TDengine要求每个表第一列是时间戳列,后边是普通字段,最后 ### 插件如何确定各列的数据类型? -根据收到的第一批数据自动推断各列的类型。 \ No newline at end of file +根据收到的第一批数据自动推断各列的类型。 + +### 为什么插入10年前的数据会抛异常`TDengine ERROR (2350): failed to execute batch bind` ? + +因为创建数据库的时候,默认保留10年的数据。可以手动指定要保留多长时间的数据,比如:`CREATE DATABASE power KEEP 36500;`。 \ No newline at end of file From 1bc24bfbd04229853e0de46a66bd4cfb7e96be14 Mon Sep 17 00:00:00 2001 From: dingbo Date: Tue, 23 Nov 2021 18:55:29 +0800 Subject: [PATCH 27/33] update global readme add TDengine --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 37a21022..9d18a0a9 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,8 @@ DataX目前已经有了比较全面的插件体系,主流的RDBMS数据库、N | | FTP | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/ftpreader/doc/ftpreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/ftpwriter/doc/ftpwriter.md)| | | HDFS | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/hdfsreader/doc/hdfsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md)| | | Elasticsearch | | √ |[写](https://github.com/alibaba/DataX/blob/master/elasticsearchwriter/doc/elasticsearchwriter.md)| -| 时间序列数据库 | OpenTSDB | √ | |[读](https://github.com/alibaba/DataX/blob/master/opentsdbreader/doc/opentsdbreader.md)| +| 时间序列数据库 | TDengine | √ | √ |[读](https://github.com/taosdata/DataX/blob/master/tdenginereader/doc/tdenginereader.md) 、[写](https://github.com/taosdata/DataX/blob/master/tdenginewriter/doc/tdenginewriter.md)| +| | OpenTSDB | √ | |[读](https://github.com/alibaba/DataX/blob/master/opentsdbreader/doc/opentsdbreader.md)| | | TSDB | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/tsdbreader/doc/tsdbreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/tsdbwriter/doc/tsdbhttpwriter.md)| # 阿里云DataWorks数据集成 From 
f8aa7e90943f52b7a3c94d0ae0badb7f135105fe Mon Sep 17 00:00:00 2001 From: dingbo Date: Tue, 23 Nov 2021 19:11:27 +0800 Subject: [PATCH 28/33] typo --- .../test/java/com/alibaba/datax/core/TestMysql2TDengine.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java b/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java index 026b1db2..c86f1776 100644 --- a/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java +++ b/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java @@ -9,7 +9,7 @@ import java.util.List; import java.util.Random; /** - * 测试从mysql到TD + * 测试从mysql到TDengine */ public class TestMysql2TDengine { From a52b1909ca77e24490d7e4588b964bfbeee5cfce Mon Sep 17 00:00:00 2001 From: dingbo Date: Wed, 24 Nov 2021 07:58:25 +0800 Subject: [PATCH 29/33] no need to load driver class explicitly --- .../datax/core/TestMysql2TDengine.java | 24 ++++--------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java b/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java index c86f1776..cab0629f 100644 --- a/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java +++ b/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java @@ -14,12 +14,9 @@ import java.util.Random; public class TestMysql2TDengine { @Test - public void genTestData() throws ClassNotFoundException, SQLException { - Class.forName("com.mysql.jdbc.Driver"); - - Connection conn = null; + public void genTestData() throws SQLException { + Connection conn; Statement stmt = null; - ResultSet rs = null; PreparedStatement pstmt = null; try { @@ -33,15 +30,6 @@ public class TestMysql2TDengine { pstmt = conn.prepareStatement("insert into weather(station, latitude, longtitude, `date`, tmax, tmin) values (?, ?, ?, ?, ?, ?)"); genRandomData(pstmt); } finally { - if (rs != null) { - try { - rs.close(); - } 
catch (SQLException sqlEx) { - } // ignore - - rs = null; - } - if (stmt != null) { try { stmt.close(); @@ -53,6 +41,7 @@ public class TestMysql2TDengine { if (pstmt != null) { pstmt.close(); + pstmt = null; } } @@ -83,10 +72,8 @@ public class TestMysql2TDengine { } @Test - public void prepareTDengine() throws ClassNotFoundException, SQLException { - Class.forName("com.mysql.jdbc.Driver"); - - Connection conn = null; + public void prepareTDengine() throws SQLException { + Connection conn; Statement stmt = null; try { @@ -115,5 +102,4 @@ public class TestMysql2TDengine { } - } \ No newline at end of file From 64ba5e91a58b2525e072e43e07663f661e1dd92d Mon Sep 17 00:00:00 2001 From: dingbo Date: Wed, 24 Nov 2021 09:20:49 +0800 Subject: [PATCH 30/33] remove comment --- .../src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java b/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java index cab0629f..4aaad646 100644 --- a/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java +++ b/core/src/test/java/com/alibaba/datax/core/TestMysql2TDengine.java @@ -67,7 +67,6 @@ public class TestMysql2TDengine { psmt.executeBatch(); } } - // 有部分重复数据,不影响测试 psmt.executeBatch(); } From 72c72612fdacf731f662024f780febc547f1b384 Mon Sep 17 00:00:00 2001 From: dingbo Date: Mon, 29 Nov 2021 13:37:47 +0800 Subject: [PATCH 31/33] fix job template error --- .../main/resources/plugin_job_template.json | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tdenginewriter/src/main/resources/plugin_job_template.json b/tdenginewriter/src/main/resources/plugin_job_template.json index 5482b26e..39c9c969 100644 --- a/tdenginewriter/src/main/resources/plugin_job_template.json +++ b/tdenginewriter/src/main/resources/plugin_job_template.json @@ -1,10 +1,24 @@ { "name": "tdenginewriter", "parameter": { - "host": "", + "host": 
"127.0.0.1", "port": 6030, - "db": "", - "user": "", - "password": "" + "dbname": "test", + "user": "root", + "password": "taosdata", + "batchSize": 1000, + "stable": "weather", + "tagColumn": { + "station": 0 + }, + "fieldColumn": { + "latitude": 1, + "longtitude": 2, + "tmax": 4, + "tmin": 5 + }, + "timestampColumn":{ + "date": 3 + } } } \ No newline at end of file From 30fa8f12843e478b70bd8ab64f9f4581b07d89f9 Mon Sep 17 00:00:00 2001 From: dingbo Date: Mon, 29 Nov 2021 21:08:56 +0800 Subject: [PATCH 32/33] add English doc and fix bugs of Chinese doc --- tdenginewriter/doc/tdenginewriter-EN.md | 349 ++++++++++++++++++++++++ tdenginewriter/doc/tdenginewriter.md | 7 +- 2 files changed, 353 insertions(+), 3 deletions(-) create mode 100644 tdenginewriter/doc/tdenginewriter-EN.md diff --git a/tdenginewriter/doc/tdenginewriter-EN.md b/tdenginewriter/doc/tdenginewriter-EN.md new file mode 100644 index 00000000..eda88a9f --- /dev/null +++ b/tdenginewriter/doc/tdenginewriter-EN.md @@ -0,0 +1,349 @@ +# DataX TDengineWriter + +[简体中文](./tdenginewriter.md) | English + +## 1 Quick Introduction + +TDengineWriter Plugin writes data to [TDengine](https://www.taosdata.com/en/). It can be used to offline synchronize data from other databases to TDengine. + +## 2 Implementation + +TDengineWriter get records from DataX Framework that are generated from reader side. It has two whiting strategies: + +1. For data from OpenTSDBReader which is in json format, to leverage the new feature of TDengine Server that support writing json data directly called [schemaless writing](https://www.taosdata.com/cn/documentation/insert#schemaless), we use JNI to call functions in `taos.lib` or `taos.dll`.(Since the feature was not included in taos-jdbcdrive until version 2.0.36). +2. For other data sources, we use [taos-jdbcdriver](https://www.taosdata.com/cn/documentation/connector/java) to write data. 
If the target table is not exists beforehand, then it will be created automatically according to your configuration. + +## 3 Features Introduction +### 3.1 From OpenTSDB to TDengine +#### 3.1.1 Sample Setting + +```json +{ + "job": { + "content": [ + { + "reader": { + "name": "opentsdbreader", + "parameter": { + "endpoint": "http://192.168.1.180:4242", + "column": [ + "weather_temperature" + ], + "beginDateTime": "2021-01-01 00:00:00", + "endDateTime": "2021-01-01 01:00:00" + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "host": "192.168.1.180", + "port": 6030, + "dbname": "test", + "user": "root", + "password": "taosdata" + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} +``` + +#### 3.1.2 Configuration + +| Parameter | Description | Required | Default | +| --------- | ------------------------------ | -------- | -------- | +| host | host of TDengine | Yes | | +| port | port of TDengine | Yes | | +| user | use name of TDengine | No | root | +| password | password of TDengine | No | taosdata | +| dbname | name of target database | No | | +| batchSize | batch size of insert operation | No | 1 | + + +#### 3.1.3 Type Convert + +| OpenTSDB Type | DataX Type | TDengine Type | +| ---------------- | ---------- | ------------- | +| timestamp | Date | timestamp | +| Integer(value) | Double | double | +| Float(value) | Double | double | +| String(value) | String | binary | +| Integer(tag) | String | binary | +| Float(tag) | String | binary | +| String(tag) | String | binary | + +### 3.2 From MongoDB to TDengine + +#### 3.2.1 Sample Setting +```json +{ + "job": { + "setting": { + "speed": { + "channel": 2 + } + }, + "content": [ + { + "reader": { + "name": "mongodbreader", + "parameter": { + "address": [ + "127.0.0.1:27017" + ], + "userName": "user", + "mechanism": "SCRAM-SHA-1", + "userPassword": "password", + "authDb": "admin", + "dbName": "test", + "collectionName": "stock", + "column": [ + { + "name": "stockID", + "type": 
"string" + }, + { + "name": "tradeTime", + "type": "date" + }, + { + "name": "lastPrice", + "type": "double" + }, + { + "name": "askPrice1", + "type": "double" + }, + { + "name": "bidPrice1", + "type": "double" + }, + { + "name": "volume", + "type": "int" + } + ] + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "host": "localhost", + "port": 6030, + "dbname": "test", + "user": "root", + "password": "taosdata", + "stable": "stock", + "tagColumn": { + "industry": "energy", + "stockID": 0 + }, + "fieldColumn": { + "lastPrice": 2, + "askPrice1": 3, + "bidPrice1": 4, + "volume": 5 + }, + "timestampColumn": { + "tradeTime": 1 + } + } + } + } + ] + } +} +``` + +**Note:the writer part of this setting can also apply to other data source except for OpenTSDB ** + + +#### 3.2.2 Configuration + +| Parameter | Description | Required | Default | Remark | +| --------------- | --------------------------------------------------------------- | ------------------------ | -------- | ------------------- | +| host | host ofTDengine | Yes | | +| port | port of TDengine | Yes | | +| user | user name of TDengine | No | root | +| password | password of TDengine | No | taosdata | +| dbname | name of target database | Yes | | +| batchSize | batch size of insert operation | No | 1000 | +| stable | name of target super table | Yes(except for OpenTSDB) | | +| tagColumn | name and position of tag columns in the record from reader | No | | index starts with 0 | +| fieldColumn | name and position of data columns in the record from reader | No | | | +| timestampColumn | name and position of timestamp column in the record from reader | No | | | + +#### 3.2.3 Auto table creating +##### 3.2.3.1 Rules + +If all of `tagColumn`, `fieldColumn` and `timestampColumn` are offered in writer configuration, then target super table will be created automatically. +The type of tag columns will always be `NCHAR(64)`. 
The sample setting above will produce following sql: + +```sql +CREATE STABLE IF NOT EXISTS market_snapshot ( + tadetime TIMESTAMP, + lastprice DOUBLE, + askprice1 DOUBLE, + bidprice1 DOUBLE, + volume INT +) +TAGS( + industry NCHAR(64), + stockID NCHAR(64) +); +``` + +##### 3.2.3.2 Sub-table Creating Rules + +The structure of sub-tables are the same with structure of super table. The names of sub-tables are generated by rules below: +1. combine value of tags like this:`tag_value1!tag_value2!tag_value3`. +2. compute md5 hash hex of above string, named `md5val` +3. use "t_md5val" as sub-table name, in which "t" is fixed prefix. + +#### 3.2.4 Use Pre-created Table + +If you have created super table firstly, then all of tagColumn, fieldColumn and timestampColumn can be omitted. The writer plugin will get table schema by executing `describe stableName`. +The order of columns of records received by this plugin must be the same as the order of columns returned by `describe stableName`. For example, if you have super table as below: +``` + Field | Type | Length | Note | +================================================================================= + ts | TIMESTAMP | 8 | | + current | DOUBLE | 8 | | + location | BINARY | 10 | TAG | +``` +Then the first columns received by this writer plugin must represent timestamp, the second column must represent current with type double, the third column must represent location with internal type string. + +#### 3.2.5 Remarks + +1. Config keys --tagColumn, fieldColumn and timestampColumn, must be presented or omitted at the same time. +2. If above three config keys exist and the target table also exists, then the order of columns defined by the config file and the existed table must be the same. 
+ +#### 3.2.6 Type Convert + +| MongoDB Type | DataX Type | TDengine Type | +| ---------------- | -------------- | ----------------- | +| int, Long | Long | BIGINT | +| double | Double | DOUBLE | +| string, array | String | NCHAR(64) | +| date | Date | TIMESTAMP | +| boolean | Boolean | BOOL | +| bytes | Bytes | BINARY | + +### 3.3 From Relational Database to TDengine + +Take MySQl as example. + +#### 3.3.1 Table Structure in MySQL +```sql +CREATE TABLE IF NOT EXISTS weather( + station varchar(100), + latitude DOUBLE, + longtitude DOUBLE, + `date` DATE, + TMAX int, + TMIN int +) +``` + +#### 3.3.2 Sample Setting + +```json +{ + "job": { + "content": [ + { + "reader": { + "name": "mysqlreader", + "parameter": { + "username": "root", + "password": "passw0rd", + "column": [ + "*" + ], + "splitPk": "station", + "connection": [ + { + "table": [ + "weather" + ], + "jdbcUrl": [ + "jdbc:mysql://127.0.0.1:3306/test?useSSL=false&useUnicode=true&characterEncoding=utf8" + ] + } + ] + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "host": "127.0.0.1", + "port": 6030, + "dbname": "test", + "user": "root", + "password": "taosdata", + "batchSize": 1000, + "stable": "weather", + "tagColumn": { + "station": 0 + }, + "fieldColumn": { + "latitude": 1, + "longtitude": 2, + "tmax": 4, + "tmin": 5 + }, + "timestampColumn":{ + "date": 3 + } + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} +``` + + +## 4 Performance Test + +## 5 Restriction + +1. NCHAR type has fixed length 64 when auto creating stable. +2. Rows have null tag values will be dropped. + +## FAQ + +### How to filter on source table? + +It depends on reader plugin. For different reader plugins, the way may be different. + +### How to import multiple source tables at once? + +It depends on reader plugin. If the reader plugin supports reading multiple tables at once, then there is no problem. + +### How many sub-tables will be produced? 
+ +The number of sub-tables is determined by tagColumns, equals to the number of different combinations of tag values. + +### Do columns in source table and columns in target table must be in the same order? + +No. TDengine require the first column has timestamp type,which is followed by data columns, followed by tag columns. The writer plugin will create super table in this column order, regardless of origin column orders. + +### How dose the plugin infer the data type of incoming data? + +By the first batch of records it received. + +### Why can't I insert data of 10 years ago? Do this will get error: `TDengine ERROR (2350): failed to execute batch bind`. + +Because the database you created only keep 10 years data by default, you can create table like this: `CREATE DATABASE power KEEP 36500;`, in order to enlarge the time period to 100 years. \ No newline at end of file diff --git a/tdenginewriter/doc/tdenginewriter.md b/tdenginewriter/doc/tdenginewriter.md index 062fac2c..e318ef7f 100644 --- a/tdenginewriter/doc/tdenginewriter.md +++ b/tdenginewriter/doc/tdenginewriter.md @@ -1,5 +1,7 @@ # DataX TDengineWriter +简体中文| [English](./tdenginewriter-EN.md) + ## 1 快速介绍 TDengineWriter插件实现了写入数据到TDengine数据库功能。可用于离线同步其它数据库的数据到TDengine。 @@ -203,7 +205,7 @@ TAGS( ##### 3.2.3.2 子表创建规则 -子表结果与超表相同,子表表名生成规则: +子表结构与超表相同,子表表名生成规则: 1. 将标签的value 组合成为如下的字符串: `tag_value1!tag_value2!tag_value3`。 2. 计算该字符串的 MD5 散列值 "md5_val"。 3. "t_md5val"作为子表名。其中的 "t" 是固定的前缀。 @@ -225,7 +227,6 @@ TAGS( 1. tagColumn、 fieldColumn和timestampColumn三个字段用于描述目标表的结构信息,这三个配置字段必须同时存在或同时省略。 2. 如果存在以上三个配置,且目标表也已经存在,则两者必须一致。**一致性**由用户自己保证,插件不做检查。不一致可能会导致插入失败或插入数据错乱。 -3. 插件优先使用配置文件中指定的表结构。 #### 3.2.6 类型转换 @@ -387,7 +388,7 @@ CREATE TABLE IF NOT EXISTS weather( ### 源表和目标表的字段顺序一致吗? -TDengine要求每个表第一列是时间戳列,后边是普通字段,最后是标签列。如果源表不是这个顺序,插件在自动建表是自动调整。 +TDengine要求每个表第一列是时间戳列,后边是普通字段,最后是标签列。如果源表不是这个顺序,插件在自动建表时会自动调整。 ### 插件如何确定各列的数据类型? 
From e8563a23796be77c72587b5369f7eabe8dab9eb5 Mon Sep 17 00:00:00 2001 From: dingbo Date: Tue, 30 Nov 2021 07:41:25 +0800 Subject: [PATCH 33/33] =?UTF-8?q?=E8=B6=85=E8=A1=A8=20to=20=E8=B6=85?= =?UTF-8?q?=E7=BA=A7=E8=A1=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tdenginewriter/doc/tdenginewriter.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tdenginewriter/doc/tdenginewriter.md b/tdenginewriter/doc/tdenginewriter.md index e318ef7f..251f29a8 100644 --- a/tdenginewriter/doc/tdenginewriter.md +++ b/tdenginewriter/doc/tdenginewriter.md @@ -205,7 +205,7 @@ TAGS( ##### 3.2.3.2 子表创建规则 -子表结构与超表相同,子表表名生成规则: +子表结构与超级表相同,子表表名生成规则: 1. 将标签的value 组合成为如下的字符串: `tag_value1!tag_value2!tag_value3`。 2. 计算该字符串的 MD5 散列值 "md5_val"。 3. "t_md5val"作为子表名。其中的 "t" 是固定的前缀。 @@ -384,7 +384,7 @@ CREATE TABLE IF NOT EXISTS weather( ### 一张源表导入之后对应TDengine中多少张表? -这是由tagColumn决定的,如果所有tag列的值都相同,那么目标表只有一个。源表有多少不同的tag组合,目标超表就有多少子表。 +这是由tagColumn决定的,如果所有tag列的值都相同,那么目标表只有一个。源表有多少不同的tag组合,目标超级表就有多少子表。 ### 源表和目标表的字段顺序一致吗?