From 272f48be8c05d1fdaf03e46c3b008b6f6483bdfb Mon Sep 17 00:00:00 2001 From: binaryWorld Date: Mon, 25 Feb 2019 17:18:24 +0800 Subject: [PATCH 1/7] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f12bb021..725dbbff 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,7 @@ This software is free to use under the Apache License [Apache license](https://g 9. 有大数据产品、云产品、中间件技术解决方案者优先考虑。 ```` 钉钉用户请扫描以下二维码进行讨论: -![DataX-OpenSource-Dingding](https://img.alicdn.com/tfs/TB1SdPUH21TBuNjy0FjXXajyXXa-359-504.png) +![DataX-OpenSource-Dingding](https://img.alicdn.com/tfs/TB1ZQuhIG6qK1RjSZFmXXX0PFXa-362-501.png) From db0333b9712ad98088a17d3ab4501d615d338699 Mon Sep 17 00:00:00 2001 From: "bake.snn" Date: Mon, 11 Mar 2019 11:19:41 +0800 Subject: [PATCH 2/7] =?UTF-8?q?=E6=94=AF=E6=8C=81Phoenix5.x=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E8=AF=BB=E5=86=99=E6=8F=92=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- hbase20xsqlreader/doc/hbase20xsqlreader.md | 164 +++++++ hbase20xsqlreader/pom.xml | 116 +++++ .../src/main/assembly/package.xml | 35 ++ .../reader/hbase20xsqlreader/Constant.java | 28 ++ .../HBase20SQLReaderHelper.java | 403 ++++++++++++++++++ .../hbase20xsqlreader/HBase20xSQLReader.java | 53 +++ .../HBase20xSQLReaderErrorCode.java | 39 ++ .../HBase20xSQLReaderTask.java | 121 ++++++ .../plugin/reader/hbase20xsqlreader/Key.java | 40 ++ .../src/main/resources/plugin.json | 7 + .../main/resources/plugin_job_template.json | 13 + hbase20xsqlwriter/doc/hbase20xsqlwriter.md | 164 +++++++ hbase20xsqlwriter/pom.xml | 107 +++++ .../src/main/assembly/package.xml | 35 ++ .../writer/hbase20xsqlwriter/Constant.java | 17 + .../hbase20xsqlwriter/HBase20xSQLHelper.java | 142 ++++++ .../hbase20xsqlwriter/HBase20xSQLWriter.java | 58 +++ .../HBase20xSQLWriterErrorCode.java | 37 ++ .../HBase20xSQLWriterTask.java | 389 +++++++++++++++++ .../plugin/writer/hbase20xsqlwriter/Key.java | 36 ++ .../hbase20xsqlwriter/NullModeType.java | 32 ++ .../src/main/resources/plugin.json | 7 + .../main/resources/plugin_job_template.json | 13 + package.xml | 14 + pom.xml | 2 + 25 files changed, 2072 insertions(+) create mode 100644 hbase20xsqlreader/doc/hbase20xsqlreader.md create mode 100644 hbase20xsqlreader/pom.xml create mode 100644 hbase20xsqlreader/src/main/assembly/package.xml create mode 100644 hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/Constant.java create mode 100644 hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20SQLReaderHelper.java create mode 100644 hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20xSQLReader.java create mode 100644 hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20xSQLReaderErrorCode.java create mode 100644 hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20xSQLReaderTask.java create mode 100644 hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/Key.java create mode 100644 hbase20xsqlreader/src/main/resources/plugin.json create mode 100644 hbase20xsqlreader/src/main/resources/plugin_job_template.json create mode 100644 hbase20xsqlwriter/doc/hbase20xsqlwriter.md create mode 100644 hbase20xsqlwriter/pom.xml create mode 100755 hbase20xsqlwriter/src/main/assembly/package.xml create mode 100755 
hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/Constant.java create mode 100644 hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLHelper.java create mode 100644 hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLWriter.java create mode 100644 hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLWriterErrorCode.java create mode 100644 hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLWriterTask.java create mode 100644 hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/Key.java create mode 100644 hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/NullModeType.java create mode 100755 hbase20xsqlwriter/src/main/resources/plugin.json create mode 100644 hbase20xsqlwriter/src/main/resources/plugin_job_template.json diff --git a/hbase20xsqlreader/doc/hbase20xsqlreader.md b/hbase20xsqlreader/doc/hbase20xsqlreader.md new file mode 100644 index 00000000..9df020cc --- /dev/null +++ b/hbase20xsqlreader/doc/hbase20xsqlreader.md @@ -0,0 +1,164 @@ +# hbase20xsqlreader 插件文档 + + +___ + + + +## 1 快速介绍 + +hbase20xsqlreader插件实现了从Phoenix(HBase SQL)读取数据,对应版本为HBase2.X和Phoenix5.X。 + +## 2 实现原理 + +简而言之,hbase20xsqlreader通过Phoenix轻客户端去连接Phoenix QueryServer,并根据用户配置信息生成查询SELECT 语句,然后发送到QueryServer读取HBase数据,并将返回结果使用DataX自定义的数据类型拼装为抽象的数据集,最终传递给下游Writer处理。 + +## 3 功能说明 + +### 3.1 配置样例 + +* 配置一个从Phoenix同步抽取数据到本地的作业: + +``` +{ + "job": { + "content": [ + { + "reader": { + "name": "hbase20xsqlreader", //指定插件为hbase20xsqlreader + "parameter": { + "queryServerAddress": "http://127.0.0.1:8765", //填写连接Phoenix QueryServer地址 + "serialization": "PROTOBUF", //QueryServer序列化格式 + "table": "TEST", //读取表名 + "column": ["ID", "NAME"], //所要读取列名 + "splitKey": "ID" //切分列,必须是表主键 + } + }, + "writer": { + "name": "streamwriter", + "parameter": { + "encoding": "UTF-8", + "print": true + } + } + } + ], + "setting": { + "speed": { + "channel": "3" + } + } + } +} +``` + + +### 3.2 参数说明 + +* **queryServerAddress** + + * 描述:hbase20xsqlreader需要通过Phoenix轻客户端去连接Phoenix QueryServer,因此这里需要填写对应QueryServer地址。 + + * 必选:是
+ + * 默认值:无
+ +* **serialization** + + * 描述:QueryServer使用的序列化协议 + + * 必选:否
+ + * 默认值:PROTOBUF
+ +* **table** + + * 描述:所要读取表名 + + * 必选:是
+ + * 默认值:无
+ +* **schema** + + * 描述:表所在的schema + + * 必选:否
+ + * 默认值:无
+ +* **column** + + * 描述:填写需要从Phoenix表中读取的列名集合,使用JSON数组描述字段信息;不填写或为空数组时默认读取所有列。 + + * 必选: 否
+ + * 默认值:全部列
+ +* **splitKey** + + * 描述:读取表时按该列对表进行切分,以便并行读取。切分有两种方式:1. 根据该列的最大最小值按照指定channel个数均分,这种方式仅支持整型和字符串类型的切分列;2. 根据设置的splitPoint进行切分。两种方式生成的查询见参数说明末尾的示例。 + + * 必选:是
+ + * 默认值:无
+ +* **splitPoint** + + * 描述:根据切分列最大最小值自动切分时无法避免数据热点,因此支持通过splitPoint根据数据特征显式指定切分点,对表数据进行切分。建议切分点按照Region的startkey和endkey设置,保证每个查询只命中单个Region。示例见参数说明末尾。 + + * 必选: 否
+ + * 默认值:无
+ +* **where** + + * 描述:支持对表查询增加过滤条件,每个切分都会携带该过滤条件。 + + * 必选: 否
+ + * 默认值:无
+ +* **querySql** + + * 描述:支持指定多个查询语句,但各语句的查询列类型和数目必须保持一致。用户可根据实际情况手动填写单表查询语句或多表联合查询语句。设置该参数后,除queryServerAddress仍为必填外,其余参数将失去作用或可不设置。配置示意见参数说明末尾。 + + * 必选: 否
+ + * 默认值:无
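+
+以上参数中,queryServerAddress与serialization共同决定插件实际使用的瘦客户端连接串:插件按内部模板 `jdbc:phoenix:thin:url=%s;serialization=%s` 拼接。以默认序列化格式为例,连接串形如(示意):
+
+```
+jdbc:phoenix:thin:url=http://127.0.0.1:8765;serialization=PROTOBUF
+```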
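+
+为便于理解splitKey与splitPoint的切分效果,下面给出两组示意。其中表TEST、列ID/NAME、channel数3、切分点'm'以及ID的取值范围均为假设值,实际生成的SQL以运行日志中打印的Query SQL为准:
+
+```
+-- 方式1:splitKey为整型列ID,channel为3,ID的最小/最大值为1/300时,自动切分生成的查询大致形如:
+select ID,NAME from TEST where (1 <= ID AND ID < 100)
+select ID,NAME from TEST where (100 <= ID AND ID < 200)
+select ID,NAME from TEST where (200 <= ID AND ID <= 300)
+
+-- 方式2:splitKey为字符串型列ID,splitPoint为["m"]时,按"<=切分点"与">切分点"生成查询:
+select ID,NAME from TEST where ID <= 'm'
+select ID,NAME from TEST where ID > 'm'
+```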
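+
+若改用querySql模式,一个最小配置示意如下(SQL内容为假设,仅需保证各语句返回的列类型与数目一致;每条语句会作为一个独立切分并发执行):
+
+```
+"reader": {
+    "name": "hbase20xsqlreader",
+    "parameter": {
+        "queryServerAddress": "http://127.0.0.1:8765",
+        "querySql": [
+            "select ID,NAME from TEST where ID <= 'm'",
+            "select ID,NAME from TEST where ID > 'm'"
+        ]
+    }
+}
+```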
+ + +### 3.3 类型转换 + +目前hbase20xsqlreader支持大部分Phoenix类型,但也存在部分个别类型没有支持的情况,请注意检查你的类型。 + +下面列出MysqlReader针对Mysql类型转换列表: + + +| DataX 内部类型| Phoenix 数据类型 | +| -------- | ----- | +| String |CHAR, VARCHAR| +| Bytes |BINARY, VARBINARY| +| Bool |BOOLEAN | +| Long |INTEGER, TINYINT, SMALLINT, BIGINT | +| Double |FLOAT, DECIMAL, DOUBLE, | +| Date |DATE, TIME, TIMESTAMP | + + + +## 4 性能报告 + +略 + +## 5 约束限制 + +* 切分表时切分列仅支持单个列,且该列必须是表主键 +* 不设置splitPoint默认使用自动切分,此时切分列仅支持整形和字符型 +* 表名和SCHEMA名及列名大小写敏感,请与Phoenix表实际大小写保持一致 +* 仅支持通过Phoenix QeuryServer读取数据,因此您的Phoenix必须启动QueryServer服务才能使用本插件 + +## 6 FAQ + +*** + + diff --git a/hbase20xsqlreader/pom.xml b/hbase20xsqlreader/pom.xml new file mode 100644 index 00000000..2df9a1a2 --- /dev/null +++ b/hbase20xsqlreader/pom.xml @@ -0,0 +1,116 @@ + + + + datax-all + com.alibaba.datax + 0.0.1-SNAPSHOT + + 4.0.0 + + hbase20xsqlreader + 0.0.1-SNAPSHOT + jar + + + 5.0.0-HBase-2.0 + + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + + org.apache.phoenix + phoenix-queryserver + ${phoenix.version} + + + servlet-api + javax.servlet + + + + + + + junit + junit + test + + + org.mockito + mockito-core + 2.0.44-beta + test + + + com.alibaba.datax + datax-core + ${datax-project-version} + + + com.alibaba.datax + datax-service-face + + + test + + + com.alibaba.datax + plugin-rdbms-util + 0.0.1-SNAPSHOT + compile + + + + + + + src/main/java + + **/*.properties + + + + + + + maven-compiler-plugin + + 1.6 + 1.6 + ${project-sourceEncoding} + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + diff --git a/hbase20xsqlreader/src/main/assembly/package.xml b/hbase20xsqlreader/src/main/assembly/package.xml new file mode 100644 index 00000000..c6ade25f --- /dev/null +++ b/hbase20xsqlreader/src/main/assembly/package.xml @@ -0,0 +1,35 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/reader/hbase20xsqlreader + + + target/ + + hbase20xsqlreader-0.0.1-SNAPSHOT.jar + + plugin/reader/hbase20xsqlreader + + + + + + false + plugin/reader/hbase20xsqlreader/libs + runtime + + + diff --git a/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/Constant.java b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/Constant.java new file mode 100644 index 00000000..0190125f --- /dev/null +++ b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/Constant.java @@ -0,0 +1,28 @@ +package com.alibaba.datax.plugin.reader.hbase20xsqlreader; + +public class Constant { + public static final String PK_TYPE = "pkType"; + + public static final Object PK_TYPE_STRING = "pkTypeString"; + + public static final Object PK_TYPE_LONG = "pkTypeLong"; + + public static final String DEFAULT_SERIALIZATION = "PROTOBUF"; + + public static final String CONNECT_STRING_TEMPLATE = "jdbc:phoenix:thin:url=%s;serialization=%s"; + + public static final String CONNECT_DRIVER_STRING = "org.apache.phoenix.queryserver.client.Driver"; + + public static final String SELECT_COLUMNS_TEMPLATE = "SELECT COLUMN_NAME, COLUMN_FAMILY FROM SYSTEM.CATALOG WHERE TABLE_NAME='%s' AND COLUMN_NAME IS NOT NULL"; + + public static String QUERY_SQL_TEMPLATE_WITHOUT_WHERE = "select %s from %s "; + + public static String QUERY_SQL_TEMPLATE = "select %s from %s where (%s)"; + + public static String QUERY_MIN_MAX_TEMPLATE = "SELECT MIN(%s),MAX(%s) FROM %s"; + + public static String 
QUERY_COLUMN_TYPE_TEMPLATE = "SELECT %s FROM %s LIMIT 1"; + + public static String QUERY_SQL_PER_SPLIT = "querySqlPerSplit"; + +} diff --git a/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20SQLReaderHelper.java b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20SQLReaderHelper.java new file mode 100644 index 00000000..f2d880af --- /dev/null +++ b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20SQLReaderHelper.java @@ -0,0 +1,403 @@ +package com.alibaba.datax.plugin.reader.hbase20xsqlreader; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.rdbms.util.RdbmsRangeSplitWrap; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.commons.lang3.tuple.Pair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.math.BigInteger; +import java.sql.*; +import java.util.ArrayList; +import java.util.List; + +public class HBase20SQLReaderHelper { + private static final Logger LOG = LoggerFactory.getLogger(HBase20SQLReaderHelper.class); + + private Configuration configuration; + + private Connection connection; + private List querySql; + private String fullTableName; + private List columnNames; + private String splitKey; + private List splitPoints; + + + public HBase20SQLReaderHelper (Configuration configuration) { + this.configuration = configuration; + } + /** + * 校验配置参数是否正确 + */ + public void validateParameter() { + // queryserver地址必须配置 + String queryServerAddress = configuration.getNecessaryValue(Key.QUERYSERVER_ADDRESS, + HBase20xSQLReaderErrorCode.REQUIRED_VALUE); + String serialization = configuration.getString(Key.SERIALIZATION_NAME, Constant.DEFAULT_SERIALIZATION); + connection = getConnection(queryServerAddress, serialization); + + //判断querySql是否配置,如果配置则table配置可为空,否则table必须配置 + querySql = configuration.getList(Key.QUERY_SQL, String.class); + if (querySql == null || querySql.isEmpty()) { + LOG.info("Split according to splitKey or split points."); + + String schema = configuration.getString(Key.SCHEMA, null); + String tableName = configuration.getNecessaryValue(Key.TABLE, HBase20xSQLReaderErrorCode.REQUIRED_VALUE); + if (schema != null && !schema.isEmpty()) { + fullTableName = schema + "." 
+ tableName; + } else { + fullTableName = tableName; + } + // 如果列名未配置,默认读取全部列* + columnNames = configuration.getList(Key.COLUMN, String.class); + splitKey = configuration.getString(Key.SPLIT_KEY, null); + splitPoints = configuration.getList(Key.SPLIT_POINT); + checkTable(schema, tableName); + dealWhere(); + } else { + // 用户指定querySql,切分不做处理,根据给定sql读取数据即可 + LOG.info("Split according to query sql."); + } + } + + public Connection getConnection(String queryServerAddress, String serialization) { + String url = String.format(Constant.CONNECT_STRING_TEMPLATE, queryServerAddress, serialization); + LOG.debug("Connecting to QueryServer [" + url + "] ..."); + Connection conn; + try { + Class.forName(Constant.CONNECT_DRIVER_STRING); + conn = DriverManager.getConnection(url); + conn.setAutoCommit(false); + } catch (Throwable e) { + throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.GET_QUERYSERVER_CONNECTION_ERROR, + "无法连接QueryServer,配置不正确或服务未启动,请检查配置和服务状态或者联系HBase管理员.", e); + } + LOG.debug("Connected to QueryServer successfully."); + return conn; + } + + /** + * 检查表名、列名和切分列是否存在 + */ + public void checkTable(String schema, String tableName) { + Statement statement = null; + ResultSet resultSet = null; + try { + statement = connection.createStatement(); + String selectSql = String.format(Constant.SELECT_COLUMNS_TEMPLATE, tableName); + + // 处理schema不为空情况 + if (schema == null || schema.isEmpty()) { + selectSql = selectSql + " AND TABLE_SCHEM IS NULL"; + } else { + selectSql = selectSql + " AND TABLE_SCHEM = '" + schema + "'"; + } + resultSet = statement.executeQuery(selectSql); + List primaryColumnNames = new ArrayList(); + List allColumnName = new ArrayList(); + while (resultSet.next()) { + String columnName = resultSet.getString(1); + allColumnName.add(columnName); + // 列族为空表示该列为主键列 + if (resultSet.getString(2) == null) { + primaryColumnNames.add(columnName); + } + } + if (columnNames != null && !columnNames.isEmpty()) { + for (String columnName : columnNames) { + if (!allColumnName.contains(columnName)) { + // 用户配置的列名在元数据中不存在 + throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_VALUE, + "您配置的列" + columnName + "在表" + tableName + "的元数据中不存在,请检查您的配置或者联系HBase管理员."); + } + } + } else { + columnNames = allColumnName; + configuration.set(Key.COLUMN, allColumnName); + } + if (splitKey != null) { + // 切分列必须是主键列,否则会严重影响读取性能 + if (!primaryColumnNames.contains(splitKey)) { + throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_VALUE, + "您配置的切分列" + splitKey + "不是表" + tableName + "的主键,请检查您的配置或者联系HBase管理员."); + } + } + + } catch (SQLException e) { + throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.GET_PHOENIX_TABLE_ERROR, + "获取表" + tableName + "信息失败,请检查您的集群和表状态或者联系HBase管理员.", e); + + } finally { + closeJdbc(null, statement, resultSet); + } + } + + public void closeJdbc(Connection connection, Statement statement, ResultSet resultSet) { + try { + if (resultSet != null) { + resultSet.close(); + } + if (statement != null) { + statement.close(); + } + if (connection != null) { + connection.close(); + } + } catch (SQLException e) { + LOG.warn("数据库连接关闭异常.", HBase20xSQLReaderErrorCode.CLOSE_PHOENIX_CONNECTION_ERROR, e); + } + } + + public void dealWhere() { + String where = configuration.getString(Key.WHERE, null); + if(StringUtils.isNotBlank(where)) { + String whereImprove = where.trim(); + if(whereImprove.endsWith(";") || whereImprove.endsWith(";")) { + whereImprove = whereImprove.substring(0,whereImprove.length()-1); + } + configuration.set(Key.WHERE, 
whereImprove); + } + } + + /** + * 对表进行切分 + */ + public List doSplit(int adviceNumber) { + List pluginParams = new ArrayList(); + List rangeList; + String where = configuration.getString(Key.WHERE); + boolean hasWhere = StringUtils.isNotBlank(where); + if (querySql == null || querySql.isEmpty()) { + // 如果splitPoints为空,则根据splitKey自动切分,不过这种切分方式无法保证数据均分,且只支持整形和字符型列 + if (splitPoints == null || splitPoints.isEmpty()) { + LOG.info("Split accoring min and max value of splitColumn..."); + Pair minMaxPK = getPkRange(configuration); + if (null == minMaxPK) { + throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_SPLIT_PK, + "根据切分主键切分表失败. DataX仅支持切分主键为一个,并且类型为整数或者字符串类型. " + + "请尝试使用其他的切分主键或者联系 HBase管理员 进行处理."); + } + if (null == minMaxPK.getLeft() || null == minMaxPK.getRight()) { + // 切分后获取到的start/end 有 Null 的情况 + pluginParams.add(configuration); + return pluginParams; + } + boolean isStringType = Constant.PK_TYPE_STRING.equals(configuration + .getString(Constant.PK_TYPE)); + boolean isLongType = Constant.PK_TYPE_LONG.equals(configuration + .getString(Constant.PK_TYPE)); + if (isStringType) { + rangeList = RdbmsRangeSplitWrap.splitAndWrap( + String.valueOf(minMaxPK.getLeft()), + String.valueOf(minMaxPK.getRight()), adviceNumber, + splitKey, "'", null); + } else if (isLongType) { + rangeList = RdbmsRangeSplitWrap.splitAndWrap( + new BigInteger(minMaxPK.getLeft().toString()), + new BigInteger(minMaxPK.getRight().toString()), + adviceNumber, splitKey); + } else { + throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_SPLIT_PK, + "您配置的切分主键(splitPk) 类型 DataX 不支持. DataX 仅支持切分主键为一个,并且类型为整数或者字符串类型. " + + "请尝试使用其他的切分主键或者联系HBase管理员进行处理."); + } + + } else { + LOG.info("Split accoring splitPoints..."); + // 根据指定splitPoints进行切分 + rangeList = buildSplitRange(); + } + String tempQuerySql; + if (null != rangeList && !rangeList.isEmpty()) { + for (String range : rangeList) { + Configuration tempConfig = configuration.clone(); + + tempQuerySql = buildQuerySql(columnNames, fullTableName, where) + + (hasWhere ? " and " : " where ") + range; + LOG.info("Query SQL: " + tempQuerySql); + tempConfig.set(Constant.QUERY_SQL_PER_SPLIT, tempQuerySql); + pluginParams.add(tempConfig); + } + } else { + Configuration tempConfig = configuration.clone(); + tempQuerySql = buildQuerySql(columnNames, fullTableName, where) + + (hasWhere ? 
" and " : " where ") + + String.format(" %s IS NOT NULL", splitKey); + LOG.info("Query SQL: " + tempQuerySql); + tempConfig.set(Constant.QUERY_SQL_PER_SPLIT, tempQuerySql); + pluginParams.add(tempConfig); + } + } else { + // 指定querySql不需要切分 + for (String sql : querySql) { + Configuration tempConfig = configuration.clone(); + tempConfig.set(Constant.QUERY_SQL_PER_SPLIT, sql); + pluginParams.add(tempConfig); + } + } + return pluginParams; + } + + public static String buildQuerySql(List columnNames, String table, + String where) { + String querySql; + StringBuilder columnBuilder = new StringBuilder(); + for (String columnName : columnNames) { + columnBuilder.append(columnName).append(","); + } + columnBuilder.setLength(columnBuilder.length() -1); + if (StringUtils.isBlank(where)) { + querySql = String.format(Constant.QUERY_SQL_TEMPLATE_WITHOUT_WHERE, + columnBuilder.toString(), table); + } else { + querySql = String.format(Constant.QUERY_SQL_TEMPLATE, columnBuilder.toString(), + table, where); + } + return querySql; + } + + private List buildSplitRange() { + String getSplitKeyTypeSQL = String.format(Constant.QUERY_COLUMN_TYPE_TEMPLATE, splitKey, fullTableName); + Statement statement = null; + ResultSet resultSet = null; + List splitConditions = new ArrayList(); + + try { + statement = connection.createStatement(); + resultSet = statement.executeQuery(getSplitKeyTypeSQL); + ResultSetMetaData rsMetaData = resultSet.getMetaData(); + int type = rsMetaData.getColumnType(1); + String symbol = "%s"; + switch (type) { + case Types.CHAR: + case Types.VARCHAR: + symbol = "'%s'"; + break; + case Types.DATE: + symbol = "TO_DATE('%s')"; + break; + case Types.TIME: + symbol = "TO_TIME('%s')"; + break; + case Types.TIMESTAMP: + symbol = "TO_TIMESTAMP('%s')"; + break; + case Types.BINARY: + case Types.VARBINARY: + case Types.ARRAY: + throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_SPLIT_PK, + "切分列类型为" + rsMetaData.getColumnTypeName(1) + ",暂不支持该类型字段作为切分列。"); + } + String splitCondition = null; + for (int i = 0; i <= splitPoints.size(); i++) { + if (i == 0) { + splitCondition = splitKey + " <= " + String.format(symbol, splitPoints.get(i)); + } else if (i == splitPoints.size()) { + splitCondition = splitKey + " > " + String.format(symbol, splitPoints.get(i - 1)); + } else { + splitCondition = splitKey + " > " + String.format(symbol, splitPoints.get(i - 1)) + + " AND " + splitKey + " <= " + String.format(symbol, splitPoints.get(i)); + } + splitConditions.add(splitCondition); + } + + return splitConditions; + } catch (SQLException e) { + throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.GET_TABLE_COLUMNTYPE_ERROR, + "获取切分列类型失败,请检查服务或给定表和切分列是否正常,或者联系HBase管理员进行处理。", e); + } finally { + closeJdbc(null, statement, resultSet); + } + + } + + private Pair getPkRange(Configuration configuration) { + String pkRangeSQL = String.format(Constant.QUERY_MIN_MAX_TEMPLATE, splitKey, splitKey, fullTableName); + String where = configuration.getString(Key.WHERE); + if (StringUtils.isNotBlank(where)) { + pkRangeSQL = String.format("%s WHERE (%s AND %s IS NOT NULL)", + pkRangeSQL, where, splitKey); + } + Statement statement = null; + ResultSet resultSet = null; + Pair minMaxPK = null; + + try { + statement = connection.createStatement(); + resultSet = statement.executeQuery(pkRangeSQL); + ResultSetMetaData rsMetaData = resultSet.getMetaData(); + + if (isPKTypeValid(rsMetaData)) { + if (isStringType(rsMetaData.getColumnType(1))) { + if(configuration != null) { + configuration + 
.set(Constant.PK_TYPE, Constant.PK_TYPE_STRING); + } + if (resultSet.next()) { + minMaxPK = new ImmutablePair( + resultSet.getString(1), resultSet.getString(2)); + } + } else if (isLongType(rsMetaData.getColumnType(1))) { + if(configuration != null) { + configuration.set(Constant.PK_TYPE, Constant.PK_TYPE_LONG); + } + if (resultSet.next()) { + minMaxPK = new ImmutablePair( + resultSet.getLong(1), resultSet.getLong(2)); + } + } else { + throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_SPLIT_PK, + "您配置的DataX切分主键(splitPk)有误. 因为您配置的切分主键(splitPk) 类型 DataX 不支持. " + + "DataX 仅支持切分主键为一个,并且类型为整数或者字符串类型. 请尝试使用其他的切分主键或者联系HBASE管理员进行处理."); + } + } else { + throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_SPLIT_PK, + "您配置的DataX切分主键(splitPk)有误. 因为您配置的切分主键(splitPk) 类型 DataX 不支持. " + + "DataX 仅支持切分主键为一个,并且类型为整数或者字符串类型. 请尝试使用其他的切分主键或者联系HBASE管理员进行处理."); + } + } catch (SQLException e) { + throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_SPLIT_PK, e); + } finally { + closeJdbc(null, statement, resultSet); + } + + return minMaxPK; + } + + private static boolean isPKTypeValid(ResultSetMetaData rsMetaData) { + boolean ret = false; + try { + int minType = rsMetaData.getColumnType(1); + int maxType = rsMetaData.getColumnType(2); + + boolean isNumberType = isLongType(minType); + + boolean isStringType = isStringType(minType); + + if (minType == maxType && (isNumberType || isStringType)) { + ret = true; + } + } catch (Exception e) { + throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK, + "DataX获取切分主键(splitPk)字段类型失败. 该错误通常是系统底层异常导致. 请联系旺旺:askdatax或者DBA处理."); + } + return ret; + } + + private static boolean isLongType(int type) { + boolean isValidLongType = type == Types.BIGINT || type == Types.INTEGER + || type == Types.SMALLINT || type == Types.TINYINT; + return isValidLongType; + } + + private static boolean isStringType(int type) { + return type == Types.CHAR || type == Types.NCHAR + || type == Types.VARCHAR || type == Types.LONGVARCHAR + || type == Types.NVARCHAR; + } +} diff --git a/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20xSQLReader.java b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20xSQLReader.java new file mode 100644 index 00000000..2072c2c0 --- /dev/null +++ b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20xSQLReader.java @@ -0,0 +1,53 @@ +package com.alibaba.datax.plugin.reader.hbase20xsqlreader; + +import com.alibaba.datax.common.plugin.RecordSender; +import com.alibaba.datax.common.spi.Reader; +import com.alibaba.datax.common.util.Configuration; + +import java.util.List; + +public class HBase20xSQLReader extends Reader { + + public static class Job extends Reader.Job { + private Configuration originalConfig; + private HBase20SQLReaderHelper readerHelper; + @Override + public void init() { + this.originalConfig = this.getPluginJobConf(); + this.readerHelper = new HBase20SQLReaderHelper(this.originalConfig); + readerHelper.validateParameter(); + } + + @Override + public List split(int adviceNumber) { + return readerHelper.doSplit(adviceNumber); + } + + @Override + public void destroy() { + // do nothing + } + } + + public static class Task extends Reader.Task { + private Configuration readerConfig; + private HBase20xSQLReaderTask hbase20xSQLReaderTask; + + @Override + public void init() { + this.readerConfig = super.getPluginJobConf(); + hbase20xSQLReaderTask = new 
HBase20xSQLReaderTask(readerConfig, super.getTaskGroupId(), super.getTaskId()); + } + + @Override + public void startRead(RecordSender recordSender) { + hbase20xSQLReaderTask.readRecord(recordSender); + } + + @Override + public void destroy() { + // do nothing + } + + } +} diff --git a/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20xSQLReaderErrorCode.java b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20xSQLReaderErrorCode.java new file mode 100644 index 00000000..415bf71f --- /dev/null +++ b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20xSQLReaderErrorCode.java @@ -0,0 +1,39 @@ +package com.alibaba.datax.plugin.reader.hbase20xsqlreader; + +import com.alibaba.datax.common.spi.ErrorCode; + +public enum HBase20xSQLReaderErrorCode implements ErrorCode { + REQUIRED_VALUE("Hbasewriter-00", "您缺失了必须填写的参数值."), + ILLEGAL_VALUE("Hbasewriter-01", "您填写的参数值不合法."), + GET_QUERYSERVER_CONNECTION_ERROR("Hbasewriter-02", "获取QueryServer连接时出错."), + GET_PHOENIX_TABLE_ERROR("Hbasewriter-03", "获取 Phoenix table时出错."), + GET_TABLE_COLUMNTYPE_ERROR("Hbasewriter-05", "获取表列类型时出错."), + CLOSE_PHOENIX_CONNECTION_ERROR("Hbasewriter-06", "关闭JDBC连接时时出错."), + ILLEGAL_SPLIT_PK("Hbasewriter-07", "非法splitKey配置."), + PHOENIX_COLUMN_TYPE_CONVERT_ERROR("Hbasewriter-08", "phoenix的列类型转换错误."), + QUERY_DATA_ERROR("Hbasewriter-09", "truncate hbase表时发生异常."), + ; + + private final String code; + private final String description; + + private HBase20xSQLReaderErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.description; + } + + @Override + public String toString() { + return String.format("Code:[%s], Description:[%s].", this.code, this.description); + } +} diff --git a/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20xSQLReaderTask.java b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20xSQLReaderTask.java new file mode 100644 index 00000000..866cef38 --- /dev/null +++ b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20xSQLReaderTask.java @@ -0,0 +1,121 @@ +package com.alibaba.datax.plugin.reader.hbase20xsqlreader; + +import com.alibaba.datax.common.element.*; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordSender; +import com.alibaba.datax.common.statistics.PerfRecord; +import com.alibaba.datax.common.util.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.math.BigDecimal; +import java.sql.*; + +public class HBase20xSQLReaderTask { + private static final Logger LOG = LoggerFactory.getLogger(HBase20xSQLReaderTask.class); + + private Configuration readerConfig; + private int taskGroupId = -1; + private int taskId=-1; + + public HBase20xSQLReaderTask(Configuration config, int taskGroupId, int taskId) { + this.readerConfig = config; + this.taskGroupId = taskGroupId; + this.taskId = taskId; + } + + public void readRecord(RecordSender recordSender) { + String querySql = readerConfig.getString(Constant.QUERY_SQL_PER_SPLIT); + LOG.info("Begin to read record by Sql: [{}\n] {}.", querySql); + HBase20SQLReaderHelper helper = new HBase20SQLReaderHelper(readerConfig); + Connection conn = 
helper.getConnection(readerConfig.getString(Key.QUERYSERVER_ADDRESS), + readerConfig.getString(Key.SERIALIZATION_NAME, Constant.DEFAULT_SERIALIZATION)); + Statement statement = null; + ResultSet resultSet = null; + try { + long rsNextUsedTime = 0; + long lastTime = System.nanoTime(); + statement = conn.createStatement(); + // 统计查询时间 + PerfRecord queryPerfRecord = new PerfRecord(taskGroupId,taskId, PerfRecord.PHASE.SQL_QUERY); + queryPerfRecord.start(); + + resultSet = statement.executeQuery(querySql); + ResultSetMetaData meta = resultSet.getMetaData(); + int columnNum = meta.getColumnCount(); + // 统计的result_Next时间 + PerfRecord allResultPerfRecord = new PerfRecord(taskGroupId, taskId, PerfRecord.PHASE.RESULT_NEXT_ALL); + allResultPerfRecord.start(); + + while (resultSet.next()) { + Record record = recordSender.createRecord(); + rsNextUsedTime += (System.nanoTime() - lastTime); + for (int i = 1; i <= columnNum; i++) { + Column column = this.convertPhoenixValueToDataxColumn(meta.getColumnType(i), resultSet.getObject(i)); + record.addColumn(column); + } + lastTime = System.nanoTime(); + recordSender.sendToWriter(record); + } + allResultPerfRecord.end(rsNextUsedTime); + LOG.info("Finished read record by Sql: [{}\n] {}.", querySql); + } catch (SQLException e) { + throw DataXException.asDataXException( + HBase20xSQLReaderErrorCode.QUERY_DATA_ERROR, "查询Phoenix数据出现异常,请检查服务状态或与HBase管理员联系!", e); + } finally { + helper.closeJdbc(conn, statement, resultSet); + } + + } + + private Column convertPhoenixValueToDataxColumn(int sqlType, Object value) { + Column column; + switch (sqlType) { + case Types.CHAR: + case Types.VARCHAR: + column = new StringColumn((String) value); + break; + case Types.BINARY: + case Types.VARBINARY: + column = new BytesColumn((byte[]) value); + break; + case Types.BOOLEAN: + column = new BoolColumn((Boolean) value); + break; + case Types.INTEGER: + column = new LongColumn((Integer) value); + break; + case Types.TINYINT: + column = new LongColumn(((Byte) value).longValue()); + break; + case Types.SMALLINT: + column = new LongColumn(((Short) value).longValue()); + break; + case Types.BIGINT: + column = new LongColumn((Long) value); + break; + case Types.FLOAT: + column = new DoubleColumn((Float.valueOf(value.toString()))); + break; + case Types.DECIMAL: + column = new DoubleColumn((BigDecimal)value); + break; + case Types.DOUBLE: + column = new DoubleColumn((Double) value); + break; + case Types.DATE: + column = new DateColumn((Date) value); + break; + case Types.TIME: + column = new DateColumn((Time) value); + break; + case Types.TIMESTAMP: + column = new DateColumn((Timestamp) value); + break; + default: + throw DataXException.asDataXException( + HBase20xSQLReaderErrorCode.PHOENIX_COLUMN_TYPE_CONVERT_ERROR, "遇到不可识别的phoenix类型," + "sqlType :" + sqlType); + } + return column; + } +} diff --git a/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/Key.java b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/Key.java new file mode 100644 index 00000000..43d811ac --- /dev/null +++ b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/Key.java @@ -0,0 +1,40 @@ +package com.alibaba.datax.plugin.reader.hbase20xsqlreader; + +public class Key { + /** + * 【必选】writer要读取的表的表名 + */ + public final static String TABLE = "table"; + /** + * 【必选】writer要读取哪些列 + */ + public final static String COLUMN = "column"; + /** + * 【必选】Phoenix QueryServer服务地址 + */ + public final static String QUERYSERVER_ADDRESS = 
"queryServerAddress"; + /** + * 【可选】序列化格式,默认为PROTOBUF + */ + public static final String SERIALIZATION_NAME = "serialization"; + /** + * 【可选】Phoenix表所属schema,默认为空 + */ + public static final String SCHEMA = "schema"; + /** + * 【可选】读取数据时切分列 + */ + public static final String SPLIT_KEY = "splitKey"; + /** + * 【可选】读取数据时切分点 + */ + public static final String SPLIT_POINT = "splitPoint"; + /** + * 【可选】读取数据过滤条件配置 + */ + public static final String WHERE = "where"; + /** + * 【可选】查询语句配置 + */ + public static final String QUERY_SQL = "querySql"; +} diff --git a/hbase20xsqlreader/src/main/resources/plugin.json b/hbase20xsqlreader/src/main/resources/plugin.json new file mode 100644 index 00000000..45856411 --- /dev/null +++ b/hbase20xsqlreader/src/main/resources/plugin.json @@ -0,0 +1,7 @@ +{ + "name": "hbase20xsqlreader", + "class": "com.alibaba.datax.plugin.reader.hbase20xsqlreader.HBase20xSQLReader", + "description": "useScene: prod. mechanism: read data from phoenix through queryserver.", + "developer": "bake" +} + diff --git a/hbase20xsqlreader/src/main/resources/plugin_job_template.json b/hbase20xsqlreader/src/main/resources/plugin_job_template.json new file mode 100644 index 00000000..256c30cc --- /dev/null +++ b/hbase20xsqlreader/src/main/resources/plugin_job_template.json @@ -0,0 +1,13 @@ +{ + "name": "hbase20xsqlreader", + "parameter": { + "queryserverAddress": "", + "serialization": "PROTOBUF", + "schema": "", + "table": "TABLE1", + "column": ["ID", "NAME"], + "splitKey": "rowkey", + "splitPoint":[], + "where": "" + } +} diff --git a/hbase20xsqlwriter/doc/hbase20xsqlwriter.md b/hbase20xsqlwriter/doc/hbase20xsqlwriter.md new file mode 100644 index 00000000..63e4a431 --- /dev/null +++ b/hbase20xsqlwriter/doc/hbase20xsqlwriter.md @@ -0,0 +1,164 @@ +# HBase20xsqlwriter插件文档 + +## 1. 快速介绍 + +HBase20xsqlwriter实现了向hbase中的SQL表(phoenix)批量导入数据的功能。Phoenix因为对rowkey做了数据编码,所以,直接使用HBaseAPI进行写入会面临手工数据转换的问题,麻烦且易错。本插件提供了SQL方式直接向Phoenix表写入数据。 + +在底层实现上,通过Phoenix QueryServer的轻客户端驱动,执行UPSERT语句向Phoenix写入数据。 + +### 1.1 支持的功能 + +* 支持带索引的表的数据导入,可以同步更新所有的索引表 + + +### 1.2 限制 + +* 要求版本为Phoenix5.x及HBase2.x +* 仅支持通过Phoenix QeuryServer导入数据,因此您Phoenix必须启动QueryServer服务才能使用本插件 +* 不支持清空已有表数据 +* 仅支持通过phoenix创建的表,不支持原生HBase表 +* 不支持带时间戳的数据导入 + +## 2. 实现原理 + +通过Phoenix轻客户端,连接Phoenix QueryServer服务,执行UPSERT语句向表中批量写入数据。因为使用上层接口,所以,可以同步更新索引表。 + +## 3. 
配置说明 + +### 3.1 配置样例 + +```json +{ + "job": { + "entry": { + "jvm": "-Xms2048m -Xmx2048m" + }, + "content": [ + { + "reader": { + "name": "txtfilereader", + "parameter": { + "path": "/Users/shf/workplace/datax_test/hbase20xsqlwriter/txt/normal.txt", + "charset": "UTF-8", + "column": [ + { + "index": 0, + "type": "String" + }, + { + "index": 1, + "type": "string" + }, + { + "index": 2, + "type": "string" + }, + { + "index": 3, + "type": "string" + } + ], + "fieldDelimiter": "," + } + }, + "writer": { + "name": "hbase20xsqlwriter", + "parameter": { + "batchSize": "100", + "column": [ + "UID", + "TS", + "EVENTID", + "CONTENT" + ], + "queryServerAddress": "http://127.0.0.1:8765", + "nullMode": "skip", + "table": "目标hbase表名,大小写有关" + } + } + } + ], + "setting": { + "speed": { + "channel": 5 + } + } + } +} +``` + + +### 3.2 参数说明 + +* **name** + + * 描述:插件名字,必须是`hbase11xsqlwriter` + * 必选:是 + * 默认值:无 + +* **table** + + * 描述:要导入的表名,大小写敏感,通常phoenix表都是**大写**表名 + * 必选:是 + * 默认值:无 + +* **column** + + * 描述:列名,大小写敏感,通常phoenix的列名都是**大写**。 + * 需要注意列的顺序,必须与reader输出的列的顺序一一对应。 + * 不需要填写数据类型,会自动从phoenix获取列的元数据 + * 必选:是 + * 默认值:无 + +* **queryServerAddress** + + * 描述:Phoenix QueryServer地址,为必填项,格式:http://${hostName}:${ip},如http://172.16.34.58:8765 + * 必选:是 + * 默认值:无 + +* **serialization** + + * 描述:QueryServer使用的序列化协议 + * 必选:否 + * 默认值:PROTOBUF + +* **batchSize** + + * 描述:批量写入的最大行数 + * 必选:否 + * 默认值:256 + +* **nullMode** + + * 描述:读取到的列值为null时,如何处理。目前有两种方式: + * skip:跳过这一列,即不插入这一列(如果该行的这一列之前已经存在,则会被删除) + * empty:插入空值,值类型的空值是0,varchar的空值是空字符串 + * 必选:否 + * 默认值:skip + +## 4. 性能报告 + +无 + +## 5. 约束限制 + +writer中的列的定义顺序必须与reader的列顺序匹配。reader中的列顺序定义了输出的每一行中,列的组织顺序。而writer的列顺序,定义的是在收到的数据中,writer期待的列的顺序。例如: + +reader的列顺序是: c1, c2, c3, c4 + +writer的列顺序是: x1, x2, x3, x4 + +则reader输出的列c1就会赋值给writer的列x1。如果writer的列顺序是x1, x2, x4, x3,则c3会赋值给x4,c4会赋值给x3. + + +## 6. FAQ + +1. 并发开多少合适?速度慢时增加并发有用吗? + 数据导入进程默认JVM的堆大小是2GB,并发(channel数)是通过多线程实现的,开过多的线程有时并不能提高导入速度,反而可能因为过于频繁的GC导致性能下降。一般建议并发数(channel)为5-10. + +2. batchSize设置多少比较合适? 
+默认是256,但应根据每行的大小来计算最合适的batchSize。通常一次操作的数据量在2MB-4MB左右,用这个值除以行大小,即可得到batchSize。 + + + + diff --git a/hbase20xsqlwriter/pom.xml b/hbase20xsqlwriter/pom.xml new file mode 100644 index 00000000..2dc5f4c7 --- /dev/null +++ b/hbase20xsqlwriter/pom.xml @@ -0,0 +1,107 @@ + + + + datax-all + com.alibaba.datax + 0.0.1-SNAPSHOT + + 4.0.0 + + hbase20xsqlwriter + 0.0.1-SNAPSHOT + jar + + + 5.0.0-HBase-2.0 + 1.12.0 + 1.8 + + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + org.apache.phoenix + phoenix-queryserver + ${phoenix.version} + + + + + junit + junit + test + + + com.alibaba.datax + datax-core + ${datax-project-version} + + + com.alibaba.datax + datax-service-face + + + test + + + org.mockito + mockito-all + 1.9.5 + test + + + + + + + src/main/java + + **/*.properties + + + + + + + maven-compiler-plugin + + 1.6 + 1.6 + ${project-sourceEncoding} + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + + + \ No newline at end of file diff --git a/hbase20xsqlwriter/src/main/assembly/package.xml b/hbase20xsqlwriter/src/main/assembly/package.xml new file mode 100755 index 00000000..f2f7f679 --- /dev/null +++ b/hbase20xsqlwriter/src/main/assembly/package.xml @@ -0,0 +1,35 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/writer/hbase20xsqlwriter + + + target/ + + hbase20xsqlwriter-0.0.1-SNAPSHOT.jar + + plugin/writer/hbase20xsqlwriter + + + + + + false + plugin/writer/hbase20xsqlwriter/libs + runtime + + + diff --git a/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/Constant.java b/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/Constant.java new file mode 100755 index 00000000..31760705 --- /dev/null +++ b/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/Constant.java @@ -0,0 +1,17 @@ +package com.alibaba.datax.plugin.writer.hbase20xsqlwriter; + +public final class Constant { + public static final String DEFAULT_NULL_MODE = "skip"; + public static final String DEFAULT_SERIALIZATION = "PROTOBUF"; + public static final int DEFAULT_BATCH_ROW_COUNT = 256; // 默认一次写256行 + + public static final int TYPE_UNSIGNED_TINYINT = 11; + public static final int TYPE_UNSIGNED_SMALLINT = 13; + public static final int TYPE_UNSIGNED_INTEGER = 9; + public static final int TYPE_UNSIGNED_LONG = 10; + public static final int TYPE_UNSIGNED_FLOAT = 14; + public static final int TYPE_UNSIGNED_DOUBLE = 15; + public static final int TYPE_UNSIGNED_DATE = 19; + public static final int TYPE_UNSIGNED_TIME = 18; + public static final int TYPE_UNSIGNED_TIMESTAMP = 20; +} diff --git a/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLHelper.java b/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLHelper.java new file mode 100644 index 00000000..f90b792b --- /dev/null +++ b/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLHelper.java @@ -0,0 +1,142 @@ +package com.alibaba.datax.plugin.writer.hbase20xsqlwriter; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.*; +import java.util.ArrayList; +import java.util.List; + +public class HBase20xSQLHelper { + private static final Logger LOG = 
LoggerFactory.getLogger(HBase20xSQLHelper.class); + + /** + * phoenix瘦客户端连接前缀 + */ + public static final String CONNECT_STRING_PREFIX = "jdbc:phoenix:thin:"; + /** + * phoenix驱动名 + */ + public static final String CONNECT_DRIVER_STRING = "org.apache.phoenix.queryserver.client.Driver"; + /** + * 从系统表查找配置表信息 + */ + public static final String SELECT_CATALOG_TABLE_STRING = "SELECT COLUMN_NAME FROM SYSTEM.CATALOG WHERE TABLE_NAME='%s' AND COLUMN_NAME IS NOT NULL"; + + /** + * 验证配置参数是否正确 + */ + public static void validateParameter(com.alibaba.datax.common.util.Configuration originalConfig) { + // 表名和queryserver地址必须配置,否则抛异常 + String tableName = originalConfig.getNecessaryValue(Key.TABLE, HBase20xSQLWriterErrorCode.REQUIRED_VALUE); + String queryServerAddress = originalConfig.getNecessaryValue(Key.QUERYSERVER_ADDRESS, HBase20xSQLWriterErrorCode.REQUIRED_VALUE); + + // 序列化格式,可不配置,默认PROTOBUF + String serialization = originalConfig.getString(Key.SERIALIZATION_NAME, Constant.DEFAULT_SERIALIZATION); + + String connStr = getConnectionUrl(queryServerAddress, serialization); + // 校验jdbc连接是否正常 + Connection conn = getThinClientConnection(connStr); + + List columnNames = originalConfig.getList(Key.COLUMN, String.class); + if (columnNames == null || columnNames.isEmpty()) { + throw DataXException.asDataXException( + HBase20xSQLWriterErrorCode.ILLEGAL_VALUE, "HBase的columns配置不能为空,请添加目标表的列名配置."); + } + String schema = originalConfig.getString(Key.SCHEMA); + // 检查表以及配置列是否存在 + checkTable(conn, schema, tableName, columnNames); + } + + /** + * 获取JDBC连接,轻量级连接,使用完后必须显式close + */ + public static Connection getThinClientConnection(String connStr) { + LOG.debug("Connecting to QueryServer [" + connStr + "] ..."); + Connection conn; + try { + Class.forName(CONNECT_DRIVER_STRING); + conn = DriverManager.getConnection(connStr); + conn.setAutoCommit(false); + } catch (Throwable e) { + throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.GET_QUERYSERVER_CONNECTION_ERROR, + "无法连接QueryServer,配置不正确或服务未启动,请检查配置和服务状态或者联系HBase管理员.", e); + } + LOG.debug("Connected to QueryServer successfully."); + return conn; + } + + public static Connection getJdbcConnection(Configuration conf) { + String queryServerAddress = conf.getNecessaryValue(Key.QUERYSERVER_ADDRESS, HBase20xSQLWriterErrorCode.REQUIRED_VALUE); + // 序列化格式,可不配置,默认PROTOBUF + String serialization = conf.getString(Key.SERIALIZATION_NAME, "PROTOBUF"); + String connStr = getConnectionUrl(queryServerAddress, serialization); + return getThinClientConnection(connStr); + } + + + public static String getConnectionUrl(String queryServerAddress, String serialization) { + String urlFmt = CONNECT_STRING_PREFIX + "url=%s;serialization=%s"; + return String.format(urlFmt, queryServerAddress, serialization); + } + + public static void checkTable(Connection conn, String schema, String tableName, List columnNames) throws DataXException { + String selectSystemTable = getSelectSystemSQL(schema, tableName); + Statement st = null; + ResultSet rs = null; + try { + st = conn.createStatement(); + rs = st.executeQuery(selectSystemTable); + List allColumns = new ArrayList(); + if (rs.next()) { + allColumns.add(rs.getString(1)); + } else { + LOG.error(tableName + "表不存在,请检查表名是否正确或是否已创建.", HBase20xSQLWriterErrorCode.GET_HBASE_TABLE_ERROR); + throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.GET_HBASE_TABLE_ERROR, + tableName + "表不存在,请检查表名是否正确或是否已创建."); + } + while (rs.next()) { + allColumns.add(rs.getString(1)); + } + for (String columnName : columnNames) { + if 
(!allColumns.contains(columnName)) { + // 用户配置的列名在元数据中不存在 + throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.ILLEGAL_VALUE, + "您配置的列" + columnName + "在目的表" + tableName + "的元数据中不存在,请检查您的配置或者联系HBase管理员."); + } + } + + } catch (SQLException t) { + throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.GET_HBASE_TABLE_ERROR, + "获取表" + tableName + "信息失败,请检查您的集群和表状态或者联系HBase管理员.", t); + } finally { + closeJdbc(conn, st, rs); + } + } + + private static String getSelectSystemSQL(String schema, String tableName) { + String sql = String.format(SELECT_CATALOG_TABLE_STRING, tableName); + if (schema != null) { + sql = sql + " AND TABLE_SCHEM = '" + schema + "'"; + } + return sql; + } + + public static void closeJdbc(Connection connection, Statement statement, ResultSet resultSet) { + try { + if (resultSet != null) { + resultSet.close(); + } + if (statement != null) { + statement.close(); + } + if (connection != null) { + connection.close(); + } + } catch (SQLException e) { + LOG.warn("数据库连接关闭异常.", HBase20xSQLWriterErrorCode.CLOSE_HBASE_CONNECTION_ERROR); + } + } +} diff --git a/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLWriter.java b/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLWriter.java new file mode 100644 index 00000000..2c08b734 --- /dev/null +++ b/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLWriter.java @@ -0,0 +1,58 @@ +package com.alibaba.datax.plugin.writer.hbase20xsqlwriter; + +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.spi.Writer; +import com.alibaba.datax.common.util.Configuration; + +import java.util.ArrayList; +import java.util.List; + +public class HBase20xSQLWriter extends Writer { + + public static class Job extends Writer.Job { + + private Configuration config = null; + + @Override + public void init() { + this.config = this.getPluginJobConf(); + HBase20xSQLHelper.validateParameter(this.config); + } + + @Override + public List split(int mandatoryNumber) { + List splitResultConfigs = new ArrayList(); + for (int j = 0; j < mandatoryNumber; j++) { + splitResultConfigs.add(config.clone()); + } + return splitResultConfigs; + } + + @Override + public void destroy() { + //doNothing + } + } + + public static class Task extends Writer.Task { + private Configuration taskConfig; + private HBase20xSQLWriterTask writerTask; + + @Override + public void init() { + this.taskConfig = super.getPluginJobConf(); + this.writerTask = new HBase20xSQLWriterTask(this.taskConfig); + } + + @Override + public void startWrite(RecordReceiver lineReceiver) { + this.writerTask.startWriter(lineReceiver, super.getTaskPluginCollector()); + } + + + @Override + public void destroy() { + // 不需要close + } + } +} \ No newline at end of file diff --git a/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLWriterErrorCode.java b/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLWriterErrorCode.java new file mode 100644 index 00000000..f946e20b --- /dev/null +++ b/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLWriterErrorCode.java @@ -0,0 +1,37 @@ +package com.alibaba.datax.plugin.writer.hbase20xsqlwriter; + +import com.alibaba.datax.common.spi.ErrorCode; + +public enum HBase20xSQLWriterErrorCode implements ErrorCode { + REQUIRED_VALUE("Hbasewriter-00", "您缺失了必须填写的参数值."), + 
ILLEGAL_VALUE("Hbasewriter-01", "您填写的参数值不合法."), + GET_QUERYSERVER_CONNECTION_ERROR("Hbasewriter-02", "获取QueryServer连接时出错."), + GET_HBASE_TABLE_ERROR("Hbasewriter-03", "获取 Hbase table时出错."), + CLOSE_HBASE_CONNECTION_ERROR("Hbasewriter-04", "关闭Hbase连接时出错."), + GET_TABLE_COLUMNTYPE_ERROR("Hbasewriter-05", "获取表列类型时出错."), + PUT_HBASE_ERROR("Hbasewriter-07", "写入hbase时发生IO异常."), + ; + + private final String code; + private final String description; + + private HBase20xSQLWriterErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.description; + } + + @Override + public String toString() { + return String.format("Code:[%s], Description:[%s].", this.code, this.description); + } +} diff --git a/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLWriterTask.java b/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLWriterTask.java new file mode 100644 index 00000000..5557e674 --- /dev/null +++ b/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLWriterTask.java @@ -0,0 +1,389 @@ +package com.alibaba.datax.plugin.writer.hbase20xsqlwriter; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.plugin.TaskPluginCollector; +import com.alibaba.datax.common.util.Configuration; +import com.google.common.collect.Lists; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.math.BigDecimal; +import java.sql.*; +import java.util.Arrays; +import java.util.List; + +public class HBase20xSQLWriterTask { + private final static Logger LOG = LoggerFactory.getLogger(HBase20xSQLWriterTask.class); + + private Configuration configuration; + private TaskPluginCollector taskPluginCollector; + + private Connection connection = null; + private PreparedStatement pstmt = null; + + // 需要向hbsae写入的列的数量,即用户配置的column参数中列的个数。时间戳不包含在内 + private int numberOfColumnsToWrite; + // 期待从源头表的Record中拿到多少列 + private int numberOfColumnsToRead; + private int[] columnTypes; + private List columns; + private String fullTableName; + + private NullModeType nullModeType; + private int batchSize; + + public HBase20xSQLWriterTask(Configuration configuration) { + // 这里仅解析配置,不访问远端集群,配置的合法性检查在writer的init过程中进行 + this.configuration = configuration; + } + + public void startWriter(RecordReceiver lineReceiver, TaskPluginCollector taskPluginCollector) { + this.taskPluginCollector = taskPluginCollector; + + try { + // 准备阶段 + initialize(); + + // 写入数据 + writeData(lineReceiver); + + } catch (Throwable e) { + throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.PUT_HBASE_ERROR, e); + } finally { + // 关闭jdbc连接 + HBase20xSQLHelper.closeJdbc(connection, pstmt, null); + } + + } + + /** + * 初始化JDBC操作对象及列类型 + * @throws SQLException + */ + private void initialize() throws SQLException { + if (connection == null) { + connection = HBase20xSQLHelper.getJdbcConnection(configuration); + connection.setAutoCommit(false); + } + nullModeType = NullModeType.getByTypeName(configuration.getString(Key.NULLMODE, Constant.DEFAULT_NULL_MODE)); + batchSize = configuration.getInt(Key.BATCHSIZE, Constant.DEFAULT_BATCH_ROW_COUNT); + String schema = 
configuration.getString(Key.SCHEMA); + String tableName = configuration.getNecessaryValue(Key.TABLE, HBase20xSQLWriterErrorCode.REQUIRED_VALUE); + fullTableName = tableName; + if (schema != null && !schema.isEmpty()) { + fullTableName = schema + "." + tableName; + } + columns = configuration.getList(Key.COLUMN, String.class); + if (pstmt == null) { + // 一个Task的生命周期中只使用一个PreparedStatement对象 + pstmt = createPreparedStatement(); + columnTypes = getColumnSqlType(); + } + } + + /** + * 生成sql模板,并根据模板创建PreparedStatement + */ + private PreparedStatement createPreparedStatement() throws SQLException { + // 生成列名集合,列之间用逗号分隔: col1,col2,col3,... + StringBuilder columnNamesBuilder = new StringBuilder(); + for (String col : columns) { + // 列名使用双引号,则不自动转换为全大写,而是保留用户配置的大小写 + columnNamesBuilder.append("\""); + columnNamesBuilder.append(col); + columnNamesBuilder.append("\""); + columnNamesBuilder.append(","); + } + // 移除末尾多余的逗号 + columnNamesBuilder.setLength(columnNamesBuilder.length() - 1); + String columnNames = columnNamesBuilder.toString(); + numberOfColumnsToWrite = columns.size(); + numberOfColumnsToRead = numberOfColumnsToWrite; // 开始的时候,要读的列数娱要写的列数相等 + + // 生成UPSERT模板 + StringBuilder upsertBuilder = + new StringBuilder("upsert into " + fullTableName + " (" + columnNames + " ) values ("); + for (int i = 0; i < numberOfColumnsToWrite; i++) { + upsertBuilder.append("?,"); + } + upsertBuilder.setLength(upsertBuilder.length() - 1); // 移除末尾多余的逗号 + upsertBuilder.append(")"); + + String sql = upsertBuilder.toString(); + PreparedStatement ps = connection.prepareStatement(sql); + LOG.debug("SQL template generated: " + sql); + return ps; + } + + /** + * 根据列名来从数据库元数据中获取这一列对应的SQL类型 + */ + private int[] getColumnSqlType() throws SQLException { + int[] types = new int[numberOfColumnsToWrite]; + StringBuilder columnNamesBuilder = new StringBuilder(); + for (String columnName : columns) { + columnNamesBuilder.append(columnName).append(","); + } + columnNamesBuilder.setLength(columnNamesBuilder.length() - 1); + // 查询一条数据获取表meta信息 + String selectSql = "SELECT " + columnNamesBuilder + " FROM " + fullTableName + " LIMIT 1"; + Statement statement = null; + try { + statement = connection.createStatement(); + ResultSetMetaData meta = statement.executeQuery(selectSql).getMetaData(); + + for (int i = 0; i < columns.size(); i++) { + String name = columns.get(i); + types[i] = meta.getColumnType(i + 1); + LOG.debug("Column name : " + name + ", sql type = " + types[i] + " " + meta.getColumnTypeName(i + 1)); + } + } catch (SQLException e) { + throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.GET_TABLE_COLUMNTYPE_ERROR, + "获取表" + fullTableName + "列类型失败,请检查配置和服务状态或者联系HBase管理员.", e); + } finally { + HBase20xSQLHelper.closeJdbc(null, statement, null); + } + + return types; + } + + /** + * 从接收器中获取每条记录,写入Phoenix + */ + private void writeData(RecordReceiver lineReceiver) throws SQLException { + List buffer = Lists.newArrayListWithExpectedSize(batchSize); + Record record = null; + while ((record = lineReceiver.getFromReader()) != null) { + // 校验列数量是否符合预期 + if (record.getColumnNumber() != numberOfColumnsToRead) { + throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.ILLEGAL_VALUE, + "数据源给出的列数量[" + record.getColumnNumber() + "]与您配置中的列数量[" + numberOfColumnsToRead + + "]不同, 请检查您的配置 或者 联系 Hbase 管理员."); + } + + buffer.add(record); + if (buffer.size() > batchSize) { + doBatchUpsert(buffer); + buffer.clear(); + } + } + + // 处理剩余的record + if (!buffer.isEmpty()) { + doBatchUpsert(buffer); + buffer.clear(); + } + } + + 
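+ // 示意:假设table为TEST、column为["UID","TS"],createPreparedStatement生成的UPSERT模板形如:
+ //   upsert into TEST ("UID","TS" ) values (?,?)
+ // 列名带双引号以保留配置中的大小写,问号占位符个数与列数一致,提交前按行绑定参数后addBatch。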
/** + * 批量提交一组数据,如果失败,则尝试一行行提交,如果仍然失败,抛错给用户 + */ + private void doBatchUpsert(List records) throws SQLException { + try { + // 将所有record提交到connection缓存 + for (Record r : records) { + setupStatement(r); + pstmt.addBatch(); + } + + pstmt.executeBatch(); + // 将缓存的数据提交到phoenix + connection.commit(); + pstmt.clearParameters(); + pstmt.clearBatch(); + + } catch (SQLException e) { + LOG.error("Failed batch committing " + records.size() + " records", e); + + // 批量提交失败,则一行行重试,以确定哪一行出错 + connection.rollback(); + HBase20xSQLHelper.closeJdbc(null, pstmt, null); + connection.setAutoCommit(true); + pstmt = createPreparedStatement(); + doSingleUpsert(records); + } catch (Exception e) { + throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.PUT_HBASE_ERROR, e); + } + } + + /** + * 单行提交,将出错的行记录到脏数据中。由脏数据收集模块判断任务是否继续 + */ + private void doSingleUpsert(List records) throws SQLException { + int rowNumber = 0; + for (Record r : records) { + try { + rowNumber ++; + setupStatement(r); + pstmt.executeUpdate(); + } catch (SQLException e) { + //出错了,记录脏数据 + LOG.error("Failed writing to phoenix, rowNumber: " + rowNumber); + this.taskPluginCollector.collectDirtyRecord(r, e); + } + } + } + + private void setupStatement(Record record) throws SQLException { + for (int i = 0; i < numberOfColumnsToWrite; i++) { + Column col = record.getColumn(i); + int sqlType = columnTypes[i]; + // PreparedStatement中的索引从1开始,所以用i+1 + setupColumn(i + 1, sqlType, col); + } + } + + private void setupColumn(int pos, int sqlType, Column col) throws SQLException { + if (col.getRawData() != null) { + switch (sqlType) { + case Types.CHAR: + case Types.VARCHAR: + pstmt.setString(pos, col.asString()); + break; + + case Types.BINARY: + case Types.VARBINARY: + pstmt.setBytes(pos, col.asBytes()); + break; + + case Types.BOOLEAN: + pstmt.setBoolean(pos, col.asBoolean()); + break; + + case Types.TINYINT: + case Constant.TYPE_UNSIGNED_TINYINT: + pstmt.setByte(pos, col.asLong().byteValue()); + break; + + case Types.SMALLINT: + case Constant.TYPE_UNSIGNED_SMALLINT: + pstmt.setShort(pos, col.asLong().shortValue()); + break; + + case Types.INTEGER: + case Constant.TYPE_UNSIGNED_INTEGER: + pstmt.setInt(pos, col.asLong().intValue()); + break; + + case Types.BIGINT: + case Constant.TYPE_UNSIGNED_LONG: + pstmt.setLong(pos, col.asLong()); + break; + + case Types.FLOAT: + pstmt.setFloat(pos, col.asDouble().floatValue()); + break; + + case Types.DOUBLE: + pstmt.setDouble(pos, col.asDouble()); + break; + + case Types.DECIMAL: + pstmt.setBigDecimal(pos, col.asBigDecimal()); + break; + + case Types.DATE: + case Constant.TYPE_UNSIGNED_DATE: + pstmt.setDate(pos, new Date(col.asDate().getTime())); + break; + + case Types.TIME: + case Constant.TYPE_UNSIGNED_TIME: + pstmt.setTime(pos, new Time(col.asDate().getTime())); + break; + + case Types.TIMESTAMP: + case Constant.TYPE_UNSIGNED_TIMESTAMP: + pstmt.setTimestamp(pos, new Timestamp(col.asDate().getTime())); + break; + + default: + throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.ILLEGAL_VALUE, + "不支持您配置的列类型:" + sqlType + ", 请检查您的配置 或者 联系 Hbase 管理员."); + } + } else { + // 没有值,按空值的配置情况处理 + switch (nullModeType){ + case Skip: + // 跳过空值,则不插入该列, + pstmt.setNull(pos, sqlType); + break; + + case Empty: + // 插入"空值",请注意不同类型的空值不同 + // 另外,对SQL来说,空值本身是有值的,这与直接操作HBASE Native API时的空值完全不同 + pstmt.setObject(pos, getEmptyValue(sqlType)); + break; + + default: + // nullMode的合法性在初始化配置的时候已经校验过,这里一定不会出错 + throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.ILLEGAL_VALUE, + "Hbasewriter 不支持该 nullMode 类型: 
" + nullModeType + + ", 目前支持的 nullMode 类型是:" + Arrays.asList(NullModeType.values())); + } + } + } + + /** + * 根据类型获取"空值" + * 值类型的空值都是0,bool是false,String是空字符串 + * @param sqlType sql数据类型,定义于{@link Types} + */ + private Object getEmptyValue(int sqlType) { + switch (sqlType) { + case Types.VARCHAR: + return ""; + + case Types.BOOLEAN: + return false; + + case Types.TINYINT: + case Constant.TYPE_UNSIGNED_TINYINT: + return (byte) 0; + + case Types.SMALLINT: + case Constant.TYPE_UNSIGNED_SMALLINT: + return (short) 0; + + case Types.INTEGER: + case Constant.TYPE_UNSIGNED_INTEGER: + return (int) 0; + + case Types.BIGINT: + case Constant.TYPE_UNSIGNED_LONG: + return (long) 0; + + case Types.FLOAT: + return (float) 0.0; + + case Types.DOUBLE: + return (double) 0.0; + + case Types.DECIMAL: + return new BigDecimal(0); + + case Types.DATE: + case Constant.TYPE_UNSIGNED_DATE: + return new Date(0); + + case Types.TIME: + case Constant.TYPE_UNSIGNED_TIME: + return new Time(0); + + case Types.TIMESTAMP: + case Constant.TYPE_UNSIGNED_TIMESTAMP: + return new Timestamp(0); + + case Types.BINARY: + case Types.VARBINARY: + return new byte[0]; + + default: + throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.ILLEGAL_VALUE, + "不支持您配置的列类型:" + sqlType + ", 请检查您的配置 或者 联系 Hbase 管理员."); + } + } +} diff --git a/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/Key.java b/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/Key.java new file mode 100644 index 00000000..7e93cca0 --- /dev/null +++ b/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/Key.java @@ -0,0 +1,36 @@ +package com.alibaba.datax.plugin.writer.hbase20xsqlwriter; + +public class Key { + + /** + * 【必选】writer要写入的表的表名 + */ + public final static String TABLE = "table"; + /** + * 【必选】writer要写入哪些列 + */ + public final static String COLUMN = "column"; + /** + * 【必选】Phoenix QueryServer服务地址 + */ + public final static String QUERYSERVER_ADDRESS = "queryServerAddress"; + /** + * 【可选】序列化格式,默认为PROTOBUF + */ + public static final String SERIALIZATION_NAME = "serialization"; + + /** + * 【可选】批量写入的最大行数,默认100行 + */ + public static final String BATCHSIZE = "batchSize"; + + /** + * 【可选】遇到空值默认跳过 + */ + public static final String NULLMODE = "nullMode"; + /** + * 【可选】Phoenix表所属schema,默认为空 + */ + public static final String SCHEMA = "schema"; + +} diff --git a/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/NullModeType.java b/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/NullModeType.java new file mode 100644 index 00000000..788e6345 --- /dev/null +++ b/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/NullModeType.java @@ -0,0 +1,32 @@ +package com.alibaba.datax.plugin.writer.hbase20xsqlwriter; + +import com.alibaba.datax.common.exception.DataXException; + +import java.util.Arrays; + +public enum NullModeType { + Skip("skip"), + Empty("empty") + ; + + private String mode; + + + NullModeType(String mode) { + this.mode = mode.toLowerCase(); + } + + public String getMode() { + return mode; + } + + public static NullModeType getByTypeName(String modeName) { + for (NullModeType modeType : values()) { + if (modeType.mode.equalsIgnoreCase(modeName)) { + return modeType; + } + } + throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.ILLEGAL_VALUE, + "Hbasewriter 不支持该 nullMode 类型:" + modeName + ", 目前支持的 nullMode 类型是:" + Arrays.asList(values())); + } +} diff --git 
a/hbase20xsqlwriter/src/main/resources/plugin.json b/hbase20xsqlwriter/src/main/resources/plugin.json new file mode 100755 index 00000000..91b7069f --- /dev/null +++ b/hbase20xsqlwriter/src/main/resources/plugin.json @@ -0,0 +1,7 @@ +{ + "name": "hbase20xsqlwriter", + "class": "com.alibaba.datax.plugin.writer.hbase20xsqlwriter.HBase20xSQLWriter", + "description": "useScene: prod. mechanism: use hbase sql UPSERT to put data, index tables will be updated too.", + "developer": "bake" +} + diff --git a/hbase20xsqlwriter/src/main/resources/plugin_job_template.json b/hbase20xsqlwriter/src/main/resources/plugin_job_template.json new file mode 100644 index 00000000..2cf634b8 --- /dev/null +++ b/hbase20xsqlwriter/src/main/resources/plugin_job_template.json @@ -0,0 +1,13 @@ + { + "name": "hbase20xsqlwriter", + "parameter": { + "queryServerAddress": "", + "table": "", + "serialization": "PROTOBUF", + "column": [ + ], + "batchSize": "100", + "nullMode": "skip", + "schema": "" + } +} \ No newline at end of file diff --git a/package.xml b/package.xml index ae07dd08..47e277b8 100755 --- a/package.xml +++ b/package.xml @@ -308,5 +308,19 @@ datax + + hbase20xsqlreader/target/datax/ + + **/*.* + + datax + + + hbase20xsqlwriter/target/datax/ + + **/*.* + + datax + diff --git a/pom.xml b/pom.xml index f8bc7434..f4d197ba 100755 --- a/pom.xml +++ b/pom.xml @@ -89,6 +89,8 @@ plugin-rdbms-util plugin-unstructured-storage-util + hbase20xsqlreader + hbase20xsqlwriter From 4d70b5ab86c28bebe79cc65fac65451b76571b55 Mon Sep 17 00:00:00 2001 From: asdf2014 Date: Fri, 26 Apr 2019 14:53:55 +0800 Subject: [PATCH 3/7] Add OpenTSDB reader and TSDB writer --- .gitignore | 161 +++++++++++++- opentsdbreader/doc/opentsdbreader.md | 209 ++++++++++++++++++ opentsdbreader/pom.xml | 156 +++++++++++++ opentsdbreader/src/main/assembly/package.xml | 35 +++ .../datax/plugin/reader/conn/CliQuery.java | 104 +++++++++ .../plugin/reader/conn/Connection4TSDB.java | 77 +++++++ .../plugin/reader/conn/DataPoint4TSDB.java | 68 ++++++ .../datax/plugin/reader/conn/DumpSeries.java | 96 ++++++++ .../reader/conn/OpenTSDBConnection.java | 78 +++++++ .../plugin/reader/conn/OpenTSDBDump.java | 48 ++++ .../reader/opentsdbreader/Constant.java | 14 ++ .../plugin/reader/opentsdbreader/Key.java | 17 ++ .../reader/opentsdbreader/OpenTSDBReader.java | 207 +++++++++++++++++ .../OpenTSDBReaderErrorCode.java | 40 ++++ .../datax/plugin/reader/util/HttpUtils.java | 68 ++++++ .../datax/plugin/reader/util/TSDBUtils.java | 68 ++++++ .../datax/plugin/reader/util/TimeUtils.java | 38 ++++ opentsdbreader/src/main/resources/plugin.json | 10 + .../main/resources/plugin_job_template.json | 11 + .../reader/conn/OpenTSDBConnectionTest.java | 30 +++ .../datax/plugin/reader/util/Const.java | 18 ++ .../plugin/reader/util/HttpUtilsTest.java | 39 ++++ .../datax/plugin/reader/util/TSDBTest.java | 28 +++ .../plugin/reader/util/TimeUtilsTest.java | 33 +++ package.xml | 14 ++ pom.xml | 2 + tsdbwriter/doc/tsdbhttpwriter.md | 187 ++++++++++++++++ tsdbwriter/pom.xml | 136 ++++++++++++ tsdbwriter/src/main/assembly/package.xml | 35 +++ .../plugin/writer/conn/Connection4TSDB.java | 85 +++++++ .../plugin/writer/conn/DataPoint4TSDB.java | 68 ++++++ .../plugin/writer/conn/TSDBConnection.java | 86 +++++++ .../plugin/writer/tsdbwriter/Constant.java | 16 ++ .../datax/plugin/writer/tsdbwriter/Key.java | 17 ++ .../plugin/writer/tsdbwriter/TSDBWriter.java | 171 ++++++++++++++ .../tsdbwriter/TSDBWriterErrorCode.java | 41 ++++ .../datax/plugin/writer/util/HttpUtils.java | 68 ++++++ 
.../datax/plugin/writer/util/TSDBUtils.java | 72 ++++++ tsdbwriter/src/main/resources/plugin.json | 10 + .../main/resources/plugin_job_template.json | 6 + .../writer/conn/TSDBConnectionTest.java | 30 +++ .../datax/plugin/writer/util/Const.java | 18 ++ .../plugin/writer/util/HttpUtilsTest.java | 39 ++++ .../datax/plugin/writer/util/TSDBTest.java | 28 +++ 44 files changed, 2776 insertions(+), 6 deletions(-) create mode 100644 opentsdbreader/doc/opentsdbreader.md create mode 100644 opentsdbreader/pom.xml create mode 100755 opentsdbreader/src/main/assembly/package.xml create mode 100644 opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/CliQuery.java create mode 100644 opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/Connection4TSDB.java create mode 100644 opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DataPoint4TSDB.java create mode 100644 opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DumpSeries.java create mode 100644 opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnection.java create mode 100644 opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBDump.java create mode 100644 opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Constant.java create mode 100644 opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Key.java create mode 100755 opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReader.java create mode 100755 opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReaderErrorCode.java create mode 100644 opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/HttpUtils.java create mode 100644 opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TSDBUtils.java create mode 100644 opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TimeUtils.java create mode 100755 opentsdbreader/src/main/resources/plugin.json create mode 100644 opentsdbreader/src/main/resources/plugin_job_template.json create mode 100644 opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnectionTest.java create mode 100644 opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/util/Const.java create mode 100644 opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/util/HttpUtilsTest.java create mode 100644 opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/util/TSDBTest.java create mode 100644 opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/util/TimeUtilsTest.java create mode 100644 tsdbwriter/doc/tsdbhttpwriter.md create mode 100644 tsdbwriter/pom.xml create mode 100755 tsdbwriter/src/main/assembly/package.xml create mode 100644 tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/Connection4TSDB.java create mode 100644 tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/DataPoint4TSDB.java create mode 100644 tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/TSDBConnection.java create mode 100644 tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/Constant.java create mode 100755 tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/Key.java create mode 100755 tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriter.java create mode 100755 tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriterErrorCode.java create mode 100644 
tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/util/HttpUtils.java create mode 100644 tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/util/TSDBUtils.java create mode 100755 tsdbwriter/src/main/resources/plugin.json create mode 100644 tsdbwriter/src/main/resources/plugin_job_template.json create mode 100644 tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/conn/TSDBConnectionTest.java create mode 100644 tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/Const.java create mode 100644 tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/HttpUtilsTest.java create mode 100644 tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/TSDBTest.java diff --git a/.gitignore b/.gitignore index fbfffba8..925cf0ab 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,157 @@ -/target/ -.classpath -.project -.settings +# Created by .ignore support plugin (hsz.mobi) .DS_Store -/logs/ -.idea/ +.AppleDouble +.LSOverride +Icon +._* +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk +*.class +*.log +*.ctxt +.mtj.tmp/ +*.jar +*.war +*.nar +*.ear +*.zip +*.tar.gz +*.rar +hs_err_pid* +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/dictionaries +.idea/**/shelf +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml +.idea/**/gradle.xml +.idea/**/libraries +cmake-build-debug/ +cmake-build-release/ +.idea/**/mongoSettings.xml +*.iws +out/ +.idea_modules/ +atlassian-ide-plugin.xml +.idea/replstate.xml +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties +.idea/httpRequests +target/ +pom.xml.tag +pom.xml.releaseBackup +pom.xml.versionsBackup +pom.xml.next +release.properties +dependency-reduced-pom.xml +buildNumber.properties +.mvn/timing.properties +!/.mvn/wrapper/maven-wrapper.jar +.idea *.iml +out +gen### Python template +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +*.manifest +*.spec +pip-log.txt +pip-delete-this-directory.txt +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ +*.mo +*.pot +*.log +local_settings.py +db.sqlite3 +instance/ +.webassets-cache +.scrapy +docs/_build/ +target/ +.ipynb_checkpoints +.python-version +celerybeat-schedule +*.sage.py +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +.spyderproject +.spyproject +.ropeproject +/site +.mypy_cache/ +.metadata +bin/ +tmp/ +*.tmp +*.bak +*.swp +*~.nib +local.properties +.settings/ +.loadpath +.recommenders +.externalToolBuilders/ +*.launch +*.pydevproject +.cproject +.autotools +.factorypath +.buildpath +.target +.tern-project +.texlipse +.springBeans +.recommenders/ +.cache-main +.scala_dependencies +.worksheet diff --git a/opentsdbreader/doc/opentsdbreader.md b/opentsdbreader/doc/opentsdbreader.md new file mode 100644 index 00000000..d9fe742b --- /dev/null +++ b/opentsdbreader/doc/opentsdbreader.md @@ -0,0 +1,209 @@ + +# OpenTSDBReader 插件文档 + +___ + + +## 1 快速介绍 + +OpenTSDBReader 插件实现了从 OpenTSDB 读取数据。OpenTSDB 是主要由 Yahoo 维护的、可扩展的、分布式时序数据库,与阿里巴巴自研 TSDB 的关系与区别详见阿里云官网:《[相比 OpenTSDB 优势](https://help.aliyun.com/document_detail/113368.html)》 + 
+ + +## 2 实现原理 + +在底层实现上,OpenTSDBReader 通过 HTTP 请求链接到 OpenTSDB 实例,利用 `/api/config` 接口获取到其底层存储 HBase 的连接信息,再利用 AsyncHBase 框架连接 HBase,通过 Scan 的方式将数据点扫描出来。整个同步的过程通过 metric 和时间段进行切分,即某个 metric 在某一个小时内的数据迁移,组合成一个迁移 Task。 + + + +## 3 功能说明 + +### 3.1 配置样例 + +* 配置一个从 OpenTSDB 数据库同步抽取数据到本地的作业: + +```json +{ + "job": { + "content": [ + { + "reader": { + "name": "opentsdbreader", + "parameter": { + "endpoint": "http://localhost:4242", + "column": [ + "m" + ], + "beginDateTime": "2019-01-01 00:00:00", + "endDateTime": "2019-01-01 03:00:00" + } + }, + "writer": { + "name": "streamwriter", + "parameter": { + "encoding": "UTF-8", + "print": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} +``` + + + +### 3.2 参数说明 + +* **name** + * 描述:本插件的名称 + * 必选:是 + * 默认值:opentsdbreader + +* **parameter** + * **endpoint** + * 描述:OpenTSDB 的 HTTP 连接地址 + * 必选:是 + * 格式:http://IP:Port +* 默认值:无 + + * **column** + * 描述:数据迁移任务需要迁移的 Metric 列表 + * 必选:是 + * 默认值:无 + +* **beginDateTime** + * 描述:和 endDateTime 配合使用,用于指定哪个时间段内的数据点,需要被迁移 + * 必选:是 + * 格式:`yyyy-MM-dd HH:mm:ss` + * 默认值:无 + * 注意:指定起止时间会自动忽略分钟和秒,转为整点时刻,例如 2019-4-18 的 [3:35, 4:55) 会被转为 [3:00, 4:00) + +* **endDateTime** + * 描述:和 beginDateTime 配合使用,用于指定哪个时间段内的数据点,需要被迁移 + * 必选:是 + * 格式:`yyyy-MM-dd HH:mm:ss` + * 默认值:无 + * 注意:指定起止时间会自动忽略分钟和秒,转为整点时刻,例如 2019-4-18 的 [3:35, 4:55) 会被转为 [3:00, 4:00) + + + + +### 3.3 类型转换 + +| DataX 内部类型 | TSDB 数据类型 | +| -------------- | ------------------------------------------------------------ | +| String | TSDB 数据点序列化字符串,包括 timestamp、metric、tags 和 value | + + + + + +## 4 性能报告 + +### 4.1 环境准备 + +#### 4.1.1 数据特征 +从 Metric、时间线、Value 和 采集周期 四个方面来描述: + +##### metric + +固定指定一个 metric 为 `m`。 + +##### tagkv + +前四个 tagkv 全排列,形成 `10 * 20 * 100 * 100 = 2000000` 条时间线,最后 IP 对应 2000000 条时间线从 1 开始自增。 + +| **tag_k** | **tag_v** | +| --------- | ------------- | +| zone | z1~z10 | +| cluster | c1~c20 | +| group | g1~100 | +| app | a1~a100 | +| ip | ip1~ip2000000 | + +##### value + +度量值为 [1, 100] 区间内的随机值 + +##### interval + +采集周期为 10 秒,持续摄入 3 小时,总数据量为 `3 * 60 * 60 / 10 * 2000000 = 2,160,000,000` 个数据点。 + + + +#### 4.1.2 机器参数 + +OpenTSDB Reader 机型: 64C256G + +HBase 机型: 8C16G * 5 + + +#### 4.1.3 DataX jvm 参数 + +"-Xms4096m -Xmx4096m" + + + + +### 4.2 测试报告 + + +| 通道数| DataX 速度 (Rec/s) |DataX 流量 (MB/s)| +|--------| --------|--------| +|1| 215428 | 25.65 | +|2| 424994 | 50.60 | +|3| 603132 | 71.81 | + + + + + + +## 5 约束限制 + +### 5.1 需要确保与 OpenTSDB 底层存储的网络是连通的 + +具体缘由详见 6.1 + + + +### 5.2 如果存在某一个 Metric 下在一个小时范围内的数据量过大,可能需要通过 `-j` 参数调整 JVM 内存大小 + +考虑到下游 Writer 如果写入速度不及 OpenTSDB reader 的查询数据,可能会存在积压的情况,因此需要适当地调整 JVM 参数。以"从 OpenTSDB 数据库同步抽取数据到本地的作业"为例,启动命令如下: + +```bash + python datax/bin/datax.py opentsdb2stream.json -j "-Xms4096m -Xmx4096m" +``` + + + +### 5.3 指定起止时间会自动被转为整点时刻 + +指定起止时间会自动被转为整点时刻,例如 2019-4-18 的 `[3:35, 3:55)` 会被转为 `[3:00, 4:00)` + + + +### 5.4 目前只支持兼容 OpenTSDB 2.3.x + +其他版本暂不保证兼容 + + + + + +## 6 FAQ + +*** + +**Q:为什么需要连接 OpenTSDB 的底层存储,为什么不直接使用 `/api/query` 查询获取数据点?** + +A:因为通过 OpenTSDB 的 HTTP 接口(`/api/query`)来读取数据的话,经内部压测发现,在大数据量的情况下,会导致 OpenTSDB 的异步框架会报 CallBack 过多的问题;所以,采用了直连底层 HBase 存储,通过 Scan 的方式来扫描数据点,来避免这个问题。另外,还考虑到,可以通过指定 metric 和时间范围,可以顺序地 Scan HBase 表,提高查询效率。 + + + diff --git a/opentsdbreader/pom.xml b/opentsdbreader/pom.xml new file mode 100644 index 00000000..bcbf8414 --- /dev/null +++ b/opentsdbreader/pom.xml @@ -0,0 +1,156 @@ + + + 4.0.0 + + + com.alibaba.datax + datax-all + 0.0.1-SNAPSHOT + + + opentsdbreader + opentsdbreader + jar + + + UTF-8 + + + 3.3.2 + + + 4.4 + 2.4 + + + 1.2.28 + + + 2.3.2 + + + 4.12 + + + 
2.9.9 + + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + fastjson + com.alibaba + + + commons-math3 + org.apache.commons + + + + + org.slf4j + slf4j-api + + + ch.qos.logback + logback-classic + + + + + org.apache.commons + commons-lang3 + ${commons-lang3.version} + + + + + org.apache.httpcomponents + httpclient + ${httpclient.version} + + + commons-io + commons-io + ${commons-io.version} + + + org.apache.httpcomponents + fluent-hc + ${httpclient.version} + + + + + com.alibaba + fastjson + ${fastjson.version} + + + + + net.opentsdb + opentsdb + ${opentsdb.version} + + + + + joda-time + joda-time + ${joda-time.version} + + + + + junit + junit + ${junit4.version} + test + + + + + + + + maven-compiler-plugin + + 1.6 + 1.6 + ${project-sourceEncoding} + + + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + diff --git a/opentsdbreader/src/main/assembly/package.xml b/opentsdbreader/src/main/assembly/package.xml new file mode 100755 index 00000000..f4ac3b4b --- /dev/null +++ b/opentsdbreader/src/main/assembly/package.xml @@ -0,0 +1,35 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/reader/opentsdbreader + + + target/ + + opentsdbreader-0.0.1-SNAPSHOT.jar + + plugin/reader/opentsdbreader + + + + + + false + plugin/reader/opentsdbreader/libs + runtime + + + diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/CliQuery.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/CliQuery.java new file mode 100644 index 00000000..fe8dce2b --- /dev/null +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/CliQuery.java @@ -0,0 +1,104 @@ +package com.alibaba.datax.plugin.reader.conn; + +import net.opentsdb.core.*; +import net.opentsdb.utils.DateTime; + +import java.util.ArrayList; +import java.util.HashMap; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. + * Function:CliQuery + * + * @author Benedict Jin + * @since 2019-04-17 + */ +final class CliQuery { + + /** + * Parses the query from the command lines. + * + * @param args The command line arguments. + * @param tsdb The TSDB to use. + * @param queries The list in which {@link Query}s will be appended. + */ + static void parseCommandLineQuery(final String[] args, + final TSDB tsdb, + final ArrayList queries) { + long start_ts = DateTime.parseDateTimeString(args[0], null); + if (start_ts >= 0) { + start_ts /= 1000; + } + long end_ts = -1; + if (args.length > 3) { + // see if we can detect an end time + try { + if (args[1].charAt(0) != '+' && (args[1].indexOf(':') >= 0 + || args[1].indexOf('/') >= 0 || args[1].indexOf('-') >= 0 + || Long.parseLong(args[1]) > 0)) { + end_ts = DateTime.parseDateTimeString(args[1], null); + } + } catch (NumberFormatException ignore) { + // ignore it as it means the third parameter is likely the aggregator + } + } + // temp fixup to seconds from ms until the rest of TSDB supports ms + // Note you can't append this to the DateTime.parseDateTimeString() call as + // it clobbers -1 results + if (end_ts >= 0) { + end_ts /= 1000; + } + + int i = end_ts < 0 ? 
1 : 2; + while (i < args.length && args[i].charAt(0) == '+') { + i++; + } + + while (i < args.length) { + final Aggregator agg = Aggregators.get(args[i++]); + final boolean rate = "rate".equals(args[i]); + RateOptions rate_options = new RateOptions(false, Long.MAX_VALUE, + RateOptions.DEFAULT_RESET_VALUE); + if (rate) { + i++; + + long counterMax = Long.MAX_VALUE; + long resetValue = RateOptions.DEFAULT_RESET_VALUE; + if (args[i].startsWith("counter")) { + String[] parts = Tags.splitString(args[i], ','); + if (parts.length >= 2 && parts[1].length() > 0) { + counterMax = Long.parseLong(parts[1]); + } + if (parts.length >= 3 && parts[2].length() > 0) { + resetValue = Long.parseLong(parts[2]); + } + rate_options = new RateOptions(true, counterMax, resetValue); + i++; + } + } + final boolean downsample = "downsample".equals(args[i]); + if (downsample) { + i++; + } + final long interval = downsample ? Long.parseLong(args[i++]) : 0; + final Aggregator sampler = downsample ? Aggregators.get(args[i++]) : null; + final String metric = args[i++]; + final HashMap tags = new HashMap(); + while (i < args.length && args[i].indexOf(' ', 1) < 0 + && args[i].indexOf('=', 1) > 0) { + Tags.parse(tags, args[i++]); + } + final Query query = tsdb.newQuery(); + query.setStartTime(start_ts); + if (end_ts > 0) { + query.setEndTime(end_ts); + } + query.setTimeSeries(metric, tags, agg, rate, rate_options); + if (downsample) { + query.downsample(interval, sampler); + } + queries.add(query); + } + } +} diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/Connection4TSDB.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/Connection4TSDB.java new file mode 100644 index 00000000..97a841cf --- /dev/null +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/Connection4TSDB.java @@ -0,0 +1,77 @@ +package com.alibaba.datax.plugin.reader.conn; + +import com.alibaba.datax.common.plugin.RecordSender; + +import java.util.List; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. + * Function:Connection for TSDB-like databases + * + * @author Benedict Jin + * @since 2019-03-29 + */ +public interface Connection4TSDB { + + /** + * Get the address of Database. + * + * @return host+ip + */ + String address(); + + /** + * Get the version of Database. + * + * @return version + */ + String version(); + + /** + * Get these configurations. + * + * @return configs + */ + String config(); + + /** + * Get the list of supported version. + * + * @return version list + */ + String[] getSupportVersionPrefix(); + + /** + * Send data points by metric & start time & end time. + * + * @param metric metric + * @param start startTime + * @param end endTime + * @param recordSender sender + */ + void sendDPs(String metric, Long start, Long end, RecordSender recordSender) throws Exception; + + /** + * Put data point. + * + * @param dp data point + * @return whether the data point is written successfully + */ + boolean put(DataPoint4TSDB dp); + + /** + * Put data points. + * + * @param dps data points + * @return whether the data point is written successfully + */ + boolean put(List dps); + + /** + * Whether current version is supported. + * + * @return true: supported; false: not yet! 
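+     *         (Implementations fetch the version string from the database and match it
+     *         against {@link #getSupportVersionPrefix()}; see OpenTSDBConnection for
+     *         the reference behavior.)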
+ */ + boolean isSupported(); +} diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DataPoint4TSDB.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DataPoint4TSDB.java new file mode 100644 index 00000000..1f690245 --- /dev/null +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DataPoint4TSDB.java @@ -0,0 +1,68 @@ +package com.alibaba.datax.plugin.reader.conn; + +import com.alibaba.fastjson.JSON; + +import java.util.Map; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. + * Function:DataPoint for TSDB + * + * @author Benedict Jin + * @since 2019-04-10 + */ +public class DataPoint4TSDB { + + private long timestamp; + private String metric; + private Map tags; + private Object value; + + public DataPoint4TSDB() { + } + + public DataPoint4TSDB(long timestamp, String metric, Map tags, Object value) { + this.timestamp = timestamp; + this.metric = metric; + this.tags = tags; + this.value = value; + } + + public long getTimestamp() { + return timestamp; + } + + public void setTimestamp(long timestamp) { + this.timestamp = timestamp; + } + + public String getMetric() { + return metric; + } + + public void setMetric(String metric) { + this.metric = metric; + } + + public Map getTags() { + return tags; + } + + public void setTags(Map tags) { + this.tags = tags; + } + + public Object getValue() { + return value; + } + + public void setValue(Object value) { + this.value = value; + } + + @Override + public String toString() { + return JSON.toJSONString(this); + } +} diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DumpSeries.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DumpSeries.java new file mode 100644 index 00000000..56ab0bc2 --- /dev/null +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DumpSeries.java @@ -0,0 +1,96 @@ +package com.alibaba.datax.plugin.reader.conn; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.element.StringColumn; +import com.alibaba.datax.common.plugin.RecordSender; +import net.opentsdb.core.*; +import net.opentsdb.core.Internal.Cell; +import org.hbase.async.KeyValue; +import org.hbase.async.Scanner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. + * Function:Tool to dump the data straight from HBase + * + * @author Benedict Jin + * @since 2019-04-17 + */ +final class DumpSeries { + + private static final Logger LOG = LoggerFactory.getLogger(DumpSeries.class); + + /** + * Dump all data points with special metric and time range, then send them all by {@link RecordSender}. 
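+     * <p>The args array follows the CLI-style layout parsed by CliQuery#parseCommandLineQuery:
+     * start time, end time, aggregator, metric. An illustrative call (millisecond
+     * timestamps, "none" aggregator), matching the array that OpenTSDBDump builds:
+     * <pre>{@code
+     * doDump(tsdb, new String[]{"1546272000000", "1546275599999", "none", "m"}, sender);
+     * }</pre>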
+ */ + static void doDump(TSDB tsdb, String[] args, RecordSender sender) throws Exception { + final ArrayList queries = new ArrayList(); + CliQuery.parseCommandLineQuery(args, tsdb, queries); + + List dps = new LinkedList(); + for (final Query query : queries) { + final List scanners = Internal.getScanners(query); + for (Scanner scanner : scanners) { + ArrayList> rows; + while ((rows = scanner.nextRows().join()) != null) { + for (final ArrayList row : rows) { + final byte[] key = row.get(0).key(); + final long baseTime = Internal.baseTime(tsdb, key); + final String metric = Internal.metricName(tsdb, key); + for (final KeyValue kv : row) { + formatKeyValue(dps, tsdb, kv, baseTime, metric); + for (DataPoint4TSDB dp : dps) { + StringColumn tsdbColumn = new StringColumn(dp.toString()); + Record record = sender.createRecord(); + record.addColumn(tsdbColumn); + sender.sendToWriter(record); + } + dps.clear(); + } + } + } + } + } + } + + /** + * Parse KeyValue into data points. + */ + private static void formatKeyValue(final List dps, final TSDB tsdb, + final KeyValue kv, final long baseTime, final String metric) { + Map tagKVs = Internal.getTags(tsdb, kv.key()); + + final byte[] qualifier = kv.qualifier(); + final int q_len = qualifier.length; + + if (!AppendDataPoints.isAppendDataPoints(qualifier) && q_len % 2 != 0) { + // custom data object, not a data point + if (LOG.isDebugEnabled()) { + LOG.debug("Not a data point"); + } + } else if (q_len == 2 || q_len == 4 && Internal.inMilliseconds(qualifier)) { + // regular data point + final Cell cell = Internal.parseSingleValue(kv); + if (cell == null) { + throw new IllegalDataException("Unable to parse row: " + kv); + } + dps.add(new DataPoint4TSDB(cell.absoluteTimestamp(baseTime), metric, tagKVs, cell.parseValue())); + } else { + final Collection cells; + if (q_len == 3) { + // append data points + cells = new AppendDataPoints().parseKeyValue(tsdb, kv); + } else { + // compacted column + cells = Internal.extractDataPoints(kv); + } + for (Cell cell : cells) { + dps.add(new DataPoint4TSDB(cell.absoluteTimestamp(baseTime), metric, tagKVs, cell.parseValue())); + } + } + } +} diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnection.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnection.java new file mode 100644 index 00000000..9e7f12c9 --- /dev/null +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnection.java @@ -0,0 +1,78 @@ +package com.alibaba.datax.plugin.reader.conn; + +import com.alibaba.datax.common.plugin.RecordSender; +import com.alibaba.datax.plugin.reader.util.TSDBUtils; +import com.alibaba.fastjson.JSON; +import org.apache.commons.lang3.StringUtils; + +import java.util.List; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. 
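+ * Note: version() and config() are fetched over HTTP via TSDBUtils, sendDPs() delegates
+ * to OpenTSDBDump (which scans the underlying HBase directly), and the put() methods
+ * always return false on the reader side.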
+ * Function:OpenTSDB Connection + * + * @author Benedict Jin + * @since 2019-03-29 + */ +public class OpenTSDBConnection implements Connection4TSDB { + + private String address; + + public OpenTSDBConnection(String address) { + this.address = address; + } + + @Override + public String address() { + return address; + } + + @Override + public String version() { + return TSDBUtils.version(address); + } + + @Override + public String config() { + return TSDBUtils.config(address); + } + + @Override + public String[] getSupportVersionPrefix() { + return new String[]{"2.3"}; + } + + @Override + public void sendDPs(String metric, Long start, Long end, RecordSender recordSender) throws Exception { + OpenTSDBDump.dump(this, metric, start, end, recordSender); + } + + @Override + public boolean put(DataPoint4TSDB dp) { + return false; + } + + @Override + public boolean put(List dps) { + return false; + } + + @Override + public boolean isSupported() { + String versionJson = version(); + if (StringUtils.isBlank(versionJson)) { + throw new RuntimeException("Cannot get the version!"); + } + String version = JSON.parseObject(versionJson).getString("version"); + if (StringUtils.isBlank(version)) { + return false; + } + for (String prefix : getSupportVersionPrefix()) { + if (version.startsWith(prefix)) { + return true; + } + } + return false; + } +} diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBDump.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBDump.java new file mode 100644 index 00000000..5ed0a314 --- /dev/null +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBDump.java @@ -0,0 +1,48 @@ +package com.alibaba.datax.plugin.reader.conn; + +import com.alibaba.datax.common.plugin.RecordSender; +import com.alibaba.fastjson.JSON; +import net.opentsdb.core.TSDB; +import net.opentsdb.utils.Config; + +import java.util.Map; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. + * Function:OpenTSDB Dump + * + * @author Benedict Jin + * @since 2019-04-15 + */ +final class OpenTSDBDump { + + private static TSDB TSDB_INSTANCE; + + private OpenTSDBDump() { + } + + static void dump(OpenTSDBConnection conn, String metric, Long start, Long end, RecordSender sender) throws Exception { + DumpSeries.doDump(getTSDB(conn), new String[]{start + "", end + "", "none", metric}, sender); + } + + private static TSDB getTSDB(OpenTSDBConnection conn) { + if (TSDB_INSTANCE == null) { + synchronized (TSDB.class) { + if (TSDB_INSTANCE == null) { + try { + Config config = new Config(false); + Map configurations = JSON.parseObject(conn.config(), Map.class); + for (Object key : configurations.keySet()) { + config.overrideConfig(key.toString(), configurations.get(key.toString()).toString()); + } + TSDB_INSTANCE = new TSDB(config); + } catch (Exception e) { + throw new RuntimeException("Cannot init OpenTSDB connection!"); + } + } + } + } + return TSDB_INSTANCE; + } +} diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Constant.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Constant.java new file mode 100644 index 00000000..6017d4e5 --- /dev/null +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Constant.java @@ -0,0 +1,14 @@ +package com.alibaba.datax.plugin.reader.opentsdbreader; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. 
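+ * Note: DEFAULT_DATA_FORMAT ("yyyy-MM-dd HH:mm:ss") is the pattern OpenTSDBReader uses
+ * to parse the beginDateTime/endDateTime parameters.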
+ * Function:Key + * + * @author Benedict Jin + * @since 2019-04-18 + */ +public final class Constant { + + static final String DEFAULT_DATA_FORMAT = "yyyy-MM-dd HH:mm:ss"; +} diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Key.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Key.java new file mode 100644 index 00000000..5b8c4adc --- /dev/null +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Key.java @@ -0,0 +1,17 @@ +package com.alibaba.datax.plugin.reader.opentsdbreader; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. + * Function:Key + * + * @author Benedict Jin + * @since 2019-04-18 + */ +public class Key { + + static final String ENDPOINT = "endpoint"; + static final String COLUMN = "column"; + static final String BEGIN_DATE_TIME = "beginDateTime"; + static final String END_DATE_TIME = "endDateTime"; +} diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReader.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReader.java new file mode 100755 index 00000000..d57456d1 --- /dev/null +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReader.java @@ -0,0 +1,207 @@ +package com.alibaba.datax.plugin.reader.opentsdbreader; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordSender; +import com.alibaba.datax.common.spi.Reader; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.reader.conn.OpenTSDBConnection; +import com.alibaba.datax.plugin.reader.util.TimeUtils; +import com.alibaba.fastjson.JSON; +import org.apache.commons.lang3.StringUtils; +import org.joda.time.DateTime; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. 
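+ * Note: Job validates endpoint, column and the beginDateTime/endDateTime range, then
+ * splits the work by metric and by hour; each Task scans one (metric, hour) slice
+ * through OpenTSDBConnection.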
+ * Function:Key + * + * @author Benedict Jin + * @since 2019-04-18 + */ +@SuppressWarnings("unused") +public class OpenTSDBReader extends Reader { + + public static class Job extends Reader.Job { + + private static final Logger LOG = LoggerFactory.getLogger(Job.class); + + private Configuration originalConfig; + + @Override + public void init() { + this.originalConfig = super.getPluginJobConf(); + + String address = originalConfig.getString(Key.ENDPOINT); + if (StringUtils.isBlank(address)) { + throw DataXException.asDataXException( + OpenTSDBReaderErrorCode.REQUIRED_VALUE, + "The parameter [" + Key.ENDPOINT + "] is not set."); + } + + List columns = originalConfig.getList(Key.COLUMN, String.class); + if (columns == null || columns.isEmpty()) { + throw DataXException.asDataXException( + OpenTSDBReaderErrorCode.REQUIRED_VALUE, + "The parameter [" + Key.COLUMN + "] is not set."); + } + + SimpleDateFormat format = new SimpleDateFormat(Constant.DEFAULT_DATA_FORMAT); + String startTime = originalConfig.getString(Key.BEGIN_DATE_TIME); + Long startDate; + if (startTime == null || startTime.trim().length() == 0) { + throw DataXException.asDataXException( + OpenTSDBReaderErrorCode.REQUIRED_VALUE, + "The parameter [" + Key.BEGIN_DATE_TIME + "] is not set."); + } else { + try { + startDate = format.parse(startTime).getTime(); + } catch (ParseException e) { + throw DataXException.asDataXException(OpenTSDBReaderErrorCode.ILLEGAL_VALUE, + "The parameter [" + Key.BEGIN_DATE_TIME + + "] needs to conform to the [" + Constant.DEFAULT_DATA_FORMAT + "] format."); + } + } + String endTime = originalConfig.getString(Key.END_DATE_TIME); + Long endDate; + if (endTime == null || endTime.trim().length() == 0) { + throw DataXException.asDataXException( + OpenTSDBReaderErrorCode.REQUIRED_VALUE, + "The parameter [" + Key.END_DATE_TIME + "] is not set."); + } else { + try { + endDate = format.parse(endTime).getTime(); + } catch (ParseException e) { + throw DataXException.asDataXException(OpenTSDBReaderErrorCode.ILLEGAL_VALUE, + "The parameter [" + Key.END_DATE_TIME + + "] needs to conform to the [" + Constant.DEFAULT_DATA_FORMAT + "] format."); + } + } + if (startDate >= endDate) { + throw DataXException.asDataXException(OpenTSDBReaderErrorCode.ILLEGAL_VALUE, + "The parameter [" + Key.BEGIN_DATE_TIME + + "] should be less than the parameter [" + Key.END_DATE_TIME + "]."); + } + } + + @Override + public void prepare() { + } + + @Override + public List split(int adviceNumber) { + List configurations = new ArrayList(); + + // get metrics + List columns = originalConfig.getList(Key.COLUMN, String.class); + + // get time range + SimpleDateFormat format = new SimpleDateFormat(Constant.DEFAULT_DATA_FORMAT); + long startTime; + try { + startTime = format.parse(originalConfig.getString(Key.BEGIN_DATE_TIME)).getTime(); + } catch (ParseException e) { + throw DataXException.asDataXException( + OpenTSDBReaderErrorCode.ILLEGAL_VALUE, "解析[" + Key.BEGIN_DATE_TIME + "]失败.", e); + } + long endTime; + try { + endTime = format.parse(originalConfig.getString(Key.END_DATE_TIME)).getTime(); + } catch (ParseException e) { + throw DataXException.asDataXException( + OpenTSDBReaderErrorCode.ILLEGAL_VALUE, "解析[" + Key.END_DATE_TIME + "]失败.", e); + } + if (TimeUtils.isSecond(startTime)) { + startTime *= 1000; + } + if (TimeUtils.isSecond(endTime)) { + endTime *= 1000; + } + DateTime startDateTime = new DateTime(TimeUtils.getTimeInHour(startTime)); + DateTime endDateTime = new DateTime(TimeUtils.getTimeInHour(endTime)); + + // split by metric + 
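+            // (and by hour below: every metric/hour pair becomes one slice, e.g. two
+            // metrics over a three-hour window yield six task configurations)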
for (String column : columns) { + // split by time in hour + while (startDateTime.isBefore(endDateTime)) { + Configuration clone = this.originalConfig.clone(); + clone.set(Key.COLUMN, Collections.singletonList(column)); + + clone.set(Key.BEGIN_DATE_TIME, startDateTime.getMillis()); + startDateTime = startDateTime.plusHours(1); + // Make sure the time interval is [start, end). + // Because net.opentsdb.core.Query.setEndTime means less than or equal to the end time. + clone.set(Key.END_DATE_TIME, startDateTime.getMillis() - 1); + configurations.add(clone); + + LOG.info("Configuration: {}", JSON.toJSONString(clone)); + } + } + return configurations; + } + + @Override + public void post() { + } + + @Override + public void destroy() { + } + } + + public static class Task extends Reader.Task { + + private static final Logger LOG = LoggerFactory.getLogger(Task.class); + + private List columns; + private OpenTSDBConnection conn; + private Long startTime; + private Long endTime; + + @Override + public void init() { + Configuration readerSliceConfig = super.getPluginJobConf(); + + LOG.info("getPluginJobConf: {}", JSON.toJSONString(readerSliceConfig)); + + this.columns = readerSliceConfig.getList(Key.COLUMN, String.class); + String address = readerSliceConfig.getString(Key.ENDPOINT); + + conn = new OpenTSDBConnection(address); + + this.startTime = readerSliceConfig.getLong(Key.BEGIN_DATE_TIME); + this.endTime = readerSliceConfig.getLong(Key.END_DATE_TIME); + } + + @Override + public void prepare() { + } + + @Override + public void startRead(RecordSender recordSender) { + try { + for (String column : columns) { + conn.sendDPs(column, this.startTime, this.endTime, recordSender); + } + } catch (Exception e) { + throw DataXException.asDataXException( + OpenTSDBReaderErrorCode.ILLEGAL_VALUE, "获取或发送数据点的过程中出错!", e); + } + } + + @Override + public void post() { + } + + @Override + public void destroy() { + } + } +} diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReaderErrorCode.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReaderErrorCode.java new file mode 100755 index 00000000..0d9de4c4 --- /dev/null +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReaderErrorCode.java @@ -0,0 +1,40 @@ +package com.alibaba.datax.plugin.reader.opentsdbreader; + +import com.alibaba.datax.common.spi.ErrorCode; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. + * Function:OpenTSDB Reader Error Code + * + * @author Benedict Jin + * @since 2019-04-18 + */ +public enum OpenTSDBReaderErrorCode implements ErrorCode { + + REQUIRED_VALUE("OpenTSDBReader-00", "缺失必要的值"), + ILLEGAL_VALUE("OpenTSDBReader-01", "值非法"); + + private final String code; + private final String description; + + OpenTSDBReaderErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.description; + } + + @Override + public String toString() { + return String.format("Code:[%s], Description:[%s]. 
", this.code, this.description); + } +} diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/HttpUtils.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/HttpUtils.java new file mode 100644 index 00000000..cdf5c9c1 --- /dev/null +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/HttpUtils.java @@ -0,0 +1,68 @@ +package com.alibaba.datax.plugin.reader.util; + +import com.alibaba.fastjson.JSON; +import org.apache.http.client.fluent.Content; +import org.apache.http.client.fluent.Request; +import org.apache.http.entity.ContentType; + +import java.nio.charset.Charset; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. + * Function:HttpUtils + * + * @author Benedict Jin + * @since 2019-03-29 + */ +public final class HttpUtils { + + public final static Charset UTF_8 = Charset.forName("UTF-8"); + public final static int CONNECT_TIMEOUT_DEFAULT_IN_MILL = (int) TimeUnit.SECONDS.toMillis(60); + public final static int SOCKET_TIMEOUT_DEFAULT_IN_MILL = (int) TimeUnit.SECONDS.toMillis(60); + + private HttpUtils() { + } + + public static String get(String url) throws Exception { + Content content = Request.Get(url) + .connectTimeout(CONNECT_TIMEOUT_DEFAULT_IN_MILL) + .socketTimeout(SOCKET_TIMEOUT_DEFAULT_IN_MILL) + .execute() + .returnContent(); + if (content == null) { + return null; + } + return content.asString(UTF_8); + } + + public static String post(String url, Map params) throws Exception { + return post(url, JSON.toJSONString(params), CONNECT_TIMEOUT_DEFAULT_IN_MILL, SOCKET_TIMEOUT_DEFAULT_IN_MILL); + } + + public static String post(String url, String params) throws Exception { + return post(url, params, CONNECT_TIMEOUT_DEFAULT_IN_MILL, SOCKET_TIMEOUT_DEFAULT_IN_MILL); + } + + public static String post(String url, Map params, + int connectTimeoutInMill, int socketTimeoutInMill) throws Exception { + return post(url, JSON.toJSONString(params), connectTimeoutInMill, socketTimeoutInMill); + } + + public static String post(String url, String params, + int connectTimeoutInMill, int socketTimeoutInMill) throws Exception { + Content content = Request.Post(url) + .connectTimeout(connectTimeoutInMill) + .socketTimeout(socketTimeoutInMill) + .addHeader("Content-Type", "application/json") + .bodyString(params, ContentType.APPLICATION_JSON) + .execute() + .returnContent(); + if (content == null) { + return null; + } + return content.asString(UTF_8); + } +} diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TSDBUtils.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TSDBUtils.java new file mode 100644 index 00000000..72c7fd62 --- /dev/null +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TSDBUtils.java @@ -0,0 +1,68 @@ +package com.alibaba.datax.plugin.reader.util; + +import com.alibaba.datax.plugin.reader.conn.DataPoint4TSDB; +import com.alibaba.fastjson.JSON; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. 
+ * Function:TSDB Utils
+ *
+ * @author Benedict Jin
+ * @since 2019-03-29
+ */
+public final class TSDBUtils {
+
+    private static final Logger LOG = LoggerFactory.getLogger(TSDBUtils.class);
+
+    private TSDBUtils() {
+    }
+
+    public static String version(String address) {
+        String url = String.format("%s/api/version", address);
+        String rsp;
+        try {
+            rsp = HttpUtils.get(url);
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+        return rsp;
+    }
+
+    public static String config(String address) {
+        String url = String.format("%s/api/config", address);
+        String rsp;
+        try {
+            rsp = HttpUtils.get(url);
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+        return rsp;
+    }
+
+    public static boolean put(String address, List<DataPoint4TSDB> dps) {
+        return put(address, JSON.toJSON(dps));
+    }
+
+    public static boolean put(String address, DataPoint4TSDB dp) {
+        return put(address, JSON.toJSON(dp));
+    }
+
+    private static boolean put(String address, Object o) {
+        String url = String.format("%s/api/put", address);
+        String rsp;
+        try {
+            rsp = HttpUtils.post(url, o.toString());
+            // If successful, the returned content should be null.
+            assert rsp == null;
+        } catch (Exception e) {
+            LOG.error("Address: {}, DataPoints: {}", url, o);
+            throw new RuntimeException(e);
+        }
+        return true;
+    }
+}
diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TimeUtils.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TimeUtils.java
new file mode 100644
index 00000000..9bc11b36
--- /dev/null
+++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TimeUtils.java
@@ -0,0 +1,38 @@
+package com.alibaba.datax.plugin.reader.util;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Copyright @ 2019 alibaba.com
+ * All right reserved.
+ * Function:TimeUtils
+ *
+ * @author Benedict Jin
+ * @since 2019-04-22
+ */
+public final class TimeUtils {
+
+    private TimeUtils() {
+    }
+
+    private static final long SECOND_MASK = 0xFFFFFFFF00000000L;
+    private static final long HOUR_IN_MILL = TimeUnit.HOURS.toMillis(1);
+
+    /**
+     * Whether the timestamp is in seconds.
+     *
+     * @param ts timestamp
+     */
+    public static boolean isSecond(long ts) {
+        return (ts & SECOND_MASK) == 0;
+    }
+
+    /**
+     * Get the hour.
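+     * That is, floor the millisecond timestamp to the start of its hour,
+     * ms - (ms % 3600000); e.g. 2019-04-18 15:32:33 maps to 2019-04-18 15:00:00.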
+     *
+     * @param ms time in milliseconds
+     */
+    public static long getTimeInHour(long ms) {
+        return ms - ms % HOUR_IN_MILL;
+    }
+}
diff --git a/opentsdbreader/src/main/resources/plugin.json b/opentsdbreader/src/main/resources/plugin.json
new file mode 100755
index 00000000..692a9853
--- /dev/null
+++ b/opentsdbreader/src/main/resources/plugin.json
@@ -0,0 +1,10 @@
+{
+  "name": "opentsdbreader",
+  "class": "com.alibaba.datax.plugin.reader.opentsdbreader.OpenTSDBReader",
+  "description": {
+    "useScene": "从 OpenTSDB 中摄取数据点",
+    "mechanism": "根据时间和 metric 直连底层 HBase 存储,从而 Scan 出符合条件的数据点",
+    "warn": "指定起止时间会自动忽略分钟和秒,转为整点时刻,例如 2019-4-18 的 [3:35, 4:55) 会被转为 [3:00, 4:00)"
+  },
+  "developer": "Benedict Jin"
+}
diff --git a/opentsdbreader/src/main/resources/plugin_job_template.json b/opentsdbreader/src/main/resources/plugin_job_template.json
new file mode 100644
index 00000000..c1f29f3d
--- /dev/null
+++ b/opentsdbreader/src/main/resources/plugin_job_template.json
@@ -0,0 +1,11 @@
+{
+  "name": "opentsdbreader",
+  "parameter": {
+    "endpoint": "http://localhost:8242",
+    "column": [
+      "m"
+    ],
+    "beginDateTime": "2019-01-01 00:00:00",
+    "endDateTime": "2019-01-01 01:00:00"
+  }
+}
diff --git a/opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnectionTest.java b/opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnectionTest.java
new file mode 100644
index 00000000..91429b4a
--- /dev/null
+++ b/opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnectionTest.java
@@ -0,0 +1,30 @@
+package com.alibaba.datax.plugin.reader.conn;
+
+import org.junit.Assert;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Copyright @ 2019 alibaba.com
+ * All right reserved.
+ * Function:OpenTSDB Connection4TSDB Test
+ *
+ * @author Benedict Jin
+ * @since 2019-03-29
+ */
+@Ignore
+public class OpenTSDBConnectionTest {
+
+    private static final String OPENTSDB_ADDRESS = "http://localhost:8242";
+
+    @Test
+    public void testVersion() {
+        String version = new OpenTSDBConnection(OPENTSDB_ADDRESS).version();
+        Assert.assertNotNull(version);
+    }
+
+    @Test
+    public void testIsSupported() {
+        Assert.assertTrue(new OpenTSDBConnection(OPENTSDB_ADDRESS).isSupported());
+    }
+}
diff --git a/opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/util/Const.java b/opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/util/Const.java
new file mode 100644
index 00000000..df9e0eda
--- /dev/null
+++ b/opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/util/Const.java
@@ -0,0 +1,18 @@
+package com.alibaba.datax.plugin.reader.util;
+
+/**
+ * Copyright @ 2019 alibaba.com
+ * All right reserved.
+ * Function:Const
+ *
+ * @author Benedict Jin
+ * @since 2019-03-29
+ */
+final class Const {
+
+    private Const() {
+    }
+
+    static final String OPENTSDB_ADDRESS = "http://localhost:8242";
+    static final String TSDB_ADDRESS = "http://localhost:8240";
+}
diff --git a/opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/util/HttpUtilsTest.java b/opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/util/HttpUtilsTest.java
new file mode 100644
index 00000000..ca77597f
--- /dev/null
+++ b/opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/util/HttpUtilsTest.java
@@ -0,0 +1,39 @@
+package com.alibaba.datax.plugin.reader.util;
+
+import org.junit.Assert;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Copyright @ 2019 alibaba.com
+ * All right reserved.
+ * Function:HttpUtils Test + * + * @author Benedict Jin + * @since 2019-03-29 + */ +@Ignore +public class HttpUtilsTest { + + @Test + public void testSimpleCase() throws Exception { + String url = "https://httpbin.org/post"; + Map params = new HashMap(); + params.put("foo", "bar"); + + String rsp = HttpUtils.post(url, params); + System.out.println(rsp); + Assert.assertNotNull(rsp); + } + + @Test + public void testGet() throws Exception { + String url = String.format("%s/api/version", Const.OPENTSDB_ADDRESS); + String rsp = HttpUtils.get(url); + System.out.println(rsp); + Assert.assertNotNull(rsp); + } +} diff --git a/opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/util/TSDBTest.java b/opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/util/TSDBTest.java new file mode 100644 index 00000000..8cd091bf --- /dev/null +++ b/opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/util/TSDBTest.java @@ -0,0 +1,28 @@ +package com.alibaba.datax.plugin.reader.util; + +import org.junit.Assert; +import org.junit.Ignore; +import org.junit.Test; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. + * Function:TSDB Test + * + * @author Benedict Jin + * @since 2019-04-11 + */ +@Ignore +public class TSDBTest { + + @Test + public void testVersion() { + String version = TSDBUtils.version(Const.TSDB_ADDRESS); + Assert.assertNotNull(version); + System.out.println(version); + + version = TSDBUtils.version(Const.OPENTSDB_ADDRESS); + Assert.assertNotNull(version); + System.out.println(version); + } +} diff --git a/opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/util/TimeUtilsTest.java b/opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/util/TimeUtilsTest.java new file mode 100644 index 00000000..61d29088 --- /dev/null +++ b/opentsdbreader/src/test/java/com/alibaba/datax/plugin/reader/util/TimeUtilsTest.java @@ -0,0 +1,33 @@ +package com.alibaba.datax.plugin.reader.util; + +import org.junit.Assert; +import org.junit.Test; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. 
+ * Function:com.alibaba.datax.common.util + * + * @author Benedict Jin + * @since 2019-04-22 + */ +public class TimeUtilsTest { + + @Test + public void testIsSecond() { + Assert.assertFalse(TimeUtils.isSecond(System.currentTimeMillis())); + Assert.assertTrue(TimeUtils.isSecond(System.currentTimeMillis() / 1000)); + } + + @Test + public void testGetTimeInHour() throws ParseException { + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + Date date = sdf.parse("2019-04-18 15:32:33"); + long timeInHour = TimeUtils.getTimeInHour(date.getTime()); + Assert.assertEquals("2019-04-18 15:00:00", sdf.format(timeInHour)); + } +} diff --git a/package.xml b/package.xml index 47e277b8..1ff6391d 100755 --- a/package.xml +++ b/package.xml @@ -159,6 +159,13 @@ datax + + opentsdbreader/target/datax/ + + **/*.* + + datax + @@ -322,5 +329,12 @@ datax + + tsdbwriter/target/datax/ + + **/*.* + + datax + diff --git a/pom.xml b/pom.xml index f4d197ba..6e68418e 100755 --- a/pom.xml +++ b/pom.xml @@ -62,6 +62,7 @@ rdbmsreader hbase11xreader hbase094xreader + opentsdbreader mysqlwriter @@ -85,6 +86,7 @@ hbase11xsqlwriter hbase11xsqlreader elasticsearchwriter + tsdbwriter plugin-rdbms-util diff --git a/tsdbwriter/doc/tsdbhttpwriter.md b/tsdbwriter/doc/tsdbhttpwriter.md new file mode 100644 index 00000000..c723a360 --- /dev/null +++ b/tsdbwriter/doc/tsdbhttpwriter.md @@ -0,0 +1,187 @@ + +# TSDBWriter 插件文档 + +___ + + +## 1 快速介绍 + +TSDBWriter 插件实现了将数据点写入到阿里巴巴自研 TSDB 数据库中(后续简称 TSDB)。 + + +时间序列数据库(Time Series Database , 简称 TSDB)是一种高性能,低成本,稳定可靠的在线时序数据库服务;提供高效读写,高压缩比存储、时序数据插值及聚合计算,广泛应用于物联网(IoT)设备监控系统 ,企业能源管理系统(EMS),生产安全监控系统,电力检测系统等行业场景。 TSDB 提供百万级时序数据秒级写入,高压缩比低成本存储、预降采样、插值、多维聚合计算,查询结果可视化功能;解决由于设备采集点数量巨大,数据采集频率高,造成的存储成本高,写入和查询分析效率低的问题。更多关于 TSDB 的介绍,详见[阿里云 TSDB 官网](https://help.aliyun.com/product/54825.html)。 + + + +## 2 实现原理 + +通过 HTTP 连接 TSDB 实例,并通过 `/api/put` 接口将数据点写入。关于写入接口详见 TSDB 的[接口说明文档](https://help.aliyun.com/document_detail/59939.html)。 + + + +## 3 功能说明 + +### 3.1 配置样例 + +* 配置一个从 OpenTSDB 数据库同步抽取数据到 TSDB: + +```json +{ + "job": { + "content": [ + { + "reader": { + "name": "opentsdbreader", + "parameter": { + "endpoint": "http://localhost:4242", + "column": [ + "m" + ], + "startTime": "2019-01-01 00:00:00", + "endTime": "2019-01-01 03:00:00" + } + }, + "writer": { + "name": "tsdbhttpwriter", + "parameter": { + "endpoint": "http://localhost:8242" + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} +``` + + + +### 3.2 参数说明 + +* **name** + * 描述:本插件的名称 + * 必选:是 + * 默认值:tsdbhttpwriter + +* **parameter** + * **endpoint** + * 描述:TSDB 的 HTTP 连接地址 + * 必选:是 + * 格式:http://IP:Port + * 默认值:无 + +* **batchSize** + * 描述:每次批量数据的条数 + * 必选:否 + * 格式:int,需要保证大于 0 + * 默认值:100 + +* **maxRetryTime** + * 描述:失败后重试的次数 + * 必选:否 + * 格式:int,需要保证大于 1 + * 默认值:3 + +* **ignoreWriteError** + * 描述:如果设置为 true,则忽略写入错误,继续写入;否则,多次重试后仍写入失败的话,则会终止写入任务 + * 必选:否 + * 格式:bool + * 默认值:false + + + + + + +### 3.3 类型转换 + + +| DataX 内部类型 | TSDB 数据类型 | +| -------------- | ------------------------------------------------------------ | +| String | TSDB 数据点序列化字符串,包括 timestamp、metric、tags 和 value | + + + + + + +## 4 性能报告 + +### 4.1 环境准备 + +#### 4.1.1 数据特征 + +从 Metric、时间线、Value 和 采集周期 四个方面来描述: + +##### metric + +固定指定一个 metric 为 `m`。 + +##### tagkv + +前四个 tagkv 全排列,形成 `10 * 20 * 100 * 100 = 2000000` 条时间线,最后 IP 对应 2000000 条时间线从 1 开始自增。 + +| **tag_k** | **tag_v** | +| --------- | ------------- | +| zone | z1~z10 | +| cluster | c1~c20 | +| group | g1~100 | +| app | a1~a100 | +| ip | ip1~ip2000000 | + +##### value + +度量值为 [1, 100] 
区间内的随机值 + +##### interval + +采集周期为 10 秒,持续摄入 3 小时,总数据量为 `3 * 60 * 60 / 10 * 2000000 = 2,160,000,000` 个数据点。 + + + +#### 4.1.2 机器参数 + +TSDB Writer 机型: 64C256G + +HBase 机型: 8C16G * 5 + +#### 4.1.3 DataX jvm 参数 + +"-Xms4096m -Xmx4096m" + + + + +### 4.2 测试报告 + + +| 通道数 | DataX 速度 (Rec/s) | DataX 流量 (MB/s) | +| ------ | ------------------ | ----------------- | +| 1 | 129753 | 15.45 | +| 2 | 284953 | 33.70 | +| 3 | 385868 | 45.71 | + + + + + +## 5 约束限制 + +### 5.1 目前只支持兼容 TSDB 2.4.x 及以上版本 + +其他版本暂不保证兼容 + + + + + +## 6 FAQ + + + + + diff --git a/tsdbwriter/pom.xml b/tsdbwriter/pom.xml new file mode 100644 index 00000000..d74776af --- /dev/null +++ b/tsdbwriter/pom.xml @@ -0,0 +1,136 @@ + + + 4.0.0 + + + com.alibaba.datax + datax-all + 0.0.1-SNAPSHOT + + + tsdbwriter + tsdbwriter + jar + + + UTF-8 + + + 3.3.2 + + + 4.4 + 2.4 + + + 1.2.28 + + + 4.12 + + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + fastjson + com.alibaba + + + commons-math3 + org.apache.commons + + + + + org.slf4j + slf4j-api + + + ch.qos.logback + logback-classic + + + + + org.apache.commons + commons-lang3 + ${commons-lang3.version} + + + + + org.apache.httpcomponents + httpclient + ${httpclient.version} + + + commons-io + commons-io + ${commons-io.version} + + + org.apache.httpcomponents + fluent-hc + ${httpclient.version} + + + + + com.alibaba + fastjson + ${fastjson.version} + + + + + junit + junit + ${junit4.version} + test + + + + + + + + maven-compiler-plugin + + 1.6 + 1.6 + ${project-sourceEncoding} + + + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + diff --git a/tsdbwriter/src/main/assembly/package.xml b/tsdbwriter/src/main/assembly/package.xml new file mode 100755 index 00000000..ff474770 --- /dev/null +++ b/tsdbwriter/src/main/assembly/package.xml @@ -0,0 +1,35 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/writer/tsdbwriter + + + target/ + + tsdbwriter-0.0.1-SNAPSHOT.jar + + plugin/writer/tsdbwriter + + + + + + false + plugin/writer/tsdbwriter/libs + runtime + + + diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/Connection4TSDB.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/Connection4TSDB.java new file mode 100644 index 00000000..8119348d --- /dev/null +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/Connection4TSDB.java @@ -0,0 +1,85 @@ +package com.alibaba.datax.plugin.writer.conn; + +import com.alibaba.datax.common.plugin.RecordSender; + +import java.util.List; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. + * Function:Connection for TSDB-like databases + * + * @author Benedict Jin + * @since 2019-03-29 + */ +public interface Connection4TSDB { + + /** + * Get the address of Database. + * + * @return host+ip + */ + String address(); + + /** + * Get the version of Database. + * + * @return version + */ + String version(); + + /** + * Get these configurations. + * + * @return configs + */ + String config(); + + /** + * Get the list of supported version. + * + * @return version list + */ + String[] getSupportVersionPrefix(); + + /** + * Send data points by metric & start time & end time. + * + * @param metric metric + * @param start startTime + * @param end endTime + * @param recordSender sender + */ + void sendDPs(String metric, Long start, Long end, RecordSender recordSender) throws Exception; + + /** + * Put data point. 
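+     * (In TSDBConnection this serializes the point to JSON and posts it to the
+     * /api/put endpoint via TSDBUtils.)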
+ * + * @param dp data point + * @return whether the data point is written successfully + */ + boolean put(DataPoint4TSDB dp); + + /** + * Put data points. + * + * @param dps data points + * @return whether the data point is written successfully + */ + boolean put(List dps); + + /** + * Put data points. + * + * @param dps data points + * @return whether the data point is written successfully + */ + boolean put(String dps); + + /** + * Whether current version is supported. + * + * @return true: supported; false: not yet! + */ + boolean isSupported(); +} diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/DataPoint4TSDB.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/DataPoint4TSDB.java new file mode 100644 index 00000000..fee012df --- /dev/null +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/DataPoint4TSDB.java @@ -0,0 +1,68 @@ +package com.alibaba.datax.plugin.writer.conn; + +import com.alibaba.fastjson.JSON; + +import java.util.Map; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. + * Function:DataPoint for TSDB + * + * @author Benedict Jin + * @since 2019-04-10 + */ +public class DataPoint4TSDB { + + private long timestamp; + private String metric; + private Map tags; + private Object value; + + public DataPoint4TSDB() { + } + + public DataPoint4TSDB(long timestamp, String metric, Map tags, Object value) { + this.timestamp = timestamp; + this.metric = metric; + this.tags = tags; + this.value = value; + } + + public long getTimestamp() { + return timestamp; + } + + public void setTimestamp(long timestamp) { + this.timestamp = timestamp; + } + + public String getMetric() { + return metric; + } + + public void setMetric(String metric) { + this.metric = metric; + } + + public Map getTags() { + return tags; + } + + public void setTags(Map tags) { + this.tags = tags; + } + + public Object getValue() { + return value; + } + + public void setValue(Object value) { + this.value = value; + } + + @Override + public String toString() { + return JSON.toJSONString(this); + } +} diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/TSDBConnection.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/TSDBConnection.java new file mode 100644 index 00000000..e4ebad7d --- /dev/null +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/TSDBConnection.java @@ -0,0 +1,86 @@ +package com.alibaba.datax.plugin.writer.conn; + +import com.alibaba.datax.common.plugin.RecordSender; +import com.alibaba.datax.plugin.writer.util.TSDBUtils; +import com.alibaba.fastjson.JSON; +import org.apache.commons.lang3.StringUtils; + +import java.util.List; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. 
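+ * Note: wraps a single TSDB endpoint address; version/config lookups and
+ * data-point writes are all delegated to TSDBUtils.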
+ * Function:TSDB Connection + * + * @author Benedict Jin + * @since 2019-03-29 + */ +public class TSDBConnection implements Connection4TSDB { + + private String address; + + public TSDBConnection(String address) { + if (StringUtils.isBlank(address)) { + throw new RuntimeException("TSDBConnection init failed because address is blank!"); + } + this.address = address; + } + + @Override + public String address() { + return address; + } + + @Override + public String version() { + return TSDBUtils.version(address); + } + + @Override + public String config() { + return TSDBUtils.config(address); + } + + @Override + public String[] getSupportVersionPrefix() { + return new String[]{"2.4.1", "2.4.2"}; + } + + @Override + public void sendDPs(String metric, Long start, Long end, RecordSender recordSender) { + throw new RuntimeException("Not support yet!"); + } + + @Override + public boolean put(DataPoint4TSDB dp) { + return TSDBUtils.put(address, dp); + } + + @Override + public boolean put(List dps) { + return TSDBUtils.put(address, dps); + } + + @Override + public boolean put(String dps) { + return TSDBUtils.put(address, dps); + } + + @Override + public boolean isSupported() { + String versionJson = version(); + if (StringUtils.isBlank(versionJson)) { + throw new RuntimeException("Cannot get the version!"); + } + String version = JSON.parseObject(versionJson).getString("version"); + if (StringUtils.isBlank(version)) { + return false; + } + for (String prefix : getSupportVersionPrefix()) { + if (version.startsWith(prefix)) { + return true; + } + } + return false; + } +} diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/Constant.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/Constant.java new file mode 100644 index 00000000..abac14a4 --- /dev/null +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/Constant.java @@ -0,0 +1,16 @@ +package com.alibaba.datax.plugin.writer.tsdbwriter; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. + * Function:Key + * + * @author Benedict Jin + * @since 2019-04-18 + */ +public final class Constant { + + static final int DEFAULT_BATCH_SIZE = 100; + static final int DEFAULT_TRY_SIZE = 3; + static final boolean DEFAULT_IGNORE_WRITE_ERROR = false; +} diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/Key.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/Key.java new file mode 100755 index 00000000..2cc3f671 --- /dev/null +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/Key.java @@ -0,0 +1,17 @@ +package com.alibaba.datax.plugin.writer.tsdbwriter; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. 
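+ * Note: the job-configuration key names read by TSDBWriter (endpoint,
+ * batchSize, maxRetryTime, ignoreWriteError).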
+ * Function:Key + * + * @author Benedict Jin + * @since 2019-04-18 + */ +public class Key { + + static final String ENDPOINT = "endpoint"; + static final String BATCH_SIZE = "batchSize"; + static final String MAX_RETRY_TIME = "maxRetryTime"; + static final String IGNORE_WRITE_ERROR = "ignoreWriteError"; +} diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriter.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriter.java new file mode 100755 index 00000000..e410b2ba --- /dev/null +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriter.java @@ -0,0 +1,171 @@ +package com.alibaba.datax.plugin.writer.tsdbwriter; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.spi.Writer; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.RetryUtil; +import com.alibaba.datax.plugin.writer.conn.TSDBConnection; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Callable; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. + * Function:TSDB Http Writer + * + * @author Benedict Jin + * @since 2019-04-18 + */ +@SuppressWarnings("unused") +public class TSDBWriter extends Writer { + + public static class Job extends Writer.Job { + + private static final Logger LOG = LoggerFactory.getLogger(Job.class); + + private Configuration originalConfig; + + @Override + public void init() { + this.originalConfig = super.getPluginJobConf(); + + String address = this.originalConfig.getString(Key.ENDPOINT); + if (StringUtils.isBlank(address)) { + throw DataXException.asDataXException(TSDBWriterErrorCode.REQUIRED_VALUE, + "The parameter [" + Key.ENDPOINT + "] is not set."); + } + + Integer batchSize = this.originalConfig.getInt(Key.BATCH_SIZE); + if (batchSize == null || batchSize < 1) { + originalConfig.set(Key.BATCH_SIZE, Constant.DEFAULT_BATCH_SIZE); + LOG.info("The parameter [" + Key.BATCH_SIZE + + "] will be default value: " + Constant.DEFAULT_BATCH_SIZE); + } + + Integer retrySize = this.originalConfig.getInt(Key.MAX_RETRY_TIME); + if (retrySize == null || retrySize < 0) { + originalConfig.set(Key.MAX_RETRY_TIME, Constant.DEFAULT_TRY_SIZE); + LOG.info("The parameter [" + Key.MAX_RETRY_TIME + + "] will be default value: " + Constant.DEFAULT_TRY_SIZE); + } + + Boolean ignoreWriteError = this.originalConfig.getBool(Key.IGNORE_WRITE_ERROR); + if (ignoreWriteError == null) { + originalConfig.set(Key.IGNORE_WRITE_ERROR, Constant.DEFAULT_IGNORE_WRITE_ERROR); + LOG.info("The parameter [" + Key.IGNORE_WRITE_ERROR + + "] will be default value: " + Constant.DEFAULT_IGNORE_WRITE_ERROR); + } + } + + @Override + public void prepare() { + } + + @Override + public List split(int mandatoryNumber) { + ArrayList configurations = new ArrayList(mandatoryNumber); + for (int i = 0; i < mandatoryNumber; i++) { + configurations.add(this.originalConfig.clone()); + } + return configurations; + } + + @Override + public void post() { + } + + @Override + public void destroy() { + } + } + + public static class Task extends Writer.Task { + + private static final Logger LOG = LoggerFactory.getLogger(Task.class); + + private TSDBConnection conn; + private int batchSize; + private int retrySize; + private boolean ignoreWriteError; + 
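+        // Write path: startWrite() buffers each incoming column value (one
+        // serialized data point) into a comma-separated list; every batchSize
+        // points, the buffer is wrapped in a JSON array and flushed through
+        // batchPut(), which retries up to retrySize times. If a flush still
+        // fails, the task aborts unless ignoreWriteError is true, in which
+        // case the failure is logged and the batch is dropped.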
+ @Override + public void init() { + Configuration writerSliceConfig = getPluginJobConf(); + String address = writerSliceConfig.getString(Key.ENDPOINT); + this.conn = new TSDBConnection(address); + this.batchSize = writerSliceConfig.getInt(Key.BATCH_SIZE); + this.retrySize = writerSliceConfig.getInt(Key.MAX_RETRY_TIME); + this.ignoreWriteError = writerSliceConfig.getBool(Key.IGNORE_WRITE_ERROR); + } + + @Override + public void prepare() { + } + + @Override + public void startWrite(RecordReceiver recordReceiver) { + try { + Record lastRecord = null; + Record record; + int count = 0; + StringBuilder dps = new StringBuilder(); + while ((record = recordReceiver.getFromReader()) != null) { + final int recordLength = record.getColumnNumber(); + for (int i = 0; i < recordLength; i++) { + dps.append(record.getColumn(i).asString()); + dps.append(","); + count++; + if (count == batchSize) { + count = 0; + batchPut(record, "[" + dps.substring(0, dps.length() - 1) + "]"); + dps = new StringBuilder(); + } + } + lastRecord = record; + } + if (StringUtils.isNotBlank(dps.toString())) { + batchPut(lastRecord, "[" + dps.substring(0, dps.length() - 1) + "]"); + } + } catch (Exception e) { + throw DataXException.asDataXException(TSDBWriterErrorCode.RUNTIME_EXCEPTION, e); + } + } + + private void batchPut(final Record record, final String dps) { + try { + RetryUtil.executeWithRetry(new Callable() { + @Override + public Integer call() { + if (!conn.put(dps)) { + getTaskPluginCollector().collectDirtyRecord(record, "Put data points failed!"); + throw DataXException.asDataXException(TSDBWriterErrorCode.RUNTIME_EXCEPTION, + "Put data points failed!"); + } + return 0; + } + }, retrySize, 60000L, true); + } catch (Exception e) { + if (ignoreWriteError) { + LOG.warn("Ignore write exceptions and continue writing."); + } else { + throw DataXException.asDataXException(TSDBWriterErrorCode.RETRY_WRITER_EXCEPTION, e); + } + } + } + + @Override + public void post() { + } + + @Override + public void destroy() { + } + } +} diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriterErrorCode.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriterErrorCode.java new file mode 100755 index 00000000..f907fb67 --- /dev/null +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriterErrorCode.java @@ -0,0 +1,41 @@ +package com.alibaba.datax.plugin.writer.tsdbwriter; + +import com.alibaba.datax.common.spi.ErrorCode; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. + * Function:TSDB Http Writer Error Code + * + * @author Benedict Jin + * @since 2019-04-18 + */ +public enum TSDBWriterErrorCode implements ErrorCode { + + REQUIRED_VALUE("TSDBWriter-00", "Missing the necessary value"), + RUNTIME_EXCEPTION("TSDBWriter-01", "Runtime exception"), + RETRY_WRITER_EXCEPTION("TSDBWriter-02", "After repeated attempts, the write still fails"); + + private final String code; + private final String description; + + TSDBWriterErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.description; + } + + @Override + public String toString() { + return String.format("Code:[%s], Description:[%s]. 
", this.code, this.description); + } +} diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/util/HttpUtils.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/util/HttpUtils.java new file mode 100644 index 00000000..b81512f7 --- /dev/null +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/util/HttpUtils.java @@ -0,0 +1,68 @@ +package com.alibaba.datax.plugin.writer.util; + +import com.alibaba.fastjson.JSON; +import org.apache.http.client.fluent.Content; +import org.apache.http.client.fluent.Request; +import org.apache.http.entity.ContentType; + +import java.nio.charset.Charset; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. + * Function:HttpUtils + * + * @author Benedict Jin + * @since 2019-03-29 + */ +public final class HttpUtils { + + public final static Charset UTF_8 = Charset.forName("UTF-8"); + public final static int CONNECT_TIMEOUT_DEFAULT_IN_MILL = (int) TimeUnit.SECONDS.toMillis(60); + public final static int SOCKET_TIMEOUT_DEFAULT_IN_MILL = (int) TimeUnit.SECONDS.toMillis(60); + + private HttpUtils() { + } + + public static String get(String url) throws Exception { + Content content = Request.Get(url) + .connectTimeout(CONNECT_TIMEOUT_DEFAULT_IN_MILL) + .socketTimeout(SOCKET_TIMEOUT_DEFAULT_IN_MILL) + .execute() + .returnContent(); + if (content == null) { + return null; + } + return content.asString(UTF_8); + } + + public static String post(String url, Map params) throws Exception { + return post(url, JSON.toJSONString(params), CONNECT_TIMEOUT_DEFAULT_IN_MILL, SOCKET_TIMEOUT_DEFAULT_IN_MILL); + } + + public static String post(String url, String params) throws Exception { + return post(url, params, CONNECT_TIMEOUT_DEFAULT_IN_MILL, SOCKET_TIMEOUT_DEFAULT_IN_MILL); + } + + public static String post(String url, Map params, + int connectTimeoutInMill, int socketTimeoutInMill) throws Exception { + return post(url, JSON.toJSONString(params), connectTimeoutInMill, socketTimeoutInMill); + } + + public static String post(String url, String params, + int connectTimeoutInMill, int socketTimeoutInMill) throws Exception { + Content content = Request.Post(url) + .connectTimeout(connectTimeoutInMill) + .socketTimeout(socketTimeoutInMill) + .addHeader("Content-Type", "application/json") + .bodyString(params, ContentType.APPLICATION_JSON) + .execute() + .returnContent(); + if (content == null) { + return null; + } + return content.asString(UTF_8); + } +} diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/util/TSDBUtils.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/util/TSDBUtils.java new file mode 100644 index 00000000..ed01d877 --- /dev/null +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/util/TSDBUtils.java @@ -0,0 +1,72 @@ +package com.alibaba.datax.plugin.writer.util; + +import com.alibaba.datax.plugin.writer.conn.DataPoint4TSDB; +import com.alibaba.fastjson.JSON; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. 
+ * Function:TSDB Utils + * + * @author Benedict Jin + * @since 2019-03-29 + */ +public final class TSDBUtils { + + private static final Logger LOG = LoggerFactory.getLogger(TSDBUtils.class); + + private TSDBUtils() { + } + + public static String version(String address) { + String url = String.format("%s/api/version", address); + String rsp; + try { + rsp = HttpUtils.get(url); + } catch (Exception e) { + throw new RuntimeException(e); + } + return rsp; + } + + public static String config(String address) { + String url = String.format("%s/api/config", address); + String rsp; + try { + rsp = HttpUtils.get(url); + } catch (Exception e) { + throw new RuntimeException(e); + } + return rsp; + } + + public static boolean put(String address, List dps) { + return put(address, JSON.toJSON(dps)); + } + + public static boolean put(String address, DataPoint4TSDB dp) { + return put(address, JSON.toJSON(dp)); + } + + private static boolean put(String address, Object o) { + return put(address, o.toString()); + } + + public static boolean put(String address, String s) { + String url = String.format("%s/api/put", address); + String rsp; + try { + rsp = HttpUtils.post(url, s); + // If successful, the returned content should be null. + assert rsp == null; + } catch (Exception e) { + LOG.error("Address: {}, DataPoints: {}", url, s); + throw new RuntimeException(e); + } + return true; + } +} diff --git a/tsdbwriter/src/main/resources/plugin.json b/tsdbwriter/src/main/resources/plugin.json new file mode 100755 index 00000000..78c8273f --- /dev/null +++ b/tsdbwriter/src/main/resources/plugin.json @@ -0,0 +1,10 @@ +{ + "name": "tsdbwriter", + "class": "com.alibaba.datax.plugin.writer.tsdbwriter.TSDBWriter", + "description": { + "useScene": "往 TSDB 中摄入数据点", + "mechanism": "调用 TSDB 的 /api/put 接口,实现数据点的写入", + "warn": "" + }, + "developer": "Benedict Jin" +} diff --git a/tsdbwriter/src/main/resources/plugin_job_template.json b/tsdbwriter/src/main/resources/plugin_job_template.json new file mode 100644 index 00000000..5d9b43db --- /dev/null +++ b/tsdbwriter/src/main/resources/plugin_job_template.json @@ -0,0 +1,6 @@ +{ + "name": "tsdbwriter", + "parameter": { + "endpoint": "http://localhost:8242" + } +} diff --git a/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/conn/TSDBConnectionTest.java b/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/conn/TSDBConnectionTest.java new file mode 100644 index 00000000..455f4ce6 --- /dev/null +++ b/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/conn/TSDBConnectionTest.java @@ -0,0 +1,30 @@ +package com.alibaba.datax.plugin.writer.conn; + +import org.junit.Assert; +import org.junit.Ignore; +import org.junit.Test; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. 
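+ * Note: requires a reachable TSDB at TSDB_ADDRESS, so the class stays
+ * @Ignore'd by default.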
+ * Function:TSDBConnection Test + * + * @author Benedict Jin + * @since 2019-03-29 + */ +@Ignore +public class TSDBConnectionTest { + + private static final String TSDB_ADDRESS = "http://localhost:8240"; + + @Test + public void testVersion() { + String version = new TSDBConnection(TSDB_ADDRESS).version(); + Assert.assertNotNull(version); + } + + @Test + public void testIsSupported() { + Assert.assertTrue(new TSDBConnection(TSDB_ADDRESS).isSupported()); + } +} diff --git a/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/Const.java b/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/Const.java new file mode 100644 index 00000000..34b074d6 --- /dev/null +++ b/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/Const.java @@ -0,0 +1,18 @@ +package com.alibaba.datax.plugin.writer.util; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. + * Function:Const + * + * @author Benedict Jin + * @since 2019-03-29 + */ +final class Const { + + private Const() { + } + + static final String OPENTSDB_ADDRESS = "http://localhost:8242"; + static final String TSDB_ADDRESS = "http://localhost:8240"; +} diff --git a/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/HttpUtilsTest.java b/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/HttpUtilsTest.java new file mode 100644 index 00000000..69f26b80 --- /dev/null +++ b/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/HttpUtilsTest.java @@ -0,0 +1,39 @@ +package com.alibaba.datax.plugin.writer.util; + +import org.junit.Assert; +import org.junit.Ignore; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. + * Function:HttpUtils Test + * + * @author Benedict Jin + * @since 2019-03-29 + */ +@Ignore +public class HttpUtilsTest { + + @Test + public void testSimpleCase() throws Exception { + String url = "https://httpbin.org/post"; + Map params = new HashMap(); + params.put("foo", "bar"); + + String rsp = HttpUtils.post(url, params); + System.out.println(rsp); + Assert.assertNotNull(rsp); + } + + @Test + public void testGet() throws Exception { + String url = String.format("%s/api/version", Const.OPENTSDB_ADDRESS); + String rsp = HttpUtils.get(url); + System.out.println(rsp); + Assert.assertNotNull(rsp); + } +} diff --git a/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/TSDBTest.java b/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/TSDBTest.java new file mode 100644 index 00000000..7d22bb72 --- /dev/null +++ b/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/TSDBTest.java @@ -0,0 +1,28 @@ +package com.alibaba.datax.plugin.writer.util; + +import org.junit.Assert; +import org.junit.Ignore; +import org.junit.Test; + +/** + * Copyright @ 2019 alibaba.com + * All right reserved. 
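+ * Note: smoke-tests TSDBUtils.version against live TSDB and OpenTSDB
+ * endpoints, so it is also @Ignore'd by default.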
+ * Function:TSDB Test + * + * @author Benedict Jin + * @since 2019-04-11 + */ +@Ignore +public class TSDBTest { + + @Test + public void testVersion() { + String version = TSDBUtils.version(Const.TSDB_ADDRESS); + Assert.assertNotNull(version); + System.out.println(version); + + version = TSDBUtils.version(Const.OPENTSDB_ADDRESS); + Assert.assertNotNull(version); + System.out.println(version); + } +} From 2437bd64bc5706c09eaae88ea2b8e72ad7a65900 Mon Sep 17 00:00:00 2001 From: asdf2014 Date: Fri, 26 Apr 2019 17:15:50 +0800 Subject: [PATCH 4/7] Add OpenTSDB reader and TSDB writer into README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 725dbbff..3076872b 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,8 @@ DataX目前已经有了比较全面的插件体系,主流的RDBMS数据库、N | | FTP | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/ftpreader/doc/ftpreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/ftpwriter/doc/ftpwriter.md)| | | HDFS | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/hdfsreader/doc/hdfsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md)| | | Elasticsearch | | √ |[写](https://github.com/alibaba/DataX/blob/master/elasticsearchwriter/doc/elasticsearchwriter.md)| +| 时间序列数据库 | OpenTSDB | √ | |[读](https://github.com/alibaba/DataX/blob/master/opentsdbreader/doc/opentsdbreader.md)| +| | TSDB | | √ |[写](https://github.com/alibaba/DataX/blob/master/tsdbwriter/doc/tsdbhttpwriter.md)| # 我要开发新的插件 请点击:[DataX插件开发宝典](https://github.com/alibaba/DataX/blob/master/dataxPluginDev.md) From 07cb166c5ba65eed4a1879936da6c992a2c3a54e Mon Sep 17 00:00:00 2001 From: "bake.snn" Date: Thu, 23 May 2019 17:27:26 +0800 Subject: [PATCH 5/7] fix#342 support phoenix table name lower case. 
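
Phoenix folds unquoted SQL identifiers to upper case, so schemas, tables
and columns created with lower-case names could not be addressed by the
statements the reader and writer generated from the raw configured names.
This change double-quotes schema, table and column names when building
the SQL statements, which makes Phoenix preserve the configured case.

Below is a minimal sketch of the quoting rule the patch applies; the
class name and sample identifiers are illustrative only:

```java
public class QuoteIdentifierSketch {

    // Mirrors the fullTableName construction in HBase20SQLReaderHelper:
    // unquoted my_schema.test resolves to MY_SCHEMA.TEST in Phoenix,
    // while "my_schema"."test" keeps the case exactly as written.
    static String fullTableName(String schema, String tableName) {
        if (schema != null && !schema.isEmpty()) {
            return "\"" + schema + "\".\"" + tableName + "\"";
        }
        return "\"" + tableName + "\"";
    }

    public static void main(String[] args) {
        System.out.println(fullTableName("my_schema", "test")); // "my_schema"."test"
        System.out.println(fullTableName(null, "TEST"));        // "TEST"
    }
}
```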
--- README.md | 1 + .../reader/hbase20xsqlreader/HBase20SQLReaderHelper.java | 6 +++--- .../writer/hbase20xsqlwriter/HBase20xSQLWriterTask.java | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 3076872b..af79baba 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,7 @@ DataX目前已经有了比较全面的插件体系,主流的RDBMS数据库、N | | Hbase0.94 | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/hbase094xreader/doc/hbase094xreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase094xwriter/doc/hbase094xwriter.md)| | | Hbase1.1 | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/hbase11xreader/doc/hbase11xreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase11xwriter/doc/hbase11xwriter.md)| | | Phoenix4.x | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/hbase11xsqlreader/doc/hbase11xsqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase11xsqlwriter/doc/hbase11xsqlwriter.md)| +| | Phoenix5.x | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/hbase20xsqlreader/doc/hbase20xsqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase20xsqlwriter/doc/hbase20xsqlwriter.md)| | | MongoDB | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/mongoreader/doc/mongoreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/mongowriter/doc/mongowriter.md)| | | Hive | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/hdfsreader/doc/hdfsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md)| | 无结构化数据存储 | TxtFile | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/txtfilereader/doc/txtfilereader.md) 、[写](https://github.com/alibaba/DataX/blob/master/txtfilewriter/doc/txtfilewriter.md)| diff --git a/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20SQLReaderHelper.java b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20SQLReaderHelper.java index f2d880af..0edc993f 100644 --- a/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20SQLReaderHelper.java +++ b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20SQLReaderHelper.java @@ -49,9 +49,9 @@ public class HBase20SQLReaderHelper { String schema = configuration.getString(Key.SCHEMA, null); String tableName = configuration.getNecessaryValue(Key.TABLE, HBase20xSQLReaderErrorCode.REQUIRED_VALUE); if (schema != null && !schema.isEmpty()) { - fullTableName = schema + "." 
+ tableName; + fullTableName = "\"" + schema + "\".\"" + tableName + "\""; } else { - fullTableName = tableName; + fullTableName = "\"" + tableName + "\""; } // 如果列名未配置,默认读取全部列* columnNames = configuration.getList(Key.COLUMN, String.class); @@ -248,7 +248,7 @@ public class HBase20SQLReaderHelper { String querySql; StringBuilder columnBuilder = new StringBuilder(); for (String columnName : columnNames) { - columnBuilder.append(columnName).append(","); + columnBuilder.append("\"").append(columnName).append("\","); } columnBuilder.setLength(columnBuilder.length() -1); if (StringUtils.isBlank(where)) { diff --git a/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLWriterTask.java b/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLWriterTask.java index 5557e674..43f710b7 100644 --- a/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLWriterTask.java +++ b/hbase20xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase20xsqlwriter/HBase20xSQLWriterTask.java @@ -72,9 +72,9 @@ public class HBase20xSQLWriterTask { batchSize = configuration.getInt(Key.BATCHSIZE, Constant.DEFAULT_BATCH_ROW_COUNT); String schema = configuration.getString(Key.SCHEMA); String tableName = configuration.getNecessaryValue(Key.TABLE, HBase20xSQLWriterErrorCode.REQUIRED_VALUE); - fullTableName = tableName; + fullTableName = "\"" + tableName + "\""; if (schema != null && !schema.isEmpty()) { - fullTableName = schema + "." + tableName; + fullTableName = "\"" + schema + "\".\"" + tableName + "\""; } columns = configuration.getList(Key.COLUMN, String.class); if (pstmt == null) { @@ -125,7 +125,7 @@ public class HBase20xSQLWriterTask { int[] types = new int[numberOfColumnsToWrite]; StringBuilder columnNamesBuilder = new StringBuilder(); for (String columnName : columns) { - columnNamesBuilder.append(columnName).append(","); + columnNamesBuilder.append("\"").append(columnName).append("\","); } columnNamesBuilder.setLength(columnNamesBuilder.length() - 1); // 查询一条数据获取表meta信息 From 4c62feeb7e0d67dbd456bc9b425023e4d764a752 Mon Sep 17 00:00:00 2001 From: "bake.snn" Date: Wed, 29 May 2019 11:19:02 +0800 Subject: [PATCH 6/7] upgrade phoenix thin client version. --- hbase20xsqlreader/pom.xml | 12 +++--------- hbase20xsqlwriter/doc/hbase20xsqlwriter.md | 8 ++++++++ hbase20xsqlwriter/pom.xml | 7 +++---- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/hbase20xsqlreader/pom.xml b/hbase20xsqlreader/pom.xml index 2df9a1a2..56804b7b 100644 --- a/hbase20xsqlreader/pom.xml +++ b/hbase20xsqlreader/pom.xml @@ -14,7 +14,7 @@ jar - 5.0.0-HBase-2.0 + 5.1.0-HBase-2.0.0.2 @@ -31,15 +31,9 @@ - org.apache.phoenix - phoenix-queryserver + com.aliyun.phoenix + ali-phoenix-shaded-thin-client ${phoenix.version} - - - servlet-api - javax.servlet - - diff --git a/hbase20xsqlwriter/doc/hbase20xsqlwriter.md b/hbase20xsqlwriter/doc/hbase20xsqlwriter.md index 63e4a431..2cc8cb41 100644 --- a/hbase20xsqlwriter/doc/hbase20xsqlwriter.md +++ b/hbase20xsqlwriter/doc/hbase20xsqlwriter.md @@ -95,7 +95,15 @@ HBase20xsqlwriter实现了向hbase中的SQL表(phoenix)批量导入数据的功 * 描述:插件名字,必须是`hbase11xsqlwriter` * 必选:是 * 默认值:无 + +* **schema** + * 描述:表所在的schema + + * 必选:否
+ + * 默认值:无
+ * **table** * 描述:要导入的表名,大小写敏感,通常phoenix表都是**大写**表名 diff --git a/hbase20xsqlwriter/pom.xml b/hbase20xsqlwriter/pom.xml index 2dc5f4c7..9d363f63 100644 --- a/hbase20xsqlwriter/pom.xml +++ b/hbase20xsqlwriter/pom.xml @@ -14,8 +14,7 @@ jar - 5.0.0-HBase-2.0 - 1.12.0 + 5.1.0-HBase-2.0.0.2 1.8 @@ -32,8 +31,8 @@ - org.apache.phoenix - phoenix-queryserver + com.aliyun.phoenix + ali-phoenix-shaded-thin-client ${phoenix.version} From ca32cbd4790e3c0ac17e240ddb4a82e786601fac Mon Sep 17 00:00:00 2001 From: "qingdao.gqs" Date: Wed, 29 May 2019 15:20:51 +0800 Subject: [PATCH 7/7] =?UTF-8?q?hbase11xsqlwriter=E6=94=AF=E6=8C=81phoenix?= =?UTF-8?q?=20thinclient?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- hbase11xsqlwriter/pom.xml | 22 + .../writer/hbase11xsqlwriter/Constant.java | 1 + .../hbase11xsqlwriter/HbaseSQLHelper.java | 130 +++++- .../HbaseSQLWriterConfig.java | 80 +++- .../hbase11xsqlwriter/HbaseSQLWriterTask.java | 33 +- .../plugin/writer/hbase11xsqlwriter/Key.java | 7 + .../hbase11xsqlwriter/ThinClientPTable.java | 402 ++++++++++++++++++ .../plugin/rdbms/util/SqlFormatUtil.java | 359 ---------------- 8 files changed, 644 insertions(+), 390 deletions(-) create mode 100644 hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/ThinClientPTable.java delete mode 100755 plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/SqlFormatUtil.java diff --git a/hbase11xsqlwriter/pom.xml b/hbase11xsqlwriter/pom.xml index 0b8a2d51..3b75167e 100644 --- a/hbase11xsqlwriter/pom.xml +++ b/hbase11xsqlwriter/pom.xml @@ -18,6 +18,8 @@ 4.11.0-HBase-1.1 2.7.1 1.8 + 3.2.0 + 4.4.1 @@ -47,6 +49,11 @@ phoenix-core ${phoenix.version} + + org.apache.phoenix + phoenix-queryserver-client + ${phoenix.version} + com.google.guava guava @@ -58,6 +65,21 @@ ${commons-codec.version} + + + org.apache.httpcomponents + httpclient + ${httpclient.version} + + + + + com.google.protobuf + protobuf-java + ${protobuf.version} + + + junit diff --git a/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/Constant.java b/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/Constant.java index d45d30e1..5812655d 100755 --- a/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/Constant.java +++ b/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/Constant.java @@ -8,6 +8,7 @@ public final class Constant { public static final boolean DEFAULT_LAST_COLUMN_IS_VERSION = false; // 默认最后一列不是version列 public static final int DEFAULT_BATCH_ROW_COUNT = 256; // 默认一次写256行 public static final boolean DEFAULT_TRUNCATE = false; // 默认开始的时候不清空表 + public static final boolean DEFAULT_USE_THIN_CLIENT = false; // 默认不用thin客户端 public static final int TYPE_UNSIGNED_TINYINT = 11; public static final int TYPE_UNSIGNED_SMALLINT = 13; diff --git a/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/HbaseSQLHelper.java b/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/HbaseSQLHelper.java index 6146ac8d..41e57d4e 100644 --- a/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/HbaseSQLHelper.java +++ b/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/HbaseSQLHelper.java @@ -11,6 +11,7 @@ import org.apache.phoenix.jdbc.PhoenixConnection; import org.apache.phoenix.schema.ColumnNotFoundException; import org.apache.phoenix.schema.MetaDataClient; 
import org.apache.phoenix.schema.PTable; +import org.apache.phoenix.schema.types.PDataType; import org.apache.phoenix.util.SchemaUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -18,7 +19,11 @@ import org.slf4j.LoggerFactory; import java.io.IOException; import java.sql.Connection; import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; import java.sql.SQLException; +import java.sql.Statement; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -28,6 +33,8 @@ import java.util.Map; public class HbaseSQLHelper { private static final Logger LOG = LoggerFactory.getLogger(HbaseSQLHelper.class); + public static ThinClientPTable ptable; + /** * 将datax的配置解析成sql writer的配置 */ @@ -53,6 +60,11 @@ public class HbaseSQLHelper { return new Pair(zkQuorum, znode); } + public static Map getThinConnectConfig(String hbaseCfgString) { + assert hbaseCfgString != null; + return JSON.parseObject(hbaseCfgString, new TypeReference>() {}); + } + /** * 校验配置 */ @@ -61,12 +73,12 @@ public class HbaseSQLHelper { Connection conn = getJdbcConnection(cfg); // 检查表:存在,可用 - checkTable(conn, cfg.getTableName()); + checkTable(conn, cfg.getNamespace(), cfg.getTableName(), cfg.isThinClient()); // 校验元数据:配置中给出的列必须是目的表中已经存在的列 PTable schema = null; try { - schema = getTableSchema(conn, cfg.getTableName()); + schema = getTableSchema(conn, cfg.getNamespace(), cfg.getTableName(), cfg.isThinClient()); } catch (SQLException e) { throw DataXException.asDataXException(HbaseSQLWriterErrorCode.GET_HBASE_CONNECTION_ERROR, "无法获取目的表" + cfg.getTableName() + "的元数据信息,表可能不是SQL表或表名配置错误,请检查您的配置 或者 联系 HBase 管理员.", e); @@ -97,7 +109,11 @@ public class HbaseSQLHelper { Connection conn; try { Class.forName("org.apache.phoenix.jdbc.PhoenixDriver"); - conn = DriverManager.getConnection(connStr); + if (cfg.isThinClient()) { + conn = getThinClientJdbcConnection(cfg); + } else { + conn = DriverManager.getConnection(connStr); + } conn.setAutoCommit(false); } catch (Throwable e) { throw DataXException.asDataXException(HbaseSQLWriterErrorCode.GET_HBASE_CONNECTION_ERROR, @@ -107,6 +123,32 @@ public class HbaseSQLHelper { return conn; } + /** + * 创建 thin client jdbc连接 + * @param cfg + * @return + * @throws SQLException + */ + public static Connection getThinClientJdbcConnection(HbaseSQLWriterConfig cfg) throws SQLException { + String connStr = cfg.getConnectionString(); + LOG.info("Connecting to HBase cluster [" + connStr + "] use thin client ..."); + Connection conn = DriverManager.getConnection(connStr, cfg.getUsername(), cfg.getPassword()); + String userNamespaceQuery = "use " + cfg.getNamespace(); + Statement statement = null; + try { + statement = conn.createStatement(); + statement.executeUpdate(userNamespaceQuery); + return conn; + } catch (Exception e) { + throw DataXException.asDataXException(HbaseSQLWriterErrorCode.GET_HBASE_CONNECTION_ERROR, + "无法连接配置的namespace, 请检查配置 或者 联系 HBase 管理员.", e); + } finally { + if (statement != null) { + statement.close(); + } + } + } + /** * 获取一张表的元数据信息 * @param conn hbsae sql的jdbc连接 @@ -121,6 +163,70 @@ public class HbaseSQLHelper { return mdc.updateCache(schemaName, tableName).getTable(); } + /** + * 获取一张表的元数据信息 + * @param conn + * @param namespace + * @param fullTableName + * @param isThinClient 是否使用thin client + * @return 表的元数据 + * @throws SQLException + */ + public static PTable getTableSchema(Connection conn, String namespace, String fullTableName, boolean isThinClient) + throws + SQLException { + LOG.info("Start to get table schema of 
namespace=" + namespace + " , fullTableName=" + fullTableName); + if (!isThinClient) { + return getTableSchema(conn, fullTableName); + } else { + if (ptable == null) { + ResultSet result = conn.getMetaData().getColumns(null, namespace, fullTableName, null); + try { + ThinClientPTable retTable = new ThinClientPTable(); + retTable.setColTypeMap(parseColType(result)); + ptable = retTable; + }finally { + if (result != null) { + result.close(); + } + } + } + return ptable; + } + + } + + /** + * 解析字段 + * @param rs + * @return + * @throws SQLException + */ + public static Map parseColType(ResultSet rs) throws SQLException { + Map cols = new HashMap(); + ResultSetMetaData md = rs.getMetaData(); + int columnCount = md.getColumnCount(); + + while (rs.next()) { + String colName = null; + PDataType colType = null; + for (int i = 1; i <= columnCount; i++) { + if (md.getColumnLabel(i).equals("TYPE_NAME")) { + colType = PDataType.fromSqlTypeName((String) rs.getObject(i)); + } else if (md.getColumnLabel(i).equals("COLUMN_NAME")) { + colName = (String) rs.getObject(i); + } + } + if (colType == null || colName == null) { + throw new SQLException("ColType or colName is null, colType : " + colType + " , colName : " + colName); + } + cols.put(colName, new ThinClientPTable.ThinClientPColumn(colName, colType)); + } + return cols; + } + + /** * 清空表 */ @@ -148,6 +254,24 @@ public class HbaseSQLHelper { } } + /** + * 检查表 + * @param conn + * @param namespace + * @param tableName + * @param isThinClient + * @throws DataXException + */ + public static void checkTable(Connection conn, String namespace, String tableName, boolean isThinClient) + throws DataXException { + if (!isThinClient) { + checkTable(conn, tableName); + } else { + //ignore check table when use thin client + } + } + + /** * 检查表:表要存在,enabled */ diff --git a/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/HbaseSQLWriterConfig.java b/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/HbaseSQLWriterConfig.java index ce8561fe..38ca58a9 100644 --- a/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/HbaseSQLWriterConfig.java +++ b/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/HbaseSQLWriterConfig.java @@ -2,6 +2,7 @@ package com.alibaba.datax.plugin.writer.hbase11xsqlwriter; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; +import com.google.common.base.Strings; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.util.Pair; @@ -9,6 +10,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.List; +import java.util.Map; /** * HBase SQL writer config @@ -30,6 +32,10 @@ public class HbaseSQLWriterConfig { private NullModeType nullMode; private int batchSize; // 一次批量写入多少行 private boolean truncate; // 导入开始前是否要清空目的表 + private boolean isThinClient; + private String namespace; + private String username; + private String password; /** * @return 获取原始的datax配置 @@ -81,6 +87,22 @@ public class HbaseSQLWriterConfig { return truncate; } + public boolean isThinClient() { + return isThinClient; + } + + public String getNamespace() { + return namespace; + } + + public String getPassword() { + return password; + } + + public String getUsername() { + return username; + } + /** * @param dataxCfg * @return @@ -100,6 +122,7 @@ public class HbaseSQLWriterConfig { cfg.nullMode = 
NullModeType.getByTypeName(dataxCfg.getString(Key.NULL_MODE, Constant.DEFAULT_NULL_MODE)); cfg.batchSize = dataxCfg.getInt(Key.BATCH_SIZE, Constant.DEFAULT_BATCH_ROW_COUNT); cfg.truncate = dataxCfg.getBool(Key.TRUNCATE, Constant.DEFAULT_TRUNCATE); + cfg.isThinClient = dataxCfg.getBool(Key.THIN_CLIENT, Constant.DEFAULT_USE_THIN_CLIENT); // 4. 打印解析出来的配置 LOG.info("HBase SQL writer config parsed:" + cfg.toString()); @@ -117,31 +140,52 @@ public class HbaseSQLWriterConfig { "读 Hbase 时需要配置hbaseConfig,其内容为 Hbase 连接信息,请联系 Hbase PE 获取该信息."); } - // 解析zk服务器和znode信息 - Pair zkCfg; - try { - zkCfg = HbaseSQLHelper.getHbaseConfig(hbaseCfg); - } catch (Throwable t) { - // 解析hbase配置错误 - throw DataXException.asDataXException( + + if (dataxCfg.getBool(Key.THIN_CLIENT, Constant.DEFAULT_USE_THIN_CLIENT)) { + Map thinConnectConfig = HbaseSQLHelper.getThinConnectConfig(hbaseCfg); + String thinConnectStr = thinConnectConfig.get(Key.HBASE_THIN_CONNECT_URL); + cfg.namespace = thinConnectConfig.get(Key.HBASE_THIN_CONNECT_NAMESPACE); + cfg.username = thinConnectConfig.get(Key.HBASE_THIN_CONNECT_USERNAME); + cfg.password = thinConnectConfig.get(Key.HBASE_THIN_CONNECT_PASSWORD); + if (Strings.isNullOrEmpty(thinConnectStr)) { + throw DataXException.asDataXException( + HbaseSQLWriterErrorCode.ILLEGAL_VALUE, + "thinClient=true的轻客户端模式下HBase的hbase.thin.connect.url配置不能为空,请联系HBase PE获取该信息."); + } + if (Strings.isNullOrEmpty(cfg.namespace) || Strings.isNullOrEmpty(cfg.username) || Strings + .isNullOrEmpty(cfg.password)) { + throw DataXException.asDataXException(HbaseSQLWriterErrorCode.ILLEGAL_VALUE, + "thinClient=true的轻客户端模式下HBase的hbase.thin.connect.namespce|username|password配置不能为空,请联系HBase " + + "PE获取该信息."); + } + cfg.connectionString = thinConnectStr; + } else { + // 解析zk服务器和znode信息 + Pair zkCfg; + try { + zkCfg = HbaseSQLHelper.getHbaseConfig(hbaseCfg); + } catch (Throwable t) { + // 解析hbase配置错误 + throw DataXException.asDataXException( HbaseSQLWriterErrorCode.REQUIRED_VALUE, "解析hbaseConfig出错,请确认您配置的hbaseConfig为合法的json数据格式,内容正确."); - } - String zkQuorum = zkCfg.getFirst(); - String znode = zkCfg.getSecond(); - if (zkQuorum == null || zkQuorum.isEmpty()) { - throw DataXException.asDataXException( + } + String zkQuorum = zkCfg.getFirst(); + String znode = zkCfg.getSecond(); + if (zkQuorum == null || zkQuorum.isEmpty()) { + throw DataXException.asDataXException( HbaseSQLWriterErrorCode.ILLEGAL_VALUE, "HBase的hbase.zookeeper.quorum配置不能为空,请联系HBase PE获取该信息."); - } - if (znode == null || znode.isEmpty()) { - throw DataXException.asDataXException( + } + if (znode == null || znode.isEmpty()) { + throw DataXException.asDataXException( HbaseSQLWriterErrorCode.ILLEGAL_VALUE, "HBase的zookeeper.znode.parent配置不能为空,请联系HBase PE获取该信息."); - } + } - // 生成sql使用的连接字符串, 格式: jdbc:phoenix:zk_quorum:2181:/znode_parent - cfg.connectionString = "jdbc:phoenix:" + zkQuorum + ":2181:" + znode; + // 生成sql使用的连接字符串, 格式: jdbc:phoenix:zk_quorum:2181:/znode_parent + cfg.connectionString = "jdbc:phoenix:" + zkQuorum + ":2181:" + znode; + } } private static void parseTableConfig(HbaseSQLWriterConfig cfg, Configuration dataxCfg) { diff --git a/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/HbaseSQLWriterTask.java b/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/HbaseSQLWriterTask.java index 1b00ea3f..0e752b01 100644 --- a/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/HbaseSQLWriterTask.java +++ 
b/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/HbaseSQLWriterTask.java @@ -157,12 +157,20 @@ public class HbaseSQLWriterTask { private PreparedStatement createPreparedStatement() throws SQLException { // 生成列名集合,列之间用逗号分隔: col1,col2,col3,... StringBuilder columnNamesBuilder = new StringBuilder(); - for (String col : cfg.getColumns()) { - // 列名使用双引号,则不自动转换为全大写,而是保留用户配置的大小写 - columnNamesBuilder.append("\""); - columnNamesBuilder.append(col); - columnNamesBuilder.append("\""); - columnNamesBuilder.append(","); + if (cfg.isThinClient()) { + for (String col : cfg.getColumns()) { + // thin 客户端不使用双引号 + columnNamesBuilder.append(col); + columnNamesBuilder.append(","); + } + } else { + for (String col : cfg.getColumns()) { + // 列名使用双引号,则不自动转换为全大写,而是保留用户配置的大小写 + columnNamesBuilder.append("\""); + columnNamesBuilder.append(col); + columnNamesBuilder.append("\""); + columnNamesBuilder.append(","); + } } columnNamesBuilder.setLength(columnNamesBuilder.length() - 1); // 移除末尾多余的逗号 String columnNames = columnNamesBuilder.toString(); @@ -171,9 +179,13 @@ public class HbaseSQLWriterTask { // 生成UPSERT模板 String tableName = cfg.getTableName(); - // 表名使用双引号,则不自动转换为全大写,而是保留用户配置的大小写 - StringBuilder upsertBuilder = - new StringBuilder("upsert into \"" + tableName + "\" (" + columnNames + " ) values ("); + StringBuilder upsertBuilder = null; + if (cfg.isThinClient()) { + upsertBuilder = new StringBuilder("upsert into " + tableName + " (" + columnNames + " ) values ("); + } else { + // 表名使用双引号,则不自动转换为全大写,而是保留用户配置的大小写 + upsertBuilder = new StringBuilder("upsert into \"" + tableName + "\" (" + columnNames + " ) values ("); + } for (int i = 0; i < cfg.getColumns().size(); i++) { upsertBuilder.append("?,"); } @@ -191,7 +203,8 @@ public class HbaseSQLWriterTask { */ private int[] getColumnSqlType(List columnNames) throws SQLException { int[] types = new int[numberOfColumnsToWrite]; - PTable ptable = HbaseSQLHelper.getTableSchema(connection, cfg.getTableName()); + PTable ptable = HbaseSQLHelper + .getTableSchema(connection, cfg.getNamespace(), cfg.getTableName(), cfg.isThinClient()); for (int i = 0; i < columnNames.size(); i++) { String name = columnNames.get(i); diff --git a/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/Key.java b/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/Key.java index 1b4f3816..131aba66 100755 --- a/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/Key.java +++ b/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/Key.java @@ -10,6 +10,10 @@ public final class Key { public final static String HBASE_CONFIG = "hbaseConfig"; public final static String HBASE_ZK_QUORUM = HConstants.ZOOKEEPER_QUORUM; public final static String HBASE_ZNODE_PARENT = HConstants.ZOOKEEPER_ZNODE_PARENT; + public final static String HBASE_THIN_CONNECT_URL = "hbase.thin.connect.url"; + public final static String HBASE_THIN_CONNECT_NAMESPACE = "hbase.thin.connect.namespace"; + public final static String HBASE_THIN_CONNECT_USERNAME = "hbase.thin.connect.username"; + public final static String HBASE_THIN_CONNECT_PASSWORD = "hbase.thin.connect.password"; /** * 【必选】writer要写入的表的表名 @@ -34,6 +38,9 @@ public final class Key { */ public static final String TRUNCATE = "truncate"; + + public static final String THIN_CLIENT = "thinClient"; + /** * 【可选】批量写入的最大行数,默认100行 */ diff --git 
a/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/ThinClientPTable.java b/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/ThinClientPTable.java new file mode 100644 index 00000000..49c2e061 --- /dev/null +++ b/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/ThinClientPTable.java @@ -0,0 +1,402 @@ +package com.alibaba.datax.plugin.writer.hbase11xsqlwriter; + +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.phoenix.hbase.index.util.KeyValueBuilder; +import org.apache.phoenix.index.IndexMaintainer; +import org.apache.phoenix.jdbc.PhoenixConnection; +import org.apache.phoenix.schema.AmbiguousColumnException; +import org.apache.phoenix.schema.ColumnFamilyNotFoundException; +import org.apache.phoenix.schema.ColumnNotFoundException; +import org.apache.phoenix.schema.PColumn; +import org.apache.phoenix.schema.PColumnFamily; +import org.apache.phoenix.schema.PIndexState; +import org.apache.phoenix.schema.PName; +import org.apache.phoenix.schema.PRow; +import org.apache.phoenix.schema.PTable; +import org.apache.phoenix.schema.PTableKey; +import org.apache.phoenix.schema.PTableType; +import org.apache.phoenix.schema.RowKeySchema; +import org.apache.phoenix.schema.SortOrder; +import org.apache.phoenix.schema.types.PDataType; + +import java.util.List; +import java.util.Map; + +public class ThinClientPTable implements PTable { + + private Map colMap; + + public void setColTypeMap(Map colMap) { + this.colMap = colMap; + } + + @Override + public long getTimeStamp() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public long getSequenceNumber() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public long getIndexDisableTimestamp() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PName getName() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PName getSchemaName() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PName getTableName() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PName getTenantId() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PTableType getType() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PName getPKName() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public List getPKColumns() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public List getColumns() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public List getColumnFamilies() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PColumnFamily getColumnFamily(byte[] bytes) throws ColumnFamilyNotFoundException { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PColumnFamily getColumnFamily(String s) throws ColumnFamilyNotFoundException { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PColumn getColumnForColumnName(String colname) throws ColumnNotFoundException, AmbiguousColumnException { + if (!colMap.containsKey(colname)) { + throw new ColumnNotFoundException("Col " + colname + " not found"); + } + return colMap.get(colname); + } + + @Override + public PColumn 
getColumnForColumnQualifier(byte[] bytes, byte[] bytes1) + throws ColumnNotFoundException, AmbiguousColumnException { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PColumn getPKColumn(String s) throws ColumnNotFoundException { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PRow newRow(KeyValueBuilder keyValueBuilder, long l, ImmutableBytesWritable immutableBytesWritable, boolean b, + byte[]... bytes) { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PRow newRow(KeyValueBuilder keyValueBuilder, ImmutableBytesWritable immutableBytesWritable, boolean b, + byte[]... bytes) { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public int newKey(ImmutableBytesWritable immutableBytesWritable, byte[][] bytes) { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public RowKeySchema getRowKeySchema() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public Integer getBucketNum() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public List getIndexes() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PIndexState getIndexState() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PName getParentName() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PName getParentTableName() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PName getParentSchemaName() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public List getPhysicalNames() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PName getPhysicalName() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public boolean isImmutableRows() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public boolean getIndexMaintainers(ImmutableBytesWritable immutableBytesWritable, + PhoenixConnection phoenixConnection) { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public IndexMaintainer getIndexMaintainer(PTable pTable, PhoenixConnection phoenixConnection) { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PName getDefaultFamilyName() { + return null; + } + + @Override + public boolean isWALDisabled() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public boolean isMultiTenant() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public boolean getStoreNulls() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public boolean isTransactional() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public ViewType getViewType() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public String getViewStatement() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public Short getViewIndexId() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PTableKey getKey() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public IndexType getIndexType() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + 
public int getBaseColumnCount() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public boolean rowKeyOrderOptimizable() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public int getRowTimestampColPos() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public long getUpdateCacheFrequency() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public boolean isNamespaceMapped() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public String getAutoPartitionSeqName() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public boolean isAppendOnlySchema() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public ImmutableStorageScheme getImmutableStorageScheme() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public QualifierEncodingScheme getEncodingScheme() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public EncodedCQCounter getEncodedCQCounter() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public boolean useStatsForParallelization() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public int getEstimatedSize() { + throw new UnsupportedOperationException("Not implement"); + } + + public static class ThinClientPColumn implements PColumn { + + private String colName; + + private PDataType pDataType; + + public ThinClientPColumn(String colName, PDataType pDataType) { + this.colName = colName; + this.pDataType = pDataType; + } + + @Override + public PName getName() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PName getFamilyName() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public int getPosition() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public Integer getArraySize() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public byte[] getViewConstant() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public boolean isViewReferenced() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public int getEstimatedSize() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public String getExpressionStr() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public boolean isRowTimestamp() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public boolean isDynamic() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public byte[] getColumnQualifierBytes() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public boolean isNullable() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public PDataType getDataType() { + return pDataType; + } + + @Override + public Integer getMaxLength() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public Integer getScale() { + throw new UnsupportedOperationException("Not implement"); + } + + @Override + public SortOrder getSortOrder() { + throw new UnsupportedOperationException("Not implement"); + } + } + +} diff --git 
diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/SqlFormatUtil.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/SqlFormatUtil.java
deleted file mode 100755
index 76137d31..00000000
--- a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/SqlFormatUtil.java
+++ /dev/null
@@ -1,359 +0,0 @@
-package com.alibaba.datax.plugin.rdbms.util;
-
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.Set;
-import java.util.StringTokenizer;
-
-// TODO delete it
-public class SqlFormatUtil {
-
-    private static final Set<String> BEGIN_CLAUSES = new HashSet<String>();
-    private static final Set<String> END_CLAUSES = new HashSet<String>();
-    private static final Set<String> LOGICAL = new HashSet<String>();
-    private static final Set<String> QUANTIFIERS = new HashSet<String>();
-    private static final Set<String> DML = new HashSet<String>();
-    private static final Set<String> MISC = new HashSet<String>();
-
-    private static final String WHITESPACE = " \n\r\f\t";
-
-    static {
-        BEGIN_CLAUSES.add("left");
-        BEGIN_CLAUSES.add("right");
-        BEGIN_CLAUSES.add("inner");
-        BEGIN_CLAUSES.add("outer");
-        BEGIN_CLAUSES.add("group");
-        BEGIN_CLAUSES.add("order");
-
-        END_CLAUSES.add("where");
-        END_CLAUSES.add("set");
-        END_CLAUSES.add("having");
-        END_CLAUSES.add("join");
-        END_CLAUSES.add("from");
-        END_CLAUSES.add("by");
-        END_CLAUSES.add("join");
-        END_CLAUSES.add("into");
-        END_CLAUSES.add("union");
-
-        LOGICAL.add("and");
-        LOGICAL.add("or");
-        LOGICAL.add("when");
-        LOGICAL.add("else");
-        LOGICAL.add("end");
-
-        QUANTIFIERS.add("in");
-        QUANTIFIERS.add("all");
-        QUANTIFIERS.add("exists");
-        QUANTIFIERS.add("some");
-        QUANTIFIERS.add("any");
-
-        DML.add("insert");
-        DML.add("update");
-        DML.add("delete");
-
-        MISC.add("select");
-        MISC.add("on");
-    }
-
-    static final String indentString = "    ";
-    static final String initial = "\n    ";
-
-    public static String format(String source) {
-        return new FormatProcess(source).perform();
-    }
-
-    private static class FormatProcess {
-        boolean beginLine = true;
-        boolean afterBeginBeforeEnd = false;
-        boolean afterByOrSetOrFromOrSelect = false;
-        boolean afterValues = false;
-        boolean afterOn = false;
-        boolean afterBetween = false;
-        boolean afterInsert = false;
-        int inFunction = 0;
-        int parensSinceSelect = 0;
-        private LinkedList<Integer> parenCounts = new LinkedList<Integer>();
-        private LinkedList<Boolean> afterByOrFromOrSelects = new LinkedList<Boolean>();
-
-        int indent = 1;
-
-        StringBuilder result = new StringBuilder();
-        StringTokenizer tokens;
-        String lastToken;
-        String token;
-        String lcToken;
-
-        public FormatProcess(String sql) {
-            tokens = new StringTokenizer(sql, "()+*/-=<>'`\"[]," + WHITESPACE,
-                    true);
-        }
-
-        public String perform() {
-
-            result.append(initial);
-
-            while (tokens.hasMoreTokens()) {
-                token = tokens.nextToken();
-                lcToken = token.toLowerCase();
-
-                if ("'".equals(token)) {
-                    String t;
-                    do {
-                        t = tokens.nextToken();
-                        token += t;
-                    } while (!"'".equals(t) && tokens.hasMoreTokens()); // cannot
-                                                                        // handle
-                                                                        // single
-                                                                        // quotes
-                } else if ("\"".equals(token)) {
-                    String t;
-                    do {
-                        t = tokens.nextToken();
-                        token += t;
-                    } while (!"\"".equals(t));
-                }
-
-                if (afterByOrSetOrFromOrSelect && ",".equals(token)) {
-                    commaAfterByOrFromOrSelect();
-                } else if (afterOn && ",".equals(token)) {
-                    commaAfterOn();
-                }
-
-                else if ("(".equals(token)) {
-                    openParen();
-                } else if (")".equals(token)) {
-                    closeParen();
-                }
-
-                else if (BEGIN_CLAUSES.contains(lcToken)) {
-                    beginNewClause();
-                }
-
-                else if (END_CLAUSES.contains(lcToken)) {
-                    endNewClause();
-                }
-
-                else if ("select".equals(lcToken)) {
-                    select();
-                }
-
-                else if (DML.contains(lcToken)) {
-                    updateOrInsertOrDelete();
-                }
-
-                else if ("values".equals(lcToken)) {
-                    values();
-                }
-
-                else if ("on".equals(lcToken)) {
-                    on();
-                }
-
-                else if (afterBetween && lcToken.equals("and")) {
-                    misc();
-                    afterBetween = false;
-                }
-
-                else if (LOGICAL.contains(lcToken)) {
-                    logical();
-                }
-
-                else if (isWhitespace(token)) {
-                    white();
-                }
-
-                else {
-                    misc();
-                }
-
-                if (!isWhitespace(token)) {
-                    lastToken = lcToken;
-                }
-
-            }
-            return result.toString();
-        }
-
-        private void commaAfterOn() {
-            out();
-            indent--;
-            newline();
-            afterOn = false;
-            afterByOrSetOrFromOrSelect = true;
-        }
-
-        private void commaAfterByOrFromOrSelect() {
-            out();
-            newline();
-        }
-
-        private void logical() {
-            if ("end".equals(lcToken)) {
-                indent--;
-            }
-            newline();
-            out();
-            beginLine = false;
-        }
-
-        private void on() {
-            indent++;
-            afterOn = true;
-            newline();
-            out();
-            beginLine = false;
-        }
-
-        private void misc() {
-            out();
-            if ("between".equals(lcToken)) {
-                afterBetween = true;
-            }
-            if (afterInsert) {
-                newline();
-                afterInsert = false;
-            } else {
-                beginLine = false;
-                if ("case".equals(lcToken)) {
-                    indent++;
-                }
-            }
-        }
-
-        private void white() {
-            if (!beginLine) {
-                result.append(" ");
-            }
-        }
-
-        private void updateOrInsertOrDelete() {
-            out();
-            indent++;
-            beginLine = false;
-            if ("update".equals(lcToken)) {
-                newline();
-            }
-            if ("insert".equals(lcToken)) {
-                afterInsert = true;
-            }
-        }
-
-        private void select() {
-            out();
-            indent++;
-            newline();
-            parenCounts.addLast(Integer.valueOf(parensSinceSelect));
-            afterByOrFromOrSelects.addLast(Boolean
-                    .valueOf(afterByOrSetOrFromOrSelect));
-            parensSinceSelect = 0;
-            afterByOrSetOrFromOrSelect = true;
-        }
-
-        private void out() {
-            result.append(token);
-        }
-
-        private void endNewClause() {
-            if (!afterBeginBeforeEnd) {
-                indent--;
-                if (afterOn) {
-                    indent--;
-                    afterOn = false;
-                }
-                newline();
-            }
-            out();
-            if (!"union".equals(lcToken)) {
-                indent++;
-            }
-            newline();
-            afterBeginBeforeEnd = false;
-            afterByOrSetOrFromOrSelect = "by".equals(lcToken)
-                    || "set".equals(lcToken) || "from".equals(lcToken);
-        }
-
-        private void beginNewClause() {
-            if (!afterBeginBeforeEnd) {
-                if (afterOn) {
-                    indent--;
-                    afterOn = false;
-                }
-                indent--;
-                newline();
-            }
-            out();
-            beginLine = false;
-            afterBeginBeforeEnd = true;
-        }
-
-        private void values() {
-            indent--;
-            newline();
-            out();
-            indent++;
-            newline();
-            afterValues = true;
-        }
-
-        private void closeParen() {
-            parensSinceSelect--;
-            if (parensSinceSelect < 0) {
-                indent--;
-                parensSinceSelect = parenCounts.removeLast().intValue();
-                afterByOrSetOrFromOrSelect = afterByOrFromOrSelects
-                        .removeLast().booleanValue();
-            }
-            if (inFunction > 0) {
-                inFunction--;
-                out();
-            } else {
-                if (!afterByOrSetOrFromOrSelect) {
-                    indent--;
-                    newline();
-                }
-                out();
-            }
-            beginLine = false;
-        }
-
-        private void openParen() {
-            if (isFunctionName(lastToken) || inFunction > 0) {
-                inFunction++;
-            }
-            beginLine = false;
-            if (inFunction > 0) {
-                out();
-            } else {
-                out();
-                if (!afterByOrSetOrFromOrSelect) {
-                    indent++;
-                    newline();
-                    beginLine = true;
-                }
-            }
-            parensSinceSelect++;
-        }
-
-        private static boolean isFunctionName(String tok) {
-            final char begin = tok.charAt(0);
-            final boolean isIdentifier = Character.isJavaIdentifierStart(begin)
-                    || '"' == begin;
-            return isIdentifier && !LOGICAL.contains(tok)
-                    && !END_CLAUSES.contains(tok) && !QUANTIFIERS.contains(tok)
-                    && !DML.contains(tok) && !MISC.contains(tok);
-        }
-
-        private static boolean isWhitespace(String token) {
-            return WHITESPACE.indexOf(token) >= 0;
-        }
-
-        private void newline() {
-            result.append("\n");
-            for (int i = 0; i < indent; i++) {
-                result.append(indentString);
-            }
-            beginLine = true;
-        }
-    }
-
-}
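
Editor's note: the file deleted above was an unused SQL pretty-printer (note its `// TODO delete it` marker), a tokenizer-based formatter in the style of Hibernate's `BasicFormatterImpl`: it splits a statement on punctuation and whitespace, then re-emits it with clause keywords on new lines and clause bodies indented. For anyone who still depended on it, a rough sketch of the behavior it provided; the demo class, the sample statement, and the expected output are illustrative approximations, not taken from a test in this patch:

```
public class SqlFormatDemo {
    public static void main(String[] args) {
        // Hypothetical call against the utility removed above:
        String pretty = SqlFormatUtil.format(
                "select id, name from test where id > 10 order by id");
        System.out.println(pretty);
        // Approximate output (clause keywords start new lines, bodies indented):
        //     select
        //         id,
        //         name
        //     from
        //         test
        //     where
        //         id > 10
        //     order by
        //         id
    }
}
```
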