diff --git a/README.md b/README.md
index 01bbc3ea..89aeb87c 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ DataX本身作为数据同步框架,将不同数据源的同步抽象为从源
# Quick Start
-##### Download [DataX下载地址](https://datax-opensource.oss-cn-hangzhou.aliyuncs.com/202303/datax.tar.gz)
+##### Download [DataX下载地址](https://datax-opensource.oss-cn-hangzhou.aliyuncs.com/202308/datax.tar.gz)
##### 请点击:[Quick Start](https://github.com/alibaba/DataX/blob/master/userGuid.md)
@@ -37,47 +37,48 @@ DataX本身作为数据同步框架,将不同数据源的同步抽象为从源
DataX目前已经有了比较全面的插件体系,主流的RDBMS数据库、NOSQL、大数据计算系统都已经接入,目前支持数据如下图,详情请点击:[DataX数据源参考指南](https://github.com/alibaba/DataX/wiki/DataX-all-data-channels)
-| 类型 | 数据源 | Reader(读) | Writer(写) | 文档 |
+| 类型 | 数据源 | Reader(读) | Writer(写) | 文档 |
|--------------|---------------------------|:---------:|:---------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
-| RDBMS 关系型数据库 | MySQL | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/mysqlreader/doc/mysqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/mysqlwriter/doc/mysqlwriter.md) |
-| | Oracle | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/oraclereader/doc/oraclereader.md) 、[写](https://github.com/alibaba/DataX/blob/master/oraclewriter/doc/oraclewriter.md) |
-| | OceanBase | √ | √ | [读](https://open.oceanbase.com/docs/community/oceanbase-database/V3.1.0/use-datax-to-full-migration-data-to-oceanbase) 、[写](https://open.oceanbase.com/docs/community/oceanbase-database/V3.1.0/use-datax-to-full-migration-data-to-oceanbase) |
-| | SQLServer | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/sqlserverreader/doc/sqlserverreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/sqlserverwriter/doc/sqlserverwriter.md) |
-| | PostgreSQL | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/postgresqlreader/doc/postgresqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/postgresqlwriter/doc/postgresqlwriter.md) |
-| | DRDS | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/drdsreader/doc/drdsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/drdswriter/doc/drdswriter.md) |
-| | Kingbase | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/drdsreader/doc/drdsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/drdswriter/doc/drdswriter.md) |
-| | 通用RDBMS(支持所有关系型数据库) | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/rdbmsreader/doc/rdbmsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/rdbmswriter/doc/rdbmswriter.md) |
-| 阿里云数仓数据存储 | ODPS | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/odpsreader/doc/odpsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/odpswriter/doc/odpswriter.md) |
-| | ADB | | √ | [写](https://github.com/alibaba/DataX/blob/master/adbmysqlwriter/doc/adbmysqlwriter.md) |
-| | ADS | | √ | [写](https://github.com/alibaba/DataX/blob/master/adswriter/doc/adswriter.md) |
-| | OSS | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/ossreader/doc/ossreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/osswriter/doc/osswriter.md) |
-| | OCS | | √ | [写](https://github.com/alibaba/DataX/blob/master/ocswriter/doc/ocswriter.md) |
-| | Hologres | | √ | [写](https://github.com/alibaba/DataX/blob/master/hologresjdbcwriter/doc/hologresjdbcwriter.md) |
-| | AnalyticDB For PostgreSQL | | √ | 写 |
-| 阿里云中间件 | datahub | √ | √ | 读 、写 |
-| | SLS | √ | √ | 读 、写 |
-| 阿里云图数据库 | GDB | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/gdbreader/doc/gdbreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/gdbwriter/doc/gdbwriter.md) |
-| NoSQL数据存储 | OTS | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/otsreader/doc/otsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/otswriter/doc/otswriter.md) |
-| | Hbase0.94 | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hbase094xreader/doc/hbase094xreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase094xwriter/doc/hbase094xwriter.md) |
-| | Hbase1.1 | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hbase11xreader/doc/hbase11xreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase11xwriter/doc/hbase11xwriter.md) |
-| | Phoenix4.x | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hbase11xsqlreader/doc/hbase11xsqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase11xsqlwriter/doc/hbase11xsqlwriter.md) |
-| | Phoenix5.x | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hbase20xsqlreader/doc/hbase20xsqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase20xsqlwriter/doc/hbase20xsqlwriter.md) |
-| | MongoDB | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/mongodbreader/doc/mongodbreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/mongodbwriter/doc/mongodbwriter.md) |
-| | Cassandra | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/cassandrareader/doc/cassandrareader.md) 、[写](https://github.com/alibaba/DataX/blob/master/cassandrawriter/doc/cassandrawriter.md) |
-| 数仓数据存储 | StarRocks | √ | √ | 读 、[写](https://github.com/alibaba/DataX/blob/master/starrockswriter/doc/starrockswriter.md) |
-| | ApacheDoris | | √ | [写](https://github.com/alibaba/DataX/blob/master/doriswriter/doc/doriswriter.md) |
-| | ClickHouse | | √ | 写 |
-| | Databend | | √ | [写](https://github.com/alibaba/DataX/blob/master/databendwriter/doc/databendwriter.md) |
-| | Hive | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hdfsreader/doc/hdfsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md) |
-| | kudu | | √ | [写](https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md) |
-| | selectdb | | √ | [写](https://github.com/alibaba/DataX/blob/master/selectdbwriter/doc/selectdbwriter.md) |
-| 无结构化数据存储 | TxtFile | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/txtfilereader/doc/txtfilereader.md) 、[写](https://github.com/alibaba/DataX/blob/master/txtfilewriter/doc/txtfilewriter.md) |
-| | FTP | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/ftpreader/doc/ftpreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/ftpwriter/doc/ftpwriter.md) |
-| | HDFS | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hdfsreader/doc/hdfsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md) |
-| | Elasticsearch | | √ | [写](https://github.com/alibaba/DataX/blob/master/elasticsearchwriter/doc/elasticsearchwriter.md) |
-| 时间序列数据库 | OpenTSDB | √ | | [读](https://github.com/alibaba/DataX/blob/master/opentsdbreader/doc/opentsdbreader.md) |
-| | TSDB | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/tsdbreader/doc/tsdbreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/tsdbwriter/doc/tsdbhttpwriter.md) |
-| | TDengine | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/tdenginereader/doc/tdenginereader-CN.md) 、[写](https://github.com/alibaba/DataX/blob/master/tdenginewriter/doc/tdenginewriter-CN.md) |
+| RDBMS 关系型数据库 | MySQL | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/mysqlreader/doc/mysqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/mysqlwriter/doc/mysqlwriter.md) |
+| | Oracle | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/oraclereader/doc/oraclereader.md) 、[写](https://github.com/alibaba/DataX/blob/master/oraclewriter/doc/oraclewriter.md) |
+| | OceanBase | √ | √ | [读](https://open.oceanbase.com/docs/community/oceanbase-database/V3.1.0/use-datax-to-full-migration-data-to-oceanbase) 、[写](https://open.oceanbase.com/docs/community/oceanbase-database/V3.1.0/use-datax-to-full-migration-data-to-oceanbase) |
+| | SQLServer | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/sqlserverreader/doc/sqlserverreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/sqlserverwriter/doc/sqlserverwriter.md) |
+| | PostgreSQL | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/postgresqlreader/doc/postgresqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/postgresqlwriter/doc/postgresqlwriter.md) |
+| | DRDS | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/drdsreader/doc/drdsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/drdswriter/doc/drdswriter.md) |
+| | Kingbase | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/kingbaseesreader/doc/kingbaseesreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/kingbaseeswriter/doc/kingbaseeswriter.md) |
+| | 通用RDBMS(支持所有关系型数据库) | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/rdbmsreader/doc/rdbmsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/rdbmswriter/doc/rdbmswriter.md) |
+| 阿里云数仓数据存储 | ODPS | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/odpsreader/doc/odpsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/odpswriter/doc/odpswriter.md) |
+| | ADB | | √ | [写](https://github.com/alibaba/DataX/blob/master/adbmysqlwriter/doc/adbmysqlwriter.md) |
+| | ADS | | √ | [写](https://github.com/alibaba/DataX/blob/master/adswriter/doc/adswriter.md) |
+| | OSS | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/ossreader/doc/ossreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/osswriter/doc/osswriter.md) |
+| | OCS | | √ | [写](https://github.com/alibaba/DataX/blob/master/ocswriter/doc/ocswriter.md) |
+| | Hologres | | √ | [写](https://github.com/alibaba/DataX/blob/master/hologresjdbcwriter/doc/hologresjdbcwriter.md) |
+| | AnalyticDB For PostgreSQL | | √ | 写 |
+| 阿里云中间件 | datahub | √ | √ | 读 、写 |
+| | SLS | √ | √ | 读 、写 |
+| 图数据库 | 阿里云 GDB | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/gdbreader/doc/gdbreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/gdbwriter/doc/gdbwriter.md) |
+| | Neo4j | | √ | [写](https://github.com/alibaba/DataX/blob/master/neo4jwriter/doc/neo4jwriter.md) |
+| NoSQL数据存储 | OTS | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/otsreader/doc/otsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/otswriter/doc/otswriter.md) |
+| | Hbase0.94 | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hbase094xreader/doc/hbase094xreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase094xwriter/doc/hbase094xwriter.md) |
+| | Hbase1.1 | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hbase11xreader/doc/hbase11xreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase11xwriter/doc/hbase11xwriter.md) |
+| | Phoenix4.x | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hbase11xsqlreader/doc/hbase11xsqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase11xsqlwriter/doc/hbase11xsqlwriter.md) |
+| | Phoenix5.x | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hbase20xsqlreader/doc/hbase20xsqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase20xsqlwriter/doc/hbase20xsqlwriter.md) |
+| | MongoDB | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/mongodbreader/doc/mongodbreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/mongodbwriter/doc/mongodbwriter.md) |
+| | Cassandra | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/cassandrareader/doc/cassandrareader.md) 、[写](https://github.com/alibaba/DataX/blob/master/cassandrawriter/doc/cassandrawriter.md) |
+| 数仓数据存储 | StarRocks | √ | √ | 读 、[写](https://github.com/alibaba/DataX/blob/master/starrockswriter/doc/starrockswriter.md) |
+| | ApacheDoris | | √ | [写](https://github.com/alibaba/DataX/blob/master/doriswriter/doc/doriswriter.md) |
+| | ClickHouse | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/clickhousereader/doc/clickhousereader.md) 、[写](https://github.com/alibaba/DataX/blob/master/clickhousewriter/doc/clickhousewriter.md) |
+| | Databend | | √ | [写](https://github.com/alibaba/DataX/blob/master/databendwriter/doc/databendwriter.md) |
+| | Hive | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hdfsreader/doc/hdfsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md) |
+| | kudu | | √ | [写](https://github.com/alibaba/DataX/blob/master/kuduwriter/doc/kuduwriter.md) |
+| | selectdb | | √ | [写](https://github.com/alibaba/DataX/blob/master/selectdbwriter/doc/selectdbwriter.md) |
+| 无结构化数据存储 | TxtFile | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/txtfilereader/doc/txtfilereader.md) 、[写](https://github.com/alibaba/DataX/blob/master/txtfilewriter/doc/txtfilewriter.md) |
+| | FTP | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/ftpreader/doc/ftpreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/ftpwriter/doc/ftpwriter.md) |
+| | HDFS | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hdfsreader/doc/hdfsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md) |
+| | Elasticsearch | | √ | [写](https://github.com/alibaba/DataX/blob/master/elasticsearchwriter/doc/elasticsearchwriter.md) |
+| 时间序列数据库 | OpenTSDB | √ | | [读](https://github.com/alibaba/DataX/blob/master/opentsdbreader/doc/opentsdbreader.md) |
+| | TSDB | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/tsdbreader/doc/tsdbreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/tsdbwriter/doc/tsdbhttpwriter.md) |
+| | TDengine | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/tdenginereader/doc/tdenginereader-CN.md) 、[写](https://github.com/alibaba/DataX/blob/master/tdenginewriter/doc/tdenginewriter-CN.md) |
# 阿里云DataWorks数据集成
@@ -99,7 +100,7 @@ DataX目前已经有了比较全面的插件体系,主流的RDBMS数据库、N
- 整库迁移:https://help.aliyun.com/document_detail/137809.html
- 批量上云:https://help.aliyun.com/document_detail/146671.html
- 更新更多能力请访问:https://help.aliyun.com/document_detail/137663.html
# 我要开发新的插件
@@ -109,6 +110,28 @@ DataX目前已经有了比较全面的插件体系,主流的RDBMS数据库、N
DataX 后续计划月度迭代更新,也欢迎感兴趣的同学提交 Pull requests,月度更新内容介绍如下。
+- [datax_v202309](https://github.com/alibaba/DataX/releases/tag/datax_v202309)
+ - 支持Phoenix 同步数据添加 where条件
+ - 支持华为 GaussDB 读写插件
+ - 修复 ClickhouseReader 插件运行报错 Can't find bundle for base name
+ - 增加 DataX调试模块
+ - 修复 orc空文件报错问题
+ - 优化obwriter性能
+ - txtfilewriter 增加导出为insert语句功能支持
+ - HdfsReader/HdfsWriter 支持parquet读写能力
+
+- [datax_v202308](https://github.com/alibaba/DataX/releases/tag/datax_v202308)
+ - OTS 插件更新
+ - databend 插件更新
+ - Oceanbase驱动修复
+
+
+- [datax_v202306](https://github.com/alibaba/DataX/releases/tag/datax_v202306)
+ - 精简代码
+ - 新增插件(neo4jwriter、clickhousewriter)
+ - 优化插件、修复问题(oceanbase、hdfs、databend、txtfile)
+
+
- [datax_v202303](https://github.com/alibaba/DataX/releases/tag/datax_v202303)
- 精简代码
- 新增插件(adbmysqlwriter、databendwriter、selectdbwriter)
diff --git a/clickhousereader/doc/clickhousereader.md b/clickhousereader/doc/clickhousereader.md
new file mode 100644
index 00000000..bf3cd203
--- /dev/null
+++ b/clickhousereader/doc/clickhousereader.md
@@ -0,0 +1,344 @@
+
+# ClickhouseReader 插件文档
+
+
+___
+
+
+## 1 快速介绍
+
+ClickhouseReader插件实现了从Clickhouse读取数据。在底层实现上,ClickhouseReader通过JDBC连接远程Clickhouse数据库,并执行相应的sql语句将数据从Clickhouse库中SELECT出来。
+
+## 2 实现原理
+
+简而言之,ClickhouseReader通过JDBC连接器连接到远程的Clickhouse数据库,根据用户配置的信息生成SELECT查询语句并发送到远程Clickhouse数据库执行,然后将返回结果使用DataX自定义的数据类型拼装为抽象的数据集,传递给下游Writer处理。
+
+对于用户配置Table、Column、Where的信息,ClickhouseReader将其拼接为SQL语句发送到Clickhouse数据库;对于用户配置querySql信息,ClickhouseReader直接将其发送到Clickhouse数据库执行。
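+
+下面用一段极简的 Java 代码示意 Table 模式下的 SQL 拼接思路(假设性示例,类名与方法名均非插件真实实现):
+
+```
+import java.util.Arrays;
+import java.util.List;
+
+// 示意代码:演示 column、table、where 配置如何拼接为 SELECT 语句(假设性示例,非插件源码)
+public class BuildQuerySqlDemo {
+    static String buildQuerySql(List<String> columns, String table, String where) {
+        StringBuilder sql = new StringBuilder("SELECT ");
+        sql.append(String.join(",", columns)).append(" FROM ").append(table);
+        if (where != null && !where.trim().isEmpty()) {
+            sql.append(" WHERE ").append(where);
+        }
+        return sql.toString();
+    }
+
+    public static void main(String[] args) {
+        // 输出:SELECT id,name FROM table WHERE id > 0
+        System.out.println(buildQuerySql(Arrays.asList("id", "name"), "table", "id > 0"));
+    }
+}
+```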
+
+
+## 3 功能说明
+
+### 3.1 配置样例
+
+* 配置一个从Clickhouse数据库同步抽取数据到本地的作业:
+
+```
+{
+ "job": {
+ "setting": {
+ "speed": {
+ //设置传输速度 byte/s 尽量逼近这个速度但是不高于它.
+ // channel 表示通道数量,byte表示通道速度,如果单通道速度1MB,配置byte为1048576表示一个channel
+ "byte": 1048576
+ },
+ //出错限制
+ "errorLimit": {
+ //先选择record
+ "record": 0,
+ //百分比 1表示100%
+ "percentage": 0.02
+ }
+ },
+ "content": [
+ {
+ "reader": {
+ "name": "clickhousereader",
+ "parameter": {
+ // 数据库连接用户名
+ "username": "root",
+ // 数据库连接密码
+ "password": "root",
+ "column": [
+ "id","name"
+ ],
+ "connection": [
+ {
+ "table": [
+ "table"
+ ],
+ "jdbcUrl": [
+ "jdbc:clickhouse://[HOST_NAME]:PORT/[DATABASE_NAME]"
+ ]
+ }
+ ]
+ }
+ },
+ "writer": {
+ //writer类型
+ "name": "streamwriter",
+ // 是否打印内容
+ "parameter": {
+ "print": true
+ }
+ }
+ }
+ ]
+ }
+}
+
+```
+
+* 配置一个使用自定义SQL从数据库同步数据到本地的作业:
+
+```
+{
+ "job": {
+ "setting": {
+ "speed": {
+ "channel": 5
+ }
+ },
+ "content": [
+ {
+ "reader": {
+ "name": "clickhousereader",
+ "parameter": {
+ "username": "root",
+ "password": "root",
+ "where": "",
+ "connection": [
+ {
+ "querySql": [
+ "select db_id,on_line_flag from db_info where db_id < 10"
+ ],
+ "jdbcUrl": [
+ "jdbc:clickhouse://1.1.1.1:8123/default"
+ ]
+ }
+ ]
+ }
+ },
+ "writer": {
+ "name": "streamwriter",
+ "parameter": {
+ "visible": false,
+ "encoding": "UTF-8"
+ }
+ }
+ }
+ ]
+ }
+}
+```
+
+
+### 3.2 参数说明
+
+* **jdbcUrl**
+
+ * 描述:描述的是到对端数据库的JDBC连接信息,使用JSON的数组描述,并支持一个库填写多个连接地址。之所以使用JSON数组描述连接信息,是因为阿里集团内部支持多个IP探测,如果配置了多个,ClickhouseReader可以依次探测ip的可连接性,直到选择一个合法的IP。如果全部连接失败,ClickhouseReader报错。 注意,jdbcUrl必须包含在connection配置单元中。对于阿里集团外部使用情况,JSON数组填写一个JDBC连接即可。
+
+ jdbcUrl按照Clickhouse官方规范,并可以填写连接附加控制信息。具体请参看[Clickhouse官方文档](https://clickhouse.com/docs/en/engines/table-engines/integrations/jdbc)。
+
+ * 必选:是
+
+ * 默认值:无
+
+* **username**
+
+ * 描述:数据源的用户名
+
+ * 必选:是
+
+ * 默认值:无
+
+* **password**
+
+ * 描述:数据源指定用户名的密码
+
+ * 必选:是
+
+ * 默认值:无
+
+* **table**
+
+ * 描述:所选取的需要同步的表。使用JSON的数组描述,因此支持多张表同时抽取。当配置为多张表时,用户自己需保证多张表是同一schema结构,ClickhouseReader不予检查表是否同一逻辑表。注意,table必须包含在connection配置单元中。
+
+ * 必选:是
+
+ * 默认值:无
+
+* **column**
+
+ * 描述:所配置的表中需要同步的列名集合,使用JSON的数组描述字段信息。用户使用\*代表默认使用所有列配置,例如['\*']。
+
+ 支持列裁剪,即列可以挑选部分列进行导出。
+
+ 支持列换序,即列可以不按照表schema信息进行导出。
+
+ 支持常量配置,用户需要按照JSON格式:
+ ["id", "`table`", "1", "'bazhen.csy'", "null", "to_char(a + 1)", "2.3" , "true"]
+ id为普通列名,\`table\`为包含保留字的列名,1为整型数字常量,'bazhen.csy'为字符串常量,null为空指针,to_char(a + 1)为表达式,2.3为浮点数,true为布尔值。
+
+ Column必须显式填写,不允许为空!
+
+ * 必选:是
+
+ * 默认值:无
+
+* **splitPk**
+
+ * 描述:ClickhouseReader进行数据抽取时,如果指定splitPk,表示用户希望使用splitPk代表的字段进行数据分片,DataX因此会启动并发任务进行数据同步,这样可以大大提高数据同步的效能(切分思路可参考下方的示意代码)。
+
+ 推荐splitPk用户使用表主键,因为表主键通常情况下比较均匀,因此切分出来的分片也不容易出现数据热点。
+
+ 目前splitPk仅支持整型数据切分,`不支持浮点、日期等其他类型`。如果用户指定其他非支持类型,ClickhouseReader将报错!
+
+ splitPk如果不填写,将视作用户不对单表进行切分,ClickhouseReader使用单通道同步全量数据。
+
+ * 必选:否
+
+ * 默认值:无
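+
+ 下面是一段按整型主键范围均分的极简 Java 示意(假设性示例,非插件真实的切分实现),帮助理解 splitPk 的并发切分思路:
+
+```
+import java.util.ArrayList;
+import java.util.List;
+
+// 示意代码:将 [min, max] 的主键区间均分为 channels 段,每段对应一个并发任务的 WHERE 条件(假设性示例)
+public class SplitPkDemo {
+    static List<String> split(String pk, long min, long max, int channels) {
+        List<String> conditions = new ArrayList<>();
+        long step = Math.max(1, (max - min + channels) / channels); // 向上取整,避免步长为 0
+        for (long start = min; start <= max; start += step) {
+            long end = Math.min(start + step - 1, max);
+            conditions.add(String.format("(%s >= %d AND %s <= %d)", pk, start, pk, end));
+        }
+        return conditions;
+    }
+
+    public static void main(String[] args) {
+        // 4 个并发即生成 4 段 WHERE 条件:1~25、26~50、51~75、76~100
+        split("id", 1, 100, 4).forEach(System.out::println);
+    }
+}
+```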
+
+* **where**
+
+ * 描述:筛选条件,ClickhouseReader根据指定的column、table、where条件拼接SQL,并根据这个SQL进行数据抽取。在实际业务场景中,往往会选择当天的数据进行同步,可以将where条件指定为gmt_create > $bizdate 。注意:不可以将where条件指定为limit 10,limit不是SQL的合法where子句。
+
+ where条件可以有效地进行业务增量同步。
+
+ * 必选:否
+
+ * 默认值:无
+
+* **querySql**
+
+ * 描述:在有些业务场景下,where这一配置项不足以描述所筛选的条件,用户可以通过该配置项来自定义筛选SQL。当用户配置了这一项之后,DataX系统就会忽略table、column这些配置项,直接使用这个配置项的内容对数据进行筛选,例如需要进行多表join后同步数据,使用select a,b from table_a join table_b on table_a.id = table_b.id
+
+ `当用户配置querySql时,ClickhouseReader直接忽略table、column、where条件的配置`。
+
+ * 必选:否
+
+ * 默认值:无
+
+* **fetchSize**
+
+ * 描述:该配置项定义了插件和数据库服务器端每次批量数据获取的条数,该值决定了DataX和服务器端的网络交互次数,适当增大可较大地提升数据抽取性能(JDBC 层的对应设置见下方示意代码)。
+
+ `注意,该值过大(>2048)可能造成DataX进程OOM。`。
+
+ * 必选:否
+
+ * 默认值:1000
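+
+ fetchSize 最终作用于 JDBC Statement 的批量取数设置,下面是一段假设性的 Java 示意(连接地址与账号仅为占位,请按实际环境修改):
+
+```
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+
+// 示意代码:fetchSize 对应 JDBC 的 setFetchSize(假设性示例,非插件源码)
+public class FetchSizeDemo {
+    public static void main(String[] args) throws SQLException {
+        try (Connection conn = DriverManager.getConnection(
+                "jdbc:clickhouse://127.0.0.1:8123/default", "root", "root");
+             Statement stmt = conn.createStatement()) {
+            stmt.setFetchSize(1000); // 每批从服务端拉取 1000 条:批次越大网络往返越少,过大则可能 OOM
+            try (ResultSet rs = stmt.executeQuery("SELECT id, name FROM table")) {
+                while (rs.next()) {
+                    // 逐行消费数据
+                }
+            }
+        }
+    }
+}
+```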
+
+* **session**
+
+ * 描述:设置 session 级别的时间格式、时区等配置,如果表中有时间字段,配置该值以明确告知读取时使用的时间格式。通常配置的参数为:NLS_DATE_FORMAT,NLS_TIME_FORMAT。其配置的值为 json 格式,例如:
+```
+"session": [
+ "alter session set NLS_DATE_FORMAT='yyyy-mm-dd hh24:mi:ss'",
+ "alter session set NLS_TIMESTAMP_FORMAT='yyyy-mm-dd hh24:mi:ss'",
+ "alter session set NLS_TIMESTAMP_TZ_FORMAT='yyyy-mm-dd hh24:mi:ss'",
+ "alter session set TIME_ZONE='US/Pacific'"
+ ]
+```
+ `(注意:其中的双引号 " 在 JSON 中需使用转义字符串表示)`。
+
+ * 必选:否
+
+ * 默认值:无
+
+
+### 3.3 类型转换
+
+目前ClickhouseReader支持大部分Clickhouse类型,但也存在个别类型没有支持的情况,请注意检查你的类型。
+
+下面列出ClickhouseReader针对Clickhouse类型转换列表:
+
+
+| DataX 内部类型| Clickhouse 数据类型 |
+| -------- |--------------------------------------------------------------------------------------------|
+| Long | UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 |
+| Double | Float32, Float64, Decimal |
+| String | String, FixedString |
+| Date | Date, Date32, DateTime, DateTime64 |
+| Boolean | Boolean |
+| Bytes | String(ClickHouse 无独立的二进制类型,二进制内容一般以 String 存放) |
+
+
+
+请注意:
+
+* `除上述罗列字段类型外,其他类型均不支持`。
+
+
+## 4 性能报告
+
+### 4.1 环境准备
+
+#### 4.1.1 数据特征
+
+为了模拟线上真实数据,我们设计两个Clickhouse数据表,分别为:(待补充)
+
+#### 4.1.2 机器参数
+
+* 执行DataX的机器参数为:(待补充)
+
+* Clickhouse数据库机器参数为:(待补充)
+
+### 4.2 测试报告
+
+#### 4.2.1 表1测试报告
+
+
+| 并发任务数| DataX速度(Rec/s)|DataX流量|网卡流量|DataX运行负载|DB运行负载|
+|--------| --------|--------|--------|--------|--------|
+|1| 待补充 | 待补充 | 待补充 | 待补充 | 待补充 |
+
+## 5 约束限制
+
+### 5.1 主备同步数据恢复问题
+
+主备同步问题指Clickhouse使用主从灾备,备库从主库不间断通过binlog恢复数据。由于主备数据同步存在一定的时间差,特别是在网络延迟等特定情况下,备库同步恢复的数据与主库有较大差别,导致从备库同步的数据不是一份当前时间的完整镜像。
+
+针对这个问题,我们提供了preSql功能,该功能待补充。
+
+### 5.2 一致性约束
+
+Clickhouse在数据存储划分中属于RDBMS系统,对外可以提供强一致性数据查询接口。例如当一次同步任务启动运行过程中,当该库存在其他数据写入方写入数据时,ClickhouseReader完全不会获取到写入更新数据,这是由于数据库本身的快照特性决定的。关于数据库快照特性,请参看[MVCC Wikipedia](https://en.wikipedia.org/wiki/Multiversion_concurrency_control)
+
+上述是在ClickhouseReader单线程模型下数据同步一致性的特性。由于ClickhouseReader可以根据用户配置信息使用并发数据抽取,因此不能严格保证数据一致性:当ClickhouseReader根据splitPk进行数据切分后,会先后启动多个并发任务完成数据同步。由于多个并发任务相互之间不属于同一个读事务,同时多个并发任务存在时间间隔,因此这份数据并不是`完整的`、`一致的`数据快照信息。
+
+针对多线程的一致性快照需求,在技术上目前无法实现,只能从工程角度解决,工程化的方式存在取舍,我们提供几个解决思路给用户,用户可以自行选择:
+
+1. 使用单线程同步,即不再进行数据切片。缺点是速度比较慢,但是能够很好保证一致性。
+
+2. 关闭其他数据写入方,保证当前数据为静态数据,例如,锁表、关闭备库同步等等。缺点是可能影响在线业务。
+
+### 5.3 数据库编码问题
+
+
+ClickhouseReader底层使用JDBC进行数据抽取,JDBC天然适配各类编码,并在底层进行了编码转换。因此ClickhouseReader不需用户指定编码,可以自动获取编码并转码。
+
+对于Clickhouse底层写入编码和其设定的编码不一致的混乱情况,ClickhouseReader对此无法识别,对此也无法提供解决方案,对于这类情况,`导出有可能为乱码`。
+
+### 5.4 增量数据同步
+
+ClickhouseReader使用JDBC SELECT语句完成数据抽取工作,因此可以使用SELECT...WHERE...进行增量数据抽取,方式有多种:
+
+* 数据库在线应用写入数据库时,填充modify字段为更改时间戳,包括新增、更新、删除(逻辑删)。对于这类应用,ClickhouseReader只需要在WHERE条件后跟上一同步阶段的时间戳即可,例如 `where gmt_modified > '上次同步时间'`。
+* 对于新增流水型数据,ClickhouseReader可以在WHERE条件后跟上一阶段的最大自增ID,例如 `where id > 上次最大ID`。
+
+对于业务上无字段区分新增、修改数据的情况,ClickhouseReader也无法进行增量数据同步,只能同步全量数据。
+
+### 5.5 Sql安全性
+
+ClickhouseReader提供querySql语句交给用户自己实现SELECT抽取语句,ClickhouseReader本身对querySql不做任何安全性校验。这块交由DataX用户方自己保证。
+
+## 6 FAQ
+
+***
+
+**Q: ClickhouseReader同步报错,报错信息为XXX**
+
+ A: 网络或者权限问题,请先使用Clickhouse命令行测试连通性。
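+
+ 例如(假设性示例,主机、端口与账号请按实际环境替换):`clickhouse-client --host <host> --port 9000 --user <user> --password <password>`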
+
+
+如果上述命令也报错,那可以证实是环境问题,请联系你的DBA。
+
+
+**Q: ClickhouseReader抽取速度很慢怎么办?**
+
+ A: 影响抽取时间的原因大概有如下几个:(来自专业 DBA 卫绾)
+ 1. 由于SQL的执行计划异常导致的抽取时间长;在批量抽取时,尽可能使用全表扫描代替索引扫描;
+ 2. 合理设置SQL的并发度,减少抽取时间;
+ 3. 抽取SQL要简单,尽量不用replace等函数,这类函数非常消耗CPU,会严重影响抽取速度。
diff --git a/clickhousereader/pom.xml b/clickhousereader/pom.xml
new file mode 100644
index 00000000..4b095796
--- /dev/null
+++ b/clickhousereader/pom.xml
@@ -0,0 +1,91 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>datax-all</artifactId>
+        <groupId>com.alibaba.datax</groupId>
+        <version>0.0.1-SNAPSHOT</version>
+    </parent>
+
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>clickhousereader</artifactId>
+    <name>clickhousereader</name>
+    <packaging>jar</packaging>
+
+    <dependencies>
+        <dependency>
+            <groupId>ru.yandex.clickhouse</groupId>
+            <artifactId>clickhouse-jdbc</artifactId>
+            <version>0.2.4</version>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>datax-core</artifactId>
+            <version>${datax-project-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>datax-common</artifactId>
+            <version>${datax-project-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>plugin-rdbms-util</artifactId>
+            <version>${datax-project-version}</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <resources>
+            <resource>
+                <directory>src/main/java</directory>
+                <includes>
+                    <include>**/*.properties</include>
+                </includes>
+            </resource>
+        </resources>
+        <plugins>
+            <!-- compiler plugin -->
+            <plugin>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <configuration>
+                    <source>${jdk-version}</source>
+                    <target>${jdk-version}</target>
+                    <encoding>${project-sourceEncoding}</encoding>
+                </configuration>
+            </plugin>
+            <!-- assembly plugin -->
+            <plugin>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <configuration>
+                    <descriptors>
+                        <descriptor>src/main/assembly/package.xml</descriptor>
+                    </descriptors>
+                    <finalName>datax</finalName>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>dwzip</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/clickhousereader/src/main/assembly/package.xml b/clickhousereader/src/main/assembly/package.xml
new file mode 100644
index 00000000..9dc7fc13
--- /dev/null
+++ b/clickhousereader/src/main/assembly/package.xml
@@ -0,0 +1,35 @@
+<assembly
+        xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+    <id></id>
+    <formats>
+        <format>dir</format>
+    </formats>
+    <includeBaseDirectory>false</includeBaseDirectory>
+    <fileSets>
+        <fileSet>
+            <directory>src/main/resources</directory>
+            <includes>
+                <include>plugin.json</include>
+                <include>plugin_job_template.json</include>
+            </includes>
+            <outputDirectory>plugin/reader/clickhousereader</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>target/</directory>
+            <includes>
+                <include>clickhousereader-0.0.1-SNAPSHOT.jar</include>
+            </includes>
+            <outputDirectory>plugin/reader/clickhousereader</outputDirectory>
+        </fileSet>
+    </fileSets>
+
+    <dependencySets>
+        <dependencySet>
+            <useProjectArtifact>false</useProjectArtifact>
+            <outputDirectory>plugin/reader/clickhousereader/libs</outputDirectory>
+            <scope>runtime</scope>
+        </dependencySet>
+    </dependencySets>
+</assembly>
\ No newline at end of file
diff --git a/clickhousereader/src/main/java/com/alibaba/datax/plugin/reader/clickhousereader/ClickhouseReader.java b/clickhousereader/src/main/java/com/alibaba/datax/plugin/reader/clickhousereader/ClickhouseReader.java
new file mode 100644
index 00000000..cfa6be99
--- /dev/null
+++ b/clickhousereader/src/main/java/com/alibaba/datax/plugin/reader/clickhousereader/ClickhouseReader.java
@@ -0,0 +1,85 @@
+package com.alibaba.datax.plugin.reader.clickhousereader;
+
+import java.sql.Array;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Types;
+import java.util.List;
+
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.element.StringColumn;
+import com.alibaba.datax.common.plugin.RecordSender;
+import com.alibaba.datax.common.plugin.TaskPluginCollector;
+import com.alibaba.datax.common.spi.Reader;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.common.util.MessageSource;
+import com.alibaba.datax.plugin.rdbms.reader.CommonRdbmsReader;
+import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
+import com.alibaba.fastjson2.JSON;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ClickhouseReader extends Reader {
+
+ private static final DataBaseType DATABASE_TYPE = DataBaseType.ClickHouse;
+ private static final Logger LOG = LoggerFactory.getLogger(ClickhouseReader.class);
+
+ public static class Job extends Reader.Job {
+ private Configuration jobConfig = null;
+ private CommonRdbmsReader.Job commonRdbmsReaderMaster;
+
+ @Override
+ public void init() {
+ this.jobConfig = super.getPluginJobConf();
+ this.commonRdbmsReaderMaster = new CommonRdbmsReader.Job(DATABASE_TYPE);
+ this.commonRdbmsReaderMaster.init(this.jobConfig);
+ }
+
+ @Override
+ public List<Configuration> split(int mandatoryNumber) {
+ return this.commonRdbmsReaderMaster.split(this.jobConfig, mandatoryNumber);
+ }
+
+ @Override
+ public void post() {
+ this.commonRdbmsReaderMaster.post(this.jobConfig);
+ }
+
+ @Override
+ public void destroy() {
+ this.commonRdbmsReaderMaster.destroy(this.jobConfig);
+ }
+ }
+
+ public static class Task extends Reader.Task {
+
+ private Configuration jobConfig;
+ private CommonRdbmsReader.Task commonRdbmsReaderSlave;
+
+ @Override
+ public void init() {
+ this.jobConfig = super.getPluginJobConf();
+ this.commonRdbmsReaderSlave = new CommonRdbmsReader.Task(DATABASE_TYPE, super.getTaskGroupId(), super.getTaskId());
+ this.commonRdbmsReaderSlave.init(this.jobConfig);
+ }
+
+ @Override
+ public void startRead(RecordSender recordSender) {
+ int fetchSize = this.jobConfig.getInt(com.alibaba.datax.plugin.rdbms.reader.Constant.FETCH_SIZE, 1000);
+
+ this.commonRdbmsReaderSlave.startRead(this.jobConfig, recordSender, super.getTaskPluginCollector(), fetchSize);
+ }
+
+ @Override
+ public void post() {
+ this.commonRdbmsReaderSlave.post(this.jobConfig);
+ }
+
+ @Override
+ public void destroy() {
+ this.commonRdbmsReaderSlave.destroy(this.jobConfig);
+ }
+ }
+}
diff --git a/clickhousereader/src/main/resources/plugin.json b/clickhousereader/src/main/resources/plugin.json
new file mode 100644
index 00000000..5d608f6c
--- /dev/null
+++ b/clickhousereader/src/main/resources/plugin.json
@@ -0,0 +1,6 @@
+{
+ "name": "clickhousereader",
+ "class": "com.alibaba.datax.plugin.reader.clickhousereader.ClickhouseReader",
+ "description": "useScene: prod. mechanism: Jdbc connection using the database, execute select sql.",
+ "developer": "alibaba"
+}
\ No newline at end of file
diff --git a/clickhousereader/src/main/resources/plugin_job_template.json b/clickhousereader/src/main/resources/plugin_job_template.json
new file mode 100644
index 00000000..1814e510
--- /dev/null
+++ b/clickhousereader/src/main/resources/plugin_job_template.json
@@ -0,0 +1,16 @@
+{
+ "name": "clickhousereader",
+ "parameter": {
+ "username": "username",
+ "password": "password",
+ "column": ["col1", "col2", "col3"],
+ "connection": [
+ {
+ "jdbcUrl": "jdbc:clickhouse://<host>:<port>[/<database>]",
+ "table": ["table1", "table2"]
+ }
+ ],
+ "preSql": [],
+ "postSql": []
+ }
+}
\ No newline at end of file
diff --git a/clickhousereader/src/test/resources/basic1.json b/clickhousereader/src/test/resources/basic1.json
new file mode 100755
index 00000000..c45a45e7
--- /dev/null
+++ b/clickhousereader/src/test/resources/basic1.json
@@ -0,0 +1,57 @@
+{
+ "job": {
+ "setting": {
+ "speed": {
+ "channel": 5
+ }
+ },
+ "content": [
+ {
+ "reader": {
+ "name": "clickhousereader",
+ "parameter": {
+ "username": "XXXX",
+ "password": "XXXX",
+ "column": [
+ "uint8_col",
+ "uint16_col",
+ "uint32_col",
+ "uint64_col",
+ "int8_col",
+ "int16_col",
+ "int32_col",
+ "int64_col",
+ "float32_col",
+ "float64_col",
+ "bool_col",
+ "str_col",
+ "fixedstr_col",
+ "uuid_col",
+ "date_col",
+ "datetime_col",
+ "enum_col",
+ "ary_uint8_col",
+ "ary_str_col",
+ "tuple_col",
+ "nullable_col",
+ "nested_col.nested_id",
+ "nested_col.nested_str",
+ "ipv4_col",
+ "ipv6_col",
+ "decimal_col"
+ ],
+ "connection": [
+ {
+ "table": [
+ "all_type_tbl"
+ ],
+ "jdbcUrl":["jdbc:clickhouse://XXXX:8123/default"]
+ }
+ ]
+ }
+ },
+ "writer": {}
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/clickhousereader/src/test/resources/basic1.sql b/clickhousereader/src/test/resources/basic1.sql
new file mode 100644
index 00000000..f937b889
--- /dev/null
+++ b/clickhousereader/src/test/resources/basic1.sql
@@ -0,0 +1,34 @@
+CREATE TABLE IF NOT EXISTS default.all_type_tbl
+(
+`uint8_col` UInt8,
+`uint16_col` UInt16,
+uint32_col UInt32,
+uint64_col UInt64,
+int8_col Int8,
+int16_col Int16,
+int32_col Int32,
+int64_col Int64,
+float32_col Float32,
+float64_col Float64,
+bool_col UInt8,
+str_col String,
+fixedstr_col FixedString(3),
+uuid_col UUID,
+date_col Date,
+datetime_col DateTime,
+enum_col Enum('hello' = 1, 'world' = 2),
+ary_uint8_col Array(UInt8),
+ary_str_col Array(String),
+tuple_col Tuple(UInt8, String),
+nullable_col Nullable(UInt8),
+nested_col Nested
+ (
+ nested_id UInt32,
+ nested_str String
+ ),
+ipv4_col IPv4,
+ipv6_col IPv6,
+decimal_col Decimal(5,3)
+)
+ENGINE = MergeTree()
+ORDER BY (uint8_col);
\ No newline at end of file
diff --git a/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java b/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java
index f688d163..df5e1e4a 100755
--- a/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java
+++ b/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java
@@ -5,6 +5,7 @@ import com.alibaba.datax.common.exception.DataXException;
import java.math.BigDecimal;
import java.math.BigInteger;
+import java.sql.Time;
import java.util.Date;
/**
@@ -12,18 +13,54 @@ import java.util.Date;
*/
public class DateColumn extends Column {
- private DateType subType = DateType.DATETIME;
+ private DateType subType = DateType.DATETIME;
- public static enum DateType {
- DATE, TIME, DATETIME
- }
+ private int nanos = 0;
- /**
- * 构建值为null的DateColumn,使用Date子类型为DATETIME
- * */
- public DateColumn() {
- this((Long)null);
- }
+ private int precision = -1;
+
+ public static enum DateType {
+ DATE, TIME, DATETIME
+ }
+
+ /**
+ * 构建值为time(java.sql.Time)的DateColumn,使用Date子类型为TIME,只有时间,没有日期
+ */
+ public DateColumn(Time time, int nanos, int jdbcPrecision) {
+ this(time);
+ if (time != null) {
+ setNanos(nanos);
+ }
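+ // 以下根据 JDBC 列精度(display size)推算秒的小数位精度:10 视为无小数位,12~17 对应 1~6 位小数(该映射依赖驱动的精度约定)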
+ if (jdbcPrecision == 10) {
+ setPrecision(0);
+ }
+ if (jdbcPrecision >= 12 && jdbcPrecision <= 17) {
+ setPrecision(jdbcPrecision - 11);
+ }
+ }
+
+ public long getNanos() {
+ return nanos;
+ }
+
+ public void setNanos(int nanos) {
+ this.nanos = nanos;
+ }
+
+ public int getPrecision() {
+ return precision;
+ }
+
+ public void setPrecision(int precision) {
+ this.precision = precision;
+ }
+
+ /**
+ * 构建值为null的DateColumn,使用Date子类型为DATETIME
+ */
+ public DateColumn() {
+ this((Long) null);
+ }
/**
* 构建值为stamp(Unix时间戳)的DateColumn,使用Date子类型为DATETIME
diff --git a/common/src/main/java/com/alibaba/datax/common/statistics/VMInfo.java b/common/src/main/java/com/alibaba/datax/common/statistics/VMInfo.java
index cab42a4b..423c794e 100644
--- a/common/src/main/java/com/alibaba/datax/common/statistics/VMInfo.java
+++ b/common/src/main/java/com/alibaba/datax/common/statistics/VMInfo.java
@@ -77,8 +77,8 @@ public class VMInfo {
garbageCollectorMXBeanList = java.lang.management.ManagementFactory.getGarbageCollectorMXBeans();
memoryPoolMXBeanList = java.lang.management.ManagementFactory.getMemoryPoolMXBeans();
- osInfo = runtimeMXBean.getVmVendor() + " " + runtimeMXBean.getSpecVersion() + " " + runtimeMXBean.getVmVersion();
- jvmInfo = osMXBean.getName() + " " + osMXBean.getArch() + " " + osMXBean.getVersion();
+ jvmInfo = runtimeMXBean.getVmVendor() + " " + runtimeMXBean.getSpecVersion() + " " + runtimeMXBean.getVmVersion();
+ osInfo = osMXBean.getName() + " " + osMXBean.getArch() + " " + osMXBean.getVersion();
totalProcessorCount = osMXBean.getAvailableProcessors();
//构建startPhyOSStatus
diff --git a/common/src/main/java/com/alibaba/datax/common/util/LimitLogger.java b/common/src/main/java/com/alibaba/datax/common/util/LimitLogger.java
new file mode 100644
index 00000000..a307e0fb
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/util/LimitLogger.java
@@ -0,0 +1,34 @@
+package com.alibaba.datax.common.util;
+
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * @author jitongchen
+ * @date 2023/9/7 9:47 AM
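+ * 用法示意:LimitLogger.limit("dirtyRecord", 1000L, () -> LOG.warn("dirty record found"));
+ * 作用:同名 name 的日志在 limit 毫秒内至多输出一次,用于高频日志限流;limit <= 0 时不限流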
+ */
+public class LimitLogger {
+
+ private static Map<String, Long> lastPrintTime = new HashMap<>();
+
+ public static void limit(String name, long limit, LoggerFunction function) {
+ if (StringUtils.isBlank(name)) {
+ name = "__all__";
+ }
+ if (limit <= 0) {
+ function.apply();
+ } else {
+ if (!lastPrintTime.containsKey(name)) {
+ lastPrintTime.put(name, System.currentTimeMillis());
+ function.apply();
+ } else {
+ if (System.currentTimeMillis() > lastPrintTime.get(name) + limit) {
+ lastPrintTime.put(name, System.currentTimeMillis());
+ function.apply();
+ }
+ }
+ }
+ }
+}
diff --git a/common/src/main/java/com/alibaba/datax/common/util/LoggerFunction.java b/common/src/main/java/com/alibaba/datax/common/util/LoggerFunction.java
new file mode 100644
index 00000000..ef24504f
--- /dev/null
+++ b/common/src/main/java/com/alibaba/datax/common/util/LoggerFunction.java
@@ -0,0 +1,10 @@
+package com.alibaba.datax.common.util;
+
+/**
+ * @author molin.lxd
+ * @date 2021-05-09
+ */
+public interface LoggerFunction {
+
+ void apply();
+}
diff --git a/core/src/main/java/com/alibaba/datax/core/transport/channel/memory/MemoryChannel.java b/core/src/main/java/com/alibaba/datax/core/transport/channel/memory/MemoryChannel.java
index e49c7878..5bce085f 100755
--- a/core/src/main/java/com/alibaba/datax/core/transport/channel/memory/MemoryChannel.java
+++ b/core/src/main/java/com/alibaba/datax/core/transport/channel/memory/MemoryChannel.java
@@ -29,7 +29,7 @@ public class MemoryChannel extends Channel {
private ReentrantLock lock;
- private Condition notInsufficient, notEmpty;
+ private Condition notSufficient, notEmpty;
public MemoryChannel(final Configuration configuration) {
super(configuration);
@@ -37,7 +37,7 @@ public class MemoryChannel extends Channel {
this.bufferSize = configuration.getInt(CoreConstant.DATAX_CORE_TRANSPORT_EXCHANGER_BUFFERSIZE);
lock = new ReentrantLock();
- notInsufficient = lock.newCondition();
+ notSufficient = lock.newCondition();
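+ // notSufficient:缓冲区容量不足时生产者在此等待,消费端取走数据后 signalAll 唤醒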
notEmpty = lock.newCondition();
}
@@ -75,7 +75,7 @@ public class MemoryChannel extends Channel {
lock.lockInterruptibly();
int bytes = getRecordBytes(rs);
while (memoryBytes.get() + bytes > this.byteCapacity || rs.size() > this.queue.remainingCapacity()) {
- notInsufficient.await(200L, TimeUnit.MILLISECONDS);
+ notSufficient.await(200L, TimeUnit.MILLISECONDS);
}
this.queue.addAll(rs);
waitWriterTime += System.nanoTime() - startTime;
@@ -116,7 +116,7 @@ public class MemoryChannel extends Channel {
waitReaderTime += System.nanoTime() - startTime;
int bytes = getRecordBytes(rs);
memoryBytes.addAndGet(-bytes);
- notInsufficient.signalAll();
+ notSufficient.signalAll();
} catch (InterruptedException e) {
throw DataXException.asDataXException(
FrameworkErrorCode.RUNTIME_ERROR, e);
diff --git a/core/src/main/java/com/alibaba/datax/core/util/ConfigParser.java b/core/src/main/java/com/alibaba/datax/core/util/ConfigParser.java
index 20039864..24f43d55 100755
--- a/core/src/main/java/com/alibaba/datax/core/util/ConfigParser.java
+++ b/core/src/main/java/com/alibaba/datax/core/util/ConfigParser.java
@@ -168,6 +168,7 @@ public final class ConfigParser {
boolean isDefaultPath = StringUtils.isBlank(pluginPath);
if (isDefaultPath) {
configuration.set("path", path);
+ configuration.set("loadType","jarLoader");
}
Configuration result = Configuration.newDefault();
diff --git a/core/src/main/java/com/alibaba/datax/core/util/container/JarLoader.java b/core/src/main/java/com/alibaba/datax/core/util/container/JarLoader.java
index 9fc113dc..ddf22bae 100755
--- a/core/src/main/java/com/alibaba/datax/core/util/container/JarLoader.java
+++ b/core/src/main/java/com/alibaba/datax/core/util/container/JarLoader.java
@@ -15,7 +15,7 @@ import java.util.List;
/**
* 提供Jar隔离的加载机制,会把传入的路径、及其子路径、以及路径中的jar文件加入到class path。
*/
-public class JarLoader extends URLClassLoader {
+public class JarLoader extends URLClassLoader{
public JarLoader(String[] paths) {
this(paths, JarLoader.class.getClassLoader());
}
diff --git a/core/src/main/java/com/alibaba/datax/core/util/container/LoadUtil.java b/core/src/main/java/com/alibaba/datax/core/util/container/LoadUtil.java
index 30e926c3..9a6a8302 100755
--- a/core/src/main/java/com/alibaba/datax/core/util/container/LoadUtil.java
+++ b/core/src/main/java/com/alibaba/datax/core/util/container/LoadUtil.java
@@ -49,7 +49,7 @@ public class LoadUtil {
/**
* jarLoader的缓冲
*/
- private static Map jarLoaderCenter = new HashMap();
+ private static Map jarLoaderCenter = new HashMap();
/**
* 设置pluginConfigs,方便后面插件来获取
diff --git a/databendwriter/doc/databendwriter-CN.md b/databendwriter/doc/databendwriter-CN.md
index d6a8f1f3..5b26ed7e 100644
--- a/databendwriter/doc/databendwriter-CN.md
+++ b/databendwriter/doc/databendwriter-CN.md
@@ -79,6 +79,8 @@ create table if not exsits datax.sample1(a string, b int64, c date, d timestamp,
"writer": {
"name": "databendwriter",
"parameter": {
+ "writeMode": "replace",
+ "onConflictColumn": ["id"],
"username": "databend",
"password": "databend",
"column": ["a", "b", "c", "d", "e", "f", "g"],
@@ -149,6 +151,16 @@ create table if not exsits datax.sample1(a string, b int64, c date, d timestamp,
* 必选: 否
* 默认值: 无
* 示例: ["select count(*) from datax.sample1"]
+* writeMode
+ * 描述:写入模式,支持 insert 和 replace 两种模式,默认为 insert。若为 replace,务必填写 onConflictColumn 参数
+ * 必选:否
+ * 默认值:insert
+ * 示例:"replace"
+* onConflictColumn
+ * 描述:on conflict 字段,指定 writeMode 为 replace 后,需要此参数
+ * 必选:否
+ * 默认值:无
+ * 示例:["id","user"]
### 3.3 类型转化
DataX中的数据类型可以转换为databend中的相应数据类型。下表显示了两种类型之间的对应关系。
diff --git a/databendwriter/doc/databendwriter.md b/databendwriter/doc/databendwriter.md
index 0b57bf13..c92d6387 100644
--- a/databendwriter/doc/databendwriter.md
+++ b/databendwriter/doc/databendwriter.md
@@ -142,6 +142,16 @@ create table if not exsits datax.sample1(a string, b int64, c date, d timestamp,
* Description: A list of SQL statements that will be executed after the write operation.
* Required: no
* Default: none
+* writeMode
+  * Description: The write mode, which supports `insert` and `replace`. When set to `replace`, the onConflictColumn parameter must be provided.
+  * Required: no
+  * Default: insert
+  * Example: "replace"
+* onConflictColumn
+  * Description: The list of on-conflict fields; required when writeMode is `replace`.
+  * Required: no
+  * Default: none
+  * Example: ["id","user"]
### 3.3 Type Convert
Data types in datax can be converted to the corresponding data types in databend. The following table shows the correspondence between the two types.
diff --git a/databendwriter/pom.xml b/databendwriter/pom.xml
index 9ddc735c..b99ca5d8 100644
--- a/databendwriter/pom.xml
+++ b/databendwriter/pom.xml
@@ -17,7 +17,7 @@
            <groupId>com.databend</groupId>
            <artifactId>databend-jdbc</artifactId>
-            <version>0.0.7</version>
+            <version>0.1.0</version>
            <groupId>com.alibaba.datax</groupId>
diff --git a/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/DatabendWriter.java b/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/DatabendWriter.java
index a4222f08..ddb8fc9a 100644
--- a/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/DatabendWriter.java
+++ b/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/DatabendWriter.java
@@ -17,20 +17,17 @@ import java.sql.*;
import java.util.List;
import java.util.regex.Pattern;
-public class DatabendWriter extends Writer
-{
+public class DatabendWriter extends Writer {
private static final DataBaseType DATABASE_TYPE = DataBaseType.Databend;
public static class Job
- extends Writer.Job
- {
+ extends Writer.Job {
private static final Logger LOG = LoggerFactory.getLogger(Job.class);
private Configuration originalConfig;
private CommonRdbmsWriter.Job commonRdbmsWriterMaster;
@Override
- public void init()
- {
+ public void init() throws DataXException {
this.originalConfig = super.getPluginJobConf();
this.commonRdbmsWriterMaster = new CommonRdbmsWriter.Job(DATABASE_TYPE);
this.commonRdbmsWriterMaster.init(this.originalConfig);
@@ -39,8 +36,7 @@ public class DatabendWriter extends Writer
}
@Override
- public void preCheck()
- {
+ public void preCheck() {
this.init();
this.commonRdbmsWriterMaster.writerPreCheck(this.originalConfig, DATABASE_TYPE);
}
@@ -67,8 +63,7 @@ public class DatabendWriter extends Writer
}
- public static class Task extends Writer.Task
- {
+ public static class Task extends Writer.Task {
private static final Logger LOG = LoggerFactory.getLogger(Task.class);
private Configuration writerSliceConfig;
@@ -76,11 +71,10 @@ public class DatabendWriter extends Writer
private CommonRdbmsWriter.Task commonRdbmsWriterSlave;
@Override
- public void init()
- {
+ public void init() {
this.writerSliceConfig = super.getPluginJobConf();
- this.commonRdbmsWriterSlave = new CommonRdbmsWriter.Task(DataBaseType.Databend){
+ this.commonRdbmsWriterSlave = new CommonRdbmsWriter.Task(DataBaseType.Databend) {
@Override
protected PreparedStatement fillPreparedStatementColumnType(PreparedStatement preparedStatement, int columnIndex, int columnSqltype, String typeName, Column column) throws SQLException {
try {
@@ -177,8 +171,8 @@ public class DatabendWriter extends Writer
case Types.BOOLEAN:
- // warn: bit(1) -> Types.BIT 可使用setBoolean
- // warn: bit(>1) -> Types.VARBINARY 可使用setBytes
+ // warn: bit(1) -> Types.BIT 可使用setBoolean
+ // warn: bit(>1) -> Types.VARBINARY 可使用setBytes
case Types.BIT:
if (this.dataBaseType == DataBaseType.MySql) {
Boolean asBoolean = column.asBoolean();
@@ -224,8 +218,7 @@ public class DatabendWriter extends Writer
}
@Override
- public void destroy()
- {
+ public void destroy() {
this.commonRdbmsWriterSlave.destroy(this.writerSliceConfig);
}
@@ -238,9 +231,9 @@ public class DatabendWriter extends Writer
public void post() {
this.commonRdbmsWriterSlave.post(this.writerSliceConfig);
}
+
@Override
- public void startWrite(RecordReceiver lineReceiver)
- {
+ public void startWrite(RecordReceiver lineReceiver) {
this.commonRdbmsWriterSlave.startWrite(lineReceiver, this.writerSliceConfig, this.getTaskPluginCollector());
}
diff --git a/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/DatabendWriterErrorCode.java b/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/DatabendWriterErrorCode.java
new file mode 100644
index 00000000..21cbf428
--- /dev/null
+++ b/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/DatabendWriterErrorCode.java
@@ -0,0 +1,33 @@
+package com.alibaba.datax.plugin.writer.databendwriter;
+
+import com.alibaba.datax.common.spi.ErrorCode;
+
+
+public enum DatabendWriterErrorCode implements ErrorCode {
+ CONF_ERROR("DatabendWriter-00", "配置错误."),
+ WRITE_DATA_ERROR("DatabendWriter-01", "写入数据时失败."),
+ ;
+
+ private final String code;
+ private final String description;
+
+ private DatabendWriterErrorCode(String code, String description) {
+ this.code = code;
+ this.description = description;
+ }
+
+ @Override
+ public String getCode() {
+ return this.code;
+ }
+
+ @Override
+ public String getDescription() {
+ return this.description;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("Code:[%s], Description:[%s].", this.code, this.description);
+ }
+}
\ No newline at end of file
diff --git a/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/util/DatabendWriterUtil.java b/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/util/DatabendWriterUtil.java
index a862e920..516a75eb 100644
--- a/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/util/DatabendWriterUtil.java
+++ b/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/util/DatabendWriterUtil.java
@@ -1,40 +1,72 @@
package com.alibaba.datax.plugin.writer.databendwriter.util;
+
+import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.writer.Constant;
import com.alibaba.datax.plugin.rdbms.writer.Key;
+import com.alibaba.datax.plugin.writer.databendwriter.DatabendWriterErrorCode;
+import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
import java.util.StringJoiner;
-public final class DatabendWriterUtil
-{
+public final class DatabendWriterUtil {
private static final Logger LOG = LoggerFactory.getLogger(DatabendWriterUtil.class);
- private DatabendWriterUtil() {}
- public static void dealWriteMode(Configuration originalConfig)
- {
+ private DatabendWriterUtil() {
+ }
+
+ public static void dealWriteMode(Configuration originalConfig) throws DataXException {
List<String> columns = originalConfig.getList(Key.COLUMN, String.class);
+ List<String> onConflictColumns = originalConfig.getList(Key.ONCONFLICT_COLUMN, String.class);
+ StringBuilder writeDataSqlTemplate = new StringBuilder();
String jdbcUrl = originalConfig.getString(String.format("%s[0].%s",
Constant.CONN_MARK, Key.JDBC_URL, String.class));
String writeMode = originalConfig.getString(Key.WRITE_MODE, "INSERT");
+ LOG.info("write mode is {}", writeMode);
+ if (writeMode.toLowerCase().contains("replace")) {
+ if (onConflictColumns == null || onConflictColumns.size() == 0) {
+ throw DataXException
+ .asDataXException(
+ DatabendWriterErrorCode.CONF_ERROR,
+ String.format(
+ "Replace mode must have onConflictColumn config."
+ ));
+ }
- StringBuilder writeDataSqlTemplate = new StringBuilder();
- writeDataSqlTemplate.append("INSERT INTO %s");
- StringJoiner columnString = new StringJoiner(",");
+ // for databend if you want to use replace mode, the writeMode should be: "writeMode": "replace"
+ writeDataSqlTemplate.append("REPLACE INTO %s (")
+ .append(StringUtils.join(columns, ",")).append(") ").append(onConFlictDoString(onConflictColumns))
+ .append(" VALUES");
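+ // 拼接出的模板形如:REPLACE INTO %s (col1,col2) ON (id) VALUES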
- for (String column : columns) {
- columnString.add(column);
+ LOG.info("Replace data [\n{}\n], which jdbcUrl like:[{}]", writeDataSqlTemplate, jdbcUrl);
+ originalConfig.set(Constant.INSERT_OR_REPLACE_TEMPLATE_MARK, writeDataSqlTemplate);
+ } else {
+ writeDataSqlTemplate.append("INSERT INTO %s");
+ StringJoiner columnString = new StringJoiner(",");
+
+ for (String column : columns) {
+ columnString.add(column);
+ }
+ writeDataSqlTemplate.append(String.format("(%s)", columnString));
+ writeDataSqlTemplate.append(" VALUES");
+
+ LOG.info("Insert data [\n{}\n], which jdbcUrl like:[{}]", writeDataSqlTemplate, jdbcUrl);
+
+ originalConfig.set(Constant.INSERT_OR_REPLACE_TEMPLATE_MARK, writeDataSqlTemplate);
}
- writeDataSqlTemplate.append(String.format("(%s)", columnString));
- writeDataSqlTemplate.append(" VALUES");
- LOG.info("Write data [\n{}\n], which jdbcUrl like:[{}]", writeDataSqlTemplate, jdbcUrl);
-
- originalConfig.set(Constant.INSERT_OR_REPLACE_TEMPLATE_MARK, writeDataSqlTemplate);
}
-}
\ No newline at end of file
+
+ public static String onConFlictDoString(List<String> conflictColumns) {
+ return " ON " +
+ "(" +
+ StringUtils.join(conflictColumns, ",") + ") ";
+ }
+}
diff --git a/datax-example/datax-example-core/pom.xml b/datax-example/datax-example-core/pom.xml
new file mode 100644
index 00000000..6a2e9e8e
--- /dev/null
+++ b/datax-example/datax-example-core/pom.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>com.alibaba.datax</groupId>
+        <artifactId>datax-example</artifactId>
+        <version>0.0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>datax-example-core</artifactId>
+
+    <properties>
+        <maven.compiler.source>8</maven.compiler.source>
+        <maven.compiler.target>8</maven.compiler.target>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    </properties>
+
+</project>
\ No newline at end of file
diff --git a/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/ExampleContainer.java b/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/ExampleContainer.java
new file mode 100644
index 00000000..a4229fd1
--- /dev/null
+++ b/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/ExampleContainer.java
@@ -0,0 +1,26 @@
+package com.alibaba.datax.example;
+
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.core.Engine;
+import com.alibaba.datax.example.util.ExampleConfigParser;
+
+/**
+ * {@code Date} 2023/8/6 11:22
+ *
+ * @author fuyouj
+ */
+
+public class ExampleContainer {
+ /**
+ * example对外暴露的启动入口
+ * 使用前最好看下 datax-example/doc/README.MD
+ * @param jobPath 任务json绝对路径
+ */
+ public static void start(String jobPath) {
+
+ Configuration configuration = ExampleConfigParser.parse(jobPath);
+
+ Engine engine = new Engine();
+ engine.start(configuration);
+ }
+}
diff --git a/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/Main.java b/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/Main.java
new file mode 100644
index 00000000..56bf9f0b
--- /dev/null
+++ b/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/Main.java
@@ -0,0 +1,23 @@
+package com.alibaba.datax.example;
+
+
+import com.alibaba.datax.example.util.PathUtil;
+
+/**
+ * @author fuyouj
+ */
+public class Main {
+
+ /**
+ * 1. 在example模块pom文件添加你依赖的调试插件,
+ * 你可以直接打开本模块的pom文件,参考其中是如何引入streamreader、streamwriter的
+ * 2. 在此处指定你的job文件
+ */
+ public static void main(String[] args) {
+
+ String classPathJobPath = "/job/stream2stream.json";
+ String absJobPath = PathUtil.getAbsolutePathFromClassPath(classPathJobPath);
+ ExampleContainer.start(absJobPath);
+ }
+
+}
diff --git a/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/util/ExampleConfigParser.java b/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/util/ExampleConfigParser.java
new file mode 100644
index 00000000..6bbb4a23
--- /dev/null
+++ b/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/util/ExampleConfigParser.java
@@ -0,0 +1,154 @@
+package com.alibaba.datax.example.util;
+
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.core.util.ConfigParser;
+import com.alibaba.datax.core.util.FrameworkErrorCode;
+import com.alibaba.datax.core.util.container.CoreConstant;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.nio.file.Paths;
+import java.util.*;
+
+/**
+ * @author fuyouj
+ */
+public class ExampleConfigParser {
+ private static final String CORE_CONF = "/example/conf/core.json";
+
+ private static final String PLUGIN_DESC_FILE = "plugin.json";
+
+ /**
+ * 指定Job配置路径,ConfigParser会解析Job、Plugin、Core全部信息,并以Configuration返回
+ * 不同于Core的ConfigParser,这里的core,plugin 不依赖于编译后的datax.home,而是扫描程序编译后的target目录
+ */
+ public static Configuration parse(final String jobPath) {
+
+ Configuration configuration = ConfigParser.parseJobConfig(jobPath);
+ configuration.merge(coreConfig(),
+ false);
+
+ Map<String, String> pluginTypeMap = new HashMap<>();
+ String readerName = configuration.getString(CoreConstant.DATAX_JOB_CONTENT_READER_NAME);
+ String writerName = configuration.getString(CoreConstant.DATAX_JOB_CONTENT_WRITER_NAME);
+ pluginTypeMap.put(readerName, "reader");
+ pluginTypeMap.put(writerName, "writer");
+ Configuration pluginsDescConfig = parsePluginsConfig(pluginTypeMap);
+ configuration.merge(pluginsDescConfig, false);
+ return configuration;
+ }
+
+ private static Configuration parsePluginsConfig(Map<String, String> pluginTypeMap) {
+
+ Configuration configuration = Configuration.newDefault();
+
+ //最初打算通过user.dir获取工作目录来扫描插件,
+ //但是user.dir在不同环境下有一些不确定性,所以废弃了这个选择
+
+ for (File basePackage : runtimeBasePackages()) {
+ if (pluginTypeMap.isEmpty()) {
+ break;
+ }
+ scanPluginByPackage(basePackage, configuration, basePackage.listFiles(), pluginTypeMap);
+ }
+ if (!pluginTypeMap.isEmpty()) {
+ String failedPlugin = pluginTypeMap.keySet().toString();
+            String message = "\nplugin %s load failed: try to analyze the reasons from the following aspects.\n" +
+                    "1: Check if the name of the plugin is spelled correctly, and verify whether DataX supports this plugin\n" +
+                    "2: Verify if the <resource></resource> tag has been added under the <build></build> section in the pom file of the relevant plugin.\n" +
+                    "    <resource>\n" +
+                    "        <directory>src/main/resources</directory>\n" +
+                    "        <includes>\n" +
+                    "            <include>**/*.*</include>\n" +
+                    "        </includes>\n" +
+                    "        <filtering>true</filtering>\n" +
+                    "    </resource>\n [Refer to the streamreader pom file] \n" +
+ message = String.format(message, failedPlugin);
+ throw DataXException.asDataXException(FrameworkErrorCode.PLUGIN_INIT_ERROR, message);
+ }
+ return configuration;
+ }
+
+ /**
+ * 通过classLoader获取程序编译的输出目录
+ *
+ * @return File[/datax-example/target/classes,xxReader/target/classes,xxWriter/target/classes]
+ */
+ private static File[] runtimeBasePackages() {
+ List<File> basePackages = new ArrayList<>();
+ ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
+ Enumeration<URL> resources = null;
+ try {
+ resources = classLoader.getResources("");
+ } catch (IOException e) {
+ throw DataXException.asDataXException(e.getMessage());
+ }
+
+ while (resources.hasMoreElements()) {
+ URL resource = resources.nextElement();
+ File file = new File(resource.getFile());
+ if (file.isDirectory()) {
+ basePackages.add(file);
+ }
+ }
+
+ return basePackages.toArray(new File[0]);
+ }
+
+ /**
+ * @param packageFile 编译出来的target/classes根目录 便于找到插件时设置插件的URL目录,设置根目录是最保险的方式
+ * @param configuration pluginConfig
+ * @param files 待扫描文件
+ * @param needPluginTypeMap 需要的插件
+ */
+ private static void scanPluginByPackage(File packageFile,
+ Configuration configuration,
+ File[] files,
+ Map<String, String> needPluginTypeMap) {
+ if (files == null) {
+ return;
+ }
+ for (File file : files) {
+ if (file.isFile() && PLUGIN_DESC_FILE.equals(file.getName())) {
+ Configuration pluginDesc = Configuration.from(file);
+ String descPluginName = pluginDesc.getString("name", "");
+
+ if (needPluginTypeMap.containsKey(descPluginName)) {
+
+ String type = needPluginTypeMap.get(descPluginName);
+ configuration.merge(parseOnePlugin(packageFile.getAbsolutePath(), type, descPluginName, pluginDesc), false);
+ needPluginTypeMap.remove(descPluginName);
+
+ }
+ } else {
+ scanPluginByPackage(packageFile, configuration, file.listFiles(), needPluginTypeMap);
+ }
+ }
+ }
+
+
+ private static Configuration parseOnePlugin(String packagePath,
+ String pluginType,
+ String pluginName,
+ Configuration pluginDesc) {
+ //设置path 兼容jarLoader的加载方式URLClassLoader
+ pluginDesc.set("path", packagePath);
+ Configuration pluginConfInJob = Configuration.newDefault();
+ pluginConfInJob.set(
+ String.format("plugin.%s.%s", pluginType, pluginName),
+ pluginDesc.getInternal());
+ return pluginConfInJob;
+ }
+
+ private static Configuration coreConfig() {
+ try {
+ URL resource = ExampleConfigParser.class.getResource(CORE_CONF);
+ return Configuration.from(Paths.get(resource.toURI()).toFile());
+ } catch (Exception ignore) {
+ throw DataXException.asDataXException("Failed to load the configuration file core.json. " +
+ "Please check whether /example/conf/core.json exists!");
+ }
+ }
+}
diff --git a/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/util/PathUtil.java b/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/util/PathUtil.java
new file mode 100644
index 00000000..e197fa73
--- /dev/null
+++ b/datax-example/datax-example-core/src/main/java/com/alibaba/datax/example/util/PathUtil.java
@@ -0,0 +1,26 @@
+package com.alibaba.datax.example.util;
+
+
+import com.alibaba.datax.common.exception.DataXException;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.Paths;
+
+/**
+ * @author fuyouj
+ */
+public class PathUtil {
+ public static String getAbsolutePathFromClassPath(String path) {
+ URL resource = PathUtil.class.getResource(path);
+ try {
+ assert resource != null;
+ URI uri = resource.toURI();
+ return Paths.get(uri).toString();
+ } catch (NullPointerException | URISyntaxException e) {
+ throw DataXException.asDataXException("path error,please check whether the path is correct");
+ }
+
+ }
+}
diff --git a/datax-example/datax-example-core/src/main/resources/example/conf/core.json b/datax-example/datax-example-core/src/main/resources/example/conf/core.json
new file mode 100755
index 00000000..33281ac0
--- /dev/null
+++ b/datax-example/datax-example-core/src/main/resources/example/conf/core.json
@@ -0,0 +1,60 @@
+{
+ "entry": {
+ "jvm": "-Xms1G -Xmx1G",
+ "environment": {}
+ },
+ "common": {
+ "column": {
+ "datetimeFormat": "yyyy-MM-dd HH:mm:ss",
+ "timeFormat": "HH:mm:ss",
+ "dateFormat": "yyyy-MM-dd",
+ "extraFormats":["yyyyMMdd"],
+ "timeZone": "GMT+8",
+ "encoding": "utf-8"
+ }
+ },
+ "core": {
+ "dataXServer": {
+ "address": "http://localhost:7001/api",
+ "timeout": 10000,
+ "reportDataxLog": false,
+ "reportPerfLog": false
+ },
+ "transport": {
+ "channel": {
+ "class": "com.alibaba.datax.core.transport.channel.memory.MemoryChannel",
+ "speed": {
+ "byte": -1,
+ "record": -1
+ },
+ "flowControlInterval": 20,
+ "capacity": 512,
+ "byteCapacity": 67108864
+ },
+ "exchanger": {
+ "class": "com.alibaba.datax.core.plugin.BufferedRecordExchanger",
+ "bufferSize": 32
+ }
+ },
+ "container": {
+ "job": {
+ "reportInterval": 10000
+ },
+ "taskGroup": {
+ "channel": 5
+ },
+ "trace": {
+ "enable": "false"
+ }
+
+ },
+ "statistics": {
+ "collector": {
+ "plugin": {
+ "taskClass": "com.alibaba.datax.core.statistics.plugin.task.StdoutPluginCollector",
+ "maxDirtyNumber": 10
+ }
+ }
+ }
+ }
+}
diff --git a/datax-example/datax-example-core/src/test/java/com/alibaba/datax/example/util/PathUtilTest.java b/datax-example/datax-example-core/src/test/java/com/alibaba/datax/example/util/PathUtilTest.java
new file mode 100644
index 00000000..8985b54c
--- /dev/null
+++ b/datax-example/datax-example-core/src/test/java/com/alibaba/datax/example/util/PathUtilTest.java
@@ -0,0 +1,19 @@
+package com.alibaba.datax.example.util;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * {@code Author} FuYouJ
+ * {@code Date} 2023/8/19 21:38
+ */
+
+public class PathUtilTest {
+
+ @Test
+ public void testParseClassPathFile() {
+ String path = "/pathTest.json";
+ String absolutePathFromClassPath = PathUtil.getAbsolutePathFromClassPath(path);
+ Assert.assertNotNull(absolutePathFromClassPath);
+ }
+}
diff --git a/datax-example/datax-example-core/src/test/resources/pathTest.json b/datax-example/datax-example-core/src/test/resources/pathTest.json
new file mode 100644
index 00000000..9e26dfee
--- /dev/null
+++ b/datax-example/datax-example-core/src/test/resources/pathTest.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/datax-example/datax-example-neo4j/pom.xml b/datax-example/datax-example-neo4j/pom.xml
new file mode 100644
index 00000000..303b14a8
--- /dev/null
+++ b/datax-example/datax-example-neo4j/pom.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>com.alibaba.datax</groupId>
+        <artifactId>datax-example</artifactId>
+        <version>0.0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>datax-example-neo4j</artifactId>
+
+    <properties>
+        <maven.compiler.source>8</maven.compiler.source>
+        <maven.compiler.target>8</maven.compiler.target>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <test.container.version>1.17.6</test.container.version>
+        <neo4j-java-driver.version>4.4.9</neo4j-java-driver.version>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>datax-example-core</artifactId>
+            <version>0.0.1-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>org.testcontainers</groupId>
+            <artifactId>testcontainers</artifactId>
+            <version>${test.container.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>neo4jwriter</artifactId>
+            <version>0.0.1-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>datax-example-streamreader</artifactId>
+            <version>0.0.1-SNAPSHOT</version>
+        </dependency>
+    </dependencies>
+</project>
\ No newline at end of file
diff --git a/datax-example/datax-example-neo4j/src/test/java/com/alibaba/datax/example/neo4j/StreamReader2Neo4jWriterTest.java b/datax-example/datax-example-neo4j/src/test/java/com/alibaba/datax/example/neo4j/StreamReader2Neo4jWriterTest.java
new file mode 100644
index 00000000..9cf01253
--- /dev/null
+++ b/datax-example/datax-example-neo4j/src/test/java/com/alibaba/datax/example/neo4j/StreamReader2Neo4jWriterTest.java
@@ -0,0 +1,138 @@
+package com.alibaba.datax.example.neo4j;
+
+import com.alibaba.datax.example.ExampleContainer;
+import com.alibaba.datax.example.util.PathUtil;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.neo4j.driver.*;
+import org.neo4j.driver.types.Node;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testcontainers.containers.GenericContainer;
+import org.testcontainers.containers.Network;
+import org.testcontainers.containers.output.Slf4jLogConsumer;
+import org.testcontainers.lifecycle.Startables;
+import org.testcontainers.shaded.org.awaitility.Awaitility;
+import org.testcontainers.utility.DockerImageName;
+import org.testcontainers.utility.DockerLoggerFactory;
+
+import java.net.URI;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
+
+/**
+ * {@code Author} FuYouJ
+ * {@code Date} 2023/8/19 21:48
+ */
+
+public class StreamReader2Neo4jWriterTest {
+ private static final Logger LOGGER = LoggerFactory.getLogger(StreamReader2Neo4jWriterTest.class);
+ private static final String CONTAINER_IMAGE = "neo4j:5.9.0";
+
+ private static final String CONTAINER_HOST = "neo4j-host";
+ private static final int HTTP_PORT = 7474;
+ private static final int BOLT_PORT = 7687;
+ private static final String CONTAINER_NEO4J_USERNAME = "neo4j";
+ private static final String CONTAINER_NEO4J_PASSWORD = "Test@12343";
+ private static final URI CONTAINER_URI = URI.create("neo4j://localhost:" + BOLT_PORT);
+
+ protected static final Network NETWORK = Network.newNetwork();
+
+ private GenericContainer<?> container;
+ protected Driver neo4jDriver;
+ protected Session neo4jSession;
+ private static final int CHANNEL = 5;
+ private static final int READER_NUM = 10;
+
+ @Before
+ public void init() {
+ DockerImageName imageName = DockerImageName.parse(CONTAINER_IMAGE);
+ container =
+ new GenericContainer<>(imageName)
+ .withNetwork(NETWORK)
+ .withNetworkAliases(CONTAINER_HOST)
+ .withExposedPorts(HTTP_PORT, BOLT_PORT)
+ .withEnv(
+ "NEO4J_AUTH",
+ CONTAINER_NEO4J_USERNAME + "/" + CONTAINER_NEO4J_PASSWORD)
+ .withEnv("apoc.export.file.enabled", "true")
+ .withEnv("apoc.import.file.enabled", "true")
+ .withEnv("apoc.import.file.use_neo4j_config", "true")
+ .withEnv("NEO4J_PLUGINS", "[\"apoc\"]")
+ .withLogConsumer(
+ new Slf4jLogConsumer(
+ DockerLoggerFactory.getLogger(CONTAINER_IMAGE)));
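+ // bind fixed host ports so the job config can reach the container at bolt://localhost:7687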
+ container.setPortBindings(
+ Arrays.asList(
+ String.format("%s:%s", HTTP_PORT, HTTP_PORT),
+ String.format("%s:%s", BOLT_PORT, BOLT_PORT)));
+ Startables.deepStart(Stream.of(container)).join();
+ LOGGER.info("container started");
+ Awaitility.given()
+ .ignoreExceptions()
+ .await()
+ .atMost(30, TimeUnit.SECONDS)
+ .untilAsserted(this::initConnection);
+ }
+
+ // run the whole job through the example module so problems anywhere in the neo4jwriter flow surface early
+ @Test
+ public void streamReader2Neo4j() {
+
+ deleteHistoryIfExist();
+
+ String path = "/streamreader2neo4j.json";
+ String jobPath = PathUtil.getAbsolutePathFromClassPath(path);
+
+ ExampleContainer.start(jobPath);
+
+ // verify the result set against the channel count and the reader's mock data
+ verifyWriteResult();
+ }
+
+ private void deleteHistoryIfExist() {
+ String query = "match (n:StreamReader) return n limit 1";
+ String delete = "match (n:StreamReader) delete n";
+ if (neo4jSession.run(query).hasNext()) {
+ neo4jSession.run(delete);
+ }
+ }
+
+ private void verifyWriteResult() {
+ int total = CHANNEL * READER_NUM;
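+ // streamreader emits sliceRecordCount (= READER_NUM) records per channel,
+ // so 5 channels * 10 records = 50 StreamReader nodes are expected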
+ String query = "match (n:StreamReader) return n";
+ Result run = neo4jSession.run(query);
+ int count = 0;
+ while (run.hasNext()) {
+ Record record = run.next();
+ Node node = record.get("n").asNode();
+ if (node.hasLabel("StreamReader")) {
+ count++;
+ }
+ }
+ Assert.assertEquals(total, count);
+ }
+ @After
+ public void destroy() {
+ if (neo4jSession != null) {
+ neo4jSession.close();
+ }
+ if (neo4jDriver != null) {
+ neo4jDriver.close();
+ }
+ if (container != null) {
+ container.close();
+ }
+ }
+
+ private void initConnection() {
+ neo4jDriver =
+ GraphDatabase.driver(
+ CONTAINER_URI,
+ AuthTokens.basic(CONTAINER_NEO4J_USERNAME, CONTAINER_NEO4J_PASSWORD));
+ neo4jSession = neo4jDriver.session(SessionConfig.forDatabase("neo4j"));
+ }
+}
diff --git a/datax-example/datax-example-neo4j/src/test/resources/streamreader2neo4j.json b/datax-example/datax-example-neo4j/src/test/resources/streamreader2neo4j.json
new file mode 100644
index 00000000..3d543ce3
--- /dev/null
+++ b/datax-example/datax-example-neo4j/src/test/resources/streamreader2neo4j.json
@@ -0,0 +1,51 @@
+{
+ "job": {
+ "content": [
+ {
+ "reader": {
+ "name": "streamreader",
+ "parameter": {
+ "sliceRecordCount": 10,
+ "column": [
+ {
+ "type": "string",
+ "value": "StreamReader"
+ },
+ {
+ "type": "string",
+ "value": "1997"
+ }
+ ]
+ }
+ },
+ "writer": {
+ "name": "neo4jWriter",
+ "parameter": {
+ "uri": "bolt://localhost:7687",
+ "username":"neo4j",
+ "password":"Test@12343",
+ "database":"neo4j",
+ "cypher": "unwind $batch as row CALL apoc.cypher.doIt( 'create (n:`' + row.Label + '`{id:$id})' ,{id: row.id} ) YIELD value RETURN 1 ",
+ "batchDataVariableName": "batch",
+ "batchSize": "3",
+ "properties": [
+ {
+ "name": "Label",
+ "type": "string"
+ },
+ {
+ "name": "id",
+ "type": "STRING"
+ }
+ ]
+ }
+ }
+ }
+ ],
+ "setting": {
+ "speed": {
+ "channel": 5
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/datax-example/datax-example-streamreader/pom.xml b/datax-example/datax-example-streamreader/pom.xml
new file mode 100644
index 00000000..ea70de10
--- /dev/null
+++ b/datax-example/datax-example-streamreader/pom.xml
@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>com.alibaba.datax</groupId>
+        <artifactId>datax-example</artifactId>
+        <version>0.0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>datax-example-streamreader</artifactId>
+
+    <properties>
+        <maven.compiler.source>8</maven.compiler.source>
+        <maven.compiler.target>8</maven.compiler.target>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>datax-example-core</artifactId>
+            <version>0.0.1-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>streamreader</artifactId>
+            <version>0.0.1-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>streamwriter</artifactId>
+            <version>0.0.1-SNAPSHOT</version>
+        </dependency>
+    </dependencies>
+</project>
\ No newline at end of file
diff --git a/datax-example/datax-example-streamreader/src/test/java/com/alibaba/datax/example/streamreader/StreamReader2StreamWriterTest.java b/datax-example/datax-example-streamreader/src/test/java/com/alibaba/datax/example/streamreader/StreamReader2StreamWriterTest.java
new file mode 100644
index 00000000..71d083d0
--- /dev/null
+++ b/datax-example/datax-example-streamreader/src/test/java/com/alibaba/datax/example/streamreader/StreamReader2StreamWriterTest.java
@@ -0,0 +1,19 @@
+package com.alibaba.datax.example.streamreader;
+
+import com.alibaba.datax.example.ExampleContainer;
+import com.alibaba.datax.example.util.PathUtil;
+import org.junit.Test;
+
+/**
+ * {@code Author} FuYouJ
+ * {@code Date} 2023/8/14 20:16
+ */
+
+public class StreamReader2StreamWriterTest {
+ @Test
+ public void testStreamReader2StreamWriter() {
+ String path = "/stream2stream.json";
+ String jobPath = PathUtil.getAbsolutePathFromClassPath(path);
+ ExampleContainer.start(jobPath);
+ }
+}
diff --git a/datax-example/datax-example-streamreader/src/test/resources/stream2stream.json b/datax-example/datax-example-streamreader/src/test/resources/stream2stream.json
new file mode 100644
index 00000000..b2a57395
--- /dev/null
+++ b/datax-example/datax-example-streamreader/src/test/resources/stream2stream.json
@@ -0,0 +1,36 @@
+{
+ "job": {
+ "content": [
+ {
+ "reader": {
+ "name": "streamreader",
+ "parameter": {
+ "sliceRecordCount": 10,
+ "column": [
+ {
+ "type": "long",
+ "value": "10"
+ },
+ {
+ "type": "string",
+ "value": "hello,你好,世界-DataX"
+ }
+ ]
+ }
+ },
+ "writer": {
+ "name": "streamwriter",
+ "parameter": {
+ "encoding": "UTF-8",
+ "print": true
+ }
+ }
+ }
+ ],
+ "setting": {
+ "speed": {
+ "channel": 5
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/datax-example/doc/README.md b/datax-example/doc/README.md
new file mode 100644
index 00000000..15f77e87
--- /dev/null
+++ b/datax-example/doc/README.md
@@ -0,0 +1,107 @@
+## [DataX-Example] A module for debugging DataX plugins
+
+### Why this module exists
+
+A DataX sync job is normally launched from the datax.py script, which resolves the packaged datax directory and sets it as the system property datax.home; from then on, loading the core plugins and initializing configuration all depend on datax.home. That is painful for local development. Take one round of locally debugging the streamreader plugin as an example:
+
+- run the Maven package build to produce the datax directory
+- set the system property datax.home in the IDE, or hard-code datax.home in the Engine launcher class (see the sketch after this list)
+- modify the streamreader plugin code
+- run the Maven package build again so the JarLoader can load the latest streamreader classes
+- debug the code
+
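+A minimal sketch of that hard-coding workaround (the class name and paths here are illustrative, not part of the repo):
+
+```java
+public class DebugLauncher {
+    public static void main(String[] args) throws Throwable {
+        // hypothetical local path to the unpacked datax directory
+        System.setProperty("datax.home", "/path/to/datax");
+        // same arguments datax.py would pass to the engine entry point
+        com.alibaba.datax.core.Engine.entry(new String[]{
+                "-mode", "standalone", "-jobid", "-1",
+                "-job", "/path/to/job/stream2stream.json"});
+    }
+}
+```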
+Of these steps, packaging is completely unnecessary and by far the most time-consuming; waiting for it is the painful part.
+
+So I wrote a new module (datax-example) dedicated to local debugging and reproducing bugs. With this module in place, the workflow above shrinks to two steps:
+
+- modify the streamreader plugin code
+- debug the code
+
+
+
+### Directory layout
+The layout below shows how to use datax-example-core to write test cases and validate the code flow.
+
+
+### How it works
+
+- The existing ConfigParser is left untouched; a new ExampleConfigParser is used only by the example module. It does not depend on datax.home, but on the target directories produced by the IDE build.
+- Each module's IDE target directory is used as the class-loading directory for its plugin.
+
+
+
+### How to use it
+1. Modify the plugin's pom file as follows, taking streamreader as an example.
+Before:
+```xml
+<build>
+    <plugins>
+        <plugin>
+            <artifactId>maven-compiler-plugin</artifactId>
+            <configuration>
+                <source>${jdk-version}</source>
+                <target>${jdk-version}</target>
+                <encoding>${project-sourceEncoding}</encoding>
+            </configuration>
+        </plugin>
+    </plugins>
+</build>
+```
+After:
+```xml
+<build>
+    <resources>
+        <resource>
+            <directory>src/main/resources</directory>
+            <includes>
+                <include>**/*.*</include>
+            </includes>
+            <filtering>true</filtering>
+        </resource>
+    </resources>
+    <plugins>
+        <plugin>
+            <artifactId>maven-compiler-plugin</artifactId>
+            <configuration>
+                <source>${jdk-version}</source>
+                <target>${jdk-version}</target>
+                <encoding>${project-sourceEncoding}</encoding>
+            </configuration>
+        </plugin>
+    </plugins>
+</build>
+```
+#### Using it in a test module
+See StreamReader2StreamWriterTest.java under datax-example/datax-example-streamreader:
+```java
+public class StreamReader2StreamWriterTest {
+ @Test
+ public void testStreamReader2StreamWriter() {
+ String path = "/stream2stream.json";
+ String jobPath = PathUtil.getAbsolutePathFromClassPath(path);
+ ExampleContainer.start(jobPath);
+ }
+}
+
+```
+See StreamReader2Neo4jWriterTest under datax-example/datax-example-neo4j:
+```java
+public class StreamReader2Neo4jWriterTest {
+    @Test
+    public void streamReader2Neo4j() {
+
+        deleteHistoryIfExist();
+
+        String path = "/streamreader2neo4j.json";
+        String jobPath = PathUtil.getAbsolutePathFromClassPath(path);
+
+        ExampleContainer.start(jobPath);
+
+        // verify the result set against the channel count and the reader's mock data
+        verifyWriteResult();
+    }
+}
+```
\ No newline at end of file
diff --git a/datax-example/doc/img/img01.png b/datax-example/doc/img/img01.png
new file mode 100644
index 00000000..d0431c1a
Binary files /dev/null and b/datax-example/doc/img/img01.png differ
diff --git a/datax-example/doc/img/img02.png b/datax-example/doc/img/img02.png
new file mode 100644
index 00000000..eec860d4
Binary files /dev/null and b/datax-example/doc/img/img02.png differ
diff --git a/datax-example/doc/img/img03.png b/datax-example/doc/img/img03.png
new file mode 100644
index 00000000..731f81bd
Binary files /dev/null and b/datax-example/doc/img/img03.png differ
diff --git a/datax-example/pom.xml b/datax-example/pom.xml
new file mode 100644
index 00000000..9c4c9200
--- /dev/null
+++ b/datax-example/pom.xml
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>com.alibaba.datax</groupId>
+        <artifactId>datax-all</artifactId>
+        <version>0.0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>datax-example</artifactId>
+    <packaging>pom</packaging>
+    <modules>
+        <module>datax-example-core</module>
+        <module>datax-example-streamreader</module>
+        <module>datax-example-neo4j</module>
+    </modules>
+
+    <properties>
+        <maven.compiler.source>8</maven.compiler.source>
+        <maven.compiler.target>8</maven.compiler.target>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <junit4.version>4.13.2</junit4.version>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>datax-common</artifactId>
+            <version>0.0.1-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>datax-core</artifactId>
+            <version>0.0.1-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>${junit4.version}</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+                <includes>
+                    <include>**/*.*</include>
+                </includes>
+                <filtering>true</filtering>
+            </resource>
+        </resources>
+        <plugins>
+            <plugin>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <configuration>
+                    <source>${jdk-version}</source>
+                    <target>${jdk-version}</target>
+                    <encoding>${project-sourceEncoding}</encoding>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/dataxPluginDev.md b/dataxPluginDev.md
index 4483f270..8c7241bf 100644
--- a/dataxPluginDev.md
+++ b/dataxPluginDev.md
@@ -447,6 +447,9 @@ DataX internal types are implemented with different Java types:
3. Users select a plugin via the `name` field in the `reader`/`writer` job configuration. Based on the plugin type (`reader`/`writer`) and plugin name, the framework scans all jars under the plugin's path and adds them to the `classpath`.
4. Based on the entry class defined in the plugin configuration, the framework instantiates the corresponding `Job` and `Task` objects via reflection.
+### Writing test cases
+1. Create a new plugin test module under the datax-example project and call `ExampleContainer.start(jobPath)` to check whether your code logic behaves correctly. [datax-example usage](https://github.com/alibaba/DataX/blob/master/datax-example/doc/README.md)
+
## 三、Last but not Least
diff --git a/ftpreader/pom.xml b/ftpreader/pom.xml
index 7778d491..57bf889d 100755
--- a/ftpreader/pom.xml
+++ b/ftpreader/pom.xml
@@ -45,7 +45,7 @@
             <groupId>com.jcraft</groupId>
             <artifactId>jsch</artifactId>
-            <version>0.1.51</version>
+            <version>0.1.54</version>
             <groupId>commons-net</groupId>
@@ -89,4 +89,4 @@
-</project>
+</project>
diff --git a/ftpreader/src/main/java/com/alibaba/datax/plugin/reader/ftpreader/SftpHelper.java b/ftpreader/src/main/java/com/alibaba/datax/plugin/reader/ftpreader/SftpHelper.java
index d25b040c..6e42e10c 100644
--- a/ftpreader/src/main/java/com/alibaba/datax/plugin/reader/ftpreader/SftpHelper.java
+++ b/ftpreader/src/main/java/com/alibaba/datax/plugin/reader/ftpreader/SftpHelper.java
@@ -64,6 +64,8 @@ public class SftpHelper extends FtpHelper {
String message = String.format("请确认连接ftp服务器端口是否正确,错误的端口: [%s] ", port);
LOG.error(message);
throw DataXException.asDataXException(FtpReaderErrorCode.FAIL_LOGIN, message, e);
+ }else{
+ throw DataXException.asDataXException(FtpReaderErrorCode.COMMAND_FTP_IO_EXCEPTION, "", e);
}
}else {
if("Auth fail".equals(e.getMessage())){
diff --git a/ftpwriter/pom.xml b/ftpwriter/pom.xml
index 69ec4a07..bf7ce83d 100644
--- a/ftpwriter/pom.xml
+++ b/ftpwriter/pom.xml
@@ -45,7 +45,7 @@
             <groupId>com.jcraft</groupId>
             <artifactId>jsch</artifactId>
-            <version>0.1.51</version>
+            <version>0.1.54</version>
             <groupId>commons-net</groupId>
diff --git a/gaussdbreader/doc/gaussdbreader.md b/gaussdbreader/doc/gaussdbreader.md
new file mode 100644
index 00000000..5caa4d59
--- /dev/null
+++ b/gaussdbreader/doc/gaussdbreader.md
@@ -0,0 +1,297 @@
+
+# GaussDbReader Plugin Documentation
+
+
+___
+
+
+## 1 Quick Introduction
+
+The GaussDbReader plugin reads data from GaussDB. Under the hood, GaussDbReader connects to the remote GaussDB database via JDBC and runs the corresponding SQL statements to SELECT the data out of GaussDB.
+
+## 2 How It Works
+
+In short, GaussDbReader connects to a remote GaussDB database through a JDBC connector, generates SELECT SQL statements from the user configuration and sends them to the remote database, then assembles the results returned by that SQL into an abstract dataset using DataX's own data types and hands it to the downstream Writer.
+
+For table, column and where configuration, GaussDbReader concatenates them into a SQL statement and sends it to GaussDB; for querySql configuration, GaussDbReader sends the SQL to GaussDB as-is.
+
+
+## 3 Functionality
+
+### 3.1 Sample Configuration
+
+* A job that extracts data from a GaussDB database and syncs it to local output:
+
+```
+{
+ "job": {
+ "setting": {
+ "speed": {
+ //transfer speed limit in byte/s; DataX tries to approach but not exceed it.
+ "byte": 1048576
+ },
+ //error limits
+ "errorLimit": {
+ //maximum number of failed records; exceeding it fails the job.
+ "record": 0,
+ //maximum failed-record ratio; 1.0 means 100%, 0.02 means 2%
+ "percentage": 0.02
+ }
+ },
+ "content": [
+ {
+ "reader": {
+ "name": "gaussdbreader",
+ "parameter": {
+ // database username
+ "username": "xx",
+ // database password
+ "password": "xx",
+ "column": [
+ "id","name"
+ ],
+ //split key
+ "splitPk": "id",
+ "connection": [
+ {
+ "table": [
+ "table"
+ ],
+ "jdbcUrl": [
+ "jdbc:opengauss://host:port/database"
+ ]
+ }
+ ]
+ }
+ },
+ "writer": {
+ //writer type
+ "name": "streamwriter",
+ //whether to print the content
+ "parameter": {
+ "print": true
+ }
+ }
+ }
+ ]
+ }
+}
+
+```
+
+* A job that syncs data to local output using custom SQL:
+
+```json
+{
+ "job": {
+ "setting": {
+ "speed": 1048576
+ },
+ "content": [
+ {
+ "reader": {
+ "name": "gaussdbreader",
+ "parameter": {
+ "username": "xx",
+ "password": "xx",
+ "where": "",
+ "connection": [
+ {
+ "querySql": [
+ "select db_id,on_line_flag from db_info where db_id < 10;"
+ ],
+ "jdbcUrl": [
+ "jdbc:opengauss://host:port/database", "jdbc:opengauss://host:port/database"
+ ]
+ }
+ ]
+ }
+ },
+ "writer": {
+ "name": "streamwriter",
+ "parameter": {
+ "print": false,
+ "encoding": "UTF-8"
+ }
+ }
+ }
+ ]
+ }
+}
+```
+
+
+### 3.2 Parameter Reference
+
+* **jdbcUrl**
+
+ * Description: JDBC connection information for the source database, written as a JSON array; multiple connection addresses may be listed for one database. The array form exists because multiple-IP probing is supported inside Alibaba: when several addresses are configured, GaussDbReader probes them in order until it finds a reachable IP, and reports an error if all of them fail. Note that jdbcUrl must be nested inside a connection element. For use outside Alibaba, simply put a single JDBC URL in the array.
+
+ The jdbcUrl follows the official GaussDB format and may carry extra connection-control attributes. See the [GaussDB documentation](https://docs.opengauss.org/zh/docs/3.1.0/docs/Developerguide/java-sql-Connection.html) for details.
+
+ * Required: yes
+
+ * Default: none
+
+* **username**
+
+ * Description: username of the data source
+
+ * Required: yes
+
+ * Default: none
+
+* **password**
+
+ * Description: password for the configured username
+
+ * Required: yes
+
+ * Default: none
+
+* **table**
+
+ * Description: the table(s) to sync, written as a JSON array, so multiple tables can be extracted at once. When multiple tables are configured, the user must ensure they share the same schema; GaussDbReader does not check whether they form one logical table. Note that table must be nested inside a connection element.
+
+ * Required: yes
+
+ * Default: none
+
+* **column**
+
+ * Description: the set of columns to sync from the configured table, written as a JSON array. Use \* to select all columns, e.g. ['\*'].
+
+ Column pruning is supported: you may export only a subset of the columns.
+
+ Column reordering is supported: columns need not follow the table schema order.
+
+ Constants are supported, written in GaussDB syntax:
+ ["id", "'hello'::varchar", "true", "2.5::real", "power(2,3)"]
+ where id is an ordinary column name, 'hello'::varchar is a string constant, true is a boolean, 2.5 is a float, and power(2,3) is a function call.
+
+ **column must be specified explicitly by the user; it may not be empty!**
+
+ * Required: yes
+
+ * Default: none
+
+* **splitPk**
+
+ * Description: when splitPk is set, GaussDbReader shards the data on the given field and DataX launches concurrent tasks for the sync, which can greatly improve throughput.
+
+ It is recommended to use the table's primary key as splitPk, since primary keys are usually evenly distributed and the resulting shards are less prone to data hot spots.
+
+ Currently splitPk only supports sharding on integer columns; `floating-point, string, date and other types are not supported`. If an unsupported type is configured, GaussDbReader reports an error!
+
+ If splitPk is left empty, the table is not split and the data is extracted through a single channel.
+
+ * Required: no
+
+ * Default: empty
+
+* **where**
+
+ * Description: a filter clause. GaussDbReader builds the SQL from the configured column, table and where values and extracts data with it. A common business case is syncing only the current day's data, by setting where to gmt_create > $bizdate. Note: do not set where to limit 10; limit is not a valid SQL where clause.
+
+ The where clause is an effective way to run incremental business syncs. If where is absent or empty, the whole table is synced.
+
+ * Required: no
+
+ * Default: none
+
+* **querySql**
+
+ * Description: for business cases where the where option cannot express the needed filtering, this option lets the user define a custom SELECT. Once querySql is configured, DataX ignores the table and column options and uses this SQL directly to filter the data; for example, to sync the result of a multi-table join, use select a,b from table_a join table_b on table_a.id = table_b.id
+
+ `When querySql is configured, GaussDbReader ignores the table, column and where options entirely`.
+
+ * Required: no
+
+ * Default: none
+
+* **fetchSize**
+
+ * Description: the number of rows fetched from the database server per batch. This value determines how many network round trips DataX makes to the server and can noticeably improve extraction performance.
+
+ `Note: a value that is too large (>2048) may cause the DataX process to run out of memory.`
+
+ * Required: no
+
+ * Default: 1024
+
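+For intuition, fetchSize corresponds to the JDBC statement fetch size. A rough, plugin-independent sketch (the connection details and table are placeholders; the plugin itself wires this through plugin-rdbms-util):
+
+```java
+import java.sql.*;
+
+public class FetchSizeDemo {
+    public static void main(String[] args) throws SQLException {
+        try (Connection conn = DriverManager.getConnection(
+                "jdbc:opengauss://host:port/database", "xx", "xx");
+             Statement stmt = conn.createStatement()) {
+            // rows pulled from the server per network round trip
+            stmt.setFetchSize(1024);
+            try (ResultSet rs = stmt.executeQuery("select id,name from table")) {
+                while (rs.next()) {
+                    // hand each row downstream
+                }
+            }
+        }
+    }
+}
+```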
+
+### 3.3 Type Conversion
+
+GaussDbReader supports most GaussDB types, but a few individual types are not covered; please check your types.
+
+GaussDbReader's conversion table for GaussDB types:
+
+
+| DataX internal type | GaussDB data type |
+| -------- | ----- |
+| Long | bigint, bigserial, integer, smallint, serial |
+| Double | double precision, money, numeric, real |
+| String | varchar, char, text, bit, inet |
+| Date | date, time, timestamp |
+| Boolean | bool |
+| Bytes | bytea |
+
+Please note:
+
+* `Types other than those listed above are not supported; money, inet and bit require an explicit user cast such as a_inet::varchar`.
+
+## 4 Performance Report
+
+### 4.1 Environment
+
+#### 4.1.1 Data Profile
+Table definition:
+
+    create table pref_test(
+        id serial,
+        a_bigint bigint,
+        a_bit bit(10),
+        a_boolean boolean,
+        a_char character(5),
+        a_date date,
+        a_double double precision,
+        a_integer integer,
+        a_money money,
+        a_num numeric(10,2),
+        a_real real,
+        a_smallint smallint,
+        a_text text,
+        a_time time,
+        a_timestamp timestamp
+    )
+
+#### 4.1.2 Machine Specs
+
+* DataX host:
+    1. cpu: 16 cores, Intel(R) Xeon(R) CPU E5620 @ 2.40GHz
+    2. mem: MemTotal: 24676836kB MemFree: 6365080kB
+    3. net: dual 100 Mbps NICs
+
+* GaussDB host:
+    D12, 24 logical cores, 192G RAM, 12*480G SSD array
+
+
+### 4.2 Test Results
+
+#### 4.2.1 Single-Table Results
+
+
+| Channels | Split by primary key | DataX speed (Rec/s) | DataX throughput (MB/s) | DataX host load |
+|--------|--------|--------|--------|--------|
+|1| no | 10211 | 0.63 | 0.2 |
+|1| yes | 10211 | 0.63 | 0.2 |
+|4| no | 10211 | 0.63 | 0.2 |
+|4| yes | 40000 | 2.48 | 0.5 |
+|8| no | 10211 | 0.63 | 0.2 |
+|8| yes | 78048 | 4.84 | 0.8 |
+
+
+Notes:
+
+1. The single table used here has a serial primary key with evenly distributed data.
+2. Without splitting by primary key, adding channels does not improve speed; the result matches a single channel.
diff --git a/gaussdbreader/pom.xml b/gaussdbreader/pom.xml
new file mode 100644
index 00000000..ad2e0ba0
--- /dev/null
+++ b/gaussdbreader/pom.xml
@@ -0,0 +1,86 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>datax-all</artifactId>
+        <groupId>com.alibaba.datax</groupId>
+        <version>0.0.1-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>gaussdbreader</artifactId>
+    <name>gaussdbreader</name>
+    <packaging>jar</packaging>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>datax-common</artifactId>
+            <version>${datax-project-version}</version>
+            <exclusions>
+                <exclusion>
+                    <artifactId>slf4j-log4j12</artifactId>
+                    <groupId>org.slf4j</groupId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>plugin-rdbms-util</artifactId>
+            <version>${datax-project-version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.opengauss</groupId>
+            <artifactId>opengauss-jdbc</artifactId>
+            <version>3.0.0</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <!-- compiler plugin -->
+            <plugin>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <configuration>
+                    <source>${jdk-version}</source>
+                    <target>${jdk-version}</target>
+                    <encoding>${project-sourceEncoding}</encoding>
+                </configuration>
+            </plugin>
+            <!-- assembly plugin -->
+            <plugin>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <configuration>
+                    <descriptors>
+                        <descriptor>src/main/assembly/package.xml</descriptor>
+                    </descriptors>
+                    <finalName>datax</finalName>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>dwzip</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/gaussdbreader/src/main/assembly/package.xml b/gaussdbreader/src/main/assembly/package.xml
new file mode 100755
index 00000000..65601e45
--- /dev/null
+++ b/gaussdbreader/src/main/assembly/package.xml
@@ -0,0 +1,35 @@
+<assembly
+        xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+    <id></id>
+    <formats>
+        <format>dir</format>
+    </formats>
+    <includeBaseDirectory>false</includeBaseDirectory>
+    <fileSets>
+        <fileSet>
+            <directory>src/main/resources</directory>
+            <includes>
+                <include>plugin.json</include>
+                <include>plugin_job_template.json</include>
+            </includes>
+            <outputDirectory>plugin/reader/gaussdbreader</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>target/</directory>
+            <includes>
+                <include>gaussdbreader-0.0.1-SNAPSHOT.jar</include>
+            </includes>
+            <outputDirectory>plugin/reader/gaussdbreader</outputDirectory>
+        </fileSet>
+    </fileSets>
+
+    <dependencySets>
+        <dependencySet>
+            <useProjectArtifact>false</useProjectArtifact>
+            <outputDirectory>plugin/reader/gaussdbreader/libs</outputDirectory>
+            <scope>runtime</scope>
+        </dependencySet>
+    </dependencySets>
+</assembly>
diff --git a/gaussdbreader/src/main/java/com/alibaba/datax/plugin/reader/gaussdbreader/Constant.java b/gaussdbreader/src/main/java/com/alibaba/datax/plugin/reader/gaussdbreader/Constant.java
new file mode 100644
index 00000000..33cdd309
--- /dev/null
+++ b/gaussdbreader/src/main/java/com/alibaba/datax/plugin/reader/gaussdbreader/Constant.java
@@ -0,0 +1,7 @@
+package com.alibaba.datax.plugin.reader.gaussdbreader;
+
+public class Constant {
+
+ public static final int DEFAULT_FETCH_SIZE = 1000;
+
+}
diff --git a/gaussdbreader/src/main/java/com/alibaba/datax/plugin/reader/gaussdbreader/GaussDbReader.java b/gaussdbreader/src/main/java/com/alibaba/datax/plugin/reader/gaussdbreader/GaussDbReader.java
new file mode 100644
index 00000000..ca158ab7
--- /dev/null
+++ b/gaussdbreader/src/main/java/com/alibaba/datax/plugin/reader/gaussdbreader/GaussDbReader.java
@@ -0,0 +1,86 @@
+package com.alibaba.datax.plugin.reader.gaussdbreader;
+
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.RecordSender;
+import com.alibaba.datax.common.spi.Reader;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.rdbms.reader.CommonRdbmsReader;
+import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
+import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
+
+import java.util.List;
+
+public class GaussDbReader extends Reader {
+
+ private static final DataBaseType DATABASE_TYPE = DataBaseType.GaussDB;
+
+ public static class Job extends Reader.Job {
+
+ private Configuration originalConfig;
+ private CommonRdbmsReader.Job commonRdbmsReaderMaster;
+
+ @Override
+ public void init() {
+ this.originalConfig = super.getPluginJobConf();
+ int fetchSize = this.originalConfig.getInt(com.alibaba.datax.plugin.rdbms.reader.Constant.FETCH_SIZE,
+ Constant.DEFAULT_FETCH_SIZE);
+ if (fetchSize < 1) {
+ throw DataXException.asDataXException(DBUtilErrorCode.REQUIRED_VALUE,
+ String.format("您配置的fetchSize有误,根据DataX的设计,fetchSize : [%d] 设置值不能小于 1.", fetchSize));
+ }
+ this.originalConfig.set(com.alibaba.datax.plugin.rdbms.reader.Constant.FETCH_SIZE, fetchSize);
+
+ this.commonRdbmsReaderMaster = new CommonRdbmsReader.Job(DATABASE_TYPE);
+ this.commonRdbmsReaderMaster.init(this.originalConfig);
+ }
+
+ @Override
+ public List<Configuration> split(int adviceNumber) {
+ return this.commonRdbmsReaderMaster.split(this.originalConfig, adviceNumber);
+ }
+
+ @Override
+ public void post() {
+ this.commonRdbmsReaderMaster.post(this.originalConfig);
+ }
+
+ @Override
+ public void destroy() {
+ this.commonRdbmsReaderMaster.destroy(this.originalConfig);
+ }
+
+ }
+
+ public static class Task extends Reader.Task {
+
+ private Configuration readerSliceConfig;
+ private CommonRdbmsReader.Task commonRdbmsReaderSlave;
+
+ @Override
+ public void init() {
+ this.readerSliceConfig = super.getPluginJobConf();
+ this.commonRdbmsReaderSlave = new CommonRdbmsReader.Task(DATABASE_TYPE,super.getTaskGroupId(), super.getTaskId());
+ this.commonRdbmsReaderSlave.init(this.readerSliceConfig);
+ }
+
+ @Override
+ public void startRead(RecordSender recordSender) {
+ int fetchSize = this.readerSliceConfig.getInt(com.alibaba.datax.plugin.rdbms.reader.Constant.FETCH_SIZE);
+
+ this.commonRdbmsReaderSlave.startRead(this.readerSliceConfig, recordSender,
+ super.getTaskPluginCollector(), fetchSize);
+ }
+
+ @Override
+ public void post() {
+ this.commonRdbmsReaderSlave.post(this.readerSliceConfig);
+ }
+
+ @Override
+ public void destroy() {
+ this.commonRdbmsReaderSlave.destroy(this.readerSliceConfig);
+ }
+
+ }
+
+}
diff --git a/gaussdbreader/src/main/resources/plugin.json b/gaussdbreader/src/main/resources/plugin.json
new file mode 100755
index 00000000..7d4ac8de
--- /dev/null
+++ b/gaussdbreader/src/main/resources/plugin.json
@@ -0,0 +1,6 @@
+{
+ "name": "gaussdbreader",
+ "class": "com.alibaba.datax.plugin.reader.gaussdbreader.GaussDbReader",
+ "description": "useScene: prod. mechanism: Jdbc connection using the database, execute select sql, retrieve data from the ResultSet. warn: The more you know about the database, the less problems you encounter.",
+ "developer": "alibaba"
+}
\ No newline at end of file
diff --git a/gaussdbreader/src/main/resources/plugin_job_template.json b/gaussdbreader/src/main/resources/plugin_job_template.json
new file mode 100644
index 00000000..e39220eb
--- /dev/null
+++ b/gaussdbreader/src/main/resources/plugin_job_template.json
@@ -0,0 +1,13 @@
+{
+ "name": "gaussdbreader",
+ "parameter": {
+ "username": "",
+ "password": "",
+ "connection": [
+ {
+ "table": [],
+ "jdbcUrl": []
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/gaussdbwriter/doc/gaussdbwriter.md b/gaussdbwriter/doc/gaussdbwriter.md
new file mode 100644
index 00000000..e65b74d3
--- /dev/null
+++ b/gaussdbwriter/doc/gaussdbwriter.md
@@ -0,0 +1,267 @@
+# DataX GaussDbWriter
+
+
+---
+
+
+## 1 Quick Introduction
+
+The GaussDbWriter plugin writes data into target tables on a GaussDB primary instance. Under the hood, GaussDbWriter connects to the remote GaussDB database via JDBC and writes the data with insert into ... SQL statements, committing to the database in batches internally.
+
+GaussDbWriter is aimed at ETL engineers who use it to load warehouse data into GaussDB; it also serves DBAs and other users as a data-migration tool.
+
+
+## 2 How It Works
+
+GaussDbWriter obtains the protocol data produced by the Reader through the DataX framework and generates the corresponding SQL insert statements from your configuration:
+
+
+* `insert into...` (rows that conflict on a primary key or unique index are not written)
+
+
+
+ Note:
+ 1. The database holding the target table must be a primary instance for writes to succeed. The job needs at least insert into... privileges; whether it needs more depends on the statements you configure in preSql and postSql.
+ 2. Unlike MysqlWriter, GaussDbWriter does not support the writeMode parameter.
+
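+A rough JDBC-level sketch of the batched commits described above (illustrative only; the plugin drives this through plugin-rdbms-util, and the test(id, name) table matches the sample job below):
+
+```java
+import java.sql.*;
+
+public class BatchInsertDemo {
+    public static void main(String[] args) throws SQLException {
+        try (Connection conn = DriverManager.getConnection(
+                "jdbc:opengauss://127.0.0.1:3002/datax", "xx", "xx");
+             PreparedStatement ps = conn.prepareStatement(
+                     "insert into test (id, name) values (?, ?)")) {
+            conn.setAutoCommit(false);
+            for (int i = 0; i < 1024; i++) { // batchSize rows per round trip
+                ps.setInt(1, i);
+                ps.setString(2, "row-" + i);
+                ps.addBatch();
+            }
+            ps.executeBatch();
+            conn.commit();
+        }
+    }
+}
+```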
+
+## 3 Functionality
+
+### 3.1 Sample Configuration
+
+* A job that writes data generated in memory through GaussDbWriter:
+
+```json
+{
+ "job": {
+ "setting": {
+ "speed": {
+ "channel": 1
+ }
+ },
+ "content": [
+ {
+ "reader": {
+ "name": "streamreader",
+ "parameter": {
+ "column" : [
+ {
+ "value": "DataX",
+ "type": "string"
+ },
+ {
+ "value": 19880808,
+ "type": "long"
+ },
+ {
+ "value": "1988-08-08 08:08:08",
+ "type": "date"
+ },
+ {
+ "value": true,
+ "type": "bool"
+ },
+ {
+ "value": "test",
+ "type": "bytes"
+ }
+ ],
+ "sliceRecordCount": 1000
+ }
+ },
+ "writer": {
+ "name": "gaussdbwriter",
+ "parameter": {
+ "username": "xx",
+ "password": "xx",
+ "column": [
+ "id",
+ "name"
+ ],
+ "preSql": [
+ "delete from test"
+ ],
+ "connection": [
+ {
+ "jdbcUrl": "jdbc:opengauss://127.0.0.1:3002/datax",
+ "table": [
+ "test"
+ ]
+ }
+ ]
+ }
+ }
+ }
+ ]
+ }
+}
+
+```
+
+
+### 3.2 Parameter Reference
+
+* **jdbcUrl**
+
+ * Description: JDBC connection information for the target database; jdbcUrl must be nested inside a connection element.
+
+ Note: 1. only one jdbcUrl value may be configured per database.
+ 2. the jdbcUrl follows the official GaussDB format and may carry extra connection attributes; consult the GaussDB documentation or your DBA for details.
+
+
+* Required: yes
+
+* Default: none
+
+* **username**
+
+ * Description: username for the target database
+
+ * Required: yes
+
+ * Default: none
+
+* **password**
+
+ * Description: password for the target database
+
+ * Required: yes
+
+ * Default: none
+
+* **table**
+
+ * Description: the name(s) of the target table(s). Writing to one or more tables is supported; when multiple tables are configured, you must ensure they all share the same structure.
+
+ Note: table and jdbcUrl must be nested inside a connection element
+
+ * Required: yes
+
+ * Default: none
+
+* **column**
+
+ * Description: the target-table fields to write, separated by commas, e.g. "column": ["id","name","age"]. To write all columns in order, use \*, e.g. "column": ["\*"]
+
+ Note: 1. we strongly discourage the \* form, because your job may misbehave or fail once the target table's column count or types change
+ 2. column may not contain any constant values here
+
+ * Required: yes
+
+ * Default: none
+
+* **preSql**
+
+ * Description: standard SQL statements executed before data is written to the target table. If a statement references the target table, write it as `@table`; when the statement actually runs, `@table` is replaced by the real table name. For example, if the job writes to 100 identically structured shard tables (named datax_00, datax_01, ... datax_98, datax_99) and you want each table emptied before the import, configure `"preSql":["delete from @table"]`; the delete then runs against each table right before it is written.
+
+ * Required: no
+
+ * Default: none
+
+* **postSql**
+
+ * Description: standard SQL statements executed after data is written to the target table (same mechanism as preSql).
+
+ * Required: no
+
+ * Default: none
+
+* **batchSize**
+
+ * Description: the number of records submitted in one batch. This value can greatly reduce the number of network round trips between DataX and GaussDB and improve overall throughput, but setting it too high may cause the DataX process to run out of memory.
+
+ * Required: no
+
+ * Default: 1024
+
+### 3.3 Type Conversion
+
+GaussDbWriter supports most GaussDB types, but a few are not covered; please check your types.
+
+GaussDbWriter's conversion table for GaussDB types:
+
+| DataX internal type | GaussDB data type |
+| -------- | ----- |
+| Long | bigint, bigserial, integer, smallint, serial |
+| Double | double precision, money, numeric, real |
+| String | varchar, char, text, bit |
+| Date | date, time, timestamp |
+| Boolean | bool |
+| Bytes | bytea |
+
+## 4 Performance Report
+
+### 4.1 Environment
+
+#### 4.1.1 Data Profile
+Table definition:
+
+    create table pref_test(
+        id serial,
+        a_bigint bigint,
+        a_bit bit(10),
+        a_boolean boolean,
+        a_char character(5),
+        a_date date,
+        a_double double precision,
+        a_integer integer,
+        a_money money,
+        a_num numeric(10,2),
+        a_real real,
+        a_smallint smallint,
+        a_text text,
+        a_time time,
+        a_timestamp timestamp
+    )
+
+#### 4.1.2 Machine Specs
+
+* DataX host:
+    1. cpu: 16 cores, Intel(R) Xeon(R) CPU E5620 @ 2.40GHz
+    2. mem: MemTotal: 24676836kB MemFree: 6365080kB
+    3. net: dual 100 Mbps NICs
+
+* GaussDB host:
+    D12, 24 logical cores, 192G RAM, 12*480G SSD array
+
+
+### 4.2 Test Results
+
+#### 4.2.1 Single-Table Results
+
+| Channels | batchSize | DataX speed (Rec/s) | DataX throughput (MB/s) | DataX host load |
+|--------|--------|--------|--------|--------|
+|1| 128 | 9259 | 0.55 | 0.3 |
+|1| 512 | 10869 | 0.653 | 0.3 |
+|1| 2048 | 9803 | 0.589 | 0.8 |
+|4| 128 | 30303 | 1.82 | 1 |
+|4| 512 | 36363 | 2.18 | 1 |
+|4| 2048 | 36363 | 2.18 | 1 |
+|8| 128 | 57142 | 3.43 | 2 |
+|8| 512 | 66666 | 4.01 | 1.5 |
+|8| 2048 | 66666 | 4.01 | 1.1 |
+|16| 128 | 88888 | 5.34 | 1.8 |
+|16| 2048 | 94117 | 5.65 | 2.5 |
+|32| 512 | 76190 | 4.58 | 3 |
+
+#### 4.2.2 Summary
+1. `The channel count strongly affects performance`
+2. `Going beyond 32 channels for database writes is generally not advisable`
+
+
+## FAQ
+
+***
+
+**Q: If GaussDbWriter fails while executing a postSql statement, has the data already been imported into the target database?**
+
+A: A DataX import has three phases: the pre step, the import itself, and the post step; if any of them fails, the job fails. Since DataX cannot wrap these phases in a single transaction, data may already have landed in the target.
+
+***
+
+**Q: Given the above, some dirty data may end up in the database; what if that affects a production database?**
+
+A: There are currently two remedies. First, configure a pre statement whose SQL clears the data imported that day, so each DataX run starts clean and imports a complete set. Second, import into a temporary table and rename it onto the production table once the import finishes.
+
+***
diff --git a/gaussdbwriter/pom.xml b/gaussdbwriter/pom.xml
new file mode 100644
index 00000000..9da02eff
--- /dev/null
+++ b/gaussdbwriter/pom.xml
@@ -0,0 +1,86 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>datax-all</artifactId>
+        <groupId>com.alibaba.datax</groupId>
+        <version>0.0.1-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>gaussdbwriter</artifactId>
+    <name>gaussdbwriter</name>
+    <packaging>jar</packaging>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>datax-common</artifactId>
+            <version>${datax-project-version}</version>
+            <exclusions>
+                <exclusion>
+                    <artifactId>slf4j-log4j12</artifactId>
+                    <groupId>org.slf4j</groupId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>plugin-rdbms-util</artifactId>
+            <version>${datax-project-version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.opengauss</groupId>
+            <artifactId>opengauss-jdbc</artifactId>
+            <version>3.0.0</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <!-- compiler plugin -->
+            <plugin>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <configuration>
+                    <source>${jdk-version}</source>
+                    <target>${jdk-version}</target>
+                    <encoding>${project-sourceEncoding}</encoding>
+                </configuration>
+            </plugin>
+            <!-- assembly plugin -->
+            <plugin>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <configuration>
+                    <descriptors>
+                        <descriptor>src/main/assembly/package.xml</descriptor>
+                    </descriptors>
+                    <finalName>datax</finalName>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>dwzip</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/gaussdbwriter/src/main/assembly/package.xml b/gaussdbwriter/src/main/assembly/package.xml
new file mode 100755
index 00000000..7167c89d
--- /dev/null
+++ b/gaussdbwriter/src/main/assembly/package.xml
@@ -0,0 +1,35 @@
+<assembly
+        xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+    <id></id>
+    <formats>
+        <format>dir</format>
+    </formats>
+    <includeBaseDirectory>false</includeBaseDirectory>
+    <fileSets>
+        <fileSet>
+            <directory>src/main/resources</directory>
+            <includes>
+                <include>plugin.json</include>
+                <include>plugin_job_template.json</include>
+            </includes>
+            <outputDirectory>plugin/writer/gaussdbwriter</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>target/</directory>
+            <includes>
+                <include>gaussdbwriter-0.0.1-SNAPSHOT.jar</include>
+            </includes>
+            <outputDirectory>plugin/writer/gaussdbwriter</outputDirectory>
+        </fileSet>
+    </fileSets>
+
+    <dependencySets>
+        <dependencySet>
+            <useProjectArtifact>false</useProjectArtifact>
+            <outputDirectory>plugin/writer/gaussdbwriter/libs</outputDirectory>
+            <scope>runtime</scope>
+        </dependencySet>
+    </dependencySets>
+</assembly>
diff --git a/gaussdbwriter/src/main/java/com/alibaba/datax/plugin/reader/gaussdbwriter/GaussDbWriter.java b/gaussdbwriter/src/main/java/com/alibaba/datax/plugin/reader/gaussdbwriter/GaussDbWriter.java
new file mode 100644
index 00000000..3f758ee7
--- /dev/null
+++ b/gaussdbwriter/src/main/java/com/alibaba/datax/plugin/reader/gaussdbwriter/GaussDbWriter.java
@@ -0,0 +1,103 @@
+package com.alibaba.datax.plugin.reader.gaussdbwriter;
+
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.RecordReceiver;
+import com.alibaba.datax.common.spi.Writer;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
+import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
+import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter;
+import com.alibaba.datax.plugin.rdbms.writer.Key;
+
+import java.util.List;
+
+public class GaussDbWriter extends Writer {
+
+ private static final DataBaseType DATABASE_TYPE = DataBaseType.GaussDB;
+
+ public static class Job extends Writer.Job {
+ private Configuration originalConfig = null;
+ private CommonRdbmsWriter.Job commonRdbmsWriterMaster;
+
+ @Override
+ public void init() {
+ this.originalConfig = super.getPluginJobConf();
+
+ // warn: unlike MySQL, GaussDB only supports insert mode, so writeMode must not be configured
+ String writeMode = this.originalConfig.getString(Key.WRITE_MODE);
+ if (null != writeMode) {
+ throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR,
+ String.format("写入模式(writeMode)配置有误. 因为GaussDB不支持配置参数项 writeMode: %s, GaussDB仅使用insert sql 插入数据. 请检查您的配置并作出修改.", writeMode));
+ }
+
+ this.commonRdbmsWriterMaster = new CommonRdbmsWriter.Job(DATABASE_TYPE);
+ this.commonRdbmsWriterMaster.init(this.originalConfig);
+ }
+
+ @Override
+ public void prepare() {
+ this.commonRdbmsWriterMaster.prepare(this.originalConfig);
+ }
+
+ @Override
+ public List<Configuration> split(int mandatoryNumber) {
+ return this.commonRdbmsWriterMaster.split(this.originalConfig, mandatoryNumber);
+ }
+
+ @Override
+ public void post() {
+ this.commonRdbmsWriterMaster.post(this.originalConfig);
+ }
+
+ @Override
+ public void destroy() {
+ this.commonRdbmsWriterMaster.destroy(this.originalConfig);
+ }
+
+ }
+
+ public static class Task extends Writer.Task {
+ private Configuration writerSliceConfig;
+ private CommonRdbmsWriter.Task commonRdbmsWriterSlave;
+
+ @Override
+ public void init() {
+ this.writerSliceConfig = super.getPluginJobConf();
+ this.commonRdbmsWriterSlave = new CommonRdbmsWriter.Task(DATABASE_TYPE){
+ @Override
+ public String calcValueHolder(String columnType){
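+ // GaussDB cannot bind a plain "?" into serial/bigserial/bit columns,
+ // so each placeholder is cast to a concrete SQL type (e.g. "?::int")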
+ if("serial".equalsIgnoreCase(columnType)){
+ return "?::int";
+ }else if("bigserial".equalsIgnoreCase(columnType)){
+ return "?::int8";
+ }else if("bit".equalsIgnoreCase(columnType)){
+ return "?::bit varying";
+ }
+ return "?::" + columnType;
+ }
+ };
+ this.commonRdbmsWriterSlave.init(this.writerSliceConfig);
+ }
+
+ @Override
+ public void prepare() {
+ this.commonRdbmsWriterSlave.prepare(this.writerSliceConfig);
+ }
+
+ @Override
+ public void startWrite(RecordReceiver recordReceiver) {
+ this.commonRdbmsWriterSlave.startWrite(recordReceiver, this.writerSliceConfig, super.getTaskPluginCollector());
+ }
+
+ @Override
+ public void post() {
+ this.commonRdbmsWriterSlave.post(this.writerSliceConfig);
+ }
+
+ @Override
+ public void destroy() {
+ this.commonRdbmsWriterSlave.destroy(this.writerSliceConfig);
+ }
+
+ }
+
+}
diff --git a/gaussdbwriter/src/main/resources/plugin.json b/gaussdbwriter/src/main/resources/plugin.json
new file mode 100755
index 00000000..2f52a167
--- /dev/null
+++ b/gaussdbwriter/src/main/resources/plugin.json
@@ -0,0 +1,6 @@
+{
+ "name": "gaussdbwriter",
+ "class": "com.alibaba.datax.plugin.writer.gaussdbwriter.GaussDbWriter",
+ "description": "useScene: prod. mechanism: Jdbc connection using the database, execute insert sql. warn: The more you know about the database, the less problems you encounter.",
+ "developer": "alibaba"
+}
\ No newline at end of file
diff --git a/gaussdbwriter/src/main/resources/plugin_job_template.json b/gaussdbwriter/src/main/resources/plugin_job_template.json
new file mode 100644
index 00000000..539fa46f
--- /dev/null
+++ b/gaussdbwriter/src/main/resources/plugin_job_template.json
@@ -0,0 +1,16 @@
+{
+ "name": "gaussdbwriter",
+ "parameter": {
+ "username": "",
+ "password": "",
+ "column": [],
+ "connection": [
+ {
+ "jdbcUrl": "",
+ "table": []
+ }
+ ],
+ "preSql": [],
+ "postSql": []
+ }
+}
\ No newline at end of file
diff --git a/hbase11xsqlreader/doc/hbase11xsqlreader.md b/hbase11xsqlreader/doc/hbase11xsqlreader.md
index 03261a1f..9f70077f 100644
--- a/hbase11xsqlreader/doc/hbase11xsqlreader.md
+++ b/hbase11xsqlreader/doc/hbase11xsqlreader.md
@@ -60,12 +60,16 @@ The hbase11xsqlreader plugin reads data from Phoenix (HBase SQL). Under the hoo
 //ZK quorum address of the HBase cluster that Phoenix connects to
 "hbaseConfig": {
 "hbase.zookeeper.quorum": "hb-proxy-xxx-002.hbase.rds.aliyuncs.com,hb-proxy-xxx-001.hbase.rds.aliyuncs.com,hb-proxy-xxx-003.hbase.rds.aliyuncs.com"
- },
+ },
+ //the Phoenix namespace to read
+ "schema": "TAG",
 //the Phoenix table to read
 "table": "US_POPULATION",
 //the columns to read; leave empty to read all columns
 "column": [
- ]
+ ],
+ //query filter condition
+ "where": "id="
}
},
"writer": {
@@ -92,11 +96,18 @@ The hbase11xsqlreader plugin reads data from Phoenix (HBase SQL). Under the hoo
 * Required: yes
+ * Default: none
+* **schema**
+
+ * Description: the Phoenix namespace the table belongs to; set this to the namespace name
+
+ * Required: yes
+
 * Default: none
 * **table**
- * Description: the Phoenix table name; if there is a namespace, set this to 'namespace.tablename'
+ * Description: the Phoenix table name; set this to 'tablename' only (the namespace goes into schema)
* 必选:是
@@ -109,7 +120,13 @@ The hbase11xsqlreader plugin reads data from Phoenix (HBase SQL). Under the hoo
 * Required: yes
 * Default: none
+* **where**
+
+ * Description: the filter condition applied when reading from the Phoenix table.
+ * Optional: yes
+
+ * Default: none
### 3.3 类型转换
@@ -172,11 +189,14 @@ The hbase11xsqlreader plugin reads data from Phoenix (HBase SQL). Under the hoo
 "hbaseConfig": {
 "hbase.zookeeper.quorum": "hb-proxy-xxx-002.hbase.rds.aliyuncs.com,hb-proxy-xxx-001.hbase.rds.aliyuncs.com,hb-proxy-xxx-003.hbase.rds.aliyuncs.com"
 },
+ "schema": "TAG",
 //the Phoenix table to read
 "table": "US_POPULATION",
 //the columns to read; leave empty to read all columns
 "column": [
- ]
+ ],
+ //query filter condition
+ "where": "id="
}
},
"writer": {
@@ -204,7 +224,13 @@ The hbase11xsqlreader plugin reads data from Phoenix (HBase SQL). Under the hoo
 * Required: yes
 * Default: none
-
+* **schema**
+
+ * Description: the Phoenix namespace the table belongs to; set this to the namespace name
+
+ * Required: yes
+
+ * Default: none
 * **table**
 * Description: the Phoenix table name; if there is a namespace, set this to 'namespace.tablename'
@@ -220,7 +246,13 @@ The hbase11xsqlreader plugin reads data from Phoenix (HBase SQL). Under the hoo
 * Required: yes
 * Default: none
+ * **where**
+ * Description: the filter condition applied when reading from the Phoenix table.
+
+ * Optional: yes
+
+ * Default: none
### 3.3 类型转换
diff --git a/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/HbaseSQLHelper.java b/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/HbaseSQLHelper.java
index 71665a6b..cf4304ee 100644
--- a/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/HbaseSQLHelper.java
+++ b/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/HbaseSQLHelper.java
@@ -26,9 +26,7 @@ import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
public class HbaseSQLHelper {
@@ -50,11 +48,15 @@ public class HbaseSQLHelper {
String zkUrl = readerConfig.getZkUrl();
PhoenixConfigurationUtil.setInputClass(conf, PhoenixRecordWritable.class);
- PhoenixConfigurationUtil.setInputTableName(conf, table);
+
+ PhoenixConfigurationUtil.setInputTableName(conf, readerConfig.getSchema()+"."+table);
if (!columns.isEmpty()) {
PhoenixConfigurationUtil.setSelectColumnNames(conf, columns.toArray(new String[columns.size()]));
}
+ if(Objects.nonNull(readerConfig.getWhere())){
+ PhoenixConfigurationUtil.setInputTableConditions(conf,readerConfig.getWhere());
+ }
PhoenixEmbeddedDriver.ConnectionInfo info = null;
try {
info = PhoenixEmbeddedDriver.ConnectionInfo.create(zkUrl);
@@ -67,15 +69,19 @@ public class HbaseSQLHelper {
conf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, info.getPort());
if (info.getRootNode() != null)
conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, info.getRootNode());
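+ // enable Phoenix namespace mapping so schema.table resolves to an HBase namespace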
+ conf.set(Key.NAME_SPACE_MAPPING_ENABLED,"true");
+ conf.set(Key.SYSTEM_TABLES_TO_NAMESPACE,"true");
return conf;
}
- public static List<String> getPColumnNames(String connectionString, String tableName) throws SQLException {
- Connection con =
- DriverManager.getConnection(connectionString);
+ public static List<String> getPColumnNames(String connectionString, String tableName, String schema) throws SQLException {
+ Properties pro = new Properties();
+ pro.put(Key.NAME_SPACE_MAPPING_ENABLED, true);
+ pro.put(Key.SYSTEM_TABLES_TO_NAMESPACE, true);
+ Connection con = DriverManager.getConnection(connectionString,pro);
PhoenixConnection phoenixConnection = con.unwrap(PhoenixConnection.class);
MetaDataClient metaDataClient = new MetaDataClient(phoenixConnection);
- PTable table = metaDataClient.updateCache("", tableName).getTable();
+ PTable table = metaDataClient.updateCache(schema, tableName).getTable();
List<String> columnNames = new ArrayList<String>();
for (PColumn pColumn : table.getColumns()) {
if (!pColumn.getName().getString().equals(SaltingUtil.SALTING_COLUMN_NAME))
diff --git a/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/HbaseSQLReaderConfig.java b/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/HbaseSQLReaderConfig.java
index ab06f6e1..37060986 100644
--- a/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/HbaseSQLReaderConfig.java
+++ b/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/HbaseSQLReaderConfig.java
@@ -9,6 +9,7 @@ import org.slf4j.LoggerFactory;
import java.sql.SQLException;
import java.util.List;
+import java.util.StringJoiner;
public class HbaseSQLReaderConfig {
private final static Logger LOG = LoggerFactory.getLogger(HbaseSQLReaderConfig.class);
@@ -27,6 +28,9 @@ public class HbaseSQLReaderConfig {
private String tableName;
private List<String> columns; // all column names of the target table, primary key or not, excluding the time column
+ private String where; // filter condition
+
+ private String schema; // Phoenix namespace the table belongs to
/**
* @return 获取原始的datax配置
*/
@@ -96,22 +100,27 @@ public class HbaseSQLReaderConfig {
}
String zkQuorum = zkCfg.getFirst();
String znode = zkCfg.getSecond();
+
if (zkQuorum == null || zkQuorum.isEmpty()) {
throw DataXException.asDataXException(
HbaseSQLReaderErrorCode.ILLEGAL_VALUE, "HBase的hbase.zookeeper.quorum配置不能为空" );
}
// 生成sql使用的连接字符串, 格式: jdbc:hbase:zk_quorum:2181:/znode_parent
- cfg.connectionString = "jdbc:phoenix:" + zkQuorum;
- cfg.zkUrl = zkQuorum + ":2181";
+ StringBuilder connectionString=new StringBuilder("jdbc:phoenix:");
+ connectionString.append(zkQuorum);
+ cfg.connectionString = connectionString.toString();
+ StringBuilder zkUrl =new StringBuilder(zkQuorum);
+ cfg.zkUrl = zkUrl.append(":2181").toString();
if (!znode.isEmpty()) {
- cfg.connectionString += cfg.connectionString + ":" + znode;
- cfg.zkUrl += cfg.zkUrl + ":" + znode;
+ cfg.connectionString = connectionString.append(":").append(znode).toString();
+ cfg.zkUrl=zkUrl.append(":").append(znode).toString();
}
}
private static void parseTableConfig(HbaseSQLReaderConfig cfg, Configuration dataxCfg) {
// 解析并检查表名
cfg.tableName = dataxCfg.getString(Key.TABLE);
+ cfg.schema = dataxCfg.getString(Key.SCHEMA);
if (cfg.tableName == null || cfg.tableName.isEmpty()) {
throw DataXException.asDataXException(
HbaseSQLReaderErrorCode.ILLEGAL_VALUE, "HBase的tableName配置不能为空,请检查并修改配置." );
@@ -124,13 +133,14 @@ public class HbaseSQLReaderConfig {
HbaseSQLReaderErrorCode.ILLEGAL_VALUE, "您配置的tableName含有非法字符{0},请检查您的配置.");
} else if (cfg.columns.isEmpty()) {
try {
- cfg.columns = HbaseSQLHelper.getPColumnNames(cfg.connectionString, cfg.tableName);
+ cfg.columns = HbaseSQLHelper.getPColumnNames(cfg.connectionString, cfg.tableName,cfg.schema);
dataxCfg.set(Key.COLUMN, cfg.columns);
} catch (SQLException e) {
throw DataXException.asDataXException(
HbaseSQLReaderErrorCode.GET_PHOENIX_COLUMN_ERROR, "HBase的columns配置不能为空,请添加目标表的列名配置." + e.getMessage(), e);
}
}
+ cfg.where=dataxCfg.getString(Key.WHERE);
}
@Override
@@ -151,6 +161,8 @@ public class HbaseSQLReaderConfig {
ret.append(",");
}
ret.setLength(ret.length() - 1);
+ ret.append("[where=]").append(getWhere());
+ ret.append("[schema=]").append(getSchema());
ret.append("\n");
return ret.toString();
@@ -161,4 +173,20 @@ public class HbaseSQLReaderConfig {
*/
private HbaseSQLReaderConfig() {
}
+
+ public String getWhere() {
+ return where;
+ }
+
+ public void setWhere(String where) {
+ this.where = where;
+ }
+
+ public String getSchema() {
+ return schema;
+ }
+
+ public void setSchema(String schema) {
+ this.schema = schema;
+ }
}
diff --git a/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/HbaseSQLReaderTask.java b/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/HbaseSQLReaderTask.java
index 1ca22c6f..461649d1 100644
--- a/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/HbaseSQLReaderTask.java
+++ b/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/HbaseSQLReaderTask.java
@@ -19,10 +19,8 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.math.BigDecimal;
import java.sql.*;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
+import java.sql.Date;
+import java.util.*;
/**
* Created by admin on 1/3/18.
@@ -42,11 +40,14 @@ public class HbaseSQLReaderTask {
}
private void getPColumns() throws SQLException {
+ Properties pro = new Properties();
+ pro.put(Key.NAME_SPACE_MAPPING_ENABLED, true);
+ pro.put(Key.SYSTEM_TABLES_TO_NAMESPACE, true);
Connection con =
- DriverManager.getConnection(this.readerConfig.getConnectionString());
+ DriverManager.getConnection(this.readerConfig.getConnectionString(),pro);
PhoenixConnection phoenixConnection = con.unwrap(PhoenixConnection.class);
MetaDataClient metaDataClient = new MetaDataClient(phoenixConnection);
- PTable table = metaDataClient.updateCache("", this.readerConfig.getTableName()).getTable();
+ PTable table = metaDataClient.updateCache(this.readerConfig.getSchema(), this.readerConfig.getTableName()).getTable();
List<String> columnNames = this.readerConfig.getColumns();
for (PColumn pColumn : table.getColumns()) {
if (columnNames.contains(pColumn.getName().getString())) {
diff --git a/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/Key.java b/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/Key.java
index 7987d6c8..f8453add 100644
--- a/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/Key.java
+++ b/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/Key.java
@@ -24,5 +24,18 @@ public final class Key {
* 【必选】列配置
*/
public final static String COLUMN = "column";
+ /**
+ * [optional] filter condition for reading from the Phoenix table
+ */
+ public static final String WHERE = "where";
+
+ /**
+ * [optional] schema (namespace) the Phoenix table belongs to; empty by default
+ */
+ public static final String SCHEMA = "schema";
+
+ public static final String NAME_SPACE_MAPPING_ENABLED = "phoenix.schema.isNamespaceMappingEnabled";
+
+ public static final String SYSTEM_TABLES_TO_NAMESPACE = "phoenix.schema.mapSystemTablesToNamespace";
}
diff --git a/hdfsreader/pom.xml b/hdfsreader/pom.xml
index a5c2da2c..de7c0e21 100644
--- a/hdfsreader/pom.xml
+++ b/hdfsreader/pom.xml
@@ -1,5 +1,6 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
     <artifactId>datax-all</artifactId>
     <groupId>com.alibaba.datax</groupId>
@@ -111,6 +112,42 @@
            <version>${datax-project-version}</version>
+
+        <dependency>
+            <groupId>org.apache.parquet</groupId>
+            <artifactId>parquet-column</artifactId>
+            <version>1.12.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.parquet</groupId>
+            <artifactId>parquet-avro</artifactId>
+            <version>1.12.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.parquet</groupId>
+            <artifactId>parquet-common</artifactId>
+            <version>1.12.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.parquet</groupId>
+            <artifactId>parquet-format</artifactId>
+            <version>2.3.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.parquet</groupId>
+            <artifactId>parquet-jackson</artifactId>
+            <version>1.12.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.parquet</groupId>
+            <artifactId>parquet-encoding</artifactId>
+            <version>1.12.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.parquet</groupId>
+            <artifactId>parquet-hadoop</artifactId>
+            <version>1.12.0</version>
+        </dependency>
diff --git a/hdfsreader/src/main/assembly/package.xml b/hdfsreader/src/main/assembly/package.xml
index 3f1393b7..a5f28e5c 100644
--- a/hdfsreader/src/main/assembly/package.xml
+++ b/hdfsreader/src/main/assembly/package.xml
@@ -37,6 +37,28 @@
+        <fileSet>
+            <directory>src/main/libs</directory>
+            <includes>
+                <include>*.*</include>
+            </includes>
+            <outputDirectory>plugin/reader/ossreader/libs</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>src/main/libs</directory>
+            <includes>
+                <include>*.*</include>
+            </includes>
+            <outputDirectory>plugin/reader/hivereader/libs</outputDirectory>
+        </fileSet>
diff --git a/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/Constant.java b/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/Constant.java
index 6bfb9bf7..061c55a0 100644
--- a/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/Constant.java
+++ b/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/Constant.java
@@ -10,4 +10,5 @@ public class Constant {
public static final String CSV = "CSV";
public static final String SEQ = "SEQ";
public static final String RC = "RC";
+ public static final String PARQUET = "PARQUET";
}
diff --git a/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/DFSUtil.java b/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/DFSUtil.java
index 5ba572e1..720f8bf6 100644
--- a/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/DFSUtil.java
+++ b/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/DFSUtil.java
@@ -9,12 +9,16 @@ import com.alibaba.datax.plugin.unstructuredstorage.reader.ColumnEntry;
import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderErrorCode;
import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderUtil;
import com.alibaba.fastjson2.JSON;
+import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONObject;
+import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.io.RCFile;
import org.apache.hadoop.hive.ql.io.RCFileRecordReader;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
@@ -29,14 +33,30 @@ import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.parquet.example.data.Group;
+import org.apache.parquet.hadoop.ParquetReader;
+import org.apache.parquet.hadoop.example.GroupReadSupport;
+import org.apache.parquet.hadoop.util.HadoopInputFile;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.MessageTypeParser;
+import org.apache.parquet.schema.PrimitiveType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.sql.Timestamp;
import java.text.SimpleDateFormat;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
import java.util.*;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
/**
* Created by mingya.wmy on 2015/8/12.
@@ -56,6 +76,10 @@ public class DFSUtil {
public static final String HDFS_DEFAULTFS_KEY = "fs.defaultFS";
public static final String HADOOP_SECURITY_AUTHENTICATION_KEY = "hadoop.security.authentication";
+ private Boolean skipEmptyOrcFile = false;
+
+ private Integer orcFileEmptySize = null;
+
public DFSUtil(Configuration taskConfig) {
hadoopConf = new org.apache.hadoop.conf.Configuration();
@@ -79,6 +103,7 @@ public class DFSUtil {
this.hadoopConf.set(HADOOP_SECURITY_AUTHENTICATION_KEY, "kerberos");
}
this.kerberosAuthentication(this.kerberosPrincipal, this.kerberosKeytabFilePath);
+ this.skipEmptyOrcFile = taskConfig.getBool(Key.SKIP_EMPTY_ORCFILE, false);
LOG.info(String.format("hadoopConfig details:%s", JSON.toJSONString(this.hadoopConf)));
}
@@ -102,10 +127,11 @@ public class DFSUtil {
* @param srcPaths 路径列表
* @param specifiedFileType 指定文件类型
*/
- public HashSet<String> getAllFiles(List<String> srcPaths, String specifiedFileType) {
+ public HashSet<String> getAllFiles(List<String> srcPaths, String specifiedFileType, Boolean skipEmptyOrcFile, Integer orcFileEmptySize) {
this.specifiedFileType = specifiedFileType;
-
+ this.skipEmptyOrcFile = skipEmptyOrcFile;
+ this.orcFileEmptySize = orcFileEmptySize;
if (!srcPaths.isEmpty()) {
for (String eachPath : srcPaths) {
LOG.info(String.format("get HDFS all files in path = [%s]", eachPath));
@@ -127,9 +153,13 @@ public class DFSUtil {
FileStatus stats[] = hdfs.globStatus(path);
for (FileStatus f : stats) {
if (f.isFile()) {
- if (f.getLen() == 0) {
+ long fileLength = f.getLen();
+ if (fileLength == 0) {
String message = String.format("文件[%s]长度为0,将会跳过不作处理!", hdfsPath);
LOG.warn(message);
+ } else if (BooleanUtils.isTrue(this.skipEmptyOrcFile) && this.orcFileEmptySize != null && fileLength <= this.orcFileEmptySize) {
+ String message = String.format("The orc file [%s] is empty, file size: %s, DataX will skip it !", f.getPath().toString(), fileLength);
+ LOG.warn(message);
} else {
addSourceFileByType(f.getPath().toString());
}
@@ -167,7 +197,16 @@ public class DFSUtil {
LOG.info(String.format("[%s] 是目录, 递归获取该目录下的文件", f.getPath().toString()));
getHDFSAllFilesNORegex(f.getPath().toString(), hdfs);
} else if (f.isFile()) {
-
+ long fileLength = f.getLen();
+ if (fileLength == 0) {
+ String message = String.format("The file [%s] is empty, DataX will skip it !", f.getPath().toString());
+ LOG.warn(message);
+ continue;
+ } else if (BooleanUtils.isTrue(this.skipEmptyOrcFile) && this.orcFileEmptySize != null && fileLength <= this.orcFileEmptySize) {
+ String message = String.format("The orc file [%s] is empty, file size: %s, DataX will skip it !", f.getPath().toString(), fileLength);
+ LOG.warn(message);
+ continue;
+ }
addSourceFileByType(f.getPath().toString());
} else {
String message = String.format("该路径[%s]文件类型既不是目录也不是文件,插件自动忽略。",
@@ -332,7 +371,19 @@ public class DFSUtil {
//Each file as a split
//TODO multy threads
// OrcInputFormat getSplits params numSplits not used, splits size = block numbers
- InputSplit[] splits = in.getSplits(conf, -1);
+ InputSplit[] splits;
+ try {
+ splits = in.getSplits(conf, 1);
+ } catch (Exception splitException) {
+ if (Boolean.TRUE.equals(this.skipEmptyOrcFile)) {
+ boolean isOrcFileEmptyException = checkIsOrcEmptyFileExecption(splitException);
+ if (isOrcFileEmptyException) {
+ LOG.info("skipEmptyOrcFile: true, \"{}\" is an empty orc file, skip it!", sourceOrcFilePath);
+ return;
+ }
+ }
+ throw splitException;
+ }
for (InputSplit split : splits) {
{
RecordReader reader = in.getRecordReader(split, conf, Reporter.NULL);
@@ -349,8 +400,11 @@ public class DFSUtil {
Object field = inspector.getStructFieldData(value, fields.get(i));
recordFields.add(field);
}
+ List<ColumnEntry> hivePartitionColumnEntrys = UnstructuredStorageReaderUtil.getListColumnEntry(readerSliceConfig, com.alibaba.datax.plugin.unstructuredstorage.reader.Key.HIVE_PARTION_COLUMN);
+ ArrayList<Column> hivePartitionColumns = UnstructuredStorageReaderUtil.getHivePartitionColumns(sourceOrcFilePath, hivePartitionColumnEntrys);
transportOneRecord(column, recordFields, recordSender,
- taskPluginCollector, isReadAllColumns, nullFormat);
+                                taskPluginCollector, isReadAllColumns, nullFormat, hivePartitionColumns);
}
reader.close();
}
@@ -367,8 +421,20 @@ public class DFSUtil {
}
}
+    private boolean checkIsOrcEmptyFileException(Exception e) {
+        if (e == null) {
+            return false;
+        }
+
+        String fullStackTrace = ExceptionUtils.getStackTrace(e);
+        return fullStackTrace.contains("org.apache.orc.impl.ReaderImpl.getRawDataSizeOfColumn")
+                && fullStackTrace.contains("Caused by: java.lang.IndexOutOfBoundsException: Index: 1, Size: 1");
+    }
+ }
+
    private Record transportOneRecord(List<ColumnEntry> columnConfigs, List<Object> recordFields,
diff --git a/oceanbasev10writer/pom.xml b/oceanbasev10writer/pom.xml
+        <dependency>
+            <groupId>com.oceanbase</groupId>
+            <artifactId>shade-ob-partition-calculator</artifactId>
+            <version>1.0-SNAPSHOT</version>
+            <scope>system</scope>
+            <systemPath>${pom.basedir}/src/main/libs/shade-ob-partition-calculator-1.0-SNAPSHOT.jar</systemPath>
+        </dependency>
-
+
+
         <dependency>
             <groupId>log4j</groupId>
             <artifactId>log4j</artifactId>
             <version>1.2.16</version>
diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/Config.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/Config.java
index 9fa3cd9a..6776196b 100644
--- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/Config.java
+++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/Config.java
@@ -6,6 +6,7 @@ public interface Config {
double DEFAULT_MEMSTORE_THRESHOLD = 0.9d;
+ double DEFAULT_SLOW_MEMSTORE_THRESHOLD = 0.75d;
String MEMSTORE_CHECK_INTERVAL_SECOND = "memstoreCheckIntervalSecond";
long DEFAULT_MEMSTORE_CHECK_INTERVAL_SECOND = 30;
diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/OceanBaseV10Writer.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/OceanBaseV10Writer.java
index 3bcc1019..06292db5 100644
--- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/OceanBaseV10Writer.java
+++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/OceanBaseV10Writer.java
@@ -86,6 +86,7 @@ public class OceanBaseV10Writer extends Writer {
if (tableNumber == 1) {
this.commonJob.prepare(this.originalConfig);
final String version = fetchServerVersion(originalConfig);
+ ObWriterUtils.setObVersion(version);
originalConfig.set(Config.OB_VERSION, version);
}
@@ -187,8 +188,9 @@ public class OceanBaseV10Writer extends Writer {
}
private String fetchServerVersion(Configuration config) {
- final String fetchVersionSql = "show variables like 'version'";
- return DbUtils.fetchSingleValueWithRetry(config, fetchVersionSql);
+ final String fetchVersionSql = "show variables like 'version_comment'";
+ String versionComment = DbUtils.fetchSingleValueWithRetry(config, fetchVersionSql);
+ return versionComment.split(" ")[1];
}
private void checkCompatibleMode(Configuration configure) {
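
For context on the `version_comment` parsing above: on OceanBase the variable typically reads like `OceanBase 3.2.3.3 (r...) ...`, so taking the second whitespace-separated token yields the bare version. A hedged sketch (the sample string is an assumption, not captured output):

```java
// Hedged sketch: extracting the version from a typical version_comment value.
// The sample value below is illustrative; real output varies by build.
public class VersionCommentDemo {
    public static void main(String[] args) {
        String versionComment = "OceanBase 3.2.3.3 (r12345) (Built Feb 2023)";
        String version = versionComment.split(" ")[1]; // same extraction as fetchServerVersion
        System.out.println(version); // 3.2.3.3
    }
}
```

Note that the split assumes the comment always contains at least two tokens; a malformed `version_comment` would make the `[1]` access throw `ArrayIndexOutOfBoundsException`.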
diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/AbstractConnHolder.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/AbstractConnHolder.java
new file mode 100644
index 00000000..c8630cd0
--- /dev/null
+++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/AbstractConnHolder.java
@@ -0,0 +1,48 @@
+package com.alibaba.datax.plugin.writer.oceanbasev10writer.ext;
+
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.rdbms.util.DBUtil;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.Connection;
+
+public abstract class AbstractConnHolder {
+ private static final Logger LOG = LoggerFactory.getLogger(AbstractConnHolder.class);
+
+ protected final Configuration config;
+ protected Connection conn;
+
+ public AbstractConnHolder(Configuration config) {
+ this.config = config;
+ }
+
+ public abstract Connection initConnection();
+
+ public Configuration getConfig() {
+ return config;
+ }
+
+ public Connection getConn() {
+ try {
+ if (conn != null && !conn.isClosed()) {
+ return conn;
+ }
+ } catch (Exception e) {
+ LOG.warn("judge connection is closed or not failed. try to reconnect.", e);
+ }
+ return reconnect();
+ }
+
+ public Connection reconnect() {
+ DBUtil.closeDBResources(null, conn);
+ return initConnection();
+ }
+
+ public abstract String getJdbcUrl();
+
+ public abstract String getUserName();
+
+ public abstract void destroy();
+}
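
A minimal sketch of what a concrete holder has to supply, assuming a plain `DriverManager` connection (the real subclasses below go through `DBUtil` and richer connect info):

```java
// Hedged sketch: the smallest useful AbstractConnHolder subclass.
// DriverManager is used only for illustration; the real holders use DBUtil.
public class SimpleConnHolder extends AbstractConnHolder {
    private final String jdbcUrl;
    private final String user;
    private final String password;

    public SimpleConnHolder(Configuration config, String jdbcUrl, String user, String password) {
        super(config);
        this.jdbcUrl = jdbcUrl;
        this.user = user;
        this.password = password;
    }

    @Override
    public Connection initConnection() {
        try {
            conn = java.sql.DriverManager.getConnection(jdbcUrl, user, password);
            return conn;
        } catch (java.sql.SQLException e) {
            throw new RuntimeException("connect failed: " + jdbcUrl, e);
        }
    }

    @Override public String getJdbcUrl() { return jdbcUrl; }
    @Override public String getUserName() { return user; }
    @Override public void destroy() { DBUtil.closeDBResources(null, conn); }
}
```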
diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/DataBaseWriterBuffer.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/DataBaseWriterBuffer.java
index 53172495..b8ae259a 100644
--- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/DataBaseWriterBuffer.java
+++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/DataBaseWriterBuffer.java
@@ -23,7 +23,7 @@ import org.slf4j.LoggerFactory;
public class DataBaseWriterBuffer {
private static final Logger LOG = LoggerFactory.getLogger(DataBaseWriterBuffer.class);
- private final ConnHolder connHolder;
+ private final AbstractConnHolder connHolder;
private final String dbName;
    private Map<String, LinkedList<Record>> tableBuffer = new HashMap<String, LinkedList<Record>>();
private long lastCheckMemstoreTime;
@@ -33,7 +33,7 @@ public class DataBaseWriterBuffer {
this.dbName=dbName;
}
- public ConnHolder getConnHolder(){
+ public AbstractConnHolder getConnHolder(){
return connHolder;
}
diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/OCJConnHolder.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/OCJConnHolder.java
index 10de5615..262fb1cb 100644
--- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/OCJConnHolder.java
+++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/OCJConnHolder.java
@@ -3,15 +3,13 @@ package com.alibaba.datax.plugin.writer.oceanbasev10writer.ext;
import java.sql.Connection;
import com.alibaba.datax.common.util.Configuration;
-import com.alibaba.datax.plugin.rdbms.util.DBUtil;
-import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
/**
* wrap oceanbase java client
* @author oceanbase
*/
-public class OCJConnHolder extends ConnHolder {
+public class OCJConnHolder extends AbstractConnHolder {
private ServerConnectInfo connectInfo;
private String dataSourceKey;
@@ -28,17 +26,6 @@ public class OCJConnHolder extends ConnHolder {
return conn;
}
- @Override
- public Connection reconnect() {
- DBUtil.closeDBResources(null, conn);
- return initConnection();
- }
-
- @Override
- public Connection getConn() {
- return conn;
- }
-
@Override
public String getJdbcUrl() {
return connectInfo.jdbcUrl;
diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ObClientConnHolder.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ObClientConnHolder.java
index 8ff53039..ac75d359 100644
--- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ObClientConnHolder.java
+++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ObClientConnHolder.java
@@ -16,7 +16,7 @@ import com.alibaba.datax.plugin.writer.oceanbasev10writer.util.ObWriterUtils;
* @author oceanbase
*
*/
-public class ObClientConnHolder extends ConnHolder {
+public class ObClientConnHolder extends AbstractConnHolder {
private final String jdbcUrl;
private final String userName;
private final String password;
diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ServerConnectInfo.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ServerConnectInfo.java
index b0611642..fe8889e1 100644
--- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ServerConnectInfo.java
+++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ServerConnectInfo.java
@@ -1,5 +1,7 @@
package com.alibaba.datax.plugin.writer.oceanbasev10writer.ext;
+import static org.apache.commons.lang3.StringUtils.EMPTY;
+
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -12,40 +14,19 @@ public class ServerConnectInfo {
public String databaseName;
public String ipPort;
public String jdbcUrl;
+ public boolean publicCloud;
+ /**
+ *
+     * @param jdbcUrl format is jdbc:oceanbase://ip:port/dbName
+ * @param username format is cluster:tenant:username or username@tenant#cluster or user@tenant or user
+ * @param password
+ */
public ServerConnectInfo(final String jdbcUrl, final String username, final String password) {
- if (jdbcUrl.startsWith(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING)) {
- String[] ss = jdbcUrl.split(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING_PATTERN);
- if (ss.length != 3) {
- throw new RuntimeException("jdbc url format is not correct: " + jdbcUrl);
- }
- this.userName = username;
- this.clusterName = ss[1].trim().split(":")[0];
- this.tenantName = ss[1].trim().split(":")[1];
- this.jdbcUrl = ss[2].replace("jdbc:mysql:", "jdbc:oceanbase:");
- } else {
- this.jdbcUrl = jdbcUrl.replace("jdbc:mysql:", "jdbc:oceanbase:");
- if (username.contains("@") && username.contains("#")) {
- this.userName = username.substring(0, username.indexOf("@"));
- this.tenantName = username.substring(username.indexOf("@") + 1, username.indexOf("#"));
- this.clusterName = username.substring(username.indexOf("#") + 1);
- } else if (username.contains(":")) {
- String[] config = username.split(":");
- if (config.length != 3) {
- throw new RuntimeException ("username format is not correct: " + username);
- }
- this.clusterName = config[0];
- this.tenantName = config[1];
- this.userName = config[2];
- } else {
- this.clusterName = null;
- this.tenantName = null;
- this.userName = username;
- }
- }
-
+ this.jdbcUrl = jdbcUrl;
this.password = password;
parseJdbcUrl(jdbcUrl);
+ parseFullUserName(username);
}
private void parseJdbcUrl(final String jdbcUrl) {
@@ -56,11 +37,42 @@ public class ServerConnectInfo {
String dbName = matcher.group(2);
this.ipPort = ipPort;
this.databaseName = dbName;
+ this.publicCloud = ipPort.split(":")[0].endsWith("aliyuncs.com");
} else {
throw new RuntimeException("Invalid argument:" + jdbcUrl);
}
}
+ private void parseFullUserName(final String fullUserName) {
+ int tenantIndex = fullUserName.indexOf("@");
+ int clusterIndex = fullUserName.indexOf("#");
+ if (fullUserName.contains(":") && tenantIndex < 0) {
+ String[] names = fullUserName.split(":");
+ if (names.length != 3) {
+ throw new RuntimeException("invalid argument: " + fullUserName);
+ } else {
+ this.clusterName = names[0];
+ this.tenantName = names[1];
+ this.userName = names[2];
+ }
+ } else if (!publicCloud || tenantIndex < 0) {
+ this.userName = tenantIndex < 0 ? fullUserName : fullUserName.substring(0, tenantIndex);
+ this.clusterName = clusterIndex < 0 ? EMPTY : fullUserName.substring(clusterIndex + 1);
+ this.tenantName = tenantIndex < 0 ? EMPTY : fullUserName.substring(tenantIndex + 1, clusterIndex);
+ } else {
+            // In public cloud, a username of the form user@tenant#cluster must be parsed; otherwise the connection cannot be created.
+ this.userName = fullUserName.substring(0, tenantIndex);
+ if (clusterIndex > tenantIndex) {
+ this.tenantName = fullUserName.substring(tenantIndex + 1, clusterIndex);
+ this.clusterName = fullUserName.substring(clusterIndex + 1);
+ } else {
+ this.tenantName = fullUserName.substring(tenantIndex + 1);
+ this.clusterName = EMPTY;
+ }
+ }
+ }
+
+ @Override
public String toString() {
StringBuffer strBuffer = new StringBuffer();
return strBuffer.append("clusterName:").append(clusterName).append(", tenantName:").append(tenantName)
@@ -69,11 +81,18 @@ public class ServerConnectInfo {
}
public String getFullUserName() {
- StringBuilder builder = new StringBuilder(userName);
- if (tenantName != null && clusterName != null) {
- builder.append("@").append(tenantName).append("#").append(clusterName);
+ StringBuilder builder = new StringBuilder();
+ builder.append(userName);
+ if (!EMPTY.equals(tenantName)) {
+ builder.append("@").append(tenantName);
}
+ if (!EMPTY.equals(clusterName)) {
+ builder.append("#").append(clusterName);
+ }
+ if (EMPTY.equals(this.clusterName) && EMPTY.equals(this.tenantName)) {
+ return this.userName;
+ }
return builder.toString();
}
}
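
The three accepted username shapes and how `parseFullUserName` splits them, as a hedged walkthrough (URL and credentials are made up):

```java
// Hedged walkthrough of ServerConnectInfo username parsing; values are illustrative.
public class ConnectInfoDemo {
    public static void main(String[] args) {
        String url = "jdbc:oceanbase://10.0.0.1:2883/db?useUnicode=true";

        ServerConnectInfo a = new ServerConnectInfo(url, "obcluster:tenant1:root", "pwd");
        // cluster:tenant:user form -> clusterName=obcluster, tenantName=tenant1, userName=root

        ServerConnectInfo b = new ServerConnectInfo(url, "root@tenant1#obcluster", "pwd");
        // user@tenant#cluster form -> userName=root, tenantName=tenant1, clusterName=obcluster

        ServerConnectInfo c = new ServerConnectInfo(url, "root", "pwd");
        // bare user -> tenant and cluster stay empty; getFullUserName() returns just "root"
        System.out.println(c.getFullUserName());
    }
}
```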
diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/part/IObPartCalculator.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/part/IObPartCalculator.java
new file mode 100644
index 00000000..b49ade02
--- /dev/null
+++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/part/IObPartCalculator.java
@@ -0,0 +1,19 @@
+package com.alibaba.datax.plugin.writer.oceanbasev10writer.part;
+
+import com.alibaba.datax.common.element.Record;
+
+/**
+ * @author cjyyz
+ * @date 2023/02/07
+ * @since
+ */
+public interface IObPartCalculator {
+
+ /**
+     * Compute the partition id for a record
+ *
+ * @param record
+ * @return Long
+ */
+ Long calculate(Record record);
+}
\ No newline at end of file
diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/part/ObPartitionCalculatorV1.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/part/ObPartitionCalculatorV1.java
new file mode 100644
index 00000000..96985588
--- /dev/null
+++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/part/ObPartitionCalculatorV1.java
@@ -0,0 +1,109 @@
+package com.alibaba.datax.plugin.writer.oceanbasev10writer.part;
+
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ServerConnectInfo;
+import com.alipay.oceanbase.obproxy.data.TableEntryKey;
+import com.alipay.oceanbase.obproxy.util.ObPartitionIdCalculator;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+import java.util.concurrent.TimeUnit;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Partition calculation for OceanBase 1.x and 2.x
+ *
+ * @author cjyyz
+ * @date 2023/02/07
+ * @since
+ */
+public class ObPartitionCalculatorV1 implements IObPartCalculator {
+
+ private static final Logger LOG = LoggerFactory.getLogger(ObPartitionCalculatorV1.class);
+
+ /**
+     * Positions of the partition key columns
+     */
+    private List<Integer> partIndexes;
+
+    /**
+     * All column names of the table
+     */
+    private List<String> columnNames;
+
+ /**
+ * ocj partition calculator
+ */
+ private ObPartitionIdCalculator calculator;
+
+ /**
+ * @param connectInfo
+ * @param table
+ * @param columns
+ */
+    public ObPartitionCalculatorV1(ServerConnectInfo connectInfo, String table, List<String> columns) {
+
+ initCalculator(connectInfo, table);
+
+ if (Objects.isNull(calculator)) {
+ LOG.warn("partCalculator is null");
+ return;
+ }
+
+ this.partIndexes = new ArrayList<>(columns.size());
+ this.columnNames = new ArrayList<>(columns);
+
+ for (int i = 0; i < columns.size(); ++i) {
+ String columnName = columns.get(i);
+ if (calculator.isPartitionKeyColumn(columnName)) {
+ LOG.info(columnName + " is partition key.");
+ partIndexes.add(i);
+ }
+ }
+ }
+
+ /**
+ * @param record
+ * @return Long
+ */
+ @Override
+ public Long calculate(Record record) {
+ if (Objects.isNull(calculator)) {
+ return null;
+ }
+
+ for (Integer i : partIndexes) {
+ calculator.addColumn(columnNames.get(i), record.getColumn(i).asString());
+ }
+ return calculator.calculate();
+ }
+
+ /**
+ * @param connectInfo
+ * @param table
+ */
+ private void initCalculator(ServerConnectInfo connectInfo, String table) {
+
+ LOG.info(String.format("create tableEntryKey with clusterName %s, tenantName %s, databaseName %s, tableName %s",
+ connectInfo.clusterName, connectInfo.tenantName, connectInfo.databaseName, table));
+ TableEntryKey tableEntryKey = new TableEntryKey(connectInfo.clusterName, connectInfo.tenantName,
+ connectInfo.databaseName, table);
+
+ int retry = 0;
+
+ do {
+ try {
+ if (retry > 0) {
+ TimeUnit.SECONDS.sleep(1);
+ LOG.info("retry create new part calculator {} times", retry);
+ }
+ LOG.info("create partCalculator with address: " + connectInfo.ipPort);
+ calculator = new ObPartitionIdCalculator(connectInfo.ipPort, tableEntryKey);
+ } catch (Exception ex) {
+ ++retry;
+ LOG.warn("create new part calculator failed, retry: {}", ex.getMessage());
+ }
+ } while (calculator == null && retry < 3);
+ }
+}
\ No newline at end of file
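
Hypothetical usage of the V1 calculator, mirroring how the writer task routes records (`connectInfo` and `record` come from the surrounding task and are not constructed here):

```java
// Hedged sketch: how a writer task might use the V1 calculator.
Long routeRecord(ServerConnectInfo connectInfo, Record record) {
    IObPartCalculator calc = new ObPartitionCalculatorV1(connectInfo, "t1", java.util.Arrays.asList("c1", "c2"));
    Long partId = calc.calculate(record); // null when the calculator could not be created
    return partId == null ? Long.MAX_VALUE : partId; // unknown partitions share one bucket
}
```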
diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/part/ObPartitionCalculatorV2.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/part/ObPartitionCalculatorV2.java
new file mode 100644
index 00000000..11b7b25c
--- /dev/null
+++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/part/ObPartitionCalculatorV2.java
@@ -0,0 +1,169 @@
+package com.alibaba.datax.plugin.writer.oceanbasev10writer.part;
+
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.plugin.rdbms.util.DBUtil;
+import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
+import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ServerConnectInfo;
+import com.alibaba.datax.plugin.writer.oceanbasev10writer.util.DbUtils;
+import com.oceanbase.partition.calculator.ObPartIdCalculator;
+import com.oceanbase.partition.calculator.enums.ObPartLevel;
+import com.oceanbase.partition.calculator.enums.ObServerMode;
+import com.oceanbase.partition.calculator.helper.TableEntryExtractor;
+import com.oceanbase.partition.calculator.model.TableEntry;
+import com.oceanbase.partition.calculator.model.TableEntryKey;
+import com.oceanbase.partition.calculator.model.Version;
+import com.oceanbase.partition.metadata.desc.ObPartColumn;
+import com.oceanbase.partition.metadata.desc.ObTablePart;
+import java.sql.Connection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Partition calculation for OceanBase 3.x and 4.x
+ *
+ * @author cjyyz
+ * @date 2023/02/07
+ * @since
+ */
+public class ObPartitionCalculatorV2 implements IObPartCalculator {
+
+ private static final Logger LOG = LoggerFactory.getLogger(ObPartitionCalculatorV2.class);
+
+ /**
+     * OB compatibility mode and version info
+ */
+ private ObServerMode mode;
+
+ /**
+     * the ob-partition-calculator partition calculation component
+ */
+ private ObPartIdCalculator calculator;
+
+ /**
+     * Records each column name and its position in the record.
+     * When a partition key of the target table is a generated column, the calculator needs the values of the columns that the generated column depends on.
+     * e.g.
+     * create table t1 (
+     *   c1 varchar(20),
+     *   c2 varchar(20) generated always as (substr(`c1`,1,8))
+     * ) partition by key(c2) partitions 5
+     *
+     * Here columnNameIndexMap contains the entry c1:0,
+     * and the value of column c1 must be fed into {@link com.oceanbase.partition.calculator.ObPartIdCalculator#getRefColumnValues()}
+     */
+    private Map<String, Integer> columnNameIndexMap;
+
+ /**
+ * @param connectInfo
+ * @param table
+ * @param mode
+ */
+    public ObPartitionCalculatorV2(ServerConnectInfo connectInfo, String table, ObServerMode mode, List<String> columns) {
+ this.mode = mode;
+ this.columnNameIndexMap = new HashMap<>();
+ for (int i = 0; i < columns.size(); i++) {
+ columnNameIndexMap.put(columns.get(i).toLowerCase(), i);
+ }
+ initCalculator(connectInfo, table);
+ }
+
+ /**
+ * @param record
+ * @return Long
+ */
+ @Override
+ public Long calculate(Record record) {
+ if (Objects.isNull(calculator)) {
+ return null;
+ }
+ if (!calculator.getTableEntry().isPartitionTable()) {
+ return 0L;
+ }
+ return calculator.calculatePartId(filterNullableColumns(record));
+ }
+
+ /**
+     * Initialize the partition calculation component
+ *
+ * @param connectInfo
+ * @param table
+ */
+ private void initCalculator(ServerConnectInfo connectInfo, String table) {
+ TableEntryKey tableEntryKey = new TableEntryKey(connectInfo.clusterName, connectInfo.tenantName, connectInfo.databaseName, table, mode);
+ boolean subsequentFromV4 = !mode.getVersion().isOlderThan(new Version("4.0.0.0"));
+ try {
+ TableEntry tableEntry;
+            try (Connection conn = getConnection(connectInfo, subsequentFromV4)) {
+                TableEntryExtractor extractor = new TableEntryExtractor();
+                tableEntry = extractor.queryTableEntry(conn, tableEntryKey, subsequentFromV4);
+ }
+ this.calculator = new ObPartIdCalculator(false, tableEntry, subsequentFromV4);
+ } catch (Exception e) {
+ LOG.warn("create new part calculator failed. reason: {}", e.getMessage());
+ }
+ }
+
+ private Connection getConnection(ServerConnectInfo connectInfo, boolean subsequentFromV4) throws Exception {
+        // OceanBase 4.0.0.0 and later use a business-tenant connection for partition calculation
+ if (subsequentFromV4) {
+ return DBUtil.getConnection(DataBaseType.OceanBase, connectInfo.jdbcUrl, connectInfo.getFullUserName(), connectInfo.password);
+ }
+        // Versions before OceanBase 4.0.0.0 use a sys-tenant connection for partition calculation
+ return DbUtils.buildSysConn(connectInfo.jdbcUrl, connectInfo.clusterName);
+ }
+
+ /**
+     * Pass only the partition column values to the partition calculation component
+ *
+ * @param record
+ * @return Object[]
+ */
+ private Object[] filterNullableColumns(Record record) {
+ final ObTablePart tablePart = calculator.getTableEntry().getTablePart();
+
+ final Object[] filteredRecords = new Object[record.getColumnNumber()];
+
+ if (tablePart.getLevel().getIndex() > ObPartLevel.LEVEL_ZERO.getIndex()) {
+            // add level-1 partition values of non-generated columns from the record into the filteredRecords array
+ for (ObPartColumn partColumn : tablePart.getPartColumns()) {
+ if (partColumn.getColumnExpr() == null) {
+ int metaIndex = partColumn.getColumnIndex();
+ String columnName = partColumn.getColumnName().toLowerCase();
+ int idxInRecord = columnNameIndexMap.get(columnName);
+ filteredRecords[metaIndex] = record.getColumn(idxInRecord).asString();
+ }
+
+ }
+            // add level-1 partition values of generated columns from the record into the calculator's ref-column map; column names in ObTablePart.getRefPartColumns are all lowercase
+ for (ObPartColumn partColumn : tablePart.getRefPartColumns()) {
+ String columnName = partColumn.getColumnName();
+ int index = columnNameIndexMap.get(columnName);
+ calculator.addRefColumn(columnName, record.getColumn(index).asString());
+ }
+ }
+
+ if (tablePart.getLevel().getIndex() >= ObPartLevel.LEVEL_TWO.getIndex()) {
+            // add level-2 partition values of non-generated columns from the record into the filteredRecords array
+ for (ObPartColumn partColumn : tablePart.getSubPartColumns()) {
+ if (partColumn.getColumnExpr() == null) {
+ int metaIndex = partColumn.getColumnIndex();
+ String columnName = partColumn.getColumnName().toLowerCase();
+ int idxInRecord = columnNameIndexMap.get(columnName);
+ filteredRecords[metaIndex] = record.getColumn(idxInRecord).asString();
+ }
+
+ }
+            // add level-2 partition values of generated columns from the record into the calculator's ref-column map; column names in ObTablePart.getRefSubPartColumns are all lowercase
+ for (ObPartColumn partColumn : tablePart.getRefSubPartColumns()) {
+ String columnName = partColumn.getColumnName();
+ int index = columnNameIndexMap.get(columnName);
+ calculator.addRefColumn(columnName, record.getColumn(index).asString());
+ }
+ }
+ return filteredRecords;
+ }
+}
\ No newline at end of file
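
A hedged mini-model of the two channels `filterNullableColumns` feeds, using plain maps instead of the real calculator types. For table `t1` from the javadoc above (`c2` generated from `c1`, partition by key(`c2`)):

```java
// Hedged mini-model: non-generated partition columns go positionally into an array,
// while source columns of generated partition keys go through a ref-column channel.
import java.util.HashMap;
import java.util.Map;

public class RefColumnDemo {
    public static void main(String[] args) {
        Map<String, Integer> columnNameIndexMap = new HashMap<>();
        columnNameIndexMap.put("c1", 0);
        columnNameIndexMap.put("c2", 1);

        String[] record = {"20230207-ab", "20230207"};

        // c2 is a generated partition column, so nothing is placed positionally for it;
        // its source column c1 is fed through the ref-column channel instead,
        // standing in here for calculator.addRefColumn(...)
        Map<String, String> refColumns = new HashMap<>();
        refColumns.put("c1", record[columnNameIndexMap.get("c1")]);

        // the calculator then evaluates substr(c1,1,8) itself to derive the partition key
        System.out.println(refColumns);
    }
}
```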
diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/ConcurrentTableWriterTask.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/ConcurrentTableWriterTask.java
index 82b16923..0ad3a1ed 100644
--- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/ConcurrentTableWriterTask.java
+++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/ConcurrentTableWriterTask.java
@@ -1,6 +1,5 @@
package com.alibaba.datax.plugin.writer.oceanbasev10writer.task;
-import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordReceiver;
@@ -11,16 +10,14 @@ import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter;
import com.alibaba.datax.plugin.writer.oceanbasev10writer.Config;
-import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ConnHolder;
+import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.AbstractConnHolder;
import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ObClientConnHolder;
import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ServerConnectInfo;
+import com.alibaba.datax.plugin.writer.oceanbasev10writer.part.IObPartCalculator;
+import com.alibaba.datax.plugin.writer.oceanbasev10writer.part.ObPartitionCalculatorV1;
+import com.alibaba.datax.plugin.writer.oceanbasev10writer.part.ObPartitionCalculatorV2;
import com.alibaba.datax.plugin.writer.oceanbasev10writer.util.ObWriterUtils;
-import com.alipay.oceanbase.obproxy.data.TableEntryKey;
-import com.alipay.oceanbase.obproxy.util.ObPartitionIdCalculator;
-import org.apache.commons.lang3.tuple.Pair;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
+import com.oceanbase.partition.calculator.enums.ObServerMode;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
@@ -35,8 +32,12 @@ import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
-
-//import java.sql.PreparedStatement;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import static com.alibaba.datax.plugin.writer.oceanbasev10writer.Config.DEFAULT_SLOW_MEMSTORE_THRESHOLD;
+import static com.alibaba.datax.plugin.writer.oceanbasev10writer.util.ObWriterUtils.LoadMode.FAST;
+import static com.alibaba.datax.plugin.writer.oceanbasev10writer.util.ObWriterUtils.LoadMode.PAUSE;
+import static com.alibaba.datax.plugin.writer.oceanbasev10writer.util.ObWriterUtils.LoadMode.SLOW;
public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task {
private static final Logger LOG = LoggerFactory.getLogger(ConcurrentTableWriterTask.class);
@@ -47,41 +48,31 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task {
private long memstoreCheckIntervalSecond = Config.DEFAULT_MEMSTORE_CHECK_INTERVAL_SECOND;
    // timestamp of the last memstore check
private long lastCheckMemstoreTime;
+
+ private volatile ObWriterUtils.LoadMode loadMode = FAST;
private static AtomicLong totalTask = new AtomicLong(0);
private long taskId = -1;
-
private AtomicBoolean isMemStoreFull = new AtomicBoolean(false);
- private ConnHolder checkConnHolder;
+    private HashMap<Long, List<Record>> groupInsertValues;
+ private IObPartCalculator obPartCalculator;
+ private ConcurrentTableWriter concurrentWriter = null;
+ private AbstractConnHolder connHolder;
+ private boolean allTaskInQueue = false;
+ private Lock lock = new ReentrantLock();
+ private Condition condition = lock.newCondition();
+ private long startTime;
+ private String obWriteMode = "update";
+ private boolean isOracleCompatibleMode = false;
+ private String obUpdateColumns = null;
+ private String dbName;
+ private int calPartFailedCount = 0;
- public ConcurrentTableWriterTask(DataBaseType dataBaseType) {
+ public ConcurrentTableWriterTask(DataBaseType dataBaseType) {
super(dataBaseType);
taskId = totalTask.getAndIncrement();
}
- private ObPartitionIdCalculator partCalculator = null;
-
-    private HashMap<Long, List<Record>> groupInsertValues;
-    List<Record> unknownPartRecords = new ArrayList<Record>();
-//    private List<Record> unknownPartRecords;
-    private List<Integer> partitionKeyIndexes;
-
- private ConcurrentTableWriter concurrentWriter = null;
-
- private ConnHolder connHolder;
-
- private boolean allTaskInQueue = false;
-
- private Lock lock = new ReentrantLock();
- private Condition condition = lock.newCondition();
-
- private long startTime;
- private String obWriteMode = "update";
- private boolean isOracleCompatibleMode = false;
- private String obUpdateColumns = null;
-    private List<Pair<String, int[]>> deleteColPos;
- private String dbName;
-
@Override
public void init(Configuration config) {
super.init(config);
@@ -95,15 +86,11 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task {
this.memstoreThreshold = config.getDouble(Config.MEMSTORE_THRESHOLD, Config.DEFAULT_MEMSTORE_THRESHOLD);
this.memstoreCheckIntervalSecond = config.getLong(Config.MEMSTORE_CHECK_INTERVAL_SECOND,
Config.DEFAULT_MEMSTORE_CHECK_INTERVAL_SECOND);
- this.isOracleCompatibleMode = ObWriterUtils.isOracleMode();
- LOG.info("configure url is unavailable, use obclient for connections.");
- this.checkConnHolder = new ObClientConnHolder(config, connectInfo.jdbcUrl,
+ this.connHolder = new ObClientConnHolder(config, connectInfo.jdbcUrl,
connectInfo.getFullUserName(), connectInfo.password);
- this.connHolder = new ObClientConnHolder(config, connectInfo.jdbcUrl,
- connectInfo.getFullUserName(), connectInfo.password);
- checkConnHolder.initConnection();
- if (isOracleCompatibleMode) {
+ this.isOracleCompatibleMode = ObWriterUtils.isOracleMode();
+ if (isOracleCompatibleMode) {
connectInfo.databaseName = connectInfo.databaseName.toUpperCase();
                // do not convert case when the identifier is quoted
if (!(table.startsWith("\"") && table.endsWith("\""))) {
@@ -115,43 +102,36 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task {
}
if (config.getBool(Config.USE_PART_CALCULATOR, Config.DEFAULT_USE_PART_CALCULATOR)) {
- initPartCalculator(connectInfo);
+ this.obPartCalculator = createPartitionCalculator(connectInfo, ObServerMode.from(config.getString(Config.OB_COMPATIBLE_MODE), config.getString(Config.OB_VERSION)));
} else {
LOG.info("Disable partition calculation feature.");
}
- obUpdateColumns = config.getString(Config.OB_UPDATE_COLUMNS, null);
-        groupInsertValues = new HashMap<Long, List<Record>>();
-        partitionKeyIndexes = new ArrayList<Integer>();
-        rewriteSql();
+        obUpdateColumns = config.getString(Config.OB_UPDATE_COLUMNS, null);
+        groupInsertValues = new HashMap<Long, List<Record>>();
+ rewriteSql();
- if (null == concurrentWriter) {
- concurrentWriter = new ConcurrentTableWriter(config, connectInfo, writeRecordSql);
- allTaskInQueue = false;
- }
- }
+ if (null == concurrentWriter) {
+ concurrentWriter = new ConcurrentTableWriter(config, connectInfo, writeRecordSql);
+ allTaskInQueue = false;
+ }
+ }
- private void initPartCalculator(ServerConnectInfo connectInfo) {
- int retry = 0;
- LOG.info(String.format("create tableEntryKey with clusterName %s, tenantName %s, databaseName %s, tableName %s",
- connectInfo.clusterName, connectInfo.tenantName, connectInfo.databaseName, table));
- TableEntryKey tableEntryKey = new TableEntryKey(connectInfo.clusterName, connectInfo.tenantName,
- connectInfo.databaseName, table);
- do {
- try {
- if (retry > 0) {
- int sleep = retry > 8 ? 500 : (1 << retry);
- TimeUnit.SECONDS.sleep(sleep);
- LOG.info("retry create new part calculator, the {} times", retry);
- }
- LOG.info("create partCalculator with address: " + connectInfo.ipPort);
- partCalculator = new ObPartitionIdCalculator(connectInfo.ipPort, tableEntryKey);
- } catch (Exception ex) {
- ++retry;
- LOG.warn("create new part calculator failed, retry {}: {}", retry, ex.getMessage());
- }
- } while (partCalculator == null && retry < 3); // try 3 times
- }
+ /**
+     * Create the partition calculator appropriate for the server version
+ *
+ * @param connectInfo
+ * @return
+ */
+ private IObPartCalculator createPartitionCalculator(ServerConnectInfo connectInfo, ObServerMode obServerMode) {
+ if (obServerMode.isSubsequentFrom("3.0.0.0")) {
+ LOG.info("oceanbase version is {}, use ob-partition-calculator to calculate partition Id.", obServerMode.getVersion());
+ return new ObPartitionCalculatorV2(connectInfo, table, obServerMode, columns);
+ }
+
+ LOG.info("oceanbase version is {}, use ocj to calculate partition Id.", obServerMode.getVersion());
+ return new ObPartitionCalculatorV1(connectInfo, table, columns);
+ }
public boolean isFinished() {
return allTaskInQueue && concurrentWriter.checkFinish();
@@ -174,43 +154,18 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task {
if (isOracleCompatibleMode && obWriteMode.equalsIgnoreCase("update")) {
// change obWriteMode to insert so the insert statement will be generated.
obWriteMode = "insert";
- deleteColPos = ObWriterUtils.buildDeleteSql(conn, dbName, table, columns);
}
this.writeRecordSql = ObWriterUtils.buildWriteSql(table, columns, conn, obWriteMode, obUpdateColumns);
LOG.info("writeRecordSql :{}", this.writeRecordSql);
}
-
+
+ @Override
public void prepare(Configuration writerSliceConfig) {
super.prepare(writerSliceConfig);
- calPartitionKeyIndex(partitionKeyIndexes);
concurrentWriter.start();
}
-    private void calPartitionKeyIndex(List<Integer> partKeyIndexes) {
- partKeyIndexes.clear();
- if (null == partCalculator) {
- LOG.error("partCalculator is null");
- return;
- }
- for (int i = 0; i < columns.size(); ++i) {
- if (partCalculator.isPartitionKeyColumn(columns.get(i))) {
- LOG.info(columns.get(i) + " is partition key.");
- partKeyIndexes.add(i);
- }
- }
- }
-
-    private Long calPartitionId(List<Integer> partKeyIndexes, Record record) {
- if (partCalculator == null) {
- return null;
- }
- for (Integer i : partKeyIndexes) {
- partCalculator.addColumn(columns.get(i), record.getColumn(i).asString());
- }
- return partCalculator.calculate();
- }
-
- @Override
+ @Override
public void startWriteWithConnection(RecordReceiver recordReceiver, TaskPluginCollector taskPluginCollector, Connection connection) {
this.taskPluginCollector = taskPluginCollector;
@@ -271,21 +226,6 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task {
return fillPreparedStatement(preparedStatement, record);
}
- public PreparedStatement fillStatementIndex(PreparedStatement preparedStatement,
- int prepIdx, int columnIndex, Column column) throws SQLException {
- int columnSqltype = this.resultSetMetaData.getMiddle().get(columnIndex);
- String typeName = this.resultSetMetaData.getRight().get(columnIndex);
- return fillPreparedStatementColumnType(preparedStatement, prepIdx, columnSqltype, typeName, column);
- }
-
- public void collectDirtyRecord(Record record, SQLException e) {
- taskPluginCollector.collectDirtyRecord(record, e);
- }
-
-    public void insertOneRecord(Connection connection, List<Record> buffer) {
- doOneInsert(connection, buffer);
- }
-
private void addLeftRecords() {
        // no need to refresh the cache; this is already the last batch of data
        for (List<Record> groupValues : groupInsertValues.values()) {
@@ -293,17 +233,16 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task {
addRecordsToWriteQueue(groupValues);
}
}
- if (unknownPartRecords.size() > 0) {
- addRecordsToWriteQueue(unknownPartRecords);
- }
}
private void addRecordToCache(final Record record) {
Long partId =null;
try {
- partId = calPartitionId(partitionKeyIndexes, record);
+ partId = obPartCalculator == null ? Long.MAX_VALUE : obPartCalculator.calculate(record);
} catch (Exception e1) {
- LOG.warn("fail to get partition id: " + e1.getMessage() + ", record: " + record);
+ if (calPartFailedCount++ < 10) {
+ LOG.warn("fail to get partition id: " + e1.getMessage() + ", record: " + record);
+ }
}
if (partId == null) {
@@ -311,24 +250,11 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task {
partId = Long.MAX_VALUE;
}
- if (partId != null) {
-            List<Record> groupValues = groupInsertValues.get(partId);
-            if (groupValues == null) {
-                groupValues = new ArrayList<Record>(batchSize);
- groupInsertValues.put(partId, groupValues);
- }
- groupValues.add(record);
- if (groupValues.size() >= batchSize) {
- groupValues = addRecordsToWriteQueue(groupValues);
- groupInsertValues.put(partId, groupValues);
- }
- } else {
- LOG.debug("add unknown part record {}", record);
- unknownPartRecords.add(record);
- if (unknownPartRecords.size() >= batchSize) {
- unknownPartRecords = addRecordsToWriteQueue(unknownPartRecords);
- }
-
+        List<Record> groupValues = groupInsertValues.computeIfAbsent(partId, k -> new ArrayList<>(batchSize));
+ groupValues.add(record);
+ if (groupValues.size() >= batchSize) {
+ groupValues = addRecordsToWriteQueue(groupValues);
+ groupInsertValues.put(partId, groupValues);
}
}
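
The `computeIfAbsent` line above is the whole batching strategy: records accumulate per partition id, and a group is flushed to the write queue once it reaches `batchSize`. A standalone sketch of that pattern (`flush` stands in for `addRecordsToWriteQueue`, which hands batches to the writer threads):

```java
// Hedged sketch of the per-partition batching used by addRecordToCache.
import java.util.*;

public class PartitionBatcher {
    static final int BATCH_SIZE = 3;
    static final Map<Long, List<String>> groups = new HashMap<>();

    static void add(long partId, String record) {
        List<String> group = groups.computeIfAbsent(partId, k -> new ArrayList<>(BATCH_SIZE));
        group.add(record);
        if (group.size() >= BATCH_SIZE) {
            flush(partId, group);
            groups.put(partId, new ArrayList<>(BATCH_SIZE)); // start a fresh batch, like the real code
        }
    }

    static void flush(long partId, List<String> batch) {
        System.out.println("flush partition " + partId + ": " + batch);
    }

    public static void main(String[] args) {
        for (int i = 0; i < 7; i++) add(i % 2, "r" + i);
    }
}
```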
@@ -354,15 +280,25 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task {
        return new ArrayList<Record>(batchSize);
}
private void checkMemStore() {
- Connection checkConn = checkConnHolder.reconnect();
+ Connection checkConn = connHolder.getConn();
+        try {
+            if (checkConn == null || checkConn.isClosed()) {
+                checkConn = connHolder.reconnect();
+            }
+        } catch (Exception e) {
+            LOG.warn("connection used for the memstore check is unusable", e);
+        }
+
long now = System.currentTimeMillis();
if (now - lastCheckMemstoreTime < 1000 * memstoreCheckIntervalSecond) {
return;
}
- boolean isFull = ObWriterUtils.isMemstoreFull(checkConn, memstoreThreshold);
- this.isMemStoreFull.set(isFull);
- if (isFull) {
- LOG.warn("OB memstore is full,sleep 30 seconds, threshold=" + memstoreThreshold);
+        double memUsedRatio = ObWriterUtils.queryMemUsedRatio(checkConn);
+        if (memUsedRatio >= DEFAULT_SLOW_MEMSTORE_THRESHOLD) {
+            this.loadMode = memUsedRatio >= memstoreThreshold ? PAUSE : SLOW;
+            LOG.info("Memstore used ratio is {}. Load data mode: {}", memUsedRatio, loadMode.name());
+        } else {
+            this.loadMode = FAST;
}
lastCheckMemstoreTime = now;
}
@@ -370,21 +306,23 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task {
public boolean isMemStoreFull() {
return isMemStoreFull.get();
}
-
- public void printEveryTime() {
- long cost = System.currentTimeMillis() - startTime;
- if (cost > 10000) { //10s
- print();
- startTime = System.currentTimeMillis();
- }
+
+ public boolean isShouldPause() {
+ return this.loadMode.equals(PAUSE);
+ }
+
+ public boolean isShouldSlow() {
+ return this.loadMode.equals(SLOW);
}
public void print() {
- LOG.debug("Statistic total task {}, finished {}, queue Size {}",
- concurrentWriter.getTotalTaskCount(),
- concurrentWriter.getFinishTaskCount(),
- concurrentWriter.getTaskQueueSize());
- concurrentWriter.printStatistics();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Statistic total task {}, finished {}, queue Size {}",
+ concurrentWriter.getTotalTaskCount(),
+ concurrentWriter.getFinishTaskCount(),
+ concurrentWriter.getTaskQueueSize());
+ concurrentWriter.printStatistics();
+ }
}
public void waitTaskFinish() {
@@ -417,8 +355,6 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task {
}
        // close the connection held by this task
DBUtil.closeDBResources(null, connHolder.getConn());
- DBUtil.closeDBResources(null, checkConnHolder.getConn());
- checkConnHolder.destroy();
super.destroy(writerSliceConfig);
}
@@ -469,7 +405,7 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task {
public synchronized void start() {
for (int i = 0; i < threadCount; ++i) {
LOG.info("start {} insert task.", (i+1));
- InsertTask insertTask = new InsertTask(taskId, queue, config, connectInfo, rewriteRecordSql, deleteColPos);
+ InsertTask insertTask = new InsertTask(taskId, queue, config, connectInfo, rewriteRecordSql);
insertTask.setWriterTask(ConcurrentTableWriterTask.this);
insertTask.setWriter(this);
insertTasks.add(insertTask);
@@ -495,7 +431,7 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task {
public void addBatchRecords(final List records) throws InterruptedException {
boolean isSucc = false;
while (!isSucc) {
- isSucc = queue.offer(records, 5, TimeUnit.SECONDS);
+ isSucc = queue.offer(records, 5, TimeUnit.MILLISECONDS);
checkMemStore();
}
totalTaskCount.incrementAndGet();
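
The refactored `checkMemStore` replaces the old boolean full/not-full check with a three-tier policy: below 0.75 (`DEFAULT_SLOW_MEMSTORE_THRESHOLD`) write at full speed, between 0.75 and the configured `memstoreThreshold` (default 0.9) slow down, at or above it pause. A hedged sketch of just that decision, with a local enum standing in for `ObWriterUtils.LoadMode`:

```java
// Hedged sketch of the three-tier throttling decision in checkMemStore.
enum LoadMode { FAST, SLOW, PAUSE }

final class LoadModePolicy {
    static LoadMode decide(double memUsedRatio, double slowThreshold, double pauseThreshold) {
        if (memUsedRatio >= pauseThreshold) return LoadMode.PAUSE;
        if (memUsedRatio >= slowThreshold) return LoadMode.SLOW;
        return LoadMode.FAST;
    }
}
// decide(0.5, 0.75, 0.9) -> FAST; decide(0.8, 0.75, 0.9) -> SLOW; decide(0.95, 0.75, 0.9) -> PAUSE
```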
diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/InsertTask.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/InsertTask.java
index 968908ca..df80cf7f 100644
--- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/InsertTask.java
+++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/InsertTask.java
@@ -1,286 +1,204 @@
package com.alibaba.datax.plugin.writer.oceanbasev10writer.task;
-import java.sql.Connection;
-import java.sql.PreparedStatement;
-import java.sql.SQLException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Queue;
-import java.util.concurrent.TimeUnit;
-
-import com.alibaba.datax.common.exception.DataXException;
-import com.alibaba.datax.plugin.rdbms.util.DBUtil;
-import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
-import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ObClientConnHolder;
-import org.apache.commons.lang3.tuple.Pair;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.rdbms.util.DBUtil;
import com.alibaba.datax.plugin.writer.oceanbasev10writer.Config;
-import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ConnHolder;
+import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.AbstractConnHolder;
+import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ObClientConnHolder;
import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ServerConnectInfo;
import com.alibaba.datax.plugin.writer.oceanbasev10writer.task.ConcurrentTableWriterTask.ConcurrentTableWriter;
import com.alibaba.datax.plugin.writer.oceanbasev10writer.util.ObWriterUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.TimeUnit;
+
public class InsertTask implements Runnable {
private static final Logger LOG = LoggerFactory.getLogger(InsertTask.class);
- private ConcurrentTableWriterTask writerTask;
- private ConcurrentTableWriter writer;
+ private ConcurrentTableWriterTask writerTask;
+ private ConcurrentTableWriter writer;
- private String writeRecordSql;
- private long totalCost = 0;
- private long insertCount = 0;
+ private String writeRecordSql;
+ private long totalCost = 0;
+ private long insertCount = 0;
-    private Queue<List<Record>> queue;
-    private boolean isStop;
-    private ConnHolder connHolder;
+    private BlockingQueue<List<Record>> queue;
+    private boolean isStop;
+    private AbstractConnHolder connHolder;
- private final long taskId;
- private ServerConnectInfo connInfo;
+ private final long taskId;
+ private ServerConnectInfo connInfo;
-    // number of retries on failure
-    private int failTryCount = Config.DEFAULT_FAIL_TRY_COUNT;
-    private boolean printCost = Config.DEFAULT_PRINT_COST;
-    private long costBound = Config.DEFAULT_COST_BOUND;
-    private List<Pair<String, int[]>> deleteMeta;
+    // number of retries on failure
+    private int failTryCount = Config.DEFAULT_FAIL_TRY_COUNT;
+    private boolean printCost = Config.DEFAULT_PRINT_COST;
+    private long costBound = Config.DEFAULT_COST_BOUND;
- public InsertTask(
- final long taskId,
-            Queue<List<Record>> recordsQueue,
- Configuration config,
- ServerConnectInfo connectInfo,
- String writeRecordSql,
-            List<Pair<String, int[]>> deleteMeta) {
- this.taskId = taskId;
- this.queue = recordsQueue;
- this.connInfo = connectInfo;
- failTryCount = config.getInt(Config.FAIL_TRY_COUNT, Config.DEFAULT_FAIL_TRY_COUNT);
- printCost = config.getBool(Config.PRINT_COST, Config.DEFAULT_PRINT_COST);
- costBound = config.getLong(Config.COST_BOUND, Config.DEFAULT_COST_BOUND);
- this.connHolder = new ObClientConnHolder(config, connInfo.jdbcUrl,
- connInfo.getFullUserName(), connInfo.password);
- this.writeRecordSql = writeRecordSql;
- this.isStop = false;
- this.deleteMeta = deleteMeta;
- connHolder.initConnection();
- }
-
- void setWriterTask(ConcurrentTableWriterTask writerTask) {
- this.writerTask = writerTask;
- }
-
- void setWriter(ConcurrentTableWriter writer) {
- this.writer = writer;
- }
+ public InsertTask(
+ final long taskId,
+            BlockingQueue<List<Record>> recordsQueue,
+ Configuration config,
+ ServerConnectInfo connectInfo,
+ String writeRecordSql) {
+ this.taskId = taskId;
+ this.queue = recordsQueue;
+ this.connInfo = connectInfo;
+ failTryCount = config.getInt(Config.FAIL_TRY_COUNT, Config.DEFAULT_FAIL_TRY_COUNT);
+ printCost = config.getBool(Config.PRINT_COST, Config.DEFAULT_PRINT_COST);
+ costBound = config.getLong(Config.COST_BOUND, Config.DEFAULT_COST_BOUND);
+ this.connHolder = new ObClientConnHolder(config, connInfo.jdbcUrl,
+ connInfo.getFullUserName(), connInfo.password);
+ this.writeRecordSql = writeRecordSql;
+ this.isStop = false;
+ connHolder.initConnection();
+ }
- private boolean isStop() { return isStop; }
- public void setStop() { isStop = true; }
- public long getTotalCost() { return totalCost; }
- public long getInsertCount() { return insertCount; }
-
- @Override
- public void run() {
- Thread.currentThread().setName(String.format("%d-insertTask-%d", taskId, Thread.currentThread().getId()));
- LOG.debug("Task {} start to execute...", taskId);
- while (!isStop()) {
- try {
-                List<Record> records = queue.poll();
- if (null != records) {
- doMultiInsert(records, this.printCost, this.costBound);
+ void setWriterTask(ConcurrentTableWriterTask writerTask) {
+ this.writerTask = writerTask;
+ }
- } else if (writerTask.isFinished()) {
- writerTask.singalTaskFinish();
- LOG.debug("not more task, thread exist ...");
- break;
- } else {
- TimeUnit.MILLISECONDS.sleep(5);
- }
- } catch (InterruptedException e) {
- LOG.debug("TableWriter is interrupt");
- } catch (Exception e) {
- LOG.warn("ERROR UNEXPECTED {}", e);
- }
- }
- LOG.debug("Thread exist...");
- }
-
- public void destroy() {
- connHolder.destroy();
- };
-
- public void calStatistic(final long cost) {
- writer.increFinishCount();
- ++insertCount;
- totalCost += cost;
- if (this.printCost && cost > this.costBound) {
- LOG.info("slow multi insert cost {}ms", cost);
- }
- }
+ void setWriter(ConcurrentTableWriter writer) {
+ this.writer = writer;
+ }
-    private void doDelete(Connection conn, final List<Record> buffer) throws SQLException {
- if(deleteMeta == null || deleteMeta.size() == 0) {
- return;
- }
- for (int i = 0; i < deleteMeta.size(); i++) {
- String deleteSql = deleteMeta.get(i).getKey();
- int[] valueIdx = deleteMeta.get(i).getValue();
- PreparedStatement ps = null;
- try {
- ps = conn.prepareStatement(deleteSql);
- StringBuilder builder = new StringBuilder();
- for (Record record : buffer) {
- int bindIndex = 0;
- for (int idx : valueIdx) {
- writerTask.fillStatementIndex(ps, bindIndex++, idx, record.getColumn(idx));
- builder.append(record.getColumn(idx).asString()).append(",");
- }
- ps.addBatch();
- }
- LOG.debug("delete values: " + builder.toString());
- ps.executeBatch();
- } catch (SQLException ex) {
- LOG.error("SQL Exception when delete records with {}", deleteSql, ex);
- throw ex;
- } finally {
- DBUtil.closeDBResources(ps, null);
- }
- }
- }
+ private boolean isStop() {
+ return isStop;
+ }
-    public void doMultiInsert(final List<Record> buffer, final boolean printCost, final long restrict) {
- checkMemstore();
- Connection conn = connHolder.getConn();
- boolean success = false;
- long cost = 0;
- long startTime = 0;
- try {
- for (int i = 0; i < failTryCount; ++i) {
- if (i > 0) {
- try {
-                    int sleep = i >= 9 ? 500 : 1 << i; // not clear why a 500s sleep is needed
- TimeUnit.SECONDS.sleep(sleep);
- } catch (InterruptedException e) {
- LOG.info("thread interrupted ..., ignore");
- }
- conn = connHolder.getConn();
- LOG.info("retry {}, start do batch insert, size={}", i, buffer.size());
- checkMemstore();
- }
- startTime = System.currentTimeMillis();
- PreparedStatement ps = null;
- try {
- conn.setAutoCommit(false);
+ public void setStop() {
+ isStop = true;
+ }
- // do delete if necessary
- doDelete(conn, buffer);
+ public long getTotalCost() {
+ return totalCost;
+ }
- ps = conn.prepareStatement(writeRecordSql);
- for (Record record : buffer) {
- ps = writerTask.fillStatement(ps, record);
- ps.addBatch();
- }
- ps.executeBatch();
- conn.commit();
- success = true;
- cost = System.currentTimeMillis() - startTime;
- calStatistic(cost);
- break;
- } catch (SQLException e) {
- LOG.warn("Insert fatal error SqlState ={}, errorCode = {}, {}", e.getSQLState(), e.getErrorCode(), e);
- if (i == 0 || i > 10 ) {
- for (Record record : buffer) {
- LOG.warn("ERROR : record {}", record);
- }
- }
-                // classify by error code and handle each case separately:
-                // an OB system-level error requires rebuilding the connection
- boolean fatalFail = ObWriterUtils.isFatalError(e);
- if (fatalFail) {
- ObWriterUtils.sleep(300000);
- connHolder.reconnect();
-                    // retry if the error is recoverable
- } else if (ObWriterUtils.isRecoverableError(e)) {
- conn.rollback();
- ObWriterUtils.sleep(60000);
-                } else { // for any other error, exit directly and fall back to row-by-row writes
- conn.rollback();
- ObWriterUtils.sleep(1000);
- break;
- }
- } catch (Exception e) {
- e.printStackTrace();
- LOG.warn("Insert error unexpected {}", e);
- } finally {
- DBUtil.closeDBResources(ps, null);
- }
- }
- } catch (SQLException e) {
- LOG.warn("ERROR:retry failSql State ={}, errorCode = {}, {}", e.getSQLState(), e.getErrorCode(), e);
- }
+ public long getInsertCount() {
+ return insertCount;
+ }
- if (!success) {
- try {
- LOG.info("do one insert");
- conn = connHolder.reconnect();
- doOneInsert(conn, buffer);
- cost = System.currentTimeMillis() - startTime;
- calStatistic(cost);
- } finally {
- }
- }
- }
+ @Override
+ public void run() {
+ Thread.currentThread().setName(String.format("%d-insertTask-%d", taskId, Thread.currentThread().getId()));
+ LOG.debug("Task {} start to execute...", taskId);
+ while (!isStop()) {
+ try {
+                List<Record> records = queue.poll(5, TimeUnit.MILLISECONDS);
+                if (null != records) {
+                    doMultiInsert(records, this.printCost, this.costBound);
+                } else if (writerTask.isFinished()) {
+                    writerTask.singalTaskFinish();
+                    LOG.debug("no more tasks, thread exits ...");
+                    break;
+                }
+            } catch (InterruptedException e) {
+                LOG.debug("TableWriter is interrupted");
+            } catch (Exception e) {
+                LOG.warn("unexpected error", e);
+            }
+        }
+        LOG.debug("Thread exits...");
+ }
- // process one row, delete before insert
-    private void doOneInsert(Connection connection, List<Record> buffer) {
-        List<PreparedStatement> deletePstmtList = new ArrayList<PreparedStatement>();
- PreparedStatement preparedStatement = null;
- try {
- connection.setAutoCommit(false);
- if (deleteMeta != null && deleteMeta.size() > 0) {
- for (int i = 0; i < deleteMeta.size(); i++) {
- String deleteSql = deleteMeta.get(i).getKey();
- deletePstmtList.add(connection.prepareStatement(deleteSql));
- }
- }
+ public void destroy() {
+ connHolder.destroy();
+ }
- preparedStatement = connection.prepareStatement(this.writeRecordSql);
- for (Record record : buffer) {
- try {
- for (int i = 0; i < deletePstmtList.size(); i++) {
- PreparedStatement deleteStmt = deletePstmtList.get(i);
- int[] valueIdx = deleteMeta.get(i).getValue();
- int bindIndex = 0;
- for (int idx : valueIdx) {
- writerTask.fillStatementIndex(deleteStmt, bindIndex++, idx, record.getColumn(idx));
- }
- deleteStmt.execute();
- }
- preparedStatement = writerTask.fillStatement(preparedStatement, record);
- preparedStatement.execute();
- connection.commit();
- } catch (SQLException e) {
- writerTask.collectDirtyRecord(record, e);
- } finally {
-                    // do not close the statement here; it is still needed for later records
- }
- }
- } catch (Exception e) {
- throw DataXException.asDataXException(
- DBUtilErrorCode.WRITE_DATA_ERROR, e);
- } finally {
- DBUtil.closeDBResources(preparedStatement, null);
- for (PreparedStatement pstmt : deletePstmtList) {
- DBUtil.closeDBResources(pstmt, null);
- }
- }
- }
+ public void calStatistic(final long cost) {
+ writer.increFinishCount();
+ ++insertCount;
+ totalCost += cost;
+ if (this.printCost && cost > this.costBound) {
+ LOG.info("slow multi insert cost {}ms", cost);
+ }
+ }
- private void checkMemstore() {
- while (writerTask.isMemStoreFull()) {
- ObWriterUtils.sleep(30000);
- }
- }
+    public void doMultiInsert(final List<Record> buffer, final boolean printCost, final long restrict) {
+ checkMemstore();
+ Connection conn = connHolder.getConn();
+ boolean success = false;
+ long cost = 0;
+ long startTime = 0;
+ try {
+ for (int i = 0; i < failTryCount; ++i) {
+ if (i > 0) {
+ conn = connHolder.getConn();
+ LOG.info("retry {}, start do batch insert, size={}", i, buffer.size());
+ checkMemstore();
+ }
+ startTime = System.currentTimeMillis();
+ PreparedStatement ps = null;
+ try {
+ conn.setAutoCommit(false);
+ ps = conn.prepareStatement(writeRecordSql);
+ for (Record record : buffer) {
+ ps = writerTask.fillStatement(ps, record);
+ ps.addBatch();
+ }
+ ps.executeBatch();
+ conn.commit();
+ success = true;
+ cost = System.currentTimeMillis() - startTime;
+ calStatistic(cost);
+ break;
+ } catch (SQLException e) {
+ LOG.warn("Insert fatal error SqlState ={}, errorCode = {}, {}", e.getSQLState(), e.getErrorCode(), e);
+ if (LOG.isDebugEnabled() && (i == 0 || i > 10)) {
+ for (Record record : buffer) {
+ LOG.warn("ERROR : record {}", record);
+ }
+ }
+                    // classify by error code and handle each case separately:
+                    // an OB system-level error requires rebuilding the connection
+ boolean fatalFail = ObWriterUtils.isFatalError(e);
+ if (fatalFail) {
+ ObWriterUtils.sleep(300000);
+ connHolder.reconnect();
+                    // retry if the error is recoverable
+ } else if (ObWriterUtils.isRecoverableError(e)) {
+ conn.rollback();
+ ObWriterUtils.sleep(60000);
+                    } else { // for any other error, exit directly and fall back to row-by-row writes
+ conn.rollback();
+ ObWriterUtils.sleep(1000);
+ break;
+ }
+ } catch (Exception e) {
+                    LOG.warn("unexpected insert error", e);
+ } finally {
+ DBUtil.closeDBResources(ps, null);
+ }
+ }
+ } catch (SQLException e) {
+ LOG.warn("ERROR:retry failSql State ={}, errorCode = {}, {}", e.getSQLState(), e.getErrorCode(), e);
+ }
+
+ if (!success) {
+ LOG.info("do one insert");
+ conn = connHolder.reconnect();
+ writerTask.doOneInsert(conn, buffer);
+ cost = System.currentTimeMillis() - startTime;
+ calStatistic(cost);
+ }
+ }
+
+ private void checkMemstore() {
+ if (writerTask.isShouldSlow()) {
+ ObWriterUtils.sleep(100);
+ } else {
+ while (writerTask.isShouldPause()) {
+ ObWriterUtils.sleep(100);
+ }
+ }
+ }
}
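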
diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/SingleTableWriterTask.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/SingleTableWriterTask.java
index 637a3be4..d2f42de5 100644
--- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/SingleTableWriterTask.java
+++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/SingleTableWriterTask.java
@@ -12,7 +12,7 @@ import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter;
import com.alibaba.datax.plugin.rdbms.writer.Key;
import com.alibaba.datax.plugin.writer.oceanbasev10writer.Config;
-import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ConnHolder;
+import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.AbstractConnHolder;
import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ObClientConnHolder;
import com.alibaba.datax.plugin.writer.oceanbasev10writer.util.ObWriterUtils;
@@ -30,7 +30,7 @@ public class SingleTableWriterTask extends CommonRdbmsWriter.Task {
    // number of retries on failure
private int failTryCount = Config.DEFAULT_FAIL_TRY_COUNT;
- private ConnHolder connHolder;
+ private AbstractConnHolder connHolder;
private String obWriteMode = "update";
private boolean isOracleCompatibleMode = false;
private String obUpdateColumns = null;
diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/util/DbUtils.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/util/DbUtils.java
index e590fe6b..adffc6f7 100644
--- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/util/DbUtils.java
+++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/util/DbUtils.java
@@ -3,18 +3,17 @@ package com.alibaba.datax.plugin.writer.oceanbasev10writer.util;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.util.DBUtil;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
-import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter;
import com.alibaba.datax.plugin.rdbms.writer.Constant;
import com.alibaba.datax.plugin.rdbms.writer.Key;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
+import com.alibaba.datax.plugin.writer.oceanbasev10writer.Config;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.List;
import java.util.concurrent.TimeUnit;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class DbUtils {
@@ -25,7 +24,7 @@ public class DbUtils {
final String password = config.getString(Key.PASSWORD);
String jdbcUrl = config.getString(Key.JDBC_URL);
- if(jdbcUrl == null) {
+ if (jdbcUrl == null) {
            List<Object> conns = config.getList(Constant.CONN_MARK, Object.class);
Configuration connConf = Configuration.from(conns.get(0).toString());
jdbcUrl = connConf.getString(Key.JDBC_URL);
@@ -34,9 +33,9 @@ public class DbUtils {
Connection conn = null;
PreparedStatement stmt = null;
ResultSet result = null;
- boolean need_retry = false;
String value = null;
int retry = 0;
+ int failTryCount = config.getInt(Config.FAIL_TRY_COUNT, Config.DEFAULT_FAIL_TRY_COUNT);
do {
try {
if (retry > 0) {
@@ -58,14 +57,57 @@ public class DbUtils {
LOG.info("value for query [{}] is [{}]", query, value);
break;
} catch (SQLException e) {
- need_retry = true;
++retry;
LOG.warn("fetch value with {} error {}", query, e);
} finally {
DBUtil.closeDBResources(result, stmt, conn);
}
- } while (need_retry);
+ } while (retry < failTryCount);
return value;
}
+
+    /**
+     * Build a connection to the OceanBase sys tenant from an ordinary
+     * business-tenant jdbc url.
+     *
+     * @param jdbcUrl ordinary business-tenant jdbc url
+     * @param clusterName OceanBase cluster name
+     * @return a connection logged into the sys tenant
+     * @throws Exception if no candidate sys user can connect
+     */
+ public static Connection buildSysConn(String jdbcUrl, String clusterName) throws Exception {
+ jdbcUrl = jdbcUrl.replace("jdbc:mysql://", "jdbc:oceanbase://");
+ int startIdx = jdbcUrl.indexOf('/', "jdbc:oceanbase://".length());
+ int endIdx = jdbcUrl.lastIndexOf('?');
+ String prefix = jdbcUrl.substring(0, startIdx + 1);
+ final String postfix = jdbcUrl.substring(endIdx);
+ String sysJDBCUrl = prefix + "oceanbase" + postfix;
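+        // e.g. a (hypothetical) url "jdbc:mysql://host:2883/mydb?useUnicode=true"
+        // becomes "jdbc:oceanbase://host:2883/oceanbase?useUnicode=true"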
+
+ String tenantName = "sys";
+ String[][] userConfigs = {
+ {"monitor", "monitor"}
+ };
+
+ Connection conn = null;
+ for (String[] userConfig : userConfigs) {
+ try {
+ conn = DBUtil.getConnectionWithoutRetry(DataBaseType.OceanBase, sysJDBCUrl, String.format("%s@%s#%s", userConfig[0],
+ tenantName, clusterName), userConfig[1]);
+ } catch (Exception e) {
+ LOG.warn("fail connecting to ob: " + e.getMessage());
+
+ }
+ if (conn == null) {
+ LOG.warn("fail to get connection with user " + userConfig[0] + ", try alternative user.");
+ } else {
+ break;
+ }
+ }
+
+ if (conn == null) {
+ throw new Exception("fail to get connection with sys tenant.");
+ }
+
+ return conn;
+ }
}
diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/util/ObWriterUtils.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/util/ObWriterUtils.java
index edc4b236..a5d6b0ea 100644
--- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/util/ObWriterUtils.java
+++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/util/ObWriterUtils.java
@@ -1,8 +1,10 @@
package com.alibaba.datax.plugin.writer.oceanbasev10writer.util;
+import com.alibaba.datax.plugin.rdbms.reader.util.ObVersion;
import com.alibaba.datax.plugin.rdbms.util.DBUtil;
import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter.Task;
import com.alibaba.datax.plugin.writer.oceanbasev10writer.Config;
+import org.apache.commons.lang3.RandomUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
@@ -11,6 +13,7 @@ import org.slf4j.LoggerFactory;
import java.sql.*;
import java.util.*;
+import static com.alibaba.datax.plugin.writer.oceanbasev10writer.Config.DEFAULT_SLOW_MEMSTORE_THRESHOLD;
public class ObWriterUtils {
@@ -18,8 +21,14 @@ public class ObWriterUtils {
private static final String ORACLE_KEYWORDS = "ACCESS,ADD,ALL,ALTER,AND,ANY,ARRAYLEN,AS,ASC,AUDIT,BETWEEN,BY,CHAR,CHECK,CLUSTER,COLUMN,COMMENT,COMPRESS,CONNECT,CREATE,CURRENT,DATE,DECIMAL,DEFAULT,DELETE,DESC,DISTINCT,DROP,ELSE,EXCLUSIVE,EXISTS,FILE,FLOAT,FOR,FROM,GRANT,GROUP,HAVING,IDENTIFIED,IMMEDIATE,IN,INCREMENT,INDEX,INITIAL,INSERT,INTEGER,INTERSECT,INTO,IS,LEVEL,LIKE,LOCK,LONG,MAXEXTENTS,MINUS,MODE,MODIFY,NOAUDIT,NOCOMPRESS,NOT,NOTFOUND,NOWAIT,NULL,NUMBER,OF,OFFLINE,ON,ONLINE,OPTION,OR,ORDER,PCTFREE,PRIOR,PRIVILEGES,PUBLIC,RAW,RENAME,RESOURCE,REVOKE,ROW,ROWID,ROWLABEL,ROWNUM,ROWS,SELECT,SESSION,SET,SHARE,SIZE,SMALLINT,SQLBUF,START,SUCCESSFUL,SYNONYM,TABLE,THEN,TO,TRIGGER,UID,UNION,UNIQUE,UPDATE,USER,VALIDATE,VALUES,VARCHAR,VARCHAR2,VIEW,WHENEVER,WHERE,WITH";
private static String CHECK_MEMSTORE = "select 1 from %s.gv$memstore t where t.total>t.mem_limit * ?";
+ private static final String CHECK_MEMSTORE_4_0 = "select 1 from %s.gv$ob_memstore t where t.MEMSTORE_USED>t.MEMSTORE_LIMIT * ?";
+
+ private static String CHECK_MEMSTORE_RATIO = "select min(t.total/t.mem_limit) from %s.gv$memstore t";
+ private static final String CHECK_MEMSTORE_RATIO_4_0 = "select min(t.MEMSTORE_USED/t.MEMSTORE_LIMIT) from %s.gv$ob_memstore t";
+
    private static Set<String> databaseKeywords;
private static String compatibleMode = null;
+ private static String obVersion = null;
protected static final Logger LOG = LoggerFactory.getLogger(Task.class);
    private static Set<String> keywordsFromString2HashSet(final String keywords) {
        return new HashSet<String>(Arrays.asList(keywords.split(",")));
@@ -61,7 +70,7 @@ public class ObWriterUtils {
if (isOracleMode()) {
sysDbName = "sys";
}
- ps = conn.prepareStatement(String.format(CHECK_MEMSTORE, sysDbName));
+ ps = conn.prepareStatement(String.format(getMemStoreSql(), sysDbName));
ps.setDouble(1, memstoreThreshold);
rs = ps.executeQuery();
// 只要有满足条件的,则表示当前租户 有个机器的memstore即将满
@@ -77,10 +86,50 @@ public class ObWriterUtils {
return result;
}
+    public static double queryMemUsedRatio(Connection conn) {
+ PreparedStatement ps = null;
+ ResultSet rs = null;
+ double result = 0;
+ try {
+ String sysDbName = "oceanbase";
+ if (isOracleMode()) {
+ sysDbName = "sys";
+ }
+ ps = conn.prepareStatement(String.format(getMemStoreRatioSql(), sysDbName));
+ rs = ps.executeQuery();
+            // 取当前租户所有机器中memstore使用率的最小值,用于判断写入压力
+ if (rs.next()) {
+ result = rs.getDouble(1);
+ }
+ } catch (Throwable e) {
+ LOG.warn("Check memstore fail, reason: {}. Use a random value instead.", e.getMessage());
+ result = RandomUtils.nextDouble(0.3D, DEFAULT_SLOW_MEMSTORE_THRESHOLD + 0.2D);
+ } finally {
+            // do not need to close the statement in ob1.0
+ }
+ return result;
+ }
+
public static boolean isOracleMode(){
return (compatibleMode.equals(Config.OB_COMPATIBLE_MODE_ORACLE));
}
+ private static String getMemStoreSql() {
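+        // gv$memstore was replaced by gv$ob_memstore (with renamed columns) in
+        // OceanBase 4.0, so pick the query that matches the server version.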
+ if (ObVersion.valueOf(obVersion).compareTo(ObVersion.V4000) >= 0) {
+ return CHECK_MEMSTORE_4_0;
+ } else {
+ return CHECK_MEMSTORE;
+ }
+ }
+
+ private static String getMemStoreRatioSql() {
+ if (ObVersion.valueOf(obVersion).compareTo(ObVersion.V4000) >= 0) {
+ return CHECK_MEMSTORE_RATIO_4_0;
+ } else {
+ return CHECK_MEMSTORE_RATIO;
+ }
+ }
+
public static String getCompatibleMode() {
return compatibleMode;
}
@@ -89,6 +138,10 @@ public class ObWriterUtils {
compatibleMode = mode;
}
+ public static void setObVersion(String version) {
+ obVersion = version;
+ }
+
    private static String buildDeleteSql(String tableName, List<String> columns) {
StringBuilder builder = new StringBuilder("DELETE FROM ");
builder.append(tableName).append(" WHERE ");
@@ -165,7 +218,7 @@ public class ObWriterUtils {
}
            List<String> s = uniqueKeys.get(keyName);
if (s == null) {
- s = new ArrayList();
+ s = new ArrayList<>();
uniqueKeys.put(keyName, s);
}
s.add(columnName);
@@ -237,7 +290,7 @@ public class ObWriterUtils {
String columnName = StringUtils.upperCase(rs.getString("Column_name"));
                Set<String> s = uniqueKeys.get(keyName);
if (s == null) {
- s = new HashSet();
+ s = new HashSet<>();
uniqueKeys.put(keyName, s);
}
s.add(columnName);
@@ -399,7 +452,7 @@ public class ObWriterUtils {
    private static Set<Integer> white = new HashSet<Integer>();
static {
- int[] errList = { 1213, 1047, 1041, 1094, 4000, 4012 };
+ int[] errList = { 1213, 1047, 1041, 1094, 4000, 4012, 4013 };
for (int err : errList) {
white.add(err);
}
@@ -429,4 +482,26 @@ public class ObWriterUtils {
t.setDaemon(true);
t.start();
}
+
+    /**
+     * Load mode of the writer, selected according to the observed memstore usage.
+     */
+    public enum LoadMode {
+
+ /**
+ * Fast insert
+ */
+ FAST,
+
+ /**
+ * Insert slowly
+ */
+ SLOW,
+
+ /**
+ * Pause to insert
+ */
+ PAUSE
+ }
+
}
diff --git a/oceanbasev10writer/src/main/libs/shade-ob-partition-calculator-1.0-SNAPSHOT.jar b/oceanbasev10writer/src/main/libs/shade-ob-partition-calculator-1.0-SNAPSHOT.jar
new file mode 100644
index 00000000..34453ce6
Binary files /dev/null and b/oceanbasev10writer/src/main/libs/shade-ob-partition-calculator-1.0-SNAPSHOT.jar differ
diff --git a/ossreader/doc/ossreader.md b/ossreader/doc/ossreader.md
index e0259a2a..51d757bc 100644
--- a/ossreader/doc/ossreader.md
+++ b/ossreader/doc/ossreader.md
@@ -26,6 +26,8 @@ OSSReader实现了从OSS读取数据并转为DataX协议的功能,OSS本身是
6. 多个object可以支持并发读取。
+7. 支持读取 parquet、orc 格式的文件。
+
我们暂时不能做到:
1. 单个Object(File)支持多线程并发读取,这里涉及到单个Object内部切分算法。二期考虑支持。
@@ -37,7 +39,7 @@ OSSReader实现了从OSS读取数据并转为DataX协议的功能,OSS本身是
### 3.1 配置样例
-
+读取 txt、csv 格式样例:
```json
{
"job": {
@@ -80,6 +82,63 @@ OSSReader实现了从OSS读取数据并转为DataX协议的功能,OSS本身是
}
}
```
+读取 orc 格式样例:
+```json
+{
+ "stepType": "oss",
+ "parameter": {
+ "endpoint": "http://oss.aliyuncs.com",
+ "accessId": "",
+ "accessKey": "",
+ "bucket": "myBucket",
+ "fileFormat": "orc",
+ "path": "/tests/case61/orc__691b6815_9260_4037_9899_****",
+ "column": [
+ {
+ "index": 0,
+ "type": "long"
+ },
+ {
+ "index": "1",
+ "type": "string"
+ },
+ {
+ "index": "2",
+ "type": "string"
+ }
+ ]
+ }
+}
+```
+读取 parquet 格式样例:
+```json
+{
+ "stepType": "oss",
+ "parameter": {
+ "endpoint": "http://oss.aliyuncs.com",
+ "accessId": "",
+ "accessKey": "",
+ "bucket": "myBucket",
+ "fileFormat": "parquet",
+ "path": "/parquet",
+ "parquetSchema":"message m { optional BINARY registration_dttm (UTF8); optional Int64 id; optional BINARY first_name (UTF8); optional BINARY last_name (UTF8); optional BINARY email (UTF8); optional BINARY gender (UTF8); optional BINARY ip_address (UTF8); optional BINARY cc (UTF8); optional BINARY country (UTF8); optional BINARY birthdate (UTF8); optional DOUBLE salary; optional BINARY title (UTF8); optional BINARY comments (UTF8); }",
+ "column": [
+ {
+ "index": 0,
+ "type": "long"
+ },
+ {
+ "index": "1",
+ "type": "string"
+ },
+ {
+ "index": "2",
+ "type": "string"
+ }
+ ]
+ }
+}
+```
### 3.2 参数说明
diff --git a/osswriter/doc/osswriter.md b/osswriter/doc/osswriter.md
index 1a3d3e47..0c23e698 100644
--- a/osswriter/doc/osswriter.md
+++ b/osswriter/doc/osswriter.md
@@ -18,7 +18,7 @@ OSSWriter提供了向OSS写入类CSV格式的一个或者多个表文件。
OSSWriter实现了从DataX协议转为OSS中的TXT文件功能,OSS本身是无结构化数据存储,OSSWriter需要在如下几个方面增加:
-1. 支持且仅支持写入 TXT的文件,且要求TXT中shema为一张二维表。
+1. 支持写入 TXT 的文件,且要求 TXT 中 schema 为一张二维表。
2. 支持类CSV格式文件,自定义分隔符。
@@ -28,6 +28,8 @@ OSSWriter实现了从DataX协议转为OSS中的TXT文件功能,OSS本身是无
7. 文件支持滚动,当文件大于某个size值或者行数值,文件需要切换。 [暂不支持]
+8. 支持写 PARQUET、ORC 文件。
+
我们不能做到:
1. 单个文件不能支持并发写入。
@@ -37,7 +39,7 @@ OSSWriter实现了从DataX协议转为OSS中的TXT文件功能,OSS本身是无
### 3.1 配置样例
-
+写 txt 文件样例:
```json
{
"job": {
@@ -65,7 +67,90 @@ OSSWriter实现了从DataX协议转为OSS中的TXT文件功能,OSS本身是无
}
}
```
+写 orc 文件样例:
+```json
+{
+ "job": {
+ "setting": {},
+ "content": [
+ {
+ "reader": {},
+ "writer": {
+ "name": "osswriter",
+ "parameter": {
+ "endpoint": "http://oss.aliyuncs.com",
+ "accessId": "",
+ "accessKey": "",
+ "bucket": "myBucket",
+ "fileName": "test",
+ "encoding": "UTF-8",
+ "column": [
+ {
+ "name": "col1",
+ "type": "BIGINT"
+ },
+ {
+ "name": "col2",
+ "type": "DOUBLE"
+ },
+ {
+ "name": "col3",
+ "type": "STRING"
+ }
+ ],
+ "fileFormat": "orc",
+ "path": "/tests/case61",
+ "writeMode": "append"
+ }
+ }
+ }
+ ]
+ }
+}
+```
+写 parquet 文件样例:
+```json
+{
+ "job": {
+ "setting": {},
+ "content": [
+ {
+ "reader": {},
+ "writer": {
+ "name": "osswriter",
+ "parameter": {
+ "endpoint": "http://oss.aliyuncs.com",
+ "accessId": "",
+ "accessKey": "",
+ "bucket": "myBucket",
+ "fileName": "test",
+ "encoding": "UTF-8",
+ "column": [
+ {
+ "name": "col1",
+ "type": "BIGINT"
+ },
+ {
+ "name": "col2",
+ "type": "DOUBLE"
+ },
+ {
+ "name": "col3",
+ "type": "STRING"
+ }
+ ],
+ "parquetSchema": "message test { required int64 int64_col;\n required binary str_col (UTF8);\nrequired group params (MAP) {\nrepeated group key_value {\nrequired binary key (UTF8);\nrequired binary value (UTF8);\n}\n}\nrequired group params_arr (LIST) {\n repeated group list {\n required binary element (UTF8);\n }\n}\nrequired group params_struct {\n required int64 id;\n required binary name (UTF8);\n }\nrequired group params_arr_complex (LIST) {\n repeated group list {\n required group element {\n required int64 id;\n required binary name (UTF8);\n}\n }\n}\nrequired group params_complex (MAP) {\nrepeated group key_value {\nrequired binary key (UTF8);\nrequired group value {\n required int64 id;\n required binary name (UTF8);\n }\n}\n}\nrequired group params_struct_complex {\n required int64 id;\n required group detail {\n required int64 id;\n required binary name (UTF8);\n }\n }\n}",
+ "fileFormat": "parquet",
+ "path": "/tests/case61",
+ "writeMode": "append"
+ }
+ }
+ }
+ ]
+ }
+}
+```
### 3.2 参数说明
* **endpoint**
diff --git a/otsreader/doc/otsreader.md b/otsreader/doc/otsreader.md
index 1297dbd6..77b4edfe 100644
--- a/otsreader/doc/otsreader.md
+++ b/otsreader/doc/otsreader.md
@@ -13,7 +13,7 @@ OTSReader插件实现了从OTS读取数据,并可以通过用户指定抽取
* 范围抽取
* 指定分片抽取
-OTS是构建在阿里云飞天分布式系统之上的 NoSQL数据库服务,提供海量结构化数据的存储和实时访问。OTS 以实例和表的形式组织数据,通过数据分片和负载均衡技术,实现规模上的无缝扩展。
+本版本的OTSReader新增了多版本数据读取的支持,同时兼容旧版本的配置文件。
## 2 实现原理
@@ -25,201 +25,425 @@ OTSReader会根据OTS的表范围,按照Datax并发的数目N,将范围等
### 3.1 配置样例
-* 配置一个从OTS全表同步抽取数据到本地的作业:
+#### 3.1.1 读取单版本数据
+* 配置一个从OTS表读取单版本数据的reader:
```
{
- "job": {
- "setting": {
- },
- "content": [
- {
- "reader": {
- "name": "otsreader",
- "parameter": {
- /* ----------- 必填 --------------*/
- "endpoint":"",
- "accessId":"",
- "accessKey":"",
- "instanceName":"",
-
- // 导出数据表的表名
- "table":"",
-
- // 需要导出的列名,支持重复列和常量列,区分大小写
- // 常量列:类型支持STRING,INT,DOUBLE,BOOL和BINARY
- // 备注:BINARY需要通过Base64转换为对应的字符串传入插件
- "column":[
- {"name":"col1"}, // 普通列
- {"name":"col2"}, // 普通列
- {"name":"col3"}, // 普通列
- {"type":"STRING", "value" : "bazhen"}, // 常量列(字符串)
- {"type":"INT", "value" : ""}, // 常量列(整形)
- {"type":"DOUBLE", "value" : ""}, // 常量列(浮点)
- {"type":"BOOL", "value" : ""}, // 常量列(布尔)
- {"type":"BINARY", "value" : "Base64(bin)"} // 常量列(二进制),使用Base64编码完成
- ],
- "range":{
- // 导出数据的起始范围
- // 支持INF_MIN, INF_MAX, STRING, INT
- "begin":[
- {"type":"INF_MIN"},
- ],
- // 导出数据的结束范围
- // 支持INF_MIN, INF_MAX, STRING, INT
- "end":[
- {"type":"INF_MAX"},
- ]
- }
- }
- },
- "writer": {}
- }
- ]
- }
-}
-```
-
-* 配置一个定义抽取范围的OTSReader:
-
-```
-{
- "job": {
- "setting": {
- "speed": {
- "byte":10485760
+ "job": {
+ "setting": {
+ "speed": {
+ //设置传输速度,单位为byte/s,DataX运行会尽可能达到该速度但是不超过它.
+ "byte": 1048576
+      },
+ //出错限制
+ "errorLimit": {
+ //出错的record条数上限,当大于该值即报错。
+ "record": 0,
+ //出错的record百分比上限 1.0表示100%,0.02表示2%
+ "percentage": 0.02
+ }
+ },
+ "content": [
+ {
+ "reader": {
+ "name": "otsreader-internal",
+ "parameter": {
+ "endpoint":"",
+ "accessId":"",
+ "accessKey":"",
+ "instanceName":"",
+ "table": "",
+          //newVersion定义了是否使用新版本插件 可选值:false || true
+ "newVersion":"false",
+ //mode定义了读取数据的格式(普通数据/多版本数据),可选值:normal || multiversion
+ "mode": "normal",
+
+ // 导出的范围,读取的范围是[begin,end),左闭右开的区间
+ // begin小于end,表示正序读取数据
+ // begin大于end,表示反序读取数据
+ // begin和end不能相等
+ // type支持的类型有如下几类:
+ // string、int、binary
+ // binary输入的方式采用二进制的Base64字符串形式传入
+ // INF_MIN 表示无限小
+ // INF_MAX 表示无限大
+ "range":{
+ // 可选,默认表示从无限小开始读取
+ // 这个值的输入可以填写空数组,或者PK前缀,亦或者完整的PK,在正序读取数据时,默认填充PK后缀为INF_MIN,反序为INF_MAX
+ // 例子:
+ // 如果用户的表有2个PK,类型分别为string、int,那么如下3种输入都是合法,如:
+ // 1. [] --> 表示从表的开始位置读取
+ // 2. [{"type":"string", "value":"a"}] --> 表示从[{"type":"string", "value":"a"},{"type":"INF_MIN"}]
+ // 3. [{"type":"string", "value":"a"},{"type":"INF_MIN"}]
+ //
+ // binary类型的PK列比较特殊,因为Json不支持直接输入二进制数,所以系统定义:用户如果要传入
+ // 二进制,必须使用(Java)Base64.encodeBase64String方法,将二进制转换为一个可视化的字符串,然后将这个字符串填入value中
+ // 例子(Java):
+ // byte[] bytes = "hello".getBytes(); # 构造一个二进制数据,这里使用字符串hello的byte值
+ // String inputValue = Base64.encodeBase64String(bytes) # 调用Base64方法,将二进制转换为可视化的字符串
+ // 上面的代码执行之后,可以获得inputValue为"aGVsbG8="
+ // 最终写入配置:{"type":"binary","value" : "aGVsbG8="}
+
+ "begin":[{"type":"string", "value":"a"},{"type":"INF_MIN"}],
+
+ // 默认表示读取到无限大结束
+            // 这个值的输入可以填写空数组,或者PK前缀,亦或者完整的PK,在正序读取数据时,默认填充PK后缀为INF_MAX,反序为INF_MIN
+ // 可选
+ "end":[{"type":"string", "value":"a"},{"type":"INF_MAX"}],
+
+            // 当前用户数据较多时,需要开启并发导出,Split可以将当前范围的数据按照切分点切分为多个并发任务
+ // 可选
+            // 1. split中的输入值只能是PK的第一列(分片键),且值的类型必须和PartitionKey一致
+ // 2. 值的范围必须在begin和end之间
+ // 3. split内部的值必须根据begin和end的正反序关系而递增或者递减
+ "split":[{"type":"string", "value":"b"}, {"type":"string", "value":"c"}]
},
- "errorLimit":0.0
+
+
+ // 指定要导出的列,支持普通列和常量列
+ // 格式
+ // 普通列格式:{"name":"{your column name}"}
+ // 常量列格式:{"type":"", "value":""} , type支持string、int、binary、bool、double
+ // binary类型需要使用base64转换成对应的字符串传入
+ // 注意:
+ // 1. PK列也是需要用户在下面单独指定
+ "column": [
+ {"name":"pk1"}, // 普通列,下同
+ {"name":"pk2"},
+ {"name":"attr1"},
+ {"type":"string","value" : ""} // 指定常量列,下同
+ {"type":"int","value" : ""}
+ {"type":"double","value" : ""}
+ // binary类型的常量列比较特殊,因为Json不支持直接输入二进制数,所以系统定义:用户如果要传入
+ // 二进制,必须使用(Java)Base64.encodeBase64String方法,将二进制转换为一个可视化的字符串,然后将这个字符串填入value中
+ // 例子(Java):
+ // byte[] bytes = "hello".getBytes(); # 构造一个二进制数据,这里使用字符串hello的byte值
+ // String inputValue = Base64.encodeBase64String(bytes) # 调用Base64方法,将二进制转换为可视化的字符串
+ // 上面的代码执行之后,可以获得inputValue为"aGVsbG8="
+ // 最终写入配置:{"type":"binary","value" : "aGVsbG8="}
+
+ {"type":"binary","value" : "aGVsbG8="}
+        ]
+ }
},
- "content": [
- {
- "reader": {
- "name": "otsreader",
- "parameter": {
- "endpoint":"",
- "accessId":"",
- "accessKey":"",
- "instanceName":"",
-
- // 导出数据表的表名
- "table":"",
-
- // 需要导出的列名,支持重复类和常量列,区分大小写
- // 常量列:类型支持STRING,INT,DOUBLE,BOOL和BINARY
- // 备注:BINARY需要通过Base64转换为对应的字符串传入插件
- "column":[
- {"name":"col1"}, // 普通列
- {"name":"col2"}, // 普通列
- {"name":"col3"}, // 普通列
- {"type":"STRING","value" : ""}, // 常量列(字符串)
- {"type":"INT","value" : ""}, // 常量列(整形)
- {"type":"DOUBLE","value" : ""}, // 常量列(浮点)
- {"type":"BOOL","value" : ""}, // 常量列(布尔)
- {"type":"BINARY","value" : "Base64(bin)"} // 常量列(二进制)
- ],
- "range":{
- // 导出数据的起始范围
- // 支持INF_MIN, INF_MAX, STRING, INT
- "begin":[
- {"type":"INF_MIN"},
- {"type":"INF_MAX"},
- {"type":"STRING", "value":"hello"},
- {"type":"INT", "value":"2999"},
- ],
- // 导出数据的结束范围
- // 支持INF_MIN, INF_MAX, STRING, INT
- "end":[
- {"type":"INF_MAX"},
- {"type":"INF_MIN"},
- {"type":"STRING", "value":"hello"},
- {"type":"INT", "value":"2999"},
- ]
- }
- }
- },
- "writer": {}
- }
- ]
- }
+ "writer": {
+ //writer类型
+ "name": "streamwriter",
+ //是否打印内容
+ "parameter": {
+ "print": true
+ }
+ }
+ }
+ ]
+ }
}
```
+#### 3.1.2 读取多版本数据
+* 配置一个从OTS表读取多版本数据的reader(仅在newVersion == true时支持):
+
+```
+{
+ "job": {
+ "setting": {
+ "speed": {
+ //设置传输速度,单位为byte/s,DataX运行会尽可能达到该速度但是不超过它.
+ "byte": 1048576
+      },
+ //出错限制
+ "errorLimit": {
+ //出错的record条数上限,当大于该值即报错。
+ "record": 0,
+ //出错的record百分比上限 1.0表示100%,0.02表示2%
+ "percentage": 0.02
+ }
+ },
+ "content": [
+ {
+ "reader": {
+ "name": "otsreader-internal",
+ "parameter": {
+ "endpoint":"",
+ "accessId":"",
+ "accessKey":"",
+ "instanceName":"",
+ "table": "",
+          //newVersion定义了是否使用新版本插件 可选值:false || true
+ "newVersion":"true",
+ //mode定义了读取数据的格式(普通数据/多版本数据),可选值:normal || multiversion
+ "mode": "multiversion",
+
+          // 导出的范围,读取的范围是[begin,end),左闭右开的区间
+ // begin小于end,表示正序读取数据
+ // begin大于end,表示反序读取数据
+ // begin和end不能相等
+ // type支持的类型有如下几类:
+ // string、int、binary
+ // binary输入的方式采用二进制的Base64字符串形式传入
+ // INF_MIN 表示无限小
+ // INF_MAX 表示无限大
+ "range":{
+ // 可选,默认表示从无限小开始读取
+ // 这个值的输入可以填写空数组,或者PK前缀,亦或者完整的PK,在正序读取数据时,默认填充PK后缀为INF_MIN,反序为INF_MAX
+ // 例子:
+ // 如果用户的表有2个PK,类型分别为string、int,那么如下3种输入都是合法,如:
+ // 1. [] --> 表示从表的开始位置读取
+ // 2. [{"type":"string", "value":"a"}] --> 表示从[{"type":"string", "value":"a"},{"type":"INF_MIN"}]
+ // 3. [{"type":"string", "value":"a"},{"type":"INF_MIN"}]
+ //
+ // binary类型的PK列比较特殊,因为Json不支持直接输入二进制数,所以系统定义:用户如果要传入
+ // 二进制,必须使用(Java)Base64.encodeBase64String方法,将二进制转换为一个可视化的字符串,然后将这个字符串填入value中
+ // 例子(Java):
+ // byte[] bytes = "hello".getBytes(); # 构造一个二进制数据,这里使用字符串hello的byte值
+ // String inputValue = Base64.encodeBase64String(bytes) # 调用Base64方法,将二进制转换为可视化的字符串
+ // 上面的代码执行之后,可以获得inputValue为"aGVsbG8="
+ // 最终写入配置:{"type":"binary","value" : "aGVsbG8="}
+
+ "begin":[{"type":"string", "value":"a"},{"type":"INF_MIN"}],
+
+ // 默认表示读取到无限大结束
+            // 这个值的输入可以填写空数组,或者PK前缀,亦或者完整的PK,在正序读取数据时,默认填充PK后缀为INF_MAX,反序为INF_MIN
+ // 可选
+ "end":[{"type":"string", "value":"g"},{"type":"INF_MAX"}],
+
+            // 当前用户数据较多时,需要开启并发导出,Split可以将当前范围的数据按照切分点切分为多个并发任务
+ // 可选
+            // 1. split中的输入值只能是PK的第一列(分片键),且值的类型必须和PartitionKey一致
+ // 2. 值的范围必须在begin和end之间
+ // 3. split内部的值必须根据begin和end的正反序关系而递增或者递减
+ "split":[{"type":"string", "value":"b"}, {"type":"string", "value":"c"}]
+ },
+
+ // 指定要导出的列,在多版本模式下只支持普通列
+ // 格式:
+ // 普通列格式:{"name":"{your column name}"}
+ // 可选,默认导出所有列的所有版本
+ // 注意:
+ // 1.在多版本模式下,不支持常量列
+ // 2.PK列不能指定,导出4元组中默认包括完整的PK
+ // 3.不能重复指定列
+ "column": [
+ {"name":"attr1"}
+ ],
+
+ // 请求数据的Time Range,读取的范围是[begin,end),左闭右开的区间
+ // 可选,默认读取全部版本
+ // 注意:begin必须小于end
+ "timeRange":{
+ // 可选,默认为0
+ // 取值范围是0~LONG_MAX
+ "begin":1400000000,
+ // 可选,默认为Long Max(9223372036854775807L)
+ // 取值范围是0~LONG_MAX
+ "end" :1600000000
+ },
+
+ // 请求的指定Version
+ // 可选,默认读取所有版本
+ // 取值范围是1~INT32_MAX
+ "maxVersion":10,
+ }
+ },
+ "writer": {
+ //writer类型
+ "name": "streamwriter",
+ //是否打印内容
+ "parameter": {
+ "print": true
+ }
+ }
+ }
+ ]
+ }
+}
+```
+#### 3.1.3 读取时序表数据
+* 配置一个从OTS **时序表**读取数据的reader(仅在newVersion == true时支持):
+```json
+{
+ "job": {
+ "setting": {
+ "speed": {
+ // 读取时序数据的通道数
+ "channel": 5
+ }
+ },
+ "content": [
+ {
+ "reader": {
+ "name": "otsreader",
+ "parameter": {
+ "endpoint": "",
+ "accessId": "",
+ "accessKey": "",
+ "instanceName": "",
+ "table": "",
+ // 读时序数据mode必须为normal
+ "mode": "normal",
+ // 读时序数据newVersion必须为true
+ "newVersion": "true",
+ // 配置该表为时序表
+ "isTimeseriesTable":"true",
+ // 配置需要读取时间线的measurementName字段,非必需
+ // 为空则读取全表数据
+ "measurementName":"measurement_5",
+ // column是一个数组,每个元素表示一列
+ // 对于常量列,需要配置以下字段:
+ // 1. type : 字段值类型,必需
+ // 支持类型 : string, int, double, bool, binary
+ // 2. value : 字段值,必需
+ //
+ // 对于普通列,需要配置以下字段:
+ // 1. name : 列名,必需
+ // 时间线的'度量名称'使用_m_name标识,数据类型为String
+ // 时间线的'数据源'使用_data_source标识,数据类型为String
+ // 时间线的'标签'使用_tags标识,数据类型为String
+ // 时间线的'时间戳'使用_time标识,数据类型为Long
+ // 2. is_timeseries_tag : 是否为tags字段内部的键值,非必需,默认为false。
+ // 3. type : 字段值类型,非必需,默认为string。
+ // 支持类型 : string, int, double, bool, binary
+ "column": [
+ {
+ "name": "_m_name"
+ },
+ {
+ "name": "tagA",
+ "is_timeseries_tag":"true"
+ },
+ {
+ "name": "double_0",
+ "type":"DOUBLE"
+ },
+ {
+ "name": "string_0",
+ "type":"STRING"
+ },
+ {
+ "name": "long_0",
+ "type":"int"
+ },
+ {
+ "name": "binary_0",
+ "type":"BINARY"
+ },
+ {
+ "name": "bool_0",
+ "type":"BOOL"
+ },
+ {
+ "type":"STRING",
+ "value":"testString"
+ }
+ ]
+ }
+ },
+ "writer": {
+
+ }
+ }
+ ]
+ }
+}
+```
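+
+注:时序表读取时,时间线的度量名称、数据源、标签、时间戳分别使用固定列名 _m_name、_data_source、_tags、_time,tags 内部的键值通过 is_timeseries_tag 标识读取。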
### 3.2 参数说明
* **endpoint**
- * 描述:OTS Server的EndPoint地址,例如http://bazhen.cn−hangzhou.ots.aliyuncs.com。
+ * 描述:OTS Server的EndPoint地址,例如http://bazhen.cn−hangzhou.ots.aliyuncs.com。
- * 必选:是
+ * 必选:是
- * 默认值:无
+ * 默认值:无
* **accessId**
- * 描述:OTS的accessId
+ * 描述:OTS的accessId
- * 必选:是
+ * 必选:是
- * 默认值:无
+ * 默认值:无
* **accessKey**
- * 描述:OTS的accessKey
+ * 描述:OTS的accessKey
- * 必选:是
+ * 必选:是
- * 默认值:无
+ * 默认值:无
* **instanceName**
- * 描述:OTS的实例名称,实例是用户使用和管理 OTS 服务的实体,用户在开通 OTS 服务之后,需要通过管理控制台来创建实例,然后在实例内进行表的创建和管理。实例是 OTS 资源管理的基础单元,OTS 对应用程序的访问控制和资源计量都在实例级别完成。
+ * 描述:OTS的实例名称,实例是用户使用和管理 OTS 服务的实体,用户在开通 OTS 服务之后,需要通过管理控制台来创建实例,然后在实例内进行表的创建和管理。实例是 OTS 资源管理的基础单元,OTS 对应用程序的访问控制和资源计量都在实例级别完成。
- * 必选:是
+ * 必选:是
- * 默认值:无
+ * 默认值:无
* **table**
- * 描述:所选取的需要抽取的表名称,这里有且只能填写一张表。在OTS不存在多表同步的需求。
+ * 描述:所选取的需要抽取的表名称,这里有且只能填写一张表。在OTS不存在多表同步的需求。
- * 必选:是
+ * 必选:是
- * 默认值:无
+ * 默认值:无
+
+* **newVersion**
+
+	* 描述:newVersion定义了使用的ots SDK版本。
+ * true,新版本插件,使用com.alicloud.openservices.tablestore的依赖(推荐)
+ * false,旧版本插件,使用com.aliyun.openservices.ots的依赖,**不支持多版本数据的读取**
+
+ * 必选:否
+
+ * 默认值:false
+
+* **mode**
+
+	* 描述:读取数据的模式,目前有两种模式。
+ * normal,对应普通的数据
+	  * multiVersion,读取多版本格式的数据,多版本模式下,配置参数有所不同,详见3.1.2
+
+ * 必选:否
+
+ * 默认值:normal
* **column**
- * 描述:所配置的表中需要同步的列名集合,使用JSON的数组描述字段信息。由于OTS本身是NoSQL系统,在OTSReader抽取数据过程中,必须指定相应地字段名称。
+ * 描述:所配置的表中需要同步的列名集合,使用JSON的数组描述字段信息。由于OTS本身是NoSQL系统,在OTSReader抽取数据过程中,必须指定相应地字段名称。
- 支持普通的列读取,例如: {"name":"col1"}
+ 支持普通的列读取,例如: {"name":"col1"}
- 支持部分列读取,如用户不配置该列,则OTSReader不予读取。
+ 支持部分列读取,如用户不配置该列,则OTSReader不予读取。
- 支持常量列读取,例如: {"type":"STRING", "value" : "DataX"}。使用type描述常量类型,目前支持STRING、INT、DOUBLE、BOOL、BINARY(用户使用Base64编码填写)、INF_MIN(OTS的系统限定最小值,使用该值用户不能填写value属性,否则报错)、INF_MAX(OTS的系统限定最大值,使用该值用户不能填写value属性,否则报错)。
+ 支持常量列读取,例如: {"type":"STRING", "value" : "DataX"}。使用type描述常量类型,目前支持STRING、INT、DOUBLE、BOOL、BINARY(用户使用Base64编码填写)、INF_MIN(OTS的系统限定最小值,使用该值用户不能填写value属性,否则报错)、INF_MAX(OTS的系统限定最大值,使用该值用户不能填写value属性,否则报错)。
- 不支持函数或者自定义表达式,由于OTS本身不提供类似SQL的函数或者表达式功能,OTSReader也不能提供函数或表达式列功能。
+ 不支持函数或者自定义表达式,由于OTS本身不提供类似SQL的函数或者表达式功能,OTSReader也不能提供函数或表达式列功能。
- * 必选:是
+ * 必选:是
- * 默认值:无
+ * 默认值:无
* **begin/end**
- * 描述:该配置项必须配对使用,用于支持OTS表范围抽取。begin/end中描述的是OTS **PrimaryKey**的区间分布状态,而且必须保证区间覆盖到所有的PrimaryKey,**需要指定该表下所有的PrimaryKey范围,不能遗漏任意一个PrimaryKey**,对于无限大小的区间,可以使用{"type":"INF_MIN"},{"type":"INF_MAX"}指代。例如对一张主键为 [DeviceID, SellerID]的OTS进行抽取任务,begin/end可以配置为:
+ * 描述:该配置项必须配对使用,用于支持OTS表范围抽取。begin/end中描述的是OTS **PrimaryKey**的区间分布状态,而且必须保证区间覆盖到所有的PrimaryKey,**需要指定该表下所有的PrimaryKey范围,不能遗漏任意一个PrimaryKey**,对于无限大小的区间,可以使用{"type":"INF_MIN"},{"type":"INF_MAX"}指代。例如对一张主键为 [DeviceID, SellerID]的OTS进行抽取任务,begin/end可以配置为:
- ```json
- "range": {
- "begin": {
- {"type":"INF_MIN"}, //指定deviceID最小值
- {"type":"INT", "value":"0"} //指定deviceID最小值
- },
- "end": {
- {"type":"INF_MAX"}, //指定deviceID抽取最大值
- {"type":"INT", "value":"9999"} //指定deviceID抽取最大值
- }
- }
- ```
+ ```json
+ "range": {
+ "begin": {
+ {"type":"INF_MIN"}, //指定deviceID最小值
+ {"type":"INT", "value":"0"} //指定deviceID最小值
+ },
+ "end": {
+ {"type":"INF_MAX"}, //指定deviceID抽取最大值
+ {"type":"INT", "value":"9999"} //指定deviceID抽取最大值
+ }
+ }
+ ```
如果要对上述表抽取全表,可以使用如下配置:
@@ -237,42 +461,42 @@ OTSReader会根据OTS的表范围,按照Datax并发的数目N,将范围等
}
```
- * 必选:是
+ * 必选:否
- * 默认值:空
+ * 默认值:读取全部值
* **split**
- * 描述:该配置项属于高级配置项,是用户自己定义切分配置信息,普通情况下不建议用户使用。适用场景通常在OTS数据存储发生热点,使用OTSReader自动切分的策略不能生效情况下,使用用户自定义的切分规则。split指定是的在Begin、End区间内的切分点,且只能是partitionKey的切分点信息,即在split仅配置partitionKey,而不需要指定全部的PrimaryKey。
+	* 描述:该配置项属于高级配置项,是用户自己定义切分配置信息,普通情况下不建议用户使用。适用场景通常是OTS数据存储发生热点,使用OTSReader自动切分的策略不能生效的情况下,使用用户自定义的切分规则。split指定的是在Begin、End区间内的切分点,且只能是partitionKey的切分点信息,即在split中仅配置partitionKey,而不需要指定全部的PrimaryKey。
- 例如对一张主键为 [DeviceID, SellerID]的OTS进行抽取任务,可以配置为:
+ 例如对一张主键为 [DeviceID, SellerID]的OTS进行抽取任务,可以配置为:
- ```json
- "range": {
- "begin": {
- {"type":"INF_MIN"}, //指定deviceID最小值
- {"type":"INF_MIN"} //指定deviceID最小值
- },
- "end": {
- {"type":"INF_MAX"}, //指定deviceID抽取最大值
- {"type":"INF_MAX"} //指定deviceID抽取最大值
- },
- // 用户指定的切分点,如果指定了切分点,Job将按照begin、end和split进行Task的切分,
- // 切分的列只能是Partition Key(ParimaryKey的第一列)
- // 支持INF_MIN, INF_MAX, STRING, INT
- "split":[
- {"type":"STRING", "value":"1"},
- {"type":"STRING", "value":"2"},
- {"type":"STRING", "value":"3"},
- {"type":"STRING", "value":"4"},
- {"type":"STRING", "value":"5"}
- ]
- }
- ```
+ ```json
+ "range": {
+ "begin": {
+ {"type":"INF_MIN"}, //指定deviceID最小值
+ {"type":"INF_MIN"} //指定deviceID最小值
+ },
+ "end": {
+ {"type":"INF_MAX"}, //指定deviceID抽取最大值
+ {"type":"INF_MAX"} //指定deviceID抽取最大值
+ },
+ // 用户指定的切分点,如果指定了切分点,Job将按照begin、end和split进行Task的切分,
+ // 切分的列只能是Partition Key(ParimaryKey的第一列)
+ // 支持INF_MIN, INF_MAX, STRING, INT
+ "split":[
+ {"type":"STRING", "value":"1"},
+ {"type":"STRING", "value":"2"},
+ {"type":"STRING", "value":"3"},
+ {"type":"STRING", "value":"4"},
+ {"type":"STRING", "value":"5"}
+ ]
+ }
+ ```
- * 必选:否
+ * 必选:否
- * 默认值:无
+ * 默认值:无
### 3.3 类型转换
@@ -291,44 +515,14 @@ OTSReader会根据OTS的表范围,按照Datax并发的数目N,将范围等
 * 注意,OTS本身不支持日期型类型。应用层一般使用Long保存Unix TimeStamp形式的时间。
-## 4 性能报告
-### 4.1 环境准备
+## 4 约束限制
-#### 4.1.1 数据特征
-
-15列String(10 Byte), 2两列Integer(8 Byte),总计168Byte/r。
-
-#### 4.1.2 机器参数
-
-OTS端:3台前端机,5台后端机
-
-DataX运行端: 24核CPU, 98GB内存
-
-#### 4.1.3 DataX jvm 参数
-
- -Xms1024m -Xmx1024m -XX:+HeapDumpOnOutOfMemoryError
-
-### 4.2 测试报告
-
-#### 4.2.1 测试报告
-
-|并发数|DataX CPU|OTS 流量|DATAX流量 | 前端QPS| 前端延时|
-|--------|--------| --------|--------|--------|------|
-|2| 36% |6.3M/s |12739 rec/s | 4.7 | 308ms |
-|11| 155% | 32M/s |60732 rec/s | 23.9 | 412ms |
-|50| 377% | 73M/s |145139 rec/s | 54 | 874ms |
-|100| 448% | 82M/s | 156262 rec/s |60 | 1570ms |
-
-
-
-## 5 约束限制
-
-### 5.1 一致性约束
+### 4.1 一致性约束
OTS是类BigTable的存储系统,OTS本身能够保证单行写事务性,无法提供跨行级别的事务。对于OTSReader而言也无法提供全表的一致性视图。例如对于OTSReader在0点启动的数据同步任务,在整个表数据同步过程中,OTSReader同样会抽取到后续更新的数据,无法提供准确的0点时刻该表一致性视图。
-### 5.2 增量数据同步
+### 4.2 增量数据同步
OTS本质上是KV存储,目前只能针对PK进行范围查询,暂不支持按照字段范围抽取数据。因此增量同步只能在PK能够表示范围信息时进行,例如自增ID或者时间戳。
@@ -336,5 +530,4 @@ OTS本质上KV存储,目前只能针对PK进行范围查询,暂不支持按
时间戳,OTSReader可以通过PK过滤时间戳,通过指定Range范围进行增量抽取。这样使用的前提是OTS中的PrimaryKey必须包含主键时间列(时间主键需要由OTS应用方生成)。
-## 6 FAQ
-
+## 5 FAQ
diff --git a/otsreader/pom.xml b/otsreader/pom.xml
index eaac8804..dad538bf 100644
--- a/otsreader/pom.xml
+++ b/otsreader/pom.xml
@@ -1,5 +1,5 @@
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<modelVersion>4.0.0</modelVersion>
 	<groupId>com.alibaba.datax</groupId>
@@ -10,17 +10,6 @@
 	<artifactId>otsreader</artifactId>
 	<dependencies>
-		<dependency>
-			<groupId>org.apache.logging.log4j</groupId>
-			<artifactId>log4j-api</artifactId>
-			<version>2.17.1</version>
-		</dependency>
-
-		<dependency>
-			<groupId>org.apache.logging.log4j</groupId>
-			<artifactId>log4j-core</artifactId>
-			<version>2.17.1</version>
-		</dependency>
 		<dependency>
 			<groupId>com.alibaba.datax</groupId>
 			<artifactId>datax-common</artifactId>
@@ -47,22 +36,43 @@
 			<version>2.2.4</version>
 			<exclusions>
 				<exclusion>
-					<artifactId>log4j-api</artifactId>
+					<artifactId>log4j-core</artifactId>
 					<groupId>org.apache.logging.log4j</groupId>
+				</exclusion>
+			</exclusions>
+		</dependency>
+		<dependency>
+			<groupId>com.aliyun.openservices</groupId>
+			<artifactId>tablestore</artifactId>
+			<version>5.13.13</version>
+			<exclusions>
+				<exclusion>
 					<artifactId>log4j-core</artifactId>
 					<groupId>org.apache.logging.log4j</groupId>
 				</exclusion>
-			</exclusions>
+			</exclusions>
 		</dependency>
 		<dependency>
 			<groupId>com.google.code.gson</groupId>
 			<artifactId>gson</artifactId>
 			<version>2.2.4</version>
 		</dependency>
+		<dependency>
+			<groupId>com.alibaba</groupId>
+			<artifactId>fastjson</artifactId>
+			<version>1.2.83_noneautotype</version>
+			<scope>compile</scope>
+		</dependency>
 	</dependencies>
+	<build>
+		<resources>
+			<resource>
+				<directory>src/main/java</directory>
+				<includes>
+					<include>**/*.properties</include>
+				</includes>
+			</resource>
+		</resources>
+	</build>
@@ -98,10 +108,6 @@
maven-surefire-plugin
2.5
- all
- 10
- true
- -Xmx1024m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=.
**/unittest/*.java
**/functiontest/*.java
@@ -111,4 +117,3 @@
-
diff --git a/otsreader/src/main/assembly/package.xml b/otsreader/src/main/assembly/package.xml
index 7ee305d1..cb90f3e8 100644
--- a/otsreader/src/main/assembly/package.xml
+++ b/otsreader/src/main/assembly/package.xml
@@ -12,8 +12,8 @@
 			<directory>src/main/resources</directory>
 			<includes>
 				<include>plugin.json</include>
-				<include>plugin_job_template.json</include>
-			</includes>
+				<include>plugin_job_template.json</include>
+			</includes>
 			<outputDirectory>plugin/reader/otsreader</outputDirectory>
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/IOtsReaderMasterProxy.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/IOtsReaderMasterProxy.java
new file mode 100644
index 00000000..ee622e16
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/IOtsReaderMasterProxy.java
@@ -0,0 +1,15 @@
+package com.alibaba.datax.plugin.reader.otsreader;
+
+import java.util.List;
+
+import com.alibaba.datax.common.util.Configuration;
+
+public interface IOtsReaderMasterProxy {
+
+ public void init(Configuration param) throws Exception;
+
+    public List<Configuration> split(int num) throws Exception;
+
+ public void close();
+
+}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/IOtsReaderSlaveProxy.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/IOtsReaderSlaveProxy.java
new file mode 100644
index 00000000..d1100a2a
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/IOtsReaderSlaveProxy.java
@@ -0,0 +1,26 @@
+package com.alibaba.datax.plugin.reader.otsreader;
+
+import com.alibaba.datax.common.plugin.RecordSender;
+import com.alibaba.datax.common.util.Configuration;
+
+/**
+ * OTS Reader工作进程接口
+ */
+public interface IOtsReaderSlaveProxy {
+ /**
+ * 初始化函数,解析配置、初始化相关资源
+ */
+ public void init(Configuration configuration);
+
+ /**
+ * 关闭函数,释放资源
+ */
+ public void close();
+
+ /**
+ * 数据导出函数
+ * @param recordSender
+ * @throws Exception
+ */
+ public void startRead(RecordSender recordSender) throws Exception;
+}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReader.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReader.java
index 8880c07e..c6bc44b8 100644
--- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReader.java
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReader.java
@@ -1,45 +1,48 @@
package com.alibaba.datax.plugin.reader.otsreader;
-import java.util.List;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.common.spi.Reader;
import com.alibaba.datax.common.util.Configuration;
-import com.alibaba.datax.plugin.reader.otsreader.utils.Common;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSConf;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSMode;
+import com.alibaba.datax.plugin.reader.otsreader.utils.Constant;
+import com.alibaba.datax.plugin.reader.otsreader.utils.GsonParser;
+import com.alibaba.datax.plugin.reader.otsreader.utils.OtsReaderError;
+import com.alicloud.openservices.tablestore.TableStoreException;
import com.aliyun.openservices.ots.ClientException;
-import com.aliyun.openservices.ots.OTSException;
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+
public class OtsReader extends Reader {
public static class Job extends Reader.Job {
private static final Logger LOG = LoggerFactory.getLogger(Job.class);
- private OtsReaderMasterProxy proxy = new OtsReaderMasterProxy();
+ //private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OtsReader.class);
+ private IOtsReaderMasterProxy proxy = null;
+
@Override
- public void init() {
+ public void init() {
LOG.info("init() begin ...");
+
+ proxy = new OtsReaderMasterProxy();
try {
this.proxy.init(getPluginJobConf());
- } catch (OTSException e) {
- LOG.error("OTSException. ErrorCode:{}, ErrorMsg:{}, RequestId:{}",
- new Object[]{e.getErrorCode(), e.getMessage(), e.getRequestId()});
- LOG.error("Stack", e);
- throw DataXException.asDataXException(new OtsReaderError(e.getErrorCode(), "OTS端的错误"), Common.getDetailMessage(e), e);
+ } catch (TableStoreException e) {
+ LOG.error("OTSException: {}", e.toString(), e);
+ throw DataXException.asDataXException(new OtsReaderError(e.getErrorCode(), "OTS ERROR"), e.toString(), e);
} catch (ClientException e) {
- LOG.error("ClientException. ErrorCode:{}, ErrorMsg:{}",
- new Object[]{e.getErrorCode(), e.getMessage()});
- LOG.error("Stack", e);
- throw DataXException.asDataXException(new OtsReaderError(e.getErrorCode(), "OTS端的错误"), Common.getDetailMessage(e), e);
- } catch (IllegalArgumentException e) {
- LOG.error("IllegalArgumentException. ErrorMsg:{}", e.getMessage(), e);
- throw DataXException.asDataXException(OtsReaderError.INVALID_PARAM, Common.getDetailMessage(e), e);
+ LOG.error("ClientException: {}", e.toString(), e);
+ throw DataXException.asDataXException(OtsReaderError.ERROR, e.toString(), e);
} catch (Exception e) {
- LOG.error("Exception. ErrorMsg:{}", e.getMessage(), e);
- throw DataXException.asDataXException(OtsReaderError.ERROR, Common.getDetailMessage(e), e);
+ LOG.error("Exception. ErrorMsg:{}", e.toString(), e);
+ throw DataXException.asDataXException(OtsReaderError.ERROR, e.toString(), e);
}
+
LOG.info("init() end ...");
}
@@ -60,22 +63,9 @@ public class OtsReader extends Reader {
try {
confs = this.proxy.split(adviceNumber);
- } catch (OTSException e) {
- LOG.error("OTSException. ErrorCode:{}, ErrorMsg:{}, RequestId:{}",
- new Object[]{e.getErrorCode(), e.getMessage(), e.getRequestId()});
- LOG.error("Stack", e);
- throw DataXException.asDataXException(new OtsReaderError(e.getErrorCode(), "OTS端的错误"), Common.getDetailMessage(e), e);
- } catch (ClientException e) {
- LOG.error("ClientException. ErrorCode:{}, ErrorMsg:{}",
- new Object[]{e.getErrorCode(), e.getMessage()});
- LOG.error("Stack", e);
- throw DataXException.asDataXException(new OtsReaderError(e.getErrorCode(), "OTS端的错误"), Common.getDetailMessage(e), e);
- } catch (IllegalArgumentException e) {
- LOG.error("IllegalArgumentException. ErrorMsg:{}", e.getMessage(), e);
- throw DataXException.asDataXException(OtsReaderError.INVALID_PARAM, Common.getDetailMessage(e), e);
} catch (Exception e) {
LOG.error("Exception. ErrorMsg:{}", e.getMessage(), e);
- throw DataXException.asDataXException(OtsReaderError.ERROR, Common.getDetailMessage(e), e);
+ throw DataXException.asDataXException(OtsReaderError.ERROR, e.toString(), e);
}
LOG.info("split() end ...");
@@ -85,39 +75,60 @@ public class OtsReader extends Reader {
public static class Task extends Reader.Task {
private static final Logger LOG = LoggerFactory.getLogger(Task.class);
- private OtsReaderSlaveProxy proxy = new OtsReaderSlaveProxy();
+ //private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OtsReader.class);
+ private IOtsReaderSlaveProxy proxy = null;
@Override
public void init() {
+
+ OTSConf conf = GsonParser.jsonToConf((String) this.getPluginJobConf().get(Constant.ConfigKey.CONF));
+ // 是否使用新接口
+        if (conf.isNewVersion()) {
+ if (conf.getMode() == OTSMode.MULTI_VERSION) {
+ LOG.info("init OtsReaderSlaveProxyMultiVersion");
+ proxy = new OtsReaderSlaveMultiVersionProxy();
+ } else {
+ LOG.info("init OtsReaderSlaveProxyNormal");
+ proxy = new OtsReaderSlaveNormalProxy();
+ }
+
+        } else {
+ String metaMode = conf.getMetaMode();
+ if (StringUtils.isNotBlank(metaMode) && !metaMode.equalsIgnoreCase("false")) {
+ LOG.info("init OtsMetaReaderSlaveProxy");
+ proxy = new OtsReaderSlaveMetaProxy();
+ } else {
+ LOG.info("init OtsReaderSlaveProxyOld");
+ proxy = new OtsReaderSlaveProxyOld();
+ }
+ }
+
+ proxy.init(this.getPluginJobConf());
}
@Override
public void destroy() {
+ try {
+ proxy.close();
+ } catch (Exception e) {
+ LOG.error("Exception. ErrorMsg:{}", e.toString(), e);
+ throw DataXException.asDataXException(OtsReaderError.ERROR, e.toString(), e);
+ }
}
@Override
public void startRead(RecordSender recordSender) {
- LOG.info("startRead() begin ...");
+
try {
- this.proxy.read(recordSender,getPluginJobConf());
- } catch (OTSException e) {
- LOG.error("OTSException. ErrorCode:{}, ErrorMsg:{}, RequestId:{}",
- new Object[]{e.getErrorCode(), e.getMessage(), e.getRequestId()});
- LOG.error("Stack", e);
- throw DataXException.asDataXException(new OtsReaderError(e.getErrorCode(), "OTS端的错误"), Common.getDetailMessage(e), e);
- } catch (ClientException e) {
- LOG.error("ClientException. ErrorCode:{}, ErrorMsg:{}",
- new Object[]{e.getErrorCode(), e.getMessage()});
- LOG.error("Stack", e);
- throw DataXException.asDataXException(new OtsReaderError(e.getErrorCode(), "OTS端的错误"), Common.getDetailMessage(e), e);
- } catch (IllegalArgumentException e) {
- LOG.error("IllegalArgumentException. ErrorMsg:{}", e.getMessage(), e);
- throw DataXException.asDataXException(OtsReaderError.INVALID_PARAM, Common.getDetailMessage(e), e);
+ proxy.startRead(recordSender);
} catch (Exception e) {
- LOG.error("Exception. ErrorMsg:{}", e.getMessage(), e);
- throw DataXException.asDataXException(OtsReaderError.ERROR, Common.getDetailMessage(e), e);
+ LOG.error("Exception. ErrorMsg:{}", e.toString(), e);
+ throw DataXException.asDataXException(OtsReaderError.ERROR, e.toString(), e);
}
- LOG.info("startRead() end ...");
}
}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderMasterProxy.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderMasterProxy.java
index 2b758f06..4ecdd8c1 100644
--- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderMasterProxy.java
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderMasterProxy.java
@@ -1,221 +1,243 @@
package com.alibaba.datax.plugin.reader.otsreader;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.reader.otsreader.callable.GetFirstRowPrimaryKeyCallable;
-import com.alibaba.datax.plugin.reader.otsreader.callable.GetTableMetaCallable;
import com.alibaba.datax.plugin.reader.otsreader.model.OTSConf;
-import com.alibaba.datax.plugin.reader.otsreader.model.OTSConst;
import com.alibaba.datax.plugin.reader.otsreader.model.OTSRange;
-import com.alibaba.datax.plugin.reader.otsreader.utils.ParamChecker;
-import com.alibaba.datax.plugin.reader.otsreader.utils.Common;
-import com.alibaba.datax.plugin.reader.otsreader.utils.GsonParser;
-import com.alibaba.datax.plugin.reader.otsreader.utils.ReaderModelParser;
-import com.alibaba.datax.plugin.reader.otsreader.utils.RangeSplit;
-import com.alibaba.datax.plugin.reader.otsreader.utils.RetryHelper;
-import com.aliyun.openservices.ots.OTSClient;
-import com.aliyun.openservices.ots.model.Direction;
-import com.aliyun.openservices.ots.model.PrimaryKeyValue;
-import com.aliyun.openservices.ots.model.RangeRowQueryCriteria;
-import com.aliyun.openservices.ots.model.RowPrimaryKey;
-import com.aliyun.openservices.ots.model.TableMeta;
+import com.alibaba.datax.plugin.reader.otsreader.utils.*;
+import com.alicloud.openservices.tablestore.SyncClientInterface;
+import com.alicloud.openservices.tablestore.model.*;
+import com.alicloud.openservices.tablestore.model.timeseries.ScanTimeseriesDataResponse;
+import com.alicloud.openservices.tablestore.model.timeseries.TimeseriesScanSplitInfo;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
-public class OtsReaderMasterProxy {
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.List;
- private OTSConf conf = new OTSConf();
-
- private OTSRange range = null;
-
- private OTSClient ots = null;
-
- private TableMeta meta = null;
-
- private Direction direction = null;
+public class OtsReaderMasterProxy implements IOtsReaderMasterProxy {
private static final Logger LOG = LoggerFactory.getLogger(OtsReaderMasterProxy.class);
+ private OTSConf conf = null;
+ private TableMeta meta = null;
+ private SyncClientInterface ots = null;
+ private Direction direction = null;
- /**
- * 1.检查参数是否为
- * null,endpoint,accessid,accesskey,instance-name,table,column,range-begin,range-end,range-split
- * 2.检查参数是否为空字符串
- * endpoint,accessid,accesskey,instance-name,table
- * 3.检查是否为空数组
- * column
- * 4.检查Range的类型个个数是否和PrimaryKey匹配
- * column,range-begin,range-end
- * 5.检查Range Split 顺序和类型是否Range一致,类型是否于PartitionKey一致
- * column-split
- * @param param
- * @throws Exception
- */
- public void init(Configuration param) throws Exception {
- // 默认参数
- // 每次重试的时间都是上一次的一倍,当sleep时间大于30秒时,Sleep重试时间不在增长。18次能覆盖OTS的Failover时间5分钟
- conf.setRetry(param.getInt(OTSConst.RETRY, 18));
- conf.setSleepInMilliSecond(param.getInt(OTSConst.SLEEP_IN_MILLI_SECOND, 100));
-
- // 必选参数
- conf.setEndpoint(ParamChecker.checkStringAndGet(param, Key.OTS_ENDPOINT));
- conf.setAccessId(ParamChecker.checkStringAndGet(param, Key.OTS_ACCESSID));
- conf.setAccesskey(ParamChecker.checkStringAndGet(param, Key.OTS_ACCESSKEY));
- conf.setInstanceName(ParamChecker.checkStringAndGet(param, Key.OTS_INSTANCE_NAME));
- conf.setTableName(ParamChecker.checkStringAndGet(param, Key.TABLE_NAME));
-
- ots = new OTSClient(
- this.conf.getEndpoint(),
- this.conf.getAccessId(),
- this.conf.getAccesskey(),
- this.conf.getInstanceName());
-
- meta = getTableMeta(ots, conf.getTableName());
- LOG.info("Table Meta : {}", GsonParser.metaToJson(meta));
-
- conf.setColumns(ReaderModelParser.parseOTSColumnList(ParamChecker.checkListAndGet(param, Key.COLUMN, true)));
-
- Map rangeMap = ParamChecker.checkMapAndGet(param, Key.RANGE, true);
- conf.setRangeBegin(ReaderModelParser.parsePrimaryKey(ParamChecker.checkListAndGet(rangeMap, Key.RANGE_BEGIN, false)));
- conf.setRangeEnd(ReaderModelParser.parsePrimaryKey(ParamChecker.checkListAndGet(rangeMap, Key.RANGE_END, false)));
-
- range = ParamChecker.checkRangeAndGet(meta, this.conf.getRangeBegin(), this.conf.getRangeEnd());
-
- direction = ParamChecker.checkDirectionAndEnd(meta, range.getBegin(), range.getEnd());
- LOG.info("Direction : {}", direction);
-
- List points = ReaderModelParser.parsePrimaryKey(ParamChecker.checkListAndGet(rangeMap, Key.RANGE_SPLIT));
- ParamChecker.checkInputSplitPoints(meta, range, direction, points);
- conf.setRangeSplit(points);
- }
-
-    public List<Configuration> split(int num) throws Exception {
-        LOG.info("Expect split num : " + num);
-
-        List<Configuration> configurations = new ArrayList<Configuration>();
-
-        List<OTSRange> ranges = null;
-
- if (this.conf.getRangeSplit() != null) { // 用户显示指定了拆分范围
- LOG.info("Begin userDefinedRangeSplit");
- ranges = userDefinedRangeSplit(meta, range, this.conf.getRangeSplit());
- LOG.info("End userDefinedRangeSplit");
- } else { // 采用默认的切分算法
- LOG.info("Begin defaultRangeSplit");
- ranges = defaultRangeSplit(ots, meta, range, num);
- LOG.info("End defaultRangeSplit");
- }
-
- // 解决大量的Split Point序列化消耗内存的问题
- // 因为slave中不会使用这个配置,所以置为空
- this.conf.setRangeSplit(null);
-
- for (OTSRange item : ranges) {
- Configuration configuration = Configuration.newDefault();
- configuration.set(OTSConst.OTS_CONF, GsonParser.confToJson(this.conf));
- configuration.set(OTSConst.OTS_RANGE, GsonParser.rangeToJson(item));
- configuration.set(OTSConst.OTS_DIRECTION, GsonParser.directionToJson(direction));
- configurations.add(configuration);
- }
-
- LOG.info("Configuration list count : " + configurations.size());
-
- return configurations;
- }
public OTSConf getConf() {
return conf;
}
+ public TableMeta getMeta() {
+ return meta;
+ }
+
+ public SyncClientInterface getOts() {
+ return ots;
+ }
+
+ public void setOts(SyncClientInterface ots) {
+ this.ots = ots;
+ }
+
+ /**
+ * 基于配置传入的配置文件,解析为对应的参数
+ *
+ * @param param
+ * @throws Exception
+ */
+ public void init(Configuration param) throws Exception {
+ // 基于预定义的Json格式,检查传入参数是否符合Conf定义规范
+ conf = OTSConf.load(param);
+
+ // Init ots
+ ots = OtsHelper.getOTSInstance(conf);
+
+ // 宽行表init
+ if (!conf.isTimeseriesTable()) {
+ // 获取TableMeta
+ meta = OtsHelper.getTableMeta(
+ ots,
+ conf.getTableName(),
+ conf.getRetry(),
+ conf.getRetryPauseInMillisecond());
+
+ // 基于Meta检查Conf是否正确
+ ParamChecker.checkAndSetOTSConf(conf, meta);
+ direction = ParamChecker.checkDirectionAndEnd(meta, conf.getRange().getBegin(), conf.getRange().getEnd());
+ }
+ // 时序表 检查tablestore SDK version
+ if (conf.isTimeseriesTable()){
+ Common.checkTableStoreSDKVersion();
+ }
+ }
+
+    public List<Configuration> split(int mandatoryNumber) throws Exception {
+ LOG.info("Expect split num : " + mandatoryNumber);
+
+        List<Configuration> configurations = new ArrayList<Configuration>();
+
+        if (conf.isTimeseriesTable()) { // 时序表全部采用默认切分策略
+            LOG.info("Begin timeseries table defaultRangeSplit");
+            configurations = getTimeseriesConfigurationBySplit(mandatoryNumber);
+            LOG.info("End timeseries table defaultRangeSplit");
+        }
+ else if (this.conf.getRange().getSplit().size() != 0) { // 用户显示指定了拆分范围
+ LOG.info("Begin userDefinedRangeSplit");
+ configurations = getNormalConfigurationBySplit();
+ LOG.info("End userDefinedRangeSplit");
+ } else { // 采用默认的切分算法
+ LOG.info("Begin defaultRangeSplit");
+ configurations = getDefaultConfiguration(mandatoryNumber);
+ LOG.info("End defaultRangeSplit");
+ }
+
+ LOG.info("Expect split num: "+ mandatoryNumber +", and final configuration list count : " + configurations.size());
+ return configurations;
+ }
+
public void close() {
ots.shutdown();
}
- // private function
-
- private TableMeta getTableMeta(OTSClient ots, String tableName) throws Exception {
- return RetryHelper.executeWithRetry(
- new GetTableMetaCallable(ots, tableName),
+ /**
+ * timeseries split信息,根据切分数配置多个Task
+ */
+    private List<Configuration> getTimeseriesConfigurationBySplit(int mandatoryNumber) throws Exception {
+        List<TimeseriesScanSplitInfo> timeseriesScanSplitInfoList = OtsHelper.splitTimeseriesScan(
+ ots,
+ conf.getTableName(),
+ conf.getMeasurementName(),
+ mandatoryNumber,
conf.getRetry(),
- conf.getSleepInMilliSecond()
- );
+ conf.getRetryPauseInMillisecond());
+ List configurations = new ArrayList<>();
+
+ for (int i = 0; i < timeseriesScanSplitInfoList.size(); i++) {
+ Configuration configuration = Configuration.newDefault();
+ configuration.set(Constant.ConfigKey.CONF, GsonParser.confToJson(conf));
+ configuration.set(Constant.ConfigKey.SPLIT_INFO, GsonParser.timeseriesScanSplitInfoToString(timeseriesScanSplitInfoList.get(i)));
+ configurations.add(configuration);
+ }
+ return configurations;
}
- private RowPrimaryKey getPKOfFirstRow(
- OTSRange range , Direction direction) throws Exception {
+ /**
+ * 根据用户配置的split信息,将配置文件基于Range范围转换为多个Task的配置
+ */
+    private List<Configuration> getNormalConfigurationBySplit() {
+        List<List<PrimaryKeyColumn>> primaryKeys = new ArrayList<List<PrimaryKeyColumn>>();
+        primaryKeys.add(conf.getRange().getBegin());
+        for (PrimaryKeyColumn column : conf.getRange().getSplit()) {
+            List<PrimaryKeyColumn> point = new ArrayList<PrimaryKeyColumn>();
+ point.add(column);
+ ParamChecker.fillPrimaryKey(this.meta.getPrimaryKeyList(), point, PrimaryKeyValue.INF_MIN);
+ primaryKeys.add(point);
+ }
+ primaryKeys.add(conf.getRange().getEnd());
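+        // Each adjacent pair of points (begin, split points..., end) becomes one task's range below.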
- RangeRowQueryCriteria cur = new RangeRowQueryCriteria(this.conf.getTableName());
- cur.setInclusiveStartPrimaryKey(range.getBegin());
- cur.setExclusiveEndPrimaryKey(range.getEnd());
- cur.setLimit(1);
- cur.setColumnsToGet(Common.getPrimaryKeyNameList(meta));
- cur.setDirection(direction);
+        List<Configuration> configurations = new ArrayList<Configuration>(primaryKeys.size() - 1);
- return RetryHelper.executeWithRetry(
- new GetFirstRowPrimaryKeyCallable(ots, meta, cur),
- conf.getRetry(),
- conf.getSleepInMilliSecond()
- );
+ for (int i = 0; i < primaryKeys.size() - 1; i++) {
+ OTSRange range = new OTSRange();
+ range.setBegin(primaryKeys.get(i));
+ range.setEnd(primaryKeys.get(i + 1));
+
+ Configuration configuration = Configuration.newDefault();
+ configuration.set(Constant.ConfigKey.CONF, GsonParser.confToJson(conf));
+ configuration.set(Constant.ConfigKey.RANGE, GsonParser.rangeToJson(range));
+ configuration.set(Constant.ConfigKey.META, GsonParser.metaToJson(meta));
+ configurations.add(configuration);
+ }
+ return configurations;
}
- private List defaultRangeSplit(OTSClient ots, TableMeta meta, OTSRange range, int num) throws Exception {
+    private List<Configuration> getDefaultConfiguration(int num) throws Exception {
if (num == 1) {
-            List<OTSRange> ranges = new ArrayList<OTSRange>();
+ OTSRange range = new OTSRange();
+ range.setBegin(conf.getRange().getBegin());
+ range.setEnd(conf.getRange().getEnd());
ranges.add(range);
- return ranges;
+
+ return getConfigurationsFromRanges(ranges);
}
-
+
OTSRange reverseRange = new OTSRange();
- reverseRange.setBegin(range.getEnd());
- reverseRange.setEnd(range.getBegin());
+ reverseRange.setBegin(conf.getRange().getEnd());
+ reverseRange.setEnd(conf.getRange().getBegin());
Direction reverseDirection = (direction == Direction.FORWARD ? Direction.BACKWARD : Direction.FORWARD);
- RowPrimaryKey realBegin = getPKOfFirstRow(range, direction);
- RowPrimaryKey realEnd = getPKOfFirstRow(reverseRange, reverseDirection);
-
+        List<PrimaryKeyColumn> realBegin = getPKOfFirstRow(conf.getRange(), direction);
+        List<PrimaryKeyColumn> realEnd = getPKOfFirstRow(reverseRange, reverseDirection);
+
// 因为如果其中一行为空,表示这个范围内至多有一行数据
// 所以不再细分,直接使用用户定义的范围
if (realBegin == null || realEnd == null) {
            List<OTSRange> ranges = new ArrayList<OTSRange>();
- ranges.add(range);
- return ranges;
+ ranges.add(conf.getRange());
+ return getConfigurationsFromRanges(ranges);
}
-
+
// 如果出现realBegin,realEnd的方向和direction不一致的情况,直接返回range
int cmp = Common.compareRangeBeginAndEnd(meta, realBegin, realEnd);
Direction realDirection = cmp > 0 ? Direction.BACKWARD : Direction.FORWARD;
if (realDirection != direction) {
LOG.warn("Expect '" + direction + "', but direction of realBegin and readlEnd is '" + realDirection + "'");
            List<OTSRange> ranges = new ArrayList<OTSRange>();
- ranges.add(range);
- return ranges;
+ ranges.add(conf.getRange());
+ return getConfigurationsFromRanges(ranges);
}
        List<OTSRange> ranges = RangeSplit.rangeSplitByCount(meta, realBegin, realEnd, num);
if (ranges.isEmpty()) { // 当PartitionKey相等时,工具内部不会切分Range
- ranges.add(range);
+ ranges.add(conf.getRange());
} else {
// replace first and last
OTSRange first = ranges.get(0);
OTSRange last = ranges.get(ranges.size() - 1);
- first.setBegin(range.getBegin());
- last.setEnd(range.getEnd());
+ first.setBegin(conf.getRange().getBegin());
+ last.setEnd(conf.getRange().getEnd());
}
-
- return ranges;
+
+ return getConfigurationsFromRanges(ranges);
}
-    private List<OTSRange> userDefinedRangeSplit(TableMeta meta, OTSRange range, List<PrimaryKeyValue> points) {
-        List<OTSRange> ranges = RangeSplit.rangeSplitByPoint(meta, range.getBegin(), range.getEnd(), points);
- if (ranges.isEmpty()) { // 当PartitionKey相等时,工具内部不会切分Range
- ranges.add(range);
+    private List<Configuration> getConfigurationsFromRanges(List<OTSRange> ranges) {
+        List<Configuration> configurationList = new ArrayList<>();
+        for (OTSRange range : ranges) {
+ Configuration configuration = Configuration.newDefault();
+ configuration.set(Constant.ConfigKey.CONF, GsonParser.confToJson(conf));
+ configuration.set(Constant.ConfigKey.RANGE, GsonParser.rangeToJson(range));
+ configuration.set(Constant.ConfigKey.META, GsonParser.metaToJson(meta));
+ configurationList.add(configuration);
}
- return ranges;
+ return configurationList;
}
+
+    private List<PrimaryKeyColumn> getPKOfFirstRow(OTSRange range, Direction direction) throws Exception {
+
+ RangeRowQueryCriteria cur = new RangeRowQueryCriteria(this.conf.getTableName());
+ cur.setInclusiveStartPrimaryKey(new PrimaryKey(range.getBegin()));
+ cur.setExclusiveEndPrimaryKey(new PrimaryKey(range.getEnd()));
+ cur.setLimit(1);
+ cur.addColumnsToGet(Common.getPrimaryKeyNameList(meta));
+ cur.setDirection(direction);
+ cur.setMaxVersions(1);
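+        // Only primary-key columns of a single version are requested; this call
+        // just probes the PK of the first row inside the range.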
+
+ return RetryHelper.executeWithRetry(
+ new GetFirstRowPrimaryKeyCallable(ots, meta, cur),
+ conf.getRetry(),
+ conf.getRetryPauseInMillisecond()
+ );
+ }
+
}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderSlaveMetaProxy.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderSlaveMetaProxy.java
new file mode 100644
index 00000000..f9860194
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderSlaveMetaProxy.java
@@ -0,0 +1,160 @@
+package com.alibaba.datax.plugin.reader.otsreader;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSConf;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSRange;
+import com.alibaba.datax.plugin.reader.otsreader.utils.Constant;
+import com.alibaba.datax.plugin.reader.otsreader.utils.Key;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.element.StringColumn;
+import com.alibaba.datax.common.plugin.RecordSender;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.reader.otsreader.utils.ParamCheckerOld;
+import com.alibaba.datax.plugin.reader.otsreader.utils.ReaderModelParser;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSColumn;
+import com.alibaba.datax.plugin.reader.otsreader.utils.DefaultNoRetry;
+import com.alibaba.datax.plugin.reader.otsreader.utils.GsonParser;
+import com.alibaba.fastjson.JSON;
+import com.aliyun.openservices.ots.OTSClient;
+import com.aliyun.openservices.ots.OTSServiceConfiguration;
+import com.aliyun.openservices.ots.model.DescribeTableRequest;
+import com.aliyun.openservices.ots.model.DescribeTableResult;
+import com.aliyun.openservices.ots.model.ListTableResult;
+import com.aliyun.openservices.ots.model.PrimaryKeyType;
+import com.aliyun.openservices.ots.model.ReservedThroughputDetails;
+import com.aliyun.openservices.ots.model.TableMeta;
+
+public class OtsReaderSlaveMetaProxy implements IOtsReaderSlaveProxy {
+
+ private OTSClient ots = null;
+ private OTSConf conf = null;
+ private OTSRange range = null;
+ private com.alicloud.openservices.tablestore.model.TableMeta meta = null;
+ private Configuration configuration = null;
+ private static final Logger LOG = LoggerFactory.getLogger(OtsReaderSlaveMetaProxy.class);
+
+
+ @Override
+ public void init(Configuration configuration) {
+ OTSServiceConfiguration configure = new OTSServiceConfiguration();
+ configure.setRetryStrategy(new DefaultNoRetry());
+
+ this.configuration = configuration;
+ conf = GsonParser.jsonToConf((String) configuration.get(Constant.ConfigKey.CONF));
+ range = GsonParser.jsonToRange((String) configuration.get(Constant.ConfigKey.RANGE));
+ meta = GsonParser.jsonToMeta((String) configuration.get(Constant.ConfigKey.META));
+
+ String endpoint = conf.getEndpoint();
+ String accessId = conf.getAccessId();
+ String accessKey = conf.getAccessKey();
+ String instanceName = conf.getInstanceName();
+
+ ots = new OTSClient(endpoint, accessId, accessKey, instanceName, null, configure, null);
+ }
+
+ @Override
+ public void close() {
+ ots.shutdown();
+ }
+
+ @Override
+ public void startRead(RecordSender recordSender) throws Exception {
+        List<OTSColumn> columns = ReaderModelParser
+ .parseOTSColumnList(ParamCheckerOld.checkListAndGet(configuration, Key.COLUMN, true));
+ String metaMode = conf.getMetaMode(); // column
+
+
+ ListTableResult listTableResult = null;
+ try {
+ listTableResult = ots.listTable();
+ LOG.info(String.format("ots listTable requestId:%s, traceId:%s", listTableResult.getRequestID(),
+ listTableResult.getTraceId()));
+            List<String> allTables = listTableResult.getTableNames();
+ for (String eachTable : allTables) {
+ DescribeTableRequest describeTableRequest = new DescribeTableRequest();
+ describeTableRequest.setTableName(eachTable);
+ DescribeTableResult describeTableResult = ots.describeTable(describeTableRequest);
+ LOG.info(String.format("ots describeTable requestId:%s, traceId:%s", describeTableResult.getRequestID(),
+ describeTableResult.getTraceId()));
+
+ TableMeta tableMeta = describeTableResult.getTableMeta();
+ // table_name: first_table
+ // table primary key: type, data type: STRING
+ // table primary key: db_name, data type: STRING
+ // table primary key: table_name, data type: STRING
+ // Reserved throughput: read(0), write(0)
+ // last increase time: 1502881295
+ // last decrease time: None
+ // number of decreases today: 0
+
+ String tableName = tableMeta.getTableName();
+                Map<String, PrimaryKeyType> primaryKey = tableMeta.getPrimaryKey();
+ ReservedThroughputDetails reservedThroughputDetails = describeTableResult
+ .getReservedThroughputDetails();
+ int reservedThroughputRead = reservedThroughputDetails.getCapacityUnit().getReadCapacityUnit();
+ int reservedThroughputWrite = reservedThroughputDetails.getCapacityUnit().getWriteCapacityUnit();
+ long lastIncreaseTime = reservedThroughputDetails.getLastIncreaseTime();
+ long lastDecreaseTime = reservedThroughputDetails.getLastDecreaseTime();
+ int numberOfDecreasesToday = reservedThroughputDetails.getNumberOfDecreasesToday();
+
+                Map<String, String> allData = new HashMap<String, String>();
+ allData.put("endpoint", conf.getEndpoint());
+ allData.put("instanceName", conf.getInstanceName());
+ allData.put("table", tableName);
+ // allData.put("primaryKey", JSON.toJSONString(primaryKey));
+ allData.put("reservedThroughputRead", reservedThroughputRead + "");
+ allData.put("reservedThroughputWrite", reservedThroughputWrite + "");
+ allData.put("lastIncreaseTime", lastIncreaseTime + "");
+ allData.put("lastDecreaseTime", lastDecreaseTime + "");
+ allData.put("numberOfDecreasesToday", numberOfDecreasesToday + "");
+
+                // Extensible, configurable output format
+                if ("column".equalsIgnoreCase(metaMode)) {
+                    // In column meta mode, when the configured column names refer to the
+                    // primary key, each primary-key column is mapped to its own DataX Record
+                    List<Record> primaryKeyRecords = new ArrayList<Record>();
+                    for (Entry<String, PrimaryKeyType> eachPk : primaryKey.entrySet()) {
+ Record line = recordSender.createRecord();
+ for (OTSColumn col : columns) {
+ if (col.getColumnType() == OTSColumn.OTSColumnType.CONST) {
+ line.addColumn(col.getValue());
+ } else if ("primaryKey.name".equalsIgnoreCase(col.getName())) {
+ line.addColumn(new StringColumn(eachPk.getKey()));
+ } else if ("primaryKey.type".equalsIgnoreCase(col.getName())) {
+ line.addColumn(new StringColumn(eachPk.getValue().name()));
+ } else {
+ String v = allData.get(col.getName());
+ line.addColumn(new StringColumn(v));
+ }
+ }
+ LOG.debug("Reader send record : {}", line.toString());
+ recordSender.sendToWriter(line);
+ primaryKeyRecords.add(line);
+ }
+ } else {
+ Record line = recordSender.createRecord();
+ for (OTSColumn col : columns) {
+ if (col.getColumnType() == OTSColumn.OTSColumnType.CONST) {
+ line.addColumn(col.getValue());
+ } else {
+ String v = allData.get(col.getName());
+ line.addColumn(new StringColumn(v));
+ }
+ }
+ LOG.debug("Reader send record : {}", line.toString());
+ recordSender.sendToWriter(line);
+ }
+ }
+ } catch (Exception e) {
+            LOG.warn("Failed to read table meta, listTableResult: " + JSON.toJSONString(listTableResult), e);
+ }
+
+ }
+}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderSlaveMultiVersionProxy.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderSlaveMultiVersionProxy.java
new file mode 100644
index 00000000..818a507e
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderSlaveMultiVersionProxy.java
@@ -0,0 +1,102 @@
+package com.alibaba.datax.plugin.reader.otsreader;
+
+import com.alibaba.datax.common.element.LongColumn;
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.element.StringColumn;
+import com.alibaba.datax.common.plugin.RecordSender;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSConf;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSRange;
+import com.alibaba.datax.plugin.reader.otsreader.utils.*;
+import com.alicloud.openservices.tablestore.SyncClientInterface;
+import com.alicloud.openservices.tablestore.model.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class OtsReaderSlaveMultiVersionProxy implements IOtsReaderSlaveProxy {
+ private OTSConf conf = null;
+ private OTSRange range = null;
+ private TableMeta meta = null;
+ private SyncClientInterface ots = null;
+
+ private static final Logger LOG = LoggerFactory.getLogger(OtsReaderSlaveMultiVersionProxy.class);
+
+ @Override
+ public void init(Configuration configuration) {
+ conf = GsonParser.jsonToConf((String) configuration.get(Constant.ConfigKey.CONF));
+ range = GsonParser.jsonToRange((String) configuration.get(Constant.ConfigKey.RANGE));
+ meta = GsonParser.jsonToMeta((String) configuration.get(Constant.ConfigKey.META));
+
+ this.ots = OtsHelper.getOTSInstance(conf);
+ }
+
+ @Override
+ public void close() {
+ ots.shutdown();
+ }
+
+ private void sendToDatax(RecordSender recordSender, PrimaryKey pk, Column c) {
+ Record line = recordSender.createRecord();
+ //-------------------------
+        // 4-tuple: pk, column name, timestamp, value
+ //-------------------------
+
+ // pk
+        for (PrimaryKeyColumn pkc : pk.getPrimaryKeyColumns()) {
+ line.addColumn(TranformHelper.otsPrimaryKeyColumnToDataxColumn(pkc));
+ }
+ // column name
+ line.addColumn(new StringColumn(c.getName()));
+ // Timestamp
+ line.addColumn(new LongColumn(c.getTimestamp()));
+ // Value
+ line.addColumn(TranformHelper.otsColumnToDataxColumn(c));
+
+ recordSender.sendToWriter(line);
+ }
+
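+    // Illustrative example (hypothetical data, not from the source): a row with
+    // primary key [pk1 = "a"] whose column "col" holds two versions yields two
+    // records:
+    //   "a", "col", 1502881295001, "v2"
+    //   "a", "col", 1502881295000, "v1"
+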
+ private void sendToDatax(RecordSender recordSender, Row row) {
+ PrimaryKey pk = row.getPrimaryKey();
+ for (Column c : row.getColumns()) {
+ sendToDatax(recordSender, pk, c);
+ }
+ }
+
+ /**
+     * Passes the fetched data to DataX as 4-tuples.
+ * @param recordSender
+ * @param result
+ */
+ private void sendToDatax(RecordSender recordSender, GetRangeResponse result) {
+ LOG.debug("Per request get row count : " + result.getRows().size());
+ for (Row row : result.getRows()) {
+ sendToDatax(recordSender, row);
+ }
+ }
+
+ @Override
+ public void startRead(RecordSender recordSender) throws Exception {
+
+ PrimaryKey inclusiveStartPrimaryKey = new PrimaryKey(range.getBegin());
+ PrimaryKey exclusiveEndPrimaryKey = new PrimaryKey(range.getEnd());
+ PrimaryKey next = inclusiveStartPrimaryKey;
+
+ RangeRowQueryCriteria rangeRowQueryCriteria = new RangeRowQueryCriteria(conf.getTableName());
+ rangeRowQueryCriteria.setExclusiveEndPrimaryKey(exclusiveEndPrimaryKey);
+ rangeRowQueryCriteria.setDirection(Common.getDirection(range.getBegin(), range.getEnd()));
+ rangeRowQueryCriteria.setTimeRange(conf.getMulti().getTimeRange());
+ rangeRowQueryCriteria.setMaxVersions(conf.getMulti().getMaxVersion());
+ rangeRowQueryCriteria.addColumnsToGet(Common.toColumnToGet(conf.getColumn(), meta));
+
+        do {
+ rangeRowQueryCriteria.setInclusiveStartPrimaryKey(next);
+ GetRangeResponse result = OtsHelper.getRange(
+ ots,
+ rangeRowQueryCriteria,
+ conf.getRetry(),
+ conf.getRetryPauseInMillisecond());
+ sendToDatax(recordSender, result);
+ next = result.getNextStartPrimaryKey();
+        } while (next != null);
+ }
+}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderSlaveNormalProxy.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderSlaveNormalProxy.java
new file mode 100644
index 00000000..f7d89b15
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderSlaveNormalProxy.java
@@ -0,0 +1,256 @@
+package com.alibaba.datax.plugin.reader.otsreader;
+
+import com.alibaba.datax.common.element.LongColumn;
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.element.StringColumn;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.RecordSender;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSColumn;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSConf;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSCriticalException;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSRange;
+import com.alibaba.datax.plugin.reader.otsreader.utils.*;
+import com.alicloud.openservices.tablestore.SyncClientInterface;
+import com.alicloud.openservices.tablestore.core.utils.Pair;
+import com.alicloud.openservices.tablestore.model.*;
+import com.alicloud.openservices.tablestore.model.timeseries.ScanTimeseriesDataRequest;
+import com.alicloud.openservices.tablestore.model.timeseries.ScanTimeseriesDataResponse;
+import com.alicloud.openservices.tablestore.model.timeseries.TimeseriesRow;
+import com.alicloud.openservices.tablestore.model.timeseries.TimeseriesScanSplitInfo;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+public class OtsReaderSlaveNormalProxy implements IOtsReaderSlaveProxy {
+ private static final Logger LOG = LoggerFactory.getLogger(OtsReaderSlaveNormalProxy.class);
+ private OTSConf conf = null;
+ private OTSRange range = null;
+ private TableMeta meta = null;
+ private SyncClientInterface ots = null;
+ private TimeseriesScanSplitInfo splitInfo = null;
+
+ @Override
+ public void init(Configuration configuration) {
+ conf = GsonParser.jsonToConf((String) configuration.get(Constant.ConfigKey.CONF));
+ if (!conf.isTimeseriesTable()) {
+ range = GsonParser.jsonToRange((String) configuration.get(Constant.ConfigKey.RANGE));
+ meta = GsonParser.jsonToMeta((String) configuration.get(Constant.ConfigKey.META));
+ } else {
+ splitInfo = GsonParser.stringToTimeseriesScanSplitInfo((String) configuration.get(Constant.ConfigKey.SPLIT_INFO));
+            // Timeseries table: check the tablestore SDK version
+            try {
+                Common.checkTableStoreSDKVersion();
+            } catch (Exception e) {
+ LOG.error("Exception. ErrorMsg:{}", e.getMessage(), e);
+ throw DataXException.asDataXException(OtsReaderError.ERROR, e.toString(), e);
+ }
+ }
+
+
+ this.ots = OtsHelper.getOTSInstance(conf);
+ }
+
+ @Override
+ public void close() {
+ ots.shutdown();
+ }
+
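+    /**
+     * Maps one OTS row onto one DataX record in the configured column order.
+     * For a NORMAL column the primary key is consulted first, then the latest
+     * attribute column of the same name; a missing value becomes an empty
+     * StringColumn (see the convention note below). CONST columns are emitted
+     * verbatim.
+     */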
+ private void sendToDatax(RecordSender recordSender, Row row) {
+ Record line = recordSender.createRecord();
+
+ PrimaryKey pk = row.getPrimaryKey();
+ for (OTSColumn column : conf.getColumn()) {
+ if (column.getColumnType() == OTSColumn.OTSColumnType.NORMAL) {
+                // Fetch the specified column
+ PrimaryKeyColumn value = pk.getPrimaryKeyColumn(column.getName());
+ if (value != null) {
+ line.addColumn(TranformHelper.otsPrimaryKeyColumnToDataxColumn(value));
+ } else {
+ Column c = row.getLatestColumn(column.getName());
+ if (c != null) {
+ line.addColumn(TranformHelper.otsColumnToDataxColumn(c));
+ } else {
+                        // Use StringColumn's no-arg constructor here instead of injecting null:
+                        // the downstream writer should fetch the Column and decide whether it
+                        // is null by checking whether the Column's data accessor returns null.
+                        // Other DataX plugins follow the same convention; null is never written
+                        // into the Record directly.
+ line.addColumn(new StringColumn());
+ }
+ }
+ } else {
+ line.addColumn(column.getValue());
+ }
+ }
+ recordSender.sendToWriter(line);
+ }
+
+ private void sendToDatax(RecordSender recordSender, TimeseriesRow row) {
+
+
+ Record line = recordSender.createRecord();
+        // For each column in the configuration
+ for (int i = 0; i < conf.getColumn().size(); i++) {
+ OTSColumn column = conf.getColumn().get(i);
+            // If it is not a constant column
+ if (column.getColumnType() == OTSColumn.OTSColumnType.NORMAL) {
+                // If the field lives inside tags
+ if (conf.getColumn().get(i).getTimeseriesTag()) {
+ String s = row.getTimeseriesKey().getTags().get(column.getName());
+ line.addColumn(new StringColumn(s));
+ }
+                // If it is the measurement name field
+ else if (column.getName().equals(Constant.ConfigKey.TimeseriesPKColumn.MEASUREMENT_NAME)) {
+ String s = row.getTimeseriesKey().getMeasurementName();
+ line.addColumn(new StringColumn(s));
+ }
+                // If it is the dataSource field
+ else if (column.getName().equals(Constant.ConfigKey.TimeseriesPKColumn.DATA_SOURCE)) {
+ String s = row.getTimeseriesKey().getDataSource();
+ line.addColumn(new StringColumn(s));
+ }
+                // If it is the tags field
+ else if (column.getName().equals(Constant.ConfigKey.TimeseriesPKColumn.TAGS)) {
+ line.addColumn(new StringColumn(row.getTimeseriesKey().buildTagsString()));
+ }
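+                // If it is the time field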
+ else if (column.getName().equals(Constant.ConfigKey.TimeseriesPKColumn.TIME)) {
+ Long l = row.getTimeInUs();
+ line.addColumn(new LongColumn(l));
+ }
+                // Otherwise it is a field column
+ else {
+ ColumnValue c = row.getFields().get(column.getName());
+ if (c == null) {
+ LOG.warn("Get column {} : type {} failed, use empty string instead", column.getName(), conf.getColumn().get(i).getValueType());
+ line.addColumn(new StringColumn());
+ } else if (c.getType() != conf.getColumn().get(i).getValueType()) {
+ LOG.warn("Get column {} failed, expected type: {}, actual type: {}. Sending actual type to writer.", column.getName(), conf.getColumn().get(i).getValueType(), c.getType());
+ line.addColumn(TranformHelper.otsColumnToDataxColumn(c));
+ } else {
+ line.addColumn(TranformHelper.otsColumnToDataxColumn(c));
+ }
+ }
+ }
+            // Constant column
+ else {
+ line.addColumn(column.getValue());
+ }
+ }
+ recordSender.sendToWriter(line);
+ }
+
+ /**
+     * Passes the fetched data to DataX according to the user-configured columns.
+ *
+ * @param recordSender
+ * @param result
+ */
+ private void sendToDatax(RecordSender recordSender, GetRangeResponse result) {
+ for (Row row : result.getRows()) {
+ sendToDatax(recordSender, row);
+ }
+ }
+
+ private void sendToDatax(RecordSender recordSender, ScanTimeseriesDataResponse result) {
+ for (TimeseriesRow row : result.getRows()) {
+ sendToDatax(recordSender, row);
+ }
+ }
+
+ @Override
+ public void startRead(RecordSender recordSender) throws Exception {
+ if (conf.isTimeseriesTable()) {
+ readTimeseriesTable(recordSender);
+ } else {
+ readNormalTable(recordSender);
+ }
+ }
+
+ public void readTimeseriesTable(RecordSender recordSender) throws Exception {
+
+        List<String> timeseriesPkName = new ArrayList<>();
+ timeseriesPkName.add(Constant.ConfigKey.TimeseriesPKColumn.MEASUREMENT_NAME);
+ timeseriesPkName.add(Constant.ConfigKey.TimeseriesPKColumn.DATA_SOURCE);
+ timeseriesPkName.add(Constant.ConfigKey.TimeseriesPKColumn.TAGS);
+ timeseriesPkName.add(Constant.ConfigKey.TimeseriesPKColumn.TIME);
+
+ ScanTimeseriesDataRequest scanTimeseriesDataRequest = new ScanTimeseriesDataRequest(conf.getTableName());
+        List<Pair<String, ColumnType>> fieldsToGet = new ArrayList<>();
+ for (int i = 0; i < conf.getColumn().size(); i++) {
+            /**
+             * A configured column is a field to fetch when it:
+             * 1. is not a constant column (i.e. its name is not null),
+             * 2. is not one of the timeseries key columns
+             *    ("measurementName", "dataSource", "tags", "time"), and
+             * 3. is not a field inside tags.
+             */
+ String fieldName = conf.getColumn().get(i).getName();
+ if (fieldName != null && !timeseriesPkName.contains(fieldName) && !conf.getColumn().get(i).getTimeseriesTag()) {
+                Pair<String, ColumnType> pair = new Pair<>(fieldName, conf.getColumn().get(i).getValueType());
+ fieldsToGet.add(pair);
+ }
+ }
+ scanTimeseriesDataRequest.setFieldsToGet(fieldsToGet);
+ scanTimeseriesDataRequest.setSplitInfo(splitInfo);
+
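+        // Pagination: every ScanTimeseriesDataResponse carries a nextToken that is
+        // fed back into the request; the loop ends once the token is null, i.e. the
+        // split has been fully consumed.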
+ while (true) {
+ ScanTimeseriesDataResponse response = OtsHelper.scanTimeseriesData(
+ ots,
+ scanTimeseriesDataRequest,
+ conf.getRetry(),
+ conf.getRetryPauseInMillisecond());
+ sendToDatax(recordSender, response);
+ if (response.getNextToken() == null) {
+ break;
+ }
+ scanTimeseriesDataRequest.setNextToken(response.getNextToken());
+ }
+ }
+
+ public void readNormalTable(RecordSender recordSender) throws Exception {
+ PrimaryKey inclusiveStartPrimaryKey = new PrimaryKey(range.getBegin());
+ PrimaryKey exclusiveEndPrimaryKey = new PrimaryKey(range.getEnd());
+ PrimaryKey next = inclusiveStartPrimaryKey;
+
+ RangeRowQueryCriteria rangeRowQueryCriteria = new RangeRowQueryCriteria(conf.getTableName());
+ rangeRowQueryCriteria.setExclusiveEndPrimaryKey(exclusiveEndPrimaryKey);
+ rangeRowQueryCriteria.setDirection(Common.getDirection(range.getBegin(), range.getEnd()));
+ rangeRowQueryCriteria.setMaxVersions(1);
+ rangeRowQueryCriteria.addColumnsToGet(Common.toColumnToGet(conf.getColumn(), meta));
+
+ do {
+ rangeRowQueryCriteria.setInclusiveStartPrimaryKey(next);
+ GetRangeResponse result = OtsHelper.getRange(
+ ots,
+ rangeRowQueryCriteria,
+ conf.getRetry(),
+ conf.getRetryPauseInMillisecond());
+ sendToDatax(recordSender, result);
+ next = result.getNextStartPrimaryKey();
+ } while (next != null);
+ }
+
+
+ public void setConf(OTSConf conf) {
+ this.conf = conf;
+ }
+
+
+ public void setRange(OTSRange range) {
+ this.range = range;
+ }
+
+
+ public void setMeta(TableMeta meta) {
+ this.meta = meta;
+ }
+
+
+ public void setOts(SyncClientInterface ots) {
+ this.ots = ots;
+ }
+}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderSlaveProxy.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderSlaveProxy.java
deleted file mode 100644
index e64b4e7e..00000000
--- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderSlaveProxy.java
+++ /dev/null
@@ -1,135 +0,0 @@
-package com.alibaba.datax.plugin.reader.otsreader;
-
-import java.util.List;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.alibaba.datax.common.element.Record;
-import com.alibaba.datax.common.plugin.RecordSender;
-import com.alibaba.datax.common.util.Configuration;
-import com.alibaba.datax.plugin.reader.otsreader.callable.GetRangeCallable;
-import com.alibaba.datax.plugin.reader.otsreader.model.OTSColumn;
-import com.alibaba.datax.plugin.reader.otsreader.model.OTSConf;
-import com.alibaba.datax.plugin.reader.otsreader.model.OTSConst;
-import com.alibaba.datax.plugin.reader.otsreader.model.OTSRange;
-import com.alibaba.datax.plugin.reader.otsreader.utils.Common;
-import com.alibaba.datax.plugin.reader.otsreader.utils.GsonParser;
-import com.alibaba.datax.plugin.reader.otsreader.utils.DefaultNoRetry;
-import com.alibaba.datax.plugin.reader.otsreader.utils.RetryHelper;
-import com.aliyun.openservices.ots.OTSClientAsync;
-import com.aliyun.openservices.ots.OTSServiceConfiguration;
-import com.aliyun.openservices.ots.model.Direction;
-import com.aliyun.openservices.ots.model.GetRangeRequest;
-import com.aliyun.openservices.ots.model.GetRangeResult;
-import com.aliyun.openservices.ots.model.OTSFuture;
-import com.aliyun.openservices.ots.model.RangeRowQueryCriteria;
-import com.aliyun.openservices.ots.model.Row;
-import com.aliyun.openservices.ots.model.RowPrimaryKey;
-
-public class OtsReaderSlaveProxy {
-
- class RequestItem {
- private RangeRowQueryCriteria criteria;
-        private OTSFuture<GetRangeResult> future;
-
- RequestItem(RangeRowQueryCriteria criteria, OTSFuture future) {
- this.criteria = criteria;
- this.future = future;
- }
-
- public RangeRowQueryCriteria getCriteria() {
- return criteria;
- }
-
-        public OTSFuture<GetRangeResult> getFuture() {
- return future;
- }
- }
-
- private static final Logger LOG = LoggerFactory.getLogger(OtsReaderSlaveProxy.class);
-
-    private void rowsToSender(List<Row> rows, RecordSender sender, List<OTSColumn> columns) {
- for (Row row : rows) {
- Record line = sender.createRecord();
- line = Common.parseRowToLine(row, columns, line);
-
- LOG.debug("Reader send record : {}", line.toString());
-
- sender.sendToWriter(line);
- }
- }
-
-    private RangeRowQueryCriteria generateRangeRowQueryCriteria(String tableName, RowPrimaryKey begin, RowPrimaryKey end, Direction direction, List<String> columns) {
- RangeRowQueryCriteria criteria = new RangeRowQueryCriteria(tableName);
- criteria.setInclusiveStartPrimaryKey(begin);
- criteria.setDirection(direction);
- criteria.setColumnsToGet(columns);
- criteria.setLimit(-1);
- criteria.setExclusiveEndPrimaryKey(end);
- return criteria;
- }
-
- private RequestItem generateRequestItem(
- OTSClientAsync ots,
- OTSConf conf,
- RowPrimaryKey begin,
- RowPrimaryKey end,
- Direction direction,
-            List<String> columns) throws Exception {
- RangeRowQueryCriteria criteria = generateRangeRowQueryCriteria(conf.getTableName(), begin, end, direction, columns);
-
- GetRangeRequest request = new GetRangeRequest();
- request.setRangeRowQueryCriteria(criteria);
- OTSFuture future = ots.getRange(request);
-
- return new RequestItem(criteria, future);
- }
-
- public void read(RecordSender sender, Configuration configuration) throws Exception {
- LOG.info("read begin.");
-
- OTSConf conf = GsonParser.jsonToConf(configuration.getString(OTSConst.OTS_CONF));
- OTSRange range = GsonParser.jsonToRange(configuration.getString(OTSConst.OTS_RANGE));
- Direction direction = GsonParser.jsonToDirection(configuration.getString(OTSConst.OTS_DIRECTION));
-
- OTSServiceConfiguration configure = new OTSServiceConfiguration();
- configure.setRetryStrategy(new DefaultNoRetry());
-
- OTSClientAsync ots = new OTSClientAsync(
- conf.getEndpoint(),
- conf.getAccessId(),
- conf.getAccesskey(),
- conf.getInstanceName(),
- null,
- configure,
- null);
-
- RowPrimaryKey token = range.getBegin();
-        List<String> columns = Common.getNormalColumnNameList(conf.getColumns());
-
- RequestItem request = null;
-
- do {
- LOG.debug("Next token : {}", GsonParser.rowPrimaryKeyToJson(token));
- if (request == null) {
- request = generateRequestItem(ots, conf, token, range.getEnd(), direction, columns);
- } else {
- RequestItem req = request;
-
- GetRangeResult result = RetryHelper.executeWithRetry(
- new GetRangeCallable(ots, req.getCriteria(), req.getFuture()),
- conf.getRetry(),
- conf.getSleepInMilliSecond()
- );
- if ((token = result.getNextStartPrimaryKey()) != null) {
- request = generateRequestItem(ots, conf, token, range.getEnd(), direction, columns);
- }
-
- rowsToSender(result.getRows(), sender, conf.getColumns());
- }
- } while (token != null);
- ots.shutdown();
- LOG.info("read end.");
- }
-}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderSlaveProxyOld.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderSlaveProxyOld.java
new file mode 100644
index 00000000..72eb885e
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderSlaveProxyOld.java
@@ -0,0 +1,181 @@
+package com.alibaba.datax.plugin.reader.otsreader;
+
+import java.util.List;
+
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSRange;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSColumn;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSConf;
+import com.alibaba.datax.plugin.reader.otsreader.utils.*;
+import com.alicloud.openservices.tablestore.model.PrimaryKeyColumn;
+import com.aliyun.openservices.ots.model.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.plugin.RecordSender;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.reader.otsreader.callable.GetRangeCallableOld;
+import com.aliyun.openservices.ots.OTSClientAsync;
+import com.aliyun.openservices.ots.OTSServiceConfiguration;
+
+public class OtsReaderSlaveProxyOld implements IOtsReaderSlaveProxy {
+
+
+ private OTSClientAsync ots = null;
+ private OTSConf conf = null;
+ private OTSRange range = null;
+
+ class RequestItem {
+ private RangeRowQueryCriteria criteria;
+        private OTSFuture<GetRangeResult> future;
+
+        RequestItem(RangeRowQueryCriteria criteria, OTSFuture<GetRangeResult> future) {
+ this.criteria = criteria;
+ this.future = future;
+ }
+
+ public RangeRowQueryCriteria getCriteria() {
+ return criteria;
+ }
+
+        public OTSFuture<GetRangeResult> getFuture() {
+ return future;
+ }
+ }
+
+ private static final Logger LOG = LoggerFactory.getLogger(OtsReaderSlaveProxyOld.class);
+
+    private void rowsToSender(List<Row> rows, RecordSender sender, List<OTSColumn> columns) {
+ for (Row row : rows) {
+ Record line = sender.createRecord();
+ line = CommonOld.parseRowToLine(row, columns, line);
+
+ LOG.debug("Reader send record : {}", line.toString());
+
+ sender.sendToWriter(line);
+ }
+ }
+
+    private RangeRowQueryCriteria generateRangeRowQueryCriteria(String tableName, RowPrimaryKey begin, RowPrimaryKey end, Direction direction, List<String> columns) {
+ RangeRowQueryCriteria criteria = new RangeRowQueryCriteria(tableName);
+ criteria.setInclusiveStartPrimaryKey(begin);
+ criteria.setDirection(direction);
+ criteria.setColumnsToGet(columns);
+ criteria.setLimit(-1);
+ criteria.setExclusiveEndPrimaryKey(end);
+ return criteria;
+ }
+
+ private RequestItem generateRequestItem(
+ OTSClientAsync ots,
+ OTSConf conf,
+ RowPrimaryKey begin,
+ RowPrimaryKey end,
+ Direction direction,
+            List<String> columns) throws Exception {
+ RangeRowQueryCriteria criteria = generateRangeRowQueryCriteria(conf.getTableName(), begin, end, direction, columns);
+
+ GetRangeRequest request = new GetRangeRequest();
+ request.setRangeRowQueryCriteria(criteria);
+ OTSFuture future = ots.getRange(request);
+
+ return new RequestItem(criteria, future);
+ }
+
+ @Override
+ public void init(Configuration configuration) {
+ conf = GsonParser.jsonToConf(configuration.getString(Constant.ConfigKey.CONF));
+ range = GsonParser.jsonToRange(configuration.getString(Constant.ConfigKey.RANGE));
+
+ OTSServiceConfiguration configure = new OTSServiceConfiguration();
+ configure.setRetryStrategy(new DefaultNoRetry());
+
+ ots = new OTSClientAsync(
+ conf.getEndpoint(),
+ conf.getAccessId(),
+ conf.getAccessKey(),
+ conf.getInstanceName(),
+ null,
+ configure,
+ null);
+ }
+
+ @Override
+ public void close() {
+ ots.shutdown();
+ }
+
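+    // Prefetch pipeline: the loop below issues the next asynchronous getRange
+    // before the rows of the current response are pushed to the writer, so the
+    // network round-trip overlaps with record sending. GetRangeCallableOld
+    // resolves the in-flight future and, on failure, re-issues the request so
+    // that RetryHelperOld can retry against a fresh future.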
+ @Override
+ public void startRead(RecordSender recordSender) throws Exception {
+ RowPrimaryKey token = pKColumnList2RowPrimaryKey(range.getBegin());
+
+        List<String> columns = CommonOld.getNormalColumnNameList(conf.getColumn());
+ Direction direction = null;
+ switch (Common.getDirection(range.getBegin(), range.getEnd())){
+ case FORWARD:
+ direction = Direction.FORWARD;
+ break;
+ case BACKWARD:
+ default:
+ direction = Direction.BACKWARD;
+ }
+ RequestItem request = null;
+
+ do {
+ LOG.debug("Next token : {}", GsonParser.rowPrimaryKeyToJson(token));
+ if (request == null) {
+ request = generateRequestItem(ots, conf, token, pKColumnList2RowPrimaryKey(range.getEnd()), direction, columns);
+ } else {
+ RequestItem req = request;
+
+ GetRangeResult result = RetryHelperOld.executeWithRetry(
+ new GetRangeCallableOld(ots, req.getCriteria(), req.getFuture()),
+ conf.getRetry(),
+ // TODO
+ 100
+ );
+ if ((token = result.getNextStartPrimaryKey()) != null) {
+ request = generateRequestItem(ots, conf, token, pKColumnList2RowPrimaryKey(range.getEnd()), direction, columns);
+ }
+
+ rowsToSender(result.getRows(), recordSender, conf.getColumn());
+ }
+ } while (token != null);
+ }
+
+ /**
+     * Converts a list of {@link com.alicloud.openservices.tablestore.model.PrimaryKeyColumn} into a {@link com.aliyun.openservices.ots.model.RowPrimaryKey}.
+ * @param list
+ * @return
+ */
+    public RowPrimaryKey pKColumnList2RowPrimaryKey(List<PrimaryKeyColumn> list) {
+ RowPrimaryKey rowPrimaryKey = new RowPrimaryKey();
+        for (PrimaryKeyColumn pk : list) {
+ PrimaryKeyValue v = null;
+ if(pk.getValue() == com.alicloud.openservices.tablestore.model.PrimaryKeyValue.INF_MAX){
+ v = PrimaryKeyValue.INF_MAX;
+ } else if (pk.getValue() == com.alicloud.openservices.tablestore.model.PrimaryKeyValue.INF_MIN) {
+ v = PrimaryKeyValue.INF_MIN;
+ }
+            // Neither INF_MAX nor INF_MIN
+            else {
+ switch (pk.getValue().getType()){
+ case STRING:
+ v = PrimaryKeyValue.fromString(pk.getValue().asString());
+ break;
+ case INTEGER:
+ v = PrimaryKeyValue.fromLong(pk.getValue().asLong());
+ break;
+ case BINARY:
+ v = PrimaryKeyValue.fromBinary(pk.getValue().asBinary());
+ break;
+ default:
+ throw new IllegalArgumentException("the pKColumnList to RowPrimaryKey conversion failed");
+ }
+ }
+
+            rowPrimaryKey.addPrimaryKeyColumn(pk.getName(), v);
+ }
+ return rowPrimaryKey;
+ }
+}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/adaptor/ColumnAdaptor.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/adaptor/ColumnAdaptor.java
new file mode 100644
index 00000000..b2e14b5c
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/adaptor/ColumnAdaptor.java
@@ -0,0 +1,63 @@
+package com.alibaba.datax.plugin.reader.otsreader.adaptor;
+
+import com.alibaba.datax.common.element.*;
+import com.google.gson.*;
+import org.apache.commons.codec.binary.Base64;
+
+import java.lang.reflect.Type;
+
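+/**
+ * Gson (de)serializer for DataX Column objects. A column is encoded as a JSON
+ * object holding its type and a string payload, e.g. {"type":"LONG","rawData":"42"}
+ * (hypothetical value); BYTES payloads are Base64-encoded.
+ */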
+public class ColumnAdaptor implements JsonDeserializer<Column>, JsonSerializer<Column> {
+ private final static String TYPE = "type";
+ private final static String RAW = "rawData";
+
+ @Override
+ public JsonElement serialize(Column obj, Type t,
+ JsonSerializationContext c) {
+ JsonObject json = new JsonObject();
+
+ String rawData = null;
+ switch (obj.getType()){
+ case BOOL:
+ rawData = String.valueOf(obj.getRawData()); break;
+ case BYTES:
+ rawData = Base64.encodeBase64String((byte[]) obj.getRawData()); break;
+            case DOUBLE:
+                rawData = String.valueOf(obj.getRawData()); break;
+            case LONG:
+                rawData = String.valueOf(obj.getRawData()); break;
+            case STRING:
+                rawData = String.valueOf(obj.getRawData()); break;
+ default:
+ throw new IllegalArgumentException("Unsupport parse the column type:" + obj.getType().toString());
+
+ }
+ json.add(TYPE, new JsonPrimitive(obj.getType().toString()));
+ json.add(RAW, new JsonPrimitive(rawData));
+ return json;
+ }
+
+ @Override
+ public Column deserialize(JsonElement ele, Type t,
+ JsonDeserializationContext c) throws JsonParseException {
+ JsonObject obj = ele.getAsJsonObject();
+
+ String strType = obj.getAsJsonPrimitive(TYPE).getAsString();
+ String strRaw = obj.getAsJsonPrimitive(RAW).getAsString();
+ Column.Type type = Column.Type.valueOf(strType);
+ switch (type){
+ case BOOL:
+ return new BoolColumn(strRaw);
+ case BYTES:
+ return new BytesColumn(Base64.decodeBase64(strRaw));
+ case DOUBLE:
+ return new DoubleColumn(strRaw);
+ case LONG:
+ return new LongColumn(strRaw);
+ case STRING:
+ return new StringColumn(strRaw);
+ default:
+ throw new IllegalArgumentException("Unsupport parse the column type:" + type.toString());
+
+ }
+ }
+}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/adaptor/OTSColumnAdaptor.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/adaptor/OTSColumnAdaptor.java
deleted file mode 100644
index 25f9b682..00000000
--- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/adaptor/OTSColumnAdaptor.java
+++ /dev/null
@@ -1,117 +0,0 @@
-package com.alibaba.datax.plugin.reader.otsreader.adaptor;
-
-import java.lang.reflect.Type;
-
-import org.apache.commons.codec.binary.Base64;
-
-import com.alibaba.datax.plugin.reader.otsreader.model.OTSColumn;
-import com.aliyun.openservices.ots.model.ColumnType;
-import com.google.gson.JsonDeserializationContext;
-import com.google.gson.JsonDeserializer;
-import com.google.gson.JsonElement;
-import com.google.gson.JsonObject;
-import com.google.gson.JsonParseException;
-import com.google.gson.JsonPrimitive;
-import com.google.gson.JsonSerializationContext;
-import com.google.gson.JsonSerializer;
-
-public class OTSColumnAdaptor implements JsonDeserializer<OTSColumn>, JsonSerializer<OTSColumn> {
- private final static String NAME = "name";
- private final static String COLUMN_TYPE = "column_type";
- private final static String VALUE_TYPE = "value_type";
- private final static String VALUE = "value";
-
- private void serializeConstColumn(JsonObject json, OTSColumn obj) {
- switch (obj.getValueType()) {
- case STRING :
- json.add(VALUE_TYPE, new JsonPrimitive(ColumnType.STRING.toString()));
- json.add(VALUE, new JsonPrimitive(obj.getValue().asString()));
- break;
- case INTEGER :
- json.add(VALUE_TYPE, new JsonPrimitive(ColumnType.INTEGER.toString()));
- json.add(VALUE, new JsonPrimitive(obj.getValue().asLong()));
- break;
- case DOUBLE :
- json.add(VALUE_TYPE, new JsonPrimitive(ColumnType.DOUBLE.toString()));
- json.add(VALUE, new JsonPrimitive(obj.getValue().asDouble()));
- break;
- case BOOLEAN :
- json.add(VALUE_TYPE, new JsonPrimitive(ColumnType.BOOLEAN.toString()));
- json.add(VALUE, new JsonPrimitive(obj.getValue().asBoolean()));
- break;
- case BINARY :
- json.add(VALUE_TYPE, new JsonPrimitive(ColumnType.BINARY.toString()));
- json.add(VALUE, new JsonPrimitive(Base64.encodeBase64String(obj.getValue().asBytes())));
- break;
- default:
- throw new IllegalArgumentException("Unsupport serialize the type : " + obj.getValueType() + "");
- }
- }
-
- private OTSColumn deserializeConstColumn(JsonObject obj) {
- String strType = obj.getAsJsonPrimitive(VALUE_TYPE).getAsString();
- ColumnType type = ColumnType.valueOf(strType);
-
- JsonPrimitive jsonValue = obj.getAsJsonPrimitive(VALUE);
-
- switch (type) {
- case STRING :
- return OTSColumn.fromConstStringColumn(jsonValue.getAsString());
- case INTEGER :
- return OTSColumn.fromConstIntegerColumn(jsonValue.getAsLong());
- case DOUBLE :
- return OTSColumn.fromConstDoubleColumn(jsonValue.getAsDouble());
- case BOOLEAN :
- return OTSColumn.fromConstBoolColumn(jsonValue.getAsBoolean());
- case BINARY :
- return OTSColumn.fromConstBytesColumn(Base64.decodeBase64(jsonValue.getAsString()));
- default:
- throw new IllegalArgumentException("Unsupport deserialize the type : " + type + "");
- }
- }
-
- private void serializeNormalColumn(JsonObject json, OTSColumn obj) {
- json.add(NAME, new JsonPrimitive(obj.getName()));
- }
-
- private OTSColumn deserializeNormarlColumn(JsonObject obj) {
- return OTSColumn.fromNormalColumn(obj.getAsJsonPrimitive(NAME).getAsString());
- }
-
- @Override
- public JsonElement serialize(OTSColumn obj, Type t,
- JsonSerializationContext c) {
- JsonObject json = new JsonObject();
-
- switch (obj.getColumnType()) {
- case CONST:
- json.add(COLUMN_TYPE, new JsonPrimitive(OTSColumn.OTSColumnType.CONST.toString()));
- serializeConstColumn(json, obj);
- break;
- case NORMAL:
- json.add(COLUMN_TYPE, new JsonPrimitive(OTSColumn.OTSColumnType.NORMAL.toString()));
- serializeNormalColumn(json, obj);
- break;
- default:
- throw new IllegalArgumentException("Unsupport serialize the type : " + obj.getColumnType() + "");
- }
- return json;
- }
-
- @Override
- public OTSColumn deserialize(JsonElement ele, Type t,
- JsonDeserializationContext c) throws JsonParseException {
- JsonObject obj = ele.getAsJsonObject();
- String strColumnType = obj.getAsJsonPrimitive(COLUMN_TYPE).getAsString();
- OTSColumn.OTSColumnType columnType = OTSColumn.OTSColumnType.valueOf(strColumnType);
-
- switch(columnType) {
- case CONST:
- return deserializeConstColumn(obj);
- case NORMAL:
- return deserializeNormarlColumn(obj);
- default:
- throw new IllegalArgumentException("Unsupport deserialize the type : " + columnType + "");
- }
- }
-}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/adaptor/PrimaryKeyValueAdaptor.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/adaptor/PrimaryKeyValueAdaptor.java
index 1a49ea47..240427ae 100644
--- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/adaptor/PrimaryKeyValueAdaptor.java
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/adaptor/PrimaryKeyValueAdaptor.java
@@ -1,18 +1,12 @@
package com.alibaba.datax.plugin.reader.otsreader.adaptor;
-import java.lang.reflect.Type;
+import com.alicloud.openservices.tablestore.model.ColumnType;
+import com.alicloud.openservices.tablestore.model.PrimaryKeyType;
+import com.alicloud.openservices.tablestore.model.PrimaryKeyValue;
+import com.google.gson.*;
+import org.apache.commons.codec.binary.Base64;
-import com.aliyun.openservices.ots.model.ColumnType;
-import com.aliyun.openservices.ots.model.PrimaryKeyType;
-import com.aliyun.openservices.ots.model.PrimaryKeyValue;
-import com.google.gson.JsonDeserializationContext;
-import com.google.gson.JsonDeserializer;
-import com.google.gson.JsonElement;
-import com.google.gson.JsonObject;
-import com.google.gson.JsonParseException;
-import com.google.gson.JsonPrimitive;
-import com.google.gson.JsonSerializationContext;
-import com.google.gson.JsonSerializer;
+import java.lang.reflect.Type;
/**
* {"type":"INF_MIN", "value":""}
@@ -31,27 +25,29 @@ public class PrimaryKeyValueAdaptor implements JsonDeserializer<PrimaryKeyValue>
JsonSerializationContext c) {
JsonObject json = new JsonObject();
- if (obj == PrimaryKeyValue.INF_MIN) {
+ if (obj.isInfMin()) {
json.add(TYPE, new JsonPrimitive(INF_MIN));
- json.add(VALUE, new JsonPrimitive(""));
return json;
}
- if (obj == PrimaryKeyValue.INF_MAX) {
+ if (obj.isInfMax()) {
json.add(TYPE, new JsonPrimitive(INF_MAX));
- json.add(VALUE, new JsonPrimitive(""));
return json;
}
switch (obj.getType()) {
case STRING :
- json.add(TYPE, new JsonPrimitive(ColumnType.STRING.toString()));
+ json.add(TYPE, new JsonPrimitive(ColumnType.STRING.toString()));
json.add(VALUE, new JsonPrimitive(obj.asString()));
break;
case INTEGER :
json.add(TYPE, new JsonPrimitive(ColumnType.INTEGER.toString()));
json.add(VALUE, new JsonPrimitive(obj.asLong()));
break;
+ case BINARY :
+ json.add(TYPE, new JsonPrimitive(ColumnType.BINARY.toString()));
+ json.add(VALUE, new JsonPrimitive(Base64.encodeBase64String(obj.asBinary())));
+ break;
default:
throw new IllegalArgumentException("Unsupport serialize the type : " + obj.getType() + "");
}
@@ -64,16 +60,17 @@ public class PrimaryKeyValueAdaptor implements JsonDeserializer<PrimaryKeyValue>
JsonObject obj = ele.getAsJsonObject();
String strType = obj.getAsJsonPrimitive(TYPE).getAsString();
- JsonPrimitive jsonValue = obj.getAsJsonPrimitive(VALUE);
- if (strType.equals(INF_MIN)) {
+ if (strType.equalsIgnoreCase(INF_MIN)) {
return PrimaryKeyValue.INF_MIN;
}
- if (strType.equals(INF_MAX)) {
+ if (strType.equalsIgnoreCase(INF_MAX)) {
return PrimaryKeyValue.INF_MAX;
}
+ JsonPrimitive jsonValue = obj.getAsJsonPrimitive(VALUE);
+
PrimaryKeyValue value = null;
PrimaryKeyType type = PrimaryKeyType.valueOf(strType);
switch(type) {
@@ -83,6 +80,9 @@ public class PrimaryKeyValueAdaptor implements JsonDeserializer<PrimaryKeyValue>
case INTEGER :
value = PrimaryKeyValue.fromLong(jsonValue.getAsLong());
break;
+ case BINARY :
+ value = PrimaryKeyValue.fromBinary(Base64.decodeBase64(jsonValue.getAsString()));
+ break;
default:
throw new IllegalArgumentException("Unsupport deserialize the type : " + type + "");
}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetFirstRowPrimaryKeyCallable.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetFirstRowPrimaryKeyCallable.java
index f004c0ff..cdcae91a 100644
--- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetFirstRowPrimaryKeyCallable.java
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetFirstRowPrimaryKeyCallable.java
@@ -1,53 +1,42 @@
package com.alibaba.datax.plugin.reader.otsreader.callable;
+import com.alicloud.openservices.tablestore.SyncClientInterface;
+import com.alicloud.openservices.tablestore.model.*;
+
+import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
-import com.aliyun.openservices.ots.OTSClient;
-import com.aliyun.openservices.ots.model.ColumnType;
-import com.aliyun.openservices.ots.model.ColumnValue;
-import com.aliyun.openservices.ots.model.GetRangeRequest;
-import com.aliyun.openservices.ots.model.GetRangeResult;
-import com.aliyun.openservices.ots.model.PrimaryKeyType;
-import com.aliyun.openservices.ots.model.PrimaryKeyValue;
-import com.aliyun.openservices.ots.model.RangeRowQueryCriteria;
-import com.aliyun.openservices.ots.model.Row;
-import com.aliyun.openservices.ots.model.RowPrimaryKey;
-import com.aliyun.openservices.ots.model.TableMeta;
+public class GetFirstRowPrimaryKeyCallable implements Callable<List<PrimaryKeyColumn>> {
-public class GetFirstRowPrimaryKeyCallable implements Callable<RowPrimaryKey>{
-
- private OTSClient ots = null;
+ private SyncClientInterface ots = null;
private TableMeta meta = null;
private RangeRowQueryCriteria criteria = null;
-
- public GetFirstRowPrimaryKeyCallable(OTSClient ots, TableMeta meta, RangeRowQueryCriteria criteria) {
+
+ public GetFirstRowPrimaryKeyCallable(SyncClientInterface ots, TableMeta meta, RangeRowQueryCriteria criteria) {
this.ots = ots;
this.meta = meta;
this.criteria = criteria;
}
-
+
@Override
- public RowPrimaryKey call() throws Exception {
- RowPrimaryKey ret = new RowPrimaryKey();
+    public List<PrimaryKeyColumn> call() throws Exception {
+        List<PrimaryKeyColumn> ret = new ArrayList<>();
GetRangeRequest request = new GetRangeRequest();
request.setRangeRowQueryCriteria(criteria);
- GetRangeResult result = ots.getRange(request);
-        List<Row> rows = result.getRows();
- if(rows.isEmpty()) {
+ GetRangeResponse response = ots.getRange(request);
+        List<Row> rows = response.getRows();
+ if (rows.isEmpty()) {
return null;// no data
- }
+ }
Row row = rows.get(0);
-        Map<String, PrimaryKeyType> pk = meta.getPrimaryKey();
- for (String key:pk.keySet()) {
- ColumnValue v = row.getColumns().get(key);
- if (v.getType() == ColumnType.INTEGER) {
- ret.addPrimaryKeyColumn(key, PrimaryKeyValue.fromLong(v.asLong()));
- } else {
- ret.addPrimaryKeyColumn(key, PrimaryKeyValue.fromString(v.asString()));
- }
+        Map<String, PrimaryKeyType> pk = meta.getPrimaryKeyMap();
+
+ for (String key : pk.keySet()) {
+ PrimaryKeyColumn v = row.getPrimaryKey().getPrimaryKeyColumnsMap().get(key);
+ ret.add(v);
}
return ret;
}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetRangeCallable.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetRangeCallable.java
index 2cd1398a..995d491c 100644
--- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetRangeCallable.java
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetRangeCallable.java
@@ -1,35 +1,26 @@
package com.alibaba.datax.plugin.reader.otsreader.callable;
+import com.alicloud.openservices.tablestore.SyncClientInterface;
+import com.alicloud.openservices.tablestore.model.GetRangeRequest;
+import com.alicloud.openservices.tablestore.model.GetRangeResponse;
+import com.alicloud.openservices.tablestore.model.RangeRowQueryCriteria;
+
import java.util.concurrent.Callable;
-import com.aliyun.openservices.ots.OTSClientAsync;
-import com.aliyun.openservices.ots.model.GetRangeRequest;
-import com.aliyun.openservices.ots.model.GetRangeResult;
-import com.aliyun.openservices.ots.model.OTSFuture;
-import com.aliyun.openservices.ots.model.RangeRowQueryCriteria;
-
-public class GetRangeCallable implements Callable<GetRangeResult> {
+public class GetRangeCallable implements Callable<GetRangeResponse> {
- private OTSClientAsync ots;
+ private SyncClientInterface ots;
private RangeRowQueryCriteria criteria;
-    private OTSFuture<GetRangeResult> future;
-    public GetRangeCallable(OTSClientAsync ots, RangeRowQueryCriteria criteria, OTSFuture<GetRangeResult> future) {
+ public GetRangeCallable(SyncClientInterface ots, RangeRowQueryCriteria criteria) {
this.ots = ots;
this.criteria = criteria;
- this.future = future;
}
@Override
- public GetRangeResult call() throws Exception {
- try {
- return future.get();
- } catch (Exception e) {
- GetRangeRequest request = new GetRangeRequest();
- request.setRangeRowQueryCriteria(criteria);
- future = ots.getRange(request);
- throw e;
- }
+ public GetRangeResponse call() throws Exception {
+ GetRangeRequest request = new GetRangeRequest();
+ request.setRangeRowQueryCriteria(criteria);
+ return ots.getRange(request);
}
-
-}
+}
\ No newline at end of file
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetRangeCallableOld.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetRangeCallableOld.java
new file mode 100644
index 00000000..c0434126
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetRangeCallableOld.java
@@ -0,0 +1,35 @@
+package com.alibaba.datax.plugin.reader.otsreader.callable;
+
+import java.util.concurrent.Callable;
+
+import com.aliyun.openservices.ots.OTSClientAsync;
+import com.aliyun.openservices.ots.model.GetRangeRequest;
+import com.aliyun.openservices.ots.model.GetRangeResult;
+import com.aliyun.openservices.ots.model.OTSFuture;
+import com.aliyun.openservices.ots.model.RangeRowQueryCriteria;
+
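+/**
+ * Resolves a previously issued asynchronous getRange. If waiting on the future
+ * fails, the request is re-issued before the exception is rethrown, so that a
+ * retry wrapper (RetryHelperOld in this plugin) can wait on the fresh future.
+ */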
+public class GetRangeCallableOld implements Callable<GetRangeResult> {
+
+ private OTSClientAsync ots;
+ private RangeRowQueryCriteria criteria;
+    private OTSFuture<GetRangeResult> future;
+
+    public GetRangeCallableOld(OTSClientAsync ots, RangeRowQueryCriteria criteria, OTSFuture<GetRangeResult> future) {
+ this.ots = ots;
+ this.criteria = criteria;
+ this.future = future;
+ }
+
+ @Override
+ public GetRangeResult call() throws Exception {
+ try {
+ return future.get();
+ } catch (Exception e) {
+ GetRangeRequest request = new GetRangeRequest();
+ request.setRangeRowQueryCriteria(criteria);
+ future = ots.getRange(request);
+ throw e;
+ }
+ }
+
+}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetTableMetaCallable.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetTableMetaCallable.java
index 2884e12b..36a122c2 100644
--- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetTableMetaCallable.java
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetTableMetaCallable.java
@@ -1,18 +1,19 @@
package com.alibaba.datax.plugin.reader.otsreader.callable;
+import com.alicloud.openservices.tablestore.SyncClientInterface;
+import com.alicloud.openservices.tablestore.model.DescribeTableRequest;
+import com.alicloud.openservices.tablestore.model.DescribeTableResponse;
+import com.alicloud.openservices.tablestore.model.TableMeta;
+
import java.util.concurrent.Callable;
-import com.aliyun.openservices.ots.OTSClient;
-import com.aliyun.openservices.ots.model.DescribeTableRequest;
-import com.aliyun.openservices.ots.model.DescribeTableResult;
-import com.aliyun.openservices.ots.model.TableMeta;
public class GetTableMetaCallable implements Callable<TableMeta>{
- private OTSClient ots = null;
+ private SyncClientInterface ots = null;
private String tableName = null;
- public GetTableMetaCallable(OTSClient ots, String tableName) {
+ public GetTableMetaCallable(SyncClientInterface ots, String tableName) {
this.ots = ots;
this.tableName = tableName;
}
@@ -21,9 +22,9 @@ public class GetTableMetaCallable implements Callable<TableMeta>{
public TableMeta call() throws Exception {
DescribeTableRequest describeTableRequest = new DescribeTableRequest();
describeTableRequest.setTableName(tableName);
- DescribeTableResult result = ots.describeTable(describeTableRequest);
+ DescribeTableResponse result = ots.describeTable(describeTableRequest);
TableMeta tableMeta = result.getTableMeta();
return tableMeta;
}
-}
+}
\ No newline at end of file
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetTimeseriesSplitCallable.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetTimeseriesSplitCallable.java
new file mode 100644
index 00000000..96521c41
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/GetTimeseriesSplitCallable.java
@@ -0,0 +1,38 @@
+package com.alibaba.datax.plugin.reader.otsreader.callable;
+
+import com.alicloud.openservices.tablestore.SyncClient;
+import com.alicloud.openservices.tablestore.SyncClientInterface;
+import com.alicloud.openservices.tablestore.TimeseriesClient;
+import com.alicloud.openservices.tablestore.model.timeseries.SplitTimeseriesScanTaskRequest;
+import com.alicloud.openservices.tablestore.model.timeseries.SplitTimeseriesScanTaskResponse;
+import com.alicloud.openservices.tablestore.model.timeseries.TimeseriesScanSplitInfo;
+
+import java.util.List;
+import java.util.concurrent.Callable;
+
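+/**
+ * Asks the timeseries service to split a scan into ranges that can be read in
+ * parallel. splitCountHint suggests the desired number of splits (an assumption
+ * based on the SDK naming); an empty measurementName splits the whole table
+ * rather than a single measurement.
+ */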
+public class GetTimeseriesSplitCallable implements Callable<List<TimeseriesScanSplitInfo>> {
+
+ private TimeseriesClient client = null;
+ private String timeseriesTableName = null;
+ private String measurementName = null;
+ private int splitCountHint = 1;
+
+
+ public GetTimeseriesSplitCallable(SyncClientInterface ots, String timeseriesTableName, String measurementName, int splitCountHint) {
+ this.client = ((SyncClient) ots).asTimeseriesClient();
+ this.timeseriesTableName = timeseriesTableName;
+ this.measurementName = measurementName;
+ this.splitCountHint = splitCountHint;
+ }
+
+ @Override
+    public List<TimeseriesScanSplitInfo> call() throws Exception {
+ SplitTimeseriesScanTaskRequest request = new SplitTimeseriesScanTaskRequest(timeseriesTableName, splitCountHint);
+ if (measurementName.length() != 0) {
+ request.setMeasurementName(measurementName);
+ }
+
+ SplitTimeseriesScanTaskResponse response = client.splitTimeseriesScanTask(request);
+ return response.getSplitInfos();
+ }
+}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/ScanTimeseriesDataCallable.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/ScanTimeseriesDataCallable.java
new file mode 100644
index 00000000..726d0e5d
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/callable/ScanTimeseriesDataCallable.java
@@ -0,0 +1,27 @@
+package com.alibaba.datax.plugin.reader.otsreader.callable;
+
+import com.alicloud.openservices.tablestore.SyncClient;
+import com.alicloud.openservices.tablestore.SyncClientInterface;
+import com.alicloud.openservices.tablestore.TimeseriesClient;
+import com.alicloud.openservices.tablestore.model.timeseries.ScanTimeseriesDataRequest;
+import com.alicloud.openservices.tablestore.model.timeseries.ScanTimeseriesDataResponse;
+import com.alicloud.openservices.tablestore.model.timeseries.TimeseriesScanSplitInfo;
+
+import java.util.List;
+import java.util.concurrent.Callable;
+
+public class ScanTimeseriesDataCallable implements Callable<ScanTimeseriesDataResponse> {
+
+ private TimeseriesClient client = null;
+ private ScanTimeseriesDataRequest request = null;
+
+ public ScanTimeseriesDataCallable(SyncClientInterface ots, ScanTimeseriesDataRequest scanTimeseriesDataRequest){
+ this.client = ((SyncClient) ots).asTimeseriesClient();
+ this.request = scanTimeseriesDataRequest;
+ }
+
+ @Override
+ public ScanTimeseriesDataResponse call() throws Exception {
+ return client.scanTimeseriesData(request);
+ }
+}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/DefaultNoRetry.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/DefaultNoRetry.java
new file mode 100644
index 00000000..b286472d
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/DefaultNoRetry.java
@@ -0,0 +1,32 @@
+package com.alibaba.datax.plugin.reader.otsreader.model;
+
+
+import com.alicloud.openservices.tablestore.model.DefaultRetryStrategy;
+import com.alicloud.openservices.tablestore.model.RetryStrategy;
+
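+/**
+ * Retry strategy that disables SDK-level retries (shouldRetry always returns
+ * false); the plugin drives retries itself via its retry helpers and the
+ * configured retry count.
+ */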
+public class DefaultNoRetry extends DefaultRetryStrategy {
+
+ public DefaultNoRetry() {
+ super();
+ }
+
+ @Override
+ public RetryStrategy clone() {
+ return super.clone();
+ }
+
+ @Override
+ public int getRetries() {
+ return super.getRetries();
+ }
+
+ @Override
+ public boolean shouldRetry(String action, Exception ex) {
+ return false;
+ }
+
+ @Override
+ public long nextPause(String action, Exception ex) {
+ return super.nextPause(action, ex);
+ }
+}
\ No newline at end of file
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSColumn.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSColumn.java
index 129ccd2f..809f4c38 100644
--- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSColumn.java
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSColumn.java
@@ -1,19 +1,18 @@
package com.alibaba.datax.plugin.reader.otsreader.model;
-import com.alibaba.datax.common.element.BoolColumn;
-import com.alibaba.datax.common.element.BytesColumn;
-import com.alibaba.datax.common.element.Column;
-import com.alibaba.datax.common.element.DoubleColumn;
-import com.alibaba.datax.common.element.LongColumn;
-import com.alibaba.datax.common.element.StringColumn;
-import com.aliyun.openservices.ots.model.ColumnType;
+import com.alibaba.datax.common.element.*;
+import com.alicloud.openservices.tablestore.model.ColumnType;
public class OTSColumn {
private String name;
private Column value;
+
private OTSColumnType columnType;
+
+    // Column configuration for timeseries data
private ColumnType valueType;
-
+ private Boolean isTimeseriesTag;
+
public static enum OTSColumnType {
        NORMAL, // normal column
        CONST // constant column
@@ -24,10 +23,9 @@ public class OTSColumn {
this.columnType = OTSColumnType.NORMAL;
}
- private OTSColumn(Column value, ColumnType type) {
+ private OTSColumn(Column value) {
this.value = value;
this.columnType = OTSColumnType.CONST;
- this.valueType = type;
}
public static OTSColumn fromNormalColumn(String name) {
@@ -39,23 +37,23 @@ public class OTSColumn {
}
public static OTSColumn fromConstStringColumn(String value) {
- return new OTSColumn(new StringColumn(value), ColumnType.STRING);
+ return new OTSColumn(new StringColumn(value));
}
public static OTSColumn fromConstIntegerColumn(long value) {
- return new OTSColumn(new LongColumn(value), ColumnType.INTEGER);
+ return new OTSColumn(new LongColumn(value));
}
public static OTSColumn fromConstDoubleColumn(double value) {
- return new OTSColumn(new DoubleColumn(value), ColumnType.DOUBLE);
+ return new OTSColumn(new DoubleColumn(value));
}
public static OTSColumn fromConstBoolColumn(boolean value) {
- return new OTSColumn(new BoolColumn(value), ColumnType.BOOLEAN);
+ return new OTSColumn(new BoolColumn(value));
}
public static OTSColumn fromConstBytesColumn(byte[] value) {
- return new OTSColumn(new BytesColumn(value), ColumnType.BINARY);
+ return new OTSColumn(new BytesColumn(value));
}
public Column getValue() {
@@ -65,12 +63,25 @@ public class OTSColumn {
public OTSColumnType getColumnType() {
return columnType;
}
-
- public ColumnType getValueType() {
- return valueType;
- }
+
public String getName() {
return name;
}
-}
+
+ public ColumnType getValueType() {
+ return valueType;
+ }
+
+ public void setValueType(ColumnType valueType) {
+ this.valueType = valueType;
+ }
+
+ public Boolean getTimeseriesTag() {
+ return isTimeseriesTag;
+ }
+
+ public void setTimeseriesTag(Boolean timeseriesTag) {
+ isTimeseriesTag = timeseriesTag;
+ }
+}
\ No newline at end of file
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSConf.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSConf.java
index 8b109a39..cbfd8f6a 100644
--- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSConf.java
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSConf.java
@@ -1,90 +1,245 @@
package com.alibaba.datax.plugin.reader.otsreader.model;
+import com.alibaba.datax.common.element.Column;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.reader.otsreader.utils.Constant;
+import com.alibaba.datax.plugin.reader.otsreader.utils.Key;
+import com.alibaba.datax.plugin.reader.otsreader.utils.ParamChecker;
+import com.alicloud.openservices.tablestore.model.ColumnType;
+
import java.util.List;
-import com.aliyun.openservices.ots.model.PrimaryKeyValue;
-
public class OTSConf {
- private String endpoint= null;
+ private String endpoint = null;
private String accessId = null;
- private String accesskey = null;
+ private String accessKey = null;
private String instanceName = null;
private String tableName = null;
+ private OTSRange range = null;
+    private List<OTSColumn> column = null;
+ private OTSMode mode = null;
+
+ @Deprecated
+ private String metaMode = "";
+
+ private boolean newVersion = false;
+ /**
+     * The settings below are used only when reading timeseries data
+ */
+ private boolean isTimeseriesTable = false;
+ private String measurementName = null;
+ /**
+     * The settings above are used only when reading timeseries data
+ */
+ private OTSMultiVersionConf multi = null;
-    private List<PrimaryKeyValue> rangeBegin = null;
-    private List<PrimaryKeyValue> rangeEnd = null;
-    private List<PrimaryKeyValue> rangeSplit = null;
-
-    private List<OTSColumn> columns = null;
-
- private int retry;
- private int sleepInMilliSecond;
-
+ private int retry = Constant.ConfigDefaultValue.RETRY;
+ private int retryPauseInMillisecond = Constant.ConfigDefaultValue.RETRY_PAUSE_IN_MILLISECOND;
+ private int ioThreadCount = Constant.ConfigDefaultValue.IO_THREAD_COUNT;
+ private int maxConnectionCount = Constant.ConfigDefaultValue.MAX_CONNECTION_COUNT;
+ private int socketTimeoutInMillisecond = Constant.ConfigDefaultValue.SOCKET_TIMEOUT_IN_MILLISECOND;
+ private int connectTimeoutInMillisecond = Constant.ConfigDefaultValue.CONNECT_TIMEOUT_IN_MILLISECOND;
+
+ public int getIoThreadCount() {
+ return ioThreadCount;
+ }
+
+ public void setIoThreadCount(int ioThreadCount) {
+ this.ioThreadCount = ioThreadCount;
+ }
+
+ public int getMaxConnectCount() {
+ return maxConnectionCount;
+ }
+
+ public void setMaxConnectCount(int maxConnectCount) {
+ this.maxConnectionCount = maxConnectCount;
+ }
+
+ public int getSocketTimeoutInMillisecond() {
+ return socketTimeoutInMillisecond;
+ }
+
+ public void setSocketTimeoutInMillisecond(int socketTimeoutInMillisecond) {
+ this.socketTimeoutInMillisecond = socketTimeoutInMillisecond;
+ }
+
+ public int getConnectTimeoutInMillisecond() {
+ return connectTimeoutInMillisecond;
+ }
+
+ public void setConnectTimeoutInMillisecond(int connectTimeoutInMillisecond) {
+ this.connectTimeoutInMillisecond = connectTimeoutInMillisecond;
+ }
+
+ public int getRetry() {
+ return retry;
+ }
+
+ public void setRetry(int retry) {
+ this.retry = retry;
+ }
+
+ public int getRetryPauseInMillisecond() {
+ return retryPauseInMillisecond;
+ }
+
+ public void setRetryPauseInMillisecond(int sleepInMillisecond) {
+ this.retryPauseInMillisecond = sleepInMillisecond;
+ }
+
public String getEndpoint() {
return endpoint;
}
+
public void setEndpoint(String endpoint) {
this.endpoint = endpoint;
}
+
public String getAccessId() {
return accessId;
}
+
public void setAccessId(String accessId) {
this.accessId = accessId;
}
- public String getAccesskey() {
- return accesskey;
+
+ public String getAccessKey() {
+ return accessKey;
}
- public void setAccesskey(String accesskey) {
- this.accesskey = accesskey;
+
+ public void setAccessKey(String accessKey) {
+ this.accessKey = accessKey;
}
+
public String getInstanceName() {
return instanceName;
}
+
public void setInstanceName(String instanceName) {
this.instanceName = instanceName;
}
+
public String getTableName() {
return tableName;
}
+
public void setTableName(String tableName) {
this.tableName = tableName;
}
-    public List<OTSColumn> getColumns() {
- return columns;
+ public OTSRange getRange() {
+ return range;
}
-    public void setColumns(List<OTSColumn> columns) {
- this.columns = columns;
+
+ public void setRange(OTSRange range) {
+ this.range = range;
}
- public int getRetry() {
- return retry;
+
+ public OTSMode getMode() {
+ return mode;
}
- public void setRetry(int retry) {
- this.retry = retry;
+
+ public void setMode(OTSMode mode) {
+ this.mode = mode;
}
- public int getSleepInMilliSecond() {
- return sleepInMilliSecond;
+
+ public OTSMultiVersionConf getMulti() {
+ return multi;
}
- public void setSleepInMilliSecond(int sleepInMilliSecond) {
- this.sleepInMilliSecond = sleepInMilliSecond;
+
+ public void setMulti(OTSMultiVersionConf multi) {
+ this.multi = multi;
}
-    public List<PrimaryKeyValue> getRangeBegin() {
- return rangeBegin;
+
+    public List<OTSColumn> getColumn() {
+ return column;
}
-    public void setRangeBegin(List<PrimaryKeyValue> rangeBegin) {
- this.rangeBegin = rangeBegin;
+
+    public void setColumn(List<OTSColumn> column) {
+ this.column = column;
}
-    public List<PrimaryKeyValue> getRangeEnd() {
- return rangeEnd;
+
+ public boolean isNewVersion() {
+ return newVersion;
}
-    public void setRangeEnd(List<PrimaryKeyValue> rangeEnd) {
- this.rangeEnd = rangeEnd;
+
+ public void setNewVersion(boolean newVersion) {
+ this.newVersion = newVersion;
}
-    public List<PrimaryKeyValue> getRangeSplit() {
- return rangeSplit;
+
+ @Deprecated
+ public String getMetaMode() {
+ return metaMode;
}
-    public void setRangeSplit(List<PrimaryKeyValue> rangeSplit) {
- this.rangeSplit = rangeSplit;
+
+ @Deprecated
+ public void setMetaMode(String metaMode) {
+ this.metaMode = metaMode;
+ }
+
+ public boolean isTimeseriesTable() {
+ return isTimeseriesTable;
+ }
+
+ public void setTimeseriesTable(boolean timeseriesTable) {
+ isTimeseriesTable = timeseriesTable;
+ }
+
+ public String getMeasurementName() {
+ return measurementName;
+ }
+
+ public void setMeasurementName(String measurementName) {
+ this.measurementName = measurementName;
+ }
+
+ public static OTSConf load(Configuration param) throws OTSCriticalException {
+ OTSConf c = new OTSConf();
+
+ // account
+ c.setEndpoint(ParamChecker.checkStringAndGet(param, Key.OTS_ENDPOINT, true));
+ c.setAccessId(ParamChecker.checkStringAndGet(param, Key.OTS_ACCESSID, true));
+ c.setAccessKey(ParamChecker.checkStringAndGet(param, Key.OTS_ACCESSKEY, true));
+ c.setInstanceName(ParamChecker.checkStringAndGet(param, Key.OTS_INSTANCE_NAME, true));
+ c.setTableName(ParamChecker.checkStringAndGet(param, Key.TABLE_NAME, true));
+
+ c.setRetry(param.getInt(Constant.ConfigKey.RETRY, Constant.ConfigDefaultValue.RETRY));
+ c.setRetryPauseInMillisecond(param.getInt(Constant.ConfigKey.RETRY_PAUSE_IN_MILLISECOND, Constant.ConfigDefaultValue.RETRY_PAUSE_IN_MILLISECOND));
+ c.setIoThreadCount(param.getInt(Constant.ConfigKey.IO_THREAD_COUNT, Constant.ConfigDefaultValue.IO_THREAD_COUNT));
+ c.setMaxConnectCount(param.getInt(Constant.ConfigKey.MAX_CONNECTION_COUNT, Constant.ConfigDefaultValue.MAX_CONNECTION_COUNT));
+ c.setSocketTimeoutInMillisecond(param.getInt(Constant.ConfigKey.SOCKET_TIMEOUTIN_MILLISECOND, Constant.ConfigDefaultValue.SOCKET_TIMEOUT_IN_MILLISECOND));
+ c.setConnectTimeoutInMillisecond(param.getInt(Constant.ConfigKey.CONNECT_TIMEOUT_IN_MILLISECOND, Constant.ConfigDefaultValue.CONNECT_TIMEOUT_IN_MILLISECOND));
+
+ // range
+ c.setRange(ParamChecker.checkRangeAndGet(param));
+
+        // mode (optional)
+        c.setMode(ParamChecker.checkModeAndGet(param));
+        // newVersion (optional)
+        c.setNewVersion(param.getBool(Key.NEW_VERSION, false));
+        // metaMode, legacy configuration
+        c.setMetaMode(param.getString(Key.META_MODE, ""));
+
+
+
+        // settings for reading timeseries tables
+        c.setTimeseriesTable(param.getBool(Key.IS_TIMESERIES_TABLE, false));
+        // column
+        if (!c.isTimeseriesTable()) {
+            // non-timeseries table
+            c.setColumn(ParamChecker.checkOTSColumnAndGet(param, c.getMode()));
+        }
+        else {
+            // timeseries table
+ c.setMeasurementName(param.getString(Key.MEASUREMENT_NAME, ""));
+ c.setColumn(ParamChecker.checkTimeseriesColumnAndGet(param));
+ ParamChecker.checkTimeseriesMode(c.getMode(), c.isNewVersion());
+ }
+
+ if (c.getMode() == OTSMode.MULTI_VERSION) {
+ c.setMulti(OTSMultiVersionConf.load(param));
+ }
+ return c;
}
}
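
The rewritten `OTSConf.load` above pulls everything from the job's JSON parameters. As a rough sketch of how it is driven (the key strings here are assumptions based on the `Key` and `Constant.ConfigKey` constants, not taken verbatim from this diff):

```java
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.reader.otsreader.model.OTSConf;

public class OTSConfLoadSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical reader parameter block; the key names ("endpoint",
        // "accessId", ...) are assumed values of the Key.* constants.
        Configuration param = Configuration.from("{"
                + "\"endpoint\":\"https://instance.region.ots.aliyuncs.com\","
                + "\"accessId\":\"<yourAccessId>\","
                + "\"accessKey\":\"<yourAccessKey>\","
                + "\"instanceName\":\"instance\","
                + "\"table\":\"mytable\","
                + "\"mode\":\"normal\","
                + "\"newVersion\":\"true\","
                + "\"column\":[{\"name\":\"pk1\"}]"
                + "}");
        OTSConf conf = OTSConf.load(param);
        System.out.println(conf.getEndpoint() + ", retry=" + conf.getRetry());
    }
}
```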
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSCriticalException.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSCriticalException.java
new file mode 100644
index 00000000..f02346bc
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSCriticalException.java
@@ -0,0 +1,24 @@
+package com.alibaba.datax.plugin.reader.otsreader.model;
+
+/**
+ * Plugin-level error: this exception describes an abnormal exit of the plugin.
+ * @author redchen
+ */
+public class OTSCriticalException extends Exception {
+
+ private static final long serialVersionUID = 5820460098894295722L;
+
+ public OTSCriticalException() {}
+
+ public OTSCriticalException(String message) {
+ super(message);
+ }
+
+    public OTSCriticalException(Throwable cause) {
+        super(cause);
+    }
+
+    public OTSCriticalException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSErrorCode.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSErrorCode.java
new file mode 100644
index 00000000..0c537fce
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSErrorCode.java
@@ -0,0 +1,115 @@
+/**
+ * Copyright (C) Alibaba Cloud Computing
+ * All rights reserved.
+ *
+ * Copyright (C) Alibaba Cloud Computing Co., Ltd.
+ */
+
+package com.alibaba.datax.plugin.reader.otsreader.model;
+
+/**
+ * Error codes returned by the Open Table Service (OTS).
+ *
+ */
+public class OTSErrorCode {
+ /**
+     * Authentication of the user failed.
+ */
+ public static final String AUTHORIZATION_FAILURE = "OTSAuthFailed";
+
+ /**
+     * Internal server error.
+ */
+ public static final String INTERNAL_SERVER_ERROR = "OTSInternalServerError";
+
+ /**
+     * Invalid parameter.
+ */
+ public static final String INVALID_PARAMETER = "OTSParameterInvalid";
+
+ /**
+     * The request body is too large.
+ */
+ public static final String REQUEST_TOO_LARGE = "OTSRequestBodyTooLarge";
+
+ /**
+     * The client request timed out.
+ */
+ public static final String REQUEST_TIMEOUT = "OTSRequestTimeout";
+
+ /**
+     * The user's quota is exhausted.
+ */
+ public static final String QUOTA_EXHAUSTED = "OTSQuotaExhausted";
+
+ /**
+     * An internal failover left some partitions of the table unavailable.
+ */
+ public static final String PARTITION_UNAVAILABLE = "OTSPartitionUnavailable";
+
+ /**
+     * The table was just created and cannot serve requests yet.
+ */
+ public static final String TABLE_NOT_READY = "OTSTableNotReady";
+
+ /**
+     * The requested table does not exist.
+ */
+ public static final String OBJECT_NOT_EXIST = "OTSObjectNotExist";
+
+ /**
+     * The table to be created already exists.
+ */
+ public static final String OBJECT_ALREADY_EXIST = "OTSObjectAlreadyExist";
+
+ /**
+     * Concurrent writes to the same row caused a conflict.
+ */
+ public static final String ROW_OPEARTION_CONFLICT = "OTSRowOperationConflict";
+
+ /**
+     * The primary key does not match.
+ */
+ public static final String INVALID_PK = "OTSInvalidPK";
+
+ /**
+     * Reserved read/write throughput is adjusted too frequently.
+ */
+ public static final String TOO_FREQUENT_RESERVED_THROUGHPUT_ADJUSTMENT = "OTSTooFrequentReservedThroughputAdjustment";
+
+ /**
+     * The total number of columns in the row exceeds the limit.
+ */
+ public static final String OUT_OF_COLUMN_COUNT_LIMIT = "OTSOutOfColumnCountLimit";
+
+ /**
+     * The total size of all columns in the row exceeds the limit.
+ */
+ public static final String OUT_OF_ROW_SIZE_LIMIT = "OTSOutOfRowSizeLimit";
+
+ /**
+     * Not enough reserved read/write capacity remains.
+ */
+ public static final String NOT_ENOUGH_CAPACITY_UNIT = "OTSNotEnoughCapacityUnit";
+
+ /**
+     * The conditional check failed.
+ */
+ public static final String CONDITION_CHECK_FAIL = "OTSConditionCheckFail";
+
+ /**
+     * An operation timed out inside OTS.
+ */
+ public static final String STORAGE_TIMEOUT = "OTSTimeout";
+
+ /**
+     * A server inside OTS is unreachable.
+ */
+ public static final String SERVER_UNAVAILABLE = "OTSServerUnavailable";
+
+ /**
+     * An OTS internal server is busy.
+ */
+ public static final String SERVER_BUSY = "OTSServerBusy";
+
+}
\ No newline at end of file
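
Since these are plain string constants, callers can branch on them directly. A hedged sketch of one possible use, classifying errors as retryable; the set below is illustrative only, not the plugin's actual retry policy:

```java
package com.alibaba.datax.plugin.reader.otsreader.model;

// Illustrative only: one way a caller might classify OTS errors.
public class RetryableErrorSketch {
    public static boolean isRetryable(String errorCode) {
        switch (errorCode) {
            case OTSErrorCode.INTERNAL_SERVER_ERROR:
            case OTSErrorCode.REQUEST_TIMEOUT:
            case OTSErrorCode.PARTITION_UNAVAILABLE:
            case OTSErrorCode.TABLE_NOT_READY:
            case OTSErrorCode.NOT_ENOUGH_CAPACITY_UNIT:
            case OTSErrorCode.STORAGE_TIMEOUT:
            case OTSErrorCode.SERVER_UNAVAILABLE:
            case OTSErrorCode.SERVER_BUSY:
                return true;  // transient server-side conditions
            default:
                return false; // caller errors or permanent states
        }
    }
}
```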
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSMode.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSMode.java
new file mode 100644
index 00000000..88c6ee67
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSMode.java
@@ -0,0 +1,6 @@
+package com.alibaba.datax.plugin.reader.otsreader.model;
+
+public enum OTSMode {
+ NORMAL,
+ MULTI_VERSION
+}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSMultiVersionConf.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSMultiVersionConf.java
new file mode 100644
index 00000000..72a8e1b7
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSMultiVersionConf.java
@@ -0,0 +1,35 @@
+package com.alibaba.datax.plugin.reader.otsreader.model;
+
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.reader.otsreader.utils.Constant;
+import com.alibaba.datax.plugin.reader.otsreader.utils.ParamChecker;
+import com.alicloud.openservices.tablestore.model.TimeRange;
+
+public class OTSMultiVersionConf {
+
+ private TimeRange timeRange = null;
+ private int maxVersion = -1;
+
+ public TimeRange getTimeRange() {
+ return timeRange;
+ }
+
+ public void setTimeRange(TimeRange timeRange) {
+ this.timeRange = timeRange;
+ }
+
+ public int getMaxVersion() {
+ return maxVersion;
+ }
+
+ public void setMaxVersion(int maxVersion) {
+ this.maxVersion = maxVersion;
+ }
+
+ public static OTSMultiVersionConf load(Configuration param) throws OTSCriticalException {
+ OTSMultiVersionConf conf = new OTSMultiVersionConf();
+ conf.setTimeRange(ParamChecker.checkTimeRangeAndGet(param));
+ conf.setMaxVersion(param.getInt(Constant.ConfigKey.MAX_VERSION, Constant.ConfigDefaultValue.MAX_VERSION));
+ return conf;
+ }
+}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSPrimaryKeyColumn.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSPrimaryKeyColumn.java
index eaec50ce..44a37c0c 100644
--- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSPrimaryKeyColumn.java
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSPrimaryKeyColumn.java
@@ -15,8 +15,41 @@ public class OTSPrimaryKeyColumn {
public PrimaryKeyType getType() {
return type;
}
+
+ public com.alicloud.openservices.tablestore.model.PrimaryKeyType getType(Boolean newVersion) {
+ com.alicloud.openservices.tablestore.model.PrimaryKeyType res = null;
+ switch (this.type){
+ case BINARY:
+ res = com.alicloud.openservices.tablestore.model.PrimaryKeyType.BINARY;
+ break;
+ case INTEGER:
+ res = com.alicloud.openservices.tablestore.model.PrimaryKeyType.INTEGER;
+ break;
+ case STRING:
+ default:
+ res = com.alicloud.openservices.tablestore.model.PrimaryKeyType.STRING;
+ break;
+ }
+ return res;
+ }
+
public void setType(PrimaryKeyType type) {
this.type = type;
}
+
+ public void setType(com.alicloud.openservices.tablestore.model.PrimaryKeyType type) {
+ switch (type){
+ case BINARY:
+ this.type = PrimaryKeyType.BINARY;
+ break;
+ case INTEGER:
+ this.type = PrimaryKeyType.INTEGER;
+ break;
+ case STRING:
+ default:
+ this.type = PrimaryKeyType.STRING;
+ break;
+ }
+ }
}
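
The new overloads make `OTSPrimaryKeyColumn` a bridge between the legacy `com.aliyun.openservices.ots` enum and the new `com.alicloud.openservices.tablestore` enum. A minimal usage sketch; it assumes the unqualified `PrimaryKeyType` in this class is the legacy enum:

```java
import com.alibaba.datax.plugin.reader.otsreader.model.OTSPrimaryKeyColumn;
import com.aliyun.openservices.ots.model.PrimaryKeyType;

public class PrimaryKeyTypeBridgeSketch {
    public static void main(String[] args) {
        OTSPrimaryKeyColumn col = new OTSPrimaryKeyColumn();
        // Accepts the new-SDK enum, stores the legacy one internally.
        col.setType(com.alicloud.openservices.tablestore.model.PrimaryKeyType.INTEGER);

        PrimaryKeyType legacy = col.getType(); // legacy view
        // The Boolean argument only selects this overload; its value is not read.
        com.alicloud.openservices.tablestore.model.PrimaryKeyType current = col.getType(true);
        System.out.println(legacy + " / " + current);
    }
}
```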
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSRange.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSRange.java
index 8ebfcf7e..eb3095e6 100644
--- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSRange.java
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/model/OTSRange.java
@@ -1,29 +1,31 @@
package com.alibaba.datax.plugin.reader.otsreader.model;
-import com.aliyun.openservices.ots.model.RowPrimaryKey;
+import com.alicloud.openservices.tablestore.model.PrimaryKeyColumn;
+
+import java.util.List;
+
public class OTSRange {
+    private List<PrimaryKeyColumn> begin = null;
+    private List<PrimaryKeyColumn> end = null;
+    private List<PrimaryKeyColumn> split = null;
- private RowPrimaryKey begin = null;
- private RowPrimaryKey end = null;
-
- public OTSRange() {}
-
- public OTSRange(RowPrimaryKey begin, RowPrimaryKey end) {
- this.begin = begin;
- this.end = end;
- }
-
- public RowPrimaryKey getBegin() {
+    public List<PrimaryKeyColumn> getBegin() {
return begin;
}
- public void setBegin(RowPrimaryKey begin) {
+    public void setBegin(List<PrimaryKeyColumn> begin) {
this.begin = begin;
}
- public RowPrimaryKey getEnd() {
+    public List<PrimaryKeyColumn> getEnd() {
return end;
}
- public void setEnd(RowPrimaryKey end) {
+    public void setEnd(List<PrimaryKeyColumn> end) {
this.end = end;
}
+    public List<PrimaryKeyColumn> getSplit() {
+ return split;
+ }
+    public void setSplit(List<PrimaryKeyColumn> split) {
+ this.split = split;
+ }
}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/Common.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/Common.java
index fb8c7feb..90065d5d 100644
--- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/Common.java
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/Common.java
@@ -1,26 +1,85 @@
package com.alibaba.datax.plugin.reader.otsreader.utils;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSColumn;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSCriticalException;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSPrimaryKeyColumn;
+import com.alicloud.openservices.tablestore.model.*;
+import com.alicloud.openservices.tablestore.model.timeseries.ScanTimeseriesDataResponse;
+
+import java.lang.reflect.Field;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import com.alibaba.datax.common.element.BoolColumn;
-import com.alibaba.datax.common.element.BytesColumn;
-import com.alibaba.datax.common.element.DoubleColumn;
-import com.alibaba.datax.common.element.LongColumn;
-import com.alibaba.datax.common.element.Record;
-import com.alibaba.datax.common.element.StringColumn;
-import com.alibaba.datax.plugin.reader.otsreader.model.OTSColumn;
-import com.alibaba.datax.plugin.reader.otsreader.model.OTSPrimaryKeyColumn;
-import com.aliyun.openservices.ots.ClientException;
-import com.aliyun.openservices.ots.OTSException;
-import com.aliyun.openservices.ots.model.ColumnValue;
-import com.aliyun.openservices.ots.model.PrimaryKeyValue;
-import com.aliyun.openservices.ots.model.Row;
-import com.aliyun.openservices.ots.model.RowPrimaryKey;
-import com.aliyun.openservices.ots.model.TableMeta;
-
public class Common {
+    public static List<String> toColumnToGet(List<OTSColumn> columns, TableMeta meta) {
+        Map<String, PrimaryKeyType> pk = meta.getPrimaryKeyMap();
+        List<String> names = new ArrayList<String>();
+ for (OTSColumn c : columns) {
+ if (c.getColumnType() == OTSColumn.OTSColumnType.NORMAL && !pk.containsKey(c.getName())) {
+ names.add(c.getName());
+ }
+ }
+ return names;
+ }
+
+    public static List<String> getPrimaryKeyNameList(TableMeta meta) {
+        List<String> names = new ArrayList<String>();
+ names.addAll(meta.getPrimaryKeyMap().keySet());
+ return names;
+ }
+
+ public static OTSPrimaryKeyColumn getPartitionKey(TableMeta meta) {
+        List<String> keys = new ArrayList<String>();
+ keys.addAll(meta.getPrimaryKeyMap().keySet());
+
+ String key = keys.get(0);
+
+ OTSPrimaryKeyColumn col = new OTSPrimaryKeyColumn();
+ col.setName(key);
+ col.setType(meta.getPrimaryKeyMap().get(key));
+ return col;
+ }
+
+    public static Direction getDirection(List<PrimaryKeyColumn> begin, List<PrimaryKeyColumn> end) throws OTSCriticalException {
+ int cmp = CompareHelper.comparePrimaryKeyColumnList(begin, end);
+ if (cmp < 0) {
+ return Direction.FORWARD;
+ } else if (cmp > 0) {
+ return Direction.BACKWARD;
+ } else {
+ throw new OTSCriticalException("Bug branch, the begin of range equals end of range.");
+ }
+ }
+
+    public static int compareRangeBeginAndEnd(TableMeta meta, List<PrimaryKeyColumn> begin, List<PrimaryKeyColumn> end) {
+ if (begin.size() != end.size()) {
+ throw new IllegalArgumentException("Input size of begin not equal size of end, begin size : " + begin.size() +
+ ", end size : " + end.size() + ".");
+ }
+
+        Map<String, PrimaryKeyValue> beginMap = new HashMap<>();
+        Map<String, PrimaryKeyValue> endMap = new HashMap<>();
+
+        for (PrimaryKeyColumn primaryKeyColumn : begin) {
+            beginMap.put(primaryKeyColumn.getName(), primaryKeyColumn.getValue());
+        }
+        for (PrimaryKeyColumn primaryKeyColumn : end) {
+            endMap.put(primaryKeyColumn.getName(), primaryKeyColumn.getValue());
+        }
+
+ for (String key : meta.getPrimaryKeyMap().keySet()) {
+ PrimaryKeyValue v1 = beginMap.get(key);
+ PrimaryKeyValue v2 = endMap.get(key);
+ int cmp = primaryKeyValueCmp(v1, v2);
+ if (cmp != 0) {
+ return cmp;
+ }
+ }
+ return 0;
+ }
+
public static int primaryKeyValueCmp(PrimaryKeyValue v1, PrimaryKeyValue v2) {
if (v1.getType() != null && v2.getType() != null) {
@@ -29,14 +88,14 @@ public class Common {
"Not same column type, column1:" + v1.getType() + ", column2:" + v2.getType());
}
switch (v1.getType()) {
- case INTEGER:
- Long l1 = Long.valueOf(v1.asLong());
- Long l2 = Long.valueOf(v2.asLong());
- return l1.compareTo(l2);
- case STRING:
- return v1.asString().compareTo(v2.asString());
- default:
- throw new IllegalArgumentException("Unsuporrt compare the type: " + v1.getType() + ".");
+ case INTEGER:
+ Long l1 = Long.valueOf(v1.asLong());
+ Long l2 = Long.valueOf(v2.asLong());
+ return l1.compareTo(l2);
+ case STRING:
+ return v1.asString().compareTo(v2.asString());
+ default:
+ throw new IllegalArgumentException("Unsuporrt compare the type: " + v1.getType() + ".");
}
} else {
if (v1 == v2) {
@@ -46,116 +105,31 @@ public class Common {
return -1;
} else if (v1 == PrimaryKeyValue.INF_MAX) {
return 1;
- }
+ }
if (v2 == PrimaryKeyValue.INF_MAX) {
return -1;
} else if (v2 == PrimaryKeyValue.INF_MIN) {
return 1;
- }
- }
- }
- return 0;
- }
-
- public static OTSPrimaryKeyColumn getPartitionKey(TableMeta meta) {
-        List<String> keys = new ArrayList<String>();
- keys.addAll(meta.getPrimaryKey().keySet());
-
- String key = keys.get(0);
-
- OTSPrimaryKeyColumn col = new OTSPrimaryKeyColumn();
- col.setName(key);
- col.setType(meta.getPrimaryKey().get(key));
- return col;
- }
-
-    public static List<String> getPrimaryKeyNameList(TableMeta meta) {
-        List<String> names = new ArrayList<String>();
- names.addAll(meta.getPrimaryKey().keySet());
- return names;
- }
-
- public static int compareRangeBeginAndEnd(TableMeta meta, RowPrimaryKey begin, RowPrimaryKey end) {
- if (begin.getPrimaryKey().size() != end.getPrimaryKey().size()) {
- throw new IllegalArgumentException("Input size of begin not equal size of end, begin size : " + begin.getPrimaryKey().size() +
- ", end size : " + end.getPrimaryKey().size() + ".");
- }
- for (String key : meta.getPrimaryKey().keySet()) {
- PrimaryKeyValue v1 = begin.getPrimaryKey().get(key);
- PrimaryKeyValue v2 = end.getPrimaryKey().get(key);
- int cmp = primaryKeyValueCmp(v1, v2);
- if (cmp != 0) {
- return cmp;
- }
- }
- return 0;
- }
-
-    public static List<String> getNormalColumnNameList(List<OTSColumn> columns) {
-        List<String> normalColumns = new ArrayList<String>();
- for (OTSColumn col : columns) {
- if (col.getColumnType() == OTSColumn.OTSColumnType.NORMAL) {
- normalColumns.add(col.getName());
- }
- }
- return normalColumns;
- }
-
-    public static Record parseRowToLine(Row row, List<OTSColumn> columns, Record line) {
-        Map<String, ColumnValue> values = row.getColumns();
- for (OTSColumn col : columns) {
- if (col.getColumnType() == OTSColumn.OTSColumnType.CONST) {
- line.addColumn(col.getValue());
- } else {
- ColumnValue v = values.get(col.getName());
- if (v == null) {
- line.addColumn(new StringColumn(null));
- } else {
- switch(v.getType()) {
- case STRING: line.addColumn(new StringColumn(v.asString())); break;
- case INTEGER: line.addColumn(new LongColumn(v.asLong())); break;
- case DOUBLE: line.addColumn(new DoubleColumn(v.asDouble())); break;
- case BOOLEAN: line.addColumn(new BoolColumn(v.asBoolean())); break;
- case BINARY: line.addColumn(new BytesColumn(v.asBinary())); break;
- default:
- throw new IllegalArgumentException("Unsupported transform the type: " + col.getValue().getType() + ".");
- }
}
}
}
- return line;
+ return 0;
}
-
- public static String getDetailMessage(Exception exception) {
- if (exception instanceof OTSException) {
- OTSException e = (OTSException) exception;
- return "OTSException[ErrorCode:" + e.getErrorCode() + ", ErrorMessage:" + e.getMessage() + ", RequestId:" + e.getRequestId() + "]";
- } else if (exception instanceof ClientException) {
- ClientException e = (ClientException) exception;
- return "ClientException[ErrorCode:" + e.getErrorCode() + ", ErrorMessage:" + e.getMessage() + "]";
- } else if (exception instanceof IllegalArgumentException) {
- IllegalArgumentException e = (IllegalArgumentException) exception;
- return "IllegalArgumentException[ErrorMessage:" + e.getMessage() + "]";
- } else {
- return "Exception[ErrorMessage:" + exception.getMessage() + "]";
- }
- }
-
- public static long getDelaySendMillinSeconds(int hadRetryTimes, int initSleepInMilliSecond) {
- if (hadRetryTimes <= 0) {
- return 0;
- }
-
- int sleepTime = initSleepInMilliSecond;
- for (int i = 1; i < hadRetryTimes; i++) {
- sleepTime += sleepTime;
- if (sleepTime > 30000) {
- sleepTime = 30000;
+ public static void checkTableStoreSDKVersion() throws OTSCriticalException {
+ Field[] fields = ScanTimeseriesDataResponse.class.getFields();
+ String sdkVersion = null;
+        for (Field f : fields) {
+            if (f.getName().equals("_VERSION_")) {
+ sdkVersion = ScanTimeseriesDataResponse._VERSION_;
break;
- }
+ }
+ }
+        if (sdkVersion == null) {
+            throw new OTSCriticalException("Checking the tablestore java SDK failed. Please check the version of the tablestore maven dependency.");
+        } else if (Integer.parseInt(sdkVersion) < 20230111) {
+            throw new OTSCriticalException("Checking the tablestore java SDK failed. The expected version is at least 20230111, actual version: " + sdkVersion + ".");
}
- return sleepTime;
}
}
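
`Common` now derives the scan direction from the configured range instead of carrying it in the configuration. A minimal usage sketch (types from the new tablestore SDK):

```java
import com.alibaba.datax.plugin.reader.otsreader.model.OTSCriticalException;
import com.alibaba.datax.plugin.reader.otsreader.utils.Common;
import com.alicloud.openservices.tablestore.model.Direction;
import com.alicloud.openservices.tablestore.model.PrimaryKeyColumn;
import com.alicloud.openservices.tablestore.model.PrimaryKeyValue;

import java.util.Arrays;
import java.util.List;

public class DirectionSketch {
    public static void main(String[] args) throws OTSCriticalException {
        List<PrimaryKeyColumn> begin = Arrays.asList(
                new PrimaryKeyColumn("uid", PrimaryKeyValue.fromLong(0L)));
        List<PrimaryKeyColumn> end = Arrays.asList(
                new PrimaryKeyColumn("uid", PrimaryKeyValue.fromLong(100L)));
        // begin < end scans FORWARD, begin > end scans BACKWARD;
        // equal begin and end is rejected with OTSCriticalException.
        Direction d = Common.getDirection(begin, end);
        System.out.println(d); // FORWARD
    }
}
```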
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/CommonOld.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/CommonOld.java
new file mode 100644
index 00000000..d5c565f4
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/CommonOld.java
@@ -0,0 +1,112 @@
+package com.alibaba.datax.plugin.reader.otsreader.utils;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import com.alibaba.datax.common.element.BoolColumn;
+import com.alibaba.datax.common.element.BytesColumn;
+import com.alibaba.datax.common.element.DoubleColumn;
+import com.alibaba.datax.common.element.LongColumn;
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.element.StringColumn;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSColumn;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSPrimaryKeyColumn;
+import com.aliyun.openservices.ots.ClientException;
+import com.aliyun.openservices.ots.OTSException;
+import com.aliyun.openservices.ots.model.ColumnValue;
+import com.aliyun.openservices.ots.model.PrimaryKeyValue;
+import com.aliyun.openservices.ots.model.Row;
+import com.aliyun.openservices.ots.model.RowPrimaryKey;
+import com.aliyun.openservices.ots.model.TableMeta;
+
+public class CommonOld {
+ public static int primaryKeyValueCmp(PrimaryKeyValue v1, PrimaryKeyValue v2) {
+ if (v1.getType() != null && v2.getType() != null) {
+ if (v1.getType() != v2.getType()) {
+ throw new IllegalArgumentException(
+ "Not same column type, column1:" + v1.getType() + ", column2:" + v2.getType());
+ }
+ switch (v1.getType()) {
+ case INTEGER:
+ Long l1 = Long.valueOf(v1.asLong());
+ Long l2 = Long.valueOf(v2.asLong());
+ return l1.compareTo(l2);
+ case STRING:
+ return v1.asString().compareTo(v2.asString());
+ default:
+ throw new IllegalArgumentException("Unsuporrt compare the type: " + v1.getType() + ".");
+ }
+ } else {
+ if (v1 == v2) {
+ return 0;
+ } else {
+ if (v1 == PrimaryKeyValue.INF_MIN) {
+ return -1;
+ } else if (v1 == PrimaryKeyValue.INF_MAX) {
+ return 1;
+ }
+
+ if (v2 == PrimaryKeyValue.INF_MAX) {
+ return -1;
+ } else if (v2 == PrimaryKeyValue.INF_MIN) {
+ return 1;
+ }
+ }
+ }
+ return 0;
+ }
+
+
+    public static List<String> getNormalColumnNameList(List<OTSColumn> columns) {
+        List<String> normalColumns = new ArrayList<String>();
+ for (OTSColumn col : columns) {
+ if (col.getColumnType() == OTSColumn.OTSColumnType.NORMAL) {
+ normalColumns.add(col.getName());
+ }
+ }
+ return normalColumns;
+ }
+
+    public static Record parseRowToLine(Row row, List<OTSColumn> columns, Record line) {
+        Map<String, ColumnValue> values = row.getColumns();
+ for (OTSColumn col : columns) {
+ if (col.getColumnType() == OTSColumn.OTSColumnType.CONST) {
+ line.addColumn(col.getValue());
+ } else {
+ ColumnValue v = values.get(col.getName());
+ if (v == null) {
+ line.addColumn(new StringColumn(null));
+ } else {
+ switch(v.getType()) {
+ case STRING: line.addColumn(new StringColumn(v.asString())); break;
+ case INTEGER: line.addColumn(new LongColumn(v.asLong())); break;
+ case DOUBLE: line.addColumn(new DoubleColumn(v.asDouble())); break;
+ case BOOLEAN: line.addColumn(new BoolColumn(v.asBoolean())); break;
+ case BINARY: line.addColumn(new BytesColumn(v.asBinary())); break;
+ default:
+ throw new IllegalArgumentException("Unsuporrt tranform the type: " + col.getValue().getType() + ".");
+ }
+ }
+ }
+ }
+ return line;
+ }
+
+ public static long getDelaySendMillinSeconds(int hadRetryTimes, int initSleepInMilliSecond) {
+
+ if (hadRetryTimes <= 0) {
+ return 0;
+ }
+
+ int sleepTime = initSleepInMilliSecond;
+ for (int i = 1; i < hadRetryTimes; i++) {
+ sleepTime += sleepTime;
+ if (sleepTime > 30000) {
+ sleepTime = 30000;
+ break;
+ }
+ }
+ return sleepTime;
+ }
+}
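
`CommonOld` keeps the legacy retry backoff: the delay doubles each attempt and is capped at 30 seconds. A small worked example of the resulting schedule:

```java
import com.alibaba.datax.plugin.reader.otsreader.utils.CommonOld;

public class BackoffSketch {
    public static void main(String[] args) {
        // With an initial sleep of 100 ms: retry 1 -> 100, 2 -> 200, 3 -> 400,
        // ..., 9 -> 25600, then capped at 30000 ms from retry 10 onward.
        for (int retry = 1; retry <= 11; retry++) {
            System.out.println(retry + " -> "
                    + CommonOld.getDelaySendMillinSeconds(retry, 100) + " ms");
        }
    }
}
```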
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/CompareHelper.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/CompareHelper.java
new file mode 100644
index 00000000..19e06421
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/CompareHelper.java
@@ -0,0 +1,37 @@
+package com.alibaba.datax.plugin.reader.otsreader.utils;
+
+import com.alicloud.openservices.tablestore.model.PrimaryKeyColumn;
+
+import java.util.List;
+
+
+public class CompareHelper {
+ /**
+     * Compare two PrimaryKeyColumn lists.
+     * Returns
+     *     -1 if before is less than after
+     *      0 if before equals after
+     *      1 if before is greater than after
+ *
+ * @param before
+ * @param after
+ * @return
+ */
+    public static int comparePrimaryKeyColumnList(List<PrimaryKeyColumn> before, List<PrimaryKeyColumn> after) {
+        int size = Math.min(before.size(), after.size());
+
+ for (int i = 0; i < size; i++) {
+ int cmp = before.get(i).compareTo(after.get(i));
+ if (cmp != 0) {
+ return cmp;
+ }
+ }
+
+        if (before.size() < after.size()) {
+            return -1;
+        } else if (before.size() > after.size()) {
+            return 1;
+        }
+ return 0;
+ }
+}
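
Note the prefix semantics: lists are compared element-wise and, if one is a prefix of the other, the shorter list sorts first. A quick illustration:

```java
import com.alibaba.datax.plugin.reader.otsreader.utils.CompareHelper;
import com.alicloud.openservices.tablestore.model.PrimaryKeyColumn;
import com.alicloud.openservices.tablestore.model.PrimaryKeyValue;

import java.util.Arrays;
import java.util.List;

public class CompareSketch {
    public static void main(String[] args) {
        List<PrimaryKeyColumn> a = Arrays.asList(
                new PrimaryKeyColumn("pk1", PrimaryKeyValue.fromString("a")));
        List<PrimaryKeyColumn> b = Arrays.asList(
                new PrimaryKeyColumn("pk1", PrimaryKeyValue.fromString("a")),
                new PrimaryKeyColumn("pk2", PrimaryKeyValue.fromLong(1L)));
        // Equal on the shared prefix, so the shorter list compares as smaller.
        System.out.println(CompareHelper.comparePrimaryKeyColumnList(a, b)); // -1
    }
}
```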
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/Constant.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/Constant.java
new file mode 100644
index 00000000..90273bfb
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/Constant.java
@@ -0,0 +1,92 @@
+package com.alibaba.datax.plugin.reader.otsreader.utils;
+
+public class Constant {
+    /**
+     * Key names used in the JSON configuration.
+     */
+    public class ConfigKey {
+ public static final String CONF = "conf";
+ public static final String RANGE = "range";
+ public static final String META = "meta";
+ public static final String SPLIT_INFO = "splitInfo";
+
+ public static final String TIME_RANGE = "timeRange";
+ public static final String MAX_VERSION = "maxVersion";
+
+ public static final String RETRY = "maxRetryTime";
+ public static final String RETRY_PAUSE_IN_MILLISECOND = "retryPauseInMillisecond";
+ public static final String IO_THREAD_COUNT = "ioThreadCount";
+ public static final String MAX_CONNECTION_COUNT = "maxConnectionCount";
+ public static final String SOCKET_TIMEOUTIN_MILLISECOND = "socketTimeoutInMillisecond";
+ public static final String CONNECT_TIMEOUT_IN_MILLISECOND = "connectTimeoutInMillisecond";
+
+ public class Range {
+ public static final String BEGIN = "begin";
+ public static final String END = "end";
+ public static final String SPLIT = "split";
+ };
+
+ public class PrimaryKeyColumn {
+ public static final String TYPE = "type";
+ public static final String VALUE = "value";
+ };
+
+ public class TimeseriesPKColumn {
+ public static final String MEASUREMENT_NAME = "_m_name";
+ public static final String DATA_SOURCE = "_data_source";
+ public static final String TAGS = "_tags";
+ public static final String TIME = "_time";
+ }
+
+ public class Column {
+ public static final String NAME = "name";
+ public static final String TYPE = "type";
+ public static final String VALUE = "value";
+ public static final String IS_TAG = "is_timeseries_tag";
+ };
+
+ public class TimeRange {
+ public static final String BEGIN = "begin";
+ public static final String END = "end";
+ }
+ };
+
+    /**
+     * Allowed values for the 'type' field in the configuration.
+     */
+ public class ValueType {
+ public static final String INF_MIN = "INF_MIN";
+ public static final String INF_MAX = "INF_MAX";
+ public static final String STRING = "string";
+ public static final String INTEGER = "int";
+ public static final String BINARY = "binary";
+ public static final String DOUBLE = "double";
+ public static final String BOOLEAN = "bool";
+ };
+
+    /**
+     * Global default values.
+     */
+ public class ConfigDefaultValue {
+ public static final int RETRY = 18;
+ public static final int RETRY_PAUSE_IN_MILLISECOND = 100;
+ public static final int IO_THREAD_COUNT = 1;
+ public static final int MAX_CONNECTION_COUNT = 1;
+ public static final int SOCKET_TIMEOUT_IN_MILLISECOND = 10000;
+ public static final int CONNECT_TIMEOUT_IN_MILLISECOND = 10000;
+
+ public static final int MAX_VERSION = Integer.MAX_VALUE;
+
+ public static final String DEFAULT_NAME = "DEFAULT_NAME";
+
+ public class Mode {
+ public static final String NORMAL = "normal";
+ public static final String MULTI_VERSION = "multiVersion";
+ }
+
+ public class TimeRange {
+ public static final long MIN = 0;
+ public static final long MAX = Long.MAX_VALUE;
+ }
+ }
+}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/GsonParser.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/GsonParser.java
index a82f3350..205f536d 100644
--- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/GsonParser.java
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/GsonParser.java
@@ -1,23 +1,26 @@
package com.alibaba.datax.plugin.reader.otsreader.utils;
-import com.alibaba.datax.plugin.reader.otsreader.adaptor.OTSColumnAdaptor;
+import com.alibaba.datax.common.element.Column;
+import com.alibaba.datax.plugin.reader.otsreader.adaptor.ColumnAdaptor;
import com.alibaba.datax.plugin.reader.otsreader.adaptor.PrimaryKeyValueAdaptor;
-import com.alibaba.datax.plugin.reader.otsreader.model.OTSColumn;
import com.alibaba.datax.plugin.reader.otsreader.model.OTSConf;
import com.alibaba.datax.plugin.reader.otsreader.model.OTSRange;
+import com.alicloud.openservices.tablestore.model.PrimaryKeyValue;
+import com.alicloud.openservices.tablestore.model.TableMeta;
+import com.alicloud.openservices.tablestore.model.timeseries.TimeseriesScanSplitInfo;
import com.aliyun.openservices.ots.model.Direction;
-import com.aliyun.openservices.ots.model.PrimaryKeyValue;
import com.aliyun.openservices.ots.model.RowPrimaryKey;
-import com.aliyun.openservices.ots.model.TableMeta;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
+import java.util.Map;
+
public class GsonParser {
private static Gson gsonBuilder() {
return new GsonBuilder()
- .registerTypeAdapter(OTSColumn.class, new OTSColumnAdaptor())
.registerTypeAdapter(PrimaryKeyValue.class, new PrimaryKeyValueAdaptor())
+ .registerTypeAdapter(Column.class, new ColumnAdaptor())
.create();
}
@@ -40,24 +43,39 @@ public class GsonParser {
Gson g = gsonBuilder();
return g.fromJson(jsonStr, OTSConf.class);
}
-
- public static String directionToJson (Direction direction) {
+
+ public static String metaToJson (TableMeta meta) {
Gson g = gsonBuilder();
- return g.toJson(direction);
+ return g.toJson(meta);
+ }
+
+ public static TableMeta jsonToMeta (String jsonStr) {
+ Gson g = gsonBuilder();
+ return g.fromJson(jsonStr, TableMeta.class);
+ }
+
+ public static String timeseriesScanSplitInfoToString(TimeseriesScanSplitInfo timeseriesScanSplitInfo){
+ Gson g = gsonBuilder();
+ return g.toJson(timeseriesScanSplitInfo);
+ }
+
+ public static TimeseriesScanSplitInfo stringToTimeseriesScanSplitInfo(String jsonStr){
+ Gson g = gsonBuilder();
+ return g.fromJson(jsonStr, TimeseriesScanSplitInfo.class);
}
public static Direction jsonToDirection (String jsonStr) {
Gson g = gsonBuilder();
return g.fromJson(jsonStr, Direction.class);
}
-
- public static String metaToJson (TableMeta meta) {
- Gson g = gsonBuilder();
- return g.toJson(meta);
- }
-
+
public static String rowPrimaryKeyToJson (RowPrimaryKey row) {
Gson g = gsonBuilder();
return g.toJson(row);
}
+
+ public static String mapToJson (Map map) {
+ Gson g = gsonBuilder();
+ return g.toJson(map);
+ }
}
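
`GsonParser` is how the reader ships its state (conf, table meta, timeseries split info) between the DataX master and its tasks as JSON strings. A minimal round-trip sketch; `confToJson` is assumed to be the existing counterpart of the `jsonToConf` shown above:

```java
import com.alibaba.datax.plugin.reader.otsreader.model.OTSConf;
import com.alibaba.datax.plugin.reader.otsreader.utils.GsonParser;

public class GsonRoundTripSketch {
    public static void main(String[] args) {
        OTSConf conf = new OTSConf();
        conf.setEndpoint("https://instance.region.ots.aliyuncs.com");
        conf.setTableName("mytable");

        String json = GsonParser.confToJson(conf); // assumed counterpart of jsonToConf
        OTSConf copy = GsonParser.jsonToConf(json);
        System.out.println(copy.getEndpoint() + ", " + copy.getTableName());
    }
}
```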
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/Key.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/Key.java
similarity index 81%
rename from otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/Key.java
rename to otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/Key.java
index da6d4a5f..6628e4d3 100644
--- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/Key.java
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/Key.java
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-package com.alibaba.datax.plugin.reader.otsreader;
+package com.alibaba.datax.plugin.reader.otsreader.utils;
public final class Key {
/* ots account configuration */
@@ -46,5 +46,13 @@ public final class Key {
public final static String RANGE_END = "end";
public final static String RANGE_SPLIT = "split";
+
+ public final static String META_MODE = "metaMode";
+
+ public final static String MODE = "mode";
+ public final static String NEW_VERSION = "newVersion";
+
+ public final static String IS_TIMESERIES_TABLE = "isTimeseriesTable";
+ public final static String MEASUREMENT_NAME = "measurementName";
}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/OtsHelper.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/OtsHelper.java
new file mode 100644
index 00000000..060507b6
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/OtsHelper.java
@@ -0,0 +1,82 @@
+package com.alibaba.datax.plugin.reader.otsreader.utils;
+
+import com.alibaba.datax.plugin.reader.otsreader.callable.GetRangeCallable;
+import com.alibaba.datax.plugin.reader.otsreader.callable.GetTableMetaCallable;
+import com.alibaba.datax.plugin.reader.otsreader.callable.GetTimeseriesSplitCallable;
+import com.alibaba.datax.plugin.reader.otsreader.callable.ScanTimeseriesDataCallable;
+import com.alibaba.datax.plugin.reader.otsreader.model.DefaultNoRetry;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSConf;
+import com.alicloud.openservices.tablestore.ClientConfiguration;
+import com.alicloud.openservices.tablestore.SyncClient;
+import com.alicloud.openservices.tablestore.SyncClientInterface;
+import com.alicloud.openservices.tablestore.core.utils.Pair;
+import com.alicloud.openservices.tablestore.model.ColumnType;
+import com.alicloud.openservices.tablestore.model.GetRangeResponse;
+import com.alicloud.openservices.tablestore.model.RangeRowQueryCriteria;
+import com.alicloud.openservices.tablestore.model.TableMeta;
+import com.alicloud.openservices.tablestore.model.timeseries.ScanTimeseriesDataRequest;
+import com.alicloud.openservices.tablestore.model.timeseries.ScanTimeseriesDataResponse;
+import com.alicloud.openservices.tablestore.model.timeseries.TimeseriesScanSplitInfo;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class OtsHelper {
+
+ public static SyncClientInterface getOTSInstance(OTSConf conf) {
+ ClientConfiguration clientConfigure = new ClientConfiguration();
+ clientConfigure.setIoThreadCount(conf.getIoThreadCount());
+ clientConfigure.setMaxConnections(conf.getMaxConnectCount());
+ clientConfigure.setSocketTimeoutInMillisecond(conf.getSocketTimeoutInMillisecond());
+ clientConfigure.setConnectionTimeoutInMillisecond(conf.getConnectTimeoutInMillisecond());
+ clientConfigure.setRetryStrategy(new DefaultNoRetry());
+
+ SyncClient ots = new SyncClient(
+ conf.getEndpoint(),
+ conf.getAccessId(),
+ conf.getAccessKey(),
+ conf.getInstanceName(),
+ clientConfigure);
+
+
+        Map<String, String> extraHeaders = new HashMap<String, String>();
+ extraHeaders.put("x-ots-sdk-type", "public");
+ extraHeaders.put("x-ots-request-source", "datax-otsreader");
+ ots.setExtraHeaders(extraHeaders);
+
+ return ots;
+ }
+
+ public static TableMeta getTableMeta(SyncClientInterface ots, String tableName, int retry, int sleepInMillisecond) throws Exception {
+ return RetryHelper.executeWithRetry(
+ new GetTableMetaCallable(ots, tableName),
+ retry,
+ sleepInMillisecond
+ );
+ }
+
+ public static GetRangeResponse getRange(SyncClientInterface ots, RangeRowQueryCriteria rangeRowQueryCriteria, int retry, int sleepInMillisecond) throws Exception {
+ return RetryHelper.executeWithRetry(
+ new GetRangeCallable(ots, rangeRowQueryCriteria),
+ retry,
+ sleepInMillisecond
+ );
+ }
+
+    public static List<TimeseriesScanSplitInfo> splitTimeseriesScan(SyncClientInterface ots, String tableName, String measurementName, int splitCountHint, int retry, int sleepInMillisecond) throws Exception {
+ return RetryHelper.executeWithRetry(
+ new GetTimeseriesSplitCallable(ots, tableName, measurementName, splitCountHint),
+ retry,
+ sleepInMillisecond
+ );
+ }
+
+ public static ScanTimeseriesDataResponse scanTimeseriesData(SyncClientInterface ots, ScanTimeseriesDataRequest scanTimeseriesDataRequest, int retry, int sleepInMillisecond) throws Exception {
+ return RetryHelper.executeWithRetry(
+ new ScanTimeseriesDataCallable(ots, scanTimeseriesDataRequest),
+ retry,
+ sleepInMillisecond
+ );
+ }
+}
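
A hedged usage sketch for `OtsHelper`: build the client from `OTSConf`, fetch the table meta under the configured retry budget, and always shut the client down:

```java
import com.alibaba.datax.plugin.reader.otsreader.model.OTSConf;
import com.alibaba.datax.plugin.reader.otsreader.utils.OtsHelper;
import com.alicloud.openservices.tablestore.SyncClientInterface;
import com.alicloud.openservices.tablestore.model.TableMeta;

public class OtsHelperSketch {
    // Fetch table meta with the retry budget configured on OTSConf.
    public static TableMeta fetchMeta(OTSConf conf) throws Exception {
        SyncClientInterface ots = OtsHelper.getOTSInstance(conf);
        try {
            return OtsHelper.getTableMeta(
                    ots, conf.getTableName(), conf.getRetry(), conf.getRetryPauseInMillisecond());
        } finally {
            ots.shutdown();
        }
    }
}
```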
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderError.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/OtsReaderError.java
similarity index 76%
rename from otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderError.java
rename to otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/OtsReaderError.java
index 05a13c1a..b578dcde 100644
--- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/OtsReaderError.java
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/OtsReaderError.java
@@ -1,4 +1,4 @@
-package com.alibaba.datax.plugin.reader.otsreader;
+package com.alibaba.datax.plugin.reader.otsreader.utils;
import com.alibaba.datax.common.spi.ErrorCode;
@@ -14,10 +14,10 @@ public class OtsReaderError implements ErrorCode {
public final static OtsReaderError ERROR = new OtsReaderError(
"OtsReaderError",
- "该错误表示插件的内部错误,表示系统没有处理到的异常");
+ "This error represents an internal error of the otsreader plugin, which indicates that the system is not processed.");
public final static OtsReaderError INVALID_PARAM = new OtsReaderError(
"OtsReaderInvalidParameter",
- "该错误表示参数错误,表示用户输入了错误的参数格式等");
+ "This error represents a parameter error, indicating that the user entered the wrong parameter format.");
public OtsReaderError (String code) {
this.code = code;
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/ParamChecker.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/ParamChecker.java
index fbcdc972..b2139fc1 100644
--- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/ParamChecker.java
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/ParamChecker.java
@@ -1,162 +1,40 @@
package com.alibaba.datax.plugin.reader.otsreader.utils;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-
+import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.util.Configuration;
-import com.alibaba.datax.plugin.reader.otsreader.model.OTSPrimaryKeyColumn;
-import com.alibaba.datax.plugin.reader.otsreader.model.OTSRange;
-import com.aliyun.openservices.ots.model.Direction;
-import com.aliyun.openservices.ots.model.PrimaryKeyType;
-import com.aliyun.openservices.ots.model.PrimaryKeyValue;
-import com.aliyun.openservices.ots.model.RowPrimaryKey;
-import com.aliyun.openservices.ots.model.TableMeta;
+import com.alibaba.datax.plugin.reader.otsreader.model.*;
+import com.alicloud.openservices.tablestore.model.*;
+
+import java.util.*;
public class ParamChecker {
- private static void throwNotExistException(String key) {
- throw new IllegalArgumentException("The param '" + key + "' is not exist.");
+ private static void throwNotExistException() {
+ throw new IllegalArgumentException("missing the key.");
}
- private static void throwStringLengthZeroException(String key) {
- throw new IllegalArgumentException("The param length of '" + key + "' is zero.");
+ private static void throwStringLengthZeroException() {
+ throw new IllegalArgumentException("input the key is empty string.");
}
- private static void throwEmptyException(String key) {
- throw new IllegalArgumentException("The param '" + key + "' is empty.");
- }
-
- private static void throwNotListException(String key) {
- throw new IllegalArgumentException("The param '" + key + "' is not a json array.");
- }
-
- private static void throwNotMapException(String key) {
- throw new IllegalArgumentException("The param '" + key + "' is not a json map.");
- }
-
- public static String checkStringAndGet(Configuration param, String key) {
- String value = param.getString(key);
- if (null == value) {
- throwNotExistException(key);
- } else if (value.length() == 0) {
- throwStringLengthZeroException(key);
- }
- return value;
- }
-
-    public static List<Object> checkListAndGet(Configuration param, String key, boolean isCheckEmpty) {
-        List<Object> value = null;
+ public static String checkStringAndGet(Configuration param, String key, boolean isTrim) throws OTSCriticalException {
try {
- value = param.getList(key);
- } catch (ClassCastException e) {
- throwNotListException(key);
- }
- if (null == value) {
- throwNotExistException(key);
- } else if (isCheckEmpty && value.isEmpty()) {
- throwEmptyException(key);
- }
- return value;
- }
-
-    public static List<Object> checkListAndGet(Map<String, Object> range, String key) {
- Object obj = range.get(key);
- if (null == obj) {
- return null;
- }
- return checkListAndGet(range, key, false);
- }
-
-    public static List<Object> checkListAndGet(Map<String, Object> range, String key, boolean isCheckEmpty) {
- Object obj = range.get(key);
- if (null == obj) {
- throwNotExistException(key);
- }
- if (obj instanceof List) {
- @SuppressWarnings("unchecked")
-            List<Object> value = (List<Object>) obj;
- if (isCheckEmpty && value.isEmpty()) {
- throwEmptyException(key);
+ String value = param.getString(key);
+ if (isTrim) {
+ value = value != null ? value.trim() : null;
+ }
+ if (null == value) {
+ throwNotExistException();
+ } else if (value.length() == 0) {
+ throwStringLengthZeroException();
}
return value;
- } else {
- throw new IllegalArgumentException("Can not parse list of '" + key + "' from map.");
+ } catch(RuntimeException e) {
+ throw new OTSCriticalException("Parse '"+ key +"' fail, " + e.getMessage(), e);
}
}
-    public static List<Object> checkListAndGet(Map<String, Object> range, String key, List<Object> defaultList) {
- Object obj = range.get(key);
- if (null == obj) {
- return defaultList;
- }
- if (obj instanceof List) {
- @SuppressWarnings("unchecked")
-            List<Object> value = (List<Object>) obj;
- return value;
- } else {
- throw new IllegalArgumentException("Can not parse list of '" + key + "' from map.");
- }
- }
-
-    public static Map<String, Object> checkMapAndGet(Configuration param, String key, boolean isCheckEmpty) {
-        Map<String, Object> value = null;
- try {
- value = param.getMap(key);
- } catch (ClassCastException e) {
- throwNotMapException(key);
- }
- if (null == value) {
- throwNotExistException(key);
- } else if (isCheckEmpty && value.isEmpty()) {
- throwEmptyException(key);
- }
- return value;
- }
-
-    public static RowPrimaryKey checkInputPrimaryKeyAndGet(TableMeta meta, List<PrimaryKeyValue> range) {
- if (meta.getPrimaryKey().size() != range.size()) {
- throw new IllegalArgumentException(String.format(
- "Input size of values not equal size of primary key. input size:%d, primary key size:%d .",
- range.size(), meta.getPrimaryKey().size()));
- }
- RowPrimaryKey pk = new RowPrimaryKey();
- int i = 0;
-        for (Entry<String, PrimaryKeyType> e : meta.getPrimaryKey().entrySet()) {
- PrimaryKeyValue value = range.get(i);
- if (e.getValue() != value.getType() && value != PrimaryKeyValue.INF_MIN && value != PrimaryKeyValue.INF_MAX) {
- throw new IllegalArgumentException(
- "Input range type not match primary key. Input type:" + value.getType() + ", Primary Key Type:"+ e.getValue() +", Index:" + i
- );
- } else {
- pk.addPrimaryKeyColumn(e.getKey(), value);
- }
- i++;
- }
- return pk;
- }
-
-    public static OTSRange checkRangeAndGet(TableMeta meta, List<PrimaryKeyValue> begin, List<PrimaryKeyValue> end) {
- OTSRange range = new OTSRange();
- if (begin.size() == 0 && end.size() == 0) {
- RowPrimaryKey beginRow = new RowPrimaryKey();
- RowPrimaryKey endRow = new RowPrimaryKey();
- for (String name : meta.getPrimaryKey().keySet()) {
- beginRow.addPrimaryKeyColumn(name, PrimaryKeyValue.INF_MIN);
- endRow.addPrimaryKeyColumn(name, PrimaryKeyValue.INF_MAX);
- }
- range.setBegin(beginRow);
- range.setEnd(endRow);
- } else {
- RowPrimaryKey beginRow = checkInputPrimaryKeyAndGet(meta, begin);
- RowPrimaryKey endRow = checkInputPrimaryKeyAndGet(meta, end);
- range.setBegin(beginRow);
- range.setEnd(endRow);
- }
- return range;
- }
-
- public static Direction checkDirectionAndEnd(TableMeta meta, RowPrimaryKey begin, RowPrimaryKey end) {
+    public static Direction checkDirectionAndEnd(TableMeta meta, List<PrimaryKeyColumn> begin, List<PrimaryKeyColumn> end) {
Direction direction = null;
int cmp = Common.compareRangeBeginAndEnd(meta, begin, end) ;
@@ -170,76 +48,420 @@ public class ParamChecker {
return direction;
}
- /**
-     * Check that the types are consistent, there are no duplicate columns, and the direction is consistent.
- * @param direction
- * @param before
- * @param after
- */
- private static void checkDirection(Direction direction, PrimaryKeyValue before, PrimaryKeyValue after) {
- int cmp = Common.primaryKeyValueCmp(before, after);
-        if (cmp > 0) { // backward
- if (direction == Direction.FORWARD) {
- throw new IllegalArgumentException("Input direction of 'range-split' is FORWARD, but direction of 'range' is BACKWARD.");
+    public static List<PrimaryKeyColumn> checkInputPrimaryKeyAndGet(TableMeta meta, List<PrimaryKeyValue> range) {
+        if (meta.getPrimaryKeyMap().size() != range.size()) {
+            throw new IllegalArgumentException(String.format(
+                    "Input size of values not equal size of primary key. input size:%d, primary key size:%d .",
+                    range.size(), meta.getPrimaryKeyMap().size()));
+        }
+        List<PrimaryKeyColumn> pk = new ArrayList<>();
+        int i = 0;
+        for (Map.Entry<String, PrimaryKeyType> e : meta.getPrimaryKeyMap().entrySet()) {
+ PrimaryKeyValue value = range.get(i);
+ if (e.getValue() != value.getType() && value != PrimaryKeyValue.INF_MIN && value != PrimaryKeyValue.INF_MAX) {
+ throw new IllegalArgumentException(
+ "Input range type not match primary key. Input type:" + value.getType() + ", Primary Key Type:"+ e.getValue() +", Index:" + i
+ );
+ } else {
+ pk.add(new PrimaryKeyColumn(e.getKey(), value));
}
-        } else if (cmp < 0) { // forward
- if (direction == Direction.BACKWARD) {
- throw new IllegalArgumentException("Input direction of 'range-split' is BACKWARD, but direction of 'range' is FORWARD.");
+ i++;
+ }
+ return pk;
+ }
+
+ public static OTSRange checkRangeAndGet(Configuration param) throws OTSCriticalException {
+ try {
+ OTSRange range = new OTSRange();
+            Map<String, Object> value = param.getMap(Key.RANGE);
+            // range is optional; by default the whole table is exported
+ if (value == null) {
+ return range;
}
-        } else { // duplicate column
- throw new IllegalArgumentException("Multi same column in 'range-split'.");
+
+            /**
+             * Range format: {
+             *     "begin":[],
+             *     "end":[]
+             * }
+             */
+
+            // begin
+            // if absent, read from the start of the table
+ Object arrayObj = value.get(Constant.ConfigKey.Range.BEGIN);
+ if (arrayObj != null) {
+ range.setBegin(ParamParser.parsePrimaryKeyColumnArray(arrayObj));
+ }
+
+            // end
+            // if absent, read through to the end of the table
+ arrayObj = value.get(Constant.ConfigKey.Range.END);
+ if (arrayObj != null) {
+ range.setEnd(ParamParser.parsePrimaryKeyColumnArray(arrayObj));
+ }
+
+            // split
+            // if absent, the range is not split
+ arrayObj = value.get(Constant.ConfigKey.Range.SPLIT);
+ if (arrayObj != null) {
+ range.setSplit(ParamParser.parsePrimaryKeyColumnArray(arrayObj));
+ }
+
+ return range;
+ } catch (RuntimeException e) {
+ throw new OTSCriticalException("Parse 'range' fail, " + e.getMessage(), e);
+ }
+
+ }
+
+ public static TimeRange checkTimeRangeAndGet(Configuration param) throws OTSCriticalException {
+ try {
+
+ long begin = Constant.ConfigDefaultValue.TimeRange.MIN;
+ long end = Constant.ConfigDefaultValue.TimeRange.MAX;
+
+            Map<String, Object> value = param.getMap(Constant.ConfigKey.TIME_RANGE);
+            // timeRange is optional; by default the full time range is exported
+ if (value == null) {
+ return new TimeRange(begin, end);
+ }
+
+            /**
+             * TimeRange format: {
+             *     "begin": <long>,
+             *     "end": <long>
+             * }
+             */
+
+            // begin
+            // if absent, start from the minimum timestamp
+ Object obj = value.get(Constant.ConfigKey.TimeRange.BEGIN);
+ if (obj != null) {
+ begin = ParamParser.parseTimeRangeItem(obj, Constant.ConfigKey.TimeRange.BEGIN);
+ }
+
+            // end
+            // if absent, read up to the maximum timestamp
+ obj = value.get(Constant.ConfigKey.TimeRange.END);
+ if (obj != null) {
+ end = ParamParser.parseTimeRangeItem(obj, Constant.ConfigKey.TimeRange.END);
+ }
+
+ TimeRange range = new TimeRange(begin, end);
+ return range;
+ } catch (RuntimeException e) {
+ throw new OTSCriticalException("Parse 'timeRange' fail, " + e.getMessage(), e);
}
}
- /**
- * 检查 points中的所有点是否是在Begin和end之间
- * @param begin
- * @param end
- * @param points
- */
- private static void checkPointsRange(Direction direction, PrimaryKeyValue begin, PrimaryKeyValue end, List points) {
- if (direction == Direction.FORWARD) {
- if (!(Common.primaryKeyValueCmp(begin, points.get(0)) < 0 && Common.primaryKeyValueCmp(end, points.get(points.size() - 1)) > 0)) {
- throw new IllegalArgumentException("The item of 'range-split' is not within scope of 'range-begin' and 'range-end'.");
+    private static void checkColumnByMode(List<OTSColumn> columns, OTSMode mode) {
+ if (mode == OTSMode.MULTI_VERSION) {
+ for (OTSColumn c : columns) {
+ if (c.getColumnType() != OTSColumn.OTSColumnType.NORMAL) {
+ throw new IllegalArgumentException("in mode:'multiVersion', the 'column' only support specify column_name not const column.");
+ }
}
} else {
- if (!(Common.primaryKeyValueCmp(begin, points.get(0)) > 0 && Common.primaryKeyValueCmp(end, points.get(points.size() - 1)) < 0)) {
- throw new IllegalArgumentException("The item of 'range-split' is not within scope of 'range-begin' and 'range-end'.");
+ if (columns.isEmpty()) {
+ throw new IllegalArgumentException("in mode:'normal', the 'column' must specify at least one column_name or const column.");
+ }
+ }
+ }
+
+    public static List<OTSColumn> checkOTSColumnAndGet(Configuration param, OTSMode mode) throws OTSCriticalException {
+        try {
+            List<Object> value = param.getList(Key.COLUMN);
+            // column is optional
+            if (value == null) {
+                value = Collections.emptyList();
+ }
+
+            /**
+             * Column format: [
+             *     {"name":"pk1"},
+             *     {"type":"Binary","value" : "base64()"}
+             * ]
+             */
+            List<OTSColumn> columns = ParamParser.parseOTSColumnArray(value);
+ checkColumnByMode(columns, mode);
+ return columns;
+ } catch (RuntimeException e) {
+ throw new OTSCriticalException("Parse 'column' fail, " + e.getMessage(), e);
+ }
+ }
+
+    public static List<OTSColumn> checkTimeseriesColumnAndGet(Configuration param) throws OTSCriticalException {
+        try {
+            List<Object> value = param.getList(Key.COLUMN);
+            List<OTSColumn> columns = ParamParser.parseOTSColumnArray(value);
+
+            List<ColumnType> columnTypes = checkColumnTypeAndGet(param);
+            List<Boolean> isTags = checkColumnIsTagAndGet(param);
+
+ for (int i = 0; i < columns.size(); i++) {
+ columns.get(i).setValueType(columnTypes.get(i));
+ columns.get(i).setTimeseriesTag(isTags.get(i));
+ }
+
+ checkColumnByMode(columns, OTSMode.NORMAL);
+ return columns;
+ } catch (RuntimeException e) {
+ throw new OTSCriticalException("Parse 'column' fail, " + e.getMessage(), e);
+ }
+ }
+
+    public static List<ColumnType> checkColumnTypeAndGet(Configuration param) throws OTSCriticalException {
+        try {
+            List<Object> value = param.getList(Key.COLUMN);
+            List<ColumnType> columnTypes = ParamParser.parseColumnTypeArray(value);
+            return columnTypes;
+ } catch (RuntimeException e) {
+ throw new OTSCriticalException("Parse 'type of column' fail, " + e.getMessage(), e);
+ }
+ }
+
+    public static List<Boolean> checkColumnIsTagAndGet(Configuration param) throws OTSCriticalException {
+        try {
+            List<Object> value = param.getList(Key.COLUMN);
+            List<Boolean> columnIsTag = ParamParser.parseColumnIsTagArray(value);
+            return columnIsTag;
+ } catch (RuntimeException e) {
+ throw new OTSCriticalException("Parse 'isTag of column' fail, " + e.getMessage(), e);
+ }
+ }
+
+ public static OTSMode checkModeAndGet(Configuration param) throws OTSCriticalException {
+ try {
+ String modeValue = param.getString(Key.MODE, "normal");
+ if (modeValue.equalsIgnoreCase(Constant.ConfigDefaultValue.Mode.NORMAL)) {
+ return OTSMode.NORMAL;
+ } else if (modeValue.equalsIgnoreCase(Constant.ConfigDefaultValue.Mode.MULTI_VERSION)) {
+ return OTSMode.MULTI_VERSION;
+ } else {
+ throw new IllegalArgumentException("the 'mode' only support 'normal' and 'multiVersion' not '"+ modeValue +"'.");
+ }
+ } catch(RuntimeException e) {
+ throw new OTSCriticalException("Parse 'mode' fail, " + e.getMessage(), e);
+ }
+ }
+
+ public static void checkTimeseriesMode(OTSMode mode, Boolean isNewVersion) throws OTSCriticalException {
+ if (mode == OTSMode.MULTI_VERSION){
+ throw new OTSCriticalException("Timeseries table do not support mode : multiVersion." );
+ } else if (!isNewVersion){
+ throw new OTSCriticalException("Timeseries table is only supported in newVersion, please set \"newVersion\": \"true\"." );
+ }
+ }
+
+    public static List<PrimaryKeyColumn> checkAndGetPrimaryKey(
+            List<PrimaryKeyColumn> pk,
+            List<PrimaryKeySchema> pkSchema,
+            String jsonKey) {
+        List<PrimaryKeyColumn> result = new ArrayList<PrimaryKeyColumn>();
+        if (pk != null) {
+            if (pk.size() > pkSchema.size()) {
+                throw new IllegalArgumentException("The '"+ jsonKey +"', input primary key column size more than table meta, input size: "+ pk.size()
+ +", meta pk size:" + pkSchema.size());
+ } else {
+                // type check
+ for (int i = 0; i < pk.size(); i++) {
+ PrimaryKeyValue pkc = pk.get(i).getValue();
+ PrimaryKeySchema pkcs = pkSchema.get(i);
+
+ if (!pkc.isInfMin() && !pkc.isInfMax() ) {
+ if (pkc.getType() != pkcs.getType()) {
+ throw new IllegalArgumentException(
+ "The '"+ jsonKey +"', input primary key column type mismath table meta, input type:"+ pkc.getType()
+ +", meta pk type:"+ pkcs.getType()
+ +", index:" + i);
+ }
+ }
+ result.add(new PrimaryKeyColumn(pkcs.getName(), pkc));
+ }
+ }
+ return result;
+ } else {
+            return new ArrayList<PrimaryKeyColumn>();
+ }
+ }
+
+    /**
+     * Check that the type of each split point matches the partition key.
+     * @param points
+     * @param pkSchema
+     */
+    private static List<PrimaryKeyColumn> checkAndGetSplit(
+            List<PrimaryKeyColumn> points,
+            List<PrimaryKeySchema> pkSchema) {
+        List<PrimaryKeyColumn> result = new ArrayList<PrimaryKeyColumn>();
+ if (points == null) {
+ return result;
+ }
+
+        // only the type needs to match the partition key
+ PrimaryKeySchema partitionKeySchema = pkSchema.get(0);
+ for (int i = 0 ; i < points.size(); i++) {
+ PrimaryKeyColumn p = points.get(i);
+ if (!p.getValue().isInfMin() && !p.getValue().isInfMax()) {
+ if (p.getValue().getType() != partitionKeySchema.getType()) {
+ throw new IllegalArgumentException("The 'split', input primary key column type is mismatch partition key, input type: "+ p.getValue().getType().toString()
+ +", partition key type:" + partitionKeySchema.getType().toString()
+ +", index:" + i);
+ }
+ }
+ result.add(new PrimaryKeyColumn(partitionKeySchema.getName(), p.getValue()));
+ }
+
+ return result;
+ }
+
+    public static void fillPrimaryKey(List<PrimaryKeySchema> pkSchema, List<PrimaryKeyColumn> pk, PrimaryKeyValue fillValue) {
+        for (int i = pk.size(); i < pkSchema.size(); i++) {
+            pk.add(new PrimaryKeyColumn(pkSchema.get(i).getName(), fillValue));
+        }
+    }
+
+    private static void fillBeginAndEnd(
+            List<PrimaryKeyColumn> begin,
+            List<PrimaryKeyColumn> end,
+            List<PrimaryKeySchema> pkSchema) {
+        if (begin.isEmpty()) {
+            fillPrimaryKey(pkSchema, begin, PrimaryKeyValue.INF_MIN);
+        }
+        if (end.isEmpty()) {
+            fillPrimaryKey(pkSchema, end, PrimaryKeyValue.INF_MAX);
+        }
+        int cmp = CompareHelper.comparePrimaryKeyColumnList(begin, end);
+        if (cmp == 0) {
+            // begin.size() and end.size() are in theory always equal; the explicit
+            // begin.size() == end.size() check is kept for clarity
+            if (begin.size() == end.size() && begin.size() < pkSchema.size()) {
+                fillPrimaryKey(pkSchema, begin, PrimaryKeyValue.INF_MIN);
+                fillPrimaryKey(pkSchema, end, PrimaryKeyValue.INF_MAX);
+            } else {
+                throw new IllegalArgumentException("The 'begin' cannot be equal to 'end'.");
+            }
+        } else if (cmp < 0) { // ascending
+            fillPrimaryKey(pkSchema, begin, PrimaryKeyValue.INF_MIN);
+            fillPrimaryKey(pkSchema, end, PrimaryKeyValue.INF_MAX);
+        } else { // descending
+            fillPrimaryKey(pkSchema, begin, PrimaryKeyValue.INF_MAX);
+            fillPrimaryKey(pkSchema, end, PrimaryKeyValue.INF_MIN);
+        }
+    }
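+
+    // Worked example (hypothetical two-column PK [pk1, pk2]): begin = ["a"], end = ["b"]
+    // compares ascending, so the endpoints are padded to begin = ["a", INF_MIN] and
+    // end = ["b", INF_MAX]; for a descending range the fill values are swapped, so the
+    // scan still covers every row under the user-specified prefix.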
+
+    private static void checkBeginAndEndAndSplit(
+            List<PrimaryKeyColumn> begin,
+            List<PrimaryKeyColumn> end,
+            List<PrimaryKeyColumn> split) {
+        int cmp = CompareHelper.comparePrimaryKeyColumnList(begin, end);
+
+        if (!split.isEmpty()) {
+            if (cmp < 0) { // ascending
+                // check that the split points are strictly ascending
+                for (int i = 0; i < split.size() - 1; i++) {
+                    PrimaryKeyColumn before = split.get(i);
+                    PrimaryKeyColumn after = split.get(i + 1);
+                    if (before.compareTo(after) >= 0) {
+                        throw new IllegalArgumentException("In 'split', the item value is not increasing, index: " + i);
+                    }
+                }
+                if (begin.get(0).compareTo(split.get(0)) >= 0) {
+                    throw new IllegalArgumentException("The 'begin' must be less than the head of 'split'.");
+                }
+                if (split.get(split.size() - 1).compareTo(end.get(0)) >= 0) {
+                    throw new IllegalArgumentException("The tail of 'split' must be less than 'end'.");
+                }
+            } else if (cmp > 0) { // descending
+                // check that the split points are strictly descending
+                for (int i = 0; i < split.size() - 1; i++) {
+                    PrimaryKeyColumn before = split.get(i);
+                    PrimaryKeyColumn after = split.get(i + 1);
+                    if (before.compareTo(after) <= 0) {
+                        throw new IllegalArgumentException("In 'split', the item value is not descending, index: " + i);
+                    }
+                }
+                if (begin.get(0).compareTo(split.get(0)) <= 0) {
+                    throw new IllegalArgumentException("The 'begin' must be greater than the head of 'split'.");
+                }
+                if (split.get(split.size() - 1).compareTo(end.get(0)) <= 0) {
+                    throw new IllegalArgumentException("The tail of 'split' must be greater than 'end'.");
+                }
+            } else {
+                throw new IllegalArgumentException("The 'begin' cannot be equal to 'end'.");
}
}
}
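+
+    // For example (sketch): with an ascending range begin=[1], end=[100], a valid
+    // 'split' is [10, 50, 90] -- strictly increasing and strictly inside (begin, end);
+    // [10, 10, 90] or [0, 50, 90] would be rejected by the checks above.
+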
/**
-     * 1. Check that the user input types match the partition key
-     * 2. Check that the order is consistent with the range
-     * 3. Check that there are no duplicate columns
-     * 4. Check that the points fall within the range
-     * @param meta
-     * @param points
+     * Fill in incomplete primary keys, and check that the relationship
+     * among begin, end, and split is as expected.
+     * @param begin
+     * @param end
+     * @param split
*/
-    public static void checkInputSplitPoints(TableMeta meta, OTSRange range, Direction direction, List<PrimaryKeyValue> points) {
-        if (null == points || points.isEmpty()) {
-            return;
-        }
+    private static void fillAndcheckBeginAndEndAndSplit(
+            List<PrimaryKeyColumn> begin,
+            List<PrimaryKeyColumn> end,
+            List<PrimaryKeyColumn> split,
+            List<PrimaryKeySchema> pkSchema
+    ) {
-        OTSPrimaryKeyColumn part = Common.getPartitionKey(meta);
-
-        // handle the first point
-        PrimaryKeyValue item = points.get(0);
-        if (item.getType() != part.getType()) {
-            throw new IllegalArgumentException("Input type of 'range-split' does not match the partition key. "
-                    + "Item of 'range-split' type: " + item.getType() + ", partition type: " + part.getType());
-        }
-
-        for (int i = 0; i < points.size() - 1; i++) {
-            PrimaryKeyValue before = points.get(i);
-            PrimaryKeyValue after = points.get(i + 1);
-            checkDirection(direction, before, after);
-        }
-
-        PrimaryKeyValue begin = range.getBegin().getPrimaryKey().get(part.getName());
-        PrimaryKeyValue end = range.getEnd().getPrimaryKey().get(part.getName());
-
-        checkPointsRange(direction, begin, end, points);
+        fillBeginAndEnd(begin, end, pkSchema);
+        checkBeginAndEndAndSplit(begin, end, split);
}
+
+    public static void checkAndSetOTSRange(OTSRange range, TableMeta meta) throws OTSCriticalException {
+        try {
+            List<PrimaryKeySchema> pkSchema = meta.getPrimaryKeyList();
+
+            // check whether begin and end are consistent with the PK types
+            range.setBegin(checkAndGetPrimaryKey(range.getBegin(), pkSchema, Constant.ConfigKey.Range.BEGIN));
+            range.setEnd(checkAndGetPrimaryKey(range.getEnd(), pkSchema, Constant.ConfigKey.Range.END));
+            range.setSplit(checkAndGetSplit(range.getSplit(), pkSchema));
+
+            // 1. fill begin and end
+            // 2. check that the order of begin, end, and split is correct
+            fillAndcheckBeginAndEndAndSplit(range.getBegin(), range.getEnd(), range.getSplit(), pkSchema);
+        } catch (RuntimeException e) {
+            throw new OTSCriticalException("Failed to parse 'range', " + e.getMessage(), e);
+        }
+    }
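+
+    // A typical 'range' section this method validates might look like (a sketch;
+    // the key names follow Constant.ConfigKey.Range used above):
+    //   "range": { "begin": [{"type": "INF_MIN"}], "end": [{"type": "INF_MAX"}], "split": [] }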
+
+    public static void checkAndSetColumn(List<OTSColumn> columns, TableMeta meta, OTSMode mode) throws OTSCriticalException {
+        try {
+            if (mode == OTSMode.MULTI_VERSION) {
+                Set<String> uniqueColumn = new HashSet<String>();
+                Map<String, PrimaryKeyType> pk = meta.getPrimaryKeyMap();
+                for (OTSColumn c : columns) {
+                    // must not include primary key columns
+                    if (pk.get(c.getName()) != null) {
+                        throw new IllegalArgumentException("In mode 'multiVersion', the 'column' cannot include primary key columns, input: " + c.getName() + ".");
+                    }
+                    // must not include duplicate columns
+                    if (uniqueColumn.contains(c.getName())) {
+                        throw new IllegalArgumentException("In mode 'multiVersion', the 'column' cannot include duplicate columns, input: " + c.getName() + ".");
+                    } else {
+                        uniqueColumn.add(c.getName());
+                    }
+                }
+            }
+        } catch (RuntimeException e) {
+            throw new OTSCriticalException("Failed to parse 'column', " + e.getMessage(), e);
+        }
+    }
+
+    public static void normalCheck(OTSConf conf) {
+        // the old version does not support multiVersion mode
+        if (!conf.isNewVersion() && conf.getMode() == OTSMode.MULTI_VERSION) {
+            throw new IllegalArgumentException("The old version does not support multiVersion mode. Please add \"newVersion\":\"true\" to the otsreader config.");
+        }
+    }
+
+    public static void checkAndSetOTSConf(OTSConf conf, TableMeta meta) throws OTSCriticalException {
+        normalCheck(conf);
+        checkAndSetOTSRange(conf.getRange(), meta);
+        checkAndSetColumn(conf.getColumn(), meta, conf.getMode());
+    }
+
}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/ParamCheckerOld.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/ParamCheckerOld.java
new file mode 100644
index 00000000..3489ab35
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/ParamCheckerOld.java
@@ -0,0 +1,36 @@
+package com.alibaba.datax.plugin.reader.otsreader.utils;
+
+import com.alibaba.datax.common.util.Configuration;
+
+import java.util.List;
+
+public class ParamCheckerOld {
+
+    private static void throwNotExistException(String key) {
+        throw new IllegalArgumentException("The param '" + key + "' does not exist.");
+    }
+
+    private static void throwEmptyException(String key) {
+        throw new IllegalArgumentException("The param '" + key + "' is empty.");
+    }
+
+    private static void throwNotListException(String key) {
+        throw new IllegalArgumentException("The param '" + key + "' is not a json array.");
+    }
+
+    public static List<Object> checkListAndGet(Configuration param, String key, boolean isCheckEmpty) {
+        List<Object> value = null;
+        try {
+            value = param.getList(key);
+        } catch (ClassCastException e) {
+            throwNotListException(key);
+        }
+        if (null == value) {
+            throwNotExistException(key);
+        } else if (isCheckEmpty && value.isEmpty()) {
+            throwEmptyException(key);
+        }
+        return value;
+    }
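+
+    // Usage sketch: checkListAndGet(param, "column", true) returns the 'column'
+    // array, and throws if the key is absent, is not a json array, or (because
+    // isCheckEmpty is true) is an empty list.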
+
+}
diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/ParamParser.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/ParamParser.java
new file mode 100644
index 00000000..862b915c
--- /dev/null
+++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/ParamParser.java
@@ -0,0 +1,255 @@
+package com.alibaba.datax.plugin.reader.otsreader.utils;
+
+import com.alibaba.datax.common.element.Column;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSColumn;
+import com.alibaba.datax.plugin.reader.otsreader.model.OTSCriticalException;
+import com.alicloud.openservices.tablestore.model.ColumnType;
+import com.alicloud.openservices.tablestore.model.PrimaryKeyColumn;
+import com.alicloud.openservices.tablestore.model.PrimaryKeyValue;
+import org.apache.commons.codec.binary.Base64;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+public class ParamParser {
+
+    // ------------------------------------------------------------------------
+    // Logic related to parsing the 'range' config
+    // ------------------------------------------------------------------------
+
+    private static PrimaryKeyValue parsePrimaryKeyValue(String type) {
+        return parsePrimaryKeyValue(type, null);
+    }
+
+    private static PrimaryKeyValue parsePrimaryKeyValue(String type, String value) {
+        if (type.equalsIgnoreCase(Constant.ValueType.INF_MIN)) {
+            return PrimaryKeyValue.INF_MIN;
+        } else if (type.equalsIgnoreCase(Constant.ValueType.INF_MAX)) {
+            return PrimaryKeyValue.INF_MAX;
+        } else {
+            if (value != null) {
+                if (type.equalsIgnoreCase(Constant.ValueType.STRING)) {
+                    return PrimaryKeyValue.fromString(value);
+                } else if (type.equalsIgnoreCase(Constant.ValueType.INTEGER)) {
+                    return PrimaryKeyValue.fromLong(Long.valueOf(value));
+                } else if (type.equalsIgnoreCase(Constant.ValueType.BINARY)) {
+                    return PrimaryKeyValue.fromBinary(Base64.decodeBase64(value));
+                } else {
+                    throw new IllegalArgumentException("The column type only supports: ['INF_MIN', 'INF_MAX', 'string', 'int', 'binary'].");
+                }
+            } else {
+                throw new IllegalArgumentException("The column is missing the field 'value', input 'type': " + type);
+            }
+        }
+    }
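+
+    // Mapping sketch (illustrative values): {"type": "INF_MIN"} and {"type": "INF_MAX"}
+    // need no 'value'; {"type": "string", "value": "abc"} -> fromString("abc");
+    // {"type": "int", "value": "100"} -> fromLong(100);
+    // {"type": "binary", "value": "aGk="} -> fromBinary(base64-decoded bytes).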
+
+    private static PrimaryKeyColumn parsePrimaryKeyColumn(Map<String, Object> item) {
+        Object typeObj = item.get(Constant.ConfigKey.PrimaryKeyColumn.TYPE);
+        Object valueObj = item.get(Constant.ConfigKey.PrimaryKeyColumn.VALUE);
+
+        if (typeObj != null && valueObj != null) {
+            if (typeObj instanceof String && valueObj instanceof String) {
+                return new PrimaryKeyColumn(
+                        Constant.ConfigDefaultValue.DEFAULT_NAME,
+                        parsePrimaryKeyValue((String) typeObj, (String) valueObj)
+                );
+            } else {
+                throw new IllegalArgumentException(
+                        "The column's 'type' and 'value' must be string values, "
+                                + "but the type of 'type' is: " + typeObj.getClass()
+                                + ", the type of 'value' is: " + valueObj.getClass()
+                );
+            }
+        } else if (typeObj != null) {
+            if (typeObj instanceof String) {
+                return new PrimaryKeyColumn(
+                        Constant.ConfigDefaultValue.DEFAULT_NAME,
+                        parsePrimaryKeyValue((String) typeObj)
+                );
+            } else {
+                throw new IllegalArgumentException(
+                        "The column's 'type' must be a string value, "
+                                + "but the type of 'type' is: " + typeObj.getClass()
+                );
+            }
+        } else {
+            throw new IllegalArgumentException("The column must include 'type' and 'value'.");
+        }
+    }
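+
+    // For example (sketch): {"type": "string", "value": "abc"} parses into a
+    // PrimaryKeyColumn carrying the default name; a value-less item such as
+    // {"type": "INF_MIN"} is only legal for the INF_MIN / INF_MAX sentinel types.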
+
+ @SuppressWarnings("unchecked")
+    public static List<PrimaryKeyColumn> parsePrimaryKeyColumnArray(Object arrayObj) throws OTSCriticalException {
+ try {
+ List columns = new ArrayList