diff --git a/NOTICE b/NOTICE new file mode 100644 index 00000000..eb7cbe4b --- /dev/null +++ b/NOTICE @@ -0,0 +1,39 @@ +======================================================== +DataX 是阿里云 DataWorks数据集成 的开源版本,在阿里巴巴集团内被广泛使用的离线数据同步工具/平台。DataX 实现了包括 MySQL、Oracle、OceanBase、SqlServer、Postgre、HDFS、Hive、ADS、HBase、TableStore(OTS)、MaxCompute(ODPS)、Hologres、DRDS 等各种异构数据源之间高效的数据同步功能。 + +DataX is an open source offline data synchronization tool / platform widely used in Alibaba group and other companies. DataX implements efficient data synchronization between heterogeneous data sources including mysql, Oracle, oceanbase, sqlserver, postgre, HDFS, hive, ads, HBase, tablestore (OTS), maxcompute (ODPs), hologres, DRDS, etc. + +Copyright 1999-2022 Alibaba Group Holding Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +=================================================================== +文级别引用,按许可证 +This product contains various third-party components under other open source licenses. +This section summarizes those components and their licenses. +GNU Lesser General Public License +-------------------------------------- +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/CliQuery.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/Connection4TSDB.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DataPoint4TSDB.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DumpSeries.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnection.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBDump.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Constant.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Key.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReader.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReaderErrorCode.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/HttpUtils.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TSDBUtils.java +opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TimeUtils.java +=================================================================== diff --git a/README.md b/README.md index 681de048..01bbc3ea 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,13 @@ ![Datax-logo](https://github.com/alibaba/DataX/blob/master/images/DataX-logo.jpg) - # DataX -DataX 是阿里云 [DataWorks数据集成](https://www.aliyun.com/product/bigdata/ide) 的开源版本,在阿里巴巴集团内被广泛使用的离线数据同步工具/平台。DataX 实现了包括 MySQL、Oracle、SqlServer、Postgre、HDFS、Hive、ADS、HBase、TableStore(OTS)、MaxCompute(ODPS)、Hologres、DRDS 等各种异构数据源之间高效的数据同步功能。 +[![Leaderboard](https://img.shields.io/badge/DataX-%E6%9F%A5%E7%9C%8B%E8%B4%A1%E7%8C%AE%E6%8E%92%E8%A1%8C%E6%A6%9C-orange)](https://opensource.alibaba.com/contribution_leaderboard/details?projectValue=datax) + +DataX 是阿里云 [DataWorks数据集成](https://www.aliyun.com/product/bigdata/ide) 
的开源版本,在阿里巴巴集团内被广泛使用的离线数据同步工具/平台。DataX 实现了包括 MySQL、Oracle、OceanBase、SqlServer、Postgre、HDFS、Hive、ADS、HBase、TableStore(OTS)、MaxCompute(ODPS)、Hologres、DRDS, databend 等各种异构数据源之间高效的数据同步功能。 # DataX 商业版本 -阿里云DataWorks数据集成是DataX团队在阿里云上的商业化产品,致力于提供复杂网络环境下、丰富的异构数据源之间高速稳定的数据移动能力,以及繁杂业务背景下的数据同步解决方案。目前已经支持云上近3000家客户,单日同步数据超过3万亿条。DataWorks数据集成目前支持离线50+种数据源,可以进行整库迁移、批量上云、增量同步、分库分表等各类同步解决方案。2020年更新实时同步能力,2020年更新实时同步能力,支持10+种数据源的读写任意组合。提供MySQL,Oracle等多种数据源到阿里云MaxCompute,Hologres等大数据引擎的一键全增量同步解决方案。 +阿里云DataWorks数据集成是DataX团队在阿里云上的商业化产品,致力于提供复杂网络环境下、丰富的异构数据源之间高速稳定的数据移动能力,以及繁杂业务背景下的数据同步解决方案。目前已经支持云上近3000家客户,单日同步数据超过3万亿条。DataWorks数据集成目前支持离线50+种数据源,可以进行整库迁移、批量上云、增量同步、分库分表等各类同步解决方案。2020年更新实时同步能力,支持10+种数据源的读写任意组合。提供MySQL,Oracle等多种数据源到阿里云MaxCompute,Hologres等大数据引擎的一键全增量同步解决方案。 商业版本参见: https://www.aliyun.com/product/bigdata/ide @@ -25,7 +26,8 @@ DataX本身作为数据同步框架,将不同数据源的同步抽象为从源 # Quick Start -##### Download [DataX下载地址](http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz) +##### Download [DataX下载地址](https://datax-opensource.oss-cn-hangzhou.aliyuncs.com/202303/datax.tar.gz) + ##### 请点击:[Quick Start](https://github.com/alibaba/DataX/blob/master/userGuid.md) @@ -35,32 +37,47 @@ DataX本身作为数据同步框架,将不同数据源的同步抽象为从源 DataX目前已经有了比较全面的插件体系,主流的RDBMS数据库、NOSQL、大数据计算系统都已经接入,目前支持数据如下图,详情请点击:[DataX数据源参考指南](https://github.com/alibaba/DataX/wiki/DataX-all-data-channels) -| 类型 | 数据源 | Reader(读) | Writer(写) |文档| -| ------------ | ---------- | :-------: | :-------: |:-------: | -| RDBMS 关系型数据库 | MySQL | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/mysqlreader/doc/mysqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/mysqlwriter/doc/mysqlwriter.md)| -|             | Oracle     |     √     |     √     |[读](https://github.com/alibaba/DataX/blob/master/oraclereader/doc/oraclereader.md) 、[写](https://github.com/alibaba/DataX/blob/master/oraclewriter/doc/oraclewriter.md)| -| | SQLServer | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/sqlserverreader/doc/sqlserverreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/sqlserverwriter/doc/sqlserverwriter.md)| -| | PostgreSQL | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/postgresqlreader/doc/postgresqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/postgresqlwriter/doc/postgresqlwriter.md)| -| | DRDS | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/drdsreader/doc/drdsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/drdswriter/doc/drdswriter.md)| -| | 通用RDBMS(支持所有关系型数据库) | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/rdbmsreader/doc/rdbmsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/rdbmswriter/doc/rdbmswriter.md)| -| 阿里云数仓数据存储 | ODPS | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/odpsreader/doc/odpsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/odpswriter/doc/odpswriter.md)| -| | ADS | | √ |[写](https://github.com/alibaba/DataX/blob/master/adswriter/doc/adswriter.md)| -| | OSS | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/ossreader/doc/ossreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/osswriter/doc/osswriter.md)| -| | OCS | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/ocsreader/doc/ocsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/ocswriter/doc/ocswriter.md)| -| NoSQL数据存储 | OTS | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/otsreader/doc/otsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/otswriter/doc/otswriter.md)| -| | Hbase0.94 | √ | √ 
|[读](https://github.com/alibaba/DataX/blob/master/hbase094xreader/doc/hbase094xreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase094xwriter/doc/hbase094xwriter.md)| -| | Hbase1.1 | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/hbase11xreader/doc/hbase11xreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase11xwriter/doc/hbase11xwriter.md)| -| | Phoenix4.x | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/hbase11xsqlreader/doc/hbase11xsqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase11xsqlwriter/doc/hbase11xsqlwriter.md)| -| | Phoenix5.x | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/hbase20xsqlreader/doc/hbase20xsqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase20xsqlwriter/doc/hbase20xsqlwriter.md)| -| | MongoDB | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/mongodbreader/doc/mongodbreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/mongodbwriter/doc/mongodbwriter.md)| -| | Hive | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/hdfsreader/doc/hdfsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md)| -| | Cassandra | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/cassandrareader/doc/cassandrareader.md) 、[写](https://github.com/alibaba/DataX/blob/master/cassandrawriter/doc/cassandrawriter.md)| -| 无结构化数据存储 | TxtFile | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/txtfilereader/doc/txtfilereader.md) 、[写](https://github.com/alibaba/DataX/blob/master/txtfilewriter/doc/txtfilewriter.md)| -| | FTP | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/ftpreader/doc/ftpreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/ftpwriter/doc/ftpwriter.md)| -| | HDFS | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/hdfsreader/doc/hdfsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md)| -| | Elasticsearch | | √ |[写](https://github.com/alibaba/DataX/blob/master/elasticsearchwriter/doc/elasticsearchwriter.md)| -| 时间序列数据库 | OpenTSDB | √ | |[读](https://github.com/alibaba/DataX/blob/master/opentsdbreader/doc/opentsdbreader.md)| -| | TSDB | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/tsdbreader/doc/tsdbreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/tsdbwriter/doc/tsdbhttpwriter.md)| +| 类型 | 数据源 | Reader(读) | Writer(写) | 文档 | +|--------------|---------------------------|:---------:|:---------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| +| RDBMS 关系型数据库 | MySQL | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/mysqlreader/doc/mysqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/mysqlwriter/doc/mysqlwriter.md) | +| | Oracle | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/oraclereader/doc/oraclereader.md) 、[写](https://github.com/alibaba/DataX/blob/master/oraclewriter/doc/oraclewriter.md) | +| | OceanBase | √ | √ | [读](https://open.oceanbase.com/docs/community/oceanbase-database/V3.1.0/use-datax-to-full-migration-data-to-oceanbase) 、[写](https://open.oceanbase.com/docs/community/oceanbase-database/V3.1.0/use-datax-to-full-migration-data-to-oceanbase) | +| | SQLServer | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/sqlserverreader/doc/sqlserverreader.md) 
、[写](https://github.com/alibaba/DataX/blob/master/sqlserverwriter/doc/sqlserverwriter.md) | +| | PostgreSQL | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/postgresqlreader/doc/postgresqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/postgresqlwriter/doc/postgresqlwriter.md) | +| | DRDS | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/drdsreader/doc/drdsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/drdswriter/doc/drdswriter.md) | +| | Kingbase | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/drdsreader/doc/drdsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/drdswriter/doc/drdswriter.md) | +| | 通用RDBMS(支持所有关系型数据库) | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/rdbmsreader/doc/rdbmsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/rdbmswriter/doc/rdbmswriter.md) | +| 阿里云数仓数据存储 | ODPS | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/odpsreader/doc/odpsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/odpswriter/doc/odpswriter.md) | +| | ADB | | √ | [写](https://github.com/alibaba/DataX/blob/master/adbmysqlwriter/doc/adbmysqlwriter.md) | +| | ADS | | √ | [写](https://github.com/alibaba/DataX/blob/master/adswriter/doc/adswriter.md) | +| | OSS | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/ossreader/doc/ossreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/osswriter/doc/osswriter.md) | +| | OCS | | √ | [写](https://github.com/alibaba/DataX/blob/master/ocswriter/doc/ocswriter.md) | +| | Hologres | | √ | [写](https://github.com/alibaba/DataX/blob/master/hologresjdbcwriter/doc/hologresjdbcwriter.md) | +| | AnalyticDB For PostgreSQL | | √ | 写 | +| 阿里云中间件 | datahub | √ | √ | 读 、写 | +| | SLS | √ | √ | 读 、写 | +| 阿里云图数据库 | GDB | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/gdbreader/doc/gdbreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/gdbwriter/doc/gdbwriter.md) | +| NoSQL数据存储 | OTS | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/otsreader/doc/otsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/otswriter/doc/otswriter.md) | +| | Hbase0.94 | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hbase094xreader/doc/hbase094xreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase094xwriter/doc/hbase094xwriter.md) | +| | Hbase1.1 | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hbase11xreader/doc/hbase11xreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase11xwriter/doc/hbase11xwriter.md) | +| | Phoenix4.x | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hbase11xsqlreader/doc/hbase11xsqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase11xsqlwriter/doc/hbase11xsqlwriter.md) | +| | Phoenix5.x | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hbase20xsqlreader/doc/hbase20xsqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hbase20xsqlwriter/doc/hbase20xsqlwriter.md) | +| | MongoDB | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/mongodbreader/doc/mongodbreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/mongodbwriter/doc/mongodbwriter.md) | +| | Cassandra | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/cassandrareader/doc/cassandrareader.md) 、[写](https://github.com/alibaba/DataX/blob/master/cassandrawriter/doc/cassandrawriter.md) | +| 数仓数据存储 | StarRocks | √ | √ | 读 、[写](https://github.com/alibaba/DataX/blob/master/starrockswriter/doc/starrockswriter.md) | +| | ApacheDoris | | √ | 
[写](https://github.com/alibaba/DataX/blob/master/doriswriter/doc/doriswriter.md) | +| | ClickHouse | | √ | 写 | +| | Databend | | √ | [写](https://github.com/alibaba/DataX/blob/master/databendwriter/doc/databendwriter.md) | +| | Hive | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hdfsreader/doc/hdfsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md) | +| | kudu | | √ | [写](https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md) | +| | selectdb | | √ | [写](https://github.com/alibaba/DataX/blob/master/selectdbwriter/doc/selectdbwriter.md) | +| 无结构化数据存储 | TxtFile | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/txtfilereader/doc/txtfilereader.md) 、[写](https://github.com/alibaba/DataX/blob/master/txtfilewriter/doc/txtfilewriter.md) | +| | FTP | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/ftpreader/doc/ftpreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/ftpwriter/doc/ftpwriter.md) | +| | HDFS | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/hdfsreader/doc/hdfsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md) | +| | Elasticsearch | | √ | [写](https://github.com/alibaba/DataX/blob/master/elasticsearchwriter/doc/elasticsearchwriter.md) | +| 时间序列数据库 | OpenTSDB | √ | | [读](https://github.com/alibaba/DataX/blob/master/opentsdbreader/doc/opentsdbreader.md) | +| | TSDB | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/tsdbreader/doc/tsdbreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/tsdbwriter/doc/tsdbhttpwriter.md) | +| | TDengine | √ | √ | [读](https://github.com/alibaba/DataX/blob/master/tdenginereader/doc/tdenginereader-CN.md) 、[写](https://github.com/alibaba/DataX/blob/master/tdenginewriter/doc/tdenginewriter-CN.md) | # 阿里云DataWorks数据集成 @@ -88,6 +105,25 @@ DataX目前已经有了比较全面的插件体系,主流的RDBMS数据库、N 请点击:[DataX插件开发宝典](https://github.com/alibaba/DataX/blob/master/dataxPluginDev.md) +# 重要版本更新说明 + +DataX 后续计划月度迭代更新,也欢迎感兴趣的同学提交 Pull requests,月度更新内容会介绍介绍如下。 + +- [datax_v202303](https://github.com/alibaba/DataX/releases/tag/datax_v202303) + - 精简代码 + - 新增插件(adbmysqlwriter、databendwriter、selectdbwriter) + - 优化插件、修复问题(sqlserver、hdfs、cassandra、kudu、oss) + - fastjson 升级到 fastjson2 + +- [datax_v202210](https://github.com/alibaba/DataX/releases/tag/datax_v202210) + - 涉及通道能力更新(OceanBase、Tdengine、Doris等) + +- [datax_v202209](https://github.com/alibaba/DataX/releases/tag/datax_v202209) + - 涉及通道能力更新(MaxCompute、Datahub、SLS等)、安全漏洞更新、通用打包更新等 + +- [datax_v202205](https://github.com/alibaba/DataX/releases/tag/datax_v202205) + - 涉及通道能力更新(MaxCompute、Hologres、OSS、Tdengine等)、安全漏洞更新、通用打包更新等 + # 项目成员 @@ -135,23 +171,10 @@ This software is free to use under the Apache License [Apache license](https://g 8. 对高并发、高稳定可用性、高性能、大数据处理有过实际项目及产品经验者优先考虑; 9. 
有大数据产品、云产品、中间件技术解决方案者优先考虑。 ```` -钉钉用户群: -- DataX开源用户交流群 - - +用户咨询支持: -- DataX开源用户交流群2 - - +钉钉群目前暂时受到了一些管控策略影响,建议大家有问题优先在这里提交问题 Issue,DataX研发和社区会定期回答Issue中的问题,知识库丰富后也能帮助到后来的使用者。 -- DataX开源用户交流群3 - - -- DataX开源用户交流群4 - - - -- DataX开源用户交流群5 - - - -- DataX开源用户交流群6 - - diff --git a/adbmysqlwriter/doc/adbmysqlwriter.md b/adbmysqlwriter/doc/adbmysqlwriter.md new file mode 100644 index 00000000..27ac6b10 --- /dev/null +++ b/adbmysqlwriter/doc/adbmysqlwriter.md @@ -0,0 +1,338 @@ +# DataX AdbMysqlWriter + + +--- + + +## 1 快速介绍 + +AdbMysqlWriter 插件实现了写入数据到 ADB MySQL 目的表的功能。在底层实现上, AdbMysqlWriter 通过 JDBC 连接远程 ADB MySQL 数据库,并执行相应的 `insert into ...` 或者 ( `replace into ...` ) 的 SQL 语句将数据写入 ADB MySQL,内部会分批次提交入库。 + +AdbMysqlWriter 面向ETL开发工程师,他们使用 AdbMysqlWriter 从数仓导入数据到 ADB MySQL。同时 AdbMysqlWriter 亦可以作为数据迁移工具为DBA等用户提供服务。 + + +## 2 实现原理 + +AdbMysqlWriter 通过 DataX 框架获取 Reader 生成的协议数据,AdbMysqlWriter 通过 JDBC 连接远程 ADB MySQL 数据库,并执行相应的 `insert into ...` 或者 ( `replace into ...` ) 的 SQL 语句将数据写入 ADB MySQL。 + + +* `insert into...`(遇到主键重复时会自动忽略当前写入数据,不做更新,作用等同于`insert ignore into`) + +##### 或者 + +* `replace into...`(没有遇到主键/唯一性索引冲突时,与 insert into 行为一致,冲突时会用新行替换原有行所有字段) 的语句写入数据到 MySQL。出于性能考虑,采用了 `PreparedStatement + Batch`,并且设置了:`rewriteBatchedStatements=true`,将数据缓冲到线程上下文 Buffer 中,当 Buffer 累计到预定阈值时,才发起写入请求。 + +
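+下面给出一个示意性的最小 Java 片段,帮助理解上文所述"缓冲 + 批量提交"的写入方式(仅为说明用途,并非 AdbMysqlWriter 插件源码):其中库表 demo_db.demo_table、列 (id, name)、账号密码与批次阈值均为假设值,连接串中按上文说明追加了 rewriteBatchedStatements=true。
+
+```java
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.util.List;
+
+public class AdbBatchWriteSketch {
+
+    // 假设的连接串:追加 rewriteBatchedStatements=true,使驱动将批量语句改写为多值写入
+    private static final String JDBC_URL =
+            "jdbc:mysql://127.0.0.1:3306/demo_db?useUnicode=true&rewriteBatchedStatements=true";
+
+    // 与 batchSize 参数含义类似:缓冲累计到该阈值才发起一次写入(示意值)
+    private static final int BATCH_SIZE = 2048;
+
+    /**
+     * 以 writeMode=replace 为例写入假设表 demo_table(id, name);
+     * insert 模式按上文说明,其效果等同于 INSERT IGNORE INTO。
+     */
+    public static void writeRows(List<Object[]> rows) throws SQLException {
+        String sql = "REPLACE INTO demo_table (id, name) VALUES (?, ?)";
+        try (Connection conn = DriverManager.getConnection(JDBC_URL, "user", "password");
+             PreparedStatement ps = conn.prepareStatement(sql)) {
+            int buffered = 0;
+            for (Object[] row : rows) {
+                ps.setObject(1, row[0]);
+                ps.setObject(2, row[1]);
+                ps.addBatch();             // 先缓冲到当前批次
+                if (++buffered >= BATCH_SIZE) {
+                    ps.executeBatch();     // 累计到阈值才真正发起写入请求
+                    buffered = 0;
+                }
+            }
+            if (buffered > 0) {
+                ps.executeBatch();         // 提交最后不足一个批次的数据
+            }
+        }
+    }
+}
+```
+
+实际插件中批次大小由 batchSize 参数控制,连接的获取与 session 设置由 DataX 框架统一处理,这里仅示意核心的 addBatch/executeBatch 流程。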
+ + 注意:整个任务至少需要具备 `insert/replace into...` 的权限,是否需要其他权限,取决于你任务配置中在 preSql 和 postSql 中指定的语句。 + + +## 3 功能说明 + +### 3.1 配置样例 + +* 这里使用一份从内存产生到 ADB MySQL 导入的数据。 + +```json +{ + "job": { + "setting": { + "speed": { + "channel": 1 + } + }, + "content": [ + { + "reader": { + "name": "streamreader", + "parameter": { + "column" : [ + { + "value": "DataX", + "type": "string" + }, + { + "value": 19880808, + "type": "long" + }, + { + "value": "1988-08-08 08:08:08", + "type": "date" + }, + { + "value": true, + "type": "bool" + }, + { + "value": "test", + "type": "bytes" + } + ], + "sliceRecordCount": 1000 + } + }, + "writer": { + "name": "adbmysqlwriter", + "parameter": { + "writeMode": "replace", + "username": "root", + "password": "root", + "column": [ + "*" + ], + "preSql": [ + "truncate table @table" + ], + "connection": [ + { + "jdbcUrl": "jdbc:mysql://ip:port/database?useUnicode=true", + "table": [ + "test" + ] + } + ] + } + } + } + ] + } +} + +``` + + +### 3.2 参数说明 + +* **jdbcUrl** + + * 描述:目的数据库的 JDBC 连接信息。作业运行时,DataX 会在你提供的 jdbcUrl 后面追加如下属性:yearIsDateType=false&zeroDateTimeBehavior=convertToNull&rewriteBatchedStatements=true + + 注意:1、在一个数据库上只能配置一个 jdbcUrl + 2、一个 AdbMySQL 写入任务仅能配置一个 jdbcUrl + 3、jdbcUrl按照MySQL官方规范,并可以填写连接附加控制信息,比如想指定连接编码为 gbk ,则在 jdbcUrl 后面追加属性 useUnicode=true&characterEncoding=gbk。具体请参看 Mysql官方文档或者咨询对应 DBA。 + + * 必选:是
+ + * 默认值:无
+ +* **username** + + * 描述:目的数据库的用户名
+ + * 必选:是
+ + * 默认值:无
+ +* **password** + + * 描述:目的数据库的密码
+ + * 必选:是
+ + * 默认值:无
+ +* **table** + + * 描述:目的表的表名称。只能配置一个 AdbMySQL 的表名称。 + + 注意:table 和 jdbcUrl 必须包含在 connection 配置单元中 + + * 必选:是
+ + * 默认值:无
+ +* **column** + + * 描述:目的表需要写入数据的字段,字段之间用英文逗号分隔。例如: "column": ["id", "name", "age"]。如果要依次写入全部列,使用`*`表示, 例如: `"column": ["*"]`。 + + **column配置项必须指定,不能留空!** + + 注意:1、我们强烈不推荐你这样配置,因为当你目的表字段个数、类型等有改动时,你的任务可能运行不正确或者失败 + 2、 column 不能配置任何常量值 + + * 必选:是
+ + * 默认值:否
+ +* **session** + + * 描述: DataX在获取 ADB MySQL 连接时,执行session指定的SQL语句,修改当前connection session属性 + + * 必须: 否 + + * 默认值: 空 + +* **preSql** + + * 描述:写入数据到目的表前,会先执行这里的标准语句。如果 Sql 中有你需要操作到的表名称,请使用 `@table` 表示,这样在实际执行 SQL 语句时,会对变量按照实际表名称进行替换。比如希望导入数据前,先对表中数据进行删除操作,那么你可以这样配置:`"preSql":["truncate table @table"]`,效果是:在执行到每个表写入数据前,会先执行对应的 `truncate table 对应表名称`
+ + * 必选:否
+ + * 默认值:无
+ +* **postSql** + + * 描述:写入数据到目的表后,会执行这里的标准语句。(原理同 preSql )
+ + * 必选:否
+ + * 默认值:无
+ +* **writeMode** + + * 描述:控制写入数据到目标表采用 `insert into` 或者 `replace into` 或者 `ON DUPLICATE KEY UPDATE` 语句
+ + * 必选:是
+
+	* 必选:否
+ + * 默认值:replace
+ +* **batchSize** + + * 描述:一次性批量提交的记录数大小,该值可以极大减少DataX与 Adb MySQL 的网络交互次数,并提升整体吞吐量。但是该值设置过大可能会造成DataX运行进程OOM情况。
+ + * 必选:否
+ + * 默认值:2048
+ + +### 3.3 类型转换 + +目前 AdbMysqlWriter 支持大部分 MySQL 类型,但也存在部分个别类型没有支持的情况,请注意检查你的类型。 + +下面列出 AdbMysqlWriter 针对 MySQL 类型转换列表: + +| DataX 内部类型 | AdbMysql 数据类型 | +|---------------|---------------------------------| +| Long | tinyint, smallint, int, bigint | +| Double | float, double, decimal | +| String | varchar | +| Date | date, time, datetime, timestamp | +| Boolean | boolean | +| Bytes | binary | + +## 4 性能报告 + +### 4.1 环境准备 + +#### 4.1.1 数据特征 +TPC-H 数据集 lineitem 表,共 17 个字段, 随机生成总记录行数 59986052。未压缩总数据量:7.3GiB + +建表语句: + + CREATE TABLE `datax_adbmysqlwriter_perf_lineitem` ( + `l_orderkey` bigint NOT NULL COMMENT '', + `l_partkey` int NOT NULL COMMENT '', + `l_suppkey` int NOT NULL COMMENT '', + `l_linenumber` int NOT NULL COMMENT '', + `l_quantity` decimal(15,2) NOT NULL COMMENT '', + `l_extendedprice` decimal(15,2) NOT NULL COMMENT '', + `l_discount` decimal(15,2) NOT NULL COMMENT '', + `l_tax` decimal(15,2) NOT NULL COMMENT '', + `l_returnflag` varchar(1024) NOT NULL COMMENT '', + `l_linestatus` varchar(1024) NOT NULL COMMENT '', + `l_shipdate` date NOT NULL COMMENT '', + `l_commitdate` date NOT NULL COMMENT '', + `l_receiptdate` date NOT NULL COMMENT '', + `l_shipinstruct` varchar(1024) NOT NULL COMMENT '', + `l_shipmode` varchar(1024) NOT NULL COMMENT '', + `l_comment` varchar(1024) NOT NULL COMMENT '', + `dummy` varchar(1024), + PRIMARY KEY (`l_orderkey`, `l_linenumber`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='datax perf test'; + +单行记录类似于: + + l_orderkey: 2122789 + l_partkey: 1233571 + l_suppkey: 8608 + l_linenumber: 1 + l_quantity: 35.00 + l_extendedprice: 52657.85 + l_discount: 0.02 + l_tax: 0.07 + l_returnflag: N + l_linestatus: O + l_shipdate: 1996-11-03 + l_commitdate: 1996-12-07 + l_receiptdate: 1996-11-16 + l_shipinstruct: COLLECT COD + l_shipmode: FOB + l_comment: ld, regular theodolites. + dummy: + +#### 4.1.2 机器参数 + +* DataX ECS: 24Core48GB + +* Adb MySQL 数据库 + * 计算资源:16Core64GB(集群版) + * 弹性IO资源:3 + +#### 4.1.3 DataX jvm 参数 + + -Xms1G -Xmx10G -XX:+HeapDumpOnOutOfMemoryError + +### 4.2 测试报告 + +| 通道数 | 批量提交行数 | DataX速度(Rec/s) | DataX流量(MB/s) | 导入用时(s) | +|-----|-------|------------------|---------------|---------| +| 1 | 512 | 23071 | 2.34 | 2627 | +| 1 | 1024 | 26080 | 2.65 | 2346 | +| 1 | 2048 | 28162 | 2.86 | 2153 | +| 1 | 4096 | 28978 | 2.94 | 2119 | +| 4 | 512 | 56590 | 5.74 | 1105 | +| 4 | 1024 | 81062 | 8.22 | 763 | +| 4 | 2048 | 107117 | 10.87 | 605 | +| 4 | 4096 | 113181 | 11.48 | 579 | +| 8 | 512 | 81062 | 8.22 | 786 | +| 8 | 1024 | 127629 | 12.95 | 519 | +| 8 | 2048 | 187456 | 19.01 | 369 | +| 8 | 4096 | 206848 | 20.98 | 341 | +| 16 | 512 | 130404 | 13.23 | 513 | +| 16 | 1024 | 214235 | 21.73 | 335 | +| 16 | 2048 | 299930 | 30.42 | 253 | +| 16 | 4096 | 333255 | 33.80 | 227 | +| 32 | 512 | 206848 | 20.98 | 347 | +| 32 | 1024 | 315716 | 32.02 | 241 | +| 32 | 2048 | 399907 | 40.56 | 199 | +| 32 | 4096 | 461431 | 46.80 | 184 | +| 64 | 512 | 333255 | 33.80 | 231 | +| 64 | 1024 | 399907 | 40.56 | 204 | +| 64 | 2048 | 428471 | 43.46 | 199 | +| 64 | 4096 | 461431 | 46.80 | 187 | +| 128 | 512 | 333255 | 33.80 | 235 | +| 128 | 1024 | 399907 | 40.56 | 203 | +| 128 | 2048 | 425432 | 43.15 | 197 | +| 128 | 4096 | 387006 | 39.26 | 211 | + +说明: + +1. datax 使用 txtfilereader 读取本地文件,避免源端存在性能瓶颈。 + +#### 性能测试小结 +1. channel通道个数和batchSize对性能影响比较大 +2. 
通常不建议写入数据库时,通道个数 > 32 + +## 5 约束限制 + +## FAQ + +*** + +**Q: AdbMysqlWriter 执行 postSql 语句报错,那么数据导入到目标数据库了吗?** + +A: DataX 导入过程存在三块逻辑,pre 操作、导入操作、post 操作,其中任意一环报错,DataX 作业报错。由于 DataX 不能保证在同一个事务完成上述几个操作,因此有可能数据已经落入到目标端。 + +*** + +**Q: 按照上述说法,那么有部分脏数据导入数据库,如果影响到线上数据库怎么办?** + +A: 目前有两种解法,第一种配置 pre 语句,该 sql 可以清理当天导入数据, DataX 每次导入时候可以把上次清理干净并导入完整数据。第二种,向临时表导入数据,完成后再 rename 到线上表。 + +*** + +**Q: 上面第二种方法可以避免对线上数据造成影响,那我具体怎样操作?** + +A: 可以配置临时表导入 diff --git a/adbmysqlwriter/pom.xml b/adbmysqlwriter/pom.xml new file mode 100755 index 00000000..6ffcab85 --- /dev/null +++ b/adbmysqlwriter/pom.xml @@ -0,0 +1,79 @@ + + 4.0.0 + + com.alibaba.datax + datax-all + 0.0.1-SNAPSHOT + + adbmysqlwriter + adbmysqlwriter + jar + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + org.slf4j + slf4j-api + + + ch.qos.logback + logback-classic + + + + com.alibaba.datax + plugin-rdbms-util + ${datax-project-version} + + + + mysql + mysql-connector-java + 5.1.40 + + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + diff --git a/adbmysqlwriter/src/main/assembly/package.xml b/adbmysqlwriter/src/main/assembly/package.xml new file mode 100755 index 00000000..7192e531 --- /dev/null +++ b/adbmysqlwriter/src/main/assembly/package.xml @@ -0,0 +1,35 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/writer/adbmysqlwriter + + + target/ + + adbmysqlwriter-0.0.1-SNAPSHOT.jar + + plugin/writer/adbmysqlwriter + + + + + + false + plugin/writer/adbmysqlwriter/libs + runtime + + + diff --git a/adbmysqlwriter/src/main/java/com/alibaba/datax/plugin/writer/adbmysqlwriter/AdbMysqlWriter.java b/adbmysqlwriter/src/main/java/com/alibaba/datax/plugin/writer/adbmysqlwriter/AdbMysqlWriter.java new file mode 100755 index 00000000..762c4934 --- /dev/null +++ b/adbmysqlwriter/src/main/java/com/alibaba/datax/plugin/writer/adbmysqlwriter/AdbMysqlWriter.java @@ -0,0 +1,138 @@ +package com.alibaba.datax.plugin.writer.adbmysqlwriter; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.spi.Writer; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter; +import com.alibaba.datax.plugin.rdbms.writer.Key; +import org.apache.commons.lang3.StringUtils; + +import java.sql.Connection; +import java.sql.SQLException; +import java.util.List; + +public class AdbMysqlWriter extends Writer { + private static final DataBaseType DATABASE_TYPE = DataBaseType.ADB; + + public static class Job extends Writer.Job { + private Configuration originalConfig = null; + private CommonRdbmsWriter.Job commonRdbmsWriterJob; + + @Override + public void preCheck(){ + this.init(); + this.commonRdbmsWriterJob.writerPreCheck(this.originalConfig, DATABASE_TYPE); + } + + @Override + public void init() { + this.originalConfig = super.getPluginJobConf(); + this.commonRdbmsWriterJob = new CommonRdbmsWriter.Job(DATABASE_TYPE); + this.commonRdbmsWriterJob.init(this.originalConfig); + } + + // 一般来说,是需要推迟到 task 中进行pre 的执行(单表情况例外) + @Override + public void prepare() { + //实跑先不支持 权限 检验 + //this.commonRdbmsWriterJob.privilegeValid(this.originalConfig, DATABASE_TYPE); + 
this.commonRdbmsWriterJob.prepare(this.originalConfig); + } + + @Override + public List split(int mandatoryNumber) { + return this.commonRdbmsWriterJob.split(this.originalConfig, mandatoryNumber); + } + + // 一般来说,是需要推迟到 task 中进行post 的执行(单表情况例外) + @Override + public void post() { + this.commonRdbmsWriterJob.post(this.originalConfig); + } + + @Override + public void destroy() { + this.commonRdbmsWriterJob.destroy(this.originalConfig); + } + + } + + public static class Task extends Writer.Task { + + private Configuration writerSliceConfig; + private CommonRdbmsWriter.Task commonRdbmsWriterTask; + + public static class DelegateClass extends CommonRdbmsWriter.Task { + private long writeTime = 0L; + private long writeCount = 0L; + private long lastLogTime = 0; + + public DelegateClass(DataBaseType dataBaseType) { + super(dataBaseType); + } + + @Override + protected void doBatchInsert(Connection connection, List buffer) + throws SQLException { + long startTime = System.currentTimeMillis(); + + super.doBatchInsert(connection, buffer); + + writeCount = writeCount + buffer.size(); + writeTime = writeTime + (System.currentTimeMillis() - startTime); + + // log write metrics every 10 seconds + if (System.currentTimeMillis() - lastLogTime > 10000) { + lastLogTime = System.currentTimeMillis(); + logTotalMetrics(); + } + } + + public void logTotalMetrics() { + LOG.info(Thread.currentThread().getName() + ", AdbMySQL writer take " + writeTime + " ms, write " + writeCount + " records."); + } + } + + @Override + public void init() { + this.writerSliceConfig = super.getPluginJobConf(); + + if (StringUtils.isBlank(this.writerSliceConfig.getString(Key.WRITE_MODE))) { + this.writerSliceConfig.set(Key.WRITE_MODE, "REPLACE"); + } + + this.commonRdbmsWriterTask = new DelegateClass(DATABASE_TYPE); + this.commonRdbmsWriterTask.init(this.writerSliceConfig); + } + + @Override + public void prepare() { + this.commonRdbmsWriterTask.prepare(this.writerSliceConfig); + } + + //TODO 改用连接池,确保每次获取的连接都是可用的(注意:连接可能需要每次都初始化其 session) + public void startWrite(RecordReceiver recordReceiver) { + this.commonRdbmsWriterTask.startWrite(recordReceiver, this.writerSliceConfig, + super.getTaskPluginCollector()); + } + + @Override + public void post() { + this.commonRdbmsWriterTask.post(this.writerSliceConfig); + } + + @Override + public void destroy() { + this.commonRdbmsWriterTask.destroy(this.writerSliceConfig); + } + + @Override + public boolean supportFailOver(){ + String writeMode = writerSliceConfig.getString(Key.WRITE_MODE); + return "replace".equalsIgnoreCase(writeMode); + } + + } +} diff --git a/adbmysqlwriter/src/main/resources/plugin.json b/adbmysqlwriter/src/main/resources/plugin.json new file mode 100755 index 00000000..58c69533 --- /dev/null +++ b/adbmysqlwriter/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "adbmysqlwriter", + "class": "com.alibaba.datax.plugin.writer.adbmysqlwriter.AdbMysqlWriter", + "description": "useScene: prod. mechanism: Jdbc connection using the database, execute insert sql. 
warn: The more you know about the database, the less problems you encounter.", + "developer": "alibaba" +} \ No newline at end of file diff --git a/adbmysqlwriter/src/main/resources/plugin_job_template.json b/adbmysqlwriter/src/main/resources/plugin_job_template.json new file mode 100644 index 00000000..9537ee5a --- /dev/null +++ b/adbmysqlwriter/src/main/resources/plugin_job_template.json @@ -0,0 +1,20 @@ +{ + "name": "adbmysqlwriter", + "parameter": { + "username": "username", + "password": "password", + "column": ["col1", "col2", "col3"], + "connection": [ + { + "jdbcUrl": "jdbc:mysql://:[/]", + "table": ["table1", "table2"] + } + ], + "preSql": [], + "postSql": [], + "batchSize": 65536, + "batchByteSize": 134217728, + "dryRun": false, + "writeMode": "insert" + } +} \ No newline at end of file diff --git a/adbpgwriter/src/main/doc/adbpgwriter.md b/adbpgwriter/src/main/doc/adbpgwriter.md index 80427241..6d3857bc 100644 --- a/adbpgwriter/src/main/doc/adbpgwriter.md +++ b/adbpgwriter/src/main/doc/adbpgwriter.md @@ -65,9 +65,9 @@ COPY命令将数据写入ADB PG数据库中。 "writer": { "name": "adbpgwriter", "parameter": { - "username": "username", - "password": "password", - "host": "host", + "username": "", + "password": "", + "host": "127.0.0.1", "port": "1234", "database": "database", "schema": "schema", diff --git a/adswriter/doc/adswriter.md b/adswriter/doc/adswriter.md index 4a0fd961..c02f8018 100644 --- a/adswriter/doc/adswriter.md +++ b/adswriter/doc/adswriter.md @@ -110,7 +110,6 @@ DataX 将数据直连ADS接口,利用ADS暴露的INSERT接口直写到ADS。 "account": "xxx@aliyun.com", "odpsServer": "xxx", "tunnelServer": "xxx", - "accountType": "aliyun", "project": "transfer_project" }, "writeMode": "load", diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/insert/AdsClientProxy.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/insert/AdsClientProxy.java index 8fdc70d6..326b464d 100644 --- a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/insert/AdsClientProxy.java +++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/insert/AdsClientProxy.java @@ -18,7 +18,7 @@ import com.alibaba.datax.plugin.writer.adswriter.AdsWriterErrorCode; import com.alibaba.datax.plugin.writer.adswriter.ads.TableInfo; import com.alibaba.datax.plugin.writer.adswriter.util.Constant; import com.alibaba.datax.plugin.writer.adswriter.util.Key; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.slf4j.Logger; diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/load/TransferProjectConf.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/load/TransferProjectConf.java index bff4b7b9..3d28a833 100644 --- a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/load/TransferProjectConf.java +++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/load/TransferProjectConf.java @@ -12,7 +12,6 @@ public class TransferProjectConf { public final static String KEY_ACCOUNT = "odps.account"; public final static String KEY_ODPS_SERVER = "odps.odpsServer"; public final static String KEY_ODPS_TUNNEL = "odps.tunnelServer"; - public final static String KEY_ACCOUNT_TYPE = "odps.accountType"; public final static String KEY_PROJECT = "odps.project"; private String accessId; @@ -20,7 +19,6 @@ public class TransferProjectConf { private String account; private String odpsServer; private String odpsTunnel; - private 
String accountType; private String project; public static TransferProjectConf create(Configuration adsWriterConf) { @@ -30,7 +28,6 @@ public class TransferProjectConf { res.account = adsWriterConf.getString(KEY_ACCOUNT); res.odpsServer = adsWriterConf.getString(KEY_ODPS_SERVER); res.odpsTunnel = adsWriterConf.getString(KEY_ODPS_TUNNEL); - res.accountType = adsWriterConf.getString(KEY_ACCOUNT_TYPE, "aliyun"); res.project = adsWriterConf.getString(KEY_PROJECT); return res; } @@ -55,9 +52,6 @@ public class TransferProjectConf { return odpsTunnel; } - public String getAccountType() { - return accountType; - } public String getProject() { return project; diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/DataType.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/DataType.java index 595b1dfd..f625336e 100644 --- a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/DataType.java +++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/DataType.java @@ -70,7 +70,7 @@ public class DataType { } else if ("datetime".equals(type)) { return DATETIME; } else { - throw new IllegalArgumentException("unkown type: " + type); + throw new IllegalArgumentException("unknown type: " + type); } } diff --git a/cassandrareader/src/main/java/com/alibaba/datax/plugin/reader/cassandrareader/CassandraReaderHelper.java b/cassandrareader/src/main/java/com/alibaba/datax/plugin/reader/cassandrareader/CassandraReaderHelper.java index 0a4e83fa..f5937c2f 100644 --- a/cassandrareader/src/main/java/com/alibaba/datax/plugin/reader/cassandrareader/CassandraReaderHelper.java +++ b/cassandrareader/src/main/java/com/alibaba/datax/plugin/reader/cassandrareader/CassandraReaderHelper.java @@ -23,7 +23,7 @@ import com.alibaba.datax.common.element.StringColumn; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.TaskPluginCollector; import com.alibaba.datax.common.util.Configuration; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; import com.datastax.driver.core.Cluster; import com.datastax.driver.core.CodecRegistry; @@ -298,6 +298,7 @@ public class CassandraReaderHelper { record.addColumn(new LongColumn(rs.getInt(i))); break; + case COUNTER: case BIGINT: record.addColumn(new LongColumn(rs.getLong(i))); break; @@ -558,26 +559,6 @@ public class CassandraReaderHelper { String.format( "配置信息有错误.列信息中需要包含'%s'字段 .",Key.COLUMN_NAME)); } - if( name.startsWith(Key.WRITE_TIME) ) { - String colName = name.substring(Key.WRITE_TIME.length(),name.length() - 1 ); - ColumnMetadata col = tableMetadata.getColumn(colName); - if( col == null ) { - throw DataXException - .asDataXException( - CassandraReaderErrorCode.CONF_ERROR, - String.format( - "配置信息有错误.列'%s'不存在 .",colName)); - } - } else { - ColumnMetadata col = tableMetadata.getColumn(name); - if( col == null ) { - throw DataXException - .asDataXException( - CassandraReaderErrorCode.CONF_ERROR, - String.format( - "配置信息有错误.列'%s'不存在 .",name)); - } - } } } diff --git a/cassandrawriter/src/main/java/com/alibaba/datax/plugin/writer/cassandrawriter/CassandraWriterHelper.java b/cassandrawriter/src/main/java/com/alibaba/datax/plugin/writer/cassandrawriter/CassandraWriterHelper.java index b68af281..5ac392b7 100644 --- a/cassandrawriter/src/main/java/com/alibaba/datax/plugin/writer/cassandrawriter/CassandraWriterHelper.java +++ b/cassandrawriter/src/main/java/com/alibaba/datax/plugin/writer/cassandrawriter/CassandraWriterHelper.java @@ -18,10 +18,10 
@@ import java.util.UUID; import com.alibaba.datax.common.element.Column; import com.alibaba.datax.common.exception.DataXException; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.JSONArray; -import com.alibaba.fastjson.JSONException; -import com.alibaba.fastjson.JSONObject; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONArray; +import com.alibaba.fastjson2.JSONException; +import com.alibaba.fastjson2.JSONObject; import com.datastax.driver.core.BoundStatement; import com.datastax.driver.core.CodecRegistry; @@ -204,7 +204,7 @@ public class CassandraWriterHelper { case MAP: { Map m = new HashMap(); - for (JSONObject.Entry e : ((JSONObject)jsonObject).entrySet()) { + for (Map.Entry e : ((JSONObject)jsonObject).entrySet()) { Object k = parseFromString((String) e.getKey(), type.getTypeArguments().get(0)); Object v = parseFromJson(e.getValue(), type.getTypeArguments().get(1)); m.put(k,v); @@ -233,7 +233,7 @@ public class CassandraWriterHelper { case UDT: { UDTValue t = ((UserType) type).newValue(); UserType userType = t.getType(); - for (JSONObject.Entry e : ((JSONObject)jsonObject).entrySet()) { + for (Map.Entry e : ((JSONObject)jsonObject).entrySet()) { DataType eleType = userType.getFieldType((String)e.getKey()); t.set((String)e.getKey(), parseFromJson(e.getValue(), eleType), registry.codecFor(eleType).getJavaType()); } diff --git a/clickhousewriter/src/main/java/com/alibaba/datax/plugin/writer/clickhousewriter/ClickhouseWriter.java b/clickhousewriter/src/main/java/com/alibaba/datax/plugin/writer/clickhousewriter/ClickhouseWriter.java index b928d421..83c421ee 100644 --- a/clickhousewriter/src/main/java/com/alibaba/datax/plugin/writer/clickhousewriter/ClickhouseWriter.java +++ b/clickhousewriter/src/main/java/com/alibaba/datax/plugin/writer/clickhousewriter/ClickhouseWriter.java @@ -10,8 +10,8 @@ import com.alibaba.datax.common.util.Configuration; import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; import com.alibaba.datax.plugin.rdbms.util.DataBaseType; import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.JSONArray; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONArray; import java.sql.Array; import java.sql.Connection; @@ -68,7 +68,7 @@ public class ClickhouseWriter extends Writer { this.commonRdbmsWriterSlave = new CommonRdbmsWriter.Task(DATABASE_TYPE) { @Override - protected PreparedStatement fillPreparedStatementColumnType(PreparedStatement preparedStatement, int columnIndex, int columnSqltype, Column column) throws SQLException { + protected PreparedStatement fillPreparedStatementColumnType(PreparedStatement preparedStatement, int columnIndex, int columnSqltype, String typeName, Column column) throws SQLException { try { if (column.getRawData() == null) { preparedStatement.setNull(columnIndex + 1, columnSqltype); diff --git a/clickhousewriter/src/main/resources/plugin.json b/clickhousewriter/src/main/resources/plugin.json index ff1acf01..d70e2b1d 100755 --- a/clickhousewriter/src/main/resources/plugin.json +++ b/clickhousewriter/src/main/resources/plugin.json @@ -2,5 +2,5 @@ "name": "clickhousewriter", "class": "com.alibaba.datax.plugin.writer.clickhousewriter.ClickhouseWriter", "description": "useScene: prod. 
mechanism: Jdbc connection using the database, execute insert sql.", - "developer": "jiye.tjy" + "developer": "alibaba" } \ No newline at end of file diff --git a/common/pom.xml b/common/pom.xml index 1a57cccd..59d7073d 100755 --- a/common/pom.xml +++ b/common/pom.xml @@ -17,8 +17,8 @@ commons-lang3 - com.alibaba - fastjson + com.alibaba.fastjson2 + fastjson2 commons-io @@ -61,6 +61,14 @@ + + + src/main/java + + **/*.properties + + + maven-compiler-plugin diff --git a/common/src/main/java/com/alibaba/datax/common/element/BoolColumn.java b/common/src/main/java/com/alibaba/datax/common/element/BoolColumn.java index 7699e152..0978074b 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/BoolColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/BoolColumn.java @@ -92,7 +92,13 @@ public class BoolColumn extends Column { throw DataXException.asDataXException( CommonErrorCode.CONVERT_NOT_SUPPORT, "Bool类型不能转为Date ."); } - + + @Override + public Date asDate(String dateFormat) { + throw DataXException.asDataXException( + CommonErrorCode.CONVERT_NOT_SUPPORT, "Bool类型不能转为Date ."); + } + @Override public byte[] asBytes() { throw DataXException.asDataXException( diff --git a/common/src/main/java/com/alibaba/datax/common/element/BytesColumn.java b/common/src/main/java/com/alibaba/datax/common/element/BytesColumn.java index d3cc5993..bc1eeb79 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/BytesColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/BytesColumn.java @@ -75,6 +75,12 @@ public class BytesColumn extends Column { throw DataXException.asDataXException( CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为Date ."); } + + @Override + public Date asDate(String dateFormat) { + throw DataXException.asDataXException( + CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为Date ."); + } @Override public Boolean asBoolean() { diff --git a/common/src/main/java/com/alibaba/datax/common/element/Column.java b/common/src/main/java/com/alibaba/datax/common/element/Column.java index ed68e88d..13cfc7de 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/Column.java +++ b/common/src/main/java/com/alibaba/datax/common/element/Column.java @@ -1,6 +1,6 @@ package com.alibaba.datax.common.element; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; import java.math.BigDecimal; import java.math.BigInteger; @@ -55,6 +55,8 @@ public abstract class Column { public abstract String asString(); public abstract Date asDate(); + + public abstract Date asDate(String dateFormat); public abstract byte[] asBytes(); diff --git a/common/src/main/java/com/alibaba/datax/common/element/ColumnCast.java b/common/src/main/java/com/alibaba/datax/common/element/ColumnCast.java index 89d0a7c6..85d62ecc 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/ColumnCast.java +++ b/common/src/main/java/com/alibaba/datax/common/element/ColumnCast.java @@ -22,6 +22,11 @@ public final class ColumnCast { throws ParseException { return StringCast.asDate(column); } + + public static Date string2Date(final StringColumn column, String dateFormat) + throws ParseException { + return StringCast.asDate(column, dateFormat); + } public static byte[] string2Bytes(final StringColumn column) throws UnsupportedEncodingException { @@ -115,6 +120,16 @@ class StringCast { } throw e; } + + static Date asDate(final StringColumn column, String dateFormat) throws ParseException { + ParseException e; + try { + return FastDateFormat.getInstance(dateFormat, 
StringCast.timeZoner).parse(column.asString()); + } catch (ParseException ignored) { + e = ignored; + } + throw e; + } static byte[] asBytes(final StringColumn column) throws UnsupportedEncodingException { diff --git a/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java b/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java index 6626a6fb..f688d163 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java @@ -89,6 +89,11 @@ public class DateColumn extends Column { return new Date((Long)this.getRawData()); } + + @Override + public Date asDate(String dateFormat) { + return asDate(); + } @Override public byte[] asBytes() { diff --git a/common/src/main/java/com/alibaba/datax/common/element/DoubleColumn.java b/common/src/main/java/com/alibaba/datax/common/element/DoubleColumn.java index 17170ea6..915bd8ef 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/DoubleColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/DoubleColumn.java @@ -132,6 +132,12 @@ public class DoubleColumn extends Column { throw DataXException.asDataXException( CommonErrorCode.CONVERT_NOT_SUPPORT, "Double类型无法转为Date类型 ."); } + + @Override + public Date asDate(String dateFormat) { + throw DataXException.asDataXException( + CommonErrorCode.CONVERT_NOT_SUPPORT, "Double类型无法转为Date类型 ."); + } @Override public byte[] asBytes() { diff --git a/common/src/main/java/com/alibaba/datax/common/element/LongColumn.java b/common/src/main/java/com/alibaba/datax/common/element/LongColumn.java index d8113f7c..e0f8d865 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/LongColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/LongColumn.java @@ -125,6 +125,11 @@ public class LongColumn extends Column { } return new Date(this.asLong()); } + + @Override + public Date asDate(String dateFormat) { + return this.asDate(); + } @Override public byte[] asBytes() { diff --git a/common/src/main/java/com/alibaba/datax/common/element/Record.java b/common/src/main/java/com/alibaba/datax/common/element/Record.java index d06d80aa..7abf45dd 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/Record.java +++ b/common/src/main/java/com/alibaba/datax/common/element/Record.java @@ -1,5 +1,7 @@ package com.alibaba.datax.common.element; +import java.util.Map; + /** * Created by jingxing on 14-8-24. 
*/ @@ -20,4 +22,8 @@ public interface Record { public int getMemorySize(); + public void setMeta(Map meta); + + public Map getMeta(); + } diff --git a/common/src/main/java/com/alibaba/datax/common/element/StringColumn.java b/common/src/main/java/com/alibaba/datax/common/element/StringColumn.java index 11209f46..c1e7a84e 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/StringColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/StringColumn.java @@ -149,6 +149,16 @@ public class StringColumn extends Column { String.format("String[\"%s\"]不能转为Date .", this.asString())); } } + + @Override + public Date asDate(String dateFormat) { + try { + return ColumnCast.string2Date(this, dateFormat); + } catch (Exception e) { + throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("String[\"%s\"]不能转为Date .", this.asString())); + } + } @Override public byte[] asBytes() { diff --git a/common/src/main/java/com/alibaba/datax/common/exception/DataXException.java b/common/src/main/java/com/alibaba/datax/common/exception/DataXException.java index f360e699..09d00adc 100755 --- a/common/src/main/java/com/alibaba/datax/common/exception/DataXException.java +++ b/common/src/main/java/com/alibaba/datax/common/exception/DataXException.java @@ -16,6 +16,10 @@ public class DataXException extends RuntimeException { this.errorCode = errorCode; } + public DataXException(String errorMessage) { + super(errorMessage); + } + private DataXException(ErrorCode errorCode, String errorMessage, Throwable cause) { super(errorCode.toString() + " - " + getMessage(errorMessage) + " - " + getMessage(cause), cause); @@ -26,6 +30,10 @@ public class DataXException extends RuntimeException { return new DataXException(errorCode, message); } + public static DataXException asDataXException(String message) { + return new DataXException(message); + } + public static DataXException asDataXException(ErrorCode errorCode, String message, Throwable cause) { if (cause instanceof DataXException) { return (DataXException) cause; diff --git a/common/src/main/java/com/alibaba/datax/common/plugin/AbstractPlugin.java b/common/src/main/java/com/alibaba/datax/common/plugin/AbstractPlugin.java index 184ee89e..0323a976 100755 --- a/common/src/main/java/com/alibaba/datax/common/plugin/AbstractPlugin.java +++ b/common/src/main/java/com/alibaba/datax/common/plugin/AbstractPlugin.java @@ -3,6 +3,8 @@ package com.alibaba.datax.common.plugin; import com.alibaba.datax.common.base.BaseObject; import com.alibaba.datax.common.util.Configuration; +import java.util.List; + public abstract class AbstractPlugin extends BaseObject implements Pluginable { //作业的config private Configuration pluginJobConf; @@ -15,6 +17,8 @@ public abstract class AbstractPlugin extends BaseObject implements Pluginable { private String peerPluginName; + private List readerPluginSplitConf; + @Override public String getPluginName() { assert null != this.pluginConf; @@ -84,4 +88,12 @@ public abstract class AbstractPlugin extends BaseObject implements Pluginable { public void postHandler(Configuration jobConfiguration){ } + + public List getReaderPluginSplitConf(){ + return this.readerPluginSplitConf; + } + + public void setReaderPluginSplitConf(List readerPluginSplitConf){ + this.readerPluginSplitConf = readerPluginSplitConf; + } } diff --git a/common/src/main/java/com/alibaba/datax/common/statistics/PerfTrace.java b/common/src/main/java/com/alibaba/datax/common/statistics/PerfTrace.java index ea9aa421..cf0457bc 100644 --- 
a/common/src/main/java/com/alibaba/datax/common/statistics/PerfTrace.java +++ b/common/src/main/java/com/alibaba/datax/common/statistics/PerfTrace.java @@ -31,7 +31,6 @@ public class PerfTrace { private int taskGroupId; private int channelNumber; - private int priority; private int batchSize = 500; private volatile boolean perfReportEnable = true; @@ -54,12 +53,12 @@ public class PerfTrace { * @param taskGroupId * @return */ - public static PerfTrace getInstance(boolean isJob, long jobId, int taskGroupId, int priority, boolean enable) { + public static PerfTrace getInstance(boolean isJob, long jobId, int taskGroupId, boolean enable) { if (instance == null) { synchronized (lock) { if (instance == null) { - instance = new PerfTrace(isJob, jobId, taskGroupId, priority, enable); + instance = new PerfTrace(isJob, jobId, taskGroupId, enable); } } } @@ -76,22 +75,21 @@ public class PerfTrace { LOG.error("PerfTrace instance not be init! must have some error! "); synchronized (lock) { if (instance == null) { - instance = new PerfTrace(false, -1111, -1111, 0, false); + instance = new PerfTrace(false, -1111, -1111, false); } } } return instance; } - private PerfTrace(boolean isJob, long jobId, int taskGroupId, int priority, boolean enable) { + private PerfTrace(boolean isJob, long jobId, int taskGroupId, boolean enable) { try { this.perfTraceId = isJob ? "job_" + jobId : String.format("taskGroup_%s_%s", jobId, taskGroupId); this.enable = enable; this.isJob = isJob; this.taskGroupId = taskGroupId; this.instId = jobId; - this.priority = priority; - LOG.info(String.format("PerfTrace traceId=%s, isEnable=%s, priority=%s", this.perfTraceId, this.enable, this.priority)); + LOG.info(String.format("PerfTrace traceId=%s, isEnable=%s", this.perfTraceId, this.enable)); } catch (Exception e) { // do nothing @@ -398,7 +396,6 @@ public class PerfTrace { jdo.setWindowEnd(this.windowEnd); jdo.setJobStartTime(jobStartTime); jdo.setJobRunTimeMs(System.currentTimeMillis() - jobStartTime.getTime()); - jdo.setJobPriority(this.priority); jdo.setChannelNum(this.channelNumber); jdo.setCluster(this.cluster); jdo.setJobDomain(this.jobDomain); @@ -609,7 +606,6 @@ public class PerfTrace { private Date jobStartTime; private Date jobEndTime; private Long jobRunTimeMs; - private Integer jobPriority; private Integer channelNum; private String cluster; private String jobDomain; @@ -680,10 +676,6 @@ public class PerfTrace { return jobRunTimeMs; } - public Integer getJobPriority() { - return jobPriority; - } - public Integer getChannelNum() { return channelNum; } @@ -816,10 +808,6 @@ public class PerfTrace { this.jobRunTimeMs = jobRunTimeMs; } - public void setJobPriority(Integer jobPriority) { - this.jobPriority = jobPriority; - } - public void setChannelNum(Integer channelNum) { this.channelNum = channelNum; } diff --git a/common/src/main/java/com/alibaba/datax/common/util/Configuration.java b/common/src/main/java/com/alibaba/datax/common/util/Configuration.java index f570dd00..c1194532 100755 --- a/common/src/main/java/com/alibaba/datax/common/util/Configuration.java +++ b/common/src/main/java/com/alibaba/datax/common/util/Configuration.java @@ -3,8 +3,8 @@ package com.alibaba.datax.common.util; import com.alibaba.datax.common.exception.CommonErrorCode; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.spi.ErrorCode; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.serializer.SerializerFeature; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONWriter; import 
org.apache.commons.io.IOUtils; import org.apache.commons.lang3.CharUtils; import org.apache.commons.lang3.StringUtils; @@ -411,6 +411,15 @@ public class Configuration { return list; } + public List getListWithJson(final String path, Class t) { + Object object = this.get(path, List.class); + if (null == object) { + return null; + } + + return JSON.parseArray(JSON.toJSONString(object),t); + } + /** * 根据用户提供的json path,寻址List对象,如果对象不存在,返回null */ @@ -577,7 +586,7 @@ public class Configuration { */ public String beautify() { return JSON.toJSONString(this.getInternal(), - SerializerFeature.PrettyFormat); + JSONWriter.Feature.PrettyFormat); } /** diff --git a/common/src/main/java/com/alibaba/datax/common/util/ConfigurationUtil.java b/common/src/main/java/com/alibaba/datax/common/util/ConfigurationUtil.java new file mode 100644 index 00000000..e5e07547 --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/ConfigurationUtil.java @@ -0,0 +1,37 @@ +package com.alibaba.datax.common.util; + +import java.util.Arrays; +import java.util.List; +import java.util.Set; + +import org.apache.commons.lang3.StringUtils; + +public class ConfigurationUtil { + private static final List SENSITIVE_KEYS = Arrays.asList("password", "accessKey", "securityToken", + "AccessKeyId", "AccessKeySecert", "AccessKeySecret", "clientPassword"); + + public static Configuration filterSensitive(Configuration origin) { + // shell 任务configuration metric 可能为null。 + if (origin == null) { + return origin; + } + // 确保不影响入参的对象 + Configuration configuration = origin.clone(); + Set keys = configuration.getKeys(); + for (final String key : keys) { + boolean isSensitive = false; + for (String sensitiveKey : SENSITIVE_KEYS) { + if (StringUtils.endsWithIgnoreCase(key, sensitiveKey)) { + isSensitive = true; + break; + } + } + + if (isSensitive && configuration.get(key) instanceof String) { + configuration.set(key, configuration.getString(key).replaceAll(".", "*")); + } + + } + return configuration; + } +} \ No newline at end of file diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/DESCipher.java b/common/src/main/java/com/alibaba/datax/common/util/DESCipher.java old mode 100644 new mode 100755 similarity index 60% rename from odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/DESCipher.java rename to common/src/main/java/com/alibaba/datax/common/util/DESCipher.java index 82e97191..0692a7b3 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/DESCipher.java +++ b/common/src/main/java/com/alibaba/datax/common/util/DESCipher.java @@ -1,5 +1,5 @@ /** - * (C) 2010-2014 Alibaba Group Holding Limited. + * (C) 2010-2022 Alibaba Group Holding Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,342 +14,216 @@ * limitations under the License. 
*/ -package com.alibaba.datax.plugin.reader.odpsreader.util; +package com.alibaba.datax.common.util; import javax.crypto.Cipher; import javax.crypto.SecretKey; import javax.crypto.SecretKeyFactory; import javax.crypto.spec.DESKeySpec; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.security.SecureRandom; /** - *   * DES加解密,支持与delphi交互(字符串编码需统一为UTF-8) - * - *   * - * - *   * @author wym - * - *    + * DES加解密,支持与delphi交互(字符串编码需统一为UTF-8) + * 将这个工具类抽取到 common 中,方便后续代码复用 */ - public class DESCipher { - + private static Logger LOGGER = LoggerFactory.getLogger(DESCipher.class); /** - *   * 密钥 - * - *    + * 密钥 */ - - public static final String KEY = "DESDES"; - + public static final String KEY = ""; private final static String DES = "DES"; /** - *   * 加密 - * - *   * - * - *   * @param src - * - *   * 明文(字节) - * - *   * @param key - * - *   * 密钥,长度必须是8的倍数 - * - *   * @return 密文(字节) - * - *   * @throws Exception - * - *    + * 加密 + * @param src 明文(字节) + * @param key 密钥,长度必须是8的倍数 + * @return 密文(字节) + * @throws Exception */ - public static byte[] encrypt(byte[] src, byte[] key) throws Exception { - // DES算法要求有一个可信任的随机数源 - SecureRandom sr = new SecureRandom(); - + // 从原始密匙数据创建DESKeySpec对象 - DESKeySpec dks = new DESKeySpec(key); - + // 创建一个密匙工厂,然后用它把DESKeySpec转换成 - // 一个SecretKey对象 - SecretKeyFactory keyFactory = SecretKeyFactory.getInstance(DES); - SecretKey securekey = keyFactory.generateSecret(dks); - + // Cipher对象实际完成加密操作 - Cipher cipher = Cipher.getInstance(DES); // 用密匙初始化Cipher对象 - cipher.init(Cipher.ENCRYPT_MODE, securekey, sr); // 现在,获取数据并加密 - // 正式执行加密操作 - return cipher.doFinal(src); - } /** - *   * 解密 - * - *   * - * - *   * @param src - * - *   * 密文(字节) - * - *   * @param key - * - *   * 密钥,长度必须是8的倍数 - * - *   * @return 明文(字节) - * - *   * @throws Exception - * - *    + * * 解密 + * * @param src + * * 密文(字节) + * * @param key + * * 密钥,长度必须是8的倍数 + * * @return 明文(字节) + * * @throws Exception */ - public static byte[] decrypt(byte[] src, byte[] key) throws Exception { - // DES算法要求有一个可信任的随机数源 - SecureRandom sr = new SecureRandom(); // 从原始密匙数据创建一个DESKeySpec对象 - DESKeySpec dks = new DESKeySpec(key); // 创建一个密匙工厂,然后用它把DESKeySpec对象转换成 - // 一个SecretKey对象 - SecretKeyFactory keyFactory = SecretKeyFactory.getInstance(DES); - SecretKey securekey = keyFactory.generateSecret(dks); // Cipher对象实际完成解密操作 - Cipher cipher = Cipher.getInstance(DES); // 用密匙初始化Cipher对象 - cipher.init(Cipher.DECRYPT_MODE, securekey, sr); // 现在,获取数据并解密 - // 正式执行解密操作 - return cipher.doFinal(src); - } /** - *   * 加密 - * - *   * - * - *   * @param src - * - *   * 明文(字节) - * - *   * @return 密文(字节) - * - *   * @throws Exception - * - *    + * 加密 + * @param src * 明文(字节) + * @return 密文(字节) + * @throws Exception */ - public static byte[] encrypt(byte[] src) throws Exception { - return encrypt(src, KEY.getBytes()); - } /** - *   * 解密 - * - *   * - * - *   * @param src - * - *   * 密文(字节) - * - *   * @return 明文(字节) - * - *   * @throws Exception - * - *    + * 解密 + * @param src 密文(字节) + * @return 明文(字节) + * @throws Exception */ - public static byte[] decrypt(byte[] src) throws Exception { - return decrypt(src, KEY.getBytes()); - } /** - *   * 加密 - * - *   * - * - *   * @param src - * - *   * 明文(字符串) - * - *   * @return 密文(16进制字符串) - * - *   * @throws Exception - * - *    + * 加密 + * @param src 明文(字符串) + * @return 密文(16进制字符串) + * @throws Exception */ - public final static String encrypt(String src) { - try { - return byte2hex(encrypt(src.getBytes(), KEY.getBytes())); - } catch (Exception e) { - - e.printStackTrace(); - + 
LOGGER.warn(e.getMessage(), e); + } + return null; + } + + /** + * 加密 + * @param src 明文(字符串) + * @param encryptKey 加密用的秘钥 + * @return 密文(16进制字符串) + * @throws Exception + */ + public final static String encrypt(String src, String encryptKey) { + try { + return byte2hex(encrypt(src.getBytes(), encryptKey.getBytes())); + } catch (Exception e) { + LOGGER.warn(e.getMessage(), e); } - return null; - } /** - *   * 解密 - * - *   * - * - *   * @param src - * - *   * 密文(字符串) - * - *   * @return 明文(字符串) - * - *   * @throws Exception - * - *    + * 解密 + * @param src 密文(字符串) + * @return 明文(字符串) + * @throws Exception */ - public final static String decrypt(String src) { try { - return new String(decrypt(hex2byte(src.getBytes()), KEY.getBytes())); - } catch (Exception e) { - - e.printStackTrace(); - + LOGGER.warn(e.getMessage(), e); + } + return null; + } + + /** + * 解密 + * @param src 密文(字符串) + * @param decryptKey 解密用的秘钥 + * @return 明文(字符串) + * @throws Exception + */ + public final static String decrypt(String src, String decryptKey) { + try { + return new String(decrypt(hex2byte(src.getBytes()), decryptKey.getBytes())); + } catch (Exception e) { + LOGGER.warn(e.getMessage(), e); } - return null; - } /** - *   * 加密 - * - *   * - * - *   * @param src - * - *   * 明文(字节) - * - *   * @return 密文(16进制字符串) - * - *   * @throws Exception - * - *    + * 加密 + * @param src + * 明文(字节) + * @return 密文(16进制字符串) + * @throws Exception */ - public static String encryptToString(byte[] src) throws Exception { - return encrypt(new String(src)); - } /** - *   * 解密 - * - *   * - * - *   * @param src - * - *   * 密文(字节) - * - *   * @return 明文(字符串) - * - *   * @throws Exception - * - *    + * 解密 + * @param src 密文(字节) + * @return 明文(字符串) + * @throws Exception */ - public static String decryptToString(byte[] src) throws Exception { - return decrypt(new String(src)); - } public static String byte2hex(byte[] b) { - String hs = ""; - String stmp = ""; - for (int n = 0; n < b.length; n++) { - stmp = (Integer.toHexString(b[n] & 0XFF)); - if (stmp.length() == 1) - hs = hs + "0" + stmp; - else - hs = hs + stmp; - } - return hs.toUpperCase(); - } public static byte[] hex2byte(byte[] b) { - if ((b.length % 2) != 0) - - throw new IllegalArgumentException("长度不是偶数"); - + throw new IllegalArgumentException("The length is not an even number"); byte[] b2 = new byte[b.length / 2]; - for (int n = 0; n < b.length; n += 2) { - String item = new String(b, n, 2); - b2[n / 2] = (byte) Integer.parseInt(item, 16); - } return b2; - } - - /* - * public static void main(String[] args) { try { String src = "cheetah"; - * String crypto = DESCipher.encrypt(src); System.out.println("密文[" + src + - * "]:" + crypto); System.out.println("解密后:" + DESCipher.decrypt(crypto)); } - * catch (Exception e) { e.printStackTrace(); } } - */ } diff --git a/common/src/main/java/com/alibaba/datax/common/util/DataXCaseEnvUtil.java b/common/src/main/java/com/alibaba/datax/common/util/DataXCaseEnvUtil.java new file mode 100644 index 00000000..ca137b94 --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/DataXCaseEnvUtil.java @@ -0,0 +1,33 @@ +package com.alibaba.datax.common.util; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class DataXCaseEnvUtil { + + private static final Logger LOGGER = LoggerFactory.getLogger(DataXCaseEnvUtil.class); + + // datax回归测试效率提升 + private static String DATAX_AUTOTEST_RETRY_TIME = System.getenv("DATAX_AUTOTEST_RETRY_TIME"); + private static String DATAX_AUTOTEST_RETRY_INTERVAL = 
System.getenv("DATAX_AUTOTEST_RETRY_INTERVAL"); + private static String DATAX_AUTOTEST_RETRY_EXPONENTIAL = System.getenv("DATAX_AUTOTEST_RETRY_EXPONENTIAL"); + + public static int getRetryTimes(int retryTimes) { + int actualRetryTimes = DATAX_AUTOTEST_RETRY_TIME != null ? Integer.valueOf(DATAX_AUTOTEST_RETRY_TIME) : retryTimes; + // LOGGER.info("The actualRetryTimes is {}", actualRetryTimes); + return actualRetryTimes; + } + + public static long getRetryInterval(long retryInterval) { + long actualRetryInterval = DATAX_AUTOTEST_RETRY_INTERVAL != null ? Long.valueOf(DATAX_AUTOTEST_RETRY_INTERVAL) : retryInterval; + // LOGGER.info("The actualRetryInterval is {}", actualRetryInterval); + return actualRetryInterval; + } + + public static boolean getRetryExponential(boolean retryExponential) { + boolean actualRetryExponential = DATAX_AUTOTEST_RETRY_EXPONENTIAL != null ? Boolean.valueOf(DATAX_AUTOTEST_RETRY_EXPONENTIAL) : retryExponential; + // LOGGER.info("The actualRetryExponential is {}", actualRetryExponential); + return actualRetryExponential; + } +} diff --git a/common/src/main/java/com/alibaba/datax/common/util/ListUtil.java b/common/src/main/java/com/alibaba/datax/common/util/ListUtil.java index d7a5b764..a381bb90 100755 --- a/common/src/main/java/com/alibaba/datax/common/util/ListUtil.java +++ b/common/src/main/java/com/alibaba/datax/common/util/ListUtil.java @@ -6,6 +6,7 @@ import org.apache.commons.lang3.StringUtils; import java.util.ArrayList; import java.util.Collections; +import java.util.HashSet; import java.util.List; /** @@ -136,4 +137,25 @@ public final class ListUtil { return result; } + + public static Boolean checkIfHasSameValue(List listA, List listB) { + if (null == listA || listA.isEmpty() || null == listB || listB.isEmpty()) { + return false; + } + + for (String oneValue : listA) { + if (listB.contains(oneValue)) { + return true; + } + } + + return false; + } + + public static boolean checkIfAllSameValue(List listA, List listB) { + if (null == listA || listA.isEmpty() || null == listB || listB.isEmpty() || listA.size() != listB.size()) { + return false; + } + return new HashSet<>(listA).containsAll(new HashSet<>(listB)); + } } diff --git a/common/src/main/java/com/alibaba/datax/common/util/LocalStrings.properties b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings.properties new file mode 100644 index 00000000..25661f7a --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings.properties @@ -0,0 +1,54 @@ +very_like_yixiao=\u4e00{0}\u4e8c{1}\u4e09 + + +configuration.1=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef\uff0c\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6[{0}]\u4e0d\u5b58\u5728. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. +configuration.2=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6[{0}]\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.3=\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {0}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.4=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . 
+configuration.5=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.6=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u56e0\u4e3a\u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5\uff0c\u671f\u671b\u662f\u5b57\u7b26\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.7=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u6709\u8bef\uff0c\u56e0\u4e3a\u4ece[{0}]\u83b7\u53d6\u7684\u503c[{1}]\u65e0\u6cd5\u8f6c\u6362\u4e3abool\u7c7b\u578b. \u8bf7\u68c0\u67e5\u6e90\u8868\u7684\u914d\u7f6e\u5e76\u4e14\u505a\u51fa\u76f8\u5e94\u7684\u4fee\u6539. +configuration.8=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.9=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.10=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6d6e\u70b9\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.11=\u914d\u7f6e\u6587\u4ef6\u5bf9\u5e94Key[{0}]\u5e76\u4e0d\u5b58\u5728\uff0c\u8be5\u60c5\u51b5\u662f\u4ee3\u7801\u7f16\u7a0b\u9519\u8bef. \u8bf7\u8054\u7cfbDataX\u56e2\u961f\u7684\u540c\u5b66. +configuration.12=\u503c[{0}]\u65e0\u6cd5\u9002\u914d\u60a8\u63d0\u4f9b[{1}]\uff0c \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! +configuration.13=Path\u4e0d\u80fd\u4e3anull\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.14=\u8def\u5f84[{0}]\u51fa\u73b0\u975e\u6cd5\u503c\u7c7b\u578b[{1}]\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! . +configuration.15=\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.16=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.17=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u503c\u4e3anull\uff0cdatax\u65e0\u6cd5\u8bc6\u522b\u8be5\u914d\u7f6e. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.18=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. 
+configuration.19=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef\uff0c\u5217\u8868\u4e0b\u6807\u5fc5\u987b\u4e3a\u6570\u5b57\u7c7b\u578b\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{0}] \uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.20=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f!. +configuration.21=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8def\u5f84[{0}]\u4e0d\u5408\u6cd5, \u8def\u5f84\u5c42\u6b21\u4e4b\u95f4\u4e0d\u80fd\u51fa\u73b0\u7a7a\u767d\u5b57\u7b26 . +configuration.22=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u56e0\u4e3a\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f, JSON\u4e0d\u80fd\u4e3a\u7a7a\u767d. \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. +configuration.23=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f: {0} . \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. + + +listutil.1=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef\uff0cList\u4e0d\u80fd\u4e3a\u7a7a. +listutil.2=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.3=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5141\u8bb8\u91cd\u590d\u51fa\u73b0\u5728\u5217\u8868\u4e2d: [{1}]. +listutil.4=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.5=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.6=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5b58\u5728\u4e8e\u5217\u8868\u4e2d:[{1}]. +listutil.7=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.8=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. + + +rangesplitutil.1=\u5207\u5206\u4efd\u6570\u4e0d\u80fd\u5c0f\u4e8e1. \u6b64\u5904:expectSliceNumber=[{0}]. +rangesplitutil.2=\u5bf9 BigInteger \u8fdb\u884c\u5207\u5206\u65f6\uff0c\u5176\u5de6\u53f3\u533a\u95f4\u4e0d\u80fd\u4e3a null. \u6b64\u5904:left=[{0}],right=[{1}]. +rangesplitutil.3=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.4=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. +rangesplitutil.5=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.6=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. + + +retryutil.1=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2callable\u4e0d\u80fd\u4e3a\u7a7a ! +retryutil.2=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2retrytime[%d]\u4e0d\u80fd\u5c0f\u4e8e1 ! 
+retryutil.3=Exception when calling callable, \u5f02\u5e38Msg:{0} +retryutil.4=Exception when calling callable, \u5373\u5c06\u5c1d\u8bd5\u6267\u884c\u7b2c{0}\u6b21\u91cd\u8bd5,\u5171\u8ba1\u91cd\u8bd5{1}\u6b21.\u672c\u6b21\u91cd\u8bd5\u8ba1\u5212\u7b49\u5f85[{2}]ms,\u5b9e\u9645\u7b49\u5f85[{3}]ms, \u5f02\u5e38Msg:[{4}] + + +httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1}, STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5 diff --git a/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_en_US.properties b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_en_US.properties new file mode 100644 index 00000000..2074bbb9 --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_en_US.properties @@ -0,0 +1,53 @@ +very_like_yixiao=1{0}2{1}3 + + +configuration.1=Configuration information error. The configuration file [{0}] you provided does not exist. Please check your configuration files. +configuration.2=Configuration information error. Failed to read the configuration file [{0}] you provided. Error reason: {1}. Please check the permission settings of your configuration files. +configuration.3=Please check your configuration files. Failed to read the configuration file you provided. Error reason: {0}. Please check the permission settings of your configuration files. +configuration.4=The configuration file you provided contains errors. [{0}] is a required parameter and cannot be empty or blank. +configuration.5=The configuration file you provided contains errors. [{0}] is a required parameter and cannot be empty or blank. +configuration.6=Task reading configuration file error. Invalid configuration file path [{0}] value. The expected value should be of the character type: {1}. Please check your configuration and make corrections. +configuration.7=The configuration information you provided contains errors. The value [{1}] obtained from [{0}] cannot be converted to the Bool type. Please check the source table configuration and make corrections. +configuration.8=Task reading configuration file error. Invalid configuration file path [{0}] value. The expected value should be of the integer type: {1}. Please check your configuration and make corrections. +configuration.9=Task reading configuration file error. Invalid configuration file path [{0}] value. The expected value should be of the integer type: {1}. Please check your configuration and make corrections. +configuration.10=Task reading configuration file error. Invalid configuration file path [{0}] value. The expected value should be of the floating-point type: {1}. Please check your configuration and make corrections. +configuration.11=The Key [{0}] for the configuration file does not exist. This is a code programming error. Please contact the DataX team. +configuration.12=The value [{0}] cannot adapt to the [{1}] you provided. This exception represents a system programming error. Please contact the DataX developer team. +configuration.13=The path cannot be null. This exception represents a system programming error. Please contact the DataX developer team. +configuration.14=The path [{0}] has an invalid value type [{1}]. This exception represents a system programming error. Please contact the DataX developer team. +configuration.15=This exception represents a system programming error. Please contact the DataX developer team. +configuration.16=The configuration file you provided contains errors. 
The path [{0}] requires you to configure a Map object in JSON format, but the actual type found on the node is [{1}]. Please check your configuration and make corrections. +configuration.17=The configuration file you provided contains errors. The value of the path [{0}] is null and DataX cannot recognize the configuration. Please check your configuration and make corrections. +configuration.18=The configuration file you provided contains errors. The path [{0}] requires you to configure a Map object in JSON format, but the actual type found on the node is [{1}]. Please check your configuration and make corrections. +configuration.19=System programming error. The list subscript must be of the numeric type, but the actual type found on this node is [{0}]. This exception represents a system programming error. Please contact the DataX developer team. +configuration.20=System programming error. This exception represents a system programming error. Please contact the DataX developer team. +configuration.21=System programming error. Invalid path [{0}]. No spaces are allowed between path layers. +configuration.22=Configuration information error. The configuration information you provided is not in a legal JSON format. JSON cannot be blank. Please provide the configuration information in the standard JSON format. +configuration.23=Configuration information error. The configuration information you provided is not in a valid JSON format: {0}. Please provide the configuration information in the standard JSON format. + + +listutil.1=The job configuration you provided contains errors. The list cannot be empty. +listutil.2=The job configuration you provided contains errors. The list cannot be empty. +listutil.3=The job configuration information you provided contains errors. String: [{0}] is not allowed to be repeated in the list: [{1}]. +listutil.4=The job configuration you provided contains errors. The list cannot be empty. +listutil.5=The job configuration you provided contains errors. The list cannot be empty. +listutil.6=The job configuration information you provided contains errors. String: [{0}] does not exist in the list: [{1}]. +listutil.7=The job configuration you provided contains errors. The list cannot be empty. +listutil.8=The job configuration you provided contains errors. The list cannot be empty. + + +rangesplitutil.1=The slice number cannot be less than 1. Here: [expectSliceNumber]=[{0}]. +rangesplitutil.2=The left or right intervals of BigInteger character strings cannot be null when they are sliced. Here: [left]=[{0}], [right]=[{1}]. +rangesplitutil.3=The [bigInteger] parameter cannot be null. +rangesplitutil.4=Only ASCII character strings are supported for character string slicing, but the [{0}] character string is not of the ASCII type. +rangesplitutil.5=The [bigInteger] parameter cannot be null. +rangesplitutil.6=Only ASCII character strings are supported for character string slicing, but the [{0}] character string is not of the ASCII type. + + +retryutil.1=System programming error. The “callable” input parameter cannot be null. +retryutil.2=System programming error. The “retrytime[%d]” input parameter cannot be less than 1. +retryutil.3=Exception when calling callable. Exception Msg: {0} +retryutil.4=Exception when calling callable. Retry Attempt: {0} will start soon. {1} attempts in total. This attempt planned to wait for [{2}]ms, and actually waited for [{3}]ms. Exception Msg: [{4}]. + +httpclientutil.1=Request address: {0}. Request method: {1}. 
STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=The remote interface returns -1. We will try again \ No newline at end of file diff --git a/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_ja_JP.properties b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_ja_JP.properties new file mode 100644 index 00000000..d4409a8d --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_ja_JP.properties @@ -0,0 +1,53 @@ +very_like_yixiao=1{0}2{1}3 + + +configuration.1=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef\uff0c\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6[{0}]\u4e0d\u5b58\u5728. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. +configuration.2=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6[{0}]\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.3=\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {0}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.4=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.5=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.6=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u56e0\u4e3a\u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5\uff0c\u671f\u671b\u662f\u5b57\u7b26\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.7=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u6709\u8bef\uff0c\u56e0\u4e3a\u4ece[{0}]\u83b7\u53d6\u7684\u503c[{1}]\u65e0\u6cd5\u8f6c\u6362\u4e3abool\u7c7b\u578b. \u8bf7\u68c0\u67e5\u6e90\u8868\u7684\u914d\u7f6e\u5e76\u4e14\u505a\u51fa\u76f8\u5e94\u7684\u4fee\u6539. +configuration.8=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.9=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.10=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6d6e\u70b9\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.11=\u914d\u7f6e\u6587\u4ef6\u5bf9\u5e94Key[{0}]\u5e76\u4e0d\u5b58\u5728\uff0c\u8be5\u60c5\u51b5\u662f\u4ee3\u7801\u7f16\u7a0b\u9519\u8bef. \u8bf7\u8054\u7cfbDataX\u56e2\u961f\u7684\u540c\u5b66. +configuration.12=\u503c[{0}]\u65e0\u6cd5\u9002\u914d\u60a8\u63d0\u4f9b[{1}]\uff0c \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! +configuration.13=Path\u4e0d\u80fd\u4e3anull\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! 
+configuration.14=\u8def\u5f84[{0}]\u51fa\u73b0\u975e\u6cd5\u503c\u7c7b\u578b[{1}]\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! . +configuration.15=\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.16=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.17=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u503c\u4e3anull\uff0cdatax\u65e0\u6cd5\u8bc6\u522b\u8be5\u914d\u7f6e. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.18=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.19=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef\uff0c\u5217\u8868\u4e0b\u6807\u5fc5\u987b\u4e3a\u6570\u5b57\u7c7b\u578b\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{0}] \uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.20=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f!. +configuration.21=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8def\u5f84[{0}]\u4e0d\u5408\u6cd5, \u8def\u5f84\u5c42\u6b21\u4e4b\u95f4\u4e0d\u80fd\u51fa\u73b0\u7a7a\u767d\u5b57\u7b26 . +configuration.22=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u56e0\u4e3a\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f, JSON\u4e0d\u80fd\u4e3a\u7a7a\u767d. \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. +configuration.23=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f: {0} . \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. + + +listutil.1=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef\uff0cList\u4e0d\u80fd\u4e3a\u7a7a. +listutil.2=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.3=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5141\u8bb8\u91cd\u590d\u51fa\u73b0\u5728\u5217\u8868\u4e2d: [{1}]. +listutil.4=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.5=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.6=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5b58\u5728\u4e8e\u5217\u8868\u4e2d:[{1}]. +listutil.7=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.8=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. + + +rangesplitutil.1=\u5207\u5206\u4efd\u6570\u4e0d\u80fd\u5c0f\u4e8e1. \u6b64\u5904:expectSliceNumber=[{0}]. 
+rangesplitutil.2=\u5bf9 BigInteger \u8fdb\u884c\u5207\u5206\u65f6\uff0c\u5176\u5de6\u53f3\u533a\u95f4\u4e0d\u80fd\u4e3a null. \u6b64\u5904:left=[{0}],right=[{1}]. +rangesplitutil.3=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.4=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. +rangesplitutil.5=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.6=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. + + +retryutil.1=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2callable\u4e0d\u80fd\u4e3a\u7a7a ! +retryutil.2=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2retrytime[%d]\u4e0d\u80fd\u5c0f\u4e8e1 ! +retryutil.3=Exception when calling callable, \u5f02\u5e38Msg:{0} +retryutil.4=Exception when calling callable, \u5373\u5c06\u5c1d\u8bd5\u6267\u884c\u7b2c{0}\u6b21\u91cd\u8bd5,\u5171\u8ba1\u91cd\u8bd5{1}\u6b21.\u672c\u6b21\u91cd\u8bd5\u8ba1\u5212\u7b49\u5f85[{2}]ms,\u5b9e\u9645\u7b49\u5f85[{3}]ms, \u5f02\u5e38Msg:[{4}] + +httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5 \ No newline at end of file diff --git a/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_CN.properties b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_CN.properties new file mode 100644 index 00000000..533dcd52 --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_CN.properties @@ -0,0 +1,54 @@ +very_like_yixiao=\u4e00{0}\u4e8c{1}\u4e09 + + +configuration.1=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef\uff0c\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6[{0}]\u4e0d\u5b58\u5728. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. +configuration.2=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6[{0}]\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.3=\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {0}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.4=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.5=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.6=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u56e0\u4e3a\u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5\uff0c\u671f\u671b\u662f\u5b57\u7b26\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.7=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u6709\u8bef\uff0c\u56e0\u4e3a\u4ece[{0}]\u83b7\u53d6\u7684\u503c[{1}]\u65e0\u6cd5\u8f6c\u6362\u4e3abool\u7c7b\u578b. \u8bf7\u68c0\u67e5\u6e90\u8868\u7684\u914d\u7f6e\u5e76\u4e14\u505a\u51fa\u76f8\u5e94\u7684\u4fee\u6539. +configuration.8=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. 
\u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.9=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.10=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6d6e\u70b9\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.11=\u914d\u7f6e\u6587\u4ef6\u5bf9\u5e94Key[{0}]\u5e76\u4e0d\u5b58\u5728\uff0c\u8be5\u60c5\u51b5\u662f\u4ee3\u7801\u7f16\u7a0b\u9519\u8bef. \u8bf7\u8054\u7cfbDataX\u56e2\u961f\u7684\u540c\u5b66. +configuration.12=\u503c[{0}]\u65e0\u6cd5\u9002\u914d\u60a8\u63d0\u4f9b[{1}]\uff0c \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! +configuration.13=Path\u4e0d\u80fd\u4e3anull\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.14=\u8def\u5f84[{0}]\u51fa\u73b0\u975e\u6cd5\u503c\u7c7b\u578b[{1}]\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! . +configuration.15=\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.16=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.17=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u503c\u4e3anull\uff0cdatax\u65e0\u6cd5\u8bc6\u522b\u8be5\u914d\u7f6e. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.18=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.19=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef\uff0c\u5217\u8868\u4e0b\u6807\u5fc5\u987b\u4e3a\u6570\u5b57\u7c7b\u578b\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{0}] \uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.20=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f!. +configuration.21=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8def\u5f84[{0}]\u4e0d\u5408\u6cd5, \u8def\u5f84\u5c42\u6b21\u4e4b\u95f4\u4e0d\u80fd\u51fa\u73b0\u7a7a\u767d\u5b57\u7b26 . +configuration.22=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u56e0\u4e3a\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f, JSON\u4e0d\u80fd\u4e3a\u7a7a\u767d. \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. +configuration.23=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. 
\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f: {0} . \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. + + +listutil.1=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef\uff0cList\u4e0d\u80fd\u4e3a\u7a7a. +listutil.2=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.3=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5141\u8bb8\u91cd\u590d\u51fa\u73b0\u5728\u5217\u8868\u4e2d: [{1}]. +listutil.4=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.5=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.6=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5b58\u5728\u4e8e\u5217\u8868\u4e2d:[{1}]. +listutil.7=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.8=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. + + +rangesplitutil.1=\u5207\u5206\u4efd\u6570\u4e0d\u80fd\u5c0f\u4e8e1. \u6b64\u5904:expectSliceNumber=[{0}]. +rangesplitutil.2=\u5bf9 BigInteger \u8fdb\u884c\u5207\u5206\u65f6\uff0c\u5176\u5de6\u53f3\u533a\u95f4\u4e0d\u80fd\u4e3a null. \u6b64\u5904:left=[{0}],right=[{1}]. +rangesplitutil.3=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.4=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. +rangesplitutil.5=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.6=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. + + +retryutil.1=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2callable\u4e0d\u80fd\u4e3a\u7a7a ! +retryutil.2=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2retrytime[%d]\u4e0d\u80fd\u5c0f\u4e8e1 ! +retryutil.3=Exception when calling callable, \u5f02\u5e38Msg:{0} +retryutil.4=Exception when calling callable, \u5373\u5c06\u5c1d\u8bd5\u6267\u884c\u7b2c{0}\u6b21\u91cd\u8bd5,\u5171\u8ba1\u91cd\u8bd5{1}\u6b21.\u672c\u6b21\u91cd\u8bd5\u8ba1\u5212\u7b49\u5f85[{2}]ms,\u5b9e\u9645\u7b49\u5f85[{3}]ms, \u5f02\u5e38Msg:[{4}] + + +httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5 \ No newline at end of file diff --git a/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_HK.properties b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_HK.properties new file mode 100644 index 00000000..ab26ac52 --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_HK.properties @@ -0,0 +1,104 @@ +very_like_yixiao=\u4e00{0}\u4e8c{1}\u4e09 + + +configuration.1=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef\uff0c\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6[{0}]\u4e0d\u5b58\u5728. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. +configuration.2=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6[{0}]\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. 
+configuration.3=\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {0}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.4=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.5=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.6=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u56e0\u4e3a\u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5\uff0c\u671f\u671b\u662f\u5b57\u7b26\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.7=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u6709\u8bef\uff0c\u56e0\u4e3a\u4ece[{0}]\u83b7\u53d6\u7684\u503c[{1}]\u65e0\u6cd5\u8f6c\u6362\u4e3abool\u7c7b\u578b. \u8bf7\u68c0\u67e5\u6e90\u8868\u7684\u914d\u7f6e\u5e76\u4e14\u505a\u51fa\u76f8\u5e94\u7684\u4fee\u6539. +configuration.8=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.9=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.10=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6d6e\u70b9\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.11=\u914d\u7f6e\u6587\u4ef6\u5bf9\u5e94Key[{0}]\u5e76\u4e0d\u5b58\u5728\uff0c\u8be5\u60c5\u51b5\u662f\u4ee3\u7801\u7f16\u7a0b\u9519\u8bef. \u8bf7\u8054\u7cfbDataX\u56e2\u961f\u7684\u540c\u5b66. +configuration.12=\u503c[{0}]\u65e0\u6cd5\u9002\u914d\u60a8\u63d0\u4f9b[{1}]\uff0c \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! +configuration.13=Path\u4e0d\u80fd\u4e3anull\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.14=\u8def\u5f84[{0}]\u51fa\u73b0\u975e\u6cd5\u503c\u7c7b\u578b[{1}]\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! . +configuration.15=\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.16=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.17=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u503c\u4e3anull\uff0cdatax\u65e0\u6cd5\u8bc6\u522b\u8be5\u914d\u7f6e. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.18=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. 
\u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.19=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef\uff0c\u5217\u8868\u4e0b\u6807\u5fc5\u987b\u4e3a\u6570\u5b57\u7c7b\u578b\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{0}] \uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.20=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f!. +configuration.21=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8def\u5f84[{0}]\u4e0d\u5408\u6cd5, \u8def\u5f84\u5c42\u6b21\u4e4b\u95f4\u4e0d\u80fd\u51fa\u73b0\u7a7a\u767d\u5b57\u7b26 . +configuration.22=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u56e0\u4e3a\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f, JSON\u4e0d\u80fd\u4e3a\u7a7a\u767d. \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. +configuration.23=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f: {0} . \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. + + +listutil.1=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef\uff0cList\u4e0d\u80fd\u4e3a\u7a7a. +listutil.2=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.3=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5141\u8bb8\u91cd\u590d\u51fa\u73b0\u5728\u5217\u8868\u4e2d: [{1}]. +listutil.4=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.5=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.6=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5b58\u5728\u4e8e\u5217\u8868\u4e2d:[{1}]. +listutil.7=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.8=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. + + +rangesplitutil.1=\u5207\u5206\u4efd\u6570\u4e0d\u80fd\u5c0f\u4e8e1. \u6b64\u5904:expectSliceNumber=[{0}]. +rangesplitutil.2=\u5bf9 BigInteger \u8fdb\u884c\u5207\u5206\u65f6\uff0c\u5176\u5de6\u53f3\u533a\u95f4\u4e0d\u80fd\u4e3a null. \u6b64\u5904:left=[{0}],right=[{1}]. +rangesplitutil.3=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.4=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. +rangesplitutil.5=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.6=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. + + +retryutil.1=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2callable\u4e0d\u80fd\u4e3a\u7a7a ! +retryutil.2=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2retrytime[%d]\u4e0d\u80fd\u5c0f\u4e8e1 ! 
+retryutil.3=Exception when calling callable, \u5f02\u5e38Msg:{0} +retryutil.4=Exception when calling callable, \u5373\u5c06\u5c1d\u8bd5\u6267\u884c\u7b2c{0}\u6b21\u91cd\u8bd5,\u5171\u8ba1\u91cd\u8bd5{1}\u6b21.\u672c\u6b21\u91cd\u8bd5\u8ba1\u5212\u7b49\u5f85[{2}]ms,\u5b9e\u9645\u7b49\u5f85[{3}]ms, \u5f02\u5e38Msg:[{4}] + +very_like_yixiao=一{0}二{1}三 + + +configuration.1=配置資訊錯誤,您提供的配置檔案[{0}]不存在. 請檢查您的配置檔案. +configuration.2=配置資訊錯誤. 您提供配置檔案[{0}]讀取失敗,錯誤原因: {1}. 請檢查您的配置檔案的權限設定. +configuration.3=請檢查您的配置檔案. 您提供的配置檔案讀取失敗,錯誤原因: {0}. 請檢查您的配置檔案的權限設定. +configuration.4=您提供配置檔案有誤,[{0}]是必填參數,不允許為空或者留白 . +configuration.5=您提供配置檔案有誤,[{0}]是必填參數,不允許為空或者留白 . +configuration.6=任務讀取配置檔案出錯. 因為配置檔案路徑[{0}] 值不合法,期望是字符類型: {1}. 請檢查您的配置並作出修改. +configuration.7=您提供的配置資訊有誤,因為從[{0}]獲取的值[{1}]無法轉換為bool類型. 請檢查源表的配置並且做出相應的修改. +configuration.8=任務讀取配置檔案出錯. 配置檔案路徑[{0}] 值不合法, 期望是整數類型: {1}. 請檢查您的配置並作出修改. +configuration.9=任務讀取配置檔案出錯. 配置檔案路徑[{0}] 值不合法, 期望是整數類型: {1}. 請檢查您的配置並作出修改. +configuration.10=任務讀取配置檔案出錯. 配置檔案路徑[{0}] 值不合法, 期望是浮點類型: {1}. 請檢查您的配置並作出修改. +configuration.11=配置檔案對應Key[{0}]並不存在,該情況是代碼編程錯誤. 請聯絡DataX團隊的同學. +configuration.12=值[{0}]無法適配您提供[{1}], 該異常代表系統編程錯誤, 請聯絡DataX開發團隊! +configuration.13=Path不能為null,該異常代表系統編程錯誤, 請聯絡DataX開發團隊 ! +configuration.14=路徑[{0}]出現不合法值類型[{1}],該異常代表系統編程錯誤, 請聯絡DataX開發團隊! . +configuration.15=該異常代表系統編程錯誤, 請聯絡DataX開發團隊 ! +configuration.16=您提供的配置檔案有誤. 路徑[{0}]需要配置Json格式的Map對象,但該節點發現實際類型是[{1}]. 請檢查您的配置並作出修改. +configuration.17=您提供的配置檔案有誤. 路徑[{0}]值為null,datax無法識別該配置. 請檢查您的配置並作出修改. +configuration.18=您提供的配置檔案有誤. 路徑[{0}]需要配置Json格式的Map對象,但該節點發現實際類型是[{1}]. 請檢查您的配置並作出修改. +configuration.19=系統編程錯誤,清單下標必須為數字類型,但該節點發現實際類型是[{0}] ,該異常代表系統編程錯誤, 請聯絡DataX開發團隊 ! +configuration.20=系統編程錯誤, 該異常代表系統編程錯誤, 請聯絡DataX開發團隊!. +configuration.21=系統編程錯誤, 路徑[{0}]不合法, 路徑層次之間不能出現空白字符 . +configuration.22=配置資訊錯誤. 因為您提供的配置資訊不是合法的JSON格式, JSON不能為空白. 請按照標準json格式提供配置資訊. +configuration.23=配置資訊錯誤. 您提供的配置資訊不是合法的JSON格式: {0}. 請按照標準json格式提供配置資訊. + + +listutil.1=您提供的作業配置有誤,List不能為空. +listutil.2=您提供的作業配置有誤, List不能為空. +listutil.3=您提供的作業配置資訊有誤, String:[{0}]不允許重複出現在清單中: [{1}]. +listutil.4=您提供的作業配置有誤, List不能為空. +listutil.5=您提供的作業配置有誤, List不能為空. +listutil.6=您提供的作業配置資訊有誤, String:[{0}]不存在於清單中:[{1}]. +listutil.7=您提供的作業配置有誤, List不能為空. +listutil.8=您提供的作業配置有誤, List不能為空. + + +rangesplitutil.1=切分份數不能小於1. 此處:expectSliceNumber=[{0}]. +rangesplitutil.2=對 BigInteger 進行切分時,其左右區間不能為 null. 此處:left=[{0}],right=[{1}]. +rangesplitutil.3=參數 bigInteger 不能為空. +rangesplitutil.4=根據字符串進行切分時僅支援 ASCII 字符串,而字符串:[{0}]非 ASCII 字符串. +rangesplitutil.5=參數 bigInteger 不能為空. +rangesplitutil.6=根據字符串進行切分時僅支援 ASCII 字符串,而字符串:[{0}]非 ASCII 字符串. + + +retryutil.1=系統編程錯誤, 入參callable不能為空 ! +retryutil.2=系統編程錯誤, 入參retrytime[%d]不能小於1 ! 
+retryutil.3=Exception when calling callable, 異常Msg:{0} +retryutil.4=Exception when calling callable, 即將嘗試執行第{0}次重試,共計重試{1}次.本次重試計劃等待[{2}]ms,實際等待[{3}]ms, 異常Msg:[{4}] + +httpclientutil.1=\u8ACB\u6C42\u5730\u5740\uFF1A{0}, \u8ACB\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=\u9060\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C07\u91CD\u8A66 \ No newline at end of file diff --git a/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_TW.properties b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_TW.properties new file mode 100644 index 00000000..89eb1eae --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/LocalStrings_zh_TW.properties @@ -0,0 +1,104 @@ +very_like_yixiao=\u4e00{0}\u4e8c{1}\u4e09 + + +configuration.1=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef\uff0c\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6[{0}]\u4e0d\u5b58\u5728. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. +configuration.2=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6[{0}]\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.3=\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u8bfb\u53d6\u5931\u8d25\uff0c\u9519\u8bef\u539f\u56e0: {0}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u6587\u4ef6\u7684\u6743\u9650\u8bbe\u7f6e. +configuration.4=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.5=\u60a8\u63d0\u4f9b\u914d\u7f6e\u6587\u4ef6\u6709\u8bef\uff0c[{0}]\u662f\u5fc5\u586b\u53c2\u6570\uff0c\u4e0d\u5141\u8bb8\u4e3a\u7a7a\u6216\u8005\u7559\u767d . +configuration.6=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u56e0\u4e3a\u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5\uff0c\u671f\u671b\u662f\u5b57\u7b26\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.7=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u6709\u8bef\uff0c\u56e0\u4e3a\u4ece[{0}]\u83b7\u53d6\u7684\u503c[{1}]\u65e0\u6cd5\u8f6c\u6362\u4e3abool\u7c7b\u578b. \u8bf7\u68c0\u67e5\u6e90\u8868\u7684\u914d\u7f6e\u5e76\u4e14\u505a\u51fa\u76f8\u5e94\u7684\u4fee\u6539. +configuration.8=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.9=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6574\u6570\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.10=\u4efb\u52a1\u8bfb\u53d6\u914d\u7f6e\u6587\u4ef6\u51fa\u9519. \u914d\u7f6e\u6587\u4ef6\u8def\u5f84[{0}] \u503c\u975e\u6cd5, \u671f\u671b\u662f\u6d6e\u70b9\u7c7b\u578b: {1}. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.11=\u914d\u7f6e\u6587\u4ef6\u5bf9\u5e94Key[{0}]\u5e76\u4e0d\u5b58\u5728\uff0c\u8be5\u60c5\u51b5\u662f\u4ee3\u7801\u7f16\u7a0b\u9519\u8bef. \u8bf7\u8054\u7cfbDataX\u56e2\u961f\u7684\u540c\u5b66. 
+configuration.12=\u503c[{0}]\u65e0\u6cd5\u9002\u914d\u60a8\u63d0\u4f9b[{1}]\uff0c \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! +configuration.13=Path\u4e0d\u80fd\u4e3anull\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.14=\u8def\u5f84[{0}]\u51fa\u73b0\u975e\u6cd5\u503c\u7c7b\u578b[{1}]\uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f! . +configuration.15=\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.16=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.17=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u503c\u4e3anull\uff0cdatax\u65e0\u6cd5\u8bc6\u522b\u8be5\u914d\u7f6e. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.18=\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u6587\u4ef6\u6709\u8bef. \u8def\u5f84[{0}]\u9700\u8981\u914d\u7f6eJson\u683c\u5f0f\u7684Map\u5bf9\u8c61\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{1}]. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539. +configuration.19=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef\uff0c\u5217\u8868\u4e0b\u6807\u5fc5\u987b\u4e3a\u6570\u5b57\u7c7b\u578b\uff0c\u4f46\u8be5\u8282\u70b9\u53d1\u73b0\u5b9e\u9645\u7c7b\u578b\u662f[{0}] \uff0c\u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f ! +configuration.20=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8be5\u5f02\u5e38\u4ee3\u8868\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8bf7\u8054\u7cfbDataX\u5f00\u53d1\u56e2\u961f!. +configuration.21=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u8def\u5f84[{0}]\u4e0d\u5408\u6cd5, \u8def\u5f84\u5c42\u6b21\u4e4b\u95f4\u4e0d\u80fd\u51fa\u73b0\u7a7a\u767d\u5b57\u7b26 . +configuration.22=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u56e0\u4e3a\u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f, JSON\u4e0d\u80fd\u4e3a\u7a7a\u767d. \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. +configuration.23=\u914d\u7f6e\u4fe1\u606f\u9519\u8bef. \u60a8\u63d0\u4f9b\u7684\u914d\u7f6e\u4fe1\u606f\u4e0d\u662f\u5408\u6cd5\u7684JSON\u683c\u5f0f: {0} . \u8bf7\u6309\u7167\u6807\u51c6json\u683c\u5f0f\u63d0\u4f9b\u914d\u7f6e\u4fe1\u606f. + + +listutil.1=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef\uff0cList\u4e0d\u80fd\u4e3a\u7a7a. +listutil.2=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.3=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5141\u8bb8\u91cd\u590d\u51fa\u73b0\u5728\u5217\u8868\u4e2d: [{1}]. +listutil.4=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.5=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.6=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u4fe1\u606f\u6709\u8bef, String:[{0}] \u4e0d\u5b58\u5728\u4e8e\u5217\u8868\u4e2d:[{1}]. 
+listutil.7=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. +listutil.8=\u60a8\u63d0\u4f9b\u7684\u4f5c\u4e1a\u914d\u7f6e\u6709\u8bef, List\u4e0d\u80fd\u4e3a\u7a7a. + + +rangesplitutil.1=\u5207\u5206\u4efd\u6570\u4e0d\u80fd\u5c0f\u4e8e1. \u6b64\u5904:expectSliceNumber=[{0}]. +rangesplitutil.2=\u5bf9 BigInteger \u8fdb\u884c\u5207\u5206\u65f6\uff0c\u5176\u5de6\u53f3\u533a\u95f4\u4e0d\u80fd\u4e3a null. \u6b64\u5904:left=[{0}],right=[{1}]. +rangesplitutil.3=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.4=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. +rangesplitutil.5=\u53c2\u6570 bigInteger \u4e0d\u80fd\u4e3a\u7a7a. +rangesplitutil.6=\u6839\u636e\u5b57\u7b26\u4e32\u8fdb\u884c\u5207\u5206\u65f6\u4ec5\u652f\u6301 ASCII \u5b57\u7b26\u4e32\uff0c\u800c\u5b57\u7b26\u4e32:[{0}]\u975e ASCII \u5b57\u7b26\u4e32. + + +retryutil.1=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2callable\u4e0d\u80fd\u4e3a\u7a7a ! +retryutil.2=\u7cfb\u7edf\u7f16\u7a0b\u9519\u8bef, \u5165\u53c2retrytime[%d]\u4e0d\u80fd\u5c0f\u4e8e1 ! +retryutil.3=Exception when calling callable, \u5f02\u5e38Msg:{0} +retryutil.4=Exception when calling callable, \u5373\u5c06\u5c1d\u8bd5\u6267\u884c\u7b2c{0}\u6b21\u91cd\u8bd5,\u5171\u8ba1\u91cd\u8bd5{1}\u6b21.\u672c\u6b21\u91cd\u8bd5\u8ba1\u5212\u7b49\u5f85[{2}]ms,\u5b9e\u9645\u7b49\u5f85[{3}]ms, \u5f02\u5e38Msg:[{4}] + +very_like_yixiao=一{0}二{1}三 + + +configuration.1=配置資訊錯誤,您提供的配置檔案[{0}]不存在. 請檢查您的配置檔案. +configuration.2=配置資訊錯誤. 您提供配置檔案[{0}]讀取失敗,錯誤原因: {1}. 請檢查您的配置檔案的權限設定. +configuration.3=請檢查您的配置檔案. 您提供的配置檔案讀取失敗,錯誤原因: {0}. 請檢查您的配置檔案的權限設定. +configuration.4=您提供配置檔案有誤,[{0}]是必填參數,不允許為空或者留白 . +configuration.5=您提供配置檔案有誤,[{0}]是必填參數,不允許為空或者留白 . +configuration.6=任務讀取配置檔案出錯. 因為配置檔案路徑[{0}] 值不合法,期望是字符類型: {1}. 請檢查您的配置並作出修改. +configuration.7=您提供的配置資訊有誤,因為從[{0}]獲取的值[{1}]無法轉換為bool類型. 請檢查源表的配置並且做出相應的修改. +configuration.8=任務讀取配置檔案出錯. 配置檔案路徑[{0}] 值不合法, 期望是整數類型: {1}. 請檢查您的配置並作出修改. +configuration.9=任務讀取配置檔案出錯. 配置檔案路徑[{0}] 值不合法, 期望是整數類型: {1}. 請檢查您的配置並作出修改. +configuration.10=任務讀取配置檔案出錯. 配置檔案路徑[{0}] 值不合法, 期望是浮點類型: {1}. 請檢查您的配置並作出修改. +configuration.11=配置檔案對應Key[{0}]並不存在,該情況是代碼編程錯誤. 請聯絡DataX團隊的同學. +configuration.12=值[{0}]無法適配您提供[{1}], 該異常代表系統編程錯誤, 請聯絡DataX開發團隊! +configuration.13=Path不能為null,該異常代表系統編程錯誤, 請聯絡DataX開發團隊 ! +configuration.14=路徑[{0}]出現不合法值類型[{1}],該異常代表系統編程錯誤, 請聯絡DataX開發團隊! . +configuration.15=該異常代表系統編程錯誤, 請聯絡DataX開發團隊 ! +configuration.16=您提供的配置檔案有誤. 路徑[{0}]需要配置Json格式的Map對象,但該節點發現實際類型是[{1}]. 請檢查您的配置並作出修改. +configuration.17=您提供的配置檔案有誤. 路徑[{0}]值為null,datax無法識別該配置. 請檢查您的配置並作出修改. +configuration.18=您提供的配置檔案有誤. 路徑[{0}]需要配置Json格式的Map對象,但該節點發現實際類型是[{1}]. 請檢查您的配置並作出修改. +configuration.19=系統編程錯誤,清單下標必須為數字類型,但該節點發現實際類型是[{0}] ,該異常代表系統編程錯誤, 請聯絡DataX開發團隊 ! +configuration.20=系統編程錯誤, 該異常代表系統編程錯誤, 請聯絡DataX開發團隊!. +configuration.21=系統編程錯誤, 路徑[{0}]不合法, 路徑層次之間不能出現空白字符 . +configuration.22=配置資訊錯誤. 因為您提供的配置資訊不是合法的JSON格式, JSON不能為空白. 請按照標準json格式提供配置資訊. +configuration.23=配置資訊錯誤. 您提供的配置資訊不是合法的JSON格式: {0}. 請按照標準json格式提供配置資訊. + + +listutil.1=您提供的作業配置有誤,List不能為空. +listutil.2=您提供的作業配置有誤, List不能為空. +listutil.3=您提供的作業配置資訊有誤, String:[{0}]不允許重複出現在清單中: [{1}]. +listutil.4=您提供的作業配置有誤, List不能為空. +listutil.5=您提供的作業配置有誤, List不能為空. +listutil.6=您提供的作業配置資訊有誤, String:[{0}]不存在於清單中:[{1}]. +listutil.7=您提供的作業配置有誤, List不能為空. +listutil.8=您提供的作業配置有誤, List不能為空. + + +rangesplitutil.1=切分份數不能小於1. 此處:expectSliceNumber=[{0}]. +rangesplitutil.2=對 BigInteger 進行切分時,其左右區間不能為 null. 
此處:left=[{0}],right=[{1}]. +rangesplitutil.3=參數 bigInteger 不能為空. +rangesplitutil.4=根據字符串進行切分時僅支援 ASCII 字符串,而字符串:[{0}]非 ASCII 字符串. +rangesplitutil.5=參數 bigInteger 不能為空. +rangesplitutil.6=根據字符串進行切分時僅支援 ASCII 字符串,而字符串:[{0}]非 ASCII 字符串. + + +retryutil.1=系統編程錯誤, 入參callable不能為空 ! +retryutil.2=系統編程錯誤, 入參retrytime[%d]不能小於1 ! +retryutil.3=Exception when calling callable, 異常Msg:{0} +retryutil.4=Exception when calling callable, 即將嘗試執行第{0}次重試,共計重試{1}次.本次重試計劃等待[{2}]ms,實際等待[{3}]ms, 異常Msg:[{4}] + +httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3} +httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5 \ No newline at end of file diff --git a/common/src/main/java/com/alibaba/datax/common/util/MessageSource.java b/common/src/main/java/com/alibaba/datax/common/util/MessageSource.java new file mode 100644 index 00000000..d2411328 --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/MessageSource.java @@ -0,0 +1,207 @@ +package com.alibaba.datax.common.util; + +import java.text.MessageFormat; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.MissingResourceException; +import java.util.ResourceBundle; +import java.util.TimeZone; + +import org.apache.commons.lang3.LocaleUtils; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class MessageSource { + private static final Logger LOG = LoggerFactory.getLogger(MessageSource.class); + private static Map resourceBundleCache = new HashMap(); + public static Locale locale = null; + public static TimeZone timeZone = null; + private ResourceBundle resourceBundle = null; + + private MessageSource(ResourceBundle resourceBundle) { + this.resourceBundle = resourceBundle; + } + + /** + * @param baseName + * demo: javax.servlet.http.LocalStrings + * + * @throws MissingResourceException + * - if no resource bundle for the specified base name can be + * found + * */ + public static MessageSource loadResourceBundle(String baseName) { + return loadResourceBundle(baseName, MessageSource.locale, + MessageSource.timeZone); + } + + /** + * @param clazz + * 根据其获取package name + * */ + public static MessageSource loadResourceBundle(Class clazz) { + return loadResourceBundle(clazz.getPackage().getName()); + } + + /** + * @param clazz + * 根据其获取package name + * */ + public static MessageSource loadResourceBundle(Class clazz, + Locale locale, TimeZone timeZone) { + return loadResourceBundle(clazz.getPackage().getName(), locale, + timeZone); + } + + /** + * warn: + * ok: ResourceBundle.getBundle("xxx.LocalStrings", Locale.getDefault(), LoadUtil.getJarLoader(PluginType.WRITER, "odpswriter")) + * error: ResourceBundle.getBundle("xxx.LocalStrings", Locale.getDefault(), LoadUtil.getJarLoader(PluginType.WRITER, "odpswriter")) + * @param baseName + * demo: javax.servlet.http.LocalStrings + * + * @throws MissingResourceException + * - if no resource bundle for the specified base name can be + * found + * + * */ + public static MessageSource loadResourceBundle(String baseName, + Locale locale, TimeZone timeZone) { + ResourceBundle resourceBundle = null; + if (null == locale) { + locale = LocaleUtils.toLocale("en_US"); + } + if (null == timeZone) { + timeZone = TimeZone.getDefault(); + } + String resourceBaseName = String.format("%s.LocalStrings", baseName); + LOG.debug( + "initEnvironment MessageSource.locale[{}], MessageSource.timeZone[{}]", + MessageSource.locale, 
MessageSource.timeZone); + LOG.debug( + "loadResourceBundle with locale[{}], timeZone[{}], baseName[{}]", + locale, timeZone, resourceBaseName); + // warn: 这个map的维护需要考虑Local吗, no? + if (!MessageSource.resourceBundleCache.containsKey(resourceBaseName)) { + ClassLoader clazzLoader = Thread.currentThread() + .getContextClassLoader(); + LOG.debug("loadResourceBundle classLoader:{}", clazzLoader); + resourceBundle = ResourceBundle.getBundle(resourceBaseName, locale, + clazzLoader); + MessageSource.resourceBundleCache.put(resourceBaseName, + resourceBundle); + } else { + resourceBundle = MessageSource.resourceBundleCache + .get(resourceBaseName); + } + + return new MessageSource(resourceBundle); + } + + public static boolean unloadResourceBundle(Class clazz) { + String baseName = clazz.getPackage().getName(); + String resourceBaseName = String.format("%s.LocalStrings", baseName); + if (!MessageSource.resourceBundleCache.containsKey(resourceBaseName)) { + return false; + } else { + MessageSource.resourceBundleCache.remove(resourceBaseName); + return true; + } + } + + public static MessageSource reloadResourceBundle(Class clazz) { + MessageSource.unloadResourceBundle(clazz); + return MessageSource.loadResourceBundle(clazz); + } + + public static void setEnvironment(Locale locale, TimeZone timeZone) { + // warn: 设置默认? @2018.03.21 将此处注释移除,否则在国际化多时区下会遇到问题 + Locale.setDefault(locale); + TimeZone.setDefault(timeZone); + MessageSource.locale = locale; + MessageSource.timeZone = timeZone; + LOG.info("use Locale: {} timeZone: {}", locale, timeZone); + } + + public static void init(final Configuration configuration) { + Locale locale2Set = Locale.getDefault(); + String localeStr = configuration.getString("common.column.locale", "zh_CN");// 默认操作系统的 + if (StringUtils.isNotBlank(localeStr)) { + try { + locale2Set = LocaleUtils.toLocale(localeStr); + } catch (Exception e) { + LOG.warn("ignored locale parse exception: {}", e.getMessage()); + } + } + + TimeZone timeZone2Set = TimeZone.getDefault(); + String timeZoneStr = configuration.getString("common.column.timeZone");// 默认操作系统的 + if (StringUtils.isNotBlank(timeZoneStr)) { + try { + timeZone2Set = TimeZone.getTimeZone(timeZoneStr); + } catch (Exception e) { + LOG.warn("ignored timezone parse exception: {}", e.getMessage()); + } + } + + LOG.info("JVM TimeZone: {}, Locale: {}", timeZone2Set.getID(), locale2Set); + MessageSource.setEnvironment(locale2Set, timeZone2Set); + } + + public static void clearCache() { + MessageSource.resourceBundleCache.clear(); + } + + public String message(String code) { + return this.messageWithDefaultMessage(code, null); + } + + public String message(String code, String args1) { + return this.messageWithDefaultMessage(code, null, + new Object[] { args1 }); + } + + public String message(String code, String args1, String args2) { + return this.messageWithDefaultMessage(code, null, new Object[] { args1, + args2 }); + } + + public String message(String code, String args1, String args2, String args3) { + return this.messageWithDefaultMessage(code, null, new Object[] { args1, + args2, args3 }); + } + + // 上面几个重载可以应对大多数情况, 避免使用这个可以提高性能的 + public String message(String code, Object... 
args) { + return this.messageWithDefaultMessage(code, null, args); + } + + public String messageWithDefaultMessage(String code, String defaultMessage) { + return this.messageWithDefaultMessage(code, defaultMessage, + new Object[] {}); + } + + /** + * @param args + * MessageFormat会依次调用对应对象的toString方法 + * */ + public String messageWithDefaultMessage(String code, String defaultMessage, + Object... args) { + String messageStr = null; + try { + messageStr = this.resourceBundle.getString(code); + } catch (MissingResourceException e) { + messageStr = defaultMessage; + } + if (null != messageStr && null != args && args.length > 0) { + // warn: see loadResourceBundle set default locale + return MessageFormat.format(messageStr, args); + } else { + return messageStr; + } + + } +} diff --git a/common/src/main/java/com/alibaba/datax/common/util/RangeSplitUtil.java b/common/src/main/java/com/alibaba/datax/common/util/RangeSplitUtil.java index 791f9ea1..ec353730 100755 --- a/common/src/main/java/com/alibaba/datax/common/util/RangeSplitUtil.java +++ b/common/src/main/java/com/alibaba/datax/common/util/RangeSplitUtil.java @@ -206,4 +206,27 @@ public final class RangeSplitUtil { return true; } + + /** + * List拆分工具函数,主要用于reader插件的split拆分逻辑 + * */ + public static List> doListSplit(List objects, int adviceNumber) { + List> splitLists = new ArrayList>(); + if (null == objects) { + return splitLists; + } + long[] splitPoint = RangeSplitUtil.doLongSplit(0, objects.size(), adviceNumber); + for (int startIndex = 0; startIndex < splitPoint.length - 1; startIndex++) { + List objectsForTask = new ArrayList(); + int endIndex = startIndex + 1; + for (long i = splitPoint[startIndex]; i < splitPoint[endIndex]; i++) { + objectsForTask.add(objects.get((int) i)); + } + if (!objectsForTask.isEmpty()) { + splitLists.add(objectsForTask); + } + } + return splitLists; + } + } diff --git a/common/src/main/java/com/alibaba/datax/common/util/StrUtil.java b/common/src/main/java/com/alibaba/datax/common/util/StrUtil.java index 82222b0d..867a9516 100755 --- a/common/src/main/java/com/alibaba/datax/common/util/StrUtil.java +++ b/common/src/main/java/com/alibaba/datax/common/util/StrUtil.java @@ -3,6 +3,8 @@ package com.alibaba.datax.common.util; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.Validate; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.text.DecimalFormat; import java.util.HashMap; import java.util.Map; @@ -82,4 +84,20 @@ public class StrUtil { return s.substring(0, headLength) + "..." 
+ s.substring(s.length() - tailLength); } + public static String getMd5(String plainText) { + try { + StringBuilder builder = new StringBuilder(); + for (byte b : MessageDigest.getInstance("MD5").digest(plainText.getBytes())) { + int i = b & 0xff; + if (i < 0x10) { + builder.append('0'); + } + builder.append(Integer.toHexString(i)); + } + return builder.toString(); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + } + } diff --git a/core/pom.xml b/core/pom.xml index 174a18d3..7685001b 100755 --- a/core/pom.xml +++ b/core/pom.xml @@ -41,7 +41,7 @@ org.apache.httpcomponents httpclient - 4.5 + 4.5.13 org.apache.httpcomponents @@ -100,6 +100,14 @@ + + + src/main/java + + **/*.properties + + + org.apache.maven.plugins diff --git a/core/src/main/bin/datax.py b/core/src/main/bin/datax.py index 1099ed3a..4811ae8d 100755 --- a/core/src/main/bin/datax.py +++ b/core/src/main/bin/datax.py @@ -1,23 +1,26 @@ #!/usr/bin/env python # -*- coding:utf-8 -*- -import sys -import os -import signal -import subprocess -import time -import re -import socket -import json -from optparse import OptionParser -from optparse import OptionGroup -from string import Template import codecs +import json +import os import platform +import re +import signal +import socket +import subprocess +import sys +import time +from optparse import OptionGroup +from optparse import OptionParser +from string import Template + +ispy2 = sys.version_info.major == 2 def isWindows(): return platform.system() == 'Windows' + DATAX_HOME = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) DATAX_VERSION = 'DATAX-OPENSOURCE-3.0' @@ -52,13 +55,19 @@ def getLocalIp(): def suicide(signum, e): global child_process - print >> sys.stderr, "[Error] DataX receive unexpected signal %d, starts to suicide." % (signum) + if ispy2: + print >> sys.stderr, "[Error] DataX receive unexpected signal %d, starts to suicide." % (signum) + else: + print("[Error] DataX receive unexpected signal %d, starts to suicide." % (signum), sys.stderr) if child_process: child_process.send_signal(signal.SIGQUIT) time.sleep(1) child_process.kill() - print >> sys.stderr, "DataX Process was killed ! you did ?" + if ispy2: + print >> sys.stderr, "DataX Process was killed ! you did ?" + else: + print("DataX Process was killed ! you did ?", sys.stderr) sys.exit(RET_STATE["KILL"]) @@ -92,10 +101,10 @@ def getOptionParser(): 'if you have mutiple parameters: -p"-DtableName=your-table-name -DcolumnName=your-column-name".' 
'Note: you should config in you job tableName with ${tableName}.') prodEnvOptionGroup.add_option("-r", "--reader", metavar="", - action="store", dest="reader",type="string", + action="store", dest="reader", type="string", help='View job config[reader] template, eg: mysqlreader,streamreader') prodEnvOptionGroup.add_option("-w", "--writer", metavar="", - action="store", dest="writer",type="string", + action="store", dest="writer", type="string", help='View job config[writer] template, eg: mysqlwriter,streamwriter') parser.add_option_group(prodEnvOptionGroup) @@ -108,45 +117,50 @@ def getOptionParser(): parser.add_option_group(devEnvOptionGroup) return parser + def generateJobConfigTemplate(reader, writer): - readerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n" % (reader,reader,reader) - writerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n " % (writer,writer,writer) - print readerRef - print writerRef + readerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n" % ( + reader, reader, reader) + writerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n " % ( + writer, writer, writer) + print(readerRef) + print(writerRef) jobGuid = 'Please save the following configuration as a json file and use\n python {DATAX_HOME}/bin/datax.py {JSON_FILE_NAME}.json \nto run the job.\n' - print jobGuid - jobTemplate={ - "job": { - "setting": { - "speed": { - "channel": "" - } - }, - "content": [ - { - "reader": {}, - "writer": {} - } - ] - } + print(jobGuid) + jobTemplate = { + "job": { + "setting": { + "speed": { + "channel": "" + } + }, + "content": [ + { + "reader": {}, + "writer": {} + } + ] + } } - readerTemplatePath = "%s/plugin/reader/%s/plugin_job_template.json" % (DATAX_HOME,reader) - writerTemplatePath = "%s/plugin/writer/%s/plugin_job_template.json" % (DATAX_HOME,writer) + readerTemplatePath = "%s/plugin/reader/%s/plugin_job_template.json" % (DATAX_HOME, reader) + writerTemplatePath = "%s/plugin/writer/%s/plugin_job_template.json" % (DATAX_HOME, writer) try: - readerPar = readPluginTemplate(readerTemplatePath); - except Exception, e: - print "Read reader[%s] template error: can\'t find file %s" % (reader,readerTemplatePath) + readerPar = readPluginTemplate(readerTemplatePath) + except: + print("Read reader[%s] template error: can\'t find file %s" % (reader, readerTemplatePath)) try: - writerPar = readPluginTemplate(writerTemplatePath); - except Exception, e: - print "Read writer[%s] template error: : can\'t find file %s" % (writer,writerTemplatePath) - jobTemplate['job']['content'][0]['reader'] = readerPar; - jobTemplate['job']['content'][0]['writer'] = writerPar; - print json.dumps(jobTemplate, indent=4, sort_keys=True) + writerPar = readPluginTemplate(writerTemplatePath) + except: + print("Read writer[%s] template error: : can\'t find file %s" % (writer, writerTemplatePath)) + jobTemplate['job']['content'][0]['reader'] = readerPar + jobTemplate['job']['content'][0]['writer'] = writerPar + print(json.dumps(jobTemplate, indent=4, sort_keys=True)) + def readPluginTemplate(plugin): with open(plugin, 'r') as f: - return json.load(f) + return json.load(f) + def isUrl(path): if not path: @@ -168,7 +182,7 @@ def buildStartCommand(options, args): if options.remoteDebug: tempJVMCommand = tempJVMCommand + " " + REMOTE_DEBUG_CONFIG - print 'local ip: ', getLocalIp() + print('local ip: ', 
getLocalIp()) if options.loglevel: tempJVMCommand = tempJVMCommand + " " + ("-Dloglevel=%s" % (options.loglevel)) @@ -198,11 +212,11 @@ def buildStartCommand(options, args): def printCopyright(): - print ''' + print(''' DataX (%s), From Alibaba ! Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved. -''' % DATAX_VERSION +''' % DATAX_VERSION) sys.stdout.flush() @@ -211,7 +225,7 @@ if __name__ == "__main__": parser = getOptionParser() options, args = parser.parse_args(sys.argv[1:]) if options.reader is not None and options.writer is not None: - generateJobConfigTemplate(options.reader,options.writer) + generateJobConfigTemplate(options.reader, options.writer) sys.exit(RET_STATE['OK']) if len(args) != 1: parser.print_help() diff --git a/core/src/main/java/com/alibaba/datax/core/Engine.java b/core/src/main/java/com/alibaba/datax/core/Engine.java index f80d792f..4ba9fc18 100755 --- a/core/src/main/java/com/alibaba/datax/core/Engine.java +++ b/core/src/main/java/com/alibaba/datax/core/Engine.java @@ -6,6 +6,7 @@ import com.alibaba.datax.common.spi.ErrorCode; import com.alibaba.datax.common.statistics.PerfTrace; import com.alibaba.datax.common.statistics.VMInfo; import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.MessageSource; import com.alibaba.datax.core.job.JobContainer; import com.alibaba.datax.core.taskgroup.TaskGroupContainer; import com.alibaba.datax.core.util.ConfigParser; @@ -73,21 +74,14 @@ public class Engine { boolean traceEnable = allConf.getBool(CoreConstant.DATAX_CORE_CONTAINER_TRACE_ENABLE, true); boolean perfReportEnable = allConf.getBool(CoreConstant.DATAX_CORE_REPORT_DATAX_PERFLOG, true); - //standlone模式的datax shell任务不进行汇报 + //standalone模式的 datax shell任务不进行汇报 if(instanceId == -1){ perfReportEnable = false; } - int priority = 0; - try { - priority = Integer.parseInt(System.getenv("SKYNET_PRIORITY")); - }catch (NumberFormatException e){ - LOG.warn("prioriy set to 0, because NumberFormatException, the value is: "+System.getProperty("PROIORY")); - } - Configuration jobInfoConfig = allConf.getConfiguration(CoreConstant.DATAX_JOB_JOBINFO); //初始化PerfTrace - PerfTrace perfTrace = PerfTrace.getInstance(isJob, instanceId, taskGroupId, priority, traceEnable); + PerfTrace perfTrace = PerfTrace.getInstance(isJob, instanceId, taskGroupId, traceEnable); perfTrace.setJobInfo(jobInfoConfig,perfReportEnable,channelNumber); container.start(); @@ -135,6 +129,9 @@ public class Engine { RUNTIME_MODE = cl.getOptionValue("mode"); Configuration configuration = ConfigParser.parse(jobPath); + // 绑定i18n信息 + MessageSource.init(configuration); + MessageSource.reloadResourceBundle(Configuration.class); long jobId; if (!"-1".equalsIgnoreCase(jobIdString)) { diff --git a/core/src/main/java/com/alibaba/datax/core/LocalStrings.properties b/core/src/main/java/com/alibaba/datax/core/LocalStrings.properties new file mode 100644 index 00000000..97d46f07 --- /dev/null +++ b/core/src/main/java/com/alibaba/datax/core/LocalStrings.properties @@ -0,0 +1,5 @@ +very_like_yixiao=\u4e00{0}\u4e8c{1}\u4e09 + +engine.1=\u975e standalone \u6a21\u5f0f\u5fc5\u987b\u5728 URL \u4e2d\u63d0\u4f9b\u6709\u6548\u7684 jobId. 
+engine.2=\n\n\u7ecfDataX\u667a\u80fd\u5206\u6790,\u8be5\u4efb\u52a1\u6700\u53ef\u80fd\u7684\u9519\u8bef\u539f\u56e0\u662f:\n{0} + diff --git a/core/src/main/java/com/alibaba/datax/core/LocalStrings_en_US.properties b/core/src/main/java/com/alibaba/datax/core/LocalStrings_en_US.properties new file mode 100644 index 00000000..7ff93838 --- /dev/null +++ b/core/src/main/java/com/alibaba/datax/core/LocalStrings_en_US.properties @@ -0,0 +1,5 @@ +very_like_yixiao=1{0}2{1}3 + +engine.1=A valid job ID must be provided in the URL for the non-standalone mode. +engine.2=\n\nThrough the intelligent analysis by DataX, the most likely error reason of this task is: \n{0} + diff --git a/core/src/main/java/com/alibaba/datax/core/LocalStrings_ja_JP.properties b/core/src/main/java/com/alibaba/datax/core/LocalStrings_ja_JP.properties new file mode 100644 index 00000000..dfbad970 --- /dev/null +++ b/core/src/main/java/com/alibaba/datax/core/LocalStrings_ja_JP.properties @@ -0,0 +1,5 @@ +very_like_yixiao=1{0}2{1}3 + +engine.1=\u975e standalone \u6a21\u5f0f\u5fc5\u987b\u5728 URL \u4e2d\u63d0\u4f9b\u6709\u6548\u7684 jobId. +engine.2=\n\n\u7ecfDataX\u667a\u80fd\u5206\u6790,\u8be5\u4efb\u52a1\u6700\u53ef\u80fd\u7684\u9519\u8bef\u539f\u56e0\u662f:\n{0} + diff --git a/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_CN.properties b/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_CN.properties new file mode 100644 index 00000000..97d46f07 --- /dev/null +++ b/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_CN.properties @@ -0,0 +1,5 @@ +very_like_yixiao=\u4e00{0}\u4e8c{1}\u4e09 + +engine.1=\u975e standalone \u6a21\u5f0f\u5fc5\u987b\u5728 URL \u4e2d\u63d0\u4f9b\u6709\u6548\u7684 jobId. +engine.2=\n\n\u7ecfDataX\u667a\u80fd\u5206\u6790,\u8be5\u4efb\u52a1\u6700\u53ef\u80fd\u7684\u9519\u8bef\u539f\u56e0\u662f:\n{0} + diff --git a/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_HK.properties b/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_HK.properties new file mode 100644 index 00000000..2587e0ab --- /dev/null +++ b/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_HK.properties @@ -0,0 +1,10 @@ +very_like_yixiao=\u4e00{0}\u4e8c{1}\u4e09 + +engine.1=\u975e standalone \u6a21\u5f0f\u5fc5\u987b\u5728 URL \u4e2d\u63d0\u4f9b\u6709\u6548\u7684 jobId. +engine.2=\n\n\u7ecfDataX\u667a\u80fd\u5206\u6790,\u8be5\u4efb\u52a1\u6700\u53ef\u80fd\u7684\u9519\u8bef\u539f\u56e0\u662f:\n{0} + +very_like_yixiao=一{0}二{1}三 + +engine.1=非 standalone 模式必須在 URL 中提供有效的 jobId. +engine.2=\n\n經DataX智能分析,該任務最可能的錯誤原因是:\n{0} + diff --git a/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_TW.properties b/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_TW.properties new file mode 100644 index 00000000..2587e0ab --- /dev/null +++ b/core/src/main/java/com/alibaba/datax/core/LocalStrings_zh_TW.properties @@ -0,0 +1,10 @@ +very_like_yixiao=\u4e00{0}\u4e8c{1}\u4e09 + +engine.1=\u975e standalone \u6a21\u5f0f\u5fc5\u987b\u5728 URL \u4e2d\u63d0\u4f9b\u6709\u6548\u7684 jobId. +engine.2=\n\n\u7ecfDataX\u667a\u80fd\u5206\u6790,\u8be5\u4efb\u52a1\u6700\u53ef\u80fd\u7684\u9519\u8bef\u539f\u56e0\u662f:\n{0} + +very_like_yixiao=一{0}二{1}三 + +engine.1=非 standalone 模式必須在 URL 中提供有效的 jobId. 
+engine.2=\n\n經DataX智能分析,該任務最可能的錯誤原因是:\n{0} + diff --git a/core/src/main/java/com/alibaba/datax/core/container/util/JobAssignUtil.java b/core/src/main/java/com/alibaba/datax/core/container/util/JobAssignUtil.java index 31ba60a4..cbd0d2a1 100755 --- a/core/src/main/java/com/alibaba/datax/core/container/util/JobAssignUtil.java +++ b/core/src/main/java/com/alibaba/datax/core/container/util/JobAssignUtil.java @@ -114,7 +114,7 @@ public final class JobAssignUtil { * 需要实现的效果通过例子来说是: *
      * a 库上有表:0, 1, 2
-     * a 库上有表:3, 4
+     * b 库上有表:3, 4
      * c 库上有表:5, 6, 7
      *
      * 如果有 4个 taskGroup
diff --git a/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java b/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java
index 26b2989f..49f5a0a1 100755
--- a/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java
+++ b/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java
@@ -27,7 +27,7 @@ import com.alibaba.datax.core.util.container.ClassLoaderSwapper;
 import com.alibaba.datax.core.util.container.CoreConstant;
 import com.alibaba.datax.core.util.container.LoadUtil;
 import com.alibaba.datax.dataxservice.face.domain.enums.ExecuteMode;
-import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson2.JSON;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.lang.Validate;
 import org.slf4j.Logger;
diff --git a/core/src/main/java/com/alibaba/datax/core/statistics/communication/CommunicationTool.java b/core/src/main/java/com/alibaba/datax/core/statistics/communication/CommunicationTool.java
index 51a601ae..1815ea02 100755
--- a/core/src/main/java/com/alibaba/datax/core/statistics/communication/CommunicationTool.java
+++ b/core/src/main/java/com/alibaba/datax/core/statistics/communication/CommunicationTool.java
@@ -2,7 +2,7 @@ package com.alibaba.datax.core.statistics.communication;
 
 import com.alibaba.datax.common.statistics.PerfTrace;
 import com.alibaba.datax.common.util.StrUtil;
-import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson2.JSON;
 import org.apache.commons.lang.Validate;
 
 import java.text.DecimalFormat;
diff --git a/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/StdoutPluginCollector.java b/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/StdoutPluginCollector.java
index 8b2a8378..d88ad0a8 100755
--- a/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/StdoutPluginCollector.java
+++ b/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/StdoutPluginCollector.java
@@ -6,7 +6,7 @@ import com.alibaba.datax.common.util.Configuration;
 import com.alibaba.datax.core.statistics.communication.Communication;
 import com.alibaba.datax.core.util.container.CoreConstant;
 import com.alibaba.datax.core.statistics.plugin.task.util.DirtyRecord;
-import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson2.JSON;
 
 import org.apache.commons.lang3.StringUtils;
 import org.slf4j.Logger;
diff --git a/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/util/DirtyRecord.java b/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/util/DirtyRecord.java
index fdc5d821..caa4cb5b 100755
--- a/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/util/DirtyRecord.java
+++ b/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/util/DirtyRecord.java
@@ -4,22 +4,25 @@ import com.alibaba.datax.common.element.Column;
 import com.alibaba.datax.common.element.Record;
 import com.alibaba.datax.common.exception.DataXException;
 import com.alibaba.datax.core.util.FrameworkErrorCode;
-import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson2.JSON;
 
 import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;
+import java.util.Map;
 
 public class DirtyRecord implements Record {
 	private List columns = new ArrayList();
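+	// meta is copied from the original record in asDirtyRecord(), so the dirty record keeps that record's metadata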
+	private Map meta;
 
 	public static DirtyRecord asDirtyRecord(final Record record) {
 		DirtyRecord result = new DirtyRecord();
 		for (int i = 0; i < record.getColumnNumber(); i++) {
 			result.addColumn(record.getColumn(i));
 		}
+		result.setMeta(record.getMeta());
 
 		return result;
 	}
@@ -65,6 +68,16 @@ public class DirtyRecord implements Record {
 				"该方法不支持!");
 	}
 
+	@Override
+	public void setMeta(Map meta) {
+		this.meta = meta;
+	}
+
+	@Override
+	public Map getMeta() {
+		return this.meta;
+	}
+
 	public List getColumns() {
 		return columns;
 	}
@@ -119,6 +132,12 @@ class DirtyColumn extends Column {
 		throw DataXException.asDataXException(FrameworkErrorCode.RUNTIME_ERROR,
 				"该方法不支持!");
 	}
+	
+	@Override
+	public Date asDate(String dateFormat) {
+		throw DataXException.asDataXException(FrameworkErrorCode.RUNTIME_ERROR,
+				"该方法不支持!");
+	}
 
 	@Override
 	public byte[] asBytes() {
diff --git a/core/src/main/java/com/alibaba/datax/core/taskgroup/TaskGroupContainer.java b/core/src/main/java/com/alibaba/datax/core/taskgroup/TaskGroupContainer.java
index c30c94d9..b4b45695 100755
--- a/core/src/main/java/com/alibaba/datax/core/taskgroup/TaskGroupContainer.java
+++ b/core/src/main/java/com/alibaba/datax/core/taskgroup/TaskGroupContainer.java
@@ -27,7 +27,7 @@ import com.alibaba.datax.core.util.TransformerUtil;
 import com.alibaba.datax.core.util.container.CoreConstant;
 import com.alibaba.datax.core.util.container.LoadUtil;
 import com.alibaba.datax.dataxservice.face.domain.enums.State;
-import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson2.JSON;
 import org.apache.commons.lang3.Validate;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
diff --git a/core/src/main/java/com/alibaba/datax/core/transport/record/DefaultRecord.java b/core/src/main/java/com/alibaba/datax/core/transport/record/DefaultRecord.java
index 2598bc8c..1dfa02e8 100755
--- a/core/src/main/java/com/alibaba/datax/core/transport/record/DefaultRecord.java
+++ b/core/src/main/java/com/alibaba/datax/core/transport/record/DefaultRecord.java
@@ -5,7 +5,7 @@ import com.alibaba.datax.common.element.Record;
 import com.alibaba.datax.common.exception.DataXException;
 import com.alibaba.datax.core.util.ClassSize;
 import com.alibaba.datax.core.util.FrameworkErrorCode;
-import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson2.JSON;
 
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -27,6 +27,8 @@ public class DefaultRecord implements Record {
 	// 首先是Record本身需要的内存
 	private int memorySize = ClassSize.DefaultRecordHead;
 
+	private Map meta;
+
 	public DefaultRecord() {
 		this.columns = new ArrayList(RECORD_AVERGAE_COLUMN_NUMBER);
 	}
@@ -83,6 +85,16 @@ public class DefaultRecord implements Record {
 		return memorySize;
 	}
 
+	@Override
+	public void setMeta(Map meta) {
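+		// keeps per-record key/value metadata alongside the columns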
+		this.meta = meta;
+	}
+
+	@Override
+	public Map getMeta() {
+		return this.meta;
+	}
+
 	private void decrByteSize(final Column column) {
 		if (null == column) {
 			return;
diff --git a/core/src/main/java/com/alibaba/datax/core/transport/record/TerminateRecord.java b/core/src/main/java/com/alibaba/datax/core/transport/record/TerminateRecord.java
index 928609ab..7cb1cff1 100755
--- a/core/src/main/java/com/alibaba/datax/core/transport/record/TerminateRecord.java
+++ b/core/src/main/java/com/alibaba/datax/core/transport/record/TerminateRecord.java
@@ -3,6 +3,8 @@ package com.alibaba.datax.core.transport.record;
 import com.alibaba.datax.common.element.Column;
 import com.alibaba.datax.common.element.Record;
 
+import java.util.Map;
+
 /**
  * 作为标示 生产者已经完成生产的标志
  * 
@@ -41,6 +43,16 @@ public class TerminateRecord implements Record {
 		return 0;
 	}
 
+	@Override
+	public void setMeta(Map meta) {
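+		// intentionally a no-op: TerminateRecord only marks that the producer has finished, so no meta is kept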
+
+	}
+
+	@Override
+	public Map getMeta() {
+		return null;
+	}
+
 	@Override
 	public void setColumn(int i, Column column) {
 		return;
diff --git a/core/src/main/java/com/alibaba/datax/core/transport/transformer/DigestTransformer.java b/core/src/main/java/com/alibaba/datax/core/transport/transformer/DigestTransformer.java
new file mode 100644
index 00000000..d2bf1431
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/transport/transformer/DigestTransformer.java
@@ -0,0 +1,87 @@
+package com.alibaba.datax.core.transport.transformer;
+
+import com.alibaba.datax.common.element.Column;
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.element.StringColumn;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.transformer.Transformer;
+
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.lang.StringUtils;
+
+import java.util.Arrays;
+
+/**
+ * no comments.
+ *
+ * @author XuDaojie
+ * @since 2021-08-16
+ */
+public class DigestTransformer extends Transformer {
+
+    private static final String MD5 = "md5";
+    private static final String SHA1 = "sha1";
+    private static final String TO_UPPER_CASE = "toUpperCase";
+    private static final String TO_LOWER_CASE = "toLowerCase";
+
+    public DigestTransformer() {
+        setTransformerName("dx_digest");
+    }
+
+    @Override
+    public Record evaluate(Record record, Object... paras) {
+
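+        // paras[0]: target column index, paras[1]: digest type ("md5" or "sha1"), paras[2]: output case ("toUpperCase" or "toLowerCase")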
+        int columnIndex;
+        String type;
+        String charType;
+
+        try {
+            if (paras.length != 3) {
+                throw new RuntimeException("dx_digest paras length must be 3");
+            }
+
+            columnIndex = (Integer) paras[0];
+            type = (String) paras[1];
+            charType = (String) paras[2];
+
+            if (!StringUtils.equalsIgnoreCase(MD5, type) && !StringUtils.equalsIgnoreCase(SHA1, type)) {
+                throw new RuntimeException("dx_digest paras index 1 must be md5 or sha1");
+            }
+            if (!StringUtils.equalsIgnoreCase(TO_UPPER_CASE, charType) && !StringUtils.equalsIgnoreCase(TO_LOWER_CASE, charType)) {
+                throw new RuntimeException("dx_digest paras index 2 must be toUpperCase or toLowerCase");
+            }
+        } catch (Exception e) {
+            throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_ILLEGAL_PARAMETER, "paras:" + Arrays.asList(paras) + " => " + e.getMessage());
+        }
+
+        Column column = record.getColumn(columnIndex);
+
+        try {
+            String oriValue = column.asString();
+
+            // 如果字段为空,作为空字符串处理
+            if (oriValue == null) {
+                oriValue = "";
+            }
+            String newValue;
+            if (MD5.equals(type)) {
+                newValue = DigestUtils.md5Hex(oriValue);
+            } else {
+                newValue = DigestUtils.sha1Hex(oriValue);
+            }
+
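+            // md5Hex/sha1Hex return lowercase hex, so apply the requested case here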
+            if (TO_UPPER_CASE.equals(charType)) {
+                newValue = newValue.toUpperCase();
+            } else {
+                newValue = newValue.toLowerCase();
+            }
+
+            record.setColumn(columnIndex, new StringColumn(newValue));
+
+        } catch (Exception e) {
+            throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_RUN_EXCEPTION, e.getMessage(), e);
+        }
+        return record;
+    }
+
+}
diff --git a/core/src/main/java/com/alibaba/datax/core/transport/transformer/FilterTransformer.java b/core/src/main/java/com/alibaba/datax/core/transport/transformer/FilterTransformer.java
index 8f6492fa..a3251715 100644
--- a/core/src/main/java/com/alibaba/datax/core/transport/transformer/FilterTransformer.java
+++ b/core/src/main/java/com/alibaba/datax/core/transport/transformer/FilterTransformer.java
@@ -61,7 +61,7 @@ public class FilterTransformer extends Transformer {
             } else if (code.equalsIgnoreCase("<=")) {
                 return doLess(record, value, column, true);
             } else {
-                throw new RuntimeException("dx_filter can't suport code:" + code);
+                throw new RuntimeException("dx_filter can't support code:" + code);
             }
         } catch (Exception e) {
             throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_RUN_EXCEPTION, e.getMessage(), e);
diff --git a/core/src/main/java/com/alibaba/datax/core/transport/transformer/GroovyTransformerStaticUtil.java b/core/src/main/java/com/alibaba/datax/core/transport/transformer/GroovyTransformerStaticUtil.java
index 4c872993..487a8be8 100644
--- a/core/src/main/java/com/alibaba/datax/core/transport/transformer/GroovyTransformerStaticUtil.java
+++ b/core/src/main/java/com/alibaba/datax/core/transport/transformer/GroovyTransformerStaticUtil.java
@@ -1,10 +1,18 @@
 package com.alibaba.datax.core.transport.transformer;
 
+import org.apache.commons.codec.digest.DigestUtils;
+
 /**
  * GroovyTransformer的帮助类,供groovy代码使用,必须全是static的方法
  * Created by liqiang on 16/3/4.
  */
 public class GroovyTransformerStaticUtil  {
 
+    public static String md5(final String data) {
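+        // hex-encoded MD5 of the input, callable from dx_groovy scripts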
+        return DigestUtils.md5Hex(data);
+    }
 
+    public static String sha1(final String data) {
+        return DigestUtils.sha1Hex(data);
+    }
 }
diff --git a/core/src/main/java/com/alibaba/datax/core/transport/transformer/TransformerRegistry.java b/core/src/main/java/com/alibaba/datax/core/transport/transformer/TransformerRegistry.java
index 96a0d988..3c625153 100644
--- a/core/src/main/java/com/alibaba/datax/core/transport/transformer/TransformerRegistry.java
+++ b/core/src/main/java/com/alibaba/datax/core/transport/transformer/TransformerRegistry.java
@@ -36,6 +36,7 @@ public class TransformerRegistry {
         registTransformer(new ReplaceTransformer());
         registTransformer(new FilterTransformer());
         registTransformer(new GroovyTransformer());
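+        // dx_digest: writes the md5/sha1 hex digest of a column back into that column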
+        registTransformer(new DigestTransformer());
     }
 
     public static void loadTransformerFromLocalStorage() {
diff --git a/core/src/main/java/com/alibaba/datax/core/util/LocalStrings.properties b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings.properties
new file mode 100644
index 00000000..a90f7829
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings.properties
@@ -0,0 +1,58 @@
+configparser.1=\u63D2\u4EF6[{0},{1}]\u52A0\u8F7D\u5931\u8D25\uFF0C1s\u540E\u91CD\u8BD5... Exception:{2}
+configparser.2=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0}
+configparser.3=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0}
+configparser.4=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0}
+configparser.5=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25\uFF0C\u672A\u5B8C\u6210\u6307\u5B9A\u63D2\u4EF6\u52A0\u8F7D:{0}
+configparser.6=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25,\u5B58\u5728\u91CD\u590D\u63D2\u4EF6:{0}
+
+dataxserviceutil.1=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38NoSuchAlgorithmException, [{0}]
+dataxserviceutil.2=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38InvalidKeyException, [{0}]
+dataxserviceutil.3=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38UnsupportedEncodingException, [{0}]
+
+errorrecordchecker.1=\u810F\u6570\u636E\u767E\u5206\u6BD4\u9650\u5236\u5E94\u8BE5\u5728[0.0, 1.0]\u4E4B\u95F4
+errorrecordchecker.2=\u810F\u6570\u636E\u6761\u6570\u73B0\u5728\u5E94\u8BE5\u4E3A\u975E\u8D1F\u6574\u6570
+errorrecordchecker.3=\u810F\u6570\u636E\u6761\u6570\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\u6761\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u4E86[{1}]\u6761.
+errorrecordchecker.4=\u810F\u6570\u636E\u767E\u5206\u6BD4\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u5230[{1}].
+
+
+errorcode.install_error=DataX\u5F15\u64CE\u5B89\u88C5\u9519\u8BEF, \u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.argument_error=DataX\u5F15\u64CE\u8FD0\u884C\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8E\u5185\u90E8\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 .
+errorcode.runtime_error=DataX\u5F15\u64CE\u8FD0\u884C\u8FC7\u7A0B\u51FA\u9519\uFF0C\u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F  .
+errorcode.config_error=DataX\u5F15\u64CE\u914D\u7F6E\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.secret_error=DataX\u5F15\u64CE\u52A0\u89E3\u5BC6\u51FA\u9519\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.hook_load_error=\u52A0\u8F7D\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF\uFF0C\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u5F15\u8D77\u7684
+errorcode.hook_fail_error=\u6267\u884C\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF
+errorcode.plugin_install_error=DataX\u63D2\u4EF6\u5B89\u88C5\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.plugin_not_found=DataX\u63D2\u4EF6\u914D\u7F6E\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.plugin_init_error=DataX\u63D2\u4EF6\u521D\u59CB\u5316\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.plugin_runtime_error=DataX\u63D2\u4EF6\u8FD0\u884C\u65F6\u51FA\u9519, \u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F .
+errorcode.plugin_dirty_data_limit_exceed=DataX\u4F20\u8F93\u810F\u6570\u636E\u8D85\u8FC7\u7528\u6237\u9884\u671F\uFF0C\u8BE5\u9519\u8BEF\u901A\u5E38\u662F\u7531\u4E8E\u6E90\u7AEF\u6570\u636E\u5B58\u5728\u8F83\u591A\u4E1A\u52A1\u810F\u6570\u636E\u5BFC\u81F4\uFF0C\u8BF7\u4ED4\u7EC6\u68C0\u67E5DataX\u6C47\u62A5\u7684\u810F\u6570\u636E\u65E5\u5FD7\u4FE1\u606F, \u6216\u8005\u60A8\u53EF\u4EE5\u9002\u5F53\u8C03\u5927\u810F\u6570\u636E\u9608\u503C .
+errorcode.plugin_split_error=DataX\u63D2\u4EF6\u5207\u5206\u51FA\u9519, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5404\u4E2A\u63D2\u4EF6\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3
+errorcode.kill_job_timeout_error=kill \u4EFB\u52A1\u8D85\u65F6\uFF0C\u8BF7\u8054\u7CFBPE\u89E3\u51B3
+errorcode.start_taskgroup_error=taskGroup\u542F\u52A8\u5931\u8D25,\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3
+errorcode.call_datax_service_failed=\u8BF7\u6C42 DataX Service \u51FA\u9519.
+errorcode.call_remote_failed=\u8FDC\u7A0B\u8C03\u7528\u5931\u8D25
+errorcode.killed_exit_value=Job \u6536\u5230\u4E86 Kill \u547D\u4EE4.
+
+
+httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1}, STATUS CODE = {2}, Response Entity: {3} 
+httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5
+
+
+secretutil.1=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B
+secretutil.2=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B
+secretutil.3=rsa\u52A0\u5BC6\u51FA\u9519
+secretutil.4=rsa\u89E3\u5BC6\u51FA\u9519
+secretutil.5=3\u91CDDES\u52A0\u5BC6\u51FA\u9519
+secretutil.6=rsa\u89E3\u5BC6\u51FA\u9519
+secretutil.7=\u6784\u5EFA\u4E09\u91CDDES\u5BC6\u5319\u51FA\u9519
+secretutil.8=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u5BC6\u94A5\u7684\u914D\u7F6E\u6587\u4EF6
+secretutil.9=\u8BFB\u53D6\u52A0\u89E3\u5BC6\u914D\u7F6E\u6587\u4EF6\u51FA\u9519
+secretutil.10=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C
+secretutil.11=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898
+secretutil.12=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C
+secretutil.13=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898
+secretutil.14=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C[{0}]\u5B58\u5728\u5BC6\u94A5\u4E3A\u7A7A\u7684\u60C5\u51B5
+secretutil.15=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u516C\u79C1\u94A5\u5BF9\u5B58\u5728\u4E3A\u7A7A\u7684\u60C5\u51B5\uFF0C\u7248\u672C[{0}]
+secretutil.16=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u52A0\u89E3\u5BC6\u914D\u7F6E
+
diff --git a/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_en_US.properties b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_en_US.properties
new file mode 100644
index 00000000..8e01b153
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_en_US.properties
@@ -0,0 +1,58 @@
+configparser.1=Failed to load the plug-in [{0},{1}]. We will retry in 1s... Exception: {2}
+configparser.2=Failed to obtain the job configuration information: {0}
+configparser.3=Failed to obtain the job configuration information: {0}
+configparser.4=Failed to obtain the job configuration information: {0}
+configparser.5=Failed to load the plug-in. Loading of the specified plug-in: {0} was not completed
+configparser.6=Failed to load the plug-in. A duplicate plug-in: {0} exists
+
+dataxserviceutil.1=Exception in creating signature. NoSuchAlgorithmException, [{0}]
+dataxserviceutil.2=Exception in creating signature. InvalidKeyException, [{0}]
+dataxserviceutil.3=Exception in creating signature. UnsupportedEncodingException, [{0}]
+
+errorrecordchecker.1=The percentage of dirty data should be limited to within [0.0, 1.0]
+errorrecordchecker.2=The number of dirty data entries should be a nonnegative integer
+errorrecordchecker.3=The check on the number of dirty data entries has not passed. The limit is [{0}] entries, but [{1}] entries have actually been captured.
+errorrecordchecker.4=The check on the percentage of dirty data has not passed. The limit is [{0}], but [{1}] of dirty data has actually been captured.
+
+
+errorcode.install_error=Error in installing DataX engine. Please contact your O&M team to solve the problem.
+errorcode.argument_error=Error in running DataX engine. This problem is generally caused by an internal programming error. Please contact the DataX developer team to solve the problem. 
+errorcode.runtime_error=The DataX engine encountered an error during running. For the specific cause, refer to the error diagnosis after DataX stops running. 
+errorcode.config_error=Error in DataX engine configuration. This problem is generally caused by a DataX installation error. Please contact your O&M team to solve the problem. 
+errorcode.secret_error=Error in DataX engine encryption or decryption. This problem is generally caused by a DataX key configuration error. Please contact your O&M team to solve the problem. 
+errorcode.hook_load_error=Error in loading the external hook. This problem is generally caused by the DataX installation. 
+errorcode.hook_fail_error=Error in executing the external hook
+errorcode.plugin_install_error=Error in installing DataX plug-in. This problem is generally caused by a DataX installation error. Please contact your O&M team to solve the problem. 
+errorcode.plugin_not_found=Error in DataX plug-in configuration. This problem is generally caused by a DataX installation error. Please contact your O&M team to solve the problem. 
+errorcode.plugin_init_error=Error in DataX plug-in initialization. This problem is generally caused by a DataX installation error. Please contact your O&M team to solve the problem. 
+errorcode.plugin_runtime_error=The DataX plug-in encountered an error during running. For the specific cause, refer to the error diagnosis after DataX stops running. 
+errorcode.plugin_dirty_data_limit_exceed=The dirty data transmitted by DataX exceeds user expectations. This error often occurs when a lot of dirty data exists in the source data. Please carefully check the dirty data log information reported by DataX, or you can raise the dirty data threshold as appropriate.
+errorcode.plugin_split_error=Error in DataX plug-in slicing. This problem is generally caused by a programming error in some DataX plug-in. Please contact the DataX developer team to solve the problem. 
+errorcode.kill_job_timeout_error=The kill task timed out. Please contact the PE to solve the problem
+errorcode.start_taskgroup_error=Failed to start the task group. Please contact the DataX developer team to solve the problem
+errorcode.call_datax_service_failed=Error in requesting DataX Service.
+errorcode.call_remote_failed=Remote call failure
+errorcode.killed_exit_value=The job has received a Kill command.
+
+
+httpclientutil.1=Request address: {0}. Request method: {1}. STATUS CODE = {2}, Response Entity: {3}
+httpclientutil.2=The remote interface returns -1. We will try again
+
+
+secretutil.1=System programming error. Unsupported encryption type
+secretutil.2=System programming error. Unsupported encryption type
+secretutil.3=RSA encryption error
+secretutil.4=RSA decryption error
+secretutil.5=Triple DES encryption error
+secretutil.6=RSA decryption error
+secretutil.7=Error in building Triple DES key
+secretutil.8=DataX configuration requires encryption and decryption, but unable to find the key configuration file
+secretutil.9=Error in reading the encryption and decryption configuration file
+secretutil.10=The version of the DataX-configured key is [{0}], but there is no configuration in the system. Error in task key configuration. The key version you configured does not exist
+secretutil.11=The version of the DataX-configured key is [{0}], but there is no configuration in the system. There may be an error in task key configuration, or a problem in system maintenance
+secretutil.12=The version of the DataX-configured key is [{0}], but there is no configuration in the system. Error in task key configuration. The key version you configured does not exist
+secretutil.13=The version of the DataX-configured key is [{0}], but there is no configuration in the system. There may be an error in task key configuration, or a problem in system maintenance
+secretutil.14=DataX configuration requires encryption and decryption, but some key in the configured key version [{0}] is empty
+secretutil.15=DataX configuration requires encryption and decryption, but some configured public/private key pairs are empty and the version is [{0}]
+secretutil.16=DataX configuration requires encryption and decryption, but the encryption and decryption configuration cannot be found
+
diff --git a/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_ja_JP.properties b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_ja_JP.properties
new file mode 100644
index 00000000..7a0c95ac
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_ja_JP.properties
@@ -0,0 +1,58 @@
+configparser.1=\u63D2\u4EF6[{0},{1}]\u52A0\u8F7D\u5931\u8D25\uFF0C1s\u540E\u91CD\u8BD5... Exception:{2}
+configparser.2=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0}
+configparser.3=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0}
+configparser.4=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0}
+configparser.5=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25\uFF0C\u672A\u5B8C\u6210\u6307\u5B9A\u63D2\u4EF6\u52A0\u8F7D:{0}
+configparser.6=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25,\u5B58\u5728\u91CD\u590D\u63D2\u4EF6:{0}
+
+dataxserviceutil.1=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38NoSuchAlgorithmException, [{0}]
+dataxserviceutil.2=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38InvalidKeyException, [{0}]
+dataxserviceutil.3=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38UnsupportedEncodingException, [{0}]
+
+errorrecordchecker.1=\u810F\u6570\u636E\u767E\u5206\u6BD4\u9650\u5236\u5E94\u8BE5\u5728[0.0, 1.0]\u4E4B\u95F4
+errorrecordchecker.2=\u810F\u6570\u636E\u6761\u6570\u73B0\u5728\u5E94\u8BE5\u4E3A\u975E\u8D1F\u6574\u6570
+errorrecordchecker.3=\u810F\u6570\u636E\u6761\u6570\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\u6761\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u4E86[{1}]\u6761.
+errorrecordchecker.4=\u810F\u6570\u636E\u767E\u5206\u6BD4\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u5230[{1}].
+
+
+errorcode.install_error=DataX\u5F15\u64CE\u5B89\u88C5\u9519\u8BEF, \u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.argument_error=DataX\u5F15\u64CE\u8FD0\u884C\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8E\u5185\u90E8\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 .
+errorcode.runtime_error=DataX\u5F15\u64CE\u8FD0\u884C\u8FC7\u7A0B\u51FA\u9519\uFF0C\u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F  .
+errorcode.config_error=DataX\u5F15\u64CE\u914D\u7F6E\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.secret_error=DataX\u5F15\u64CE\u52A0\u89E3\u5BC6\u51FA\u9519\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.hook_load_error=\u52A0\u8F7D\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF\uFF0C\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u5F15\u8D77\u7684
+errorcode.hook_fail_error=\u6267\u884C\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF
+errorcode.plugin_install_error=DataX\u63D2\u4EF6\u5B89\u88C5\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.plugin_not_found=DataX\u63D2\u4EF6\u914D\u7F6E\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.plugin_init_error=DataX\u63D2\u4EF6\u521D\u59CB\u5316\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.plugin_runtime_error=DataX\u63D2\u4EF6\u8FD0\u884C\u65F6\u51FA\u9519, \u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F .
+errorcode.plugin_dirty_data_limit_exceed=DataX\u4F20\u8F93\u810F\u6570\u636E\u8D85\u8FC7\u7528\u6237\u9884\u671F\uFF0C\u8BE5\u9519\u8BEF\u901A\u5E38\u662F\u7531\u4E8E\u6E90\u7AEF\u6570\u636E\u5B58\u5728\u8F83\u591A\u4E1A\u52A1\u810F\u6570\u636E\u5BFC\u81F4\uFF0C\u8BF7\u4ED4\u7EC6\u68C0\u67E5DataX\u6C47\u62A5\u7684\u810F\u6570\u636E\u65E5\u5FD7\u4FE1\u606F, \u6216\u8005\u60A8\u53EF\u4EE5\u9002\u5F53\u8C03\u5927\u810F\u6570\u636E\u9608\u503C .
+errorcode.plugin_split_error=DataX\u63D2\u4EF6\u5207\u5206\u51FA\u9519, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5404\u4E2A\u63D2\u4EF6\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3
+errorcode.kill_job_timeout_error=kill \u4EFB\u52A1\u8D85\u65F6\uFF0C\u8BF7\u8054\u7CFBPE\u89E3\u51B3
+errorcode.start_taskgroup_error=taskGroup\u542F\u52A8\u5931\u8D25,\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3
+errorcode.call_datax_service_failed=\u8BF7\u6C42 DataX Service \u51FA\u9519.
+errorcode.call_remote_failed=\u8FDC\u7A0B\u8C03\u7528\u5931\u8D25
+errorcode.killed_exit_value=Job \u6536\u5230\u4E86 Kill \u547D\u4EE4.
+
+
+httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3}
+httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5
+
+
+secretutil.1=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B
+secretutil.2=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B
+secretutil.3=rsa\u52A0\u5BC6\u51FA\u9519
+secretutil.4=rsa\u89E3\u5BC6\u51FA\u9519
+secretutil.5=3\u91CDDES\u52A0\u5BC6\u51FA\u9519
+secretutil.6=rsa\u89E3\u5BC6\u51FA\u9519
+secretutil.7=\u6784\u5EFA\u4E09\u91CDDES\u5BC6\u5319\u51FA\u9519
+secretutil.8=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u5BC6\u94A5\u7684\u914D\u7F6E\u6587\u4EF6
+secretutil.9=\u8BFB\u53D6\u52A0\u89E3\u5BC6\u914D\u7F6E\u6587\u4EF6\u51FA\u9519
+secretutil.10=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C
+secretutil.11=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898
+secretutil.12=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C
+secretutil.13=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898
+secretutil.14=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C[{0}]\u5B58\u5728\u5BC6\u94A5\u4E3A\u7A7A\u7684\u60C5\u51B5
+secretutil.15=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u516C\u79C1\u94A5\u5BF9\u5B58\u5728\u4E3A\u7A7A\u7684\u60C5\u51B5\uFF0C\u7248\u672C[{0}]
+secretutil.16=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u52A0\u89E3\u5BC6\u914D\u7F6E
+
diff --git a/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_CN.properties b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_CN.properties
new file mode 100644
index 00000000..7a0c95ac
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_CN.properties
@@ -0,0 +1,58 @@
+configparser.1=\u63D2\u4EF6[{0},{1}]\u52A0\u8F7D\u5931\u8D25\uFF0C1s\u540E\u91CD\u8BD5... Exception:{2}
+configparser.2=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0}
+configparser.3=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0}
+configparser.4=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0}
+configparser.5=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25\uFF0C\u672A\u5B8C\u6210\u6307\u5B9A\u63D2\u4EF6\u52A0\u8F7D:{0}
+configparser.6=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25,\u5B58\u5728\u91CD\u590D\u63D2\u4EF6:{0}
+
+dataxserviceutil.1=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38NoSuchAlgorithmException, [{0}]
+dataxserviceutil.2=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38InvalidKeyException, [{0}]
+dataxserviceutil.3=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38UnsupportedEncodingException, [{0}]
+
+errorrecordchecker.1=\u810F\u6570\u636E\u767E\u5206\u6BD4\u9650\u5236\u5E94\u8BE5\u5728[0.0, 1.0]\u4E4B\u95F4
+errorrecordchecker.2=\u810F\u6570\u636E\u6761\u6570\u73B0\u5728\u5E94\u8BE5\u4E3A\u975E\u8D1F\u6574\u6570
+errorrecordchecker.3=\u810F\u6570\u636E\u6761\u6570\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\u6761\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u4E86[{1}]\u6761.
+errorrecordchecker.4=\u810F\u6570\u636E\u767E\u5206\u6BD4\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u5230[{1}].
+
+
+errorcode.install_error=DataX\u5F15\u64CE\u5B89\u88C5\u9519\u8BEF, \u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.argument_error=DataX\u5F15\u64CE\u8FD0\u884C\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8E\u5185\u90E8\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 .
+errorcode.runtime_error=DataX\u5F15\u64CE\u8FD0\u884C\u8FC7\u7A0B\u51FA\u9519\uFF0C\u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F  .
+errorcode.config_error=DataX\u5F15\u64CE\u914D\u7F6E\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.secret_error=DataX\u5F15\u64CE\u52A0\u89E3\u5BC6\u51FA\u9519\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.hook_load_error=\u52A0\u8F7D\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF\uFF0C\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u5F15\u8D77\u7684
+errorcode.hook_fail_error=\u6267\u884C\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF
+errorcode.plugin_install_error=DataX\u63D2\u4EF6\u5B89\u88C5\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.plugin_not_found=DataX\u63D2\u4EF6\u914D\u7F6E\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.plugin_init_error=DataX\u63D2\u4EF6\u521D\u59CB\u5316\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.plugin_runtime_error=DataX\u63D2\u4EF6\u8FD0\u884C\u65F6\u51FA\u9519, \u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F .
+errorcode.plugin_dirty_data_limit_exceed=DataX\u4F20\u8F93\u810F\u6570\u636E\u8D85\u8FC7\u7528\u6237\u9884\u671F\uFF0C\u8BE5\u9519\u8BEF\u901A\u5E38\u662F\u7531\u4E8E\u6E90\u7AEF\u6570\u636E\u5B58\u5728\u8F83\u591A\u4E1A\u52A1\u810F\u6570\u636E\u5BFC\u81F4\uFF0C\u8BF7\u4ED4\u7EC6\u68C0\u67E5DataX\u6C47\u62A5\u7684\u810F\u6570\u636E\u65E5\u5FD7\u4FE1\u606F, \u6216\u8005\u60A8\u53EF\u4EE5\u9002\u5F53\u8C03\u5927\u810F\u6570\u636E\u9608\u503C .
+errorcode.plugin_split_error=DataX\u63D2\u4EF6\u5207\u5206\u51FA\u9519, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5404\u4E2A\u63D2\u4EF6\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3
+errorcode.kill_job_timeout_error=kill \u4EFB\u52A1\u8D85\u65F6\uFF0C\u8BF7\u8054\u7CFBPE\u89E3\u51B3
+errorcode.start_taskgroup_error=taskGroup\u542F\u52A8\u5931\u8D25,\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3
+errorcode.call_datax_service_failed=\u8BF7\u6C42 DataX Service \u51FA\u9519.
+errorcode.call_remote_failed=\u8FDC\u7A0B\u8C03\u7528\u5931\u8D25
+errorcode.killed_exit_value=Job \u6536\u5230\u4E86 Kill \u547D\u4EE4.
+
+
+httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3}
+httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5
+
+
+secretutil.1=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B
+secretutil.2=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B
+secretutil.3=rsa\u52A0\u5BC6\u51FA\u9519
+secretutil.4=rsa\u89E3\u5BC6\u51FA\u9519
+secretutil.5=3\u91CDDES\u52A0\u5BC6\u51FA\u9519
+secretutil.6=rsa\u89E3\u5BC6\u51FA\u9519
+secretutil.7=\u6784\u5EFA\u4E09\u91CDDES\u5BC6\u5319\u51FA\u9519
+secretutil.8=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u5BC6\u94A5\u7684\u914D\u7F6E\u6587\u4EF6
+secretutil.9=\u8BFB\u53D6\u52A0\u89E3\u5BC6\u914D\u7F6E\u6587\u4EF6\u51FA\u9519
+secretutil.10=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C
+secretutil.11=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898
+secretutil.12=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C
+secretutil.13=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898
+secretutil.14=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C[{0}]\u5B58\u5728\u5BC6\u94A5\u4E3A\u7A7A\u7684\u60C5\u51B5
+secretutil.15=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u516C\u79C1\u94A5\u5BF9\u5B58\u5728\u4E3A\u7A7A\u7684\u60C5\u51B5\uFF0C\u7248\u672C[{0}]
+secretutil.16=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u52A0\u89E3\u5BC6\u914D\u7F6E
+
diff --git a/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_HK.properties b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_HK.properties
new file mode 100644
index 00000000..59ce9fd9
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_HK.properties
@@ -0,0 +1,116 @@
+configparser.1=\u63D2\u4EF6[{0},{1}]\u52A0\u8F7D\u5931\u8D25\uFF0C1s\u540E\u91CD\u8BD5... Exception:{2}
+configparser.2=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0}
+configparser.3=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0}
+configparser.4=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0}
+configparser.5=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25\uFF0C\u672A\u5B8C\u6210\u6307\u5B9A\u63D2\u4EF6\u52A0\u8F7D:{0}
+configparser.6=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25,\u5B58\u5728\u91CD\u590D\u63D2\u4EF6:{0}
+
+dataxserviceutil.1=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38NoSuchAlgorithmException, [{0}]
+dataxserviceutil.2=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38InvalidKeyException, [{0}]
+dataxserviceutil.3=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38UnsupportedEncodingException, [{0}]
+
+errorrecordchecker.1=\u810F\u6570\u636E\u767E\u5206\u6BD4\u9650\u5236\u5E94\u8BE5\u5728[0.0, 1.0]\u4E4B\u95F4
+errorrecordchecker.2=\u810F\u6570\u636E\u6761\u6570\u73B0\u5728\u5E94\u8BE5\u4E3A\u975E\u8D1F\u6574\u6570
+errorrecordchecker.3=\u810F\u6570\u636E\u6761\u6570\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\u6761\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u4E86[{1}]\u6761.
+errorrecordchecker.4=\u810F\u6570\u636E\u767E\u5206\u6BD4\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u5230[{1}].
+
+
+errorcode.install_error=DataX\u5F15\u64CE\u5B89\u88C5\u9519\u8BEF, \u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.argument_error=DataX\u5F15\u64CE\u8FD0\u884C\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8E\u5185\u90E8\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 .
+errorcode.runtime_error=DataX\u5F15\u64CE\u8FD0\u884C\u8FC7\u7A0B\u51FA\u9519\uFF0C\u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F  .
+errorcode.config_error=DataX\u5F15\u64CE\u914D\u7F6E\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.secret_error=DataX\u5F15\u64CE\u52A0\u89E3\u5BC6\u51FA\u9519\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.hook_load_error=\u52A0\u8F7D\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF\uFF0C\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u5F15\u8D77\u7684
+errorcode.hook_fail_error=\u6267\u884C\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF
+errorcode.plugin_install_error=DataX\u63D2\u4EF6\u5B89\u88C5\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.plugin_not_found=DataX\u63D2\u4EF6\u914D\u7F6E\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.plugin_init_error=DataX\u63D2\u4EF6\u521D\u59CB\u5316\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.plugin_runtime_error=DataX\u63D2\u4EF6\u8FD0\u884C\u65F6\u51FA\u9519, \u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F .
+errorcode.plugin_dirty_data_limit_exceed=DataX\u4F20\u8F93\u810F\u6570\u636E\u8D85\u8FC7\u7528\u6237\u9884\u671F\uFF0C\u8BE5\u9519\u8BEF\u901A\u5E38\u662F\u7531\u4E8E\u6E90\u7AEF\u6570\u636E\u5B58\u5728\u8F83\u591A\u4E1A\u52A1\u810F\u6570\u636E\u5BFC\u81F4\uFF0C\u8BF7\u4ED4\u7EC6\u68C0\u67E5DataX\u6C47\u62A5\u7684\u810F\u6570\u636E\u65E5\u5FD7\u4FE1\u606F, \u6216\u8005\u60A8\u53EF\u4EE5\u9002\u5F53\u8C03\u5927\u810F\u6570\u636E\u9608\u503C .
+errorcode.plugin_split_error=DataX\u63D2\u4EF6\u5207\u5206\u51FA\u9519, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5404\u4E2A\u63D2\u4EF6\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3
+errorcode.kill_job_timeout_error=kill \u4EFB\u52A1\u8D85\u65F6\uFF0C\u8BF7\u8054\u7CFBPE\u89E3\u51B3
+errorcode.start_taskgroup_error=taskGroup\u542F\u52A8\u5931\u8D25,\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3
+errorcode.call_datax_service_failed=\u8BF7\u6C42 DataX Service \u51FA\u9519.
+errorcode.call_remote_failed=\u8FDC\u7A0B\u8C03\u7528\u5931\u8D25
+errorcode.killed_exit_value=Job \u6536\u5230\u4E86 Kill \u547D\u4EE4.
+
+
+httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3}
+httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5
+
+
+secretutil.1=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B
+secretutil.2=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B
+secretutil.3=rsa\u52A0\u5BC6\u51FA\u9519
+secretutil.4=rsa\u89E3\u5BC6\u51FA\u9519
+secretutil.5=3\u91CDDES\u52A0\u5BC6\u51FA\u9519
+secretutil.6=rsa\u89E3\u5BC6\u51FA\u9519
+secretutil.7=\u6784\u5EFA\u4E09\u91CDDES\u5BC6\u5319\u51FA\u9519
+secretutil.8=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u5BC6\u94A5\u7684\u914D\u7F6E\u6587\u4EF6
+secretutil.9=\u8BFB\u53D6\u52A0\u89E3\u5BC6\u914D\u7F6E\u6587\u4EF6\u51FA\u9519
+secretutil.10=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C
+secretutil.11=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898
+secretutil.12=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C
+secretutil.13=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898
+secretutil.14=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C[{0}]\u5B58\u5728\u5BC6\u94A5\u4E3A\u7A7A\u7684\u60C5\u51B5
+secretutil.15=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u516C\u79C1\u94A5\u5BF9\u5B58\u5728\u4E3A\u7A7A\u7684\u60C5\u51B5\uFF0C\u7248\u672C[{0}]
+secretutil.16=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u52A0\u89E3\u5BC6\u914D\u7F6E
+
+configparser.1=\u5916\u639B\u7A0B\u5F0F[{0},{1}]\u8F09\u5165\u5931\u6557\uFF0C1s\u5F8C\u91CD\u8A66... Exception:{2}
+configparser.2=\u7372\u53D6\u4F5C\u696D\u914D\u7F6E\u8CC7\u8A0A\u5931\u6557:{0}
+configparser.3=\u7372\u53D6\u4F5C\u696D\u914D\u7F6E\u8CC7\u8A0A\u5931\u6557:{0}
+configparser.4=\u7372\u53D6\u4F5C\u696D\u914D\u7F6E\u8CC7\u8A0A\u5931\u6557:{0}
+configparser.5=\u5916\u639B\u7A0B\u5F0F\u8F09\u5165\u5931\u6557\uFF0C\u672A\u5B8C\u6210\u6307\u5B9A\u5916\u639B\u7A0B\u5F0F\u8F09\u5165:{0}
+configparser.6=\u5916\u639B\u7A0B\u5F0F\u8F09\u5165\u5931\u6557,\u5B58\u5728\u91CD\u8907\u5916\u639B\u7A0B\u5F0F:{0}
+
+dataxserviceutil.1=\u5EFA\u7ACB\u7C3D\u540D\u7570\u5E38NoSuchAlgorithmException, [{0}]
+dataxserviceutil.2=\u5EFA\u7ACB\u7C3D\u540D\u7570\u5E38InvalidKeyException, [{0}]
+dataxserviceutil.3=\u5EFA\u7ACB\u7C3D\u540D\u7570\u5E38UnsupportedEncodingException, [{0}]
+
+errorrecordchecker.1=\u9AD2\u6578\u64DA\u767E\u5206\u6BD4\u9650\u5236\u61C9\u8A72\u5728[0.0, 1.0]\u4E4B\u9593
+errorrecordchecker.2=\u9AD2\u6578\u64DA\u689D\u6578\u73FE\u5728\u61C9\u8A72\u70BA\u975E\u8CA0\u6574\u6578
+errorrecordchecker.3=\u9AD2\u6578\u64DA\u689D\u6578\u6AA2\u67E5\u4E0D\u901A\u904E\uFF0C\u9650\u5236\u662F[{0}]\u689D\uFF0C\u4F46\u5BE6\u969B\u4E0A\u6355\u7372\u4E86[{1}]\u689D.
+errorrecordchecker.4=\u9AD2\u6578\u64DA\u767E\u5206\u6BD4\u6AA2\u67E5\u4E0D\u901A\u904E\uFF0C\u9650\u5236\u662F[{0}]\uFF0C\u4F46\u5BE6\u969B\u4E0A\u6355\u7372\u5230[{1}].
+
+
+errorcode.install_error=DataX\u5F15\u64CE\u5B89\u88DD\u932F\u8AA4, \u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A .
+errorcode.argument_error=DataX\u5F15\u64CE\u904B\u884C\u932F\u8AA4\uFF0C\u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BC\u5167\u90E8\u7DE8\u7A0B\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61DataX\u958B\u767C\u5718\u968A\u89E3\u6C7A .
+errorcode.runtime_error=DataX\u5F15\u64CE\u904B\u884C\u904E\u7A0B\u51FA\u932F\uFF0C\u5177\u9AD4\u539F\u56E0\u8ACB\u53C3\u770BDataX\u904B\u884C\u7D50\u675F\u6642\u7684\u932F\u8AA4\u8A3A\u65B7\u8CC7\u8A0A  .
+errorcode.config_error=DataX\u5F15\u64CE\u914D\u7F6E\u932F\u8AA4\uFF0C\u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A .
+errorcode.secret_error=DataX\u5F15\u64CE\u52A0\u89E3\u5BC6\u51FA\u932F\uFF0C\u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A .
+errorcode.hook_load_error=\u8F09\u5165\u5916\u90E8Hook\u51FA\u73FE\u932F\u8AA4\uFF0C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u5F15\u8D77\u7684
+errorcode.hook_fail_error=\u57F7\u884C\u5916\u90E8Hook\u51FA\u73FE\u932F\u8AA4
+errorcode.plugin_install_error=DataX\u5916\u639B\u7A0B\u5F0F\u5B89\u88DD\u932F\u8AA4, \u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A .
+errorcode.plugin_not_found=DataX\u5916\u639B\u7A0B\u5F0F\u914D\u7F6E\u932F\u8AA4, \u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A .
+errorcode.plugin_init_error=DataX\u5916\u639B\u7A0B\u5F0F\u521D\u59CB\u5316\u932F\u8AA4, \u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A .
+errorcode.plugin_runtime_error=DataX\u5916\u639B\u7A0B\u5F0F\u904B\u884C\u6642\u51FA\u932F, \u5177\u9AD4\u539F\u56E0\u8ACB\u53C3\u770BDataX\u904B\u884C\u7D50\u675F\u6642\u7684\u932F\u8AA4\u8A3A\u65B7\u8CC7\u8A0A .
+errorcode.plugin_dirty_data_limit_exceed=DataX\u50B3\u8F38\u9AD2\u6578\u64DA\u8D85\u904E\u7528\u6236\u9810\u671F\uFF0C\u8A72\u932F\u8AA4\u901A\u5E38\u662F\u7531\u65BC\u6E90\u7AEF\u6578\u64DA\u5B58\u5728\u8F03\u591A\u696D\u52D9\u9AD2\u6578\u64DA\u5C0E\u81F4\uFF0C\u8ACB\u4ED4\u7D30\u6AA2\u67E5DataX\u5F59\u5831\u7684\u9AD2\u6578\u64DA\u65E5\u8A8C\u8CC7\u8A0A, \u6216\u8005\u60A8\u53EF\u4EE5\u9069\u7576\u8ABF\u5927\u9AD2\u6578\u64DA\u95BE\u503C .
+errorcode.plugin_split_error=DataX\u5916\u639B\u7A0B\u5F0F\u5207\u5206\u51FA\u932F, \u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5404\u500B\u5916\u639B\u7A0B\u5F0F\u7DE8\u7A0B\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61DataX\u958B\u767C\u5718\u968A\u89E3\u6C7A
+errorcode.kill_job_timeout_error=kill \u4EFB\u52D9\u903E\u6642\uFF0C\u8ACB\u806F\u7D61PE\u89E3\u6C7A
+errorcode.start_taskgroup_error=taskGroup\u555F\u52D5\u5931\u6557,\u8ACB\u806F\u7D61DataX\u958B\u767C\u5718\u968A\u89E3\u6C7A
+errorcode.call_datax_service_failed=\u8ACB\u6C42 DataX Service \u51FA\u932F.
+errorcode.call_remote_failed=\u9060\u7A0B\u8ABF\u7528\u5931\u6557
+errorcode.killed_exit_value=Job \u6536\u5230\u4E86 Kill \u547D\u4EE4.
+
+
+httpclientutil.1=\u8ACB\u6C42\u5730\u5740\uFF1A{0}, \u8ACB\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3}
+httpclientutil.2=\u9060\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C07\u91CD\u8A66
+
+
+secretutil.1=\u7CFB\u7D71\u7DE8\u7A0B\u932F\u8AA4,\u4E0D\u652F\u63F4\u7684\u52A0\u5BC6\u985E\u578B
+secretutil.2=\u7CFB\u7D71\u7DE8\u7A0B\u932F\u8AA4,\u4E0D\u652F\u63F4\u7684\u52A0\u5BC6\u985E\u578B
+secretutil.3=rsa\u52A0\u5BC6\u51FA\u932F
+secretutil.4=rsa\u89E3\u5BC6\u51FA\u932F
+secretutil.5=3\u91CDDES\u52A0\u5BC6\u51FA\u932F
+secretutil.6=rsa\u89E3\u5BC6\u51FA\u932F
+secretutil.7=\u69CB\u5EFA\u4E09\u91CDDES\u5BC6\u5319\u51FA\u932F
+secretutil.8=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u7121\u6CD5\u627E\u5230\u5BC6\u9470\u7684\u914D\u7F6E\u6A94\u6848
+secretutil.9=\u8B80\u53D6\u52A0\u89E3\u5BC6\u914D\u7F6E\u6A94\u6848\u51FA\u932F
+secretutil.10=DataX\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C\u70BA[{0}]\uFF0C\u4F46\u5728\u7CFB\u7D71\u4E2D\u6C92\u6709\u914D\u7F6E\uFF0C\u4EFB\u52D9\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C
+secretutil.11=DataX\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C\u70BA[{0}]\uFF0C\u4F46\u5728\u7CFB\u7D71\u4E2D\u6C92\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52D9\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7D71\u7DAD\u8B77\u554F\u984C
+secretutil.12=DataX\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C\u70BA[{0}]\uFF0C\u4F46\u5728\u7CFB\u7D71\u4E2D\u6C92\u6709\u914D\u7F6E\uFF0C\u4EFB\u52D9\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C
+secretutil.13=DataX\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C\u70BA[{0}]\uFF0C\u4F46\u5728\u7CFB\u7D71\u4E2D\u6C92\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52D9\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7D71\u7DAD\u8B77\u554F\u984C
+secretutil.14=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C[{0}]\u5B58\u5728\u5BC6\u9470\u70BA\u7A7A\u7684\u60C5\u6CC1
+secretutil.15=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u516C\u79C1\u9470\u5C0D\u5B58\u5728\u70BA\u7A7A\u7684\u60C5\u6CC1\uFF0C\u7248\u672C[{0}]
+secretutil.16=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u7121\u6CD5\u627E\u5230\u52A0\u89E3\u5BC6\u914D\u7F6E
+
diff --git a/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_TW.properties b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_TW.properties
new file mode 100644
index 00000000..59ce9fd9
--- /dev/null
+++ b/core/src/main/java/com/alibaba/datax/core/util/LocalStrings_zh_TW.properties
@@ -0,0 +1,116 @@
+configparser.1=\u63D2\u4EF6[{0},{1}]\u52A0\u8F7D\u5931\u8D25\uFF0C1s\u540E\u91CD\u8BD5... Exception:{2}
+configparser.2=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0}
+configparser.3=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0}
+configparser.4=\u83B7\u53D6\u4F5C\u4E1A\u914D\u7F6E\u4FE1\u606F\u5931\u8D25:{0}
+configparser.5=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25\uFF0C\u672A\u5B8C\u6210\u6307\u5B9A\u63D2\u4EF6\u52A0\u8F7D:{0}
+configparser.6=\u63D2\u4EF6\u52A0\u8F7D\u5931\u8D25,\u5B58\u5728\u91CD\u590D\u63D2\u4EF6:{0}
+
+dataxserviceutil.1=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38NoSuchAlgorithmException, [{0}]
+dataxserviceutil.2=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38InvalidKeyException, [{0}]
+dataxserviceutil.3=\u521B\u5EFA\u7B7E\u540D\u5F02\u5E38UnsupportedEncodingException, [{0}]
+
+errorrecordchecker.1=\u810F\u6570\u636E\u767E\u5206\u6BD4\u9650\u5236\u5E94\u8BE5\u5728[0.0, 1.0]\u4E4B\u95F4
+errorrecordchecker.2=\u810F\u6570\u636E\u6761\u6570\u73B0\u5728\u5E94\u8BE5\u4E3A\u975E\u8D1F\u6574\u6570
+errorrecordchecker.3=\u810F\u6570\u636E\u6761\u6570\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\u6761\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u4E86[{1}]\u6761.
+errorrecordchecker.4=\u810F\u6570\u636E\u767E\u5206\u6BD4\u68C0\u67E5\u4E0D\u901A\u8FC7\uFF0C\u9650\u5236\u662F[{0}]\uFF0C\u4F46\u5B9E\u9645\u4E0A\u6355\u83B7\u5230[{1}].
+
+
+errorcode.install_error=DataX\u5F15\u64CE\u5B89\u88C5\u9519\u8BEF, \u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.argument_error=DataX\u5F15\u64CE\u8FD0\u884C\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8E\u5185\u90E8\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3 .
+errorcode.runtime_error=DataX\u5F15\u64CE\u8FD0\u884C\u8FC7\u7A0B\u51FA\u9519\uFF0C\u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F  .
+errorcode.config_error=DataX\u5F15\u64CE\u914D\u7F6E\u9519\u8BEF\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.secret_error=DataX\u5F15\u64CE\u52A0\u89E3\u5BC6\u51FA\u9519\uFF0C\u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.hook_load_error=\u52A0\u8F7D\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF\uFF0C\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u5F15\u8D77\u7684
+errorcode.hook_fail_error=\u6267\u884C\u5916\u90E8Hook\u51FA\u73B0\u9519\u8BEF
+errorcode.plugin_install_error=DataX\u63D2\u4EF6\u5B89\u88C5\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.plugin_not_found=DataX\u63D2\u4EF6\u914D\u7F6E\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.plugin_init_error=DataX\u63D2\u4EF6\u521D\u59CB\u5316\u9519\u8BEF, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5B89\u88C5\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFB\u60A8\u7684\u8FD0\u7EF4\u89E3\u51B3 .
+errorcode.plugin_runtime_error=DataX\u63D2\u4EF6\u8FD0\u884C\u65F6\u51FA\u9519, \u5177\u4F53\u539F\u56E0\u8BF7\u53C2\u770BDataX\u8FD0\u884C\u7ED3\u675F\u65F6\u7684\u9519\u8BEF\u8BCA\u65AD\u4FE1\u606F .
+errorcode.plugin_dirty_data_limit_exceed=DataX\u4F20\u8F93\u810F\u6570\u636E\u8D85\u8FC7\u7528\u6237\u9884\u671F\uFF0C\u8BE5\u9519\u8BEF\u901A\u5E38\u662F\u7531\u4E8E\u6E90\u7AEF\u6570\u636E\u5B58\u5728\u8F83\u591A\u4E1A\u52A1\u810F\u6570\u636E\u5BFC\u81F4\uFF0C\u8BF7\u4ED4\u7EC6\u68C0\u67E5DataX\u6C47\u62A5\u7684\u810F\u6570\u636E\u65E5\u5FD7\u4FE1\u606F, \u6216\u8005\u60A8\u53EF\u4EE5\u9002\u5F53\u8C03\u5927\u810F\u6570\u636E\u9608\u503C .
+errorcode.plugin_split_error=DataX\u63D2\u4EF6\u5207\u5206\u51FA\u9519, \u8BE5\u95EE\u9898\u901A\u5E38\u662F\u7531\u4E8EDataX\u5404\u4E2A\u63D2\u4EF6\u7F16\u7A0B\u9519\u8BEF\u5F15\u8D77\uFF0C\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3
+errorcode.kill_job_timeout_error=kill \u4EFB\u52A1\u8D85\u65F6\uFF0C\u8BF7\u8054\u7CFBPE\u89E3\u51B3
+errorcode.start_taskgroup_error=taskGroup\u542F\u52A8\u5931\u8D25,\u8BF7\u8054\u7CFBDataX\u5F00\u53D1\u56E2\u961F\u89E3\u51B3
+errorcode.call_datax_service_failed=\u8BF7\u6C42 DataX Service \u51FA\u9519.
+errorcode.call_remote_failed=\u8FDC\u7A0B\u8C03\u7528\u5931\u8D25
+errorcode.killed_exit_value=Job \u6536\u5230\u4E86 Kill \u547D\u4EE4.
+
+
+httpclientutil.1=\u8BF7\u6C42\u5730\u5740\uFF1A{0}, \u8BF7\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3}
+httpclientutil.2=\u8FDC\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C06\u91CD\u8BD5
+
+
+secretutil.1=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B
+secretutil.2=\u7CFB\u7EDF\u7F16\u7A0B\u9519\u8BEF,\u4E0D\u652F\u6301\u7684\u52A0\u5BC6\u7C7B\u578B
+secretutil.3=rsa\u52A0\u5BC6\u51FA\u9519
+secretutil.4=rsa\u89E3\u5BC6\u51FA\u9519
+secretutil.5=3\u91CDDES\u52A0\u5BC6\u51FA\u9519
+secretutil.6=rsa\u89E3\u5BC6\u51FA\u9519
+secretutil.7=\u6784\u5EFA\u4E09\u91CDDES\u5BC6\u5319\u51FA\u9519
+secretutil.8=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u5BC6\u94A5\u7684\u914D\u7F6E\u6587\u4EF6
+secretutil.9=\u8BFB\u53D6\u52A0\u89E3\u5BC6\u914D\u7F6E\u6587\u4EF6\u51FA\u9519
+secretutil.10=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C
+secretutil.11=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898
+secretutil.12=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C
+secretutil.13=DataX\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C\u4E3A[{0}]\uFF0C\u4F46\u5728\u7CFB\u7EDF\u4E2D\u6CA1\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52A1\u5BC6\u94A5\u914D\u7F6E\u9519\u8BEF\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7EDF\u7EF4\u62A4\u95EE\u9898
+secretutil.14=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u5BC6\u94A5\u7248\u672C[{0}]\u5B58\u5728\u5BC6\u94A5\u4E3A\u7A7A\u7684\u60C5\u51B5
+secretutil.15=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u516C\u79C1\u94A5\u5BF9\u5B58\u5728\u4E3A\u7A7A\u7684\u60C5\u51B5\uFF0C\u7248\u672C[{0}]
+secretutil.16=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u65E0\u6CD5\u627E\u5230\u52A0\u89E3\u5BC6\u914D\u7F6E
+
+configparser.1=\u5916\u639B\u7A0B\u5F0F[{0},{1}]\u8F09\u5165\u5931\u6557\uFF0C1s\u5F8C\u91CD\u8A66... Exception:{2}
+configparser.2=\u7372\u53D6\u4F5C\u696D\u914D\u7F6E\u8CC7\u8A0A\u5931\u6557:{0}
+configparser.3=\u7372\u53D6\u4F5C\u696D\u914D\u7F6E\u8CC7\u8A0A\u5931\u6557:{0}
+configparser.4=\u7372\u53D6\u4F5C\u696D\u914D\u7F6E\u8CC7\u8A0A\u5931\u6557:{0}
+configparser.5=\u5916\u639B\u7A0B\u5F0F\u8F09\u5165\u5931\u6557\uFF0C\u672A\u5B8C\u6210\u6307\u5B9A\u5916\u639B\u7A0B\u5F0F\u8F09\u5165:{0}
+configparser.6=\u5916\u639B\u7A0B\u5F0F\u8F09\u5165\u5931\u6557,\u5B58\u5728\u91CD\u8907\u5916\u639B\u7A0B\u5F0F:{0}
+
+dataxserviceutil.1=\u5EFA\u7ACB\u7C3D\u540D\u7570\u5E38NoSuchAlgorithmException, [{0}]
+dataxserviceutil.2=\u5EFA\u7ACB\u7C3D\u540D\u7570\u5E38InvalidKeyException, [{0}]
+dataxserviceutil.3=\u5EFA\u7ACB\u7C3D\u540D\u7570\u5E38UnsupportedEncodingException, [{0}]
+
+errorrecordchecker.1=\u9AD2\u6578\u64DA\u767E\u5206\u6BD4\u9650\u5236\u61C9\u8A72\u5728[0.0, 1.0]\u4E4B\u9593
+errorrecordchecker.2=\u9AD2\u6578\u64DA\u689D\u6578\u73FE\u5728\u61C9\u8A72\u70BA\u975E\u8CA0\u6574\u6578
+errorrecordchecker.3=\u9AD2\u6578\u64DA\u689D\u6578\u6AA2\u67E5\u4E0D\u901A\u904E\uFF0C\u9650\u5236\u662F[{0}]\u689D\uFF0C\u4F46\u5BE6\u969B\u4E0A\u6355\u7372\u4E86[{1}]\u689D.
+errorrecordchecker.4=\u9AD2\u6578\u64DA\u767E\u5206\u6BD4\u6AA2\u67E5\u4E0D\u901A\u904E\uFF0C\u9650\u5236\u662F[{0}]\uFF0C\u4F46\u5BE6\u969B\u4E0A\u6355\u7372\u5230[{1}].
+
+
+errorcode.install_error=DataX\u5F15\u64CE\u5B89\u88DD\u932F\u8AA4, \u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A .
+errorcode.argument_error=DataX\u5F15\u64CE\u904B\u884C\u932F\u8AA4\uFF0C\u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BC\u5167\u90E8\u7DE8\u7A0B\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61DataX\u958B\u767C\u5718\u968A\u89E3\u6C7A .
+errorcode.runtime_error=DataX\u5F15\u64CE\u904B\u884C\u904E\u7A0B\u51FA\u932F\uFF0C\u5177\u9AD4\u539F\u56E0\u8ACB\u53C3\u770BDataX\u904B\u884C\u7D50\u675F\u6642\u7684\u932F\u8AA4\u8A3A\u65B7\u8CC7\u8A0A  .
+errorcode.config_error=DataX\u5F15\u64CE\u914D\u7F6E\u932F\u8AA4\uFF0C\u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A .
+errorcode.secret_error=DataX\u5F15\u64CE\u52A0\u89E3\u5BC6\u51FA\u932F\uFF0C\u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A .
+errorcode.hook_load_error=\u8F09\u5165\u5916\u90E8Hook\u51FA\u73FE\u932F\u8AA4\uFF0C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u5F15\u8D77\u7684
+errorcode.hook_fail_error=\u57F7\u884C\u5916\u90E8Hook\u51FA\u73FE\u932F\u8AA4
+errorcode.plugin_install_error=DataX\u5916\u639B\u7A0B\u5F0F\u5B89\u88DD\u932F\u8AA4, \u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A .
+errorcode.plugin_not_found=DataX\u5916\u639B\u7A0B\u5F0F\u914D\u7F6E\u932F\u8AA4, \u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A .
+errorcode.plugin_init_error=DataX\u5916\u639B\u7A0B\u5F0F\u521D\u59CB\u5316\u932F\u8AA4, \u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5B89\u88DD\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61\u60A8\u7684\u904B\u7DAD\u89E3\u6C7A .
+errorcode.plugin_runtime_error=DataX\u5916\u639B\u7A0B\u5F0F\u904B\u884C\u6642\u51FA\u932F, \u5177\u9AD4\u539F\u56E0\u8ACB\u53C3\u770BDataX\u904B\u884C\u7D50\u675F\u6642\u7684\u932F\u8AA4\u8A3A\u65B7\u8CC7\u8A0A .
+errorcode.plugin_dirty_data_limit_exceed=DataX\u50B3\u8F38\u9AD2\u6578\u64DA\u8D85\u904E\u7528\u6236\u9810\u671F\uFF0C\u8A72\u932F\u8AA4\u901A\u5E38\u662F\u7531\u65BC\u6E90\u7AEF\u6578\u64DA\u5B58\u5728\u8F03\u591A\u696D\u52D9\u9AD2\u6578\u64DA\u5C0E\u81F4\uFF0C\u8ACB\u4ED4\u7D30\u6AA2\u67E5DataX\u5F59\u5831\u7684\u9AD2\u6578\u64DA\u65E5\u8A8C\u8CC7\u8A0A, \u6216\u8005\u60A8\u53EF\u4EE5\u9069\u7576\u8ABF\u5927\u9AD2\u6578\u64DA\u95BE\u503C .
+errorcode.plugin_split_error=DataX\u5916\u639B\u7A0B\u5F0F\u5207\u5206\u51FA\u932F, \u8A72\u554F\u984C\u901A\u5E38\u662F\u7531\u65BCDataX\u5404\u500B\u5916\u639B\u7A0B\u5F0F\u7DE8\u7A0B\u932F\u8AA4\u5F15\u8D77\uFF0C\u8ACB\u806F\u7D61DataX\u958B\u767C\u5718\u968A\u89E3\u6C7A
+errorcode.kill_job_timeout_error=kill \u4EFB\u52D9\u903E\u6642\uFF0C\u8ACB\u806F\u7D61PE\u89E3\u6C7A
+errorcode.start_taskgroup_error=taskGroup\u555F\u52D5\u5931\u6557,\u8ACB\u806F\u7D61DataX\u958B\u767C\u5718\u968A\u89E3\u6C7A
+errorcode.call_datax_service_failed=\u8ACB\u6C42 DataX Service \u51FA\u932F.
+errorcode.call_remote_failed=\u9060\u7A0B\u8ABF\u7528\u5931\u6557
+errorcode.killed_exit_value=Job \u6536\u5230\u4E86 Kill \u547D\u4EE4.
+
+
+httpclientutil.1=\u8ACB\u6C42\u5730\u5740\uFF1A{0}, \u8ACB\u6C42\u65B9\u6CD5\uFF1A{1},STATUS CODE = {2}, Response Entity: {3}
+httpclientutil.2=\u9060\u7A0B\u63A5\u53E3\u8FD4\u56DE-1,\u5C07\u91CD\u8A66
+
+
+secretutil.1=\u7CFB\u7D71\u7DE8\u7A0B\u932F\u8AA4,\u4E0D\u652F\u63F4\u7684\u52A0\u5BC6\u985E\u578B
+secretutil.2=\u7CFB\u7D71\u7DE8\u7A0B\u932F\u8AA4,\u4E0D\u652F\u63F4\u7684\u52A0\u5BC6\u985E\u578B
+secretutil.3=rsa\u52A0\u5BC6\u51FA\u932F
+secretutil.4=rsa\u89E3\u5BC6\u51FA\u932F
+secretutil.5=3\u91CDDES\u52A0\u5BC6\u51FA\u932F
+secretutil.6=rsa\u89E3\u5BC6\u51FA\u932F
+secretutil.7=\u69CB\u5EFA\u4E09\u91CDDES\u5BC6\u5319\u51FA\u932F
+secretutil.8=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u7121\u6CD5\u627E\u5230\u5BC6\u9470\u7684\u914D\u7F6E\u6A94\u6848
+secretutil.9=\u8B80\u53D6\u52A0\u89E3\u5BC6\u914D\u7F6E\u6A94\u6848\u51FA\u932F
+secretutil.10=DataX\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C\u70BA[{0}]\uFF0C\u4F46\u5728\u7CFB\u7D71\u4E2D\u6C92\u6709\u914D\u7F6E\uFF0C\u4EFB\u52D9\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C
+secretutil.11=DataX\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C\u70BA[{0}]\uFF0C\u4F46\u5728\u7CFB\u7D71\u4E2D\u6C92\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52D9\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7D71\u7DAD\u8B77\u554F\u984C
+secretutil.12=DataX\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C\u70BA[{0}]\uFF0C\u4F46\u5728\u7CFB\u7D71\u4E2D\u6C92\u6709\u914D\u7F6E\uFF0C\u4EFB\u52D9\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\uFF0C\u4E0D\u5B58\u5728\u60A8\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C
+secretutil.13=DataX\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C\u70BA[{0}]\uFF0C\u4F46\u5728\u7CFB\u7D71\u4E2D\u6C92\u6709\u914D\u7F6E\uFF0C\u53EF\u80FD\u662F\u4EFB\u52D9\u5BC6\u9470\u914D\u7F6E\u932F\u8AA4\uFF0C\u4E5F\u53EF\u80FD\u662F\u7CFB\u7D71\u7DAD\u8B77\u554F\u984C
+secretutil.14=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u5BC6\u9470\u7248\u672C[{0}]\u5B58\u5728\u5BC6\u9470\u70BA\u7A7A\u7684\u60C5\u6CC1
+secretutil.15=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u914D\u7F6E\u7684\u516C\u79C1\u9470\u5C0D\u5B58\u5728\u70BA\u7A7A\u7684\u60C5\u6CC1\uFF0C\u7248\u672C[{0}]
+secretutil.16=DataX\u914D\u7F6E\u8981\u6C42\u52A0\u89E3\u5BC6\uFF0C\u4F46\u7121\u6CD5\u627E\u5230\u52A0\u89E3\u5BC6\u914D\u7F6E
+
diff --git a/core/src/main/job/job.json b/core/src/main/job/job.json
index 58206592..cc353877 100755
--- a/core/src/main/job/job.json
+++ b/core/src/main/job/job.json
@@ -2,7 +2,7 @@
     "job": {
         "setting": {
             "speed": {
-                "byte":10485760
+                "channel":1
             },
             "errorLimit": {
                 "record": 0,
diff --git a/databendwriter/doc/databendwriter-CN.md b/databendwriter/doc/databendwriter-CN.md
new file mode 100644
index 00000000..d6a8f1f3
--- /dev/null
+++ b/databendwriter/doc/databendwriter-CN.md
@@ -0,0 +1,171 @@
+# DataX DatabendWriter
+[简体中文](./databendwriter-CN.md) | [English](./databendwriter.md)
+
+## 1 快速介绍
+
+Databend Writer 是一个 DataX 的插件,用于从 DataX 中写入数据到 Databend 表中。
+该插件基于[databend JDBC driver](https://github.com/databendcloud/databend-jdbc) ,它使用 [RESTful http protocol](https://databend.rs/doc/integrations/api/rest)
+在开源的 databend 和 [databend cloud](https://app.databend.com/) 上执行查询。
+
+在每个写入批次中,databend writer 将批量数据上传到内部的 S3 stage,然后执行相应的 insert SQL 将数据上传到 databend 表中。
+
+为了最佳的用户体验,如果您使用的是 databend 社区版本,您应该尝试采用 [S3](https://aws.amazon.com/s3/)/[minio](https://min.io/)/[OSS](https://www.alibabacloud.com/product/object-storage-service) 作为其底层存储层,因为
+它们支持预签名上传操作,否则您可能会在数据传输上浪费不必要的成本。
+
+您可以在[文档](https://databend.rs/doc/deploy/deploying-databend)中了解更多详细信息
+
+## 2 实现原理
+
+Databend Writer 将使用 DataX 从 DataX Reader 中获取生成的记录,并将记录批量插入到 databend 表中指定的列中。
+
+## 3 功能说明
+
+### 3.1 配置样例
+
+* 以下配置将从内存中读取一些生成的数据,并将数据上传到databend表中
+
+#### 准备工作
+```sql
+--- create table in databend
+drop table if exists datax.sample1;
+drop database if exists datax;
+create database if not exists datax;
+create table if not exists datax.sample1(a string, b int64, c date, d timestamp, e bool, f string, g variant);
+```
+
+#### 配置样例
+```json
+{
+  "job": {
+    "content": [
+      {
+        "reader": {
+          "name": "streamreader",
+          "parameter": {
+            "column" : [
+              {
+                "value": "DataX",
+                "type": "string"
+              },
+              {
+                "value": 19880808,
+                "type": "long"
+              },
+              {
+                "value": "1926-08-08 08:08:08",
+                "type": "date"
+              },
+              {
+                "value": "1988-08-08 08:08:08",
+                "type": "date"
+              },
+              {
+                "value": true,
+                "type": "bool"
+              },
+              {
+                "value": "test",
+                "type": "bytes"
+              },
+              {
+                "value": "{\"type\": \"variant\", \"value\": \"test\"}",
+                "type": "string"
+              }
+
+            ],
+            "sliceRecordCount": 10000
+          }
+        },
+        "writer": {
+          "name": "databendwriter",
+          "parameter": {
+            "username": "databend",
+            "password": "databend",
+            "column": ["a", "b", "c", "d", "e", "f", "g"],
+            "batchSize": 1000,
+            "preSql": [
+            ],
+            "postSql": [
+            ],
+            "connection": [
+              {
+                "jdbcUrl": "jdbc:databend://localhost:8000/datax",
+                "table": [
+                  "sample1"
+                ]
+              }
+            ]
+          }
+        }
+      }
+    ],
+    "setting": {
+      "speed": {
+        "channel": 1
+       }
+    }
+  }
+}
+```
+
+### 3.2 参数说明
+* jdbcUrl
+    * 描述: JDBC 数据源 url。请参阅仓库中的详细[文档](https://github.com/databendcloud/databend-jdbc)
+    * 必选: 是
+    * 默认值: 无
+    * 示例: jdbc:databend://localhost:8000/datax
+* username
+    * 描述: JDBC 数据源用户名
+    * 必选: 是
+    * 默认值: 无
+    * 示例: databend
+* password
+    * 描述: JDBC 数据源密码
+    * 必选: 是
+    * 默认值: 无
+    * 示例: databend
+* table
+    * 描述: 表名的集合,table应该包含column参数中的所有列。
+    * 必选: 是
+    * 默认值: 无
+    * 示例: ["sample1"]
+* column
+    * 描述: 表中的列名集合,字段顺序应该与reader的record中的column类型对应
+    * 必选: 是
+    * 默认值: 无
+    * 示例: ["a", "b", "c", "d", "e", "f", "g"]
+* batchSize
+    * 描述: 每个批次的记录数
+    * 必选: 否
+    * 默认值: 1000
+    * 示例: 1000
+* preSql
+    * 描述: 在写入数据之前执行的SQL语句
+    * 必选: 否
+    * 默认值: 无
+    * 示例: ["delete from datax.sample1"]
+* postSql
+    * 描述: 在写入数据之后执行的SQL语句
+    * 必选: 否
+    * 默认值: 无
+    * 示例: ["select count(*) from datax.sample1"]
+
+### 3.3 类型转化
+DataX中的数据类型可以转换为databend中的相应数据类型。下表显示了两种类型之间的对应关系。
+
+| DataX 内部类型 | Databend 数据类型                                             |
+|------------|-----------------------------------------------------------|
+| INT        | TINYINT, INT8, SMALLINT, INT16, INT, INT32, BIGINT, INT64 |
+| LONG       | TINYINT, INT8, SMALLINT, INT16, INT, INT32, BIGINT, INT64 |
+| STRING     | STRING, VARCHAR                                           |
+| DOUBLE     | FLOAT, DOUBLE                                             |
+| BOOL       | BOOLEAN, BOOL                                             |
+| DATE       | DATE, TIMESTAMP                                           |
+| BYTES      | STRING, VARCHAR                                           |
+
+## 4 性能测试
+
+## 5 约束限制
+目前,复杂数据类型支持不稳定,如果您想使用复杂数据类型,例如元组,数组,请检查databend和jdbc驱动程序的进一步版本。
+
+## FAQ
\ No newline at end of file
diff --git a/databendwriter/doc/databendwriter.md b/databendwriter/doc/databendwriter.md
new file mode 100644
index 00000000..0b57bf13
--- /dev/null
+++ b/databendwriter/doc/databendwriter.md
@@ -0,0 +1,166 @@
+# DataX DatabendWriter
+[简体中文](./databendwriter-CN.md) | [English](./databendwriter.md)
+
+## 1 Introduction
+Databend Writer is a DataX plugin that writes DataX records into a Databend table.
+The plugin is based on the [databend JDBC driver](https://github.com/databendcloud/databend-jdbc), which uses the [RESTful HTTP protocol](https://databend.rs/doc/integrations/api/rest)
+to execute queries against open source Databend and [Databend Cloud](https://app.databend.com/).
+
+During each write batch, Databend Writer uploads the batch data to an internal S3 stage and then executes the corresponding insert SQL to load the data into the Databend table.
+
+For the best experience, if you are using the Databend community distribution, you should adopt [S3](https://aws.amazon.com/s3/)/[minio](https://min.io/)/[OSS](https://www.alibabacloud.com/product/object-storage-service) as its underlying storage layer, since
+they support presigned upload operations; otherwise you may incur unnecessary data transfer costs.
+
+You can find more details in the [doc](https://databend.rs/doc/deploy/deploying-databend).
+
+## 2 Detailed Implementation
+Databend Writer uses DataX to fetch the records generated by a DataX Reader, and then batch-inserts those records into the designated columns of your Databend table.
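+
+As an illustration only (not the plugin's actual source), the following minimal JDBC sketch shows the batching idea: buffer up to `batchSize` records, then flush them with one batch insert. The table name `sample1`, its columns, and the connection settings are assumptions taken from the example configuration below.
+
+```java
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+
+public class BatchInsertSketch {
+    public static void main(String[] args) throws Exception {
+        // Assumed local Databend endpoint; replace with your own jdbcUrl/username/password.
+        try (Connection conn = DriverManager.getConnection(
+                "jdbc:databend://localhost:8000/datax", "databend", "databend");
+             PreparedStatement ps = conn.prepareStatement(
+                "INSERT INTO sample1 (a, b) VALUES (?, ?)")) {
+            int batchSize = 1000;                // mirrors the writer's batchSize parameter
+            int buffered = 0;
+            for (int i = 0; i < 10_000; i++) {   // stand-in for records coming from the Reader
+                ps.setString(1, "DataX-" + i);
+                ps.setLong(2, i);
+                ps.addBatch();
+                if (++buffered == batchSize) {   // flush one full batch
+                    ps.executeBatch();
+                    buffered = 0;
+                }
+            }
+            if (buffered > 0) {                  // flush the remaining tail
+                ps.executeBatch();
+            }
+        }
+    }
+}
+```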
+
+## 3 Features
+### 3.1 Example Configurations
+* The following configuration reads generated data from memory and uploads it into a Databend table.
+
+#### Preparation
+```sql
+--- create table in databend
+drop table if exists datax.sample1;
+drop database if exists datax;
+create database if not exists datax;
+create table if not exists datax.sample1(a string, b int64, c date, d timestamp, e bool, f string, g variant);
+```
+
+#### Configurations
+```json
+{
+  "job": {
+    "content": [
+      {
+        "reader": {
+          "name": "streamreader",
+          "parameter": {
+            "column" : [
+              {
+                "value": "DataX",
+                "type": "string"
+              },
+              {
+                "value": 19880808,
+                "type": "long"
+              },
+              {
+                "value": "1926-08-08 08:08:08",
+                "type": "date"
+              },
+              {
+                "value": "1988-08-08 08:08:08",
+                "type": "date"
+              },
+              {
+                "value": true,
+                "type": "bool"
+              },
+              {
+                "value": "test",
+                "type": "bytes"
+              },
+              {
+                "value": "{\"type\": \"variant\", \"value\": \"test\"}",
+                "type": "string"
+              }
+
+            ],
+            "sliceRecordCount": 10000
+          }
+        },
+        "writer": {
+          "name": "databendwriter",
+          "parameter": {
+            "username": "databend",
+            "password": "databend",
+            "column": ["a", "b", "c", "d", "e", "f", "g"],
+            "batchSize": 1000,
+            "preSql": [
+            ],
+            "postSql": [
+            ],
+            "connection": [
+              {
+                "jdbcUrl": "jdbc:databend://localhost:8000/datax",
+                "table": [
+                  "sample1"
+                ]
+              }
+            ]
+          }
+        }
+      }
+    ],
+    "setting": {
+      "speed": {
+        "channel": 1
+       }
+    }
+  }
+}
+```
+
+### 3.2 Configuration Description
+* jdbcUrl
+  * Description: JDBC data source URL for Databend. See the repository for the detailed [doc](https://github.com/databendcloud/databend-jdbc)
+  * Required: yes
+  * Default: none
+  * Example: jdbc:databend://localhost:8000/datax
+* username
+  * Description: Databend user name
+  * Required: yes
+  * Default: none
+  * Example: databend
+* password
+  * Description: Databend user password
+  * Required: yes
+  * Default: none
+  * Example: databend
+* table
+  * Description: A list of table names to write to; each table should contain all of the columns listed in the column parameter.
+  * Required: yes
+  * Default: none
+  * Example: ["sample1"]
+* column
+  * Description: A list of column names to insert into the table. If you want to insert all columns, use `["*"]` instead.
+  * Required: yes
+  * Default: none
+  * Example: ["a", "b", "c", "d", "e", "f", "g"]
+* batchSize
+  * Description: The number of records to insert in each batch.
+  * Required: no
+  * Default: 1024
+  * Example: 1000
+* preSql
+  * Description: A list of SQL statements executed before the write operation.
+  * Required: no
+  * Default: none
+  * Example: ["delete from datax.sample1"]
+* postSql
+  * Description: A list of SQL statements executed after the write operation.
+  * Required: no
+  * Default: none
+  * Example: ["select count(*) from datax.sample1"]
+
+### 3.3 Type Conversion
+Data types in DataX can be converted to the corresponding data types in Databend. The following table shows the correspondence between the two type systems.
+
+| DataX Type | Databend Type                                             |
+|------------|-----------------------------------------------------------|
+| INT        | TINYINT, INT8, SMALLINT, INT16, INT, INT32, BIGINT, INT64 |
+| LONG       | TINYINT, INT8, SMALLINT, INT16, INT, INT32, BIGINT, INT64 |
+| STRING     | STRING, VARCHAR                                           |
+| DOUBLE     | FLOAT, DOUBLE                                             |
+| BOOL       | BOOLEAN, BOOL                                             |
+| DATE       | DATE, TIMESTAMP                                           |
+| BYTES      | STRING, VARCHAR                                           |
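+
+For example, reading the table above, a DataX `LONG` value can be bound to a Databend `BIGINT` column and a DataX `DATE` value to a `TIMESTAMP` column through the standard JDBC setters. The snippet below is a simplified, hypothetical illustration of that binding; the parameter positions and column choices are assumptions, not the plugin's actual code.
+
+```java
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+
+final class TypeBindingSketch {
+    // Illustrates two rows of the mapping table:
+    //   DataX LONG -> Databend BIGINT/INT64 (setLong)
+    //   DataX DATE -> Databend TIMESTAMP    (setTimestamp)
+    static void bind(PreparedStatement ps, long longValue, java.util.Date dateValue) throws SQLException {
+        ps.setLong(1, longValue);
+        ps.setTimestamp(2, new Timestamp(dateValue.getTime()));
+    }
+}
+```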
+
+
+## 4 Performance Test
+
+
+## 5 Restrictions
+Currently, complex data type support is not stable. If you want to use complex data types such as tuple or array, please check a later release of Databend and its JDBC driver.
+
+## FAQ
diff --git a/databendwriter/pom.xml b/databendwriter/pom.xml
new file mode 100644
index 00000000..976ecd6a
--- /dev/null
+++ b/databendwriter/pom.xml
@@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>datax-all</artifactId>
+        <groupId>com.alibaba.datax</groupId>
+        <version>0.0.1-SNAPSHOT</version>
+    </parent>
+
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>databendwriter</artifactId>
+    <name>databendwriter</name>
+    <packaging>jar</packaging>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.databend</groupId>
+            <artifactId>databend-jdbc</artifactId>
+            <version>0.0.5</version>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>datax-core</artifactId>
+            <version>${datax-project-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>datax-common</artifactId>
+            <version>${datax-project-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>plugin-rdbms-util</artifactId>
+            <version>${datax-project-version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>com.google.guava</groupId>
+                    <artifactId>guava</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <resources>
+            <resource>
+                <directory>src/main/java</directory>
+                <includes>
+                    <include>**/*.properties</include>
+                </includes>
+            </resource>
+        </resources>
+        <plugins>
+            <plugin>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <configuration>
+                    <source>${jdk-version}</source>
+                    <target>${jdk-version}</target>
+                    <encoding>${project-sourceEncoding}</encoding>
+                </configuration>
+            </plugin>
+            <plugin>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <configuration>
+                    <descriptors>
+                        <descriptor>src/main/assembly/package.xml</descriptor>
+                    </descriptors>
+                    <finalName>datax</finalName>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>dwzip</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
diff --git a/databendwriter/src/main/assembly/package.xml b/databendwriter/src/main/assembly/package.xml
new file mode 100755
index 00000000..8a9ba1b2
--- /dev/null
+++ b/databendwriter/src/main/assembly/package.xml
@@ -0,0 +1,34 @@
+<assembly
+        xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+    <id></id>
+    <formats>
+        <format>dir</format>
+    </formats>
+    <includeBaseDirectory>false</includeBaseDirectory>
+    <fileSets>
+        <fileSet>
+            <directory>src/main/resources</directory>
+            <includes>
+                <include>plugin.json</include>
+                <include>plugin_job_template.json</include>
+            </includes>
+            <outputDirectory>plugin/writer/databendwriter</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>target/</directory>
+            <includes>
+                <include>databendwriter-0.0.1-SNAPSHOT.jar</include>
+            </includes>
+            <outputDirectory>plugin/writer/databendwriter</outputDirectory>
+        </fileSet>
+    </fileSets>
+
+    <dependencySets>
+        <dependencySet>
+            <useProjectArtifact>false</useProjectArtifact>
+            <outputDirectory>plugin/writer/databendwriter/libs</outputDirectory>
+        </dependencySet>
+    </dependencySets>
+</assembly>
diff --git a/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/DatabendWriter.java b/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/DatabendWriter.java
new file mode 100644
index 00000000..a4222f08
--- /dev/null
+++ b/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/DatabendWriter.java
@@ -0,0 +1,248 @@
+package com.alibaba.datax.plugin.writer.databendwriter;
+
+import com.alibaba.datax.common.element.Column;
+import com.alibaba.datax.common.element.StringColumn;
+import com.alibaba.datax.common.exception.CommonErrorCode;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.RecordReceiver;
+import com.alibaba.datax.common.spi.Writer;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
+import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter;
+import com.alibaba.datax.plugin.writer.databendwriter.util.DatabendWriterUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.*;
+import java.util.List;
+import java.util.regex.Pattern;
+
+public class DatabendWriter extends Writer
+{
+    private static final DataBaseType DATABASE_TYPE = DataBaseType.Databend;
+
+    public static class Job
+            extends Writer.Job
+    {
+        private static final Logger LOG = LoggerFactory.getLogger(Job.class);
+        private Configuration originalConfig;
+        private CommonRdbmsWriter.Job commonRdbmsWriterMaster;
+
+        @Override
+        public void init()
+        {
+            this.originalConfig = super.getPluginJobConf();
+            this.commonRdbmsWriterMaster = new CommonRdbmsWriter.Job(DATABASE_TYPE);
+            this.commonRdbmsWriterMaster.init(this.originalConfig);
+            // placeholder currently not supported by databend driver, needs special treatment
+            DatabendWriterUtil.dealWriteMode(this.originalConfig);
+        }
+
+        @Override
+        public void preCheck()
+        {
+            this.init();
+            this.commonRdbmsWriterMaster.writerPreCheck(this.originalConfig, DATABASE_TYPE);
+        }
+
+        @Override
+        public void prepare() {
+            this.commonRdbmsWriterMaster.prepare(this.originalConfig);
+        }
+
+        @Override
+        public List<Configuration> split(int mandatoryNumber) {
+            return this.commonRdbmsWriterMaster.split(this.originalConfig, mandatoryNumber);
+        }
+
+        @Override
+        public void post() {
+            this.commonRdbmsWriterMaster.post(this.originalConfig);
+        }
+
+        @Override
+        public void destroy() {
+            this.commonRdbmsWriterMaster.destroy(this.originalConfig);
+        }
+    }
+
+
+    public static class Task extends Writer.Task
+    {
+        private static final Logger LOG = LoggerFactory.getLogger(Task.class);
+
+        private Configuration writerSliceConfig;
+
+        private CommonRdbmsWriter.Task commonRdbmsWriterSlave;
+
+        @Override
+        public void init()
+        {
+            this.writerSliceConfig = super.getPluginJobConf();
+
+            this.commonRdbmsWriterSlave = new CommonRdbmsWriter.Task(DataBaseType.Databend){
+                @Override
+                protected PreparedStatement fillPreparedStatementColumnType(PreparedStatement preparedStatement, int columnIndex, int columnSqltype, String typeName, Column column) throws SQLException {
+                    try {
+                        if (column.getRawData() == null) {
+                            preparedStatement.setNull(columnIndex + 1, columnSqltype);
+                            return preparedStatement;
+                        }
+
+                        java.util.Date utilDate;
+                        switch (columnSqltype) {
+
+                            case Types.TINYINT:
+                            case Types.SMALLINT:
+                            case Types.INTEGER:
+                                preparedStatement.setInt(columnIndex + 1, column.asBigInteger().intValue());
+                                break;
+                            case Types.BIGINT:
+                                preparedStatement.setLong(columnIndex + 1, column.asLong());
+                                break;
+                            case Types.DECIMAL:
+                                preparedStatement.setBigDecimal(columnIndex + 1, column.asBigDecimal());
+                                break;
+                            case Types.FLOAT:
+                            case Types.REAL:
+                                preparedStatement.setFloat(columnIndex + 1, column.asDouble().floatValue());
+                                break;
+                            case Types.DOUBLE:
+                                preparedStatement.setDouble(columnIndex + 1, column.asDouble());
+                                break;
+                            case Types.DATE:
+                                java.sql.Date sqlDate = null;
+                                try {
+                                    utilDate = column.asDate();
+                                } catch (DataXException e) {
+                                    throw new SQLException(String.format(
+                                            "Date type conversion error: [%s]", column));
+                                }
+
+                                if (null != utilDate) {
+                                    sqlDate = new java.sql.Date(utilDate.getTime());
+                                }
+                                preparedStatement.setDate(columnIndex + 1, sqlDate);
+                                break;
+
+                            case Types.TIME:
+                                java.sql.Time sqlTime = null;
+                                try {
+                                    utilDate = column.asDate();
+                                } catch (DataXException e) {
+                                    throw new SQLException(String.format(
+                                            "Date type conversion error: [%s]", column));
+                                }
+
+                                if (null != utilDate) {
+                                    sqlTime = new java.sql.Time(utilDate.getTime());
+                                }
+                                preparedStatement.setTime(columnIndex + 1, sqlTime);
+                                break;
+
+                            case Types.TIMESTAMP:
+                                Timestamp sqlTimestamp = null;
+                                if (column instanceof StringColumn && column.asString() != null) {
+                                    String timeStampStr = column.asString();
+                                    // a java.sql.Timestamp string argument must look like "2017-07-12 14:39:00.123566"
+                                    String pattern = "^\\d+-\\d+-\\d+ \\d+:\\d+:\\d+.\\d+";
+                                    boolean isMatch = Pattern.matches(pattern, timeStampStr);
+                                    if (isMatch) {
+                                        sqlTimestamp = Timestamp.valueOf(timeStampStr);
+                                        preparedStatement.setTimestamp(columnIndex + 1, sqlTimestamp);
+                                        break;
+                                    }
+                                }
+                                try {
+                                    utilDate = column.asDate();
+                                } catch (DataXException e) {
+                                    throw new SQLException(String.format(
+                                            "Date type conversion error: [%s]", column));
+                                }
+
+                                if (null != utilDate) {
+                                    sqlTimestamp = new Timestamp(
+                                            utilDate.getTime());
+                                }
+                                preparedStatement.setTimestamp(columnIndex + 1, sqlTimestamp);
+                                break;
+
+                            case Types.BINARY:
+                            case Types.VARBINARY:
+                            case Types.BLOB:
+                            case Types.LONGVARBINARY:
+                                preparedStatement.setBytes(columnIndex + 1, column
+                                        .asBytes());
+                                break;
+
+                            case Types.BOOLEAN:
+
+                            // warn: bit(1) -> Types.BIT, setBoolean can be used
+                            // warn: bit(>1) -> Types.VARBINARY, setBytes can be used
+                            case Types.BIT:
+                                if (this.dataBaseType == DataBaseType.MySql) {
+                                    Boolean asBoolean = column.asBoolean();
+                                    if (asBoolean != null) {
+                                        preparedStatement.setBoolean(columnIndex + 1, asBoolean);
+                                    } else {
+                                        preparedStatement.setNull(columnIndex + 1, Types.BIT);
+                                    }
+                                } else {
+                                    preparedStatement.setString(columnIndex + 1, column.asString());
+                                }
+                                break;
+
+                            default:
+                                // cast variant / array into string is fine.
+                                preparedStatement.setString(columnIndex + 1, column.asString());
+                                break;
+                        }
+                        return preparedStatement;
+                    } catch (DataXException e) {
+                        // when type conversion fails or overflows, report exactly which column caused it
+                        if (e.getErrorCode() == CommonErrorCode.CONVERT_NOT_SUPPORT ||
+                                e.getErrorCode() == CommonErrorCode.CONVERT_OVER_FLOW) {
+                            throw DataXException
+                                    .asDataXException(
+                                            e.getErrorCode(),
+                                            String.format(
+                                                    "type conversion error. columnName: [%s], columnType:[%d], columnJavaType: [%s]. please change the data type in given column field or do not sync on the column.",
+                                                    this.resultSetMetaData.getLeft()
+                                                            .get(columnIndex),
+                                                    this.resultSetMetaData.getMiddle()
+                                                            .get(columnIndex),
+                                                    this.resultSetMetaData.getRight()
+                                                            .get(columnIndex)));
+                        } else {
+                            throw e;
+                        }
+                    }
+                }
+
+            };
+            this.commonRdbmsWriterSlave.init(this.writerSliceConfig);
+        }
+
+        @Override
+        public void destroy()
+        {
+            this.commonRdbmsWriterSlave.destroy(this.writerSliceConfig);
+        }
+
+        @Override
+        public void prepare() {
+            this.commonRdbmsWriterSlave.prepare(this.writerSliceConfig);
+        }
+
+        @Override
+        public void post() {
+            this.commonRdbmsWriterSlave.post(this.writerSliceConfig);
+        }
+        @Override
+        public void startWrite(RecordReceiver lineReceiver)
+        {
+            this.commonRdbmsWriterSlave.startWrite(lineReceiver, this.writerSliceConfig, this.getTaskPluginCollector());
+        }
+
+    }
+}
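Note on the TIMESTAMP branch above: a string value is handed straight to Timestamp.valueOf only when it carries fractional seconds matching the regex; everything else falls back to column.asDate(). The standalone sketch below is not part of the patch (class name and sample values are made up) and only illustrates that check:

    import java.sql.Timestamp;
    import java.util.regex.Pattern;

    // Illustrative sketch of the timestamp-string fast path; DataX Column types are left out.
    public class TimestampParseSketch {
        private static final String TS_PATTERN = "^\\d+-\\d+-\\d+ \\d+:\\d+:\\d+.\\d+";

        public static Timestamp parse(String value) {
            // Timestamp.valueOf expects "yyyy-[m]m-[d]d hh:mm:ss[.f...]", e.g. "2017-07-12 14:39:00.123566"
            if (Pattern.matches(TS_PATTERN, value)) {
                return Timestamp.valueOf(value);
            }
            // anything else would go through Column.asDate() in the writer
            return null;
        }

        public static void main(String[] args) {
            System.out.println(parse("2017-07-12 14:39:00.123566")); // matches, parsed directly
            System.out.println(parse("2017-07-12 14:39:00"));        // no fractional part, falls back (null here)
        }
    }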
diff --git a/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/util/DatabendWriterUtil.java b/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/util/DatabendWriterUtil.java
new file mode 100644
index 00000000..a862e920
--- /dev/null
+++ b/databendwriter/src/main/java/com/alibaba/datax/plugin/writer/databendwriter/util/DatabendWriterUtil.java
@@ -0,0 +1,40 @@
+package com.alibaba.datax.plugin.writer.databendwriter.util;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.plugin.rdbms.writer.Constant;
+import com.alibaba.datax.plugin.rdbms.writer.Key;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.StringJoiner;
+
+public final class DatabendWriterUtil
+{
+    private static final Logger LOG = LoggerFactory.getLogger(DatabendWriterUtil.class);
+
+    private DatabendWriterUtil() {}
+    public static void dealWriteMode(Configuration originalConfig)
+    {
+        List<String> columns = originalConfig.getList(Key.COLUMN, String.class);
+
+        String jdbcUrl = originalConfig.getString(String.format("%s[0].%s",
+                Constant.CONN_MARK, Key.JDBC_URL));
+
+        String writeMode = originalConfig.getString(Key.WRITE_MODE, "INSERT");
+
+        StringBuilder writeDataSqlTemplate = new StringBuilder();
+        writeDataSqlTemplate.append("INSERT INTO %s");
+        StringJoiner columnString = new StringJoiner(",");
+
+        for (String column : columns) {
+            columnString.add(column);
+        }
+        writeDataSqlTemplate.append(String.format("(%s)", columnString));
+        writeDataSqlTemplate.append(" VALUES");
+
+        LOG.info("Write data [\n{}\n], which jdbcUrl like:[{}]", writeDataSqlTemplate, jdbcUrl);
+
+        originalConfig.set(Constant.INSERT_OR_REPLACE_TEMPLATE_MARK, writeDataSqlTemplate.toString());
+    }
+}
\ No newline at end of file
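For reference, here is a standalone sketch of the SQL prefix that dealWriteMode assembles, assuming a hypothetical column list id, name, age (the real code reads the columns and jdbcUrl from the job configuration):

    import java.util.Arrays;
    import java.util.List;
    import java.util.StringJoiner;

    // Illustrative only: shows the template string, not the full DataX plumbing.
    public class InsertTemplateSketch {
        public static void main(String[] args) {
            List<String> columns = Arrays.asList("id", "name", "age");

            StringJoiner columnString = new StringJoiner(",");
            for (String column : columns) {
                columnString.add(column);
            }
            String template = "INSERT INTO %s" + String.format("(%s)", columnString) + " VALUES";

            // Prints: INSERT INTO %s(id,name,age) VALUES
            // The %s table placeholder and the value placeholders are filled in later
            // by the common RDBMS writer machinery.
            System.out.println(template);
        }
    }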
diff --git a/databendwriter/src/main/resources/plugin.json b/databendwriter/src/main/resources/plugin.json
new file mode 100644
index 00000000..bab0130d
--- /dev/null
+++ b/databendwriter/src/main/resources/plugin.json
@@ -0,0 +1,6 @@
+{
+  "name": "databendwriter",
+  "class": "com.alibaba.datax.plugin.writer.databendwriter.DatabendWriter",
+  "description": "execute batch insert sql to write dataX data into databend",
+  "developer": "databend"
+}
\ No newline at end of file
diff --git a/databendwriter/src/main/resources/plugin_job_template.json b/databendwriter/src/main/resources/plugin_job_template.json
new file mode 100644
index 00000000..34d4b251
--- /dev/null
+++ b/databendwriter/src/main/resources/plugin_job_template.json
@@ -0,0 +1,19 @@
+{
+  "name": "databendwriter",
+  "parameter": {
+    "username": "username",
+    "password": "password",
+    "column": ["col1", "col2", "col3"],
+    "connection": [
+      {
+        "jdbcUrl": "jdbc:databend://:[/]",
+        "table": "table1"
+      }
+    ],
+    "preSql": [],
+    "postSql": [],
+
+    "maxBatchRows": 65536,
+    "maxBatchSize": 134217728
+  }
+}
\ No newline at end of file
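A filled-in variant of this template, loaded through the DataX Configuration helper, might look like the sketch below; the host, port, database and credentials are placeholders rather than values taken from this patch:

    import com.alibaba.datax.common.util.Configuration;

    // Illustrative only: builds a hypothetical databendwriter parameter block.
    public class DatabendWriterJobSketch {
        public static void main(String[] args) {
            String parameter = "{"
                    + "\"username\": \"databend\","
                    + "\"password\": \"databend\","
                    + "\"column\": [\"id\", \"name\"],"
                    + "\"connection\": [{"
                    + "  \"jdbcUrl\": \"jdbc:databend://127.0.0.1:8000/default\","
                    + "  \"table\": \"table1\""
                    + "}],"
                    + "\"preSql\": [],"
                    + "\"postSql\": []"
                    + "}";
            Configuration conf = Configuration.from(parameter);
            // DataX path syntax, same as the "%s[0].%s" lookup used in DatabendWriterUtil
            System.out.println(conf.getString("connection[0].jdbcUrl"));
        }
    }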
diff --git a/datahubreader/pom.xml b/datahubreader/pom.xml
new file mode 100644
index 00000000..c0022b44
--- /dev/null
+++ b/datahubreader/pom.xml
@@ -0,0 +1,79 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>datax-all</artifactId>
+        <groupId>com.alibaba.datax</groupId>
+        <version>0.0.1-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>datahubreader</artifactId>
+
+    <version>0.0.1-SNAPSHOT</version>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>datax-common</artifactId>
+            <version>${datax-project-version}</version>
+            <exclusions>
+                <exclusion>
+                    <artifactId>slf4j-log4j12</artifactId>
+                    <groupId>org.slf4j</groupId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>com.aliyun.datahub</groupId>
+            <artifactId>aliyun-sdk-datahub</artifactId>
+            <version>2.21.6-public</version>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.12</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <!-- compiler plugin -->
+            <plugin>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <configuration>
+                    <source>${jdk-version}</source>
+                    <target>${jdk-version}</target>
+                    <encoding>${project-sourceEncoding}</encoding>
+                </configuration>
+            </plugin>
+            <!-- assembly plugin -->
+            <plugin>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <configuration>
+                    <descriptors>
+                        <descriptor>src/main/assembly/package.xml</descriptor>
+                    </descriptors>
+                    <finalName>datax</finalName>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>dwzip</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
new file mode 100644
index 00000000..d14ea981
--- /dev/null
+++ b/datahubreader/src/main/assembly/package.xml
@@ -0,0 +1,34 @@
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+    <id></id>
+    <formats>
+        <format>dir</format>
+    </formats>
+    <includeBaseDirectory>false</includeBaseDirectory>
+    <fileSets>
+        <fileSet>
+            <directory>src/main/resources</directory>
+            <includes>
+                <include>plugin.json</include>
+            </includes>
+            <outputDirectory>plugin/reader/datahubreader</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>target/</directory>
+            <includes>
+                <include>datahubreader-0.0.1-SNAPSHOT.jar</include>
+            </includes>
+            <outputDirectory>plugin/reader/datahubreader</outputDirectory>
+        </fileSet>
+    </fileSets>
+
+    <dependencySets>
+        <dependencySet>
+            <useProjectArtifact>false</useProjectArtifact>
+            <outputDirectory>plugin/reader/datahubreader/libs</outputDirectory>
+            <scope>runtime</scope>
+        </dependencySet>
+    </dependencySets>
+</assembly>
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/Constant.java b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/Constant.java
new file mode 100644
index 00000000..bee3ccd7
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/Constant.java
@@ -0,0 +1,8 @@
+package com.alibaba.datax.plugin.reader.datahubreader;
+
+public class Constant {
+
+    public static String DATETIME_FORMAT = "yyyyMMddHHmmss";
+    public static String DATE_FORMAT = "yyyyMMdd";
+
+}
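For illustration, this is how a beginDateTime/endDateTime pair in DATETIME_FORMAT is turned into epoch milliseconds; the sample values are the ones used in the reader's job_config_template.json further below:

    import java.text.ParseException;
    import java.text.SimpleDateFormat;

    // Sketch of the "yyyyMMddHHmmss" parsing done by DatahubReaderUtils.getUnixTimeFromDateTime.
    public class DateTimeParseSketch {
        public static void main(String[] args) throws ParseException {
            SimpleDateFormat format = new SimpleDateFormat("yyyyMMddHHmmss");
            long beginMillis = format.parse("20180913121019").getTime();
            long endMillis = format.parse("20180913121119").getTime();
            // The reader requires endDateTime to be strictly later than beginDateTime.
            System.out.println(beginMillis < endMillis); // true
        }
    }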
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubClientHelper.java b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubClientHelper.java
new file mode 100644
index 00000000..2b7bcec4
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubClientHelper.java
@@ -0,0 +1,42 @@
+package com.alibaba.datax.plugin.reader.datahubreader;
+
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.fastjson2.JSON;
+import com.alibaba.fastjson2.TypeReference;
+import com.aliyun.datahub.client.DatahubClient;
+import com.aliyun.datahub.client.DatahubClientBuilder;
+import com.aliyun.datahub.client.auth.Account;
+import com.aliyun.datahub.client.auth.AliyunAccount;
+import com.aliyun.datahub.client.common.DatahubConfig;
+import com.aliyun.datahub.client.http.HttpConfig;
+import org.apache.commons.lang3.StringUtils;
+
+public class DatahubClientHelper {
+    public static DatahubClient getDatahubClient(Configuration jobConfig) {
+        String accessId = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_ID,
+                DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+        String accessKey = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_KEY,
+                DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+        String endpoint = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ENDPOINT,
+                DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+        Account account = new AliyunAccount(accessId, accessKey);
+        // whether to enable binary transfer; supported by the DataHub server since version 2.12
+        boolean enableBinary = jobConfig.getBool("enableBinary", false);
+        DatahubConfig datahubConfig = new DatahubConfig(endpoint, account, enableBinary);
+        // HttpConfig is optional; defaults apply when it is not set.
+        // Enabling LZ4 compression for network transfer is recommended when reading/writing data.
+        HttpConfig httpConfig = null;
+        String httpConfigStr = jobConfig.getString("httpConfig");
+        if (StringUtils.isNotBlank(httpConfigStr)) {
+            httpConfig = JSON.parseObject(httpConfigStr, new TypeReference<HttpConfig>() {
+            });
+        }
+
+        DatahubClientBuilder builder = DatahubClientBuilder.newBuilder().setDatahubConfig(datahubConfig);
+        if (null != httpConfig) {
+            builder.setHttpConfig(httpConfig);
+        }
+        DatahubClient datahubClient = builder.build();
+        return datahubClient;
+    }
+}
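A minimal, illustrative way to exercise this helper outside a DataX job (assuming the sketch sits in the same package as the helper; the endpoint and credentials are placeholders, not real values):

    import com.alibaba.datax.common.util.Configuration;
    import com.aliyun.datahub.client.DatahubClient;

    // Illustrative only: builds a client from an in-memory configuration.
    public class DatahubClientSketch {
        public static void main(String[] args) {
            Configuration conf = Configuration.from("{"
                    + "\"endpoint\": \"https://dh-cn-hangzhou.aliyuncs.com\","
                    + "\"accessId\": \"<yourAccessId>\","
                    + "\"accessKey\": \"<yourAccessKey>\","
                    + "\"enableBinary\": false"
                    + "}");
            DatahubClient client = DatahubClientHelper.getDatahubClient(conf);
            System.out.println(client != null);
        }
    }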
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReader.java b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReader.java
new file mode 100644
index 00000000..4792ac39
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReader.java
@@ -0,0 +1,292 @@
+package com.alibaba.datax.plugin.reader.datahubreader;
+
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import com.aliyun.datahub.client.model.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.alibaba.datax.common.element.Column;
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.element.StringColumn;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.RecordSender;
+import com.alibaba.datax.common.spi.Reader;
+import com.alibaba.datax.common.util.Configuration;
+
+
+import com.aliyun.datahub.client.DatahubClient;
+
+
+public class DatahubReader extends Reader {
+    public static class Job extends Reader.Job {
+        private static final Logger LOG = LoggerFactory.getLogger(Job.class);
+        
+        private Configuration originalConfig;
+        
+        private Long beginTimestampMillis;
+        private Long endTimestampMillis;
+        
+        DatahubClient datahubClient;
+        
+        @Override
+        public void init() {
+            LOG.info("datahub reader job init begin ...");
+            this.originalConfig = super.getPluginJobConf();
+            validateParameter(originalConfig);
+            this.datahubClient = DatahubClientHelper.getDatahubClient(this.originalConfig);
+            LOG.info("datahub reader job init end.");
+        }
+        
+        private void validateParameter(Configuration conf){
+            conf.getNecessaryValue(Key.ENDPOINT,DatahubReaderErrorCode.REQUIRE_VALUE);
+            conf.getNecessaryValue(Key.ACCESSKEYID,DatahubReaderErrorCode.REQUIRE_VALUE);
+            conf.getNecessaryValue(Key.ACCESSKEYSECRET,DatahubReaderErrorCode.REQUIRE_VALUE);
+            conf.getNecessaryValue(Key.PROJECT,DatahubReaderErrorCode.REQUIRE_VALUE);
+            conf.getNecessaryValue(Key.TOPIC,DatahubReaderErrorCode.REQUIRE_VALUE);
+            conf.getNecessaryValue(Key.COLUMN,DatahubReaderErrorCode.REQUIRE_VALUE);
+            conf.getNecessaryValue(Key.BEGINDATETIME,DatahubReaderErrorCode.REQUIRE_VALUE);
+            conf.getNecessaryValue(Key.ENDDATETIME,DatahubReaderErrorCode.REQUIRE_VALUE);
+            
+            int batchSize = this.originalConfig.getInt(Key.BATCHSIZE, 1024);
+            if (batchSize <= 0 || batchSize > 10000) {
+                throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+                        "Invalid batchSize[" + batchSize + "], the allowed range is (0, 10000]!");
+            }
+            
+            String beginDateTime = this.originalConfig.getString(Key.BEGINDATETIME);            
+            if (beginDateTime != null) {
+                try {
+                    beginTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(beginDateTime);
+                } catch (ParseException e) {
+                    throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+                            "Invalid beginDateTime[" + beginDateTime + "], format [yyyyMMddHHmmss]!");    
+                }
+            }
+            
+            if (beginTimestampMillis != null && beginTimestampMillis <= 0) {
+                throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+                        "Invalid beginTimestampMillis[" + beginTimestampMillis + "]!");               
+            }
+            
+            String endDateTime = this.originalConfig.getString(Key.ENDDATETIME);            
+            if (endDateTime != null) {
+                try {
+                    endTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(endDateTime);
+                } catch (ParseException e) {
+                    throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+                            "Invalid beginDateTime[" + endDateTime + "], format [yyyyMMddHHmmss]!");    
+                }
+            }
+            
+            if (endTimestampMillis != null && endTimestampMillis <= 0) {
+                throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+                        "Invalid endTimestampMillis[" + endTimestampMillis + "]!");                
+            }
+            
+            if (beginTimestampMillis != null && endTimestampMillis != null
+                    && endTimestampMillis <= beginTimestampMillis) {
+                throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+                        "endTimestampMillis[" + endTimestampMillis + "] must bigger than beginTimestampMillis[" + beginTimestampMillis + "]!");  
+            }
+        }
+        
+        @Override
+        public void prepare() {
+            // create datahub client
+            String project = originalConfig.getNecessaryValue(Key.PROJECT, DatahubReaderErrorCode.REQUIRE_VALUE);
+            String topic = originalConfig.getNecessaryValue(Key.TOPIC, DatahubReaderErrorCode.REQUIRE_VALUE);
+            RecordType recordType = null;
+            try {
+                DatahubClient client = DatahubClientHelper.getDatahubClient(this.originalConfig);
+                GetTopicResult getTopicResult = client.getTopic(project, topic);
+                recordType = getTopicResult.getRecordType();
+            } catch (Exception e) {
+                LOG.warn("get topic type error: {}", e.getMessage());
+            }
+            if (null != recordType) {
+                if (recordType == RecordType.BLOB) {
+                    throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+                            "DatahubReader only support 'Tuple' RecordType now, but your RecordType is 'BLOB'");
+                }
+            }
+        }
+
+        @Override
+        public void destroy() {
+        }
+
+        @Override
+        public List<Configuration> split(int adviceNumber) {
+            LOG.info("split() begin...");
+
+            List<Configuration> readerSplitConfigs = new ArrayList<Configuration>();
+            
+            String project = this.originalConfig.getString(Key.PROJECT);
+            String topic = this.originalConfig.getString(Key.TOPIC);
+            
+            List<ShardEntry> shardEntrys = DatahubReaderUtils.getShardsWithRetry(this.datahubClient, project, topic);
+            if (shardEntrys == null || shardEntrys.isEmpty()) {
+                throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+                        "Project [" + project + "] Topic [" + topic + "] has no shards, please check!");
+            }
+            
+            for (ShardEntry shardEntry : shardEntrys) {
+                Configuration splitedConfig = this.originalConfig.clone();
+                splitedConfig.set(Key.SHARDID, shardEntry.getShardId());
+                readerSplitConfigs.add(splitedConfig);
+            }
+            
+            LOG.info("split() ok and end...");
+            return readerSplitConfigs;
+        }
+        
+    }
+    
+    public static class Task extends Reader.Task {
+        private static final Logger LOG = LoggerFactory.getLogger(Task.class);
+        
+        private Configuration taskConfig;
+        
+        private String accessId;
+        private String accessKey;
+        private String endpoint;
+        private String project;
+        private String topic;
+        private String shardId;
+        private Long beginTimestampMillis;
+        private Long endTimestampMillis;
+        private int batchSize;
+        private List<String> columns;
+        private RecordSchema schema;
+        private String timeStampUnit;
+        
+        DatahubClient datahubClient;
+        
+        @Override
+        public void init() {
+            this.taskConfig = super.getPluginJobConf();
+            
+            this.accessId = this.taskConfig.getString(Key.ACCESSKEYID);
+            this.accessKey = this.taskConfig.getString(Key.ACCESSKEYSECRET);
+            this.endpoint = this.taskConfig.getString(Key.ENDPOINT);
+            this.project = this.taskConfig.getString(Key.PROJECT);
+            this.topic = this.taskConfig.getString(Key.TOPIC);
+            this.shardId = this.taskConfig.getString(Key.SHARDID);
+            this.batchSize = this.taskConfig.getInt(Key.BATCHSIZE, 1024);
+            this.timeStampUnit = this.taskConfig.getString(Key.TIMESTAMP_UNIT, "MICROSECOND");
+            try {
+                this.beginTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(this.taskConfig.getString(Key.BEGINDATETIME));
+            } catch (ParseException e) {                
+            }
+            
+            try {
+                this.endTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(this.taskConfig.getString(Key.ENDDATETIME));
+            } catch (ParseException e) {                
+            }
+            
+            this.columns = this.taskConfig.getList(Key.COLUMN, String.class);
+            
+            this.datahubClient = DatahubClientHelper.getDatahubClient(this.taskConfig);
+
+
+            this.schema = DatahubReaderUtils.getDatahubSchemaWithRetry(this.datahubClient, this.project, topic);
+            
+            LOG.info("init datahub reader task finished.project:{} topic:{} batchSize:{}", project, topic, batchSize);
+        }
+
+        @Override
+        public void destroy() {
+        }
+
+        @Override
+        public void startRead(RecordSender recordSender) {
+            LOG.info("read start");
+            
+            String beginCursor = DatahubReaderUtils.getCursorWithRetry(this.datahubClient, this.project, 
+                    this.topic, this.shardId, this.beginTimestampMillis);
+            String endCursor = DatahubReaderUtils.getCursorWithRetry(this.datahubClient, this.project, 
+                    this.topic, this.shardId, this.endTimestampMillis);
+            
+            if (beginCursor == null) {
+                LOG.info("Shard:{} has no data!", this.shardId);
+                return;
+            } else if (endCursor == null) {
+                endCursor = DatahubReaderUtils.getLatestCursorWithRetry(this.datahubClient, this.project,
+                        this.topic, this.shardId);
+            }
+            
+            String curCursor = beginCursor;
+            
+            boolean exit = false;
+            
+            while (true) {
+                
+                GetRecordsResult result = DatahubReaderUtils.getRecordsResultWithRetry(this.datahubClient, this.project, this.topic,
+                        this.shardId, this.batchSize, curCursor, this.schema);
+                                
+                List<RecordEntry> records = result.getRecords();
+                if (records.size() > 0) {
+                    for (RecordEntry record : records) {
+                        if (record.getSystemTime() >= this.endTimestampMillis) {
+                            exit = true;
+                            break;
+                        }
+                        
+                        HashMap<String, Column> dataMap = new HashMap<String, Column>();
+                        List<Field> fields = ((TupleRecordData) record.getRecordData()).getRecordSchema().getFields();
+                        for (int i = 0; i < fields.size(); i++) {
+                            Field field = fields.get(i);
+                            Column column = DatahubReaderUtils.getColumnFromField(record, field, this.timeStampUnit);
+                            dataMap.put(field.getName(), column);
+                        }
+                        
+                        Record dataxRecord = recordSender.createRecord();
+                        
+                        if (null != this.columns && 1 == this.columns.size()) {
+                            String columnsInStr = columns.get(0).toString();
+                            if ("\"*\"".equals(columnsInStr) || "*".equals(columnsInStr)) {
+                                for (int i = 0; i < fields.size(); i++) {
+                                    dataxRecord.addColumn(dataMap.get(fields.get(i).getName()));
+                                }
+
+                            } else {
+                                if (dataMap.containsKey(columnsInStr)) {
+                                    dataxRecord.addColumn(dataMap.get(columnsInStr));
+                                } else {
+                                    dataxRecord.addColumn(new StringColumn(null));
+                                }
+                            }
+                        } else {
+                            for (String col : this.columns) {
+                                if (dataMap.containsKey(col)) {
+                                    dataxRecord.addColumn(dataMap.get(col));
+                                } else {
+                                    dataxRecord.addColumn(new StringColumn(null));
+                                }
+                            }
+                        }                         
+
+                        recordSender.sendToWriter(dataxRecord);                           
+                    }
+                } else {
+                    break;
+                }
+                
+                if (exit) {
+                    break;
+                }
+                
+                curCursor = result.getNextCursor();
+            }
+            
+            
+            LOG.info("end read datahub shard...");
+        }
+        
+    }
+
+}
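The column handling in startRead can be summarized by the following standalone sketch (simplified: DataX Column objects are replaced by plain values, and the field and column names are hypothetical):

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    // A single "*" (or "\"*\"") keeps every schema field in order; otherwise each configured
    // name is looked up and missing names become null values.
    public class ColumnSelectSketch {
        static List<Object> select(List<String> configured, List<String> schemaFields, Map<String, Object> row) {
            List<Object> out = new java.util.ArrayList<Object>();
            if (configured != null && configured.size() == 1
                    && ("*".equals(configured.get(0)) || "\"*\"".equals(configured.get(0)))) {
                for (String field : schemaFields) {
                    out.add(row.get(field));
                }
            } else {
                for (String col : configured) {
                    out.add(row.containsKey(col) ? row.get(col) : null);
                }
            }
            return out;
        }

        public static void main(String[] args) {
            Map<String, Object> row = new HashMap<String, Object>();
            row.put("a", 1);
            row.put("b", "x");
            System.out.println(select(Arrays.asList("*"), Arrays.asList("a", "b"), row));            // [1, x]
            System.out.println(select(Arrays.asList("b", "missing"), Arrays.asList("a", "b"), row)); // [x, null]
        }
    }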
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReaderErrorCode.java b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReaderErrorCode.java
new file mode 100644
index 00000000..949a66f0
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReaderErrorCode.java
@@ -0,0 +1,35 @@
+package com.alibaba.datax.plugin.reader.datahubreader;
+
+import com.alibaba.datax.common.spi.ErrorCode;
+
+public enum DatahubReaderErrorCode implements ErrorCode {
+    BAD_CONFIG_VALUE("DatahubReader-00", "The value you configured is invalid."),
+    LOG_HUB_ERROR("DatahubReader-01","Datahub exception"),
+    REQUIRE_VALUE("DatahubReader-02","Missing parameters"),
+    EMPTY_LOGSTORE_VALUE("DatahubReader-03","There is no shard under this LogStore");
+
+
+    private final String code;
+    private final String description;
+
+    private DatahubReaderErrorCode(String code, String description) {
+        this.code = code;
+        this.description = description;
+    }
+
+    @Override
+    public String getCode() {
+        return this.code;
+    }
+
+    @Override
+    public String getDescription() {
+        return this.description;
+    }
+
+    @Override
+    public String toString() {
+        return String.format("Code:[%s], Description:[%s]. ", this.code,
+                this.description);
+    }
+}
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReaderUtils.java b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReaderUtils.java
new file mode 100644
index 00000000..6c3455df
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReaderUtils.java
@@ -0,0 +1,200 @@
+package com.alibaba.datax.plugin.reader.datahubreader;
+
+import java.math.BigDecimal;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.List;
+import java.util.concurrent.Callable;
+
+import com.alibaba.datax.common.element.*;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.util.DataXCaseEnvUtil;
+import com.alibaba.datax.common.util.RetryUtil;
+
+import com.aliyun.datahub.client.DatahubClient;
+import com.aliyun.datahub.client.exception.InvalidParameterException;
+import com.aliyun.datahub.client.model.*;
+
+public class DatahubReaderUtils {
+
+    public static long getUnixTimeFromDateTime(String dateTime) throws ParseException {
+        try {
+            String format = Constant.DATETIME_FORMAT;
+            SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format);
+            return simpleDateFormat.parse(dateTime).getTime();
+        } catch (ParseException ignored) {
+            throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+                    "Invalid DateTime[" + dateTime + "]!");   
+        }
+    }
+    
+    public static List<ShardEntry> getShardsWithRetry(final DatahubClient datahubClient, final String project, final String topic) {
+
+        List<ShardEntry> shards = null;
+        try {
+            shards = RetryUtil.executeWithRetry(new Callable<List<ShardEntry>>() {
+                @Override
+                public List<ShardEntry> call() throws Exception {
+                    ListShardResult listShardResult = datahubClient.listShard(project, topic);
+                    return listShardResult.getShards();
+                }
+            }, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
+
+        } catch (Exception e) {
+            throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+                    "get Shards error, please check! detail error message: " + e.toString());
+        }
+        return shards;
+    }
+    
+    public static String getCursorWithRetry(final DatahubClient datahubClient, final String project, final String topic, 
+            final String shardId, final long timestamp) {
+        
+        String cursor;
+        try {
+            cursor = RetryUtil.executeWithRetry(new Callable<String>() {
+                @Override
+                public String call() throws Exception {
+                    try {
+                        return datahubClient.getCursor(project, topic, shardId, CursorType.SYSTEM_TIME, timestamp).getCursor();
+                    } catch (InvalidParameterException e) {
+                        if (e.getErrorMessage().indexOf("Time in seek request is out of range") >= 0) {
+                            return null;
+                        } else {
+                            throw e;
+                        }
+                        
+                    }
+                }
+            }, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
+            
+        } catch (Exception e) {
+            throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+                    "get Cursor error, please check ! detail error messsage: " + e.toString());
+        }         
+        return cursor;
+    }
+    
+    public static String getLatestCursorWithRetry(final DatahubClient datahubClient, final String project, final String topic,
+            final String shardId) {
+        
+        String cursor;
+        try {
+            cursor = RetryUtil.executeWithRetry(new Callable<String>() {
+                @Override
+                public String call() throws Exception {
+                    return datahubClient.getCursor(project, topic, shardId, CursorType.LATEST).getCursor();
+                }
+            }, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
+            
+        } catch (Exception e) {
+            throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+                    "get Cursor error, please check ! detail error messsage: " + e.toString());
+        }         
+        return cursor;
+    }    
+    
+    public static RecordSchema getDatahubSchemaWithRetry(final DatahubClient datahubClient, final String project, final String topic) {
+        
+        RecordSchema schema;
+        try {
+            schema = RetryUtil.executeWithRetry(new Callable<RecordSchema>() {
+                @Override
+                public RecordSchema call() throws Exception {
+                    return datahubClient.getTopic(project, topic).getRecordSchema();
+                }
+            }, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
+            
+        } catch (Exception e) {
+            throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+                    "get Topic Schema error, please check ! detail error messsage: " + e.toString());
+        }         
+        return schema;
+    } 
+    
+    public static GetRecordsResult getRecordsResultWithRetry(final DatahubClient datahubClient, final String project,
+            final String topic, final String shardId, final int batchSize, final String cursor, final RecordSchema schema) {
+        
+        GetRecordsResult result;
+        try  {
+            result = RetryUtil.executeWithRetry(new Callable<GetRecordsResult>() {
+                @Override
+                public GetRecordsResult call() throws Exception {
+                    return datahubClient.getRecords(project, topic, shardId, schema, cursor, batchSize);
+                }
+            }, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
+            
+        } catch (Exception e) {
+            throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
+                    "get Record Result error, please check ! detail error messsage: " + e.toString());
+        }     
+        return result;
+        
+    }
+    
+    public static Column getColumnFromField(RecordEntry record, Field field, String timeStampUnit) {
+        Column col = null;
+        TupleRecordData o = (TupleRecordData) record.getRecordData();
+
+        switch (field.getType()) {
+            case SMALLINT:
+                Short shortValue = ((Short) o.getField(field.getName()));
+                col = new LongColumn(shortValue == null ? null: shortValue.longValue());
+                break;
+            case INTEGER:
+                col = new LongColumn((Integer) o.getField(field.getName()));
+                break;
+            case BIGINT: {
+                col = new LongColumn((Long) o.getField(field.getName()));
+                break;
+            }
+            case TINYINT: {
+                Byte byteValue = ((Byte) o.getField(field.getName()));
+                col = new LongColumn(byteValue == null ? null : byteValue.longValue());
+                break;
+            }
+            case BOOLEAN: {
+                col = new BoolColumn((Boolean) o.getField(field.getName()));
+                break;
+            }
+            case FLOAT:
+                col = new DoubleColumn((Float) o.getField(field.getName()));
+                break;
+            case DOUBLE: {
+                col = new DoubleColumn((Double) o.getField(field.getName()));
+                break;
+            }
+            case STRING: {
+                col = new StringColumn((String) o.getField(field.getName()));
+                break;
+            }
+            case DECIMAL: {
+                BigDecimal value = (BigDecimal) o.getField(field.getName());
+                col = new DoubleColumn(value == null ? null : value.doubleValue());
+                break;
+            }
+            case TIMESTAMP: {
+                Long value = (Long) o.getField(field.getName());
+
+                if ("MILLISECOND".equals(timeStampUnit)) {
+                    // MILLISECOND: 13-digit precision, the value is already epoch millis, pass it to new Date() directly
+                    col = new DateColumn(value == null ? null : new Date(value));
+                }
+                else if ("SECOND".equals(timeStampUnit)){
+                    col = new DateColumn(value == null ? null : new Date(value * 1000));
+                }
+                else {
+                    // default is MICROSECOND (16-digit precision), consistent with the previous logic
+                    col = new DateColumn(value == null ? null : new Date(value / 1000));
+                }
+                break;
+            }
+            default:
+                throw new RuntimeException("Unknown column type: " + field.getType());
+        }
+        
+        return col;
+    }
+    
+}
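The timeStampUnit handling above boils down to the conversion sketched here (the sample value is made up for illustration):

    import java.util.Date;

    // DataHub delivers TIMESTAMP fields as a long whose unit is controlled by the job's
    // timeStampUnit setting (default MICROSECOND).
    public class TimestampUnitSketch {
        static Date toDate(Long value, String timeStampUnit) {
            if (value == null) {
                return null;
            }
            if ("MILLISECOND".equals(timeStampUnit)) {
                return new Date(value);            // already epoch millis
            } else if ("SECOND".equals(timeStampUnit)) {
                return new Date(value * 1000);     // seconds -> millis
            }
            return new Date(value / 1000);         // default MICROSECOND -> millis
        }

        public static void main(String[] args) {
            long micros = 1536818419000000L;       // hypothetical microsecond timestamp
            System.out.println(toDate(micros, "MICROSECOND"));
        }
    }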
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubWriterErrorCode.java b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubWriterErrorCode.java
new file mode 100644
index 00000000..c8633ea8
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubWriterErrorCode.java
@@ -0,0 +1,37 @@
+package com.alibaba.datax.plugin.reader.datahubreader;
+
+import com.alibaba.datax.common.spi.ErrorCode;
+import com.alibaba.datax.common.util.MessageSource;
+
+public enum DatahubWriterErrorCode implements ErrorCode {
+    MISSING_REQUIRED_VALUE("DatahubWriter-01", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.missing_required_value")),
+    INVALID_CONFIG_VALUE("DatahubWriter-02", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.invalid_config_value")),
+    GET_TOPOIC_INFO_FAIL("DatahubWriter-03", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.get_topic_info_fail")),
+    WRITE_DATAHUB_FAIL("DatahubWriter-04", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.write_datahub_fail")),
+    SCHEMA_NOT_MATCH("DatahubWriter-05", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.schema_not_match")),
+    ;
+
+    private final String code;
+    private final String description;
+
+    private DatahubWriterErrorCode(String code, String description) {
+        this.code = code;
+        this.description = description;
+    }
+
+    @Override
+    public String getCode() {
+        return this.code;
+    }
+
+    @Override
+    public String getDescription() {
+        return this.description;
+    }
+
+    @Override
+    public String toString() {
+        return String.format("Code:[%s], Description:[%s]. ", this.code,
+                this.description);
+    }
+}
\ No newline at end of file
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/Key.java b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/Key.java
new file mode 100644
index 00000000..3cb84b4b
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/Key.java
@@ -0,0 +1,35 @@
+package com.alibaba.datax.plugin.reader.datahubreader;
+
+public final class Key {
+
+    /**
+     * Configuration keys that the plugin expects the job author to provide are declared here.
+     */
+    public static final String ENDPOINT = "endpoint";
+
+    public static final String ACCESSKEYID = "accessId";
+
+    public static final String ACCESSKEYSECRET = "accessKey";
+
+    public static final String PROJECT = "project";
+    
+    public static final String TOPIC = "topic";
+        
+    public static final String BEGINDATETIME = "beginDateTime";
+    
+    public static final String ENDDATETIME = "endDateTime";
+
+    public static final String BATCHSIZE = "batchSize";
+    
+    public static final String COLUMN = "column";
+    
+    public static final String SHARDID = "shardId";
+
+    public static final String CONFIG_KEY_ENDPOINT = "endpoint";
+    public static final String CONFIG_KEY_ACCESS_ID = "accessId";
+    public static final String CONFIG_KEY_ACCESS_KEY = "accessKey";
+
+
+    public static final String TIMESTAMP_UNIT = "timeStampUnit";
+    
+}
\ No newline at end of file
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings.properties b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings.properties
new file mode 100644
index 00000000..e85c8ab3
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings.properties
@@ -0,0 +1,5 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_en_US.properties b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_en_US.properties
new file mode 100644
index 00000000..31a291e6
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_en_US.properties
@@ -0,0 +1,5 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
\ No newline at end of file
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_ja_JP.properties b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_ja_JP.properties
new file mode 100644
index 00000000..31a291e6
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_ja_JP.properties
@@ -0,0 +1,5 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
\ No newline at end of file
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_CN.properties b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_CN.properties
new file mode 100644
index 00000000..31a291e6
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_CN.properties
@@ -0,0 +1,5 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
\ No newline at end of file
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_HK.properties b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_HK.properties
new file mode 100644
index 00000000..c6a3a0e0
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_HK.properties
@@ -0,0 +1,9 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.errorcode.missing_required_value=您缺失了必須填寫的參數值.
+errorcode.invalid_config_value=您的參數配寘錯誤.
+errorcode.get_topic_info_fail=獲取shard清單失敗.
+errorcode.write_datahub_fail=寫數據失敗.
+errorcode.schema_not_match=數據格式錯誤.
diff --git a/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_TW.properties b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_TW.properties
new file mode 100644
index 00000000..c6a3a0e0
--- /dev/null
+++ b/datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/LocalStrings_zh_TW.properties
@@ -0,0 +1,9 @@
+errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
+errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
+errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
+errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
+errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.errorcode.missing_required_value=您缺失了必須填寫的參數值.
+errorcode.invalid_config_value=您的參數配寘錯誤.
+errorcode.get_topic_info_fail=獲取shard清單失敗.
+errorcode.write_datahub_fail=寫數據失敗.
+errorcode.schema_not_match=數據格式錯誤.
diff --git a/datahubreader/src/main/resources/job_config_template.json b/datahubreader/src/main/resources/job_config_template.json
new file mode 100644
index 00000000..eaf89804
--- /dev/null
+++ b/datahubreader/src/main/resources/job_config_template.json
@@ -0,0 +1,14 @@
+{
+    "name": "datahubreader",
+    "parameter": {
+        "endpoint":"",
+        "accessId": "",
+        "accessKey": "",
+        "project": "",
+        "topic": "",
+        "beginDateTime": "20180913121019",
+        "endDateTime": "20180913121119",
+        "batchSize": 1024,
+        "column": []
+    }
+}
\ No newline at end of file
diff --git a/datahubreader/src/main/resources/plugin.json b/datahubreader/src/main/resources/plugin.json
new file mode 100644
index 00000000..47b1c86b
--- /dev/null
+++ b/datahubreader/src/main/resources/plugin.json
@@ -0,0 +1,6 @@
+{
+    "name": "datahubreader",
+    "class": "com.alibaba.datax.plugin.reader.datahubreader.DatahubReader",
+    "description": "datahub reader",
+    "developer": "alibaba"
+}
\ No newline at end of file
diff --git a/datahubwriter/pom.xml b/datahubwriter/pom.xml
new file mode 100644
index 00000000..1ee1fe9b
--- /dev/null
+++ b/datahubwriter/pom.xml
@@ -0,0 +1,79 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>datax-all</artifactId>
+        <groupId>com.alibaba.datax</groupId>
+        <version>0.0.1-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>datahubwriter</artifactId>
+
+    <version>0.0.1-SNAPSHOT</version>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.alibaba.datax</groupId>
+            <artifactId>datax-common</artifactId>
+            <version>${datax-project-version}</version>
+            <exclusions>
+                <exclusion>
+                    <artifactId>slf4j-log4j12</artifactId>
+                    <groupId>org.slf4j</groupId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>com.aliyun.datahub</groupId>
+            <artifactId>aliyun-sdk-datahub</artifactId>
+            <version>2.21.6-public</version>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.12</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <!-- compiler plugin -->
+            <plugin>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <configuration>
+                    <source>${jdk-version}</source>
+                    <target>${jdk-version}</target>
+                    <encoding>${project-sourceEncoding}</encoding>
+                </configuration>
+            </plugin>
+            <!-- assembly plugin -->
+            <plugin>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <configuration>
+                    <descriptors>
+                        <descriptor>src/main/assembly/package.xml</descriptor>
+                    </descriptors>
+                    <finalName>datax</finalName>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>dwzip</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
diff --git a/datahubwriter/src/main/assembly/package.xml b/datahubwriter/src/main/assembly/package.xml
new file mode 100644
index 00000000..aaef9f99
--- /dev/null
+++ b/datahubwriter/src/main/assembly/package.xml
@@ -0,0 +1,34 @@
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+    <id></id>
+    <formats>
+        <format>dir</format>
+    </formats>
+    <includeBaseDirectory>false</includeBaseDirectory>
+    <fileSets>
+        <fileSet>
+            <directory>src/main/resources</directory>
+            <includes>
+                <include>plugin.json</include>
+            </includes>
+            <outputDirectory>plugin/writer/datahubwriter</outputDirectory>
+        </fileSet>
+        <fileSet>
+            <directory>target/</directory>
+            <includes>
+                <include>datahubwriter-0.0.1-SNAPSHOT.jar</include>
+            </includes>
+            <outputDirectory>plugin/writer/datahubwriter</outputDirectory>
+        </fileSet>
+    </fileSets>
+
+    <dependencySets>
+        <dependencySet>
+            <useProjectArtifact>false</useProjectArtifact>
+            <outputDirectory>plugin/writer/datahubwriter/libs</outputDirectory>
+            <scope>runtime</scope>
+        </dependencySet>
+    </dependencySets>
+</assembly>
diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubClientHelper.java b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubClientHelper.java
new file mode 100644
index 00000000..c25d1210
--- /dev/null
+++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubClientHelper.java
@@ -0,0 +1,43 @@
+package com.alibaba.datax.plugin.writer.datahubwriter;
+
+import org.apache.commons.lang3.StringUtils;
+
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.fastjson2.JSON;
+import com.alibaba.fastjson2.TypeReference;
+import com.aliyun.datahub.client.DatahubClient;
+import com.aliyun.datahub.client.DatahubClientBuilder;
+import com.aliyun.datahub.client.auth.Account;
+import com.aliyun.datahub.client.auth.AliyunAccount;
+import com.aliyun.datahub.client.common.DatahubConfig;
+import com.aliyun.datahub.client.http.HttpConfig;
+
+public class DatahubClientHelper {
+    public static DatahubClient getDatahubClient(Configuration jobConfig) {
+        String accessId = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_ID,
+                DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+        String accessKey = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_KEY,
+                DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+        String endpoint = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ENDPOINT,
+                DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
+        Account account = new AliyunAccount(accessId, accessKey);
+        // whether to enable binary transfer; supported by the DataHub server since version 2.12
+        boolean enableBinary = jobConfig.getBool("enableBinary", false);
+        DatahubConfig datahubConfig = new DatahubConfig(endpoint, account, enableBinary);
+        // HttpConfig is optional; defaults apply when it is not set.
+        // Enabling LZ4 compression for network transfer is recommended when reading/writing data.
+        HttpConfig httpConfig = null;
+        String httpConfigStr = jobConfig.getString("httpConfig");
+        if (StringUtils.isNotBlank(httpConfigStr)) {
+            httpConfig = JSON.parseObject(httpConfigStr, new TypeReference<HttpConfig>() {
+            });
+        }
+
+        DatahubClientBuilder builder = DatahubClientBuilder.newBuilder().setDatahubConfig(datahubConfig);
+        if (null != httpConfig) {
+            builder.setHttpConfig(httpConfig);
+        }
+        DatahubClient datahubClient = builder.build();
+        return datahubClient;
+    }
+}
diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubWriter.java b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubWriter.java
new file mode 100644
index 00000000..cd414fc5
--- /dev/null
+++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubWriter.java
@@ -0,0 +1,355 @@
+package com.alibaba.datax.plugin.writer.datahubwriter;
+
+import com.alibaba.datax.common.element.Column;
+import com.alibaba.datax.common.element.Record;
+import com.alibaba.datax.common.exception.DataXException;
+import com.alibaba.datax.common.plugin.RecordReceiver;
+import com.alibaba.datax.common.spi.Writer;
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.common.util.DataXCaseEnvUtil;
+import com.alibaba.datax.common.util.RetryUtil;
+import com.alibaba.fastjson2.JSON;
+import com.aliyun.datahub.client.DatahubClient;
+import com.aliyun.datahub.client.model.FieldType;
+import com.aliyun.datahub.client.model.GetTopicResult;
+import com.aliyun.datahub.client.model.ListShardResult;
+import com.aliyun.datahub.client.model.PutErrorEntry;
+import com.aliyun.datahub.client.model.PutRecordsResult;
+import com.aliyun.datahub.client.model.RecordEntry;
+import com.aliyun.datahub.client.model.RecordSchema;
+import com.aliyun.datahub.client.model.RecordType;
+import com.aliyun.datahub.client.model.ShardEntry;
+import com.aliyun.datahub.client.model.ShardState;
+import com.aliyun.datahub.client.model.TupleRecordData;
+
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.Callable;
+
+public class DatahubWriter extends Writer {
+
+    /**
+     * Methods on the Job run only once; the Task methods are run in parallel by multiple Task threads started by the framework.
+     * <pre>
+     * The overall Writer execution flow is:
+     *
+     * Job: init --> prepare --> split
+     *
+     *                          Task: init --> prepare --> startWrite --> post --> destroy
+     *                          Task: init --> prepare --> startWrite --> post --> destroy
+     *
+     *                                                                            Job: post --> destroy
+     * </pre>
+ */ + public static class Job extends Writer.Job { + private static final Logger LOG = LoggerFactory + .getLogger(Job.class); + + private Configuration jobConfig = null; + + @Override + public void init() { + this.jobConfig = super.getPluginJobConf(); + jobConfig.getNecessaryValue(Key.CONFIG_KEY_ENDPOINT, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE); + jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_ID, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE); + jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_KEY, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE); + jobConfig.getNecessaryValue(Key.CONFIG_KEY_PROJECT, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE); + jobConfig.getNecessaryValue(Key.CONFIG_KEY_TOPIC, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE); + } + + @Override + public void prepare() { + String project = jobConfig.getNecessaryValue(Key.CONFIG_KEY_PROJECT, + DatahubWriterErrorCode.MISSING_REQUIRED_VALUE); + String topic = jobConfig.getNecessaryValue(Key.CONFIG_KEY_TOPIC, + DatahubWriterErrorCode.MISSING_REQUIRED_VALUE); + RecordType recordType = null; + DatahubClient client = DatahubClientHelper.getDatahubClient(this.jobConfig); + try { + GetTopicResult getTopicResult = client.getTopic(project, topic); + recordType = getTopicResult.getRecordType(); + } catch (Exception e) { + LOG.warn("get topic type error: {}", e.getMessage()); + } + if (null != recordType) { + if (recordType == RecordType.BLOB) { + throw DataXException.asDataXException(DatahubWriterErrorCode.WRITE_DATAHUB_FAIL, + "DatahubWriter only support 'Tuple' RecordType now, but your RecordType is 'BLOB'"); + } + } + } + + @Override + public List split(int mandatoryNumber) { + List configs = new ArrayList(); + for (int i = 0; i < mandatoryNumber; ++i) { + configs.add(jobConfig.clone()); + } + return configs; + } + + @Override + public void post() {} + + @Override + public void destroy() {} + + } + + public static class Task extends Writer.Task { + private static final Logger LOG = LoggerFactory + .getLogger(Task.class); + private static final List FATAL_ERRORS_DEFAULT = Arrays.asList( + "InvalidParameterM", + "MalformedRecord", + "INVALID_SHARDID", + "NoSuchTopic", + "NoSuchShard" + ); + + private Configuration taskConfig; + private DatahubClient client; + private String project; + private String topic; + private List shards; + private int maxCommitSize; + private int maxRetryCount; + private RecordSchema schema; + private long retryInterval; + private Random random; + private List column; + private List columnIndex; + private boolean enableColumnConfig; + private List fatalErrors; + + @Override + public void init() { + this.taskConfig = super.getPluginJobConf(); + project = taskConfig.getNecessaryValue(Key.CONFIG_KEY_PROJECT, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE); + topic = taskConfig.getNecessaryValue(Key.CONFIG_KEY_TOPIC, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE); + maxCommitSize = taskConfig.getInt(Key.CONFIG_KEY_MAX_COMMIT_SIZE, 1024*1024); + maxRetryCount = taskConfig.getInt(Key.CONFIG_KEY_MAX_RETRY_COUNT, 500); + this.retryInterval = taskConfig.getInt(Key.RETRY_INTERVAL, 650); + this.random = new Random(); + this.column = this.taskConfig.getList(Key.CONFIG_KEY_COLUMN, String.class); + // ["*"] + if (null != this.column && 1 == this.column.size()) { + if (StringUtils.equals("*", this.column.get(0))) { + this.column = null; + } + } + this.columnIndex = new ArrayList(); + // 留个开关保平安 + this.enableColumnConfig = this.taskConfig.getBool("enableColumnConfig", true); + this.fatalErrors = 
this.taskConfig.getList("fatalErrors", Task.FATAL_ERRORS_DEFAULT, String.class); + this.client = DatahubClientHelper.getDatahubClient(this.taskConfig); + } + + @Override + public void prepare() { + final String shardIdConfig = this.taskConfig.getString(Key.CONFIG_KEY_SHARD_ID); + this.shards = new ArrayList(); + try { + RetryUtil.executeWithRetry(new Callable() { + @Override + public Void call() throws Exception { + ListShardResult result = client.listShard(project, topic); + if (StringUtils.isNotBlank(shardIdConfig)) { + shards.add(shardIdConfig); + } else { + for (ShardEntry shard : result.getShards()) { + if (shard.getState() == ShardState.ACTIVE || shard.getState() == ShardState.OPENING) { + shards.add(shard.getShardId()); + } + } + } + schema = client.getTopic(project, topic).getRecordSchema(); + return null; + } + }, DataXCaseEnvUtil.getRetryTimes(5), DataXCaseEnvUtil.getRetryInterval(10000L), DataXCaseEnvUtil.getRetryExponential(false)); + } catch (Exception e) { + throw DataXException.asDataXException(DatahubWriterErrorCode.GET_TOPOIC_INFO_FAIL, + "get topic info failed", e); + } + LOG.info("datahub topic {} shard to write: {}", this.topic, JSON.toJSONString(this.shards)); + LOG.info("datahub topic {} has schema: {}", this.topic, JSON.toJSONString(this.schema)); + + // 根据 schmea 顺序 和用户配置的 column,计算写datahub的顺序关系,以支持列换序 + // 后续统一使用 columnIndex 的顺位关系写 datahub + int totalSize = this.schema.getFields().size(); + if (null != this.column && !this.column.isEmpty() && this.enableColumnConfig) { + for (String eachCol : this.column) { + int indexFound = -1; + for (int i = 0; i < totalSize; i++) { + // warn: 大小写ignore + if (StringUtils.equalsIgnoreCase(eachCol, this.schema.getField(i).getName())) { + indexFound = i; + break; + } + } + if (indexFound >= 0) { + this.columnIndex.add(indexFound); + } else { + throw DataXException.asDataXException(DatahubWriterErrorCode.SCHEMA_NOT_MATCH, + String.format("can not find column %s in datahub topic %s", eachCol, this.topic)); + } + } + } else { + for (int i = 0; i < totalSize; i++) { + this.columnIndex.add(i); + } + } + } + + @Override + public void startWrite(RecordReceiver recordReceiver) { + Record record; + List records = new ArrayList(); + String shardId = null; + if (1 == this.shards.size()) { + shardId = shards.get(0); + } else { + shardId = shards.get(this.random.nextInt(shards.size())); + } + int commitSize = 0; + try { + while ((record = recordReceiver.getFromReader()) != null) { + RecordEntry dhRecord = convertRecord(record, shardId); + if (dhRecord != null) { + records.add(dhRecord); + } + commitSize += record.getByteSize(); + if (commitSize >= maxCommitSize) { + commit(records); + records.clear(); + commitSize = 0; + if (1 == this.shards.size()) { + shardId = shards.get(0); + } else { + shardId = shards.get(this.random.nextInt(shards.size())); + } + } + } + if (commitSize > 0) { + commit(records); + } + } catch (Exception e) { + throw DataXException.asDataXException( + DatahubWriterErrorCode.WRITE_DATAHUB_FAIL, e); + } + } + + @Override + public void post() {} + + @Override + public void destroy() {} + + private void commit(List records) throws InterruptedException { + PutRecordsResult result = client.putRecords(project, topic, records); + if (result.getFailedRecordCount() > 0) { + for (int i = 0; i < maxRetryCount; ++i) { + boolean limitExceededMessagePrinted = false; + for (PutErrorEntry error : result.getPutErrorEntries()) { + // 如果是 LimitExceeded 这样打印日志,不能每行记录打印一次了 + if (StringUtils.equalsIgnoreCase("LimitExceeded", 
error.getErrorcode())) { + if (!limitExceededMessagePrinted) { + LOG.warn("write record error, request id: {}, error code: {}, error message: {}", + result.getRequestId(), error.getErrorcode(), error.getMessage()); + limitExceededMessagePrinted = true; + } + } else { + LOG.error("write record error, request id: {}, error code: {}, error message: {}", + result.getRequestId(), error.getErrorcode(), error.getMessage()); + } + if (this.fatalErrors.contains(error.getErrorcode())) { + throw DataXException.asDataXException( + DatahubWriterErrorCode.WRITE_DATAHUB_FAIL, + error.getMessage()); + } + } + + if (this.retryInterval >= 0) { + Thread.sleep(this.retryInterval); + } else { + Thread.sleep(new Random().nextInt(700) + 300); + } + + result = client.putRecords(project, topic, result.getFailedRecords()); + if (result.getFailedRecordCount() == 0) { + return; + } + } + throw DataXException.asDataXException( + DatahubWriterErrorCode.WRITE_DATAHUB_FAIL, + "write datahub failed"); + } + } + + private RecordEntry convertRecord(Record dxRecord, String shardId) { + try { + RecordEntry dhRecord = new RecordEntry(); + dhRecord.setShardId(shardId); + TupleRecordData data = new TupleRecordData(this.schema); + for (int i = 0; i < this.columnIndex.size(); ++i) { + int orderInSchema = this.columnIndex.get(i); + FieldType type = this.schema.getField(orderInSchema).getType(); + Column column = dxRecord.getColumn(i); + switch (type) { + case BIGINT: + data.setField(orderInSchema, column.asLong()); + break; + case DOUBLE: + data.setField(orderInSchema, column.asDouble()); + break; + case STRING: + data.setField(orderInSchema, column.asString()); + break; + case BOOLEAN: + data.setField(orderInSchema, column.asBoolean()); + break; + case TIMESTAMP: + if (null == column.asDate()) { + data.setField(orderInSchema, null); + } else { + data.setField(orderInSchema, column.asDate().getTime() * 1000); + } + break; + case DECIMAL: + // warn + data.setField(orderInSchema, column.asBigDecimal()); + break; + case INTEGER: + data.setField(orderInSchema, column.asLong()); + break; + case FLOAT: + data.setField(orderInSchema, column.asDouble()); + break; + case TINYINT: + data.setField(orderInSchema, column.asLong()); + break; + case SMALLINT: + data.setField(orderInSchema, column.asLong()); + break; + default: + throw DataXException.asDataXException( + DatahubWriterErrorCode.SCHEMA_NOT_MATCH, + String.format("does not support type: %s", type)); + } + } + dhRecord.setRecordData(data); + return dhRecord; + } catch (Exception e) { + super.getTaskPluginCollector().collectDirtyRecord(dxRecord, e, "convert recor failed"); + } + return null; + } + } + +} \ No newline at end of file diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubWriterErrorCode.java b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubWriterErrorCode.java new file mode 100644 index 00000000..ad03abd1 --- /dev/null +++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubWriterErrorCode.java @@ -0,0 +1,37 @@ +package com.alibaba.datax.plugin.writer.datahubwriter; + +import com.alibaba.datax.common.spi.ErrorCode; +import com.alibaba.datax.common.util.MessageSource; + +public enum DatahubWriterErrorCode implements ErrorCode { + MISSING_REQUIRED_VALUE("DatahubWriter-01", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.missing_required_value")), + INVALID_CONFIG_VALUE("DatahubWriter-02", 
MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.invalid_config_value")), + GET_TOPOIC_INFO_FAIL("DatahubWriter-03", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.get_topic_info_fail")), + WRITE_DATAHUB_FAIL("DatahubWriter-04", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.write_datahub_fail")), + SCHEMA_NOT_MATCH("DatahubWriter-05", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.schema_not_match")), + ; + + private final String code; + private final String description; + + private DatahubWriterErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.description; + } + + @Override + public String toString() { + return String.format("Code:[%s], Description:[%s]. ", this.code, + this.description); + } +} \ No newline at end of file diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/Key.java b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/Key.java new file mode 100644 index 00000000..5f179234 --- /dev/null +++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/Key.java @@ -0,0 +1,26 @@ +package com.alibaba.datax.plugin.writer.datahubwriter; + +public final class Key { + + /** + * 此处声明插件用到的需要插件使用者提供的配置项 + */ + public static final String CONFIG_KEY_ENDPOINT = "endpoint"; + public static final String CONFIG_KEY_ACCESS_ID = "accessId"; + public static final String CONFIG_KEY_ACCESS_KEY = "accessKey"; + public static final String CONFIG_KEY_PROJECT = "project"; + public static final String CONFIG_KEY_TOPIC = "topic"; + public static final String CONFIG_KEY_WRITE_MODE = "mode"; + public static final String CONFIG_KEY_SHARD_ID = "shardId"; + public static final String CONFIG_KEY_MAX_COMMIT_SIZE = "maxCommitSize"; + public static final String CONFIG_KEY_MAX_RETRY_COUNT = "maxRetryCount"; + + public static final String CONFIG_VALUE_SEQUENCE_MODE = "sequence"; + public static final String CONFIG_VALUE_RANDOM_MODE = "random"; + + public final static String MAX_RETRY_TIME = "maxRetryTime"; + + public final static String RETRY_INTERVAL = "retryInterval"; + + public final static String CONFIG_KEY_COLUMN = "column"; +} diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings.properties b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings.properties new file mode 100644 index 00000000..e85c8ab3 --- /dev/null +++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings.properties @@ -0,0 +1,5 @@ +errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C. +errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF. +errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25. +errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25. +errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF. 
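
Reviewer note on the `column` option declared in `Key.java` above: as the Task code earlier in this diff shows, `["*"]` (or an absent list) writes fields in topic-schema order, while an explicit list is matched case-insensitively against the DataHub topic schema, so the reader's column order may differ from the schema. A hedged illustration with placeholder field names:

```json
"parameter": {
    "project": "test_project",
    "topic": "test_topic",
    "column": ["name", "id", "gmt_create"]
}
```

Note also that the conversion writes `TIMESTAMP` fields as `column.asDate().getTime() * 1000` (microseconds), so upstream values must be convertible to a Java `Date`.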
diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_en_US.properties b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_en_US.properties new file mode 100644 index 00000000..31a291e6 --- /dev/null +++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_en_US.properties @@ -0,0 +1,5 @@ +errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C. +errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF. +errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25. +errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25. +errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF. \ No newline at end of file diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_ja_JP.properties b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_ja_JP.properties new file mode 100644 index 00000000..31a291e6 --- /dev/null +++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_ja_JP.properties @@ -0,0 +1,5 @@ +errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C. +errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF. +errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25. +errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25. +errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF. \ No newline at end of file diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_CN.properties b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_CN.properties new file mode 100644 index 00000000..31a291e6 --- /dev/null +++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_CN.properties @@ -0,0 +1,5 @@ +errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C. +errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF. +errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25. +errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25. +errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF. \ No newline at end of file diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_HK.properties b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_HK.properties new file mode 100644 index 00000000..c6a3a0e0 --- /dev/null +++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_HK.properties @@ -0,0 +1,9 @@ +errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C. +errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF. +errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25. +errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25. +errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.errorcode.missing_required_value=您缺失了必須填寫的參數值. +errorcode.invalid_config_value=您的參數配寘錯誤. +errorcode.get_topic_info_fail=獲取shard清單失敗. +errorcode.write_datahub_fail=寫數據失敗. +errorcode.schema_not_match=數據格式錯誤. 
diff --git a/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_TW.properties b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_TW.properties new file mode 100644 index 00000000..c6a3a0e0 --- /dev/null +++ b/datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/LocalStrings_zh_TW.properties @@ -0,0 +1,9 @@ +errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C. +errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF. +errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25. +errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25. +errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.errorcode.missing_required_value=您缺失了必須填寫的參數值. +errorcode.invalid_config_value=您的參數配寘錯誤. +errorcode.get_topic_info_fail=獲取shard清單失敗. +errorcode.write_datahub_fail=寫數據失敗. +errorcode.schema_not_match=數據格式錯誤. diff --git a/datahubwriter/src/main/resources/job_config_template.json b/datahubwriter/src/main/resources/job_config_template.json new file mode 100644 index 00000000..8b0b41ae --- /dev/null +++ b/datahubwriter/src/main/resources/job_config_template.json @@ -0,0 +1,14 @@ +{ + "name": "datahubwriter", + "parameter": { + "endpoint":"", + "accessId": "", + "accessKey": "", + "project": "", + "topic": "", + "mode": "random", + "shardId": "", + "maxCommitSize": 524288, + "maxRetryCount": 500 + } +} \ No newline at end of file diff --git a/datahubwriter/src/main/resources/plugin.json b/datahubwriter/src/main/resources/plugin.json new file mode 100644 index 00000000..91c17292 --- /dev/null +++ b/datahubwriter/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "datahubwriter", + "class": "com.alibaba.datax.plugin.writer.datahubwriter.DatahubWriter", + "description": "datahub writer", + "developer": "alibaba" +} \ No newline at end of file diff --git a/doriswriter/doc/doriswriter.md b/doriswriter/doc/doriswriter.md new file mode 100644 index 00000000..58a688b8 --- /dev/null +++ b/doriswriter/doc/doriswriter.md @@ -0,0 +1,181 @@ +# DorisWriter 插件文档 + +## 1 快速介绍 +DorisWriter支持将大批量数据写入Doris中。 + +## 2 实现原理 +DorisWriter 通过Doris原生支持Stream load方式导入数据, DorisWriter会将`reader`读取的数据进行缓存在内存中,拼接成Json文本,然后批量导入至Doris。 + +## 3 功能说明 + +### 3.1 配置样例 + +这里是一份从Stream读取数据后导入至Doris的配置文件。 + +``` +{ + "job": { + "content": [ + { + "reader": { + "name": "mysqlreader", + "parameter": { + "column": ["emp_no", "birth_date", "first_name","last_name","gender","hire_date"], + "connection": [ + { + "jdbcUrl": ["jdbc:mysql://localhost:3306/demo"], + "table": ["employees_1"] + } + ], + "username": "root", + "password": "xxxxx", + "where": "" + } + }, + "writer": { + "name": "doriswriter", + "parameter": { + "loadUrl": ["172.16.0.13:8030"], + "loadProps": { + }, + "column": ["emp_no", "birth_date", "first_name","last_name","gender","hire_date"], + "username": "root", + "password": "xxxxxx", + "postSql": ["select count(1) from all_employees_info"], + "preSql": [], + "flushInterval":30000, + "connection": [ + { + "jdbcUrl": "jdbc:mysql://172.16.0.13:9030/demo", + "selectedDatabase": "demo", + "table": ["all_employees_info"] + } + ], + "loadProps": { + "format": "json", + "strip_outer_array": true + } + } + } + } + ], + "setting": { + "speed": { + "channel": "1" + } + } + } +} +``` + +### 3.2 参数说明 + +* **jdbcUrl** + + - 描述:Doris 的 JDBC 连接串,用户执行 preSql 或 postSQL。 + - 必选:是 + - 默认值:无 + +* **loadUrl** + + - 描述:作为 Stream Load 的连接目标。格式为 
"ip:port"。其中 IP 是 FE 节点 IP,port 是 FE 节点的 http_port。可以填写多个,多个之间使用英文状态的分号隔开:`;`,doriswriter 将以轮询的方式访问。 + - 必选:是 + - 默认值:无 + +* **username** + + - 描述:访问Doris数据库的用户名 + - 必选:是 + - 默认值:无 + +* **password** + + - 描述:访问Doris数据库的密码 + - 必选:否 + - 默认值:空 + +* **connection.selectedDatabase** + - 描述:需要写入的Doris数据库名称。 + - 必选:是 + - 默认值:无 + +* **connection.table** + - 描述:需要写入的Doris表名称。 + - 必选:是 + - 默认值:无 + +* **column** + + - 描述:目的表**需要写入数据**的字段,这些字段将作为生成的 Json 数据的字段名。字段之间用英文逗号分隔。例如: "column": ["id","name","age"]。 + - 必选:是 + - 默认值:否 + +* **preSql** + + - 描述:写入数据到目的表前,会先执行这里的标准语句。 + - 必选:否 + - 默认值:无 + +* **postSql** + + - 描述:写入数据到目的表后,会执行这里的标准语句。 + - 必选:否 + - 默认值:无 + + +* **maxBatchRows** + + - 描述:每批次导入数据的最大行数。和 **batchSize** 共同控制每批次的导入数量。每批次数据达到两个阈值之一,即开始导入这一批次的数据。 + - 必选:否 + - 默认值:500000 + +* **batchSize** + + - 描述:每批次导入数据的最大数据量。和 **maxBatchRows** 共同控制每批次的导入数量。每批次数据达到两个阈值之一,即开始导入这一批次的数据。 + - 必选:否 + - 默认值:104857600 + +* **maxRetries** + + - 描述:每批次导入数据失败后的重试次数。 + - 必选:否 + - 默认值:0 + +* **labelPrefix** + + - 描述:每批次导入任务的 label 前缀。最终的 label 将有 `labelPrefix + UUID` 组成全局唯一的 label,确保数据不会重复导入 + - 必选:否 + - 默认值:`datax_doris_writer_` + +* **loadProps** + + - 描述:StreamLoad 的请求参数,详情参照StreamLoad介绍页面。[Stream load - Apache Doris](https://doris.apache.org/zh-CN/docs/data-operate/import/import-way/stream-load-manual) + + 这里包括导入的数据格式:format等,导入数据格式默认我们使用csv,支持JSON,具体可以参照下面类型转换部分,也可以参照上面Stream load 官方信息 + + - 必选:否 + + - 默认值:无 + +### 类型转换 + +默认传入的数据均会被转为字符串,并以`\t`作为列分隔符,`\n`作为行分隔符,组成`csv`文件进行StreamLoad导入操作。 + +默认是csv格式导入,如需更改列分隔符, 则正确配置 `loadProps` 即可: + +```json +"loadProps": { + "column_separator": "\\x01", + "line_delimiter": "\\x02" +} +``` + +如需更改导入格式为`json`, 则正确配置 `loadProps` 即可: +```json +"loadProps": { + "format": "json", + "strip_outer_array": true +} +``` + +更多信息请参照 Doris 官网:[Stream load - Apache Doris](https://doris.apache.org/zh-CN/docs/data-operate/import/import-way/stream-load-manual) \ No newline at end of file diff --git a/doriswriter/doc/mysql2doris.json b/doriswriter/doc/mysql2doris.json new file mode 100644 index 00000000..6992a2be --- /dev/null +++ b/doriswriter/doc/mysql2doris.json @@ -0,0 +1,46 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "mysqlreader", + "parameter": { + "column": ["k1", "k2", "k3"], + "connection": [ + { + "jdbcUrl": ["jdbc:mysql://192.168.10.10:3306/db1"], + "table": ["t1"] + } + ], + "username": "root", + "password": "", + "where": "" + } + }, + "writer": { + "name": "doriswriter", + "parameter": { + "loadUrl": ["192.168.1.1:8030"], + "loadProps": {}, + "database": "db1", + "column": ["k1", "k2", "k3"], + "username": "root", + "password": "", + "postSql": [], + "preSql": [], + "connection": [ + "jdbcUrl":"jdbc:mysql://192.168.1.1:9030/", + "table":["xxx"], + "selectedDatabase":"xxxx" + ] + } + } + } + ], + "setting": { + "speed": { + "channel": "1" + } + } + } +} diff --git a/doriswriter/pom.xml b/doriswriter/pom.xml new file mode 100644 index 00000000..aa1e6ff0 --- /dev/null +++ b/doriswriter/pom.xml @@ -0,0 +1,99 @@ + + + + + datax-all + com.alibaba.datax + 0.0.1-SNAPSHOT + + 4.0.0 + doriswriter + doriswriter + jar + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + org.slf4j + slf4j-api + + + ch.qos.logback + logback-classic + + + com.alibaba.datax + plugin-rdbms-util + ${datax-project-version} + + + mysql + mysql-connector-java + ${mysql.driver.version} + + + org.apache.httpcomponents + httpclient + 4.5.13 + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + + 
maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + diff --git a/doriswriter/src/main/assembly/package.xml b/doriswriter/src/main/assembly/package.xml new file mode 100644 index 00000000..71596332 --- /dev/null +++ b/doriswriter/src/main/assembly/package.xml @@ -0,0 +1,52 @@ + + + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/writer/doriswriter + + + target/ + + doriswriter-0.0.1-SNAPSHOT.jar + + plugin/writer/doriswriter + + + + + false + plugin/writer/doriswriter/libs + runtime + + + diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DelimiterParser.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DelimiterParser.java new file mode 100644 index 00000000..e84bd7dd --- /dev/null +++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DelimiterParser.java @@ -0,0 +1,54 @@ +package com.alibaba.datax.plugin.writer.doriswriter; + +import com.google.common.base.Strings; + +import java.io.StringWriter; + +public class DelimiterParser { + + private static final String HEX_STRING = "0123456789ABCDEF"; + + public static String parse(String sp, String dSp) throws RuntimeException { + if ( Strings.isNullOrEmpty(sp)) { + return dSp; + } + if (!sp.toUpperCase().startsWith("\\X")) { + return sp; + } + String hexStr = sp.substring(2); + // check hex str + if (hexStr.isEmpty()) { + throw new RuntimeException("Failed to parse delimiter: `Hex str is empty`"); + } + if (hexStr.length() % 2 != 0) { + throw new RuntimeException("Failed to parse delimiter: `Hex str length error`"); + } + for (char hexChar : hexStr.toUpperCase().toCharArray()) { + if (HEX_STRING.indexOf(hexChar) == -1) { + throw new RuntimeException("Failed to parse delimiter: `Hex str format error`"); + } + } + // transform to separator + StringWriter writer = new StringWriter(); + for (byte b : hexStrToBytes(hexStr)) { + writer.append((char) b); + } + return writer.toString(); + } + + private static byte[] hexStrToBytes(String hexStr) { + String upperHexStr = hexStr.toUpperCase(); + int length = upperHexStr.length() / 2; + char[] hexChars = upperHexStr.toCharArray(); + byte[] bytes = new byte[length]; + for (int i = 0; i < length; i++) { + int pos = i * 2; + bytes[i] = (byte) (charToByte(hexChars[pos]) << 4 | charToByte(hexChars[pos + 1])); + } + return bytes; + } + + private static byte charToByte(char c) { + return (byte) HEX_STRING.indexOf(c); + } +} diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisBaseCodec.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisBaseCodec.java new file mode 100644 index 00000000..ee7ded56 --- /dev/null +++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisBaseCodec.java @@ -0,0 +1,23 @@ +package com.alibaba.datax.plugin.writer.doriswriter; + +import com.alibaba.datax.common.element.Column; + +public class DorisBaseCodec { + protected String convertionField( Column col) { + if (null == col.getRawData() || Column.Type.NULL == col.getType()) { + return null; + } + if ( Column.Type.BOOL == col.getType()) { + return String.valueOf(col.asLong()); + } + if ( Column.Type.BYTES == col.getType()) { + byte[] bts = (byte[])col.getRawData(); + long value = 0; + for (int i = 0; i < bts.length; i++) { + value += (bts[bts.length - i - 1] & 0xffL) << (8 * i); + } + return String.valueOf(value); + } + return col.asString(); + } +} diff 
--git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCodec.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCodec.java new file mode 100644 index 00000000..a2437a1c --- /dev/null +++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCodec.java @@ -0,0 +1,10 @@ +package com.alibaba.datax.plugin.writer.doriswriter; + +import com.alibaba.datax.common.element.Record; + +import java.io.Serializable; + +public interface DorisCodec extends Serializable { + + String codec( Record row); +} diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCodecFactory.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCodecFactory.java new file mode 100644 index 00000000..22c4b409 --- /dev/null +++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCodecFactory.java @@ -0,0 +1,19 @@ +package com.alibaba.datax.plugin.writer.doriswriter; + +import java.util.Map; + +public class DorisCodecFactory { + public DorisCodecFactory (){ + + } + public static DorisCodec createCodec( Keys writerOptions) { + if ( Keys.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) { + Map props = writerOptions.getLoadProps(); + return new DorisCsvCodec (null == props || !props.containsKey("column_separator") ? null : String.valueOf(props.get("column_separator"))); + } + if ( Keys.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) { + return new DorisJsonCodec (writerOptions.getColumns()); + } + throw new RuntimeException("Failed to create row serializer, unsupported `format` from stream load properties."); + } +} diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCsvCodec.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCsvCodec.java new file mode 100644 index 00000000..518aa304 --- /dev/null +++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisCsvCodec.java @@ -0,0 +1,27 @@ +package com.alibaba.datax.plugin.writer.doriswriter; + +import com.alibaba.datax.common.element.Record; + +public class DorisCsvCodec extends DorisBaseCodec implements DorisCodec { + + private static final long serialVersionUID = 1L; + + private final String columnSeparator; + + public DorisCsvCodec ( String sp) { + this.columnSeparator = DelimiterParser.parse(sp, "\t"); + } + + @Override + public String codec( Record row) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < row.getColumnNumber(); i++) { + String value = convertionField(row.getColumn(i)); + sb.append(null == value ? 
"\\N" : value); + if (i < row.getColumnNumber() - 1) { + sb.append(columnSeparator); + } + } + return sb.toString(); + } +} diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisJsonCodec.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisJsonCodec.java new file mode 100644 index 00000000..68abd9eb --- /dev/null +++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisJsonCodec.java @@ -0,0 +1,33 @@ +package com.alibaba.datax.plugin.writer.doriswriter; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.fastjson2.JSON; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class DorisJsonCodec extends DorisBaseCodec implements DorisCodec { + + private static final long serialVersionUID = 1L; + + private final List fieldNames; + + public DorisJsonCodec ( List fieldNames) { + this.fieldNames = fieldNames; + } + + @Override + public String codec( Record row) { + if (null == fieldNames) { + return ""; + } + Map rowMap = new HashMap<> (fieldNames.size()); + int idx = 0; + for (String fieldName : fieldNames) { + rowMap.put(fieldName, convertionField(row.getColumn(idx))); + idx++; + } + return JSON.toJSONString(rowMap); + } +} diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisStreamLoadObserver.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisStreamLoadObserver.java new file mode 100644 index 00000000..6f7e9a5a --- /dev/null +++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisStreamLoadObserver.java @@ -0,0 +1,236 @@ +package com.alibaba.datax.plugin.writer.doriswriter; + +import com.alibaba.fastjson2.JSON; +import org.apache.commons.codec.binary.Base64; +import org.apache.http.HttpEntity; +import org.apache.http.HttpHeaders; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPut; +import org.apache.http.entity.ByteArrayEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.DefaultRedirectStrategy; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.util.EntityUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +public class DorisStreamLoadObserver { + private static final Logger LOG = LoggerFactory.getLogger(DorisStreamLoadObserver.class); + + private Keys options; + + private long pos; + private static final String RESULT_FAILED = "Fail"; + private static final String RESULT_LABEL_EXISTED = "Label Already Exists"; + private static final String LAEBL_STATE_VISIBLE = "VISIBLE"; + private static final String LAEBL_STATE_COMMITTED = "COMMITTED"; + private static final String RESULT_LABEL_PREPARE = "PREPARE"; + private static final String RESULT_LABEL_ABORTED = "ABORTED"; + private static final String RESULT_LABEL_UNKNOWN = "UNKNOWN"; + + + public DorisStreamLoadObserver ( Keys options){ + this.options = options; + } + + public void streamLoad(WriterTuple data) throws 
Exception { + String host = getLoadHost(); + if(host == null){ + throw new IOException ("load_url cannot be empty, or the host cannot connect.Please check your configuration."); + } + String loadUrl = new StringBuilder(host) + .append("/api/") + .append(options.getDatabase()) + .append("/") + .append(options.getTable()) + .append("/_stream_load") + .toString(); + LOG.info("Start to join batch data: rows[{}] bytes[{}] label[{}].", data.getRows().size(), data.getBytes(), data.getLabel()); + Map loadResult = put(loadUrl, data.getLabel(), addRows(data.getRows(), data.getBytes().intValue())); + LOG.info("StreamLoad response :{}",JSON.toJSONString(loadResult)); + final String keyStatus = "Status"; + if (null == loadResult || !loadResult.containsKey(keyStatus)) { + throw new IOException("Unable to flush data to Doris: unknown result status."); + } + LOG.debug("StreamLoad response:{}",JSON.toJSONString(loadResult)); + if (RESULT_FAILED.equals(loadResult.get(keyStatus))) { + throw new IOException( + new StringBuilder("Failed to flush data to Doris.\n").append(JSON.toJSONString(loadResult)).toString() + ); + } else if (RESULT_LABEL_EXISTED.equals(loadResult.get(keyStatus))) { + LOG.debug("StreamLoad response:{}",JSON.toJSONString(loadResult)); + checkStreamLoadState(host, data.getLabel()); + } + } + + private void checkStreamLoadState(String host, String label) throws IOException { + int idx = 0; + while(true) { + try { + TimeUnit.SECONDS.sleep(Math.min(++idx, 5)); + } catch (InterruptedException ex) { + break; + } + try (CloseableHttpClient httpclient = HttpClients.createDefault()) { + HttpGet httpGet = new HttpGet(new StringBuilder(host).append("/api/").append(options.getDatabase()).append("/get_load_state?label=").append(label).toString()); + httpGet.setHeader("Authorization", getBasicAuthHeader(options.getUsername(), options.getPassword())); + httpGet.setHeader("Connection", "close"); + + try (CloseableHttpResponse resp = httpclient.execute(httpGet)) { + HttpEntity respEntity = getHttpEntity(resp); + if (respEntity == null) { + throw new IOException(String.format("Failed to flush data to Doris, Error " + + "could not get the final state of label[%s].\n", label), null); + } + Map result = (Map)JSON.parse(EntityUtils.toString(respEntity)); + String labelState = (String)result.get("state"); + if (null == labelState) { + throw new IOException(String.format("Failed to flush data to Doris, Error " + + "could not get the final state of label[%s]. response[%s]\n", label, EntityUtils.toString(respEntity)), null); + } + LOG.info(String.format("Checking label[%s] state[%s]\n", label, labelState)); + switch(labelState) { + case LAEBL_STATE_VISIBLE: + case LAEBL_STATE_COMMITTED: + return; + case RESULT_LABEL_PREPARE: + continue; + case RESULT_LABEL_ABORTED: + throw new DorisWriterExcetion (String.format("Failed to flush data to Doris, Error " + + "label[%s] state[%s]\n", label, labelState), null, true); + case RESULT_LABEL_UNKNOWN: + default: + throw new IOException(String.format("Failed to flush data to Doris, Error " + + "label[%s] state[%s]\n", label, labelState), null); + } + } + } + } + } + + private byte[] addRows(List rows, int totalBytes) { + if (Keys.StreamLoadFormat.CSV.equals(options.getStreamLoadFormat())) { + Map props = (options.getLoadProps() == null ? 
new HashMap<> () : options.getLoadProps()); + byte[] lineDelimiter = DelimiterParser.parse((String)props.get("line_delimiter"), "\n").getBytes(StandardCharsets.UTF_8); + ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size() * lineDelimiter.length); + for (byte[] row : rows) { + bos.put(row); + bos.put(lineDelimiter); + } + return bos.array(); + } + + if (Keys.StreamLoadFormat.JSON.equals(options.getStreamLoadFormat())) { + ByteBuffer bos = ByteBuffer.allocate(totalBytes + (rows.isEmpty() ? 2 : rows.size() + 1)); + bos.put("[".getBytes(StandardCharsets.UTF_8)); + byte[] jsonDelimiter = ",".getBytes(StandardCharsets.UTF_8); + boolean isFirstElement = true; + for (byte[] row : rows) { + if (!isFirstElement) { + bos.put(jsonDelimiter); + } + bos.put(row); + isFirstElement = false; + } + bos.put("]".getBytes(StandardCharsets.UTF_8)); + return bos.array(); + } + throw new RuntimeException("Failed to join rows data, unsupported `format` from stream load properties:"); + } + private Map put(String loadUrl, String label, byte[] data) throws IOException { + LOG.info(String.format("Executing stream load to: '%s', size: '%s'", loadUrl, data.length)); + final HttpClientBuilder httpClientBuilder = HttpClients.custom() + .setRedirectStrategy(new DefaultRedirectStrategy () { + @Override + protected boolean isRedirectable(String method) { + return true; + } + }); + try ( CloseableHttpClient httpclient = httpClientBuilder.build()) { + HttpPut httpPut = new HttpPut(loadUrl); + httpPut.removeHeaders(HttpHeaders.CONTENT_LENGTH); + httpPut.removeHeaders(HttpHeaders.TRANSFER_ENCODING); + List cols = options.getColumns(); + if (null != cols && !cols.isEmpty() && Keys.StreamLoadFormat.CSV.equals(options.getStreamLoadFormat())) { + httpPut.setHeader("columns", String.join(",", cols.stream().map(f -> String.format("`%s`", f)).collect(Collectors.toList()))); + } + if (null != options.getLoadProps()) { + for (Map.Entry entry : options.getLoadProps().entrySet()) { + httpPut.setHeader(entry.getKey(), String.valueOf(entry.getValue())); + } + } + httpPut.setHeader("Expect", "100-continue"); + httpPut.setHeader("label", label); + httpPut.setHeader("two_phase_commit", "false"); + httpPut.setHeader("Authorization", getBasicAuthHeader(options.getUsername(), options.getPassword())); + httpPut.setEntity(new ByteArrayEntity(data)); + httpPut.setConfig(RequestConfig.custom().setRedirectsEnabled(true).build()); + try ( CloseableHttpResponse resp = httpclient.execute(httpPut)) { + HttpEntity respEntity = getHttpEntity(resp); + if (respEntity == null) + return null; + return (Map)JSON.parse(EntityUtils.toString(respEntity)); + } + } + } + + private String getBasicAuthHeader(String username, String password) { + String auth = username + ":" + password; + byte[] encodedAuth = Base64.encodeBase64(auth.getBytes(StandardCharsets.UTF_8)); + return new StringBuilder("Basic ").append(new String(encodedAuth)).toString(); + } + + private HttpEntity getHttpEntity(CloseableHttpResponse resp) { + int code = resp.getStatusLine().getStatusCode(); + if (200 != code) { + LOG.warn("Request failed with code:{}", code); + return null; + } + HttpEntity respEntity = resp.getEntity(); + if (null == respEntity) { + LOG.warn("Request failed with empty response."); + return null; + } + return respEntity; + } + + private String getLoadHost() { + List hostList = options.getLoadUrlList(); + long tmp = pos + hostList.size(); + for (; pos < tmp; pos++) { + String host = new StringBuilder("http://").append(hostList.get((int) (pos % 
hostList.size()))).toString(); + if (checkConnection(host)) { + return host; + } + } + return null; + } + + private boolean checkConnection(String host) { + try { + URL url = new URL(host); + HttpURLConnection co = (HttpURLConnection) url.openConnection(); + co.setConnectTimeout(5000); + co.connect(); + co.disconnect(); + return true; + } catch (Exception e1) { + e1.printStackTrace(); + return false; + } + } +} diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisUtil.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisUtil.java new file mode 100644 index 00000000..5f5a6f34 --- /dev/null +++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisUtil.java @@ -0,0 +1,105 @@ +package com.alibaba.datax.plugin.writer.doriswriter; + +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.util.RdbmsException; +import com.alibaba.datax.plugin.rdbms.writer.Constant; +import com.alibaba.druid.sql.parser.ParserException; +import com.google.common.base.Strings; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * jdbc util + */ +public class DorisUtil { + private static final Logger LOG = LoggerFactory.getLogger(DorisUtil.class); + + private DorisUtil() {} + + public static List getDorisTableColumns( Connection conn, String databaseName, String tableName) { + String currentSql = String.format("SELECT COLUMN_NAME FROM `information_schema`.`COLUMNS` WHERE `TABLE_SCHEMA` = '%s' AND `TABLE_NAME` = '%s' ORDER BY `ORDINAL_POSITION` ASC;", databaseName, tableName); + List columns = new ArrayList<> (); + ResultSet rs = null; + try { + rs = DBUtil.query(conn, currentSql); + while (DBUtil.asyncResultSetNext(rs)) { + String colName = rs.getString("COLUMN_NAME"); + columns.add(colName); + } + return columns; + } catch (Exception e) { + throw RdbmsException.asQueryException(DataBaseType.MySql, e, currentSql, null, null); + } finally { + DBUtil.closeDBResources(rs, null, null); + } + } + + public static List renderPreOrPostSqls(List preOrPostSqls, String tableName) { + if (null == preOrPostSqls) { + return Collections.emptyList(); + } + List renderedSqls = new ArrayList<>(); + for (String sql : preOrPostSqls) { + if (! 
Strings.isNullOrEmpty(sql)) { + renderedSqls.add(sql.replace(Constant.TABLE_NAME_PLACEHOLDER, tableName)); + } + } + return renderedSqls; + } + + public static void executeSqls(Connection conn, List sqls) { + Statement stmt = null; + String currentSql = null; + try { + stmt = conn.createStatement(); + for (String sql : sqls) { + currentSql = sql; + DBUtil.executeSqlWithoutResultSet(stmt, sql); + } + } catch (Exception e) { + throw RdbmsException.asQueryException(DataBaseType.MySql, e, currentSql, null, null); + } finally { + DBUtil.closeDBResources(null, stmt, null); + } + } + + public static void preCheckPrePareSQL( Keys options) { + String table = options.getTable(); + List preSqls = options.getPreSqlList(); + List renderedPreSqls = DorisUtil.renderPreOrPostSqls(preSqls, table); + if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) { + LOG.info("Begin to preCheck preSqls:[{}].", String.join(";", renderedPreSqls)); + for (String sql : renderedPreSqls) { + try { + DBUtil.sqlValid(sql, DataBaseType.MySql); + } catch ( ParserException e) { + throw RdbmsException.asPreSQLParserException(DataBaseType.MySql,e,sql); + } + } + } + } + + public static void preCheckPostSQL( Keys options) { + String table = options.getTable(); + List postSqls = options.getPostSqlList(); + List renderedPostSqls = DorisUtil.renderPreOrPostSqls(postSqls, table); + if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) { + LOG.info("Begin to preCheck postSqls:[{}].", String.join(";", renderedPostSqls)); + for(String sql : renderedPostSqls) { + try { + DBUtil.sqlValid(sql, DataBaseType.MySql); + } catch (ParserException e){ + throw RdbmsException.asPostSQLParserException(DataBaseType.MySql,e,sql); + } + } + } + } +} diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriter.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriter.java new file mode 100644 index 00000000..b44d5440 --- /dev/null +++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriter.java @@ -0,0 +1,164 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package com.alibaba.datax.plugin.writer.doriswriter; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.spi.Writer; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.util.ArrayList; +import java.util.List; + +/** + * doris data writer + */ +public class DorisWriter extends Writer { + + public static class Job extends Writer.Job { + + private static final Logger LOG = LoggerFactory.getLogger(Job.class); + private Configuration originalConfig = null; + private Keys options; + + @Override + public void init() { + this.originalConfig = super.getPluginJobConf(); + options = new Keys (super.getPluginJobConf()); + options.doPretreatment(); + } + + @Override + public void preCheck(){ + this.init(); + DorisUtil.preCheckPrePareSQL(options); + DorisUtil.preCheckPostSQL(options); + } + + @Override + public void prepare() { + String username = options.getUsername(); + String password = options.getPassword(); + String jdbcUrl = options.getJdbcUrl(); + List renderedPreSqls = DorisUtil.renderPreOrPostSqls(options.getPreSqlList(), options.getTable()); + if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) { + Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password); + LOG.info("Begin to execute preSqls:[{}]. context info:{}.", String.join(";", renderedPreSqls), jdbcUrl); + DorisUtil.executeSqls(conn, renderedPreSqls); + DBUtil.closeDBResources(null, null, conn); + } + } + + @Override + public List split(int mandatoryNumber) { + List configurations = new ArrayList<>(mandatoryNumber); + for (int i = 0; i < mandatoryNumber; i++) { + configurations.add(originalConfig); + } + return configurations; + } + + @Override + public void post() { + String username = options.getUsername(); + String password = options.getPassword(); + String jdbcUrl = options.getJdbcUrl(); + List renderedPostSqls = DorisUtil.renderPreOrPostSqls(options.getPostSqlList(), options.getTable()); + if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) { + Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password); + LOG.info("Start to execute preSqls:[{}]. 
context info:{}.", String.join(";", renderedPostSqls), jdbcUrl); + DorisUtil.executeSqls(conn, renderedPostSqls); + DBUtil.closeDBResources(null, null, conn); + } + } + + @Override + public void destroy() { + } + + } + + public static class Task extends Writer.Task { + private DorisWriterManager writerManager; + private Keys options; + private DorisCodec rowCodec; + + @Override + public void init() { + options = new Keys (super.getPluginJobConf()); + if (options.isWildcardColumn()) { + Connection conn = DBUtil.getConnection(DataBaseType.MySql, options.getJdbcUrl(), options.getUsername(), options.getPassword()); + List columns = DorisUtil.getDorisTableColumns(conn, options.getDatabase(), options.getTable()); + options.setInfoCchemaColumns(columns); + } + writerManager = new DorisWriterManager(options); + rowCodec = DorisCodecFactory.createCodec(options); + } + + @Override + public void prepare() { + } + + public void startWrite(RecordReceiver recordReceiver) { + try { + Record record; + while ((record = recordReceiver.getFromReader()) != null) { + if (record.getColumnNumber() != options.getColumns().size()) { + throw DataXException + .asDataXException( + DBUtilErrorCode.CONF_ERROR, + String.format( + "There is an error in the column configuration information. " + + "This is because you have configured a task where the number of fields to be read from the source:%s " + + "is not equal to the number of fields to be written to the destination table:%s. " + + "Please check your configuration and make changes.", + record.getColumnNumber(), + options.getColumns().size())); + } + writerManager.writeRecord(rowCodec.codec(record)); + } + } catch (Exception e) { + throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e); + } + } + + @Override + public void post() { + try { + writerManager.close(); + } catch (Exception e) { + throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e); + } + } + + @Override + public void destroy() {} + + @Override + public boolean supportFailOver(){ + return false; + } + } +} diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriterExcetion.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriterExcetion.java new file mode 100644 index 00000000..7797d79f --- /dev/null +++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriterExcetion.java @@ -0,0 +1,29 @@ +package com.alibaba.datax.plugin.writer.doriswriter; + +import java.io.IOException; +import java.util.Map; + +public class DorisWriterExcetion extends IOException { + + private final Map response; + private boolean reCreateLabel; + + public DorisWriterExcetion ( String message, Map response) { + super(message); + this.response = response; + } + + public DorisWriterExcetion ( String message, Map response, boolean reCreateLabel) { + super(message); + this.response = response; + this.reCreateLabel = reCreateLabel; + } + + public Map getFailedResponse() { + return response; + } + + public boolean needReCreateLabel() { + return reCreateLabel; + } +} diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriterManager.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriterManager.java new file mode 100644 index 00000000..f0ba6b52 --- /dev/null +++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriterManager.java @@ -0,0 +1,192 @@ +package com.alibaba.datax.plugin.writer.doriswriter; + +import 
com.google.common.base.Strings; +import org.apache.commons.lang3.concurrent.BasicThreadFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; + +public class DorisWriterManager { + + private static final Logger LOG = LoggerFactory.getLogger(DorisWriterManager.class); + + private final DorisStreamLoadObserver visitor; + private final Keys options; + private final List buffer = new ArrayList<> (); + private int batchCount = 0; + private long batchSize = 0; + private volatile boolean closed = false; + private volatile Exception flushException; + private final LinkedBlockingDeque< WriterTuple > flushQueue; + private ScheduledExecutorService scheduler; + private ScheduledFuture scheduledFuture; + + public DorisWriterManager( Keys options) { + this.options = options; + this.visitor = new DorisStreamLoadObserver (options); + flushQueue = new LinkedBlockingDeque<>(options.getFlushQueueLength()); + this.startScheduler(); + this.startAsyncFlushing(); + } + + public void startScheduler() { + stopScheduler(); + this.scheduler = Executors.newScheduledThreadPool(1, new BasicThreadFactory.Builder().namingPattern("Doris-interval-flush").daemon(true).build()); + this.scheduledFuture = this.scheduler.schedule(() -> { + synchronized (DorisWriterManager.this) { + if (!closed) { + try { + String label = createBatchLabel(); + LOG.info(String.format("Doris interval Sinking triggered: label[%s].", label)); + if (batchCount == 0) { + startScheduler(); + } + flush(label, false); + } catch (Exception e) { + flushException = e; + } + } + } + }, options.getFlushInterval(), TimeUnit.MILLISECONDS); + } + + public void stopScheduler() { + if (this.scheduledFuture != null) { + scheduledFuture.cancel(false); + this.scheduler.shutdown(); + } + } + + public final synchronized void writeRecord(String record) throws IOException { + checkFlushException(); + try { + byte[] bts = record.getBytes(StandardCharsets.UTF_8); + buffer.add(bts); + batchCount++; + batchSize += bts.length; + if (batchCount >= options.getBatchRows() || batchSize >= options.getBatchSize()) { + String label = createBatchLabel(); + LOG.debug(String.format("Doris buffer Sinking triggered: rows[%d] label[%s].", batchCount, label)); + flush(label, false); + } + } catch (Exception e) { + throw new IOException("Writing records to Doris failed.", e); + } + } + + public synchronized void flush(String label, boolean waitUtilDone) throws Exception { + checkFlushException(); + if (batchCount == 0) { + if (waitUtilDone) { + waitAsyncFlushingDone(); + } + return; + } + flushQueue.put(new WriterTuple (label, batchSize, new ArrayList<>(buffer))); + if (waitUtilDone) { + // wait the last flush + waitAsyncFlushingDone(); + } + buffer.clear(); + batchCount = 0; + batchSize = 0; + } + + public synchronized void close() { + if (!closed) { + closed = true; + try { + String label = createBatchLabel(); + if (batchCount > 0) LOG.debug(String.format("Doris Sink is about to close: label[%s].", label)); + flush(label, true); + } catch (Exception e) { + throw new RuntimeException("Writing records to Doris failed.", e); + } + } + checkFlushException(); + } + + public String createBatchLabel() { + 
StringBuilder sb = new StringBuilder(); + if (! Strings.isNullOrEmpty(options.getLabelPrefix())) { + sb.append(options.getLabelPrefix()); + } + return sb.append(UUID.randomUUID().toString()) + .toString(); + } + + private void startAsyncFlushing() { + // start flush thread + Thread flushThread = new Thread(new Runnable(){ + public void run() { + while(true) { + try { + asyncFlush(); + } catch (Exception e) { + flushException = e; + } + } + } + }); + flushThread.setDaemon(true); + flushThread.start(); + } + + private void waitAsyncFlushingDone() throws InterruptedException { + // wait previous flushings + for (int i = 0; i <= options.getFlushQueueLength(); i++) { + flushQueue.put(new WriterTuple ("", 0l, null)); + } + checkFlushException(); + } + + private void asyncFlush() throws Exception { + WriterTuple flushData = flushQueue.take(); + if (Strings.isNullOrEmpty(flushData.getLabel())) { + return; + } + stopScheduler(); + LOG.debug(String.format("Async stream load: rows[%d] bytes[%d] label[%s].", flushData.getRows().size(), flushData.getBytes(), flushData.getLabel())); + for (int i = 0; i <= options.getMaxRetries(); i++) { + try { + // flush to Doris with stream load + visitor.streamLoad(flushData); + LOG.info(String.format("Async stream load finished: label[%s].", flushData.getLabel())); + startScheduler(); + break; + } catch (Exception e) { + LOG.warn("Failed to flush batch data to Doris, retry times = {}", i, e); + if (i >= options.getMaxRetries()) { + throw new IOException(e); + } + if (e instanceof DorisWriterExcetion && (( DorisWriterExcetion )e).needReCreateLabel()) { + String newLabel = createBatchLabel(); + LOG.warn(String.format("Batch label changed from [%s] to [%s]", flushData.getLabel(), newLabel)); + flushData.setLabel(newLabel); + } + try { + Thread.sleep(1000l * Math.min(i + 1, 10)); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new IOException("Unable to flush, interrupted while doing another attempt", e); + } + } + } + } + + private void checkFlushException() { + if (flushException != null) { + throw new RuntimeException("Writing records to Doris failed.", flushException); + } + } +} diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/Keys.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/Keys.java new file mode 100644 index 00000000..e460e76b --- /dev/null +++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/Keys.java @@ -0,0 +1,177 @@ +package com.alibaba.datax.plugin.writer.doriswriter; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; + +import java.io.Serializable; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class Keys implements Serializable { + + private static final long serialVersionUID = 1l; + private static final int MAX_RETRIES = 3; + private static final int BATCH_ROWS = 500000; + private static final long DEFAULT_FLUSH_INTERVAL = 30000; + + private static final String LOAD_PROPS_FORMAT = "format"; + public enum StreamLoadFormat { + CSV, JSON; + } + + private static final String USERNAME = "username"; + private static final String PASSWORD = "password"; + private static final String DATABASE = "connection[0].selectedDatabase"; + private static final String TABLE = "connection[0].table[0]"; + private static final String COLUMN = "column"; + private static final String 
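A quick sketch of the retry behaviour implemented in asyncFlush() above (not part of the patch): waitAsyncFlushingDone() drains the queue by pushing flushQueueLength + 1 empty-label sentinel tuples, and each failed stream load is retried after a growing pause, under a freshly generated label when the server signals that the old label must not be reused. The class name below is hypothetical.

```java
public class DorisRetryBackoffSketch {
    public static void main(String[] args) {
        int maxRetries = 3; // Keys.getMaxRetries() default
        for (int i = 0; i < maxRetries; i++) {
            long sleepMs = 1000L * Math.min(i + 1, 10); // same formula as asyncFlush(): 1s, 2s, 3s ... capped at 10s
            System.out.printf("after failed attempt %d sleep %d ms%n", i + 1, sleepMs);
        }
        // If the failure is a DorisWriterExcetion with needReCreateLabel() == true,
        // a new label (labelPrefix + random UUID) is generated before the next attempt.
    }
}
```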
PRE_SQL = "preSql"; + private static final String POST_SQL = "postSql"; + private static final String JDBC_URL = "connection[0].jdbcUrl"; + private static final String LABEL_PREFIX = "labelPrefix"; + private static final String MAX_BATCH_ROWS = "maxBatchRows"; + private static final String MAX_BATCH_SIZE = "batchSize"; + private static final String FLUSH_INTERVAL = "flushInterval"; + private static final String LOAD_URL = "loadUrl"; + private static final String FLUSH_QUEUE_LENGTH = "flushQueueLength"; + private static final String LOAD_PROPS = "loadProps"; + + private static final String DEFAULT_LABEL_PREFIX = "datax_doris_writer_"; + + private static final long DEFAULT_MAX_BATCH_SIZE = 90 * 1024 * 1024; //default 90M + + private final Configuration options; + + private List infoSchemaColumns; + private List userSetColumns; + private boolean isWildcardColumn; + + public Keys ( Configuration options) { + this.options = options; + this.userSetColumns = options.getList(COLUMN, String.class).stream().map(str -> str.replace("`", "")).collect(Collectors.toList()); + if (1 == options.getList(COLUMN, String.class).size() && "*".trim().equals(options.getList(COLUMN, String.class).get(0))) { + this.isWildcardColumn = true; + } + } + + public void doPretreatment() { + validateRequired(); + validateStreamLoadUrl(); + } + + public String getJdbcUrl() { + return options.getString(JDBC_URL); + } + + public String getDatabase() { + return options.getString(DATABASE); + } + + public String getTable() { + return options.getString(TABLE); + } + + public String getUsername() { + return options.getString(USERNAME); + } + + public String getPassword() { + return options.getString(PASSWORD); + } + + public String getLabelPrefix() { + String label = options.getString(LABEL_PREFIX); + return null == label ? DEFAULT_LABEL_PREFIX : label; + } + + public List getLoadUrlList() { + return options.getList(LOAD_URL, String.class); + } + + public List getColumns() { + if (isWildcardColumn) { + return this.infoSchemaColumns; + } + return this.userSetColumns; + } + + public boolean isWildcardColumn() { + return this.isWildcardColumn; + } + + public void setInfoCchemaColumns(List cols) { + this.infoSchemaColumns = cols; + } + + public List getPreSqlList() { + return options.getList(PRE_SQL, String.class); + } + + public List getPostSqlList() { + return options.getList(POST_SQL, String.class); + } + + public Map getLoadProps() { + return options.getMap(LOAD_PROPS); + } + + public int getMaxRetries() { + return MAX_RETRIES; + } + + public int getBatchRows() { + Integer rows = options.getInt(MAX_BATCH_ROWS); + return null == rows ? BATCH_ROWS : rows; + } + + public long getBatchSize() { + Long size = options.getLong(MAX_BATCH_SIZE); + return null == size ? DEFAULT_MAX_BATCH_SIZE : size; + } + + public long getFlushInterval() { + Long interval = options.getLong(FLUSH_INTERVAL); + return null == interval ? DEFAULT_FLUSH_INTERVAL : interval; + } + + public int getFlushQueueLength() { + Integer len = options.getInt(FLUSH_QUEUE_LENGTH); + return null == len ? 
1 : len; + } + + public StreamLoadFormat getStreamLoadFormat() { + Map loadProps = getLoadProps(); + if (null == loadProps) { + return StreamLoadFormat.CSV; + } + if (loadProps.containsKey(LOAD_PROPS_FORMAT) + && StreamLoadFormat.JSON.name().equalsIgnoreCase(String.valueOf(loadProps.get(LOAD_PROPS_FORMAT)))) { + return StreamLoadFormat.JSON; + } + return StreamLoadFormat.CSV; + } + + private void validateStreamLoadUrl() { + List urlList = getLoadUrlList(); + for (String host : urlList) { + if (host.split(":").length < 2) { + throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR, + "The format of loadUrl is not correct, please enter:[`fe_ip:fe_http_ip;fe_ip:fe_http_ip`]."); + } + } + } + + private void validateRequired() { + final String[] requiredOptionKeys = new String[]{ + USERNAME, + DATABASE, + TABLE, + COLUMN, + LOAD_URL + }; + for (String optionKey : requiredOptionKeys) { + options.getNecessaryValue(optionKey, DBUtilErrorCode.REQUIRED_VALUE); + } + } +} diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/WriterTuple.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/WriterTuple.java new file mode 100644 index 00000000..32e0b341 --- /dev/null +++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/WriterTuple.java @@ -0,0 +1,20 @@ +package com.alibaba.datax.plugin.writer.doriswriter; + +import java.util.List; + +public class WriterTuple { + private String label; + private Long bytes; + private List rows; + + public WriterTuple ( String label, Long bytes, List rows){ + this.label = label; + this.rows = rows; + this.bytes = bytes; + } + + public String getLabel() { return label; } + public void setLabel(String label) { this.label = label; } + public Long getBytes() { return bytes; } + public List getRows() { return rows; } +} diff --git a/doriswriter/src/main/resources/plugin.json b/doriswriter/src/main/resources/plugin.json new file mode 100644 index 00000000..69dc31a2 --- /dev/null +++ b/doriswriter/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "doriswriter", + "class": "com.alibaba.datax.plugin.writer.doriswriter.DorisWriter", + "description": "apache doris writer plugin", + "developer": "apche doris" +} diff --git a/doriswriter/src/main/resources/plugin_job_template.json b/doriswriter/src/main/resources/plugin_job_template.json new file mode 100644 index 00000000..0187e539 --- /dev/null +++ b/doriswriter/src/main/resources/plugin_job_template.json @@ -0,0 +1,20 @@ +{ + "name": "doriswriter", + "parameter": { + "username": "", + "password": "", + "column": [], + "preSql": [], + "postSql": [], + "beLoadUrl": [], + "loadUrl": [], + "loadProps": {}, + "connection": [ + { + "jdbcUrl": "", + "selectedDatabase": "", + "table": [] + } + ] + } +} \ No newline at end of file diff --git a/drdsreader/doc/drdsreader.md b/drdsreader/doc/drdsreader.md index 25df9200..c54e6bd1 100644 --- a/drdsreader/doc/drdsreader.md +++ b/drdsreader/doc/drdsreader.md @@ -50,7 +50,7 @@ DRDS的插件目前DataX只适配了Mysql引擎的场景,DRDS对于DataX而言 // 数据库连接密码 "password": "root", "column": [ - "id","name" + "id","name" ], "connection": [ { diff --git a/elasticsearchwriter/pom.xml b/elasticsearchwriter/pom.xml index a60dbd88..8699c6e5 100644 --- a/elasticsearchwriter/pom.xml +++ b/elasticsearchwriter/pom.xml @@ -35,12 +35,12 @@ io.searchbox jest-common - 2.4.0 + 6.3.1 io.searchbox jest - 2.4.0 + 6.3.1 joda-time diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESClient.java 
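The Keys paths above ("connection[0].jdbcUrl", "connection[0].selectedDatabase", "connection[0].table[0]", ...) read straight out of the writer's parameter block, i.e. the shape of plugin_job_template.json. A minimal sketch of that wiring (not part of the patch; it assumes Configuration.from(String) from datax-common behaves as usual, and all connection values below are placeholders):

```java
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.writer.doriswriter.Keys;

public class DorisKeysSketch {
    public static void main(String[] args) {
        String parameter = "{"
                + "\"username\":\"root\",\"password\":\"\","
                + "\"column\":[\"id\",\"name\"],"
                + "\"loadUrl\":[\"127.0.0.1:8030\"],"   // FE host:http port
                + "\"connection\":[{"
                + "\"jdbcUrl\":\"jdbc:mysql://127.0.0.1:9030/demo\","
                + "\"selectedDatabase\":\"demo\",\"table\":[\"orders\"]}]"
                + "}";
        Keys keys = new Keys(Configuration.from(parameter));
        keys.doPretreatment();                   // validates required keys and the loadUrl host:port format
        System.out.println(keys.getJdbcUrl());   // connection[0].jdbcUrl
        System.out.println(keys.getTable());     // connection[0].table[0]
        System.out.println(keys.getBatchRows()); // 500000 unless maxBatchRows is set
    }
}
```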
b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESClient.java deleted file mode 100644 index 34bb7e54..00000000 --- a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESClient.java +++ /dev/null @@ -1,236 +0,0 @@ -package com.alibaba.datax.plugin.writer.elasticsearchwriter; - -import com.google.gson.Gson; -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import com.google.gson.JsonParser; -import io.searchbox.action.Action; -import io.searchbox.client.JestClient; -import io.searchbox.client.JestClientFactory; -import io.searchbox.client.JestResult; -import io.searchbox.client.config.HttpClientConfig; -import io.searchbox.client.config.HttpClientConfig.Builder; -import io.searchbox.core.Bulk; -import io.searchbox.indices.CreateIndex; -import io.searchbox.indices.DeleteIndex; -import io.searchbox.indices.IndicesExists; -import io.searchbox.indices.aliases.*; -import io.searchbox.indices.mapping.PutMapping; -import org.apache.http.HttpHost; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.TimeUnit; - -/** - * Created by xiongfeng.bxf on 17/2/8. - */ -public class ESClient { - private static final Logger log = LoggerFactory.getLogger(ESClient.class); - - private JestClient jestClient; - - public JestClient getClient() { - return jestClient; - } - - public void createClient(String endpoint, - String user, - String passwd, - boolean multiThread, - int readTimeout, - boolean compression, - boolean discovery) { - - JestClientFactory factory = new JestClientFactory(); - Builder httpClientConfig = new HttpClientConfig - .Builder(endpoint) - .setPreemptiveAuth(new HttpHost(endpoint)) - .multiThreaded(multiThread) - .connTimeout(30000) - .readTimeout(readTimeout) - .maxTotalConnection(200) - .requestCompressionEnabled(compression) - .discoveryEnabled(discovery) - .discoveryFrequency(5l, TimeUnit.MINUTES); - - if (!("".equals(user) || "".equals(passwd))) { - httpClientConfig.defaultCredentials(user, passwd); - } - - factory.setHttpClientConfig(httpClientConfig.build()); - - jestClient = factory.getObject(); - } - - public boolean indicesExists(String indexName) throws Exception { - boolean isIndicesExists = false; - JestResult rst = jestClient.execute(new IndicesExists.Builder(indexName).build()); - if (rst.isSucceeded()) { - isIndicesExists = true; - } else { - switch (rst.getResponseCode()) { - case 404: - isIndicesExists = false; - break; - case 401: - // 无权访问 - default: - log.warn(rst.getErrorMessage()); - break; - } - } - return isIndicesExists; - } - - public boolean deleteIndex(String indexName) throws Exception { - log.info("delete index " + indexName); - if (indicesExists(indexName)) { - JestResult rst = execute(new DeleteIndex.Builder(indexName).build()); - if (!rst.isSucceeded()) { - return false; - } - } else { - log.info("index cannot found, skip delete " + indexName); - } - return true; - } - - public boolean createIndex(String indexName, String typeName, - Object mappings, String settings, boolean dynamic) throws Exception { - JestResult rst = null; - if (!indicesExists(indexName)) { - log.info("create index " + indexName); - rst = jestClient.execute( - new CreateIndex.Builder(indexName) - .settings(settings) - .setParameter("master_timeout", "5m") - .build() - ); - //index_already_exists_exception - if (!rst.isSucceeded()) { - if 
(getStatus(rst) == 400) { - log.info(String.format("index [%s] already exists", indexName)); - return true; - } else { - log.error(rst.getErrorMessage()); - return false; - } - } else { - log.info(String.format("create [%s] index success", indexName)); - } - } - - int idx = 0; - while (idx < 5) { - if (indicesExists(indexName)) { - break; - } - Thread.sleep(2000); - idx ++; - } - if (idx >= 5) { - return false; - } - - if (dynamic) { - log.info("ignore mappings"); - return true; - } - log.info("create mappings for " + indexName + " " + mappings); - rst = jestClient.execute(new PutMapping.Builder(indexName, typeName, mappings) - .setParameter("master_timeout", "5m").build()); - if (!rst.isSucceeded()) { - if (getStatus(rst) == 400) { - log.info(String.format("index [%s] mappings already exists", indexName)); - } else { - log.error(rst.getErrorMessage()); - return false; - } - } else { - log.info(String.format("index [%s] put mappings success", indexName)); - } - return true; - } - - public JestResult execute(Action clientRequest) throws Exception { - JestResult rst = null; - rst = jestClient.execute(clientRequest); - if (!rst.isSucceeded()) { - //log.warn(rst.getErrorMessage()); - } - return rst; - } - - public Integer getStatus(JestResult rst) { - JsonObject jsonObject = rst.getJsonObject(); - if (jsonObject.has("status")) { - return jsonObject.get("status").getAsInt(); - } - return 600; - } - - public boolean isBulkResult(JestResult rst) { - JsonObject jsonObject = rst.getJsonObject(); - return jsonObject.has("items"); - } - - - public boolean alias(String indexname, String aliasname, boolean needClean) throws IOException { - GetAliases getAliases = new GetAliases.Builder().addIndex(aliasname).build(); - AliasMapping addAliasMapping = new AddAliasMapping.Builder(indexname, aliasname).build(); - JestResult rst = jestClient.execute(getAliases); - log.info(rst.getJsonString()); - List list = new ArrayList(); - if (rst.isSucceeded()) { - JsonParser jp = new JsonParser(); - JsonObject jo = (JsonObject)jp.parse(rst.getJsonString()); - for(Map.Entry entry : jo.entrySet()){ - String tindex = entry.getKey(); - if (indexname.equals(tindex)) { - continue; - } - AliasMapping m = new RemoveAliasMapping.Builder(tindex, aliasname).build(); - String s = new Gson().toJson(m.getData()); - log.info(s); - if (needClean) { - list.add(m); - } - } - } - - ModifyAliases modifyAliases = new ModifyAliases.Builder(addAliasMapping).addAlias(list).setParameter("master_timeout", "5m").build(); - rst = jestClient.execute(modifyAliases); - if (!rst.isSucceeded()) { - log.error(rst.getErrorMessage()); - return false; - } - return true; - } - - public JestResult bulkInsert(Bulk.Builder bulk, int trySize) throws Exception { - // es_rejected_execution_exception - // illegal_argument_exception - // cluster_block_exception - JestResult rst = null; - rst = jestClient.execute(bulk.build()); - if (!rst.isSucceeded()) { - log.warn(rst.getErrorMessage()); - } - return rst; - } - - /** - * 关闭JestClient客户端 - * - */ - public void closeJestClient() { - if (jestClient != null) { - jestClient.shutdownClient(); - } - } -} diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESColumn.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESColumn.java deleted file mode 100644 index 8990d77c..00000000 --- a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESColumn.java +++ /dev/null @@ -1,65 +0,0 @@ -package 
com.alibaba.datax.plugin.writer.elasticsearchwriter; - -/** - * Created by xiongfeng.bxf on 17/3/2. - */ -public class ESColumn { - - private String name;//: "appkey", - - private String type;//": "TEXT", - - private String timezone; - - private String format; - - private Boolean array; - - public void setName(String name) { - this.name = name; - } - - public void setType(String type) { - this.type = type; - } - - public void setTimeZone(String timezone) { - this.timezone = timezone; - } - - public void setFormat(String format) { - this.format = format; - } - - public String getName() { - return name; - } - - public String getType() { - return type; - } - - public String getTimezone() { - return timezone; - } - - public String getFormat() { - return format; - } - - public void setTimezone(String timezone) { - this.timezone = timezone; - } - - public Boolean isArray() { - return array; - } - - public void setArray(Boolean array) { - this.array = array; - } - - public Boolean getArray() { - return array; - } -} diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESWriter.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESWriter.java deleted file mode 100644 index eb0e9a81..00000000 --- a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESWriter.java +++ /dev/null @@ -1,460 +0,0 @@ -package com.alibaba.datax.plugin.writer.elasticsearchwriter; - -import com.alibaba.datax.common.element.Column; -import com.alibaba.datax.common.element.Record; -import com.alibaba.datax.common.exception.DataXException; -import com.alibaba.datax.common.plugin.RecordReceiver; -import com.alibaba.datax.common.spi.Writer; -import com.alibaba.datax.common.util.Configuration; -import com.alibaba.datax.common.util.RetryUtil; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.JSONObject; -import com.alibaba.fastjson.TypeReference; -import io.searchbox.client.JestResult; -import io.searchbox.core.Bulk; -import io.searchbox.core.BulkResult; -import io.searchbox.core.Index; -import org.joda.time.DateTime; -import org.joda.time.DateTimeZone; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.net.URLEncoder; -import java.util.*; -import java.util.concurrent.Callable; - -public class ESWriter extends Writer { - private final static String WRITE_COLUMNS = "write_columns"; - - public static class Job extends Writer.Job { - private static final Logger log = LoggerFactory.getLogger(Job.class); - - private Configuration conf = null; - - @Override - public void init() { - this.conf = super.getPluginJobConf(); - } - - @Override - public void prepare() { - /** - * 注意:此方法仅执行一次。 - * 最佳实践:如果 Job 中有需要进行数据同步之前的处理,可以在此处完成,如果没有必要则可以直接去掉。 - */ - ESClient esClient = new ESClient(); - esClient.createClient(Key.getEndpoint(conf), - Key.getAccessID(conf), - Key.getAccessKey(conf), - false, - 300000, - false, - false); - - String indexName = Key.getIndexName(conf); - String typeName = Key.getTypeName(conf); - boolean dynamic = Key.getDynamic(conf); - String mappings = genMappings(typeName); - String settings = JSONObject.toJSONString( - Key.getSettings(conf) - ); - log.info(String.format("index:[%s], type:[%s], mappings:[%s]", indexName, typeName, mappings)); - - try { - boolean isIndicesExists = esClient.indicesExists(indexName); - if (Key.isCleanup(this.conf) && 
isIndicesExists) { - esClient.deleteIndex(indexName); - } - // 强制创建,内部自动忽略已存在的情况 - if (!esClient.createIndex(indexName, typeName, mappings, settings, dynamic)) { - throw new IOException("create index or mapping failed"); - } - } catch (Exception ex) { - throw DataXException.asDataXException(ESWriterErrorCode.ES_MAPPINGS, ex.toString()); - } - esClient.closeJestClient(); - } - - private String genMappings(String typeName) { - String mappings = null; - Map propMap = new HashMap(); - List columnList = new ArrayList(); - - List column = conf.getList("column"); - if (column != null) { - for (Object col : column) { - JSONObject jo = JSONObject.parseObject(col.toString()); - String colName = jo.getString("name"); - String colTypeStr = jo.getString("type"); - if (colTypeStr == null) { - throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, col.toString() + " column must have type"); - } - ESFieldType colType = ESFieldType.getESFieldType(colTypeStr); - if (colType == null) { - throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, col.toString() + " unsupported type"); - } - - ESColumn columnItem = new ESColumn(); - - if (colName.equals(Key.PRIMARY_KEY_COLUMN_NAME)) { - // 兼容已有版本 - colType = ESFieldType.ID; - colTypeStr = "id"; - } - - columnItem.setName(colName); - columnItem.setType(colTypeStr); - - if (colType == ESFieldType.ID) { - columnList.add(columnItem); - // 如果是id,则properties为空 - continue; - } - - Boolean array = jo.getBoolean("array"); - if (array != null) { - columnItem.setArray(array); - } - Map field = new HashMap(); - field.put("type", colTypeStr); - //https://www.elastic.co/guide/en/elasticsearch/reference/5.2/breaking_50_mapping_changes.html#_literal_index_literal_property - // https://www.elastic.co/guide/en/elasticsearch/guide/2.x/_deep_dive_on_doc_values.html#_disabling_doc_values - field.put("doc_values", jo.getBoolean("doc_values")); - field.put("ignore_above", jo.getInteger("ignore_above")); - field.put("index", jo.getBoolean("index")); - - switch (colType) { - case STRING: - // 兼容string类型,ES5之前版本 - break; - case KEYWORD: - // https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-search-speed.html#_warm_up_global_ordinals - field.put("eager_global_ordinals", jo.getBoolean("eager_global_ordinals")); - case TEXT: - field.put("analyzer", jo.getString("analyzer")); - // 优化disk使用,也同步会提高index性能 - // https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-disk-usage.html - field.put("norms", jo.getBoolean("norms")); - field.put("index_options", jo.getBoolean("index_options")); - break; - case DATE: - columnItem.setTimeZone(jo.getString("timezone")); - columnItem.setFormat(jo.getString("format")); - // 后面时间会处理为带时区的标准时间,所以不需要给ES指定格式 - /* - if (jo.getString("format") != null) { - field.put("format", jo.getString("format")); - } else { - //field.put("format", "strict_date_optional_time||epoch_millis||yyyy-MM-dd HH:mm:ss||yyyy-MM-dd"); - } - */ - break; - case GEO_SHAPE: - field.put("tree", jo.getString("tree")); - field.put("precision", jo.getString("precision")); - default: - break; - } - propMap.put(colName, field); - columnList.add(columnItem); - } - } - - conf.set(WRITE_COLUMNS, JSON.toJSONString(columnList)); - - log.info(JSON.toJSONString(columnList)); - - Map rootMappings = new HashMap(); - Map typeMappings = new HashMap(); - typeMappings.put("properties", propMap); - rootMappings.put(typeName, typeMappings); - - mappings = JSON.toJSONString(rootMappings); - - if (mappings == null || "".equals(mappings)) { 
- throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, "must have mappings"); - } - - return mappings; - } - - @Override - public List split(int mandatoryNumber) { - List configurations = new ArrayList(mandatoryNumber); - for (int i = 0; i < mandatoryNumber; i++) { - configurations.add(conf); - } - return configurations; - } - - @Override - public void post() { - ESClient esClient = new ESClient(); - esClient.createClient(Key.getEndpoint(conf), - Key.getAccessID(conf), - Key.getAccessKey(conf), - false, - 300000, - false, - false); - String alias = Key.getAlias(conf); - if (!"".equals(alias)) { - log.info(String.format("alias [%s] to [%s]", alias, Key.getIndexName(conf))); - try { - esClient.alias(Key.getIndexName(conf), alias, Key.isNeedCleanAlias(conf)); - } catch (IOException e) { - throw DataXException.asDataXException(ESWriterErrorCode.ES_ALIAS_MODIFY, e); - } - } - } - - @Override - public void destroy() { - - } - } - - public static class Task extends Writer.Task { - - private static final Logger log = LoggerFactory.getLogger(Job.class); - - private Configuration conf; - - - ESClient esClient = null; - private List typeList; - private List columnList; - - private int trySize; - private int batchSize; - private String index; - private String type; - private String splitter; - - @Override - public void init() { - this.conf = super.getPluginJobConf(); - index = Key.getIndexName(conf); - type = Key.getTypeName(conf); - - trySize = Key.getTrySize(conf); - batchSize = Key.getBatchSize(conf); - splitter = Key.getSplitter(conf); - columnList = JSON.parseObject(this.conf.getString(WRITE_COLUMNS), new TypeReference>() { - }); - - typeList = new ArrayList(); - - for (ESColumn col : columnList) { - typeList.add(ESFieldType.getESFieldType(col.getType())); - } - - esClient = new ESClient(); - } - - @Override - public void prepare() { - esClient.createClient(Key.getEndpoint(conf), - Key.getAccessID(conf), - Key.getAccessKey(conf), - Key.isMultiThread(conf), - Key.getTimeout(conf), - Key.isCompression(conf), - Key.isDiscovery(conf)); - } - - @Override - public void startWrite(RecordReceiver recordReceiver) { - List writerBuffer = new ArrayList(this.batchSize); - Record record = null; - long total = 0; - while ((record = recordReceiver.getFromReader()) != null) { - writerBuffer.add(record); - if (writerBuffer.size() >= this.batchSize) { - total += doBatchInsert(writerBuffer); - writerBuffer.clear(); - } - } - - if (!writerBuffer.isEmpty()) { - total += doBatchInsert(writerBuffer); - writerBuffer.clear(); - } - - String msg = String.format("task end, write size :%d", total); - getTaskPluginCollector().collectMessage("writesize", String.valueOf(total)); - log.info(msg); - esClient.closeJestClient(); - } - - private String getDateStr(ESColumn esColumn, Column column) { - DateTime date = null; - DateTimeZone dtz = DateTimeZone.getDefault(); - if (esColumn.getTimezone() != null) { - // 所有时区参考 http://www.joda.org/joda-time/timezones.html - dtz = DateTimeZone.forID(esColumn.getTimezone()); - } - if (column.getType() != Column.Type.DATE && esColumn.getFormat() != null) { - DateTimeFormatter formatter = DateTimeFormat.forPattern(esColumn.getFormat()); - date = formatter.withZone(dtz).parseDateTime(column.asString()); - return date.toString(); - } else if (column.getType() == Column.Type.DATE) { - date = new DateTime(column.asLong(), dtz); - return date.toString(); - } else { - return column.asString(); - } - } - - private long doBatchInsert(final List writerBuffer) { - Map data = null; - 
final Bulk.Builder bulkaction = new Bulk.Builder().defaultIndex(this.index).defaultType(this.type); - for (Record record : writerBuffer) { - data = new HashMap(); - String id = null; - for (int i = 0; i < record.getColumnNumber(); i++) { - Column column = record.getColumn(i); - String columnName = columnList.get(i).getName(); - ESFieldType columnType = typeList.get(i); - //如果是数组类型,那它传入的必是字符串类型 - if (columnList.get(i).isArray() != null && columnList.get(i).isArray()) { - String[] dataList = column.asString().split(splitter); - if (!columnType.equals(ESFieldType.DATE)) { - data.put(columnName, dataList); - } else { - for (int pos = 0; pos < dataList.length; pos++) { - dataList[pos] = getDateStr(columnList.get(i), column); - } - data.put(columnName, dataList); - } - } else { - switch (columnType) { - case ID: - if (id != null) { - id += record.getColumn(i).asString(); - } else { - id = record.getColumn(i).asString(); - } - break; - case DATE: - try { - String dateStr = getDateStr(columnList.get(i), column); - data.put(columnName, dateStr); - } catch (Exception e) { - getTaskPluginCollector().collectDirtyRecord(record, String.format("时间类型解析失败 [%s:%s] exception: %s", columnName, column.toString(), e.toString())); - } - break; - case KEYWORD: - case STRING: - case TEXT: - case IP: - case GEO_POINT: - data.put(columnName, column.asString()); - break; - case BOOLEAN: - data.put(columnName, column.asBoolean()); - break; - case BYTE: - case BINARY: - data.put(columnName, column.asBytes()); - break; - case LONG: - data.put(columnName, column.asLong()); - break; - case INTEGER: - data.put(columnName, column.asBigInteger()); - break; - case SHORT: - data.put(columnName, column.asBigInteger()); - break; - case FLOAT: - case DOUBLE: - data.put(columnName, column.asDouble()); - break; - case NESTED: - case OBJECT: - case GEO_SHAPE: - data.put(columnName, JSON.parse(column.asString())); - break; - default: - getTaskPluginCollector().collectDirtyRecord(record, "类型错误:不支持的类型:" + columnType + " " + columnName); - } - } - } - - if (id == null) { - //id = UUID.randomUUID().toString(); - bulkaction.addAction(new Index.Builder(data).build()); - } else { - bulkaction.addAction(new Index.Builder(data).id(id).build()); - } - } - - try { - return RetryUtil.executeWithRetry(new Callable() { - @Override - public Integer call() throws Exception { - JestResult jestResult = esClient.bulkInsert(bulkaction, 1); - if (jestResult.isSucceeded()) { - return writerBuffer.size(); - } - - String msg = String.format("response code: [%d] error :[%s]", jestResult.getResponseCode(), jestResult.getErrorMessage()); - log.warn(msg); - if (esClient.isBulkResult(jestResult)) { - BulkResult brst = (BulkResult) jestResult; - List failedItems = brst.getFailedItems(); - for (BulkResult.BulkResultItem item : failedItems) { - if (item.status != 400) { - // 400 BAD_REQUEST 如果非数据异常,请求异常,则不允许忽略 - throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, String.format("status:[%d], error: %s", item.status, item.error)); - } else { - // 如果用户选择不忽略解析错误,则抛异常,默认为忽略 - if (!Key.isIgnoreParseError(conf)) { - throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, String.format("status:[%d], error: %s, config not ignoreParseError so throw this error", item.status, item.error)); - } - } - } - - List items = brst.getItems(); - for (int idx = 0; idx < items.size(); ++idx) { - BulkResult.BulkResultItem item = items.get(idx); - if (item.error != null && !"".equals(item.error)) { - 
getTaskPluginCollector().collectDirtyRecord(writerBuffer.get(idx), String.format("status:[%d], error: %s", item.status, item.error)); - } - } - return writerBuffer.size() - brst.getFailedItems().size(); - } else { - Integer status = esClient.getStatus(jestResult); - switch (status) { - case 429: //TOO_MANY_REQUESTS - log.warn("server response too many requests, so auto reduce speed"); - break; - } - throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, jestResult.getErrorMessage()); - } - } - }, trySize, 60000L, true); - } catch (Exception e) { - if (Key.isIgnoreWriteError(this.conf)) { - log.warn(String.format("重试[%d]次写入失败,忽略该错误,继续写入!", trySize)); - } else { - throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, e); - } - } - return 0; - } - - @Override - public void post() { - } - - @Override - public void destroy() { - esClient.closeJestClient(); - } - } -} diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESWriterErrorCode.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESWriterErrorCode.java deleted file mode 100644 index 59dcbd0a..00000000 --- a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESWriterErrorCode.java +++ /dev/null @@ -1,37 +0,0 @@ -package com.alibaba.datax.plugin.writer.elasticsearchwriter; - -import com.alibaba.datax.common.spi.ErrorCode; - -public enum ESWriterErrorCode implements ErrorCode { - BAD_CONFIG_VALUE("ESWriter-00", "您配置的值不合法."), - ES_INDEX_DELETE("ESWriter-01", "删除index错误."), - ES_INDEX_CREATE("ESWriter-02", "创建index错误."), - ES_MAPPINGS("ESWriter-03", "mappings错误."), - ES_INDEX_INSERT("ESWriter-04", "插入数据错误."), - ES_ALIAS_MODIFY("ESWriter-05", "别名修改错误."), - ; - - private final String code; - private final String description; - - ESWriterErrorCode(String code, String description) { - this.code = code; - this.description = description; - } - - @Override - public String getCode() { - return this.code; - } - - @Override - public String getDescription() { - return this.description; - } - - @Override - public String toString() { - return String.format("Code:[%s], Description:[%s]. 
", this.code, - this.description); - } -} \ No newline at end of file diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchClient.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchClient.java new file mode 100644 index 00000000..08486e1f --- /dev/null +++ b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchClient.java @@ -0,0 +1,314 @@ +package com.alibaba.datax.plugin.writer.elasticsearchwriter; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.writer.elasticsearchwriter.jest.ClusterInfo; +import com.alibaba.datax.plugin.writer.elasticsearchwriter.jest.ClusterInfoResult; +import com.alibaba.datax.plugin.writer.elasticsearchwriter.jest.PutMapping7; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONObject; +import com.google.gson.Gson; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import io.searchbox.action.Action; +import io.searchbox.client.JestClient; +import io.searchbox.client.JestClientFactory; +import io.searchbox.client.JestResult; +import io.searchbox.client.config.HttpClientConfig; +import io.searchbox.client.config.HttpClientConfig.Builder; +import io.searchbox.core.Bulk; +import io.searchbox.indices.CreateIndex; +import io.searchbox.indices.DeleteIndex; +import io.searchbox.indices.IndicesExists; +import io.searchbox.indices.aliases.*; +import io.searchbox.indices.mapping.GetMapping; +import io.searchbox.indices.mapping.PutMapping; + +import io.searchbox.indices.settings.GetSettings; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +/** + * Created by xiongfeng.bxf on 17/2/8. 
+ */ +public class ElasticSearchClient { + private static final Logger LOGGER = LoggerFactory.getLogger(ElasticSearchClient.class); + + private JestClient jestClient; + private Configuration conf; + + public JestClient getClient() { + return jestClient; + } + + public ElasticSearchClient(Configuration conf) { + this.conf = conf; + String endpoint = Key.getEndpoint(conf); + //es是支持集群写入的 + String[] endpoints = endpoint.split(","); + String user = Key.getUsername(conf); + String passwd = Key.getPassword(conf); + boolean multiThread = Key.isMultiThread(conf); + int readTimeout = Key.getTimeout(conf); + boolean compression = Key.isCompression(conf); + boolean discovery = Key.isDiscovery(conf); + String discoveryFilter = Key.getDiscoveryFilter(conf); + int totalConnection = this.conf.getInt("maxTotalConnection", 200); + JestClientFactory factory = new JestClientFactory(); + Builder httpClientConfig = new HttpClientConfig + .Builder(Arrays.asList(endpoints)) +// .setPreemptiveAuth(new HttpHost(endpoint)) + .multiThreaded(multiThread) + .connTimeout(readTimeout) + .readTimeout(readTimeout) + .maxTotalConnection(totalConnection) + .requestCompressionEnabled(compression) + .discoveryEnabled(discovery) + .discoveryFrequency(5L, TimeUnit.MINUTES) + .discoveryFilter(discoveryFilter); + if (!(StringUtils.isBlank(user) || StringUtils.isBlank(passwd))) { + // 匿名登录 + httpClientConfig.defaultCredentials(user, passwd); + } + factory.setHttpClientConfig(httpClientConfig.build()); + this.jestClient = factory.getObject(); + } + + public boolean indicesExists(String indexName) throws Exception { + boolean isIndicesExists = false; + JestResult rst = execute(new IndicesExists.Builder(indexName).build()); + if (rst.isSucceeded()) { + isIndicesExists = true; + } else { + LOGGER.warn("IndicesExists got ResponseCode: {} ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage()); + switch (rst.getResponseCode()) { + case 404: + isIndicesExists = false; + break; + case 401: + // 无权访问 + default: + LOGGER.warn(rst.getErrorMessage()); + break; + } + } + return isIndicesExists; + } + + public boolean deleteIndex(String indexName) throws Exception { + LOGGER.info("delete index {}", indexName); + if (indicesExists(indexName)) { + JestResult rst = execute(new DeleteIndex.Builder(indexName).build()); + if (!rst.isSucceeded()) { + LOGGER.warn("DeleteIndex got ResponseCode: {}, ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage()); + return false; + } else { + LOGGER.info("delete index {} success", indexName); + } + } else { + LOGGER.info("index cannot found, skip delete index {}", indexName); + } + return true; + } + + public boolean isGreaterOrEqualThan7() throws Exception { + try { + ClusterInfoResult result = execute(new ClusterInfo.Builder().build()); + LOGGER.info("ClusterInfoResult: {}", result.getJsonString()); + return result.isGreaterOrEqualThan7(); + }catch(Exception e) { + LOGGER.warn(e.getMessage()); + return false; + } + } + + /** + * 获取索引的settings + * @param indexName 索引名 + * @return 设置 + */ + public String getIndexSettings(String indexName) { + GetSettings.Builder builder = new GetSettings.Builder(); + builder.addIndex(indexName); + GetSettings getSettings = builder.build(); + try { + LOGGER.info("begin GetSettings for index: {}", indexName); + JestResult result = this.execute(getSettings); + return result.getJsonString(); + } catch (Exception e) { + String message = "GetSettings for index error: " + e.getMessage(); + LOGGER.warn(message, e); + throw 
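A usage sketch for the client construction above (not part of the patch). It assumes the writer's Key helpers read plain "endpoint" / "username" / "password" entries from the job configuration and that the remaining options fall back to their defaults; the endpoint list is comma-separated and each node is handed to the Jest builder, while blank credentials skip preemptive auth. Host names and credentials below are placeholders.

```java
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.writer.elasticsearchwriter.ElasticSearchClient;

public class ElasticSearchClientSketch {
    static void smokeTest() throws Exception {
        Configuration conf = Configuration.newDefault();
        conf.set("endpoint", "http://es-node1:9200,http://es-node2:9200"); // multiple nodes allowed
        conf.set("username", "elastic");
        conf.set("password", "changeme");

        ElasticSearchClient client = new ElasticSearchClient(conf);
        try {
            boolean es7 = client.isGreaterOrEqualThan7();       // selects the 7.x mapping code path
            boolean exists = client.indicesExists("my_index");
            System.out.println("es7=" + es7 + ", exists=" + exists);
        } finally {
            client.closeJestClient();
        }
    }
}
```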
DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_GET_SETTINGS, e.getMessage(), e); + } + } + + public boolean createIndexIfNotExists(String indexName, String typeName, + Object mappings, String settings, + boolean dynamic, boolean isGreaterOrEqualThan7) throws Exception { + JestResult rst; + if (!indicesExists(indexName)) { + LOGGER.info("create index {}", indexName); + rst = execute( + new CreateIndex.Builder(indexName) + .settings(settings) + .setParameter("master_timeout", Key.getMasterTimeout(this.conf)) + .build() + ); + //index_already_exists_exception + if (!rst.isSucceeded()) { + LOGGER.warn("CreateIndex got ResponseCode: {}, ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage()); + if (getStatus(rst) == 400) { + LOGGER.info(String.format("index {} already exists", indexName)); + return true; + } else { + return false; + } + } else { + LOGGER.info("create {} index success", indexName); + } + } + + if (dynamic) { + LOGGER.info("dynamic is true, ignore mappings"); + return true; + } + LOGGER.info("create mappings for {} {}", indexName, mappings); + //如果大于7.x,mapping的PUT请求URI中不能带type,并且mapping设置中不能带有嵌套结构 + if (isGreaterOrEqualThan7) { + rst = execute(new PutMapping7.Builder(indexName, mappings). + setParameter("master_timeout", Key.getMasterTimeout(this.conf)).build()); + } else { + rst = execute(new PutMapping.Builder(indexName, typeName, mappings) + .setParameter("master_timeout", Key.getMasterTimeout(this.conf)).build()); + } + if (!rst.isSucceeded()) { + LOGGER.error("PutMapping got ResponseCode: {}, ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage()); + return false; + } else { + LOGGER.info("index {} put mappings success", indexName); + } + return true; + } + + public T execute(Action clientRequest) throws IOException { + T rst = jestClient.execute(clientRequest); + if (!rst.isSucceeded()) { + LOGGER.warn(rst.getJsonString()); + } + return rst; + } + + public Integer getStatus(JestResult rst) { + JsonObject jsonObject = rst.getJsonObject(); + if (jsonObject.has("status")) { + return jsonObject.get("status").getAsInt(); + } + return 600; + } + + public boolean isBulkResult(JestResult rst) { + JsonObject jsonObject = rst.getJsonObject(); + return jsonObject.has("items"); + } + + + public boolean alias(String indexname, String aliasname, boolean needClean) throws IOException { + GetAliases getAliases = new GetAliases.Builder().addIndex(aliasname).build(); + AliasMapping addAliasMapping = new AddAliasMapping.Builder(indexname, aliasname).build(); + JestResult rst = null; + List list = new ArrayList(); + if (needClean) { + rst = execute(getAliases); + if (rst.isSucceeded()) { + JsonParser jp = new JsonParser(); + JsonObject jo = (JsonObject) jp.parse(rst.getJsonString()); + for (Map.Entry entry : jo.entrySet()) { + String tindex = entry.getKey(); + if (indexname.equals(tindex)) { + continue; + } + AliasMapping m = new RemoveAliasMapping.Builder(tindex, aliasname).build(); + String s = new Gson().toJson(m.getData()); + LOGGER.info(s); + list.add(m); + } + } + } + + ModifyAliases modifyAliases = new ModifyAliases.Builder(addAliasMapping).addAlias(list).setParameter("master_timeout", Key.getMasterTimeout(this.conf)).build(); + rst = execute(modifyAliases); + if (!rst.isSucceeded()) { + LOGGER.error(rst.getErrorMessage()); + throw new IOException(rst.getErrorMessage()); + } + return true; + } + + /** + * 获取index的mapping + */ + public String getIndexMapping(String indexName) { + GetMapping.Builder builder = new GetMapping.Builder(); + 
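For reference, a one-line usage sketch of alias() above (not part of the patch; index and alias names are placeholders): with needClean = true the alias is added to the new index and removed from any other index it currently points to, all in a single ModifyAliases request.

```java
// assuming an ElasticSearchClient "esClient" built as in the sketch above
boolean switched = esClient.alias("orders_v20240101", "orders", true); // needClean = true drops stale bindings
```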
builder.addIndex(indexName); + GetMapping getMapping = builder.build(); + try { + LOGGER.info("begin GetMapping for index: {}", indexName); + JestResult result = this.execute(getMapping); + return result.getJsonString(); + } catch (Exception e) { + String message = "GetMapping for index error: " + e.getMessage(); + LOGGER.warn(message, e); + throw DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_MAPPINGS, e.getMessage(), e); + } + } + + public String getMappingForIndexType(String indexName, String typeName) { + String indexMapping = this.getIndexMapping(indexName); + JSONObject indexMappingInJson = JSON.parseObject(indexMapping); + List paths = Arrays.asList(indexName, "mappings"); + JSONObject properties = JsonPathUtil.getJsonObject(paths, indexMappingInJson); + JSONObject propertiesParent = properties; + if (StringUtils.isNotBlank(typeName) && properties.containsKey(typeName)) { + propertiesParent = (JSONObject) properties.get(typeName); + } + JSONObject mapping = (JSONObject) propertiesParent.get("properties"); + return JSON.toJSONString(mapping); + } + + public JestResult bulkInsert(Bulk.Builder bulk) throws Exception { + // es_rejected_execution_exception + // illegal_argument_exception + // cluster_block_exception + JestResult rst = null; + rst = execute(bulk.build()); + if (!rst.isSucceeded()) { + LOGGER.warn(rst.getErrorMessage()); + } + return rst; + } + + /** + * 关闭JestClient客户端 + * + */ + public void closeJestClient() { + if (jestClient != null) { + try { + // jestClient.shutdownClient(); + jestClient.close(); + } catch (IOException e) { + LOGGER.warn("ignore error: ", e.getMessage()); + } + + } + } +} diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchColumn.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchColumn.java new file mode 100644 index 00000000..a27b15b2 --- /dev/null +++ b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchColumn.java @@ -0,0 +1,126 @@ +package com.alibaba.datax.plugin.writer.elasticsearchwriter; + +import java.util.List; + +/** + * Created by xiongfeng.bxf on 17/3/2. 
+ */ +public class ElasticSearchColumn { + + private String name;//: "appkey", + + private String type;//": "TEXT", + + private String timezone; + + /** + * 源头数据格式化处理,datax做的事情 + */ + private String format; + + /** + * 目标端格式化,es原生支持的格式 + */ + private String dstFormat; + + private boolean array; + + /** + * 是否使用目标端(ES原生)数组类型 + * + * 默认是false + */ + private boolean dstArray = false; + + private boolean jsonArray; + + private boolean origin; + + private List combineFields; + + private String combineFieldsValueSeparator = "-"; + + public String getCombineFieldsValueSeparator() { + return combineFieldsValueSeparator; + } + + public void setCombineFieldsValueSeparator(String combineFieldsValueSeparator) { + this.combineFieldsValueSeparator = combineFieldsValueSeparator; + } + + public List getCombineFields() { + return combineFields; + } + + public void setCombineFields(List combineFields) { + this.combineFields = combineFields; + } + + public void setName(String name) { + this.name = name; + } + + public void setType(String type) { + this.type = type; + } + + public void setTimeZone(String timezone) { + this.timezone = timezone; + } + + public void setFormat(String format) { + this.format = format; + } + + public String getName() { + return name; + } + + public String getType() { + return type; + } + + public boolean isOrigin() { return origin; } + + public void setOrigin(boolean origin) { this.origin = origin; } + + public String getTimezone() { + return timezone; + } + + public String getFormat() { + return format; + } + + public void setTimezone(String timezone) { + this.timezone = timezone; + } + + public boolean isArray() { + return array; + } + + public void setArray(boolean array) { + this.array = array; + } + + public boolean isJsonArray() {return jsonArray;} + + public void setJsonArray(boolean jsonArray) {this.jsonArray = jsonArray;} + + public String getDstFormat() { + return dstFormat; + } + + public void setDstFormat(String dstFormat) { + this.dstFormat = dstFormat; + } + + public boolean isDstArray() { + return dstArray; + } + + public void setDstArray(boolean dstArray) { + this.dstArray = dstArray; + } +} diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESFieldType.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchFieldType.java similarity index 73% rename from elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESFieldType.java rename to elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchFieldType.java index 14b09689..22c3ee6b 100644 --- a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESFieldType.java +++ b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchFieldType.java @@ -3,8 +3,11 @@ package com.alibaba.datax.plugin.writer.elasticsearchwriter; /** * Created by xiongfeng.bxf on 17/3/1. 
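A sketch of how the column flags above are typically combined for a single date column (not part of the patch; the field values are placeholders). Per the field comments, format/timezone drive the DataX-side conversion, dstFormat is the ES-native format, array marks a delimiter-separated source string, and dstArray asks for a native ES array on the target side.

```java
ElasticSearchColumn col = new ElasticSearchColumn();
col.setName("gmt_create");
col.setType("date");
col.setTimeZone("+08:00");              // DataX converts the value using this joda-time zone...
col.setFormat("yyyy-MM-dd HH:mm:ss");   // ...and this source format before indexing
// Alternative: col.setOrigin(true) + col.setDstFormat(...) passes the value through unchanged
// and lets Elasticsearch parse it with the mapping-level "format".
col.setArray(false);                    // true = the source value is a splitter-delimited string
col.setDstArray(false);                 // true = write a native ES array to the target field
```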
*/ -public enum ESFieldType { +public enum ElasticSearchFieldType { ID, + PARENT, + ROUTING, + VERSION, STRING, TEXT, KEYWORD, @@ -24,20 +27,18 @@ public enum ESFieldType { DATE_RANGE, GEO_POINT, GEO_SHAPE, - IP, + IP_RANGE, COMPLETION, TOKEN_COUNT, - - ARRAY, OBJECT, NESTED; - public static ESFieldType getESFieldType(String type) { + public static ElasticSearchFieldType getESFieldType(String type) { if (type == null) { return null; } - for (ESFieldType f : ESFieldType.values()) { + for (ElasticSearchFieldType f : ElasticSearchFieldType.values()) { if (f.name().compareTo(type.toUpperCase()) == 0) { return f; } diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchWriter.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchWriter.java new file mode 100644 index 00000000..2c8ed2d0 --- /dev/null +++ b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchWriter.java @@ -0,0 +1,1117 @@ +package com.alibaba.datax.plugin.writer.elasticsearchwriter; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.spi.Writer; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.DataXCaseEnvUtil; +import com.alibaba.datax.common.util.RetryUtil; +import com.alibaba.datax.plugin.writer.elasticsearchwriter.Key.ActionType; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONArray; +import com.alibaba.fastjson2.JSONObject; +import com.alibaba.fastjson2.TypeReference; +import com.alibaba.fastjson2.JSONWriter; +import com.google.common.base.Joiner; +import io.searchbox.client.JestResult; +import io.searchbox.core.*; +import io.searchbox.params.Parameters; +import org.apache.commons.lang3.StringUtils; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.*; +import java.util.concurrent.Callable; + +public class ElasticSearchWriter extends Writer { + private final static String WRITE_COLUMNS = "write_columns"; + + public static class Job extends Writer.Job { + private static final Logger LOGGER = LoggerFactory.getLogger(Job.class); + + private Configuration conf = null; + int retryTimes = 3; + long sleepTimeInMilliSecond = 10000L; + + private String settingsCache; + + private void setSettings(String settings) { + this.settingsCache = JsonUtil.mergeJsonStr(settings, this.settingsCache); + } + + @Override + public void init() { + this.conf = super.getPluginJobConf(); + //LOGGER.info("conf:{}", conf); + this.retryTimes = this.conf.getInt("retryTimes", 3); + this.sleepTimeInMilliSecond = this.conf.getLong("sleepTimeInMilliSecond", 10000L); + } + + public List getIncludeSettings() { + return this.conf.getList("includeSettingKeys", Arrays.asList("number_of_shards", "number_of_replicas"), String.class); + } + + /** + * 从es中获取的原始settings转为需要的settings + * @param originSettings 原始settings + * @return settings + */ + private String convertSettings(String originSettings) { + if(StringUtils.isBlank(originSettings)) { + return null; + } + JSONObject jsonObject = JSON.parseObject(originSettings); + for(String key : 
jsonObject.keySet()) { + JSONObject settingsObj = jsonObject.getJSONObject(key); + if(settingsObj != null) { + JSONObject indexObj = settingsObj.getJSONObject("settings"); + JSONObject settings = indexObj.getJSONObject("index"); + JSONObject filterSettings = new JSONObject(); + if(settings != null) { + List includeSettings = getIncludeSettings(); + if(includeSettings != null && includeSettings.size() > 0) { + for(String includeSetting : includeSettings) { + Object fieldValue = settings.get(includeSetting); + if(fieldValue != null) { + filterSettings.put(includeSetting, fieldValue); + } + } + return filterSettings.toJSONString(); + } + } + } + } + return null; + } + + @Override + public void prepare() { + /** + * 注意:此方法仅执行一次。 + * 最佳实践:如果 Job 中有需要进行数据同步之前的处理,可以在此处完成,如果没有必要则可以直接去掉。 + * 对于7.x之后的es版本,取消了index设置type的逻辑,因此在prepare阶段,加入了判断是否为7.x及以上版本 + * 如果是7.x及以上版本,需要对于index的type做不同的处理 + * 详见 : https://www.elastic.co/guide/en/elasticsearch/reference/6.8/removal-of-types.html + */ + final ElasticSearchClient esClient = new ElasticSearchClient(this.conf); + final String indexName = Key.getIndexName(conf); + ActionType actionType = Key.getActionType(conf); + final String typeName = Key.getTypeName(conf); + final boolean dynamic = Key.getDynamic(conf); + final String dstDynamic = Key.getDstDynamic(conf); + final String newSettings = JSONObject.toJSONString(Key.getSettings(conf)); + LOGGER.info("conf settings:{}, settingsCache:{}", newSettings, this.settingsCache); + final Integer esVersion = Key.getESVersion(conf); + boolean hasId = this.hasID(); + this.conf.set("hasId", hasId); + if (ActionType.UPDATE.equals(actionType) && !hasId && !hasPrimaryKeyInfo()) { + throw DataXException.asDataXException(ElasticSearchWriterErrorCode.UPDATE_WITH_ID, "Update mode must specify column type with id or primaryKeyInfo config"); + } + + try { + RetryUtil.executeWithRetry(() -> { + boolean isGreaterOrEqualThan7 = esClient.isGreaterOrEqualThan7(); + if (esVersion != null && esVersion >= 7) { + isGreaterOrEqualThan7 = true; + } + String mappings = genMappings(dstDynamic, typeName, isGreaterOrEqualThan7); + conf.set("isGreaterOrEqualThan7", isGreaterOrEqualThan7); + + + LOGGER.info(String.format("index:[%s], type:[%s], mappings:[%s]", indexName, typeName, mappings)); + boolean isIndicesExists = esClient.indicesExists(indexName); + if (isIndicesExists) { + try { + // 将原有的mapping打印出来,便于排查问题 + String oldMappings = esClient.getMappingForIndexType(indexName, typeName); + LOGGER.info("the mappings for old index is: {}", oldMappings); + } catch (Exception e) { + LOGGER.warn("warn message: {}", e.getMessage()); + } + } + + if (Key.isTruncate(conf) && isIndicesExists) { + // 备份老的索引中的settings到缓存 + try { + String oldOriginSettings = esClient.getIndexSettings(indexName); + if (StringUtils.isNotBlank(oldOriginSettings)) { + String includeSettings = convertSettings(oldOriginSettings); + LOGGER.info("merge1 settings:{}, settingsCache:{}, includeSettings:{}", + oldOriginSettings, + this.settingsCache, includeSettings); + this.setSettings(includeSettings); + } + } catch (Exception e) { + LOGGER.warn("get old settings fail, indexName:{}", indexName); + } + esClient.deleteIndex(indexName); + } + + // 更新缓存中的settings + this.setSettings(newSettings); + LOGGER.info("merge2 settings:{}, settingsCache:{}", newSettings, this.settingsCache); + // 强制创建,内部自动忽略已存在的情况 + if (!esClient.createIndexIfNotExists(indexName, typeName, mappings, this.settingsCache, dynamic, + isGreaterOrEqualThan7)) { + throw 
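A small sketch of the transformation convertSettings() above performs (not part of the patch). The input mirrors the GetSettings response cached before a truncate, keyed by index name; only the keys listed in includeSettingKeys (by default number_of_shards and number_of_replicas) survive and are later merged into the settings used to recreate the index. The index name and values below are placeholders.

```java
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONObject;

public class ConvertSettingsSketch {
    public static void main(String[] args) {
        String originSettings = "{\"my_index\":{\"settings\":{\"index\":{"
                + "\"number_of_shards\":\"5\",\"number_of_replicas\":\"1\",\"refresh_interval\":\"1s\"}}}}";
        JSONObject index = JSON.parseObject(originSettings)
                .getJSONObject("my_index").getJSONObject("settings").getJSONObject("index");
        JSONObject kept = new JSONObject();
        for (String key : new String[]{"number_of_shards", "number_of_replicas"}) {
            if (index.get(key) != null) {
                kept.put(key, index.get(key));
            }
        }
        // prints {"number_of_shards":"5","number_of_replicas":"1"}, i.e. what convertSettings() returns
        System.out.println(kept.toJSONString());
    }
}
```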
DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_MAPPINGS, ""); + } + + return true; + }, DataXCaseEnvUtil.getRetryTimes(this.retryTimes), DataXCaseEnvUtil.getRetryInterval(this.sleepTimeInMilliSecond), DataXCaseEnvUtil.getRetryExponential(false)); + } catch (Exception ex) { + throw DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_MAPPINGS, ex.getMessage(), ex); + } finally { + try { + esClient.closeJestClient(); + } catch (Exception e) { + LOGGER.warn("ignore close jest client error: {}", e.getMessage()); + } + } + } + + private boolean hasID() { + List column = conf.getList("column"); + if (column != null) { + for (Object col : column) { + JSONObject jo = JSONObject.parseObject(col.toString()); + String colTypeStr = jo.getString("type"); + ElasticSearchFieldType colType = ElasticSearchFieldType.getESFieldType(colTypeStr); + if (ElasticSearchFieldType.ID.equals(colType)) { + return true; + } + } + } + return false; + } + + private boolean hasPrimaryKeyInfo() { + PrimaryKeyInfo primaryKeyInfo = Key.getPrimaryKeyInfo(this.conf); + if (null != primaryKeyInfo && null != primaryKeyInfo.getColumn() && !primaryKeyInfo.getColumn().isEmpty()) { + return true; + } else { + return false; + } + } + + + private String genMappings(String dstDynamic, String typeName, boolean isGreaterOrEqualThan7) { + String mappings; + Map propMap = new HashMap(); + List columnList = new ArrayList(); + ElasticSearchColumn combineItem = null; + + List column = conf.getList("column"); + if (column != null) { + for (Object col : column) { + JSONObject jo = JSONObject.parseObject(col.toString()); + String colName = jo.getString("name"); + String colTypeStr = jo.getString("type"); + if (colTypeStr == null) { + throw DataXException.asDataXException(ElasticSearchWriterErrorCode.BAD_CONFIG_VALUE, col.toString() + " column must have type"); + } + ElasticSearchFieldType colType = ElasticSearchFieldType.getESFieldType(colTypeStr); + if (colType == null) { + throw DataXException.asDataXException(ElasticSearchWriterErrorCode.BAD_CONFIG_VALUE, col.toString() + " unsupported type"); + } + + ElasticSearchColumn columnItem = new ElasticSearchColumn(); + + if (Key.PRIMARY_KEY_COLUMN_NAME.equals(colName)) { + // 兼容已有版本 + colType = ElasticSearchFieldType.ID; + colTypeStr = "id"; + } + + columnItem.setName(colName); + columnItem.setType(colTypeStr); + + JSONArray combineFields = jo.getJSONArray("combineFields"); + if (combineFields != null && !combineFields.isEmpty() && ElasticSearchFieldType.ID.equals(ElasticSearchFieldType.getESFieldType(colTypeStr))) { + List fields = new ArrayList(); + for (Object item : combineFields) { + fields.add((String) item); + } + columnItem.setCombineFields(fields); + combineItem = columnItem; + } + + String combineFieldsValueSeparator = jo.getString("combineFieldsValueSeparator"); + if (StringUtils.isNotBlank(combineFieldsValueSeparator)) { + columnItem.setCombineFieldsValueSeparator(combineFieldsValueSeparator); + } + + // 如果是id,version,routing,不需要创建mapping + if (colType == ElasticSearchFieldType.ID || colType == ElasticSearchFieldType.VERSION || colType == ElasticSearchFieldType.ROUTING) { + columnList.add(columnItem); + continue; + } + + // 如果是组合id中的字段,不需要创建mapping + // 所以组合id的定义必须要在columns最前面 + if (combineItem != null && combineItem.getCombineFields().contains(colName)) { + columnList.add(columnItem); + continue; + } + columnItem.setDstArray(false); + Boolean array = jo.getBoolean("array"); + if (array != null) { + columnItem.setArray(array); + Boolean dstArray = 
jo.getBoolean("dstArray"); + if(dstArray!=null) { + columnItem.setDstArray(dstArray); + } + } else { + columnItem.setArray(false); + } + Boolean jsonArray = jo.getBoolean("json_array"); + if (jsonArray != null) { + columnItem.setJsonArray(jsonArray); + } else { + columnItem.setJsonArray(false); + } + Map field = new HashMap(); + field.put("type", colTypeStr); + //https://www.elastic.co/guide/en/elasticsearch/reference/5.2/breaking_50_mapping_changes.html#_literal_index_literal_property + // https://www.elastic.co/guide/en/elasticsearch/guide/2.x/_deep_dive_on_doc_values.html#_disabling_doc_values + field.put("doc_values", jo.getBoolean("doc_values")); + field.put("ignore_above", jo.getInteger("ignore_above")); + field.put("index", jo.getBoolean("index")); + switch (colType) { + case STRING: + // 兼容string类型,ES5之前版本 + break; + case KEYWORD: + // https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-search-speed.html#_warm_up_global_ordinals + field.put("eager_global_ordinals", jo.getBoolean("eager_global_ordinals")); + break; + case TEXT: + field.put("analyzer", jo.getString("analyzer")); + // 优化disk使用,也同步会提高index性能 + // https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-disk-usage.html + field.put("norms", jo.getBoolean("norms")); + field.put("index_options", jo.getBoolean("index_options")); + if(jo.getString("fields") != null) { + field.put("fields", jo.getJSONObject("fields")); + } + break; + case DATE: + if (Boolean.TRUE.equals(jo.getBoolean("origin"))) { + if (jo.getString("format") != null) { + field.put("format", jo.getString("format")); + } + // es原生format覆盖原先来的format + if (jo.getString("dstFormat") != null) { + field.put("format", jo.getString("dstFormat")); + } + if(jo.getBoolean("origin") != null) { + columnItem.setOrigin(jo.getBoolean("origin")); + } + } else { + columnItem.setTimeZone(jo.getString("timezone")); + columnItem.setFormat(jo.getString("format")); + } + break; + case GEO_SHAPE: + field.put("tree", jo.getString("tree")); + field.put("precision", jo.getString("precision")); + break; + case OBJECT: + case NESTED: + if (jo.getString("dynamic") != null) { + field.put("dynamic", jo.getString("dynamic")); + } + break; + default: + break; + } + if (jo.containsKey("other_params")) { + field.putAll(jo.getJSONObject("other_params")); + } + propMap.put(colName, field); + columnList.add(columnItem); + } + } + + long version = System.currentTimeMillis(); + LOGGER.info("unified version: {}", version); + conf.set("version", version); + conf.set(WRITE_COLUMNS, JSON.toJSONString(columnList)); + + LOGGER.info(JSON.toJSONString(columnList)); + + Map rootMappings = new HashMap(); + Map typeMappings = new HashMap(); + typeMappings.put("properties", propMap); + rootMappings.put(typeName, typeMappings); + + // 7.x以后版本取消了index中关于type的指定,所以mapping的格式只能支持 + // { + // "properties" : { + // "abc" : { + // "type" : "text" + // } + // } + // } + // properties 外不能再嵌套typeName + + if(StringUtils.isNotBlank(dstDynamic)) { + typeMappings.put("dynamic", dstDynamic); + } + if (isGreaterOrEqualThan7) { + mappings = JSON.toJSONString(typeMappings); + } else { + mappings = JSON.toJSONString(rootMappings); + } + if (StringUtils.isBlank(mappings)) { + throw DataXException.asDataXException(ElasticSearchWriterErrorCode.BAD_CONFIG_VALUE, "must have mappings"); + } + + return mappings; + } + + @Override + public List split(int mandatoryNumber) { + List configurations = new ArrayList(mandatoryNumber); + for (int i = 0; i < mandatoryNumber; i++) { + 
configurations.add(this.conf.clone()); + } + return configurations; + } + + @Override + public void post() { + ElasticSearchClient esClient = new ElasticSearchClient(this.conf); + String alias = Key.getAlias(conf); + if (!"".equals(alias)) { + LOGGER.info(String.format("alias [%s] to [%s]", alias, Key.getIndexName(conf))); + try { + esClient.alias(Key.getIndexName(conf), alias, Key.isNeedCleanAlias(conf)); + } catch (IOException e) { + throw DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_ALIAS_MODIFY, e); + } + } + } + + @Override + public void destroy() { + + } + } + + public static class Task extends Writer.Task { + + private static final Logger LOGGER = LoggerFactory.getLogger(Job.class); + + private Configuration conf; + + + ElasticSearchClient esClient = null; + private List typeList; + private List columnList; + private List> deleteByConditions; + + private int trySize; + private long tryInterval; + private int batchSize; + private String index; + private String type; + private String splitter; + private ActionType actionType; + private ElasticSearchColumn combinedIdColumn; + private Map colNameToIndexMap; + private Map urlParams; + private boolean columnSizeChecked = false; + private boolean enableRedundantColumn = false; + private boolean enableWriteNull = true; + int retryTimes = 3; + long sleepTimeInMilliSecond = 10000L; + boolean isGreaterOrEqualThan7 = false; + private String fieldDelimiter; + private boolean hasId; + private PrimaryKeyInfo primaryKeyInfo; + private boolean hasPrimaryKeyInfo = false; + private List esPartitionColumn; + private boolean hasEsPartitionColumn = false; + + @Override + public void init() { + this.conf = super.getPluginJobConf(); + this.index = Key.getIndexName(conf); + this.type = Key.getTypeName(conf); + this.trySize = Key.getTrySize(conf); + this.tryInterval = Key.getTryInterval(conf); + this.batchSize = Key.getBatchSize(conf); + this.splitter = Key.getSplitter(conf); + this.actionType = Key.getActionType(conf); + this.urlParams = Key.getUrlParams(conf); + this.enableWriteNull = Key.isEnableNullUpdate(conf); + this.retryTimes = this.conf.getInt("retryTimes", 3); + this.sleepTimeInMilliSecond = this.conf.getLong("sleepTimeInMilliSecond", 10000L); + this.isGreaterOrEqualThan7 = this.conf.getBool("isGreaterOrEqualThan7", false); + this.parseDeleteCondition(conf); + this.columnList = JSON.parseObject(this.conf.getString(WRITE_COLUMNS), new TypeReference>() { + }); + LOGGER.info("columnList: {}", JSON.toJSONString(columnList)); + this.hasId = this.conf.getBool("hasId", false); + if (hasId) { + LOGGER.info("Task has id column, will use it to set _id property"); + } else { + LOGGER.info("Task will use elasticsearch auto generated _id property"); + } + this.fieldDelimiter = Key.getFieldDelimiter(this.conf); + this.enableRedundantColumn = this.conf.getBool("enableRedundantColumn", false); + this.typeList = new ArrayList(); + for (ElasticSearchColumn esColumn : this.columnList) { + this.typeList.add(ElasticSearchFieldType.getESFieldType(esColumn.getType())); + if (esColumn.getCombineFields() != null && esColumn.getCombineFields().size() > 0 + && ElasticSearchFieldType.getESFieldType(esColumn.getType()).equals(ElasticSearchFieldType.ID)) { + combinedIdColumn = esColumn; + } + } + this.primaryKeyInfo = Key.getPrimaryKeyInfo(this.conf); + this.esPartitionColumn = Key.getEsPartitionColumn(this.conf); + this.colNameToIndexMap = new HashMap(5); + this.handleMetaKeys(); + this.esClient = new ElasticSearchClient(this.conf); + } + + private void 
handleMetaKeys() { + if (null != this.primaryKeyInfo && null != this.primaryKeyInfo.getColumn() + && !this.primaryKeyInfo.getColumn().isEmpty()) { + this.hasPrimaryKeyInfo = true; + if (null == this.primaryKeyInfo.getFieldDelimiter()) { + if (null != this.fieldDelimiter) { + this.primaryKeyInfo.setFieldDelimiter(this.fieldDelimiter); + } else { + this.primaryKeyInfo.setFieldDelimiter(""); + } + } + + for (String eachPk : this.primaryKeyInfo.getColumn()) { + boolean foundKeyInColumn = false; + for (int i = 0; i < columnList.size(); i++) { + if (StringUtils.equals(eachPk, columnList.get(i).getName())) { + this.colNameToIndexMap.put(eachPk, i); + foundKeyInColumn = true; + break; + } + } + if (!foundKeyInColumn) { + throw DataXException.asDataXException(ElasticSearchWriterErrorCode.RECORD_FIELD_NOT_FOUND, + "primaryKeyInfo has column not exists in column"); + } + } + } + + if (null != this.esPartitionColumn && !this.esPartitionColumn.isEmpty()) { + this.hasEsPartitionColumn = true; + for (PartitionColumn eachPartitionCol : this.esPartitionColumn) { + boolean foundKeyInColumn = false; + for (int i = 0; i < columnList.size(); i++) { + if (StringUtils.equals(eachPartitionCol.getName(), columnList.get(i).getName())) { + this.colNameToIndexMap.put(eachPartitionCol.getName(), i); + foundKeyInColumn = true; + break; + } + } + if (!foundKeyInColumn) { + throw DataXException.asDataXException(ElasticSearchWriterErrorCode.RECORD_FIELD_NOT_FOUND, + "esPartitionColumn has column not exists in column"); + } + } + } + } + + private void parseDeleteCondition(Configuration conf) { + List> list = new ArrayList>(); + String config = Key.getDeleteBy(conf); + if (config != null) { + JSONArray array = JSON.parseArray(config); + for (Object obj : array) { + list.add((Map) obj); + } + deleteByConditions = list; + } + } + + + @Override + public void prepare() { + } + + /** + * 示例:{ + * "deleteBy" : [ + * {"product_status" : [-1,-2], "sub_status" : -3}, + * {"product_status" : -3} + * ] + * } + * + * 表示以下两类数据删除: + * 1. product_status为-1或-2并且sub_status为-3 + * 2. 
product_status为-3 + * + * 注意[{}]返回true + * @param record + * @return + */ + private boolean isDeleteRecord(Record record) { + if (deleteByConditions == null) { + return false; + } + + Map kv = new HashMap(); + for (int i = 0; i < record.getColumnNumber(); i++) { + Column column = record.getColumn(i); + String columnName = columnList.get(i).getName(); + kv.put(columnName, column.asString()); + } + + for (Map delCondition : deleteByConditions) { + if (meetAllCondition(kv, delCondition)) { + return true; + } + } + + return false; + } + + private boolean meetAllCondition(Map kv, Map delCondition) { + for (Map.Entry oneCondition : delCondition.entrySet()) { + if (!checkOneCondition(kv, oneCondition)) { + return false; + } + } + return true; + } + + private boolean checkOneCondition(Map kv, Map.Entry entry) { + Object value = kv.get(entry.getKey()); + if (entry.getValue() instanceof List) { + for (Object obj : (List) entry.getValue()) { + if (obj.toString().equals(value)) { + return true; + } + } + } else { + if (value != null && value.equals(entry.getValue().toString())) { + return true; + } + } + return false; + } + + @Override + public void startWrite(RecordReceiver recordReceiver) { + List writerBuffer = new ArrayList(this.batchSize); + Record record = null; + while ((record = recordReceiver.getFromReader()) != null) { + if (!columnSizeChecked) { + boolean isInvalid = true; + if (enableRedundantColumn) { + isInvalid = this.columnList.size() > record.getColumnNumber(); + } else { + isInvalid = this.columnList.size() != record.getColumnNumber(); + } + if (isInvalid) { + String message = String.format( + "column number not equal error, reader column size is %s, but the writer column size is %s", + record.getColumnNumber(), this.columnList.size()); + throw DataXException.asDataXException(ElasticSearchWriterErrorCode.BAD_CONFIG_VALUE, message); + } + columnSizeChecked = true; + } + writerBuffer.add(record); + if (writerBuffer.size() >= this.batchSize) { + this.doBatchInsert(writerBuffer); + writerBuffer.clear(); + } + } + + if (!writerBuffer.isEmpty()) { + this.doBatchInsert(writerBuffer); + writerBuffer.clear(); + } + } + + private String getDateStr(ElasticSearchColumn esColumn, Column column) { + // 如果保持原样,就直接返回 + if (esColumn.isOrigin()) { + return column.asString(); + } + DateTime date = null; + DateTimeZone dtz = DateTimeZone.getDefault(); + if (esColumn.getTimezone() != null) { + // 所有时区参考 http://www.joda.org/joda-time/timezones.html + // TODO:创建一次多处复用 + dtz = DateTimeZone.forID(esColumn.getTimezone()); + } + if (column.getType() != Column.Type.DATE && esColumn.getFormat() != null) { + // TODO:创建一次多处复用 + DateTimeFormatter formatter = DateTimeFormat.forPattern(esColumn.getFormat()); + date = formatter.withZone(dtz).parseDateTime(column.asString()); + return date.toString(); + } else if (column.getType() == Column.Type.DATE) { + if (null == column.getRawData()) { + return null; + } else { + date = new DateTime(column.asLong(), dtz); + return date.toString(); + } + } else { + return column.asString(); + } + } + + private void doBatchInsert(final List writerBuffer) { + Map data = null; + Bulk.Builder bulkactionTmp = null; + int totalNumber = writerBuffer.size(); + int dirtyDataNumber = 0; + if (this.isGreaterOrEqualThan7) { + bulkactionTmp = new Bulk.Builder().defaultIndex(this.index); + } else { + bulkactionTmp = new Bulk.Builder().defaultIndex(this.index).defaultType(this.type); + } + final Bulk.Builder bulkaction = bulkactionTmp; + // 增加url的参数 + for (Map.Entry entry : urlParams.entrySet()) { 
+ bulkaction.setParameter(entry.getKey(), entry.getValue()); + } + for (Record record : writerBuffer) { + data = new HashMap(); + String id = null; + String parent = null; + String routing = null; + String version = null; + String columnName = null; + Column column = null; + try { + for (int i = 0; i < record.getColumnNumber(); i++) { + column = record.getColumn(i); + columnName = columnList.get(i).getName(); + // 如果组合id不等于null,需要把相关的字段全部忽略 + if (combinedIdColumn != null) { + if (combinedIdColumn.getCombineFields().contains(columnName)) { + continue; + } + } + //如果是json数组,当成对象类型处理 + ElasticSearchFieldType columnType = columnList.get(i).isJsonArray() ? ElasticSearchFieldType.NESTED : typeList.get(i); + + Boolean dstArray = columnList.get(i).isDstArray(); + + //如果是数组类型,那它传入的是字符串类型,也有可能是null + if (columnList.get(i).isArray() && null != column.asString()) { + String[] dataList = column.asString().split(splitter); + if (!columnType.equals(ElasticSearchFieldType.DATE)) { + if (dstArray) { + try { + // 根据客户配置的类型,转换成相应的类型 + switch (columnType) { + case BYTE: + case KEYWORD: + case TEXT: + data.put(columnName, dataList); + break; + case SHORT: + case INTEGER: + if (StringUtils.isBlank(column.asString().trim())) { + data.put(columnName, null); + } else { + Integer[] intDataList = new Integer[dataList.length]; + for (int j = 0; j < dataList.length; j++) { + dataList[j] = dataList[j].trim(); + if (StringUtils.isNotBlank(dataList[j])) { + intDataList[j] = Integer.valueOf(dataList[j]); + } + } + data.put(columnName, intDataList); + } + break; + case LONG: + if (StringUtils.isBlank(column.asString().trim())) { + data.put(columnName, null); + } else { + Long[] longDataList = new Long[dataList.length]; + for (int j = 0; j < dataList.length; j++) { + dataList[j] = dataList[j].trim(); + if (StringUtils.isNotBlank(dataList[j])) { + longDataList[j] = Long.valueOf(dataList[j]); + } + } + data.put(columnName, longDataList); + } + break; + case FLOAT: + case DOUBLE: + if (StringUtils.isBlank(column.asString().trim())) { + data.put(columnName, null); + } else { + Double[] doubleDataList = new Double[dataList.length]; + for (int j = 0; j < dataList.length; j++) { + dataList[j] = dataList[j].trim(); + if (StringUtils.isNotBlank(dataList[j])) { + doubleDataList[j] = Double.valueOf(dataList[j]); + } + } + data.put(columnName, doubleDataList); + } + break; + default: + data.put(columnName, dataList); + break; + } + } catch (Exception e) { + LOGGER.info("脏数据,记录:{}", record.toString()); + continue; + } + } else { + data.put(columnName, dataList); + } + } else { + data.put(columnName, dataList); + } + } else { + // LOGGER.info("columnType: {} integer: {}", columnType, column.asString()); + switch (columnType) { + case ID: + if (id != null) { + id += record.getColumn(i).asString(); + } else { + id = record.getColumn(i).asString(); + } + break; + case PARENT: + if (parent != null) { + parent += record.getColumn(i).asString(); + } else { + parent = record.getColumn(i).asString(); + } + break; + case ROUTING: + if (routing != null) { + routing += record.getColumn(i).asString(); + } else { + routing = record.getColumn(i).asString(); + } + break; + + case VERSION: + if (version != null) { + version += record.getColumn(i).asString(); + } else { + version = record.getColumn(i).asString(); + } + break; + case DATE: + String dateStr = getDateStr(columnList.get(i), column); + data.put(columnName, dateStr); + break; + case KEYWORD: + case STRING: + case TEXT: + case IP: + case GEO_POINT: + case IP_RANGE: + data.put(columnName, 
column.asString()); + break; + case BOOLEAN: + data.put(columnName, column.asBoolean()); + break; + case BYTE: + case BINARY: + // json序列化不支持byte类型,es支持的binary类型,必须传入base64的格式 + data.put(columnName, column.asString()); + break; + case LONG: + data.put(columnName, column.asLong()); + break; + case INTEGER: + data.put(columnName, column.asLong()); + break; + case SHORT: + data.put(columnName, column.asLong()); + break; + case FLOAT: + case DOUBLE: + data.put(columnName, column.asDouble()); + break; + case GEO_SHAPE: + case DATE_RANGE: + case INTEGER_RANGE: + case FLOAT_RANGE: + case LONG_RANGE: + case DOUBLE_RANGE: + if (null == column.asString()) { + data.put(columnName, column.asString()); + } else { + data.put(columnName, JSON.parse(column.asString())); + } + break; + case NESTED: + case OBJECT: + if (null == column.asString()) { + data.put(columnName, column.asString()); + } else { + // 转json格式 + data.put(columnName, JSON.parse(column.asString())); + } + break; + default: + throw DataXException.asDataXException(ElasticSearchWriterErrorCode.BAD_CONFIG_VALUE, String.format( + "Type error: unsupported type %s for column %s", columnType, columnName)); + } + } + } + + + if (this.hasPrimaryKeyInfo) { + List idData = new ArrayList(); + for (String eachCol : this.primaryKeyInfo.getColumn()) { + Column recordColumn = record.getColumn(this.colNameToIndexMap.get(eachCol)); + idData.add(recordColumn.asString()); + } + id = StringUtils.join(idData, this.primaryKeyInfo.getFieldDelimiter()); + } + if (this.hasEsPartitionColumn) { + List idData = new ArrayList(); + for (PartitionColumn eachCol : this.esPartitionColumn) { + Column recordColumn = record.getColumn(this.colNameToIndexMap.get(eachCol.getName())); + idData.add(recordColumn.asString()); + } + routing = StringUtils.join(idData, ""); + } + } catch (Exception e) { + // 脏数据 + super.getTaskPluginCollector().collectDirtyRecord(record, + String.format("parse error for column: %s errorMessage: %s", columnName, e.getMessage())); + dirtyDataNumber++; + // 处理下一个record + continue; + } + + if (LOGGER.isDebugEnabled()) { + LOGGER.debug("id: {} routing: {} data: {}", id, routing, JSON.toJSONString(data)); + } + + + if (isDeleteRecord(record)) { + Delete.Builder builder = new Delete.Builder(id); + bulkaction.addAction(builder.build()); + } else { + // 使用用户自定义组合唯一键 + if (combinedIdColumn != null) { + try { + id = processIDCombineFields(record, combinedIdColumn); + // LOGGER.debug("id: {}", id); + } catch (Exception e) { + // 脏数据 + super.getTaskPluginCollector().collectDirtyRecord(record, + String.format("parse error for column: %s errorMessage: %s", columnName, e.getMessage())); + // 处理下一个record + dirtyDataNumber++; + continue; + } + } + switch (actionType) { + case INDEX: + // 先进行json序列化,jest client的gson序列化会把等号按照html序列化 + Index.Builder builder = null; + if (this.enableWriteNull) { + builder = new Index.Builder( + JSONObject.toJSONString(data, JSONWriter.Feature.WriteMapNullValue, + JSONWriter.Feature.WriteEnumUsingToString)); + } else { + builder = new Index.Builder(JSONObject.toJSONString(data)); + } + if (id != null) { + builder.id(id); + } + if (parent != null) { + builder.setParameter(Parameters.PARENT, parent); + } + if (routing != null) { + builder.setParameter(Parameters.ROUTING, routing); + } + if (version != null) { + builder.setParameter(Parameters.VERSION, version); + builder.setParameter(Parameters.VERSION_TYPE, "external"); + } + bulkaction.addAction(builder.build()); + break; + case UPDATE: + // doc: 
https://www.cnblogs.com/crystaltu/articles/6992935.html + // doc: https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-update.html + Map updateDoc = new HashMap(); + updateDoc.put("doc", data); + updateDoc.put("doc_as_upsert", true); + Update.Builder update = null; + if (this.enableWriteNull) { + // write: {a:"1",b:null} + update = new Update.Builder( + JSONObject.toJSONString(updateDoc, JSONWriter.Feature.WriteMapNullValue, + JSONWriter.Feature.WriteEnumUsingToString)); + // 在DEFAULT_GENERATE_FEATURE基础上,只增加了SerializerFeature.WRITE_MAP_NULL_FEATURES + } else { + // write: {"a":"1"} + update = new Update.Builder(JSONObject.toJSONString(updateDoc)); + } + if (id != null) { + update.id(id); + } + if (parent != null) { + update.setParameter(Parameters.PARENT, parent); + } + if (routing != null) { + update.setParameter(Parameters.ROUTING, routing); + } + // version type [EXTERNAL] is not supported by the update API + if (version != null) { + update.setParameter(Parameters.VERSION, version); + } + bulkaction.addAction(update.build()); + break; + default: + break; + } + } + } + + if (dirtyDataNumber >= totalNumber) { + // all batch is dirty data + LOGGER.warn("all this batch is dirty data, dirtyDataNumber: {} totalDataNumber: {}", dirtyDataNumber, + totalNumber); + return; + } + + BulkResult bulkResult = null; + try { + bulkResult = RetryUtil.executeWithRetry(new Callable() { + @Override + public BulkResult call() throws Exception { + JestResult jestResult = esClient.bulkInsert(bulkaction); + if (jestResult.isSucceeded()) { + return null; + } + String msg = String.format("response code: [%d] error :[%s]", jestResult.getResponseCode(), + jestResult.getErrorMessage()); + LOGGER.warn(msg); + if (esClient.isBulkResult(jestResult)) { + BulkResult brst = (BulkResult) jestResult; + List failedItems = brst.getFailedItems(); + for (BulkResult.BulkResultItem item : failedItems) { + if (item.status != 400) { + // 400 BAD_REQUEST 如果非数据异常,请求异常,则不允许忽略 + throw DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_INDEX_INSERT, + String.format("status:[%d], error: %s", item.status, item.error)); + } else { + // 如果用户选择不忽略解析错误,则抛异常,默认为忽略 + if (!Key.isIgnoreParseError(conf)) { + throw new NoReRunException(ElasticSearchWriterErrorCode.ES_INDEX_INSERT, + String.format( + "status:[%d], error: %s, config not ignoreParseError so throw this error", + item.status, item.error)); + } + } + } + return brst; + } else { + Integer status = esClient.getStatus(jestResult); + switch (status) { + case 429: // TOO_MANY_REQUESTS + LOGGER.warn("server response too many requests, so auto reduce speed"); + break; + default: + break; + } + throw DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_INDEX_INSERT, + jestResult.getErrorMessage()); + } + } + }, this.trySize, this.tryInterval, false, Arrays.asList(DataXException.class)); + } catch (Exception e) { + if (Key.isIgnoreWriteError(this.conf)) { + LOGGER.warn(String.format("Retry [%d] write failed, ignore the error, continue to write!", trySize)); + } else { + throw DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_INDEX_INSERT, e.getMessage(), e); + } + } + + if (null != bulkResult) { + List items = bulkResult.getItems(); + for (int idx = 0; idx < items.size(); ++idx) { + BulkResult.BulkResultItem item = items.get(idx); + if (item.error != null && !"".equals(item.error)) { + super.getTaskPluginCollector().collectDirtyRecord(writerBuffer.get(idx), + String.format("status:[%d], error: %s", item.status, item.error)); + } + } + } + } + 
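+        /**
+         * Resolve the index of the configured column whose name equals columnName.
+         * The result is cached in colNameToIndexMap; if the name matches more than one
+         * configured column, a RECORD_FIELD_NOT_FOUND error is raised so that combine-id
+         * fields resolve to exactly one source column.
+         */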
+ private int getRecordColumnIndex(Record record, String columnName) { + if (colNameToIndexMap.containsKey(columnName)) { + return colNameToIndexMap.get(columnName); + } + + List columns = new ArrayList(); + int index = -1; + for (int i=0; i 1) { + throw DataXException.asDataXException( + ElasticSearchWriterErrorCode.RECORD_FIELD_NOT_FOUND, + "record has multiple columns found by name: " + columnName); + } + + colNameToIndexMap.put(columnName, index); + return index; + } + + private String processIDCombineFields(Record record, ElasticSearchColumn esColumn) { + List values = new ArrayList(esColumn.getCombineFields().size()); + for (String field : esColumn.getCombineFields()) { + int colIndex = getRecordColumnIndex(record, field); + Column col = record.getColumnNumber() <= colIndex ? null : record.getColumn(colIndex); + if (col == null) { + throw DataXException.asDataXException(ElasticSearchWriterErrorCode.RECORD_FIELD_NOT_FOUND, field); + } + values.add(col.asString()); + } + return Joiner.on(esColumn.getCombineFieldsValueSeparator()).join(values); + } + + @Override + public void post() { + } + + @Override + public void destroy() { + try { + this.esClient.closeJestClient(); + } catch (Exception e) { + LOGGER.warn("ignore close jest client error: {}", e.getMessage()); + } + } + + } +} diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchWriterErrorCode.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchWriterErrorCode.java new file mode 100644 index 00000000..c9b02532 --- /dev/null +++ b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ElasticSearchWriterErrorCode.java @@ -0,0 +1,41 @@ +package com.alibaba.datax.plugin.writer.elasticsearchwriter; + +import com.alibaba.datax.common.spi.ErrorCode; + +public enum ElasticSearchWriterErrorCode implements ErrorCode { + BAD_CONFIG_VALUE("ESWriter-00", "The value you configured is not valid."), + ES_INDEX_DELETE("ESWriter-01", "Delete index error."), + ES_INDEX_CREATE("ESWriter-02", "Index creation error."), + ES_MAPPINGS("ESWriter-03", "The mappings error."), + ES_INDEX_INSERT("ESWriter-04", "Insert data error."), + ES_ALIAS_MODIFY("ESWriter-05", "Alias modification error."), + JSON_PARSE("ESWrite-06", "Json format parsing error"), + UPDATE_WITH_ID("ESWrite-07", "Update mode must specify column type with id"), + RECORD_FIELD_NOT_FOUND("ESWrite-08", "Field does not exist in the original table"), + ES_GET_SETTINGS("ESWriter-09", "get settings failed"); + ; + + private final String code; + private final String description; + + ElasticSearchWriterErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.description; + } + + @Override + public String toString() { + return String.format("Code:[%s], Description:[%s]. 
", this.code, + this.description); + } +} diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/JsonPathUtil.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/JsonPathUtil.java new file mode 100644 index 00000000..e7619e7c --- /dev/null +++ b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/JsonPathUtil.java @@ -0,0 +1,28 @@ +package com.alibaba.datax.plugin.writer.elasticsearchwriter; + +import java.util.List; + +import com.alibaba.fastjson2.JSONObject; + +public class JsonPathUtil { + + public static JSONObject getJsonObject(List paths, JSONObject data) { + if (null == paths || paths.isEmpty()) { + return data; + } + + if (null == data) { + return null; + } + + JSONObject dataTmp = data; + for (String each : paths) { + if (null != dataTmp) { + dataTmp = dataTmp.getJSONObject(each); + } else { + return null; + } + } + return dataTmp; + } +} \ No newline at end of file diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/JsonUtil.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/JsonUtil.java new file mode 100644 index 00000000..ad6c01be --- /dev/null +++ b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/JsonUtil.java @@ -0,0 +1,54 @@ +package com.alibaba.datax.plugin.writer.elasticsearchwriter; + +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONException; +import com.alibaba.fastjson2.JSONObject; + +/** + * @author bozu + * @date 2021/01/06 + */ +public class JsonUtil { + + /** + * 合并两个json + * @param source 源json + * @param target 目标json + * @return 合并后的json + * @throws JSONException + */ + public static String mergeJsonStr(String source, String target) throws JSONException { + if(source == null) { + return target; + } + if(target == null) { + return source; + } + return JSON.toJSONString(deepMerge(JSON.parseObject(source), JSON.parseObject(target))); + } + + /** + * 深度合并两个json对象,将source的值,merge到target中 + * @param source 源json + * @param target 目标json + * @return 合并后的json + * @throws JSONException + */ + private static JSONObject deepMerge(JSONObject source, JSONObject target) throws JSONException { + for (String key: source.keySet()) { + Object value = source.get(key); + if (target.containsKey(key)) { + // existing value for "key" - recursively deep merge: + if (value instanceof JSONObject) { + JSONObject valueJson = (JSONObject)value; + deepMerge(valueJson, target.getJSONObject(key)); + } else { + target.put(key, value); + } + } else { + target.put(key, value); + } + } + return target; + } +} diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/Key.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/Key.java index 0f2d3f5c..fcaac935 100644 --- a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/Key.java +++ b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/Key.java @@ -1,9 +1,13 @@ package com.alibaba.datax.plugin.writer.elasticsearchwriter; import com.alibaba.datax.common.util.Configuration; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.TypeReference; + import org.apache.commons.lang3.StringUtils; import java.util.HashMap; +import java.util.List; import java.util.Map; public final class Key { @@ -37,31 +41,35 @@ public final class Key { 
public static String getEndpoint(Configuration conf) { - return conf.getNecessaryValue("endpoint", ESWriterErrorCode.BAD_CONFIG_VALUE); + return conf.getNecessaryValue("endpoint", ElasticSearchWriterErrorCode.BAD_CONFIG_VALUE); } - public static String getAccessID(Configuration conf) { - return conf.getString("accessId", ""); + public static String getUsername(Configuration conf) { + return conf.getString("username", conf.getString("accessId")); } - public static String getAccessKey(Configuration conf) { - return conf.getString("accessKey", ""); + public static String getPassword(Configuration conf) { + return conf.getString("password", conf.getString("accessKey")); } public static int getBatchSize(Configuration conf) { - return conf.getInt("batchSize", 1000); + return conf.getInt("batchSize", 1024); } public static int getTrySize(Configuration conf) { return conf.getInt("trySize", 30); } + public static long getTryInterval(Configuration conf) { + return conf.getLong("tryInterval", 60000L); + } + public static int getTimeout(Configuration conf) { return conf.getInt("timeout", 600000); } - public static boolean isCleanup(Configuration conf) { - return conf.getBool("cleanup", false); + public static boolean isTruncate(Configuration conf) { + return conf.getBool("truncate", conf.getBool("cleanup", false)); } public static boolean isDiscovery(Configuration conf) { @@ -69,7 +77,7 @@ public final class Key { } public static boolean isCompression(Configuration conf) { - return conf.getBool("compression", true); + return conf.getBool("compress", conf.getBool("compression", true)); } public static boolean isMultiThread(Configuration conf) { @@ -77,9 +85,17 @@ public final class Key { } public static String getIndexName(Configuration conf) { - return conf.getNecessaryValue("index", ESWriterErrorCode.BAD_CONFIG_VALUE); + return conf.getNecessaryValue("index", ElasticSearchWriterErrorCode.BAD_CONFIG_VALUE); } + public static String getDeleteBy(Configuration conf) { + return conf.getString("deleteBy"); + } + + + /** + * TODO: 在7.0开始,一个索引只能建一个Type为_doc + * */ public static String getTypeName(Configuration conf) { String indexType = conf.getString("indexType"); if(StringUtils.isBlank(indexType)){ @@ -128,4 +144,58 @@ public final class Key { public static boolean getDynamic(Configuration conf) { return conf.getBool("dynamic", false); } + + public static String getDstDynamic(Configuration conf) { + return conf.getString("dstDynamic"); + } + + public static String getDiscoveryFilter(Configuration conf){ + return conf.getString("discoveryFilter","_all"); + } + + public static Boolean getVersioning(Configuration conf) { + return conf.getBool("versioning", false); + } + + public static Long getUnifiedVersion(Configuration conf) { + return conf.getLong("version", System.currentTimeMillis()); + } + + public static Map getUrlParams(Configuration conf) { + return conf.getMap("urlParams", new HashMap()); + } + + public static Integer getESVersion(Configuration conf) { + return conf.getInt("esVersion"); + } + + public static String getMasterTimeout(Configuration conf) { + return conf.getString("masterTimeout", "5m"); + } + + public static boolean isEnableNullUpdate(Configuration conf) { + return conf.getBool("enableWriteNull", true); + } + + public static String getFieldDelimiter(Configuration conf) { + return conf.getString("fieldDelimiter", ""); + } + + public static PrimaryKeyInfo getPrimaryKeyInfo(Configuration conf) { + String primaryKeyInfoString = conf.getString("primaryKeyInfo"); + if 
(StringUtils.isNotBlank(primaryKeyInfoString)) { + return JSON.parseObject(primaryKeyInfoString, new TypeReference() {}); + } else { + return null; + } + } + + public static List getEsPartitionColumn(Configuration conf) { + String esPartitionColumnString = conf.getString("esPartitionColumn"); + if (StringUtils.isNotBlank(esPartitionColumnString)) { + return JSON.parseObject(esPartitionColumnString, new TypeReference>() {}); + } else { + return null; + } + } } diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/NoReRunException.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/NoReRunException.java new file mode 100644 index 00000000..52064e58 --- /dev/null +++ b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/NoReRunException.java @@ -0,0 +1,16 @@ +package com.alibaba.datax.plugin.writer.elasticsearchwriter; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.spi.ErrorCode; + +public class NoReRunException extends DataXException { + public NoReRunException(String errorMessage) { + super(errorMessage); + } + + public NoReRunException(ErrorCode errorCode, String errorMessage) { + super(errorCode, errorMessage); + } + + private static final long serialVersionUID = 1L; +} \ No newline at end of file diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/PartitionColumn.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/PartitionColumn.java new file mode 100644 index 00000000..b99829b2 --- /dev/null +++ b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/PartitionColumn.java @@ -0,0 +1,42 @@ +package com.alibaba.datax.plugin.writer.elasticsearchwriter; + +public class PartitionColumn { + private String name; + // like: DATA + private String metaType; + private String comment; + // like: VARCHAR + private String type; + + public String getName() { + return name; + } + + public String getMetaType() { + return metaType; + } + + public String getComment() { + return comment; + } + + public String getType() { + return type; + } + + public void setName(String name) { + this.name = name; + } + + public void setMetaType(String metaType) { + this.metaType = metaType; + } + + public void setComment(String comment) { + this.comment = comment; + } + + public void setType(String type) { + this.type = type; + } +} \ No newline at end of file diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/PrimaryKeyInfo.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/PrimaryKeyInfo.java new file mode 100644 index 00000000..b5821f51 --- /dev/null +++ b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/PrimaryKeyInfo.java @@ -0,0 +1,47 @@ +package com.alibaba.datax.plugin.writer.elasticsearchwriter; + +import java.util.List; + +public class PrimaryKeyInfo { + + /** + * 主键类型:PrimaryKeyTypeEnum + * + * pk: 单个(业务)主键 specific: 联合主键 + */ + private String type; + + /** + * 用户定义的联合主键的连接符号 + */ + private String fieldDelimiter; + + /** + * 主键的列的名称 + */ + private List column; + + public String getType() { + return type; + } + + public String getFieldDelimiter() { + return fieldDelimiter; + } + + public List getColumn() { + return column; + } + + public void setType(String type) { + this.type = type; + } + + public void 
setFieldDelimiter(String fieldDelimiter) { + this.fieldDelimiter = fieldDelimiter; + } + + public void setColumn(List column) { + this.column = column; + } +} \ No newline at end of file diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/jest/ClusterInfo.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/jest/ClusterInfo.java new file mode 100644 index 00000000..173bc9e2 --- /dev/null +++ b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/jest/ClusterInfo.java @@ -0,0 +1,35 @@ +package com.alibaba.datax.plugin.writer.elasticsearchwriter.jest; + +import com.google.gson.Gson; +import io.searchbox.action.AbstractAction; +import io.searchbox.client.config.ElasticsearchVersion; + +public class ClusterInfo extends AbstractAction { + @Override + protected String buildURI(ElasticsearchVersion elasticsearchVersion) { + return ""; + } + + @Override + public String getRestMethodName() { + return "GET"; + } + + @Override + public ClusterInfoResult createNewElasticSearchResult(String responseBody, int statusCode, String reasonPhrase, Gson gson) { + return createNewElasticSearchResult(new ClusterInfoResult(gson), responseBody, statusCode, reasonPhrase, gson); + } + + public static class Builder extends AbstractAction.Builder { + + public Builder() { + setHeader("accept", "application/json"); + setHeader("content-type", "application/json"); + } + + @Override + public ClusterInfo build() { + return new ClusterInfo(); + } + } +} diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/jest/ClusterInfoResult.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/jest/ClusterInfoResult.java new file mode 100644 index 00000000..b4f49a37 --- /dev/null +++ b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/jest/ClusterInfoResult.java @@ -0,0 +1,49 @@ +package com.alibaba.datax.plugin.writer.elasticsearchwriter.jest; + +import com.google.gson.Gson; +import io.searchbox.client.JestResult; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class ClusterInfoResult extends JestResult { + + private static final Pattern FIRST_NUMBER = Pattern.compile("\\d"); + + private static final int SEVEN = 7; + + public ClusterInfoResult(Gson gson) { + super(gson); + } + + public ClusterInfoResult(JestResult source) { + super(source); + } + + /** + * 判断es集群的部署版本是否大于7.x + * 大于7.x的es对于Index的type有较大改动,需要做额外判定 + * 对于7.x与6.x版本的es都做过测试,返回符合预期;5.x以下版本直接try-catch后返回false,向下兼容 + * @return + */ + public Boolean isGreaterOrEqualThan7() throws Exception { + // 如果是没有权限,直接返回false,兼容老版本 + if (responseCode == 403) { + return false; + } + if (!isSucceeded) { + throw new Exception(getJsonString()); + } + try { + String version = jsonObject.getAsJsonObject("version").get("number").toString(); + Matcher matcher = FIRST_NUMBER.matcher(version); + matcher.find(); + String number = matcher.group(); + Integer versionNum = Integer.valueOf(number); + return versionNum >= SEVEN; + } catch (Exception e) { + //5.x 以下版本不做兼容测试,如果返回json格式解析失败,有可能是以下版本,所以认为不大于7.x + return false; + } + } +} diff --git a/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/jest/PutMapping7.java b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/jest/PutMapping7.java new file mode 100644 index 00000000..c9f1d6be --- /dev/null +++ 
b/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/jest/PutMapping7.java @@ -0,0 +1,39 @@ +package com.alibaba.datax.plugin.writer.elasticsearchwriter.jest; + +import io.searchbox.action.GenericResultAbstractAction; +import io.searchbox.client.config.ElasticsearchVersion; + +public class PutMapping7 extends GenericResultAbstractAction { + protected PutMapping7(PutMapping7.Builder builder) { + super(builder); + + this.indexName = builder.index; + this.payload = builder.source; + } + + @Override + protected String buildURI(ElasticsearchVersion elasticsearchVersion) { + return super.buildURI(elasticsearchVersion) + "/_mapping"; + } + + @Override + public String getRestMethodName() { + return "PUT"; + } + + public static class Builder extends GenericResultAbstractAction.Builder { + private String index; + private Object source; + + public Builder(String index, Object source) { + this.index = index; + this.source = source; + } + + @Override + public PutMapping7 build() { + return new PutMapping7(this); + } + } + +} diff --git a/elasticsearchwriter/src/main/resources/plugin.json b/elasticsearchwriter/src/main/resources/plugin.json index b6e6384b..b39f1222 100644 --- a/elasticsearchwriter/src/main/resources/plugin.json +++ b/elasticsearchwriter/src/main/resources/plugin.json @@ -1,6 +1,6 @@ { "name": "elasticsearchwriter", - "class": "com.alibaba.datax.plugin.writer.elasticsearchwriter.ESWriter", + "class": "com.alibaba.datax.plugin.writer.elasticsearchwriter.ElasticSearchWriter", "description": "适用于: 生产环境. 原理: TODO", "developer": "alibaba" } \ No newline at end of file diff --git a/ftpwriter/doc/ftpwriter.md b/ftpwriter/doc/ftpwriter.md index 6b1b2687..a38a1052 100644 --- a/ftpwriter/doc/ftpwriter.md +++ b/ftpwriter/doc/ftpwriter.md @@ -24,7 +24,7 @@ FtpWriter实现了从DataX协议转为FTP文件功能,FTP文件本身是无结 我们不能做到: -1. 单个文件不能支持并发写入。 +1. 
单个文件并发写入。 ## 3 功能说明 diff --git a/ftpwriter/src/main/java/com/alibaba/datax/plugin/writer/ftpwriter/util/SftpHelperImpl.java b/ftpwriter/src/main/java/com/alibaba/datax/plugin/writer/ftpwriter/util/SftpHelperImpl.java index e6d78629..e748f12c 100644 --- a/ftpwriter/src/main/java/com/alibaba/datax/plugin/writer/ftpwriter/util/SftpHelperImpl.java +++ b/ftpwriter/src/main/java/com/alibaba/datax/plugin/writer/ftpwriter/util/SftpHelperImpl.java @@ -14,8 +14,8 @@ import org.slf4j.LoggerFactory; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.plugin.writer.ftpwriter.FtpWriterErrorCode; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.serializer.SerializerFeature; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONWriter; import com.jcraft.jsch.ChannelSftp; import com.jcraft.jsch.JSch; import com.jcraft.jsch.JSchException; @@ -251,7 +251,7 @@ public class SftpHelperImpl implements IFtpHelper { @SuppressWarnings("rawtypes") Vector allFiles = this.channelSftp.ls(dir); LOG.debug(String.format("ls: %s", JSON.toJSONString(allFiles, - SerializerFeature.UseSingleQuotes))); + JSONWriter.Feature.UseSingleQuotes))); for (int i = 0; i < allFiles.size(); i++) { LsEntry le = (LsEntry) allFiles.get(i); String strName = le.getFilename(); diff --git a/ftpwriter/src/main/java/com/alibaba/datax/plugin/writer/ftpwriter/util/StandardFtpHelperImpl.java b/ftpwriter/src/main/java/com/alibaba/datax/plugin/writer/ftpwriter/util/StandardFtpHelperImpl.java index 8999b0a8..d5b9a746 100644 --- a/ftpwriter/src/main/java/com/alibaba/datax/plugin/writer/ftpwriter/util/StandardFtpHelperImpl.java +++ b/ftpwriter/src/main/java/com/alibaba/datax/plugin/writer/ftpwriter/util/StandardFtpHelperImpl.java @@ -18,8 +18,8 @@ import org.slf4j.LoggerFactory; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.plugin.writer.ftpwriter.FtpWriterErrorCode; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.serializer.SerializerFeature; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONWriter; public class StandardFtpHelperImpl implements IFtpHelper { private static final Logger LOG = LoggerFactory @@ -244,7 +244,7 @@ public class StandardFtpHelperImpl implements IFtpHelper { FTPFile[] fs = this.ftpClient.listFiles(dir); // LOG.debug(JSON.toJSONString(this.ftpClient.listNames(dir))); LOG.debug(String.format("ls: %s", - JSON.toJSONString(fs, SerializerFeature.UseSingleQuotes))); + JSON.toJSONString(fs, JSONWriter.Feature.UseSingleQuotes))); for (FTPFile ff : fs) { String strName = ff.getName(); if (strName.startsWith(prefixFileName)) { diff --git a/gdbwriter/src/main/java/com/alibaba/datax/plugin/writer/gdbwriter/mapping/DefaultGdbMapper.java b/gdbwriter/src/main/java/com/alibaba/datax/plugin/writer/gdbwriter/mapping/DefaultGdbMapper.java index 73a94cf5..2c015879 100644 --- a/gdbwriter/src/main/java/com/alibaba/datax/plugin/writer/gdbwriter/mapping/DefaultGdbMapper.java +++ b/gdbwriter/src/main/java/com/alibaba/datax/plugin/writer/gdbwriter/mapping/DefaultGdbMapper.java @@ -19,8 +19,8 @@ import com.alibaba.datax.plugin.writer.gdbwriter.Key; import com.alibaba.datax.plugin.writer.gdbwriter.model.GdbEdge; import com.alibaba.datax.plugin.writer.gdbwriter.model.GdbElement; import com.alibaba.datax.plugin.writer.gdbwriter.model.GdbVertex; -import com.alibaba.fastjson.JSONArray; -import com.alibaba.fastjson.JSONObject; +import com.alibaba.fastjson2.JSONArray; +import com.alibaba.fastjson2.JSONObject; import 
lombok.extern.slf4j.Slf4j; diff --git a/gdbwriter/src/main/java/com/alibaba/datax/plugin/writer/gdbwriter/util/ConfigHelper.java b/gdbwriter/src/main/java/com/alibaba/datax/plugin/writer/gdbwriter/util/ConfigHelper.java index 178b5e7c..644f8898 100644 --- a/gdbwriter/src/main/java/com/alibaba/datax/plugin/writer/gdbwriter/util/ConfigHelper.java +++ b/gdbwriter/src/main/java/com/alibaba/datax/plugin/writer/gdbwriter/util/ConfigHelper.java @@ -12,8 +12,8 @@ import org.apache.commons.lang3.StringUtils; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; import com.alibaba.datax.plugin.writer.gdbwriter.GdbWriterErrorCode; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.JSONObject; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONObject; /** * @author jerrywang diff --git a/hbase094xreader/src/main/java/com/alibaba/datax/plugin/reader/hbase094xreader/Hbase094xHelper.java b/hbase094xreader/src/main/java/com/alibaba/datax/plugin/reader/hbase094xreader/Hbase094xHelper.java index c3e2a212..b9f16b17 100644 --- a/hbase094xreader/src/main/java/com/alibaba/datax/plugin/reader/hbase094xreader/Hbase094xHelper.java +++ b/hbase094xreader/src/main/java/com/alibaba/datax/plugin/reader/hbase094xreader/Hbase094xHelper.java @@ -2,8 +2,8 @@ package com.alibaba.datax.plugin.reader.hbase094xreader; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.TypeReference; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.TypeReference; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.Validate; import org.apache.hadoop.fs.Path; diff --git a/hbase094xwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase094xwriter/Hbase094xHelper.java b/hbase094xwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase094xwriter/Hbase094xHelper.java index f671d31d..00b128f3 100644 --- a/hbase094xwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase094xwriter/Hbase094xHelper.java +++ b/hbase094xwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase094xwriter/Hbase094xHelper.java @@ -2,8 +2,8 @@ package com.alibaba.datax.plugin.writer.hbase094xwriter; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.TypeReference; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.TypeReference; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.Validate; import org.apache.hadoop.fs.Path; diff --git a/hbase11xreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xreader/Hbase11xHelper.java b/hbase11xreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xreader/Hbase11xHelper.java index 643072a9..82ad7122 100644 --- a/hbase11xreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xreader/Hbase11xHelper.java +++ b/hbase11xreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xreader/Hbase11xHelper.java @@ -2,8 +2,8 @@ package com.alibaba.datax.plugin.reader.hbase11xreader; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.TypeReference; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.TypeReference; import org.apache.commons.lang3.StringUtils; import 
org.apache.commons.lang3.Validate; import org.apache.hadoop.hbase.HBaseConfiguration; diff --git a/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/HbaseSQLHelper.java b/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/HbaseSQLHelper.java index 5309d1d9..71665a6b 100644 --- a/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/HbaseSQLHelper.java +++ b/hbase11xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase11xsqlreader/HbaseSQLHelper.java @@ -2,8 +2,8 @@ package com.alibaba.datax.plugin.reader.hbase11xsqlreader; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.TypeReference; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.TypeReference; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.mapreduce.InputSplit; diff --git a/hbase11xsqlreader/src/main/resources/plugin.json b/hbase11xsqlreader/src/main/resources/plugin.json index e245ca27..162f5712 100644 --- a/hbase11xsqlreader/src/main/resources/plugin.json +++ b/hbase11xsqlreader/src/main/resources/plugin.json @@ -2,6 +2,6 @@ "name": "hbase11xsqlreader", "class": "com.alibaba.datax.plugin.reader.hbase11xsqlreader.HbaseSQLReader", "description": "useScene: prod. mechanism: Scan to read data.", - "developer": "liwei.li, bug reported to : liwei.li@alibaba-inc.com" + "developer": "alibaba" } diff --git a/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/HbaseSQLHelper.java b/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/HbaseSQLHelper.java index 41e57d4e..d1b23fdf 100644 --- a/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/HbaseSQLHelper.java +++ b/hbase11xsqlwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xsqlwriter/HbaseSQLHelper.java @@ -2,8 +2,8 @@ package com.alibaba.datax.plugin.writer.hbase11xsqlwriter; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.TypeReference; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.TypeReference; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.util.Pair; diff --git a/hbase11xwriter/doc/hbase11xwriter.md b/hbase11xwriter/doc/hbase11xwriter.md index ff20abe9..969f2e47 100644 --- a/hbase11xwriter/doc/hbase11xwriter.md +++ b/hbase11xwriter/doc/hbase11xwriter.md @@ -203,19 +203,20 @@ HbaseWriter 插件实现了从向Hbase中写取数据。在底层实现上,Hba * 描述:要写入的hbase字段。index:指定该列对应reader端column的索引,从0开始;name:指定hbase表中的列,必须为 列族:列名 的格式;type:指定写入数据类型,用于转换HBase byte[]。配置格式如下: ``` -"column": [ - { - "index":1, - "name": "cf1:q1", - "type": "string" - }, - { - "index":2, - "name": "cf1:q2", - "type": "string" - } - ] - + + "column": [ + { + "index":1, + "name": "cf1:q1", + "type": "string" + }, + { + "index":2, + "name": "cf1:q2", + "type": "string" + } + ] + ``` * 必选:是
@@ -227,17 +228,17 @@ HbaseWriter 插件实现了从向Hbase中写取数据。在底层实现上,Hba * 描述:要写入的hbase的rowkey列。index:指定该列对应reader端column的索引,从0开始,若为常量index为-1;type:指定写入数据类型,用于转换HBase byte[];value:配置常量,常作为多个字段的拼接符。hbasewriter会将rowkeyColumn中所有列按照配置顺序进行拼接作为写入hbase的rowkey,不能全为常量。配置格式如下: ``` -"rowkeyColumn": [ - { - "index":0, - "type":"string" - }, - { - "index":-1, - "type":"string", - "value":"_" - } - ] + "rowkeyColumn": [ + { + "index":0, + "type":"string" + }, + { + "index":-1, + "type":"string", + "value":"_" + } + ] ``` @@ -250,19 +251,19 @@ HbaseWriter 插件实现了从向Hbase中写取数据。在底层实现上,Hba * 描述:指定写入hbase的时间戳。支持:当前时间、指定时间列,指定时间,三者选一。若不配置表示用当前时间。index:指定对应reader端column的索引,从0开始,需保证能转换为long,若是Date类型,会尝试用yyyy-MM-dd HH:mm:ss和yyyy-MM-dd HH:mm:ss SSS去解析;若为指定时间index为-1;value:指定时间的值,long值。配置格式如下: ``` -"versionColumn":{ - "index":1 -} + "versionColumn":{ + "index":1 + } ``` 或者 ``` -"versionColumn":{ - "index":-1, - "value":123456789 -} + "versionColumn":{ + "index":-1, + "value":123456789 + } ``` diff --git a/hbase11xwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xwriter/Hbase11xHelper.java b/hbase11xwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xwriter/Hbase11xHelper.java index 94b13b60..2889b647 100644 --- a/hbase11xwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xwriter/Hbase11xHelper.java +++ b/hbase11xwriter/src/main/java/com/alibaba/datax/plugin/writer/hbase11xwriter/Hbase11xHelper.java @@ -2,8 +2,8 @@ package com.alibaba.datax.plugin.writer.hbase11xwriter; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.TypeReference; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.TypeReference; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.Validate; import org.apache.hadoop.hbase.HBaseConfiguration; diff --git a/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20SQLReaderHelper.java b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20SQLReaderHelper.java index 0edc993f..11bbf734 100644 --- a/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20SQLReaderHelper.java +++ b/hbase20xsqlreader/src/main/java/com/alibaba/datax/plugin/reader/hbase20xsqlreader/HBase20SQLReaderHelper.java @@ -175,7 +175,7 @@ public class HBase20SQLReaderHelper { if (querySql == null || querySql.isEmpty()) { // 如果splitPoints为空,则根据splitKey自动切分,不过这种切分方式无法保证数据均分,且只支持整形和字符型列 if (splitPoints == null || splitPoints.isEmpty()) { - LOG.info("Split accoring min and max value of splitColumn..."); + LOG.info("Split according min and max value of splitColumn..."); Pair minMaxPK = getPkRange(configuration); if (null == minMaxPK) { throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_SPLIT_PK, @@ -208,7 +208,7 @@ public class HBase20SQLReaderHelper { } } else { - LOG.info("Split accoring splitPoints..."); + LOG.info("Split according splitPoints..."); // 根据指定splitPoints进行切分 rangeList = buildSplitRange(); } diff --git a/hbase20xsqlreader/src/main/resources/plugin.json b/hbase20xsqlreader/src/main/resources/plugin.json index 45856411..4a7b4edf 100644 --- a/hbase20xsqlreader/src/main/resources/plugin.json +++ b/hbase20xsqlreader/src/main/resources/plugin.json @@ -2,6 +2,6 @@ "name": "hbase20xsqlreader", "class": "com.alibaba.datax.plugin.reader.hbase20xsqlreader.HBase20xSQLReader", "description": "useScene: prod. 
mechanism: read data from phoenix through queryserver.", - "developer": "bake" + "developer": "alibaba" } diff --git a/hbase20xsqlwriter/src/main/resources/plugin.json b/hbase20xsqlwriter/src/main/resources/plugin.json index 91b7069f..93d3002a 100755 --- a/hbase20xsqlwriter/src/main/resources/plugin.json +++ b/hbase20xsqlwriter/src/main/resources/plugin.json @@ -2,6 +2,6 @@ "name": "hbase20xsqlwriter", "class": "com.alibaba.datax.plugin.writer.hbase20xsqlwriter.HBase20xSQLWriter", "description": "useScene: prod. mechanism: use hbase sql UPSERT to put data, index tables will be updated too.", - "developer": "bake" + "developer": "alibaba" } diff --git a/hdfsreader/pom.xml b/hdfsreader/pom.xml index 5d07dc25..a5c2da2c 100644 --- a/hdfsreader/pom.xml +++ b/hdfsreader/pom.xml @@ -16,6 +16,17 @@ 2.7.1 + + org.apache.logging.log4j + log4j-api + 2.17.1 + + + + org.apache.logging.log4j + log4j-core + 2.17.1 + com.alibaba.datax datax-common @@ -51,6 +62,11 @@ hadoop-yarn-common ${hadoop.version} + + com.aliyun.oss + hadoop-aliyun + 2.7.2 + org.apache.hadoop hadoop-mapreduce-client-core diff --git a/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/DFSUtil.java b/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/DFSUtil.java index c39d3847..5ba572e1 100644 --- a/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/DFSUtil.java +++ b/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/DFSUtil.java @@ -8,8 +8,8 @@ import com.alibaba.datax.common.util.Configuration; import com.alibaba.datax.plugin.unstructuredstorage.reader.ColumnEntry; import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderErrorCode; import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderUtil; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.JSONObject; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONObject; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; @@ -331,26 +331,30 @@ public class DFSUtil { //If the network disconnected, will retry 45 times, each time the retry interval for 20 seconds //Each file as a split //TODO multy threads - InputSplit[] splits = in.getSplits(conf, 1); + // OrcInputFormat getSplits params numSplits not used, splits size = block numbers + InputSplit[] splits = in.getSplits(conf, -1); + for (InputSplit split : splits) { + { + RecordReader reader = in.getRecordReader(split, conf, Reporter.NULL); + Object key = reader.createKey(); + Object value = reader.createValue(); + // 获取列信息 + List fields = inspector.getAllStructFieldRefs(); - RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL); - Object key = reader.createKey(); - Object value = reader.createValue(); - // 获取列信息 - List fields = inspector.getAllStructFieldRefs(); + List recordFields; + while (reader.next(key, value)) { + recordFields = new ArrayList(); - List recordFields; - while (reader.next(key, value)) { - recordFields = new ArrayList(); - - for (int i = 0; i <= columnIndexMax; i++) { - Object field = inspector.getStructFieldData(value, fields.get(i)); - recordFields.add(field); + for (int i = 0; i <= columnIndexMax; i++) { + Object field = inspector.getStructFieldData(value, fields.get(i)); + recordFields.add(field); + } + transportOneRecord(column, recordFields, recordSender, + taskPluginCollector, isReadAllColumns, nullFormat); + } + reader.close(); } - transportOneRecord(column, 
recordFields, recordSender, - taskPluginCollector, isReadAllColumns, nullFormat); } - reader.close(); } catch (Exception e) { String message = String.format("从orcfile文件路径[%s]中读取数据发生异常,请联系系统管理员。" , sourceOrcFilePath); diff --git a/hdfswriter/doc/hdfswriter.md b/hdfswriter/doc/hdfswriter.md index 028a544e..1259b253 100644 --- a/hdfswriter/doc/hdfswriter.md +++ b/hdfswriter/doc/hdfswriter.md @@ -231,6 +231,7 @@ HdfsWriter提供向HDFS文件系统指定路径中写入TEXTFile文件和ORCFile * append,写入前不做任何处理,DataX hdfswriter直接使用filename写入,并保证文件名不冲突。 * nonConflict,如果目录下有fileName前缀的文件,直接报错。 + * truncate,如果目录下有fileName前缀的文件,先删除后写入。 * 必选:是
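A minimal sketch of an hdfswriter `parameter` block using the newly documented `truncate` mode; the defaultFS address, path, fileName and column list below are illustrative placeholders, not values taken from this patch:

```json
{
  "name": "hdfswriter",
  "parameter": {
    "defaultFS": "hdfs://namenode:8020",
    "fileType": "text",
    "path": "/user/hive/warehouse/demo_table",
    "fileName": "demo",
    "column": [
      {"name": "id", "type": "bigint"},
      {"name": "name", "type": "string"}
    ],
    "fieldDelimiter": "\t",
    "writeMode": "truncate"
  }
}
```

With `truncate`, files under `path` whose names start with the configured `fileName` prefix are deleted before writing, so re-running the same job does not leave stale files behind.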
diff --git a/hdfswriter/pom.xml b/hdfswriter/pom.xml index 15b3780a..741159cb 100644 --- a/hdfswriter/pom.xml +++ b/hdfswriter/pom.xml @@ -19,6 +19,17 @@ + + org.apache.logging.log4j + log4j-api + 2.17.1 + + + + org.apache.logging.log4j + log4j-core + 2.17.1 + com.alibaba.datax datax-common @@ -30,6 +41,11 @@ + + com.aliyun.oss + hadoop-aliyun + 2.7.2 + org.slf4j slf4j-api @@ -132,4 +148,4 @@ - \ No newline at end of file + diff --git a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsHelper.java b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsHelper.java index c8bfa50b..a9e157b7 100644 --- a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsHelper.java +++ b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsHelper.java @@ -6,10 +6,13 @@ import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.RecordReceiver; import com.alibaba.datax.common.plugin.TaskPluginCollector; import com.alibaba.datax.common.util.Configuration; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.JSONObject; +import com.alibaba.datax.plugin.unstructuredstorage.util.ColumnTypeUtil; +import com.alibaba.datax.plugin.unstructuredstorage.util.HdfsUtil; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONObject; import com.google.common.collect.Lists; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.Validate; import org.apache.commons.lang3.tuple.MutablePair; import org.apache.hadoop.fs.*; import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; @@ -24,6 +27,10 @@ import org.apache.hadoop.mapred.*; import org.apache.hadoop.security.UserGroupInformation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import parquet.schema.OriginalType; +import parquet.schema.PrimitiveType; +import parquet.schema.Types; + import java.io.IOException; import java.text.SimpleDateFormat; import java.util.*; @@ -556,4 +563,67 @@ public class HdfsHelper { transportResult.setLeft(recordList); return transportResult; } + + + public static String generateParquetSchemaFromColumnAndType(List columns) { + Map decimalColInfo = new HashMap<>(16); + ColumnTypeUtil.DecimalInfo PARQUET_DEFAULT_DECIMAL_INFO = new ColumnTypeUtil.DecimalInfo(10, 2); + Types.MessageTypeBuilder typeBuilder = Types.buildMessage(); + for (Configuration column : columns) { + String name = column.getString("name"); + String colType = column.getString("type"); + Validate.notNull(name, "column.name can't be null"); + Validate.notNull(colType, "column.type can't be null"); + switch (colType.toLowerCase()) { + case "tinyint": + case "smallint": + case "int": + typeBuilder.optional(PrimitiveType.PrimitiveTypeName.INT32).named(name); + break; + case "bigint": + case "long": + typeBuilder.optional(PrimitiveType.PrimitiveTypeName.INT64).named(name); + break; + case "float": + typeBuilder.optional(PrimitiveType.PrimitiveTypeName.FLOAT).named(name); + break; + case "double": + typeBuilder.optional(PrimitiveType.PrimitiveTypeName.DOUBLE).named(name); + break; + case "binary": + typeBuilder.optional(PrimitiveType.PrimitiveTypeName.BINARY).named(name); + break; + case "char": + case "varchar": + case "string": + typeBuilder.optional(PrimitiveType.PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); + break; + case "boolean": + typeBuilder.optional(PrimitiveType.PrimitiveTypeName.BOOLEAN).named(name); + break; + case "timestamp": + 
typeBuilder.optional(PrimitiveType.PrimitiveTypeName.INT96).named(name); + break; + case "date": + typeBuilder.optional(PrimitiveType.PrimitiveTypeName.INT32).as(OriginalType.DATE).named(name); + break; + default: + if (ColumnTypeUtil.isDecimalType(colType)) { + ColumnTypeUtil.DecimalInfo decimalInfo = ColumnTypeUtil.getDecimalInfo(colType, PARQUET_DEFAULT_DECIMAL_INFO); + typeBuilder.optional(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) + .as(OriginalType.DECIMAL) + .precision(decimalInfo.getPrecision()) + .scale(decimalInfo.getScale()) + .length(HdfsUtil.computeMinBytesForPrecision(decimalInfo.getPrecision())) + .named(name); + + decimalColInfo.put(name, decimalInfo); + } else { + typeBuilder.optional(PrimitiveType.PrimitiveTypeName.BINARY).named(name); + } + break; + } + } + return typeBuilder.named("m").toString(); + } } diff --git a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsWriter.java b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsWriter.java index 853613a2..59ec6d18 100644 --- a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsWriter.java +++ b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsWriter.java @@ -9,9 +9,11 @@ import com.google.common.collect.Sets; import org.apache.commons.io.Charsets; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.Validate; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import parquet.schema.MessageTypeParser; import java.util.*; @@ -323,8 +325,55 @@ public class HdfsWriter extends Writer { } return tmpFilePath; } + public void unitizeParquetConfig(Configuration writerSliceConfig) { + String parquetSchema = writerSliceConfig.getString(Key.PARQUET_SCHEMA); + if (StringUtils.isNotBlank(parquetSchema)) { + LOG.info("parquetSchema has config. 
use parquetSchema:\n{}", parquetSchema); + return; + } + + List columns = writerSliceConfig.getListConfiguration(Key.COLUMN); + if (columns == null || columns.isEmpty()) { + throw DataXException.asDataXException("parquetSchema or column can't be blank!"); + } + + parquetSchema = generateParquetSchemaFromColumn(columns); + // 为了兼容历史逻辑,对之前的逻辑做保留,但是如果配置的时候报错,则走新逻辑 + try { + MessageTypeParser.parseMessageType(parquetSchema); + } catch (Throwable e) { + LOG.warn("The generated parquetSchema {} is illegal, try to generate parquetSchema in another way", parquetSchema); + parquetSchema = HdfsHelper.generateParquetSchemaFromColumnAndType(columns); + LOG.info("The last generated parquet schema is {}", parquetSchema); + } + writerSliceConfig.set(Key.PARQUET_SCHEMA, parquetSchema); + LOG.info("dataxParquetMode use default fields."); + writerSliceConfig.set(Key.DATAX_PARQUET_MODE, "fields"); + } + + private String generateParquetSchemaFromColumn(List columns) { + StringBuffer parquetSchemaStringBuffer = new StringBuffer(); + parquetSchemaStringBuffer.append("message m {"); + for (Configuration column: columns) { + String name = column.getString("name"); + Validate.notNull(name, "column.name can't be null"); + + String type = column.getString("type"); + Validate.notNull(type, "column.type can't be null"); + + String parquetColumn = String.format("optional %s %s;", type, name); + parquetSchemaStringBuffer.append(parquetColumn); + } + parquetSchemaStringBuffer.append("}"); + String parquetSchema = parquetSchemaStringBuffer.toString(); + LOG.info("generate parquetSchema:\n{}", parquetSchema); + return parquetSchema; + } + } + + public static class Task extends Writer.Task { private static final Logger LOG = LoggerFactory.getLogger(Task.class); diff --git a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/Key.java b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/Key.java index f1f63096..2b1fab98 100644 --- a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/Key.java +++ b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/Key.java @@ -33,4 +33,17 @@ public class Key { public static final String KERBEROS_PRINCIPAL = "kerberosPrincipal"; // hadoop config public static final String HADOOP_CONFIG = "hadoopConfig"; + + // useOldRawDataTransf + public final static String PARQUET_FILE_USE_RAW_DATA_TRANSF = "useRawDataTransf"; + + public final static String DATAX_PARQUET_MODE = "dataxParquetMode"; + + // hdfs username 默认值 admin + public final static String HDFS_USERNAME = "hdfsUsername"; + + public static final String PROTECTION = "protection"; + + public static final String PARQUET_SCHEMA = "parquetSchema"; + public static final String PARQUET_MERGE_RESULT = "parquetMergeResult"; } diff --git a/hologresjdbcwriter/doc/hologresjdbcwriter.md b/hologresjdbcwriter/doc/hologresjdbcwriter.md new file mode 100644 index 00000000..8b163017 --- /dev/null +++ b/hologresjdbcwriter/doc/hologresjdbcwriter.md @@ -0,0 +1,204 @@ +# DataX HologresJdbcWriter + + +--- + + +## 1 快速介绍 + +HologresJdbcWriter 插件实现了写入数据到 Hologres目的表的功能。在底层实现上,HologresJdbcWriter通过JDBC连接远程 Hologres 数据库,并执行相应的 insert into ... on conflict sql 语句将数据写入 Hologres,内部会分批次提交入库。 + +
+ +* HologresJdbcWriter 只支持单表同步 + +## 2 实现原理 + +HologresJdbcWriter 通过 DataX 框架获取 Reader 生成的协议数据,根据你配置生成相应的SQL插入语句 + +* `insert into... on conflict ` + + +## 3 功能说明 + +### 3.1 配置样例 + +* 这里使用一份从内存产生到 HologresJdbcWriter导入的数据。 + +```json +{ + "job": { + "setting": { + "speed": { + "channel": 1 + } + }, + "content": [ + { + "reader": { + "name": "streamreader", + "parameter": { + "column" : [ + { + "value": "DataX", + "type": "string" + }, + { + "value": 19880808, + "type": "long" + }, + { + "value": "1988-08-08 08:08:08", + "type": "date" + }, + { + "value": true, + "type": "bool" + }, + { + "value": "test", + "type": "bytes" + } + ], + "sliceRecordCount": 1000 + } + }, + "writer": { + "name": "hologresjdbcwriter", + "parameter": { + "username": "xx", + "password": "xx", + "column": [ + "id", + "name" + ], + "preSql": [ + "delete from test" + ], + "connection": [ + { + "jdbcUrl": "jdbc:postgresql://127.0.0.1:3002/datax", + "table": [ + "test" + ] + } + ], + "writeMode" : "REPLACE", + "client" : { + "writeThreadSize" : 3 + } + } + } + } + ] + } +} + +``` + + +### 3.2 参数说明 + +* **jdbcUrl** + + * 描述:目的数据库的 JDBC 连接信息 ,jdbcUrl必须包含在connection配置单元中。 + + 注意:1、在一个数据库上只能配置一个值。 + 2、jdbcUrl按照PostgreSQL官方规范,并可以填写连接附加参数信息。具体请参看PostgreSQL官方文档或者咨询对应 DBA。 + + + * 必选:是
+ + * 默认值:无
+ +* **username** + + * 描述:目的数据库的用户名
+ + * 必选:是
+ + * 默认值:无
+ +* **password** + + * 描述:目的数据库的密码
+ + * 必选:是
+ + * 默认值:无
+ +* **table** + + * 描述:目的表的表名称。只支持写入一个表。 + + 注意:table 和 jdbcUrl 必须包含在 connection 配置单元中 + + * 必选:是
+ + * 默认值:无
+ +* **column** + + * 描述:目的表需要写入数据的字段,字段之间用英文逗号分隔。例如: "column": ["id","name","age"]。如果要依次写入全部列,使用\*表示, 例如: "column": ["\*"] + + 注意:1、我们强烈不推荐你这样配置,因为当你目的表字段个数、类型等有改动时,你的任务可能运行不正确或者失败 + 2、此处 column 不能配置任何常量值 + + * 必选:是
+ + * 默认值:无
+ +* **preSql** + + * 描述:写入数据到目的表前,会先执行这里的标准语句。如果 Sql 中有你需要操作到的表名称,请使用 `@table` 表示,这样在实际执行 Sql 语句时,会对变量按照实际表名称进行替换。
+ + * 必选:否
+ + * 默认值:无
+ +* **postSql** + + * 描述:写入数据到目的表后,会执行这里的标准语句。(原理同 preSql )
+ + * 必选:否
+ + * 默认值:无
+ +* **batchSize** + + * 描述:一次性批量提交的记录数大小,该值可以极大减少DataX与HologresJdbcWriter的网络交互次数,并提升整体吞吐量。但是该值设置过大可能会造成DataX运行进程OOM情况。
+ + * 必选:否
+ + * 默认值:512
+ +* **writeMode** + + * 描述:当写入hologres有主键表时,控制主键冲突后的策略。REPLACE表示冲突后hologres表的所有字段都被覆盖(未在writer中配置的字段将填充null);UPDATE表示冲突后hologres表writer配置的字段将被覆盖;IGNORE表示冲突后丢弃新数据,不覆盖。
+ + * 必选:否
+ + * 默认值:REPLACE
+ +* **client.writeThreadSize** + + * 描述:写入hologres的连接池大小,多个连接将并行写入数据。
+ + * 必选:否
+ + * 默认值:1
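A hedged example that pulls the optional settings above together in one writer `parameter` fragment; the jdbcUrl, credentials and table name are reused from the sample in 3.1, and `@table` in preSql is replaced with the actual table name at run time:

```json
{
  "name": "hologresjdbcwriter",
  "parameter": {
    "username": "xx",
    "password": "xx",
    "column": ["id", "name"],
    "preSql": ["delete from @table"],
    "connection": [
      {
        "jdbcUrl": "jdbc:postgresql://127.0.0.1:3002/datax",
        "table": ["test"]
      }
    ],
    "batchSize": 512,
    "writeMode": "UPDATE",
    "client": {
      "writeThreadSize": 3
    }
  }
}
```

With `writeMode` set to `UPDATE`, only the columns listed in `column` are overwritten when a primary-key conflict occurs; `batchSize` and `client.writeThreadSize` trade memory and connection count for throughput.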
+ +### 3.3 类型转换 + +目前 HologresJdbcWriter支持大部分 Hologres类型,但也存在部分没有支持的情况,请注意检查你的类型。 + +下面列出 HologresJdbcWriter针对 Hologres类型转换列表: + +| DataX 内部类型| Hologres 数据类型 | +| -------- | ----- | +| Long |bigint, integer, smallint | +| Double |double precision, money, numeric, real | +| String |varchar, char, text, bit| +| Date |date, time, timestamp | +| Boolean |bool| +| Bytes |bytea| diff --git a/hologresjdbcwriter/pom.xml b/hologresjdbcwriter/pom.xml new file mode 100644 index 00000000..a908dfed --- /dev/null +++ b/hologresjdbcwriter/pom.xml @@ -0,0 +1,90 @@ + + + + datax-all + com.alibaba.datax + 0.0.1-SNAPSHOT + + 4.0.0 + + hologresjdbcwriter + hologresjdbcwriter + jar + writer data into hologres using jdbc + + + 1.8 + + + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + + org.slf4j + slf4j-api + + + + ch.qos.logback + logback-classic + + + + com.alibaba.datax + plugin-rdbms-util + ${datax-project-version} + + + + com.alibaba.hologres + holo-client + 2.1.0 + + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + \ No newline at end of file diff --git a/hologresjdbcwriter/src/main/assembly/package.xml b/hologresjdbcwriter/src/main/assembly/package.xml new file mode 100755 index 00000000..db8100e1 --- /dev/null +++ b/hologresjdbcwriter/src/main/assembly/package.xml @@ -0,0 +1,35 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/writer/hologresjdbcwriter + + + target/ + + hologresjdbcwriter-0.0.1-SNAPSHOT.jar + + plugin/writer/hologresjdbcwriter + + + + + + false + plugin/writer/hologresjdbcwriter/libs + runtime + + + diff --git a/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/BaseWriter.java b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/BaseWriter.java new file mode 100644 index 00000000..89df08b1 --- /dev/null +++ b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/BaseWriter.java @@ -0,0 +1,526 @@ +package com.alibaba.datax.plugin.writer.hologresjdbcwriter; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.DateColumn; +import com.alibaba.datax.common.element.LongColumn; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.plugin.TaskPluginCollector; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.RetryUtil; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.writer.hologresjdbcwriter.util.ConfLoader; +import com.alibaba.datax.plugin.writer.hologresjdbcwriter.util.OriginalConfPretreatmentUtil; +import com.alibaba.datax.plugin.writer.hologresjdbcwriter.util.WriterUtil; +import com.alibaba.fastjson2.JSONArray; +import com.alibaba.fastjson2.JSONObject; +import com.alibaba.hologres.client.HoloClient; +import com.alibaba.hologres.client.HoloConfig; +import com.alibaba.hologres.client.Put; +import com.alibaba.hologres.client.exception.HoloClientWithDetailsException; +import com.alibaba.hologres.client.model.TableSchema; +import 
org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.sql.Time; +import java.sql.Timestamp; +import java.sql.Types; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class BaseWriter { + + protected static final Set ignoreConfList; + + static { + ignoreConfList = new HashSet<>(); + ignoreConfList.add("jdbcUrl"); + ignoreConfList.add("username"); + ignoreConfList.add("password"); + ignoreConfList.add("writeMode"); + } + + enum WriteMode { + IGNORE, + UPDATE, + REPLACE + } + + private static WriteMode getWriteMode(String text) { + text = text.toUpperCase(); + switch (text) { + case "IGNORE": + return WriteMode.IGNORE; + case "UPDATE": + return WriteMode.UPDATE; + case "REPLACE": + return WriteMode.REPLACE; + default: + throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE, "writeMode只支持IGNORE,UPDATE,REPLACE,无法识别 " + text); + } + } + + public static class Job { + private DataBaseType dataBaseType; + + private static final Logger LOG = LoggerFactory + .getLogger(BaseWriter.Job.class); + + public Job(DataBaseType dataBaseType) { + this.dataBaseType = dataBaseType; + OriginalConfPretreatmentUtil.DATABASE_TYPE = this.dataBaseType; + } + + public void init(Configuration originalConfig) { + OriginalConfPretreatmentUtil.doPretreatment(originalConfig, this.dataBaseType); + checkConf(originalConfig); + LOG.debug("After job init(), originalConfig now is:[\n{}\n]", + originalConfig.toJSON()); + } + + private void checkConf(Configuration originalConfig) { + getWriteMode(originalConfig.getString(Key.WRITE_MODE, "REPLACE")); + List userConfiguredColumns = originalConfig.getList(Key.COLUMN, String.class); + List conns = originalConfig.getList(Constant.CONN_MARK, + JSONObject.class); + if (conns.size() > 1) { + throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE, "只支持单表同步"); + } + int tableNumber = originalConfig.getInt(Constant.TABLE_NUMBER_MARK); + if (tableNumber > 1) { + throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE, "只支持单表同步"); + } + JSONObject connConf = conns.get(0); + String jdbcUrl = connConf.getString(Key.JDBC_URL); + String username = originalConfig.getString(Key.USERNAME); + String password = originalConfig.getString(Key.PASSWORD); + + String table = connConf.getJSONArray(Key.TABLE).getString(0); + + Map clientConf = originalConfig.getMap("client"); + + HoloConfig config = new HoloConfig(); + config.setJdbcUrl(jdbcUrl); + config.setUsername(username); + config.setPassword(password); + if (clientConf != null) { + try { + config = ConfLoader.load(clientConf, config, ignoreConfList); + } catch (Exception e) { + throw DataXException + .asDataXException( + DBUtilErrorCode.CONF_ERROR, + "配置解析失败."); + } + } + + try (HoloClient client = new HoloClient(config)) { + TableSchema schema = client.getTableSchema(table); + LOG.info("table {} column info:", schema.getTableNameObj().getFullName()); + for (com.alibaba.hologres.client.model.Column column : schema.getColumnSchema()) { + LOG.info("name:{},type:{},typeName:{},nullable:{},defaultValue:{}", column.getName(), column.getType(), column.getTypeName(), column.getAllowNull(), column.getDefaultValue()); + } + for (String userColumn : userConfiguredColumns) { + if (schema.getColumnIndex(userColumn) == null) { + throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR, "配置的列 " + userColumn + " 不存在"); 
+ } + } + } catch (Exception e) { + throw DataXException.asDataXException(DBUtilErrorCode.CONN_DB_ERROR, "获取表schema失败", e); + } + + } + + // 一般来说,是需要推迟到 task 中进行pre 的执行(单表情况例外) + public void prepare(Configuration originalConfig) { + + try { + String username = originalConfig.getString(Key.USERNAME); + String password = originalConfig.getString(Key.PASSWORD); + + List conns = originalConfig.getList(Constant.CONN_MARK, + Object.class); + Configuration connConf = Configuration.from(conns.get(0) + .toString()); + + String jdbcUrl = connConf.getString(Key.JDBC_URL); + originalConfig.set(Key.JDBC_URL, jdbcUrl); + + String table = connConf.getList(Key.TABLE, String.class).get(0); + originalConfig.set(Key.TABLE, table); + + List preSqls = originalConfig.getList(Key.PRE_SQL, + String.class); + List renderedPreSqls = WriterUtil.renderPreOrPostSqls( + preSqls, table); + + originalConfig.remove(Constant.CONN_MARK); + if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) { + // 说明有 preSql 配置,则此处删除掉 + originalConfig.remove(Key.PRE_SQL); + String tempJdbcUrl = jdbcUrl.replace("postgresql", "hologres"); + try (Connection conn = DriverManager.getConnection( + tempJdbcUrl, username, password)) { + LOG.info("Begin to execute preSqls:[{}]. context info:{}.", + StringUtils.join(renderedPreSqls, ";"), tempJdbcUrl); + + WriterUtil.executeSqls(conn, renderedPreSqls, tempJdbcUrl, dataBaseType); + } + } + LOG.debug("After job prepare(), originalConfig now is:[\n{}\n]", + originalConfig.toJSON()); + } catch (SQLException e) { + throw DataXException.asDataXException(DBUtilErrorCode.SQL_EXECUTE_FAIL, e); + } + } + + public List split(Configuration originalConfig, + int mandatoryNumber) { + return WriterUtil.doSplit(originalConfig, mandatoryNumber); + } + + // 一般来说,是需要推迟到 task 中进行post 的执行(单表情况例外) + public void post(Configuration originalConfig) { + + String username = originalConfig.getString(Key.USERNAME); + String password = originalConfig.getString(Key.PASSWORD); + + String jdbcUrl = originalConfig.getString(Key.JDBC_URL); + + String table = originalConfig.getString(Key.TABLE); + + List postSqls = originalConfig.getList(Key.POST_SQL, + String.class); + List renderedPostSqls = WriterUtil.renderPreOrPostSqls( + postSqls, table); + + if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) { + // 说明有 postSql 配置,则此处删除掉 + originalConfig.remove(Key.POST_SQL); + String tempJdbcUrl = jdbcUrl.replace("postgresql", "hologres"); + Connection conn = DBUtil.getConnection(this.dataBaseType, + tempJdbcUrl, username, password); + + LOG.info( + "Begin to execute postSqls:[{}]. 
context info:{}.", + StringUtils.join(renderedPostSqls, ";"), tempJdbcUrl); + WriterUtil.executeSqls(conn, renderedPostSqls, tempJdbcUrl, dataBaseType); + DBUtil.closeDBResources(null, null, conn); + } + + } + + public void destroy(Configuration originalConfig) { + } + + } + + public static class Task { + protected static final Logger LOG = LoggerFactory + .getLogger(BaseWriter.Task.class); + + protected DataBaseType dataBaseType; + + protected String username; + protected String password; + protected String jdbcUrl; + protected String table; + protected List columns; + protected int batchSize; + protected int batchByteSize; + protected int columnNumber = 0; + protected TaskPluginCollector taskPluginCollector; + + // 作为日志显示信息时,需要附带的通用信息。比如信息所对应的数据库连接等信息,针对哪个表做的操作 + protected static String BASIC_MESSAGE; + + protected WriteMode writeMode; + protected String arrayDelimiter; + protected boolean emptyAsNull; + + protected HoloConfig config; + + public Task(DataBaseType dataBaseType) { + this.dataBaseType = dataBaseType; + } + + public void init(Configuration writerSliceConfig) { + this.username = writerSliceConfig.getString(Key.USERNAME); + this.password = writerSliceConfig.getString(Key.PASSWORD); + this.jdbcUrl = writerSliceConfig.getString(Key.JDBC_URL); + this.table = writerSliceConfig.getString(Key.TABLE); + + this.columns = writerSliceConfig.getList(Key.COLUMN, String.class); + this.columnNumber = this.columns.size(); + + this.arrayDelimiter = writerSliceConfig.getString(Key.Array_Delimiter); + + this.batchSize = writerSliceConfig.getInt(Key.BATCH_SIZE, Constant.DEFAULT_BATCH_SIZE); + this.batchByteSize = writerSliceConfig.getInt(Key.BATCH_BYTE_SIZE, Constant.DEFAULT_BATCH_BYTE_SIZE); + + writeMode = getWriteMode(writerSliceConfig.getString(Key.WRITE_MODE, "REPLACE")); + emptyAsNull = writerSliceConfig.getBool(Key.EMPTY_AS_NULL, true); + + Map clientConf = writerSliceConfig.getMap("client"); + + config = new HoloConfig(); + config.setJdbcUrl(this.jdbcUrl); + config.setUsername(username); + config.setPassword(password); + config.setWriteMode(writeMode == WriteMode.IGNORE ? com.alibaba.hologres.client.model.WriteMode.INSERT_OR_IGNORE : (writeMode == WriteMode.UPDATE ? 
com.alibaba.hologres.client.model.WriteMode.INSERT_OR_UPDATE : com.alibaba.hologres.client.model.WriteMode.INSERT_OR_REPLACE)); + config.setWriteBatchSize(this.batchSize); + config.setWriteBatchTotalByteSize(this.batchByteSize); + config.setMetaCacheTTL(3600000L); + config.setEnableDefaultForNotNullColumn(false); + config.setRetryCount(5); + config.setAppName("datax"); + + if (clientConf != null) { + try { + config = ConfLoader.load(clientConf, config, ignoreConfList); + } catch (Exception e) { + throw DataXException + .asDataXException( + DBUtilErrorCode.CONF_ERROR, + "配置解析失败."); + } + } + + BASIC_MESSAGE = String.format("jdbcUrl:[%s], table:[%s]", + this.jdbcUrl, this.table); + } + + public void prepare(Configuration writerSliceConfig) { + + } + + public void startWriteWithConnection(RecordReceiver recordReceiver, TaskPluginCollector taskPluginCollector) { + this.taskPluginCollector = taskPluginCollector; + + try (HoloClient client = new HoloClient(config)) { + Record record; + TableSchema schema = RetryUtil.executeWithRetry(() -> client.getTableSchema(this.table), 3, 5000L, true); + while ((record = recordReceiver.getFromReader()) != null) { + if (record.getColumnNumber() != this.columnNumber) { + // 源头读取字段列数与目的表字段写入列数不相等,直接报错 + throw DataXException + .asDataXException( + DBUtilErrorCode.CONF_ERROR, + String.format( + "列配置信息有错误. 因为您配置的任务中,源头读取字段数:%s 与 目的表要写入的字段数:%s 不相等. 请检查您的配置并作出修改.", + record.getColumnNumber(), + this.columnNumber)); + } + Put put = convertToPut(record, schema); + if (null != put) { + try { + client.put(put); + } catch (HoloClientWithDetailsException detail) { + handleDirtyData(detail); + } + } + } + try { + client.flush(); + } catch (HoloClientWithDetailsException detail) { + handleDirtyData(detail); + } + } catch (Exception e) { + throw DataXException.asDataXException( + DBUtilErrorCode.WRITE_DATA_ERROR, e); + } + } + + private void handleDirtyData(HoloClientWithDetailsException detail) { + for (int i = 0; i < detail.size(); ++i) { + com.alibaba.hologres.client.model.Record failRecord = detail.getFailRecord(i); + if (failRecord.getAttachmentList() != null) { + for (Object obj : failRecord.getAttachmentList()) { + taskPluginCollector.collectDirtyRecord((Record) obj, detail.getException(i)); + } + } + } + } + + public void startWrite(RecordReceiver recordReceiver, + TaskPluginCollector taskPluginCollector) { + startWriteWithConnection(recordReceiver, taskPluginCollector); + } + + public void post(Configuration writerSliceConfig) { + + } + + public void destroy(Configuration writerSliceConfig) { + } + + // 直接使用了两个类变量:columnNumber,resultSetMetaData + protected Put convertToPut(Record record, TableSchema schema) { + try { + Put put = new Put(schema); + put.getRecord().addAttachment(record); + for (int i = 0; i < this.columnNumber; i++) { + fillColumn(put, schema, schema.getColumnIndex(this.columns.get(i)), record.getColumn(i)); + } + return put; + } catch (Exception e) { + taskPluginCollector.collectDirtyRecord(record, e); + return null; + } + + } + + protected void fillColumn(Put data, TableSchema schema, int index, Column column) throws SQLException { + com.alibaba.hologres.client.model.Column holoColumn = schema.getColumn(index); + switch (holoColumn.getType()) { + case Types.CHAR: + case Types.NCHAR: + case Types.CLOB: + case Types.NCLOB: + case Types.VARCHAR: + case Types.LONGVARCHAR: + case Types.NVARCHAR: + case Types.LONGNVARCHAR: + String value = column.asString(); + if (emptyAsNull && value != null && value.length() == 0) { + data.setObject(index, null); + } 
else { + data.setObject(index, value); + } + break; + + case Types.SMALLINT: + if (column.getByteSize() > 0) { + data.setObject(index, column.asBigInteger().shortValue()); + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + case Types.INTEGER: + if (column.getByteSize() > 0) { + data.setObject(index, column.asBigInteger().intValue()); + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + case Types.BIGINT: + if (column.getByteSize() > 0) { + data.setObject(index, column.asBigInteger().longValue()); + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + case Types.NUMERIC: + case Types.DECIMAL: + if (column.getByteSize() > 0) { + data.setObject(index, column.asBigDecimal()); + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + case Types.FLOAT: + case Types.REAL: + if (column.getByteSize() > 0) { + data.setObject(index, column.asBigDecimal().floatValue()); + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + case Types.DOUBLE: + if (column.getByteSize() > 0) { + data.setObject(index, column.asDouble()); + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + case Types.TIME: + if (column.getByteSize() > 0) { + if (column instanceof LongColumn || column instanceof DateColumn) { + data.setObject(index, new Time(column.asLong())); + } else { + data.setObject(index, column.asString()); + } + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + case Types.DATE: + if (column.getByteSize() > 0) { + if (column instanceof LongColumn || column instanceof DateColumn) { + data.setObject(index, column.asLong()); + } else { + data.setObject(index, column.asString()); + } + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + case Types.TIMESTAMP: + if (column.getByteSize() > 0) { + if (column instanceof LongColumn || column instanceof DateColumn) { + data.setObject(index, new Timestamp(column.asLong())); + } else { + data.setObject(index, column.asString()); + } + } else if (emptyAsNull) { + data.setObject(index, null); + } + break; + + case Types.BINARY: + case Types.VARBINARY: + case Types.BLOB: + case Types.LONGVARBINARY: + String byteValue = column.asString(); + if (null != byteValue) { + data.setObject(index, column + .asBytes()); + } + break; + case Types.BOOLEAN: + case Types.BIT: + if (column.getByteSize() == 0) { + break; + } + try { + Boolean boolValue = column.asBoolean(); + data.setObject(index, boolValue); + } catch (Exception e) { + data.setObject(index, !"0".equals(column.asString())); + } + break; + case Types.ARRAY: + String arrayString = column.asString(); + Object arrayObject = null; + if (null == arrayString || (emptyAsNull && "".equals(arrayString))) { + data.setObject(index, null); + break; + } else if (arrayDelimiter != null && arrayDelimiter.length() > 0) { + arrayObject = arrayString.split(this.arrayDelimiter); + } else { + arrayObject = JSONArray.parseArray(arrayString); + } + data.setObject(index, arrayObject); + break; + default: + throw DataXException + .asDataXException( + DBUtilErrorCode.UNSUPPORTED_TYPE, + String.format( + "您的配置文件中的列配置信息有误. 因为DataX 不支持数据库写入这种字段类型. 字段名:[%s], 字段类型:[%d], 字段Java类型:[%s]. 
请修改表中该字段的类型或者不同步该字段.", + holoColumn.getName(), + holoColumn.getType(), + holoColumn.getTypeName())); + } + } + } +} diff --git a/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/Constant.java b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/Constant.java new file mode 100755 index 00000000..eb51d026 --- /dev/null +++ b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/Constant.java @@ -0,0 +1,15 @@ +package com.alibaba.datax.plugin.writer.hologresjdbcwriter; + +/** + * 用于插件解析用户配置时,需要进行标识(MARK)的常量的声明. + */ +public final class Constant { + public static final int DEFAULT_BATCH_SIZE = 512; + + public static final int DEFAULT_BATCH_BYTE_SIZE = 50 * 1024 * 1024; + + public static String CONN_MARK = "connection"; + + public static String TABLE_NUMBER_MARK = "tableNumber"; + +} diff --git a/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/HologresJdbcWriter.java b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/HologresJdbcWriter.java new file mode 100755 index 00000000..811a2e11 --- /dev/null +++ b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/HologresJdbcWriter.java @@ -0,0 +1,78 @@ +package com.alibaba.datax.plugin.writer.hologresjdbcwriter; + +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.spi.Writer; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; + +import java.util.List; + +public class HologresJdbcWriter extends Writer { + private static final DataBaseType DATABASE_TYPE = DataBaseType.PostgreSQL; + + public static class Job extends Writer.Job { + private Configuration originalConfig = null; + private BaseWriter.Job baseWriterMaster; + + @Override + public void init() { + this.originalConfig = super.getPluginJobConf(); + this.baseWriterMaster = new BaseWriter.Job(DATABASE_TYPE); + this.baseWriterMaster.init(this.originalConfig); + } + + @Override + public void prepare() { + this.baseWriterMaster.prepare(this.originalConfig); + } + + @Override + public List split(int mandatoryNumber) { + return this.baseWriterMaster.split(this.originalConfig, mandatoryNumber); + } + + @Override + public void post() { + this.baseWriterMaster.post(this.originalConfig); + } + + @Override + public void destroy() { + this.baseWriterMaster.destroy(this.originalConfig); + } + + } + + public static class Task extends Writer.Task { + private Configuration writerSliceConfig; + private BaseWriter.Task baseWriterSlave; + + @Override + public void init() { + this.writerSliceConfig = super.getPluginJobConf(); + this.baseWriterSlave = new BaseWriter.Task(DATABASE_TYPE); + this.baseWriterSlave.init(this.writerSliceConfig); + } + + @Override + public void prepare() { + this.baseWriterSlave.prepare(this.writerSliceConfig); + } + + public void startWrite(RecordReceiver recordReceiver) { + this.baseWriterSlave.startWrite(recordReceiver, super.getTaskPluginCollector()); + } + + @Override + public void post() { + this.baseWriterSlave.post(this.writerSliceConfig); + } + + @Override + public void destroy() { + this.baseWriterSlave.destroy(this.writerSliceConfig); + } + + } + +} diff --git a/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/Key.java b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/Key.java new file mode 100755 index 
00000000..3bd5d1e2 --- /dev/null +++ b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/Key.java @@ -0,0 +1,31 @@ +package com.alibaba.datax.plugin.writer.hologresjdbcwriter; + +public final class Key { + public final static String JDBC_URL = "jdbcUrl"; + + public final static String USERNAME = "username"; + + public final static String PASSWORD = "password"; + + public final static String TABLE = "table"; + + public final static String COLUMN = "column"; + + public final static String Array_Delimiter = "arrayDelimiter"; + + public final static String WRITE_MODE = "writeMode"; + + public final static String PRE_SQL = "preSql"; + + public final static String POST_SQL = "postSql"; + + //默认值:256 + public final static String BATCH_SIZE = "batchSize"; + + //默认值:50m + public final static String BATCH_BYTE_SIZE = "batchByteSize"; + + public final static String EMPTY_AS_NULL = "emptyAsNull"; + + +} \ No newline at end of file diff --git a/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/ConfLoader.java b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/ConfLoader.java new file mode 100644 index 00000000..48d7584e --- /dev/null +++ b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/ConfLoader.java @@ -0,0 +1,59 @@ +package com.alibaba.datax.plugin.writer.hologresjdbcwriter.util; + +import com.alibaba.hologres.client.model.WriteMode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.reflect.Field; +import java.util.Map; +import java.util.Set; + +public class ConfLoader { + public static Logger LOG = LoggerFactory.getLogger(ConfLoader.class); + + static public T load(Map props, T config, Set ignoreList) throws Exception { + Field[] fields = config.getClass().getDeclaredFields(); + for (Map.Entry entry : props.entrySet()) { + String key = entry.getKey(); + String value = entry.getValue().toString(); + if (ignoreList.contains(key)) { + LOG.info("Config Skip {}", key); + continue; + } + boolean match = false; + for (Field field : fields) { + if (field.getName().equals(key)) { + match = true; + field.setAccessible(true); + Class type = field.getType(); + if (type.equals(String.class)) { + field.set(config, value); + } else if (type.equals(int.class)) { + field.set(config, Integer.parseInt(value)); + } else if (type.equals(long.class)) { + field.set(config, Long.parseLong(value)); + } else if (type.equals(boolean.class)) { + field.set(config, Boolean.parseBoolean(value)); + } else if (WriteMode.class.equals(type)) { + field.set(config, WriteMode.valueOf(value)); + } else { + throw new Exception("invalid type " + type + " for param " + key); + } + if ("password".equals(key)) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < value.length(); ++i) { + sb.append("*"); + } + LOG.info("Config {}={}", key, sb.toString()); + } else { + LOG.info("Config {}={}", key, value); + } + } + } + if (!match) { + throw new Exception("param " + key + " not found in HoloConfig"); + } + } + return config; + } +} diff --git a/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/OriginalConfPretreatmentUtil.java b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/OriginalConfPretreatmentUtil.java new file mode 100755 index 00000000..70176b91 --- /dev/null +++ 
b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/OriginalConfPretreatmentUtil.java @@ -0,0 +1,82 @@ +package com.alibaba.datax.plugin.writer.hologresjdbcwriter.util; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.util.TableExpandUtil; +import com.alibaba.datax.plugin.writer.hologresjdbcwriter.Constant; +import com.alibaba.datax.plugin.writer.hologresjdbcwriter.Key; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +public final class OriginalConfPretreatmentUtil { + private static final Logger LOG = LoggerFactory + .getLogger(OriginalConfPretreatmentUtil.class); + + public static DataBaseType DATABASE_TYPE; + + public static void doPretreatment(Configuration originalConfig, DataBaseType dataBaseType) { + // 检查 username/password 配置(必填) + originalConfig.getNecessaryValue(Key.USERNAME, DBUtilErrorCode.REQUIRED_VALUE); + originalConfig.getNecessaryValue(Key.PASSWORD, DBUtilErrorCode.REQUIRED_VALUE); + + doCheckBatchSize(originalConfig); + simplifyConf(originalConfig); + } + + public static void doCheckBatchSize(Configuration originalConfig) { + // 检查batchSize 配置(选填,如果未填写,则设置为默认值) + int batchSize = originalConfig.getInt(Key.BATCH_SIZE, Constant.DEFAULT_BATCH_SIZE); + if (batchSize < 1) { + throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE, String.format( + "您的batchSize配置有误. 您所配置的写入数据库表的 batchSize:%s 不能小于1. 推荐配置范围为:[256-1024] (保持128的倍数), 该值越大, 内存溢出可能性越大. 请检查您的配置并作出修改.", + batchSize)); + } + + originalConfig.set(Key.BATCH_SIZE, batchSize); + } + + public static void simplifyConf(Configuration originalConfig) { + List connections = originalConfig.getList(Constant.CONN_MARK, + Object.class); + + int tableNum = 0; + + for (int i = 0, len = connections.size(); i < len; i++) { + Configuration connConf = Configuration.from(connections.get(i).toString()); + + String jdbcUrl = connConf.getString(Key.JDBC_URL); + if (StringUtils.isBlank(jdbcUrl)) { + throw DataXException.asDataXException(DBUtilErrorCode.REQUIRED_VALUE, "您未配置的写入数据库表的 jdbcUrl."); + } + + List tables = connConf.getList(Key.TABLE, String.class); + + if (null == tables || tables.isEmpty()) { + throw DataXException.asDataXException(DBUtilErrorCode.REQUIRED_VALUE, + "您未配置写入数据库表的表名称. 根据配置DataX找不到您配置的表. 请检查您的配置并作出修改."); + } + + // 对每一个connection 上配置的table 项进行解析 + List expandedTables = TableExpandUtil + .expandTableConf(DATABASE_TYPE, tables); + + if (null == expandedTables || expandedTables.isEmpty()) { + throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR, + "您配置的写入数据库表名称错误. 
DataX找不到您配置的表,请检查您的配置并作出修改."); + } + + tableNum += expandedTables.size(); + + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, + i, Key.TABLE), expandedTables); + } + + originalConfig.set(Constant.TABLE_NUMBER_MARK, tableNum); + } + +} diff --git a/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/WriterUtil.java b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/WriterUtil.java new file mode 100755 index 00000000..d35194e8 --- /dev/null +++ b/hologresjdbcwriter/src/main/java/com/alibaba/datax/plugin/writer/hologresjdbcwriter/util/WriterUtil.java @@ -0,0 +1,111 @@ +package com.alibaba.datax.plugin.writer.hologresjdbcwriter.util; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.util.RdbmsException; +import com.alibaba.datax.plugin.rdbms.writer.Constant; +import com.alibaba.datax.plugin.rdbms.writer.Key; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public final class WriterUtil { + private static final Logger LOG = LoggerFactory.getLogger(WriterUtil.class); + + //TODO 切分报错 + public static List doSplit(Configuration simplifiedConf, + int adviceNumber) { + + List splitResultConfigs = new ArrayList(); + + int tableNumber = simplifiedConf.getInt(Constant.TABLE_NUMBER_MARK); + + //处理单表的情况 + if (tableNumber == 1) { + //由于在之前的 master prepare 中已经把 table,jdbcUrl 提取出来,所以这里处理十分简单 + for (int j = 0; j < adviceNumber; j++) { + splitResultConfigs.add(simplifiedConf.clone()); + } + + return splitResultConfigs; + } + + if (tableNumber != adviceNumber) { + throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR, + String.format("您的配置文件中的列配置信息有误. 您要写入的目的端的表个数是:%s , 但是根据系统建议需要切分的份数是:%s. 
请检查您的配置并作出修改.", + tableNumber, adviceNumber)); + } + + String jdbcUrl; + List preSqls = simplifiedConf.getList(Key.PRE_SQL, String.class); + List postSqls = simplifiedConf.getList(Key.POST_SQL, String.class); + + List conns = simplifiedConf.getList(Constant.CONN_MARK, + Object.class); + + for (Object conn : conns) { + Configuration sliceConfig = simplifiedConf.clone(); + + Configuration connConf = Configuration.from(conn.toString()); + jdbcUrl = connConf.getString(Key.JDBC_URL); + sliceConfig.set(Key.JDBC_URL, jdbcUrl); + + sliceConfig.remove(Constant.CONN_MARK); + + List tables = connConf.getList(Key.TABLE, String.class); + + for (String table : tables) { + Configuration tempSlice = sliceConfig.clone(); + tempSlice.set(Key.TABLE, table); + tempSlice.set(Key.PRE_SQL, renderPreOrPostSqls(preSqls, table)); + tempSlice.set(Key.POST_SQL, renderPreOrPostSqls(postSqls, table)); + + splitResultConfigs.add(tempSlice); + } + + } + + return splitResultConfigs; + } + + public static List renderPreOrPostSqls(List preOrPostSqls, String tableName) { + if (null == preOrPostSqls) { + return Collections.emptyList(); + } + + List renderedSqls = new ArrayList(); + for (String sql : preOrPostSqls) { + //preSql为空时,不加入执行队列 + if (StringUtils.isNotBlank(sql)) { + renderedSqls.add(sql.replace(Constant.TABLE_NAME_PLACEHOLDER, tableName)); + } + } + + return renderedSqls; + } + + public static void executeSqls(Connection conn, List sqls, String basicMessage,DataBaseType dataBaseType) { + Statement stmt = null; + String currentSql = null; + try { + stmt = conn.createStatement(); + for (String sql : sqls) { + currentSql = sql; + DBUtil.executeSqlWithoutResultSet(stmt, sql); + } + } catch (Exception e) { + throw RdbmsException.asQueryException(dataBaseType,e,currentSql,null,null); + } finally { + DBUtil.closeDBResources(null, stmt, null); + } + } +} diff --git a/hologresjdbcwriter/src/main/resources/plugin.json b/hologresjdbcwriter/src/main/resources/plugin.json new file mode 100644 index 00000000..a9f93996 --- /dev/null +++ b/hologresjdbcwriter/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "hologresjdbcwriter", + "class": "com.alibaba.datax.plugin.writer.hologresjdbcwriter.HologresJdbcWriter", + "description": "", + "developer": "alibaba" +} \ No newline at end of file diff --git a/hologresjdbcwriter/src/main/resources/plugin_job_template.json b/hologresjdbcwriter/src/main/resources/plugin_job_template.json new file mode 100644 index 00000000..f509ccc0 --- /dev/null +++ b/hologresjdbcwriter/src/main/resources/plugin_job_template.json @@ -0,0 +1,11 @@ +{ + "name": "hologresjdbcwriter", + "parameter": { + "url": "", + "username": "", + "password": "", + "database": "", + "table": "", + "partition": "" + } +} diff --git a/images/datax.logo.png b/images/datax.logo.png new file mode 100644 index 00000000..d5b20350 Binary files /dev/null and b/images/datax.logo.png differ diff --git a/introduction.md b/introduction.md index b27607c7..d08ad98d 100644 --- a/introduction.md +++ b/introduction.md @@ -36,6 +36,7 @@ DataX本身作为离线数据同步框架,采用Framework + plugin架构构建 | ------------ | ---------- | :-------: | :-------: |:-------: | | RDBMS 关系型数据库 | MySQL | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/mysqlreader/doc/mysqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/mysqlwriter/doc/mysqlwriter.md)| |             | Oracle     |     √     |     √     |[读](https://github.com/alibaba/DataX/blob/master/oraclereader/doc/oraclereader.md) 
、[写](https://github.com/alibaba/DataX/blob/master/oraclewriter/doc/oraclewriter.md)| +|             | OceanBase  |     √     |     √     |[读](https://open.oceanbase.com/docs/community/oceanbase-database/V3.1.0/use-datax-to-full-migration-data-to-oceanbase) 、[写](https://open.oceanbase.com/docs/community/oceanbase-database/V3.1.0/use-datax-to-full-migration-data-to-oceanbase)| | | SQLServer | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/sqlserverreader/doc/sqlserverreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/sqlserverwriter/doc/sqlserverwriter.md)| | | PostgreSQL | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/postgresqlreader/doc/postgresqlreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/postgresqlwriter/doc/postgresqlwriter.md)| | | DRDS | √ | √ |[读](https://github.com/alibaba/DataX/blob/master/drdsreader/doc/drdsreader.md) 、[写](https://github.com/alibaba/DataX/blob/master/drdswriter/doc/drdswriter.md)| diff --git a/kuduwriter/src/main/java/com/q1/datax/plugin/writer/kudu11xwriter/Kudu11xHelper.java b/kuduwriter/src/main/java/com/q1/datax/plugin/writer/kudu11xwriter/Kudu11xHelper.java index cf1b0f8f..558693ff 100644 --- a/kuduwriter/src/main/java/com/q1/datax/plugin/writer/kudu11xwriter/Kudu11xHelper.java +++ b/kuduwriter/src/main/java/com/q1/datax/plugin/writer/kudu11xwriter/Kudu11xHelper.java @@ -3,7 +3,7 @@ package com.q1.datax.plugin.writer.kudu11xwriter; import com.alibaba.datax.common.element.Column; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.Validate; import org.apache.kudu.ColumnSchema; diff --git a/kuduwriter/src/main/java/com/q1/datax/plugin/writer/kudu11xwriter/KuduWriterTask.java b/kuduwriter/src/main/java/com/q1/datax/plugin/writer/kudu11xwriter/KuduWriterTask.java index bff3509f..df872842 100644 --- a/kuduwriter/src/main/java/com/q1/datax/plugin/writer/kudu11xwriter/KuduWriterTask.java +++ b/kuduwriter/src/main/java/com/q1/datax/plugin/writer/kudu11xwriter/KuduWriterTask.java @@ -134,7 +134,7 @@ public class KuduWriterTask { break; case BOOLEAN: synchronized (lock) { - row.addBoolean(name, Boolean.getBoolean(rawData)); + row.addBoolean(name, Boolean.parseBoolean(rawData)); } break; case STRING: diff --git a/license.txt b/license.txt index 00b845b4..2f293c0f 100644 --- a/license.txt +++ b/license.txt @@ -1,4 +1,4 @@ -Copyright 1999-2017 Alibaba Group Holding Ltd. +Copyright 1999-2022 Alibaba Group Holding Ltd. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/loghubreader/pom.xml b/loghubreader/pom.xml new file mode 100644 index 00000000..b2f52f3d --- /dev/null +++ b/loghubreader/pom.xml @@ -0,0 +1,73 @@ + + + + datax-all + com.alibaba.datax + 0.0.1-SNAPSHOT + + 4.0.0 + + loghubreader + + 0.0.1-SNAPSHOT + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + org.slf4j + slf4j-api + + + ch.qos.logback + logback-classic + + + com.aliyun.openservices + aliyun-log + 0.6.22 + + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + diff --git a/loghubreader/src/main/assembly/package.xml b/loghubreader/src/main/assembly/package.xml new file mode 100644 index 00000000..e1d8d739 --- /dev/null +++ b/loghubreader/src/main/assembly/package.xml @@ -0,0 +1,34 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + + plugin/reader/loghubreader + + + target/ + + loghubreader-0.0.1-SNAPSHOT.jar + + plugin/reader/loghubreader + + + + + + false + plugin/reader/loghubreader/libs + runtime + + + diff --git a/loghubreader/src/main/java/com/alibaba/datax/plugin/reader/loghubreader/Constant.java b/loghubreader/src/main/java/com/alibaba/datax/plugin/reader/loghubreader/Constant.java new file mode 100644 index 00000000..fd9e88dc --- /dev/null +++ b/loghubreader/src/main/java/com/alibaba/datax/plugin/reader/loghubreader/Constant.java @@ -0,0 +1,26 @@ +package com.alibaba.datax.plugin.reader.loghubreader; + +public class Constant { + + public static String DATETIME_FORMAT = "yyyyMMddHHmmss"; + public static String DATE_FORMAT = "yyyyMMdd"; + + static String META_COL_SOURCE = "__source__"; + static String META_COL_TOPIC = "__topic__"; + static String META_COL_CATEGORY = "__category__"; + static String META_COL_MACHINEUUID = "__machineUUID__"; + static String META_COL_HOSTNAME = "__hostname__"; + static String META_COL_PATH = "__path__"; + static String META_COL_LOGTIME = "__logtime__"; + public static String META_COL_RECEIVE_TIME = "__receive_time__"; + + /** + * 除用户手动配置的列之外,其余数据列作为一个 json 读取到一列 + */ + static String COL_EXTRACT_OTHERS = "C__extract_others__"; + + /** + * 将所有元数据列作为一个 json 读取到一列 + */ + static String COL_EXTRACT_ALL_META = "C__extract_all_meta__"; +} diff --git a/loghubreader/src/main/java/com/alibaba/datax/plugin/reader/loghubreader/Key.java b/loghubreader/src/main/java/com/alibaba/datax/plugin/reader/loghubreader/Key.java new file mode 100644 index 00000000..9067cc68 --- /dev/null +++ b/loghubreader/src/main/java/com/alibaba/datax/plugin/reader/loghubreader/Key.java @@ -0,0 +1,38 @@ +package com.alibaba.datax.plugin.reader.loghubreader; + +public final class Key { + + /** + * 此处声明插件用到的需要插件使用者提供的配置项 + */ + public static final String ENDPOINT = "endpoint"; + + public static final String ACCESSKEYID = "accessId"; + + public static final String ACCESSKEYSECRET = "accessKey"; + + public static final String PROJECT = "project"; + + public static final String LOGSTORE = "logstore"; + + public static final String TOPIC = "topic"; + + public static final String COLUMN = "column"; + + public static final String BATCHSIZE = "batchSize"; + + public static final String BEGINTIMESTAMPMILLIS = "beginTimestampMillis"; + + public static final String ENDTIMESTAMPMILLIS = "endTimestampMillis"; + + public static final String BEGINDATETIME = "beginDateTime"; + + public static final String ENDDATETIME = "endDateTime"; + + public 
static final String TIMEFORMAT = "timeformat"; + + public static final String SOURCE = "source"; + + public static final String SHARD = "shard"; + +} diff --git a/loghubreader/src/main/java/com/alibaba/datax/plugin/reader/loghubreader/LogHubReader.java b/loghubreader/src/main/java/com/alibaba/datax/plugin/reader/loghubreader/LogHubReader.java new file mode 100644 index 00000000..c52ef62d --- /dev/null +++ b/loghubreader/src/main/java/com/alibaba/datax/plugin/reader/loghubreader/LogHubReader.java @@ -0,0 +1,482 @@ +package com.alibaba.datax.plugin.reader.loghubreader; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.element.StringColumn; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordSender; +import com.alibaba.datax.common.spi.Reader; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.DataXCaseEnvUtil; +import com.alibaba.datax.common.util.RetryUtil; +import com.alibaba.fastjson2.JSONObject; +import com.aliyun.openservices.log.Client; +import com.aliyun.openservices.log.common.Consts.CursorMode; +import com.aliyun.openservices.log.common.*; +import com.aliyun.openservices.log.exception.LogException; +import com.aliyun.openservices.log.response.BatchGetLogResponse; +import com.aliyun.openservices.log.response.GetCursorResponse; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.*; +import java.util.concurrent.Callable; + +public class LogHubReader extends Reader { + public static class Job extends Reader.Job { + + private static final Logger LOG = LoggerFactory.getLogger(Job.class); + + private Client client; + private Configuration originalConfig; + + private Long beginTimestampMillis; + private Long endTimestampMillis; + + @Override + public void init() { + LOG.info("loghub reader job init begin ..."); + this.originalConfig = super.getPluginJobConf(); + validateParameter(originalConfig); + + String endPoint = this.originalConfig.getString(Key.ENDPOINT); + String accessKeyId = this.originalConfig.getString(Key.ACCESSKEYID); + String accessKeySecret = this.originalConfig.getString(Key.ACCESSKEYSECRET); + + client = new Client(endPoint, accessKeyId, accessKeySecret); + LOG.info("loghub reader job init end."); + } + + private void validateParameter(Configuration conf){ + conf.getNecessaryValue(Key.ENDPOINT,LogHubReaderErrorCode.REQUIRE_VALUE); + conf.getNecessaryValue(Key.ACCESSKEYID,LogHubReaderErrorCode.REQUIRE_VALUE); + conf.getNecessaryValue(Key.ACCESSKEYSECRET,LogHubReaderErrorCode.REQUIRE_VALUE); + conf.getNecessaryValue(Key.PROJECT,LogHubReaderErrorCode.REQUIRE_VALUE); + conf.getNecessaryValue(Key.LOGSTORE,LogHubReaderErrorCode.REQUIRE_VALUE); + conf.getNecessaryValue(Key.COLUMN,LogHubReaderErrorCode.REQUIRE_VALUE); + + int batchSize = this.originalConfig.getInt(Key.BATCHSIZE); + if (batchSize > 1000) { + throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE, + "Invalid batchSize[" + batchSize + "] value (0,1000]!"); + } + + beginTimestampMillis = this.originalConfig.getLong(Key.BEGINTIMESTAMPMILLIS); + String beginDateTime = this.originalConfig.getString(Key.BEGINDATETIME); + + if (beginDateTime != null) { + try { + beginTimestampMillis = getUnixTimeFromDateTime(beginDateTime); + } catch (ParseException e) { + throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE, + 
"Invalid beginDateTime[" + beginDateTime + "], format [yyyyMMddHHmmss or yyyyMMdd]!"); + } + } + + if (beginTimestampMillis != null && beginTimestampMillis <= 0) { + throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE, + "Invalid beginTimestampMillis[" + beginTimestampMillis + "]!"); + } + + endTimestampMillis = this.originalConfig.getLong(Key.ENDTIMESTAMPMILLIS); + String endDateTime = this.originalConfig.getString(Key.ENDDATETIME); + + if (endDateTime != null) { + try { + endTimestampMillis = getUnixTimeFromDateTime(endDateTime); + } catch (ParseException e) { + throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE, + "Invalid beginDateTime[" + endDateTime + "], format [yyyyMMddHHmmss or yyyyMMdd]!"); + } + } + + if (endTimestampMillis != null && endTimestampMillis <= 0) { + throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE, + "Invalid endTimestampMillis[" + endTimestampMillis + "]!"); + } + + if (beginTimestampMillis != null && endTimestampMillis != null + && endTimestampMillis <= beginTimestampMillis) { + throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE, + "endTimestampMillis[" + endTimestampMillis + "] must bigger than beginTimestampMillis[" + beginTimestampMillis + "]!"); + } + } + + private long getUnixTimeFromDateTime(String dateTime) throws ParseException { + try { + String format = Constant.DATETIME_FORMAT; + SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format); + return simpleDateFormat.parse(dateTime).getTime() / 1000; + } catch (ParseException ignored) { + throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE, + "Invalid DateTime[" + dateTime + "]!"); + } + } + + @Override + public void prepare() { + } + + @Override + public List split(int adviceNumber) { + LOG.info("split() begin..."); + + List readerSplitConfigs = new ArrayList(); + + final String project = this.originalConfig.getString(Key.PROJECT); + final String logstore = this.originalConfig.getString(Key.LOGSTORE); + + List logStore = null; + try { + logStore = RetryUtil.executeWithRetry(new Callable>() { + @Override + public List call() throws Exception { + return client.ListShard(project, logstore).GetShards(); + } + }, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true)); + } catch (Exception e) { + throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE, + "get LogStore[" + logstore + "] error, please check ! 
detail error messsage: " + e.toString()); + } + + if (logStore == null) { + throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE, + "LogStore[" + logstore + "] isn't exists, please check !"); + } + + int splitNumber = logStore.size(); + if (0 == splitNumber) { + throw DataXException.asDataXException(LogHubReaderErrorCode.EMPTY_LOGSTORE_VALUE, + "LogStore[" + logstore + "] has 0 shard, please check !"); + } + + Collections.shuffle(logStore); + for (int i = 0; i < logStore.size(); i++) { + if (beginTimestampMillis != null && endTimestampMillis != null) { + try { + String beginCursor = getCursorWithRetry(client, project, logstore, logStore.get(i).GetShardId(), beginTimestampMillis).GetCursor(); + String endCursor = getCursorWithRetry(client, project, logstore, logStore.get(i).GetShardId(), endTimestampMillis).GetCursor(); + if (beginCursor.equals(endCursor)) { + if ((i == logStore.size() - 1) && (readerSplitConfigs.size() == 0)) { + + } else { + LOG.info("skip empty shard[" + logStore.get(i) + "]!"); + continue; + } + } + } catch (Exception e) { + LOG.error("Check Shard[" + logStore.get(i) + "] Error, please check !" + e.toString()); + throw DataXException.asDataXException(LogHubReaderErrorCode.LOG_HUB_ERROR, e); + } + } + Configuration splitedConfig = this.originalConfig.clone(); + splitedConfig.set(Key.SHARD, logStore.get(i).GetShardId()); + readerSplitConfigs.add(splitedConfig); + } + + if (splitNumber < adviceNumber) { + // LOG.info(MESSAGE_SOURCE.message("hdfsreader.12", + // splitNumber, adviceNumber, splitNumber, splitNumber)); + } + LOG.info("split() ok and end..."); + + return readerSplitConfigs; + } + + @Override + public void post() { + } + + @Override + public void destroy() { + } + + private GetCursorResponse getCursorWithRetry(final Client client, final String project, final String logstore, final int shard, final long fromTime) throws Exception { + return + RetryUtil.executeWithRetry(new Callable() { + @Override + public GetCursorResponse call() throws Exception { + LOG.info("loghug get cursor with project: {} logstore: {} shard: {} time: {}", project, logstore, shard, fromTime); + return client.GetCursor(project, logstore, shard, fromTime); + } + }, 7, 1000L, true); + } + + } + + public static class Task extends Reader.Task { + + private static final Logger LOG = LoggerFactory.getLogger(Task.class); + + private Configuration taskConfig; + private Client client; + private String endPoint; + private String accessKeyId; + private String accessKeySecret; + private String project; + private String logstore; + private long beginTimestampMillis; + private long endTimestampMillis; + private int batchSize; + private int shard; + private List columns; + + @Override + public void init() { + this.taskConfig = super.getPluginJobConf(); + + endPoint = this.taskConfig.getString(Key.ENDPOINT); + accessKeyId = this.taskConfig.getString(Key.ACCESSKEYID); + accessKeySecret = this.taskConfig.getString(Key.ACCESSKEYSECRET); + project = this.taskConfig.getString(Key.PROJECT); + logstore = this.taskConfig.getString(Key.LOGSTORE); + batchSize = this.taskConfig.getInt(Key.BATCHSIZE, 128); + + this.beginTimestampMillis = this.taskConfig.getLong(Key.BEGINTIMESTAMPMILLIS, -1); + String beginDateTime = this.taskConfig.getString(Key.BEGINDATETIME); + + if (beginDateTime != null) { + try { + beginTimestampMillis = getUnixTimeFromDateTime(beginDateTime); + } catch (ParseException e) { + } + } + + this.endTimestampMillis = this.taskConfig.getLong(Key.ENDTIMESTAMPMILLIS, -1); + String 
endDateTime = this.taskConfig.getString(Key.ENDDATETIME); + + if (endDateTime != null) { + try { + endTimestampMillis = getUnixTimeFromDateTime(endDateTime); + } catch (ParseException e) { + } + } + + columns = this.taskConfig.getList(Key.COLUMN, String.class); + + shard = this.taskConfig.getInt(Key.SHARD); + + client = new Client(endPoint, accessKeyId, accessKeySecret); + LOG.info("init loghub reader task finished.project:{} logstore:{} batchSize:{}", project, logstore, batchSize); + } + + @Override + public void prepare() { + } + + private long getUnixTimeFromDateTime(String dateTime) throws ParseException { + try { + String format = Constant.DATETIME_FORMAT; + SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format); + return simpleDateFormat.parse(dateTime).getTime() / 1000; + } catch (ParseException ignored) { + } + String format = Constant.DATE_FORMAT; + SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format); + return simpleDateFormat.parse(dateTime).getTime() / 1000; + } + + private GetCursorResponse getCursorWithRetry(final Client client, final String project, final String logstore, final int shard, final long fromTime) throws Exception { + return + RetryUtil.executeWithRetry(new Callable() { + @Override + public GetCursorResponse call() throws Exception { + LOG.info("loghug get cursor with project: {} logstore: {} shard: {} time: {}", project, logstore, shard, fromTime); + return client.GetCursor(project, logstore, shard, fromTime); + } + }, 7, 1000L, true); + } + + private GetCursorResponse getCursorWithRetry(final Client client, final String project, final String logstore, final int shard, final CursorMode mode) throws Exception { + return + RetryUtil.executeWithRetry(new Callable() { + @Override + public GetCursorResponse call() throws Exception { + LOG.info("loghug get cursor with project: {} logstore: {} shard: {} mode: {}", project, logstore, shard, mode); + return client.GetCursor(project, logstore, shard, mode); + } + }, 7, 1000L, true); + } + + private BatchGetLogResponse batchGetLogWithRetry(final Client client, final String project, final String logstore, final int shard, final int batchSize, + final String curCursor, final String endCursor) throws Exception { + return + RetryUtil.executeWithRetry(new Callable() { + @Override + public BatchGetLogResponse call() throws Exception { + return client.BatchGetLog(project, logstore, shard, batchSize, curCursor, endCursor); + } + }, 7, 1000L, true); + } + + @Override + public void startRead(RecordSender recordSender) { + LOG.info("read start"); + + try { + GetCursorResponse cursorRes; + if (this.beginTimestampMillis != -1) { + cursorRes = getCursorWithRetry(client, project, logstore, this.shard, beginTimestampMillis); + } else { + cursorRes = getCursorWithRetry(client, project, logstore, this.shard, CursorMode.BEGIN); + } + String beginCursor = cursorRes.GetCursor(); + + LOG.info("the begin cursor, loghub requestId: {} cursor: {}", cursorRes.GetRequestId(), cursorRes.GetCursor()); + + if (this.endTimestampMillis != -1) { + cursorRes = getCursorWithRetry(client, project, logstore, this.shard, endTimestampMillis); + } else { + cursorRes = getCursorWithRetry(client, project, logstore, this.shard, CursorMode.END); + } + String endCursor = cursorRes.GetCursor(); + LOG.info("the end cursor, loghub requestId: {} cursor: {}", cursorRes.GetRequestId(), cursorRes.GetCursor()); + + if (StringUtils.equals(beginCursor, endCursor)) { + LOG.info("beginCursor:{} equals endCursor:{}, end directly!", beginCursor, endCursor); + 
return; + } + + String currentCursor = null; + String nextCursor = beginCursor; + + HashMap metaMap = new HashMap(); + HashMap dataMap = new HashMap(); + JSONObject allMetaJson = new JSONObject(); + while (!StringUtils.equals(currentCursor, nextCursor)) { + currentCursor = nextCursor; + BatchGetLogResponse logDataRes = batchGetLogWithRetry(client, project, logstore, this.shard, this.batchSize, currentCursor, endCursor); + + List logGroups = logDataRes.GetLogGroups(); + + for(LogGroupData logGroup: logGroups) { + metaMap.clear(); + allMetaJson.clear(); + FastLogGroup flg = logGroup.GetFastLogGroup(); + + metaMap.put("C_Category", flg.getCategory()); + metaMap.put(Constant.META_COL_CATEGORY, flg.getCategory()); + allMetaJson.put(Constant.META_COL_CATEGORY, flg.getCategory()); + + metaMap.put("C_Source", flg.getSource()); + metaMap.put(Constant.META_COL_SOURCE, flg.getSource()); + allMetaJson.put(Constant.META_COL_SOURCE, flg.getSource()); + + metaMap.put("C_Topic", flg.getTopic()); + metaMap.put(Constant.META_COL_TOPIC, flg.getTopic()); + allMetaJson.put(Constant.META_COL_TOPIC, flg.getTopic()); + + metaMap.put("C_MachineUUID", flg.getMachineUUID()); + metaMap.put(Constant.META_COL_MACHINEUUID, flg.getMachineUUID()); + allMetaJson.put(Constant.META_COL_MACHINEUUID, flg.getMachineUUID()); + + for (int tagIdx = 0; tagIdx < flg.getLogTagsCount(); ++tagIdx) { + FastLogTag logtag = flg.getLogTags(tagIdx); + String tagKey = logtag.getKey(); + String tagValue = logtag.getValue(); + if (tagKey.equals(Constant.META_COL_HOSTNAME)) { + metaMap.put("C_HostName", logtag.getValue()); + } else if (tagKey.equals(Constant.META_COL_PATH)) { + metaMap.put("C_Path", logtag.getValue()); + } + metaMap.put(tagKey, tagValue); + allMetaJson.put(tagKey, tagValue); + } + + for (int lIdx = 0; lIdx < flg.getLogsCount(); ++lIdx) { + dataMap.clear(); + FastLog log = flg.getLogs(lIdx); + + String logTime = String.valueOf(log.getTime()); + metaMap.put("C_LogTime", logTime); + metaMap.put(Constant.META_COL_LOGTIME, logTime); + allMetaJson.put(Constant.META_COL_LOGTIME, logTime); + + for (int cIdx = 0; cIdx < log.getContentsCount(); ++cIdx) { + FastLogContent content = log.getContents(cIdx); + dataMap.put(content.getKey(), content.getValue()); + } + + Record record = recordSender.createRecord(); + + JSONObject extractOthers = new JSONObject(); + if(columns.contains(Constant.COL_EXTRACT_OTHERS)){ + List keyList = Arrays.asList(dataMap.keySet().toArray(new String[dataMap.keySet().size()])); + for (String otherKey:keyList) { + if (!columns.contains(otherKey)){ + extractOthers.put(otherKey,dataMap.get(otherKey)); + } + } + } + if (null != this.columns && 1 == this.columns.size()) { + String columnsInStr = columns.get(0).toString(); + if ("\"*\"".equals(columnsInStr) || "*".equals(columnsInStr)) { + List keyList = Arrays.asList(dataMap.keySet().toArray(new String[dataMap.keySet().size()])); + Collections.sort(keyList); + + for (String key : keyList) { + record.addColumn(new StringColumn(key + ":" + dataMap.get(key))); + } + } else { + if (dataMap.containsKey(columnsInStr)) { + record.addColumn(new StringColumn(dataMap.get(columnsInStr))); + } else if (metaMap.containsKey(columnsInStr)) { + record.addColumn(new StringColumn(metaMap.get(columnsInStr))); + } else if (Constant.COL_EXTRACT_OTHERS.equals(columnsInStr)){ + record.addColumn(new StringColumn(extractOthers.toJSONString())); + } else if (Constant.COL_EXTRACT_ALL_META.equals(columnsInStr)) { + record.addColumn(new StringColumn(allMetaJson.toJSONString())); + } + } + } else { 
+ for (String col : this.columns) { + if (dataMap.containsKey(col)) { + record.addColumn(new StringColumn(dataMap.get(col))); + } else if (metaMap.containsKey(col)) { + record.addColumn(new StringColumn(metaMap.get(col))); + } else if (col != null && col.startsWith("'") && col.endsWith("'")){ + String constant = col.substring(1, col.length()-1); + record.addColumn(new StringColumn(constant)); }else if (Constant.COL_EXTRACT_OTHERS.equals(col)){ + record.addColumn(new StringColumn(extractOthers.toJSONString())); + } else if (Constant.COL_EXTRACT_ALL_META.equals(col)) { + record.addColumn(new StringColumn(allMetaJson.toJSONString())); + } else { + record.addColumn(new StringColumn(null)); + } + } + } + + recordSender.sendToWriter(record); + } + } + + nextCursor = logDataRes.GetNextCursor(); + } + } catch (LogException e) { + if (e.GetErrorCode().equals("LogStoreNotExist")) { + LOG.info("logStore[" + logstore +"] does not exist! detail error message: " + e.toString()); + } else { + LOG.error("read LogStore[" + logstore + "] error, please check ! detail error message: " + e.toString()); + throw DataXException.asDataXException(LogHubReaderErrorCode.LOG_HUB_ERROR, e); + } + + } catch (Exception e) { + LOG.error("read LogStore[" + logstore + "] error, please check ! detail error message: " + e.toString()); + throw DataXException.asDataXException(LogHubReaderErrorCode.LOG_HUB_ERROR, e); + } + + LOG.info("end read loghub shard..."); + } + + @Override + public void post() { + } + + @Override + public void destroy() { + } + } +} diff --git a/loghubreader/src/main/java/com/alibaba/datax/plugin/reader/loghubreader/LogHubReaderErrorCode.java b/loghubreader/src/main/java/com/alibaba/datax/plugin/reader/loghubreader/LogHubReaderErrorCode.java new file mode 100644 index 00000000..d9ee4c8e --- /dev/null +++ b/loghubreader/src/main/java/com/alibaba/datax/plugin/reader/loghubreader/LogHubReaderErrorCode.java @@ -0,0 +1,34 @@ +package com.alibaba.datax.plugin.reader.loghubreader; + +import com.alibaba.datax.common.spi.ErrorCode; + +public enum LogHubReaderErrorCode implements ErrorCode { + BAD_CONFIG_VALUE("LogHubReader-00", "The value you configured is invalid."), + LOG_HUB_ERROR("LogHubReader-01","LogHub access encountered an exception"), + REQUIRE_VALUE("LogHubReader-02","Missing parameters"), + EMPTY_LOGSTORE_VALUE("LogHubReader-03","There is no shard in this LogStore"); + + private final String code; + private final String description; + + private LogHubReaderErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.description; + } + + @Override + public String toString() { + return String.format("Code:[%s], Description:[%s]. 
", this.code, + this.description); + } +} diff --git a/loghubreader/src/main/resources/plugin.json b/loghubreader/src/main/resources/plugin.json new file mode 100644 index 00000000..31403dd6 --- /dev/null +++ b/loghubreader/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "loghubreader", + "class": "com.alibaba.datax.plugin.reader.loghubreader.LogHubReader", + "description": "适用于: 从SLS LogHub中读取数据", + "developer": "alibaba" +} \ No newline at end of file diff --git a/loghubreader/src/main/resources/plugin_job_template.json b/loghubreader/src/main/resources/plugin_job_template.json new file mode 100644 index 00000000..4d536eb9 --- /dev/null +++ b/loghubreader/src/main/resources/plugin_job_template.json @@ -0,0 +1,12 @@ +{ + "name": "loghubreader", + "parameter": { + "endpoint": "", + "accessId": "", + "accessKey": "", + "project": "", + "logstore": "", + "batchSize":1024, + "column": [] + } +} \ No newline at end of file diff --git a/loghubwriter/pom.xml b/loghubwriter/pom.xml new file mode 100644 index 00000000..d43b7286 --- /dev/null +++ b/loghubwriter/pom.xml @@ -0,0 +1,73 @@ + + + + datax-all + com.alibaba.datax + 0.0.1-SNAPSHOT + + 4.0.0 + + loghubwriter + + 0.0.1-SNAPSHOT + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + org.slf4j + slf4j-api + + + ch.qos.logback + logback-classic + + + com.aliyun.openservices + aliyun-log + 0.6.12 + + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + diff --git a/loghubwriter/src/main/assembly/package.xml b/loghubwriter/src/main/assembly/package.xml new file mode 100644 index 00000000..44d25a48 --- /dev/null +++ b/loghubwriter/src/main/assembly/package.xml @@ -0,0 +1,34 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + + plugin/writer/loghubwriter + + + target/ + + loghubwriter-0.0.1-SNAPSHOT.jar + + plugin/writer/loghubwriter + + + + + + false + plugin/writer/loghubwriter/libs + runtime + + + diff --git a/loghubwriter/src/main/java/com/alibaba/datax/plugin/writer/loghubwriter/Key.java b/loghubwriter/src/main/java/com/alibaba/datax/plugin/writer/loghubwriter/Key.java new file mode 100644 index 00000000..bdfe3fa5 --- /dev/null +++ b/loghubwriter/src/main/java/com/alibaba/datax/plugin/writer/loghubwriter/Key.java @@ -0,0 +1,35 @@ +package com.alibaba.datax.plugin.writer.loghubwriter; + +/** + * 配置关键字 + * @author + */ +public final class Key { + + /** + * 此处声明插件用到的需要插件使用者提供的配置项 + */ + public static final String ENDPOINT = "endpoint"; + + public static final String ACCESS_KEY_ID = "accessId"; + + public static final String ACCESS_KEY_SECRET = "accessKey"; + + public static final String PROJECT = "project"; + + public static final String LOG_STORE = "logstore"; + + public static final String TOPIC = "topic"; + + public static final String COLUMN = "column"; + + public static final String BATCH_SIZE = "batchSize"; + + public static final String TIME = "time"; + + public static final String TIME_FORMAT = "timeformat"; + + public static final String SOURCE = "source"; + + public static final String HASH_BY_KEY = "hashKey"; +} diff --git a/loghubwriter/src/main/java/com/alibaba/datax/plugin/writer/loghubwriter/LogHubWriter.java b/loghubwriter/src/main/java/com/alibaba/datax/plugin/writer/loghubwriter/LogHubWriter.java new file mode 100644 index 00000000..bf60d08c --- /dev/null +++ 
b/loghubwriter/src/main/java/com/alibaba/datax/plugin/writer/loghubwriter/LogHubWriter.java @@ -0,0 +1,315 @@ +package com.alibaba.datax.plugin.writer.loghubwriter; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.spi.Writer; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.RetryUtil; +import com.alibaba.datax.common.util.StrUtil; +import com.aliyun.openservices.log.Client; +import com.aliyun.openservices.log.common.LogItem; +import com.aliyun.openservices.log.common.Shard; +import com.aliyun.openservices.log.exception.LogException; +import com.aliyun.openservices.log.request.ListShardRequest; +import com.aliyun.openservices.log.request.PutLogsRequest; +import com.aliyun.openservices.log.response.ListShardResponse; +import com.aliyun.openservices.log.response.PutLogsResponse; + +import org.apache.commons.codec.digest.Md5Crypt; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import sun.security.provider.MD5; + +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Callable; + +/** + * SLS 写插件 + * @author + */ +public class LogHubWriter extends Writer { + + public static class Job extends Writer.Job { + private static final Logger LOG = LoggerFactory.getLogger(Job.class); + + private Configuration jobConfig = null; + + @Override + public void init() { + info(LOG, "loghub writer job init begin ..."); + this.jobConfig = super.getPluginJobConf(); + validateParameter(jobConfig); + info(LOG, "loghub writer job init end."); + } + + private void validateParameter(Configuration conf){ + conf.getNecessaryValue(Key.ENDPOINT,LogHubWriterErrorCode.REQUIRE_VALUE); + conf.getNecessaryValue(Key.ACCESS_KEY_ID,LogHubWriterErrorCode.REQUIRE_VALUE); + conf.getNecessaryValue(Key.ACCESS_KEY_SECRET,LogHubWriterErrorCode.REQUIRE_VALUE); + conf.getNecessaryValue(Key.PROJECT,LogHubWriterErrorCode.REQUIRE_VALUE); + conf.getNecessaryValue(Key.LOG_STORE,LogHubWriterErrorCode.REQUIRE_VALUE); + conf.getNecessaryValue(Key.COLUMN,LogHubWriterErrorCode.REQUIRE_VALUE); + } + + @Override + public List split(int mandatoryNumber) { + info(LOG, "split begin..."); + List configurationList = new ArrayList(); + for (int i = 0; i < mandatoryNumber; i++) { + configurationList.add(this.jobConfig.clone()); + } + info(LOG, "split end..."); + return configurationList; + } + + @Override + public void post() { + } + + @Override + public void destroy() { + } + } + + public static class Task extends Writer.Task { + private static final Logger LOG = LoggerFactory.getLogger(Task.class); + private Configuration taskConfig; + private com.aliyun.openservices.log.Client logHubClient; + private String logStore; + private String topic; + private String project; + private List columnList; + private int batchSize; + private String timeCol; + private String timeFormat; + private String source; + private boolean isHashKey; + private List shards; + public void init() { + this.taskConfig = super.getPluginJobConf(); + String endpoint = taskConfig.getString(Key.ENDPOINT); + String accessKeyId = taskConfig.getString(Key.ACCESS_KEY_ID); + String accessKeySecret = taskConfig.getString(Key.ACCESS_KEY_SECRET); + project = taskConfig.getString(Key.PROJECT); + logStore = 
taskConfig.getString(Key.LOG_STORE); + topic = taskConfig.getString(Key.TOPIC,""); + columnList = taskConfig.getList(Key.COLUMN,String.class); + batchSize = taskConfig.getInt(Key.BATCH_SIZE,1024); + timeCol = taskConfig.getString(Key.TIME,""); + timeFormat = taskConfig.getString(Key.TIME_FORMAT,""); + source = taskConfig.getString(Key.SOURCE,""); + isHashKey = taskConfig.getBool(Key.HASH_BY_KEY,false); + logHubClient = new Client(endpoint, accessKeyId, accessKeySecret); + if (isHashKey) { + listShard(); + info(LOG, "init loghub writer with hash key mode."); + } + if (LOG.isInfoEnabled()) { + LOG.info("init loghub writer task finished.project:{} logstore:{} topic:{} batchSize:{}",project,logStore,topic,batchSize); + } + } + + /** + * 获取通道的分片信息 + */ + private void listShard() { + try { + ListShardResponse response = logHubClient.ListShard(new ListShardRequest(project,logStore)); + shards = response.GetShards(); + if (LOG.isInfoEnabled()) { + LOG.info("Get shard count:{}", shards.size()); + } + } catch (LogException e) { + info(LOG, "Get shard failed!"); + throw new RuntimeException("Get shard failed!", e); + } + } + + @Override + public void prepare() { + } + + private int getTime(String v) { + try { + if ("bigint".equalsIgnoreCase(timeFormat)) { + return Integer.valueOf(v); + } + + DateFormat sdf = new SimpleDateFormat(timeFormat); + Date date = sdf.parse(v); + return (int)(date.getTime()/1000); + } catch (Exception e) { + LOG.warn("Format time failed!", e); + } + return (int)(((new Date())).getTime()/1000); + } + + @Override + public void startWrite(RecordReceiver recordReceiver) { + info(LOG, "start to write....................."); + // 按照shared做hash处理 + if (isHashKey) { + processDataWithHashKey(recordReceiver); + } else { + processDataWithoutHashKey(recordReceiver); + } + info(LOG, "finish to write........."); + } + + private void processDataWithHashKey(RecordReceiver receiver) { + Record record; + Map> logMap = new HashMap>(shards.size()); + int count = 0; + try { + while ((record = receiver.getFromReader()) != null) { + LogItem logItem = new LogItem(); + if (record.getColumnNumber() != columnList.size()) { + this.getTaskPluginCollector().collectDirtyRecord(record, "column not match"); + } + + String id = ""; + for (int i = 0; i < record.getColumnNumber(); i++) { + String colName = columnList.get(i); + String colValue = record.getColumn(i).asString(); + if (colName.endsWith("_id")) { + id = colValue; + } + + logItem.PushBack(colName, colValue); + if (colName.equals(timeCol)) { + logItem.SetTime(getTime(colValue)); + } + } + + String hashKey = getShardHashKey(StrUtil.getMd5(id), shards); + if (!logMap.containsKey(hashKey)) { + info(LOG, "Hash key:" + hashKey); + logMap.put(hashKey, new ArrayList()); + } + logMap.get(hashKey).add(logItem); + + if (logMap.get(hashKey).size() % batchSize == 0) { + PutLogsRequest request = new PutLogsRequest(project, logStore, topic, source, logMap.get(hashKey), hashKey); + PutLogsResponse response = putLog(request); + count += logMap.get(hashKey).size(); + if (LOG.isDebugEnabled()) { + LOG.debug("record count:{}, request id:{}", logMap.get(hashKey).size(), response.GetRequestId()); + } + logMap.get(hashKey).clear(); + } + } + + for (Map.Entry> entry : logMap.entrySet()) { + if (!entry.getValue().isEmpty()) { + // 将剩余的数据发送 + PutLogsRequest request = new PutLogsRequest(project, logStore, topic, source, entry.getValue(), entry.getKey()); + PutLogsResponse response = putLog(request); + count += entry.getValue().size(); + if (LOG.isDebugEnabled()) { + 
LOG.debug("record count:{}, request id:{}", entry.getValue().size(), response.GetRequestId()); + } + entry.getValue().clear(); + } + } + LOG.info("{} records have been sent", count); + } catch (LogException ex) { + throw DataXException.asDataXException(LogHubWriterErrorCode.LOG_HUB_ERROR, ex.getMessage(), ex); + } catch (Exception e) { + throw DataXException.asDataXException(LogHubWriterErrorCode.LOG_HUB_ERROR, e.getMessage(), e); + } + } + + private void processDataWithoutHashKey(RecordReceiver receiver) { + Record record; + ArrayList logGroup = new ArrayList(); + int count = 0; + try { + while ((record = receiver.getFromReader()) != null) { + LogItem logItem = new LogItem(); + if(record.getColumnNumber() != columnList.size()){ + this.getTaskPluginCollector().collectDirtyRecord(record,"column not match"); + } + for (int i = 0; i < record.getColumnNumber(); i++) { + String colName = columnList.get(i); + String colValue = record.getColumn(i).asString(); + logItem.PushBack(colName, colValue); + if(colName.equals(timeCol)){ + logItem.SetTime(getTime(colValue)); + } + } + + logGroup.add(logItem); + count++; + if (count % batchSize == 0) { + PutLogsRequest request = new PutLogsRequest(project, logStore, topic, source, logGroup); + PutLogsResponse response = putLog(request); + logGroup.clear(); + if (LOG.isDebugEnabled()) { + LOG.debug("record count:{}, request id:{}", count, response.GetRequestId()); + } + } + } + if (!logGroup.isEmpty()) { + //将剩余的数据发送 + PutLogsRequest request = new PutLogsRequest(project, logStore, topic, source, logGroup); + PutLogsResponse response = putLog(request); + logGroup.clear(); + if (LOG.isDebugEnabled()) { + LOG.debug("record count:{}, request id:{}", count, response.GetRequestId()); + } + } + LOG.info("{} records have been sent", count); + } catch (LogException ex) { + throw DataXException.asDataXException(LogHubWriterErrorCode.LOG_HUB_ERROR, ex.getMessage(), ex); + } catch (Exception e) { + throw DataXException.asDataXException(LogHubWriterErrorCode.LOG_HUB_ERROR, e.getMessage(), e); + } + } + + private PutLogsResponse putLog(final PutLogsRequest request) throws Exception{ + final Client client = this.logHubClient; + + return RetryUtil.executeWithRetry(new Callable() { + public PutLogsResponse call() throws LogException{ + return client.PutLogs(request); + } + }, 3, 1000L, false); + } + + private String getShardHashKey(String hashKey, List shards) { + for (Shard shard : shards) { + if (hashKey.compareTo(shard.getExclusiveEndKey()) < 0 && hashKey.compareTo(shard.getInclusiveBeginKey()) >= 0) { + return shard.getInclusiveBeginKey(); + } + } + return shards.get(0).getInclusiveBeginKey(); + } + + @Override + public void post() { + } + + @Override + public void destroy() { + } + } + + /** + * 日志打印控制 + * + * @param logger + * @param message + */ + public static void info(Logger logger, String message) { + if (logger.isInfoEnabled()) { + logger.info(message); + } + } +} diff --git a/loghubwriter/src/main/java/com/alibaba/datax/plugin/writer/loghubwriter/LogHubWriterErrorCode.java b/loghubwriter/src/main/java/com/alibaba/datax/plugin/writer/loghubwriter/LogHubWriterErrorCode.java new file mode 100644 index 00000000..98c5e16f --- /dev/null +++ b/loghubwriter/src/main/java/com/alibaba/datax/plugin/writer/loghubwriter/LogHubWriterErrorCode.java @@ -0,0 +1,33 @@ +package com.alibaba.datax.plugin.writer.loghubwriter; + +import com.alibaba.datax.common.spi.ErrorCode; + +public enum LogHubWriterErrorCode implements ErrorCode { + BAD_CONFIG_VALUE("LogHubWriter-00", "The value 
you configured is invalid."), + LOG_HUB_ERROR("LogHubWriter-01","LogHub access encounter exception"), + REQUIRE_VALUE("LogHubWriter-02","Missing parameters"); + + private final String code; + private final String description; + + private LogHubWriterErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.description; + } + + @Override + public String toString() { + return String.format("Code:[%s], Description:[%s]. ", this.code, + this.description); + } +} \ No newline at end of file diff --git a/loghubwriter/src/main/resources/plugin.json b/loghubwriter/src/main/resources/plugin.json new file mode 100644 index 00000000..2a913b14 --- /dev/null +++ b/loghubwriter/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "loghubwriter", + "class": "com.alibaba.datax.plugin.writer.loghubwriter.LogHubWriter", + "description": "适用于: 将数据导入到SLS LogHub中", + "developer": "alibaba" +} \ No newline at end of file diff --git a/loghubwriter/src/main/resources/plugin_job_template.json b/loghubwriter/src/main/resources/plugin_job_template.json new file mode 100644 index 00000000..ac0d3b2a --- /dev/null +++ b/loghubwriter/src/main/resources/plugin_job_template.json @@ -0,0 +1,13 @@ +{ + "name": "loghubwriter", + "parameter": { + "endpoint": "", + "accessId": "", + "accessKey": "", + "project": "", + "logstore": "", + "topic": "", + "batchSize":1024, + "column": [] + } +} \ No newline at end of file diff --git a/mongodbreader/doc/mongodbreader.md b/mongodbreader/doc/mongodbreader.md index 6d1e6a99..297e598c 100644 --- a/mongodbreader/doc/mongodbreader.md +++ b/mongodbreader/doc/mongodbreader.md @@ -8,7 +8,7 @@ MongoDBReader 插件利用 MongoDB 的java客户端MongoClient进行MongoDB的 MongoDBReader通过Datax框架从MongoDB并行的读取数据,通过主控的JOB程序按照指定的规则对MongoDB中的数据进行分片,并行读取,然后将MongoDB支持的类型通过逐一判断转换成Datax支持的类型。 #### 3 功能说明 -* 该示例从ODPS读一份数据到MongoDB。 +* 该示例从MongoDB读一份数据到ODPS。 { "job": { @@ -114,8 +114,7 @@ MongoDBReader通过Datax框架从MongoDB并行的读取数据,通过主控的J "accessKey": "********************", "truncate": true, "odpsServer": "xxx/api", - "tunnelServer": "xxx", - "accountType": "aliyun" + "tunnelServer": "xxx" } } } @@ -127,6 +126,7 @@ MongoDBReader通过Datax框架从MongoDB并行的读取数据,通过主控的J * address: MongoDB的数据地址信息,因为MonogDB可能是个集群,则ip端口信息需要以Json数组的形式给出。【必填】 * userName:MongoDB的用户名。【选填】 * userPassword: MongoDB的密码。【选填】 +* authDb: MongoDB认证数据库【选填】 * collectionName: MonogoDB的集合名。【必填】 * column:MongoDB的文档列名。【必填】 * name:Column的名字。【必填】 @@ -147,4 +147,4 @@ MongoDBReader通过Datax框架从MongoDB并行的读取数据,通过主控的J #### 6 性能报告 -#### 7 测试报告 \ No newline at end of file +#### 7 测试报告 diff --git a/mongodbreader/src/main/java/com/alibaba/datax/plugin/reader/mongodbreader/MongoDBReader.java b/mongodbreader/src/main/java/com/alibaba/datax/plugin/reader/mongodbreader/MongoDBReader.java index ba7f07f4..4d129a5a 100644 --- a/mongodbreader/src/main/java/com/alibaba/datax/plugin/reader/mongodbreader/MongoDBReader.java +++ b/mongodbreader/src/main/java/com/alibaba/datax/plugin/reader/mongodbreader/MongoDBReader.java @@ -18,9 +18,9 @@ import com.alibaba.datax.common.spi.Reader; import com.alibaba.datax.common.util.Configuration; import com.alibaba.datax.plugin.reader.mongodbreader.util.CollectionSplitUtil; import com.alibaba.datax.plugin.reader.mongodbreader.util.MongoUtil; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.JSONArray; -import com.alibaba.fastjson.JSONObject; +import com.alibaba.fastjson2.JSON; 
+import com.alibaba.fastjson2.JSONArray; +import com.alibaba.fastjson2.JSONObject; import com.google.common.base.Joiner; import com.google.common.base.Strings; diff --git a/mongodbwriter/doc/mongodbwriter.md b/mongodbwriter/doc/mongodbwriter.md index e30008db..93f50290 100644 --- a/mongodbwriter/doc/mongodbwriter.md +++ b/mongodbwriter/doc/mongodbwriter.md @@ -116,10 +116,10 @@ MongoDBWriter通过Datax框架获取Reader生成的数据,然后将Datax支持 "type": "int" } ], - "upsertInfo": { - "isUpsert": "true", - "upsertKey": "unique_id" - } + "writeMode": { + "isReplace": "true", + "replaceKey": "unique_id" + } } } } @@ -135,11 +135,11 @@ MongoDBWriter通过Datax框架获取Reader生成的数据,然后将Datax支持 * collectionName: MonogoDB的集合名。【必填】 * column:MongoDB的文档列名。【必填】 * name:Column的名字。【必填】 -* type:Column的类型。【选填】 +* type:Column的类型。【必填】 * splitter:特殊分隔符,当且仅当要处理的字符串要用分隔符分隔为字符数组时,才使用这个参数,通过这个参数指定的分隔符,将字符串分隔存储到MongoDB的数组中。【选填】 -* upsertInfo:指定了传输数据时更新的信息。【选填】 -* isUpsert:当设置为true时,表示针对相同的upsertKey做更新操作。【选填】 -* upsertKey:upsertKey指定了没行记录的业务主键。用来做更新时使用。【选填】 +* writeMode:指定了传输数据时更新的信息。【选填】 +* isReplace:当设置为true时,表示针对相同的replaceKey做更新操作。【选填】 +* replaceKey:replaceKey指定了每行记录的业务主键。用来做更新时使用。【选填】 #### 5 类型转换 @@ -154,4 +154,4 @@ MongoDBWriter通过Datax框架获取Reader生成的数据,然后将Datax支持 #### 6 性能报告 -#### 7 测试报告 \ No newline at end of file +#### 7 测试报告 diff --git a/mongodbwriter/src/main/java/com/alibaba/datax/plugin/writer/mongodbwriter/MongoDBWriter.java b/mongodbwriter/src/main/java/com/alibaba/datax/plugin/writer/mongodbwriter/MongoDBWriter.java index 66c75078..76f35a40 100644 --- a/mongodbwriter/src/main/java/com/alibaba/datax/plugin/writer/mongodbwriter/MongoDBWriter.java +++ b/mongodbwriter/src/main/java/com/alibaba/datax/plugin/writer/mongodbwriter/MongoDBWriter.java @@ -7,9 +7,9 @@ import com.alibaba.datax.common.spi.Writer; import com.alibaba.datax.common.util.Configuration; import com.alibaba.datax.plugin.rdbms.writer.Key; import com.alibaba.datax.plugin.writer.mongodbwriter.util.MongoUtil; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.JSONArray; -import com.alibaba.fastjson.JSONObject; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONArray; +import com.alibaba.fastjson2.JSONObject; import com.google.common.base.Strings; import com.mongodb.*; import com.mongodb.client.MongoCollection; diff --git a/mysqlreader/doc/mysqlreader.md b/mysqlreader/doc/mysqlreader.md index 3ae52afb..bae4bce0 100644 --- a/mysqlreader/doc/mysqlreader.md +++ b/mysqlreader/doc/mysqlreader.md @@ -165,7 +165,7 @@ MysqlReader插件实现了从Mysql读取数据。在底层实现上,MysqlReade 支持常量配置,用户需要按照Mysql SQL语法格式: ["id", "\`table\`", "1", "'bazhen.csy'", "null", "to_char(a + 1)", "2.3" , "true"] - id为普通列名,\`table\`为包含保留在的列名,1为整形数字常量,'bazhen.csy'为字符串常量,null为空指针,to_char(a + 1)为表达式,2.3为浮点数,true为布尔值。 + id为普通列名,\`table\`为包含保留字的列名,1为整形数字常量,'bazhen.csy'为字符串常量,null为空指针,to_char(a + 1)为表达式,2.3为浮点数,true为布尔值。 * 必选:是
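下面补充一个示意性的配置片段(仅为草图:其中的 jdbcUrl、库表名与账号均为假设值,column 列表沿用上文示例),演示这种混合了普通列、保留字列与常量的 column 写法在 mysqlreader 的 parameter 中所处的位置:

```json
{
    "reader": {
        "name": "mysqlreader",
        "parameter": {
            "username": "root",
            "password": "******",
            "column": ["id", "`table`", "1", "'bazhen.csy'", "null", "to_char(a + 1)", "2.3", "true"],
            "connection": [
                {
                    "table": ["demo_table"],
                    "jdbcUrl": ["jdbc:mysql://127.0.0.1:3306/demo_db"]
                }
            ]
        }
    }
}
```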
diff --git a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/OceanBaseReader.java b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/OceanBaseReader.java index 0a4934a1..e92e5025 100644 --- a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/OceanBaseReader.java +++ b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/OceanBaseReader.java @@ -3,6 +3,7 @@ package com.alibaba.datax.plugin.reader.oceanbasev10reader; import java.sql.Connection; import java.util.List; +import com.alibaba.datax.plugin.reader.oceanbasev10reader.ext.ObReaderKey; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -32,24 +33,41 @@ public class OceanBaseReader extends Reader { if (userConfigedFetchSize != null) { LOG.warn("The [fetchSize] is not recognized, please use readBatchSize instead."); } - this.originalConfig.set(Constant.FETCH_SIZE, Integer.MIN_VALUE); - setDatabaseType(originalConfig); - this.readerJob = new ReaderJob(); this.readerJob.init(this.originalConfig); } + @Override + public void prepare() { + //ObReaderUtils.DATABASE_TYPE获取当前数据库的语法模式 + } + @Override public void preCheck() { init(); - this.readerJob.preCheck(this.originalConfig, ObReaderUtils.DATABASE_TYPE); + this.readerJob.preCheck(this.originalConfig, ObReaderUtils.databaseType); } @Override public List split(int adviceNumber) { + String splitPk = originalConfig.getString(Key.SPLIT_PK); + List quotedColumns = originalConfig.getList(Key.COLUMN_LIST, String.class); + if (splitPk != null && splitPk.length() > 0 && quotedColumns != null) { + String escapeChar = ObReaderUtils.isOracleMode(originalConfig.getString(ObReaderKey.OB_COMPATIBILITY_MODE)) + ? "\"" : "`"; + if (!splitPk.startsWith(escapeChar) && !splitPk.endsWith(escapeChar)) { + splitPk = escapeChar + splitPk + escapeChar; + } + for (String column : quotedColumns) { + if (column.equals(splitPk)) { + LOG.info("splitPk is an ob reserved keyword, set to {}", splitPk); + originalConfig.set(Key.SPLIT_PK, splitPk); + } + } + } return this.readerJob.split(this.originalConfig, adviceNumber); } @@ -70,7 +88,7 @@ public class OceanBaseReader extends Reader { Configuration connConf = Configuration.from(conns.get(0).toString()); List jdbcUrls = connConf.getList(Key.JDBC_URL, String.class); String jdbcUrl = jdbcUrls.get(0); - if(jdbcUrl.startsWith(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING)) { + if (jdbcUrl.startsWith(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING)) { String[] ss = jdbcUrl.split(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING_PATTERN); if (ss.length != 3) { LOG.warn("unrecognized jdbc url: " + jdbcUrl); @@ -84,10 +102,12 @@ public class OceanBaseReader extends Reader { String obJdbcUrl = jdbcUrl.replace("jdbc:mysql:", "jdbc:oceanbase:"); Connection conn = DBUtil.getConnection(DataBaseType.OceanBase, obJdbcUrl, username, password); String compatibleMode = ObReaderUtils.getCompatibleMode(conn); + config.set(ObReaderKey.OB_COMPATIBILITY_MODE, compatibleMode); if (ObReaderUtils.isOracleMode(compatibleMode)) { - ObReaderUtils.DATABASE_TYPE = DataBaseType.OceanBase; + ObReaderUtils.compatibleMode = ObReaderUtils.OB_COMPATIBLE_MODE_ORACLE; } - } catch (Exception e){ + + } catch (Exception e) { LOG.warn("error in get compatible mode, using mysql as default: " + e.getMessage()); } } diff --git 
a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/Constant.java b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/Constant.java new file mode 100644 index 00000000..57977ca4 --- /dev/null +++ b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/Constant.java @@ -0,0 +1,11 @@ +package com.alibaba.datax.plugin.reader.oceanbasev10reader.ext; + +/** + * @author johnrobbet + */ +public class Constant { + + public static String WEAK_READ_QUERY_SQL_TEMPLATE_WITHOUT_WHERE = "select /*+read_consistency(weak)*/ %s from %s "; + + public static String WEAK_READ_QUERY_SQL_TEMPLATE = "select /*+read_consistency(weak)*/ %s from %s where (%s)"; +} diff --git a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/ObReaderKey.java b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/ObReaderKey.java new file mode 100644 index 00000000..bc8f4525 --- /dev/null +++ b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/ObReaderKey.java @@ -0,0 +1,16 @@ +package com.alibaba.datax.plugin.reader.oceanbasev10reader.ext; + +/** + * @author johnrobbet + */ +public class ObReaderKey { + + public final static String READ_BY_PARTITION = "readByPartition"; + + public final static String PARTITION_NAME = "partitionName"; + + public final static String PARTITION_TYPE = "partitionType"; + + public final static String OB_COMPATIBILITY_MODE = "obCompatibilityMode"; + +} diff --git a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/ReaderJob.java b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/ReaderJob.java index c56155f6..2d60d0c6 100644 --- a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/ReaderJob.java +++ b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/ReaderJob.java @@ -6,35 +6,86 @@ import com.alibaba.datax.common.constant.CommonConstant; import com.alibaba.datax.common.util.Configuration; import com.alibaba.datax.plugin.rdbms.reader.CommonRdbmsReader; import com.alibaba.datax.plugin.rdbms.reader.Key; -import com.alibaba.datax.plugin.rdbms.writer.Constant; +import com.alibaba.datax.plugin.rdbms.reader.Constant; +import com.alibaba.datax.plugin.reader.oceanbasev10reader.OceanBaseReader; import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ObReaderUtils; +import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.PartitionSplitUtil; +import com.alibaba.fastjson2.JSONObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + public class ReaderJob extends CommonRdbmsReader.Job { + private Logger LOG = LoggerFactory.getLogger(OceanBaseReader.Task.class); public ReaderJob() { - super(ObReaderUtils.DATABASE_TYPE); + super(ObReaderUtils.databaseType); + } + + @Override + public void init(Configuration originalConfig) { + //将config中的column和table中的关键字进行转义 + List columns = originalConfig.getList(Key.COLUMN, String.class); + ObReaderUtils.escapeDatabaseKeyword(columns); + originalConfig.set(Key.COLUMN, columns); + + List conns = originalConfig.getList(Constant.CONN_MARK, JSONObject.class); + for (int i = 0; i < conns.size(); i++) { + JSONObject conn = conns.get(i); + Configuration connConfig = Configuration.from(conn.toString()); + List tables = connConfig.getList(Key.TABLE, String.class); + + // tables will be null when 
querySql is configured + if (tables != null) { + ObReaderUtils.escapeDatabaseKeyword(tables); + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.TABLE), + tables); + } + } + super.init(originalConfig); } @Override public List split(Configuration originalConfig, int adviceNumber) { - List list = super.split(originalConfig, adviceNumber); + List list; + // readByPartition is lower priority than splitPk. + // and readByPartition only works in table mode. + if (!isSplitPkValid(originalConfig) && + originalConfig.getBool(Constant.IS_TABLE_MODE) && + originalConfig.getBool(ObReaderKey.READ_BY_PARTITION, false)) { + LOG.info("try to split reader job by partition."); + list = PartitionSplitUtil.splitByPartition(originalConfig); + } else { + LOG.info("try to split reader job by splitPk."); + list = super.split(originalConfig, adviceNumber); + } + for (Configuration config : list) { String jdbcUrl = config.getString(Key.JDBC_URL); String obRegionName = getObRegionName(jdbcUrl); config.set(CommonConstant.LOAD_BALANCE_RESOURCE_MARK, obRegionName); } + return list; } + private boolean isSplitPkValid(Configuration originalConfig) { + String splitPk = originalConfig.getString(Key.SPLIT_PK); + return splitPk != null && splitPk.trim().length() > 0; + } + private String getObRegionName(String jdbcUrl) { - if (jdbcUrl.startsWith(Constant.OB10_SPLIT_STRING)) { - String[] ss = jdbcUrl.split(Constant.OB10_SPLIT_STRING_PATTERN); - if (ss.length >= 2) { + final String obJdbcDelimiter = com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING; + if (jdbcUrl.startsWith(obJdbcDelimiter)) { + String[] ss = jdbcUrl.split(obJdbcDelimiter); + int elementCount = 2; + if (ss.length >= elementCount) { String tenant = ss[1].trim(); String[] sss = tenant.split(":"); return sss[0]; } } + return null; } } diff --git a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/ReaderTask.java b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/ReaderTask.java index 073bb3cb..a43dcebd 100644 --- a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/ReaderTask.java +++ b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/ext/ReaderTask.java @@ -1,13 +1,5 @@ package com.alibaba.datax.plugin.reader.oceanbasev10reader.ext; -import java.sql.*; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import com.alibaba.datax.common.element.Column; import com.alibaba.datax.common.element.Record; import com.alibaba.datax.common.plugin.RecordSender; @@ -19,11 +11,17 @@ import com.alibaba.datax.plugin.rdbms.reader.CommonRdbmsReader; import com.alibaba.datax.plugin.rdbms.reader.Constant; import com.alibaba.datax.plugin.rdbms.reader.Key; import com.alibaba.datax.plugin.rdbms.util.DBUtil; -import com.alibaba.datax.plugin.rdbms.util.DataBaseType; import com.alibaba.datax.plugin.rdbms.util.RdbmsException; import com.alibaba.datax.plugin.reader.oceanbasev10reader.Config; import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ObReaderUtils; import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.TaskContext; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.*; +import java.util.ArrayList; +import java.util.List; public class ReaderTask extends CommonRdbmsReader.Task { private static final 
Logger LOG = LoggerFactory.getLogger(ReaderTask.class); @@ -41,11 +39,12 @@ public class ReaderTask extends CommonRdbmsReader.Task { private boolean reuseConn = false; public ReaderTask(int taskGroupId, int taskId) { - super(ObReaderUtils.DATABASE_TYPE, taskGroupId, taskId); + super(ObReaderUtils.databaseType, taskGroupId, taskId); this.taskGroupId = taskGroupId; this.taskId = taskId; } + @Override public void init(Configuration readerSliceConfig) { /* for database connection */ username = readerSliceConfig.getString(Key.USERNAME); @@ -54,7 +53,7 @@ public class ReaderTask extends CommonRdbmsReader.Task { queryTimeoutSeconds = readerSliceConfig.getInt(Config.QUERY_TIMEOUT_SECOND, Config.DEFAULT_QUERY_TIMEOUT_SECOND); // ob10的处理 - if(jdbcUrl.startsWith(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING)) { + if (jdbcUrl.startsWith(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING)) { String[] ss = jdbcUrl.split(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING_PATTERN); if (ss.length == 3) { LOG.info("this is ob1_0 jdbc url."); @@ -63,16 +62,14 @@ public class ReaderTask extends CommonRdbmsReader.Task { } } - if (ObReaderUtils.DATABASE_TYPE == DataBaseType.OceanBase) { - jdbcUrl = jdbcUrl.replace("jdbc:mysql:", "jdbc:oceanbase:") + "&socketTimeout=1800000&connectTimeout=60000"; //socketTimeout 半个小时 + jdbcUrl = jdbcUrl.replace("jdbc:mysql:", "jdbc:oceanbase:") + "&socketTimeout=1800000&connectTimeout=60000"; //socketTimeout 半个小时 + if (ObReaderUtils.compatibleMode.equals(ObReaderUtils.OB_COMPATIBLE_MODE_ORACLE)) { compatibleMode = ObReaderUtils.OB_COMPATIBLE_MODE_ORACLE; - } else { - jdbcUrl = jdbcUrl + "&socketTimeout=1800000&connectTimeout=60000"; //socketTimeout 半个小时 } LOG.info("this is ob1_0 jdbc url. user=" + username + " :url=" + jdbcUrl); mandatoryEncoding = readerSliceConfig.getString(Key.MANDATORY_ENCODING, ""); retryLimit = readerSliceConfig.getInt(Config.RETRY_LIMIT, Config.DEFAULT_RETRY_LIMIT); - LOG.info("retryLimit: "+ retryLimit); + LOG.info("retryLimit: " + retryLimit); } private void buildSavePoint(TaskContext context) { @@ -83,7 +80,6 @@ public class ReaderTask extends CommonRdbmsReader.Task { } /** - * * 如果isTableMode && table有PK *

* 则支持断点续读 (若pk不在原始的columns中,则追加到尾部,但不传给下游) @@ -92,7 +88,7 @@ public class ReaderTask extends CommonRdbmsReader.Task { */ @Override public void startRead(Configuration readerSliceConfig, RecordSender recordSender, - TaskPluginCollector taskPluginCollector, int fetchSize) { + TaskPluginCollector taskPluginCollector, int fetchSize) { String querySql = readerSliceConfig.getString(Key.QUERY_SQL); String table = readerSliceConfig.getString(Key.TABLE); PerfTrace.getInstance().addTaskDetails(taskId, table + "," + jdbcUrl); @@ -131,14 +127,14 @@ public class ReaderTask extends CommonRdbmsReader.Task { } private void startRead0(boolean isTableMode, TaskContext context, RecordSender recordSender, - TaskPluginCollector taskPluginCollector) { + TaskPluginCollector taskPluginCollector) { // 不是table模式 直接使用原来的做法 if (!isTableMode) { doRead(recordSender, taskPluginCollector, context); return; } // check primary key index - Connection conn = DBUtil.getConnection(ObReaderUtils.DATABASE_TYPE, jdbcUrl, username, password); + Connection conn = DBUtil.getConnection(ObReaderUtils.databaseType, jdbcUrl, username, password); ObReaderUtils.initConn4Reader(conn, queryTimeoutSeconds); context.setConn(conn); try { @@ -184,11 +180,11 @@ public class ReaderTask extends CommonRdbmsReader.Task { } } catch (Throwable e) { if (retryLimit == ++retryCount) { - throw RdbmsException.asQueryException(ObReaderUtils.DATABASE_TYPE, new Exception(e), + throw RdbmsException.asQueryException(ObReaderUtils.databaseType, new Exception(e), context.getQuerySql(), context.getTable(), username); } LOG.error("read fail, retry count " + retryCount + ", sleep 60 second, save point:" + - context.getSavePoint() + ", error: "+ e.getMessage()); + context.getSavePoint() + ", error: " + e.getMessage()); ObReaderUtils.sleep(60000); // sleep 10s } // 假如原来的查询有查出数据,则改成增量查询 @@ -227,7 +223,7 @@ public class ReaderTask extends CommonRdbmsReader.Task { LOG.info("connection is alive, will reuse this connection."); } else { LOG.info("Create new connection for reader."); - conn = DBUtil.getConnection(ObReaderUtils.DATABASE_TYPE, jdbcUrl, username, password); + conn = DBUtil.getConnection(ObReaderUtils.databaseType, jdbcUrl, username, password); ObReaderUtils.initConn4Reader(conn, queryTimeoutSeconds); context.setConn(conn); } @@ -287,7 +283,7 @@ public class ReaderTask extends CommonRdbmsReader.Task { ObReaderUtils.close(null, null, context.getConn()); context.setConn(null); LOG.error("reader data fail", e); - throw RdbmsException.asQueryException(ObReaderUtils.DATABASE_TYPE, e, context.getQuerySql(), + throw RdbmsException.asQueryException(ObReaderUtils.databaseType, e, context.getQuerySql(), context.getTable(), username); } finally { perfRecord.end(); diff --git a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ObReaderUtils.java b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ObReaderUtils.java index 2290fb43..06d53108 100644 --- a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ObReaderUtils.java +++ b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ObReaderUtils.java @@ -1,52 +1,80 @@ package com.alibaba.datax.plugin.reader.oceanbasev10reader.util; -import java.sql.Connection; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.Arrays; -import 
java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.TreeMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.Set; -import java.util.TreeSet; - +import com.alibaba.datax.common.element.*; +import com.alibaba.datax.plugin.rdbms.reader.util.SingleTableSplitUtil; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.reader.oceanbasev10reader.ext.Constant; +import com.alibaba.druid.sql.SQLUtils; +import com.alibaba.druid.sql.ast.SQLExpr; +import com.alibaba.druid.sql.ast.expr.SQLBinaryOpExpr; +import com.alibaba.druid.sql.ast.expr.SQLBinaryOperator; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.alibaba.datax.common.element.BoolColumn; -import com.alibaba.datax.common.element.BytesColumn; -import com.alibaba.datax.common.element.Column; -import com.alibaba.datax.common.element.DateColumn; -import com.alibaba.datax.common.element.DoubleColumn; -import com.alibaba.datax.common.element.LongColumn; -import com.alibaba.datax.common.element.Record; -import com.alibaba.datax.common.element.StringColumn; -import com.alibaba.datax.plugin.rdbms.util.DBUtil; -import com.alibaba.datax.plugin.rdbms.util.DataBaseType; -import com.alibaba.druid.sql.SQLUtils; -import com.alibaba.druid.sql.ast.SQLExpr; -import com.alibaba.druid.sql.ast.expr.SQLBinaryOpExpr; -import com.alibaba.druid.sql.ast.expr.SQLBinaryOperator; +import java.sql.*; +import java.util.*; +import java.util.Map.Entry; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +/** + * @author johnrobbet + */ public class ObReaderUtils { - private static final Logger LOG = LoggerFactory.getLogger(ObReaderUtils.class); + private static final String MYSQL_KEYWORDS = 
"ACCESSIBLE,ACCOUNT,ACTION,ADD,AFTER,AGAINST,AGGREGATE,ALGORITHM,ALL,ALTER,ALWAYS,ANALYSE,AND,ANY,AS,ASC,ASCII,ASENSITIVE,AT,AUTO_INCREMENT,AUTOEXTEND_SIZE,AVG,AVG_ROW_LENGTH,BACKUP,BEFORE,BEGIN,BETWEEN,BIGINT,BINARY,BINLOG,BIT,BLOB,BLOCK,BOOL,BOOLEAN,BOTH,BTREE,BY,BYTE,CACHE,CALL,CASCADE,CASCADED,CASE,CATALOG_NAME,CHAIN,CHANGE,CHANGED,CHANNEL,CHAR,CHARACTER,CHARSET,CHECK,CHECKSUM,CIPHER,CLASS_ORIGIN,CLIENT,CLOSE,COALESCE,CODE,COLLATE,COLLATION,COLUMN,COLUMN_FORMAT,COLUMN_NAME,COLUMNS,COMMENT,COMMIT,COMMITTED,COMPACT,COMPLETION,COMPRESSED,COMPRESSION,CONCURRENT,CONDITION,CONNECTION,CONSISTENT,CONSTRAINT,CONSTRAINT_CATALOG,CONSTRAINT_NAME,CONSTRAINT_SCHEMA,CONTAINS,CONTEXT,CONTINUE,CONVERT,CPU,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR,CURSOR_NAME,DATA,DATABASE,DATABASES,DATAFILE,DATE,DATETIME,DAY,DAY_HOUR,DAY_MICROSECOND,DAY_MINUTE,DAY_SECOND,DEALLOCATE,DEC,DECIMAL,DECLARE,DEFAULT,DEFAULT_AUTH,DEFINER,DELAY_KEY_WRITE,DELAYED,DELETE,DES_KEY_FILE,DESC,DESCRIBE,DETERMINISTIC,DIAGNOSTICS,DIRECTORY,DISABLE,DISCARD,DISK,DISTINCT,DISTINCTROW,DIV,DO,DOUBLE,DROP,DUAL,DUMPFILE,DUPLICATE,DYNAMIC,EACH,ELSE,ELSEIF,ENABLE,ENCLOSED,ENCRYPTION,END,ENDS,ENGINE,ENGINES,ENUM,ERROR,ERRORS,ESCAPE,ESCAPED,EVENT,EVENTS,EVERY,EXCHANGE,EXECUTE,EXISTS,EXIT,EXPANSION,EXPIRE,EXPLAIN,EXPORT,EXTENDED,EXTENT_SIZE,FAST,FAULTS,FETCH,FIELDS,FILE,FILE_BLOCK_SIZE,FILTER,FIRST,FIXED,FLOAT,FLOAT4,FLOAT8,FLUSH,FOLLOWS,FOR,FORCE,FOREIGN,FORMAT,FOUND,FROM,FULL,FULLTEXT,FUNCTION,GENERAL,GENERATED,GEOMETRY,GEOMETRYCOLLECTION,GET,GET_FORMAT,GLOBAL,GRANT,GRANTS,GROUP,GROUP_REPLICATION,HANDLER,HASH,HAVING,HELP,HIGH_PRIORITY,HOST,HOSTS,HOUR,HOUR_MICROSECOND,HOUR_MINUTE,HOUR_SECOND,IDENTIFIED,IF,IGNORE,IGNORE_SERVER_IDS,IMPORT,IN,INDEX,INDEXES,INFILE,INITIAL_SIZE,INNER,INOUT,INSENSITIVE,INSERT,INSERT_METHOD,INSTALL,INSTANCE,INT,INT1,INT2,INT3,INT4,INT8,INTEGER,INTERVAL,INTO,INVOKER,IO,IO_AFTER_GTIDS,IO_BEFORE_GTIDS,IO_THREAD,IPC,IS,ISOLATION,ISSUER,ITERATE,JOIN,JSON,KEY,KEY_BLOCK_SIZE,KEYS,KILL,LANGUAGE,LAST,LEADING,LEAVE,LEAVES,LEFT,LESS,LEVEL,LIKE,LIMIT,LINEAR,LINES,LINESTRING,LIST,LOAD,LOCAL,LOCALTIME,LOCALTIMESTAMP,LOCK,LOCKS,LOGFILE,LOGS,LONG,LONGBLOB,LONGTEXT,LOOP,LOW_PRIORITY,MASTER,MASTER_AUTO_POSITION,MASTER_BIND,MASTER_CONNECT_RETRY,MASTER_DELAY,MASTER_HEARTBEAT_PERIOD,MASTER_HOST,MASTER_LOG_FILE,MASTER_LOG_POS,MASTER_PASSWORD,MASTER_PORT,MASTER_RETRY_COUNT,MASTER_SERVER_ID,MASTER_SSL,MASTER_SSL_CA,MASTER_SSL_CAPATH,MASTER_SSL_CERT,MASTER_SSL_CIPHER,MASTER_SSL_CRL,MASTER_SSL_CRLPATH,MASTER_SSL_KEY,MASTER_SSL_VERIFY_SERVER_CERT,MASTER_TLS_VERSION,MASTER_USER,MATCH,MAX_CONNECTIONS_PER_HOUR,MAX_QUERIES_PER_HOUR,MAX_ROWS,MAX_SIZE,MAX_STATEMENT_TIME,MAX_UPDATES_PER_HOUR,MAX_USER_CONNECTIONS,MAXVALUE,MEDIUM,MEDIUMBLOB,MEDIUMINT,MEDIUMTEXT,MEMORY,MERGE,MESSAGE_TEXT,MICROSECOND,MIDDLEINT,MIGRATE,MIN_ROWS,MINUTE,MINUTE_MICROSECOND,MINUTE_SECOND,MOD,MODE,MODIFIES,MODIFY,MONTH,MULTILINESTRING,MULTIPOINT,MULTIPOLYGON,MUTEX,MYSQL_ERRNO,NAME,NAMES,NATIONAL,NATURAL,NCHAR,NDB,NDBCLUSTER,NEVER,NEW,NEXT,NO,NO_WAIT,NO_WRITE_TO_BINLOG,NODEGROUP,NONBLOCKING,NONE,NOT,NULL,NUMBER,NUMERIC,NVARCHAR,OFFSET,OLD_PASSWORD,ON,ONE,ONLY,OPEN,OPTIMIZE,OPTIMIZER_COSTS,OPTION,OPTIONALLY,OPTIONS,OR,ORDER,OUT,OUTER,OUTFILE,OWNER,PACK_KEYS,PAGE,PARSE_GCOL_EXPR,PARSER,PARTIAL,PARTITION,PARTITIONING,PARTITIONS,PASSWORD,PHASE,PLUGIN,PLUGIN_DIR,PLUGINS,POINT,POLYGON,PORT,PRECEDES,PRECISION,PREPARE,PRESERVE,PREV,PRIMARY,PRIVILEGES,PROCEDURE,PROCESSLIST,PROFILE,PROFILES,PROXY,PURGE,QUARTER,QUERY,QUICK,RANGE,READ,READ_ONLY,RE
AD_WRITE,READS,REAL,REBUILD,RECOVER,REDO_BUFFER_SIZE,REDOFILE,REDUNDANT,REFERENCES,REGEXP,RELAY,RELAY_LOG_FILE,RELAY_LOG_POS,RELAY_THREAD,RELAYLOG,RELEASE,RELOAD,REMOVE,RENAME,REORGANIZE,REPAIR,REPEAT,REPEATABLE,REPLACE,REPLICATE_DO_DB,REPLICATE_DO_TABLE,REPLICATE_IGNORE_DB,REPLICATE_IGNORE_TABLE,REPLICATE_REWRITE_DB,REPLICATE_WILD_DO_TABLE,REPLICATE_WILD_IGNORE_TABLE,REPLICATION,REQUIRE,RESET,RESIGNAL,RESTORE,RESTRICT,RESUME,RETURN,RETURNED_SQLSTATE,RETURNS,REVERSE,REVOKE,RIGHT,RLIKE,ROLLBACK,ROLLUP,ROTATE,ROUTINE,ROW,ROW_COUNT,ROW_FORMAT,ROWS,RTREE,SAVEPOINT,SCHEDULE,SCHEMA,SCHEMA_NAME,SCHEMAS,SECOND,SECOND_MICROSECOND,SECURITY,SELECT,SENSITIVE,SEPARATOR,SERIAL,SERIALIZABLE,SERVER,SESSION,SET,SHARE,SHOW,SHUTDOWN,SIGNAL,SIGNED,SIMPLE,SLAVE,SLOW,SMALLINT,SNAPSHOT,SOCKET,SOME,SONAME,SOUNDS,SOURCE,SPATIAL,SPECIFIC,SQL,SQL_AFTER_GTIDS,SQL_AFTER_MTS_GAPS,SQL_BEFORE_GTIDS,SQL_BIG_RESULT,SQL_BUFFER_RESULT,SQL_CACHE,SQL_CALC_FOUND_ROWS,SQL_NO_CACHE,SQL_SMALL_RESULT,SQL_THREAD,SQL_TSI_DAY,SQL_TSI_HOUR,SQL_TSI_MINUTE,SQL_TSI_MONTH,SQL_TSI_QUARTER,SQL_TSI_SECOND,SQL_TSI_WEEK,SQL_TSI_YEAR,SQLEXCEPTION,SQLSTATE,SQLWARNING,SSL,STACKED,START,STARTING,STARTS,STATS_AUTO_RECALC,STATS_PERSISTENT,STATS_SAMPLE_PAGES,STATUS,STOP,STORAGE,STORED,STRAIGHT_JOIN,STRING,SUBCLASS_ORIGIN,SUBJECT,SUBPARTITION,SUBPARTITIONS,SUPER,SUSPEND,SWAPS,SWITCHES,TABLE,TABLE_CHECKSUM,TABLE_NAME,TABLES,TABLESPACE,TEMPORARY,TEMPTABLE,TERMINATED,TEXT,THAN,THEN,TIME,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TINYBLOB,TINYINT,TINYTEXT,TO,TRAILING,TRANSACTION,TRIGGER,TRIGGERS,TRUNCATE,TYPE,TYPES,UNCOMMITTED,UNDEFINED,UNDO,UNDO_BUFFER_SIZE,UNDOFILE,UNICODE,UNINSTALL,UNION,UNIQUE,UNKNOWN,UNLOCK,UNSIGNED,UNTIL,UPDATE,UPGRADE,USAGE,USE,USE_FRM,USER,USER_RESOURCES,USING,UTC_DATE,UTC_TIME,UTC_TIMESTAMP,VALIDATION,VALUE,VALUES,VARBINARY,VARCHAR,VARCHARACTER,VARIABLES,VARYING,VIEW,VIRTUAL,WAIT,WARNINGS,WEEK,WEIGHT_STRING,WHEN,WHERE,WHILE,WITH,WITHOUT,WORK,WRAPPER,WRITE,X509,XA,XID,XML,XOR,YEAR,YEAR_MONTH,ZEROFILL,FALSE,TRUE"; + private static final String ORACLE_KEYWORDS = "ACCESS,ADD,ALL,ALTER,AND,ANY,ARRAYLEN,AS,ASC,AUDIT,BETWEEN,BY,CHAR,CHECK,CLUSTER,COLUMN,COMMENT,COMPRESS,CONNECT,CREATE,CURRENT,DATE,DECIMAL,DEFAULT,DELETE,DESC,DISTINCT,DROP,ELSE,EXCLUSIVE,EXISTS,FILE,FLOAT,FOR,FROM,GRANT,GROUP,HAVING,IDENTIFIED,IMMEDIATE,IN,INCREMENT,INDEX,INITIAL,INSERT,INTEGER,INTERSECT,INTO,IS,LEVEL,LIKE,LOCK,LONG,MAXEXTENTS,MINUS,MODE,MODIFY,NOAUDIT,NOCOMPRESS,NOT,NOTFOUND,NOWAIT,NUMBER,OF,OFFLINE,ON,ONLINE,OPTION,OR,ORDER,PCTFREE,PRIOR,PRIVILEGES,PUBLIC,RAW,RENAME,RESOURCE,REVOKE,ROW,ROWID,ROWLABEL,ROWNUM,ROWS,SELECT,SESSION,SET,SHARE,SIZE,SMALLINT,SQLBUF,START,SUCCESSFUL,SYNONYM,TABLE,THEN,TO,TRIGGER,UID,UNION,UNIQUE,UPDATE,USER,VALIDATE,VALUES,VARCHAR,VARCHAR2,VIEW,WHENEVER,WHERE,WITH,KEY,NAME,VALUE,TYPE"; + private static Set databaseKeywords; final static public String OB_COMPATIBLE_MODE = "obCompatibilityMode"; final static public String OB_COMPATIBLE_MODE_ORACLE = "ORACLE"; final static public String OB_COMPATIBLE_MODE_MYSQL = "MYSQL"; - public static DataBaseType DATABASE_TYPE = DataBaseType.MySql; + public static String compatibleMode = OB_COMPATIBLE_MODE_MYSQL; + + public static final DataBaseType databaseType = DataBaseType.OceanBase; + + private static final String TABLE_SCHEMA_DELIMITER = "."; + + private static final Pattern JDBC_PATTERN = Pattern.compile("jdbc:(oceanbase|mysql)://([\\w\\.-]+:\\d+)/([\\w\\.-]+)"); + + private static Set keywordsFromString2HashSet(final String keywords) { + return new HashSet(Arrays.asList(keywords.split(","))); 
+ } + + public static String escapeDatabaseKeyword(String keyword) { + if (databaseKeywords == null) { + if (isOracleMode(compatibleMode)) { + databaseKeywords = keywordsFromString2HashSet(ORACLE_KEYWORDS); + } else { + databaseKeywords = keywordsFromString2HashSet(MYSQL_KEYWORDS); + } + } + char escapeChar = isOracleMode(compatibleMode) ? '"' : '`'; + if (databaseKeywords.contains(keyword.toUpperCase())) { + keyword = escapeChar + keyword + escapeChar; + } + return keyword; + } + + public static void escapeDatabaseKeyword(List ids) { + if (ids != null && ids.size() > 0) { + for (int i = 0; i < ids.size(); i++) { + ids.set(i, escapeDatabaseKeyword(ids.get(i))); + } + } + } + + public static Boolean isEscapeMode(String keyword) { + if (isOracleMode(compatibleMode)) { + return keyword.startsWith("\"") && keyword.endsWith("\""); + } else { + return keyword.startsWith("`") && keyword.endsWith("`"); + } + } public static void initConn4Reader(Connection conn, long queryTimeoutSeconds) { String setQueryTimeout = "set ob_query_timeout=" + (queryTimeoutSeconds * 1000 * 1000L); @@ -57,7 +85,7 @@ public class ObReaderUtils { stmt = conn.createStatement(); stmt.execute(setQueryTimeout); stmt.execute(setTrxTimeout); - LOG.warn("setAutoCommit=true;"+setQueryTimeout+";"+setTrxTimeout+";"); + LOG.warn("setAutoCommit=true;" + setQueryTimeout + ";" + setTrxTimeout + ";"); } catch (Throwable e) { LOG.warn("initConn4Reader fail", e); } finally { @@ -73,7 +101,6 @@ public class ObReaderUtils { } /** - * * @param conn * @param context */ @@ -84,17 +111,24 @@ public class ObReaderUtils { return; } List columns = context.getColumns(); + // 最后参与排序的索引列 + context.setPkColumns(pkColumns); + + final String escapeChar = isOracleMode(context.getCompatibleMode()) ? "\"" : "`"; int[] pkIndexs = new int[pkColumns.length]; for (int i = 0, n = pkColumns.length; i < n; i++) { String pkc = pkColumns[i]; + String escapedPkc = String.format("%s%s%s", escapeChar, pkc, escapeChar); int j = 0; for (int k = columns.size(); j < k; j++) { // 如果用户定义的 columns中 带有 ``,也不影响, // 最多只是在select里多加了几列PK column - if (StringUtils.equalsIgnoreCase(pkc, columns.get(j))) { + if (StringUtils.equalsIgnoreCase(pkc, columns.get(j)) + || StringUtils.equalsIgnoreCase(escapedPkc, columns.get(j))) { pkIndexs[i] = j; + pkColumns[i] = columns.get(j); break; } } @@ -112,10 +146,20 @@ public class ObReaderUtils { String sql = "show index from " + tableName + " where Key_name='PRIMARY'"; if (isOracleMode(context.getCompatibleMode())) { tableName = tableName.toUpperCase(); - sql = "SELECT cols.column_name Column_name "+ + String schema; + if (tableName.contains(TABLE_SCHEMA_DELIMITER)) { + schema = String.format("'%s'", tableName.substring(0, tableName.indexOf("."))); + tableName = tableName.substring(tableName.indexOf(".") + 1); + } else { + schema = "(select sys_context('USERENV','current_schema') from dual)"; + } + sql = String.format( + "SELECT cols.column_name Column_name " + "FROM all_constraints cons, all_cons_columns cols " + - "WHERE cols.table_name = '" + tableName+ "' AND cons.constraint_type = 'P' " + - "AND cons.constraint_name = cols.constraint_name AND cons.owner = cols.owner"; + "WHERE cols.table_name = '%s' AND cons.constraint_type = 'P' " + + "AND cons.constraint_name = cols.constraint_name " + + "AND cons.owner = cols.owner and cons.OWNER = %s", + tableName, schema); } LOG.info("get primary key by sql: " + sql); Statement ps = null; @@ -125,20 +169,27 @@ public class ObReaderUtils { try { ps = conn.createStatement(); rs = ps.executeQuery(sql); + 
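[Editor's note] The escaping rule introduced in the hunk above is purely lexical: an identifier is wrapped in backticks under MySQL compatibility or double quotes under Oracle compatibility, and only when it collides with a reserved word. A minimal standalone sketch of that rule, with an abbreviated keyword set (class and method names here are illustrative, not part of the plugin):

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

// Illustrative sketch only; the real plugin keeps the full MYSQL_KEYWORDS / ORACLE_KEYWORDS lists shown above.
public class KeywordEscapeSketch {
    private static final Set<String> KEYWORDS =
            new HashSet<>(Arrays.asList("SELECT", "ORDER", "GROUP", "TABLE"));

    // Wrap the identifier in the mode-specific quote character when it is a reserved word.
    static String escape(String identifier, boolean oracleMode) {
        char quote = oracleMode ? '"' : '`';
        return KEYWORDS.contains(identifier.toUpperCase())
                ? quote + identifier + quote
                : identifier;
    }

    public static void main(String[] args) {
        System.out.println(escape("order", false));  // `order`   (MySQL mode)
        System.out.println(escape("order", true));   // "order"   (Oracle mode)
        System.out.println(escape("c1", false));     // c1        (not a keyword, left untouched)
    }
}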
boolean hasPk = false; while (rs.next()) { - String columnName = StringUtils.lowerCase(rs.getString("Column_name")); + hasPk = true; + String columnName = rs.getString("Column_name"); + columnName = escapeDatabaseKeyword(columnName); if (!realIndex.contains(columnName)) { realIndex.add(columnName); } } - String[] pks = new String[realIndex.size()]; - realIndex.toArray(pks); - return pks; + + if (hasPk) { + String[] pks = new String[realIndex.size()]; + realIndex.toArray(pks); + return pks; + } } catch (Throwable e) { LOG.error("show index from table fail :" + sql, e); } finally { close(rs, ps, null); } + return null; } @@ -156,7 +207,7 @@ public class ObReaderUtils { if (StringUtils.isNotEmpty(indexName)) { String weakReadHint = weakRead ? "+READ_CONSISTENCY(WEAK)," : "+"; sql += " /*" + weakReadHint + "index(" + context.getTable() + " " + indexName + ")*/ "; - } else if (weakRead){ + } else if (weakRead) { sql += " /*+READ_CONSISTENCY(WEAK)*/ "; } sql += StringUtils.join(context.getColumns(), ','); @@ -187,7 +238,6 @@ public class ObReaderUtils { * 增量查的SQL * * @param conn - * * @param context * @return sql */ @@ -197,8 +247,8 @@ public class ObReaderUtils { String sql = "select "; if (StringUtils.isNotEmpty(indexName)) { String weakReadHint = weakRead ? "+READ_CONSISTENCY(WEAK)," : "+"; - sql += " /*"+ weakReadHint + "index(" + context.getTable() + " " + indexName + ")*/ "; - } else if (weakRead){ + sql += " /*" + weakReadHint + "index(" + context.getTable() + " " + indexName + ")*/ "; + } else if (weakRead) { sql += " /*+READ_CONSISTENCY(WEAK)*/ "; } sql += StringUtils.join(context.getColumns(), ',') + " from " + context.getTable(); @@ -295,7 +345,7 @@ public class ObReaderUtils { final char rightBracket = ')'; if (str != null && str.contains(String.valueOf(leftBracket)) && str.contains(String.valueOf(rightBracket)) && str.indexOf(leftBracket) < str.indexOf(rightBracket)) { - return str.substring(str.indexOf(leftBracket)+1, str.indexOf(rightBracket)); + return str.substring(str.indexOf(leftBracket) + 1, str.indexOf(rightBracket)); } return str; } @@ -322,7 +372,6 @@ public class ObReaderUtils { return; } SQLExpr expr = SQLUtils.toSQLExpr(context.getWhere(), "mysql"); - LOG.info("expr: " + expr); List allColumnsInTab = getAllColumnFromTab(conn, context.getTable()); List allColNames = getColNames(allColumnsInTab, expr); @@ -364,7 +413,7 @@ public class ObReaderUtils { /** * 找出where条件中的列名,目前仅支持全部为and条件,并且操作符为大于、大约等于、等于、小于、小于等于和不等于的表达式。 - * + *
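[Editor's note] The reader builds its SELECT hints by string concatenation in the hunks above, so it helps to see what the resulting SQL prefix actually looks like. A small standalone sketch of the same hint construction, using hypothetical table and index names:

// Standalone sketch of the weak-read / index hint construction used by the reader above.
public class HintSketch {
    static String buildSelectPrefix(String table, String indexName, boolean weakRead) {
        String sql = "select";
        if (indexName != null && !indexName.isEmpty()) {
            // weak read and index hint share one optimizer comment
            String weakReadHint = weakRead ? "+READ_CONSISTENCY(WEAK)," : "+";
            sql += " /*" + weakReadHint + "index(" + table + " " + indexName + ")*/ ";
        } else if (weakRead) {
            sql += " /*+READ_CONSISTENCY(WEAK)*/ ";
        }
        return sql;
    }

    public static void main(String[] args) {
        // select /*+READ_CONSISTENCY(WEAK),index(t_order idx_c1)*/ ...
        System.out.println(buildSelectPrefix("t_order", "idx_c1", true));
        // select /*+READ_CONSISTENCY(WEAK)*/ ...
        System.out.println(buildSelectPrefix("t_order", null, true));
    }
}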

* test coverage: - c6 = 20180710 OR c4 = 320: no index selected - 20180710 * = c6: correct index selected - 20180710 = c6 and c4 = 320 or c2 < 100: no * index selected @@ -414,21 +463,34 @@ public class ObReaderUtils { Map> allIndex = new HashMap>(); String sql = "show index from " + tableName; if (isOracleMode(compatibleMode)) { + String schema; tableName = tableName.toUpperCase(); - sql = "SELECT INDEX_NAME Key_name, COLUMN_NAME Column_name " + - "from dba_ind_columns where TABLE_NAME = '" + tableName +"' " + + if (tableName.contains(TABLE_SCHEMA_DELIMITER)) { + schema = String.format("'%s'", tableName.substring(0, tableName.indexOf("."))); + tableName = tableName.substring(tableName.indexOf(".") + 1); + } else { + schema = "(select sys_context('USERENV','current_schema') from dual)"; + } + + sql = String.format( + "SELECT INDEX_NAME Key_name, COLUMN_NAME Column_name " + + "from all_ind_columns " + + "where TABLE_NAME = '%s' and TABLE_OWNER = %s " + " union all " + - "SELECT DISTINCT " + - "CASE " + - "WHEN cons.CONSTRAINT_TYPE = 'P' THEN 'PRIMARY' " + - "WHEN cons.CONSTRAINT_TYPE = 'U' THEN cons.CONSTRAINT_NAME " + - "ELSE '' " + - "END AS Key_name, " + - "cols.column_name Column_name " + - "FROM all_constraints cons, all_cons_columns cols " + - "WHERE cols.table_name = '" + tableName + "' AND cons.constraint_type in('P', 'U') " + - "AND cons.constraint_name = cols.constraint_name AND cons.owner = cols.owner"; + "SELECT DISTINCT " + + "CASE " + + "WHEN cons.CONSTRAINT_TYPE = 'P' THEN 'PRIMARY' " + + "WHEN cons.CONSTRAINT_TYPE = 'U' THEN cons.CONSTRAINT_NAME " + + "ELSE '' " + + "END AS Key_name, " + + "cols.column_name Column_name " + + "FROM all_constraints cons, all_cons_columns cols " + + "WHERE cols.table_name = '%s' AND cons.constraint_type in('P', 'U') " + + "AND cons.constraint_name = cols.constraint_name AND cons.owner = cols.owner " + + "AND cons.owner = %s", + tableName, schema, tableName, schema); } + Statement stmt = null; ResultSet rs = null; @@ -451,11 +513,26 @@ public class ObReaderUtils { // add primary key to all index if (allIndex.containsKey("PRIMARY")) { List colsInPrimary = allIndex.get("PRIMARY"); - for (String keyName : allIndex.keySet()) { - if (keyName.equals("PRIMARY")) { + Iterator>> iterator = allIndex.entrySet().iterator(); + while (iterator.hasNext()) { + Map.Entry> entry = iterator.next(); + if ("PRIMARY".equals(entry.getKey())) { continue; } - allIndex.get(keyName).addAll(colsInPrimary); + + // remove the index which is identical with primary key + List indexColumns = entry.getValue(); + if (colsInPrimary.equals(indexColumns)) { + iterator.remove(); + } else { + // add primary key to the index if the index is not on the column + colsInPrimary.forEach( + c -> { + if (!indexColumns.contains(c)) { + indexColumns.add(c); + } + }); + } } } } catch (Exception e) { @@ -469,21 +546,21 @@ public class ObReaderUtils { } /** - * + * find out the indexes which contains all columns in where conditions * @param conn * @param table * @param colNamesInCondition * @return */ private static List getIndexName(Connection conn, String table, - Set colNamesInCondition, String compatibleMode) { + Set colNamesInCondition, String compatibleMode) { List indexNames = new ArrayList(); if (colNamesInCondition == null || colNamesInCondition.size() == 0) { LOG.info("there is no qulified conditions in the where clause, skip index selection."); return indexNames; } - LOG.info("columNamesInConditions: " + String.join(",", colNamesInCondition)); + LOG.info("columnNamesInConditions: " + 
String.join(",", colNamesInCondition)); Map> allIndex = getAllIndex(conn, table, compatibleMode); for (String keyName : allIndex.keySet()) { @@ -494,7 +571,7 @@ public class ObReaderUtils { if (allIndex.get(keyName).size() < colNamesInCondition.size()) { indexNotMatch = true; } else { - // the the first number columns of this index + // the first number columns of this index int num = colNamesInCondition.size(); for (String colName : allIndex.get(keyName)) { if (!colNamesInCondition.contains(colName)) { @@ -540,7 +617,7 @@ public class ObReaderUtils { Map index = new TreeMap(); List columnList = allIndexInTab.get(indexName); for (int i = 1; i <= columnList.size(); i++) { - index.put(i, columnList.get(i-1)); + index.put(i, columnList.get(i - 1)); } allIndexs.put(indexName, index); } else { @@ -644,19 +721,19 @@ public class ObReaderUtils { public static void binding(PreparedStatement ps, List list) throws SQLException { for (int i = 0, n = list.size(); i < n; i++) { Column c = list.get(i); - if(c instanceof BoolColumn){ - ps.setLong(i + 1, ((BoolColumn)c).asLong()); - }else if(c instanceof BytesColumn){ - ps.setBytes(i + 1, ((BytesColumn)c).asBytes()); - }else if(c instanceof DateColumn){ - ps.setTimestamp(i + 1, new Timestamp(((DateColumn)c).asDate().getTime())); - }else if(c instanceof DoubleColumn){ - ps.setDouble(i + 1, ((DoubleColumn)c).asDouble()); - }else if(c instanceof LongColumn){ - ps.setLong(i + 1, ((LongColumn)c).asLong()); - }else if(c instanceof StringColumn){ - ps.setString(i + 1, ((StringColumn)c).asString()); - }else{ + if (c instanceof BoolColumn) { + ps.setLong(i + 1, ((BoolColumn) c).asLong()); + } else if (c instanceof BytesColumn) { + ps.setBytes(i + 1, ((BytesColumn) c).asBytes()); + } else if (c instanceof DateColumn) { + ps.setTimestamp(i + 1, new Timestamp(((DateColumn) c).asDate().getTime())); + } else if (c instanceof DoubleColumn) { + ps.setDouble(i + 1, ((DoubleColumn) c).asDouble()); + } else if (c instanceof LongColumn) { + ps.setLong(i + 1, ((LongColumn) c).asLong()); + } else if (c instanceof StringColumn) { + ps.setString(i + 1, ((StringColumn) c).asString()); + } else { ps.setObject(i + 1, c.getRawData()); } } @@ -692,6 +769,98 @@ public class ObReaderUtils { } public static boolean isOracleMode(String mode) { - return (mode != null && OB_COMPATIBLE_MODE_ORACLE.equals(mode)); + return (mode != null && OB_COMPATIBLE_MODE_ORACLE.equalsIgnoreCase(mode)); + } + + public static String getDbNameFromJdbcUrl(String jdbcUrl) { + Matcher matcher = JDBC_PATTERN.matcher(jdbcUrl); + if (matcher.find()) { + return matcher.group(3); + } else { + LOG.error("jdbc url {} is not valid.", jdbcUrl); + } + + return null; + } + + public static String buildQuerySql(boolean weakRead, String column, String table, String where) { + if (weakRead) { + return buildWeakReadQuerySql(column, table, where); + } else { + return SingleTableSplitUtil.buildQuerySql(column, table, where); + } + } + + public static String buildWeakReadQuerySql(String column, String table, String where) { + String querySql; + + if (StringUtils.isBlank(where)) { + querySql = String.format(Constant.WEAK_READ_QUERY_SQL_TEMPLATE_WITHOUT_WHERE, column, table); + } else { + querySql = String.format(Constant.WEAK_READ_QUERY_SQL_TEMPLATE, column, table, where); + } + + return querySql; + } + + /** + * compare two ob versions + * @param version1 + * @param version2 + * @return 0 when the two versions are the same + * -1 when version1 is smaller (earlier) than version2 + * 1 when version is bigger (later) than version2 + 
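[Editor's note] getDbNameFromJdbcUrl above relies on Matcher.find(), so the database name can be extracted even when the URL carries the `||_dsc_ob10_dsc_||cluster:tenant||_dsc_ob10_dsc_||...` prefix exercised by the unit test later in this patch. A self-contained sketch of that extraction, with the pattern copied from the reader:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Sketch of the jdbc-url parsing shown above.
public class JdbcUrlSketch {
    private static final Pattern JDBC_PATTERN =
            Pattern.compile("jdbc:(oceanbase|mysql)://([\\w\\.-]+:\\d+)/([\\w\\.-]+)");

    static String dbName(String jdbcUrl) {
        Matcher m = JDBC_PATTERN.matcher(jdbcUrl);
        // find() scans the whole string, so a cluster/tenant prefix before "jdbc:" is harmless
        return m.find() ? m.group(3) : null;
    }

    public static void main(String[] args) {
        System.out.println(dbName("jdbc:oceanbase://127.0.0.1:2883/testdb"));         // testdb
        System.out.println(dbName("||_dsc_ob10_dsc_||obcluster:mysql||_dsc_ob10_dsc_||"
                + "jdbc:mysql://127.0.0.1:3306/testdb"));                              // testdb
    }
}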
*/ + public static int compareObVersion(String version1, String version2) { + if (version1 == null || version2 == null) { + throw new RuntimeException("can not compare null version"); + } + ObVersion v1 = new ObVersion(version1); + ObVersion v2 = new ObVersion(version2); + return v1.compareTo(v2); + } + + /** + * + * @param conn + * @param sql + * @return + */ + public static List getResultsFromSql(Connection conn, String sql) { + List list = new ArrayList(); + Statement stmt = null; + ResultSet rs = null; + + LOG.info("executing sql: " + sql); + + try { + stmt = conn.createStatement(); + rs = stmt.executeQuery(sql); + while (rs.next()) { + list.add(rs.getString(1)); + } + } catch (Exception e) { + LOG.error("error when executing sql: " + e.getMessage()); + } finally { + DBUtil.closeDBResources(rs, stmt, null); + } + + return list; + } + + /** + * get obversion, try ob_version first, and then try version if failed + * @param conn + * @return + */ + public static ObVersion getObVersion(Connection conn) { + List results = getResultsFromSql(conn, "select ob_version()"); + if (results.size() == 0) { + results = getResultsFromSql(conn, "select version()"); + } + ObVersion obVersion = new ObVersion(results.get(0)); + + LOG.info("obVersion: " + obVersion); + return obVersion; } } diff --git a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ObVersion.java b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ObVersion.java new file mode 100644 index 00000000..2fc414ce --- /dev/null +++ b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ObVersion.java @@ -0,0 +1,88 @@ +package com.alibaba.datax.plugin.reader.oceanbasev10reader.util; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * @author johnrobbet + */ +public class ObVersion implements Comparable { + + private static final Logger LOG = LoggerFactory.getLogger(ObVersion.class); + + private int majorVersion; + private int minorVersion; + private int releaseNumber; + private int patchNumber; + + public static final ObVersion V2276 = valueOf("2.2.76"); + public static final ObVersion V4000 = valueOf("4.0.0.0"); + + private static final ObVersion DEFAULT_VERSION = + valueOf(System.getProperty("defaultObVersion","3.2.3.0")); + + private static final int VERSION_PART_COUNT = 4; + + public ObVersion(String version) { + try { + String[] versionParts = version.split("\\."); + majorVersion = Integer.valueOf(versionParts[0]); + minorVersion = Integer.valueOf(versionParts[1]); + releaseNumber = Integer.valueOf(versionParts[2]); + int tempPatchNum = 0; + if (versionParts.length == VERSION_PART_COUNT) { + try { + tempPatchNum = Integer.valueOf(versionParts[3]); + } catch (Exception e) { + LOG.warn("fail to parse ob version: " + e.getMessage()); + } + } + patchNumber = tempPatchNum; + } catch (Exception ex) { + LOG.warn("fail to get ob version, using default {} {}", + DEFAULT_VERSION, ex.getMessage()); + majorVersion = DEFAULT_VERSION.majorVersion; + minorVersion = DEFAULT_VERSION.minorVersion; + releaseNumber = DEFAULT_VERSION.releaseNumber; + patchNumber = DEFAULT_VERSION.patchNumber; + } + } + + public static ObVersion valueOf(String version) { + return new ObVersion(version); + } + + @Override + public int compareTo(ObVersion o) { + if (this.majorVersion > o.majorVersion) { + return 1; + } else if (this.majorVersion < o.majorVersion) { + return -1; + } + + if (this.minorVersion > o.minorVersion) { + return 1; + } else 
if (this.minorVersion < o.minorVersion) { + return -1; + } + + if (this.releaseNumber > o.releaseNumber) { + return 1; + } else if (this.releaseNumber < o.releaseNumber) { + return -1; + } + + if (this.patchNumber > o.patchNumber) { + return 1; + } else if (this.patchNumber < o.patchNumber) { + return -1; + } + + return 0; + } + + @Override + public String toString() { + return String.format("%d.%d.%d.%d", majorVersion, minorVersion, releaseNumber, patchNumber); + } +} diff --git a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/PartInfo.java b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/PartInfo.java new file mode 100644 index 00000000..7a9a6f70 --- /dev/null +++ b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/PartInfo.java @@ -0,0 +1,35 @@ +package com.alibaba.datax.plugin.reader.oceanbasev10reader.util; + +import java.util.ArrayList; +import java.util.List; + +/** + * @author johnrobbet + */ +public class PartInfo { + + private PartType partType; + + List partList; + + public PartInfo(PartType partType) { + this.partType = partType; + this.partList = new ArrayList(); + } + + public String getPartType () { + return partType.getTypeString(); + } + + public void addPart(List partList) { + this.partList.addAll(partList); + } + + public List getPartList() { + return partList; + } + + public boolean isPartitionTable() { + return partType != PartType.NONPARTITION && partList.size() > 0; + } +} diff --git a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/PartType.java b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/PartType.java new file mode 100644 index 00000000..05c23d6f --- /dev/null +++ b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/PartType.java @@ -0,0 +1,28 @@ +package com.alibaba.datax.plugin.reader.oceanbasev10reader.util; + +/** + * @author johnrobbet + */ + +public enum PartType { + // Non partitioned table + NONPARTITION("NONPARTITION"), + + // Partitioned table + PARTITION("PARTITION"), + + // Subpartitioned table + SUBPARTITION("SUBPARTITION"); + + private String typeString; + + PartType (String typeString) { + this.typeString = typeString; + } + + public String getTypeString() { + return typeString; + } +} + + diff --git a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/PartitionSplitUtil.java b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/PartitionSplitUtil.java new file mode 100644 index 00000000..2929658a --- /dev/null +++ b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/PartitionSplitUtil.java @@ -0,0 +1,191 @@ +package com.alibaba.datax.plugin.reader.oceanbasev10reader.util; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.reader.Constant; +import com.alibaba.datax.plugin.rdbms.reader.Key; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.reader.oceanbasev10reader.ext.ObReaderKey; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.util.ArrayList; +import java.util.List; + +/** + * @author johnrobbet + */ +public class PartitionSplitUtil { + private static final Logger LOG = 
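[Editor's note] ObVersion above tolerates sloppy version strings: a missing or non-numeric fourth component leaves the patch number at 0, and a completely unparsable string falls back to the defaultObVersion system property (3.2.3.0). A short usage sketch consistent with the unit test later in this patch (it assumes the ObVersion class introduced above is on the classpath):

import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ObVersion;

// Usage sketch for the ObVersion class introduced above.
public class ObVersionUsageSketch {
    public static void main(String[] args) {
        // up to four components, compared one by one
        System.out.println(new ObVersion("2.2.70").compareTo(new ObVersion("3.2.2")));    // -1
        System.out.println(new ObVersion("3.2.3.0").compareTo(new ObVersion("3.2.3.0"))); // 0

        // "0-CE" is not an integer, so the patch number silently stays 0
        // and the build suffix does not affect ordering
        System.out.println(new ObVersion("3.2.3.0-CE").compareTo(new ObVersion("3.2.3.0"))); // 0
    }
}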
LoggerFactory.getLogger(PartitionSplitUtil.class); + + private static final String ORACLE_GET_SUBPART_TEMPLATE = + "select subpartition_name " + + "from dba_tab_subpartitions " + + "where table_name = '%s' and table_owner = '%s'"; + + private static final String ORACLE_GET_PART_TEMPLATE = + "select partition_name " + + "from dba_tab_partitions " + + "where table_name = '%s' and table_owner = '%s'"; + + private static final String MYSQL_GET_PART_TEMPLATE = + "select p.part_name " + + "from oceanbase.__all_part p, oceanbase.%s t, oceanbase.__all_database d " + + "where p.table_id = t.table_id " + + "and d.database_id = t.database_id " + + "and d.database_name = '%s' " + + "and t.table_name = '%s'"; + + private static final String MYSQL_GET_SUBPART_TEMPLATE = + "select p.sub_part_name " + + "from oceanbase.__all_sub_part p, oceanbase.%s t, oceanbase.__all_database d " + + "where p.table_id = t.table_id " + + "and d.database_id = t.database_id " + + "and d.database_name = '%s' " + + "and t.table_name = '%s'"; + + /** + * get partition info from data dictionary in ob oracle mode + * @param config + * @param tableName + * @return + */ + public static PartInfo getObOraclePartInfoBySQL(Configuration config, String tableName) { + PartInfo partInfo; + DataBaseType dbType = ObReaderUtils.databaseType; + String jdbcUrl = config.getString(Key.JDBC_URL); + String username = config.getString(Key.USERNAME); + String password = config.getString(Key.PASSWORD); + String dbname = ObReaderUtils.getDbNameFromJdbcUrl(jdbcUrl).toUpperCase(); + Connection conn = DBUtil.getConnection(dbType, jdbcUrl, username, password); + tableName = tableName.toUpperCase(); + + // check if the table has subpartitions or not + String getSubPartSql = String.format(ORACLE_GET_SUBPART_TEMPLATE, tableName, dbname); + List partList = ObReaderUtils.getResultsFromSql(conn, getSubPartSql); + if (partList != null && partList.size() > 0) { + partInfo = new PartInfo(PartType.SUBPARTITION); + partInfo.addPart(partList); + return partInfo; + } + + String getPartSql = String.format(ORACLE_GET_PART_TEMPLATE, tableName, dbname); + partList = ObReaderUtils.getResultsFromSql(conn, getPartSql); + if (partList != null && partList.size() > 0) { + partInfo = new PartInfo(PartType.PARTITION); + partInfo.addPart(partList); + return partInfo; + } + + // table is not partitioned + partInfo = new PartInfo(PartType.NONPARTITION); + return partInfo; + } + + public static List splitByPartition (Configuration configuration) { + List allSlices = new ArrayList<>(); + List connections = configuration.getList(Constant.CONN_MARK, Object.class); + for (int i = 0, len = connections.size(); i < len; i++) { + Configuration sliceConfig = configuration.clone(); + Configuration connConf = Configuration.from(connections.get(i).toString()); + String jdbcUrl = connConf.getString(Key.JDBC_URL); + sliceConfig.set(Key.JDBC_URL, jdbcUrl); + sliceConfig.remove(Constant.CONN_MARK); + + List tables = connConf.getList(Key.TABLE, String.class); + for (String table : tables) { + Configuration tempSlice = sliceConfig.clone(); + tempSlice.set(Key.TABLE, table); + allSlices.addAll(splitSinglePartitionTable(tempSlice)); + } + } + + return allSlices; + } + + private static List splitSinglePartitionTable(Configuration configuration) { + String table = configuration.getString(Key.TABLE); + String where = configuration.getString(Key.WHERE, null); + String column = configuration.getString(Key.COLUMN); + final boolean weakRead = configuration.getBool(Key.WEAK_READ, true); + + List slices = new 
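[Editor's note] The Oracle-mode lookup above probes the dictionary views in a fixed order: dba_tab_subpartitions first, then dba_tab_partitions, and only if both come back empty is the table treated as non-partitioned. A condensed sketch of that decision flow; the runQuery function is a stand-in for ObReaderUtils.getResultsFromSql, and PartInfo/PartType are the classes added in this patch:

import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.PartInfo;
import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.PartType;

import java.util.List;
import java.util.function.Function;

// Condensed sketch of the Oracle-mode partition probing order shown above.
public class OraclePartProbeSketch {
    static PartInfo probe(String table, String owner, Function<String, List<String>> runQuery) {
        String subPartSql = String.format(
                "select subpartition_name from dba_tab_subpartitions "
                        + "where table_name = '%s' and table_owner = '%s'", table, owner);
        List<String> parts = runQuery.apply(subPartSql);
        if (!parts.isEmpty()) {
            PartInfo info = new PartInfo(PartType.SUBPARTITION);
            info.addPart(parts);
            return info;
        }

        String partSql = String.format(
                "select partition_name from dba_tab_partitions "
                        + "where table_name = '%s' and table_owner = '%s'", table, owner);
        parts = runQuery.apply(partSql);
        if (!parts.isEmpty()) {
            PartInfo info = new PartInfo(PartType.PARTITION);
            info.addPart(parts);
            return info;
        }

        // neither view returned rows: treat the table as non-partitioned
        return new PartInfo(PartType.NONPARTITION);
    }
}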
ArrayList(); + PartInfo partInfo = getObPartInfoBySQL(configuration, table); + if (partInfo != null && partInfo.isPartitionTable()) { + String partitionType = partInfo.getPartType(); + for (String partitionName : partInfo.getPartList()) { + LOG.info(String.format("add %s %s for table %s", partitionType, partitionName, table)); + Configuration slice = configuration.clone(); + slice.set(ObReaderKey.PARTITION_NAME, partitionName); + slice.set(ObReaderKey.PARTITION_TYPE, partitionType); + slice.set(Key.QUERY_SQL, + ObReaderUtils.buildQuerySql(weakRead, column, + String.format("%s partition(%s)", table, partitionName), where)); + slices.add(slice); + } + } else { + LOG.info("table is not partitioned."); + + Configuration slice = configuration.clone(); + slice.set(Key.QUERY_SQL, ObReaderUtils.buildQuerySql(weakRead, column, table, where)); + slices.add(slice); + } + + return slices; + } + + public static PartInfo getObPartInfoBySQL(Configuration config, String table) { + boolean isOracleMode = config.getString(ObReaderKey.OB_COMPATIBILITY_MODE).equals("ORACLE"); + if (isOracleMode) { + return getObOraclePartInfoBySQL(config, table); + } else { + return getObMySQLPartInfoBySQL(config, table); + } + } + + public static PartInfo getObMySQLPartInfoBySQL(Configuration config, String table) { + PartInfo partInfo = new PartInfo(PartType.NONPARTITION); + List partList; + Connection conn = null; + try { + String jdbcUrl = config.getString(Key.JDBC_URL); + String username = config.getString(Key.USERNAME); + String password = config.getString(Key.PASSWORD); + String dbname = ObReaderUtils.getDbNameFromJdbcUrl(jdbcUrl); + String allTable = "__all_table"; + + conn = DBUtil.getConnection(DataBaseType.OceanBase, jdbcUrl, username, password); + ObVersion obVersion = ObReaderUtils.getObVersion(conn); + if (obVersion.compareTo(ObVersion.V2276) >= 0 && + obVersion.compareTo(ObVersion.V4000) < 0) { + allTable = "__all_table_v2"; + } + + String querySubPart = String.format(MYSQL_GET_SUBPART_TEMPLATE, allTable, dbname, table); + + PartType partType = PartType.SUBPARTITION; + + // try subpartition first + partList = ObReaderUtils.getResultsFromSql(conn, querySubPart); + + // if table is not sub-partitioned, the try partition + if (partList.isEmpty()) { + String queryPart = String.format(MYSQL_GET_PART_TEMPLATE, allTable, dbname, table); + partList = ObReaderUtils.getResultsFromSql(conn, queryPart); + partType = PartType.PARTITION; + } + + if (!partList.isEmpty()) { + partInfo = new PartInfo(partType); + partInfo.addPart(partList); + } + } catch (Exception ex) { + LOG.error("error when get partition list: " + ex.getMessage()); + } finally { + DBUtil.closeDBResources(null, conn); + } + + return partInfo; + } +} diff --git a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/TaskContext.java b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/TaskContext.java index ba754a37..17655a52 100644 --- a/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/TaskContext.java +++ b/oceanbasev10reader/src/main/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/TaskContext.java @@ -162,6 +162,7 @@ public class TaskContext { public String getUserSavePoint() { return userSavePoint; } + public void setUserSavePoint(String userSavePoint) { this.userSavePoint = userSavePoint; } diff --git a/oceanbasev10reader/src/test/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ObReaderUtilsTest.java 
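[Editor's note] One subtle detail in getObMySQLPartInfoBySQL above is the dictionary-table switch: for OB versions in [2.2.76, 4.0.0.0) the table metadata is read from oceanbase.__all_table_v2, otherwise from oceanbase.__all_table. A tiny sketch isolating just that choice (it assumes the ObVersion class from this patch):

import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ObVersion;

// Sketch of the __all_table / __all_table_v2 selection used above.
public class AllTableNameSketch {
    static String allTableName(ObVersion v) {
        boolean useV2 = v.compareTo(ObVersion.V2276) >= 0 && v.compareTo(ObVersion.V4000) < 0;
        return useV2 ? "__all_table_v2" : "__all_table";
    }

    public static void main(String[] args) {
        System.out.println(allTableName(new ObVersion("2.2.50")));  // __all_table
        System.out.println(allTableName(new ObVersion("3.2.3.0"))); // __all_table_v2
        System.out.println(allTableName(new ObVersion("4.0.0.0"))); // __all_table
    }
}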
b/oceanbasev10reader/src/test/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ObReaderUtilsTest.java new file mode 100644 index 00000000..35966595 --- /dev/null +++ b/oceanbasev10reader/src/test/java/com/alibaba/datax/plugin/reader/oceanbasev10reader/util/ObReaderUtilsTest.java @@ -0,0 +1,24 @@ +package com.alibaba.datax.plugin.reader.oceanbasev10reader.util; + +import org.junit.Test; + +public class ObReaderUtilsTest { + + @Test + public void getDbTest() { + assert ObReaderUtils.getDbNameFromJdbcUrl("jdbc:mysql://127.0.0.1:3306/testdb").equalsIgnoreCase("testdb"); + assert ObReaderUtils.getDbNameFromJdbcUrl("jdbc:oceanbase://127.0.0.1:2883/testdb").equalsIgnoreCase("testdb"); + assert ObReaderUtils.getDbNameFromJdbcUrl("||_dsc_ob10_dsc_||obcluster:mysql||_dsc_ob10_dsc_||jdbc:mysql://127.0.0.1:3306/testdb").equalsIgnoreCase("testdb"); + assert ObReaderUtils.getDbNameFromJdbcUrl("||_dsc_ob10_dsc_||obcluster:oracle||_dsc_ob10_dsc_||jdbc:oceanbase://127.0.0.1:3306/testdb").equalsIgnoreCase("testdb"); + } + + @Test + public void compareObVersionTest() { + assert ObReaderUtils.compareObVersion("2.2.70", "3.2.2") == -1; + assert ObReaderUtils.compareObVersion("2.2.70", "2.2.50") == 1; + assert ObReaderUtils.compareObVersion("2.2.70", "3.1.2") == -1; + assert ObReaderUtils.compareObVersion("3.1.2", "3.1.2") == 0; + assert ObReaderUtils.compareObVersion("3.2.3.0", "3.2.3.0") == 0; + assert ObReaderUtils.compareObVersion("3.2.3.0-CE", "3.2.3.0") == 0; + } +} diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/OceanBaseV10Writer.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/OceanBaseV10Writer.java index 89ef1c52..3bcc1019 100644 --- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/OceanBaseV10Writer.java +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/OceanBaseV10Writer.java @@ -1,15 +1,5 @@ package com.alibaba.datax.plugin.writer.oceanbasev10writer; -import java.sql.*; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.TimeUnit; - -import com.alibaba.datax.plugin.writer.oceanbasev10writer.util.DbUtils; -import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import com.alibaba.datax.common.plugin.RecordReceiver; import com.alibaba.datax.common.spi.Writer; import com.alibaba.datax.common.util.Configuration; @@ -20,8 +10,16 @@ import com.alibaba.datax.plugin.rdbms.writer.Constant; import com.alibaba.datax.plugin.rdbms.writer.Key; import com.alibaba.datax.plugin.rdbms.writer.util.WriterUtil; import com.alibaba.datax.plugin.writer.oceanbasev10writer.task.ConcurrentTableWriterTask; -import com.alibaba.datax.plugin.writer.oceanbasev10writer.task.SingleTableWriterTask; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.util.DbUtils; import com.alibaba.datax.plugin.writer.oceanbasev10writer.util.ObWriterUtils; +import com.alibaba.fastjson2.JSONObject; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.util.ArrayList; +import java.util.List; /** * 2016-04-07 @@ -61,6 +59,19 @@ public class OceanBaseV10Writer extends Writer { public void init() { this.originalConfig = super.getPluginJobConf(); checkCompatibleMode(originalConfig); + //将config中的column和table中的关键字进行转义 + List columns = originalConfig.getList(Key.COLUMN, String.class); + 
ObWriterUtils.escapeDatabaseKeyword(columns); + originalConfig.set(Key.COLUMN, columns); + + List conns = originalConfig.getList(Constant.CONN_MARK, JSONObject.class); + for (int i = 0; i < conns.size(); i++) { + JSONObject conn = conns.get(i); + Configuration connConfig = Configuration.from(conn.toString()); + List tables = connConfig.getList(Key.TABLE, String.class); + ObWriterUtils.escapeDatabaseKeyword(tables); + originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.TABLE), tables); + } this.commonJob = new CommonRdbmsWriter.Job(DATABASE_TYPE); this.commonJob.init(this.originalConfig); } @@ -223,6 +234,7 @@ public class OceanBaseV10Writer extends Writer { /** * 注意:此方法每个 Task 都会执行一次。 最佳实践:此处适当封装确保简洁清晰完成数据写入工作。 */ + @Override public void startWrite(RecordReceiver recordReceiver) { this.writerTask.startWrite(recordReceiver, this.writerSliceConfig, super.getTaskPluginCollector()); } diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ServerConnectInfo.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ServerConnectInfo.java index 49e5c05f..b0611642 100644 --- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ServerConnectInfo.java +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/ext/ServerConnectInfo.java @@ -24,14 +24,32 @@ public class ServerConnectInfo { this.tenantName = ss[1].trim().split(":")[1]; this.jdbcUrl = ss[2].replace("jdbc:mysql:", "jdbc:oceanbase:"); } else { - throw new RuntimeException ("jdbc url format is not correct: " + jdbcUrl); + this.jdbcUrl = jdbcUrl.replace("jdbc:mysql:", "jdbc:oceanbase:"); + if (username.contains("@") && username.contains("#")) { + this.userName = username.substring(0, username.indexOf("@")); + this.tenantName = username.substring(username.indexOf("@") + 1, username.indexOf("#")); + this.clusterName = username.substring(username.indexOf("#") + 1); + } else if (username.contains(":")) { + String[] config = username.split(":"); + if (config.length != 3) { + throw new RuntimeException ("username format is not correct: " + username); + } + this.clusterName = config[0]; + this.tenantName = config[1]; + this.userName = config[2]; + } else { + this.clusterName = null; + this.tenantName = null; + this.userName = username; + } } + this.password = password; parseJdbcUrl(jdbcUrl); } private void parseJdbcUrl(final String jdbcUrl) { - Pattern pattern = Pattern.compile("//([\\w\\.\\-]+:\\d+)/([\\w]+)\\?"); + Pattern pattern = Pattern.compile("//([\\w\\.\\-]+:\\d+)/([\\w-]+)\\?"); Matcher matcher = pattern.matcher(jdbcUrl); if (matcher.find()) { String ipPort = matcher.group(1); @@ -51,8 +69,11 @@ public class ServerConnectInfo { } public String getFullUserName() { - StringBuilder builder = new StringBuilder(); - builder.append(userName).append("@").append(tenantName).append("#").append(clusterName); + StringBuilder builder = new StringBuilder(userName); + if (tenantName != null && clusterName != null) { + builder.append("@").append(tenantName).append("#").append(clusterName); + } + return builder.toString(); } } diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/ConcurrentTableWriterTask.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/ConcurrentTableWriterTask.java index 084acbeb..82b16923 100644 --- 
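[Editor's note] The ServerConnectInfo change above accepts three username spellings instead of rejecting plain usernames outright. A standalone sketch of the same parsing rules; the returned String[] of {user, tenant, cluster} is only for illustration:

// Illustrative sketch of the username formats accepted by ServerConnectInfo above.
public class UserNameParseSketch {
    static String[] parse(String username) {
        String user, tenant, cluster;
        if (username.contains("@") && username.contains("#")) {
            // form: user@tenant#cluster
            user = username.substring(0, username.indexOf('@'));
            tenant = username.substring(username.indexOf('@') + 1, username.indexOf('#'));
            cluster = username.substring(username.indexOf('#') + 1);
        } else if (username.contains(":")) {
            // form: cluster:tenant:user
            String[] parts = username.split(":");
            if (parts.length != 3) {
                throw new RuntimeException("username format is not correct: " + username);
            }
            cluster = parts[0];
            tenant = parts[1];
            user = parts[2];
        } else {
            // plain user name: tenant and cluster stay unknown,
            // so getFullUserName() returns the user name unchanged
            user = username;
            tenant = null;
            cluster = null;
        }
        return new String[]{user, tenant, cluster};
    }

    public static void main(String[] args) {
        System.out.println(java.util.Arrays.toString(parse("root@sys#obcluster"))); // [root, sys, obcluster]
        System.out.println(java.util.Arrays.toString(parse("obcluster:sys:root"))); // [obcluster, sys, root]
        System.out.println(java.util.Arrays.toString(parse("root")));               // [root, null, null]
    }
}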
a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/ConcurrentTableWriterTask.java +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/ConcurrentTableWriterTask.java @@ -1,7 +1,27 @@ package com.alibaba.datax.plugin.writer.oceanbasev10writer.task; +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.plugin.TaskPluginCollector; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.Config; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ConnHolder; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ObClientConnHolder; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ServerConnectInfo; +import com.alibaba.datax.plugin.writer.oceanbasev10writer.util.ObWriterUtils; +import com.alipay.oceanbase.obproxy.data.TableEntryKey; +import com.alipay.oceanbase.obproxy.util.ObPartitionIdCalculator; +import org.apache.commons.lang3.tuple.Pair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.sql.Connection; -//import java.sql.PreparedStatement; import java.sql.PreparedStatement; import java.sql.SQLException; import java.util.ArrayList; @@ -16,27 +36,7 @@ import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; -import com.alibaba.datax.common.element.Column; -import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ObClientConnHolder; -import org.apache.commons.lang3.tuple.Pair; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.alibaba.datax.common.element.Record; -import com.alibaba.datax.common.exception.DataXException; -import com.alibaba.datax.common.plugin.RecordReceiver; -import com.alibaba.datax.common.plugin.TaskPluginCollector; -import com.alibaba.datax.common.util.Configuration; -import com.alibaba.datax.plugin.rdbms.util.DBUtil; -import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; -import com.alibaba.datax.plugin.rdbms.util.DataBaseType; -import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter; -import com.alibaba.datax.plugin.writer.oceanbasev10writer.Config; -import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ConnHolder; -import com.alibaba.datax.plugin.writer.oceanbasev10writer.ext.ServerConnectInfo; -import com.alibaba.datax.plugin.writer.oceanbasev10writer.util.ObWriterUtils; -import com.alipay.oceanbase.obproxy.data.TableEntryKey; -import com.alipay.oceanbase.obproxy.util.ObPartitionIdCalculator; +//import java.sql.PreparedStatement; public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task { private static final Logger LOG = LoggerFactory.getLogger(ConcurrentTableWriterTask.class); @@ -62,6 +62,7 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task { private ObPartitionIdCalculator partCalculator = null; private HashMap> groupInsertValues; + List unknownPartRecords = new ArrayList(); // private List unknownPartRecords; private List partitionKeyIndexes; @@ -75,7 +76,6 @@ public class ConcurrentTableWriterTask 
extends CommonRdbmsWriter.Task { private Condition condition = lock.newCondition(); private long startTime; - private boolean isOb2 = false; private String obWriteMode = "update"; private boolean isOracleCompatibleMode = false; private String obUpdateColumns = null; @@ -104,10 +104,14 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task { connectInfo.getFullUserName(), connectInfo.password); checkConnHolder.initConnection(); if (isOracleCompatibleMode) { - connectInfo.databaseName = connectInfo.databaseName.toUpperCase(); - table = table.toUpperCase(); - LOG.info(String.format("this is oracle compatible mode, change database to %s, table to %s", - connectInfo.databaseName, table)); + connectInfo.databaseName = connectInfo.databaseName.toUpperCase(); + //在转义的情况下不翻译 + if (!(table.startsWith("\"") && table.endsWith("\""))) { + table = table.toUpperCase(); + } + + LOG.info(String.format("this is oracle compatible mode, change database to %s, table to %s", + connectInfo.databaseName, table)); } if (config.getBool(Config.USE_PART_CALCULATOR, Config.DEFAULT_USE_PART_CALCULATOR)) { @@ -125,12 +129,6 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task { concurrentWriter = new ConcurrentTableWriter(config, connectInfo, writeRecordSql); allTaskInQueue = false; } - - String version = config.getString(Config.OB_VERSION); - int pIdx = version.lastIndexOf('.'); - if ((Float.valueOf(version.substring(0, pIdx)) >= 2.1f)) { - isOb2 = true; - } } private void initPartCalculator(ServerConnectInfo connectInfo) { @@ -150,7 +148,7 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task { partCalculator = new ObPartitionIdCalculator(connectInfo.ipPort, tableEntryKey); } catch (Exception ex) { ++retry; - LOG.warn("create new part calculator failed, retry ... 
{}", retry, ex); + LOG.warn("create new part calculator failed, retry {}: {}", retry, ex.getMessage()); } } while (partCalculator == null && retry < 3); // try 3 times } @@ -289,20 +287,15 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task { } private void addLeftRecords() { + //不需要刷新Cache,已经是最后一批数据了 for (List groupValues : groupInsertValues.values()) { if (groupValues.size() > 0 ) { - int retry = 0; - while (true) { - try { - concurrentWriter.addBatchRecords(groupValues); - break; - } catch (InterruptedException e) { - retry++; - LOG.info("Concurrent table writer is interrupted, retry {}", retry); - } - } + addRecordsToWriteQueue(groupValues); } } + if (unknownPartRecords.size() > 0) { + addRecordsToWriteQueue(unknownPartRecords); + } } private void addRecordToCache(final Record record) { @@ -313,7 +306,7 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task { LOG.warn("fail to get partition id: " + e1.getMessage() + ", record: " + record); } - if (partId == null && isOb2) { + if (partId == null) { LOG.debug("fail to calculate parition id, just put into the default buffer."); partId = Long.MAX_VALUE; } @@ -326,41 +319,40 @@ public class ConcurrentTableWriterTask extends CommonRdbmsWriter.Task { } groupValues.add(record); if (groupValues.size() >= batchSize) { - int i = 0; - while (true) { - if (i > 0) { - LOG.info("retry add batch record the {} times", i); - } - try { - concurrentWriter.addBatchRecords(groupValues); - printEveryTime(); - break; - } catch (InterruptedException e) { - LOG.info("Concurrent table writer is interrupted"); - } - } - groupValues = new ArrayList(batchSize); + groupValues = addRecordsToWriteQueue(groupValues); groupInsertValues.put(partId, groupValues); } } else { - LOG.warn("add unknown part record {}", record); - List unknownPartRecords = new ArrayList(); + LOG.debug("add unknown part record {}", record); unknownPartRecords.add(record); - int i = 0; - while (true) { - if (i > 0) { - LOG.info("retry add batch record the {} times", i); - } - try { - concurrentWriter.addBatchRecords(unknownPartRecords); - break; - } catch (InterruptedException e) { - LOG.info("Concurrent table writer is interrupted"); - } + if (unknownPartRecords.size() >= batchSize) { + unknownPartRecords = addRecordsToWriteQueue(unknownPartRecords); } + } } + /** + * + * @param records + * @return 返回一个新的Cache用于存储接下来的数据 + */ + private List addRecordsToWriteQueue(List records) { + int i = 0; + while (true) { + if (i > 0) { + LOG.info("retry add batch record the {} times", i); + } + try { + concurrentWriter.addBatchRecords(records); + break; + } catch (InterruptedException e) { + i++; + LOG.info("Concurrent table writer is interrupted"); + } + } + return new ArrayList(batchSize); + } private void checkMemStore() { Connection checkConn = checkConnHolder.reconnect(); long now = System.currentTimeMillis(); diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/InsertTask.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/InsertTask.java index 522d91a5..968908ca 100644 --- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/InsertTask.java +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/task/InsertTask.java @@ -65,6 +65,7 @@ public class InsertTask implements Runnable { this.writeRecordSql = writeRecordSql; this.isStop = false; this.deleteMeta = deleteMeta; + connHolder.initConnection(); } 
void setWriterTask(ConcurrentTableWriterTask writerTask) { @@ -151,7 +152,6 @@ public class InsertTask implements Runnable { public void doMultiInsert(final List buffer, final boolean printCost, final long restrict) { checkMemstore(); - connHolder.initConnection(); Connection conn = connHolder.getConn(); boolean success = false; long cost = 0; @@ -165,7 +165,6 @@ public class InsertTask implements Runnable { } catch (InterruptedException e) { LOG.info("thread interrupted ..., ignore"); } - connHolder.initConnection(); conn = connHolder.getConn(); LOG.info("retry {}, start do batch insert, size={}", i, buffer.size()); checkMemstore(); diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/util/ObWriterUtils.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/util/ObWriterUtils.java index 368c3d17..edc4b236 100644 --- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/util/ObWriterUtils.java +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/util/ObWriterUtils.java @@ -11,15 +11,47 @@ import org.slf4j.LoggerFactory; import java.sql.*; import java.util.*; -import java.util.stream.Collectors; public class ObWriterUtils { - protected static final Logger LOG = LoggerFactory.getLogger(Task.class); + + private static final String MYSQL_KEYWORDS = "ACCESSIBLE,ACCOUNT,ACTION,ADD,AFTER,AGAINST,AGGREGATE,ALGORITHM,ALL,ALTER,ALWAYS,ANALYSE,AND,ANY,AS,ASC,ASCII,ASENSITIVE,AT,AUTO_INCREMENT,AUTOEXTEND_SIZE,AVG,AVG_ROW_LENGTH,BACKUP,BEFORE,BEGIN,BETWEEN,BIGINT,BINARY,BINLOG,BIT,BLOB,BLOCK,BOOL,BOOLEAN,BOTH,BTREE,BY,BYTE,CACHE,CALL,CASCADE,CASCADED,CASE,CATALOG_NAME,CHAIN,CHANGE,CHANGED,CHANNEL,CHAR,CHARACTER,CHARSET,CHECK,CHECKSUM,CIPHER,CLASS_ORIGIN,CLIENT,CLOSE,COALESCE,CODE,COLLATE,COLLATION,COLUMN,COLUMN_FORMAT,COLUMN_NAME,COLUMNS,COMMENT,COMMIT,COMMITTED,COMPACT,COMPLETION,COMPRESSED,COMPRESSION,CONCURRENT,CONDITION,CONNECTION,CONSISTENT,CONSTRAINT,CONSTRAINT_CATALOG,CONSTRAINT_NAME,CONSTRAINT_SCHEMA,CONTAINS,CONTEXT,CONTINUE,CONVERT,CPU,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR,CURSOR_NAME,DATA,DATABASE,DATABASES,DATAFILE,DATE,DATETIME,DAY,DAY_HOUR,DAY_MICROSECOND,DAY_MINUTE,DAY_SECOND,DEALLOCATE,DEC,DECIMAL,DECLARE,DEFAULT,DEFAULT_AUTH,DEFINER,DELAY_KEY_WRITE,DELAYED,DELETE,DES_KEY_FILE,DESC,DESCRIBE,DETERMINISTIC,DIAGNOSTICS,DIRECTORY,DISABLE,DISCARD,DISK,DISTINCT,DISTINCTROW,DIV,DO,DOUBLE,DROP,DUAL,DUMPFILE,DUPLICATE,DYNAMIC,EACH,ELSE,ELSEIF,ENABLE,ENCLOSED,ENCRYPTION,END,ENDS,ENGINE,ENGINES,ENUM,ERROR,ERRORS,ESCAPE,ESCAPED,EVENT,EVENTS,EVERY,EXCHANGE,EXECUTE,EXISTS,EXIT,EXPANSION,EXPIRE,EXPLAIN,EXPORT,EXTENDED,EXTENT_SIZE,FAST,FAULTS,FETCH,FIELDS,FILE,FILE_BLOCK_SIZE,FILTER,FIRST,FIXED,FLOAT,FLOAT4,FLOAT8,FLUSH,FOLLOWS,FOR,FORCE,FOREIGN,FORMAT,FOUND,FROM,FULL,FULLTEXT,FUNCTION,GENERAL,GENERATED,GEOMETRY,GEOMETRYCOLLECTION,GET,GET_FORMAT,GLOBAL,GRANT,GRANTS,GROUP,GROUP_REPLICATION,HANDLER,HASH,HAVING,HELP,HIGH_PRIORITY,HOST,HOSTS,HOUR,HOUR_MICROSECOND,HOUR_MINUTE,HOUR_SECOND,IDENTIFIED,IF,IGNORE,IGNORE_SERVER_IDS,IMPORT,IN,INDEX,INDEXES,INFILE,INITIAL_SIZE,INNER,INOUT,INSENSITIVE,INSERT,INSERT_METHOD,INSTALL,INSTANCE,INT,INT1,INT2,INT3,INT4,INT8,INTEGER,INTERVAL,INTO,INVOKER,IO,IO_AFTER_GTIDS,IO_BEFORE_GTIDS,IO_THREAD,IPC,IS,ISOLATION,ISSUER,ITERATE,JOIN,JSON,KEY,KEY_BLOCK_SIZE,KEYS,KILL,LANGUAGE,LAST,LEADING,LEAVE,LEAVES,LEFT,LESS,LEVEL,LIKE,LIMIT,LINEAR,LINES,LINESTRING,LIST,LOAD,LOCAL,LOCALTIME,
LOCALTIMESTAMP,LOCK,LOCKS,LOGFILE,LOGS,LONG,LONGBLOB,LONGTEXT,LOOP,LOW_PRIORITY,MASTER,MASTER_AUTO_POSITION,MASTER_BIND,MASTER_CONNECT_RETRY,MASTER_DELAY,MASTER_HEARTBEAT_PERIOD,MASTER_HOST,MASTER_LOG_FILE,MASTER_LOG_POS,MASTER_PASSWORD,MASTER_PORT,MASTER_RETRY_COUNT,MASTER_SERVER_ID,MASTER_SSL,MASTER_SSL_CA,MASTER_SSL_CAPATH,MASTER_SSL_CERT,MASTER_SSL_CIPHER,MASTER_SSL_CRL,MASTER_SSL_CRLPATH,MASTER_SSL_KEY,MASTER_SSL_VERIFY_SERVER_CERT,MASTER_TLS_VERSION,MASTER_USER,MATCH,MAX_CONNECTIONS_PER_HOUR,MAX_QUERIES_PER_HOUR,MAX_ROWS,MAX_SIZE,MAX_STATEMENT_TIME,MAX_UPDATES_PER_HOUR,MAX_USER_CONNECTIONS,MAXVALUE,MEDIUM,MEDIUMBLOB,MEDIUMINT,MEDIUMTEXT,MEMORY,MERGE,MESSAGE_TEXT,MICROSECOND,MIDDLEINT,MIGRATE,MIN_ROWS,MINUTE,MINUTE_MICROSECOND,MINUTE_SECOND,MOD,MODE,MODIFIES,MODIFY,MONTH,MULTILINESTRING,MULTIPOINT,MULTIPOLYGON,MUTEX,MYSQL_ERRNO,NAME,NAMES,NATIONAL,NATURAL,NCHAR,NDB,NDBCLUSTER,NEVER,NEW,NEXT,NO,NO_WAIT,NO_WRITE_TO_BINLOG,NODEGROUP,NONBLOCKING,NONE,NOT,NULL,NUMBER,NUMERIC,NVARCHAR,OFFSET,OLD_PASSWORD,ON,ONE,ONLY,OPEN,OPTIMIZE,OPTIMIZER_COSTS,OPTION,OPTIONALLY,OPTIONS,OR,ORDER,OUT,OUTER,OUTFILE,OWNER,PACK_KEYS,PAGE,PARSE_GCOL_EXPR,PARSER,PARTIAL,PARTITION,PARTITIONING,PARTITIONS,PASSWORD,PHASE,PLUGIN,PLUGIN_DIR,PLUGINS,POINT,POLYGON,PORT,PRECEDES,PRECISION,PREPARE,PRESERVE,PREV,PRIMARY,PRIVILEGES,PROCEDURE,PROCESSLIST,PROFILE,PROFILES,PROXY,PURGE,QUARTER,QUERY,QUICK,RANGE,READ,READ_ONLY,READ_WRITE,READS,REAL,REBUILD,RECOVER,REDO_BUFFER_SIZE,REDOFILE,REDUNDANT,REFERENCES,REGEXP,RELAY,RELAY_LOG_FILE,RELAY_LOG_POS,RELAY_THREAD,RELAYLOG,RELEASE,RELOAD,REMOVE,RENAME,REORGANIZE,REPAIR,REPEAT,REPEATABLE,REPLACE,REPLICATE_DO_DB,REPLICATE_DO_TABLE,REPLICATE_IGNORE_DB,REPLICATE_IGNORE_TABLE,REPLICATE_REWRITE_DB,REPLICATE_WILD_DO_TABLE,REPLICATE_WILD_IGNORE_TABLE,REPLICATION,REQUIRE,RESET,RESIGNAL,RESTORE,RESTRICT,RESUME,RETURN,RETURNED_SQLSTATE,RETURNS,REVERSE,REVOKE,RIGHT,RLIKE,ROLLBACK,ROLLUP,ROTATE,ROUTINE,ROW,ROW_COUNT,ROW_FORMAT,ROWS,RTREE,SAVEPOINT,SCHEDULE,SCHEMA,SCHEMA_NAME,SCHEMAS,SECOND,SECOND_MICROSECOND,SECURITY,SELECT,SENSITIVE,SEPARATOR,SERIAL,SERIALIZABLE,SERVER,SESSION,SET,SHARE,SHOW,SHUTDOWN,SIGNAL,SIGNED,SIMPLE,SLAVE,SLOW,SMALLINT,SNAPSHOT,SOCKET,SOME,SONAME,SOUNDS,SOURCE,SPATIAL,SPECIFIC,SQL,SQL_AFTER_GTIDS,SQL_AFTER_MTS_GAPS,SQL_BEFORE_GTIDS,SQL_BIG_RESULT,SQL_BUFFER_RESULT,SQL_CACHE,SQL_CALC_FOUND_ROWS,SQL_NO_CACHE,SQL_SMALL_RESULT,SQL_THREAD,SQL_TSI_DAY,SQL_TSI_HOUR,SQL_TSI_MINUTE,SQL_TSI_MONTH,SQL_TSI_QUARTER,SQL_TSI_SECOND,SQL_TSI_WEEK,SQL_TSI_YEAR,SQLEXCEPTION,SQLSTATE,SQLWARNING,SSL,STACKED,START,STARTING,STARTS,STATS_AUTO_RECALC,STATS_PERSISTENT,STATS_SAMPLE_PAGES,STATUS,STOP,STORAGE,STORED,STRAIGHT_JOIN,STRING,SUBCLASS_ORIGIN,SUBJECT,SUBPARTITION,SUBPARTITIONS,SUPER,SUSPEND,SWAPS,SWITCHES,TABLE,TABLE_CHECKSUM,TABLE_NAME,TABLES,TABLESPACE,TEMPORARY,TEMPTABLE,TERMINATED,TEXT,THAN,THEN,TIME,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TINYBLOB,TINYINT,TINYTEXT,TO,TRAILING,TRANSACTION,TRIGGER,TRIGGERS,TRUNCATE,TYPE,TYPES,UNCOMMITTED,UNDEFINED,UNDO,UNDO_BUFFER_SIZE,UNDOFILE,UNICODE,UNINSTALL,UNION,UNIQUE,UNKNOWN,UNLOCK,UNSIGNED,UNTIL,UPDATE,UPGRADE,USAGE,USE,USE_FRM,USER,USER_RESOURCES,USING,UTC_DATE,UTC_TIME,UTC_TIMESTAMP,VALIDATION,VALUE,VALUES,VARBINARY,VARCHAR,VARCHARACTER,VARIABLES,VARYING,VIEW,VIRTUAL,WAIT,WARNINGS,WEEK,WEIGHT_STRING,WHEN,WHERE,WHILE,WITH,WITHOUT,WORK,WRAPPER,WRITE,X509,XA,XID,XML,XOR,YEAR,YEAR_MONTH,ZEROFILL,FALSE,TRUE"; + private static final String ORACLE_KEYWORDS = 
"ACCESS,ADD,ALL,ALTER,AND,ANY,ARRAYLEN,AS,ASC,AUDIT,BETWEEN,BY,CHAR,CHECK,CLUSTER,COLUMN,COMMENT,COMPRESS,CONNECT,CREATE,CURRENT,DATE,DECIMAL,DEFAULT,DELETE,DESC,DISTINCT,DROP,ELSE,EXCLUSIVE,EXISTS,FILE,FLOAT,FOR,FROM,GRANT,GROUP,HAVING,IDENTIFIED,IMMEDIATE,IN,INCREMENT,INDEX,INITIAL,INSERT,INTEGER,INTERSECT,INTO,IS,LEVEL,LIKE,LOCK,LONG,MAXEXTENTS,MINUS,MODE,MODIFY,NOAUDIT,NOCOMPRESS,NOT,NOTFOUND,NOWAIT,NULL,NUMBER,OF,OFFLINE,ON,ONLINE,OPTION,OR,ORDER,PCTFREE,PRIOR,PRIVILEGES,PUBLIC,RAW,RENAME,RESOURCE,REVOKE,ROW,ROWID,ROWLABEL,ROWNUM,ROWS,SELECT,SESSION,SET,SHARE,SIZE,SMALLINT,SQLBUF,START,SUCCESSFUL,SYNONYM,TABLE,THEN,TO,TRIGGER,UID,UNION,UNIQUE,UPDATE,USER,VALIDATE,VALUES,VARCHAR,VARCHAR2,VIEW,WHENEVER,WHERE,WITH"; private static String CHECK_MEMSTORE = "select 1 from %s.gv$memstore t where t.total>t.mem_limit * ?"; - + private static Set databaseKeywords; private static String compatibleMode = null; + protected static final Logger LOG = LoggerFactory.getLogger(Task.class); + private static Set keywordsFromString2HashSet(final String keywords) { + return new HashSet(Arrays.asList(keywords.split(","))); + } + public static String escapeDatabaseKeyword(String keyword) { + if (databaseKeywords == null) { + if (isOracleMode()) { + databaseKeywords = keywordsFromString2HashSet(ORACLE_KEYWORDS); + } else { + databaseKeywords = keywordsFromString2HashSet(MYSQL_KEYWORDS); + } + } + char escapeChar = isOracleMode() ? '"' : '`'; + if (databaseKeywords.contains(keyword.toUpperCase())) { + keyword = escapeChar + keyword + escapeChar; + } + return keyword; + } + + public static void escapeDatabaseKeyword(List keywords) { + for (int i = 0; i < keywords.size(); i++) { + keywords.set(i, escapeDatabaseKeyword(keywords.get(i))); + } + } + public static Boolean isEscapeMode(String keyword){ + if(isOracleMode()){ + return keyword.startsWith("\"") && keyword.endsWith("\""); + }else{ + return keyword.startsWith("`") && keyword.endsWith("`"); + } + } public static boolean isMemstoreFull(Connection conn, double memstoreThreshold) { PreparedStatement ps = null; ResultSet rs = null; @@ -70,7 +102,11 @@ public class ObWriterUtils { } private static int[] getColumnIndex(List columnsInIndex, List allColumns) { - allColumns = allColumns.stream().map(String::toUpperCase).collect(Collectors.toList()); + for (int i = 0; i < allColumns.size(); i++) { + if (!ObWriterUtils.isEscapeMode(allColumns.get(i))) { + allColumns.set(i, allColumns.get(i).toUpperCase()); + } + } int[] colIdx = new int[columnsInIndex.size()]; for (int i = 0; i < columnsInIndex.size(); i++) { int index = allColumns.indexOf(columnsInIndex.get(i)); @@ -122,7 +158,11 @@ public class ObWriterUtils { rs = stmt.executeQuery(sql); while (rs.next()) { String keyName = rs.getString("Key_name"); - String columnName = StringUtils.upperCase(rs.getString("Column_name")); + String columnName = rs.getString("Column_name"); + columnName= escapeDatabaseKeyword(columnName); + if(!ObWriterUtils.isEscapeMode(columnName)){ + columnName = columnName.toUpperCase(); + } List s = uniqueKeys.get(keyName); if (s == null) { s = new ArrayList(); @@ -135,6 +175,7 @@ public class ObWriterUtils { } finally { asyncClose(rs, stmt, null); } + //ObWriterUtils.escapeDatabaseKeywords(uniqueKeys); return uniqueKeys; } @@ -291,6 +332,7 @@ public class ObWriterUtils { * @param e * @return */ + public static boolean isFatalError(SQLException e) { String sqlState = e.getSQLState(); if (StringUtils.startsWith(sqlState, "08")) { diff --git a/odpsreader/pom.xml b/odpsreader/pom.xml index 
5762a57b..3f2c4acb 100755 --- a/odpsreader/pom.xml +++ b/odpsreader/pom.xml @@ -36,18 +36,18 @@ guava 16.0.1 - - org.bouncycastle - bcprov-jdk15on - 1.52 - system - ${basedir}/src/main/libs/bcprov-jdk15on-1.52.jar - - - com.aliyun.odps - odps-sdk-core - 0.20.7-public - + + org.xerial + sqlite-jdbc + 3.34.0 + + + + + com.aliyun.odps + odps-sdk-core + 0.38.4-public + org.mockito @@ -87,29 +87,22 @@ 1.4.10 test - - org.mockito - mockito-core - 1.8.5 - test - - - org.powermock - powermock-api-mockito - 1.4.10 - test - - - - org.powermock - powermock-module-junit4 - 1.4.10 - test + commons-codec + commons-codec + 1.8 + + + src/main/java + + **/*.properties + + + diff --git a/odpsreader/src/main/assembly/package.xml b/odpsreader/src/main/assembly/package.xml index 9ec3309e..db659a17 100755 --- a/odpsreader/src/main/assembly/package.xml +++ b/odpsreader/src/main/assembly/package.xml @@ -23,13 +23,6 @@ plugin/reader/odpsreader - - src/main/libs - - *.* - - plugin/reader/odpsreader/libs - diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/ColumnType.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/ColumnType.java index eb674a7f..1c771d3e 100644 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/ColumnType.java +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/ColumnType.java @@ -3,20 +3,6 @@ package com.alibaba.datax.plugin.reader.odpsreader; public enum ColumnType { PARTITION, NORMAL, CONSTANT, UNKNOWN, ; - @Override - public String toString() { - switch (this) { - case PARTITION: - return "partition"; - case NORMAL: - return "normal"; - case CONSTANT: - return "constant"; - default: - return "unknown"; - } - } - public static ColumnType asColumnType(String columnTypeString) { if ("partition".equals(columnTypeString)) { return PARTITION; diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/Constant.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/Constant.java index c3c674dd..cf34762d 100755 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/Constant.java +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/Constant.java @@ -14,22 +14,13 @@ public class Constant { public static final String PARTITION_SPLIT_MODE = "partition"; - public static final String DEFAULT_ACCOUNT_TYPE = "aliyun"; - - public static final String TAOBAO_ACCOUNT_TYPE = "taobao"; - // 常量字段用COLUMN_CONSTANT_FLAG 首尾包住即可 public final static String COLUMN_CONSTANT_FLAG = "'"; - /** - * 以下是获取accesskey id 需要用到的常量值 - */ - public static final String SKYNET_ACCESSID = "SKYNET_ACCESSID"; - - public static final String SKYNET_ACCESSKEY = "SKYNET_ACCESSKEY"; - public static final String PARTITION_COLUMNS = "partitionColumns"; public static final String PARSED_COLUMNS = "parsedColumns"; - + + public static final String PARTITION_FILTER_HINT = "/*query*/"; + } diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/InternalColumnInfo.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/InternalColumnInfo.java new file mode 100644 index 00000000..b5a15f1d --- /dev/null +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/InternalColumnInfo.java @@ -0,0 +1,24 @@ +package com.alibaba.datax.plugin.reader.odpsreader; + +public class InternalColumnInfo { + + private String columnName; + + private ColumnType columnType; + + public String getColumnName() { + return columnName; + } + + 
public void setColumnName(String columnName) { + this.columnName = columnName; + } + + public ColumnType getColumnType() { + return columnType; + } + + public void setColumnType(ColumnType columnType) { + this.columnType = columnType; + } +} diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/Key.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/Key.java index 9537cb93..6f8c7d92 100755 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/Key.java +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/Key.java @@ -5,6 +5,8 @@ public class Key { public final static String ACCESS_ID = "accessId"; public final static String ACCESS_KEY = "accessKey"; + + public final static String SECURITY_TOKEN = "securityToken"; public static final String PROJECT = "project"; @@ -22,13 +24,19 @@ public class Key { // 当值为:partition 则只切分到分区;当值为:record,则当按照分区切分后达不到adviceNum时,继续按照record切分 public final static String SPLIT_MODE = "splitMode"; - // 账号类型,默认为aliyun,也可能为taobao等其他类型 - public final static String ACCOUNT_TYPE = "accountType"; - public final static String PACKAGE_AUTHORIZED_PROJECT = "packageAuthorizedProject"; public final static String IS_COMPRESS = "isCompress"; public final static String MAX_RETRY_TIME = "maxRetryTime"; + + // 分区不存在时 + public final static String SUCCESS_ON_NO_PATITION="successOnNoPartition"; + // preSql + public final static String PRE_SQL="preSql"; + + // postSql + public final static String POST_SQL="postSql"; + } diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/LocalStrings.properties b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/LocalStrings.properties new file mode 100644 index 00000000..ef183002 --- /dev/null +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/LocalStrings.properties @@ -0,0 +1,64 @@ +description.DATAX_R_ODPS_001=\u7F3A\u5C11\u5FC5\u586B\u53C2\u6570 +description.DATAX_R_ODPS_002=\u914D\u7F6E\u503C\u4E0D\u5408\u6CD5 +description.DATAX_R_ODPS_003=\u521B\u5EFAODPS Session\u5931\u8D25 +description.DATAX_R_ODPS_004=\u83B7\u53D6ODPS Session\u5931\u8D25 +description.DATAX_R_ODPS_005=\u8BFB\u53D6ODPS\u6570\u636E\u5931\u8D25 +description.DATAX_R_ODPS_006=\u83B7\u53D6AK\u5931\u8D25 +description.DATAX_R_ODPS_007=\u8BFB\u53D6\u6570\u636E\u53D1\u751F\u5F02\u5E38 +description.DATAX_R_ODPS_008=\u6253\u5F00RecordReader\u5931\u8D25 +description.DATAX_R_ODPS_009=ODPS\u9879\u76EE\u4E0D\u5B58\u5728 +description.DATAX_R_ODPS_010=\u8868\u4E0D\u5B58\u5728 +description.DATAX_R_ODPS_011=AK\u4E0D\u5B58\u5728 +description.DATAX_R_ODPS_012=AK\u975E\u6CD5 +description.DATAX_R_ODPS_013=AK\u62D2\u7EDD\u8BBF\u95EE +description.DATAX_R_ODPS_014=splitMode\u914D\u7F6E\u9519\u8BEF +description.DATAX_R_ODPS_015=ODPS\u8D26\u53F7\u7C7B\u578B\u9519\u8BEF +description.DATAX_R_ODPS_016=\u4E0D\u652F\u6301\u89C6\u56FE +description.DATAX_R_ODPS_017=\u5206\u533A\u914D\u7F6E\u9519\u8BEF +description.DATAX_R_ODPS_018=\u5206\u533A\u4E0D\u5B58\u5728 +description.DATAX_R_ODPS_019=\u6267\u884CODPS SQL\u5931\u8D25 +description.DATAX_R_ODPS_020=\u6267\u884CODPS SQL\u53D1\u751F\u5F02\u5E38 + + +solution.DATAX_R_ODPS_001=\u8BF7\u4FEE\u6539\u914D\u7F6E\u6587\u4EF6 +solution.DATAX_R_ODPS_002=\u8BF7\u4FEE\u6539\u914D\u7F6E\u503C +solution.DATAX_R_ODPS_003=\u8BF7\u786E\u5B9A\u914D\u7F6E\u7684AK\u6216\u8054\u7CFBODPS\u7BA1\u7406\u5458 +solution.DATAX_R_ODPS_004=\u8BF7\u8054\u7CFBODPS\u7BA1\u7406\u5458 
+solution.DATAX_R_ODPS_005=\u8BF7\u8054\u7CFBODPS\u7BA1\u7406\u5458 +solution.DATAX_R_ODPS_006=\u8BF7\u786E\u5B9A\u914D\u7F6E\u7684AK +solution.DATAX_R_ODPS_007=\u8BF7\u8054\u7CFBODPS\u7BA1\u7406\u5458 +solution.DATAX_R_ODPS_008=\u8BF7\u8054\u7CFBODPS\u7BA1\u7406\u5458 +solution.DATAX_R_ODPS_009=\u8BF7\u786E\u5B9A\u914D\u7F6E\u7684\u9879\u76EE\u540D +solution.DATAX_R_ODPS_010=\u8BF7\u786E\u5B9A\u914D\u7F6E\u7684\u8868\u540D +solution.DATAX_R_ODPS_011=\u8BF7\u786E\u5B9A\u914D\u7F6E\u7684AK +solution.DATAX_R_ODPS_012=\u8BF7\u4FEE\u6539AK +solution.DATAX_R_ODPS_013=\u8BF7\u786E\u5B9AAK\u5728\u9879\u76EE\u4E2D\u7684\u6743\u9650 +solution.DATAX_R_ODPS_014=\u8BF7\u4FEE\u6539splitMode\u503C +solution.DATAX_R_ODPS_015=\u8BF7\u4FEE\u6539\u8D26\u53F7\u7C7B\u578B +solution.DATAX_R_ODPS_016=\u8BF7\u4FEE\u6539\u914D\u7F6E\u6587\u4EF6 +solution.DATAX_R_ODPS_017=\u8BF7\u4FEE\u6539\u5206\u533A\u503C +solution.DATAX_R_ODPS_018=\u8BF7\u4FEE\u6539\u914D\u7F6E\u7684\u5206\u533A\u503C +solution.DATAX_R_ODPS_019=\u8BF7\u8054\u7CFBODPS\u7BA1\u7406\u5458 +solution.DATAX_R_ODPS_020=\u8BF7\u8054\u7CFBODPS\u7BA1\u7406\u5458 + +odpsreader.1=\u6E90\u5934\u8868:{0} \u662F\u865A\u62DF\u89C6\u56FE\uFF0CDataX \u4E0D\u652F\u6301\u8BFB\u53D6\u865A\u62DF\u89C6\u56FE. +odpsreader.2=\u60A8\u6240\u914D\u7F6E\u7684 splitMode:{0} \u4E0D\u6B63\u786E. splitMode \u4EC5\u5141\u8BB8\u914D\u7F6E\u4E3A record \u6216\u8005 partition. +odpsreader.3=\u5206\u533A\u4FE1\u606F\u6CA1\u6709\u914D\u7F6E.\u7531\u4E8E\u6E90\u5934\u8868:{0} \u4E3A\u5206\u533A\u8868, \u6240\u4EE5\u60A8\u9700\u8981\u914D\u7F6E\u5176\u62BD\u53D6\u7684\u8868\u7684\u5206\u533A\u4FE1\u606F. \u683C\u5F0F\u5F62\u5982:pt=hello,ds=hangzhou\uFF0C\u8BF7\u60A8\u53C2\u8003\u6B64\u683C\u5F0F\u4FEE\u6539\u8BE5\u914D\u7F6E\u9879. +odpsreader.4=\u5206\u533A\u4FE1\u606F\u914D\u7F6E\u9519\u8BEF.\u6E90\u5934\u8868:{0} \u867D\u7136\u4E3A\u5206\u533A\u8868, \u4F46\u5176\u5B9E\u9645\u5206\u533A\u503C\u5E76\u4E0D\u5B58\u5728. \u8BF7\u786E\u8BA4\u6E90\u5934\u8868\u5DF2\u7ECF\u751F\u6210\u8BE5\u5206\u533A\uFF0C\u518D\u8FDB\u884C\u6570\u636E\u62BD\u53D6. +odpsreader.5=\u5206\u533A\u914D\u7F6E\u9519\u8BEF\uFF0C\u6839\u636E\u60A8\u6240\u914D\u7F6E\u7684\u5206\u533A\u6CA1\u6709\u5339\u914D\u5230\u6E90\u5934\u8868\u4E2D\u7684\u5206\u533A. \u6E90\u5934\u8868\u6240\u6709\u5206\u533A\u662F:[\n{0}\n], \u60A8\u914D\u7F6E\u7684\u5206\u533A\u662F:[\n{1}\n]. \u8BF7\u60A8\u6839\u636E\u5B9E\u9645\u60C5\u51B5\u518D\u4F5C\u51FA\u4FEE\u6539. +odpsreader.6=\u5206\u533A\u914D\u7F6E\u9519\u8BEF\uFF0C\u6E90\u5934\u8868:{0} \u4E3A\u975E\u5206\u533A\u8868, \u60A8\u4E0D\u80FD\u914D\u7F6E\u5206\u533A. \u8BF7\u60A8\u5220\u9664\u8BE5\u914D\u7F6E\u9879. +odpsreader.7=\u6E90\u5934\u8868:{0} \u7684\u6240\u6709\u5206\u533A\u5217\u662F:[{1}] +odpsreader.8=\u5206\u533A\u914D\u7F6E\u9519\u8BEF, \u60A8\u6240\u914D\u7F6E\u7684\u5206\u533A\u7EA7\u6570\u548C\u8BE5\u8868\u7684\u5B9E\u9645\u60C5\u51B5\u4E0D\u4E00\u81F4, \u6BD4\u5982\u5206\u533A:[{0}] \u662F {1} \u7EA7\u5206\u533A, \u800C\u5206\u533A:[{2}] \u662F {3} \u7EA7\u5206\u533A. DataX \u662F\u901A\u8FC7\u82F1\u6587\u9017\u53F7\u5224\u65AD\u60A8\u6240\u914D\u7F6E\u7684\u5206\u533A\u7EA7\u6570\u7684. \u6B63\u786E\u7684\u683C\u5F0F\u5F62\u5982\"pt=$'{bizdate'}, type=0\" \uFF0C\u8BF7\u60A8\u53C2\u8003\u793A\u4F8B\u4FEE\u6539\u8BE5\u914D\u7F6E\u9879. 
+odpsreader.9=\u5206\u533A\u914D\u7F6E\u9519\u8BEF, \u60A8\u6240\u914D\u7F6E\u7684\u5206\u533A:{0} \u7684\u7EA7\u6570:{1} \u4E0E\u60A8\u8981\u8BFB\u53D6\u7684 ODPS \u6E90\u5934\u8868\u7684\u5206\u533A\u7EA7\u6570:{2} \u4E0D\u76F8\u7B49. DataX \u662F\u901A\u8FC7\u82F1\u6587\u9017\u53F7\u5224\u65AD\u60A8\u6240\u914D\u7F6E\u7684\u5206\u533A\u7EA7\u6570\u7684.\u6B63\u786E\u7684\u683C\u5F0F\u5F62\u5982\"pt=$'{bizdate'}, type=0\" \uFF0C\u8BF7\u60A8\u53C2\u8003\u793A\u4F8B\u4FEE\u6539\u8BE5\u914D\u7F6E\u9879. +odpsreader.10=\u6E90\u5934\u8868:{0} \u7684\u6240\u6709\u5B57\u6BB5\u662F:[{1}] +odpsreader.11=\u8FD9\u662F\u4E00\u6761\u8B66\u544A\u4FE1\u606F\uFF0C\u60A8\u914D\u7F6E\u7684 ODPS \u8BFB\u53D6\u7684\u5217\u4E3A*\uFF0C\u8FD9\u662F\u4E0D\u63A8\u8350\u7684\u884C\u4E3A\uFF0C\u56E0\u4E3A\u5F53\u60A8\u7684\u8868\u5B57\u6BB5\u4E2A\u6570\u3001\u7C7B\u578B\u6709\u53D8\u52A8\u65F6\uFF0C\u53EF\u80FD\u5F71\u54CD\u4EFB\u52A1\u6B63\u786E\u6027\u751A\u81F3\u4F1A\u8FD0\u884C\u51FA\u9519. \u5EFA\u8BAE\u60A8\u628A\u6240\u6709\u9700\u8981\u62BD\u53D6\u7684\u5217\u90FD\u914D\u7F6E\u4E0A. +odpsreader.12=\u6E90\u5934\u8868:{0} \u7684\u5206\u533A:{1} \u6CA1\u6709\u5185\u5BB9\u53EF\u62BD\u53D6, \u8BF7\u60A8\u77E5\u6653. +odpsreader.13=\u6E90\u5934\u8868:{0} \u7684\u5206\u533A:{1} \u8BFB\u53D6\u884C\u6570\u4E3A\u8D1F\u6570, \u8BF7\u8054\u7CFB ODPS \u7BA1\u7406\u5458\u67E5\u770B\u8868\u72B6\u6001! +odpsreader.14=\u6E90\u5934\u8868:{0} \u7684\u5206\u533A:{1} \u8BFB\u53D6\u5931\u8D25, \u8BF7\u8054\u7CFB ODPS \u7BA1\u7406\u5458\u67E5\u770B\u9519\u8BEF\u8BE6\u60C5. + + +readerproxy.1=odps-read-exception, \u91CD\u8BD5\u7B2C{0}\u6B21 +readerproxy.2=\u60A8\u7684\u5206\u533A [{0}] \u89E3\u6790\u51FA\u73B0\u9519\u8BEF,\u89E3\u6790\u540E\u6B63\u786E\u7684\u914D\u7F6E\u65B9\u5F0F\u7C7B\u4F3C\u4E3A [ pt=1,dt=1 ]. +readerproxy.3=\u8868\u6240\u6709\u5206\u533A\u4FE1\u606F\u4E3A: {0} \u5176\u4E2D\u627E\u4E0D\u5230 [{1}] \u5BF9\u5E94\u7684\u5206\u533A\u503C. +readerproxy.4=\u60A8\u8BFB\u53D6\u5206\u533A [{0}] \u51FA\u73B0\u65E5\u671F\u8F6C\u6362\u5F02\u5E38, \u65E5\u671F\u7684\u5B57\u7B26\u4E32\u8868\u793A\u4E3A [{1}]. +readerproxy.5=DataX \u62BD\u53D6 ODPS \u6570\u636E\u4E0D\u652F\u6301\u5B57\u6BB5\u7C7B\u578B\u4E3A:[{0}]. \u76EE\u524D\u652F\u6301\u62BD\u53D6\u7684\u5B57\u6BB5\u7C7B\u578B\u6709\uFF1Abigint, boolean, datetime, double, decimal, string. \u60A8\u53EF\u4EE5\u9009\u62E9\u4E0D\u62BD\u53D6 DataX \u4E0D\u652F\u6301\u7684\u5B57\u6BB5\u6216\u8005\u8054\u7CFB ODPS \u7BA1\u7406\u5458\u5BFB\u6C42\u5E2E\u52A9. 
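Note on the bundle above: LocalStrings.properties supplies the localized description.*/solution.* texts for the new DATAX_R_ODPS_0xx error codes and the odpsreader.*/readerproxy.* log messages that the reader classes below resolve through DataX's MessageSource. As a rough, illustrative sketch only (plain JDK ResourceBundle and MessageFormat rather than the MessageSource API itself, with a hypothetical demo class name, and assuming the properties file is packaged on the classpath next to the reader classes), a lookup such as MESSAGE_SOURCE.message("odpsreader.3", tableName) boils down to:

import java.text.MessageFormat;
import java.util.Locale;
import java.util.ResourceBundle;

// Illustrative sketch: approximates what MessageSource.loadResourceBundle(...).message(key, args)
// does with the keys defined in LocalStrings.properties above.
public class LocalStringsLookupSketch {
    public static void main(String[] args) {
        // Base name assumes LocalStrings.properties ships in the odpsreader package.
        ResourceBundle bundle = ResourceBundle.getBundle(
                "com.alibaba.datax.plugin.reader.odpsreader.LocalStrings", Locale.getDefault());

        // Values such as odpsreader.3 carry a MessageFormat placeholder ({0}), here filled with a table name.
        String pattern = bundle.getString("odpsreader.3");
        System.out.println(MessageFormat.format(pattern, "my_partitioned_table"));
    }
}

The OdpsReaderErrorCode enum further down wires each code to a description.* and solution.* key from this same bundle, so an error now reports code, description, and suggested fix together.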
diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/OdpsReader.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/OdpsReader.java index f5cf10ca..615cee50 100755 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/OdpsReader.java +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/OdpsReader.java @@ -5,50 +5,42 @@ import com.alibaba.datax.common.plugin.RecordSender; import com.alibaba.datax.common.spi.Reader; import com.alibaba.datax.common.util.Configuration; import com.alibaba.datax.common.util.FilterUtil; -import com.alibaba.datax.plugin.reader.odpsreader.util.IdAndKeyUtil; -import com.alibaba.datax.plugin.reader.odpsreader.util.OdpsSplitUtil; -import com.alibaba.datax.plugin.reader.odpsreader.util.OdpsUtil; -import com.aliyun.odps.*; +import com.alibaba.datax.common.util.MessageSource; +import com.alibaba.datax.plugin.reader.odpsreader.util.*; +import com.alibaba.fastjson2.JSON; +import com.aliyun.odps.Column; +import com.aliyun.odps.Odps; +import com.aliyun.odps.Table; +import com.aliyun.odps.TableSchema; import com.aliyun.odps.tunnel.TableTunnel.DownloadSession; - +import com.aliyun.odps.type.TypeInfo; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.tuple.MutablePair; -import org.apache.commons.lang3.tuple.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; public class OdpsReader extends Reader { public static class Job extends Reader.Job { private static final Logger LOG = LoggerFactory - .getLogger(Job.class); - - private static boolean IS_DEBUG = LOG.isDebugEnabled(); + .getLogger(Job.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsReaderErrorCode.class, Locale.ENGLISH, MessageSource.timeZone); private Configuration originalConfig; + private boolean successOnNoPartition; private Odps odps; private Table table; + @Override public void preCheck() { this.init(); + this.prepare(); } - @Override public void init() { this.originalConfig = super.getPluginJobConf(); - - //如果用户没有配置accessId/accessKey,尝试从环境变量获取 - String accountType = originalConfig.getString(Key.ACCOUNT_TYPE, Constant.DEFAULT_ACCOUNT_TYPE); - if (Constant.DEFAULT_ACCOUNT_TYPE.equalsIgnoreCase(accountType)) { - this.originalConfig = IdAndKeyUtil.parseAccessIdAndKey(this.originalConfig); - } + this.successOnNoPartition = this.originalConfig.getBool(Key.SUCCESS_ON_NO_PATITION, false); //检查必要的参数配置 OdpsUtil.checkNecessaryConfig(this.originalConfig); @@ -59,17 +51,21 @@ public class OdpsReader extends Reader { dealSplitMode(this.originalConfig); this.odps = OdpsUtil.initOdps(this.originalConfig); + + } + + private void initOdpsTableInfo() { String tableName = this.originalConfig.getString(Key.TABLE); String projectName = this.originalConfig.getString(Key.PROJECT); this.table = OdpsUtil.getTable(this.odps, projectName, tableName); this.originalConfig.set(Constant.IS_PARTITIONED_TABLE, - OdpsUtil.isPartitionedTable(table)); + OdpsUtil.isPartitionedTable(table)); boolean isVirtualView = this.table.isVirtualView(); if (isVirtualView) { throw DataXException.asDataXException(OdpsReaderErrorCode.VIRTUAL_VIEW_NOT_SUPPORT, - String.format("源头表:%s 是虚拟视图,DataX 不支持读取虚拟视图.", tableName)); + MESSAGE_SOURCE.message("odpsreader.1", tableName)); } this.dealPartition(this.table); @@ -79,11 
+75,11 @@ public class OdpsReader extends Reader { private void dealSplitMode(Configuration originalConfig) { String splitMode = originalConfig.getString(Key.SPLIT_MODE, Constant.DEFAULT_SPLIT_MODE).trim(); if (splitMode.equalsIgnoreCase(Constant.DEFAULT_SPLIT_MODE) || - splitMode.equalsIgnoreCase(Constant.PARTITION_SPLIT_MODE)) { + splitMode.equalsIgnoreCase(Constant.PARTITION_SPLIT_MODE)) { originalConfig.set(Key.SPLIT_MODE, splitMode); } else { throw DataXException.asDataXException(OdpsReaderErrorCode.SPLIT_MODE_ERROR, - String.format("您所配置的 splitMode:%s 不正确. splitMode 仅允许配置为 record 或者 partition.", splitMode)); + MESSAGE_SOURCE.message("odpsreader.2", splitMode)); } } @@ -98,7 +94,7 @@ public class OdpsReader extends Reader { */ private void dealPartition(Table table) { List userConfiguredPartitions = this.originalConfig.getList( - Key.PARTITION, String.class); + Key.PARTITION, String.class); boolean isPartitionedTable = this.originalConfig.getBool(Constant.IS_PARTITIONED_TABLE); List partitionColumns = new ArrayList(); @@ -107,60 +103,140 @@ public class OdpsReader extends Reader { // 分区表,需要配置分区 if (null == userConfiguredPartitions || userConfiguredPartitions.isEmpty()) { throw DataXException.asDataXException(OdpsReaderErrorCode.PARTITION_ERROR, - String.format("分区信息没有配置.由于源头表:%s 为分区表, 所以您需要配置其抽取的表的分区信息. 格式形如:pt=hello,ds=hangzhou,请您参考此格式修改该配置项.", - table.getName())); + MESSAGE_SOURCE.message("odpsreader.3", table.getName())); } else { - List allPartitions = OdpsUtil.getTableAllPartitions(table); - - if (null == allPartitions || allPartitions.isEmpty()) { - throw DataXException.asDataXException(OdpsReaderErrorCode.PARTITION_ERROR, - String.format("分区信息配置错误.源头表:%s 虽然为分区表, 但其实际分区值并不存在. 请确认源头表已经生成该分区,再进行数据抽取.", - table.getName())); - } - - List parsedPartitions = expandUserConfiguredPartition( - allPartitions, userConfiguredPartitions); - - if (null == parsedPartitions || parsedPartitions.isEmpty()) { - throw DataXException.asDataXException( - OdpsReaderErrorCode.PARTITION_ERROR, - String.format( - "分区配置错误,根据您所配置的分区没有匹配到源头表中的分区. 源头表所有分区是:[\n%s\n], 您配置的分区是:[\n%s\n]. 请您根据实际情况在作出修改. ", - StringUtils.join(allPartitions, "\n"), - StringUtils.join(userConfiguredPartitions, "\n"))); - } - this.originalConfig.set(Key.PARTITION, parsedPartitions); - - for (Column column : table.getSchema() - .getPartitionColumns()) { + // 获取分区列名, 支持用户配置分区列同步 + for (Column column : table.getSchema().getPartitionColumns()) { partitionColumns.add(column.getName()); } + + List allPartitions = OdpsUtil.getTableAllPartitions(table); + + List parsedPartitions = expandUserConfiguredPartition( + table, allPartitions, userConfiguredPartitions, partitionColumns.size()); + if (null == parsedPartitions || parsedPartitions.isEmpty()) { + if (!this.successOnNoPartition) { + // PARTITION_NOT_EXISTS_ERROR 这个异常ErrorCode在AdsWriter有使用,用户判断空分区Load Data任务不报错 + // 其他类型的异常不要使用这个错误码 + throw DataXException.asDataXException( + OdpsReaderErrorCode.PARTITION_NOT_EXISTS_ERROR, + MESSAGE_SOURCE.message("odpsreader.5", + StringUtils.join(allPartitions, "\n"), + StringUtils.join(userConfiguredPartitions, "\n"))); + } else { + LOG.warn( + String.format( + "The partition configuration is wrong, " + + "but you have configured the successOnNoPartition to be true to ignore the error. " + + "According to the partition you have configured, it does not match the partition in the source table. " + + "All the partitions in the source table are:[\n%s\n], the partition you configured is:[\n%s\n]. 
" + + "please revise it according to the actual situation.", + StringUtils.join(allPartitions, "\n"), + StringUtils.join(userConfiguredPartitions, "\n"))); + } + } + LOG.info(String + .format("expand user configured partitions are : %s", JSON.toJSONString(parsedPartitions))); + this.originalConfig.set(Key.PARTITION, parsedPartitions); } } else { // 非分区表,则不能配置分区 if (null != userConfiguredPartitions - && !userConfiguredPartitions.isEmpty()) { + && !userConfiguredPartitions.isEmpty()) { throw DataXException.asDataXException(OdpsReaderErrorCode.PARTITION_ERROR, - String.format("分区配置错误,源头表:%s 为非分区表, 您不能配置分区. 请您删除该配置项. ", table.getName())); + MESSAGE_SOURCE.message("odpsreader.6", table.getName())); } } - + this.originalConfig.set(Constant.PARTITION_COLUMNS, partitionColumns); if (isPartitionedTable) { - LOG.info("{源头表:{} 的所有分区列是:[{}]}", table.getName(), - StringUtils.join(partitionColumns, ",")); + LOG.info(MESSAGE_SOURCE.message("odpsreader.7", table.getName(), + StringUtils.join(partitionColumns, ","))); } } - private List expandUserConfiguredPartition( - List allPartitions, List userConfiguredPartitions) { + /** + * 将用户配置的分区(可能是直接的分区配置 dt=20170101, 可能是简单正则dt=201701*, 也可能是区间过滤条件 dt>=20170101 and dt<20170130) 和ODPS + * table所有的分区进行匹配,过滤出用户希望同步的分区集合 + * + * @param table odps table + * @param allPartitions odps table所有的分区 + * @param userConfiguredPartitions 用户配置的分区 + * @param tableOriginalPartitionDepth odps table分区级数(一级分区,二级分区,三级分区等) + * @return 返回过滤出的分区 + */ + private List expandUserConfiguredPartition(Table table, + List allPartitions, + List userConfiguredPartitions, + int tableOriginalPartitionDepth) { + + UserConfiguredPartitionClassification userConfiguredPartitionClassification = OdpsUtil + .classifyUserConfiguredPartitions(userConfiguredPartitions); + + if (userConfiguredPartitionClassification.isIncludeHintPartition()) { + List expandUserConfiguredPartitionResult = new ArrayList(); + + // 处理不包含/*query*/的分区过滤 + if (!userConfiguredPartitionClassification.getUserConfiguredNormalPartition().isEmpty()) { + expandUserConfiguredPartitionResult.addAll(expandNoHintUserConfiguredPartition(allPartitions, + userConfiguredPartitionClassification.getUserConfiguredNormalPartition(), + tableOriginalPartitionDepth)); + } + if (!allPartitions.isEmpty()) { + expandUserConfiguredPartitionResult.addAll(expandHintUserConfiguredPartition(table, + allPartitions, userConfiguredPartitionClassification.getUserConfiguredHintPartition())); + } + return expandUserConfiguredPartitionResult; + } else { + return expandNoHintUserConfiguredPartition(allPartitions, userConfiguredPartitions, + tableOriginalPartitionDepth); + } + } + + /** + * 匹配包含 HINT 条件的过滤 + * + * @param table odps table + * @param allPartitions odps table所有的分区 + * @param userHintConfiguredPartitions 用户配置的分区 + * @return 返回过滤出的分区 + */ + private List expandHintUserConfiguredPartition(Table table, + List allPartitions, + List userHintConfiguredPartitions) { + try { + // load odps table all partitions into sqlite memory database + SqliteUtil sqliteUtil = new SqliteUtil(); + sqliteUtil.loadAllPartitionsIntoSqlite(table, allPartitions); + return sqliteUtil.selectUserConfiguredPartition(userHintConfiguredPartitions); + } catch (Exception ex) { + throw DataXException.asDataXException(OdpsReaderErrorCode.PARTITION_ERROR, + String.format("Expand user configured partition has exception: %s", ex.getMessage()), ex); + } + } + + /** + * 匹配没有 HINT 条件的过滤,包括 简单正则匹配(dt=201701*) 和 直接匹配(dt=20170101) + * + * @param allPartitions odps table所有的分区 + * @param 
userNormalConfiguredPartitions 用户配置的分区 + * @param tableOriginalPartitionDepth odps table分区级数(一级分区,二级分区,三级分区等) + * @return 返回过滤出的分区 + */ + private List expandNoHintUserConfiguredPartition(List allPartitions, + List userNormalConfiguredPartitions, + int tableOriginalPartitionDepth) { // 对odps 本身的所有分区进行特殊字符的处理 + LOG.info("format partition with rules: remove all space; remove all '; replace / to ,"); + // 表里面已有分区量比较大,有些任务无关,没有打印 List allStandardPartitions = OdpsUtil - .formatPartitions(allPartitions); + .formatPartitions(allPartitions); // 对用户自身配置的所有分区进行特殊字符的处理 List allStandardUserConfiguredPartitions = OdpsUtil - .formatPartitions(userConfiguredPartitions); + .formatPartitions(userNormalConfiguredPartitions); + LOG.info("user configured partition: {}", JSON.toJSONString(userNormalConfiguredPartitions)); + LOG.info("formated partition: {}", JSON.toJSONString(allStandardUserConfiguredPartitions)); /** * 对配置的分区级数(深度)进行检查 @@ -177,20 +253,20 @@ public class OdpsReader extends Reader { comparedPartitionDepth = comparedPartition.split(",").length; if (comparedPartitionDepth != firstPartitionDepth) { throw DataXException.asDataXException(OdpsReaderErrorCode.PARTITION_ERROR, - String.format("分区配置错误, 您所配置的分区级数和该表的实际情况不一致, 比如分区:[%s] 是 %s 级分区, 而分区:[%s] 是 %s 级分区. DataX 是通过英文逗号判断您所配置的分区级数的. 正确的格式形如\"pt=${bizdate}, type=0\" ,请您参考示例修改该配置项. ", - firstPartition, firstPartitionDepth, comparedPartition, comparedPartitionDepth)); + MESSAGE_SOURCE + .message("odpsreader.8", firstPartition, firstPartitionDepth, comparedPartition, + comparedPartitionDepth)); } } - int tableOriginalPartitionDepth = allStandardPartitions.get(0).split(",").length; if (firstPartitionDepth != tableOriginalPartitionDepth) { throw DataXException.asDataXException(OdpsReaderErrorCode.PARTITION_ERROR, - String.format("分区配置错误, 您所配置的分区:%s 的级数:%s 与您要读取的 ODPS 源头表的分区级数:%s 不相等. DataX 是通过英文逗号判断您所配置的分区级数的.正确的格式形如\"pt=${bizdate}, type=0\" ,请您参考示例修改该配置项.", - firstPartition, firstPartitionDepth, tableOriginalPartitionDepth)); + MESSAGE_SOURCE + .message("odpsreader.9", firstPartition, firstPartitionDepth, tableOriginalPartitionDepth)); } List retPartitions = FilterUtil.filterByRegulars(allStandardPartitions, - allStandardUserConfiguredPartitions); + allStandardUserConfiguredPartitions); return retPartitions; } @@ -198,11 +274,11 @@ public class OdpsReader extends Reader { private void dealColumn(Table table) { // 用户配置的 column 之前已经确保其不为空 List userConfiguredColumns = this.originalConfig.getList( - Key.COLUMN, String.class); + Key.COLUMN, String.class); List allColumns = OdpsUtil.getTableAllColumns(table); List allNormalColumns = OdpsUtil - .getTableOriginalColumnNameList(allColumns); + .getTableOriginalColumnNameList(allColumns); StringBuilder columnMeta = new StringBuilder(); for (Column column : allColumns) { @@ -210,46 +286,75 @@ public class OdpsReader extends Reader { } columnMeta.setLength(columnMeta.length() - 1); - LOG.info("源头表:{} 的所有字段是:[{}]", table.getName(), columnMeta.toString()); + LOG.info(MESSAGE_SOURCE.message("odpsreader.10", table.getName(), columnMeta.toString())); if (1 == userConfiguredColumns.size() - && "*".equals(userConfiguredColumns.get(0))) { - LOG.warn("这是一条警告信息,您配置的 ODPS 读取的列为*,这是不推荐的行为,因为当您的表字段个数、类型有变动时,可能影响任务正确性甚至会运行出错. 建议您把所有需要抽取的列都配置上. 
"); + && "*".equals(userConfiguredColumns.get(0))) { + LOG.warn(MESSAGE_SOURCE.message("odpsreader.11")); this.originalConfig.set(Key.COLUMN, allNormalColumns); } userConfiguredColumns = this.originalConfig.getList( - Key.COLUMN, String.class); + Key.COLUMN, String.class); /** * warn: 字符串常量需要与表原生字段tableOriginalColumnNameList 分开存放 demo: * ["id","'id'","name"] */ List allPartitionColumns = this.originalConfig.getList( - Constant.PARTITION_COLUMNS, String.class); - List> parsedColumns = OdpsUtil - .parseColumns(allNormalColumns, allPartitionColumns, - userConfiguredColumns); + Constant.PARTITION_COLUMNS, String.class); + List parsedColumns = OdpsUtil + .parseColumns(allNormalColumns, allPartitionColumns, + userConfiguredColumns); this.originalConfig.set(Constant.PARSED_COLUMNS, parsedColumns); StringBuilder sb = new StringBuilder(); sb.append("[ "); for (int i = 0, len = parsedColumns.size(); i < len; i++) { - Pair pair = parsedColumns.get(i); - sb.append(String.format(" %s : %s", pair.getLeft(), - pair.getRight())); + InternalColumnInfo pair = parsedColumns.get(i); + sb.append(String.format(" %s : %s", pair.getColumnName(), + pair.getColumnType())); if (i != len - 1) { sb.append(","); } } + + sb.append(" ]"); LOG.info("parsed column details: {} .", sb.toString()); } - @Override public void prepare() { + List preSqls = this.originalConfig.getList(Key.PRE_SQL, String.class); + if (preSqls != null && !preSqls.isEmpty()) { + LOG.info( + String.format("Beigin to exectue preSql : %s. \n Attention: these preSqls must be idempotent!!!", + JSON.toJSONString(preSqls))); + long beginTime = System.currentTimeMillis(); + + StringBuffer preSqlBuffer = new StringBuffer(); + for (String preSql : preSqls) { + preSql = preSql.trim(); + if (StringUtils.isNotBlank(preSql) && !preSql.endsWith(";")) { + preSql = String.format("%s;", preSql); + } + if (StringUtils.isNotBlank(preSql)) { + preSqlBuffer.append(preSql); + } + } + if (StringUtils.isNotBlank(preSqlBuffer.toString())) { + OdpsUtil.runSqlTaskWithRetry(this.odps, preSqlBuffer.toString(), "preSql"); + } else { + LOG.info("skip to execute the preSql: {}", JSON.toJSONString(preSqls)); + } + long endTime = System.currentTimeMillis(); + + LOG.info( + String.format("Exectue odpsreader preSql successfully! cost time: %s ms.", (endTime - beginTime))); + } + this.initOdpsTableInfo(); } @Override @@ -259,6 +364,33 @@ public class OdpsReader extends Reader { @Override public void post() { + List postSqls = this.originalConfig.getList(Key.POST_SQL, String.class); + + if (postSqls != null && !postSqls.isEmpty()) { + LOG.info( + String.format("Beigin to exectue postSql : %s. \n Attention: these postSqls must be idempotent!!!", + JSON.toJSONString(postSqls))); + long beginTime = System.currentTimeMillis(); + StringBuffer postSqlBuffer = new StringBuffer(); + for (String postSql : postSqls) { + postSql = postSql.trim(); + if (StringUtils.isNotBlank(postSql) && !postSql.endsWith(";")) { + postSql = String.format("%s;", postSql); + } + if (StringUtils.isNotBlank(postSql)) { + postSqlBuffer.append(postSql); + } + } + if (StringUtils.isNotBlank(postSqlBuffer.toString())) { + OdpsUtil.runSqlTaskWithRetry(this.odps, postSqlBuffer.toString(), "postSql"); + } else { + LOG.info("skip to execute the postSql: {}", JSON.toJSONString(postSqls)); + } + + long endTime = System.currentTimeMillis(); + LOG.info( + String.format("Exectue odpsreader postSql successfully! 
cost time: %s ms.", (endTime - beginTime))); + } } @Override @@ -268,6 +400,7 @@ public class OdpsReader extends Reader { public static class Task extends Reader.Task { private static final Logger LOG = LoggerFactory.getLogger(Task.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsReader.class); private Configuration readerSliceConf; private String tunnelServer; @@ -278,32 +411,35 @@ public class OdpsReader extends Reader { private boolean isPartitionedTable; private String sessionId; private boolean isCompress; + private boolean successOnNoPartition; @Override public void init() { this.readerSliceConf = super.getPluginJobConf(); this.tunnelServer = this.readerSliceConf.getString( - Key.TUNNEL_SERVER, null); + Key.TUNNEL_SERVER, null); this.odps = OdpsUtil.initOdps(this.readerSliceConf); this.projectName = this.readerSliceConf.getString(Key.PROJECT); this.tableName = this.readerSliceConf.getString(Key.TABLE); this.table = OdpsUtil.getTable(this.odps, projectName, tableName); this.isPartitionedTable = this.readerSliceConf - .getBool(Constant.IS_PARTITIONED_TABLE); + .getBool(Constant.IS_PARTITIONED_TABLE); this.sessionId = this.readerSliceConf.getString(Constant.SESSION_ID, null); - - - this.isCompress = this.readerSliceConf.getBool(Key.IS_COMPRESS, false); + this.successOnNoPartition = this.readerSliceConf.getBool(Key.SUCCESS_ON_NO_PATITION, false); // sessionId 为空的情况是:切分级别只到 partition 的情况 - if (StringUtils.isBlank(this.sessionId)) { + String partition = this.readerSliceConf.getString(Key.PARTITION); + + // 没有分区读取时, 是没有sessionId这些的 + if (this.isPartitionedTable && StringUtils.isBlank(partition) && this.successOnNoPartition) { + LOG.warn("Partition is blank, but you config successOnNoPartition[true] ,don't need to create session"); + } else if (StringUtils.isBlank(this.sessionId)) { DownloadSession session = OdpsUtil.createMasterSessionForPartitionedTable(odps, - tunnelServer, projectName, tableName, this.readerSliceConf.getString(Key.PARTITION)); + tunnelServer, projectName, tableName, this.readerSliceConf.getString(Key.PARTITION)); this.sessionId = session.getId(); } - LOG.info("sessionId:{}", this.sessionId); } @@ -316,68 +452,61 @@ public class OdpsReader extends Reader { DownloadSession downloadSession = null; String partition = this.readerSliceConf.getString(Key.PARTITION); + if (this.isPartitionedTable && StringUtils.isBlank(partition) && this.successOnNoPartition) { + LOG.warn(String.format( + "Partition is blank,not need to be read")); + recordSender.flush(); + return; + } + if (this.isPartitionedTable) { downloadSession = OdpsUtil.getSlaveSessionForPartitionedTable(this.odps, this.sessionId, - this.tunnelServer, this.projectName, this.tableName, partition); + this.tunnelServer, this.projectName, this.tableName, partition); } else { downloadSession = OdpsUtil.getSlaveSessionForNonPartitionedTable(this.odps, this.sessionId, - this.tunnelServer, this.projectName, this.tableName); + this.tunnelServer, this.projectName, this.tableName); } long start = this.readerSliceConf.getLong(Constant.START_INDEX, 0); long count = this.readerSliceConf.getLong(Constant.STEP_COUNT, - downloadSession.getRecordCount()); + downloadSession.getRecordCount()); if (count > 0) { LOG.info(String.format( - "Begin to read ODPS table:%s, partition:%s, startIndex:%s, count:%s.", - this.tableName, partition, start, count)); + "Begin to read ODPS table:%s, partition:%s, startIndex:%s, count:%s.", + this.tableName, partition, start, count)); } else if (count == 0) { - 
LOG.warn(String.format("源头表:%s 的分区:%s 没有内容可抽取, 请您知晓.", - this.tableName, partition)); + LOG.warn(MESSAGE_SOURCE.message("odpsreader.12", this.tableName, partition)); return; } else { throw DataXException.asDataXException(OdpsReaderErrorCode.READ_DATA_FAIL, - String.format("源头表:%s 的分区:%s 读取行数为负数, 请联系 ODPS 管理员查看表状态!", - this.tableName, partition)); + MESSAGE_SOURCE.message("odpsreader.13", this.tableName, partition)); } - + TableSchema tableSchema = this.table.getSchema(); Set allColumns = new HashSet(); allColumns.addAll(tableSchema.getColumns()); allColumns.addAll(tableSchema.getPartitionColumns()); - Map columnTypeMap = new HashMap(); + Map columnTypeMap = new HashMap(); for (Column column : allColumns) { - columnTypeMap.put(column.getName(), column.getType()); + columnTypeMap.put(column.getName(), column.getTypeInfo()); } try { - List parsedColumnsTmp = this.readerSliceConf - .getListConfiguration(Constant.PARSED_COLUMNS); - List> parsedColumns = new ArrayList>(); - for (int i = 0; i < parsedColumnsTmp.size(); i++) { - Configuration eachColumnConfig = parsedColumnsTmp.get(i); - String columnName = eachColumnConfig.getString("left"); - ColumnType columnType = ColumnType - .asColumnType(eachColumnConfig.getString("right")); - parsedColumns.add(new MutablePair( - columnName, columnType)); - - } + List parsedColumns = this.readerSliceConf.getListWithJson(Constant.PARSED_COLUMNS, + InternalColumnInfo.class); ReaderProxy readerProxy = new ReaderProxy(recordSender, downloadSession, columnTypeMap, parsedColumns, partition, this.isPartitionedTable, - start, count, this.isCompress); - + start, count, this.isCompress, this.readerSliceConf); readerProxy.doRead(); } catch (Exception e) { throw DataXException.asDataXException(OdpsReaderErrorCode.READ_DATA_FAIL, - String.format("源头表:%s 的分区:%s 读取失败, 请联系 ODPS 管理员查看错误详情.", this.tableName, partition), e); + MESSAGE_SOURCE.message("odpsreader.14", this.tableName, partition), e); } } - @Override public void post() { } diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/OdpsReaderErrorCode.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/OdpsReaderErrorCode.java index cdda6ac8..8311d4ef 100755 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/OdpsReaderErrorCode.java +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/OdpsReaderErrorCode.java @@ -1,45 +1,53 @@ package com.alibaba.datax.plugin.reader.odpsreader; import com.alibaba.datax.common.spi.ErrorCode; +import com.alibaba.datax.common.util.MessageSource; public enum OdpsReaderErrorCode implements ErrorCode { - REQUIRED_VALUE("OdpsReader-00", "您缺失了必须填写的参数值."), - ILLEGAL_VALUE("OdpsReader-01", "您配置的值不合法."), - CREATE_DOWNLOADSESSION_FAIL("OdpsReader-03", "创建 ODPS 的 downloadSession 失败."), - GET_DOWNLOADSESSION_FAIL("OdpsReader-04", "获取 ODPS 的 downloadSession 失败."), - READ_DATA_FAIL("OdpsReader-05", "读取 ODPS 源头表失败."), - GET_ID_KEY_FAIL("OdpsReader-06", "获取 accessId/accessKey 失败."), + REQUIRED_VALUE("DATAX_R_ODPS_001", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_001"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_001")), + ILLEGAL_VALUE("DATAX_R_ODPS_002", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_002"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_002")), + CREATE_DOWNLOADSESSION_FAIL("DATAX_R_ODPS_003", 
MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_003"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_003")), + GET_DOWNLOADSESSION_FAIL("DATAX_R_ODPS_004", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_004"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_004")), + READ_DATA_FAIL("DATAX_R_ODPS_005", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_005"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_005")), + GET_ID_KEY_FAIL("DATAX_R_ODPS_006", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_006"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_006")), - ODPS_READ_EXCEPTION("OdpsReader-07", "读取 odps 异常"), - OPEN_RECORD_READER_FAILED("OdpsReader-08", "打开 recordReader 失败."), + ODPS_READ_EXCEPTION("DATAX_R_ODPS_007", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_007"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_007")), + OPEN_RECORD_READER_FAILED("DATAX_R_ODPS_008", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_008"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_008")), - ODPS_PROJECT_NOT_FOUNT("OdpsReader-10", "您配置的值不合法, odps project 不存在."), //ODPS-0420111: Project not found + ODPS_PROJECT_NOT_FOUNT("DATAX_R_ODPS_009", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_009"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_009")), //ODPS-0420111: Project not found - ODPS_TABLE_NOT_FOUNT("OdpsReader-12", "您配置的值不合法, odps table 不存在."), // ODPS-0130131:Table not found + ODPS_TABLE_NOT_FOUNT("DATAX_R_ODPS_010", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_010"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_010")), // ODPS-0130131:Table not found - ODPS_ACCESS_KEY_ID_NOT_FOUND("OdpsReader-13", "您配置的值不合法, odps accessId,accessKey 不存在."), //ODPS-0410051:Invalid credentials - accessKeyId not found + ODPS_ACCESS_KEY_ID_NOT_FOUND("DATAX_R_ODPS_011", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_011"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_011")), //ODPS-0410051:Invalid credentials - accessKeyId not found - ODPS_ACCESS_KEY_INVALID("OdpsReader-14", "您配置的值不合法, odps accessKey 错误."), //ODPS-0410042:Invalid signature value - User signature dose not match + ODPS_ACCESS_KEY_INVALID("DATAX_R_ODPS_012", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_012"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_012")), //ODPS-0410042:Invalid signature value - User signature dose not match - ODPS_ACCESS_DENY("OdpsReader-15", "拒绝访问, 您不在 您配置的 project 中."), //ODPS-0420095: Access Denied - Authorization Failed [4002], You doesn't exist in project + ODPS_ACCESS_DENY("DATAX_R_ODPS_013", 
MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_013"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_013")), //ODPS-0420095: Access Denied - Authorization Failed [4002], You doesn't exist in project - SPLIT_MODE_ERROR("OdpsReader-30", "splitMode配置错误."), + SPLIT_MODE_ERROR("DATAX_R_ODPS_014", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_014"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_014")), - ACCOUNT_TYPE_ERROR("OdpsReader-31", "odps 账号类型错误."), + ACCOUNT_TYPE_ERROR("DATAX_R_ODPS_015", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_015"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_015")), - VIRTUAL_VIEW_NOT_SUPPORT("OdpsReader-32", "Datax 不支持 读取虚拟视图."), + VIRTUAL_VIEW_NOT_SUPPORT("DATAX_R_ODPS_016", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_016"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_016")), - PARTITION_ERROR("OdpsReader-33", "分区配置错误."), + PARTITION_ERROR("DATAX_R_ODPS_017", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_017"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_017")), + PARTITION_NOT_EXISTS_ERROR("DATAX_R_ODPS_018", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_018"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_018")), + + RUN_SQL_FAILED("DATAX_R_ODPS_019", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_019"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_019")), + + RUN_SQL_ODPS_EXCEPTION("DATAX_R_ODPS_020", MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("description.DATAX_R_ODPS_020"),MessageSource.loadResourceBundle(OdpsReaderErrorCode.class).message("solution.DATAX_R_ODPS_020")), ; private final String code; private final String description; + private final String solution; - private OdpsReaderErrorCode(String code, String description) { + private OdpsReaderErrorCode(String code, String description,String solution) { this.code = code; this.description = description; + this.solution = solution; } @Override @@ -52,9 +60,12 @@ public enum OdpsReaderErrorCode implements ErrorCode { return this.description; } + public String getSolution() { + return solution; + } + @Override public String toString() { - return String.format("Code:[%s], Description:[%s]. ", this.code, - this.description); + return String.format("Code:%s:%s, Solution:[%s]. 
", this.code,this.description,this.solution); } } diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/ReaderProxy.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/ReaderProxy.java index 8e069ef5..c2e88eba 100755 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/ReaderProxy.java +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/ReaderProxy.java @@ -3,40 +3,75 @@ package com.alibaba.datax.plugin.reader.odpsreader; import com.alibaba.datax.common.element.*; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.RecordSender; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.MessageSource; import com.alibaba.datax.plugin.reader.odpsreader.util.OdpsUtil; +import com.alibaba.fastjson2.JSON; +import com.aliyun.odps.Column; import com.aliyun.odps.OdpsType; +import com.aliyun.odps.data.*; import com.aliyun.odps.data.Record; -import com.aliyun.odps.data.RecordReader; import com.aliyun.odps.tunnel.TableTunnel; -import org.apache.commons.lang3.tuple.Pair; +import com.aliyun.odps.type.ArrayTypeInfo; +import com.aliyun.odps.type.MapTypeInfo; +import com.aliyun.odps.type.TypeInfo; +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.text.ParseException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.text.SimpleDateFormat; +import java.util.*; public class ReaderProxy { private static final Logger LOG = LoggerFactory .getLogger(ReaderProxy.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(ReaderProxy.class); private static boolean IS_DEBUG = LOG.isDebugEnabled(); private RecordSender recordSender; private TableTunnel.DownloadSession downloadSession; - private Map columnTypeMap; - private List> parsedColumns; + private Map columnTypeMap; + private List parsedColumns; private String partition; private boolean isPartitionTable; private long start; private long count; private boolean isCompress; + + private static final String NULL_INDICATOR = null; + // TODO 没有支持用户可配置 + // TODO 没有timezone + private SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + + // 读取 jvm 默认时区 + private Calendar calendarForDate = null; + private boolean useDateWithCalendar = true; + + private Calendar initCalendar(Configuration config) { + // 理论上不会有其他选择,有配置化可以随时应急 + String calendarType = config.getString("calendarType", "iso8601"); + Boolean lenient = config.getBool("calendarLenient", true); + + // 默认jvm时区 + TimeZone timeZone = TimeZone.getDefault(); + String timeZoneStr = config.getString("calendarTimeZone"); + if (StringUtils.isNotBlank(timeZoneStr)) { + // 如果用户明确指定使用用户指定的 + timeZone = TimeZone.getTimeZone(timeZoneStr); + } + + Calendar calendarForDate = new Calendar.Builder().setCalendarType(calendarType).setLenient(lenient) + .setTimeZone(timeZone).build(); + return calendarForDate; + } public ReaderProxy(RecordSender recordSender, TableTunnel.DownloadSession downloadSession, - Map columnTypeMap, - List> parsedColumns, String partition, - boolean isPartitionTable, long start, long count, boolean isCompress) { + Map columnTypeMap, + List parsedColumns, String partition, + boolean isPartitionTable, long start, long count, boolean isCompress, Configuration taskConfig) { this.recordSender = recordSender; this.downloadSession = 
downloadSession; this.columnTypeMap = columnTypeMap; @@ -46,14 +81,24 @@ public class ReaderProxy { this.start = start; this.count = count; this.isCompress = isCompress; + + this.calendarForDate = this.initCalendar(taskConfig); + this.useDateWithCalendar = taskConfig.getBool("useDateWithCalendar", true); } // warn: odps 分区列和正常列不能重名, 所有列都不不区分大小写 public void doRead() { try { LOG.info("start={}, count={}",start, count); - //RecordReader recordReader = downloadSession.openRecordReader(start, count, isCompress); - RecordReader recordReader = OdpsUtil.getRecordReader(downloadSession, start, count, isCompress); + List userConfigNormalColumns = OdpsUtil.getNormalColumns(this.parsedColumns, this.columnTypeMap); + RecordReader recordReader = null; + // fix #ODPS-52184/10332469, updateColumnsSize表示如果用户指定的读取源表列数100列以内的话,则进行列裁剪优化; + int updateColumnsSize = 100; + if(userConfigNormalColumns.size() <= updateColumnsSize){ + recordReader = OdpsUtil.getRecordReader(downloadSession, start, count, isCompress, userConfigNormalColumns); + } else { + recordReader = OdpsUtil.getRecordReader(downloadSession, start, count, isCompress); + } Record odpsRecord; Map partitionMap = this @@ -72,7 +117,7 @@ public class ReaderProxy { } catch (InterruptedException ignored) { } recordReader = downloadSession.openRecordReader(start, count, isCompress); - LOG.warn("odps-read-exception, 重试第{}次", retryTimes); + LOG.warn(MESSAGE_SOURCE.message("readerproxy.1", retryTimes)); retryTimes++; continue; } else { @@ -90,9 +135,9 @@ public class ReaderProxy { // warn: for PARTITION||NORMAL columnTypeMap's key // sets(columnName) is big than parsedColumns's left // sets(columnName), always contain - for (Pair pair : this.parsedColumns) { - String columnName = pair.getLeft(); - switch (pair.getRight()) { + for (InternalColumnInfo pair : this.parsedColumns) { + String columnName = pair.getColumnName(); + switch (pair.getColumnType()) { case PARTITION: String partitionColumnValue = this .getPartitionColumnValue(partitionMap, @@ -144,9 +189,7 @@ public class ReaderProxy { throw DataXException .asDataXException( OdpsReaderErrorCode.ILLEGAL_VALUE, - String.format( - "您的分区 [%s] 解析出现错误,解析后正确的配置方式类似为 [ pt=1,dt=1 ].", - eachPartition)); + MESSAGE_SOURCE.message("readerproxy.2", eachPartition)); } // warn: translate to lower case, it's more comfortable to // compare whit user's input columns @@ -157,7 +200,7 @@ public class ReaderProxy { } if (IS_DEBUG) { LOG.debug(String.format("partition value details: %s", - com.alibaba.fastjson.JSON.toJSONString(partitionMap))); + com.alibaba.fastjson2.JSON.toJSONString(partitionMap))); } return partitionMap; } @@ -168,9 +211,8 @@ public class ReaderProxy { partitionColumnName = partitionColumnName.toLowerCase(); // it's will never happen, but add this checking if (!partitionMap.containsKey(partitionColumnName)) { - String errorMessage = String.format( - "表所有分区信息为: %s 其中找不到 [%s] 对应的分区值.", - com.alibaba.fastjson.JSON.toJSONString(partitionMap), + String errorMessage = MESSAGE_SOURCE.message("readerproxy.3", + com.alibaba.fastjson2.JSON.toJSONString(partitionMap), partitionColumnName); throw DataXException.asDataXException( OdpsReaderErrorCode.READ_DATA_FAIL, errorMessage); @@ -190,7 +232,7 @@ public class ReaderProxy { * every line record of odps table * @param dataXRecord * every datax record, to be send to writer. 
method getXXX() case sensitive - * @param type + * @param typeInfo * odps column type * @param columnNameValue * for partition column it's column value, for normal column it's @@ -199,83 +241,681 @@ public class ReaderProxy { * true means partition column and false means normal column * */ private void odpsColumnToDataXField(Record odpsRecord, - com.alibaba.datax.common.element.Record dataXRecord, OdpsType type, + com.alibaba.datax.common.element.Record dataXRecord, TypeInfo typeInfo, String columnNameValue, boolean isPartitionColumn) { + + ArrayRecord record = (ArrayRecord) odpsRecord; + + OdpsType type = typeInfo.getOdpsType(); + switch (type) { - case BIGINT: { - if (isPartitionColumn) { - dataXRecord.addColumn(new LongColumn(columnNameValue)); - } else { - dataXRecord.addColumn(new LongColumn(odpsRecord - .getBigint(columnNameValue))); - } - break; - } - case BOOLEAN: { - if (isPartitionColumn) { - dataXRecord.addColumn(new BoolColumn(columnNameValue)); - } else { - dataXRecord.addColumn(new BoolColumn(odpsRecord - .getBoolean(columnNameValue))); - } - break; - } - case DATETIME: { - if (isPartitionColumn) { - try { - dataXRecord.addColumn(new DateColumn(ColumnCast - .string2Date(new StringColumn(columnNameValue)))); - } catch (ParseException e) { - LOG.error(String.format("", this.partition)); - String errMessage = String.format( - "您读取分区 [%s] 出现日期转换异常, 日期的字符串表示为 [%s].", - this.partition, columnNameValue); - LOG.error(errMessage); - throw DataXException.asDataXException( - OdpsReaderErrorCode.READ_DATA_FAIL, errMessage, e); + case BIGINT: { + if (isPartitionColumn) { + dataXRecord.addColumn(new LongColumn(columnNameValue)); + } else { + dataXRecord.addColumn(new LongColumn(record + .getBigint(columnNameValue))); } - } else { - dataXRecord.addColumn(new DateColumn(odpsRecord - .getDatetime(columnNameValue))); + break; } + case BOOLEAN: { + if (isPartitionColumn) { + dataXRecord.addColumn(new BoolColumn(columnNameValue)); + } else { + dataXRecord.addColumn(new BoolColumn(record + .getBoolean(columnNameValue))); + } + break; + } + case DATE: + case DATETIME: { + // odps分区列,目前支持TINYINT、SMALLINT、INT、BIGINT、VARCHAR和STRING类型 + if (isPartitionColumn) { + try { + dataXRecord.addColumn(new DateColumn(ColumnCast + .string2Date(new StringColumn(columnNameValue)))); + } catch (ParseException e) { + String errMessage = MESSAGE_SOURCE.message("readerproxy.4", + this.partition, columnNameValue); + LOG.error(errMessage); + throw DataXException.asDataXException( + OdpsReaderErrorCode.READ_DATA_FAIL, errMessage, e); + } + } else { + if (com.aliyun.odps.OdpsType.DATETIME == type) { + dataXRecord.addColumn(new DateColumn(record + .getDatetime(columnNameValue))); + } else { + if (this.useDateWithCalendar) { + dataXRecord.addColumn(new DateColumn(record. 
+ getDate(columnNameValue, this.calendarForDate))); + } else { + dataXRecord.addColumn(new DateColumn(record + .getDate(columnNameValue))); + } + + } + } - break; - } - case DOUBLE: { - if (isPartitionColumn) { - dataXRecord.addColumn(new DoubleColumn(columnNameValue)); - } else { - dataXRecord.addColumn(new DoubleColumn(odpsRecord - .getDouble(columnNameValue))); + break; } - break; - } - case DECIMAL: { - if(isPartitionColumn) { - dataXRecord.addColumn(new DoubleColumn(columnNameValue)); - } else { - dataXRecord.addColumn(new DoubleColumn(odpsRecord.getDecimal(columnNameValue))); + case DOUBLE: { + if (isPartitionColumn) { + dataXRecord.addColumn(new DoubleColumn(columnNameValue)); + } else { + dataXRecord.addColumn(new DoubleColumn(record + .getDouble(columnNameValue))); + } + break; } - break; - } - case STRING: { - if (isPartitionColumn) { - dataXRecord.addColumn(new StringColumn(columnNameValue)); - } else { - dataXRecord.addColumn(new StringColumn(odpsRecord - .getString(columnNameValue))); + case DECIMAL: { + if(isPartitionColumn) { + dataXRecord.addColumn(new DoubleColumn(columnNameValue)); + } else { + dataXRecord.addColumn(new DoubleColumn(record.getDecimal(columnNameValue))); + } + break; } - break; - } - default: - throw DataXException - .asDataXException( - OdpsReaderErrorCode.ILLEGAL_VALUE, - String.format( - "DataX 抽取 ODPS 数据不支持字段类型为:[%s]. 目前支持抽取的字段类型有:bigint, boolean, datetime, double, decimal, string. " - + "您可以选择不抽取 DataX 不支持的字段或者联系 ODPS 管理员寻求帮助.", - type)); + case STRING: { + if (isPartitionColumn) { + dataXRecord.addColumn(new StringColumn(columnNameValue)); + } else { + dataXRecord.addColumn(new StringColumn(record + .getString(columnNameValue))); + } + break; + } + case TINYINT: + if (isPartitionColumn) { + dataXRecord.addColumn(new LongColumn(columnNameValue)); + } else { + Byte value = record.getTinyint(columnNameValue); + Integer intValue = value != null ? value.intValue() : null; + dataXRecord.addColumn(new LongColumn(intValue)); + } + break; + case SMALLINT: { + if (isPartitionColumn) { + dataXRecord.addColumn(new LongColumn(columnNameValue)); + } else { + Short value = record.getSmallint(columnNameValue); + Long valueInLong = null; + if (null != value) { + valueInLong = value.longValue(); + } + dataXRecord.addColumn(new LongColumn(valueInLong)); + } + break; + } + case INT: { + if (isPartitionColumn) { + dataXRecord.addColumn(new LongColumn(columnNameValue)); + } else { + dataXRecord.addColumn(new LongColumn(record + .getInt(columnNameValue))); + } + break; + } + case FLOAT: { + if (isPartitionColumn) { + dataXRecord.addColumn(new DoubleColumn(columnNameValue)); + } else { + dataXRecord.addColumn(new DoubleColumn(record + .getFloat(columnNameValue))); + } + break; + } + case VARCHAR: { + if (isPartitionColumn) { + dataXRecord.addColumn(new StringColumn(columnNameValue)); + } else { + Varchar value = record.getVarchar(columnNameValue); + String columnValue = value != null ? 
value.getValue() : null; + dataXRecord.addColumn(new StringColumn(columnValue)); + } + break; + } + case TIMESTAMP: { + if (isPartitionColumn) { + try { + dataXRecord.addColumn(new DateColumn(ColumnCast + .string2Date(new StringColumn(columnNameValue)))); + } catch (ParseException e) { + String errMessage = MESSAGE_SOURCE.message("readerproxy.4", + this.partition, columnNameValue); + LOG.error(errMessage); + throw DataXException.asDataXException( + OdpsReaderErrorCode.READ_DATA_FAIL, errMessage, e); + } + } else { + dataXRecord.addColumn(new DateColumn(record + .getTimestamp(columnNameValue))); + } + + break; + } + case BINARY: { + if (isPartitionColumn) { + dataXRecord.addColumn(new BytesColumn(columnNameValue.getBytes())); + } else { +// dataXRecord.addColumn(new BytesColumn(record +// .getBinary(columnNameValue).data())); + Binary binaryData = record.getBinary(columnNameValue); + if (null == binaryData) { + dataXRecord.addColumn(new BytesColumn(null)); + } else { + dataXRecord.addColumn(new BytesColumn(binaryData.data())); + } + } + break; + } + case ARRAY: { + if (isPartitionColumn) { + dataXRecord.addColumn(new StringColumn(columnNameValue)); + } else { + List arrayValue = record.getArray(columnNameValue); + if (arrayValue == null) { + dataXRecord.addColumn(new StringColumn(null)); + } else { + dataXRecord.addColumn(new StringColumn(JSON.toJSONString(transOdpsArrayToJavaList(arrayValue, (ArrayTypeInfo)typeInfo)))); + } + } + break; + } + case MAP: { + if (isPartitionColumn) { + dataXRecord.addColumn(new StringColumn(columnNameValue)); + } else { + Map mapValue = record.getMap(columnNameValue); + if (mapValue == null) { + dataXRecord.addColumn(new StringColumn(null)); + } else { + dataXRecord.addColumn(new StringColumn(JSON.toJSONString(transOdpsMapToJavaMap(mapValue, (MapTypeInfo)typeInfo)))); + } + } + break; + } + case STRUCT: { + if (isPartitionColumn) { + dataXRecord.addColumn(new StringColumn(columnNameValue)); + } else { + Struct structValue = record.getStruct(columnNameValue); + if (structValue == null) { + dataXRecord.addColumn(new StringColumn(null)); + } else { + dataXRecord.addColumn(new StringColumn(JSON.toJSONString(transOdpsStructToJavaMap(structValue)))); + } + } + break; + } + default: + throw DataXException.asDataXException( + OdpsReaderErrorCode.ILLEGAL_VALUE, + MESSAGE_SOURCE.message("readerproxy.5", type)); } } + + private List transOdpsArrayToJavaList(List odpsArray, ArrayTypeInfo typeInfo) { + TypeInfo eleType = typeInfo.getElementTypeInfo(); + List result = new ArrayList(); + switch (eleType.getOdpsType()) { + // warn:array [1.2, 3.4] 被转为了:"["1.2", "3.4"]", 本来应该被转换成 "[1.2, 3.4]" + // 注意回归Case覆盖 + case BIGINT: + case DOUBLE: + case INT: + case FLOAT: + case DECIMAL: + case TINYINT: + case SMALLINT: + for (Object item : odpsArray) { + Object object = item; + result.add(object == null ? NULL_INDICATOR : object); + } + return result; + case BOOLEAN: // 未调整array 问题 + case STRING: + case VARCHAR: + case CHAR: + case TIMESTAMP: + case DATE: + for (Object item : odpsArray) { + Object object = item; + result.add(object == null ? NULL_INDICATOR : object.toString()); + } + return result; + /** + * 日期类型 + */ + case DATETIME: + for (Object item : odpsArray) { + Date dateVal = (Date) item; + result.add(dateVal == null ? NULL_INDICATOR : dateFormat.format(dateVal)); + } + return result; + /** + * 字节数组 + */ + case BINARY: + for (Object item : odpsArray) { + Binary binaryVal = (Binary) item; + result.add(binaryVal == null ? 
NULL_INDICATOR : + Base64.encodeBase64(binaryVal.data())); + } + return result; + /** + * 日期间隔 + */ + case INTERVAL_DAY_TIME: + for (Object item : odpsArray) { + IntervalDayTime dayTimeVal = (IntervalDayTime) item; + result.add(dayTimeVal == null ? NULL_INDICATOR : + transIntervalDayTimeToJavaMap(dayTimeVal)); + } + return result; + /** + * 年份间隔 + */ + case INTERVAL_YEAR_MONTH: + for (Object item : odpsArray) { + IntervalYearMonth yearMonthVal = (IntervalYearMonth) item; + result.add(yearMonthVal == null ? NULL_INDICATOR : + transIntervalYearMonthToJavaMap(yearMonthVal)); + } + return result; + /** + * 结构体 + */ + case STRUCT: + for (Object item : odpsArray) { + Struct structVal = (Struct) item; + result.add(structVal == null ? NULL_INDICATOR : + transOdpsStructToJavaMap(structVal)); + } + return result; + /** + * MAP类型 + */ + case MAP: + for (Object item : odpsArray) { + Map mapVal = (Map) item; + result.add(mapVal == null ? NULL_INDICATOR : + transOdpsMapToJavaMap(mapVal, (MapTypeInfo) eleType)); + } + return result; + /** + * ARRAY类型 + */ + case ARRAY: + for (Object item : odpsArray) { + List arrayVal = (List) item; + result.add(arrayVal == null ? NULL_INDICATOR : + transOdpsArrayToJavaList(arrayVal, (ArrayTypeInfo) eleType)); + } + return result; + default: + throw new IllegalArgumentException("decode record failed. column type: " + eleType.getTypeName()); + } + } + private Map transOdpsMapToJavaMap(Map odpsMap, MapTypeInfo typeInfo) { + TypeInfo keyType = typeInfo.getKeyTypeInfo(); + TypeInfo valueType = typeInfo.getValueTypeInfo(); + Map result = new HashMap(); + Set entrySet = null; + switch (valueType.getOdpsType()) { + case BIGINT: + case DOUBLE: + case BOOLEAN: + case STRING: + case DECIMAL: + case TINYINT: + case SMALLINT: + case INT: + case FLOAT: + case CHAR: + case VARCHAR: + case DATE: + case TIMESTAMP: + switch (keyType.getOdpsType()) { + case DATETIME: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Object value = item.getValue(); + result.put(dateFormat.format((Date)item.getKey()), value == null ? NULL_INDICATOR : value.toString()); + } + return result; + case BINARY: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Object value = item.getValue(); + result.put(Base64.encodeBase64(((Binary)item.getKey()).data()), + value == null ? NULL_INDICATOR : value.toString()); + } + return result; + default: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Object value = item.getValue(); + result.put(item.getKey(), value == null ? NULL_INDICATOR : value.toString()); + } + return result; + } + /** + * 日期类型 + */ + case DATETIME: + switch (keyType.getOdpsType()) { + case DATETIME: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Date dateVal = (Date) item.getValue(); + result.put(dateFormat.format((Date)item.getKey()), + dateVal == null ? NULL_INDICATOR : dateFormat.format(dateVal)); + } + return result; + case BINARY: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Date dateVal = (Date) item.getValue(); + result.put(Base64.encodeBase64(((Binary)item.getKey()).data()), + dateVal == null ? NULL_INDICATOR : dateFormat.format(dateVal)); + } + return result; + default: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Date dateVal = (Date) item.getValue(); + result.put(item.getKey(), dateVal == null ? 
NULL_INDICATOR : dateFormat.format(dateVal)); + } + return result; + } + /** + * 字节数组 + */ + case BINARY: + switch (keyType.getOdpsType()) { + case DATETIME: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Binary binaryVal = (Binary) item.getValue(); + result.put(dateFormat.format((Date)item.getKey()), binaryVal == null ? NULL_INDICATOR : + Base64.encodeBase64(binaryVal.data())); + } + return result; + case BINARY: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Binary binaryVal = (Binary) item.getValue(); + result.put(Base64.encodeBase64(((Binary)item.getKey()).data()), + binaryVal == null ? NULL_INDICATOR : + Base64.encodeBase64(binaryVal.data())); + } + return result; + default: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Binary binaryVal = (Binary) item.getValue(); + result.put(item.getKey(), binaryVal == null ? NULL_INDICATOR : + Base64.encodeBase64(binaryVal.data())); + } + return result; + } + /** + * 日期间隔 + */ + case INTERVAL_DAY_TIME: + switch (keyType.getOdpsType()) { + case DATETIME: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + IntervalDayTime dayTimeVal = (IntervalDayTime) item.getValue(); + result.put(dateFormat.format((Date)item.getKey()), dayTimeVal == null ? NULL_INDICATOR : + transIntervalDayTimeToJavaMap(dayTimeVal)); + } + return result; + case BINARY: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + IntervalDayTime dayTimeVal = (IntervalDayTime) item.getValue(); + result.put(Base64.encodeBase64(((Binary)item.getKey()).data()), + dayTimeVal == null ? NULL_INDICATOR : + transIntervalDayTimeToJavaMap(dayTimeVal)); + } + return result; + default: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + IntervalDayTime dayTimeVal = (IntervalDayTime) item.getValue(); + result.put(item.getKey(), dayTimeVal == null ? NULL_INDICATOR : + transIntervalDayTimeToJavaMap(dayTimeVal)); + } + return result; + } + /** + * 年份间隔 + */ + case INTERVAL_YEAR_MONTH: + switch (keyType.getOdpsType()) { + case DATETIME: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + IntervalYearMonth yearMonthVal = (IntervalYearMonth) item.getValue(); + result.put(dateFormat.format((Date)item.getKey()), yearMonthVal == null ? NULL_INDICATOR : + transIntervalYearMonthToJavaMap(yearMonthVal)); + } + return result; + case BINARY: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + IntervalYearMonth yearMonthVal = (IntervalYearMonth) item.getValue(); + result.put(Base64.encodeBase64(((Binary)item.getKey()).data()), + yearMonthVal == null ? NULL_INDICATOR : + transIntervalYearMonthToJavaMap(yearMonthVal)); + } + return result; + default: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + IntervalYearMonth yearMonthVal = (IntervalYearMonth) item.getValue(); + result.put(item.getKey(), yearMonthVal == null ? NULL_INDICATOR : + transIntervalYearMonthToJavaMap(yearMonthVal)); + } + return result; + } + /** + * 结构体 + */ + case STRUCT: + switch (keyType.getOdpsType()) { + case DATETIME: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Struct structVal = (Struct) item.getValue(); + result.put(dateFormat.format((Date)item.getKey()), structVal == null ? 
NULL_INDICATOR : + transOdpsStructToJavaMap(structVal)); + } + return result; + case BINARY: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Struct structVal = (Struct) item.getValue(); + result.put(Base64.encodeBase64(((Binary)item.getKey()).data()), + structVal == null ? NULL_INDICATOR : + transOdpsStructToJavaMap(structVal)); + } + return result; + default: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Struct structVal = (Struct) item.getValue(); + result.put(item.getKey(), structVal == null ? NULL_INDICATOR : + transOdpsStructToJavaMap(structVal)); + } + return result; + } + /** + * MAP类型 + */ + case MAP: + switch (keyType.getOdpsType()) { + case DATETIME: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Map mapVal = (Map) item.getValue(); + result.put(dateFormat.format((Date)item.getKey()),mapVal == null ? NULL_INDICATOR : + transOdpsMapToJavaMap(mapVal, (MapTypeInfo) valueType)); + } + return result; + case BINARY: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Map mapVal = (Map) item.getValue(); + result.put(Base64.encodeBase64(((Binary)item.getKey()).data()), + mapVal == null ? NULL_INDICATOR : transOdpsMapToJavaMap(mapVal, (MapTypeInfo) valueType)); + } + return result; + default: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + Map mapVal = (Map) item.getValue(); + result.put(item.getKey(), mapVal == null ? NULL_INDICATOR : + transOdpsMapToJavaMap(mapVal, (MapTypeInfo) valueType)); + } + return result; + } + /** + * ARRAY类型 + */ + case ARRAY: + switch (keyType.getOdpsType()) { + case DATETIME: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + List arrayVal = (List) item.getValue(); + result.put(dateFormat.format((Date)item.getKey()),arrayVal == null ? NULL_INDICATOR : + transOdpsArrayToJavaList(arrayVal, (ArrayTypeInfo) valueType)); + } + return result; + case BINARY: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + List arrayVal = (List) item.getValue(); + result.put(Base64.encodeBase64(((Binary)item.getKey()).data()), + arrayVal == null ? NULL_INDICATOR : transOdpsArrayToJavaList(arrayVal, (ArrayTypeInfo) valueType)); + } + return result; + default: + entrySet = odpsMap.entrySet(); + for (Map.Entry item : entrySet) { + List arrayVal = (List) item.getValue(); + result.put(item.getKey(), arrayVal == null ? NULL_INDICATOR : + transOdpsArrayToJavaList(arrayVal, (ArrayTypeInfo) valueType)); + } + return result; + } + default: + throw new IllegalArgumentException("decode record failed. column type: " + valueType.getTypeName()); + } + } + + private Map transIntervalDayTimeToJavaMap(IntervalDayTime dayTime) { + Map result = new HashMap(); + result.put("totalSeconds", dayTime.getTotalSeconds()); + result.put("nanos", (long)dayTime.getNanos()); + return result; + } + + private Map transOdpsStructToJavaMap(Struct odpsStruct) { + Map result = new HashMap(); + for (int i = 0; i < odpsStruct.getFieldCount(); i++) { + String fieldName = odpsStruct.getFieldName(i); + Object fieldValue = odpsStruct.getFieldValue(i); + TypeInfo fieldType = odpsStruct.getFieldTypeInfo(i); + switch (fieldType.getOdpsType()) { + case BIGINT: + case DOUBLE: + case BOOLEAN: + case STRING: + case DECIMAL: + case TINYINT: + case SMALLINT: + case INT: + case FLOAT: + case VARCHAR: + case CHAR: + case TIMESTAMP: + case DATE: + result.put(fieldName, fieldValue == null ? 
NULL_INDICATOR : fieldValue.toString()); + break; + /** + * 日期类型 + */ + case DATETIME: + Date dateVal = (Date) fieldValue; + result.put(fieldName, dateVal == null ? NULL_INDICATOR : dateFormat.format(dateVal)); + break; + /** + * 字节数组 + */ + case BINARY: + Binary binaryVal = (Binary) fieldValue; + result.put(fieldName, binaryVal == null ? NULL_INDICATOR : + Base64.encodeBase64(binaryVal.data())); + break; + /** + * 日期间隔 + */ + case INTERVAL_DAY_TIME: + IntervalDayTime dayTimeVal = (IntervalDayTime) fieldValue; + result.put(fieldName, dayTimeVal == null ? NULL_INDICATOR : + transIntervalDayTimeToJavaMap(dayTimeVal)); + break; + /** + * 年份间隔 + */ + case INTERVAL_YEAR_MONTH: + IntervalYearMonth yearMonthVal = (IntervalYearMonth) fieldValue; + result.put(fieldName, yearMonthVal == null ? NULL_INDICATOR : + transIntervalYearMonthToJavaMap(yearMonthVal)); + break; + /** + * 结构体 + */ + case STRUCT: + Struct structVal = (Struct) fieldValue; + result.put(fieldName, structVal == null ? NULL_INDICATOR : + transOdpsStructToJavaMap(structVal)); + break; + /** + * MAP类型 + */ + case MAP: + Map mapVal = (Map) fieldValue; + result.put(fieldName, mapVal == null ? NULL_INDICATOR : + transOdpsMapToJavaMap(mapVal, (MapTypeInfo) fieldType)); + break; + /** + * ARRAY类型 + */ + case ARRAY: + List arrayVal = (List) fieldValue; + result.put(fieldName, arrayVal == null ? NULL_INDICATOR : + transOdpsArrayToJavaList(arrayVal, (ArrayTypeInfo) fieldType)); + break; + default: + throw new IllegalArgumentException("decode record failed. column type: " + fieldType.getTypeName()); + } + } + + return result; + } + + private Map transIntervalYearMonthToJavaMap(IntervalYearMonth yearMonth) { + Map result = new HashMap(); + result.put("years", yearMonth.getYears()); + result.put("months", yearMonth.getMonths()); + return result; + } + } diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/IdAndKeyUtil.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/IdAndKeyUtil.java deleted file mode 100644 index faa90a98..00000000 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/IdAndKeyUtil.java +++ /dev/null @@ -1,85 +0,0 @@ -/** - * (C) 2010-2014 Alibaba Group Holding Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.alibaba.datax.plugin.reader.odpsreader.util; - -import com.alibaba.datax.common.exception.DataXException; -import com.alibaba.datax.common.util.Configuration; -import com.alibaba.datax.plugin.reader.odpsreader.Constant; -import com.alibaba.datax.plugin.reader.odpsreader.Key; -import com.alibaba.datax.plugin.reader.odpsreader.OdpsReaderErrorCode; -import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Map; - -public class IdAndKeyUtil { - private static Logger LOG = LoggerFactory.getLogger(IdAndKeyUtil.class); - - public static Configuration parseAccessIdAndKey(Configuration originalConfig) { - String accessId = originalConfig.getString(Key.ACCESS_ID); - String accessKey = originalConfig.getString(Key.ACCESS_KEY); - - // 只要 accessId,accessKey 二者配置了一个,就理解为是用户本意是要直接手动配置其 accessid/accessKey - if (StringUtils.isNotBlank(accessId) || StringUtils.isNotBlank(accessKey)) { - LOG.info("Try to get accessId/accessKey from your config."); - //通过如下语句,进行检查是否确实配置了 - accessId = originalConfig.getNecessaryValue(Key.ACCESS_ID, OdpsReaderErrorCode.REQUIRED_VALUE); - accessKey = originalConfig.getNecessaryValue(Key.ACCESS_KEY, OdpsReaderErrorCode.REQUIRED_VALUE); - //检查完毕,返回即可 - return originalConfig; - } else { - Map envProp = System.getenv(); - return getAccessIdAndKeyFromEnv(originalConfig, envProp); - } - } - - private static Configuration getAccessIdAndKeyFromEnv(Configuration originalConfig, - Map envProp) { - String accessId = null; - String accessKey = null; - - String skynetAccessID = envProp.get(Constant.SKYNET_ACCESSID); - String skynetAccessKey = envProp.get(Constant.SKYNET_ACCESSKEY); - - if (StringUtils.isNotBlank(skynetAccessID) - || StringUtils.isNotBlank(skynetAccessKey)) { - /** - * 环境变量中,如果存在SKYNET_ACCESSID/SKYNET_ACCESSKEy(只要有其中一个变量,则认为一定是两个都存在的!), - * 则使用其值作为odps的accessId/accessKey(会解密) - */ - - LOG.info("Try to get accessId/accessKey from environment."); - accessId = skynetAccessID; - accessKey = DESCipher.decrypt(skynetAccessKey); - if (StringUtils.isNotBlank(accessKey)) { - originalConfig.set(Key.ACCESS_ID, accessId); - originalConfig.set(Key.ACCESS_KEY, accessKey); - LOG.info("Get accessId/accessKey from environment variables successfully."); - } else { - throw DataXException.asDataXException(OdpsReaderErrorCode.GET_ID_KEY_FAIL, - String.format("从环境变量中获取accessId/accessKey 失败, accessId=[%s]", accessId)); - } - } else { - // 无处获取(既没有配置在作业中,也没用在环境变量中) - throw DataXException.asDataXException(OdpsReaderErrorCode.GET_ID_KEY_FAIL, - "无法获取到accessId/accessKey. 它们既不存在于您的配置中,也不存在于环境变量中."); - } - - return originalConfig; - } -} diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/LocalStrings.properties b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/LocalStrings.properties new file mode 100644 index 00000000..897ce232 --- /dev/null +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/LocalStrings.properties @@ -0,0 +1,25 @@ +descipher.1=\u957F\u5EA6\u4E0D\u662F\u5076\u6570 + +idandkeyutil.1=\u4ECE\u73AF\u5883\u53D8\u91CF\u4E2D\u83B7\u53D6accessId/accessKey \u5931\u8D25, accessId=[{0}] +idandkeyutil.2=\u65E0\u6CD5\u83B7\u53D6\u5230accessId/accessKey. \u5B83\u4EEC\u65E2\u4E0D\u5B58\u5728\u4E8E\u60A8\u7684\u914D\u7F6E\u4E2D\uFF0C\u4E5F\u4E0D\u5B58\u5728\u4E8E\u73AF\u5883\u53D8\u91CF\u4E2D. + + +odpssplitutil.1=\u60A8\u6240\u914D\u7F6E\u7684\u5206\u533A\u4E0D\u80FD\u4E3A\u7A7A\u767D. 
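The type-conversion hunks above flatten ODPS ARRAY, MAP and STRUCT values into plain Java collections and then serialize them into a single StringColumn via JSON.toJSONString. A minimal sketch of that serialization step follows; it uses fastjson2, which the writer side of this patch imports (whether the reader binds the same JSON facade is an assumption), and it illustrates the numbers-versus-strings distinction called out in the warn comment of transOdpsArrayToJavaList.

```java
import com.alibaba.fastjson2.JSON;

import java.util.Arrays;
import java.util.List;

public class ComplexTypeJsonDemo {
    public static void main(String[] args) {
        // Numeric element types (BIGINT, DOUBLE, DECIMAL, ...) are added to the Java list
        // as-is, so they serialize as JSON numbers.
        List<Object> keptAsNumbers = Arrays.asList(1.2d, 3.4d);

        // Element types that go through toString() (STRING, CHAR, TIMESTAMP, ...) end up
        // as JSON strings instead, which is the shape the warn comment guards against
        // for numeric arrays.
        List<Object> keptAsStrings = Arrays.asList("1.2", "3.4");

        System.out.println(JSON.toJSONString(keptAsNumbers)); // [1.2,3.4]
        System.out.println(JSON.toJSONString(keptAsStrings)); // ["1.2","3.4"]
    }
}
```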
+odpssplitutil.2=\u5207\u5206\u7684 recordCount \u4E0D\u80FD\u4E3A\u8D1F\u6570.recordCount={0} +odpssplitutil.3=\u5207\u5206\u7684 adviceNum \u4E0D\u80FD\u4E3A\u8D1F\u6570.adviceNum={0} +odpssplitutil.4=\u6CE8\u610F: \u7531\u4E8E\u60A8\u914D\u7F6E\u4E86successOnNoPartition\u503C\u4E3Atrue (\u5373\u5F53\u5206\u533A\u503C\u4E0D\u5B58\u5728\u65F6, \u540C\u6B65\u4EFB\u52A1\u4E0D\u62A5\u9519), \u60A8\u8BBE\u7F6E\u7684\u5206\u533A\u65E0\u6CD5\u5339\u914D\u5230ODPS\u8868\u4E2D\u5BF9\u5E94\u7684\u5206\u533A, \u540C\u6B65\u4EFB\u52A1\u7EE7\u7EED... + +odpsutil.1=datax\u83B7\u53D6\u4E0D\u5230\u6E90\u8868\u7684\u5217\u4FE1\u606F\uFF0C \u7531\u4E8E\u60A8\u672A\u914D\u7F6E\u8BFB\u53D6\u6E90\u5934\u8868\u7684\u5217\u4FE1\u606F. datax\u65E0\u6CD5\u77E5\u9053\u8BE5\u62BD\u53D6\u8868\u7684\u54EA\u4E9B\u5B57\u6BB5\u7684\u6570\u636E\uFF0C \u6B63\u786E\u7684\u914D\u7F6E\u65B9\u5F0F\u662F\u7ED9 column \u914D\u7F6E\u4E0A\u60A8\u9700\u8981\u8BFB\u53D6\u7684\u5217\u540D\u79F0,\u7528\u82F1\u6587\u9017\u53F7\u5206\u9694. +odpsutil.2=\u60A8\u6240\u914D\u7F6E\u7684maxRetryTime \u503C\u9519\u8BEF. \u8BE5\u503C\u4E0D\u80FD\u5C0F\u4E8E1, \u4E14\u4E0D\u80FD\u5927\u4E8E {0}. \u63A8\u8350\u7684\u914D\u7F6E\u65B9\u5F0F\u662F\u7ED9maxRetryTime \u914D\u7F6E1-11\u4E4B\u95F4\u7684\u67D0\u4E2A\u503C. \u8BF7\u60A8\u68C0\u67E5\u914D\u7F6E\u5E76\u505A\u51FA\u76F8\u5E94\u4FEE\u6539. +odpsutil.3=\u4E0D\u652F\u6301\u7684\u8D26\u53F7\u7C7B\u578B:[{0}]. \u8D26\u53F7\u7C7B\u578B\u76EE\u524D\u4EC5\u652F\u6301aliyun, taobao. +odpsutil.4=\u60A8\u6240\u914D\u7F6E\u7684\u5206\u533A\u4E0D\u80FD\u4E3A\u7A7A\u767D. +odpsutil.5=\u6E90\u5934\u8868\u7684\u5217\u914D\u7F6E\u9519\u8BEF. \u60A8\u6240\u914D\u7F6E\u7684\u5217 [{0}] \u4E0D\u5B58\u5728. +odpsutil.6=open RecordReader\u5931\u8D25. \u8BF7\u8054\u7CFB ODPS \u7BA1\u7406\u5458\u5904\u7406. +odpsutil.7=\u52A0\u8F7D ODPS \u6E90\u5934\u8868:{0} \u5931\u8D25. \u8BF7\u68C0\u67E5\u60A8\u914D\u7F6E\u7684 ODPS \u6E90\u5934\u8868\u7684 [project] \u662F\u5426\u6B63\u786E. +odpsutil.8=\u52A0\u8F7D ODPS \u6E90\u5934\u8868:{0} \u5931\u8D25. \u8BF7\u68C0\u67E5\u60A8\u914D\u7F6E\u7684 ODPS \u6E90\u5934\u8868\u7684 [table] \u662F\u5426\u6B63\u786E. +odpsutil.9=\u52A0\u8F7D ODPS \u6E90\u5934\u8868:{0} \u5931\u8D25. \u8BF7\u68C0\u67E5\u60A8\u914D\u7F6E\u7684 ODPS \u6E90\u5934\u8868\u7684 [accessId] [accessKey]\u662F\u5426\u6B63\u786E. +odpsutil.10=\u52A0\u8F7D ODPS \u6E90\u5934\u8868:{0} \u5931\u8D25. \u8BF7\u68C0\u67E5\u60A8\u914D\u7F6E\u7684 ODPS \u6E90\u5934\u8868\u7684 [accessKey] \u662F\u5426\u6B63\u786E. +odpsutil.11=\u52A0\u8F7D ODPS \u6E90\u5934\u8868:{0} \u5931\u8D25. \u8BF7\u68C0\u67E5\u60A8\u914D\u7F6E\u7684 ODPS \u6E90\u5934\u8868\u7684 [accessId] [accessKey] [project]\u662F\u5426\u5339\u914D. +odpsutil.12=\u52A0\u8F7D ODPS \u6E90\u5934\u8868:{0} \u5931\u8D25. \u8BF7\u68C0\u67E5\u60A8\u914D\u7F6E\u7684 ODPS \u6E90\u5934\u8868\u7684 project,table,accessId,accessKey,odpsServer\u7B49\u503C. +odpsutil.13=\u6267\u884C ODPS SQL\u5931\u8D25, \u8FD4\u56DE\u503C\u4E3A:{0}. \u8BF7\u4ED4\u7EC6\u68C0\u67E5ODPS SQL\u662F\u5426\u6B63\u786E, \u5982\u679C\u68C0\u67E5\u65E0\u8BEF, \u8BF7\u8054\u7CFB ODPS \u503C\u73ED\u540C\u5B66\u5904\u7406. SQL \u5185\u5BB9\u4E3A:[\n{1}\n]. +odpsutil.14=\u6267\u884C ODPS SQL \u65F6\u629B\u51FA\u5F02\u5E38, \u8BF7\u4ED4\u7EC6\u68C0\u67E5ODPS SQL\u662F\u5426\u6B63\u786E, \u5982\u679C\u68C0\u67E5\u65E0\u8BEF, \u8BF7\u8054\u7CFB ODPS \u503C\u73ED\u540C\u5B66\u5904\u7406. SQL \u5185\u5BB9\u4E3A:[\n{0}\n]. 
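The LocalStrings entries above feed the MessageSource lookups that replace the hard-coded Chinese error strings in OdpsSplitUtil and OdpsUtil below. The MessageSource implementation itself is not part of this patch, so the sketch below only demonstrates the convention the keys follow: ResourceBundle resolution plus MessageFormat-style {0}/{1} placeholder substitution. The bundle class is a hypothetical in-memory stand-in for the real properties file.

```java
import java.text.MessageFormat;
import java.util.ListResourceBundle;
import java.util.ResourceBundle;

// Hypothetical stand-in for the plugin's LocalStrings.properties bundle.
class DemoLocalStrings extends ListResourceBundle {
    @Override
    protected Object[][] getContents() {
        return new Object[][]{
                {"odpssplitutil.2", "切分的 recordCount 不能为负数.recordCount={0}"},
        };
    }
}

public class MessageLookupDemo {
    public static void main(String[] args) {
        ResourceBundle bundle = new DemoLocalStrings();
        // {0} is filled in MessageFormat style, matching the placeholders used in the
        // properties files added by this patch.
        String msg = MessageFormat.format(bundle.getString("odpssplitutil.2"), -5L);
        System.out.println(msg); // 切分的 recordCount 不能为负数.recordCount=-5
    }
}
```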
\ No newline at end of file diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/OdpsSplitUtil.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/OdpsSplitUtil.java index b7f4f1aa..2030033d 100755 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/OdpsSplitUtil.java +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/OdpsSplitUtil.java @@ -2,19 +2,26 @@ package com.alibaba.datax.plugin.reader.odpsreader.util; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.MessageSource; import com.alibaba.datax.common.util.RangeSplitUtil; import com.alibaba.datax.plugin.reader.odpsreader.Constant; import com.alibaba.datax.plugin.reader.odpsreader.Key; import com.alibaba.datax.plugin.reader.odpsreader.OdpsReaderErrorCode; import com.aliyun.odps.Odps; import com.aliyun.odps.tunnel.TableTunnel.DownloadSession; + import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; public final class OdpsSplitUtil { + private static final Logger LOG = LoggerFactory.getLogger(OdpsSplitUtil.class); + + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsSplitUtil.class); public static List doSplit(Configuration originalConfig, Odps odps, int adviceNum) { @@ -36,9 +43,17 @@ public final class OdpsSplitUtil { List partitions = originalConfig.getList(Key.PARTITION, String.class); + if ((null == partitions || partitions.isEmpty()) && originalConfig.getBool(Key.SUCCESS_ON_NO_PATITION, false)) { + Configuration tempConfig = originalConfig.clone(); + tempConfig.set(Key.PARTITION, null); + splittedConfigs.add(tempConfig); + LOG.warn(MESSAGE_SOURCE.message("odpssplitutil.4")); + return splittedConfigs; + } + if (null == partitions || partitions.isEmpty()) { throw DataXException.asDataXException(OdpsReaderErrorCode.ILLEGAL_VALUE, - "您所配置的分区不能为空白."); + MESSAGE_SOURCE.message("odpssplitutil.1")); } //splitMode 默认为 record @@ -141,11 +156,11 @@ public final class OdpsSplitUtil { */ private static List> splitRecordCount(long recordCount, int adviceNum) { if(recordCount<0){ - throw new IllegalArgumentException("切分的 recordCount 不能为负数.recordCount=" + recordCount); + throw new IllegalArgumentException(MESSAGE_SOURCE.message("odpssplitutil.2", recordCount)); } if(adviceNum<1){ - throw new IllegalArgumentException("切分的 adviceNum 不能为负数.adviceNum=" + adviceNum); + throw new IllegalArgumentException(MESSAGE_SOURCE.message("odpssplitutil.3", adviceNum)); } List> result = new ArrayList>(); diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/OdpsUtil.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/OdpsUtil.java index 2aa3f66e..0ff34a81 100755 --- a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/OdpsUtil.java +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/OdpsUtil.java @@ -2,16 +2,23 @@ package com.alibaba.datax.plugin.reader.odpsreader.util; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.DataXCaseEnvUtil; +import com.alibaba.datax.common.util.MessageSource; import com.alibaba.datax.common.util.RetryUtil; import 
com.alibaba.datax.plugin.reader.odpsreader.ColumnType; import com.alibaba.datax.plugin.reader.odpsreader.Constant; +import com.alibaba.datax.plugin.reader.odpsreader.InternalColumnInfo; import com.alibaba.datax.plugin.reader.odpsreader.Key; import com.alibaba.datax.plugin.reader.odpsreader.OdpsReaderErrorCode; import com.aliyun.odps.*; +import com.aliyun.odps.Column; import com.aliyun.odps.account.Account; import com.aliyun.odps.account.AliyunAccount; +import com.aliyun.odps.account.StsAccount; import com.aliyun.odps.data.RecordReader; +import com.aliyun.odps.task.SQLTask; import com.aliyun.odps.tunnel.TableTunnel; +import com.aliyun.odps.type.TypeInfo; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.MutablePair; @@ -19,13 +26,12 @@ import org.apache.commons.lang3.tuple.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; +import java.util.*; import java.util.concurrent.Callable; public final class OdpsUtil { private static final Logger LOG = LoggerFactory.getLogger(OdpsUtil.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsUtil.class); public static int MAX_RETRY_TIME = 10; @@ -37,8 +43,8 @@ public final class OdpsUtil { if (null == originalConfig.getList(Key.COLUMN) || originalConfig.getList(Key.COLUMN, String.class).isEmpty()) { - throw DataXException.asDataXException(OdpsReaderErrorCode.REQUIRED_VALUE, "datax获取不到源表的列信息, 由于您未配置读取源头表的列信息. datax无法知道该抽取表的哪些字段的数据 " + - "正确的配置方式是给 column 配置上您需要读取的列名称,用英文逗号分隔."); + throw DataXException.asDataXException(OdpsReaderErrorCode.REQUIRED_VALUE, + MESSAGE_SOURCE.message("odpsutil.1")); } } @@ -47,8 +53,8 @@ public final class OdpsUtil { int maxRetryTime = originalConfig.getInt(Key.MAX_RETRY_TIME, OdpsUtil.MAX_RETRY_TIME); if (maxRetryTime < 1 || maxRetryTime > OdpsUtil.MAX_RETRY_TIME) { - throw DataXException.asDataXException(OdpsReaderErrorCode.ILLEGAL_VALUE, "您所配置的maxRetryTime 值错误. 该值不能小于1, 且不能大于 " + OdpsUtil.MAX_RETRY_TIME + - ". 推荐的配置方式是给maxRetryTime 配置1-11之间的某个值. 请您检查配置并做出相应修改."); + throw DataXException.asDataXException(OdpsReaderErrorCode.ILLEGAL_VALUE, + MESSAGE_SOURCE.message("odpsutil.2", OdpsUtil.MAX_RETRY_TIME)); } MAX_RETRY_TIME = maxRetryTime; } @@ -59,36 +65,35 @@ public final class OdpsUtil { String accessId = originalConfig.getString(Key.ACCESS_ID); String accessKey = originalConfig.getString(Key.ACCESS_KEY); String project = originalConfig.getString(Key.PROJECT); + String securityToken = originalConfig.getString(Key.SECURITY_TOKEN); String packageAuthorizedProject = originalConfig.getString(Key.PACKAGE_AUTHORIZED_PROJECT); String defaultProject; - if(StringUtils.isBlank(packageAuthorizedProject)) { + if (StringUtils.isBlank(packageAuthorizedProject)) { defaultProject = project; } else { defaultProject = packageAuthorizedProject; } - String accountType = originalConfig.getString(Key.ACCOUNT_TYPE, - Constant.DEFAULT_ACCOUNT_TYPE); Account account = null; - if (accountType.equalsIgnoreCase(Constant.DEFAULT_ACCOUNT_TYPE)) { - account = new AliyunAccount(accessId, accessKey); + if (StringUtils.isNotBlank(securityToken)) { + account = new StsAccount(accessId, accessKey, securityToken); } else { - throw DataXException.asDataXException(OdpsReaderErrorCode.ACCOUNT_TYPE_ERROR, - String.format("不支持的账号类型:[%s]. 
账号类型目前仅支持aliyun, taobao.", accountType)); + account = new AliyunAccount(accessId, accessKey); } Odps odps = new Odps(account); boolean isPreCheck = originalConfig.getBool("dryRun", false); - if(isPreCheck) { + if (isPreCheck) { odps.getRestClient().setConnectTimeout(3); odps.getRestClient().setReadTimeout(3); odps.getRestClient().setRetryTimes(2); } odps.setDefaultProject(defaultProject); odps.setEndpoint(odpsServer); + odps.setUserAgent("DATAX"); return odps; } @@ -103,7 +108,7 @@ public final class OdpsUtil { table.reload(); return table; } - }, 3, 1000, false); + }, DataXCaseEnvUtil.getRetryTimes(3), DataXCaseEnvUtil.getRetryInterval(1000), DataXCaseEnvUtil.getRetryExponential(false)); } catch (Exception e) { throwDataXExceptionWhenReloadTable(e, tableName); } @@ -154,7 +159,7 @@ public final class OdpsUtil { public static String formatPartition(String partition) { if (StringUtils.isBlank(partition)) { throw DataXException.asDataXException(OdpsReaderErrorCode.ILLEGAL_VALUE, - "您所配置的分区不能为空白."); + MESSAGE_SOURCE.message("odpsutil.4")); } else { return partition.trim().replaceAll(" *= *", "=") .replaceAll(" */ *", ",").replaceAll(" *, *", ",") @@ -175,19 +180,47 @@ public final class OdpsUtil { } } - public static List> parseColumns( + /** + * 将用户配置的分区分类成两类: + * (1) 包含 HINT 的区间过滤; + * (2) 不包含 HINT 的普通模式 + * @param userConfiguredPartitions + * @return + */ + public static UserConfiguredPartitionClassification classifyUserConfiguredPartitions(List userConfiguredPartitions){ + UserConfiguredPartitionClassification userConfiguredPartitionClassification = new UserConfiguredPartitionClassification(); + + List userConfiguredHintPartition = new ArrayList(); + List userConfiguredNormalPartition = new ArrayList(); + boolean isIncludeHintPartition = false; + for (String userConfiguredPartition : userConfiguredPartitions){ + if (StringUtils.isNotBlank(userConfiguredPartition)){ + if (userConfiguredPartition.trim().toLowerCase().startsWith(Constant.PARTITION_FILTER_HINT)) { + userConfiguredHintPartition.add(userConfiguredPartition.trim()); + isIncludeHintPartition = true; + }else { + userConfiguredNormalPartition.add(userConfiguredPartition.trim()); + } + } + } + userConfiguredPartitionClassification.setIncludeHintPartition(isIncludeHintPartition); + userConfiguredPartitionClassification.setUserConfiguredHintPartition(userConfiguredHintPartition); + userConfiguredPartitionClassification.setUserConfiguredNormalPartition(userConfiguredNormalPartition); + return userConfiguredPartitionClassification; + } + + public static List parseColumns( List allNormalColumns, List allPartitionColumns, List userConfiguredColumns) { - List> parsededColumns = new ArrayList>(); + List parsededColumns = new ArrayList(); // warn: upper & lower case for (String column : userConfiguredColumns) { - MutablePair pair = new MutablePair(); - + InternalColumnInfo pair = new InternalColumnInfo(); // if constant column if (OdpsUtil.checkIfConstantColumn(column)) { // remove first and last ' - pair.setLeft(column.substring(1, column.length() - 1)); - pair.setRight(ColumnType.CONSTANT); + pair.setColumnName(column.substring(1, column.length() - 1)); + pair.setColumnType(ColumnType.CONSTANT); parsededColumns.add(pair); continue; } @@ -196,8 +229,8 @@ public final class OdpsUtil { // repeated in partitioning columns int index = OdpsUtil.indexOfIgnoreCase(allNormalColumns, column); if (0 <= index) { - pair.setLeft(allNormalColumns.get(index)); - pair.setRight(ColumnType.NORMAL); + pair.setColumnName(allNormalColumns.get(index)); + 
pair.setColumnType(ColumnType.NORMAL); parsededColumns.add(pair); continue; } @@ -205,22 +238,22 @@ public final class OdpsUtil { // if partition column index = OdpsUtil.indexOfIgnoreCase(allPartitionColumns, column); if (0 <= index) { - pair.setLeft(allPartitionColumns.get(index)); - pair.setRight(ColumnType.PARTITION); + pair.setColumnName(allPartitionColumns.get(index)); + pair.setColumnType(ColumnType.PARTITION); parsededColumns.add(pair); continue; } // not exist column throw DataXException.asDataXException( OdpsReaderErrorCode.ILLEGAL_VALUE, - String.format("源头表的列配置错误. 您所配置的列 [%s] 不存在.", column)); + MESSAGE_SOURCE.message("odpsutil.5", column)); } return parsededColumns; } - + private static int indexOfIgnoreCase(List columnCollection, - String column) { + String column) { int index = -1; for (int i = 0; i < columnCollection.size(); i++) { if (columnCollection.get(i).equalsIgnoreCase(column)) { @@ -255,7 +288,7 @@ public final class OdpsUtil { return tunnel.createDownloadSession( projectName, tableName); } - }, MAX_RETRY_TIME, 1000, true); + }, DataXCaseEnvUtil.getRetryTimes(MAX_RETRY_TIME), DataXCaseEnvUtil.getRetryInterval(1000), DataXCaseEnvUtil.getRetryExponential(true)); } catch (Exception e) { throw DataXException.asDataXException(OdpsReaderErrorCode.CREATE_DOWNLOADSESSION_FAIL, e); } @@ -276,7 +309,7 @@ public final class OdpsUtil { return tunnel.getDownloadSession( projectName, tableName, sessionId); } - }, MAX_RETRY_TIME ,1000, true); + }, DataXCaseEnvUtil.getRetryTimes(MAX_RETRY_TIME), DataXCaseEnvUtil.getRetryInterval(1000), DataXCaseEnvUtil.getRetryExponential(true)); } catch (Exception e) { throw DataXException.asDataXException(OdpsReaderErrorCode.GET_DOWNLOADSESSION_FAIL, e); } @@ -299,7 +332,7 @@ public final class OdpsUtil { return tunnel.createDownloadSession( projectName, tableName, partitionSpec); } - }, MAX_RETRY_TIME, 1000, true); + }, DataXCaseEnvUtil.getRetryTimes(MAX_RETRY_TIME), DataXCaseEnvUtil.getRetryInterval(1000), DataXCaseEnvUtil.getRetryExponential(true)); } catch (Exception e) { throw DataXException.asDataXException(OdpsReaderErrorCode.CREATE_DOWNLOADSESSION_FAIL, e); } @@ -321,58 +354,152 @@ public final class OdpsUtil { return tunnel.getDownloadSession( projectName, tableName, partitionSpec, sessionId); } - }, MAX_RETRY_TIME, 1000, true); + }, DataXCaseEnvUtil.getRetryTimes(MAX_RETRY_TIME), DataXCaseEnvUtil.getRetryInterval(1000), DataXCaseEnvUtil.getRetryExponential(true)); } catch (Exception e) { throw DataXException.asDataXException(OdpsReaderErrorCode.GET_DOWNLOADSESSION_FAIL, e); } } - - + /** + * odpsreader采用的直接读取所有列的downloadSession + */ public static RecordReader getRecordReader(final TableTunnel.DownloadSession downloadSession, final long start, final long count, - final boolean isCompress) { + final boolean isCompress) { try { return RetryUtil.executeWithRetry(new Callable() { @Override public RecordReader call() throws Exception { return downloadSession.openRecordReader(start, count, isCompress); } - }, MAX_RETRY_TIME, 1000, true); + }, DataXCaseEnvUtil.getRetryTimes(MAX_RETRY_TIME), DataXCaseEnvUtil.getRetryInterval(1000), DataXCaseEnvUtil.getRetryExponential(true)); } catch (Exception e) { throw DataXException.asDataXException(OdpsReaderErrorCode.OPEN_RECORD_READER_FAILED, - "open RecordReader失败. 
请联系 ODPS 管理员处理.", e); + MESSAGE_SOURCE.message("odpsutil.6"), e); } } + + /** + * odpsreader采用的指定读取某些列的downloadSession + */ + public static RecordReader getRecordReader(final TableTunnel.DownloadSession downloadSession, final long start, final long count, + final boolean isCompress, final List columns) { + try { + return RetryUtil.executeWithRetry(new Callable() { + @Override + public RecordReader call() throws Exception { + return downloadSession.openRecordReader(start, count, isCompress, columns); + } + }, DataXCaseEnvUtil.getRetryTimes(MAX_RETRY_TIME), DataXCaseEnvUtil.getRetryInterval(1000), DataXCaseEnvUtil.getRetryExponential(true)); + } catch (Exception e) { + throw DataXException.asDataXException(OdpsReaderErrorCode.OPEN_RECORD_READER_FAILED, + MESSAGE_SOURCE.message("odpsutil.6"), e); + } + } + + /** * table.reload() 方法抛出的 odps 异常 转化为更清晰的 datax 异常 抛出 */ public static void throwDataXExceptionWhenReloadTable(Exception e, String tableName) { - if(e.getMessage() != null) { - if(e.getMessage().contains(OdpsExceptionMsg.ODPS_PROJECT_NOT_FOUNT)) { + if (e.getMessage() != null) { + if (e.getMessage().contains(OdpsExceptionMsg.ODPS_PROJECT_NOT_FOUNT)) { throw DataXException.asDataXException(OdpsReaderErrorCode.ODPS_PROJECT_NOT_FOUNT, - String.format("加载 ODPS 源头表:%s 失败. " + - "请检查您配置的 ODPS 源头表的 [project] 是否正确.", tableName), e); - } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_TABLE_NOT_FOUNT)) { + MESSAGE_SOURCE.message("odpsutil.7", tableName), e); + } else if (e.getMessage().contains(OdpsExceptionMsg.ODPS_TABLE_NOT_FOUNT)) { throw DataXException.asDataXException(OdpsReaderErrorCode.ODPS_TABLE_NOT_FOUNT, - String.format("加载 ODPS 源头表:%s 失败. " + - "请检查您配置的 ODPS 源头表的 [table] 是否正确.", tableName), e); - } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_KEY_ID_NOT_FOUND)) { + MESSAGE_SOURCE.message("odpsutil.8", tableName), e); + } else if (e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_KEY_ID_NOT_FOUND)) { throw DataXException.asDataXException(OdpsReaderErrorCode.ODPS_ACCESS_KEY_ID_NOT_FOUND, - String.format("加载 ODPS 源头表:%s 失败. " + - "请检查您配置的 ODPS 源头表的 [accessId] [accessKey]是否正确.", tableName), e); - } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_KEY_INVALID)) { + MESSAGE_SOURCE.message("odpsutil.9", tableName), e); + } else if (e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_KEY_INVALID)) { throw DataXException.asDataXException(OdpsReaderErrorCode.ODPS_ACCESS_KEY_INVALID, - String.format("加载 ODPS 源头表:%s 失败. " + - "请检查您配置的 ODPS 源头表的 [accessKey] 是否正确.", tableName), e); - } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_DENY)) { + MESSAGE_SOURCE.message("odpsutil.10", tableName), e); + } else if (e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_DENY)) { throw DataXException.asDataXException(OdpsReaderErrorCode.ODPS_ACCESS_DENY, - String.format("加载 ODPS 源头表:%s 失败. " + - "请检查您配置的 ODPS 源头表的 [accessId] [accessKey] [project]是否匹配.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.11", tableName), e); } } throw DataXException.asDataXException(OdpsReaderErrorCode.ILLEGAL_VALUE, - String.format("加载 ODPS 源头表:%s 失败. 
" + - "请检查您配置的 ODPS 源头表的 project,table,accessId,accessKey,odpsServer等值.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.12", tableName), e); + } + + public static List getNormalColumns(List parsedColumns, + Map columnTypeMap) { + List userConfigNormalColumns = new ArrayList(); + Set columnNameSet = new HashSet(); + for (InternalColumnInfo columnInfo : parsedColumns) { + if (columnInfo.getColumnType() == ColumnType.NORMAL) { + String columnName = columnInfo.getColumnName(); + if (!columnNameSet.contains(columnName)) { + Column column = new Column(columnName, columnTypeMap.get(columnName)); + userConfigNormalColumns.add(column); + columnNameSet.add(columnName); + } + } + } + return userConfigNormalColumns; + } + + /** + * 执行odps preSql和postSql + * + * @param odps: odps client + * @param sql : 要执行的odps sql语句, 因为会有重试, 所以sql 必须为幂等的 + * @param tag : "preSql" or "postSql" + */ + public static void runSqlTaskWithRetry(final Odps odps, final String sql, final String tag){ + //重试次数 + int retryTimes = 10; + //重试间隔(ms) + long sleepTimeInMilliSecond = 1000L; + try { + RetryUtil.executeWithRetry(new Callable() { + @Override + public Void call() throws Exception { + long beginTime = System.currentTimeMillis(); + + runSqlTask(odps, sql, tag); + + long endIime = System.currentTimeMillis(); + LOG.info(String.format("exectue odps sql: %s finished, cost time : %s ms", + sql, (endIime - beginTime))); + return null; + } + }, DataXCaseEnvUtil.getRetryTimes(retryTimes), DataXCaseEnvUtil.getRetryInterval(sleepTimeInMilliSecond), DataXCaseEnvUtil.getRetryExponential(true)); + } catch (Exception e) { + String errMessage = String.format("Retry %s times to exectue sql :[%s] failed! Exception: %s", + retryTimes, e.getMessage()); + throw DataXException.asDataXException(OdpsReaderErrorCode.RUN_SQL_ODPS_EXCEPTION, errMessage, e); + } + } + + public static void runSqlTask(Odps odps, String sql, String tag) { + if (StringUtils.isBlank(sql)) { + return; + } + + String taskName = String.format("datax_odpsreader_%s_%s", tag, UUID.randomUUID().toString().replace('-', '_')); + + LOG.info("Try to start sqlTask:[{}] to run odps sql:[\n{}\n] .", taskName, sql); + + Instance instance; + Instance.TaskStatus status; + try { + Map hints = new HashMap(); + hints.put("odps.sql.submit.mode", "script"); + instance = SQLTask.run(odps, odps.getDefaultProject(), sql, taskName, hints, null); + instance.waitForSuccess(); + status = instance.getTaskStatus().get(taskName); + if (!Instance.TaskStatus.Status.SUCCESS.equals(status.getStatus())) { + throw DataXException.asDataXException(OdpsReaderErrorCode.RUN_SQL_FAILED, + MESSAGE_SOURCE.message("odpsutil.13", sql)); + } + } catch (DataXException e) { + throw e; + } catch (Exception e) { + throw DataXException.asDataXException(OdpsReaderErrorCode.RUN_SQL_ODPS_EXCEPTION, + MESSAGE_SOURCE.message("odpsutil.14", sql), e); + } } } diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/SqliteUtil.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/SqliteUtil.java new file mode 100644 index 00000000..70c22267 --- /dev/null +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/SqliteUtil.java @@ -0,0 +1,103 @@ +package com.alibaba.datax.plugin.reader.odpsreader.util; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.List; + +import 
com.alibaba.datax.plugin.reader.odpsreader.Constant; +import com.aliyun.odps.Partition; +import com.aliyun.odps.Table; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SqliteUtil { + + private static final Logger LOGGER = LoggerFactory.getLogger(SqliteUtil.class); + + private Connection connection = null; + private Statement stmt = null; + + private String partitionName = "partitionName"; + + private String createSQLTemplate = "Create Table DataXODPSReaderPPR (" + partitionName +" String, %s)"; + private String insertSQLTemplate = "Insert Into DataXODPSReaderPPR Values (%s)"; + private String selectSQLTemplate = "Select * From DataXODPSReaderPPR Where %s"; + + public SqliteUtil() throws ClassNotFoundException, SQLException { + + Class.forName("org.sqlite.JDBC"); + this.connection = DriverManager.getConnection("jdbc:sqlite::memory:"); + this.stmt = this.connection.createStatement(); + } + + public void loadAllPartitionsIntoSqlite(Table table, List allOriginPartitions) throws SQLException { + List partitionColumnList = new ArrayList(); + String partition = allOriginPartitions.get(0); + String[] partitionSpecs = partition.split(","); + List partitionKeyList = new ArrayList(); + for (String partitionKeyValue : partitionSpecs) { + String partitionKey = partitionKeyValue.split("=")[0]; + partitionColumnList.add(String.format("%s String", partitionKey)); + partitionKeyList.add(partitionKey); + } + String createSQL = String.format(createSQLTemplate, StringUtils.join(partitionColumnList.toArray(), ",")); + LOGGER.info(createSQL); + this.stmt.execute(createSQL); + + insertAllOriginPartitionIntoSqlite(table, partitionKeyList); + } + + /** + * 根据用户配置的过滤条件, 从sqlite中select出符合的partition列表 + * @param userHintConfiguredPartitions + * @return + */ + public List selectUserConfiguredPartition(List userHintConfiguredPartitions) throws SQLException { + List selectedPartitionsFromSqlite = new ArrayList(); + for (String partitionWhereConditions : userHintConfiguredPartitions) { + String selectUserConfiguredPartitionsSql = String.format(selectSQLTemplate, + StringUtils.remove(partitionWhereConditions, Constant.PARTITION_FILTER_HINT)); + LOGGER.info(selectUserConfiguredPartitionsSql); + ResultSet rs = stmt.executeQuery(selectUserConfiguredPartitionsSql); + while (rs.next()) { + selectedPartitionsFromSqlite.add(getPartitionsValue(rs)); + } + } + return selectedPartitionsFromSqlite; + } + + private String getPartitionsValue (ResultSet rs) throws SQLException { + List partitions = new ArrayList(); + ResultSetMetaData rsMetaData = rs.getMetaData(); + Integer columnCounter = rs.getMetaData().getColumnCount(); + for (int columnIndex = 2; columnIndex <= columnCounter; columnIndex++) { + partitions.add(String.format("%s=%s", rsMetaData.getColumnName(columnIndex), rs.getString(columnIndex))); + } + return StringUtils.join(partitions, ","); + } + + /** + * 将odps table里所有partition值载入sqlite中 + * @param table + * @param partitionKeyList + * @throws SQLException + */ + private void insertAllOriginPartitionIntoSqlite(Table table, List partitionKeyList) throws SQLException { + List partitions = table.getPartitions(); + for (Partition partition : partitions){ + List partitionColumnValue = new ArrayList(); + partitionColumnValue.add("\""+partition.getPartitionSpec().toString()+"\""); + for (String partitionKey : partitionKeyList) { + partitionColumnValue.add("\""+partition.getPartitionSpec().get(partitionKey)+"\""); + } + String insertPartitionValueSql = 
String.format(insertSQLTemplate, StringUtils.join(partitionColumnValue, ",")); + this.stmt.execute(insertPartitionValueSql); + } + } +} diff --git a/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/UserConfiguredPartitionClassification.java b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/UserConfiguredPartitionClassification.java new file mode 100644 index 00000000..1a979969 --- /dev/null +++ b/odpsreader/src/main/java/com/alibaba/datax/plugin/reader/odpsreader/util/UserConfiguredPartitionClassification.java @@ -0,0 +1,39 @@ +package com.alibaba.datax.plugin.reader.odpsreader.util; + +import java.util.List; + +public class UserConfiguredPartitionClassification { + + //包含/*query*/的partition, 例如: /*query*/ dt>=20170101 and dt<= 20170109 + private List userConfiguredHintPartition; + + //不包含/*query*/的partition, 例如: dt=20170101 或者 dt=201701* + private List userConfiguredNormalPartition; + + //是否包含hint的partition + private boolean isIncludeHintPartition; + + public List getUserConfiguredHintPartition() { + return userConfiguredHintPartition; + } + + public void setUserConfiguredHintPartition(List userConfiguredHintPartition) { + this.userConfiguredHintPartition = userConfiguredHintPartition; + } + + public List getUserConfiguredNormalPartition() { + return userConfiguredNormalPartition; + } + + public void setUserConfiguredNormalPartition(List userConfiguredNormalPartition) { + this.userConfiguredNormalPartition = userConfiguredNormalPartition; + } + + public boolean isIncludeHintPartition() { + return isIncludeHintPartition; + } + + public void setIncludeHintPartition(boolean includeHintPartition) { + isIncludeHintPartition = includeHintPartition; + } +} diff --git a/odpsreader/src/main/libs/bcprov-jdk15on-1.52.jar b/odpsreader/src/main/libs/bcprov-jdk15on-1.52.jar deleted file mode 100644 index 6c54dd90..00000000 Binary files a/odpsreader/src/main/libs/bcprov-jdk15on-1.52.jar and /dev/null differ diff --git a/odpswriter/doc/odpswriter.md b/odpswriter/doc/odpswriter.md index d81672b0..845dd1d3 100644 --- a/odpswriter/doc/odpswriter.md +++ b/odpswriter/doc/odpswriter.md @@ -71,8 +71,7 @@ ODPSWriter插件用于实现往ODPS插入或者更新数据,主要提供给etl "accessKey": "xxxx", "truncate": true, "odpsServer": "http://sxxx/api", - "tunnelServer": "http://xxx", - "accountType": "aliyun" + "tunnelServer": "http://xxx" } } } diff --git a/odpswriter/pom.xml b/odpswriter/pom.xml index 8073ec43..c253e3fc 100755 --- a/odpswriter/pom.xml +++ b/odpswriter/pom.xml @@ -31,17 +31,10 @@ logback-classic - org.bouncycastle - bcprov-jdk15on - 1.52 - system - ${basedir}/src/main/libs/bcprov-jdk15on-1.52.jar + com.aliyun.odps + odps-sdk-core + 0.38.4-public - - com.aliyun.odps - odps-sdk-core - 0.20.7-public - @@ -51,6 +44,14 @@ + + + + org.mockito mockito-core @@ -70,9 +71,30 @@ test + + + org.aspectj + aspectjweaver + 1.8.10 + + + + commons-codec + commons-codec + 1.8 + + + + + src/main/java + + **/*.properties + + + diff --git a/odpswriter/src/main/assembly/package.xml b/odpswriter/src/main/assembly/package.xml index 7d3c91b5..0ef0b43b 100755 --- a/odpswriter/src/main/assembly/package.xml +++ b/odpswriter/src/main/assembly/package.xml @@ -23,13 +23,6 @@ plugin/writer/odpswriter - - src/main/libs - - *.* - - plugin/writer/odpswriter/libs - diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Constant.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Constant.java index 22bcc16c..efedfea9 100755 --- 
a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Constant.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Constant.java @@ -2,14 +2,37 @@ package com.alibaba.datax.plugin.writer.odpswriter; public class Constant { - public static final String SKYNET_ACCESSID = "SKYNET_ACCESSID"; - - public static final String SKYNET_ACCESSKEY = "SKYNET_ACCESSKEY"; - - public static final String DEFAULT_ACCOUNT_TYPE = "aliyun"; - - public static final String TAOBAO_ACCOUNT_TYPE = "taobao"; public static final String COLUMN_POSITION = "columnPosition"; + /* + * 每个task独立维护一个proxy列表,一共会生成 task并发量 * 分区数量 的proxy,每个proxy会创建 blocksizeInMB(一般是64M) 大小的数组 + * 因此极易OOM, + * 假设默认情况下768M的内存,实际最多只能创建 12 个proxy,8G内存最多只能创建126个proxy,所以最多只允许创建一定数量的proxy,对应到分区数量 1:1 + * + * blockSizeInMB 减小可以减少内存消耗,但是意味着更高频率的网络请求,会对odps服务器造成较大压力 + * + * 另外,可以考虑proxy不用常驻内存,但是需要增加复杂的控制逻辑 + * 但是一般情况下用户作为分区值得数据是有规律的,比如按照时间,2020-08的数据已经同步完成了,并且后面没有这个分区的数据了,对应的proxy还放在内存中, + * 会造成很大的内存浪费。所以有必要对某些proxy进行回收。 + * + * 这里采用是否回收某个proxy的标准是:在最近时间内是否有过数据传输。 + * + * + * 需要注意的问题! + * 多个任务公用一个proxy,写入时需要抢锁,多并发的性能会受到很大影响,相当于单个分区时串行写入 + * 这个对性能影响很大,需要避免这种方式,还是尽量各个task有独立的proxy,只是需要去控制内存的使用,只能是控制每个task保有的proxy数量了 + * + * 还可以考虑修改proxy的数组大小,但是设置太小不确定会不会影响性能。可以测试一下 + */ + + public static final Long PROXY_MAX_IDLE_TIME_MS =60 * 1000L; // 60s没有动作就回收 + + public static final Long MAX_PARTITION_CNT = 200L; + + public static final int UTF8_ENCODED_CHAR_MAX_SIZE = 6; + + public static final int DEFAULT_FIELD_MAX_SIZE = 8 * 1024 * 1024; + + } diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/DateTransForm.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/DateTransForm.java new file mode 100644 index 00000000..dedc9ecc --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/DateTransForm.java @@ -0,0 +1,57 @@ +package com.alibaba.datax.plugin.writer.odpswriter; + +public class DateTransForm { + /** + * 列名称 + */ + private String colName; + + /** + * 之前是什么格式 + */ + private String fromFormat; + + /** + * 要转换成什么格式 + */ + private String toFormat; + + public DateTransForm(String colName, String fromFormat, String toFormat) { + this.colName = colName; + this.fromFormat = fromFormat; + this.toFormat = toFormat; + } + + public String getColName() { + return colName; + } + + public void setColName(String colName) { + this.colName = colName; + } + + public String getFromFormat() { + return fromFormat; + } + + public void setFromFormat(String fromFormat) { + this.fromFormat = fromFormat; + } + + public String getToFormat() { + return toFormat; + } + + public void setToFormat(String toFormat) { + this.toFormat = toFormat; + } + + @Override + public String toString() { + return "DateTransForm{" + + "colName='" + colName + '\'' + + ", fromFormat='" + fromFormat + '\'' + + ", toFormat='" + toFormat + '\'' + + '}'; + } +} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Key.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Key.java index f578d72d..8dff8a4c 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Key.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Key.java @@ -11,6 +11,8 @@ public final class Key { public final static String ACCESS_KEY = "accessKey"; + public final static String SECURITY_TOKEN = "securityToken"; + public final static String PROJECT = "project"; public final static String TABLE = "table"; @@ -28,7 +30,59 @@ 
public final class Key { //boolean 类型,default:false public final static String EMPTY_AS_NULL = "emptyAsNull"; - public final static String ACCOUNT_TYPE = "accountType"; - public final static String IS_COMPRESS = "isCompress"; + + // preSql + public final static String PRE_SQL="preSql"; + + // postSql + public final static String POST_SQL="postSql"; + + public final static String CONSISTENCY_COMMIT = "consistencyCommit"; + + public final static String UPLOAD_ID = "uploadId"; + + public final static String TASK_COUNT = "taskCount"; + + /** + * support dynamic partition,支持动态分区,即根据读取到的record的某一列或几列来确定该record应该存入哪个分区 + * 1. 如何确定根据哪些列:根据目的表哪几列是分区列,再根据对应的column来路由 + * 2. 何时创建upload session:由于是动态分区,因此无法在初始化时确定分区,也就无法在初始化时创建 upload session,只有再读取到具体record之后才能创建 + * 3. 缓存 upload sesseion:每当出现新的分区,则创建新的session,同时将该分区对应的session缓存下来,以备下次又有需要存入该分区的记录 + * 4. 参数检查:不必要检查分区是否配置 + */ + public final static String SUPPORT_DYNAMIC_PARTITION = "supportDynamicPartition"; + + /** + * 动态分区下,用户如果将源表的某一个时间列映射到分区列,存在如下需求场景:源表的该时间列精确到秒,当时同步到odps表时,只想保留到天,并存入对应的天分区 + * 格式: + * "partitionColumnMapping":[ + * { + * "name":"pt", // 必填 + * "srcDateFormat":"YYYY-MM-dd hh:mm:ss", // 可选,可能源表中的时间列是 String 类型,此时必须通过 fromDateFormat 来指定源表中该列的日期格式 + * "dateFormat":"YYYY-MM-dd" // 必填 + * }, + * { + * ... + * }, + * + * ... + * ] + */ + public final static String PARTITION_COL_MAPPING = "partitionColumnMapping"; + public final static String PARTITION_COL_MAPPING_NAME = "name"; + public final static String PARTITION_COL_MAPPING_SRC_COL_DATEFORMAT = "srcDateFormat"; + public final static String PARTITION_COL_MAPPING_DATEFORMAT = "dateFormat"; + public final static String WRITE_TIMEOUT_IN_MS = "writeTimeoutInMs"; + + public static final String OVER_LENGTH_RULE = "overLengthRule"; + //截断后保留的最大长度 + public static final String MAX_FIELD_LENGTH = "maxFieldLength"; + //odps本身支持的最大长度 + public static final String MAX_ODPS_FIELD_LENGTH = "maxOdpsFieldLength"; + public static final String ENABLE_OVER_LENGTH_OUTPUT = "enableOverLengthOutput"; + public static final String MAX_OVER_LENGTH_OUTPUT_COUNT = "maxOverLengthOutputCount"; + + //动态分区写入模式下,内存使用率达到80%则flush时间间隔,单位分钟 + public static final String DYNAMIC_PARTITION_MEM_USAGE_FLUSH_INTERVAL_IN_MINUTE = "dynamicPartitionMemUsageFlushIntervalInMinute"; } diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/LocalStrings.properties b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/LocalStrings.properties new file mode 100644 index 00000000..be7862af --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/LocalStrings.properties @@ -0,0 +1,34 @@ +errorcode.required_value=\u60a8\u7f3a\u5931\u4e86\u5fc5\u987b\u586b\u5199\u7684\u53c2\u6570\u503c. +errorcode.illegal_value=\u60a8\u914d\u7f6e\u7684\u503c\u4e0d\u5408\u6cd5. +errorcode.unsupported_column_type=DataX \u4e0d\u652f\u6301\u5199\u5165 ODPS \u7684\u76ee\u7684\u8868\u7684\u6b64\u79cd\u6570\u636e\u7c7b\u578b. +errorcode.table_truncate_error=\u6e05\u7a7a ODPS \u76ee\u7684\u8868\u65f6\u51fa\u9519. +errorcode.create_master_upload_fail=\u521b\u5efa ODPS \u7684 uploadSession \u5931\u8d25. +errorcode.get_slave_upload_fail=\u83b7\u53d6 ODPS \u7684 uploadSession \u5931\u8d25. +errorcode.get_id_key_fail=\u83b7\u53d6 accessId/accessKey \u5931\u8d25. +errorcode.get_partition_fail=\u83b7\u53d6 ODPS \u76ee\u7684\u8868\u7684\u6240\u6709\u5206\u533a\u5931\u8d25. +errorcode.add_partition_failed=\u6dfb\u52a0\u5206\u533a\u5230 ODPS \u76ee\u7684\u8868\u5931\u8d25. 
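The new Key constants above (supportDynamicPartition, partitionColumnMapping and related keys) let the writer derive each record's target partition from its own column values, optionally reformatting a source timestamp (srcDateFormat to dateFormat) so that second-level data lands in a day-level partition. The sketch below illustrates that routing idea only; the class and method names are illustrative rather than the plugin's own, and it assumes the partition spec is assembled as name=value text.

```java
import java.text.ParseException;
import java.text.SimpleDateFormat;

public class DynamicPartitionDemo {
    /**
     * Reformat the source value of a partition column (cf. DateTransForm) and build a
     * partition spec from it. Note the Key.java example writes the formats as
     * "YYYY-MM-dd hh:mm:ss"; SimpleDateFormat itself expects yyyy and HH for the
     * calendar year and 24-hour fields, as used here.
     */
    static String buildPartitionSpec(String colName, String srcValue,
                                     String srcDateFormat, String dateFormat) throws ParseException {
        SimpleDateFormat from = new SimpleDateFormat(srcDateFormat);
        SimpleDateFormat to = new SimpleDateFormat(dateFormat);
        return colName + "=" + to.format(from.parse(srcValue));
    }

    public static void main(String[] args) throws ParseException {
        // A record whose source time column is precise to the second is routed to a day partition.
        String spec = buildPartitionSpec("pt", "2020-08-01 13:45:12",
                "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd");
        System.out.println(spec); // pt=2020-08-01
    }
}
```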
+errorcode.writer_record_fail=\u5199\u5165\u6570\u636e\u5230 ODPS \u76ee\u7684\u8868\u5931\u8d25. +errorcode.commit_block_fail=\u63d0\u4ea4 block \u5230 ODPS \u76ee\u7684\u8868\u5931\u8d25. +errorcode.run_sql_failed=\u6267\u884c ODPS Sql \u5931\u8d25. +errorcode.check_if_partitioned_table_failed=\u68c0\u67e5 ODPS \u76ee\u7684\u8868:%s \u662f\u5426\u4e3a\u5206\u533a\u8868\u5931\u8d25. +errorcode.run_sql_odps_exception=\u6267\u884c ODPS Sql \u65f6\u629b\u51fa\u5f02\u5e38, \u53ef\u91cd\u8bd5 +errorcode.account_type_error=\u8d26\u53f7\u7c7b\u578b\u9519\u8bef. +errorcode.partition_error=\u5206\u533a\u914d\u7f6e\u9519\u8bef. +errorcode.column_not_exist=\u7528\u6237\u914d\u7f6e\u7684\u5217\u4e0d\u5b58\u5728. +errorcode.odps_project_not_fount=\u60a8\u914d\u7f6e\u7684\u503c\u4e0d\u5408\u6cd5, odps project \u4e0d\u5b58\u5728. +errorcode.odps_table_not_fount=\u60a8\u914d\u7f6e\u7684\u503c\u4e0d\u5408\u6cd5, odps table \u4e0d\u5b58\u5728 +errorcode.odps_access_key_id_not_found=\u60a8\u914d\u7f6e\u7684\u503c\u4e0d\u5408\u6cd5, odps accessId,accessKey \u4e0d\u5b58\u5728 +errorcode.odps_access_key_invalid=\u60a8\u914d\u7f6e\u7684\u503c\u4e0d\u5408\u6cd5, odps accessKey \u9519\u8bef +errorcode.odps_access_deny=\u62d2\u7edd\u8bbf\u95ee, \u60a8\u4e0d\u5728 \u60a8\u914d\u7f6e\u7684 project \u4e2d + + +odpswriter.1=\u8d26\u53f7\u7c7b\u578b\u9519\u8bef\uff0c\u56e0\u4e3a\u4f60\u7684\u8d26\u53f7 [{0}] \u4e0d\u662fdatax\u76ee\u524d\u652f\u6301\u7684\u8d26\u53f7\u7c7b\u578b\uff0c\u76ee\u524d\u4ec5\u652f\u6301aliyun, taobao\u8d26\u53f7\uff0c\u8bf7\u4fee\u6539\u60a8\u7684\u8d26\u53f7\u4fe1\u606f. +odpswriter.2=\u8fd9\u662f\u4e00\u6761\u9700\u8981\u6ce8\u610f\u7684\u4fe1\u606f \u7531\u4e8e\u60a8\u7684\u4f5c\u4e1a\u914d\u7f6e\u4e86\u5199\u5165 ODPS \u7684\u76ee\u7684\u8868\u65f6emptyAsNull=true, \u6240\u4ee5 DataX\u5c06\u4f1a\u628a\u957f\u5ea6\u4e3a0\u7684\u7a7a\u5b57\u7b26\u4e32\u4f5c\u4e3a java \u7684 null \u5199\u5165 ODPS. +odpswriter.3=\u60a8\u914d\u7f6e\u7684blockSizeInMB:{0} \u53c2\u6570\u9519\u8bef. \u6b63\u786e\u7684\u914d\u7f6e\u662f[1-512]\u4e4b\u95f4\u7684\u6574\u6570. \u8bf7\u4fee\u6539\u6b64\u53c2\u6570\u7684\u503c\u4e3a\u8be5\u533a\u95f4\u5185\u7684\u6570\u503c +odpswriter.4=\u5199\u5165 ODPS \u76ee\u7684\u8868\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. + + +odpswriterproxy.1=\u4eb2\uff0c\u914d\u7f6e\u4e2d\u7684\u6e90\u8868\u7684\u5217\u4e2a\u6570\u548c\u76ee\u7684\u7aef\u8868\u4e0d\u4e00\u81f4\uff0c\u6e90\u8868\u4e2d\u60a8\u914d\u7f6e\u7684\u5217\u6570\u662f:{0} \u5927\u4e8e\u76ee\u7684\u7aef\u7684\u5217\u6570\u662f:{1} , \u8fd9\u6837\u4f1a\u5bfc\u81f4\u6e90\u5934\u6570\u636e\u65e0\u6cd5\u6b63\u786e\u5bfc\u5165\u76ee\u7684\u7aef, \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4fee\u6539. +odpswriterproxy.2=\u6e90\u8868\u7684\u5217\u4e2a\u6570\u5c0f\u4e8e\u76ee\u7684\u8868\u7684\u5217\u4e2a\u6570\uff0c\u6e90\u8868\u5217\u6570\u662f:{0} \u76ee\u7684\u8868\u5217\u6570\u662f:{1} , \u6570\u76ee\u4e0d\u5339\u914d. DataX \u4f1a\u628a\u76ee\u7684\u7aef\u591a\u51fa\u7684\u5217\u7684\u503c\u8bbe\u7f6e\u4e3a\u7a7a\u503c. \u5982\u679c\u8fd9\u4e2a\u9ed8\u8ba4\u914d\u7f6e\u4e0d\u7b26\u5408\u60a8\u7684\u671f\u671b\uff0c\u8bf7\u4fdd\u6301\u6e90\u8868\u548c\u76ee\u7684\u8868\u914d\u7f6e\u7684\u5217\u6570\u76ee\u4fdd\u6301\u4e00\u81f4. 
+odpswriterproxy.3=Odps decimal \u7c7b\u578b\u7684\u6574\u6570\u4f4d\u4e2a\u6570\u4e0d\u80fd\u8d85\u8fc735 +odpswriterproxy.4=\u5199\u5165 ODPS \u76ee\u7684\u8868\u65f6\u9047\u5230\u4e86\u810f\u6570\u636e: \u7b2c[{0}]\u4e2a\u5b57\u6bb5 {1} \u7684\u6570\u636e\u51fa\u73b0\u9519\u8bef\uff0c\u8bf7\u68c0\u67e5\u8be5\u6570\u636e\u5e76\u4f5c\u51fa\u4fee\u6539 \u6216\u8005\u60a8\u53ef\u4ee5\u589e\u5927\u9600\u503c\uff0c\u5ffd\u7565\u8fd9\u6761\u8bb0\u5f55. \ No newline at end of file diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriter.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriter.java index 60deb5dd..9b7276fa 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriter.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriter.java @@ -8,29 +8,49 @@ import com.alibaba.datax.common.spi.Writer; import com.alibaba.datax.common.statistics.PerfRecord; import com.alibaba.datax.common.util.Configuration; import com.alibaba.datax.common.util.ListUtil; -import com.alibaba.datax.plugin.writer.odpswriter.util.IdAndKeyUtil; -import com.alibaba.datax.plugin.writer.odpswriter.util.OdpsUtil; - +import com.alibaba.datax.common.util.MessageSource; +import com.alibaba.datax.plugin.writer.odpswriter.model.PartitionInfo; +import com.alibaba.datax.plugin.writer.odpswriter.model.UserDefinedFunction; +import com.alibaba.datax.plugin.writer.odpswriter.util.*; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONArray; +import com.alibaba.fastjson2.JSONObject; import com.aliyun.odps.Odps; import com.aliyun.odps.Table; import com.aliyun.odps.TableSchema; import com.aliyun.odps.tunnel.TableTunnel; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.MutablePair; +import org.apache.commons.lang3.tuple.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.List; +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryUsage; +import java.util.*; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; + +import static com.alibaba.datax.plugin.writer.odpswriter.util.CustomPartitionUtils.getListWithJson; /** * 已修改为:每个 task 各自创建自己的 upload,拥有自己的 uploadId,并在 task 中完成对对应 block 的提交。 */ public class OdpsWriter extends Writer { + public static HashSet partitionsDealedTruncate = new HashSet<>(); + static final Object lockForPartitionDealedTruncate = new Object(); + public static AtomicInteger partitionCnt = new AtomicInteger(0); + public static Long maxPartitionCnt; + public static AtomicLong globalTotalTruncatedRecordNumber = new AtomicLong(0); + public static Long maxOutputOverLengthRecord; + public static int maxOdpsFieldLength = Constant.DEFAULT_FIELD_MAX_SIZE; + public static class Job extends Writer.Job { private static final Logger LOG = LoggerFactory .getLogger(Job.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsWriter.class); private static final boolean IS_DEBUG = LOG.isDebugEnabled(); @@ -42,11 +62,12 @@ public class OdpsWriter extends Writer { private String tableName; private String tunnelServer; private String partition; - private String accountType; private boolean truncate; private String uploadId; private TableTunnel.UploadSession masterUpload; private int blockSizeInMB; + private boolean consistencyCommit; + private boolean 
supportDynamicPartition; public void preCheck() { this.init(); @@ -54,74 +75,74 @@ public class OdpsWriter extends Writer { } public void doPreCheck() { - //检查accessId,accessKey配置 - if (Constant.DEFAULT_ACCOUNT_TYPE - .equalsIgnoreCase(this.accountType)) { - this.originalConfig = IdAndKeyUtil.parseAccessIdAndKey(this.originalConfig); - String accessId = this.originalConfig.getString(Key.ACCESS_ID); - String accessKey = this.originalConfig.getString(Key.ACCESS_KEY); - if (IS_DEBUG) { - LOG.debug("accessId:[{}], accessKey:[{}] .", accessId, - accessKey); - } - LOG.info("accessId:[{}] .", accessId); - } - // init odps config - this.odps = OdpsUtil.initOdpsProject(this.originalConfig); - - //检查表等配置是否正确 - this.table = OdpsUtil.getTable(odps,this.projectName,this.tableName); //检查列信息是否正确 List allColumns = OdpsUtil.getAllColumns(this.table.getSchema()); LOG.info("allColumnList: {} .", StringUtils.join(allColumns, ',')); - dealColumn(this.originalConfig, allColumns); + List allPartColumns = OdpsUtil.getAllPartColumns(this.table.getSchema()); + LOG.info("allPartColumnsList: {} .", StringUtils.join(allPartColumns, ',')); + dealColumn(this.originalConfig, allColumns, allPartColumns); //检查分区信息是否正确 - OdpsUtil.preCheckPartition(this.odps, this.table, this.partition, this.truncate); + if (!supportDynamicPartition) { + OdpsUtil.preCheckPartition(this.odps, this.table, this.partition, this.truncate); + } } @Override public void init() { this.originalConfig = super.getPluginJobConf(); + OdpsUtil.checkNecessaryConfig(this.originalConfig); OdpsUtil.dealMaxRetryTime(this.originalConfig); + + this.projectName = this.originalConfig.getString(Key.PROJECT); this.tableName = this.originalConfig.getString(Key.TABLE); this.tunnelServer = this.originalConfig.getString(Key.TUNNEL_SERVER, null); + // init odps config + this.odps = OdpsUtil.initOdpsProject(this.originalConfig); + + //检查表等配置是否正确 + this.table = OdpsUtil.getTable(odps, this.projectName, this.tableName); + + // 处理动态分区参数,以及动态分区相关配置是否合法,如果没有配置动态分区,则根据列映射配置决定是否启用 + this.dealDynamicPartition(); + //check isCompress this.originalConfig.getBool(Key.IS_COMPRESS, false); - this.partition = OdpsUtil.formatPartition(this.originalConfig - .getString(Key.PARTITION, "")); - this.originalConfig.set(Key.PARTITION, this.partition); - - this.accountType = this.originalConfig.getString(Key.ACCOUNT_TYPE, - Constant.DEFAULT_ACCOUNT_TYPE); - if (!Constant.DEFAULT_ACCOUNT_TYPE.equalsIgnoreCase(this.accountType) && - !Constant.TAOBAO_ACCOUNT_TYPE.equalsIgnoreCase(this.accountType)) { - throw DataXException.asDataXException(OdpsWriterErrorCode.ACCOUNT_TYPE_ERROR, - String.format("账号类型错误,因为你的账号 [%s] 不是datax目前支持的账号类型,目前仅支持aliyun, taobao账号,请修改您的账号信息.", accountType)); + // 如果不是动态分区写入,则检查分区配置,动态分区写入不用检查 + if (!this.supportDynamicPartition) { + this.partition = OdpsUtil.formatPartition(this.originalConfig + .getString(Key.PARTITION, ""), true); + this.originalConfig.set(Key.PARTITION, this.partition); } - this.originalConfig.set(Key.ACCOUNT_TYPE, this.accountType); this.truncate = this.originalConfig.getBool(Key.TRUNCATE); + this.consistencyCommit = this.originalConfig.getBool(Key.CONSISTENCY_COMMIT, false); + boolean emptyAsNull = this.originalConfig.getBool(Key.EMPTY_AS_NULL, false); this.originalConfig.set(Key.EMPTY_AS_NULL, emptyAsNull); if (emptyAsNull) { - LOG.warn("这是一条需要注意的信息 由于您的作业配置了写入 ODPS 的目的表时emptyAsNull=true, 所以 DataX将会把长度为0的空字符串作为 java 的 null 写入 ODPS."); + LOG.warn(MESSAGE_SOURCE.message("odpswriter.2")); } this.blockSizeInMB = 
this.originalConfig.getInt(Key.BLOCK_SIZE_IN_MB, 64); - if(this.blockSizeInMB < 8) { + if (this.blockSizeInMB < 8) { this.blockSizeInMB = 8; } this.originalConfig.set(Key.BLOCK_SIZE_IN_MB, this.blockSizeInMB); LOG.info("blockSizeInMB={}.", this.blockSizeInMB); + maxPartitionCnt = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax() / 1024 / 1024 / this.blockSizeInMB; + if (maxPartitionCnt < Constant.MAX_PARTITION_CNT) { + maxPartitionCnt = Constant.MAX_PARTITION_CNT; + } + LOG.info("maxPartitionCnt={}", maxPartitionCnt); if (IS_DEBUG) { LOG.debug("After master init(), job config now is: [\n{}\n] .", @@ -129,29 +150,95 @@ public class OdpsWriter extends Writer { } } - @Override - public void prepare() { - String accessId = null; - String accessKey = null; - if (Constant.DEFAULT_ACCOUNT_TYPE - .equalsIgnoreCase(this.accountType)) { - this.originalConfig = IdAndKeyUtil.parseAccessIdAndKey(this.originalConfig); - accessId = this.originalConfig.getString(Key.ACCESS_ID); - accessKey = this.originalConfig.getString(Key.ACCESS_KEY); - if (IS_DEBUG) { - LOG.debug("accessId:[{}], accessKey:[{}] .", accessId, - accessKey); + private void dealDynamicPartition() { + /* + * 如果显示配置了 supportDynamicPartition,则以配置为准 + * 如果没有配置,表为分区表且 列映射中包所有含分区列 + */ + List partitionCols = OdpsUtil.getAllPartColumns(this.table.getSchema()); + List configCols = this.originalConfig.getList(Key.COLUMN, String.class); + LOG.info("partition columns:{}", partitionCols); + LOG.info("config columns:{}", configCols); + LOG.info("support dynamic partition:{}",this.originalConfig.getBool(Key.SUPPORT_DYNAMIC_PARTITION)); + LOG.info("partition format type:{}",this.originalConfig.getString("partitionFormatType")); + if (this.originalConfig.getKeys().contains(Key.SUPPORT_DYNAMIC_PARTITION)) { + this.supportDynamicPartition = this.originalConfig.getBool(Key.SUPPORT_DYNAMIC_PARTITION); + if (supportDynamicPartition) { + // 自定义分区 + if("custom".equalsIgnoreCase(originalConfig.getString("partitionFormatType"))){ + List partitions = getListWithJson(originalConfig,"customPartitionColumns",PartitionInfo.class); + // 自定义分区配置必须与实际分区列完全一致 + if (!ListUtil.checkIfAllSameValue(partitions.stream().map(item->item.getName()).collect(Collectors.toList()), partitionCols)) { + throw DataXException.asDataXException("custom partition config is not same as real partition info."); + } + } else { + // 设置动态分区写入为真--检查是否所有分区列都配置在了列映射中,不满足则抛出异常 + if (!ListUtil.checkIfBInA(configCols, partitionCols, false)) { + throw DataXException.asDataXException("You config supportDynamicPartition as true, but didn't config all partition columns"); + } + } + } else { + // 设置动态分区写入为假--确保列映射中没有配置分区列,配置则抛出异常 + if (ListUtil.checkIfHasSameValue(configCols, partitionCols)) { + throw DataXException.asDataXException("You should config all partition columns in column param, or you can specify a static partition param"); + } + } + } else { + if (OdpsUtil.isPartitionedTable(table)) { + // 分区表,列映射配置了分区,同时检查所有分区列要么都被配置,要么都没有配置 + if (ListUtil.checkIfBInA(configCols, partitionCols, false)) { + // 所有的partition 列都配置在了column中 + this.supportDynamicPartition = true; + } else { + // 并非所有partition列都配置在了column中,此时还需检查是否只配置了部分,如果只配置了部分,则报错 + if (ListUtil.checkIfHasSameValue(configCols, partitionCols)) { + throw DataXException.asDataXException("You should config all partition columns in column param, or you can specify a static partition param"); + } + // 分区列没有配置任何分区列,则设置为false + this.supportDynamicPartition = false; + } + } else { + LOG.info("{} is not a partition tale, set 
supportDynamicPartition as false", this.tableName); + this.supportDynamicPartition = false; } - LOG.info("accessId:[{}] .", accessId); } + // 分布式下不支持动态分区写入,如果是分布式模式则报错 + LOG.info("current run mode: {}", System.getProperty("datax.executeMode")); + if (supportDynamicPartition && StringUtils.equalsIgnoreCase("distribute", System.getProperty("datax.executeMode"))) { + LOG.error("Distribute mode don't support dynamic partition writing"); + System.exit(1); + } + } + + @Override + public void prepare() { // init odps config this.odps = OdpsUtil.initOdpsProject(this.originalConfig); - //检查表等配置是否正确 - this.table = OdpsUtil.getTable(odps,this.projectName,this.tableName); + List preSqls = this.originalConfig.getList(Key.PRE_SQL, String.class); + if (preSqls != null && !preSqls.isEmpty()) { + LOG.info(String.format("Begin to execute preSql : %s. \n Attention: these preSqls must be idempotent!!!", + JSON.toJSONString(preSqls))); + long beginTime = System.currentTimeMillis(); + for (String preSql : preSqls) { + preSql = preSql.trim(); + if (!preSql.endsWith(";")) { + preSql = String.format("%s;", preSql); + } + OdpsUtil.runSqlTaskWithRetry(this.odps, preSql, "preSql"); + } + long endTime = System.currentTimeMillis(); + LOG.info(String.format("Execute odpswriter preSql successfully! cost time: %s ms.", (endTime - beginTime))); + } - OdpsUtil.dealTruncate(this.odps, this.table, this.partition, this.truncate); + //检查表等配置是否正确 + this.table = OdpsUtil.getTable(odps, this.projectName, this.tableName); + + // 如果是动态分区写入,因为无需配置分区信息,因此也无法在任务初始化时进行 truncate + if (!supportDynamicPartition) { + OdpsUtil.dealTruncate(this.odps, this.table, this.partition, this.truncate); + } } /** @@ -169,20 +256,34 @@ public class OdpsWriter extends Writer { tableTunnel.setEndpoint(tunnelServer); } - this.masterUpload = OdpsUtil.createMasterTunnelUpload( - tableTunnel, this.projectName, this.tableName, this.partition); - this.uploadId = this.masterUpload.getId(); - LOG.info("Master uploadId:[{}].", this.uploadId); - - TableSchema schema = this.masterUpload.getSchema(); + TableSchema schema = this.table.getSchema(); List allColumns = OdpsUtil.getAllColumns(schema); LOG.info("allColumnList: {} .", StringUtils.join(allColumns, ',')); + List allPartColumns = OdpsUtil.getAllPartColumns(this.table.getSchema()); + LOG.info("allPartColumnsList: {} .", StringUtils.join(allPartColumns, ',')); + dealColumn(this.originalConfig, allColumns, allPartColumns); + this.originalConfig.set("allColumns", allColumns); - dealColumn(this.originalConfig, allColumns); + // 动态分区模式下,无法事先根据分区创建好 session, + if (!supportDynamicPartition) { + this.masterUpload = OdpsUtil.createMasterTunnelUpload( + tableTunnel, this.projectName, this.tableName, this.partition); + this.uploadId = this.masterUpload.getId(); + LOG.info("Master uploadId:[{}].", this.uploadId); + } for (int i = 0; i < mandatoryNumber; i++) { Configuration tempConfig = this.originalConfig.clone(); + // 非动态分区模式下,设置了统一提交,则需要克隆主 upload session,否则各个 task "各自为战" + if (!supportDynamicPartition && this.consistencyCommit) { + tempConfig.set(Key.UPLOAD_ID, uploadId); + tempConfig.set(Key.TASK_COUNT, mandatoryNumber); + } + + // 设置task的supportDynamicPartition属性 + tempConfig.set(Key.SUPPORT_DYNAMIC_PARTITION, this.supportDynamicPartition); + configurations.add(tempConfig); } @@ -190,14 +291,18 @@ LOG.debug("After master split, the job config now is:[\n{}\n].", this.originalConfig); } - this.masterUpload = null; - return configurations; } - private void 
dealColumn(Configuration originalConfig, List allColumns) { + private void dealColumn(Configuration originalConfig, List allColumns, List allPartColumns) { //之前已经检查了userConfiguredColumns 一定不为空 List userConfiguredColumns = originalConfig.getList(Key.COLUMN, String.class); + + // 动态分区下column不支持配置* + if (supportDynamicPartition && userConfiguredColumns.contains("*")) { + throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, + "In dynamic partition write mode you can't specify column with *."); + } if (1 == userConfiguredColumns.size() && "*".equals(userConfiguredColumns.get(0))) { userConfiguredColumns = allColumns; originalConfig.set(Key.COLUMN, allColumns); @@ -206,15 +311,51 @@ ListUtil.makeSureNoValueDuplicate(userConfiguredColumns, false); //检查列是否存在,大小写不敏感 - ListUtil.makeSureBInA(allColumns, userConfiguredColumns, false); + if (supportDynamicPartition) { + List allColumnList = new ArrayList(); + allColumnList.addAll(allColumns); + allColumnList.addAll(allPartColumns); + ListUtil.makeSureBInA(allColumnList, userConfiguredColumns, false); + } else { + ListUtil.makeSureBInA(allColumns, userConfiguredColumns, false); + } } - List columnPositions = OdpsUtil.parsePosition(allColumns, userConfiguredColumns); + // 获取配置的所有数据列在目标表中所有数据列中的真正位置, -1 代表该列为分区列 + List columnPositions = OdpsUtil.parsePosition(allColumns, allPartColumns, userConfiguredColumns); originalConfig.set(Constant.COLUMN_POSITION, columnPositions); } @Override public void post() { + + if (supportDynamicPartition) { + LOG.info("Total create partition cnt:{}", partitionCnt); + } + + if (!supportDynamicPartition && this.consistencyCommit) { + LOG.info("Master which uploadId=[{}] begin to commit blocks.", this.uploadId); + OdpsUtil.masterComplete(this.masterUpload); + LOG.info("Master which uploadId=[{}] commit blocks ok.", this.uploadId); + } + + List postSqls = this.originalConfig.getList(Key.POST_SQL, String.class); + if (postSqls != null && !postSqls.isEmpty()) { + LOG.info(String.format("Begin to execute postSql : %s. \n Attention: these postSqls must be idempotent!!!", + JSON.toJSONString(postSqls))); + long beginTime = System.currentTimeMillis(); + for (String postSql : postSqls) { + postSql = postSql.trim(); + if (!postSql.endsWith(";")) { + postSql = String.format("%s;", postSql); + } + OdpsUtil.runSqlTaskWithRetry(this.odps, postSql, "postSql"); + } + long endTime = System.currentTimeMillis(); + LOG.info(String.format("Execute odpswriter postSql successfully! 
cost time: %s ms.", (endTime - beginTime))); + } + + LOG.info("truncated record count: {}", globalTotalTruncatedRecordNumber.intValue() ); } @Override @@ -226,6 +367,7 @@ public class OdpsWriter extends Writer { public static class Task extends Writer.Task { private static final Logger LOG = LoggerFactory .getLogger(Task.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsWriter.class); private static final boolean IS_DEBUG = LOG.isDebugEnabled(); @@ -246,18 +388,54 @@ public class OdpsWriter extends Writer { private List blocks; private int blockSizeInMB; + private boolean consistencyCommit; + + private int taskId; + private int taskCount; + private Integer failoverState = 0; //0 未failover 1准备failover 2已提交,不能failover private byte[] lock = new byte[0]; + private List allColumns; + + /* + * Partition 和 session 的对应关系,处理 record 时,路由到哪个分区,则通过对应的 proxy 上传 + * Key 为 所有分区列的值按配置顺序拼接 + */ + private HashMap>> partitionUploadSessionHashMap; + private Boolean supportDynamicPartition; + private TableTunnel tableTunnel; + private Table table; + + /** + * 保存分区列格式转换规则,只支持源表是 Date 列,或者内容为日期的 String 列 + */ + private HashMap dateTransFormMap; + + private Long writeTimeOutInMs; + + private String overLengthRule; + private int maxFieldLength; + private Boolean enableOverLengthOutput; + + /** + * 动态分区写入模式下,内存使用率达到80%则flush时间间隔,单位分钟 + * 默认5分钟做flush, 避免出现频繁的flush导致小文件问题 + */ + private int dynamicPartitionMemUsageFlushIntervalInMinute = 1; + + private long latestFlushTime = 0; @Override public void init() { this.sliceConfig = super.getPluginJobConf(); + // 默认十分钟超时时间 + this.writeTimeOutInMs = this.sliceConfig.getLong(Key.WRITE_TIMEOUT_IN_MS, 10 * 60 * 1000); this.projectName = this.sliceConfig.getString(Key.PROJECT); this.tableName = this.sliceConfig.getString(Key.TABLE); this.tunnelServer = this.sliceConfig.getString(Key.TUNNEL_SERVER, null); this.partition = OdpsUtil.formatPartition(this.sliceConfig - .getString(Key.PARTITION, "")); + .getString(Key.PARTITION, ""), true); this.sliceConfig.set(Key.PARTITION, this.partition); this.emptyAsNull = this.sliceConfig.getBool(Key.EMPTY_AS_NULL); @@ -265,9 +443,49 @@ public class OdpsWriter extends Writer { this.isCompress = this.sliceConfig.getBool(Key.IS_COMPRESS, false); if (this.blockSizeInMB < 1 || this.blockSizeInMB > 512) { throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, - String.format("您配置的blockSizeInMB:%s 参数错误. 正确的配置是[1-512]之间的整数. 
请修改此参数的值为该区间内的数值", this.blockSizeInMB)); + MESSAGE_SOURCE.message("odpswriter.3", this.blockSizeInMB)); } + this.taskId = this.getTaskId(); + this.taskCount = this.sliceConfig.getInt(Key.TASK_COUNT, 0); + + this.supportDynamicPartition = this.sliceConfig.getBool(Key.SUPPORT_DYNAMIC_PARTITION, false); + + if (!supportDynamicPartition) { + this.consistencyCommit = this.sliceConfig.getBool(Key.CONSISTENCY_COMMIT, false); + if (consistencyCommit) { + this.uploadId = this.sliceConfig.getString(Key.UPLOAD_ID); + if (this.uploadId == null || this.uploadId.isEmpty()) { + throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, + MESSAGE_SOURCE.message("odpswriter.3", this.uploadId)); + } + } + } else { + this.partitionUploadSessionHashMap = new HashMap<>(); + + // 根据 partColFormats 参数初始化 dateTransFormMap + String dateTransListStr = this.sliceConfig.getString(Key.PARTITION_COL_MAPPING); + if (StringUtils.isNotBlank(dateTransListStr)) { + this.dateTransFormMap = new HashMap<>(); + JSONArray dateTransFormJsonArray = JSONArray.parseArray(dateTransListStr); + for (Object dateTransFormJson : dateTransFormJsonArray) { + DateTransForm dateTransForm = new DateTransForm( + ((JSONObject)dateTransFormJson).getString(Key.PARTITION_COL_MAPPING_NAME), + ((JSONObject)dateTransFormJson).getString(Key.PARTITION_COL_MAPPING_SRC_COL_DATEFORMAT), + ((JSONObject)dateTransFormJson).getString(Key.PARTITION_COL_MAPPING_DATEFORMAT)); + this.dateTransFormMap.put(((JSONObject)dateTransFormJson).getString(Key.PARTITION_COL_MAPPING_NAME), dateTransForm); + } + } + } + this.allColumns = this.sliceConfig.getList("allColumns", String.class); + this.overLengthRule = this.sliceConfig.getString(Key.OVER_LENGTH_RULE, "keepOn").toUpperCase(); + this.maxFieldLength = this.sliceConfig.getInt(Key.MAX_FIELD_LENGTH, Constant.DEFAULT_FIELD_MAX_SIZE); + this.enableOverLengthOutput = this.sliceConfig.getBool(Key.ENABLE_OVER_LENGTH_OUTPUT, true); + maxOutputOverLengthRecord = this.sliceConfig.getLong(Key.MAX_OVER_LENGTH_OUTPUT_COUNT); + maxOdpsFieldLength = this.sliceConfig.getInt(Key.MAX_ODPS_FIELD_LENGTH, Constant.DEFAULT_FIELD_MAX_SIZE); + + this.dynamicPartitionMemUsageFlushIntervalInMinute = this.sliceConfig.getInt(Key.DYNAMIC_PARTITION_MEM_USAGE_FLUSH_INTERVAL_IN_MINUTE, + 1); if (IS_DEBUG) { LOG.debug("After init in task, sliceConfig now is:[\n{}\n].", this.sliceConfig); } @@ -277,24 +495,32 @@ public class OdpsWriter extends Writer { @Override public void prepare() { this.odps = OdpsUtil.initOdpsProject(this.sliceConfig); + this.tableTunnel = new TableTunnel(this.odps); - TableTunnel tableTunnel = new TableTunnel(this.odps); - if (StringUtils.isNoneBlank(tunnelServer)) { - tableTunnel.setEndpoint(tunnelServer); + if (! 
supportDynamicPartition ) { + if (StringUtils.isNoneBlank(tunnelServer)) { + tableTunnel.setEndpoint(tunnelServer); + } + if (this.consistencyCommit) { + this.managerUpload = OdpsUtil.getSlaveTunnelUpload(this.tableTunnel, this.projectName, this.tableName, + this.partition, this.uploadId); + } else { + this.managerUpload = OdpsUtil.createMasterTunnelUpload(this.tableTunnel, this.projectName, + this.tableName, this.partition); + this.uploadId = this.managerUpload.getId(); + } + LOG.info("task uploadId:[{}].", this.uploadId); + this.workerUpload = OdpsUtil.getSlaveTunnelUpload(this.tableTunnel, this.projectName, + this.tableName, this.partition, uploadId); + } else { + this.table = OdpsUtil.getTable(this.odps, this.projectName, this.tableName); } - - this.managerUpload = OdpsUtil.createMasterTunnelUpload(tableTunnel, this.projectName, - this.tableName, this.partition); - this.uploadId = this.managerUpload.getId(); - LOG.info("task uploadId:[{}].", this.uploadId); - - this.workerUpload = OdpsUtil.getSlaveTunnelUpload(tableTunnel, this.projectName, - this.tableName, this.partition, uploadId); } @Override public void startWrite(RecordReceiver recordReceiver) { blocks = new ArrayList(); + List currentWriteBlocks; AtomicLong blockId = new AtomicLong(0); @@ -304,35 +530,212 @@ public class OdpsWriter extends Writer { try { TaskPluginCollector taskPluginCollector = super.getTaskPluginCollector(); - OdpsWriterProxy proxy = new OdpsWriterProxy(this.workerUpload, this.blockSizeInMB, blockId, - columnPositions, taskPluginCollector, this.emptyAsNull, this.isCompress); + OdpsWriterProxy proxy; + // 可以配置化,保平安 + boolean checkWithGetSize = this.sliceConfig.getBool("checkWithGetSize", true); + if (!supportDynamicPartition) { + if (this.consistencyCommit) { + proxy = new OdpsWriterProxy(this.workerUpload, this.blockSizeInMB, blockId, taskId, taskCount, + columnPositions, taskPluginCollector, this.emptyAsNull, this.isCompress, checkWithGetSize, this.allColumns, this.writeTimeOutInMs, this.sliceConfig, this.overLengthRule, this.maxFieldLength, this.enableOverLengthOutput); + } else { + proxy = new OdpsWriterProxy(this.workerUpload, this.blockSizeInMB, blockId, + columnPositions, taskPluginCollector, this.emptyAsNull, this.isCompress, checkWithGetSize, this.allColumns, false, this.writeTimeOutInMs, this.sliceConfig, this.overLengthRule, this.maxFieldLength, this.enableOverLengthOutput); + } + currentWriteBlocks = blocks; + } else { + proxy = null; + currentWriteBlocks = null; + } com.alibaba.datax.common.element.Record dataXRecord = null; - PerfRecord blockClose = new PerfRecord(super.getTaskGroupId(),super.getTaskId(), PerfRecord.PHASE.ODPS_BLOCK_CLOSE); + PerfRecord blockClose = new PerfRecord(super.getTaskGroupId(), super.getTaskId(), PerfRecord.PHASE.ODPS_BLOCK_CLOSE); blockClose.start(); long blockCloseUsedTime = 0; + boolean columnCntChecked = false; while ((dataXRecord = recordReceiver.getFromReader()) != null) { - blockCloseUsedTime += proxy.writeOneRecord(dataXRecord, blocks); + if (supportDynamicPartition) { + if (!columnCntChecked) { + // 动态分区模式下,读写两端的column数量必须相同 + if (dataXRecord.getColumnNumber() != this.sliceConfig.getList(Key.COLUMN).size()) { + throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, + "In dynamic partition write mode you must make sure reader and writer has same column count."); + } + columnCntChecked = true; + } + + // 如果是动态分区模式,则需要根据record内容来选择proxy + + String partitionFormatType = sliceConfig.getString("partitionFormatType"); + String partition; + 
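// A note on the routing below: for every record, the partition spec is derived either by the
// user-defined "custom" partition functions (customPartitionColumns / customPartitionFunctions)
// or directly from the partition-column values carried in the record, optionally reformatted
// through the partColFormats / dateTransFormMap rules, and then normalized by
// OdpsUtil.formatPartition(). For a table partitioned by pt and ds (illustrative names, not taken
// from this patch) the resulting key might look like pt=20230101,ds=a. That key is looked up in
// partitionUploadSessionHashMap to reuse, or lazily create, the OdpsWriterProxy and block list the
// record is written through; on first sight of a partition the target is truncated at most once
// under lockForPartitionDealedTruncate, and partitionCnt is checked against maxPartitionCnt.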
if("custom".equalsIgnoreCase(partitionFormatType)){ + List partitions = getListWithJson(sliceConfig,"customPartitionColumns",PartitionInfo.class); + List functions = getListWithJson(sliceConfig,"customPartitionFunctions",UserDefinedFunction.class); + + partition = CustomPartitionUtils.generate(dataXRecord,functions, + partitions,sliceConfig.getList(Key.COLUMN, String.class)); + }else{ + partition = OdpsUtil.getPartColValFromDataXRecord(dataXRecord, columnPositions, + this.sliceConfig.getList(Key.COLUMN, String.class), + this.dateTransFormMap); + partition = OdpsUtil.formatPartition(partition, false); + } + + Pair> proxyBlocksPair = this.partitionUploadSessionHashMap.get(partition); + if (null != proxyBlocksPair) { + proxy = proxyBlocksPair.getLeft(); + currentWriteBlocks = proxyBlocksPair.getRight(); + if (null == proxy || null == currentWriteBlocks) { + throw DataXException.asDataXException("Get OdpsWriterProxy failed."); + } + } else { + /* + * 第一次写入该目标分区:处理truncate + * truncate 为 true,且还没有被truncate过,则truncate,加互斥锁 + */ + Boolean truncate = this.sliceConfig.getBool(Key.TRUNCATE); + if (truncate && !partitionsDealedTruncate.contains(partition)) { + synchronized (lockForPartitionDealedTruncate) { + if (!partitionsDealedTruncate.contains(partition)) { + LOG.info("Start to truncate partition {}", partition); + OdpsUtil.dealTruncate(this.odps, this.table, partition, truncate); + partitionsDealedTruncate.add(partition); + } + /* + * 判断分区是否创建过多,如果创建过多,则报错 + */ + if (partitionCnt.addAndGet(1) > maxPartitionCnt) { + throw new DataXException("Create too many partitions. Please make sure you config the right partition column"); + } + } + } + TableTunnel.UploadSession uploadSession = OdpsUtil.createMasterTunnelUpload(tableTunnel, this.projectName, + this.tableName, partition); + proxy = new OdpsWriterProxy(uploadSession, this.blockSizeInMB, blockId, + columnPositions, taskPluginCollector, this.emptyAsNull, this.isCompress, checkWithGetSize, this.allColumns, true, this.writeTimeOutInMs, this.sliceConfig, this.overLengthRule, this.maxFieldLength, this.enableOverLengthOutput); + currentWriteBlocks = new ArrayList<>(); + partitionUploadSessionHashMap.put(partition, new MutablePair<>(proxy, currentWriteBlocks)); + } + } + blockCloseUsedTime += proxy.writeOneRecord(dataXRecord, currentWriteBlocks); + + // 动态分区写入模式下,如果内存使用达到一定程度 80%,清理较久没有活动且缓存较多数据的分区 + if (supportDynamicPartition) { + boolean isNeedFush = checkIfNeedFlush(); + if (isNeedFush) { + LOG.info("====The memory used exceed 80%, start to clear...==="); + int releaseCnt = 0; + int remainCnt = 0; + for (String onePartition : partitionUploadSessionHashMap.keySet()) { + OdpsWriterProxy oneIdleProxy = partitionUploadSessionHashMap.get(onePartition) == null ? 
null : partitionUploadSessionHashMap.get(onePartition).getLeft(); + if (oneIdleProxy == null) { + continue; + } + + Long idleTime = System.currentTimeMillis() - oneIdleProxy.getLastActiveTime(); + if (idleTime > Constant.PROXY_MAX_IDLE_TIME_MS || oneIdleProxy.getCurrentTotalBytes() > (this.blockSizeInMB*1014*1024 / 2)) { + // 如果空闲一定时间,先把数据写出 + LOG.info("{} partition has no data last {} seconds, so release its uploadSession", onePartition, Constant.PROXY_MAX_IDLE_TIME_MS / 1000); + currentWriteBlocks = partitionUploadSessionHashMap.get(onePartition).getRight(); + blockCloseUsedTime += oneIdleProxy.writeRemainingRecord(currentWriteBlocks); + // 再清除 + partitionUploadSessionHashMap.put(onePartition, null); + releaseCnt++; + } else { + remainCnt++; + } + } + + // 释放的不足够多,再释放一次,这次随机释放,直到释放数量达到一半 + for (String onePartition : partitionUploadSessionHashMap.keySet()) { + if (releaseCnt >= remainCnt) { + break; + } + + if (partitionUploadSessionHashMap.get(onePartition) != null) { + OdpsWriterProxy oneIdleProxy = partitionUploadSessionHashMap.get(onePartition).getLeft(); + currentWriteBlocks = partitionUploadSessionHashMap.get(onePartition).getRight(); + blockCloseUsedTime += oneIdleProxy.writeRemainingRecord(currentWriteBlocks); + partitionUploadSessionHashMap.put(onePartition, null); + + releaseCnt++; + remainCnt--; + } + + } + + this.latestFlushTime = System.currentTimeMillis(); + LOG.info("===complete==="); + } + + } } - blockCloseUsedTime += proxy.writeRemainingRecord(blocks); - blockClose.end(blockCloseUsedTime); + // 对所有分区进行剩余 records 写入 + if (supportDynamicPartition) { + for (String partition : partitionUploadSessionHashMap.keySet()) { + if (partitionUploadSessionHashMap.get(partition) == null) { + continue; + } + proxy = partitionUploadSessionHashMap.get(partition).getLeft(); + currentWriteBlocks = partitionUploadSessionHashMap.get(partition).getRight(); + blockCloseUsedTime += proxy.writeRemainingRecord(currentWriteBlocks); + blockClose.end(blockCloseUsedTime); + } + } + else { + blockCloseUsedTime += proxy.writeRemainingRecord(blocks); + blockClose.end(blockCloseUsedTime); + } } catch (Exception e) { - throw DataXException.asDataXException(OdpsWriterErrorCode.WRITER_RECORD_FAIL, "写入 ODPS 目的表失败. 请联系 ODPS 管理员处理.", e); + throw DataXException.asDataXException(OdpsWriterErrorCode.WRITER_RECORD_FAIL, MESSAGE_SOURCE.message("odpswriter.4"), e); } } + private boolean checkIfNeedFlush() { + + //检查是否到达flush时间,超过flush间隔时间 + boolean isArriveFlushTime = (System.currentTimeMillis() - this.latestFlushTime) > this.dynamicPartitionMemUsageFlushIntervalInMinute * 60 * 1000; + if (!isArriveFlushTime) { + //如果flush时间没有到,直接return掉 + return false; + } + + MemoryUsage memoryUsage = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage(); + boolean isMemUsageExceed = (double)memoryUsage.getUsed() / memoryUsage.getMax() > 0.8f; + return isMemUsageExceed; + } + @Override public void post() { - synchronized (lock){ - if(failoverState==0){ + synchronized (lock) { + if (failoverState == 0) { failoverState = 2; - LOG.info("Slave which uploadId=[{}] begin to commit blocks:[\n{}\n].", this.uploadId, - StringUtils.join(blocks, ",")); - OdpsUtil.masterCompleteBlocks(this.managerUpload, blocks.toArray(new Long[0])); - LOG.info("Slave which uploadId=[{}] commit blocks ok.", this.uploadId); - }else{ + if (! supportDynamicPartition) { + if (! 
this.consistencyCommit) { + LOG.info("Slave which uploadId=[{}] begin to commit blocks:[\n{}\n].", this.uploadId, + StringUtils.join(blocks, ",")); + OdpsUtil.masterCompleteBlocks(this.managerUpload, blocks.toArray(new Long[0])); + LOG.info("Slave which uploadId=[{}] commit blocks ok.", this.uploadId); + } else { + LOG.info("Slave which uploadId=[{}] begin to check blocks:[\n{}\n].", this.uploadId, + StringUtils.join(blocks, ",")); + OdpsUtil.checkBlockComplete(this.managerUpload, blocks.toArray(new Long[0])); + LOG.info("Slave which uploadId=[{}] check blocks ok.", this.uploadId); + } + } else { + for (String partition : partitionUploadSessionHashMap.keySet()) { + OdpsWriterProxy proxy = partitionUploadSessionHashMap.get(partition).getLeft(); + List blocks = partitionUploadSessionHashMap.get(partition).getRight(); + TableTunnel.UploadSession uploadSession = proxy.getSlaveUpload(); + LOG.info("Slave which uploadId=[{}] begin to check blocks:[\n{}\n].", uploadSession.getId(), + StringUtils.join(blocks, ",")); + OdpsUtil.masterCompleteBlocks(uploadSession, blocks.toArray(new Long[0])); + LOG.info("Slave which uploadId=[{}] check blocks ok.", uploadSession.getId()); + } + } + + } else { throw DataXException.asDataXException(CommonErrorCode.SHUT_DOWN_TASK, ""); } } @@ -343,9 +746,9 @@ public class OdpsWriter extends Writer { } @Override - public boolean supportFailOver(){ - synchronized (lock){ - if(failoverState==0){ + public boolean supportFailOver() { + synchronized (lock) { + if (failoverState == 0) { failoverState = 1; return true; } diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterErrorCode.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterErrorCode.java index 02020c04..35f2ed15 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterErrorCode.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterErrorCode.java @@ -1,42 +1,43 @@ package com.alibaba.datax.plugin.writer.odpswriter; import com.alibaba.datax.common.spi.ErrorCode; +import com.alibaba.datax.common.util.MessageSource; public enum OdpsWriterErrorCode implements ErrorCode { - REQUIRED_VALUE("OdpsWriter-00", "您缺失了必须填写的参数值."), - ILLEGAL_VALUE("OdpsWriter-01", "您配置的值不合法."), - UNSUPPORTED_COLUMN_TYPE("OdpsWriter-02", "DataX 不支持写入 ODPS 的目的表的此种数据类型."), + REQUIRED_VALUE("OdpsWriter-00", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.required_value")), + ILLEGAL_VALUE("OdpsWriter-01", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.illegal_value")), + UNSUPPORTED_COLUMN_TYPE("OdpsWriter-02", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.unsupported_column_type")), - TABLE_TRUNCATE_ERROR("OdpsWriter-03", "清空 ODPS 目的表时出错."), - CREATE_MASTER_UPLOAD_FAIL("OdpsWriter-04", "创建 ODPS 的 uploadSession 失败."), - GET_SLAVE_UPLOAD_FAIL("OdpsWriter-05", "获取 ODPS 的 uploadSession 失败."), - GET_ID_KEY_FAIL("OdpsWriter-06", "获取 accessId/accessKey 失败."), - GET_PARTITION_FAIL("OdpsWriter-07", "获取 ODPS 目的表的所有分区失败."), + TABLE_TRUNCATE_ERROR("OdpsWriter-03", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.table_truncate_error")), + CREATE_MASTER_UPLOAD_FAIL("OdpsWriter-04", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.create_master_upload_fail")), + GET_SLAVE_UPLOAD_FAIL("OdpsWriter-05", 
MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.get_slave_upload_fail")), + GET_ID_KEY_FAIL("OdpsWriter-06", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.get_id_key_fail")), + GET_PARTITION_FAIL("OdpsWriter-07", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.get_partition_fail")), - ADD_PARTITION_FAILED("OdpsWriter-08", "添加分区到 ODPS 目的表失败."), - WRITER_RECORD_FAIL("OdpsWriter-09", "写入数据到 ODPS 目的表失败."), + ADD_PARTITION_FAILED("OdpsWriter-08", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.add_partition_failed")), + WRITER_RECORD_FAIL("OdpsWriter-09", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.writer_record_fail")), - COMMIT_BLOCK_FAIL("OdpsWriter-10", "提交 block 到 ODPS 目的表失败."), - RUN_SQL_FAILED("OdpsWriter-11", "执行 ODPS Sql 失败."), - CHECK_IF_PARTITIONED_TABLE_FAILED("OdpsWriter-12", "检查 ODPS 目的表:%s 是否为分区表失败."), + COMMIT_BLOCK_FAIL("OdpsWriter-10", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.commit_block_fail")), + RUN_SQL_FAILED("OdpsWriter-11", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.run_sql_failed")), + CHECK_IF_PARTITIONED_TABLE_FAILED("OdpsWriter-12", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.check_if_partitioned_table_failed")), - RUN_SQL_ODPS_EXCEPTION("OdpsWriter-13", "执行 ODPS Sql 时抛出异常, 可重试"), + RUN_SQL_ODPS_EXCEPTION("OdpsWriter-13", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.run_sql_odps_exception")), - ACCOUNT_TYPE_ERROR("OdpsWriter-30", "账号类型错误."), + ACCOUNT_TYPE_ERROR("OdpsWriter-30", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.account_type_error")), - PARTITION_ERROR("OdpsWriter-31", "分区配置错误."), + PARTITION_ERROR("OdpsWriter-31", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.partition_error")), - COLUMN_NOT_EXIST("OdpsWriter-32", "用户配置的列不存在."), + COLUMN_NOT_EXIST("OdpsWriter-32", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.column_not_exist")), - ODPS_PROJECT_NOT_FOUNT("OdpsWriter-100", "您配置的值不合法, odps project 不存在."), //ODPS-0420111: Project not found + ODPS_PROJECT_NOT_FOUNT("OdpsWriter-100", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.odps_project_not_fount")), //ODPS-0420111: Project not found - ODPS_TABLE_NOT_FOUNT("OdpsWriter-101", "您配置的值不合法, odps table 不存在"), // ODPS-0130131:Table not found + ODPS_TABLE_NOT_FOUNT("OdpsWriter-101", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.odps_table_not_fount")), // ODPS-0130131:Table not found - ODPS_ACCESS_KEY_ID_NOT_FOUND("OdpsWriter-102", "您配置的值不合法, odps accessId,accessKey 不存在"), //ODPS-0410051:Invalid credentials - accessKeyId not found + ODPS_ACCESS_KEY_ID_NOT_FOUND("OdpsWriter-102", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.odps_access_key_id_not_found")), //ODPS-0410051:Invalid credentials - accessKeyId not found - ODPS_ACCESS_KEY_INVALID("OdpsWriter-103", "您配置的值不合法, odps accessKey 错误"), //ODPS-0410042:Invalid signature value - User signature dose not match; + ODPS_ACCESS_KEY_INVALID("OdpsWriter-103", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.odps_access_key_invalid")), //ODPS-0410042:Invalid signature value - User signature dose not match; - ODPS_ACCESS_DENY("OdpsWriter-104", 
"拒绝访问, 您不在 您配置的 project 中") //ODPS-0420095: Access Denied - Authorization Failed [4002], You doesn't exist in project + ODPS_ACCESS_DENY("OdpsWriter-104", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.odps_access_deny")) //ODPS-0420095: Access Denied - Authorization Failed [4002], You doesn't exist in project ; diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterProxy.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterProxy.java index 9833616c..e7c95be1 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterProxy.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterProxy.java @@ -3,29 +3,58 @@ package com.alibaba.datax.plugin.writer.odpswriter; import com.alibaba.datax.common.element.StringColumn; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.TaskPluginCollector; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.MessageSource; import com.alibaba.datax.plugin.writer.odpswriter.util.OdpsUtil; - -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONArray; +import com.alibaba.fastjson2.JSONObject; import com.aliyun.odps.OdpsType; import com.aliyun.odps.TableSchema; - +import com.aliyun.odps.data.ArrayRecord; +import com.aliyun.odps.data.Binary; +import com.aliyun.odps.data.Char; +import com.aliyun.odps.data.IntervalDayTime; +import com.aliyun.odps.data.IntervalYearMonth; import com.aliyun.odps.data.Record; - +import com.aliyun.odps.data.SimpleStruct; +import com.aliyun.odps.data.Struct; +import com.aliyun.odps.data.Varchar; import com.aliyun.odps.tunnel.TableTunnel; - import com.aliyun.odps.tunnel.TunnelException; import com.aliyun.odps.tunnel.io.ProtobufRecordPack; +import com.aliyun.odps.type.ArrayTypeInfo; +import com.aliyun.odps.type.CharTypeInfo; +import com.aliyun.odps.type.MapTypeInfo; +import com.aliyun.odps.type.StructTypeInfo; +import com.aliyun.odps.type.TypeInfo; +import com.aliyun.odps.type.VarcharTypeInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.math.BigDecimal; +import java.sql.Timestamp; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.lang3.StringUtils; + +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TimeZone; import java.util.concurrent.atomic.AtomicLong; public class OdpsWriterProxy { - private static final Logger LOG = LoggerFactory - .getLogger(OdpsWriterProxy.class); + private static final Logger LOG = LoggerFactory.getLogger(OdpsWriterProxy.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsWriterProxy.class); private volatile boolean printColumnLess;// 是否打印对于源头字段数小于 ODPS 目的表的行的日志 @@ -39,18 +68,98 @@ public class OdpsWriterProxy { private AtomicLong blockId; private List columnPositions; - private List tableOriginalColumnTypeList; + private List tableOriginalColumnTypeList; private boolean emptyAsNull; private boolean isCompress; + + private int taskId; + private int taskCOUNT; + private boolean consistencyCommit = false; + private boolean checkWithGetSize = true; + private List allColumns; + private String 
overLengthRule; + private int maxFieldLength; + private Boolean enableOverLengthOutput; - public OdpsWriterProxy(TableTunnel.UploadSession slaveUpload, int blockSizeInMB, - AtomicLong blockId, List columnPositions, - TaskPluginCollector taskPluginCollector, boolean emptyAsNull, boolean isCompress) - throws IOException, TunnelException { + /** + * 记录最近一次活动时间,动态分区写入模式下,超过一定时间不活动,则关闭这个proxy + */ + private Long lastActiveTime; + + /** + * 写block超时时间 + */ + private Long writeTimeoutInMs; + + private SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + + // 读取 jvm 默认时区 + private Calendar calendarForDate = null; + private boolean useDateWithCalendar = true; + + private Calendar initCalendar(Configuration config) { + // 理论上不会有其他选择,有配置化可以随时应急 + String calendarType = config.getString("calendarType", "iso8601"); + Boolean lenient = config.getBool("calendarLenient", true); + + // 默认jvm时区 + TimeZone timeZone = TimeZone.getDefault(); + String timeZoneStr = config.getString("calendarTimeZone"); + if (StringUtils.isNotBlank(timeZoneStr)) { + // 如果用户明确指定使用用户指定的 + timeZone = TimeZone.getTimeZone(timeZoneStr); + } + + Calendar calendarForDate = new Calendar.Builder().setCalendarType(calendarType).setLenient(lenient) + .setTimeZone(timeZone).build(); + return calendarForDate; + } + + public OdpsWriterProxy(TableTunnel.UploadSession slaveUpload, int blockSizeInMB, AtomicLong blockId, + List columnPositions, TaskPluginCollector taskPluginCollector, boolean emptyAsNull, + boolean isCompress, boolean checkWithGetSize, List allColumns, boolean initBufSizeZero, + Long writeTimeoutInMs, Configuration taskConfig, String overLengthRule, int maxFieldLength, + Boolean enableOverLengthOutput) throws IOException, TunnelException { this.slaveUpload = slaveUpload; this.schema = this.slaveUpload.getSchema(); - this.tableOriginalColumnTypeList = OdpsUtil - .getTableOriginalColumnTypeList(this.schema); + this.tableOriginalColumnTypeList = OdpsUtil.getTableOriginalColumnTypeList(this.schema); + + this.blockId = blockId; + this.columnPositions = columnPositions; + this.taskPluginCollector = taskPluginCollector; + this.emptyAsNull = emptyAsNull; + this.isCompress = isCompress; + + // 初始化与 buffer 区相关的值 + this.maxBufferSize = (blockSizeInMB - 4) * 1024 * 1024; + if (initBufSizeZero) { + // 动态分区下初始化为0,随着写入的reord变多慢慢增加 + this.protobufCapacity = 0; + } else { + this.protobufCapacity = blockSizeInMB * 1024 * 1024; + } + this.protobufRecordPack = new ProtobufRecordPack(this.schema, null, this.protobufCapacity); + this.printColumnLess = true; + this.checkWithGetSize = checkWithGetSize; + + this.allColumns = allColumns; + this.overLengthRule = overLengthRule; + this.maxFieldLength = maxFieldLength; + this.enableOverLengthOutput = enableOverLengthOutput; + + this.writeTimeoutInMs = writeTimeoutInMs; + + this.calendarForDate = this.initCalendar(taskConfig); + this.useDateWithCalendar = taskConfig.getBool("useDateWithCalendar", true); + } + + public OdpsWriterProxy(TableTunnel.UploadSession slaveUpload, int blockSizeInMB, AtomicLong blockId, int taskId, + int taskCount, List columnPositions, TaskPluginCollector taskPluginCollector, boolean emptyAsNull, + boolean isCompress, boolean checkWithGetSize, List allColumns, Long writeTimeoutInMs, Configuration taskConfig, + String overLengthRule, int maxFieldLength, Boolean enableOverLengthOutput) throws IOException, TunnelException { + this.slaveUpload = slaveUpload; + this.schema = this.slaveUpload.getSchema(); + this.tableOriginalColumnTypeList = 
OdpsUtil.getTableOriginalColumnTypeList(this.schema); this.blockId = blockId; this.columnPositions = columnPositions; @@ -63,12 +172,38 @@ public class OdpsWriterProxy { this.protobufCapacity = blockSizeInMB * 1024 * 1024; this.protobufRecordPack = new ProtobufRecordPack(this.schema, null, this.protobufCapacity); printColumnLess = true; + + this.taskId = taskId; + this.taskCOUNT = taskCount; + this.consistencyCommit = true; + this.checkWithGetSize = checkWithGetSize; + this.allColumns = allColumns; + this.overLengthRule = overLengthRule; + this.maxFieldLength = maxFieldLength; + this.enableOverLengthOutput = enableOverLengthOutput; + this.writeTimeoutInMs = writeTimeoutInMs; + + this.calendarForDate = this.initCalendar(taskConfig); + this.useDateWithCalendar = taskConfig.getBool("useDateWithCalendar", true); } - public long writeOneRecord( - com.alibaba.datax.common.element.Record dataXRecord, - List blocks) throws Exception { + public long getCurrentBlockId() { + if (this.consistencyCommit) { + return this.taskId + this.taskCOUNT * (this.blockId.get()); + } else { + return this.blockId.get(); + } + } + + public TableTunnel.UploadSession getSlaveUpload() { + return this.slaveUpload; + } + + public long writeOneRecord(com.alibaba.datax.common.element.Record dataXRecord, List blocks) + throws Exception { + + this.lastActiveTime = System.currentTimeMillis(); Record record = dataxRecordToOdpsRecord(dataXRecord); @@ -77,12 +212,11 @@ public class OdpsWriterProxy { } protobufRecordPack.append(record); - if (protobufRecordPack.getTotalBytes() >= maxBufferSize) { + if (protobufRecordPack.getProtobufStream().size() >= maxBufferSize) { long startTimeInNs = System.nanoTime(); - OdpsUtil.slaveWriteOneBlock(this.slaveUpload, - protobufRecordPack, blockId.get(), this.isCompress); - LOG.info("write block {} ok.", blockId.get()); - blocks.add(blockId.get()); + OdpsUtil.slaveWriteOneBlock(this.slaveUpload, protobufRecordPack, getCurrentBlockId(), this.writeTimeoutInMs); + LOG.info("write block {} ok.", getCurrentBlockId()); + blocks.add(getCurrentBlockId()); protobufRecordPack.reset(); this.blockId.incrementAndGet(); return System.nanoTime() - startTimeInNs; @@ -92,13 +226,20 @@ public class OdpsWriterProxy { public long writeRemainingRecord(List blocks) throws Exception { // complete protobuf stream, then write to http - if (protobufRecordPack.getTotalBytes() != 0) { + // protobufRecordPack.getTotalBytes() 慕明: getTotalBytes并不一定保证能拿到写入的字节数,按你们的逻辑应该是用getTotalBytesWritten + // if (protobufRecordPack.getTotalBytes() != 0) { + boolean hasRemindData = false; + if (this.checkWithGetSize) { + hasRemindData = protobufRecordPack.getSize() != 0; + } else { + hasRemindData = protobufRecordPack.getTotalBytes() != 0; + } + if (hasRemindData) { long startTimeInNs = System.nanoTime(); - OdpsUtil.slaveWriteOneBlock(this.slaveUpload, - protobufRecordPack, blockId.get(), this.isCompress); - LOG.info("write block {} ok.", blockId.get()); + OdpsUtil.slaveWriteOneBlock(this.slaveUpload, protobufRecordPack, getCurrentBlockId(), this.writeTimeoutInMs); + LOG.info("write block {} ok.", getCurrentBlockId()); - blocks.add(blockId.get()); + blocks.add(getCurrentBlockId()); // reset the buffer for next block protobufRecordPack.reset(); return System.nanoTime() - startTimeInNs; @@ -106,85 +247,846 @@ public class OdpsWriterProxy { return 0; } - public Record dataxRecordToOdpsRecord( - com.alibaba.datax.common.element.Record dataXRecord) throws Exception { + public Record dataxRecordToOdpsRecord(com.alibaba.datax.common.element.Record 
dataXRecord) throws Exception { int sourceColumnCount = dataXRecord.getColumnNumber(); - Record odpsRecord = slaveUpload.newRecord(); + ArrayRecord odpsRecord = (ArrayRecord) slaveUpload.newRecord(); int userConfiguredColumnNumber = this.columnPositions.size(); -//todo + if (sourceColumnCount > userConfiguredColumnNumber) { - throw DataXException - .asDataXException( - OdpsWriterErrorCode.ILLEGAL_VALUE, - String.format( - "亲,配置中的源表的列个数和目的端表不一致,源表中您配置的列数是:%s 大于目的端的列数是:%s , 这样会导致源头数据无法正确导入目的端, 请检查您的配置并修改.", - sourceColumnCount, - userConfiguredColumnNumber)); + throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, + MESSAGE_SOURCE.message("odpswriterproxy.1", sourceColumnCount, userConfiguredColumnNumber)); } else if (sourceColumnCount < userConfiguredColumnNumber) { if (printColumnLess) { - LOG.warn( - "源表的列个数小于目的表的列个数,源表列数是:{} 目的表列数是:{} , 数目不匹配. DataX 会把目的端多出的列的值设置为空值. 如果这个默认配置不符合您的期望,请保持源表和目的表配置的列数目保持一致.", - sourceColumnCount, userConfiguredColumnNumber); + LOG.warn(MESSAGE_SOURCE.message("odpswriterproxy.2", sourceColumnCount, userConfiguredColumnNumber)); } printColumnLess = false; } - int currentIndex; + int currentIndex = 0; int sourceIndex = 0; try { com.alibaba.datax.common.element.Column columnValue; for (; sourceIndex < sourceColumnCount; sourceIndex++) { + // 跳过分区列 + if (this.columnPositions.get(sourceIndex) == -1) { + continue; + } currentIndex = columnPositions.get(sourceIndex); - OdpsType type = this.tableOriginalColumnTypeList - .get(currentIndex); + TypeInfo typeInfo = this.tableOriginalColumnTypeList.get(currentIndex); + OdpsType type = typeInfo.getOdpsType(); + String typeName = typeInfo.getTypeName(); columnValue = dataXRecord.getColumn(sourceIndex); if (columnValue == null) { continue; } // for compatible dt lib, "" as null - if(this.emptyAsNull && columnValue instanceof StringColumn && "".equals(columnValue.asString())){ + if (this.emptyAsNull && columnValue instanceof StringColumn && "".equals(columnValue.asString())) { continue; } switch (type) { - case STRING: - odpsRecord.setString(currentIndex, columnValue.asString()); - break; - case BIGINT: - odpsRecord.setBigint(currentIndex, columnValue.asLong()); - break; - case BOOLEAN: - odpsRecord.setBoolean(currentIndex, columnValue.asBoolean()); - break; - case DATETIME: - odpsRecord.setDatetime(currentIndex, columnValue.asDate()); - break; - case DOUBLE: - odpsRecord.setDouble(currentIndex, columnValue.asDouble()); - break; - case DECIMAL: - odpsRecord.setDecimal(currentIndex, columnValue.asBigDecimal()); - String columnStr = columnValue.asString(); - if(columnStr != null && columnStr.indexOf(".") >= 36) { - throw new Exception("Odps decimal 类型的整数位个数不能超过35"); + case STRING: + String newValue = (String)OdpsUtil.processOverLengthData(columnValue.asString(), OdpsType.STRING, this.overLengthRule, this.maxFieldLength, this.enableOverLengthOutput); + odpsRecord.setString(currentIndex, newValue); + break; + case BIGINT: + odpsRecord.setBigint(currentIndex, columnValue.asLong()); + break; + case BOOLEAN: + odpsRecord.setBoolean(currentIndex, columnValue.asBoolean()); + break; + case DATETIME: + odpsRecord.setDatetime(currentIndex, columnValue.asDate()); +// Date datetimeData = columnValue.asDate(); +// if (null == datetimeData) { +// odpsRecord.setDatetime(currentIndex, null); +// } else { +// Timestamp dateDataForOdps = new Timestamp(datetimeData.getTime()); +// if (datetimeData instanceof java.sql.Timestamp) { +// dateDataForOdps.setNanos(((java.sql.Timestamp)datetimeData).getNanos()); +// } +// 
odpsRecord.setDatetime(currentIndex, dateDataForOdps); +// } + break; + case DATE: + Date dateData = columnValue.asDate(); + if (null == dateData) { + odpsRecord.setDatetime(currentIndex, null); + } else { + if (this.useDateWithCalendar) { + odpsRecord.setDate(currentIndex, new java.sql.Date(dateData.getTime()), this.calendarForDate); + } else { + odpsRecord.setDatetime(currentIndex, new java.sql.Date(dateData.getTime())); } - default: - break; + } + break; + case DOUBLE: + odpsRecord.setDouble(currentIndex, columnValue.asDouble()); + break; + case FLOAT: + Double floatValue = columnValue.asDouble(); + if (null == floatValue) { + ((ArrayRecord) odpsRecord).setFloat(currentIndex, null); + } else { + ((ArrayRecord) odpsRecord).setFloat(currentIndex, floatValue.floatValue()); + } + break; + case DECIMAL: + odpsRecord.setDecimal(currentIndex, columnValue.asBigDecimal()); + String columnStr = columnValue.asString(); + if (columnStr != null && columnStr.indexOf(".") >= 36) { + throw new Exception(MESSAGE_SOURCE.message("odpswriterproxy.3")); + } + break; + case TINYINT: + Long tinyintValueStr = columnValue.asLong(); + if (null == tinyintValueStr) { + ((ArrayRecord) odpsRecord).setTinyint(currentIndex, null); + } else { + ((ArrayRecord) odpsRecord).setTinyint(currentIndex, + Byte.valueOf(String.valueOf(tinyintValueStr))); + } + break; + case SMALLINT: + Long smallIntValue = columnValue.asLong(); + if (null == smallIntValue) { + ((ArrayRecord) odpsRecord).setSmallint(currentIndex, null); + } else { + ((ArrayRecord) odpsRecord).setSmallint(currentIndex, smallIntValue.shortValue()); + } + break; + case INT: + Long intValue = columnValue.asLong(); + if (null == intValue) { + ((ArrayRecord) odpsRecord).setInt(currentIndex, null); + } else { + ((ArrayRecord) odpsRecord).setInt(currentIndex, intValue.intValue()); + } + break; + case VARCHAR: + // warn: columnValue.asString() 为 null 时 , odps sdk 有 BUG + // 不能用 Varchar 的默认构造函数,不然有 NPE + String varcharValueStr = columnValue.asString(); + Varchar varcharData = null; + if (varcharValueStr != null){ + varcharData = new Varchar(columnValue.asString()); + } + ((ArrayRecord) odpsRecord).setVarchar(currentIndex, varcharData); + break; + case CHAR: + String charValueStr = columnValue.asString(); + Char charData = null; + if (charValueStr != null ){ + charData = new Char(charValueStr); + } + ((ArrayRecord) odpsRecord).setChar(currentIndex, charData); + break; + case TIMESTAMP: + Date timestampData = columnValue.asDate(); + if (null == timestampData) { + ((ArrayRecord) odpsRecord).setTimestamp(currentIndex, null); + } else { + Timestamp timestampDataForOdps = new Timestamp(timestampData.getTime()); + if (timestampData instanceof java.sql.Timestamp) { + // 纳秒 + timestampDataForOdps.setNanos(((java.sql.Timestamp)timestampData).getNanos()); + } + // warn优化:如果原来类型就是Timestamp,直接使用就少创建了一个对象 + ((ArrayRecord) odpsRecord).setTimestamp(currentIndex, timestampDataForOdps); + } + break; + case BINARY: + Binary newBinaryData = (Binary)OdpsUtil.processOverLengthData(new Binary(columnValue.asBytes()), OdpsType.BINARY, this.overLengthRule, this.maxFieldLength, this.enableOverLengthOutput); + ((ArrayRecord) odpsRecord).setBinary(currentIndex,columnValue.asBytes() == null ? 
null : newBinaryData); + break; + case ARRAY: + JSONArray arrayJson = JSON.parseArray(columnValue.asString()); + ((ArrayRecord) odpsRecord).setArray(currentIndex, parseArray(arrayJson, (ArrayTypeInfo) typeInfo)); + break; + case MAP: + JSONObject mapJson = JSON.parseObject(columnValue.asString()); + ((ArrayRecord) odpsRecord).setMap(currentIndex, parseMap(mapJson, (MapTypeInfo) typeInfo)); + break; + case STRUCT: + JSONObject structJson = JSON.parseObject(columnValue.asString()); + ((ArrayRecord) odpsRecord).setStruct(currentIndex, + parseStruct(structJson, (StructTypeInfo) typeInfo)); + break; + default: + break; } } return odpsRecord; } catch (Exception e) { - String message = String.format( - "写入 ODPS 目的表时遇到了脏数据: 第[%s]个字段的数据出现错误,请检查该数据并作出修改 或者您可以增大阀值,忽略这条记录.", sourceIndex); - this.taskPluginCollector.collectDirtyRecord(dataXRecord, e, - message); - + String dirtyColumnName = ""; + try { + dirtyColumnName = this.allColumns.get(currentIndex); + } catch (Exception ignoreEx) { + // ignore + } + String message = MESSAGE_SOURCE.message("odpswriterproxy.4", sourceIndex, dirtyColumnName); + this.taskPluginCollector.collectDirtyRecord(dataXRecord, e, message); return null; } } + + private List parseArray(JSONArray jsonArray, ArrayTypeInfo arrayTypeInfo) throws ParseException { + if (null == jsonArray) { + return null; + } + List result = new ArrayList(); + switch (arrayTypeInfo.getElementTypeInfo().getOdpsType()) { + case BIGINT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getLong(i)); + } + return result; + /** + * 双精度浮点 + */ + case DOUBLE: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getDouble(i)); + } + return result; + /** + * 布尔型 + */ + case BOOLEAN: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getBoolean(i)); + } + return result; + /** + * 日期类型 + */ + case DATETIME: + // TODO 精度 + for (int i = 0; i < jsonArray.size(); i++) { + result.add(dateFormat.parse(jsonArray.getString(i))); + } + return result; + /** + * 字符串类型 + */ + case STRING: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getString(i)); + } + return result; + /** + * 精确小数类型 + */ + case DECIMAL: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getBigDecimal(i)); + } + return result; + /** + * 1字节有符号整型 + */ + case TINYINT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getByte(i)); + } + return result; + /** + * 2字节有符号整型 + */ + case SMALLINT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getShort(i)); + } + return result; + /** + * 4字节有符号整型 + */ + case INT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getInteger(i)); + } + return result; + /** + * 单精度浮点 + */ + case FLOAT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getFloat(i)); + } + return result; + /** + * 固定长度字符串 + */ + case CHAR: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(new Char(jsonArray.getString(i), + ((CharTypeInfo) arrayTypeInfo.getElementTypeInfo()).getLength())); + } + return result; + /** + * 可变长度字符串 + */ + case VARCHAR: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(new Varchar(jsonArray.getString(i), + ((VarcharTypeInfo) arrayTypeInfo.getElementTypeInfo()).getLength())); + } + return result; + /** + * 时间类型 + */ + case DATE: + // TODO string -> date need timezone + // TODO how to use odps Record + for (int i = 0; i < jsonArray.size(); i++) { + result.add(java.sql.Date.valueOf(jsonArray.getString(i))); + } + return result; 
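/*
 * Expected JSON encodings for the element types handled in parseArray / parseMap / parseStruct,
 * as implied by the parsing calls in this class (the concrete literals below are illustrative):
 *   DATETIME             "2023-01-01 12:30:00"       parsed with the "yyyy-MM-dd HH:mm:ss" dateFormat
 *   DATE                 "2023-01-01"                via java.sql.Date.valueOf
 *   TIMESTAMP            "2023-01-01 12:30:00.123"   via Timestamp.valueOf
 *   BINARY               a Base64 string             decoded with Base64.decodeBase64
 *   INTERVAL_DAY_TIME    {"totalSeconds": 60, "nanos": 0}
 *   INTERVAL_YEAR_MONTH  {"years": 1, "months": 2}
 *   STRUCT / MAP / ARRAY nest recursively through parseStruct / parseMap / parseArray
 */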
+ /** + * 时间戳 + */ + case TIMESTAMP: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(Timestamp.valueOf(jsonArray.getString(i))); + } + return result; + /** + * 字节数组 + */ + case BINARY: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(Base64.decodeBase64(jsonArray.getString(i))); + } + return result; + /** + * 日期间隔 + */ + case INTERVAL_DAY_TIME: + for (int i = 0; i < jsonArray.size(); i++) { + JSONObject json = jsonArray.getJSONObject(i); + result.add(new IntervalDayTime(json.getInteger("totalSeconds"), json.getInteger("nanos"))); + } + return result; + /** + * 年份间隔 + */ + case INTERVAL_YEAR_MONTH: + for (int i = 0; i < jsonArray.size(); i++) { + JSONObject json = jsonArray.getJSONObject(i); + result.add(new IntervalYearMonth(json.getInteger("years"), json.getInteger("months"))); + } + return result; + /** + * 结构体 + */ + case STRUCT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add( + parseStruct(jsonArray.getJSONObject(i), (StructTypeInfo) arrayTypeInfo.getElementTypeInfo())); + } + return result; + /** + * MAP类型 + */ + case MAP: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(parseMap(jsonArray.getJSONObject(i), (MapTypeInfo) arrayTypeInfo.getElementTypeInfo())); + } + return result; + /** + * ARRAY类型 + */ + case ARRAY: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(parseArray(jsonArray.getJSONArray(i), (ArrayTypeInfo) arrayTypeInfo.getElementTypeInfo())); + } + return result; + + default: + return result; + } + } + + private Map parseMap(JSONObject json, MapTypeInfo typeInfo) throws ParseException { + if (json == null) { + return null; + } + Map keyMap = new HashMap(); + Set keys = json.keySet(); + switch (typeInfo.getKeyTypeInfo().getOdpsType()) { + case BIGINT: + for (String item : keys) { + keyMap.put(Long.parseLong(item), item); + } + break; + /** + * 双精度浮点 + */ + case DOUBLE: + for (String item : keys) { + keyMap.put(Double.parseDouble(item), item); + } + break; + /** + * 布尔型 + */ + case BOOLEAN: + for (String item : keys) { + keyMap.put(Boolean.parseBoolean(item), item); + } + break; + /** + * 日期类型 + */ + case DATETIME: + // TODO 精度 + for (String item : keys) { + keyMap.put(dateFormat.parse(item), item); + } + break; + /** + * 字符串类型 + */ + case STRING: + for (String item : keys) { + keyMap.put(item, item); + } + break; + /** + * 精确小数类型 + */ + case DECIMAL: + for (String item : keys) { + keyMap.put(new BigDecimal(item), item); + } + break; + /** + * 1字节有符号整型 + */ + case TINYINT: + for (String item : keys) { + keyMap.put(Byte.parseByte(item), item); + } + break; + /** + * 2字节有符号整型 + */ + case SMALLINT: + for (String item : keys) { + keyMap.put(Short.parseShort(item), item); + } + break; + /** + * 4字节有符号整型 + */ + case INT: + for (String item : keys) { + keyMap.put(Integer.parseInt(item), item); + } + break; + /** + * 单精度浮点 + */ + case FLOAT: + for (String item : keys) { + keyMap.put(Float.parseFloat(item), item); + } + break; + /** + * 固定长度字符串 + */ + case CHAR: + for (String item : keys) { + keyMap.put(new Char(item, ((CharTypeInfo) typeInfo.getKeyTypeInfo()).getLength()), item); + } + break; + /** + * 可变长度字符串 + */ + case VARCHAR: + for (String item : keys) { + keyMap.put(new Varchar(item, ((VarcharTypeInfo) typeInfo.getKeyTypeInfo()).getLength()), item); + } + break; + /** + * 时间类型 + */ + case DATE: + // TODO string -> date need timezone + // TODO how to use odps Record + for (String item : keys) { + keyMap.put(java.sql.Date.valueOf(item), item); + } + break; + /** + * 时间戳 + */ + case TIMESTAMP: + for (String item : keys) { + 
keyMap.put(Timestamp.valueOf(item), item); + } + break; + /** + * 字节数组 + */ + case BINARY: + for (String item : keys) { + keyMap.put(new Binary(Base64.decodeBase64(item)), item); + } + break; + /** + * 日期间隔 + */ + case INTERVAL_DAY_TIME: + for (String item : keys) { + JSONObject jsonObject = JSON.parseObject(item); + keyMap.put(new IntervalDayTime(jsonObject.getInteger("totalSeconds"), jsonObject.getInteger("nanos")), + item); + } + break; + /** + * 年份间隔 + */ + case INTERVAL_YEAR_MONTH: + for (String item : keys) { + JSONObject jsonObject = JSON.parseObject(item); + keyMap.put(new IntervalYearMonth(jsonObject.getInteger("years"), jsonObject.getInteger("months")), + item); + } + break; + default: + break; + // TODO throw an exception + } + Map result = new HashMap(); + // process map value + switch (typeInfo.getValueTypeInfo().getOdpsType()) { + case BIGINT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getLong(item.getValue())); + } + return result; + /** + * 双精度浮点 + */ + case DOUBLE: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getDouble(item.getValue())); + } + return result; + /** + * 布尔型 + */ + case BOOLEAN: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getBoolean(item.getValue())); + } + return result; + /** + * 日期类型 + */ + case DATETIME: + // TODO 精度 + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), dateFormat.parse(json.getString(item.getValue()))); + } + return result; + /** + * 字符串类型 + */ + case STRING: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getString(item.getValue())); + } + return result; + /** + * 精确小数类型 + */ + case DECIMAL: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getBigDecimal(item.getValue())); + } + return result; + /** + * 1字节有符号整型 + */ + case TINYINT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getByte(item.getValue())); + } + return result; + /** + * 2字节有符号整型 + */ + case SMALLINT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getShort(item.getValue())); + } + return result; + /** + * 4字节有符号整型 + */ + case INT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getInteger(item.getValue())); + } + return result; + /** + * 单精度浮点 + */ + case FLOAT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getFloat(item.getValue())); + } + return result; + /** + * 固定长度字符串 + */ + case CHAR: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), new Char(json.getString(item.getValue()), + ((CharTypeInfo) typeInfo.getValueTypeInfo()).getLength())); + } + return result; + /** + * 可变长度字符串 + */ + case VARCHAR: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), new Varchar(json.getString(item.getValue()), + ((VarcharTypeInfo) typeInfo.getValueTypeInfo()).getLength())); + } + return result; + /** + * 时间类型 + */ + case DATE: + // TODO string -> date need timezone + // TODO how to use odps Record + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), java.sql.Date.valueOf(json.getString(item.getValue()))); + } + return result; + /** + * 时间戳 + */ + case TIMESTAMP: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), Timestamp.valueOf(json.getString(item.getValue()))); + } + return result; + /** + * 字节数组 + */ + case BINARY: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), new 
Binary(Base64.decodeBase64(json.getString(item.getValue())))); + } + return result; + /** + * 日期间隔 + */ + case INTERVAL_DAY_TIME: + for (Map.Entry item : keyMap.entrySet()) { + JSONObject jsonObject = json.getJSONObject(item.getValue()); + result.put(item.getKey(), + new IntervalDayTime(jsonObject.getInteger("totalSeconds"), jsonObject.getInteger("nanos"))); + } + return result; + /** + * 年份间隔 + */ + case INTERVAL_YEAR_MONTH: + for (Map.Entry item : keyMap.entrySet()) { + JSONObject jsonObject = json.getJSONObject(item.getValue()); + result.put(item.getKey(), + new IntervalYearMonth(jsonObject.getInteger("years"), jsonObject.getInteger("months"))); + } + return result; + /** + * 结构体 + */ + case STRUCT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), + parseStruct(json.getJSONObject(item.getValue()), (StructTypeInfo) typeInfo.getValueTypeInfo())); + } + return result; + /** + * MAP类型 + */ + case MAP: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), + parseMap(json.getJSONObject(item.getValue()), (MapTypeInfo) typeInfo.getValueTypeInfo())); + } + return result; + /** + * ARRAY类型 + */ + case ARRAY: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), + parseArray(json.getJSONArray(item.getValue()), (ArrayTypeInfo) typeInfo.getValueTypeInfo())); + } + return result; + + default: + throw new IllegalArgumentException("decode record failed. column type: " + typeInfo.getTypeName()); + } + } + + public Struct parseStruct(JSONObject json, StructTypeInfo struct) throws ParseException { + if (null == json) { + return null; + } + List fieldNames = struct.getFieldNames(); + List typeInfos = struct.getFieldTypeInfos(); + List structValues = new ArrayList(); + for (int i = 0; i < fieldNames.size(); i++) { + String fieldName = fieldNames.get(i); + switch (typeInfos.get(i).getOdpsType()) { + case BIGINT: + structValues.add(json.getLong(fieldName)); + break; + /** + * 双精度浮点 + */ + case DOUBLE: + structValues.add(json.getDouble(fieldName)); + break; + /** + * 布尔型 + */ + case BOOLEAN: + structValues.add(json.getBoolean(fieldName)); + break; + /** + * 日期类型 + */ + case DATETIME: + // TODO 精度 + structValues.add(dateFormat.parse(json.getString(fieldName))); + break; + /** + * 字符串类型 + */ + case STRING: + structValues.add(json.getString(fieldName)); + break; + /** + * 精确小数类型 + */ + case DECIMAL: + structValues.add(json.getBigDecimal(fieldName)); + break; + /** + * 1字节有符号整型 + */ + case TINYINT: + structValues.add(json.getByte(fieldName)); + break; + /** + * 2字节有符号整型 + */ + case SMALLINT: + structValues.add(json.getShort(fieldName)); + break; + /** + * 4字节有符号整型 + */ + case INT: + structValues.add(json.getInteger(fieldName)); + break; + /** + * 单精度浮点 + */ + case FLOAT: + structValues.add(json.getFloat(fieldName)); + break; + /** + * 固定长度字符串 + */ + case CHAR: + structValues.add(new Char(json.getString(fieldName), ((CharTypeInfo) typeInfos.get(i)).getLength())); + break; + /** + * 可变长度字符串 + */ + case VARCHAR: + structValues + .add(new Varchar(json.getString(fieldName), ((VarcharTypeInfo) typeInfos.get(i)).getLength())); + break; + /** + * 时间类型 + */ + case DATE: + // TODO string -> date need timezone + // TODO how to use odps Record + structValues.add(java.sql.Date.valueOf(json.getString(fieldName))); + break; + /** + * 时间戳 + */ + case TIMESTAMP: + structValues.add(Timestamp.valueOf(json.getString(fieldName))); + break; + /** + * 字节数组 + */ + case BINARY: + structValues.add(Base64.decodeBase64(json.getString(fieldName))); + break; + /** + * 日期间隔 + */ + 
case INTERVAL_DAY_TIME: + // TODO special process as map object + structValues.add(new IntervalDayTime(json.getInteger("totalSeconds"), json.getInteger("nanos"))); + break; + /** + * 年份间隔 + */ + case INTERVAL_YEAR_MONTH: + structValues.add(new IntervalYearMonth(json.getInteger("years"), json.getInteger("months"))); + break; + /** + * 结构体 + */ + case STRUCT: + structValues.add(parseStruct(json.getJSONObject(fieldName), (StructTypeInfo) typeInfos.get(i))); + break; + /** + * MAP类型 + */ + case MAP: + structValues.add(parseMap(json.getJSONObject(fieldName), (MapTypeInfo) typeInfos.get(i))); + break; + /** + * ARRAY类型 + */ + case ARRAY: + structValues.add(parseArray(json.getJSONArray(fieldName), (ArrayTypeInfo) typeInfos.get(i))); + break; + } + } + + SimpleStruct simpleStruct = new SimpleStruct(struct, structValues); + return simpleStruct; + } + + public Long getLastActiveTime() { + return lastActiveTime; + } + + public void setLastActiveTime(Long lastActiveTime) { + this.lastActiveTime = lastActiveTime; + } + + public Long getCurrentTotalBytes() throws IOException { + return this.protobufRecordPack.getTotalBytes(); + } } diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/PartitionInfo.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/PartitionInfo.java new file mode 100644 index 00000000..f293d8cc --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/PartitionInfo.java @@ -0,0 +1,87 @@ +package com.alibaba.datax.plugin.writer.odpswriter.model; + +public class PartitionInfo { + /** + * 字段名 + */ + private String name; + /** + * String + */ + private String type; + /** + * eventTime or function + * yyyy/MM/dd/HH/mm + * 可自定义组合 + */ + private String valueMode; + private String value; + private String comment; + /** + * 自定义分区有效 + * eventTime / constant + * function + */ + private String category; + /** + * 当 partitionType 为function时 + * functionExpression 为 valueMode 对应的expression + */ + private String functionExpression; + + public String getFunctionExpression() { + return functionExpression; + } + + public void setFunctionExpression(String functionExpression) { + this.functionExpression = functionExpression; + } + + public String getCategory() { + return category; + } + + public void setCategory(String category) { + this.category = category; + } + + public String getComment() { + return comment; + } + + public void setComment(String comment) { + this.comment = comment; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getValueMode() { + return valueMode; + } + + public void setValueMode(String valueMode) { + this.valueMode = valueMode; + } + + public String getValue() { + return value; + } + + public void setValue(String value) { + this.value = value; + } +} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunction.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunction.java new file mode 100644 index 00000000..55c8a114 --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunction.java @@ -0,0 +1,44 @@ +package com.alibaba.datax.plugin.writer.odpswriter.model; + +import java.io.Serializable; +import java.util.List; + +public class UserDefinedFunction implements Serializable 
{ + private static final long serialVersionUID = 1L; + private String name; + private String expression; + private String inputColumn; + private List variableRule; + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getExpression() { + return expression; + } + + public void setExpression(String expression) { + this.expression = expression; + } + + public String getInputColumn() { + return inputColumn; + } + + public void setInputColumn(String inputColumn) { + this.inputColumn = inputColumn; + } + + public List getVariableRule() { + return variableRule; + } + + public void setVariableRule(List variableRule) { + this.variableRule = variableRule; + } +} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunctionRule.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunctionRule.java new file mode 100644 index 00000000..5676eb45 --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunctionRule.java @@ -0,0 +1,26 @@ +package com.alibaba.datax.plugin.writer.odpswriter.model; + +import java.io.Serializable; +import java.util.List; + +public class UserDefinedFunctionRule implements Serializable { + private static final long serialVersionUID = 1L; + private String type; + private List params; + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public List getParams() { + return params; + } + + public void setParams(List params) { + this.params = params; + } +} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/CustomPartitionUtils.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/CustomPartitionUtils.java new file mode 100644 index 00000000..6153a820 --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/CustomPartitionUtils.java @@ -0,0 +1,54 @@ +package com.alibaba.datax.plugin.writer.odpswriter.util; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.writer.odpswriter.model.PartitionInfo; +import com.alibaba.datax.plugin.writer.odpswriter.model.UserDefinedFunction; +import com.alibaba.fastjson2.JSON; +import com.google.common.base.Joiner; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.util.*; +import java.util.stream.Collectors; + +public class CustomPartitionUtils implements Serializable { + private static final long serialVersionUID = 1L; + protected static Logger logger = LoggerFactory.getLogger(CustomPartitionUtils.class); + + public static List getListWithJson(Configuration config, String path, Class clazz) { + Object object = config.get(path, List.class); + if (null == object) { + return null; + } + + return JSON.parseArray(JSON.toJSONString(object), clazz); + } + + public static String generate(Record record, List functions, List partitions, + List allColumns) { + for (PartitionInfo partitionInfo : partitions) { + partitionInfo.setValue(buildPartitionValue(partitionInfo, functions, record, allColumns)); + } + List partitionList = partitions.stream() + .map(item -> String.format("%s='%s'", item.getName(), item.getValue())) + .collect(Collectors.toList()); + return Joiner.on(",").join(partitionList); + } + + private static 
String buildPartitionValue(PartitionInfo partitionInfo, List functions, Record record, + List allColumns) { +// logger.info("try build partition value:partitionInfo:\n{},functions:\n{}", +// JSON.toJSONString(partitionInfo), JSON.toJSONString(functions)); + if (StringUtils.isBlank(partitionInfo.getCategory()) + || "eventTime".equalsIgnoreCase(partitionInfo.getCategory()) + || "constant".equalsIgnoreCase(partitionInfo.getCategory())) { + // 直接输出原样字符串 + return partitionInfo.getValueMode(); +// throw new RuntimeException("not support partition category:" + partitionInfo.getCategory()); + } + throw new RuntimeException("un support partition info type:" + partitionInfo.getCategory()); + } +} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/DESCipher.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/DESCipher.java deleted file mode 100755 index 4afead52..00000000 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/DESCipher.java +++ /dev/null @@ -1,355 +0,0 @@ -/** - * (C) 2010-2014 Alibaba Group Holding Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.alibaba.datax.plugin.writer.odpswriter.util; - -import javax.crypto.Cipher; -import javax.crypto.SecretKey; -import javax.crypto.SecretKeyFactory; -import javax.crypto.spec.DESKeySpec; -import java.security.SecureRandom; - -/** - *   * DES加解密,支持与delphi交互(字符串编码需统一为UTF-8) - * - *   * - * - *   * @author wym - * - *    - */ - -public class DESCipher { - - /** - *   * 密钥 - * - *    - */ - - public static final String KEY = "DESDES"; - - private final static String DES = "DES"; - - /** - *   * 加密 - * - *   * - * - *   * @param src - * - *   * 明文(字节) - * - *   * @param key - * - *   * 密钥,长度必须是8的倍数 - * - *   * @return 密文(字节) - * - *   * @throws Exception - * - *    - */ - - public static byte[] encrypt(byte[] src, byte[] key) throws Exception { - - // DES算法要求有一个可信任的随机数源 - - SecureRandom sr = new SecureRandom(); - - // 从原始密匙数据创建DESKeySpec对象 - - DESKeySpec dks = new DESKeySpec(key); - - // 创建一个密匙工厂,然后用它把DESKeySpec转换成 - - // 一个SecretKey对象 - - SecretKeyFactory keyFactory = SecretKeyFactory.getInstance(DES); - - SecretKey securekey = keyFactory.generateSecret(dks); - - // Cipher对象实际完成加密操作 - - Cipher cipher = Cipher.getInstance(DES); - - // 用密匙初始化Cipher对象 - - cipher.init(Cipher.ENCRYPT_MODE, securekey, sr); - - // 现在,获取数据并加密 - - // 正式执行加密操作 - - return cipher.doFinal(src); - - } - - /** - *   * 解密 - * - *   * - * - *   * @param src - * - *   * 密文(字节) - * - *   * @param key - * - *   * 密钥,长度必须是8的倍数 - * - *   * @return 明文(字节) - * - *   * @throws Exception - * - *    - */ - - public static byte[] decrypt(byte[] src, byte[] key) throws Exception { - - // DES算法要求有一个可信任的随机数源 - - SecureRandom sr = new SecureRandom(); - - // 从原始密匙数据创建一个DESKeySpec对象 - - DESKeySpec dks = new DESKeySpec(key); - - // 创建一个密匙工厂,然后用它把DESKeySpec对象转换成 - - // 一个SecretKey对象 - - SecretKeyFactory keyFactory = SecretKeyFactory.getInstance(DES); - - SecretKey securekey = 
keyFactory.generateSecret(dks); - - // Cipher对象实际完成解密操作 - - Cipher cipher = Cipher.getInstance(DES); - - // 用密匙初始化Cipher对象 - - cipher.init(Cipher.DECRYPT_MODE, securekey, sr); - - // 现在,获取数据并解密 - - // 正式执行解密操作 - - return cipher.doFinal(src); - - } - - /** - *   * 加密 - * - *   * - * - *   * @param src - * - *   * 明文(字节) - * - *   * @return 密文(字节) - * - *   * @throws Exception - * - *    - */ - - public static byte[] encrypt(byte[] src) throws Exception { - - return encrypt(src, KEY.getBytes()); - - } - - /** - *   * 解密 - * - *   * - * - *   * @param src - * - *   * 密文(字节) - * - *   * @return 明文(字节) - * - *   * @throws Exception - * - *    - */ - - public static byte[] decrypt(byte[] src) throws Exception { - - return decrypt(src, KEY.getBytes()); - - } - - /** - *   * 加密 - * - *   * - * - *   * @param src - * - *   * 明文(字符串) - * - *   * @return 密文(16进制字符串) - * - *   * @throws Exception - * - *    - */ - - public final static String encrypt(String src) { - - try { - - return byte2hex(encrypt(src.getBytes(), KEY.getBytes())); - - } catch (Exception e) { - - e.printStackTrace(); - - } - - return null; - - } - - /** - *   * 解密 - * - *   * - * - *   * @param src - * - *   * 密文(字符串) - * - *   * @return 明文(字符串) - * - *   * @throws Exception - * - *    - */ - - public final static String decrypt(String src) { - try { - - return new String(decrypt(hex2byte(src.getBytes()), KEY.getBytes())); - - } catch (Exception e) { - - e.printStackTrace(); - - } - - return null; - - } - - /** - *   * 加密 - * - *   * - * - *   * @param src - * - *   * 明文(字节) - * - *   * @return 密文(16进制字符串) - * - *   * @throws Exception - * - *    - */ - - public static String encryptToString(byte[] src) throws Exception { - - return encrypt(new String(src)); - - } - - /** - *   * 解密 - * - *   * - * - *   * @param src - * - *   * 密文(字节) - * - *   * @return 明文(字符串) - * - *   * @throws Exception - * - *    - */ - - public static String decryptToString(byte[] src) throws Exception { - - return decrypt(new String(src)); - - } - - public static String byte2hex(byte[] b) { - - String hs = ""; - - String stmp = ""; - - for (int n = 0; n < b.length; n++) { - - stmp = (Integer.toHexString(b[n] & 0XFF)); - - if (stmp.length() == 1) - - hs = hs + "0" + stmp; - - else - - hs = hs + stmp; - - } - - return hs.toUpperCase(); - - } - - public static byte[] hex2byte(byte[] b) { - - if ((b.length % 2) != 0) - - throw new IllegalArgumentException("长度不是偶数"); - - byte[] b2 = new byte[b.length / 2]; - - for (int n = 0; n < b.length; n += 2) { - - String item = new String(b, n, 2); - - b2[n / 2] = (byte) Integer.parseInt(item, 16); - - } - return b2; - - } - - /* - * public static void main(String[] args) { try { String src = "cheetah"; - * String crypto = DESCipher.encrypt(src); System.out.println("密文[" + src + - * "]:" + crypto); System.out.println("解密后:" + DESCipher.decrypt(crypto)); } - * catch (Exception e) { e.printStackTrace(); } } - */ -} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/IdAndKeyUtil.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/IdAndKeyUtil.java deleted file mode 100755 index 95e4b56b..00000000 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/IdAndKeyUtil.java +++ /dev/null @@ -1,85 +0,0 @@ -/** - * (C) 2010-2014 Alibaba Group Holding Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.alibaba.datax.plugin.writer.odpswriter.util; - -import com.alibaba.datax.common.exception.DataXException; -import com.alibaba.datax.common.util.Configuration; -import com.alibaba.datax.plugin.writer.odpswriter.Constant; -import com.alibaba.datax.plugin.writer.odpswriter.Key; -import com.alibaba.datax.plugin.writer.odpswriter.OdpsWriterErrorCode; -import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Map; - -public class IdAndKeyUtil { - private static Logger LOG = LoggerFactory.getLogger(IdAndKeyUtil.class); - - public static Configuration parseAccessIdAndKey(Configuration originalConfig) { - String accessId = originalConfig.getString(Key.ACCESS_ID); - String accessKey = originalConfig.getString(Key.ACCESS_KEY); - - // 只要 accessId,accessKey 二者配置了一个,就理解为是用户本意是要直接手动配置其 accessid/accessKey - if (StringUtils.isNotBlank(accessId) || StringUtils.isNotBlank(accessKey)) { - LOG.info("Try to get accessId/accessKey from your config."); - //通过如下语句,进行检查是否确实配置了 - accessId = originalConfig.getNecessaryValue(Key.ACCESS_ID, OdpsWriterErrorCode.REQUIRED_VALUE); - accessKey = originalConfig.getNecessaryValue(Key.ACCESS_KEY, OdpsWriterErrorCode.REQUIRED_VALUE); - //检查完毕,返回即可 - return originalConfig; - } else { - Map envProp = System.getenv(); - return getAccessIdAndKeyFromEnv(originalConfig, envProp); - } - } - - private static Configuration getAccessIdAndKeyFromEnv(Configuration originalConfig, - Map envProp) { - String accessId = null; - String accessKey = null; - - String skynetAccessID = envProp.get(Constant.SKYNET_ACCESSID); - String skynetAccessKey = envProp.get(Constant.SKYNET_ACCESSKEY); - - if (StringUtils.isNotBlank(skynetAccessID) - || StringUtils.isNotBlank(skynetAccessKey)) { - /** - * 环境变量中,如果存在SKYNET_ACCESSID/SKYNET_ACCESSKEy(只要有其中一个变量,则认为一定是两个都存在的!), - * 则使用其值作为odps的accessId/accessKey(会解密) - */ - - LOG.info("Try to get accessId/accessKey from environment."); - accessId = skynetAccessID; - accessKey = DESCipher.decrypt(skynetAccessKey); - if (StringUtils.isNotBlank(accessKey)) { - originalConfig.set(Key.ACCESS_ID, accessId); - originalConfig.set(Key.ACCESS_KEY, accessKey); - LOG.info("Get accessId/accessKey from environment variables successfully."); - } else { - throw DataXException.asDataXException(OdpsWriterErrorCode.GET_ID_KEY_FAIL, - String.format("从环境变量中获取accessId/accessKey 失败, accessId=[%s]", accessId)); - } - } else { - // 无处获取(既没有配置在作业中,也没用在环境变量中) - throw DataXException.asDataXException(OdpsWriterErrorCode.GET_ID_KEY_FAIL, - "无法获取到accessId/accessKey. 
它们既不存在于您的配置中,也不存在于环境变量中."); - } - - return originalConfig; - } -} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/LocalStrings.properties b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/LocalStrings.properties new file mode 100644 index 00000000..289c70fa --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/LocalStrings.properties @@ -0,0 +1,39 @@ +descipher.1=\u957f\u5ea6\u4e0d\u662f\u5076\u6570 + +idandkeyutil.1=\u4ece\u73af\u5883\u53d8\u91cf\u4e2d\u83b7\u53d6accessId/accessKey \u5931\u8d25, accessId=[{0}] +idandkeyutil.2=\u65e0\u6cd5\u83b7\u53d6\u5230accessId/accessKey. \u5b83\u4eec\u65e2\u4e0d\u5b58\u5728\u4e8e\u60a8\u7684\u914d\u7f6e\u4e2d\uff0c\u4e5f\u4e0d\u5b58\u5728\u4e8e\u73af\u5883\u53d8\u91cf\u4e2d. + +odpsutil.1=\u60a8\u672a\u914d\u7f6e\u5199\u5165 ODPS \u76ee\u7684\u8868\u7684\u5217\u4fe1\u606f. \u6b63\u786e\u7684\u914d\u7f6e\u65b9\u5f0f\u662f\u7ed9datax\u7684 column \u9879\u914d\u7f6e\u4e0a\u60a8\u9700\u8981\u8bfb\u53d6\u7684\u5217\u540d\u79f0,\u7528\u82f1\u6587\u9017\u53f7\u5206\u9694 \u4f8b\u5982: \"column\": [\"id\",\"name\"]. +odpsutil.2=[truncate]\u662f\u5fc5\u586b\u914d\u7f6e\u9879, \u610f\u601d\u662f\u5199\u5165 ODPS \u76ee\u7684\u8868\u524d\u662f\u5426\u6e05\u7a7a\u8868/\u5206\u533a. \u8bf7\u60a8\u589e\u52a0 truncate \u7684\u914d\u7f6e\uff0c\u6839\u636e\u4e1a\u52a1\u9700\u8981\u9009\u62e9\u4e0atrue \u6216\u8005 false. +odpsutil.3=\u60a8\u6240\u914d\u7f6e\u7684maxRetryTime \u503c\u9519\u8bef. \u8be5\u503c\u4e0d\u80fd\u5c0f\u4e8e1, \u4e14\u4e0d\u80fd\u5927\u4e8e {0}. \u63a8\u8350\u7684\u914d\u7f6e\u65b9\u5f0f\u662f\u7ed9maxRetryTime \u914d\u7f6e1-11\u4e4b\u95f4\u7684\u67d0\u4e2a\u503c. \u8bf7\u60a8\u68c0\u67e5\u914d\u7f6e\u5e76\u505a\u51fa\u76f8\u5e94\u4fee\u6539. +odpsutil.4=\u4e0d\u652f\u6301\u7684\u8d26\u53f7\u7c7b\u578b:[{0}]. \u8d26\u53f7\u7c7b\u578b\u76ee\u524d\u4ec5\u652f\u6301aliyun, taobao. +odpsutil.5=\u83b7\u53d6 ODPS \u76ee\u7684\u8868:{0} \u7684\u6240\u6709\u5206\u533a\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.6=\u68c0\u67e5 ODPS \u76ee\u7684\u8868:{0} \u662f\u5426\u4e3a\u5206\u533a\u8868\u5931\u8d25, \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.7=\u6e05\u7a7a ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25, \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.8=\u6dfb\u52a0 ODPS \u76ee\u7684\u8868\u7684\u5206\u533a\u5931\u8d25. \u9519\u8bef\u53d1\u751f\u5728\u6dfb\u52a0 ODPS \u7684\u9879\u76ee:{0} \u7684\u8868:{1} \u7684\u5206\u533a:{2}. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.9=\u521b\u5efaTunnelUpload\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.10=\u521b\u5efaTunnelUpload\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.11=\u83b7\u53d6TunnelUpload\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.12=\u83b7\u53d6TunnelUpload\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.13=Drop ODPS \u76ee\u7684\u8868\u5206\u533a\u5931\u8d25. \u9519\u8bef\u53d1\u751f\u5728\u9879\u76ee:{0} \u7684\u8868:{1} \u7684\u5206\u533a:{2} .\u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.14=ODPS \u76ee\u7684\u8868\u81ea\u8eab\u7684 partition:{0} \u683c\u5f0f\u4e0d\u5bf9. \u6b63\u786e\u7684\u683c\u5f0f\u5f62\u5982: pt=1,ds=hangzhou +odpsutil.15=ODPS \u76ee\u7684\u8868\u5728\u8fd0\u884c ODPS SQL\u5931\u8d25, \u8fd4\u56de\u503c\u4e3a:{0}. 
\u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. SQL \u5185\u5bb9\u4e3a:[\n{1}\n]. +odpsutil.16=ODPS \u76ee\u7684\u8868\u5728\u8fd0\u884c ODPS SQL \u65f6\u629b\u51fa\u5f02\u5e38, \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. SQL \u5185\u5bb9\u4e3a:[\n{0}\n]. +odpsutil.17=ODPS \u76ee\u7684\u8868\u5728\u63d0\u4ea4 block:[\n{0}\n] \u65f6\u5931\u8d25, uploadId=[{1}]. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.18=ODPS \u76ee\u7684\u8868\u5199 block:{0} \u5931\u8d25\uff0c uploadId=[{1}]. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.19=ODPS \u76ee\u7684\u8868\u7684\u5217\u914d\u7f6e\u9519\u8bef. \u7531\u4e8e\u60a8\u6240\u914d\u7f6e\u7684\u5217:{0} \u4e0d\u5b58\u5728\uff0c\u4f1a\u5bfc\u81f4datax\u65e0\u6cd5\u6b63\u5e38\u63d2\u5165\u6570\u636e\uff0c\u8bf7\u68c0\u67e5\u8be5\u5217\u662f\u5426\u5b58\u5728\uff0c\u5982\u679c\u5b58\u5728\u8bf7\u68c0\u67e5\u5927\u5c0f\u5199\u7b49\u914d\u7f6e. +odpsutil.20=DataX \u5199\u5165 ODPS \u8868\u4e0d\u652f\u6301\u8be5\u5b57\u6bb5\u7c7b\u578b:[{0}]. \u76ee\u524d\u652f\u6301\u62bd\u53d6\u7684\u5b57\u6bb5\u7c7b\u578b\u6709\uff1abigint, boolean, datetime, double, string. \u60a8\u53ef\u4ee5\u9009\u62e9\u4e0d\u62bd\u53d6 DataX \u4e0d\u652f\u6301\u7684\u5b57\u6bb5\u6216\u8005\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5bfb\u6c42\u5e2e\u52a9. +odpsutil.21=\u60a8\u6ca1\u6709\u914d\u7f6e\u5206\u533a\u4fe1\u606f\uff0c\u56e0\u4e3a\u4f60\u914d\u7f6e\u7684\u8868\u662f\u5206\u533a\u8868:{0} \u5982\u679c\u9700\u8981\u8fdb\u884c truncate \u64cd\u4f5c\uff0c\u5fc5\u987b\u6307\u5b9a\u9700\u8981\u6e05\u7a7a\u7684\u5177\u4f53\u5206\u533a. \u8bf7\u4fee\u6539\u5206\u533a\u914d\u7f6e\uff0c\u683c\u5f0f\u5f62\u5982 pt=$'{bizdate'} . +odpsutil.22=\u5206\u533a\u4fe1\u606f\u914d\u7f6e\u9519\u8bef\uff0c\u4f60\u7684ODPS\u8868\u662f\u975e\u5206\u533a\u8868:{0} \u8fdb\u884c truncate \u64cd\u4f5c\u65f6\u4e0d\u9700\u8981\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. \u8bf7\u68c0\u67e5\u60a8\u7684\u5206\u533a\u914d\u7f6e\uff0c\u5220\u9664\u8be5\u914d\u7f6e\u9879\u7684\u503c. +odpsutil.23=\u60a8\u7684\u76ee\u7684\u8868\u662f\u5206\u533a\u8868\uff0c\u5199\u5165\u5206\u533a\u8868:{0} \u65f6\u5fc5\u987b\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. \u8bf7\u4fee\u6539\u60a8\u7684\u5206\u533a\u914d\u7f6e\u4fe1\u606f\uff0c\u683c\u5f0f\u5f62\u5982 \u683c\u5f0f\u5f62\u5982 pt=$'{bizdate'}. +odpsutil.24=\u60a8\u7684\u76ee\u7684\u8868\u662f\u975e\u5206\u533a\u8868\uff0c\u5199\u5165\u975e\u5206\u533a\u8868:{0} \u65f6\u4e0d\u9700\u8981\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. \u8bf7\u5220\u9664\u5206\u533a\u914d\u7f6e\u4fe1\u606f +odpsutil.25=\u60a8\u6ca1\u6709\u914d\u7f6e\u5206\u533a\u4fe1\u606f\uff0c\u56e0\u4e3a\u4f60\u914d\u7f6e\u7684\u8868\u662f\u5206\u533a\u8868:{0} \u5982\u679c\u9700\u8981\u8fdb\u884c truncate \u64cd\u4f5c\uff0c\u5fc5\u987b\u6307\u5b9a\u9700\u8981\u6e05\u7a7a\u7684\u5177\u4f53\u5206\u533a. \u8bf7\u4fee\u6539\u5206\u533a\u914d\u7f6e\uff0c\u683c\u5f0f\u5f62\u5982 pt=$'{bizdate'} . +odpsutil.26=\u5206\u533a\u4fe1\u606f\u914d\u7f6e\u9519\u8bef\uff0c\u4f60\u7684ODPS\u8868\u662f\u975e\u5206\u533a\u8868:{0} \u8fdb\u884c truncate \u64cd\u4f5c\u65f6\u4e0d\u9700\u8981\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. \u8bf7\u68c0\u67e5\u60a8\u7684\u5206\u533a\u914d\u7f6e\uff0c\u5220\u9664\u8be5\u914d\u7f6e\u9879\u7684\u503c. +odpsutil.27=\u60a8\u7684\u76ee\u7684\u8868\u662f\u5206\u533a\u8868\uff0c\u5199\u5165\u5206\u533a\u8868:{0} \u65f6\u5fc5\u987b\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. 
\u8bf7\u4fee\u6539\u60a8\u7684\u5206\u533a\u914d\u7f6e\u4fe1\u606f\uff0c\u683c\u5f0f\u5f62\u5982 \u683c\u5f0f\u5f62\u5982 pt=$'{bizdate'}. +odpsutil.28=\u60a8\u7684\u76ee\u7684\u8868\u662f\u975e\u5206\u533a\u8868\uff0c\u5199\u5165\u975e\u5206\u533a\u8868:{0} \u65f6\u4e0d\u9700\u8981\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. \u8bf7\u5220\u9664\u5206\u533a\u914d\u7f6e\u4fe1\u606f +odpsutil.29=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 [project] \u662f\u5426\u6b63\u786e. +odpsutil.30=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 [table] \u662f\u5426\u6b63\u786e. +odpsutil.31=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 [accessId] [accessKey]\u662f\u5426\u6b63\u786e. +odpsutil.32=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 [accessKey] \u662f\u5426\u6b63\u786e. +odpsutil.33=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 [accessId] [accessKey] [project]\u662f\u5426\u5339\u914d. +odpsutil.34=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 project,table,accessId,accessKey,odpsServer\u7b49\u503c. \ No newline at end of file diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsExceptionMsg.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsExceptionMsg.java index d613eefd..ae6f275c 100644 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsExceptionMsg.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsExceptionMsg.java @@ -1,8 +1,5 @@ package com.alibaba.datax.plugin.writer.odpswriter.util; -/** - * Created by hongjiao.hj on 2015/6/9. 
- */ public class OdpsExceptionMsg { public static final String ODPS_PROJECT_NOT_FOUNT = "ODPS-0420111: Project not found"; diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsUtil.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsUtil.java index 2a401b69..a3a372af 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsUtil.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsUtil.java @@ -1,29 +1,35 @@ package com.alibaba.datax.plugin.writer.odpswriter.util; +import com.alibaba.datax.common.element.*; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.MessageSource; import com.alibaba.datax.common.util.RetryUtil; -import com.alibaba.datax.plugin.writer.odpswriter.Constant; -import com.alibaba.datax.plugin.writer.odpswriter.Key; - -import com.alibaba.datax.plugin.writer.odpswriter.OdpsWriterErrorCode; +import com.alibaba.datax.plugin.writer.odpswriter.*; import com.aliyun.odps.*; +import com.aliyun.odps.Column; import com.aliyun.odps.account.Account; import com.aliyun.odps.account.AliyunAccount; +import com.aliyun.odps.data.ResultSet; +import com.aliyun.odps.data.Binary; import com.aliyun.odps.task.SQLTask; import com.aliyun.odps.tunnel.TableTunnel; - import com.aliyun.odps.tunnel.io.ProtobufRecordPack; import com.aliyun.odps.tunnel.io.TunnelRecordWriter; +import com.aliyun.odps.type.TypeInfo; + import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.time.DateFormatUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.text.SimpleDateFormat; import java.util.*; import java.util.concurrent.Callable; public class OdpsUtil { private static final Logger LOG = LoggerFactory.getLogger(OdpsUtil.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsUtil.class); public static int MAX_RETRY_TIME = 10; @@ -38,15 +44,14 @@ public class OdpsUtil { if (null == originalConfig.getList(Key.COLUMN) || originalConfig.getList(Key.COLUMN, String.class).isEmpty()) { - throw DataXException.asDataXException(OdpsWriterErrorCode.REQUIRED_VALUE, "您未配置写入 ODPS 目的表的列信息. " + - "正确的配置方式是给datax的 column 项配置上您需要读取的列名称,用英文逗号分隔 例如: \"column\": [\"id\",\"name\"]."); + throw DataXException.asDataXException(OdpsWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("odpsutil.1")); } // getBool 内部要求,值只能为 true,false 的字符串(大小写不敏感),其他一律报错,不再有默认配置 + // 如果是动态分区写入,不进行truncate Boolean truncate = originalConfig.getBool(Key.TRUNCATE); if (null == truncate) { - throw DataXException.asDataXException(OdpsWriterErrorCode.REQUIRED_VALUE, "[truncate]是必填配置项, 意思是写入 ODPS 目的表前是否清空表/分区. " + - "请您增加 truncate 的配置,根据业务需要选择上true 或者 false."); + throw DataXException.asDataXException(OdpsWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("odpsutil.2")); } } @@ -54,36 +59,38 @@ public class OdpsUtil { int maxRetryTime = originalConfig.getInt(Key.MAX_RETRY_TIME, OdpsUtil.MAX_RETRY_TIME); if (maxRetryTime < 1 || maxRetryTime > OdpsUtil.MAX_RETRY_TIME) { - throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, "您所配置的maxRetryTime 值错误. 该值不能小于1, 且不能大于 " + OdpsUtil.MAX_RETRY_TIME + - ". 推荐的配置方式是给maxRetryTime 配置1-11之间的某个值. 
请您检查配置并做出相应修改."); + throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("odpsutil.3", OdpsUtil.MAX_RETRY_TIME)); } MAX_RETRY_TIME = maxRetryTime; } - public static String formatPartition(String partitionString) { + public static String formatPartition(String partitionString, Boolean printLog) { if (null == partitionString) { return null; } - - return partitionString.trim().replaceAll(" *= *", "=").replaceAll(" */ *", ",") + String parsedPartition = partitionString.trim().replaceAll(" *= *", "=").replaceAll(" */ *", ",") .replaceAll(" *, *", ",").replaceAll("'", ""); + if (printLog) { + LOG.info("format partition with rules: remove all space; remove all '; replace / to ,"); + LOG.info("original partiton {} parsed partition {}", partitionString, parsedPartition); + } + return parsedPartition; } public static Odps initOdpsProject(Configuration originalConfig) { - String accountType = originalConfig.getString(Key.ACCOUNT_TYPE); String accessId = originalConfig.getString(Key.ACCESS_ID); String accessKey = originalConfig.getString(Key.ACCESS_KEY); String odpsServer = originalConfig.getString(Key.ODPS_SERVER); String project = originalConfig.getString(Key.PROJECT); + String securityToken = originalConfig.getString(Key.SECURITY_TOKEN); Account account; - if (accountType.equalsIgnoreCase(Constant.DEFAULT_ACCOUNT_TYPE)) { - account = new AliyunAccount(accessId, accessKey); + if (StringUtils.isNotBlank(securityToken)) { + account = new com.aliyun.odps.account.StsAccount(accessId, accessKey, securityToken); } else { - throw DataXException.asDataXException(OdpsWriterErrorCode.ACCOUNT_TYPE_ERROR, - String.format("不支持的账号类型:[%s]. 账号类型目前仅支持aliyun, taobao.", accountType)); + account = new AliyunAccount(accessId, accessKey); } Odps odps = new Odps(account); @@ -95,6 +102,7 @@ public class OdpsUtil { } odps.setDefaultProject(project); odps.setEndpoint(odpsServer); + odps.setUserAgent("DATAX"); return odps; } @@ -124,8 +132,7 @@ public class OdpsUtil { parts.add(partition.getPartitionSpec().toString()); } } catch (Exception e) { - throw DataXException.asDataXException(OdpsWriterErrorCode.GET_PARTITION_FAIL, String.format("获取 ODPS 目的表:%s 的所有分区失败. 
请联系 ODPS 管理员处理.", - table.getName()), e); + throw DataXException.asDataXException(OdpsWriterErrorCode.GET_PARTITION_FAIL, MESSAGE_SOURCE.message("odpsutil.5", table.getName()), e); } return parts; } @@ -140,37 +147,45 @@ public class OdpsUtil { } } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.CHECK_IF_PARTITIONED_TABLE_FAILED, - String.format("检查 ODPS 目的表:%s 是否为分区表失败, 请联系 ODPS 管理员处理.", table.getName()), e); + MESSAGE_SOURCE.message("odpsutil.6", table.getName()), e); } return false; } public static void truncateNonPartitionedTable(Odps odps, Table tab) { - String truncateNonPartitionedTableSql = "truncate table " + tab.getName() + ";"; + truncateNonPartitionedTable(odps, tab.getName()); + } + + public static void truncateNonPartitionedTable(Odps odps, String tableName) { + String truncateNonPartitionedTableSql = "truncate table " + tableName + ";"; try { - runSqlTaskWithRetry(odps, truncateNonPartitionedTableSql, MAX_RETRY_TIME, 1000, true); + LOG.info("truncate non partitioned table with sql: {}", truncateNonPartitionedTableSql); + runSqlTaskWithRetry(odps, truncateNonPartitionedTableSql, MAX_RETRY_TIME, 1000, true, "truncate", null); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.TABLE_TRUNCATE_ERROR, - String.format(" 清空 ODPS 目的表:%s 失败, 请联系 ODPS 管理员处理.", tab.getName()), e); + MESSAGE_SOURCE.message("odpsutil.7", tableName), e); } } public static void truncatePartition(Odps odps, Table table, String partition) { if (isPartitionExist(table, partition)) { + LOG.info("partition {} is already exist, truncate it to clean old data", partition); dropPart(odps, table, partition); } + LOG.info("begin to add partition {}", partition); addPart(odps, table, partition); } private static boolean isPartitionExist(Table table, String partition) { // check if exist partition 返回值不为 null List odpsParts = OdpsUtil.listOdpsPartitions(table); - int j = 0; for (; j < odpsParts.size(); j++) { if (odpsParts.get(j).replaceAll("'", "").equals(partition)) { + LOG.info("found a partiton {} equals to (ignore ' if contains) configured partiton {}", + odpsParts.get(j), partition); break; } } @@ -185,11 +200,14 @@ public class OdpsUtil { addPart.append("alter table ").append(table.getName()).append(" add IF NOT EXISTS partition(") .append(partSpec).append(");"); try { - runSqlTaskWithRetry(odps, addPart.toString(), MAX_RETRY_TIME, 1000, true); + Map hints = new HashMap(); + //开启ODPS SQL TYPE2.0类型 + hints.put("odps.sql.type.system.odps2", "true"); + LOG.info("add partition with sql: {}", addPart.toString()); + runSqlTaskWithRetry(odps, addPart.toString(), MAX_RETRY_TIME, 1000, true, "addPart", hints); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.ADD_PARTITION_FAILED, - String.format("添加 ODPS 目的表的分区失败. 错误发生在添加 ODPS 的项目:%s 的表:%s 的分区:%s. 请联系 ODPS 管理员处理.", - table.getProject(), table.getName(), partition), e); + MESSAGE_SOURCE.message("odpsutil.8", table.getProject(), table.getName(), partition), e); } } @@ -206,7 +224,7 @@ public class OdpsUtil { }, MAX_RETRY_TIME, 1000L, true); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.CREATE_MASTER_UPLOAD_FAIL, - "创建TunnelUpload失败. 
请联系 ODPS 管理员处理.", e); + MESSAGE_SOURCE.message("odpsutil.9"), e); } } else { final PartitionSpec partitionSpec = new PartitionSpec(partition); @@ -219,7 +237,7 @@ public class OdpsUtil { }, MAX_RETRY_TIME, 1000L, true); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.CREATE_MASTER_UPLOAD_FAIL, - "创建TunnelUpload失败. 请联系 ODPS 管理员处理.", e); + MESSAGE_SOURCE.message("odpsutil.10"), e); } } } @@ -238,7 +256,7 @@ public class OdpsUtil { } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.GET_SLAVE_UPLOAD_FAIL, - "获取TunnelUpload失败. 请联系 ODPS 管理员处理.", e); + MESSAGE_SOURCE.message("odpsutil.11"), e); } } else { final PartitionSpec partitionSpec = new PartitionSpec(partition); @@ -252,7 +270,7 @@ public class OdpsUtil { } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.GET_SLAVE_UPLOAD_FAIL, - "获取TunnelUpload失败. 请联系 ODPS 管理员处理.", e); + MESSAGE_SOURCE.message("odpsutil.12"), e); } } } @@ -265,11 +283,14 @@ public class OdpsUtil { .append(" drop IF EXISTS partition(").append(partSpec) .append(");"); try { - runSqlTaskWithRetry(odps, dropPart.toString(), MAX_RETRY_TIME, 1000, true); + Map hints = new HashMap(); + //开启ODPS SQL TYPE2.0类型 + hints.put("odps.sql.type.system.odps2", "true"); + LOG.info("drop partition with sql: {}", dropPart.toString()); + runSqlTaskWithRetry(odps, dropPart.toString(), MAX_RETRY_TIME, 1000, true, "truncate", hints); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.ADD_PARTITION_FAILED, - String.format("Drop ODPS 目的表分区失败. 错误发生在项目:%s 的表:%s 的分区:%s .请联系 ODPS 管理员处理.", - table.getProject(), table.getName(), partition), e); + MESSAGE_SOURCE.message("odpsutil.13", table.getProject(), table.getName(), partition), e); } } @@ -281,7 +302,7 @@ public class OdpsUtil { String[] kv = part.split("="); if (kv.length != 2) { throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, - String.format("ODPS 目的表自身的 partition:%s 格式不对. 
正确的格式形如: pt=1,ds=hangzhou", partition)); + MESSAGE_SOURCE.message("odpsutil.14", partition)); } partSpec.append(kv[0]).append("="); partSpec.append("'").append(kv[1].replace("'", "")).append("'"); @@ -292,6 +313,38 @@ public class OdpsUtil { return partSpec.toString(); } + public static Instance runSqlTaskWithRetry(final Odps odps, final String sql, String tag) { + try { + long beginTime = System.currentTimeMillis(); + + Instance instance = runSqlTaskWithRetry(odps, sql, MAX_RETRY_TIME, 1000, true, tag, null); + + long endIime = System.currentTimeMillis(); + LOG.info(String.format("exectue odps sql: %s finished, cost time : %s ms", + sql, (endIime - beginTime))); + return instance; + } catch (Exception e) { + throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_ODPS_EXCEPTION, + MESSAGE_SOURCE.message("odpsutil.16", sql), e); + } + } + + public static ResultSet getSqlTaskRecordsWithRetry(final Odps odps, final String sql, String tag) { + Instance instance = runSqlTaskWithRetry(odps, sql, tag); + if (instance == null) { + LOG.error("can not get odps instance from sql {}", sql); + throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_ODPS_EXCEPTION, + MESSAGE_SOURCE.message("odpsutil.16", sql)); + } + try { + return SQLTask.getResultSet(instance, instance.getTaskNames().iterator().next()); + } catch (Exception e) { + throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_ODPS_EXCEPTION, + MESSAGE_SOURCE.message("odpsutil.16", sql), e); + } + } + + /** * 该方法只有在 sql 为幂等的才可以使用,且odps抛出异常时候才会进行重试 * @@ -299,12 +352,12 @@ public class OdpsUtil { * @param query 执行sql * @throws Exception */ - public static void runSqlTaskWithRetry(final Odps odps, final String query, int retryTimes, - long sleepTimeInMilliSecond, boolean exponential) throws Exception { + public static Instance runSqlTaskWithRetry(final Odps odps, final String query, int retryTimes, + long sleepTimeInMilliSecond, boolean exponential, String tag, + Map hints) throws Exception { for(int i = 0; i < retryTimes; i++) { try { - runSqlTask(odps, query); - return; + return runSqlTask(odps, query, tag, hints); } catch (DataXException e) { if (OdpsWriterErrorCode.RUN_SQL_ODPS_EXCEPTION.equals(e.getErrorCode())) { LOG.debug("Exception when calling callable", e); @@ -337,37 +390,86 @@ public class OdpsUtil { throw e; } } + return null; } - public static void runSqlTask(Odps odps, String query) { + public static Instance runSqlTask(Odps odps, String query, String tag, Map hints) { if (StringUtils.isBlank(query)) { - return; + return null; } - String taskName = "datax_odpswriter_trunacte_" + UUID.randomUUID().toString().replace('-', '_'); - + String taskName = String.format("datax_odpswriter_%s_%s", tag, UUID.randomUUID().toString().replace('-', '_')); LOG.info("Try to start sqlTask:[{}] to run odps sql:[\n{}\n] .", taskName, query); //todo:biz_id set (目前ddl先不做) Instance instance; Instance.TaskStatus status; try { - instance = SQLTask.run(odps, odps.getDefaultProject(), query, taskName, null, null); + instance = SQLTask.run(odps, odps.getDefaultProject(), query, taskName, hints, null); instance.waitForSuccess(); status = instance.getTaskStatus().get(taskName); if (!Instance.TaskStatus.Status.SUCCESS.equals(status.getStatus())) { throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_FAILED, - String.format("ODPS 目的表在运行 ODPS SQL失败, 返回值为:%s. 请联系 ODPS 管理员处理. 
SQL 内容为:[\n%s\n].", instance.getTaskResults().get(taskName), - query)); + MESSAGE_SOURCE.message("odpsutil.15", query)); } + return instance; } catch (DataXException e) { throw e; } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_ODPS_EXCEPTION, - String.format("ODPS 目的表在运行 ODPS SQL 时抛出异常, 请联系 ODPS 管理员处理. SQL 内容为:[\n%s\n].", query), e); + MESSAGE_SOURCE.message("odpsutil.16", query), e); } } + + public static String generateTaskName(String tag) { + return String.format("datax_odpswriter_%s_%s", tag, UUID.randomUUID().toString().replace('-', '_')); + } + + public static void checkBlockComplete(final TableTunnel.UploadSession masterUpload, final Long[] blocks) { + Long[] serverBlocks; + try { + serverBlocks = + RetryUtil.executeWithRetry(new Callable() { + @Override + public Long[] call() throws Exception { + return masterUpload.getBlockList(); + } + }, MAX_RETRY_TIME, 1000L, true); + } catch (Exception e) { + throw DataXException.asDataXException(OdpsWriterErrorCode.COMMIT_BLOCK_FAIL, + MESSAGE_SOURCE.message("odpsutil.17", masterUpload.getId()), e); + } + + HashMap serverBlockMap = new HashMap(); + for (Long blockId : serverBlocks) { + serverBlockMap.put(blockId, true); + } + + for (Long blockId : blocks) { + if (!serverBlockMap.containsKey(blockId)) { + throw DataXException.asDataXException(OdpsWriterErrorCode.COMMIT_BLOCK_FAIL, + "BlockId[" + blockId + "] upload failed!"); + } + } + + } + + public static void masterComplete(final TableTunnel.UploadSession masterUpload) { + try { + RetryUtil.executeWithRetry(new Callable() { + @Override + public Void call() throws Exception { + masterUpload.commit(); + return null; + } + }, MAX_RETRY_TIME, 1000L, true); + } catch (Exception e) { + throw DataXException.asDataXException(OdpsWriterErrorCode.COMMIT_BLOCK_FAIL, + MESSAGE_SOURCE.message("odpsutil.17", masterUpload.getId()), e); + } + } + public static void masterCompleteBlocks(final TableTunnel.UploadSession masterUpload, final Long[] blocks) { try { RetryUtil.executeWithRetry(new Callable() { @@ -379,30 +481,28 @@ public class OdpsUtil { }, MAX_RETRY_TIME, 1000L, true); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.COMMIT_BLOCK_FAIL, - String.format("ODPS 目的表在提交 block:[\n%s\n] 时失败, uploadId=[%s]. 请联系 ODPS 管理员处理.", StringUtils.join(blocks, ","), masterUpload.getId()), e); + MESSAGE_SOURCE.message("odpsutil.17", StringUtils.join(blocks, ","), masterUpload.getId()), e); } } public static void slaveWriteOneBlock(final TableTunnel.UploadSession slaveUpload, final ProtobufRecordPack protobufRecordPack, - final long blockId, final boolean isCompress) { + final long blockId, final Long timeoutInMs) { try { RetryUtil.executeWithRetry(new Callable() { @Override public Void call() throws Exception { - TunnelRecordWriter tunnelRecordWriter = (TunnelRecordWriter)slaveUpload.openRecordWriter(blockId, isCompress); - tunnelRecordWriter.write(protobufRecordPack); - tunnelRecordWriter.close(); + slaveUpload.writeBlock(blockId, protobufRecordPack, timeoutInMs); return null; } }, MAX_RETRY_TIME, 1000L, true); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.WRITER_RECORD_FAIL, - String.format("ODPS 目的表写 block:%s 失败, uploadId=[%s]. 
请联系 ODPS 管理员处理.", blockId, slaveUpload.getId()), e); + MESSAGE_SOURCE.message("odpsutil.18", blockId, slaveUpload.getId()), e); } } - public static List parsePosition(List allColumnList, + public static List parsePosition(List allColumnList, List allPartColumnList, List userConfiguredColumns) { List retList = new ArrayList(); @@ -416,9 +516,20 @@ public class OdpsUtil { break; } } + + if (null != allPartColumnList) { + for (int i = 0, len = allPartColumnList.size(); i < len; i++) { + if (allPartColumnList.get(i).equalsIgnoreCase(col)) { + retList.add(-1); + hasColumn = true; + break; + } + } + } + if (!hasColumn) { throw DataXException.asDataXException(OdpsWriterErrorCode.COLUMN_NOT_EXIST, - String.format("ODPS 目的表的列配置错误. 由于您所配置的列:%s 不存在,会导致datax无法正常插入数据,请检查该列是否存在,如果存在请检查大小写等配置.", col)); + MESSAGE_SOURCE.message("odpsutil.19", col)); } } return retList; @@ -436,22 +547,81 @@ public class OdpsUtil { for(Column column: columns) { allColumns.add(column.getName()); type = column.getType(); - if (type == OdpsType.ARRAY || type == OdpsType.MAP) { - throw DataXException.asDataXException(OdpsWriterErrorCode.UNSUPPORTED_COLUMN_TYPE, - String.format("DataX 写入 ODPS 表不支持该字段类型:[%s]. 目前支持抽取的字段类型有:bigint, boolean, datetime, double, string. " + - "您可以选择不抽取 DataX 不支持的字段或者联系 ODPS 管理员寻求帮助.", - type)); - } } return allColumns; } - public static List getTableOriginalColumnTypeList(TableSchema schema) { - List tableOriginalColumnTypeList = new ArrayList(); + public static List getAllPartColumns(TableSchema schema) { + if (null == schema) { + throw new IllegalArgumentException("parameter schema can not be null."); + } + + List allPartColumns = new ArrayList<>(); + + List partCols = schema.getPartitionColumns(); + + for (Column column : partCols) { + allPartColumns.add(column.getName()); + } + + return allPartColumns; + } + + public static String getPartColValFromDataXRecord(com.alibaba.datax.common.element.Record dataxRecord, + List positions, List userConfiguredColumns, + Map dateTransFormMap) { + StringBuilder partition = new StringBuilder(); + for (int i = 0, len = dataxRecord.getColumnNumber(); i < len; i++) { + if (positions.get(i) == -1) { + if (partition.length() > 0) { + partition.append(","); + } + String partName = userConfiguredColumns.get(i); + //todo: 这里应该根据分区列的类型做转换,这里先直接toString转换了 + com.alibaba.datax.common.element.Column partitionCol = dataxRecord.getColumn(i); + String partVal = partitionCol.getRawData().toString(); + if (StringUtils.isBlank(partVal)) { + throw new DataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, String.format( + "value of column %s exit null value, it can not be used as partition column", partName)); + } + + // 如果分区列的值的格式是一个日期,并且用户设置列的转换规则 + DateTransForm dateTransForm = null; + if (null != dateTransFormMap) { + dateTransForm = dateTransFormMap.get(partName); + } + if (null != dateTransForm) { + try { + // 日期列 + if (partitionCol.getType().equals(com.alibaba.datax.common.element.Column.Type.DATE)) { + partVal = OdpsUtil.date2StringWithFormat(partitionCol.asDate(), dateTransForm.getToFormat()); + } + // String 列,需要先按照 fromFormat 转换为日期 + if (partitionCol.getType().equals(com.alibaba.datax.common.element.Column.Type.STRING)) { + partVal = OdpsUtil.date2StringWithFormat(partitionCol.asDate(dateTransForm.getFromFormat()), dateTransForm.getToFormat()); + } + } catch (DataXException e) { + LOG.warn("Parse {} with format {} error! Please check the column config and {} config. So user original value '{}'. 
Detail info: {}", + partVal, dateTransForm.toString(), Key.PARTITION_COL_MAPPING, partVal, e); + } + } + + partition.append(partName).append("=").append(partVal); + } + } + return partition.toString(); + } + + public static String date2StringWithFormat(Date date, String dateFormat) { + return DateFormatUtils.format(date, dateFormat, TimeZone.getTimeZone("GMT+8")); + } + + public static List getTableOriginalColumnTypeList(TableSchema schema) { + List tableOriginalColumnTypeList = new ArrayList(); List columns = schema.getColumns(); for (Column column : columns) { - tableOriginalColumnTypeList.add(column.getType()); + tableOriginalColumnTypeList.add(column.getTypeInfo()); } return tableOriginalColumnTypeList; @@ -465,8 +635,7 @@ public class OdpsUtil { if (isPartitionedTable) { //分区表 if (StringUtils.isBlank(partition)) { - throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, String.format("您没有配置分区信息,因为你配置的表是分区表:%s 如果需要进行 truncate 操作,必须指定需要清空的具体分区. 请修改分区配置,格式形如 pt=${bizdate} .", - table.getName())); + throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, MESSAGE_SOURCE.message("odpsutil.21", table.getName())); } else { LOG.info("Try to truncate partition=[{}] in table=[{}].", partition, table.getName()); OdpsUtil.truncatePartition(odps, table, partition); @@ -474,8 +643,7 @@ public class OdpsUtil { } else { //非分区表 if (StringUtils.isNotBlank(partition)) { - throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, String.format("分区信息配置错误,你的ODPS表是非分区表:%s 进行 truncate 操作时不需要指定具体分区值. 请检查您的分区配置,删除该配置项的值.", - table.getName())); + throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, MESSAGE_SOURCE.message("odpsutil.22", table.getName())); } else { LOG.info("Try to truncate table:[{}].", table.getName()); OdpsUtil.truncateNonPartitionedTable(odps, table); @@ -487,7 +655,7 @@ public class OdpsUtil { //分区表 if (StringUtils.isBlank(partition)) { throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, - String.format("您的目的表是分区表,写入分区表:%s 时必须指定具体分区值. 请修改您的分区配置信息,格式形如 格式形如 pt=${bizdate}.", table.getName())); + MESSAGE_SOURCE.message("odpsutil.23", table.getName())); } else { boolean isPartitionExists = OdpsUtil.isPartitionExist(table, partition); if (!isPartitionExists) { @@ -500,7 +668,7 @@ public class OdpsUtil { //非分区表 if (StringUtils.isNotBlank(partition)) { throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, - String.format("您的目的表是非分区表,写入非分区表:%s 时不需要指定具体分区值. 请删除分区配置信息", table.getName())); + MESSAGE_SOURCE.message("odpsutil.24", table.getName())); } } } @@ -523,14 +691,12 @@ public class OdpsUtil { if (isPartitionedTable) { //分区表 if (StringUtils.isBlank(partition)) { - throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, String.format("您没有配置分区信息,因为你配置的表是分区表:%s 如果需要进行 truncate 操作,必须指定需要清空的具体分区. 请修改分区配置,格式形如 pt=${bizdate} .", - table.getName())); + throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, MESSAGE_SOURCE.message("odpsutil.25", table.getName())); } } else { //非分区表 if (StringUtils.isNotBlank(partition)) { - throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, String.format("分区信息配置错误,你的ODPS表是非分区表:%s 进行 truncate 操作时不需要指定具体分区值. 
请检查您的分区配置,删除该配置项的值.", - table.getName())); + throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, MESSAGE_SOURCE.message("odpsutil.26", table.getName())); } } } else { @@ -539,13 +705,13 @@ public class OdpsUtil { //分区表 if (StringUtils.isBlank(partition)) { throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, - String.format("您的目的表是分区表,写入分区表:%s 时必须指定具体分区值. 请修改您的分区配置信息,格式形如 格式形如 pt=${bizdate}.", table.getName())); + MESSAGE_SOURCE.message("odpsutil.27", table.getName())); } } else { //非分区表 if (StringUtils.isNotBlank(partition)) { throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, - String.format("您的目的表是非分区表,写入非分区表:%s 时不需要指定具体分区值. 请删除分区配置信息", table.getName())); + MESSAGE_SOURCE.message("odpsutil.28", table.getName())); } } } @@ -558,29 +724,286 @@ public class OdpsUtil { if(e.getMessage() != null) { if(e.getMessage().contains(OdpsExceptionMsg.ODPS_PROJECT_NOT_FOUNT)) { throw DataXException.asDataXException(OdpsWriterErrorCode.ODPS_PROJECT_NOT_FOUNT, - String.format("加载 ODPS 目的表:%s 失败. " + - "请检查您配置的 ODPS 目的表的 [project] 是否正确.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.29", tableName), e); } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_TABLE_NOT_FOUNT)) { throw DataXException.asDataXException(OdpsWriterErrorCode.ODPS_TABLE_NOT_FOUNT, - String.format("加载 ODPS 目的表:%s 失败. " + - "请检查您配置的 ODPS 目的表的 [table] 是否正确.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.30", tableName), e); } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_KEY_ID_NOT_FOUND)) { throw DataXException.asDataXException(OdpsWriterErrorCode.ODPS_ACCESS_KEY_ID_NOT_FOUND, - String.format("加载 ODPS 目的表:%s 失败. " + - "请检查您配置的 ODPS 目的表的 [accessId] [accessKey]是否正确.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.31", tableName), e); } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_KEY_INVALID)) { throw DataXException.asDataXException(OdpsWriterErrorCode.ODPS_ACCESS_KEY_INVALID, - String.format("加载 ODPS 目的表:%s 失败. " + - "请检查您配置的 ODPS 目的表的 [accessKey] 是否正确.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.32", tableName), e); } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_DENY)) { throw DataXException.asDataXException(OdpsWriterErrorCode.ODPS_ACCESS_DENY, - String.format("加载 ODPS 目的表:%s 失败. " + - "请检查您配置的 ODPS 目的表的 [accessId] [accessKey] [project]是否匹配.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.33", tableName), e); } } throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, - String.format("加载 ODPS 目的表:%s 失败. 
" + - "请检查您配置的 ODPS 目的表的 project,table,accessId,accessKey,odpsServer等值.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.34", tableName), e); + } + + /** + * count统计数据,自动创建统计表 + * @param tableName 统计表名字 + * @return + */ + public static String getCreateSummaryTableDDL(String tableName) { + return String.format("CREATE TABLE IF NOT EXISTS %s " + + "(src_table_name STRING, " + + "dest_table_name STRING, " + + "src_row_num BIGINT, " + + "src_query_time DATETIME, " + + "read_succeed_records BIGINT," + + "write_succeed_records BIGINT," + + "dest_row_num BIGINT, " + + "write_time DATETIME);", + tableName); + } + + /** + * count统计数据,获取count dml + * @param tableName + * @return + */ + public static String countTableSql(final String tableName, final String partition) { + if (StringUtils.isNotBlank(partition)) { + String[] partitions = partition.split("\\,"); + String p = String.join(" and ", partitions); + return String.format("SELECT COUNT(1) AS odps_num FROM %s WHERE %s;", tableName, p); + } else { + return String.format("SELECT COUNT(1) AS odps_num FROM %s;", tableName); + } + } + + /** + * count统计数据 dml 对应字段,用于查询 + * @return + */ + public static String countName() { + return "odps_num"; + } + + /** + * count统计数据dml + * @param summaryTableName 统计数据写入表 + * @param sourceTableName datax reader 表 + * @param destTableName datax writer 表 + * @param srcCount reader表行数 + * @param queryTime reader表查询时间 + * @param destCount writer 表行书 + * @return insert dml sql + */ + public static String getInsertSummaryTableSql(String summaryTableName, String sourceTableName, String destTableName, + Long srcCount, String queryTime, Number readSucceedRecords, + Number writeSucceedRecords, Long destCount) { + final String sql = "INSERT INTO %s (src_table_name,dest_table_name," + + " src_row_num, src_query_time, read_succeed_records, write_succeed_records, dest_row_num, write_time) VALUES ( %s );"; + + String insertData = String.format("'%s', '%s', %s, %s, %s, %s, %s, getdate()", + sourceTableName, destTableName, srcCount, queryTime, readSucceedRecords, writeSucceedRecords, destCount ); + return String.format(sql, summaryTableName, insertData); + } + + public static void createTable(Odps odps, String tableName, final String sql) { + try { + LOG.info("create table with sql: {}", sql); + runSqlTaskWithRetry(odps, sql, MAX_RETRY_TIME, 1000, true, "create", null); + } catch (Exception e) { + throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_FAILED, + MESSAGE_SOURCE.message("odpsutil.7", tableName), e); + } + } + + public static void createTableFromTable(Odps odps, String resourceTable, String targetTable) { + TableSchema schema = odps.tables().get(resourceTable).getSchema(); + StringBuilder builder = new StringBuilder(); + Iterator iterator = schema.getColumns().iterator(); + while (iterator.hasNext()) { + Column c = iterator.next(); + builder.append(String.format(" %s %s ", c.getName(), c.getTypeInfo().getTypeName())); + if (iterator.hasNext()) { + builder.append(","); + } + } + String createTableSql = String.format("CREATE TABLE IF NOT EXISTS %s (%s);", targetTable, builder.toString()); + + try { + LOG.info("create table with sql: {}", createTableSql); + runSqlTaskWithRetry(odps, createTableSql, MAX_RETRY_TIME, 1000, true, "create", null); + } catch (Exception e) { + throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_FAILED, + MESSAGE_SOURCE.message("odpsutil.7", targetTable), e); + } + } + + public static Object truncateSingleFieldData(OdpsType type, Object data, int limit, Boolean 
enableOverLengthOutput) { + if (data == null) { + return data; + } + if (OdpsType.STRING.equals(type)) { + if(enableOverLengthOutput) { + LOG.warn( + "InvalidData: The string's length is more than " + limit + " bytes. content:" + data); + } + LOG.info("before truncate string length:" + ((String) data).length()); + //确保特殊字符场景下的截断 + limit -= Constant.UTF8_ENCODED_CHAR_MAX_SIZE; + data = cutString((String) data, limit); + LOG.info("after truncate string length:" + ((String) data).length()); + } else if (OdpsType.BINARY.equals(type)) { + byte[] oriDataBytes = ((Binary) data).data(); + if(oriDataBytes == null){ + return data; + } + int originLength = oriDataBytes.length; + if (originLength <= limit) { + return data; + } + if(enableOverLengthOutput) { + LOG.warn("InvalidData: The binary's length is more than " + limit + " bytes. content:" + byteArrToHex(oriDataBytes)); + } + LOG.info("before truncate binary length:" + oriDataBytes.length); + byte[] newData = new byte[limit]; + System.arraycopy(oriDataBytes, 0, newData, 0, limit); + LOG.info("after truncate binary length:" + newData.length); + return new Binary(newData); + } + return data; + } + public static Object setNull(OdpsType type,Object data, int limit, Boolean enableOverLengthOutput) { + if (data == null ) { + return null; + } + if (OdpsType.STRING.equals(type)) { + if(enableOverLengthOutput) { + LOG.warn( + "InvalidData: The string's length is more than " + limit + " bytes. content:" + data); + } + return null; + } else if (OdpsType.BINARY.equals(type)) { + byte[] oriDataBytes = ((Binary) data).data(); + int originLength = oriDataBytes.length; + if (originLength > limit) { + if(enableOverLengthOutput) { + LOG.warn("InvalidData: The binary's length is more than " + limit + " bytes. content:" + new String(oriDataBytes)); + } + return null; + } + } + return data; + } + public static boolean validateStringLength(String value, long limit) { + try { + if (value.length() * Constant.UTF8_ENCODED_CHAR_MAX_SIZE > limit + && value.getBytes("utf-8").length > limit) { + return false; + } + } catch (Exception e) { + e.printStackTrace(); + return true; + } + return true; + } + public static String cutString(String sourceString, int cutBytes) { + if (sourceString == null || "".equals(sourceString.trim()) || cutBytes < 1) { + return ""; + } + int lastIndex = 0; + boolean stopFlag = false; + int totalBytes = 0; + for (int i = 0; i < sourceString.length(); i++) { + String s = Integer.toBinaryString(sourceString.charAt(i)); + if (s.length() > 8) { + totalBytes += 3; + } else { + totalBytes += 1; + } + if (!stopFlag) { + if (totalBytes == cutBytes) { + lastIndex = i; + stopFlag = true; + } else if (totalBytes > cutBytes) { + lastIndex = i - 1; + stopFlag = true; + } + } + } + if (!stopFlag) { + return sourceString; + } else { + return sourceString.substring(0, lastIndex + 1); + } + } + public static boolean dataOverLength(OdpsType type, Object data, int limit){ + if (data == null ) { + return false; + } + if (OdpsType.STRING.equals(type)) { + if(!OdpsUtil.validateStringLength((String)data, limit)){ + return true; + } + }else if (OdpsType.BINARY.equals(type)){ + byte[] oriDataBytes = ((Binary) data).data(); + if(oriDataBytes == null){ + return false; + } + int originLength = oriDataBytes.length; + if (originLength > limit) { + return true; + } + } + return false; + } + public static Object processOverLengthData(Object data, OdpsType type, String overLengthRule, int maxFieldLength, Boolean enableOverLengthOutput) { + try{ + //超长数据检查 + 
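+            // Once globalTotalTruncatedRecordNumber reaches maxOutputOverLengthRecord, further over-length warnings are muted for the remaining records.
+            // overLengthRule "truncate" cuts the field down to at most maxFieldLength bytes, "setNull" replaces it with null; any other value leaves the data unchanged.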
if(OdpsWriter.maxOutputOverLengthRecord != null && OdpsWriter.globalTotalTruncatedRecordNumber.get() >= OdpsWriter.maxOutputOverLengthRecord){ + enableOverLengthOutput = false; + } + if ("truncate".equalsIgnoreCase(overLengthRule)) { + if (OdpsUtil.dataOverLength(type, data, OdpsWriter.maxOdpsFieldLength)) { + Object newData = OdpsUtil.truncateSingleFieldData(type, data, maxFieldLength, enableOverLengthOutput); + OdpsWriter.globalTotalTruncatedRecordNumber.incrementAndGet(); + return newData; + } + } else if ("setNull".equalsIgnoreCase(overLengthRule)) { + if (OdpsUtil.dataOverLength(type, data, OdpsWriter.maxOdpsFieldLength)) { + OdpsWriter.globalTotalTruncatedRecordNumber.incrementAndGet(); + return OdpsUtil.setNull(type, data, maxFieldLength, enableOverLengthOutput); + } + } + }catch (Throwable e){ + LOG.warn("truncate overLength data failed!", e); + } + return data; + } + private static final char HEX_CHAR_ARR[] = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; + /** + * 字节数组转十六进制字符串 + * @param btArr + * @return + */ + public static String byteArrToHex(byte[] btArr) { + char strArr[] = new char[btArr.length * 2]; + int i = 0; + for (byte bt : btArr) { + strArr[i++] = HEX_CHAR_ARR[bt>>>4 & 0xf]; + strArr[i++] = HEX_CHAR_ARR[bt & 0xf]; + } + return new String(strArr); + } + public static byte[] hexToByteArr(String hexStr) { + char[] charArr = hexStr.toCharArray(); + byte btArr[] = new byte[charArr.length / 2]; + int index = 0; + for (int i = 0; i < charArr.length; i++) { + int highBit = hexStr.indexOf(charArr[i]); + int lowBit = hexStr.indexOf(charArr[++i]); + btArr[index] = (byte) (highBit << 4 | lowBit); + index++; + } + return btArr; } } diff --git a/odpswriter/src/main/libs/bcprov-jdk15on-1.52.jar b/odpswriter/src/main/libs/bcprov-jdk15on-1.52.jar deleted file mode 100644 index 6c54dd90..00000000 Binary files a/odpswriter/src/main/libs/bcprov-jdk15on-1.52.jar and /dev/null differ diff --git a/opentsdbreader/pom.xml b/opentsdbreader/pom.xml index f2263726..b10fba02 100644 --- a/opentsdbreader/pom.xml +++ b/opentsdbreader/pom.xml @@ -24,9 +24,6 @@ 4.5 2.4 - - 1.2.28 - 2.3.2 @@ -47,10 +44,6 @@ slf4j-log4j12 org.slf4j - - fastjson - com.alibaba - commons-math3 org.apache.commons @@ -92,9 +85,8 @@ - com.alibaba - fastjson - ${fastjson.version} + com.alibaba.fastjson2 + fastjson2 diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/CliQuery.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/CliQuery.java index fe8dce2b..88822089 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/CliQuery.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/CliQuery.java @@ -6,14 +6,30 @@ import net.opentsdb.utils.DateTime; import java.util.ArrayList; import java.util.HashMap; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:CliQuery - * - * @author Benedict Jin - * @since 2019-04-17 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. 
This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . final class CliQuery { /** diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/Connection4TSDB.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/Connection4TSDB.java index 97a841cf..4b75acb9 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/Connection4TSDB.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/Connection4TSDB.java @@ -4,14 +4,30 @@ import com.alibaba.datax.common.plugin.RecordSender; import java.util.List; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:Connection for TSDB-like databases - * - * @author Benedict Jin - * @since 2019-03-29 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . public interface Connection4TSDB { /** diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DataPoint4TSDB.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DataPoint4TSDB.java index 1f690245..e8a84fb2 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DataPoint4TSDB.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DataPoint4TSDB.java @@ -1,17 +1,33 @@ package com.alibaba.datax.plugin.reader.conn; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; import java.util.Map; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:DataPoint for TSDB - * - * @author Benedict Jin - * @since 2019-04-10 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . 
public class DataPoint4TSDB { private long timestamp; diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DumpSeries.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DumpSeries.java index 56ab0bc2..4aed1458 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DumpSeries.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/DumpSeries.java @@ -12,14 +12,30 @@ import org.slf4j.LoggerFactory; import java.util.*; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:Tool to dump the data straight from HBase - * - * @author Benedict Jin - * @since 2019-04-17 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . final class DumpSeries { private static final Logger LOG = LoggerFactory.getLogger(DumpSeries.class); diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnection.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnection.java index 9e7f12c9..49ba5fb3 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnection.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBConnection.java @@ -2,19 +2,35 @@ package com.alibaba.datax.plugin.reader.conn; import com.alibaba.datax.common.plugin.RecordSender; import com.alibaba.datax.plugin.reader.util.TSDBUtils; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; import org.apache.commons.lang3.StringUtils; import java.util.List; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:OpenTSDB Connection - * - * @author Benedict Jin - * @since 2019-03-29 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . 
public class OpenTSDBConnection implements Connection4TSDB { private String address; diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBDump.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBDump.java index 5ed0a314..6f3c551a 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBDump.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/conn/OpenTSDBDump.java @@ -1,20 +1,36 @@ package com.alibaba.datax.plugin.reader.conn; import com.alibaba.datax.common.plugin.RecordSender; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; import net.opentsdb.core.TSDB; import net.opentsdb.utils.Config; import java.util.Map; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:OpenTSDB Dump - * - * @author Benedict Jin - * @since 2019-04-15 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . final class OpenTSDBDump { private static TSDB TSDB_INSTANCE; diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Constant.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Constant.java index 6017d4e5..286443de 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Constant.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Constant.java @@ -1,13 +1,29 @@ package com.alibaba.datax.plugin.reader.opentsdbreader; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:Key - * - * @author Benedict Jin - * @since 2019-04-18 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . 
public final class Constant { static final String DEFAULT_DATA_FORMAT = "yyyy-MM-dd HH:mm:ss"; diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Key.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Key.java index 5b8c4adc..2d2c2844 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Key.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/Key.java @@ -1,13 +1,29 @@ package com.alibaba.datax.plugin.reader.opentsdbreader; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:Key - * - * @author Benedict Jin - * @since 2019-04-18 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . public class Key { static final String ENDPOINT = "endpoint"; diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReader.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReader.java index d57456d1..7790a2b1 100755 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReader.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReader.java @@ -6,7 +6,7 @@ import com.alibaba.datax.common.spi.Reader; import com.alibaba.datax.common.util.Configuration; import com.alibaba.datax.plugin.reader.conn.OpenTSDBConnection; import com.alibaba.datax.plugin.reader.util.TimeUtils; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; import org.apache.commons.lang3.StringUtils; import org.joda.time.DateTime; import org.slf4j.Logger; @@ -18,14 +18,30 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:Key - * - * @author Benedict Jin - * @since 2019-04-18 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . 
@SuppressWarnings("unused") public class OpenTSDBReader extends Reader { diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReaderErrorCode.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReaderErrorCode.java index 0d9de4c4..479936c6 100755 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReaderErrorCode.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/opentsdbreader/OpenTSDBReaderErrorCode.java @@ -2,14 +2,30 @@ package com.alibaba.datax.plugin.reader.opentsdbreader; import com.alibaba.datax.common.spi.ErrorCode; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:OpenTSDB Reader Error Code - * - * @author Benedict Jin - * @since 2019-04-18 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . public enum OpenTSDBReaderErrorCode implements ErrorCode { REQUIRED_VALUE("OpenTSDBReader-00", "缺失必要的值"), diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/HttpUtils.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/HttpUtils.java index cdf5c9c1..fa82b634 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/HttpUtils.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/HttpUtils.java @@ -1,6 +1,6 @@ package com.alibaba.datax.plugin.reader.util; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; import org.apache.http.client.fluent.Content; import org.apache.http.client.fluent.Request; import org.apache.http.entity.ContentType; @@ -9,14 +9,30 @@ import java.nio.charset.Charset; import java.util.Map; import java.util.concurrent.TimeUnit; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:HttpUtils - * - * @author Benedict Jin - * @since 2019-03-29 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . 
public final class HttpUtils { public final static Charset UTF_8 = Charset.forName("UTF-8"); diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TSDBUtils.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TSDBUtils.java index 72c7fd62..9f1e38d5 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TSDBUtils.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TSDBUtils.java @@ -1,20 +1,36 @@ package com.alibaba.datax.plugin.reader.util; import com.alibaba.datax.plugin.reader.conn.DataPoint4TSDB; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.List; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:TSDB Utils - * - * @author Benedict Jin - * @since 2019-03-29 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . public final class TSDBUtils { private static final Logger LOG = LoggerFactory.getLogger(TSDBUtils.class); diff --git a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TimeUtils.java b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TimeUtils.java index 9bc11b36..7d6bd112 100644 --- a/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TimeUtils.java +++ b/opentsdbreader/src/main/java/com/alibaba/datax/plugin/reader/util/TimeUtils.java @@ -2,14 +2,30 @@ package com.alibaba.datax.plugin.reader.util; import java.util.concurrent.TimeUnit; -/** - * Copyright @ 2019 alibaba.com - * All right reserved. - * Function:TimeUtils - * - * @author Benedict Jin - * @since 2019-04-22 - */ +//This file is part of OpenTSDB. + +//Copyright (C) 2010-2012 The OpenTSDB Authors. +//Copyright(C)2019 Alibaba Group Holding Ltd. + +// + +//This program is free software: you can redistribute it and/or modify it + +//under the terms of the GNU Lesser General Public License as published by + +//the Free Software Foundation, either version 2.1 of the License, or (at your + +//option) any later version. This program is distributed in the hope that it + +//will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty + +//of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + +//General Public License for more details. You should have received a copy + +//of the GNU Lesser General Public License along with this program. If not, + +//see . 
public final class TimeUtils { private TimeUtils() { diff --git a/opentsdbreader/src/main/resources/plugin.json b/opentsdbreader/src/main/resources/plugin.json index 692a9853..5c9cbed9 100755 --- a/opentsdbreader/src/main/resources/plugin.json +++ b/opentsdbreader/src/main/resources/plugin.json @@ -6,5 +6,5 @@ "mechanism": "根据时间和 metric 直连底层 HBase 存储,从而 Scan 出符合条件的数据点", "warn": "指定起止时间会自动忽略分钟和秒,转为整点时刻,例如 2019-4-18 的 [3:35, 4:55) 会被转为 [3:00, 4:00)" }, - "developer": "Benedict Jin" + "developer": "alibaba" } diff --git a/oraclereader/pom.xml b/oraclereader/pom.xml index ae8e06fa..d60e5ebf 100755 --- a/oraclereader/pom.xml +++ b/oraclereader/pom.xml @@ -44,8 +44,6 @@ com.oracle ojdbc6 11.2.0.3 - system - ${basedir}/src/main/lib/ojdbc6-11.2.0.3.jar diff --git a/oraclereader/src/main/assembly/package.xml b/oraclereader/src/main/assembly/package.xml index a0c9fd1c..a954a30d 100755 --- a/oraclereader/src/main/assembly/package.xml +++ b/oraclereader/src/main/assembly/package.xml @@ -15,13 +15,6 @@ plugin_job_template.json plugin/reader/oraclereader - - - src/main/lib - - ojdbc6-11.2.0.3.jar - - plugin/reader/oraclereader/libs target/ diff --git a/oraclereader/src/main/lib/ojdbc6-11.2.0.3.jar b/oraclereader/src/main/lib/ojdbc6-11.2.0.3.jar deleted file mode 100644 index 01da074d..00000000 Binary files a/oraclereader/src/main/lib/ojdbc6-11.2.0.3.jar and /dev/null differ diff --git a/oraclewriter/pom.xml b/oraclewriter/pom.xml index 95b78caf..1e8d0274 100755 --- a/oraclewriter/pom.xml +++ b/oraclewriter/pom.xml @@ -42,8 +42,6 @@ com.oracle ojdbc6 11.2.0.3 - system - ${basedir}/src/main/lib/ojdbc6-11.2.0.3.jar diff --git a/oraclewriter/src/main/assembly/package.xml b/oraclewriter/src/main/assembly/package.xml index 09a25d1a..9dab0c8e 100755 --- a/oraclewriter/src/main/assembly/package.xml +++ b/oraclewriter/src/main/assembly/package.xml @@ -16,13 +16,6 @@ plugin/writer/oraclewriter - - src/main/lib - - ojdbc6-11.2.0.3.jar - - plugin/writer/oraclewriter/libs - target/ diff --git a/oraclewriter/src/main/lib/ojdbc6-11.2.0.3.jar b/oraclewriter/src/main/lib/ojdbc6-11.2.0.3.jar deleted file mode 100644 index 01da074d..00000000 Binary files a/oraclewriter/src/main/lib/ojdbc6-11.2.0.3.jar and /dev/null differ diff --git a/oscarwriter/pom.xml b/oscarwriter/pom.xml index 51643c76..06249a26 100644 --- a/oscarwriter/pom.xml +++ b/oscarwriter/pom.xml @@ -39,12 +39,18 @@ plugin-rdbms-util ${datax-project-version} - + + + + com.csicit.thirdparty + oscar + 1.0.1 diff --git a/oscarwriter/src/main/resources/plugin.json b/oscarwriter/src/main/resources/plugin.json index f1a99fec..43adfbfe 100644 --- a/oscarwriter/src/main/resources/plugin.json +++ b/oscarwriter/src/main/resources/plugin.json @@ -2,5 +2,5 @@ "name": "oscarwriter", "class": "com.alibaba.datax.plugin.writer.oscarwriter.OscarWriter", "description": "useScene: prod. mechanism: Jdbc connection using the database, execute insert sql. 
warn: The more you know about the database, the less problems you encounter.", - "developer": "linjiayu" + "developer": "alibaba" } \ No newline at end of file diff --git a/ossreader/pom.xml b/ossreader/pom.xml index 1feb42ff..d27b6a3d 100755 --- a/ossreader/pom.xml +++ b/ossreader/pom.xml @@ -11,6 +11,17 @@ jar + + org.apache.logging.log4j + log4j-api + 2.17.1 + + + + org.apache.logging.log4j + log4j-core + 2.17.1 + com.alibaba.datax datax-common @@ -43,13 +54,19 @@ com.aliyun.oss aliyun-sdk-oss - 2.2.3 + 3.4.2 junit junit test + + com.alibaba.datax + hdfsreader + 0.0.1-SNAPSHOT + compile + diff --git a/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/Key.java b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/Key.java index e836fbbd..efa95343 100755 --- a/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/Key.java +++ b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/Key.java @@ -18,4 +18,29 @@ public class Key { public static final String CNAME = "cname"; + public static final String SUCCESS_ON_NO_Object = "successOnNoObject"; + + public static final String PROXY_HOST = "proxyHost"; + + public static final String PROXY_PORT = "proxyPort"; + + public static final String PROXY_USERNAME = "proxyUsername"; + + public static final String PROXY_PASSWORD = "proxyPassword"; + + public static final String PROXY_DOMAIN = "proxyDomain"; + + public static final String PROXY_WORKSTATION = "proxyWorkstation"; + + public static final String HDOOP_CONFIG = "hadoopConfig"; + + public static final String FS_OSS_ACCESSID = "fs.oss.accessKeyId"; + + public static final String FS_OSS_ACCESSKEY = "fs.oss.accessKeySecret"; + + public static final String FS_OSS_ENDPOINT = "fs.oss.endpoint"; + + /*判断分片是否均匀的标准,是否有分片长度超出平均值的百分比*/ + public static final String BALANCE_THRESHOLD = "balanceThreshold"; + } diff --git a/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/OssInputStream.java b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/OssInputStream.java new file mode 100644 index 00000000..a43146e7 --- /dev/null +++ b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/OssInputStream.java @@ -0,0 +1,132 @@ +package com.alibaba.datax.plugin.reader.ossreader; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.RetryUtil; +import com.aliyun.oss.OSSClient; +import com.aliyun.oss.model.GetObjectRequest; +import com.aliyun.oss.model.OSSObject; +import org.apache.commons.io.IOUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.util.concurrent.Callable; + +/** + * @Author: guxuan + * @Date 2022-05-17 15:52 + */ +public class OssInputStream extends InputStream { + + private final OSSClient ossClient; + private GetObjectRequest getObjectRequest; + + private long startIndex = 0; + private long endIndex = -1; + + private InputStream inputStream; + + /** + * retryTimes : 重试次数, 默认值是60次; + * description: 能够cover住的网络断连时间= retryTimes*(socket_timeout+sleepTime); + * 默认cover住的网络断连时间= 60*(5+5) = 600秒. 
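+     *              (with the defaults, roughly 60 retries * (5s socket timeout + 5s sleep) = 600 seconds of network disconnection can be ridden out)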
+ */ + private int retryTimes = 60; + + private static final Logger LOG = LoggerFactory.getLogger(OssInputStream.class); + + /** + * 如果start为0, end为1000, inputstream范围是[0,1000],共1001个字节 + * + * @param ossClient + * @param bucket + * @param object + * @param start inputstream start index + * @param end inputstream end index + */ + public OssInputStream(final OSSClient ossClient, final String bucket, final String object, long start, long end) { + this.ossClient = ossClient; + this.getObjectRequest = new GetObjectRequest(bucket, object); + this.startIndex = start; + this.getObjectRequest.setRange(this.startIndex, end); + this.endIndex = end; + try { + RetryUtil.executeWithRetry(new Callable() { + @Override + public Boolean call() throws Exception { + OSSObject ossObject = ossClient.getObject(getObjectRequest); + // 读取InputStream + inputStream = ossObject.getObjectContent(); + return true; + } + }, this.retryTimes, 5000, false); + } catch (Exception e) { + throw DataXException.asDataXException( + OssReaderErrorCode.RUNTIME_EXCEPTION,e.getMessage(), e); + } + } + + public OssInputStream(final OSSClient ossClient, final String bucket, final String object) { + this.ossClient = ossClient; + this.getObjectRequest = new GetObjectRequest(bucket, object); + this.getObjectRequest.setRange(startIndex, -1); + try { + RetryUtil.executeWithRetry(new Callable() { + @Override + public Boolean call() throws Exception { + OSSObject ossObject = ossClient.getObject(getObjectRequest); + // 读取InputStream + inputStream = ossObject.getObjectContent(); + return true; + } + }, this.retryTimes, 5000, false); + } catch (Exception e) { + throw DataXException.asDataXException( + OssReaderErrorCode.RUNTIME_EXCEPTION, e.getMessage(), e); + } + } + + @Override + public int read() throws IOException { + int cbyte; + try { + cbyte = RetryUtil.executeWithRetry(new Callable() { + @Override + public Integer call() throws Exception { + try { + int c = inputStream.read(); + startIndex++; + return c; + } catch (Exception e) { + LOG.warn(e.getMessage(),e); + /** + * 必须将inputStream先关闭, 否则会造成连接泄漏 + */ + IOUtils.closeQuietly(inputStream); + // getOssRangeInuptStream时,如果网络不连通,则会抛出异常,RetryUtil捕获异常进行重试 + inputStream = getOssRangeInuptStream(startIndex); + int c = inputStream.read(); + startIndex++; + return c; + } + } + }, this.retryTimes,5000, false); + return cbyte; + } catch (Exception e) { + throw DataXException.asDataXException( + OssReaderErrorCode.RUNTIME_EXCEPTION, e.getMessage(), e); + } + } + + private InputStream getOssRangeInuptStream(final long startIndex) { + LOG.info("Start to retry reading [inputStream] from Byte {}", startIndex); + // 第二个参数值设为-1,表示不设置结束的字节位置,读取startIndex及其以后的所有数据 + getObjectRequest.setRange(startIndex, this.endIndex); + // 范围下载 + OSSObject ossObject = ossClient.getObject(getObjectRequest); + // 读取InputStream + LOG.info("Start to retry reading [inputStream] from Byte {}", startIndex); + return ossObject.getObjectContent(); + } +} diff --git a/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/OssReader.java b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/OssReader.java index ce4f0875..9b76c53e 100755 --- a/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/OssReader.java +++ b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/OssReader.java @@ -4,33 +4,37 @@ import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.RecordSender; import com.alibaba.datax.common.spi.Reader; import 
com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.reader.hdfsreader.HdfsReader; +import com.alibaba.datax.plugin.reader.ossreader.util.HdfsParquetUtil; +import com.alibaba.datax.plugin.reader.ossreader.util.OssSplitUtil; import com.alibaba.datax.plugin.reader.ossreader.util.OssUtil; +import com.alibaba.datax.plugin.unstructuredstorage.FileFormat; import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderUtil; +import com.alibaba.datax.plugin.unstructuredstorage.reader.binaryFileUtil.BinaryFileReaderUtil; +import com.alibaba.datax.plugin.unstructuredstorage.reader.split.StartEndPair; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.TypeReference; import com.aliyun.oss.ClientException; import com.aliyun.oss.OSSClient; import com.aliyun.oss.OSSException; import com.aliyun.oss.model.ListObjectsRequest; -import com.aliyun.oss.model.OSSObject; import com.aliyun.oss.model.OSSObjectSummary; import com.aliyun.oss.model.ObjectListing; -import com.google.common.collect.Sets; -import org.apache.commons.io.Charsets; +import com.aliyun.oss.model.ObjectMetadata; +import org.apache.commons.lang3.tuple.MutablePair; +import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.InputStream; -import java.nio.charset.UnsupportedCharsetException; import java.util.ArrayList; import java.util.List; -import java.util.Set; +import java.util.Locale; import java.util.regex.Pattern; -/** - * Created by mengxin.liumx on 2014/12/7. - */ public class OssReader extends Reader { public static class Job extends Reader.Job { private static final Logger LOG = LoggerFactory @@ -38,194 +42,204 @@ public class OssReader extends Reader { private Configuration readerOriginConfig = null; + private OSSClient ossClient = null; + private String endpoint; + private String accessId; + private String accessKey; + private String bucket; + private boolean successOnNoObject; + private Boolean isBinaryFile; + + private List objects; + private List> objectSizePairs; /*用于任务切分的依据*/ + + private String fileFormat; + + private HdfsReader.Job hdfsReaderJob; + private boolean useHdfsReaderProxy = false; + @Override public void init() { LOG.debug("init() begin..."); this.readerOriginConfig = this.getPluginJobConf(); + this.basicValidateParameter(); + this.fileFormat = this.readerOriginConfig.getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.FILE_FORMAT, + com.alibaba.datax.plugin.unstructuredstorage.reader.Constant.DEFAULT_FILE_FORMAT); + this.useHdfsReaderProxy = HdfsParquetUtil.isUseHdfsWriterProxy(this.fileFormat); + if(useHdfsReaderProxy){ + HdfsParquetUtil.adaptConfiguration(this.readerOriginConfig); + this.hdfsReaderJob = new HdfsReader.Job(); + this.hdfsReaderJob.setJobPluginCollector(this.getJobPluginCollector()); + this.hdfsReaderJob.setPeerPluginJobConf(this.getPeerPluginJobConf()); + this.hdfsReaderJob.setPeerPluginName(this.getPeerPluginName()); + this.hdfsReaderJob.setPluginJobConf(this.getPluginJobConf()); + this.hdfsReaderJob.init(); + return; + } + + this.isBinaryFile = FileFormat.getFileFormatByConfiguration(this.readerOriginConfig).isBinary(); this.validate(); + UnstructuredStorageReaderUtil.validateCsvReaderConfig(this.readerOriginConfig); + this.successOnNoObject = this.readerOriginConfig.getBool( + Key.SUCCESS_ON_NO_Object, false); LOG.debug("init() ok and end..."); } - private void validate() { - String 
endpoint = this.readerOriginConfig.getString(Key.ENDPOINT); + + private void basicValidateParameter(){ + endpoint = this.readerOriginConfig.getString(Key.ENDPOINT); if (StringUtils.isBlank(endpoint)) { throw DataXException.asDataXException( - OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, - "您需要指定 endpoint"); + OssReaderErrorCode.CONFIG_INVALID_EXCEPTION,"invalid endpoint"); } - String accessId = this.readerOriginConfig.getString(Key.ACCESSID); + accessId = this.readerOriginConfig.getString(Key.ACCESSID); if (StringUtils.isBlank(accessId)) { throw DataXException.asDataXException( OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, - "您需要指定 accessId"); + "invalid accessId"); } - String accessKey = this.readerOriginConfig.getString(Key.ACCESSKEY); + accessKey = this.readerOriginConfig.getString(Key.ACCESSKEY); if (StringUtils.isBlank(accessKey)) { throw DataXException.asDataXException( OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, - "您需要指定 accessKey"); + "invalid accessKey"); } + } + // warn: 提前验证endpoint,accessId,accessKey,bucket,object的有效性 + private void validate() { + // fxxk + // ossClient = new OSSClient(endpoint,accessId,accessKey); + ossClient = OssUtil.initOssClient(this.readerOriginConfig); - String bucket = this.readerOriginConfig.getString(Key.BUCKET); + + bucket = this.readerOriginConfig.getString(Key.BUCKET); if (StringUtils.isBlank(bucket)) { throw DataXException.asDataXException( OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, - "您需要指定 endpoint"); + "invalid bucket"); + }else if(!ossClient.doesBucketExist(bucket)){ + throw DataXException.asDataXException( + OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, + "invalid bucket"); } String object = this.readerOriginConfig.getString(Key.OBJECT); if (StringUtils.isBlank(object)) { throw DataXException.asDataXException( OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, - "您需要指定 object"); + "invalid object"); } - String fieldDelimiter = this.readerOriginConfig - .getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.FIELD_DELIMITER); - // warn: need length 1 - if (null == fieldDelimiter || fieldDelimiter.length() == 0) { - throw DataXException.asDataXException( - OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, - "您需要指定 fieldDelimiter"); - } - - String encoding = this.readerOriginConfig - .getString( - com.alibaba.datax.plugin.unstructuredstorage.reader.Key.ENCODING, - com.alibaba.datax.plugin.unstructuredstorage.reader.Constant.DEFAULT_ENCODING); - try { - Charsets.toCharset(encoding); - } catch (UnsupportedCharsetException uce) { - throw DataXException.asDataXException( - OssReaderErrorCode.ILLEGAL_VALUE, - String.format("不支持的编码格式 : [%s]", encoding), uce); - } catch (Exception e) { - throw DataXException.asDataXException( - OssReaderErrorCode.ILLEGAL_VALUE, - String.format("运行配置异常 : %s", e.getMessage()), e); - } - - // 检测是column 是否为 ["*"] 若是则填为空 - List column = this.readerOriginConfig - .getListConfiguration(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COLUMN); - if (null != column - && 1 == column.size() - && ("\"*\"".equals(column.get(0).toString()) || "'*'" - .equals(column.get(0).toString()))) { - readerOriginConfig - .set(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COLUMN, - new ArrayList()); - } else { - // column: 1. 
index type 2.value type 3.when type is Data, may - // have - // format - List columns = this.readerOriginConfig - .getListConfiguration(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COLUMN); - - if (null == columns || columns.size() == 0) { - throw DataXException.asDataXException( - OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, - "您需要指定 columns"); - } - - if (null != columns && columns.size() != 0) { - for (Configuration eachColumnConf : columns) { - eachColumnConf - .getNecessaryValue( - com.alibaba.datax.plugin.unstructuredstorage.reader.Key.TYPE, - OssReaderErrorCode.REQUIRED_VALUE); - Integer columnIndex = eachColumnConf - .getInt(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.INDEX); - String columnValue = eachColumnConf - .getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.VALUE); - - if (null == columnIndex && null == columnValue) { - throw DataXException.asDataXException( - OssReaderErrorCode.NO_INDEX_VALUE, - "由于您配置了type, 则至少需要配置 index 或 value"); - } - - if (null != columnIndex && null != columnValue) { - throw DataXException.asDataXException( - OssReaderErrorCode.MIXED_INDEX_VALUE, - "您混合配置了index, value, 每一列同时仅能选择其中一种"); - } - - } - } - } - - // only support compress: gzip,bzip2,zip - String compress = this.readerOriginConfig - .getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COMPRESS); - if (StringUtils.isBlank(compress)) { - this.readerOriginConfig - .set(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COMPRESS, - null); - } else { - Set supportedCompress = Sets - .newHashSet("gzip", "bzip2", "zip"); - compress = compress.toLowerCase().trim(); - if (!supportedCompress.contains(compress)) { - throw DataXException - .asDataXException( - OssReaderErrorCode.ILLEGAL_VALUE, - String.format( - "仅支持 gzip, bzip2, zip 文件压缩格式 , 不支持您配置的文件压缩格式: [%s]", - compress)); - } - this.readerOriginConfig - .set(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COMPRESS, - compress); + if (this.isBinaryFile){ + return; } + UnstructuredStorageReaderUtil.validateParameter(this.readerOriginConfig); } + + @Override public void prepare() { - LOG.debug("prepare()"); + if(useHdfsReaderProxy){ + this.hdfsReaderJob.prepare(); + return; + } + // 将每个单独的 object 作为一个 slice + this.objectSizePairs = parseOriginObjectSizePairs(readerOriginConfig.getList(Key.OBJECT, String.class)); + this.objects = parseOriginObjects(readerOriginConfig.getList(Key.OBJECT, String.class)); + UnstructuredStorageReaderUtil.setSourceFileName(readerOriginConfig, this.objects); + UnstructuredStorageReaderUtil.setSourceFile(readerOriginConfig, this.objects); } @Override public void post() { + if(useHdfsReaderProxy){ + this.hdfsReaderJob.post(); + return; + } LOG.debug("post()"); } @Override public void destroy() { + if(useHdfsReaderProxy){ + this.hdfsReaderJob.destroy(); + return; + } LOG.debug("destroy()"); } @Override public List split(int adviceNumber) { LOG.debug("split() begin..."); - List readerSplitConfigs = new ArrayList(); + if(useHdfsReaderProxy){ + return hdfsReaderJob.split(adviceNumber); + } + List readerSplitConfigs; - // 将每个单独的 object 作为一个 slice - List objects = parseOriginObjects(readerOriginConfig - .getList(Constant.OBJECT, String.class)); - if (0 == objects.size()) { + if (0 == objects.size() && this.successOnNoObject) { + readerSplitConfigs = new ArrayList(); + Configuration splitedConfig = this.readerOriginConfig.clone(); + splitedConfig.set(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.SPLIT_SLICE_CONFIG, null); + 
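+            // No OSS object matched and successOnNoObject is enabled: return a single configuration without slice info so the job finishes successfully with zero records.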
readerSplitConfigs.add(splitedConfig); + LOG.info(String.format("no OSS object to be read")); + LOG.debug("split() ok and end..."); + return readerSplitConfigs; + }else if (0 == objects.size()) { throw DataXException.asDataXException( OssReaderErrorCode.EMPTY_BUCKET_EXCEPTION, - String.format( - "未能找到待读取的Object,请确认您的配置项bucket: %s object: %s", + String.format("Unable to find the object to read. Please confirm your configured item [bucket]: %s object: %s", this.readerOriginConfig.get(Key.BUCKET), this.readerOriginConfig.get(Key.OBJECT))); } - for (String object : objects) { - Configuration splitedConfig = this.readerOriginConfig.clone(); - splitedConfig.set(Constant.OBJECT, object); - readerSplitConfigs.add(splitedConfig); - LOG.info(String.format("OSS object to be read:%s", object)); + /** + * 当文件类型是text纯文本文件,并且不是压缩的情况下, + * 可以对纯文本文件进行内部切分实现并发读取, 如果用户不希望对文件拆分, 可以指定fileFormat为csv + * + * 注意:这里判断文件是否为text以及是否压缩,信息都是通过任务配置项来获取的 + * + * 这里抽出一个方法来判断是否需要分片 + * */ + OssSplitUtil ossFileSplit = new OssSplitUtil(this.ossClient, this.bucket); + long t1 = System.currentTimeMillis(); + readerSplitConfigs = ossFileSplit.getSplitedConfigurations(this.readerOriginConfig, this.objectSizePairs, + adviceNumber); + long t2 = System.currentTimeMillis(); + LOG.info("all split done, cost {}ms", t2 - t1); + /** + * 在日志中告知用户,为什么实际datax切分跑的channel数会小于用户配置的channel数 + * 注意:这里的报告的原因不准确,报的原因是一个文件一个task,所以最终切分数小于用户配置数,实际原因还有单文件切分时, + * 单文件的大小太小(理论64M一个block),导致问题比较少 + */ + if(readerSplitConfigs.size() < adviceNumber){ + LOG.info("[Note]: During OSSReader data synchronization, one file can only be synchronized in one task. You want to synchronize {} files " + + "and the number is less than the number of channels you configured: {}. " + + "Therefore, please take note that DataX will actually have {} sub-tasks, that is, the actual concurrent channels = {}", + objects.size(), adviceNumber, objects.size(), objects.size()); } - LOG.debug("split() ok and end..."); + LOG.info("split() ok and end..."); return readerSplitConfigs; } private List parseOriginObjects(List originObjects) { - List parsedObjects = new ArrayList(); + List objList = new ArrayList<>(); + + if (this.objectSizePairs == null) { + this.objectSizePairs = parseOriginObjectSizePairs(originObjects); + } + + for (Pair objSizePair : this.objectSizePairs) { + objList.add(objSizePair.getKey()); + } + + return objList; + } + + private List> parseOriginObjectSizePairs(List originObjects) { + List> parsedObjectSizePaires = new ArrayList>(); for (String object : originObjects) { int firstMetaChar = (object.indexOf('*') > object.indexOf('?')) ? 
object @@ -236,52 +250,130 @@ public class OssReader extends Reader { IOUtils.DIR_SEPARATOR, firstMetaChar); String parentDir = object .substring(0, lastDirSeparator + 1); - List remoteObjects = getRemoteObjects(parentDir); + List> allRemoteObjectSizePairs = getAllRemoteObjectsKeyAndSizeInDir(parentDir); Pattern pattern = Pattern.compile(object.replace("*", ".*") .replace("?", ".?")); - for (String remoteObject : remoteObjects) { - if (pattern.matcher(remoteObject).matches()) { - parsedObjects.add(remoteObject); + for (Pair remoteObjectSizePair : allRemoteObjectSizePairs) { + if (pattern.matcher(remoteObjectSizePair.getKey()).matches()) { + parsedObjectSizePaires.add(remoteObjectSizePair); + LOG.info(String + .format("add object [%s] as a candidate to be read.", + remoteObjectSizePair.getKey())); } } } else { - parsedObjects.add(object); + // 如果没有配正则匹配,那么需要对用户自己配置的object存在性进行检测 + try{ + ossClient.getObject(bucket, object); + ObjectMetadata objMeta = ossClient.getObjectMetadata(bucket, object); + parsedObjectSizePaires.add(new MutablePair(object, objMeta.getContentLength() <= OssSplitUtil.SINGLE_FILE_SPLIT_THRESHOLD_IN_SIZE ? -1L : objMeta.getContentLength())); + LOG.info(String.format( + "add object [%s] as a candidate to be read.", + object)); + }catch (Exception e){ + trackOssDetailException(e, object); + } } } - return parsedObjects; + return parsedObjectSizePaires; } - private List getRemoteObjects(String parentDir) - throws OSSException, ClientException { + // 对oss配置异常信息进行细分定位 + private void trackOssDetailException(Exception e, String object){ + // 对异常信息进行细分定位 + String errorMessage = e.getMessage(); + if(StringUtils.isNotBlank(errorMessage)){ + if(errorMessage.contains("UnknownHost")){ + // endPoint配置错误 + throw DataXException.asDataXException( + OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, + "The endpoint you configured is not correct. Please check the endpoint configuration", e); + }else if(errorMessage.contains("InvalidAccessKeyId")){ + // accessId配置错误 + throw DataXException.asDataXException( + OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, + "The accessId you configured is not correct. Please check the accessId configuration", e); + }else if(errorMessage.contains("SignatureDoesNotMatch")){ + // accessKey配置错误 + throw DataXException.asDataXException( + OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, + "The accessKey you configured is not correct. Please check the accessId configuration", e); + }else if(errorMessage.contains("NoSuchKey")){ + if (e instanceof OSSException) { + OSSException ossException = (OSSException) e; + if ("NoSuchKey".equalsIgnoreCase(ossException + .getErrorCode()) && this.successOnNoObject) { + LOG.warn(String.format("oss file %s is not exits to read:", object), e); + return; + } + } + // object配置错误 + throw DataXException.asDataXException( + OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, + "The object you configured is not correct. Please check the accessId configuration"); + }else{ + // 其他错误 + throw DataXException.asDataXException( + OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, + String.format("Please check whether the configuration of [endpoint], [accessId], [accessKey], [bucket], and [object] are correct. 
Error reason: %s",e.getMessage()), e); + } + }else{ + throw DataXException.asDataXException( + OssReaderErrorCode.CONFIG_INVALID_EXCEPTION, + "The configured json is invalid", e); + } + } - LOG.debug(String.format("父文件夹 : %s", parentDir)); + private List> getAllRemoteObjectsKeyAndSizeInDir(String parentDir) + throws OSSException, ClientException{ + List> objectSizePairs = new ArrayList>(); + List objectListings = getRemoteObjectListings(parentDir); + + if (objectListings.size() == 0) { + return objectSizePairs; + } + + for (ObjectListing objectList : objectListings){ + for (OSSObjectSummary objectSummary : objectList.getObjectSummaries()) { + Pair objNameSize = new MutablePair(objectSummary.getKey(), objectSummary.getSize() <= OssSplitUtil.SINGLE_FILE_SPLIT_THRESHOLD_IN_SIZE ? -1L : objectSummary.getSize()); + objectSizePairs.add(objNameSize); + } + } + + return objectSizePairs; + } + + private List getRemoteObjectListings(String parentDir) throws OSSException, ClientException { + + List remoteObjectListings = new ArrayList(); + + LOG.debug("Parent folder: {}", parentDir); List remoteObjects = new ArrayList(); OSSClient client = OssUtil.initOssClient(readerOriginConfig); + try { ListObjectsRequest listObjectsRequest = new ListObjectsRequest( readerOriginConfig.getString(Key.BUCKET)); listObjectsRequest.setPrefix(parentDir); - ObjectListing objectList; + ObjectListing remoteObjectList; do { - objectList = client.listObjects(listObjectsRequest); - for (OSSObjectSummary objectSummary : objectList - .getObjectSummaries()) { - LOG.debug(String.format("找到文件 : %s", - objectSummary.getKey())); - remoteObjects.add(objectSummary.getKey()); + remoteObjectList = client.listObjects(listObjectsRequest); + if (null != remoteObjectList) { + LOG.info("ListObjects prefix: {} requestId: {}", remoteObjectList.getPrefix(), remoteObjectList.getRequestId()); + } else { + LOG.info("ListObjectsRequest get null"); } - listObjectsRequest.setMarker(objectList.getNextMarker()); + remoteObjectListings.add(remoteObjectList); + listObjectsRequest.setMarker(remoteObjectList.getNextMarker()); LOG.debug(listObjectsRequest.getMarker()); - LOG.debug(String.valueOf(objectList.isTruncated())); - - } while (objectList.isTruncated()); - } catch (IllegalArgumentException e) { - throw DataXException.asDataXException( - OssReaderErrorCode.OSS_EXCEPTION, e.getMessage()); + LOG.debug(String.valueOf(remoteObjectList.isTruncated())); + } while (remoteObjectList.isTruncated()); + } catch (Exception e) { + trackOssDetailException(e, null); } - return remoteObjects; + return remoteObjectListings; } } @@ -289,30 +381,116 @@ public class OssReader extends Reader { private static Logger LOG = LoggerFactory.getLogger(Reader.Task.class); private Configuration readerSliceConfig; - - @Override - public void startRead(RecordSender recordSender) { - LOG.debug("read start"); - String object = readerSliceConfig.getString(Key.OBJECT); - OSSClient client = OssUtil.initOssClient(readerSliceConfig); - - OSSObject ossObject = client.getObject( - readerSliceConfig.getString(Key.BUCKET), object); - InputStream objectStream = ossObject.getObjectContent(); - UnstructuredStorageReaderUtil.readFromStream(objectStream, object, - this.readerSliceConfig, recordSender, - this.getTaskPluginCollector()); - recordSender.flush(); - } + private Boolean isBinaryFile; + private Integer blockSizeInByte; + private List allWorksForTask; + private boolean originSkipHeader; + private OSSClient ossClient; + private String fileFormat; + private HdfsReader.Task hdfsReaderTask; + 
private boolean useHdfsReaderProxy = false; @Override public void init() { this.readerSliceConfig = this.getPluginJobConf(); + this.fileFormat = this.readerSliceConfig.getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.FILE_FORMAT, + com.alibaba.datax.plugin.unstructuredstorage.reader.Constant.DEFAULT_FILE_FORMAT); + this.useHdfsReaderProxy = HdfsParquetUtil.isUseHdfsWriterProxy(this.fileFormat); + if(useHdfsReaderProxy){ + this.hdfsReaderTask = new HdfsReader.Task(); + this.hdfsReaderTask.setPeerPluginJobConf(this.getPeerPluginJobConf()); + this.hdfsReaderTask.setPeerPluginName(this.getPeerPluginName()); + this.hdfsReaderTask.setPluginJobConf(this.getPluginJobConf()); + this.hdfsReaderTask.setReaderPluginSplitConf(this.getReaderPluginSplitConf()); + this.hdfsReaderTask.setTaskGroupId(this.getTaskGroupId()); + this.hdfsReaderTask.setTaskId(this.getTaskId()); + this.hdfsReaderTask.setTaskPluginCollector(this.getTaskPluginCollector()); + this.hdfsReaderTask.init(); + return; + } + String allWorksForTaskStr = this.readerSliceConfig + .getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.SPLIT_SLICE_CONFIG); + if (StringUtils.isBlank(allWorksForTaskStr)) { + allWorksForTaskStr = "[]"; + } + this.allWorksForTask = JSON.parseObject(allWorksForTaskStr, new TypeReference>() { + }); + this.isBinaryFile = FileFormat.getFileFormatByConfiguration(this.readerSliceConfig).isBinary(); + this.blockSizeInByte = this.readerSliceConfig.getInt( + com.alibaba.datax.plugin.unstructuredstorage.reader.Key.BLOCK_SIZE_IN_BYTE, + com.alibaba.datax.plugin.unstructuredstorage.reader.Constant.DEFAULT_BLOCK_SIZE_IN_BYTE); + this.originSkipHeader = this.readerSliceConfig + .getBool(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.SKIP_HEADER, false); + } + + @Override + public void prepare() { + LOG.info("task prepare() begin..."); + if(useHdfsReaderProxy){ + this.hdfsReaderTask.prepare(); + return; + } + } + + + @Override + public void startRead(RecordSender recordSender) { + if(useHdfsReaderProxy){ + this.hdfsReaderTask.startRead(recordSender); + return; + } + boolean successOnNoObject = this.readerSliceConfig.getBool(Key.SUCCESS_ON_NO_Object, false); + if (this.allWorksForTask.isEmpty() && successOnNoObject) { + recordSender.flush(); + return; + } + String bucket = this.readerSliceConfig.getString(Key.BUCKET); + this.ossClient = OssUtil.initOssClient(this.readerSliceConfig); + for (StartEndPair eachSlice : this.allWorksForTask) { + String object = eachSlice.getFilePath(); + Long start = eachSlice.getStart(); + Long end = eachSlice.getEnd(); + LOG.info(String.format("read bucket=[%s] object=[%s], range: [start=%s, end=%s] start...", bucket, + object, start, end)); + InputStream ossInputStream = new OssInputStream(ossClient, bucket, object, start, end); + // 检查是否要跳过表头, 防止重复跳过首行 + Boolean skipHeaderValue = this.originSkipHeader && (0L == start); + this.readerSliceConfig.set(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.SKIP_HEADER, + skipHeaderValue); + try { + if (!this.isBinaryFile) { + UnstructuredStorageReaderUtil.readFromStream(ossInputStream, object, this.readerSliceConfig, + recordSender, this.getTaskPluginCollector()); + } else { + BinaryFileReaderUtil.readFromStream(ossInputStream, object, recordSender, this.blockSizeInByte); + } + } finally { + IOUtils.closeQuietly(ossInputStream); + } + } + recordSender.flush(); + } + + @Override + public void post() { + LOG.info("task post() begin..."); + if(useHdfsReaderProxy){ + this.hdfsReaderTask.post(); + return; + } } 
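The startRead() loop above reads each assigned byte range of an object through OssInputStream and skips the header row only for the slice whose start offset is 0, so a header configured via skipHeader is consumed exactly once per file even when one file is split across several tasks. Below is a minimal, self-contained sketch of that idea: an in-memory byte array stands in for the OSS object, a hypothetical Slice class stands in for StartEndPair, and slice boundaries are assumed to fall on '\n' (which the splitter arranges by pushing the cut point to the next newline).

```java
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;

/**
 * Sketch of range-sliced reading with one-time header skipping.
 * Slice is a hypothetical stand-in for StartEndPair; rangeStream() is a
 * local stand-in for a ranged OSS read (OssInputStream).
 */
public class SliceReadSketch {

    static final class Slice {
        final long start;
        final long end; // inclusive
        Slice(long start, long end) { this.start = start; this.end = end; }
    }

    public static void main(String[] args) throws IOException {
        byte[] object = "id,name\n1,foo\n2,bar\n3,baz\n".getBytes(StandardCharsets.UTF_8);
        boolean originSkipHeader = true;

        // Two slices of the same object; boundaries are assumed to be aligned to '\n'.
        List<Slice> slices = Arrays.asList(new Slice(0, 13), new Slice(14, object.length - 1));

        for (Slice slice : slices) {
            // Skip the header only on the slice that contains the first byte of the object.
            boolean skipHeader = originSkipHeader && slice.start == 0L;
            try (InputStream in = rangeStream(object, slice.start, slice.end);
                 BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
                if (skipHeader) {
                    reader.readLine();
                }
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.printf("slice[%d,%d] -> %s%n", slice.start, slice.end, line);
                }
            }
        }
    }

    /** Returns bytes [start, end] of the object, mimicking a ranged GET. */
    private static InputStream rangeStream(byte[] object, long start, long end) {
        return new ByteArrayInputStream(object, (int) start, (int) (end - start + 1));
    }
}
```

Running the sketch prints the header-free rows of both slices; only the first slice discards "id,name", which is the behavior the originSkipHeader && (0L == start) check in the plugin is meant to guarantee.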
@Override public void destroy() { - + if(useHdfsReaderProxy){ + this.hdfsReaderTask.destroy(); + return; + } + try { + // this.ossClient.shutdown(); + } catch (Exception e) { + LOG.warn("shutdown ossclient meet a exception:" + e.getMessage(), e); + } } } } diff --git a/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/util/HdfsParquetUtil.java b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/util/HdfsParquetUtil.java new file mode 100644 index 00000000..3012c84a --- /dev/null +++ b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/util/HdfsParquetUtil.java @@ -0,0 +1,40 @@ +package com.alibaba.datax.plugin.reader.ossreader.util; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.reader.ossreader.Key; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONObject; + +/** + * @Author: guxuan + * @Date 2022-05-17 15:46 + */ +public class HdfsParquetUtil { + public static boolean isUseHdfsWriterProxy( String fileFormat){ + if("orc".equalsIgnoreCase(fileFormat) || "parquet".equalsIgnoreCase(fileFormat)){ + return true; + } + return false; + } + + /** + * 配置readerOriginConfig 适配hdfsreader读取oss parquet + * https://help.aliyun.com/knowledge_detail/74344.html + * @param readerOriginConfig + */ + public static void adaptConfiguration(Configuration readerOriginConfig){ + String bucket = readerOriginConfig.getString(Key.BUCKET); + String fs =String.format("oss://%s",bucket); + readerOriginConfig.set(com.alibaba.datax.plugin.reader.hdfsreader.Key.DEFAULT_FS,fs); + readerOriginConfig.set(com.alibaba.datax.plugin.reader.hdfsreader.Key.FILETYPE, + readerOriginConfig.getString(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.FILE_FORMAT)); + /** + * "path"、 "column" 相互一致 + */ + JSONObject hadoopConfig = new JSONObject(); + hadoopConfig.put(Key.FS_OSS_ACCESSID,readerOriginConfig.getString(Key.ACCESSID)); + hadoopConfig.put(Key.FS_OSS_ACCESSKEY,readerOriginConfig.getString(Key.ACCESSKEY)); + hadoopConfig.put(Key.FS_OSS_ENDPOINT,readerOriginConfig.getString(Key.ENDPOINT)); + readerOriginConfig.set(Key.HDOOP_CONFIG,Configuration.from(JSON.toJSONString(hadoopConfig))); + } +} diff --git a/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/util/OssSplitUtil.java b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/util/OssSplitUtil.java new file mode 100644 index 00000000..6ba80999 --- /dev/null +++ b/ossreader/src/main/java/com/alibaba/datax/plugin/reader/ossreader/util/OssSplitUtil.java @@ -0,0 +1,485 @@ +package com.alibaba.datax.plugin.reader.ossreader.util; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.reader.ossreader.OssInputStream; +import com.alibaba.datax.plugin.unstructuredstorage.reader.Key; +import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderErrorCode; +import com.alibaba.datax.plugin.unstructuredstorage.reader.split.StartEndPair; +import com.alibaba.datax.plugin.unstructuredstorage.reader.split.UnstructuredSplitUtil; +import com.alibaba.fastjson2.JSONArray; +import com.alibaba.fastjson2.JSONObject; +import com.aliyun.oss.OSSClient; +import com.aliyun.oss.model.GetObjectRequest; +import com.aliyun.oss.model.OSSObject; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import 
java.util.ArrayList; +import java.util.Comparator; +import java.util.List; + +/** + * @Author: guxuan + * @Date 2022-05-17 15:48 + */ +public class OssSplitUtil extends UnstructuredSplitUtil { + + private static final Logger LOG = LoggerFactory.getLogger(UnstructuredSplitUtil.class); + public static final Long SINGLE_FILE_SPLIT_THRESHOLD_IN_SIZE = 64 * 1024 * 1024L; // 小于 1MB 的文件不做内部切分 + private OSSClient ossClient; + private String bucketName; + private Double balanceThreshold; + private Long avgLen = -1L; + private Integer splitGroupNum = -1; + + public OssSplitUtil(OSSClient ossClient, String bucketName) { + super(false); + this.ossClient = ossClient; + this.bucketName = bucketName; + } + + @Override + public Long getFileTotalLength(String filePath) { + // 获取object字节总数 + GetObjectRequest getObjectRequest = new GetObjectRequest(this.bucketName, filePath); + OSSObject ossObject = this.ossClient.getObject(getObjectRequest); + return ossObject.getObjectMetadata().getContentLength(); + } + + @Override + public InputStream getFileInputStream(StartEndPair startEndPair) { + InputStream inputStream = new OssInputStream(this.ossClient, this.bucketName, startEndPair.getFilePath(), + startEndPair.getStart(), startEndPair.getEnd()); + return inputStream; + } + + private Boolean canSplitSingleFile(Configuration jobConfig) { + Boolean enableInnerSplit = jobConfig.getBool(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.ENABLE_INNER_SPLIT, true); + if (!enableInnerSplit) { + return false; + } + + // 默认不切分 + String fileFormat = jobConfig.getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.FILE_FORMAT, + com.alibaba.datax.plugin.unstructuredstorage.reader.Constant.DEFAULT_FILE_FORMAT); + String compressType = jobConfig.getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COMPRESS); + + // 如果不满足"是text格式且非压缩文件",则直接返回false + if (! StringUtils.equalsIgnoreCase(fileFormat, com.alibaba.datax.plugin.unstructuredstorage.reader.Constant.FILE_FORMAT_TEXT) || + ! 
StringUtils.isBlank(compressType)) { + return false; + } + + // todo: 判断文件是否为软连接文件,如果为软连接文件,则不支持内部切分 + + return true; + } + + private boolean isGroupsBalance(List groups) { + assert (groups != null); + + if(groups.size() <= 1) { + return true; + } + + double avg = (double) this.avgLen * (1.0 + this.balanceThreshold/100); + for (Group group : groups) { + if(group.getFilledLenght() > avg) { + return false; + } + } + return true; + } + + /* + * 把 allObjectKeySizePares 分成 N 组,尽量使得各组中文件 size 之和 近似 + * */ + private List splitObjectToGroups(List> allObjKeySizePares, Integer N) { + List groups; + + // 若文件数 <= N,则每个文件分一个组 + if(allObjKeySizePares.size() <= N) { + groups = new ArrayList<>(); + int index = 0; + for (Pair pair : allObjKeySizePares) { + // capacity 初始化为avgLen + Group group = new Group(avgLen); + FileBlock fileBlock = new FileBlock(pair.getKey(), 0L, pair.getValue() - 1); + group.fill(fileBlock); + groups.add(group); + } + + // 文件不足N,则以空group补全 + for (int i = groups.size(); i < N; i++) { + groups.add(new Group(avgLen)); + } + + return groups; + } + + //文件数量 > N + //对 allObjKeySizePairs 按照 size 从大到小排序 + allObjKeySizePares.sort(new Comparator>() { + @Override + public int compare(Pair o1, Pair o2) { + if (o1.getValue().compareTo(o2.getValue()) < 0) { + return 1; + } + if (o1.getValue().equals(o2.getValue())) { + return 0; + } + return -1; + } + }); + + groups = new ArrayList<>(N); + + for (int i = 0; i < N; i++) { + Group group = new Group(avgLen); + groups.add(group); + } + + for (Pair pair : allObjKeySizePares) { + FileBlock fileBlock = new FileBlock(pair.getKey(), 0L, pair.getValue() - 1); + + // 对于avgLen < 0 的极端情况,直接将文件按照数量均分到各个group + if (avgLen > 0 && pair.getValue() >= avgLen) { + // 若果文件size > avgLen,则独立成组(放在一个空的group中 + for (int index = 0; index < N; index++) { + if (groups.get(index).isEmpty()) { + groups.get(index).fill(fileBlock); + break; + } + } + } else { + // 如果文件小于平均长度,则将其放在一个当前能够容纳,且容量最接近的 group 中 + int selectedIndex = 0, index = 0; + // 先找到第一个能容纳的 + for (; index < N; index++) { + if (groups.get(index).getCapacity() >= fileBlock.getSize()) { + selectedIndex = index; + } + } + // 找到能容纳且剩余容量最小的 + for (;index < N; index++) { + if (groups.get(index).getCapacity() >= fileBlock.getSize() + && groups.get(index).getCapacity() < groups.get(selectedIndex).getCapacity()) { + selectedIndex = index; + } + } + groups.get(selectedIndex).fill(fileBlock); + } + + } + + return groups; + } + + private void reBalanceGroup(List groups) { + LOG.info("reBalance start"); + assert (groups != null && groups.size() > 0); + // 对某些group内部的文件进行进一步切分 + /* 1. 选出负载最小和最大的组 */ + Group groupMinLoad = groups.get(0); + Group groupMaxLoad = groups.get(0); + for (Group group : groups) { + if (group.getFilledLenght() > groupMaxLoad.getFilledLenght()) { + groupMaxLoad = group; + } + + if (group.getFilledLenght() < groupMinLoad.getFilledLenght()) { + groupMinLoad = group; + } + } + + /* 2. 将 groupMaxLoad 最大文件切分出部分放入 groupMinLoad + * 大小为 min{grouMaxLoad.length - mean, mean - groupMinLoad.length} */ + Long splitLen = Math.min(groupMinLoad.getCapacity(), groupMaxLoad.getOverloadLength()); + FileBlock splitOutBlock = groupMaxLoad.split(splitLen, this.ossClient, this.bucketName); + + groupMinLoad.fill(splitOutBlock); + LOG.info("reBalance end"); + } + + private Long getTotoalLenOfObjList(List> objKeySizePares) { + Long totalLen = 0L; + for (Pair pair : objKeySizePares) { + totalLen += (pair.getValue() < 0 ? 
1 : pair.getValue()); + } + + return totalLen; + } + + public List getSplitedConfigurations(Configuration originConfiguration, List> objKeySizePares, + int adviceNumber) { + List configurationList = new ArrayList<>(); + + this.splitGroupNum = adviceNumber; + this.avgLen = (long)Math.ceil((double)this.getTotoalLenOfObjList(objKeySizePares) / this.splitGroupNum); + this.balanceThreshold = originConfiguration.getDouble(com.alibaba.datax.plugin.reader.ossreader.Key.BALANCE_THRESHOLD, 10.0); + + List groups = this.splitObjectToGroups(objKeySizePares, this.splitGroupNum); + + // 划分后,各个组间如果长度确实比较近似,则不需要进一步对单个文件进行内部切分,反之,则需要对单个文件进行内部切分以进行进一步的调整 + if (canSplitSingleFile(originConfiguration)) { + // 防止文件内部单行过大,对循环加以限制,理论上最多只需要调整 splitGroupNum 次 + Integer i = 0; + Long timeStart = System.currentTimeMillis(); + while (i++ < splitGroupNum && ! this.isGroupsBalance(groups)) { + this.reBalanceGroup(groups); + } + Long timeEnd = System.currentTimeMillis(); + LOG.info("split groups cost {} ms", timeEnd - timeStart); + } + + LOG.info("Splited gourps:\n"); + for (Group group : groups) { + LOG.info(group.toString()); + } + + // 根据Groups划分结果初始化各个分片任务配置 + for (Group group : groups) { + Configuration configuration = originConfiguration.clone(); + // 根据groups初始化分片 + List startEndPairs = new ArrayList<>(); + for (FileBlock fileBlock : group.getFileBLocks()) { + if (canSplitSingleFile(originConfiguration)) { + startEndPairs.add(new StartEndPair(fileBlock.getStartOffset(), fileBlock.getEndOffset(), fileBlock.getObjName())); + } else { + // 如果不支持内部切分,则设置结束位点为-1,直接读取文件全部内容 + // 对于软连接文件,这是必要的 30190064 + startEndPairs.add(new StartEndPair(fileBlock.getStartOffset(), -1L, fileBlock.getObjName())); + } + } + configuration.set(Key.SPLIT_SLICE_CONFIG, startEndPairs); + configurationList.add(configuration); + } + + return configurationList; + } +} + +class Group { + /* + * fileBlockList 表示该Group中对应的文件块列表,单个文件块用一个三元组 表示 + * */ + private List fileBLockList; + private Long capacity; + private Long filledLenght; + private static final Logger LOG = LoggerFactory.getLogger(Group.class); + + Group (Long capacity) { + this(new ArrayList<>(), capacity); + } + + Group (List fileBLockList, Long capacity) { + this.capacity = capacity; + this.fileBLockList = fileBLockList; + this.filledLenght = 0L; + for (FileBlock fileBlock : fileBLockList) { + this.filledLenght += fileBlock.getSize(); + this.capacity -= fileBlock.getSize(); + } + } + + void fill(FileBlock fileBlock) { + if (null == fileBlock) { + return; + } + this.fileBLockList.add(fileBlock); + this.capacity -= fileBlock.getSize(); + this.filledLenght += fileBlock.getSize(); + } + + void take(FileBlock fileBlock) { + this.capacity += fileBlock.getSize(); + this.filledLenght -= fileBlock.getSize(); + this.fileBLockList.remove(fileBlock); + } + + Long getCapacity() { + return this.capacity; + } + + void setCapacity(Long capacity) { + this.capacity = capacity; + } + + Long getFilledLenght() { + return this.filledLenght; + } + + public boolean isEmpty() { + return this.fileBLockList.isEmpty(); + } + + public boolean isFull() { + return this.capacity <= 0; + } + + List getFileBLocks() { + return this.fileBLockList; + } + + private Integer getBiggestFileBlock() { + Integer index = 0; + Long maxSize = -1L; + for (int i = 0; i < this.fileBLockList.size(); i++) { + if (this.fileBLockList.get(index).getSize() > maxSize) { + index = i; + } + } + return index; + } + + /* + * 对Group进行切分,切分逻辑为:对最大block进行切分,前splitLen个字节作为一个新块 + * */ + FileBlock split(Long splitLen, OSSClient ossClient, String 
ossBucketName) { + Integer bigBlockIndex = this.getBiggestFileBlock(); + FileBlock bigBlock = this.fileBLockList.get(bigBlockIndex); + // 如果最大块的不足 10MB,则不进行内部切分直接返回 + if (bigBlock.getSize() <= OssSplitUtil.SINGLE_FILE_SPLIT_THRESHOLD_IN_SIZE) { + return null; + } + + FileBlock outBlock; + FileBlock remainBlock; + + this.take(bigBlock); + + // 如果splitLen 大于 最大块的长度, 则直接把最大块切分出去 + if (splitLen >= bigBlock.getSize()) { + outBlock = new FileBlock(bigBlock); + } else { + Long originalEnd = bigBlock.getEndOffset(); + outBlock = new FileBlock(bigBlock.getObjName(), bigBlock.getStartOffset(), bigBlock.getStartOffset() + splitLen - 1); + + // 校准第一个block的结束位点,即往后推到第一个换行符 + InputStream inputStream = new OssInputStream(ossClient, ossBucketName, outBlock.getObjName(), outBlock.getEndOffset(), originalEnd); + Long endForward = this.getLFIndex(inputStream); + outBlock.setEndOffset(outBlock.getEndOffset() + endForward); + + // outblock取的是前边部分record,切分除去后,剩余部分可能为空,这时候不生成remainBlock,确保有剩余(outBlock.end > originEnd)时再生成remainBlock. + if (outBlock.getEndOffset() < originalEnd) { + remainBlock = new FileBlock(bigBlock.getObjName(), outBlock.getEndOffset() + 1, originalEnd); + this.fill(remainBlock); + } + } + + return outBlock; + } + + Long getOverloadLength() { + return Math.max(0, -this.capacity); + } + + /** + * 获取到输入流开始的第一个'\n'偏移量 + * + * @param inputStream + * 输入流 + * @return + */ + public Long getLFIndex(InputStream inputStream) { + Long hasReadByteIndex = -1L; + int ch = 0; + while (ch != -1) { + try { + ch = inputStream.read(); + } catch (IOException e) { + throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR, + String.format("inputstream read Byte has exception: %s", e.getMessage()), e); + } + hasReadByteIndex++; + + if (ch == '\n') { + return hasReadByteIndex; + } + } + return -1L; + } + + public String toString() { + JSONArray fbList = new JSONArray(); + int index = 0; + for (FileBlock fb : this.fileBLockList) { + JSONObject jsonObject = new JSONObject(); + jsonObject.put(String.format("block[%d]", index++), fb.toString()); + fbList.add(jsonObject); + } + return fbList.toString(); + } +} + +class FileBlock { + private String objName; + private Long startOffset; + private Long endOffset; + private Long size; + + FileBlock(String objName, Long startOffset, Long endOffset) { + assert (StringUtils.isNotBlank(objName) && startOffset >= 0 ); + assert (endOffset == -1 || startOffset <= endOffset); + + this.objName = objName; + this.startOffset = startOffset; + // endOffset < 0 的情况下,统一设置为-1,size 设置为0 + this.endOffset = endOffset < 0 ? -1 : endOffset; + this.size = endOffset < 0 ? 
1 : this.endOffset - this.startOffset + 1; + } + + public FileBlock(String objName) { + this(objName, 0L, -1L); + } + + public FileBlock(String objName, Pair starEndPair) { + this(objName, starEndPair.getKey(), starEndPair.getValue()); + } + + public FileBlock(FileBlock fileBlock) { + assert (fileBlock != null); + this.objName = fileBlock.objName; + this.startOffset = fileBlock.startOffset; + this.endOffset = fileBlock.endOffset; + this.size = fileBlock.size; + } + + Long getSize() { + return this.size; + } + + Long getStartOffset() { + return this.startOffset; + } + + void setStartOffset(Long startOffset) { + Long deltaSize = this.startOffset - startOffset; + this.startOffset = startOffset; + this.size += deltaSize; + } + + Long getEndOffset() { + return this.endOffset; + } + + void setEndOffset(Long endOffset) { + Long deltaSize = endOffset - this.endOffset; + this.endOffset = endOffset; + //size随之调整 + this.size += deltaSize; + } + + String getObjName() { + return this.objName; + } + + public String toString() { + return String.format("<%s,%d,%d>", this.objName, this.startOffset, this.endOffset); + } +} diff --git a/osswriter/pom.xml b/osswriter/pom.xml index 90d84c10..ac4029e0 100644 --- a/osswriter/pom.xml +++ b/osswriter/pom.xml @@ -10,6 +10,17 @@ osswriter jar + + org.apache.logging.log4j + log4j-api + 2.17.1 + + + + org.apache.logging.log4j + log4j-core + 2.17.1 + com.alibaba.datax datax-common @@ -44,6 +55,60 @@ aliyun-sdk-oss 2.2.3 + + + + org.apache.parquet + parquet-column + 1.8.1 + + + org.apache.parquet + parquet-avro + 1.8.1 + + + org.apache.parquet + parquet-common + 1.8.1 + + + org.apache.parquet + parquet-format + 2.3.1 + + + org.apache.parquet + parquet-jackson + 1.8.1 + + + org.apache.parquet + parquet-encoding + 1.8.1 + + + org.apache.parquet + parquet-hadoop + 1.8.1 + + + com.twitter + parquet-hadoop-bundle + 1.6.0 + + + com.alibaba.datax + hdfswriter + 0.0.1-SNAPSHOT + compile + + + com.alibaba.datax + datax-core + 0.0.1-SNAPSHOT + compile + @@ -77,4 +142,4 @@ - \ No newline at end of file + diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/Constant.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/Constant.java index 5bf2eb46..b5c7110b 100644 --- a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/Constant.java +++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/Constant.java @@ -6,4 +6,11 @@ package com.alibaba.datax.plugin.writer.osswriter; public class Constant { public static final String OBJECT = "object"; public static final int SOCKETTIMEOUT = 5000000; + public static final String DEFAULT_NULL_FORMAT = "null"; + + /** + * 每一个上传的Part都有一个标识它的号码(part number,范围是1-10000) + * https://help.aliyun.com/document_detail/31993.html + */ + public static final int MAX_BLOCK_SIZE = 10000; } diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/Key.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/Key.java index b922f59c..8ce263b0 100644 --- a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/Key.java +++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/Key.java @@ -16,4 +16,55 @@ public class Key { public static final String CNAME = "cname"; + public static final String PARTITION = "partition"; + + /** + * encrypt: 是否需要将数据在oss上加密存储 + */ + public static final String ENCRYPT = "encrypt"; + + public static final String BLOCK_SIZE_IN_MB = "blockSizeInMB"; + + public static final String OSS_CONFIG = "oss"; + 
public static final String POSTGRESQL_CONFIG = "postgresql"; + + public static final String PROXY_HOST = "proxyHost"; + + public static final String PROXY_PORT = "proxyPort"; + + public static final String PROXY_USERNAME = "proxyUsername"; + + public static final String PROXY_PASSWORD = "proxyPassword"; + + public static final String PROXY_DOMAIN = "proxyDomain"; + + public static final String PROXY_WORKSTATION = "proxyWorkstation"; + + public static final String HDOOP_CONFIG = "hadoopConfig"; + + public static final String FS_OSS_ACCESSID = "fs.oss.accessKeyId"; + + public static final String FS_OSS_ACCESSKEY = "fs.oss.accessKeySecret"; + + public static final String FS_OSS_ENDPOINT = "fs.oss.endpoint"; + /** + * 多个task是否写单个object文件: + * false 多个task写多个object(默认是false, 保持向前兼容) + * true 多个task写单个object + */ + public static final String WRITE_SINGLE_OBJECT = "writeSingleObject"; + + public static final String UPLOAD_ID = "uploadId"; + + /** + * Only for parquet or orc fileType + */ + public static final String PATH = "path"; + /** + * Only for parquet or orc fileType + */ + public static final String FILE_NAME = "fileName"; + + public static final String GENERATE_EMPTY_FILE = "generateEmptyFile"; + } diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssSingleObject.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssSingleObject.java new file mode 100644 index 00000000..415cf39c --- /dev/null +++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssSingleObject.java @@ -0,0 +1,78 @@ +package com.alibaba.datax.plugin.writer.osswriter; + +import com.alibaba.datax.common.exception.DataXException; +import com.aliyun.oss.model.PartETag; +import org.apache.commons.lang3.ArrayUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * @Author: guxuan + * @Date 2022-05-17 16:30 + */ +public class OssSingleObject { + private static Logger logger = LoggerFactory.getLogger(OssSingleObject.class); + + /** + * 一个uploadId即一个oss对象 + */ + public static String uploadId; + + /** + * 将最后一个未提交的block全部缓存到lastBlockBuffer中 + */ + private static byte[] lastBlockBuffer; + + /** + * 当前part number + */ + public static AtomicInteger currentPartNumber = new AtomicInteger(1); + + /** + * 所有已经提交的block + * 注:allPartETags是线程安全的list + */ + public static List allPartETags = Collections.synchronizedList(new ArrayList()); + + /** + * 将每个task最后未upload的block加入到lastBlockBuffer, + * 如果lastBlockBuffer的大小已经超过blockSizeInByte,则需要upload一次, 防止task过多导致lastBlockBuffer暴增OOM + * + * @param lastBlock + * @param ossWriterProxy + * @param blockSizeInByte + * @param object + */ + public synchronized static void addLastBlockBuffer(byte[] lastBlock, + OssWriterProxy ossWriterProxy, + long blockSizeInByte, + String object, OssWriterProxy.HeaderProvider headerProvider) { + lastBlockBuffer = ArrayUtils.addAll(lastBlockBuffer, lastBlock); + //lastBlockBuffer大小超过blockSizeInByte则需要upload part + if (lastBlockBuffer != null && lastBlockBuffer.length >= blockSizeInByte) { + logger.info("write last block buffer part size [{}] to object [{}], all has uploaded part size:{}, current part number:{}, uploadId:{}", + lastBlockBuffer.length, object, allPartETags.size(), currentPartNumber.intValue(), uploadId); + try { + ossWriterProxy.uploadOnePartForSingleObject(lastBlockBuffer, uploadId, allPartETags, object, headerProvider); + } catch 
(Exception e) { + logger.error("upload part error: {}", e.getMessage(), e); + throw DataXException.asDataXException(e.getMessage()); + } + //currentPartNumber自增 + currentPartNumber.incrementAndGet(); + //清空lastBlockBuffer + lastBlockBuffer = null; + } + + } + + public static byte[] getLastBlockBuffer() { + return lastBlockBuffer; + } + +} diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssWriter.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssWriter.java index 90a34ad7..f96a8e01 100644 --- a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssWriter.java +++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssWriter.java @@ -1,18 +1,21 @@ package com.alibaba.datax.plugin.writer.osswriter; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.StringWriter; +import java.io.*; import java.text.DateFormat; import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.UUID; +import java.util.*; import java.util.concurrent.Callable; +import com.alibaba.datax.common.element.BytesColumn; +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.util.RangeSplitUtil; +import com.alibaba.datax.plugin.unstructuredstorage.FileFormat; +import com.alibaba.datax.plugin.unstructuredstorage.writer.binaryFileUtil.BinaryFileWriterUtil; +import com.alibaba.datax.plugin.writer.hdfswriter.HdfsWriter; +import com.alibaba.datax.plugin.writer.osswriter.util.HandlerUtil; +import com.alibaba.datax.plugin.writer.osswriter.util.HdfsParquetUtil; +import com.alibaba.fastjson2.JSON; +import com.aliyun.oss.model.*; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; @@ -31,50 +34,121 @@ import com.alibaba.datax.plugin.writer.osswriter.util.OssUtil; import com.aliyun.oss.ClientException; import com.aliyun.oss.OSSClient; import com.aliyun.oss.OSSException; -import com.aliyun.oss.model.CompleteMultipartUploadRequest; -import com.aliyun.oss.model.CompleteMultipartUploadResult; -import com.aliyun.oss.model.InitiateMultipartUploadRequest; -import com.aliyun.oss.model.InitiateMultipartUploadResult; -import com.aliyun.oss.model.OSSObjectSummary; -import com.aliyun.oss.model.ObjectListing; -import com.aliyun.oss.model.PartETag; -import com.aliyun.oss.model.UploadPartRequest; -import com.aliyun.oss.model.UploadPartResult; + +import static com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.*; /** * Created by haiwei.luo on 15-02-09. */ public class OssWriter extends Writer { + + public static int parseParentPathLength(List path) { + if (path == null || path.size() != 1) { + throw DataXException.asDataXException( + OssWriterErrorCode.CONFIG_INVALID_EXCEPTION, String.format("only support configure one path in binary copy mode, your config: %s", JSON.toJSONString(path))); + } + String eachPath = path.get(0); + int endMark; + for (endMark = 0; endMark < eachPath.length(); endMark++) { + if ('*' != eachPath.charAt(endMark) && '?' 
!= eachPath.charAt(endMark)) { + continue; + } else { + break; + } + } + + int lastDirSeparator = eachPath.lastIndexOf(IOUtils.DIR_SEPARATOR) + 1; + if (endMark < eachPath.length()) { + lastDirSeparator = eachPath.substring(0, endMark).lastIndexOf(IOUtils.DIR_SEPARATOR) + 1; + } + return lastDirSeparator; + } + public static class Job extends Writer.Job { private static final Logger LOG = LoggerFactory.getLogger(Job.class); private Configuration writerSliceConfig = null; private OSSClient ossClient = null; + private Configuration peerPluginJobConf; + private Boolean isBinaryFile; + private String objectDir; + private String syncMode; + private String fileFormat; + private String encoding; + private HdfsWriter.Job hdfsWriterJob; + private boolean useHdfsWriterProxy = false; + private boolean writeSingleObject; + private OssWriterProxy ossWriterProxy; + private String bucket; + private String object; + private List header; + + @Override + public void preHandler(Configuration jobConfiguration) { + HandlerUtil.preHandler(jobConfiguration); + } + @Override public void init() { this.writerSliceConfig = this.getPluginJobConf(); + this.basicValidateParameter(); + this.fileFormat = this.writerSliceConfig.getString( + com.alibaba.datax.plugin.unstructuredstorage.writer.Key.FILE_FORMAT, + com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.FILE_FORMAT_TEXT); + this.encoding = this.writerSliceConfig.getString( + com.alibaba.datax.plugin.unstructuredstorage.writer.Key.ENCODING, + com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.DEFAULT_ENCODING); + this.useHdfsWriterProxy = HdfsParquetUtil.isUseHdfsWriterProxy(this.fileFormat); + if(useHdfsWriterProxy){ + this.hdfsWriterJob = new HdfsWriter.Job(); + HdfsParquetUtil.adaptConfiguration(this.hdfsWriterJob, this.writerSliceConfig); + + this.hdfsWriterJob.setJobPluginCollector(this.getJobPluginCollector()); + this.hdfsWriterJob.setPeerPluginJobConf(this.getPeerPluginJobConf()); + this.hdfsWriterJob.setPeerPluginName(this.getPeerPluginName()); + this.hdfsWriterJob.setPluginJobConf(this.getPluginJobConf()); + this.hdfsWriterJob.init(); + return; + } + this.peerPluginJobConf = this.getPeerPluginJobConf(); + this.isBinaryFile = FileFormat.getFileFormatByConfiguration(this.peerPluginJobConf).isBinary(); + this.syncMode = this.writerSliceConfig + .getString(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.SYNC_MODE, ""); + this.writeSingleObject = this.writerSliceConfig.getBool(Key.WRITE_SINGLE_OBJECT, false); + this.header = this.writerSliceConfig + .getList(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.HEADER, null, String.class); this.validateParameter(); this.ossClient = OssUtil.initOssClient(this.writerSliceConfig); + this.ossWriterProxy = new OssWriterProxy(this.writerSliceConfig, this.ossClient); + } + + private void basicValidateParameter(){ + this.writerSliceConfig.getNecessaryValue(Key.ENDPOINT, OssWriterErrorCode.REQUIRED_VALUE); + this.writerSliceConfig.getNecessaryValue(Key.ACCESSID, OssWriterErrorCode.REQUIRED_VALUE); + this.writerSliceConfig.getNecessaryValue(Key.ACCESSKEY, OssWriterErrorCode.REQUIRED_VALUE); + this.writerSliceConfig.getNecessaryValue(Key.BUCKET, OssWriterErrorCode.REQUIRED_VALUE); } private void validateParameter() { - this.writerSliceConfig.getNecessaryValue(Key.ENDPOINT, - OssWriterErrorCode.REQUIRED_VALUE); - this.writerSliceConfig.getNecessaryValue(Key.ACCESSID, - OssWriterErrorCode.REQUIRED_VALUE); - this.writerSliceConfig.getNecessaryValue(Key.ACCESSKEY, - 
OssWriterErrorCode.REQUIRED_VALUE); - this.writerSliceConfig.getNecessaryValue(Key.BUCKET, - OssWriterErrorCode.REQUIRED_VALUE); - this.writerSliceConfig.getNecessaryValue(Key.OBJECT, - OssWriterErrorCode.REQUIRED_VALUE); + this.writerSliceConfig.getBool(Key.ENCRYPT); + + if (this.isBinaryFile){ + BinaryFileWriterUtil.validateParameter(this.writerSliceConfig); + return; + } + + if (!this.isPeer2PeerCopyMode()) { + // 非对等拷贝模式下必选 + this.writerSliceConfig.getNecessaryValue(Key.OBJECT, + OssWriterErrorCode.REQUIRED_VALUE); + } + // warn: do not support compress!! String compress = this.writerSliceConfig .getString(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.COMPRESS); if (StringUtils.isNotBlank(compress)) { - String errorMessage = String.format( - "OSS写暂时不支持压缩, 该压缩配置项[%s]不起效用", compress); + String errorMessage = String.format("OSS writes do not support compression for the moment. The compressed item %s does not work", compress); LOG.error(errorMessage); throw DataXException.asDataXException( OssWriterErrorCode.ILLEGAL_VALUE, errorMessage); @@ -82,107 +156,376 @@ public class OssWriter extends Writer { } UnstructuredStorageWriterUtil .validateParameter(this.writerSliceConfig); - + LOG.info("writeSingleObject is: {}", this.writeSingleObject); } @Override public void prepare() { LOG.info("begin do prepare..."); - String bucket = this.writerSliceConfig.getString(Key.BUCKET); - String object = this.writerSliceConfig.getString(Key.OBJECT); + if(useHdfsWriterProxy){ + this.hdfsWriterJob.prepare(); + return; + } + this.bucket = this.writerSliceConfig.getString(Key.BUCKET); + this.object = this.writerSliceConfig.getString(Key.OBJECT); String writeMode = this.writerSliceConfig .getString(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.WRITE_MODE); - // warn: bucket is not exists, create it - try { - // warn: do not create bucket for user - if (!this.ossClient.doesBucketExist(bucket)) { - // this.ossClient.createBucket(bucket); - String errorMessage = String.format( - "您配置的bucket [%s] 不存在, 请您确认您的配置项.", bucket); - LOG.error(errorMessage); - throw DataXException.asDataXException( - OssWriterErrorCode.ILLEGAL_VALUE, errorMessage); - } - LOG.info(String.format("access control details [%s].", - this.ossClient.getBucketAcl(bucket).toString())); + List sourceFileName = this.peerPluginJobConf.getList(SOURCE_FILE_NAME, new ArrayList(), + String.class); + this.objectDir = this.getObjectDir(object); - // truncate option handler - if ("truncate".equals(writeMode)) { - LOG.info(String - .format("由于您配置了writeMode truncate, 开始清理 [%s] 下面以 [%s] 开头的Object", - bucket, object)); - // warn: 默认情况下,如果Bucket中的Object数量大于100,则只会返回100个Object - while (true) { - ObjectListing listing = null; - LOG.info("list objects with listObject(bucket, object)"); - listing = this.ossClient.listObjects(bucket, object); - List objectSummarys = listing - .getObjectSummaries(); - for (OSSObjectSummary objectSummary : objectSummarys) { - LOG.info(String.format("delete oss object [%s].", - objectSummary.getKey())); - this.ossClient.deleteObject(bucket, - objectSummary.getKey()); - } - if (objectSummarys.isEmpty()) { - break; + // 对等拷贝模式下将源头获取的文件列表在目的端删除 + if (this.isPeer2PeerCopyMode()) { + String fullObjectName = null; + String truncateMode = this.writerSliceConfig.getString("truncateMode", "objectMatch"); + // 前缀删除模式 + if ("prefix".equalsIgnoreCase(truncateMode)) { + BinaryFileWriterUtil.checkFileNameIfRepeatedThrowException(sourceFileName); + if (TRUNCATE.equals(writeMode)) { + LOG.info("You have configured [writeMode] 
[truncate], so the system will start to clear the objects starting with [{}] under [{}]. ", bucket, object); + // warn: 默认情况下,如果Bucket中的Object数量大于100,则只会返回100个Object + while (true) { + ObjectListing listing = null; + LOG.info("list objects with listObject(bucket, object)"); + listing = this.ossClient.listObjects(bucket, object); + List objectSummarys = listing + .getObjectSummaries(); + if (objectSummarys.isEmpty()) { + break; + } + List objects2Delete = new ArrayList(); + for (OSSObjectSummary objectSummary : objectSummarys) { + objects2Delete.add(objectSummary.getKey()); + } + LOG.info(String.format("[prefix truncate mode]delete oss object [%s].", JSON.toJSONString(objects2Delete))); + DeleteObjectsRequest deleteRequest = new DeleteObjectsRequest(bucket); + deleteRequest.setKeys(objects2Delete); + deleteRequest.setQuiet(true);// 简单模式 + DeleteObjectsResult deleteResult = this.ossClient.deleteObjects(deleteRequest); + assert deleteResult.getDeletedObjects().isEmpty(); + LOG.warn("OSS request id:{}, objects delete failed:{}", deleteResult.getRequestId(), + JSON.toJSONString(deleteResult.getDeletedObjects())); } + + }else { + throw DataXException.asDataXException(OssWriterErrorCode.ILLEGAL_VALUE, + "only support truncate writeMode in copy sync mode."); } - } else if ("append".equals(writeMode)) { - LOG.info(String - .format("由于您配置了writeMode append, 写入前不做清理工作, 数据写入Bucket [%s] 下, 写入相应Object的前缀为 [%s]", - bucket, object)); - } else if ("nonConflict".equals(writeMode)) { - LOG.info(String - .format("由于您配置了writeMode nonConflict, 开始检查Bucket [%s] 下面以 [%s] 命名开头的Object", - bucket, object)); - ObjectListing listing = this.ossClient.listObjects(bucket, - object); - if (0 < listing.getObjectSummaries().size()) { - StringBuilder objectKeys = new StringBuilder(); - objectKeys.append("[ "); - for (OSSObjectSummary ossObjectSummary : listing - .getObjectSummaries()) { - objectKeys.append(ossObjectSummary.getKey() + " ,"); + } else { + if (TRUNCATE.equals(writeMode)) { + sourceFileName = this.peerPluginJobConf.getList(com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.SOURCE_FILE, new ArrayList(), + String.class); + List readerPath = this.peerPluginJobConf.getList(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.PATH, new ArrayList(), + String.class); + int parentPathLength = OssWriter.parseParentPathLength(readerPath); + this.writerSliceConfig.set("__parentPathLength", parentPathLength); + BinaryFileWriterUtil.checkFileNameIfRepeatedThrowException(sourceFileName); + + // 原样文件名删除模式 + int splitCount = sourceFileName.size() / 1000 + 1; + List> splitResult = RangeSplitUtil.doListSplit(sourceFileName, splitCount); + for (List eachSlice : splitResult) { + assert eachSlice.size() <= 1000; + if (eachSlice.isEmpty()) { + continue; + } + List ossObjFullPath = new ArrayList(); + for (String eachObj : eachSlice) { + fullObjectName = String.format("%s%s", objectDir, eachObj.substring(parentPathLength, eachObj.length())); + ossObjFullPath.add(fullObjectName); + } + LOG.info(String.format("[origin object name truncate mode]delete oss object [%s].", JSON.toJSONString(ossObjFullPath))); + DeleteObjectsRequest deleteRequest = new DeleteObjectsRequest(bucket); + deleteRequest.setKeys(ossObjFullPath); + deleteRequest.setQuiet(true);// 简单模式 + DeleteObjectsResult deleteResult = this.ossClient.deleteObjects(deleteRequest); + assert deleteResult.getDeletedObjects().isEmpty(); + LOG.warn("OSS request id:{}, objects delete failed:{}", deleteResult.getRequestId(), + 
JSON.toJSONString(deleteResult.getDeletedObjects())); } - objectKeys.append(" ]"); - LOG.info(String.format( - "object with prefix [%s] details: %s", object, - objectKeys.toString())); - throw DataXException - .asDataXException( - OssWriterErrorCode.ILLEGAL_VALUE, - String.format( - "您配置的Bucket: [%s] 下面存在其Object有前缀 [%s].", - bucket, object)); + } else { + throw DataXException.asDataXException(OssWriterErrorCode.ILLEGAL_VALUE, + "only support truncate writeMode in copy sync mode."); } } - } catch (OSSException e) { - throw DataXException.asDataXException( - OssWriterErrorCode.OSS_COMM_ERROR, e.getMessage()); - } catch (ClientException e) { - throw DataXException.asDataXException( - OssWriterErrorCode.OSS_COMM_ERROR, e.getMessage()); + return; + } else { + // warn: 源头表不是半结构化或者不是对等copy模式时走前缀删除策略 + // warn: bucket is not exists, create it + try { + // warn: do not create bucket for user + if (!this.ossClient.doesBucketExist(bucket)) { + // this.ossClient.createBucket(bucket); + String errorMessage = String.format("The [bucket]: %s you configured does not exist. Please confirm your configuration items. ", bucket); + LOG.error(errorMessage); + throw DataXException.asDataXException( + OssWriterErrorCode.ILLEGAL_VALUE, errorMessage); + } + LOG.info(String.format("access control details [%s].", + this.ossClient.getBucketAcl(bucket).toString())); + + if (writeSingleObject) { + doPrepareForSingleObject(bucket, object, writeMode); + } else { + doPrepareForMutliObject(bucket, object, writeMode); + } + } catch (OSSException e) { + throw DataXException.asDataXException( + OssWriterErrorCode.OSS_COMM_ERROR, e.getMessage(), e); + } catch (ClientException e) { + throw DataXException.asDataXException( + OssWriterErrorCode.OSS_COMM_ERROR, e.getMessage(), e); + } + } + } + + /** + * 执行多个task写单个object prepare逻辑 + * + * @param bucket + * @param object + * @param writeMode + */ + private void doPrepareForSingleObject(String bucket, String object, String writeMode) { + boolean doesObjectExist = this.ossClient.doesObjectExist(bucket, object); + LOG.info("does object [{}] exist in bucket {} : {}", object, bucket, doesObjectExist); + if (TRUNCATE.equals(writeMode)) { + LOG.info("Because you have configured writeMode truncate, and writeSingleObject is true, start cleaning up the duplicate object [{}] under [{}]", bucket, object); + if (doesObjectExist) { + LOG.info("object [{}] has exist in bucket, delete it!", object, bucket); + this.ossClient.deleteObject(bucket, object); + } + } else if (APPEND.equals(writeMode)) { + throw DataXException + .asDataXException( + OssWriterErrorCode.ILLEGAL_VALUE, + "Illegal value"); + } else if (NOCONFLICT.equals(writeMode)) { + LOG.info("Because you have configured writeMode nonConflict, and writeSingleObject is true, start checking bucket [{}] under the same name object [{}]", bucket, object); + if (doesObjectExist) { + throw DataXException + .asDataXException( + OssWriterErrorCode.ILLEGAL_VALUE, + String.format("Buffet you configured: %s There is a duplicate name of Object %s", bucket, object)); + } + } + } + + /** + * 执行多个task写多个object的prepare逻辑,这个是osswriter已有的逻辑,需要保持向前兼容性 + * + * @param bucket + * @param object + * @param writeMode + */ + private void doPrepareForMutliObject(String bucket, String object, String writeMode) { + // truncate option handler + if (TRUNCATE.equals(writeMode)) { + LOG.info("You have configured [writeMode] [truncate], so the system will start to clear the objects starting with [{}] under [{}]. 
", bucket, object); + // warn: 默认情况下,如果Bucket中的Object数量大于100,则只会返回100个Object + while (true) { + ObjectListing listing = null; + LOG.info("list objects with listObject(bucket, object)"); + listing = this.ossClient.listObjects(bucket, object); + List objectSummarys = listing + .getObjectSummaries(); + for (OSSObjectSummary objectSummary : objectSummarys) { + LOG.info(String.format("delete oss object [%s].", + objectSummary.getKey())); + this.ossClient.deleteObject(bucket, + objectSummary.getKey()); + } + if (objectSummarys.isEmpty()) { + break; + } + } + } else if (APPEND.equals(writeMode)) { + LOG.info("You have configured [writeMode] [append], so the system won\\u2019t perform the clearing before writing. Data is written to objects with the name prefix of [{}] under the bucket: [{}]. ", bucket, object); + } else if (NOCONFLICT.equals(writeMode)) { + LOG.info("You have configured [writeMode] [nonConflict], so the system will start to check objects whose names start with [{}] under the bucket: [{}]. ", bucket, object); + ObjectListing listing = this.ossClient.listObjects(bucket, + object); + if (0 < listing.getObjectSummaries().size()) { + StringBuilder objectKeys = new StringBuilder(); + objectKeys.append("[ "); + for (OSSObjectSummary ossObjectSummary : listing + .getObjectSummaries()) { + objectKeys.append(ossObjectSummary.getKey() + " ,"); + } + objectKeys.append(" ]"); + LOG.info(String.format( + "object with prefix [%s] details: %s", object, + objectKeys.toString())); + throw DataXException + .asDataXException( + OssWriterErrorCode.ILLEGAL_VALUE, + String.format("The [bucket] you configured: %s contains objects with the name prefix of %s.", bucket, object)); + } } } @Override public void post() { + if(useHdfsWriterProxy){ + this.hdfsWriterJob.post(); + return; + } + if (this.writeSingleObject) { + try { + /**1. 合并上传最后一个block*/ + LOG.info("Has upload part size: {}", OssSingleObject.allPartETags.size()); + if (OssSingleObject.getLastBlockBuffer() != null && OssSingleObject.getLastBlockBuffer().length != 0) { + byte[] byteBuffer = OssSingleObject.getLastBlockBuffer(); + LOG.info("post writer single object last merge block size is : {}", byteBuffer.length); + this.ossWriterProxy.uploadOnePartForSingleObject(byteBuffer, OssSingleObject.uploadId, + OssSingleObject.allPartETags, this.object, this::getHeaderBytes); + } + if (OssSingleObject.allPartETags.size() == 0) { + LOG.warn("allPartETags size is 0, there is no part of data need to be complete uploaded, " + + "skip complete multipart upload!"); + this.ossWriterProxy.abortMultipartUpload(this.object,OssSingleObject.uploadId); + return; + } + + /**2. 
完成complete upload */ + LOG.info("begin complete multi part upload, bucket:{}, object:{}, uploadId:{}, all has upload part size:{}", + this.bucket, this.object, OssSingleObject.uploadId, OssSingleObject.allPartETags.size()); + orderPartETages(OssSingleObject.allPartETags); + CompleteMultipartUploadRequest completeMultipartUploadRequest = new CompleteMultipartUploadRequest( + this.bucket, this.object, OssSingleObject.uploadId, OssSingleObject.allPartETags); + CompleteMultipartUploadResult completeMultipartUploadResult = this.ossWriterProxy.completeMultipartUpload(completeMultipartUploadRequest); + LOG.info(String.format("post final object etag is:[%s]", completeMultipartUploadResult.getETag())); + } catch (Exception e) { + LOG.error("osswriter post error: {}", e.getMessage(), e); + throw DataXException.asDataXException(e.getMessage()); + } + } + } + + private byte[] getHeaderBytes() throws IOException { + if (null != this.header && !this.header.isEmpty()) { + // write header to writer + try (StringWriter sw = new StringWriter(); + UnstructuredWriter headerWriter = UnstructuredStorageWriterUtil. + produceUnstructuredWriter(this.fileFormat, this.writerSliceConfig, sw)) { + headerWriter.writeOneRecord(this.header); + return sw.toString().getBytes(this.encoding); + } + } + return new byte[0]; + } + + /** + * 对allPartETags做递增排序 + * + * @param allPartETags + * @return + */ + private void orderPartETages(List allPartETags) { + Collections.sort(allPartETags, new Comparator() { + @Override + public int compare(PartETag o1, PartETag o2) { + //按照partNumber递增排序 + return o1.getPartNumber() - o2.getPartNumber(); + } + }); } @Override public void destroy() { - + if(useHdfsWriterProxy){ + this.hdfsWriterJob.destroy(); + return; + } + try { + // this.ossClient.shutdown(); + } catch (Exception e) { + LOG.warn("shutdown ossclient meet a exception:" + e.getMessage(), e); + } } @Override public List split(int mandatoryNumber) { LOG.info("begin do split..."); + if(useHdfsWriterProxy){ + return this.hdfsWriterJob.split(mandatoryNumber); + } + List writerSplitConfigs = new ArrayList(); + + // warn: 这个地方其实可能有bug,datax frame其实会shuffle, 文件内部切分也不好支持这个诉求 + if(this.isPeer2PeerCopyMode()){ + // 有这个需求风险: 源头oss的文件 abc/123/data.txt yixiao.txt 2个文件对等拷贝过来, 这个场景下data.txt + // yixiao.txt 只能放一个目录 + List readerSplitConfigs = this.getReaderPluginSplitConf(); + for (int i = 0; i < readerSplitConfigs.size(); i++) { + Configuration splitedTaskConfig = writerSliceConfig.clone(); + splitedTaskConfig.set(Key.OBJECT, objectDir); + splitedTaskConfig.set(com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.BINARY, + this.isBinaryFile); + writerSplitConfigs.add(splitedTaskConfig); + } + } else { + if (this.writeSingleObject) { + writerSplitConfigs = doSplitForWriteSingleObject(mandatoryNumber); + } else { + writerSplitConfigs = doSplitForWriteMultiObject(mandatoryNumber); + } + } + LOG.info("end do split. 
split size: {}", writerSplitConfigs.size()); + return writerSplitConfigs; + } + + /** + * 针对多个task写单个文件模式,新增split逻辑 + * + * @param mandatoryNumber + * @return + */ + private List doSplitForWriteSingleObject(int mandatoryNumber) { + LOG.info("writeSingleObject is true, begin do split for write single object."); List writerSplitConfigs = new ArrayList(); String object = this.writerSliceConfig.getString(Key.OBJECT); - String bucket = this.writerSliceConfig.getString(Key.BUCKET); + InitiateMultipartUploadRequest uploadRequest = this.ossWriterProxy.getInitiateMultipartUploadRequest( + object); + + InitiateMultipartUploadResult uploadResult; + try { + uploadResult = this.ossWriterProxy.initiateMultipartUpload( + uploadRequest); + } catch (Exception e) { + LOG.error("initiateMultipartUpload error: {}", e.getMessage(), e); + throw DataXException.asDataXException(e.getMessage()); + } + /** + * 如果需要写同一个object,需要保证使用同一个upload Id + * see: https://help.aliyun.com/document_detail/31993.html + */ + String uploadId = uploadResult.getUploadId(); + OssSingleObject.uploadId = uploadId; + LOG.info("writeSingleObject use uploadId: {}", uploadId); + + for (int i = 0; i < mandatoryNumber; i++) { + Configuration splitedTaskConfig = this.writerSliceConfig + .clone(); + splitedTaskConfig.set(Key.OBJECT, object); + splitedTaskConfig.set(Key.UPLOAD_ID, uploadId); + writerSplitConfigs.add(splitedTaskConfig); + } + return writerSplitConfigs; + } + + /** + * osswriter多个task写多个object文件split逻辑,历史已有该逻辑,保持向前兼容性 + * + * @param mandatoryNumber + * @return + */ + private List doSplitForWriteMultiObject(int mandatoryNumber) { + List writerSplitConfigs = new ArrayList(); + String bucket = this.writerSliceConfig.getString(Key.BUCKET); + String object = this.writerSliceConfig.getString(Key.OBJECT); Set allObjects = new HashSet(); try { List ossObjectlisting = this.ossClient @@ -192,10 +535,10 @@ public class OssWriter extends Writer { } } catch (OSSException e) { throw DataXException.asDataXException( - OssWriterErrorCode.OSS_COMM_ERROR, e.getMessage()); + OssWriterErrorCode.OSS_COMM_ERROR, e.getMessage(), e); } catch (ClientException e) { throw DataXException.asDataXException( - OssWriterErrorCode.OSS_COMM_ERROR, e.getMessage()); + OssWriterErrorCode.OSS_COMM_ERROR, e.getMessage(), e); } String objectSuffix; @@ -223,9 +566,25 @@ public class OssWriter extends Writer { writerSplitConfigs.add(splitedTaskConfig); } - LOG.info("end do split."); return writerSplitConfigs; } + + private boolean isPeer2PeerCopyMode() { + return this.isBinaryFile + || com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.SYNC_MODE_VALUE_COPY + .equalsIgnoreCase(this.syncMode); + } + + private String getObjectDir(String object) { + String dir = null; + if (StringUtils.isBlank(object)) { + dir = ""; + } else { + dir = object.trim(); + dir = dir.endsWith("/") ? 
dir : String.format("%s/", dir); + } + return dir; + } } public static class Task extends Writer.Task { @@ -237,17 +596,48 @@ public class OssWriter extends Writer { private String object; private String nullFormat; private String encoding; - private char fieldDelimiter; private String dateFormat; private DateFormat dateParse; private String fileFormat; private List header; private Long maxFileSize;// MB private String suffix; + private Boolean encrypt;// 是否在服务器端进行加密存储 + private long blockSizeInByte; + private Boolean isBinaryFile; + private String objectDir; + private String syncMode; + private int parentPathLength; + private String byteEncoding; + private HdfsWriter.Task hdfsWriterTask; + private boolean useHdfsWriterProxy = false; + private boolean writeSingleObject; + private String uploadId; + private OssWriterProxy ossWriterProxy; + private List partition; + private boolean generateEmptyFile; @Override public void init() { this.writerSliceConfig = this.getPluginJobConf(); + this.fileFormat = this.writerSliceConfig + .getString( + com.alibaba.datax.plugin.unstructuredstorage.writer.Key.FILE_FORMAT, + com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.FILE_FORMAT_TEXT); + this.useHdfsWriterProxy = HdfsParquetUtil.isUseHdfsWriterProxy(this.fileFormat); + if(useHdfsWriterProxy){ + this.hdfsWriterTask = new HdfsWriter.Task(); + this.hdfsWriterTask.setPeerPluginJobConf(this.getPeerPluginJobConf()); + this.hdfsWriterTask.setPeerPluginName(this.getPeerPluginName()); + this.hdfsWriterTask.setPluginJobConf(this.getPluginJobConf()); + this.hdfsWriterTask.setReaderPluginSplitConf(this.getReaderPluginSplitConf()); + this.hdfsWriterTask.setTaskGroupId(this.getTaskGroupId()); + this.hdfsWriterTask.setTaskId(this.getTaskId()); + this.hdfsWriterTask.setTaskPluginCollector(this.getTaskPluginCollector()); + this.hdfsWriterTask.init(); + return; + } + this.ossClient = OssUtil.initOssClient(this.writerSliceConfig); this.bucket = this.writerSliceConfig.getString(Key.BUCKET); this.object = this.writerSliceConfig.getString(Key.OBJECT); @@ -264,14 +654,6 @@ public class OssWriter extends Writer { .getString( com.alibaba.datax.plugin.unstructuredstorage.writer.Key.ENCODING, com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.DEFAULT_ENCODING); - this.fieldDelimiter = this.writerSliceConfig - .getChar( - com.alibaba.datax.plugin.unstructuredstorage.writer.Key.FIELD_DELIMITER, - com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.DEFAULT_FIELD_DELIMITER); - this.fileFormat = this.writerSliceConfig - .getString( - com.alibaba.datax.plugin.unstructuredstorage.writer.Key.FILE_FORMAT, - com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.FILE_FORMAT_TEXT); this.header = this.writerSliceConfig .getList( com.alibaba.datax.plugin.unstructuredstorage.writer.Key.HEADER, @@ -285,67 +667,334 @@ public class OssWriter extends Writer { com.alibaba.datax.plugin.unstructuredstorage.writer.Key.SUFFIX, com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.DEFAULT_SUFFIX); this.suffix = this.suffix.trim();// warn: need trim + this.encrypt = this.writerSliceConfig.getBool(Key.ENCRYPT, false); + + // 设置每块字符串长度 + this.blockSizeInByte = this.writerSliceConfig.getLong(Key.BLOCK_SIZE_IN_MB, 10L) * 1024 * 1024; + + this.isBinaryFile = this.writerSliceConfig.getBool( + com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.BINARY, false); + + this.objectDir = this.getObjectDir(this.object); + this.syncMode = this.writerSliceConfig + 
.getString(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.SYNC_MODE, ""); + this.parentPathLength = this.writerSliceConfig.getInt("__parentPathLength", 0); + + this.byteEncoding = this.writerSliceConfig + .getString(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.BYTE_ENCODING); + + this.writeSingleObject = this.writerSliceConfig.getBool(Key.WRITE_SINGLE_OBJECT, false); + this.uploadId = this.writerSliceConfig.getString(Key.UPLOAD_ID); + this.ossWriterProxy = new OssWriterProxy(this.writerSliceConfig, this.ossClient); + this.partition = this.writerSliceConfig.getList(Key.PARTITION, new ArrayList<>(), String.class); + //是否生成空文件开关 + this.generateEmptyFile = this.writerSliceConfig.getBool(Key.GENERATE_EMPTY_FILE,true); } @Override public void startWrite(RecordReceiver lineReceiver) { + if(useHdfsWriterProxy){ + hdfsWriterTask.startWrite(lineReceiver); + return; + } + if (this.isPeer2PeerCopyMode()) { + // 对等拷贝 + this.startWriteBinaryFile(lineReceiver); + } else if (this.writeSingleObject) { + this.startWriteSingleObjectUnstructedStorageFile(lineReceiver); + } else { + this.startWriteUnstructedStorageFile(lineReceiver,generateEmptyFile); + } + } + + /** + * 单object写入 + * + * @param lineReceiver + */ + public void startWriteSingleObjectUnstructedStorageFile(RecordReceiver lineReceiver) { + + try { + Record record; + String currentObject = this.object; + List currentPartETags = new ArrayList(); + + //warn: may be StringBuffer->StringBuilder + StringWriter sw = new StringWriter(); + StringBuffer sb = sw.getBuffer(); + UnstructuredWriter unstructuredWriter = UnstructuredStorageWriterUtil. + produceUnstructuredWriter(this.fileFormat, this.writerSliceConfig, sw); + + while ((record = lineReceiver.getFromReader()) != null) { + //单文件同步暂不支持轮转[目前单文件支持同步约最大100GB大小] + if (OssSingleObject.currentPartNumber.intValue() > Constant.MAX_BLOCK_SIZE) { + throw DataXException.asDataXException(String.format("When writeSingleObject is true, the write size of your single object has exceeded the maximum value of %s MB.", + (Constant.MAX_BLOCK_SIZE * this.blockSizeInByte / 1024 / 1024))); + } + + // write: upload data to current object + UnstructuredStorageWriterUtil.transportOneRecord(record, + this.nullFormat, this.dateParse, + this.getTaskPluginCollector(), unstructuredWriter, this.byteEncoding); + + // 达到 this.blockSizeInByte ,上传文件块 + if (sb.length() >= this.blockSizeInByte) { + LOG.info(String + .format("write to bucket: [%s] object: [%s] with oss uploadId: [%s], currentPartNumber: %s", + this.bucket, currentObject, + this.uploadId, OssSingleObject.currentPartNumber.intValue())); + byte[] byteArray = sw.toString().getBytes(this.encoding); + this.ossWriterProxy.uploadOnePartForSingleObject(byteArray, this.uploadId, currentPartETags, currentObject, this::getHeaderBytes); + sb.setLength(0); + } + } + //将本task所有upload的part加入到allPartETags中 + OssSingleObject.allPartETags.addAll(currentPartETags); + + //将task未写完的最后一个block加入到 OssSingleObject.lastBlockBuffer 中,待job阶段合并上传 + if (sb.length() > 0) { + byte[] lastBlock = sw.toString().getBytes(this.encoding); + LOG.info("begin add last block to buffer, last block size: {}", lastBlock.length); + OssSingleObject.addLastBlockBuffer(lastBlock, this.ossWriterProxy, this.blockSizeInByte, this.object, this::getHeaderBytes); + } + } catch (IOException e) { + // 脏数据UnstructuredStorageWriterUtil.transportOneRecord已经记录,header + // 都是字符串不认为有脏数据 + throw DataXException.asDataXException( + OssWriterErrorCode.Write_OBJECT_ERROR, e.getMessage(), e); + } catch (Exception e) { + 
throw DataXException.asDataXException( + OssWriterErrorCode.Write_OBJECT_ERROR, e.getMessage(), e); + } + LOG.info("single oss object end do write"); + } + + private byte[] getHeaderBytes() throws IOException { + if (null != this.header && !this.header.isEmpty()) { + // write header to writer + try (StringWriter sw = new StringWriter(); + UnstructuredWriter headerWriter = UnstructuredStorageWriterUtil. + produceUnstructuredWriter(this.fileFormat, this.writerSliceConfig, sw)) { + headerWriter.writeOneRecord(this.header); + return sw.toString().getBytes(this.encoding); + } + } + return new byte[0]; + } + + /** + * 同步音视频等无结构化文件 + * warn: 代码和startWriteUnstructedStorageFile重复程度太高,后续需要继续重构 + */ + private void startWriteBinaryFile(RecordReceiver lineReceiver) { + Record record; + String currentObject = null; + InitiateMultipartUploadRequest currentInitiateMultipartUploadRequest; + InitiateMultipartUploadResult currentInitiateMultipartUploadResult = null; + String lastUploadId = null; + boolean gotData = false; + List currentPartETags = null; + int currentPartNumber = 1; + Map meta; + + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + long currentSize = 0; + try { + // warn + boolean needInitMultipartTransform = true; + while ((record = lineReceiver.getFromReader()) != null) { + Column column = record.getColumn(0); + meta = record.getMeta(); + assert meta != null; + gotData = true; + String objectNameTmp = meta + .get(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.META_KEY_FILE_PATH); + String fullObjectNameTmp = String.format("%s%s", this.objectDir, objectNameTmp.substring(this.parentPathLength, objectNameTmp.length())); + + // init: 2 condition begin new multipart upload + if (needInitMultipartTransform || !StringUtils.equals(currentObject, fullObjectNameTmp)) { + // 先将上一个分块上传的request complete掉 + if (null != currentInitiateMultipartUploadResult) { + // 如果还有部分分库数据没有提交,则先提交 + if (currentSize > 0) { + this.ossWriterProxy.uploadOnePart(byteArrayOutputStream.toByteArray(), currentPartNumber, + currentInitiateMultipartUploadResult, currentPartETags, currentObject); + currentPartNumber++; + currentSize = 0; + byteArrayOutputStream.reset(); + } + // TODO 如果当前文件是空文件 + String commitKey = currentInitiateMultipartUploadResult.getKey(); + LOG.info(String.format( + "current object [%s] size %s, complete current multipart upload %s and begin new one", + commitKey, currentPartNumber * this.blockSizeInByte, + currentInitiateMultipartUploadResult.getUploadId())); + CompleteMultipartUploadRequest currentCompleteMultipartUploadRequest = new CompleteMultipartUploadRequest( + this.bucket, commitKey, currentInitiateMultipartUploadResult.getUploadId(), + currentPartETags); + CompleteMultipartUploadResult currentCompleteMultipartUploadResult = this.ossWriterProxy.completeMultipartUpload( + currentCompleteMultipartUploadRequest); + lastUploadId = currentInitiateMultipartUploadResult.getUploadId(); + LOG.info(String.format("final object [%s] etag is:[%s]", commitKey, + currentCompleteMultipartUploadResult.getETag())); + } + // 这里发现一个全新的文件需要分块上传 + currentObject = fullObjectNameTmp; + currentInitiateMultipartUploadRequest = this.ossWriterProxy.getInitiateMultipartUploadRequest(currentObject); + currentInitiateMultipartUploadResult = this.ossWriterProxy.initiateMultipartUpload( + currentInitiateMultipartUploadRequest); + currentPartETags = new ArrayList(); + LOG.info(String.format("write to bucket: [%s] object: [%s] with oss uploadId: [%s]", + this.bucket, currentObject, 
currentInitiateMultipartUploadResult.getUploadId())); + // warn + needInitMultipartTransform = false; + currentPartNumber = 1; + } + // write: upload data to current object + byte[] data; + if (column instanceof BytesColumn) { + data = column.asBytes(); + byteArrayOutputStream.write(data); + currentSize += data.length; + } else { + String message = "the type of column must be BytesColumn!"; + throw DataXException.asDataXException(OssWriterErrorCode.Write_OBJECT_ERROR, message); + } + if (currentSize >= this.blockSizeInByte) { + this.ossWriterProxy.uploadOnePart(byteArrayOutputStream.toByteArray(), currentPartNumber, + currentInitiateMultipartUploadResult, currentPartETags, currentObject); + currentPartNumber++; + currentSize = 0; + byteArrayOutputStream.reset(); + } + } + + // TODO binary 模式读取,源头为空文件时是有问题的 + if (!gotData) { + LOG.info("Receive no data from the source."); + currentInitiateMultipartUploadRequest = new InitiateMultipartUploadRequest(this.bucket, + currentObject); + currentInitiateMultipartUploadResult = this.ossWriterProxy.initiateMultipartUpload( + currentInitiateMultipartUploadRequest); + currentPartETags = new ArrayList(); + } + + // warn: may be some data stall in byteArrayOutputStream + if (byteArrayOutputStream.size() > 0) { + this.ossWriterProxy.uploadOnePart(byteArrayOutputStream.toByteArray(), currentPartNumber, + currentInitiateMultipartUploadResult, currentPartETags, currentObject); + currentPartNumber++; + } + + // 避免重复提交 + if (!StringUtils.equals(lastUploadId, currentInitiateMultipartUploadResult.getUploadId())) { + CompleteMultipartUploadRequest completeMultipartUploadRequest = new CompleteMultipartUploadRequest( + this.bucket, currentObject, currentInitiateMultipartUploadResult.getUploadId(), + currentPartETags); + CompleteMultipartUploadResult completeMultipartUploadResult = this.ossWriterProxy.completeMultipartUpload( + completeMultipartUploadRequest); + LOG.info(String.format("final object etag is:[%s]", completeMultipartUploadResult.getETag())); + } + } catch (IOException e) { + // 脏数据UnstructuredStorageWriterUtil.transportOneRecord已经记录,header + // 都是字符串不认为有脏数据 + throw DataXException.asDataXException(OssWriterErrorCode.Write_OBJECT_ERROR, e.getMessage(), e); + } catch (Exception e) { + throw DataXException.asDataXException(OssWriterErrorCode.Write_OBJECT_ERROR, e.getMessage(), e); + } + LOG.info("end do write"); + } + + /** + * 开始写半结构化文件 + * + * @param lineReceiver + */ + private void startWriteUnstructedStorageFile(RecordReceiver lineReceiver, boolean generateEmptyFile){ // 设置每块字符串长度 - final long partSize = 1024 * 1024 * 10L; - long numberCacul = (this.maxFileSize * 1024 * 1024L) / partSize; + long numberCacul = (this.maxFileSize * 1024 * 1024L) / this.blockSizeInByte; final long maxPartNumber = numberCacul >= 1 ? 
numberCacul : 1; int objectRollingNumber = 0; - //warn: may be StringBuffer->StringBuilder - StringWriter sw = new StringWriter(); - StringBuffer sb = sw.getBuffer(); - UnstructuredWriter unstructuredWriter = TextCsvWriterManager - .produceUnstructuredWriter(this.fileFormat, - this.fieldDelimiter, sw); - Record record = null; - - LOG.info(String.format( - "begin do write, each object maxFileSize: [%s]MB...", - maxPartNumber * 10)); + Record record; String currentObject = this.object; - InitiateMultipartUploadRequest currentInitiateMultipartUploadRequest = null; + if (this.isPeer2PeerCopyMode()) { + currentObject = null; + } else { + // 加上suffix + currentObject = appedSuffixTo(currentObject); + } + InitiateMultipartUploadRequest currentInitiateMultipartUploadRequest; InitiateMultipartUploadResult currentInitiateMultipartUploadResult = null; + String lastUploadId = null; boolean gotData = false; List currentPartETags = null; // to do: // 可以根据currentPartNumber做分块级别的重试,InitiateMultipartUploadRequest多次一个currentPartNumber会覆盖原有 int currentPartNumber = 1; + Map meta; + + //warn: may be StringBuffer->StringBuilder + StringWriter sw = new StringWriter(); + StringBuffer sb = sw.getBuffer(); + UnstructuredWriter unstructuredWriter = UnstructuredStorageWriterUtil. + produceUnstructuredWriter(this.fileFormat, this.writerSliceConfig, sw); + LOG.info(String.format( + "begin do write, each object maxFileSize: [%s]MB...", + maxPartNumber * 10)); try { - // warn + // warn 源头可能是MySQL中,导致没有meta这个第一次初始化标示省不掉 boolean needInitMultipartTransform = true; while ((record = lineReceiver.getFromReader()) != null) { + meta = record.getMeta(); gotData = true; - // init:begin new multipart upload - if (needInitMultipartTransform) { - if (objectRollingNumber == 0) { - if (StringUtils.isBlank(this.suffix)) { - currentObject = this.object; - } else { - currentObject = String.format("%s%s", - this.object, this.suffix); - } - } else { - // currentObject is like(no suffix) - // myfile__9b886b70fbef11e59a3600163e00068c_1 - if (StringUtils.isBlank(this.suffix)) { - currentObject = String.format("%s_%s", - this.object, objectRollingNumber); - } else { - // or with suffix - // myfile__9b886b70fbef11e59a3600163e00068c_1.csv - currentObject = String.format("%s_%s%s", - this.object, objectRollingNumber, - this.suffix); - } + // init: 2 condition begin new multipart upload 轮转策略(文件名规则)不一致 + // condition: 对等拷贝模式 && Record中的Meta切换文件名 && + // condition: 类log4j日志轮转 && !对等拷贝模式 + boolean realyNeedInitUploadRequest = false; + if (this.isPeer2PeerCopyMode()) { + assert meta != null; + String objectNameTmp = meta + .get(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.META_KEY_FILE_PATH); + String fullObjectNameTmp = String.format("%s%s", this.objectDir, objectNameTmp.substring(this.parentPathLength, objectNameTmp.length())); + if (!StringUtils.equals(currentObject, fullObjectNameTmp)) { + currentObject = fullObjectNameTmp; + realyNeedInitUploadRequest = true; } - objectRollingNumber++; - currentInitiateMultipartUploadRequest = new InitiateMultipartUploadRequest( - this.bucket, currentObject); - currentInitiateMultipartUploadResult = this.ossClient - .initiateMultipartUpload(currentInitiateMultipartUploadRequest); + } else { + if (needInitMultipartTransform || currentPartNumber > maxPartNumber) { + currentObject = getCurrentObject(objectRollingNumber, record); + objectRollingNumber++; + realyNeedInitUploadRequest = true; + } + } + + if (realyNeedInitUploadRequest) { + // 先将上一个分块上传的request complete掉 + if (null != 
currentInitiateMultipartUploadResult) { + if (sb.length() > 0) { + this.uploadOnePart(sw, currentPartNumber, currentInitiateMultipartUploadResult, + currentPartETags, currentObject); + currentPartNumber++; + sb.setLength(0); + } + // TODO 如果当前文件是空文件 + String commitKey = currentInitiateMultipartUploadResult.getKey(); + LOG.info(String.format( + "current object [%s] size %s, complete current multipart upload %s and begin new one", + commitKey, currentPartNumber * this.blockSizeInByte, + currentInitiateMultipartUploadResult.getUploadId())); + CompleteMultipartUploadRequest currentCompleteMultipartUploadRequest = new CompleteMultipartUploadRequest( + this.bucket, commitKey, currentInitiateMultipartUploadResult.getUploadId(), + currentPartETags); + CompleteMultipartUploadResult currentCompleteMultipartUploadResult = this.ossWriterProxy.completeMultipartUpload( + currentCompleteMultipartUploadRequest); + lastUploadId = currentInitiateMultipartUploadResult.getUploadId(); + LOG.info(String.format("final object [%s] etag is:[%s]", commitKey, + currentCompleteMultipartUploadResult.getETag())); + } + + currentInitiateMultipartUploadRequest = this.ossWriterProxy.getInitiateMultipartUploadRequest(currentObject); + currentInitiateMultipartUploadResult = this.ossWriterProxy.initiateMultipartUpload(currentInitiateMultipartUploadRequest); currentPartETags = new ArrayList(); LOG.info(String .format("write to bucket: [%s] object: [%s] with oss uploadId: [%s]", @@ -365,43 +1014,22 @@ public class OssWriter extends Writer { // write: upload data to current object UnstructuredStorageWriterUtil.transportOneRecord(record, this.nullFormat, this.dateParse, - this.getTaskPluginCollector(), unstructuredWriter); + this.getTaskPluginCollector(), unstructuredWriter, this.byteEncoding); - if (sb.length() >= partSize) { + if (sb.length() >= this.blockSizeInByte) { this.uploadOnePart(sw, currentPartNumber, currentInitiateMultipartUploadResult, currentPartETags, currentObject); currentPartNumber++; sb.setLength(0); } - - // save: end current multipart upload - if (currentPartNumber > maxPartNumber) { - LOG.info(String - .format("current object [%s] size > %s, complete current multipart upload and begin new one", - currentObject, currentPartNumber - * partSize)); - CompleteMultipartUploadRequest currentCompleteMultipartUploadRequest = new CompleteMultipartUploadRequest( - this.bucket, currentObject, - currentInitiateMultipartUploadResult - .getUploadId(), currentPartETags); - CompleteMultipartUploadResult currentCompleteMultipartUploadResult = this.ossClient - .completeMultipartUpload(currentCompleteMultipartUploadRequest); - LOG.info(String.format( - "final object [%s] etag is:[%s]", - currentObject, - currentCompleteMultipartUploadResult.getETag())); - // warn - needInitMultipartTransform = true; - } } if (!gotData) { LOG.info("Receive no data from the source."); currentInitiateMultipartUploadRequest = new InitiateMultipartUploadRequest( this.bucket, currentObject); - currentInitiateMultipartUploadResult = this.ossClient - .initiateMultipartUpload(currentInitiateMultipartUploadRequest); + currentInitiateMultipartUploadResult = this.ossWriterProxy.initiateMultipartUpload(currentInitiateMultipartUploadRequest); currentPartETags = new ArrayList(); // each object's header if (null != this.header && !this.header.isEmpty()) { @@ -414,81 +1042,145 @@ public class OssWriter extends Writer { currentInitiateMultipartUploadResult, currentPartETags, currentObject); } - CompleteMultipartUploadRequest completeMultipartUploadRequest = 
new CompleteMultipartUploadRequest( - this.bucket, currentObject, - currentInitiateMultipartUploadResult.getUploadId(), - currentPartETags); - CompleteMultipartUploadResult completeMultipartUploadResult = this.ossClient - .completeMultipartUpload(completeMultipartUploadRequest); - LOG.info(String.format("final object etag is:[%s]", - completeMultipartUploadResult.getETag())); + + // 避免重复提交 + if (!StringUtils.equals(lastUploadId, currentInitiateMultipartUploadResult.getUploadId())) { + CompleteMultipartUploadRequest completeMultipartUploadRequest = new CompleteMultipartUploadRequest( + this.bucket, currentObject, + currentInitiateMultipartUploadResult.getUploadId(), + currentPartETags); + if (gotData) { + completeUpload(completeMultipartUploadRequest); + } else{ + if (generateEmptyFile) { + LOG.info("Due to without data, oss will generate empty file, " + + "the generateEmptyFile is {}, you can set it false to avoid this",generateEmptyFile); + completeUpload(completeMultipartUploadRequest); + } else { + LOG.info("The generateEmptyFile is false, datax will not generate empty file"); + } + } + } } catch (IOException e) { // 脏数据UnstructuredStorageWriterUtil.transportOneRecord已经记录,header // 都是字符串不认为有脏数据 throw DataXException.asDataXException( - OssWriterErrorCode.Write_OBJECT_ERROR, e.getMessage()); + OssWriterErrorCode.Write_OBJECT_ERROR, e.getMessage(), e); } catch (Exception e) { throw DataXException.asDataXException( - OssWriterErrorCode.Write_OBJECT_ERROR, e.getMessage()); + OssWriterErrorCode.Write_OBJECT_ERROR, e.getMessage(), e); } LOG.info("end do write"); } + private void completeUpload(CompleteMultipartUploadRequest completeMultipartUploadRequest) throws Exception { + CompleteMultipartUploadResult completeMultipartUploadResult = this.ossWriterProxy.completeMultipartUpload(completeMultipartUploadRequest); + LOG.info(String.format("final object etag is:[%s]", + completeMultipartUploadResult.getETag())); + } + + + private String getCurrentObject(int objectRollingNumber, Record record) { + String currentObject = this.object; + + if (!this.partition.isEmpty()) { + String partitionValues = getPartitionValues(record); + currentObject = String.format("%s_%s", currentObject, partitionValues); + } + + if (objectRollingNumber > 0) { + currentObject = String.format("%s_%s", currentObject, objectRollingNumber); + } + + currentObject = appedSuffixTo(currentObject); + + return currentObject; + } + + private String getPartitionValues(Record record) { + // config like "partition": "ds,venture" + String partitionValues = ""; + // assume that partition columns are located in the last of order + for (int i = 0; i < this.partition.size(); i++) { + partitionValues += record.getColumn(record.getColumnNumber() - 1 - i).asString(); + } + return partitionValues; + } + + private String appedSuffixTo(String currentObject) { + StringBuilder sbCurrentObject = new StringBuilder(currentObject); + + if (StringUtils.isNotBlank(this.suffix)) { + if (!this.suffix.startsWith(".")) { + sbCurrentObject.append("."); + } + sbCurrentObject.append(suffix); + } + + return sbCurrentObject.toString(); + } + /** * 对于同一个UploadID,该号码不但唯一标识这一块数据,也标识了这块数据在整个文件内的相对位置。 * 如果你用同一个part号码,上传了新的数据,那么OSS上已有的这个号码的Part数据将被覆盖。 - * + * * @throws Exception * */ private void uploadOnePart( final StringWriter sw, final int partNumber, - final InitiateMultipartUploadResult initiateMultipartUploadResult, + final InitiateMultipartUploadResult currentInitiateMultipartUploadResult, final List partETags, final String currentObject) throws Exception { final 
String encoding = this.encoding; - final String bucket = this.bucket; - final OSSClient ossClient = this.ossClient; - RetryUtil.executeWithRetry(new Callable() { - @Override - public Boolean call() throws Exception { - byte[] byteArray = sw.toString().getBytes(encoding); - InputStream inputStream = new ByteArrayInputStream( - byteArray); - // 创建UploadPartRequest,上传分块 - UploadPartRequest uploadPartRequest = new UploadPartRequest(); - uploadPartRequest.setBucketName(bucket); - uploadPartRequest.setKey(currentObject); - uploadPartRequest.setUploadId(initiateMultipartUploadResult - .getUploadId()); - uploadPartRequest.setInputStream(inputStream); - uploadPartRequest.setPartSize(byteArray.length); - uploadPartRequest.setPartNumber(partNumber); - UploadPartResult uploadPartResult = ossClient - .uploadPart(uploadPartRequest); - partETags.add(uploadPartResult.getPartETag()); - LOG.info(String - .format("upload part [%s] size [%s] Byte has been completed.", - partNumber, byteArray.length)); - IOUtils.closeQuietly(inputStream); - return true; - } - }, 3, 1000L, false); + final byte[] byteArray = sw.toString().getBytes(encoding); + this.ossWriterProxy.uploadOnePart(byteArray, partNumber, currentInitiateMultipartUploadResult, partETags, currentObject); } @Override public void prepare() { - + if(useHdfsWriterProxy){ + hdfsWriterTask.prepare(); + return; + } } @Override public void post() { - + if(useHdfsWriterProxy){ + hdfsWriterTask.post(); + return; + } } @Override public void destroy() { + if(useHdfsWriterProxy){ + hdfsWriterTask.destroy(); + return; + } + try { + // this.ossClient.shutdown(); + } catch (Exception e) { + LOG.warn("shutdown ossclient meet a exception:" + e.getMessage(), e); + } + } + private boolean isPeer2PeerCopyMode() { + return this.isBinaryFile + || com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.SYNC_MODE_VALUE_COPY + .equalsIgnoreCase(this.syncMode); + } + + private String getObjectDir(String object) { + String dir = null; + if (StringUtils.isBlank(object)) { + dir = ""; + } else { + dir = object.trim(); + dir = dir.endsWith("/") ? 
dir : String.format("%s/", dir); + } + return dir; } } } diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssWriterProxy.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssWriterProxy.java new file mode 100644 index 00000000..45516f73 --- /dev/null +++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/OssWriterProxy.java @@ -0,0 +1,171 @@ +package com.alibaba.datax.plugin.writer.osswriter; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.RetryUtil; +import com.aliyun.oss.OSSClient; +import com.aliyun.oss.model.*; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.ArrayUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.util.List; +import java.util.concurrent.Callable; + +/** + * @Author: guxuan + * @Date 2022-05-17 16:29 + */ +public class OssWriterProxy { + private static Logger logger = LoggerFactory.getLogger(OssWriterProxy.class); + + private OSSClient ossClient; + private Configuration configuration; + /** + * 是否在服务器端进行加密存储 + */ + private Boolean encrypt; + private String bucket; + + + public OssWriterProxy (Configuration configuration, OSSClient ossClient) { + this.configuration = configuration; + this.ossClient = ossClient; + this.encrypt = configuration.getBool(Key.ENCRYPT, false); + this.bucket = configuration.getString(Key.BUCKET); + } + + public InitiateMultipartUploadRequest getInitiateMultipartUploadRequest(String currentObject){ + InitiateMultipartUploadRequest currentInitiateMultipartUploadRequest; + if( !this.encrypt ) { + currentInitiateMultipartUploadRequest = new InitiateMultipartUploadRequest( + this.bucket, currentObject); + } else { + // 将数据加密存储在oss + ObjectMetadata objectMetadata = new ObjectMetadata(); + objectMetadata.setHeader("x-oss-server-side-encryption", + ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION); + currentInitiateMultipartUploadRequest = new InitiateMultipartUploadRequest( + this.bucket, currentObject, objectMetadata); + } + return currentInitiateMultipartUploadRequest; + } + + public InitiateMultipartUploadResult initiateMultipartUpload( + final InitiateMultipartUploadRequest currentInitiateMultipartUploadRequest) throws Exception { + final OSSClient ossClient = this.ossClient; + return RetryUtil.executeWithRetry(new Callable() { + @Override + public InitiateMultipartUploadResult call() throws Exception { + return ossClient.initiateMultipartUpload(currentInitiateMultipartUploadRequest); + } + }, 10, 1000L, false); + } + + public CompleteMultipartUploadResult completeMultipartUpload( + final CompleteMultipartUploadRequest currentCompleteMultipartUploadRequest) throws Exception { + + final OSSClient ossClient = this.ossClient; + return RetryUtil.executeWithRetry(new Callable() { + @Override + public CompleteMultipartUploadResult call() throws Exception { + return ossClient.completeMultipartUpload(currentCompleteMultipartUploadRequest); + } + }, 10, 1000L, false); + } + + public void uploadOnePart( + final byte[] byteArray, + final int partNumber, + final InitiateMultipartUploadResult currentInitiateMultipartUploadResult, + final List partETags, + final String currentObject) + throws Exception { + final String bucket = this.bucket; + final OSSClient ossClient = this.ossClient; + RetryUtil.executeWithRetry(new Callable() { + @Override + public Boolean call() throws Exception { + InputStream inputStream = new 
ByteArrayInputStream( + byteArray); + // 创建UploadPartRequest,上传分块 + UploadPartRequest uploadPartRequest = new UploadPartRequest(); + uploadPartRequest.setBucketName(bucket); + uploadPartRequest.setKey(currentObject); + uploadPartRequest.setUploadId(currentInitiateMultipartUploadResult.getUploadId()); + uploadPartRequest.setInputStream(inputStream); + uploadPartRequest.setPartSize(byteArray.length); + uploadPartRequest.setPartNumber(partNumber); + UploadPartResult uploadPartResult = ossClient + .uploadPart(uploadPartRequest); + partETags.add(uploadPartResult.getPartETag()); + logger.info(String + .format("upload part [%s] size [%s] Byte has been completed.", + partNumber, byteArray.length)); + IOUtils.closeQuietly(inputStream); + return true; + } + }, 10, 1000L, false); + } + + public void abortMultipartUpload(final String currentObject, final String uploadId) { + final String bucket = this.bucket; + final OSSClient ossClient = this.ossClient; + try { + RetryUtil.executeWithRetry((Callable) () -> { + AbortMultipartUploadRequest abortMultipartUploadRequest = + new AbortMultipartUploadRequest(bucket, currentObject, uploadId); + ossClient.abortMultipartUpload(abortMultipartUploadRequest); + return null; + }, 5, 1, true); + } catch (Throwable e) { + logger.error(String.format("AbortMultipartUpload failed, msg is %s",e.getMessage()), e); + } + } + + public void uploadOnePartForSingleObject( + final byte[] byteArray, + final String uploadId, + final List partETags, + final String currentObject, + final HeaderProvider headerProvider) + throws Exception { + final String bucket = this.bucket; + final OSSClient ossClient = this.ossClient; + RetryUtil.executeWithRetry(new Callable() { + @Override + public Boolean call() throws Exception { + // 创建UploadPartRequest,上传分块 + UploadPartRequest uploadPartRequest = new UploadPartRequest(); + uploadPartRequest.setPartNumber(OssSingleObject.currentPartNumber.getAndIncrement()); + byte[] data = byteArray; + if (uploadPartRequest.getPartNumber() == 1) { + // write header + byte[] headerBytes = headerProvider.getHeader(); + logger.info("write header to part {}. 
header size: {}", + uploadPartRequest.getPartNumber(), ArrayUtils.getLength(headerBytes)); + data = ArrayUtils.addAll(headerBytes, byteArray); + } + ByteArrayInputStream inputStream = new ByteArrayInputStream(data); + uploadPartRequest.setBucketName(bucket); + uploadPartRequest.setKey(currentObject); + uploadPartRequest.setUploadId(uploadId); + uploadPartRequest.setInputStream(inputStream); + uploadPartRequest.setPartSize(data.length); + UploadPartResult uploadPartResult = ossClient + .uploadPart(uploadPartRequest); + partETags.add(uploadPartResult.getPartETag()); + logger.info("upload part number [{}] size [{}] Byte has been completed, uploadId: {}.", + uploadPartRequest.getPartNumber(), data.length, uploadId); + IOUtils.closeQuietly(inputStream); + return true; + } + }, 10, 1000L, false); + } + + public interface HeaderProvider { + byte[] getHeader() throws Exception; + } +} diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/parquet/ParquetFileProccessor.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/parquet/ParquetFileProccessor.java new file mode 100644 index 00000000..c88a6f10 --- /dev/null +++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/parquet/ParquetFileProccessor.java @@ -0,0 +1,49 @@ +package com.alibaba.datax.plugin.writer.osswriter.parquet; + +import org.apache.hadoop.fs.Path; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.plugin.TaskPluginCollector; +import com.alibaba.datax.common.util.Configuration; + +import parquet.hadoop.ParquetWriter; +import parquet.hadoop.metadata.CompressionCodecName; +import parquet.schema.MessageType; + +import java.io.IOException; + +/** + * @Author: guxuan + * @Date 2022-05-17 16:23 + */ +public class ParquetFileProccessor extends ParquetWriter { + private Path path; + + public ParquetFileProccessor(Path path, MessageType schema, Configuration taskConfig, + TaskPluginCollector taskPluginCollector) throws IOException { + this(path, schema, false, taskConfig, taskPluginCollector); + this.path = path; + } + + public ParquetFileProccessor(Path path, MessageType schema, boolean enableDictionary, Configuration taskConfig, + TaskPluginCollector taskPluginCollector) throws IOException { + this(path, schema, CompressionCodecName.UNCOMPRESSED, enableDictionary, taskConfig, taskPluginCollector); + this.path = path; + } + + public ParquetFileProccessor(Path path, MessageType schema, CompressionCodecName codecName, + boolean enableDictionary, Configuration taskConfig, TaskPluginCollector taskPluginCollector) + throws IOException { + super(path, new ParquetFileSupport(schema, taskConfig, taskPluginCollector), codecName, DEFAULT_BLOCK_SIZE, + DEFAULT_PAGE_SIZE, enableDictionary, false); + this.path = path; + } + + public byte[] getParquetRawData() { + if (null == this.path) { + return null; + } else { + return null; + } + } +} diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/parquet/ParquetFileSupport.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/parquet/ParquetFileSupport.java new file mode 100644 index 00000000..c3ff777c --- /dev/null +++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/parquet/ParquetFileSupport.java @@ -0,0 +1,355 @@ +package com.alibaba.datax.plugin.writer.osswriter.parquet; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.plugin.TaskPluginCollector; +import 
com.alibaba.datax.plugin.unstructuredstorage.writer.Key; +import com.alibaba.datax.plugin.writer.osswriter.Constant; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONArray; +import com.alibaba.fastjson2.JSONObject; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import parquet.hadoop.api.WriteSupport; +import parquet.io.api.Binary; +import parquet.io.api.RecordConsumer; +import parquet.schema.*; + +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.HashMap; +import java.util.List; + +/** + * @Author: guxuan + * @Date 2022-05-17 16:25 + */ +public class ParquetFileSupport extends WriteSupport { + public static final Logger LOGGER = LoggerFactory.getLogger(ParquetFileSupport.class); + private MessageType schema; + private RecordConsumer recordConsumer; + private boolean printStackTrace = true; + + // 不通类型的nullFormat + private String nullFormat; + + private String dateFormat; + private DateFormat dateParse; + private Binary binaryForNull; + private TaskPluginCollector taskPluginCollector; + + public ParquetFileSupport(MessageType schema, com.alibaba.datax.common.util.Configuration taskConfig, TaskPluginCollector taskPluginCollector) { + this.schema = schema; + // 不通类型的nullFormat + this.nullFormat = taskConfig.getString(Key.NULL_FORMAT, Constant.DEFAULT_NULL_FORMAT); + this.binaryForNull = Binary.fromString(this.nullFormat); + + this.dateFormat = taskConfig.getString(Key.DATE_FORMAT, null); + if (StringUtils.isNotBlank(this.dateFormat)) { + this.dateParse = new SimpleDateFormat(dateFormat); + } + + this.taskPluginCollector = taskPluginCollector; + } + + @Override + public WriteContext init(Configuration configuration) { + return new WriteContext(schema, new HashMap()); + } + + @Override + public void prepareForWrite(RecordConsumer recordConsumer) { + this.recordConsumer = recordConsumer; + } + + @Override + public void write(Record values) { + LOGGER.info("Writing parquet data using fields mode(The correct mode.)"); + List types = this.schema.getFields(); + if (values != null && types != null && values.getColumnNumber() == types.size()) { + recordConsumer.startMessage(); + writeFields(types, values); + recordConsumer.endMessage(); + } + } + + private void writeFields(List types, Record values) { + for (int i = 0; i < types.size(); i++) { + Type type = types.get(i); + Column value = values.getColumn(i); + if (value != null) { + try { + if (type.isPrimitive()) { + writePrimitiveType(type, value, i); + } else { + writeGroupType(type, (JSON) JSON.parse(value.asString()), i); + } + } catch (Exception e) { + if (printStackTrace) { + printStackTrace = false; + LOGGER.warn("write to parquet error: {}", e.getMessage(), e); + } + // dirty data + if (null != this.taskPluginCollector) { + // job post 里面的merge taskPluginCollector 为null + this.taskPluginCollector.collectDirtyRecord(values, e, e.getMessage()); + } + } + } + } + } + + private void writeFields(List types, JSONObject values) { + for (int i = 0; i < types.size(); i++) { + Type type = types.get(i); + Object value = values.get(type.getName()); + + if (value != null) { + try { + if (type.isPrimitive()) { + writePrimitiveType(type, value, i); + } else { + writeGroupType(type, (JSON) value, i); + } + } catch (Exception e) { + if (printStackTrace) { + printStackTrace = false; + LOGGER.warn("write to parquet error: {}", e.getMessage(), e); + } + } + } else { + 
recordConsumer.addBinary(this.binaryForNull); + } + } + } + + private void writeGroupType(Type type, JSON value, int index) { + GroupType groupType = type.asGroupType(); + OriginalType originalType = groupType.getOriginalType(); + if (originalType != null) { + switch (originalType) { + case MAP: + writeMap(groupType, value, index); + break; + case LIST: + writeList(groupType, value, index); + break; + default: + break; + } + } else { + // struct + writeStruct(groupType, value, index); + } + } + + private void writeMap(GroupType groupType, JSON value, int index) { + if (value == null) { + return; + } + + JSONObject json = (JSONObject) value; + + if (json.isEmpty()) { + return; + } + + recordConsumer.startField(groupType.getName(), index); + + recordConsumer.startGroup(); + + // map + // key_value start + recordConsumer.startField("key_value", 0); + recordConsumer.startGroup(); + + List keyValueFields = groupType.getFields().get(0).asGroupType().getFields(); + Type keyType = keyValueFields.get(0); + Type valueType = keyValueFields.get(1); + for (String key : json.keySet()) { + // key + writePrimitiveType(keyType, key, 0); + + // value + if (valueType.isPrimitive()) { + writePrimitiveType(valueType, json.get(key), 1); + } else { + writeGroupType(valueType, (JSON) json.get(key), 1); + } + } + + recordConsumer.endGroup(); + recordConsumer.endField("key_value", 0); + // key_value end + + recordConsumer.endGroup(); + recordConsumer.endField(groupType.getName(), index); + } + + private void writeList(GroupType groupType, JSON value, int index) { + if (value == null) { + return; + } + + JSONArray json = (JSONArray) value; + + if (json.isEmpty()) { + return; + } + + recordConsumer.startField(groupType.getName(), index); + // list + recordConsumer.startGroup(); + + + // list start + recordConsumer.startField("list", 0); + recordConsumer.startGroup(); + + Type elementType = groupType.getFields().get(0).asGroupType().getFields().get(0); + + if (elementType.isPrimitive()) { + for (Object elementValue : json) { + writePrimitiveType(elementType, elementValue, 0); + } + } else { + for (Object elementValue : json) { + writeGroupType(elementType, (JSON) elementValue, 0); + } + } + + recordConsumer.endGroup(); + recordConsumer.endField("list", 0); + // list end + recordConsumer.endGroup(); + + recordConsumer.endField(groupType.getName(), index); + } + + private void writeStruct(GroupType groupType, JSON value, int index) { + if (value == null) { + return; + } + JSONObject json = (JSONObject) value; + if (json.isEmpty()) { + return; + } + + recordConsumer.startField(groupType.getName(), index); + // struct start + recordConsumer.startGroup(); + + writeFields(groupType.getFields(), json); + recordConsumer.endGroup(); + // struct end + recordConsumer.endField(groupType.getName(), index); + } + + private void writePrimitiveType(Type type, Object value, int index) { + if (value == null) { + return; + } + + recordConsumer.startField(type.getName(), index); + PrimitiveType primitiveType = type.asPrimitiveType(); + + switch (primitiveType.getPrimitiveTypeName()) { + case BOOLEAN: + recordConsumer.addBoolean((Boolean) value); + break; + case FLOAT: + if (value instanceof Float) { + recordConsumer.addFloat(((Float) value).floatValue()); + } else if (value instanceof Double) { + recordConsumer.addFloat(((Double) value).floatValue()); + } else if (value instanceof Long) { + recordConsumer.addFloat(((Long) value).floatValue()); + } else if (value instanceof Integer) { + recordConsumer.addFloat(((Integer) 
value).floatValue());
+                }
+                break;
+            case DOUBLE:
+                if (value instanceof Float) {
+                    recordConsumer.addDouble(((Float) value).doubleValue());
+                } else if (value instanceof Double) {
+                    recordConsumer.addDouble(((Double) value).doubleValue());
+                } else if (value instanceof Long) {
+                    recordConsumer.addDouble(((Long) value).doubleValue());
+                } else if (value instanceof Integer) {
+                    recordConsumer.addDouble(((Integer) value).doubleValue());
+                }
+                break;
+            case INT32:
+                if (value instanceof Integer) {
+                    recordConsumer.addInteger((Integer) value);
+                } else if (value instanceof Long) {
+                    recordConsumer.addInteger(((Long) value).intValue());
+                } else {
+                    // invalid values must actually be thrown, otherwise they are silently ignored
+                    throw new IllegalArgumentException(
+                            String.format("Invalid value: %s(clazz: %s) for field: %s", value, value.getClass(), type.getName())
+                    );
+                }
+                break;
+            case INT64:
+            case INT96:
+                if (value instanceof Integer) {
+                    recordConsumer.addLong(((Integer) value).longValue());
+                } else if (value instanceof Long) {
+                    // INT64/INT96 columns take a long value, not a truncated int
+                    recordConsumer.addLong((Long) value);
+                } else {
+                    throw new IllegalArgumentException(
+                            String.format("Invalid value: %s(clazz: %s) for field: %s", value, value.getClass(), type.getName())
+                    );
+                }
+                break;
+            case BINARY:
+            default:
+                recordConsumer.addBinary(Binary.fromString((String) value));
+                break;
+        }
+        recordConsumer.endField(type.getName(), index);
+    }
+
+    private void writePrimitiveType(Type type, Column value, int index) {
+        if (value == null || value.getRawData() == null) {
+            return;
+        }
+
+        recordConsumer.startField(type.getName(), index);
+        PrimitiveType primitiveType = type.asPrimitiveType();
+        switch (primitiveType.getPrimitiveTypeName()) {
+            case BOOLEAN:
+                recordConsumer.addBoolean(value.asBoolean());
+                break;
+            case FLOAT:
+                recordConsumer.addFloat(value.asDouble().floatValue());
+                break;
+            case DOUBLE:
+                recordConsumer.addDouble(value.asDouble());
+                break;
+            case INT32:
+                recordConsumer.addInteger(value.asLong().intValue());
+                break;
+            case INT64:
+            case INT96:
+                recordConsumer.addLong(value.asLong());
+                break;
+            case BINARY:
+                String valueAsString2Write = null;
+                if (Column.Type.DATE == value.getType() && null != this.dateParse) {
+                    valueAsString2Write = dateParse.format(value.asDate());
+                }
+                else {
+                    valueAsString2Write = value.asString();
+                }
+                recordConsumer.addBinary(Binary.fromString(valueAsString2Write));
+                break;
+            default:
+                recordConsumer.addBinary(Binary.fromString(value.asString()));
+                break;
+        }
+        recordConsumer.endField(type.getName(), index);
+    }
+}
diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/util/HandlerUtil.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/util/HandlerUtil.java
new file mode 100644
index 00000000..488c119c
--- /dev/null
+++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/util/HandlerUtil.java
@@ -0,0 +1,38 @@
+package com.alibaba.datax.plugin.writer.osswriter.util;
+
+import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.core.util.container.CoreConstant;
+import com.alibaba.datax.plugin.writer.osswriter.Key;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * @Author: guxuan
+ * @Date 2022-05-17 16:35
+ */
+public class HandlerUtil {
+
+    private static final Logger LOG = LoggerFactory.getLogger(HandlerUtil.class);
+
+    /**
+     * 将configuration处理成 ODPS->OSS的 config
+     *
+     * @param jobConfiguration
+     */
+    public static void preHandler(Configuration jobConfiguration) {
+        LOG.info("================ OssWriter Phase 1 preHandler starting...
================ "); + Configuration writerOriginPluginConf = jobConfiguration.getConfiguration( + CoreConstant.DATAX_JOB_CONTENT_WRITER_PARAMETER); + Configuration writerOssPluginConf = writerOriginPluginConf.getConfiguration(Key.OSS_CONFIG); + Configuration newWriterPluginConf = Configuration.newDefault(); + jobConfiguration.remove(CoreConstant.DATAX_JOB_CONTENT_WRITER_PARAMETER); + //将postgresqlwriter的pg配置注入到postgresqlConfig中, 供后面的postHandler使用 + writerOriginPluginConf.remove(Key.OSS_CONFIG); + newWriterPluginConf.set(Key.POSTGRESQL_CONFIG, writerOriginPluginConf); + newWriterPluginConf.merge(writerOssPluginConf, true); + //设置writer的名称为osswriter + jobConfiguration.set(CoreConstant.DATAX_JOB_CONTENT_WRITER_NAME, "osswriter"); + jobConfiguration.set(CoreConstant.DATAX_JOB_CONTENT_WRITER_PARAMETER, newWriterPluginConf); + LOG.info("================ OssWriter Phase 1 preHandler end... ================ "); + } +} diff --git a/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/util/HdfsParquetUtil.java b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/util/HdfsParquetUtil.java new file mode 100644 index 00000000..dc102dac --- /dev/null +++ b/osswriter/src/main/java/com/alibaba/datax/plugin/writer/osswriter/util/HdfsParquetUtil.java @@ -0,0 +1,145 @@ +package com.alibaba.datax.plugin.writer.osswriter.util; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.writer.hdfswriter.HdfsWriter; +import com.alibaba.datax.plugin.writer.osswriter.Key; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONObject; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.Validate; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem; +import org.apache.hadoop.mapred.JobConf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + * @Author: guxuan + * @Date 2022-05-17 16:35 + */ +public class HdfsParquetUtil { + + + private static final Logger logger = LoggerFactory.getLogger(HdfsParquetUtil.class); + + public static boolean isUseHdfsWriterProxy( String fileFormat){ + if("orc".equalsIgnoreCase(fileFormat) || "parquet".equalsIgnoreCase(fileFormat)){ + return true; + } + return false; + } + + /** + * 配置writerSliceConfig 适配hdfswriter写oss parquet + * https://help.aliyun.com/knowledge_detail/74344.html + * @param hdfsWriterJob + * @param writerSliceConfig + */ + public static void adaptConfiguration(HdfsWriter.Job hdfsWriterJob, Configuration writerSliceConfig){ + String fileFormat = writerSliceConfig.getString( + com.alibaba.datax.plugin.unstructuredstorage.writer.Key.FILE_FORMAT, + com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.FILE_FORMAT_TEXT); + + String bucket = writerSliceConfig.getString(Key.BUCKET); + String fs =String.format("oss://%s",bucket); + writerSliceConfig.set(com.alibaba.datax.plugin.writer.hdfswriter.Key.DEFAULT_FS,fs); + writerSliceConfig.set(com.alibaba.datax.plugin.writer.hdfswriter.Key.FILE_TYPE, + writerSliceConfig.getString(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.FILE_FORMAT)); + + /** + * "writeMode"、 "compress"、"encoding" 、path、fileName 相互一致 + */ + JSONObject hadoopConfig = new JSONObject(); + hadoopConfig.put(Key.FS_OSS_ACCESSID,writerSliceConfig.getString(Key.ACCESSID)); + hadoopConfig.put(Key.FS_OSS_ACCESSKEY,writerSliceConfig.getString(Key.ACCESSKEY)); + 
hadoopConfig.put(Key.FS_OSS_ENDPOINT,writerSliceConfig.getString(Key.ENDPOINT)); + writerSliceConfig.set(Key.HDOOP_CONFIG,Configuration.from(JSON.toJSONString(hadoopConfig))); + + String object = writerSliceConfig.getString(Key.OBJECT); + String path = writerSliceConfig.getString(Key.PATH); + String fielName = writerSliceConfig.getString(Key.FILE_NAME); + + if (StringUtils.isNotBlank(object) && (StringUtils.isNotBlank(path) || StringUtils.isNotBlank(fielName))) { + logger.warn("You configure both the \"object\" property and the \"path\" or \"fileName\" property, ignoring the object property. " + + "It is recommended to remove the \"path\" or \"fileName\" attribute, which has been deprecated."); + } + + //兼容之前配置了PATH的datax任务, 如果已经配置了PATH,则无需从object里解析 + if (StringUtils.isBlank(path)) { + Validate.notBlank(object, "object can't be blank!"); + writerSliceConfig.set(Key.PATH, getPathAndFileNameFromObject(object.trim()).get(Key.PATH)); + } + //兼容之前配置了fileName的datax任务,如果已经配置了fileName,则无需从object里解析 + if (StringUtils.isBlank(fielName)) { + Validate.notBlank(object, "object can't be blank!"); + writerSliceConfig.set(Key.FILE_NAME, getPathAndFileNameFromObject(object.trim()).get(Key.FILE_NAME)); + } + if (StringUtils.equalsIgnoreCase(fileFormat, "parquet")) { + hdfsWriterJob.unitizeParquetConfig(writerSliceConfig); + } + + } + + + + /** + * 从object中 解析出 path和fileName + * + * 举例1: + * /hello/aaa/bbb/ccc.txt + * path: /hello/aaa/bbb + * fileName: ccc.txt + * + * 举例2: + * hello/aaa/bbb/ccc.txt + * path: /hello/aaa/bbb + * fileName: ccc.txt + * + * 举例3: + * ccc.txt + * path: / + * fileName: ccc.txt + * + * 举例4: + * /ccc.txt + * path: / + * fileName: ccc.txt + * + * @param object + * @return + */ + public static Map getPathAndFileNameFromObject(String object) { + Map pathAndFileName = new HashMap<>(); + + boolean isContainsBackslash = object.contains("/"); + + //object里没有包含"/", 则将path设置为 "/", fileName设置为 object + if (!isContainsBackslash) { + pathAndFileName.put(Key.PATH, "/"); + pathAndFileName.put(Key.FILE_NAME, object); + return pathAndFileName; + } + + if (!object.startsWith("/")) { + object = "/" + object; + } + + int lastIndex = object.lastIndexOf("/"); + String path = object.substring(0, lastIndex); + String fileName = object.substring(lastIndex + 1); + + path = StringUtils.isNotBlank(path) ? 
path : "/"; + + logger.info("path: {}", path); + logger.info("fileName: {}", fileName); + + pathAndFileName.put(Key.PATH, path); + pathAndFileName.put(Key.FILE_NAME, fileName); + return pathAndFileName; + } +} diff --git a/otsreader/pom.xml b/otsreader/pom.xml index bd017423..eaac8804 100644 --- a/otsreader/pom.xml +++ b/otsreader/pom.xml @@ -10,6 +10,17 @@ otsreader + + org.apache.logging.log4j + log4j-api + 2.17.1 + + + + org.apache.logging.log4j + log4j-core + 2.17.1 + com.alibaba.datax datax-common @@ -34,6 +45,16 @@ com.aliyun.openservices ots-public 2.2.4 + + + log4j-api + org.apache.logging.log4j + + + log4j-core + org.apache.logging.log4j + + com.google.code.gson diff --git a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/Common.java b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/Common.java index 7bb3f52e..fb8c7feb 100644 --- a/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/Common.java +++ b/otsreader/src/main/java/com/alibaba/datax/plugin/reader/otsreader/utils/Common.java @@ -119,7 +119,7 @@ public class Common { case BOOLEAN: line.addColumn(new BoolColumn(v.asBoolean())); break; case BINARY: line.addColumn(new BytesColumn(v.asBinary())); break; default: - throw new IllegalArgumentException("Unsuporrt tranform the type: " + col.getValue().getType() + "."); + throw new IllegalArgumentException("Unsupported transform the type: " + col.getValue().getType() + "."); } } } diff --git a/otsstreamreader/pom.xml b/otsstreamreader/pom.xml index 2a12872f..cb4a6206 100644 --- a/otsstreamreader/pom.xml +++ b/otsstreamreader/pom.xml @@ -13,6 +13,17 @@ 0.0.1 + + org.apache.logging.log4j + log4j-api + 2.17.1 + + + + org.apache.logging.log4j + log4j-core + 2.17.1 + com.alibaba.datax datax-common @@ -33,6 +44,16 @@ com.aliyun.openservices tablestore-streamclient 1.0.0 + + + log4j-api + org.apache.logging.log4j + + + log4j-core + org.apache.logging.log4j + + com.google.code.gson diff --git a/otsstreamreader/src/main/resources/plugin.json b/otsstreamreader/src/main/resources/plugin.json index 9a70a47a..57071d6f 100644 --- a/otsstreamreader/src/main/resources/plugin.json +++ b/otsstreamreader/src/main/resources/plugin.json @@ -2,5 +2,5 @@ "name": "otsstreamreader", "class": "com.alibaba.datax.plugin.reader.otsstreamreader.internal.OTSStreamReader", "description": "", - "developer": "zhaofeng.zhou@alibaba-inc.com" + "developer": "alibaba" } diff --git a/otsstreamreader/tools/tablestore_streamreader_console.py b/otsstreamreader/tools/tablestore_streamreader_console.py deleted file mode 100644 index f9379d72..00000000 --- a/otsstreamreader/tools/tablestore_streamreader_console.py +++ /dev/null @@ -1,179 +0,0 @@ -#!/bin/usr/env python -#-*- coding: utf-8 -*- - -from optparse import OptionParser -import sys -import json -import tabulate -import zlib -from ots2 import * - -class ConsoleConfig: - def __init__(self, config_file): - f = open(config_file, 'r') - config = json.loads(f.read()) - self.endpoint = str(config['endpoint']) - self.accessid = str(config['accessId']) - self.accesskey = str(config['accessKey']) - self.instance_name = str(config['instanceName']) - self.status_table = str(config['statusTable']) - - self.ots = OTSClient(self.endpoint, self.accessid, self.accesskey, self.instance_name) - -def describe_job(config, options): - ''' - 1. get job's description - 2. get all job's checkpoints and check if it is done - ''' - if not options.stream_id: - print "Error: Should set the stream id using '-s' or '--streamid'." 
- sys.exit(-1) - - if not options.timestamp: - print "Error: Should set the timestamp using '-t' or '--timestamp'." - sys.exit(-1) - - pk = [('StreamId', options.stream_id), ('StatusType', 'DataxJobDesc'), ('StatusValue', '%16d' % int(options.timestamp))] - consumed, pk, attrs, next_token = config.ots.get_row(config.status_table, pk, [], None, 1) - if not attrs: - print 'Stream job is not found.' - sys.exit(-1) - - job_detail = parse_job_detail(attrs) - print '----------JobDescriptions----------' - print json.dumps(job_detail, indent=2) - print '-----------------------------------' - - stream_checkpoints = _list_checkpoints(config, options.stream_id, int(options.timestamp)) - - cps_headers = ['ShardId', 'SendRecordCount', 'Checkpoint', 'SkipCount', 'Version'] - table_content = [] - for cp in stream_checkpoints: - table_content.append([cp['ShardId'], cp['SendRecordCount'], cp['Checkpoint'], cp['SkipCount'], cp['Version']]) - - print tabulate.tabulate(table_content, headers=cps_headers) - - # check if stream job has finished - finished = True - if len(job_detail['ShardIds']) != len(stream_checkpoints): - finished = False - - for cp in stream_checkpoints: - if cp['Version'] != job_detail['Version']: - finished = False - - print '----------JobSummary----------' - print 'ShardsCount:', len(job_detail['ShardIds']) - print 'CheckPointsCount:', len(stream_checkpoints) - print 'JobStatus:', 'Finished' if finished else 'NotFinished' - print '------------------------------' - -def _list_checkpoints(config, stream_id, timestamp): - start_pk = [('StreamId', stream_id), ('StatusType', 'CheckpointForDataxReader'), ('StatusValue', '%16d' % timestamp)] - end_pk = [('StreamId', stream_id), ('StatusType', 'CheckpointForDataxReader'), ('StatusValue', '%16d' % (timestamp + 1))] - - consumed_counter = CapacityUnit(0, 0) - columns_to_get = [] - checkpoints = [] - range_iter = config.ots.xget_range( - config.status_table, Direction.FORWARD, - start_pk, end_pk, - consumed_counter, columns_to_get, 100, - column_filter=None, max_version=1 - ) - - rows = [] - for (primary_key, attrs) in range_iter: - checkpoint = {} - for attr in attrs: - checkpoint[attr[0]] = attr[1] - - if not checkpoint.has_key('SendRecordCount'): - checkpoint['SendRecordCount'] = 0 - checkpoint['ShardId'] = primary_key[2][1].split('\t')[1] - checkpoints.append(checkpoint) - - return checkpoints - -def list_job(config, options): - ''' - Two options: - 1. list all jobs of stream - 2. 
list all jobs and all streams - ''' - consumed_counter = CapacityUnit(0, 0) - - if options.stream_id: - start_pk = [('StreamId', options.stream_id), ('StatusType', INF_MIN), ('StatusValue', INF_MIN)] - end_pk = [('StreamId', options.stream_id), ('StatusType', INF_MAX), ('StatusValue', INF_MAX)] - else: - start_pk = [('StreamId', INF_MIN), ('StatusType', INF_MIN), ('StatusValue', INF_MIN)] - end_pk = [('StreamId', INF_MAX), ('StatusType', INF_MAX), ('StatusValue', INF_MAX)] - - columns_to_get = [] - range_iter = config.ots.xget_range( - config.status_table, Direction.FORWARD, - start_pk, end_pk, - consumed_counter, columns_to_get, None, - column_filter=None, max_version=1 - ) - - rows = [] - for (primary_key, attrs) in range_iter: - if primary_key[1][1] == 'DataxJobDesc': - job_detail = parse_job_detail(attrs) - rows.append([job_detail['TableName'], job_detail['JobStreamId'], job_detail['EndTime'], job_detail['StartTime'], job_detail['EndTime'], job_detail['Version']]) - - headers = ['TableName', 'JobStreamId', 'Timestamp', 'StartTime', 'EndTime', 'Version'] - print tabulate.tabulate(rows, headers=headers) - -def parse_job_detail(attrs): - job_details = {} - shard_ids_content = '' - for attr in attrs: - if attr[0].startswith('ShardIds_'): - shard_ids_content += attr[1] - else: - job_details[attr[0]] = attr[1] - - shard_ids = json.loads(zlib.decompress(shard_ids_content)) - - if not job_details.has_key('Version'): - job_details['Version'] = '' - - if not job_details.has_key('SkipCount'): - job_details['SkipCount'] = 0 - job_details['ShardIds'] = shard_ids - - return job_details - -def parse_time(value): - try: - return int(value) - except Exception,e: - return int(time.mktime(time.strptime(value, '%Y-%m-%d %H:%M:%S'))) - -if __name__ == '__main__': - parser = OptionParser() - parser.add_option('-c', '--config', dest='config_file', help='path of config file', metavar='tablestore_streamreader_config.json') - parser.add_option('-a', '--action', dest='action', help='the action to do', choices = ['describe_job', 'list_job'], metavar='') - parser.add_option('-t', '--timestamp', dest='timestamp', help='the timestamp', metavar='') - parser.add_option('-s', '--streamid', dest='stream_id', help='the id of stream', metavar='') - parser.add_option('-d', '--shardid', dest='shard_id', help='the id of shard', metavar='') - - options, args = parser.parse_args() - - if not options.config_file: - print "Error: Should set the path of config file using '-c' or '--config'." - sys.exit(-1) - - if not options.action: - print "Error: Should set the action using '-a' or '--action'." 
- sys.exit(-1) - - console_config = ConsoleConfig(options.config_file) - if options.action == 'list_job': - list_job(console_config, options) - elif options.action == 'describe_job': - describe_job(console_config, options) - diff --git a/otsstreamreader/tools/tabulate.py b/otsstreamreader/tools/tabulate.py deleted file mode 100644 index 2444dcbf..00000000 --- a/otsstreamreader/tools/tabulate.py +++ /dev/null @@ -1,1237 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Pretty-print tabular data.""" - -from __future__ import print_function -from __future__ import unicode_literals -from collections import namedtuple, Iterable -from platform import python_version_tuple -import re - - -if python_version_tuple()[0] < "3": - from itertools import izip_longest - from functools import partial - _none_type = type(None) - _int_type = int - _long_type = long - _float_type = float - _text_type = unicode - _binary_type = str - - def _is_file(f): - return isinstance(f, file) - -else: - from itertools import zip_longest as izip_longest - from functools import reduce, partial - _none_type = type(None) - _int_type = int - _long_type = int - _float_type = float - _text_type = str - _binary_type = bytes - - import io - def _is_file(f): - return isinstance(f, io.IOBase) - -try: - import wcwidth # optional wide-character (CJK) support -except ImportError: - wcwidth = None - - -__all__ = ["tabulate", "tabulate_formats", "simple_separated_format"] -__version__ = "0.7.6-dev" - - -# minimum extra space in headers -MIN_PADDING = 2 - -# if True, enable wide-character (CJK) support -WIDE_CHARS_MODE = wcwidth is not None - - -Line = namedtuple("Line", ["begin", "hline", "sep", "end"]) - - -DataRow = namedtuple("DataRow", ["begin", "sep", "end"]) - - -# A table structure is suppposed to be: -# -# --- lineabove --------- -# headerrow -# --- linebelowheader --- -# datarow -# --- linebewteenrows --- -# ... (more datarows) ... -# --- linebewteenrows --- -# last datarow -# --- linebelow --------- -# -# TableFormat's line* elements can be -# -# - either None, if the element is not used, -# - or a Line tuple, -# - or a function: [col_widths], [col_alignments] -> string. -# -# TableFormat's *row elements can be -# -# - either None, if the element is not used, -# - or a DataRow tuple, -# - or a function: [cell_values], [col_widths], [col_alignments] -> string. -# -# padding (an integer) is the amount of white space around data values. -# -# with_header_hide: -# -# - either None, to display all table elements unconditionally, -# - or a list of elements not to be displayed if the table has column headers. 
-# -TableFormat = namedtuple("TableFormat", ["lineabove", "linebelowheader", - "linebetweenrows", "linebelow", - "headerrow", "datarow", - "padding", "with_header_hide"]) - - -def _pipe_segment_with_colons(align, colwidth): - """Return a segment of a horizontal line with optional colons which - indicate column's alignment (as in `pipe` output format).""" - w = colwidth - if align in ["right", "decimal"]: - return ('-' * (w - 1)) + ":" - elif align == "center": - return ":" + ('-' * (w - 2)) + ":" - elif align == "left": - return ":" + ('-' * (w - 1)) - else: - return '-' * w - - -def _pipe_line_with_colons(colwidths, colaligns): - """Return a horizontal line with optional colons to indicate column's - alignment (as in `pipe` output format).""" - segments = [_pipe_segment_with_colons(a, w) for a, w in zip(colaligns, colwidths)] - return "|" + "|".join(segments) + "|" - - -def _mediawiki_row_with_attrs(separator, cell_values, colwidths, colaligns): - alignment = { "left": '', - "right": 'align="right"| ', - "center": 'align="center"| ', - "decimal": 'align="right"| ' } - # hard-coded padding _around_ align attribute and value together - # rather than padding parameter which affects only the value - values_with_attrs = [' ' + alignment.get(a, '') + c + ' ' - for c, a in zip(cell_values, colaligns)] - colsep = separator*2 - return (separator + colsep.join(values_with_attrs)).rstrip() - - -def _textile_row_with_attrs(cell_values, colwidths, colaligns): - cell_values[0] += ' ' - alignment = { "left": "<.", "right": ">.", "center": "=.", "decimal": ">." } - values = (alignment.get(a, '') + v for a, v in zip(colaligns, cell_values)) - return '|' + '|'.join(values) + '|' - - -def _html_begin_table_without_header(colwidths_ignore, colaligns_ignore): - # this table header will be suppressed if there is a header row - return "\n".join(["", ""]) - - -def _html_row_with_attrs(celltag, cell_values, colwidths, colaligns): - alignment = { "left": '', - "right": ' style="text-align: right;"', - "center": ' style="text-align: center;"', - "decimal": ' style="text-align: right;"' } - values_with_attrs = ["<{0}{1}>{2}".format(celltag, alignment.get(a, ''), c) - for c, a in zip(cell_values, colaligns)] - rowhtml = "" + "".join(values_with_attrs).rstrip() + "" - if celltag == "th": # it's a header row, create a new table header - rowhtml = "\n".join(["
", - "", - rowhtml, - "", - ""]) - return rowhtml - -def _moin_row_with_attrs(celltag, cell_values, colwidths, colaligns, header=''): - alignment = { "left": '', - "right": '', - "center": '', - "decimal": '' } - values_with_attrs = ["{0}{1} {2} ".format(celltag, - alignment.get(a, ''), - header+c+header) - for c, a in zip(cell_values, colaligns)] - return "".join(values_with_attrs)+"||" - -def _latex_line_begin_tabular(colwidths, colaligns, booktabs=False): - alignment = { "left": "l", "right": "r", "center": "c", "decimal": "r" } - tabular_columns_fmt = "".join([alignment.get(a, "l") for a in colaligns]) - return "\n".join(["\\begin{tabular}{" + tabular_columns_fmt + "}", - "\\toprule" if booktabs else "\hline"]) - -LATEX_ESCAPE_RULES = {r"&": r"\&", r"%": r"\%", r"$": r"\$", r"#": r"\#", - r"_": r"\_", r"^": r"\^{}", r"{": r"\{", r"}": r"\}", - r"~": r"\textasciitilde{}", "\\": r"\textbackslash{}", - r"<": r"\ensuremath{<}", r">": r"\ensuremath{>}"} - - -def _latex_row(cell_values, colwidths, colaligns): - def escape_char(c): - return LATEX_ESCAPE_RULES.get(c, c) - escaped_values = ["".join(map(escape_char, cell)) for cell in cell_values] - rowfmt = DataRow("", "&", "\\\\") - return _build_simple_row(escaped_values, rowfmt) - - -_table_formats = {"simple": - TableFormat(lineabove=Line("", "-", " ", ""), - linebelowheader=Line("", "-", " ", ""), - linebetweenrows=None, - linebelow=Line("", "-", " ", ""), - headerrow=DataRow("", " ", ""), - datarow=DataRow("", " ", ""), - padding=0, - with_header_hide=["lineabove", "linebelow"]), - "plain": - TableFormat(lineabove=None, linebelowheader=None, - linebetweenrows=None, linebelow=None, - headerrow=DataRow("", " ", ""), - datarow=DataRow("", " ", ""), - padding=0, with_header_hide=None), - "grid": - TableFormat(lineabove=Line("+", "-", "+", "+"), - linebelowheader=Line("+", "=", "+", "+"), - linebetweenrows=Line("+", "-", "+", "+"), - linebelow=Line("+", "-", "+", "+"), - headerrow=DataRow("|", "|", "|"), - datarow=DataRow("|", "|", "|"), - padding=1, with_header_hide=None), - "fancy_grid": - TableFormat(lineabove=Line("╒", "═", "╤", "╕"), - linebelowheader=Line("╞", "═", "╪", "╡"), - linebetweenrows=Line("├", "─", "┼", "┤"), - linebelow=Line("╘", "═", "╧", "╛"), - headerrow=DataRow("│", "│", "│"), - datarow=DataRow("│", "│", "│"), - padding=1, with_header_hide=None), - "pipe": - TableFormat(lineabove=_pipe_line_with_colons, - linebelowheader=_pipe_line_with_colons, - linebetweenrows=None, - linebelow=None, - headerrow=DataRow("|", "|", "|"), - datarow=DataRow("|", "|", "|"), - padding=1, - with_header_hide=["lineabove"]), - "orgtbl": - TableFormat(lineabove=None, - linebelowheader=Line("|", "-", "+", "|"), - linebetweenrows=None, - linebelow=None, - headerrow=DataRow("|", "|", "|"), - datarow=DataRow("|", "|", "|"), - padding=1, with_header_hide=None), - "jira": - TableFormat(lineabove=None, - linebelowheader=None, - linebetweenrows=None, - linebelow=None, - headerrow=DataRow("||", "||", "||"), - datarow=DataRow("|", "|", "|"), - padding=1, with_header_hide=None), - "psql": - TableFormat(lineabove=Line("+", "-", "+", "+"), - linebelowheader=Line("|", "-", "+", "|"), - linebetweenrows=None, - linebelow=Line("+", "-", "+", "+"), - headerrow=DataRow("|", "|", "|"), - datarow=DataRow("|", "|", "|"), - padding=1, with_header_hide=None), - "rst": - TableFormat(lineabove=Line("", "=", " ", ""), - linebelowheader=Line("", "=", " ", ""), - linebetweenrows=None, - linebelow=Line("", "=", " ", ""), - headerrow=DataRow("", " ", ""), - datarow=DataRow("", 
" ", ""), - padding=0, with_header_hide=None), - "mediawiki": - TableFormat(lineabove=Line("{| class=\"wikitable\" style=\"text-align: left;\"", - "", "", "\n|+ \n|-"), - linebelowheader=Line("|-", "", "", ""), - linebetweenrows=Line("|-", "", "", ""), - linebelow=Line("|}", "", "", ""), - headerrow=partial(_mediawiki_row_with_attrs, "!"), - datarow=partial(_mediawiki_row_with_attrs, "|"), - padding=0, with_header_hide=None), - "moinmoin": - TableFormat(lineabove=None, - linebelowheader=None, - linebetweenrows=None, - linebelow=None, - headerrow=partial(_moin_row_with_attrs,"||",header="'''"), - datarow=partial(_moin_row_with_attrs,"||"), - padding=1, with_header_hide=None), - "html": - TableFormat(lineabove=_html_begin_table_without_header, - linebelowheader="", - linebetweenrows=None, - linebelow=Line("\n
", "", "", ""), - headerrow=partial(_html_row_with_attrs, "th"), - datarow=partial(_html_row_with_attrs, "td"), - padding=0, with_header_hide=["lineabove"]), - "latex": - TableFormat(lineabove=_latex_line_begin_tabular, - linebelowheader=Line("\\hline", "", "", ""), - linebetweenrows=None, - linebelow=Line("\\hline\n\\end{tabular}", "", "", ""), - headerrow=_latex_row, - datarow=_latex_row, - padding=1, with_header_hide=None), - "latex_booktabs": - TableFormat(lineabove=partial(_latex_line_begin_tabular, booktabs=True), - linebelowheader=Line("\\midrule", "", "", ""), - linebetweenrows=None, - linebelow=Line("\\bottomrule\n\\end{tabular}", "", "", ""), - headerrow=_latex_row, - datarow=_latex_row, - padding=1, with_header_hide=None), - "tsv": - TableFormat(lineabove=None, linebelowheader=None, - linebetweenrows=None, linebelow=None, - headerrow=DataRow("", "\t", ""), - datarow=DataRow("", "\t", ""), - padding=0, with_header_hide=None), - "textile": - TableFormat(lineabove=None, linebelowheader=None, - linebetweenrows=None, linebelow=None, - headerrow=DataRow("|_. ", "|_.", "|"), - datarow=_textile_row_with_attrs, - padding=1, with_header_hide=None)} - - -tabulate_formats = list(sorted(_table_formats.keys())) - - -_invisible_codes = re.compile(r"\x1b\[\d*m|\x1b\[\d*\;\d*\;\d*m") # ANSI color codes -_invisible_codes_bytes = re.compile(b"\x1b\[\d*m|\x1b\[\d*\;\d*\;\d*m") # ANSI color codes - - -def simple_separated_format(separator): - """Construct a simple TableFormat with columns separated by a separator. - - >>> tsv = simple_separated_format("\\t") ; \ - tabulate([["foo", 1], ["spam", 23]], tablefmt=tsv) == 'foo \\t 1\\nspam\\t23' - True - - """ - return TableFormat(None, None, None, None, - headerrow=DataRow('', separator, ''), - datarow=DataRow('', separator, ''), - padding=0, with_header_hide=None) - - -def _isconvertible(conv, string): - try: - n = conv(string) - return True - except (ValueError, TypeError): - return False - - -def _isnumber(string): - """ - >>> _isnumber("123.45") - True - >>> _isnumber("123") - True - >>> _isnumber("spam") - False - """ - return _isconvertible(float, string) - - -def _isint(string, inttype=int): - """ - >>> _isint("123") - True - >>> _isint("123.45") - False - """ - return type(string) is inttype or\ - (isinstance(string, _binary_type) or isinstance(string, _text_type))\ - and\ - _isconvertible(inttype, string) - - -def _type(string, has_invisible=True): - """The least generic type (type(None), int, float, str, unicode). - - >>> _type(None) is type(None) - True - >>> _type("foo") is type("") - True - >>> _type("1") is type(1) - True - >>> _type('\x1b[31m42\x1b[0m') is type(42) - True - >>> _type('\x1b[31m42\x1b[0m') is type(42) - True - - """ - - if has_invisible and \ - (isinstance(string, _text_type) or isinstance(string, _binary_type)): - string = _strip_invisible(string) - - if string is None: - return _none_type - elif hasattr(string, "isoformat"): # datetime.datetime, date, and time - return _text_type - elif _isint(string): - return int - elif _isint(string, _long_type): - return int - elif _isnumber(string): - return float - elif isinstance(string, _binary_type): - return _binary_type - else: - return _text_type - - -def _afterpoint(string): - """Symbols after a decimal point, -1 if the string lacks the decimal point. 
- - >>> _afterpoint("123.45") - 2 - >>> _afterpoint("1001") - -1 - >>> _afterpoint("eggs") - -1 - >>> _afterpoint("123e45") - 2 - - """ - if _isnumber(string): - if _isint(string): - return -1 - else: - pos = string.rfind(".") - pos = string.lower().rfind("e") if pos < 0 else pos - if pos >= 0: - return len(string) - pos - 1 - else: - return -1 # no point - else: - return -1 # not a number - - -def _padleft(width, s): - """Flush right. - - >>> _padleft(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430' - True - - """ - fmt = "{0:>%ds}" % width - return fmt.format(s) - - -def _padright(width, s): - """Flush left. - - >>> _padright(6, '\u044f\u0439\u0446\u0430') == '\u044f\u0439\u0446\u0430 ' - True - - """ - fmt = "{0:<%ds}" % width - return fmt.format(s) - - -def _padboth(width, s): - """Center string. - - >>> _padboth(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430 ' - True - - """ - fmt = "{0:^%ds}" % width - return fmt.format(s) - - -def _strip_invisible(s): - "Remove invisible ANSI color codes." - if isinstance(s, _text_type): - return re.sub(_invisible_codes, "", s) - else: # a bytestring - return re.sub(_invisible_codes_bytes, "", s) - - -def _visible_width(s): - """Visible width of a printed string. ANSI color codes are removed. - - >>> _visible_width('\x1b[31mhello\x1b[0m'), _visible_width("world") - (5, 5) - - """ - # optional wide-character support - if wcwidth is not None and WIDE_CHARS_MODE: - len_fn = wcwidth.wcswidth - else: - len_fn = len - if isinstance(s, _text_type) or isinstance(s, _binary_type): - return len_fn(_strip_invisible(s)) - else: - return len_fn(_text_type(s)) - - -def _align_column(strings, alignment, minwidth=0, has_invisible=True): - """[string] -> [padded_string] - - >>> list(map(str,_align_column(["12.345", "-1234.5", "1.23", "1234.5", "1e+234", "1.0e234"], "decimal"))) - [' 12.345 ', '-1234.5 ', ' 1.23 ', ' 1234.5 ', ' 1e+234 ', ' 1.0e234'] - - >>> list(map(str,_align_column(['123.4', '56.7890'], None))) - ['123.4', '56.7890'] - - """ - if alignment == "right": - strings = [s.strip() for s in strings] - padfn = _padleft - elif alignment == "center": - strings = [s.strip() for s in strings] - padfn = _padboth - elif alignment == "decimal": - if has_invisible: - decimals = [_afterpoint(_strip_invisible(s)) for s in strings] - else: - decimals = [_afterpoint(s) for s in strings] - maxdecimals = max(decimals) - strings = [s + (maxdecimals - decs) * " " - for s, decs in zip(strings, decimals)] - padfn = _padleft - elif not alignment: - return strings - else: - strings = [s.strip() for s in strings] - padfn = _padright - - enable_widechars = wcwidth is not None and WIDE_CHARS_MODE - if has_invisible: - width_fn = _visible_width - elif enable_widechars: # optional wide-character support if available - width_fn = wcwidth.wcswidth - else: - width_fn = len - - s_lens = list(map(len, strings)) - s_widths = list(map(width_fn, strings)) - maxwidth = max(max(s_widths), minwidth) - if not enable_widechars and not has_invisible: - padded_strings = [padfn(maxwidth, s) for s in strings] - else: - # enable wide-character width corrections - visible_widths = [maxwidth - (w - l) for w, l in zip(s_widths, s_lens)] - # wcswidth and _visible_width don't count invisible characters; - # padfn doesn't need to apply another correction - padded_strings = [padfn(w, s) for s, w in zip(strings, visible_widths)] - return padded_strings - - -def _more_generic(type1, type2): - types = { _none_type: 0, int: 1, float: 2, _binary_type: 3, _text_type: 4 } - invtypes = 
{ 4: _text_type, 3: _binary_type, 2: float, 1: int, 0: _none_type } - moregeneric = max(types.get(type1, 4), types.get(type2, 4)) - return invtypes[moregeneric] - - -def _column_type(strings, has_invisible=True): - """The least generic type all column values are convertible to. - - >>> _column_type(["1", "2"]) is _int_type - True - >>> _column_type(["1", "2.3"]) is _float_type - True - >>> _column_type(["1", "2.3", "four"]) is _text_type - True - >>> _column_type(["four", '\u043f\u044f\u0442\u044c']) is _text_type - True - >>> _column_type([None, "brux"]) is _text_type - True - >>> _column_type([1, 2, None]) is _int_type - True - >>> import datetime as dt - >>> _column_type([dt.datetime(1991,2,19), dt.time(17,35)]) is _text_type - True - - """ - types = [_type(s, has_invisible) for s in strings ] - return reduce(_more_generic, types, int) - - -def _format(val, valtype, floatfmt, missingval="", has_invisible=True): - """Format a value accoding to its type. - - Unicode is supported: - - >>> hrow = ['\u0431\u0443\u043a\u0432\u0430', '\u0446\u0438\u0444\u0440\u0430'] ; \ - tbl = [['\u0430\u0437', 2], ['\u0431\u0443\u043a\u0438', 4]] ; \ - good_result = '\\u0431\\u0443\\u043a\\u0432\\u0430 \\u0446\\u0438\\u0444\\u0440\\u0430\\n------- -------\\n\\u0430\\u0437 2\\n\\u0431\\u0443\\u043a\\u0438 4' ; \ - tabulate(tbl, headers=hrow) == good_result - True - - """ - if val is None: - return missingval - - if valtype in [int, _text_type]: - return "{0}".format(val) - elif valtype is _binary_type: - try: - return _text_type(val, "ascii") - except TypeError: - return _text_type(val) - elif valtype is float: - is_a_colored_number = has_invisible and isinstance(val, (_text_type, _binary_type)) - if is_a_colored_number: - raw_val = _strip_invisible(val) - formatted_val = format(float(raw_val), floatfmt) - return val.replace(raw_val, formatted_val) - else: - return format(float(val), floatfmt) - else: - return "{0}".format(val) - - -def _align_header(header, alignment, width, visible_width): - "Pad string header to width chars given known visible_width of the header." - width += len(header) - visible_width - if alignment == "left": - return _padright(width, header) - elif alignment == "center": - return _padboth(width, header) - elif not alignment: - return "{0}".format(header) - else: - return _padleft(width, header) - - -def _prepend_row_index(rows, index): - """Add a left-most index column.""" - if index is None or index is False: - return rows - if len(index) != len(rows): - print('index=', index) - print('rows=', rows) - raise ValueError('index must be as long as the number of data rows') - rows = [[v]+list(row) for v,row in zip(index, rows)] - return rows - - -def _bool(val): - "A wrapper around standard bool() which doesn't throw on NumPy arrays" - try: - return bool(val) - except ValueError: # val is likely to be a numpy array with many elements - return False - - -def _normalize_tabular_data(tabular_data, headers, showindex="default"): - """Transform a supported data type to a list of lists, and a list of headers. 
- - Supported tabular data types: - - * list-of-lists or another iterable of iterables - - * list of named tuples (usually used with headers="keys") - - * list of dicts (usually used with headers="keys") - - * list of OrderedDicts (usually used with headers="keys") - - * 2D NumPy arrays - - * NumPy record arrays (usually used with headers="keys") - - * dict of iterables (usually used with headers="keys") - - * pandas.DataFrame (usually used with headers="keys") - - The first row can be used as headers if headers="firstrow", - column indices can be used as headers if headers="keys". - - If showindex="default", show row indices of the pandas.DataFrame. - If showindex="always", show row indices for all types of data. - If showindex="never", don't show row indices for all types of data. - If showindex is an iterable, show its values as row indices. - - """ - - try: - bool(headers) - is_headers2bool_broken = False - except ValueError: # numpy.ndarray, pandas.core.index.Index, ... - is_headers2bool_broken = True - headers = list(headers) - - index = None - if hasattr(tabular_data, "keys") and hasattr(tabular_data, "values"): - # dict-like and pandas.DataFrame? - if hasattr(tabular_data.values, "__call__"): - # likely a conventional dict - keys = tabular_data.keys() - rows = list(izip_longest(*tabular_data.values())) # columns have to be transposed - elif hasattr(tabular_data, "index"): - # values is a property, has .index => it's likely a pandas.DataFrame (pandas 0.11.0) - keys = tabular_data.keys() - vals = tabular_data.values # values matrix doesn't need to be transposed - # for DataFrames add an index per default - index = list(tabular_data.index) - rows = [list(row) for row in vals] - else: - raise ValueError("tabular data doesn't appear to be a dict or a DataFrame") - - if headers == "keys": - headers = list(map(_text_type,keys)) # headers should be strings - - else: # it's a usual an iterable of iterables, or a NumPy array - rows = list(tabular_data) - - if (headers == "keys" and - hasattr(tabular_data, "dtype") and - getattr(tabular_data.dtype, "names")): - # numpy record array - headers = tabular_data.dtype.names - elif (headers == "keys" - and len(rows) > 0 - and isinstance(rows[0], tuple) - and hasattr(rows[0], "_fields")): - # namedtuple - headers = list(map(_text_type, rows[0]._fields)) - elif (len(rows) > 0 - and isinstance(rows[0], dict)): - # dict or OrderedDict - uniq_keys = set() # implements hashed lookup - keys = [] # storage for set - if headers == "firstrow": - firstdict = rows[0] if len(rows) > 0 else {} - keys.extend(firstdict.keys()) - uniq_keys.update(keys) - rows = rows[1:] - for row in rows: - for k in row.keys(): - #Save unique items in input order - if k not in uniq_keys: - keys.append(k) - uniq_keys.add(k) - if headers == 'keys': - headers = keys - elif isinstance(headers, dict): - # a dict of headers for a list of dicts - headers = [headers.get(k, k) for k in keys] - headers = list(map(_text_type, headers)) - elif headers == "firstrow": - if len(rows) > 0: - headers = [firstdict.get(k, k) for k in keys] - headers = list(map(_text_type, headers)) - else: - headers = [] - elif headers: - raise ValueError('headers for a list of dicts is not a dict or a keyword') - rows = [[row.get(k) for k in keys] for row in rows] - - elif headers == "keys" and len(rows) > 0: - # keys are column indices - headers = list(map(_text_type, range(len(rows[0])))) - - # take headers from the first row if necessary - if headers == "firstrow" and len(rows) > 0: - if index is not None: - 
headers = [index[0]] + list(rows[0]) - index = index[1:] - else: - headers = rows[0] - headers = list(map(_text_type, headers)) # headers should be strings - rows = rows[1:] - - headers = list(map(_text_type,headers)) - rows = list(map(list,rows)) - - # add or remove an index column - showindex_is_a_str = type(showindex) in [_text_type, _binary_type] - if showindex == "default" and index is not None: - rows = _prepend_row_index(rows, index) - elif isinstance(showindex, Iterable) and not showindex_is_a_str: - rows = _prepend_row_index(rows, list(showindex)) - elif showindex == "always" or (_bool(showindex) and not showindex_is_a_str): - if index is None: - index = list(range(len(rows))) - rows = _prepend_row_index(rows, index) - elif showindex == "never" or (not _bool(showindex) and not showindex_is_a_str): - pass - - # pad with empty headers for initial columns if necessary - if headers and len(rows) > 0: - nhs = len(headers) - ncols = len(rows[0]) - if nhs < ncols: - headers = [""]*(ncols - nhs) + headers - - return rows, headers - - -def tabulate(tabular_data, headers=(), tablefmt="simple", - floatfmt="g", numalign="decimal", stralign="left", - missingval="", showindex="default"): - """Format a fixed width table for pretty printing. - - >>> print(tabulate([[1, 2.34], [-56, "8.999"], ["2", "10001"]])) - --- --------- - 1 2.34 - -56 8.999 - 2 10001 - --- --------- - - The first required argument (`tabular_data`) can be a - list-of-lists (or another iterable of iterables), a list of named - tuples, a dictionary of iterables, an iterable of dictionaries, - a two-dimensional NumPy array, NumPy record array, or a Pandas' - dataframe. - - - Table headers - ------------- - - To print nice column headers, supply the second argument (`headers`): - - - `headers` can be an explicit list of column headers - - if `headers="firstrow"`, then the first row of data is used - - if `headers="keys"`, then dictionary keys or column indices are used - - Otherwise a headerless table is produced. - - If the number of headers is less than the number of columns, they - are supposed to be names of the last columns. This is consistent - with the plain-text format of R and Pandas' dataframes. - - >>> print(tabulate([["sex","age"],["Alice","F",24],["Bob","M",19]], - ... headers="firstrow")) - sex age - ----- ----- ----- - Alice F 24 - Bob M 19 - - By default, pandas.DataFrame data have an additional column called - row index. To add a similar column to all other types of data, - use `showindex="always"` or `showindex=True`. To suppress row indices - for all types of data, pass `showindex="never" or `showindex=False`. - To add a custom row index column, pass `showindex=some_iterable`. - - >>> print(tabulate([["F",24],["M",19]], showindex="always")) - - - -- - 0 F 24 - 1 M 19 - - - -- - - - Column alignment - ---------------- - - `tabulate` tries to detect column types automatically, and aligns - the values properly. By default it aligns decimal points of the - numbers (or flushes integer numbers to the right), and flushes - everything else to the left. Possible column alignments - (`numalign`, `stralign`) are: "right", "center", "left", "decimal" - (only for `numalign`), and None (to disable alignment). - - - Table formats - ------------- - - `floatfmt` is a format specification used for columns which - contain numeric data with a decimal point. - - `None` values are replaced with a `missingval` string: - - >>> print(tabulate([["spam", 1, None], - ... ["eggs", 42, 3.14], - ... 
["other", None, 2.7]], missingval="?")) - ----- -- ---- - spam 1 ? - eggs 42 3.14 - other ? 2.7 - ----- -- ---- - - Various plain-text table formats (`tablefmt`) are supported: - 'plain', 'simple', 'grid', 'pipe', 'orgtbl', 'rst', 'mediawiki', - 'latex', and 'latex_booktabs'. Variable `tabulate_formats` contains the list of - currently supported formats. - - "plain" format doesn't use any pseudographics to draw tables, - it separates columns with a double space: - - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], - ... ["strings", "numbers"], "plain")) - strings numbers - spam 41.9999 - eggs 451 - - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="plain")) - spam 41.9999 - eggs 451 - - "simple" format is like Pandoc simple_tables: - - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], - ... ["strings", "numbers"], "simple")) - strings numbers - --------- --------- - spam 41.9999 - eggs 451 - - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="simple")) - ---- -------- - spam 41.9999 - eggs 451 - ---- -------- - - "grid" is similar to tables produced by Emacs table.el package or - Pandoc grid_tables: - - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], - ... ["strings", "numbers"], "grid")) - +-----------+-----------+ - | strings | numbers | - +===========+===========+ - | spam | 41.9999 | - +-----------+-----------+ - | eggs | 451 | - +-----------+-----------+ - - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="grid")) - +------+----------+ - | spam | 41.9999 | - +------+----------+ - | eggs | 451 | - +------+----------+ - - "fancy_grid" draws a grid using box-drawing characters: - - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], - ... ["strings", "numbers"], "fancy_grid")) - ╒═══════════╤═══════════╕ - │ strings │ numbers │ - ╞═══════════╪═══════════╡ - │ spam │ 41.9999 │ - ├───────────┼───────────┤ - │ eggs │ 451 │ - ╘═══════════╧═══════════╛ - - "pipe" is like tables in PHP Markdown Extra extension or Pandoc - pipe_tables: - - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], - ... ["strings", "numbers"], "pipe")) - | strings | numbers | - |:----------|----------:| - | spam | 41.9999 | - | eggs | 451 | - - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="pipe")) - |:-----|---------:| - | spam | 41.9999 | - | eggs | 451 | - - "orgtbl" is like tables in Emacs org-mode and orgtbl-mode. They - are slightly different from "pipe" format by not using colons to - define column alignment, and using a "+" sign to indicate line - intersections: - - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], - ... ["strings", "numbers"], "orgtbl")) - | strings | numbers | - |-----------+-----------| - | spam | 41.9999 | - | eggs | 451 | - - - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="orgtbl")) - | spam | 41.9999 | - | eggs | 451 | - - "rst" is like a simple table format from reStructuredText; please - note that reStructuredText accepts also "grid" tables: - - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], - ... 
["strings", "numbers"], "rst")) - ========= ========= - strings numbers - ========= ========= - spam 41.9999 - eggs 451 - ========= ========= - - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="rst")) - ==== ======== - spam 41.9999 - eggs 451 - ==== ======== - - "mediawiki" produces a table markup used in Wikipedia and on other - MediaWiki-based sites: - - >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]], - ... headers="firstrow", tablefmt="mediawiki")) - {| class="wikitable" style="text-align: left;" - |+ - |- - ! strings !! align="right"| numbers - |- - | spam || align="right"| 41.9999 - |- - | eggs || align="right"| 451 - |} - - "html" produces HTML markup: - - >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]], - ... headers="firstrow", tablefmt="html")) - - - - - - - - -
<tr><th>strings  </th><th style="text-align: right;">  numbers</th></tr>
<tr><td>spam     </td><td style="text-align: right;">  41.9999</td></tr>
<tr><td>eggs     </td><td style="text-align: right;">  451    </td></tr>
- - "latex" produces a tabular environment of LaTeX document markup: - - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex")) - \\begin{tabular}{lr} - \\hline - spam & 41.9999 \\\\ - eggs & 451 \\\\ - \\hline - \\end{tabular} - - "latex_booktabs" produces a tabular environment of LaTeX document markup - using the booktabs.sty package: - - >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex_booktabs")) - \\begin{tabular}{lr} - \\toprule - spam & 41.9999 \\\\ - eggs & 451 \\\\ - \\bottomrule - \end{tabular} - """ - if tabular_data is None: - tabular_data = [] - list_of_lists, headers = _normalize_tabular_data( - tabular_data, headers, showindex=showindex) - - # optimization: look for ANSI control codes once, - # enable smart width functions only if a control code is found - plain_text = '\n'.join(['\t'.join(map(_text_type, headers))] + \ - ['\t'.join(map(_text_type, row)) for row in list_of_lists]) - - has_invisible = re.search(_invisible_codes, plain_text) - enable_widechars = wcwidth is not None and WIDE_CHARS_MODE - if has_invisible: - width_fn = _visible_width - elif enable_widechars: # optional wide-character support if available - width_fn = wcwidth.wcswidth - else: - width_fn = len - - # format rows and columns, convert numeric values to strings - cols = list(zip(*list_of_lists)) - coltypes = list(map(_column_type, cols)) - cols = [[_format(v, ct, floatfmt, missingval, has_invisible) for v in c] - for c,ct in zip(cols, coltypes)] - - # align columns - aligns = [numalign if ct in [int,float] else stralign for ct in coltypes] - minwidths = [width_fn(h) + MIN_PADDING for h in headers] if headers else [0]*len(cols) - cols = [_align_column(c, a, minw, has_invisible) - for c, a, minw in zip(cols, aligns, minwidths)] - - if headers: - # align headers and add headers - t_cols = cols or [['']] * len(headers) - t_aligns = aligns or [stralign] * len(headers) - minwidths = [max(minw, width_fn(c[0])) for minw, c in zip(minwidths, t_cols)] - headers = [_align_header(h, a, minw, width_fn(h)) - for h, a, minw in zip(headers, t_aligns, minwidths)] - rows = list(zip(*cols)) - else: - minwidths = [width_fn(c[0]) for c in cols] - rows = list(zip(*cols)) - - if not isinstance(tablefmt, TableFormat): - tablefmt = _table_formats.get(tablefmt, _table_formats["simple"]) - - return _format_table(tablefmt, headers, rows, minwidths, aligns) - - -def _build_simple_row(padded_cells, rowfmt): - "Format row according to DataRow format without padding." - begin, sep, end = rowfmt - return (begin + sep.join(padded_cells) + end).rstrip() - - -def _build_row(padded_cells, colwidths, colaligns, rowfmt): - "Return a string which represents a row of data cells." - if not rowfmt: - return None - if hasattr(rowfmt, "__call__"): - return rowfmt(padded_cells, colwidths, colaligns) - else: - return _build_simple_row(padded_cells, rowfmt) - - -def _build_line(colwidths, colaligns, linefmt): - "Return a string which represents a horizontal line." 
- if not linefmt: - return None - if hasattr(linefmt, "__call__"): - return linefmt(colwidths, colaligns) - else: - begin, fill, sep, end = linefmt - cells = [fill*w for w in colwidths] - return _build_simple_row(cells, (begin, sep, end)) - - -def _pad_row(cells, padding): - if cells: - pad = " "*padding - padded_cells = [pad + cell + pad for cell in cells] - return padded_cells - else: - return cells - - -def _format_table(fmt, headers, rows, colwidths, colaligns): - """Produce a plain-text representation of the table.""" - lines = [] - hidden = fmt.with_header_hide if (headers and fmt.with_header_hide) else [] - pad = fmt.padding - headerrow = fmt.headerrow - - padded_widths = [(w + 2*pad) for w in colwidths] - padded_headers = _pad_row(headers, pad) - padded_rows = [_pad_row(row, pad) for row in rows] - - if fmt.lineabove and "lineabove" not in hidden: - lines.append(_build_line(padded_widths, colaligns, fmt.lineabove)) - - if padded_headers: - lines.append(_build_row(padded_headers, padded_widths, colaligns, headerrow)) - if fmt.linebelowheader and "linebelowheader" not in hidden: - lines.append(_build_line(padded_widths, colaligns, fmt.linebelowheader)) - - if padded_rows and fmt.linebetweenrows and "linebetweenrows" not in hidden: - # initial rows with a line below - for row in padded_rows[:-1]: - lines.append(_build_row(row, padded_widths, colaligns, fmt.datarow)) - lines.append(_build_line(padded_widths, colaligns, fmt.linebetweenrows)) - # the last row without a line below - lines.append(_build_row(padded_rows[-1], padded_widths, colaligns, fmt.datarow)) - else: - for row in padded_rows: - lines.append(_build_row(row, padded_widths, colaligns, fmt.datarow)) - - if fmt.linebelow and "linebelow" not in hidden: - lines.append(_build_line(padded_widths, colaligns, fmt.linebelow)) - - return "\n".join(lines) - - -def _main(): - """\ - Usage: tabulate [options] [FILE ...] - - Pretty-print tabular data. - See also https://bitbucket.org/astanin/python-tabulate - - FILE a filename of the file with tabular data; - if "-" or missing, read data from stdin. 
- - Options: - - -h, --help show this message - -1, --header use the first row of data as a table header - -o FILE, --output FILE print table to FILE (default: stdout) - -s REGEXP, --sep REGEXP use a custom column separator (default: whitespace) - -F FPFMT, --float FPFMT floating point number format (default: g) - -f FMT, --format FMT set output table format; supported formats: - plain, simple, grid, fancy_grid, pipe, orgtbl, - rst, mediawiki, html, latex, latex_booktabs, tsv - (default: simple) - """ - import getopt - import sys - import textwrap - usage = textwrap.dedent(_main.__doc__) - try: - opts, args = getopt.getopt(sys.argv[1:], - "h1o:s:F:f:", - ["help", "header", "output", "sep=", "float=", "format="]) - except getopt.GetoptError as e: - print(e) - print(usage) - sys.exit(2) - headers = [] - floatfmt = "g" - tablefmt = "simple" - sep = r"\s+" - outfile = "-" - for opt, value in opts: - if opt in ["-1", "--header"]: - headers = "firstrow" - elif opt in ["-o", "--output"]: - outfile = value - elif opt in ["-F", "--float"]: - floatfmt = value - elif opt in ["-f", "--format"]: - if value not in tabulate_formats: - print("%s is not a supported table format" % value) - print(usage) - sys.exit(3) - tablefmt = value - elif opt in ["-s", "--sep"]: - sep = value - elif opt in ["-h", "--help"]: - print(usage) - sys.exit(0) - files = [sys.stdin] if not args else args - with (sys.stdout if outfile == "-" else open(outfile, "w")) as out: - for f in files: - if f == "-": - f = sys.stdin - if _is_file(f): - _pprint_file(f, headers=headers, tablefmt=tablefmt, - sep=sep, floatfmt=floatfmt, file=out) - else: - with open(f) as fobj: - _pprint_file(fobj, headers=headers, tablefmt=tablefmt, - sep=sep, floatfmt=floatfmt, file=out) - - -def _pprint_file(fobject, headers, tablefmt, sep, floatfmt, file): - rows = fobject.readlines() - table = [re.split(sep, r.rstrip()) for r in rows if r.strip()] - print(tabulate(table, headers, tablefmt, floatfmt=floatfmt), file=file) - - -if __name__ == "__main__": - _main() \ No newline at end of file diff --git a/otswriter/pom.xml b/otswriter/pom.xml index 8677c8ab..cb255e1f 100644 --- a/otswriter/pom.xml +++ b/otswriter/pom.xml @@ -10,6 +10,17 @@ otswriter + + org.apache.logging.log4j + log4j-api + 2.17.1 + + + + org.apache.logging.log4j + log4j-core + 2.17.1 + com.alibaba.datax datax-common @@ -34,6 +45,16 @@ com.aliyun.openservices ots-public 2.2.4 + + + log4j-api + org.apache.logging.log4j + + + log4j-core + org.apache.logging.log4j + + com.google.code.gson diff --git a/package.xml b/package.xml index 882dd23b..4c1aff04 100755 --- a/package.xml +++ b/package.xml @@ -60,13 +60,6 @@
<outputDirectory>datax</outputDirectory>
- - db2reader/target/datax/ - - **/*.* - - datax - postgresqlreader/target/datax/ @@ -103,13 +96,13 @@ datax - - otsstreamreader/target/datax/ - - **/*.* - - datax - + + otsstreamreader/target/datax/ + + **/*.* + + datax + txtfilereader/target/datax/ @@ -131,6 +124,13 @@ datax + + tdenginereader/target/datax/ + + **/*.* + + datax + streamreader/target/datax/ @@ -180,6 +180,55 @@ datax + + gdbreader/target/datax/ + + **/*.* + + datax + + + hbase11xsqlreader/target/datax/ + + **/*.* + + datax + + + hbase20xsqlreader/target/datax/ + + **/*.* + + datax + + + tsdbreader/target/datax/ + + **/*.* + + datax + + + datahubreader/target/datax/ + + **/*.* + + datax + + + loghubreader/target/datax/ + + **/*.* + + datax + + + starrocksreader/target/datax/ + + **/*.* + + datax + @@ -189,6 +238,20 @@ datax + + tdenginewriter/target/datax/ + + **/*.* + + datax + + + starrockswriter/target/datax/ + + **/*.* + + datax + drdswriter/target/datax/ @@ -203,6 +266,13 @@ datax + + doriswriter/target/datax/ + + **/*.* + + datax + txtfilewriter/target/datax/ @@ -322,13 +392,6 @@ datax - - hbase11xsqlreader/target/datax/ - - **/*.* - - datax - elasticsearchwriter/target/datax/ @@ -336,13 +399,6 @@ datax - - hbase20xsqlreader/target/datax/ - - **/*.* - - datax - hbase20xsqlwriter/target/datax/ @@ -378,6 +434,13 @@ datax + + databendwriter/target/datax/ + + **/*.* + + datax + oscarwriter/target/datax/ @@ -392,5 +455,47 @@ datax + + gdbwriter/target/datax/ + + **/*.* + + datax + + + kuduwriter/target/datax/ + + **/*.* + + datax + + + hologresjdbcwriter/target/datax/ + + **/*.* + + datax + + + datahubwriter/target/datax/ + + **/*.* + + datax + + + loghubwriter/target/datax/ + + **/*.* + + datax + + + selectdbwriter/target/datax/ + + **/*.* + + datax + diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/OriginalConfPretreatmentUtil.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/OriginalConfPretreatmentUtil.java index 3ac5f2af..ef3a876d 100755 --- a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/OriginalConfPretreatmentUtil.java +++ b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/OriginalConfPretreatmentUtil.java @@ -261,7 +261,7 @@ public final class OriginalConfPretreatmentUtil { // 混合配制 table 和 querySql if (!ListUtil.checkIfValueSame(tableModeFlags) - || !ListUtil.checkIfValueSame(tableModeFlags)) { + || !ListUtil.checkIfValueSame(querySqlModeFlags)) { throw DataXException.asDataXException(DBUtilErrorCode.TABLE_QUERYSQL_MIXED, "您配置凌乱了. 不能同时既配置table又配置querySql. 
请检查您的配置并作出修改."); } diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/SingleTableSplitUtil.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/SingleTableSplitUtil.java index d9846b39..10cfe795 100755 --- a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/SingleTableSplitUtil.java +++ b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/SingleTableSplitUtil.java @@ -5,7 +5,7 @@ import com.alibaba.datax.common.util.Configuration; import com.alibaba.datax.plugin.rdbms.reader.Constant; import com.alibaba.datax.plugin.rdbms.reader.Key; import com.alibaba.datax.plugin.rdbms.util.*; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.ImmutablePair; @@ -93,6 +93,7 @@ public class SingleTableSplitUtil { allQuerySql.add(tempQuerySql); tempConfig.set(Key.QUERY_SQL, tempQuerySql); + tempConfig.set(Key.WHERE, (hasWhere ? ("(" + where + ") and") : "") + range); pluginParams.add(tempConfig); } } else { @@ -103,6 +104,7 @@ public class SingleTableSplitUtil { + String.format(" %s IS NOT NULL", splitPkName); allQuerySql.add(tempQuerySql); tempConfig.set(Key.QUERY_SQL, tempQuerySql); + tempConfig.set(Key.WHERE, (hasWhere ? "(" + where + ") and" : "") + String.format(" %s IS NOT NULL", splitPkName)); pluginParams.add(tempConfig); } @@ -118,6 +120,7 @@ public class SingleTableSplitUtil { StringUtils.join(allQuerySql, "\n")); tempConfig.set(Key.QUERY_SQL, tempQuerySql); + tempConfig.set(Key.WHERE, (hasWhere ? "(" + where + ") and" : "") + String.format(" %s IS NULL", splitPkName)); pluginParams.add(tempConfig); return pluginParams; @@ -254,6 +257,7 @@ public class SingleTableSplitUtil { switch (SingleTableSplitUtil.DATABASE_TYPE) { case Oracle: + case OceanBase: isValidLongType |= type == Types.NUMERIC; break; default: diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DBUtil.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DBUtil.java index 2392d1ca..12a3aa74 100755 --- a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DBUtil.java +++ b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DBUtil.java @@ -380,6 +380,9 @@ public final class DBUtil { // unit ms prop.put("oracle.jdbc.ReadTimeout", socketTimeout); } + if (dataBaseType == DataBaseType.OceanBase) { + url = url.replace("jdbc:mysql:", "jdbc:oceanbase:"); + } return connect(dataBaseType, url, prop); } @@ -717,6 +720,11 @@ public final class DBUtil { new ArrayList(), String.class); DBUtil.doDealWithSessionConfig(conn, sessionConfig, message); break; + case SQLServer: + sessionConfig = config.getList(Key.SESSION, + new ArrayList(), String.class); + DBUtil.doDealWithSessionConfig(conn, sessionConfig, message); + break; default: break; } diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java index 205919fe..1b46a8bc 100755 --- a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java +++ b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java @@ -18,12 +18,14 @@ public enum DataBaseType { PostgreSQL("postgresql", "org.postgresql.Driver"), RDBMS("rdbms", "com.alibaba.datax.plugin.rdbms.util.DataBaseType"), DB2("db2", "com.ibm.db2.jcc.DB2Driver"), + 
ADB("adb","com.mysql.jdbc.Driver"), ADS("ads","com.mysql.jdbc.Driver"), ClickHouse("clickhouse", "ru.yandex.clickhouse.ClickHouseDriver"), KingbaseES("kingbasees", "com.kingbase8.Driver"), Oscar("oscar", "com.oscar.Driver"), - OceanBase("oceanbase", "com.alipay.oceanbase.jdbc.Driver"); - + OceanBase("oceanbase", "com.alipay.oceanbase.jdbc.Driver"), + StarRocks("starrocks", "com.mysql.jdbc.Driver"), + Databend("databend", "com.databend.jdbc.DatabendDriver"); private String typeName; private String driverClassName; @@ -67,6 +69,8 @@ public enum DataBaseType { break; case Oscar: break; + case StarRocks: + break; default: throw DataXException.asDataXException(DBUtilErrorCode.UNSUPPORTED_TYPE, "unsupported database type."); } @@ -86,6 +90,14 @@ public enum DataBaseType { result = jdbc + "?" + suffix; } break; + case ADB: + suffix = "yearIsDateType=false&zeroDateTimeBehavior=convertToNull&rewriteBatchedStatements=true&tinyInt1isBit=false"; + if (jdbc.contains("?")) { + result = jdbc + "&" + suffix; + } else { + result = jdbc + "?" + suffix; + } + break; case DRDS: suffix = "yearIsDateType=false&zeroDateTimeBehavior=convertToNull"; if (jdbc.contains("?")) { @@ -106,6 +118,8 @@ public enum DataBaseType { break; case RDBMS: break; + case Databend: + break; case KingbaseES: break; case Oscar: diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/writer/CommonRdbmsWriter.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/writer/CommonRdbmsWriter.java index 27b88f44..bec3c683 100755 --- a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/writer/CommonRdbmsWriter.java +++ b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/writer/CommonRdbmsWriter.java @@ -409,11 +409,6 @@ public class CommonRdbmsWriter { return preparedStatement; } - protected PreparedStatement fillPreparedStatementColumnType(PreparedStatement preparedStatement, int columnIndex, - int columnSqltype, Column column) throws SQLException { - return fillPreparedStatementColumnType(preparedStatement, columnIndex, columnSqltype, null, column); - } - protected PreparedStatement fillPreparedStatementColumnType(PreparedStatement preparedStatement, int columnIndex, int columnSqltype, String typeName, Column column) throws SQLException { java.util.Date utilDate; @@ -524,7 +519,7 @@ public class CommonRdbmsWriter { break; case Types.BOOLEAN: - preparedStatement.setString(columnIndex + 1, column.asString()); + preparedStatement.setBoolean(columnIndex + 1, column.asBoolean()); break; // warn: bit(1) -> Types.BIT 可使用setBoolean diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/writer/util/OriginalConfPretreatmentUtil.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/writer/util/OriginalConfPretreatmentUtil.java index 34d1b3af..556e50ac 100755 --- a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/writer/util/OriginalConfPretreatmentUtil.java +++ b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/writer/util/OriginalConfPretreatmentUtil.java @@ -10,6 +10,7 @@ import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.sql.Connection; import java.util.ArrayList; import java.util.List; @@ -120,9 +121,15 @@ public final class OriginalConfPretreatmentUtil { } else { // 确保用户配置的 column 不重复 ListUtil.makeSureNoValueDuplicate(userConfiguredColumns, false); + Connection connection = null; + try { + connection = connectionFactory.getConnecttion(); + // 
检查列是否都为数据库表中正确的列(通过执行一次 select column from table 进行判断) + DBUtil.getColumnMetaData(connection, oneTable,StringUtils.join(userConfiguredColumns, ",")); + } finally { + DBUtil.closeDBResources(null, connection); + } - // 检查列是否都为数据库表中正确的列(通过执行一次 select column from table 进行判断) - DBUtil.getColumnMetaData(connectionFactory.getConnecttion(), oneTable,StringUtils.join(userConfiguredColumns, ",")); } } } diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/FileFormat.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/FileFormat.java new file mode 100644 index 00000000..b9368e67 --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/FileFormat.java @@ -0,0 +1,97 @@ +package com.alibaba.datax.plugin.unstructuredstorage; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.unstructuredstorage.reader.Constant; +import com.alibaba.datax.plugin.unstructuredstorage.reader.Key; +import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderErrorCode; +import org.apache.commons.lang3.StringUtils; + +import java.util.Arrays; + +/** + * @Author: guxuan + * @Date 2022-05-17 16:04 + */ +public enum FileFormat { + TEXT("text"), + CSV("csv"), + EXCEL("excel"), + BINARY("binary"); + + private String fileFormat; + + private boolean isText; + private boolean isCsv; + private boolean isExcel; + private boolean isBinary; + + FileFormat(String fileFormat) { + this.fileFormat = fileFormat.toLowerCase(); + } + + /** + * 获取文件类型: 目前支持text,csv,excel,binary + * @param configuration + * @return + */ + public static FileFormat getFileFormatByConfiguration(Configuration configuration) { + String fileFormat = configuration.getString(Key.FILE_FORMAT, Constant.DEFAULT_FILE_FORMAT); + return FileFormat.getByTypeName(fileFormat); + } + + public String getFileFormat() { + return this.fileFormat; + } + + public static FileFormat getByTypeName(String fileFormat) { + for (FileFormat fFormat : values()) { + if (fFormat.fileFormat.equalsIgnoreCase(fileFormat)) { + return fFormat; + } + } + throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.ILLEGAL_VALUE, + String.format("DataX 不支持该 fileFormat 类型:%s, 目前支持的 fileFormat 类型是:%s", fileFormat, Arrays.asList(values()))); + } + + public boolean equalsIgnoreCase(String fileFormat){ + return StringUtils.equalsIgnoreCase(fileFormat, this.fileFormat); + } + + public boolean isText() { + return this.equalsIgnoreCase(Constant.FILE_FORMAT_TEXT); + } + + public void setText(boolean text) { + isText = text; + } + + public boolean isCsv() { + return this.equalsIgnoreCase(Constant.FILE_FORMAT_CSV); + } + + public void setCsv(boolean csv) { + isCsv = csv; + } + + public boolean isExcel() { + return this.equalsIgnoreCase(Constant.FILE_FORMAT_EXCEL); + } + + public void setExcel(boolean excel) { + isExcel = excel; + } + + public boolean isBinary() { + return this.equalsIgnoreCase(Constant.FILE_FORMAT_BINARY); + } + + public void setBinary(boolean binary) { + isBinary = binary; + } + + @Override + public String toString(){ + return this.fileFormat; + } +} diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings.properties b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings.properties new file mode 100644 index 
00000000..d53d4749 --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings.properties @@ -0,0 +1 @@ +fileformaterror.1=DataX \u4E0D\u652F\u6301\u8BE5 fileFormat \u7C7B\u578B:{0}, \u76EE\u524D\u652F\u6301\u7684 fileFormat \u7C7B\u578B\u662F:{1} \ No newline at end of file diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_en_US.properties b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_en_US.properties new file mode 100644 index 00000000..d53d4749 --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_en_US.properties @@ -0,0 +1 @@ +fileformaterror.1=DataX \u4E0D\u652F\u6301\u8BE5 fileFormat \u7C7B\u578B:{0}, \u76EE\u524D\u652F\u6301\u7684 fileFormat \u7C7B\u578B\u662F:{1} \ No newline at end of file diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_ja_JP.properties b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_ja_JP.properties new file mode 100644 index 00000000..d53d4749 --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_ja_JP.properties @@ -0,0 +1 @@ +fileformaterror.1=DataX \u4E0D\u652F\u6301\u8BE5 fileFormat \u7C7B\u578B:{0}, \u76EE\u524D\u652F\u6301\u7684 fileFormat \u7C7B\u578B\u662F:{1} \ No newline at end of file diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_CN.properties b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_CN.properties new file mode 100644 index 00000000..d53d4749 --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_CN.properties @@ -0,0 +1 @@ +fileformaterror.1=DataX \u4E0D\u652F\u6301\u8BE5 fileFormat \u7C7B\u578B:{0}, \u76EE\u524D\u652F\u6301\u7684 fileFormat \u7C7B\u578B\u662F:{1} \ No newline at end of file diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_HK.properties b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_HK.properties new file mode 100644 index 00000000..b92a73ec --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_HK.properties @@ -0,0 +1 @@ +fileformaterror.1=DataX \u4E0D\u652F\u6301\u8BE5 fileFormat \u7C7B\u578B:{0}, \u76EE\u524D\u652F\u6301\u7684 fileFormat \u7C7B\u578B\u662F:{1}fileformaterror.1=DataX不支持該fileFormat類型:{0},現時支持的fileFormat類型是:{1} diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_TW.properties b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_TW.properties new file mode 100644 index 00000000..b92a73ec --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/LocalStrings_zh_TW.properties @@ -0,0 +1 @@ +fileformaterror.1=DataX \u4E0D\u652F\u6301\u8BE5 fileFormat \u7C7B\u578B:{0}, \u76EE\u524D\u652F\u6301\u7684 fileFormat 
\u7C7B\u578B\u662F:{1}fileformaterror.1=DataX不支持該fileFormat類型:{0},現時支持的fileFormat類型是:{1} diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/ColumnEntry.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/ColumnEntry.java index ee3af816..c86bd206 100644 --- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/ColumnEntry.java +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/ColumnEntry.java @@ -5,7 +5,7 @@ import java.text.SimpleDateFormat; import org.apache.commons.lang3.StringUtils; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; public class ColumnEntry { private Integer index; diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/Constant.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/Constant.java index 7c6bc139..6be46cb2 100755 --- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/Constant.java +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/Constant.java @@ -10,4 +10,47 @@ public class Constant { public static final String DEFAULT_NULL_FORMAT = "\\N"; public static final Integer DEFAULT_BUFFER_SIZE = 8192; + + public static final String FILE_FORMAT_CSV = "csv"; + + public static final String FILE_FORMAT_TEXT = "text"; + + public static final String FILE_FORMAT_EXCEL = "excel"; + + public static final String FILE_FORMAT_BINARY = "binary"; + + public static final String DEFAULT_FILE_FORMAT = "csv"; + + public static final Boolean DEFAULE_SKIP_TEXT_EMPTY_RECORDS = true; + + public static final String EXCEL_VERSION_03_OR_EARLIER = "03_OR_EARLIER"; + + public static final String EXCEL_VERSION_07_OR_LATER = "07_OR_LATER"; + + /** + * 文件全限定名 + * */ + public static final String SOURCE_FILE = "sourceFile"; + + /** + * 单纯的文件名 + * */ + public static final String SOURCE_FILE_NAME = "sourceFileName"; + + public static final boolean DEFAULT_OUTPUT_SHEET_NAME = false; + + /** + * TODO 暂时先不考虑整个文件夹同步 + * 在同步音视频等二进制文件的情况下: + * 半结构读插件(txtfilreader, ftpreader, hdfsreader, ossreader)需要将相对文件路径注入 RELATIVE_SOURCE_FILE 属性 + * 目的是半结构化写插件可以统一使用 RELATIVE_SOURCE_FILE 获取到读端插件的所有二进制文件名及其相对路径。 + * 举个栗子: + * 读端插件PATH配置了/home/admin/myapp/ + */ + public static final String RELATIVE_SOURCE_FILE = "relativeSourceFile"; + + /** + * 默认读取二进制文件一次性读取的Byte数目: 1048576 Byte [1MB] + */ + public static final int DEFAULT_BLOCK_SIZE_IN_BYTE = 1048576; } diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/Key.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/Key.java index bb5bf59f..71e13ad2 100755 --- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/Key.java +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/Key.java @@ -28,4 +28,63 @@ public class Key { public static final String CSV_READER_CONFIG = "csvReaderConfig"; + public static final String MARK_DONE_FILE_NAME = "markDoneFileName"; + + public static final String MARK_DOING_FILE_NAME = "markDoingFileName"; + + // public static final String RETRY_TIME = "retryTime"; + public final static String 
MAX_RETRY_TIME = "maxRetryTime"; + + public final static String RETRY_INTERVAL = "retryInterval"; + + public static final String TEXT_READER_CONFIG = "textReaderConfig"; + + public static final String SKIP_EMPTY_RECORDS = "skipEmptyRecords"; + + public static final String EXCEL_READER_CONFIG = "excelReaderConfig"; + + public static final String EXCEL_SHEET_NAME = "excelSheetName"; + + public static final String VERSION = "version"; + + public static final String OUTPUT_SHEET_NAME = "outputSheetName"; + + /** + * csv or text or excel + */ + public static final String FILE_FORMAT = "fileFormat"; + + /** + * 是否把一个file当做一个column + */ + public static final String FILE_AS_COLUMN = "fileAsColumn"; + + /** + * 读取二进制文件一次性读取的Byte数目 + */ + public static final String BLOCK_SIZE_IN_BYTE = "blockSizeInByte"; + + /** + * 半结构化标示一个Record来源的绝对文件路径名,可以是ftp文件,oss的object等 + * */ + public static final String META_KEY_FILE_PATH = "filePath"; + + /** + * 多文件切分的工作项,Task通过此配置项表示工作内容, 文件内部切分相关key + */ + public static final String SPLIT_SLICE_CONFIG = "__splitSliceConfig"; + public static final String SPLIT_SLICE_FILE_PATH = "filePath"; + public static final String SPLIT_SLICE_START_POINT = "startPoint"; + public static final String SPLIT_SLICE_END_POINT = "endPoint"; + + /** + * tar.gz压缩包,支持配置 tarFileFilterPattern 参数,来过滤要同步的文件 + * For Example: + * "tarFileFilterPattern" : "*.dat" + * + * 同步的时候,只同步 tar.gz 里面文件名后缀为 .dat 的文件 + */ + public static final String TAR_FILE_FILTER_PATTERN = "tarFileFilterPattern"; + public static final String ENABLE_INNER_SPLIT = "enableInnerSplit"; + } diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/UnstructuredStorageReaderUtil.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/UnstructuredStorageReaderUtil.java index 423f66db..afcad851 100755 --- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/UnstructuredStorageReaderUtil.java +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/UnstructuredStorageReaderUtil.java @@ -5,9 +5,9 @@ import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.RecordSender; import com.alibaba.datax.common.plugin.TaskPluginCollector; import com.alibaba.datax.common.util.Configuration; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.JSONObject; -import com.alibaba.fastjson.TypeReference; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONObject; +import com.alibaba.fastjson2.TypeReference; import com.csvreader.CsvReader; import org.apache.commons.beanutils.BeanUtils; import io.airlift.compress.snappy.SnappyCodec; @@ -26,10 +26,7 @@ import org.slf4j.LoggerFactory; import java.io.*; import java.nio.charset.UnsupportedCharsetException; import java.text.DateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.List; +import java.util.*; public class UnstructuredStorageReaderUtil { private static final Logger LOG = LoggerFactory @@ -695,4 +692,27 @@ public class UnstructuredStorageReaderUtil { LOG.info(String.format("CsvReader使用默认值[%s],csvReaderConfig值为[%s]",JSON.toJSONString(csvReader),JSON.toJSONString(UnstructuredStorageReaderUtil.csvReaderConfigMap))); } } + + public static Map buildRecordMeta(String filePath) { + Map meta = new HashMap(); + // 上下文filePath元数据注入, 目前传递的是纯文件名 + // File file = new File(filePath); + // 
meta.put(Key.META_KEY_FILE_PATH, file.getName()); + meta.put(Key.META_KEY_FILE_PATH, filePath); + return meta; + } + + public static void setSourceFileName(Configuration configuration, List sourceFiles){ + List sourceFilesName = new ArrayList(); + File file; + for (String sourceFile: sourceFiles){ + file = new File(sourceFile); + sourceFilesName.add(file.getName()); + } + configuration.set(Constant.SOURCE_FILE_NAME, sourceFilesName); + } + + public static void setSourceFile(Configuration configuration, List sourceFiles){ + configuration.set(Constant.SOURCE_FILE, sourceFiles); + } } diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/binaryFileUtil/BinaryFileReaderUtil.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/binaryFileUtil/BinaryFileReaderUtil.java new file mode 100644 index 00000000..a7d846b3 --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/binaryFileUtil/BinaryFileReaderUtil.java @@ -0,0 +1,62 @@ +package com.alibaba.datax.plugin.unstructuredstorage.reader.binaryFileUtil; + +import com.alibaba.datax.common.element.BytesColumn; +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordSender; +import com.alibaba.datax.plugin.unstructuredstorage.reader.Key; +import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderErrorCode; +import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +/** + * @Author: guxuan + * @Date 2022-05-17 15:59 + */ +public class BinaryFileReaderUtil { + private static final Logger LOG = LoggerFactory.getLogger(BinaryFileReaderUtil.class); + + public static void readFromStream(InputStream inputStream, String filePath, RecordSender recordSender, int blockSizeInByte) { + try { + Map meta = UnstructuredStorageReaderUtil.buildRecordMeta(filePath); + byte[] tmp = new byte[blockSizeInByte]; + int len; + ByteUtils byteUtils = new ByteUtils(); + while ((len = inputStream.read(tmp)) != -1) { + /**如果len小于blockSizeInByte,说明已经读到了最后一个byte数组 + * 此时需要将byte数组长度调整为实际读到的字节数, + * 否则会导致写入目的文件字节数大于实际文件字节数, 有可能会导致文件损坏(比如pptx, docx等文件) + */ + // warn: 这里可以优化掉,没必要做一次数组拷贝,直接复用byte[] tmp即可 + byte[] readBytesArray = Arrays.copyOf(tmp, len); + byteUtils.append(readBytesArray); + if (byteUtils.getSize() >= blockSizeInByte) { + recordSenderBytesColumn(recordSender, byteUtils.getBuffer(), meta); + byteUtils.clear(); + } + } + recordSenderBytesColumn(recordSender, byteUtils.getBuffer(), meta); + LOG.info("End read!!!"); + } catch (IOException e) { + throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR, e); + } + } + + private static void recordSenderBytesColumn(RecordSender recordSender, byte[] tmp, Map meta){ + Record record = recordSender.createRecord(); + Column column = new BytesColumn(tmp); + record.addColumn(column); + record.setMeta(meta); + recordSender.sendToWriter(record); + } + + +} diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/binaryFileUtil/ByteUtils.java 
b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/binaryFileUtil/ByteUtils.java new file mode 100644 index 00000000..14ba9c47 --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/binaryFileUtil/ByteUtils.java @@ -0,0 +1,48 @@ +package com.alibaba.datax.plugin.unstructuredstorage.reader.binaryFileUtil; + +import java.util.Arrays; + +/** + * @Author: guxuan + * @Date 2022-05-17 16:00 + */ +public class ByteUtils { + + private int size; + private int kDefaultBufferSize = 0; + private byte[] buffer; + + public byte[] getBuffer() { + return buffer; + } + + public ByteUtils() { + buffer = new byte[0]; + size = 0; + } + + public long getSize() { + return size; + } + + public void setSize(int size) { + this.size = size; + } + + public ByteUtils append(byte[] buf) { + + if (buf == null){ + return this; + } + buffer = Arrays.copyOf(buffer, buffer.length + buf.length); + System.arraycopy(buf, 0, buffer, size, buf.length); + size += buf.length; + return this; + } + + public void clear() + { + buffer = new byte[kDefaultBufferSize]; + size = 0; + } +} diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/split/StartEndPair.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/split/StartEndPair.java new file mode 100644 index 00000000..aa021c99 --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/split/StartEndPair.java @@ -0,0 +1,49 @@ +package com.alibaba.datax.plugin.unstructuredstorage.reader.split; + +/** + * @Author: guxuan + * @Date 2022-05-17 15:50 + */ +public class StartEndPair { + private Long start; + private Long end; + private String filePath; + + public StartEndPair() { + } + + public StartEndPair(Long start, Long end, String filePath) { + this.start = start; + this.end = end; + this.filePath = filePath; + } + + public Long getEnd() { + return end; + } + + public void setEnd(Long end) { + this.end = end; + } + + public Long getStart() { + return start; + } + + public void setStart(Long start) { + this.start = start; + } + + public String getFilePath() { + return filePath; + } + + public void setFilePath(String filePath) { + this.filePath = filePath; + } + + @Override + public String toString() { + return "StartEndPair [start=" + start + ", end=" + end + ", filePath=" + filePath + "]"; + } +} diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/split/UnstructuredSplitUtil.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/split/UnstructuredSplitUtil.java new file mode 100644 index 00000000..4e42583d --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/reader/split/UnstructuredSplitUtil.java @@ -0,0 +1,191 @@ +package com.alibaba.datax.plugin.unstructuredstorage.reader.split; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.RangeSplitUtil; +import com.alibaba.datax.plugin.unstructuredstorage.reader.Key; +import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderErrorCode; +import com.alibaba.fastjson2.JSON; +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.tuple.ImmutableTriple; 
+import org.apache.commons.lang3.tuple.Triple; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +/** + * @Author: guxuan + * @Date 2022-05-17 15:49 + */ +public abstract class UnstructuredSplitUtil { + private static final Logger LOG = LoggerFactory.getLogger(UnstructuredSplitUtil.class); + private boolean needInnerSplit; + // 对每个文件进行切分的块大小是 64MB + // warn: 这个最好弄成可配置的, 用户配置channel为2但是有10个文件,不一定需要文件内部切分; + // 弄成可配置的有些情况下可以避免文件内部切分切分的task太碎 + private static final Long BLOCK_BYTE_CAPACITY = 64 * FileUtils.ONE_MB; + + public UnstructuredSplitUtil(boolean needInnerSplit) { + this.needInnerSplit = needInnerSplit; + } + + public List getSplitConfiguration(Configuration originConfiguration, List sourceObjectList, + int adviceNumber) { + + List splitConfiguration = new ArrayList(); + List regulateSplitStartEndPairList = new ArrayList(); + + for (String object : sourceObjectList) { + boolean realNeedInnerSplit = false; + Long contentTotalLength = -1L; + if (this.needInnerSplit) { + // 减少不必要的oss接口调用 + contentTotalLength = this.getFileTotalLength(object); + if (isNeedSplit(contentTotalLength)) { + realNeedInnerSplit = true; + } + } + // warn: 数据读模式允许文件内部切分,并且文件大小满足 + if (realNeedInnerSplit) { + List startEndPairList = getSplitStartEndPairList(contentTotalLength, object); + List> startEndInputStreamTripleList = new ArrayList>(); + for (int i = 0; i < startEndPairList.size(); i++) { + StartEndPair startEndPair = startEndPairList.get(i); + InputStream inputStream = this.getFileInputStream(startEndPair); + Triple startEndInputStreamTriple = new ImmutableTriple( + startEndPair.getStart(), startEndPair.getEnd(), inputStream); + startEndInputStreamTripleList.add(startEndInputStreamTriple); + } + regulateSplitStartEndPairList.addAll(regulateSplitStartEndPair(startEndInputStreamTripleList, object)); + } else { + // 如果指定的Range无效(比如开始位置、结束位置为负数,大于文件大小),则会下载整个文件; + StartEndPair startEndPair = new StartEndPair(0L, -1L, object); + regulateSplitStartEndPairList.add(startEndPair); + } + } + + // merge task 将多个文件merge到一个task中执行 + List> splitResult = RangeSplitUtil.doListSplit(regulateSplitStartEndPairList, adviceNumber); + // at here this.objects is not null and not empty + for (List eachSlice : splitResult) { + Configuration splitedConfig = originConfiguration.clone(); + splitedConfig.set(Key.SPLIT_SLICE_CONFIG, eachSlice); + splitConfiguration.add(splitedConfig); + LOG.info(String.format("File to be read:%s", JSON.toJSONString(eachSlice))); + } + return splitConfiguration; + } + + /** + * 对原始的切分点位进行调节校准, 将点位落在每一行数据的换行符处 + * + * @param startEndInputStreamTripleList + * 原始的切分点位及inputstream (start, end, inputStream) + * @return + */ + private List regulateSplitStartEndPair( + List> startEndInputStreamTripleList, String filePath) { + List regulatedStartEndPairList = new ArrayList(); + + for (int i = 0; i < startEndInputStreamTripleList.size(); i++) { + if (i == 0) { + Triple firstBlock = startEndInputStreamTripleList.get(i); + StartEndPair startEndPair = new StartEndPair(firstBlock.getLeft(), null, filePath); + regulatedStartEndPairList.add(startEndPair); + continue; + } + Triple block = startEndInputStreamTripleList.get(i); + long start = block.getLeft(); + long offset = 0; + // 对切分点位进行调节,将切分起始点移动到行尾(即'\n'上) + if (i < startEndInputStreamTripleList.size()) { + offset = getLFIndex(block.getRight()); + } + // 调节正确的切分点位 + long regulatedPoint = start + offset; + // 将上一个block的末尾点位调节成行尾 + 
regulatedStartEndPairList.get(i - 1).setEnd(regulatedPoint); + if (i < startEndInputStreamTripleList.size() - 1) { + // 将本block起始点位进行调节, 结束点位暂不调节 + regulatedStartEndPairList.add(new StartEndPair(regulatedPoint + 1, null, filePath)); + } else { + // 调节最后一个block, 调节起始点位, 结束点位就用文件的字节总长度 + regulatedStartEndPairList.add(new StartEndPair(regulatedPoint + 1, block.getMiddle(), filePath)); + } + } + return regulatedStartEndPairList; + } + + /** + * 获取到输入流开始的第一个'\n'偏移量, 如果向后偏移了ByteCapacity个字节,还是没有找到'\n'的话,则抛出异常 注: + * 对文件切分的最后一个分块不会调用该方法 + * + * @param inputStream + * 输入流 + * @return + */ + private Long getLFIndex(InputStream inputStream) { + Long hasReadByteIndex = -1L; + int ch = 0; + while (ch != -1) { + try { + ch = inputStream.read(); + } catch (IOException e) { + throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR, + String.format("inputstream read Byte has exception: %s", e.getMessage()), e); + } + hasReadByteIndex++; + if (ch == '\n') { + return hasReadByteIndex; + } + } + return hasReadByteIndex; + } + + /** + * 得到一个文件最多能拆分成的份数 + * + * @param fileTotalLength + * @return + */ + private List getSplitStartEndPairList(Long fileTotalLength, String filePath) { + long splitNum = (long) Math.ceil(fileTotalLength * 1.0 / BLOCK_BYTE_CAPACITY); + List startEndPairList = new ArrayList(); + long start, end; + for (int i = 1; i <= splitNum; i++) { + if (i == 1) { + start = (i - 1) * BLOCK_BYTE_CAPACITY; + end = i * BLOCK_BYTE_CAPACITY; + } else if (i < splitNum) { + start = (i - 1) * BLOCK_BYTE_CAPACITY + 1; + end = i * BLOCK_BYTE_CAPACITY; + } else { + start = (i - 1) * BLOCK_BYTE_CAPACITY + 1; + end = fileTotalLength - 1; + } + StartEndPair startEndPair = new StartEndPair(start, end, filePath); + startEndPairList.add(startEndPair); + } + return startEndPairList; + } + + /** + * 判断文件是否需要切分, 切分的条件是必须要大于 transport.channel.byteCapacity + * + * @param fileTotalLength: + * 文件总字节数 + * @return + */ + private boolean isNeedSplit(Long fileTotalLength) { + boolean fileSizeCouldSplit = fileTotalLength > BLOCK_BYTE_CAPACITY ? 
true : false; + return fileSizeCouldSplit && this.needInnerSplit; + } + + public abstract Long getFileTotalLength(String filePath); + + public abstract InputStream getFileInputStream(StartEndPair startEndPair); +} diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/util/ColumnTypeUtil.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/util/ColumnTypeUtil.java new file mode 100644 index 00000000..a03bf07e --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/util/ColumnTypeUtil.java @@ -0,0 +1,90 @@ +package com.alibaba.datax.plugin.unstructuredstorage.util; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.unstructuredstorage.reader.ColumnEntry; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONObject; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +/** + * @Author: guxuan + * @Date 2022-05-17 16:40 + */ +public class ColumnTypeUtil { + + private static final String TYPE_NAME = "decimal"; + private static final String LEFT_BRACKETS = "("; + private static final String RIGHT_BRACKETS = ")"; + private static final String DELIM = ","; + + public static boolean isDecimalType(String typeName){ + return typeName.toLowerCase().startsWith(TYPE_NAME); + } + + public static DecimalInfo getDecimalInfo(String typeName, DecimalInfo defaultInfo){ + if(!isDecimalType(typeName)){ + throw new IllegalArgumentException("Unsupported column type:" + typeName); + } + + if (typeName.contains(LEFT_BRACKETS) && typeName.contains(RIGHT_BRACKETS)){ + int precision = Integer.parseInt(typeName.substring(typeName.indexOf(LEFT_BRACKETS) + 1,typeName.indexOf(DELIM)).trim()); + int scale = Integer.parseInt(typeName.substring(typeName.indexOf(DELIM) + 1,typeName.indexOf(RIGHT_BRACKETS)).trim()); + return new DecimalInfo(precision, scale); + } else { + return defaultInfo; + } + } + + public static class DecimalInfo { + private int precision; + private int scale; + + public DecimalInfo(int precision, int scale) { + this.precision = precision; + this.scale = scale; + } + + public int getPrecision() { + return precision; + } + + public int getScale() { + return scale; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (o == null || getClass() != o.getClass()){ + return false; + + } + DecimalInfo that = (DecimalInfo) o; + return precision == that.precision && scale == that.scale; + } + + @Override + public int hashCode() { + return Objects.hash(precision, scale); + } + } + + public static List getListColumnEntry( + Configuration configuration, final String path) { + List lists = configuration.getList(path, JSONObject.class); + if (lists == null) { + return null; + } + List result = new ArrayList<>(); + for (final JSONObject object : lists) { + result.add(JSON.parseObject(object.toJSONString(), ColumnEntry.class)); + } + return result; + } +} diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/util/HdfsUtil.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/util/HdfsUtil.java new file mode 100644 index 00000000..4098ff1d --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/util/HdfsUtil.java @@ -0,0 +1,16 @@ +package com.alibaba.datax.plugin.unstructuredstorage.util; + + 
+public class HdfsUtil { + private static final double SCALE_TWO = 2.0; + private static final double SCALE_TEN = 10.0; + private static final int BIT_SIZE = 8; + public static int computeMinBytesForPrecision(int precision){ + + int numBytes = 1; + while (Math.pow(SCALE_TWO, BIT_SIZE * numBytes - 1.0) < Math.pow(SCALE_TEN, precision)) { + numBytes += 1; + } + return numBytes; + } +} diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/Constant.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/Constant.java index 93b4baa9..092fbfd7 100755 --- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/Constant.java +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/Constant.java @@ -11,9 +11,31 @@ public class Constant { public static final String FILE_FORMAT_CSV = "csv"; public static final String FILE_FORMAT_TEXT = "text"; - - //每个分块10MB,最大10000个分块 - public static final Long MAX_FILE_SIZE = 1024 * 1024 * 10 * 10000L; - + + //每个分块10MB,最大10000个分块, MAX_FILE_SIZE 单位: MB + public static final Long MAX_FILE_SIZE = 10 * 10000L; + public static final String DEFAULT_SUFFIX = ""; + + public static final String TRUNCATE = "truncate"; + public static final String APPEND = "append"; + public static final String NOCONFLICT = "nonConflict"; + + /** + * 在同步音视频等二进制文件的情况下: + * 半结构化写插件可以统一使用 SOURCE_FILE 获取到读端插件的split file路径 + */ + public static final String SOURCE_FILE = "sourceFile"; + + public static final String SOURCE_FILE_NAME = "sourceFileName"; + + /** + * 是否是音视频等无结构化文件 + */ + public static final String BINARY = "binary"; + + /** + * 文件同步模式, 如果是copy表示纯文件拷贝 + * */ + public static final String SYNC_MODE_VALUE_COPY = "copy"; } diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/DataXCsvWriter.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/DataXCsvWriter.java new file mode 100644 index 00000000..85deea2c --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/DataXCsvWriter.java @@ -0,0 +1,296 @@ +package com.alibaba.datax.plugin.unstructuredstorage.writer; + +import org.apache.commons.io.IOUtils; + +import java.io.IOException; +import java.io.Writer; + +/** + * @Author: guxuan + * @Date 2022-05-19 10:44 + */ +public class DataXCsvWriter { + private Writer writer; + @SuppressWarnings("unused") + private String fileName; + private boolean firstColumn; + private boolean useCustomRecordDelimiter; + private UserSettings userSettings; + private boolean initialized; + private boolean closed; + public static final int ESCAPE_MODE_DOUBLED = 1; + public static final int ESCAPE_MODE_BACKSLASH = 2; + + public DataXCsvWriter(Writer writer, char delimiter) { + this.writer = null; + this.fileName = null; + this.firstColumn = true; + this.useCustomRecordDelimiter = false; + this.userSettings = new UserSettings(); + this.initialized = false; + this.closed = false; + if(writer == null) { + throw new IllegalArgumentException("Parameter writer can not be null."); + } else { + this.writer = writer; + this.userSettings.Delimiter = delimiter; + this.initialized = true; + } + } + + public char getDelimiter() { + return this.userSettings.Delimiter; + } + + public void setDelimiter(char var1) { + this.userSettings.Delimiter = var1; + } + + 
public char getRecordDelimiter() { + return this.userSettings.RecordDelimiter; + } + + public void setRecordDelimiter(char var1) { + this.useCustomRecordDelimiter = true; + this.userSettings.RecordDelimiter = var1; + } + + public char getTextQualifier() { + return this.userSettings.TextQualifier; + } + + public void setTextQualifier(char var1) { + this.userSettings.TextQualifier = var1; + } + + public boolean getUseTextQualifier() { + return this.userSettings.UseTextQualifier; + } + + public void setUseTextQualifier(boolean var1) { + this.userSettings.UseTextQualifier = var1; + } + + public int getEscapeMode() { + return this.userSettings.EscapeMode; + } + + public void setEscapeMode(int var1) { + this.userSettings.EscapeMode = var1; + } + + public void setComment(char var1) { + this.userSettings.Comment = var1; + } + + public char getComment() { + return this.userSettings.Comment; + } + + public boolean getForceQualifier() { + return this.userSettings.ForceQualifier; + } + + public void setForceQualifier(boolean var1) { + this.userSettings.ForceQualifier = var1; + } + + public void write(String var1, boolean var2) throws IOException { + this.checkClosed(); + if(var1 == null) { + var1 = ""; + } + + if(!this.firstColumn) { + this.writer.write(this.userSettings.Delimiter); + } + + boolean var3 = this.userSettings.ForceQualifier; + if(!var2 && var1.length() > 0) { + var1 = var1.trim(); + } + + if(!var3 && this.userSettings.UseTextQualifier && (var1.indexOf(this.userSettings.TextQualifier) > -1 || var1.indexOf(this.userSettings.Delimiter) > -1 || !this.useCustomRecordDelimiter && (var1.indexOf(10) > -1 || var1.indexOf(13) > -1) || this.useCustomRecordDelimiter && var1.indexOf(this.userSettings.RecordDelimiter) > -1 || this.firstColumn && var1.length() > 0 && var1.charAt(0) == this.userSettings.Comment || this.firstColumn && var1.length() == 0)) { + var3 = true; + } + + if(this.userSettings.UseTextQualifier && !var3 && var1.length() > 0 && var2) { + char var4 = var1.charAt(0); + if(var4 == 32 || var4 == 9) { + var3 = true; + } + + if(!var3 && var1.length() > 1) { + char var5 = var1.charAt(var1.length() - 1); + if(var5 == 32 || var5 == 9) { + var3 = true; + } + } + } + + if(var3) { + this.writer.write(this.userSettings.TextQualifier); + if(this.userSettings.EscapeMode == 2) { + var1 = replace(var1, "\\", "\\\\"); + var1 = replace(var1, "" + this.userSettings.TextQualifier, "\\" + this.userSettings.TextQualifier); + } else { + var1 = replace(var1, "" + this.userSettings.TextQualifier, "" + this.userSettings.TextQualifier + this.userSettings.TextQualifier); + } + } else if(this.userSettings.EscapeMode == 2) { + var1 = replace(var1, "\\", "\\\\"); + var1 = replace(var1, "" + this.userSettings.Delimiter, "\\" + this.userSettings.Delimiter); + if(this.useCustomRecordDelimiter) { + var1 = replace(var1, "" + this.userSettings.RecordDelimiter, "\\" + this.userSettings.RecordDelimiter); + } else { + var1 = replace(var1, "\r", "\\\r"); + var1 = replace(var1, "\n", "\\\n"); + } + + if(this.firstColumn && var1.length() > 0 && var1.charAt(0) == this.userSettings.Comment) { + if(var1.length() > 1) { + var1 = "\\" + this.userSettings.Comment + var1.substring(1); + } else { + var1 = "\\" + this.userSettings.Comment; + } + } + } + + this.writer.write(var1); + if(var3) { + this.writer.write(this.userSettings.TextQualifier); + } + + this.firstColumn = false; + } + + public void write(String var1) throws IOException { + this.write(var1, false); + } + + public void writeComment(String var1) throws IOException { + 
this.checkClosed(); + this.writer.write(this.userSettings.Comment); + this.writer.write(var1); + if(this.useCustomRecordDelimiter) { + this.writer.write(this.userSettings.RecordDelimiter); + } else { + this.writer.write(IOUtils.LINE_SEPARATOR); + } + + this.firstColumn = true; + } + + public void writeRecord(String[] var1, boolean var2) throws IOException { + if(var1 != null && var1.length > 0) { + for(int var3 = 0; var3 < var1.length; ++var3) { + this.write(var1[var3], var2); + } + + this.endRecord(); + } + + } + + public void writeRecord(String[] var1) throws IOException { + this.writeRecord(var1, false); + } + + public void endRecord() throws IOException { + this.checkClosed(); + if(this.useCustomRecordDelimiter) { + this.writer.write(this.userSettings.RecordDelimiter); + } else { + this.writer.write(IOUtils.LINE_SEPARATOR); + } + + this.firstColumn = true; + } + + public void flush() throws IOException { + this.writer.flush(); + } + + public void close() { + if(!this.closed) { + this.close(true); + this.closed = true; + } + + } + + private void close(boolean var1) { + if(!this.closed) { + try { + if(this.initialized) { + this.writer.close(); + } + } catch (Exception var3) { + ; + } + + this.writer = null; + this.closed = true; + } + + } + + private void checkClosed() throws IOException { + if(this.closed) { + throw new IOException("This instance of the CsvWriter class has already been closed."); + } + } + + @Override + protected void finalize() { + this.close(false); + } + + public static String replace(String var0, String var1, String var2) { + int var3 = var1.length(); + int var4 = var0.indexOf(var1); + if(var4 <= -1) { + return var0; + } else { + StringBuffer var5 = new StringBuffer(); + + int var6; + for(var6 = 0; var4 != -1; var4 = var0.indexOf(var1, var6)) { + var5.append(var0.substring(var6, var4)); + var5.append(var2); + var6 = var4 + var3; + } + + var5.append(var0.substring(var6)); + return var5.toString(); + } + } + + private class UserSettings { + public char TextQualifier = 34; + public boolean UseTextQualifier = true; + public char Delimiter = 44; + public char RecordDelimiter = 0; + public char Comment = 35; + public int EscapeMode = 1; + public boolean ForceQualifier = false; + + public UserSettings() { + } + } + + @SuppressWarnings("unused") + private class Letters { + public static final char LF = '\n'; + public static final char CR = '\r'; + public static final char QUOTE = '\"'; + public static final char COMMA = ','; + public static final char SPACE = ' '; + public static final char TAB = '\t'; + public static final char POUND = '#'; + public static final char BACKSLASH = '\\'; + public static final char NULL = '\u0000'; + + private Letters() { + } + } +} diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/Key.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/Key.java index 2e7fe079..125957f1 100755 --- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/Key.java +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/Key.java @@ -1,6 +1,7 @@ package com.alibaba.datax.plugin.unstructuredstorage.writer; public class Key { + public static final String PATH = "path"; // must have public static final String FILE_NAME = "fileName"; @@ -10,6 +11,11 @@ public class Key { // not must , not default , public static final String FIELD_DELIMITER = "fieldDelimiter"; + 
// not must , default os's line delimiter + public static final String LINE_DELIMITER = "lineDelimiter"; + + public static final String CSV_WRITER_CONFIG = "csvWriterConfig"; + // not must, default UTF-8 public static final String ENCODING = "encoding"; @@ -35,4 +41,32 @@ public class Key { // writer file type suffix, like .txt .csv public static final String SUFFIX = "suffix"; + + public static final String MARK_DONE_FILE_NAME = "markDoneFileName"; + + public static final String MARK_DOING_FILE_NAME = "markDoingFileName"; + + // public static final String RETRY_TIME = "retryTime"; + + public final static String MAX_RETRY_TIME = "maxRetryTime"; + + /** + * 半结构化标示一个Record来源的绝对文件路径名,可以是ftp文件,oss的object等 + * */ + public static final String META_KEY_FILE_PATH = "filePath"; + + /** + * 多文件切分的工作项,Task通过此配置项表示工作内容, 文件内部切分相关key + */ + public static final String SPLIT_SLICE_CONFIG = "__splitSliceConfig"; + public static final String SPLIT_SLICE_FILE_PATH = "filePath"; + public static final String SPLIT_SLICE_START_POINT = "startPoint"; + public static final String SPLIT_SLICE_END_POINT = "endPoint"; + + /** + * 文件同步模式, 如果是copy表示纯文件拷贝 + * */ + public static final String SYNC_MODE = "syncMode"; + + public static final String BYTE_ENCODING = "byteEncoding"; } diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/TextCsvWriterManager.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/TextCsvWriterManager.java index 1ea82759..4a9b9197 100644 --- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/TextCsvWriterManager.java +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/TextCsvWriterManager.java @@ -2,8 +2,13 @@ package com.alibaba.datax.plugin.unstructuredstorage.writer; import java.io.IOException; import java.io.Writer; +import java.util.HashMap; import java.util.List; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.TypeReference; +import org.apache.commons.beanutils.BeanUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; @@ -11,15 +16,15 @@ import org.slf4j.LoggerFactory; import com.csvreader.CsvWriter; + public class TextCsvWriterManager { - public static UnstructuredWriter produceUnstructuredWriter( - String fileFormat, char fieldDelimiter, Writer writer) { - // warn: false means plain text(old way), true means strict csv format - if (Constant.FILE_FORMAT_TEXT.equals(fileFormat)) { - return new TextWriterImpl(writer, fieldDelimiter); - } else { - return new CsvWriterImpl(writer, fieldDelimiter); - } + + public static UnstructuredWriter produceTextWriter( Writer writer, String fieldDelimiter, Configuration config) { + return new TextWriterImpl(writer, fieldDelimiter, config); + } + + public static UnstructuredWriter produceCsvWriter( Writer writer, char fieldDelimiter, Configuration config) { + return new CsvWriterImpl(writer, fieldDelimiter, config); } } @@ -28,15 +33,40 @@ class CsvWriterImpl implements UnstructuredWriter { .getLogger(CsvWriterImpl.class); // csv 严格符合csv语法, 有标准的转义等处理 private char fieldDelimiter; - private CsvWriter csvWriter; + private String lineDelimiter; + private DataXCsvWriter csvWriter; - public CsvWriterImpl(Writer writer, char fieldDelimiter) { + public CsvWriterImpl(Writer writer, char fieldDelimiter, Configuration config) { 
this.fieldDelimiter = fieldDelimiter; - this.csvWriter = new CsvWriter(writer, this.fieldDelimiter); + this.lineDelimiter = config.getString(Key.LINE_DELIMITER, IOUtils.LINE_SEPARATOR); + this.csvWriter = new DataXCsvWriter(writer, this.fieldDelimiter); this.csvWriter.setTextQualifier('"'); this.csvWriter.setUseTextQualifier(true); // warn: in linux is \n , in windows is \r\n - this.csvWriter.setRecordDelimiter(IOUtils.LINE_SEPARATOR.charAt(0)); + this.csvWriter.setRecordDelimiter(this.lineDelimiter.charAt(0)); + + String csvWriterConfig = config.getString(Key.CSV_WRITER_CONFIG); + if (StringUtils.isNotBlank(csvWriterConfig)) { + try { + HashMap csvWriterConfigMap = JSON.parseObject(csvWriterConfig, + new TypeReference>() { + }); + if (!csvWriterConfigMap.isEmpty()) { + // this.csvWriter.setComment(var1); + // this.csvWriter.setDelimiter(var1); + // this.csvWriter.setEscapeMode(var1); + // this.csvWriter.setForceQualifier(var1); + // this.csvWriter.setRecordDelimiter(var1); + // this.csvWriter.setTextQualifier(var1); + // this.csvWriter.setUseTextQualifier(var1); + BeanUtils.populate(this.csvWriter, csvWriterConfigMap); + LOG.info(String.format("csvwriterConfig is set successfully. After setting, csvwriter:%s", JSON.toJSONString(this.csvWriter))); + } + } catch (Exception e) { + LOG.warn(String.format("invalid csvWriterConfig config: %s, DataX will ignore it.", csvWriterConfig), + e); + } + } } @Override @@ -44,8 +74,7 @@ class CsvWriterImpl implements UnstructuredWriter { if (splitedRows.isEmpty()) { LOG.info("Found one record line which is empty."); } - this.csvWriter.writeRecord((String[]) splitedRows - .toArray(new String[0])); + this.csvWriter.writeRecord(splitedRows.toArray(new String[0])); } @Override @@ -64,12 +93,14 @@ class TextWriterImpl implements UnstructuredWriter { private static final Logger LOG = LoggerFactory .getLogger(TextWriterImpl.class); // text StringUtils的join方式, 简单的字符串拼接 - private char fieldDelimiter; + private String fieldDelimiter; private Writer textWriter; + private String lineDelimiter; - public TextWriterImpl(Writer writer, char fieldDelimiter) { + public TextWriterImpl(Writer writer, String fieldDelimiter, Configuration config) { this.fieldDelimiter = fieldDelimiter; this.textWriter = writer; + this.lineDelimiter = config.getString(Key.LINE_DELIMITER, IOUtils.LINE_SEPARATOR); } @Override @@ -79,7 +110,7 @@ class TextWriterImpl implements UnstructuredWriter { } this.textWriter.write(String.format("%s%s", StringUtils.join(splitedRows, this.fieldDelimiter), - IOUtils.LINE_SEPARATOR)); + this.lineDelimiter)); } @Override diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterErrorCode.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterErrorCode.java index 0f780ebd..b83cfa1c 100755 --- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterErrorCode.java +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterErrorCode.java @@ -8,7 +8,8 @@ public enum UnstructuredStorageWriterErrorCode implements ErrorCode { Write_FILE_WITH_CHARSET_ERROR("UnstructuredStorageWriter-01", "您配置的编码未能正常写入."), Write_FILE_IO_ERROR("UnstructuredStorageWriter-02", "您配置的文件在写入时出现IO异常."), RUNTIME_EXCEPTION("UnstructuredStorageWriter-03", "出现运行时异常, 请联系我们"), - 
REQUIRED_VALUE("UnstructuredStorageWriter-04", "您缺失了必须填写的参数值."),; + REQUIRED_VALUE("UnstructuredStorageWriter-04", "您缺失了必须填写的参数值."), + Write_ERROR("UnstructuredStorageWriter-05", "errorcode.write_error"),; private final String code; private final String description; diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterUtil.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterUtil.java index b1927ce7..e9040662 100755 --- a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterUtil.java +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/UnstructuredStorageWriterUtil.java @@ -1,10 +1,6 @@ package com.alibaba.datax.plugin.unstructuredstorage.writer; -import java.io.BufferedWriter; -import java.io.IOException; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.UnsupportedEncodingException; +import java.io.*; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -13,6 +9,8 @@ import java.util.List; import java.util.Set; import java.util.UUID; +import com.alibaba.datax.common.element.BytesColumn; +import org.apache.commons.codec.binary.Base64; import org.apache.commons.compress.compressors.CompressorOutputStream; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; @@ -53,10 +51,7 @@ public class UnstructuredStorageWriterUtil { if (!supportedWriteModes.contains(writeMode)) { throw DataXException .asDataXException( - UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, - String.format( - "仅支持 truncate, append, nonConflict 三种模式, 不支持您配置的 writeMode 模式 : [%s]", - writeMode)); + UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, writeMode); } writerConfiguration.set(Key.WRITE_MODE, writeMode); @@ -64,8 +59,6 @@ public class UnstructuredStorageWriterUtil { String encoding = writerConfiguration.getString(Key.ENCODING); if (StringUtils.isBlank(encoding)) { // like " ", null - LOG.warn(String.format("您的encoding配置为空, 将使用默认值[%s]", - Constant.DEFAULT_ENCODING)); writerConfiguration.set(Key.ENCODING, Constant.DEFAULT_ENCODING); } else { try { @@ -74,8 +67,7 @@ public class UnstructuredStorageWriterUtil { Charsets.toCharset(encoding); } catch (Exception e) { throw DataXException.asDataXException( - UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, - String.format("不支持您配置的编码格式:[%s]", encoding), e); + UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, e); } } @@ -86,45 +78,40 @@ public class UnstructuredStorageWriterUtil { } else { Set supportedCompress = Sets.newHashSet("gzip", "bzip2"); if (!supportedCompress.contains(compress.toLowerCase().trim())) { - String message = String.format( - "仅支持 [%s] 文件压缩格式 , 不支持您配置的文件压缩格式: [%s]", - StringUtils.join(supportedCompress, ","), compress); throw DataXException.asDataXException( - UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, - String.format(message, compress)); + UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, String.format("unsupported commpress format %s ", compress)); } } - // fieldDelimiter check - String delimiterInStr = writerConfiguration - .getString(Key.FIELD_DELIMITER); - // warn: if have, length must be one - if (null != delimiterInStr && 1 != delimiterInStr.length()) { - throw 
DataXException.asDataXException( - UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, - String.format("仅仅支持单字符切分, 您配置的切分为 : [%s]", delimiterInStr)); - } - if (null == delimiterInStr) { - LOG.warn(String.format("您没有配置列分隔符, 使用默认值[%s]", - Constant.DEFAULT_FIELD_DELIMITER)); - writerConfiguration.set(Key.FIELD_DELIMITER, - Constant.DEFAULT_FIELD_DELIMITER); - } - // fileFormat check - String fileFormat = writerConfiguration.getString(Key.FILE_FORMAT, - Constant.FILE_FORMAT_TEXT); + String fileFormat = writerConfiguration.getString(Key.FILE_FORMAT); + if (StringUtils.isBlank(fileFormat)) { + fileFormat = Constant.FILE_FORMAT_TEXT; + writerConfiguration.set(Key.FILE_FORMAT, fileFormat); + } if (!Constant.FILE_FORMAT_CSV.equals(fileFormat) && !Constant.FILE_FORMAT_TEXT.equals(fileFormat)) { throw DataXException.asDataXException( - UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, String - .format("您配置的fileFormat [%s]错误, 支持csv, text两种.", - fileFormat)); + UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, String.format("unsupported fileFormat %s ", fileFormat)); + } + + // fieldDelimiter check + String delimiterInStr = writerConfiguration.getString(Key.FIELD_DELIMITER); + + if (StringUtils.equalsIgnoreCase(fileFormat, Constant.FILE_FORMAT_CSV) && + null != delimiterInStr && 1 != delimiterInStr.length()) { + throw DataXException.asDataXException( + UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, + String.format("unsupported delimiterInStr %s ", delimiterInStr)); + } + if (null == delimiterInStr) { + delimiterInStr = String.valueOf(Constant.DEFAULT_FIELD_DELIMITER); + writerConfiguration.set(Key.FIELD_DELIMITER, delimiterInStr); } } public static List split(Configuration writerSliceConfig, - Set originAllFileExists, int mandatoryNumber) { + Set originAllFileExists, int mandatoryNumber) { LOG.info("begin do split..."); Set allFileExists = new HashSet(); allFileExists.addAll(originAllFileExists); @@ -153,19 +140,19 @@ public class UnstructuredStorageWriterUtil { } public static String buildFilePath(String path, String fileName, - String suffix) { + String suffix) { boolean isEndWithSeparator = false; switch (IOUtils.DIR_SEPARATOR) { - case IOUtils.DIR_SEPARATOR_UNIX: - isEndWithSeparator = path.endsWith(String - .valueOf(IOUtils.DIR_SEPARATOR)); - break; - case IOUtils.DIR_SEPARATOR_WINDOWS: - isEndWithSeparator = path.endsWith(String - .valueOf(IOUtils.DIR_SEPARATOR_WINDOWS)); - break; - default: - break; + case IOUtils.DIR_SEPARATOR_UNIX: + isEndWithSeparator = path.endsWith(String + .valueOf(IOUtils.DIR_SEPARATOR)); + break; + case IOUtils.DIR_SEPARATOR_WINDOWS: + isEndWithSeparator = path.endsWith(String + .valueOf(IOUtils.DIR_SEPARATOR_WINDOWS)); + break; + default: + break; } if (!isEndWithSeparator) { path = path + IOUtils.DIR_SEPARATOR; @@ -179,14 +166,12 @@ public class UnstructuredStorageWriterUtil { } public static void writeToStream(RecordReceiver lineReceiver, - OutputStream outputStream, Configuration config, String context, - TaskPluginCollector taskPluginCollector) { + OutputStream outputStream, Configuration config, String context, + TaskPluginCollector taskPluginCollector) { String encoding = config.getString(Key.ENCODING, Constant.DEFAULT_ENCODING); // handle blank encoding if (StringUtils.isBlank(encoding)) { - LOG.warn(String.format("您配置的encoding为[%s], 使用默认值[%s]", encoding, - Constant.DEFAULT_ENCODING)); encoding = Constant.DEFAULT_ENCODING; } String compress = config.getString(Key.COMPRESS); @@ -212,10 +197,7 @@ public class UnstructuredStorageWriterUtil { } else { throw 
DataXException .asDataXException( - UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, - String.format( - "仅支持 gzip, bzip2 文件压缩格式 , 不支持您配置的文件压缩格式: [%s]", - compress)); + UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, compress); } } UnstructuredStorageWriterUtil.doWriteToStream(lineReceiver, writer, @@ -223,24 +205,21 @@ public class UnstructuredStorageWriterUtil { } catch (UnsupportedEncodingException uee) { throw DataXException .asDataXException( - UnstructuredStorageWriterErrorCode.Write_FILE_WITH_CHARSET_ERROR, - String.format("不支持的编码格式 : [%s]", encoding), uee); + UnstructuredStorageWriterErrorCode.Write_FILE_WITH_CHARSET_ERROR, uee); } catch (NullPointerException e) { throw DataXException.asDataXException( - UnstructuredStorageWriterErrorCode.RUNTIME_EXCEPTION, - "运行时错误, 请联系我们", e); + UnstructuredStorageWriterErrorCode.RUNTIME_EXCEPTION,e); } catch (IOException e) { throw DataXException.asDataXException( - UnstructuredStorageWriterErrorCode.Write_FILE_IO_ERROR, - String.format("流写入错误 : [%s]", context), e); + UnstructuredStorageWriterErrorCode.Write_FILE_IO_ERROR, e); } finally { IOUtils.closeQuietly(writer); } } private static void doWriteToStream(RecordReceiver lineReceiver, - BufferedWriter writer, String contex, Configuration config, - TaskPluginCollector taskPluginCollector) throws IOException { + BufferedWriter writer, String contex, Configuration config, + TaskPluginCollector taskPluginCollector) throws IOException { String nullFormat = config.getString(Key.NULL_FORMAT); @@ -252,26 +231,9 @@ public class UnstructuredStorageWriterUtil { } // warn: default false - String fileFormat = config.getString(Key.FILE_FORMAT, - Constant.FILE_FORMAT_TEXT); + String fileFormat = config.getString(Key.FILE_FORMAT, Constant.FILE_FORMAT_TEXT); - String delimiterInStr = config.getString(Key.FIELD_DELIMITER); - if (null != delimiterInStr && 1 != delimiterInStr.length()) { - throw DataXException.asDataXException( - UnstructuredStorageWriterErrorCode.ILLEGAL_VALUE, - String.format("仅仅支持单字符切分, 您配置的切分为 : [%s]", delimiterInStr)); - } - if (null == delimiterInStr) { - LOG.warn(String.format("您没有配置列分隔符, 使用默认值[%s]", - Constant.DEFAULT_FIELD_DELIMITER)); - } - - // warn: fieldDelimiter could not be '' for no fieldDelimiter - char fieldDelimiter = config.getChar(Key.FIELD_DELIMITER, - Constant.DEFAULT_FIELD_DELIMITER); - - UnstructuredWriter unstructuredWriter = TextCsvWriterManager - .produceUnstructuredWriter(fileFormat, fieldDelimiter, writer); + UnstructuredWriter unstructuredWriter = produceUnstructuredWriter(fileFormat, config, writer); List headers = config.getList(Key.HEADER, String.class); if (null != headers && !headers.isEmpty()) { @@ -279,22 +241,38 @@ public class UnstructuredStorageWriterUtil { } Record record = null; + String byteEncoding = config.getString(Key.BYTE_ENCODING); while ((record = lineReceiver.getFromReader()) != null) { UnstructuredStorageWriterUtil.transportOneRecord(record, nullFormat, dateParse, taskPluginCollector, - unstructuredWriter); + unstructuredWriter, byteEncoding); } // warn:由调用方控制流的关闭 // IOUtils.closeQuietly(unstructuredWriter); } + public static UnstructuredWriter produceUnstructuredWriter(String fileFormat, Configuration config, Writer writer){ + UnstructuredWriter unstructuredWriter = null; + if (StringUtils.equalsIgnoreCase(fileFormat, Constant.FILE_FORMAT_CSV)) { + + Character fieldDelimiter = config.getChar(Key.FIELD_DELIMITER, Constant.DEFAULT_FIELD_DELIMITER); + unstructuredWriter = TextCsvWriterManager.produceCsvWriter(writer, fieldDelimiter, config); + } 
else if (StringUtils.equalsIgnoreCase(fileFormat, Constant.FILE_FORMAT_TEXT)) { + + String fieldDelimiter = config.getString(Key.FIELD_DELIMITER, String.valueOf(Constant.DEFAULT_FIELD_DELIMITER)); + unstructuredWriter = TextCsvWriterManager.produceTextWriter(writer, fieldDelimiter, config); + } + + return unstructuredWriter; + } + /** * 异常表示脏数据 * */ public static void transportOneRecord(Record record, String nullFormat, - DateFormat dateParse, TaskPluginCollector taskPluginCollector, - UnstructuredWriter unstructuredWriter) { + DateFormat dateParse, TaskPluginCollector taskPluginCollector, + UnstructuredWriter unstructuredWriter, String byteEncoding) { // warn: default is null if (null == nullFormat) { nullFormat = "null"; @@ -309,7 +287,15 @@ public class UnstructuredStorageWriterUtil { if (null != column.getRawData()) { boolean isDateColumn = column instanceof DateColumn; if (!isDateColumn) { - splitedRows.add(column.asString()); + if (column instanceof BytesColumn) { + if ("base64".equalsIgnoreCase(byteEncoding)) { + splitedRows.add(Base64.encodeBase64String(column.asBytes())); + } else { + splitedRows.add(column.asString()); + } + } else { + splitedRows.add(column.asString()); + } } else { if (null != dateParse) { splitedRows.add(dateParse.format(column @@ -325,9 +311,18 @@ public class UnstructuredStorageWriterUtil { } } unstructuredWriter.writeOneRecord(splitedRows); - } catch (Exception e) { + } catch (IllegalArgumentException e){ // warn: dirty data taskPluginCollector.collectDirtyRecord(record, e); + } catch (DataXException e){ + // warn: dirty data + taskPluginCollector.collectDirtyRecord(record, e); + } catch (Exception e) { + // throw exception, it is not dirty data, + // may be network unreachable and the other problem + throw DataXException.asDataXException( + UnstructuredStorageWriterErrorCode.Write_ERROR, e.getMessage(),e); } } + } diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/binaryFileUtil/BinaryFileWriterErrorCode.java b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/binaryFileUtil/BinaryFileWriterErrorCode.java new file mode 100755 index 00000000..77e51026 --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/binaryFileUtil/BinaryFileWriterErrorCode.java @@ -0,0 +1,33 @@ +package com.alibaba.datax.plugin.unstructuredstorage.writer.binaryFileUtil; + +import com.alibaba.datax.common.spi.ErrorCode; + +public enum BinaryFileWriterErrorCode implements ErrorCode { + ILLEGAL_VALUE("UnstructuredStorageWriter-00", "errorcode.illegal_value"), + REPEATED_FILE_NAME("UnstructuredStorageWriter-01", "errorcode.repeated_file_name"), + REQUIRED_VALUE("UnstructuredStorageWriter-02","errorcode.required_value"),; + + private final String code; + private final String description; + + private BinaryFileWriterErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.description; + } + + @Override + public String toString() { + return String.format("Code:[%s], Description:[%s].", this.code, + this.description); + } +} diff --git a/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/binaryFileUtil/BinaryFileWriterUtil.java 
b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/binaryFileUtil/BinaryFileWriterUtil.java new file mode 100644 index 00000000..e2c9ad16 --- /dev/null +++ b/plugin-unstructured-storage-util/src/main/java/com/alibaba/datax/plugin/unstructuredstorage/writer/binaryFileUtil/BinaryFileWriterUtil.java @@ -0,0 +1,126 @@ +package com.alibaba.datax.plugin.unstructuredstorage.writer.binaryFileUtil; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderErrorCode; +import com.alibaba.datax.plugin.unstructuredstorage.writer.Key; +import com.alibaba.datax.plugin.unstructuredstorage.writer.UnstructuredStorageWriterErrorCode; +import com.google.common.collect.Sets; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import static com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.*; + +/** + * @Author: guxuan + * @Date 2022-05-17 17:01 + */ +public class BinaryFileWriterUtil { + + private static final Logger LOG = LoggerFactory.getLogger(BinaryFileWriterUtil.class); + + + /** + * 从RecordReceiver获取源文件Bytes数组, 写到目的端 + * + * @param outputStream: 写文件流 + * @param recordReceiver: RecordReceiver + */ + public static void writeFileFromRecordReceiver(OutputStream outputStream, RecordReceiver recordReceiver) { + try { + Record record; + while ((record = recordReceiver.getFromReader()) != null) { + Column column = record.getColumn(0); + outputStream.write(column.asBytes()); + } + outputStream.flush(); + LOG.info("End write!!!"); + } catch (IOException e) { + throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR, e); + } + } + + /** + * 校验同步二进制文件的参数 + * + * @param writerConfiguration: writer的配置 + */ + public static void validateParameter(Configuration writerConfiguration) { + // writeMode check + String writeMode = writerConfiguration.getNecessaryValue( + Key.WRITE_MODE, + UnstructuredStorageWriterErrorCode.REQUIRED_VALUE); + writeMode = writeMode.trim(); + Set supportedWriteModes = Sets.newHashSet(TRUNCATE, NOCONFLICT); + if (!supportedWriteModes.contains(writeMode)) { + throw DataXException + .asDataXException( + BinaryFileWriterErrorCode.ILLEGAL_VALUE, + String.format("Synchronous binary format file, only supports truncate and nonConflict modes, does not support the writeMode mode you configured: %s", writeMode)); + } + writerConfiguration.set(Key.WRITE_MODE, writeMode); + } + + /** + * 校验文件名是否有重复的,如果有重复的文件名则抛出异常 + * @param fileNameList + */ + public static void checkFileNameIfRepeatedThrowException(List fileNameList) { + Set sourceFileNameSet = new HashSet(); + for (String fileName : fileNameList) { + if (!sourceFileNameSet.contains(fileName)) { + sourceFileNameSet.add(fileName); + } else { + throw DataXException.asDataXException(BinaryFileWriterErrorCode.REPEATED_FILE_NAME, + String.format("Source File Name [%s] is repeated!", fileName)); + } + } + } + + /** + * + * @param readerSplitConfigs + * @param writerSliceConfig + * @return 切分后的结果 + */ + public static List split(List 
readerSplitConfigs, Configuration writerSliceConfig) { + List writerSplitConfigs = new ArrayList(); + + for (Configuration readerSliceConfig : readerSplitConfigs) { + Configuration splitedTaskConfig = writerSliceConfig.clone(); + String fileName = getFileName(readerSliceConfig.getString(SOURCE_FILE)); + splitedTaskConfig + .set(com.alibaba.datax.plugin.unstructuredstorage.writer.Key.FILE_NAME, fileName); + splitedTaskConfig. + set(com.alibaba.datax.plugin.unstructuredstorage.writer.Constant.BINARY, true); + writerSplitConfigs.add(splitedTaskConfig); + } + LOG.info("end do split."); + return writerSplitConfigs; + } + + /** + * 根据文件路径获取到文件名, filePath必定包含了文件名 + * + * @param filePath: 文件路径 + */ + public static String getFileName(String filePath) { + if (StringUtils.isBlank(filePath)) { + return null; + } + File file = new File(filePath); + return file.getName(); + } +} diff --git a/pom.xml b/pom.xml index 3bd75a31..957c60ee 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ 3.3.2 1.10 1.2 - 1.1.46.sec10 + 2.0.23 16.0.1 3.7.2.1-SNAPSHOT @@ -53,60 +53,81 @@ postgresqlreader kingbaseesreader oraclereader + cassandrareader + oceanbasev10reader + rdbmsreader + odpsreader otsreader otsstreamreader - txtfilereader - hdfsreader - streamreader - ossreader - ftpreader - mongodbreader - rdbmsreader hbase11xreader hbase094xreader + hbase11xsqlreader + hbase20xsqlreader + + ossreader + hdfsreader + ftpreader + txtfilereader + streamreader + + mongodbreader + tdenginereader + gdbreader tsdbreader opentsdbreader - cassandrareader - gdbreader - oceanbasev10reader + loghubreader + datahubreader + starrocksreader mysqlwriter + starrockswriter drdswriter - odpswriter - txtfilewriter - ftpwriter - hdfswriter - streamwriter - otswriter + databendwriter oraclewriter sqlserverwriter postgresqlwriter kingbaseeswriter - osswriter - mongodbwriter adswriter - ocswriter + oceanbasev10writer + adbpgwriter + hologresjdbcwriter rdbmswriter + + + odpswriter + osswriter + otswriter hbase11xwriter hbase094xwriter hbase11xsqlwriter - hbase11xsqlreader + hbase20xsqlwriter + kuduwriter + ftpwriter + hdfswriter + txtfilewriter + streamwriter + elasticsearchwriter + mongodbwriter + tdenginewriter + ocswriter tsdbwriter - adbpgwriter gdbwriter + oscarwriter + loghubwriter + datahubwriter cassandrawriter clickhousewriter - oscarwriter - oceanbasev10writer + doriswriter + selectdbwriter + adbmysqlwriter + plugin-rdbms-util plugin-unstructured-storage-util - hbase20xsqlreader - hbase20xsqlwriter - kuduwriter + @@ -117,8 +138,8 @@ ${commons-lang3-version} - com.alibaba - fastjson + com.alibaba.fastjson2 + fastjson2 ${fastjson-version} - com.dm - dm - system - ${basedir}/src/main/libs/Dm7JdbcDriver16.jar + com.dameng + Dm7JdbcDriver17 + 7.6.0.142 + com.sybase jconn3 @@ -38,13 +39,20 @@ system ${basedir}/src/main/libs/jconn3-1.0.0-SNAPSHOT.jar - + + ppas ppas 16 system ${basedir}/src/main/libs/edb-jdbc16.jar + + + com.ibm.db2.jcc + db2jcc + db2jcc4 + org.slf4j @@ -97,13 +105,4 @@ - - - - com.dm - dm - 16 - - - diff --git a/rdbmsreader/src/main/libs/Dm7JdbcDriver16.jar b/rdbmsreader/src/main/libs/Dm7JdbcDriver16.jar deleted file mode 100755 index 30740dcd..00000000 Binary files a/rdbmsreader/src/main/libs/Dm7JdbcDriver16.jar and /dev/null differ diff --git a/rdbmsreader/src/main/libs/db2jcc4.jar b/rdbmsreader/src/main/libs/db2jcc4.jar deleted file mode 100755 index fc53cfd9..00000000 Binary files a/rdbmsreader/src/main/libs/db2jcc4.jar and /dev/null differ diff --git a/rdbmsreader/src/main/resources/plugin.json 
b/rdbmsreader/src/main/resources/plugin.json index d344dd86..f79a6ace 100755 --- a/rdbmsreader/src/main/resources/plugin.json +++ b/rdbmsreader/src/main/resources/plugin.json @@ -3,5 +3,5 @@ "class": "com.alibaba.datax.plugin.reader.rdbmsreader.RdbmsReader", "description": "useScene: prod. mechanism: Jdbc connection using the database, execute select sql, retrieve data from the ResultSet. warn: The more you know about the database, the less problems you encounter.", "developer": "alibaba", - "drivers":["dm.jdbc.driver.DmDriver", "com.sybase.jdbc3.jdbc.SybDriver", "com.edb.Driver"] + "drivers":["dm.jdbc.driver.DmDriver", "com.sybase.jdbc3.jdbc.SybDriver", "com.edb.Driver", "com.ibm.db2.jcc.DB2Driver"] } diff --git a/rdbmswriter/pom.xml b/rdbmswriter/pom.xml index 19461960..a74838b7 100755 --- a/rdbmswriter/pom.xml +++ b/rdbmswriter/pom.xml @@ -25,27 +25,34 @@ + - com.dm - dm + com.dameng + Dm7JdbcDriver17 + 7.6.0.142 + + + + com.sybase + jconn3 + 1.0.0-SNAPSHOT + system + ${basedir}/src/main/libs/jconn3-1.0.0-SNAPSHOT.jar + + + + ppas + ppas 16 system - ${basedir}/src/main/libs/Dm7JdbcDriver16.jar + ${basedir}/src/main/libs/edb-jdbc16.jar + - com.sybase - jconn3 - 1.0.0-SNAPSHOT - system - ${basedir}/src/main/libs/jconn3-1.0.0-SNAPSHOT.jar - - - ppas - ppas - 16 - system - ${basedir}/src/main/libs/edb-jdbc16.jar - + com.ibm.db2.jcc + db2jcc + db2jcc4 + org.slf4j diff --git a/rdbmswriter/src/main/java/com/alibaba/datax/plugin/reader/rdbmswriter/SubCommonRdbmsWriter.java b/rdbmswriter/src/main/java/com/alibaba/datax/plugin/reader/rdbmswriter/SubCommonRdbmsWriter.java index f1fbc552..88e50f11 100755 --- a/rdbmswriter/src/main/java/com/alibaba/datax/plugin/reader/rdbmswriter/SubCommonRdbmsWriter.java +++ b/rdbmswriter/src/main/java/com/alibaba/datax/plugin/reader/rdbmswriter/SubCommonRdbmsWriter.java @@ -29,7 +29,7 @@ public class SubCommonRdbmsWriter extends CommonRdbmsWriter { @Override protected PreparedStatement fillPreparedStatementColumnType( PreparedStatement preparedStatement, int columnIndex, - int columnSqltype, Column column) throws SQLException { + int columnSqltype, String typeName, Column column) throws SQLException { java.util.Date utilDate; try { switch (columnSqltype) { diff --git a/rdbmswriter/src/main/libs/Dm7JdbcDriver16.jar b/rdbmswriter/src/main/libs/Dm7JdbcDriver16.jar deleted file mode 100755 index 30740dcd..00000000 Binary files a/rdbmswriter/src/main/libs/Dm7JdbcDriver16.jar and /dev/null differ diff --git a/rdbmswriter/src/main/libs/db2jcc4.jar b/rdbmswriter/src/main/libs/db2jcc4.jar deleted file mode 100755 index fc53cfd9..00000000 Binary files a/rdbmswriter/src/main/libs/db2jcc4.jar and /dev/null differ diff --git a/rdbmswriter/src/main/resources/plugin.json b/rdbmswriter/src/main/resources/plugin.json index fa771af2..bf32140a 100755 --- a/rdbmswriter/src/main/resources/plugin.json +++ b/rdbmswriter/src/main/resources/plugin.json @@ -3,5 +3,5 @@ "class": "com.alibaba.datax.plugin.reader.rdbmswriter.RdbmsWriter", "description": "useScene: prod. mechanism: Jdbc connection using the database, execute select sql, retrieve data from the ResultSet. 
warn: The more you know about the database, the less problems you encounter.", "developer": "alibaba", - "drivers":["dm.jdbc.driver.DmDriver", "com.sybase.jdbc3.jdbc.SybDriver", "com.edb.Driver"] + "drivers":["dm.jdbc.driver.DmDriver", "com.sybase.jdbc3.jdbc.SybDriver", "com.edb.Driver", "com.ibm.db2.jcc.DB2Driver"] } diff --git a/selectdbwriter/doc/selectdbwriter.md b/selectdbwriter/doc/selectdbwriter.md new file mode 100644 index 00000000..cdf39263 --- /dev/null +++ b/selectdbwriter/doc/selectdbwriter.md @@ -0,0 +1,428 @@ +# SelectdbWriter 插件文档 + +## 1 快速介绍 +SelectdbWriter支持将大批量数据写入SELECTDB中。 + +## 2 实现原理 +SelectdbWriter 通过调用selectdb api (/copy/upload),返回一个重定向的S3地址,使用Http向S3地址发送字节流,设置参数达到要求时执行copy into + +## 3 编译 + +1. 运行 init-env.sh + +2. 编译 selectdbwriter: + +i. 单独编译 selectdbwriter 插件: + + ```text + mvn clean install -pl plugin-rdbms-util,selectdbwriter -DskipTests + ``` + + +ii.编译整个 DataX 项目: + + ```text + mvn package assembly:assembly -Dmaven.test.skip=true + ``` +产出在 target/datax/datax/. +hdfsreader, hdfswriter and oscarwriter 这三个插件需要额外的jar包。如果你并不需要这些插件,可以在 DataX/pom.xml 中删除这些插件的模块。 + + +iii.编译错误 + +如遇到如下编译错误: + ```text + Could not find artifact com.alibaba.datax:datax-all:pom:0.0.1-SNAPSHOT + ``` + +可尝试以下方式解决: + +a.下载 alibaba-datax-maven-m2-20210928.tar.gz + +b.解压后,将得到的 alibaba/datax/ 目录,拷贝到所使用的 maven 对应的 .m2/repository/com/alibaba/ 下。 + +c.再次尝试编译。 + +## 3 功能说明 + +### 3.1 配置样例 + +这里是一份从Stream读取数据后导入至selectdb的配置文件。 + +``` +{ + "job":{ + "content":[ + { + "reader":{ + "name":"streamreader", + "parameter":{ + "column":[ + { + "type":"string", + "random":"0,31" + }, + { + "type":"string", + "random":"0,31" + }, + { + "type":"string", + "random":"0,31" + }, + { + "type":"string", + "random":"0,31" + }, + { + "type":"long", + "random":"0,5" + }, + { + "type":"string", + "random":"0,10" + }, + { + "type":"string", + "random":"0,5" + }, + { + "type":"string", + "random":"0,31" + }, + { + "type":"string", + "random":"0,31" + }, + { + "type":"string", + "random":"0,21" + }, + { + "type":"string", + "random":"0,31" + }, + { + "type":"long", + "random":"0,10" + }, + { + "type":"long", + "random":"0,20" + }, + { + "type":"date", + "random":"2022-01-01 12:00:00,2023-01-01 12:00:00" + }, + { + "type":"long", + "random":"0,10" + }, + { + "type":"date", + "random":"2022-01-01 12:00:00,2023-01-01 12:00:00" + }, + { + "type":"string", + "random":"0,10" + }, + { + "type":"long", + "random":"0,10" + }, + { + "type":"date", + "random":"2022-01-01 12:00:00,2023-01-01 12:00:00" + }, + { + "type":"long", + "random":"0,10" + }, + { + "type":"date", + "random":"2022-01-01 12:00:00,2023-01-01 12:00:00" + }, + { + "type":"long", + "random":"0,10" + }, + { + "type":"date", + "random":"2022-01-01 12:00:00,2023-01-01 12:00:00" + }, + { + "type":"long", + "random":"0,10" + }, + { + "type":"date", + "random":"2022-01-01 12:00:00,2023-01-01 12:00:00" + }, + { + "type":"string", + "random":"0,100" + }, + { + "type":"string", + "random":"0,1" + }, + { + "type":"long", + "random":"0,1" + }, + { + "type":"string", + "random":"0,64" + }, + { + "type":"string", + "random":"0,20" + }, + { + "type":"string", + "random":"0,31" + }, + { + "type":"long", + "random":"0,3" + }, + { + "type":"long", + "random":"0,3" + }, + { + "type":"long", + "random":"0,19" + }, + { + "type":"date", + "random":"2022-01-01 12:00:00,2023-01-01 12:00:00" + }, + { + "type":"string", + "random":"0,1" + } + ], + "sliceRecordCount":10 + } + }, + "writer":{ + "name":"selectdbwriter", + "parameter":{ + "loadUrl":[ + "xxx:47150" + ], + "loadProps":{ + 
"file.type":"json", + "file.strip_outer_array":"true" + }, + "column":[ + "id", + "table_id", + "table_no", + "table_name", + "table_status", + "no_disturb", + "dinner_type", + "member_id", + "reserve_bill_no", + "pre_order_no", + "queue_num", + "person_num", + "open_time", + "open_time_format", + "order_time", + "order_time_format", + "table_bill_id", + "offer_time", + "offer_time_format", + "confirm_bill_time", + "confirm_bill_time_format", + "bill_time", + "bill_time_format", + "clear_time", + "clear_time_format", + "table_message", + "bill_close", + "table_type", + "pad_mac", + "company_id", + "shop_id", + "is_sync", + "table_split_no", + "ts", + "ts_format", + "dr" + ], + "username":"admin", + "password":"SelectDB2022", + "postSql":[ + + ], + "preSql":[ + + ], + "connection":[ + { + "jdbcUrl":"jdbc:mysql://xxx:34142/cl_test", + "table":[ + "ods_pos_pro_table_dynamic_delta_v4" + ], + "selectedDatabase":"cl_test" + } + ], + "maxBatchRows":1000000, + "maxBatchByteSize":536870912000 + } + } + } + ], + "setting":{ + "errorLimit":{ + "percentage":0.02, + "record":0 + }, + "speed":{ + "channel":5 + } + } + } +} + +``` + +### 3.2 参数说明 + +```text + **jdbcUrl** + + - 描述:selectdb 的 JDBC 连接串,用户执行 preSql 或 postSQL。 + - 必选:是 + - 默认值:无 + +* **loadUrl** + + - 描述:作为 selecdb 的连接目标。格式为 "ip:port"。其中 IP 是 selectdb的private-link,port 是selectdb 集群的 http_port + - 必选:是 + - 默认值:无 + +* **username** + + - 描述:访问selectdb数据库的用户名 + - 必选:是 + - 默认值:无 + +* **password** + + - 描述:访问selectdb数据库的密码 + - 必选:否 + - 默认值:空 + +* **connection.selectedDatabase** + - 描述:需要写入的selectdb数据库名称。 + - 必选:是 + - 默认值:无 + +* **connection.table** + - 描述:需要写入的selectdb表名称。 + - 必选:是 + - 默认值:无 + +* **column** + + - 描述:目的表**需要写入数据**的字段,这些字段将作为生成的 Json 数据的字段名。字段之间用英文逗号分隔。例如: "column": ["id","name","age"]。 + - 必选:是 + - 默认值:否 + +* **preSql** + + - 描述:写入数据到目的表前,会先执行这里的标准语句。 + - 必选:否 + - 默认值:无 + +* **postSql** + + - 描述:写入数据到目的表后,会执行这里的标准语句。 + - 必选:否 + - 默认值:无 + + +* **maxBatchRows** + + - 描述:每批次导入数据的最大行数。和 **batchSize** 共同控制每批次的导入数量。每批次数据达到两个阈值之一,即开始导入这一批次的数据。 + - 必选:否 + - 默认值:500000 + +* **batchSize** + + - 描述:每批次导入数据的最大数据量。和 **maxBatchRows** 共同控制每批次的导入数量。每批次数据达到两个阈值之一,即开始导入这一批次的数据。 + - 必选:否 + - 默认值:90M + +* **maxRetries** + + - 描述:每批次导入数据失败后的重试次数。 + - 必选:否 + - 默认值:3 + +* **labelPrefix** + + - 描述:每批次上传文件的 label 前缀。最终的 label 将有 `labelPrefix + UUID` 组成全局唯一的 label,确保数据不会重复导入 + - 必选:否 + - 默认值:`datax_selectdb_writer_` + +* **loadProps** + + - 描述:COPY INOT 的请求参数 + + 这里包括导入的数据格式:file.type等,导入数据格式默认我们使用csv,支持JSON,具体可以参照下面类型转换部分 + + - 必选:否 + + - 默认值:无 + +* **clusterName** + + - 描述:selectdb could 集群名称 + + - 必选:否 + + - 默认值:无 + +* **flushQueueLength** + + - 描述:队列长度 + + - 必选:否 + + - 默认值:1 + +* **flushInterval** + + - 描述:数据写入批次的时间间隔,如果maxBatchRows 和 batchSize 参数设置的有很大,那么很可能达不到你这设置的数据量大小,会执行导入。 + + - 必选:否 + + - 默认值:30000ms +``` + +### 类型转换 + +默认传入的数据均会被转为字符串,并以`\t`作为列分隔符,`\n`作为行分隔符,组成`csv`文件进行Selectdb导入操作。 + +默认是csv格式导入,如需更改列分隔符, 则正确配置 `loadProps` 即可: + +```json +"loadProps": { + "file.column_separator": "\\x01", + "file.line_delimiter": "\\x02" +} +``` + +如需更改导入格式为`json`, 则正确配置 `loadProps` 即可: +```json +"loadProps": { + "file.type": "json", + "file.strip_outer_array": true +} +``` \ No newline at end of file diff --git a/selectdbwriter/doc/stream2selectdb.json b/selectdbwriter/doc/stream2selectdb.json new file mode 100644 index 00000000..d5e14c48 --- /dev/null +++ b/selectdbwriter/doc/stream2selectdb.json @@ -0,0 +1,93 @@ +{ + "core":{ + "transport":{ + "channel":{ + "speed":{ + "byte":10485760 + } + } + } + }, + "job":{ + "content":[ + { + "reader":{ + 
"name":"streamreader", + "parameter":{ + "column":[ + { + "type":"string", + "value":"DataX" + }, + { + "type":"int", + "value":19890604 + }, + { + "type":"date", + "value":"1989-06-04 00:00:00" + }, + { + "type":"bool", + "value":true + }, + { + "type":"string", + "value":"test" + } + ], + "sliceRecordCount":1000000 + } + }, + "writer":{ + "name":"selectdbwriter", + "parameter":{ + "loadUrl":[ + "xxx:35871" + ], + "loadProps":{ + "file.type":"json", + "file.strip_outer_array":"true" + }, + "database":"db1", + "column":[ + "k1", + "k2", + "k3", + "k4", + "k5" + ], + "username":"admin", + "password":"SelectDB2022", + "postSql":[ + + ], + "preSql":[ + + ], + "connection":[ + { + "jdbcUrl":"jdbc:mysql://xxx:32386/cl_test", + "table":[ + "test_selectdb" + ], + "selectedDatabase":"cl_test" + } + ], + "maxBatchRows":200000, + "batchSize":53687091200 + } + } + } + ], + "setting":{ + "errorLimit":{ + "percentage":0.02, + "record":0 + }, + "speed":{ + "byte":10485760 + } + } + } +} \ No newline at end of file diff --git a/selectdbwriter/pom.xml b/selectdbwriter/pom.xml new file mode 100644 index 00000000..fd2a19f7 --- /dev/null +++ b/selectdbwriter/pom.xml @@ -0,0 +1,96 @@ + + + + datax-all + com.alibaba.datax + 0.0.1-SNAPSHOT + + 4.0.0 + selectdbwriter + selectdbwriter + jar + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + org.slf4j + slf4j-api + + + ch.qos.logback + logback-classic + + + com.alibaba.datax + plugin-rdbms-util + ${datax-project-version} + + + mysql + mysql-connector-java + ${mysql.driver.version} + + + org.apache.httpcomponents + httpclient + 4.5.13 + + + com.fasterxml.jackson.core + jackson-annotations + 2.13.3 + + + com.fasterxml.jackson.core + jackson-core + 2.13.3 + + + com.fasterxml.jackson.core + jackson-databind + 2.13.3 + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + diff --git a/selectdbwriter/src/main/assembly/package.xml b/selectdbwriter/src/main/assembly/package.xml new file mode 100644 index 00000000..1ea0009e --- /dev/null +++ b/selectdbwriter/src/main/assembly/package.xml @@ -0,0 +1,34 @@ + + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/writer/selectdbwriter + + + target/ + + selectdbwriter-0.0.1-SNAPSHOT.jar + + plugin/writer/selectdbwriter + + + + + false + plugin/writer/selectdbwriter/libs + runtime + + + diff --git a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/BaseResponse.java b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/BaseResponse.java new file mode 100644 index 00000000..c02f725f --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/BaseResponse.java @@ -0,0 +1,23 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +@JsonIgnoreProperties(ignoreUnknown = true) +public class BaseResponse { + private int code; + private String msg; + private T data; + private int count; + + public int getCode() { + return code; + } + + public String getMsg() { + return msg; + } + + public T getData(){ + return data; + } +} diff --git a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/CopyIntoResp.java 
b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/CopyIntoResp.java new file mode 100644 index 00000000..4da002ac --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/CopyIntoResp.java @@ -0,0 +1,26 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +import java.util.Map; + +@JsonIgnoreProperties(ignoreUnknown = true) +public class CopyIntoResp extends BaseResponse{ + private String code; + private String exception; + + private Map result; + + public String getDataCode() { + return code; + } + + public String getException() { + return exception; + } + + public Map getResult() { + return result; + } + +} diff --git a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/CopySQLBuilder.java b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/CopySQLBuilder.java new file mode 100644 index 00000000..62910d5d --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/CopySQLBuilder.java @@ -0,0 +1,40 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + + +import java.util.Map; +import java.util.StringJoiner; + +public class CopySQLBuilder { + private final static String COPY_SYNC = "copy.async"; + private final String fileName; + private final Keys options; + private Map properties; + + + + public CopySQLBuilder(Keys options, String fileName) { + this.options=options; + this.fileName=fileName; + this.properties=options.getLoadProps(); + } + + public String buildCopySQL(){ + StringBuilder sb = new StringBuilder(); + sb.append("COPY INTO ") + .append(options.getDatabase() + "." + options.getTable()) + .append(" FROM @~('").append(fileName).append("') ") + .append("PROPERTIES ("); + + //copy into must be sync + properties.put(COPY_SYNC,false); + StringJoiner props = new StringJoiner(","); + for(Map.Entry entry : properties.entrySet()){ + String key = String.valueOf(entry.getKey()); + String value = String.valueOf(entry.getValue()); + String prop = String.format("'%s'='%s'",key,value); + props.add(prop); + } + sb.append(props).append(" )"); + return sb.toString(); + } +} diff --git a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/DelimiterParser.java b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/DelimiterParser.java new file mode 100644 index 00000000..fa6b397c --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/DelimiterParser.java @@ -0,0 +1,54 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + +import com.google.common.base.Strings; + +import java.io.StringWriter; + +public class DelimiterParser { + + private static final String HEX_STRING = "0123456789ABCDEF"; + + public static String parse(String sp, String dSp) throws RuntimeException { + if ( Strings.isNullOrEmpty(sp)) { + return dSp; + } + if (!sp.toUpperCase().startsWith("\\X")) { + return sp; + } + String hexStr = sp.substring(2); + // check hex str + if (hexStr.isEmpty()) { + throw new RuntimeException("Failed to parse delimiter: Hex str is empty"); + } + if (hexStr.length() % 2 != 0) { + throw new RuntimeException("Failed to parse delimiter: Hex str length error"); + } + for (char hexChar : hexStr.toUpperCase().toCharArray()) { + if (HEX_STRING.indexOf(hexChar) == -1) { + throw new RuntimeException("Failed to parse delimiter: Hex str format error"); + } + } + // transform to separator + 
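// e.g. a separator configured in loadProps as "\x01" is decoded here into the single 0x01 control character +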
StringWriter writer = new StringWriter(); + for (byte b : hexStrToBytes(hexStr)) { + writer.append((char) b); + } + return writer.toString(); + } + + private static byte[] hexStrToBytes(String hexStr) { + String upperHexStr = hexStr.toUpperCase(); + int length = upperHexStr.length() / 2; + char[] hexChars = upperHexStr.toCharArray(); + byte[] bytes = new byte[length]; + for (int i = 0; i < length; i++) { + int pos = i * 2; + bytes[i] = (byte) (charToByte(hexChars[pos]) << 4 | charToByte(hexChars[pos + 1])); + } + return bytes; + } + + private static byte charToByte(char c) { + return (byte) HEX_STRING.indexOf(c); + } +} diff --git a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/HttpPostBuilder.java b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/HttpPostBuilder.java new file mode 100644 index 00000000..9471debb --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/HttpPostBuilder.java @@ -0,0 +1,51 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + +import org.apache.commons.codec.binary.Base64; +import org.apache.http.HttpEntity; +import org.apache.http.HttpHeaders; +import org.apache.http.client.methods.HttpPost; + +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; + + +public class HttpPostBuilder { + String url; + Map header; + HttpEntity httpEntity; + public HttpPostBuilder() { + header = new HashMap<>(); + } + + public HttpPostBuilder setUrl(String url) { + this.url = url; + return this; + } + + public HttpPostBuilder addCommonHeader() { + header.put(HttpHeaders.EXPECT, "100-continue"); + return this; + } + + public HttpPostBuilder baseAuth(String user, String password) { + final String authInfo = user + ":" + password; + byte[] encoded = Base64.encodeBase64(authInfo.getBytes(StandardCharsets.UTF_8)); + header.put(HttpHeaders.AUTHORIZATION, "Basic " + new String(encoded)); + return this; + } + + public HttpPostBuilder setEntity(HttpEntity httpEntity) { + this.httpEntity = httpEntity; + return this; + } + + public HttpPost build() { + SelectdbUtil.checkNotNull(url); + SelectdbUtil.checkNotNull(httpEntity); + HttpPost put = new HttpPost(url); + header.forEach(put::setHeader); + put.setEntity(httpEntity); + return put; + } +} diff --git a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/HttpPutBuilder.java b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/HttpPutBuilder.java new file mode 100644 index 00000000..59d7dbca --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/HttpPutBuilder.java @@ -0,0 +1,65 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + +import org.apache.commons.codec.binary.Base64; +import org.apache.http.HttpEntity; +import org.apache.http.HttpHeaders; +import org.apache.http.client.methods.HttpPut; +import org.apache.http.entity.StringEntity; + +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; + +public class HttpPutBuilder { + String url; + Map header; + HttpEntity httpEntity; + public HttpPutBuilder() { + header = new HashMap<>(); + } + + public HttpPutBuilder setUrl(String url) { + this.url = url; + return this; + } + + public HttpPutBuilder addFileName(String fileName){ + header.put("fileName", fileName); + return this; + } + + public HttpPutBuilder setEmptyEntity() { + try { + this.httpEntity = new StringEntity(""); + } catch (Exception e) { + throw new 
IllegalArgumentException(e); + } + return this; + } + + public HttpPutBuilder addCommonHeader() { + header.put(HttpHeaders.EXPECT, "100-continue"); + return this; + } + + public HttpPutBuilder baseAuth(String user, String password) { + final String authInfo = user + ":" + password; + byte[] encoded = Base64.encodeBase64(authInfo.getBytes(StandardCharsets.UTF_8)); + header.put(HttpHeaders.AUTHORIZATION, "Basic " + new String(encoded)); + return this; + } + + public HttpPutBuilder setEntity(HttpEntity httpEntity) { + this.httpEntity = httpEntity; + return this; + } + + public HttpPut build() { + SelectdbUtil.checkNotNull(url); + SelectdbUtil.checkNotNull(httpEntity); + HttpPut put = new HttpPut(url); + header.forEach(put::setHeader); + put.setEntity(httpEntity); + return put; + } +} diff --git a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/Keys.java b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/Keys.java new file mode 100644 index 00000000..6c767d93 --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/Keys.java @@ -0,0 +1,186 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; + +import java.io.Serializable; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class Keys implements Serializable { + + private static final long serialVersionUID = 1l; + private static final int DEFAULT_MAX_RETRIES = 3; + private static final int BATCH_ROWS = 500000; + private static final long DEFAULT_FLUSH_INTERVAL = 30000; + + private static final String LOAD_PROPS_FORMAT = "file.type"; + public enum StreamLoadFormat { + CSV, JSON; + } + + private static final String USERNAME = "username"; + private static final String PASSWORD = "password"; + private static final String DATABASE = "connection[0].selectedDatabase"; + private static final String TABLE = "connection[0].table[0]"; + private static final String COLUMN = "column"; + private static final String PRE_SQL = "preSql"; + private static final String POST_SQL = "postSql"; + private static final String JDBC_URL = "connection[0].jdbcUrl"; + private static final String LABEL_PREFIX = "labelPrefix"; + private static final String MAX_BATCH_ROWS = "maxBatchRows"; + private static final String MAX_BATCH_SIZE = "batchSize"; + private static final String FLUSH_INTERVAL = "flushInterval"; + private static final String LOAD_URL = "loadUrl"; + private static final String FLUSH_QUEUE_LENGTH = "flushQueueLength"; + private static final String LOAD_PROPS = "loadProps"; + + private static final String DEFAULT_LABEL_PREFIX = "datax_selectdb_writer_"; + + private static final long DEFAULT_MAX_BATCH_SIZE = 90 * 1024 * 1024; //default 90M + + private static final String CLUSTER_NAME = "clusterName"; + + private static final String MAX_RETRIES = "maxRetries"; + private final Configuration options; + + private List infoSchemaColumns; + private List userSetColumns; + private boolean isWildcardColumn; + + public Keys ( Configuration options) { + this.options = options; + this.userSetColumns = options.getList(COLUMN, String.class).stream().map(str -> str.replace("`", "")).collect(Collectors.toList()); + if (1 == options.getList(COLUMN, String.class).size() && "*".trim().equals(options.getList(COLUMN, String.class).get(0))) { + this.isWildcardColumn = true; 
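+ // when column is configured as ["*"], the concrete column list is resolved later from information_schema and injected via setInfoCchemaColumns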
+ } + } + + public void doPretreatment() { + validateRequired(); + validateStreamLoadUrl(); + } + + public String getJdbcUrl() { + return options.getString(JDBC_URL); + } + + public String getDatabase() { + return options.getString(DATABASE); + } + + public String getTable() { + return options.getString(TABLE); + } + + public String getUsername() { + return options.getString(USERNAME); + } + + public String getPassword() { + return options.getString(PASSWORD); + } + + public String getClusterName(){ + return options.getString(CLUSTER_NAME); + } + + public String getLabelPrefix() { + String label = options.getString(LABEL_PREFIX); + return null == label ? DEFAULT_LABEL_PREFIX : label; + } + + public List getLoadUrlList() { + return options.getList(LOAD_URL, String.class); + } + + public List getColumns() { + if (isWildcardColumn) { + return this.infoSchemaColumns; + } + return this.userSetColumns; + } + + public boolean isWildcardColumn() { + return this.isWildcardColumn; + } + + public void setInfoCchemaColumns(List cols) { + this.infoSchemaColumns = cols; + } + + public List getPreSqlList() { + return options.getList(PRE_SQL, String.class); + } + + public List getPostSqlList() { + return options.getList(POST_SQL, String.class); + } + + public Map getLoadProps() { + return options.getMap(LOAD_PROPS); + } + + public int getMaxRetries() { + Integer retries = options.getInt(MAX_RETRIES); + return null == retries ? DEFAULT_MAX_RETRIES : retries; + } + + public int getBatchRows() { + Integer rows = options.getInt(MAX_BATCH_ROWS); + return null == rows ? BATCH_ROWS : rows; + } + + public long getBatchSize() { + Long size = options.getLong(MAX_BATCH_SIZE); + return null == size ? DEFAULT_MAX_BATCH_SIZE : size; + } + + public long getFlushInterval() { + Long interval = options.getLong(FLUSH_INTERVAL); + return null == interval ? DEFAULT_FLUSH_INTERVAL : interval; + } + + public int getFlushQueueLength() { + Integer len = options.getInt(FLUSH_QUEUE_LENGTH); + return null == len ? 
1 : len; + } + + + public StreamLoadFormat getStreamLoadFormat() { + Map loadProps = getLoadProps(); + if (null == loadProps) { + return StreamLoadFormat.CSV; + } + if (loadProps.containsKey(LOAD_PROPS_FORMAT) + && StreamLoadFormat.JSON.name().equalsIgnoreCase(String.valueOf(loadProps.get(LOAD_PROPS_FORMAT)))) { + return StreamLoadFormat.JSON; + } + return StreamLoadFormat.CSV; + } + + private void validateStreamLoadUrl() { + List urlList = getLoadUrlList(); + for (String host : urlList) { + if (host.split(":").length < 2) { + throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR, + "The format of loadUrl is not correct, please enter:[`fe_ip:fe_http_ip;fe_ip:fe_http_ip`]."); + } + } + } + + private void validateRequired() { + final String[] requiredOptionKeys = new String[]{ + USERNAME, + DATABASE, + TABLE, + COLUMN, + LOAD_URL + }; + for (String optionKey : requiredOptionKeys) { + options.getNecessaryValue(optionKey, DBUtilErrorCode.REQUIRED_VALUE); + } + } +} diff --git a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbBaseCodec.java b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbBaseCodec.java new file mode 100644 index 00000000..d2fc1224 --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbBaseCodec.java @@ -0,0 +1,23 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + +import com.alibaba.datax.common.element.Column; + +public class SelectdbBaseCodec { + protected String convertionField( Column col) { + if (null == col.getRawData() || Column.Type.NULL == col.getType()) { + return null; + } + if ( Column.Type.BOOL == col.getType()) { + return String.valueOf(col.asLong()); + } + if ( Column.Type.BYTES == col.getType()) { + byte[] bts = (byte[])col.getRawData(); + long value = 0; + for (int i = 0; i < bts.length; i++) { + value += (bts[bts.length - i - 1] & 0xffL) << (8 * i); + } + return String.valueOf(value); + } + return col.asString(); + } +} diff --git a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbCodec.java b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbCodec.java new file mode 100644 index 00000000..b7e9d6ae --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbCodec.java @@ -0,0 +1,10 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + +import com.alibaba.datax.common.element.Record; + +import java.io.Serializable; + +public interface SelectdbCodec extends Serializable { + + String codec( Record row); +} diff --git a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbCodecFactory.java b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbCodecFactory.java new file mode 100644 index 00000000..567f4c0b --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbCodecFactory.java @@ -0,0 +1,19 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + +import java.util.Map; + +public class SelectdbCodecFactory { + public SelectdbCodecFactory (){ + + } + public static SelectdbCodec createCodec( Keys writerOptions) { + if ( Keys.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) { + Map props = writerOptions.getLoadProps(); + return new SelectdbCsvCodec (null == props || !props.containsKey("file.column_separator") ? 
null : String.valueOf(props.get("file.column_separator"))); + } + if ( Keys.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) { + return new SelectdbJsonCodec (writerOptions.getColumns()); + } + throw new RuntimeException("Failed to create row serializer, unsupported `format` from stream load properties."); + } +} diff --git a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbCopyIntoObserver.java b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbCopyIntoObserver.java new file mode 100644 index 00000000..c9228b22 --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbCopyIntoObserver.java @@ -0,0 +1,233 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.lang3.StringUtils; +import org.apache.http.Header; +import org.apache.http.HttpEntity; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.entity.InputStreamEntity; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.util.EntityUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +public class SelectdbCopyIntoObserver { + private static final Logger LOG = LoggerFactory.getLogger(SelectdbCopyIntoObserver.class); + + private Keys options; + private long pos; + public static final int SUCCESS = 0; + public static final String FAIL = "1"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private final HttpClientBuilder httpClientBuilder = HttpClients + .custom() + .disableRedirectHandling(); + private CloseableHttpClient httpClient; + private static final String UPLOAD_URL_PATTERN = "%s/copy/upload"; + private static final String COMMIT_PATTERN = "%s/copy/query"; + private static final Pattern COMMITTED_PATTERN = Pattern.compile("errCode = 2, detailMessage = No files can be copied, matched (\\d+) files, " + "filtered (\\d+) files because files may be loading or loaded"); + + + public SelectdbCopyIntoObserver(Keys options) { + this.options = options; + this.httpClient = httpClientBuilder.build(); + + } + + public void streamLoad(WriterTuple data) throws Exception { + String host = getLoadHost(); + if (host == null) { + throw new RuntimeException("load_url cannot be empty, or the host cannot connect.Please check your configuration."); + } + String loadUrl = String.format(UPLOAD_URL_PATTERN, host); + String uploadAddress = getUploadAddress(loadUrl, data.getLabel()); + put(uploadAddress, data.getLabel(), addRows(data.getRows(), data.getBytes().intValue())); + executeCopy(host,data.getLabel()); + + } + + private String getUploadAddress(String loadUrl, String fileName) throws IOException { + HttpPutBuilder putBuilder = new HttpPutBuilder(); + putBuilder.setUrl(loadUrl) + .addFileName(fileName) + .addCommonHeader() + .setEmptyEntity() + .baseAuth(options.getUsername(), options.getPassword()); + CloseableHttpResponse execute = 
httpClientBuilder.build().execute(putBuilder.build()); + int statusCode = execute.getStatusLine().getStatusCode(); + String reason = execute.getStatusLine().getReasonPhrase(); + if (statusCode == 307) { + Header location = execute.getFirstHeader("location"); + String uploadAddress = location.getValue(); + LOG.info("redirect to s3:{}", uploadAddress); + return uploadAddress; + } else { + HttpEntity entity = execute.getEntity(); + String result = entity == null ? null : EntityUtils.toString(entity); + LOG.error("Failed get the redirected address, status {}, reason {}, response {}", statusCode, reason, result); + throw new RuntimeException("Could not get the redirected address."); + } + + } + + private byte[] addRows(List rows, int totalBytes) { + if (Keys.StreamLoadFormat.CSV.equals(options.getStreamLoadFormat())) { + Map props = (options.getLoadProps() == null ? new HashMap<>() : options.getLoadProps()); + byte[] lineDelimiter = DelimiterParser.parse((String) props.get("file.line_delimiter"), "\n").getBytes(StandardCharsets.UTF_8); + ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size() * lineDelimiter.length); + for (byte[] row : rows) { + bos.put(row); + bos.put(lineDelimiter); + } + return bos.array(); + } + + if (Keys.StreamLoadFormat.JSON.equals(options.getStreamLoadFormat())) { + ByteBuffer bos = ByteBuffer.allocate(totalBytes + (rows.isEmpty() ? 2 : rows.size() + 1)); + bos.put("[".getBytes(StandardCharsets.UTF_8)); + byte[] jsonDelimiter = ",".getBytes(StandardCharsets.UTF_8); + boolean isFirstElement = true; + for (byte[] row : rows) { + if (!isFirstElement) { + bos.put(jsonDelimiter); + } + bos.put(row); + isFirstElement = false; + } + bos.put("]".getBytes(StandardCharsets.UTF_8)); + return bos.array(); + } + throw new RuntimeException("Failed to join rows data, unsupported `file.type` from copy into properties:"); + } + + public void put(String loadUrl, String fileName, byte[] data) throws IOException { + LOG.info(String.format("Executing upload file to: '%s', size: '%s'", loadUrl, data.length)); + HttpPutBuilder putBuilder = new HttpPutBuilder(); + putBuilder.setUrl(loadUrl) + .addCommonHeader() + .setEntity(new InputStreamEntity(new ByteArrayInputStream(data))); + CloseableHttpResponse response = httpClient.execute(putBuilder.build()); + final int statusCode = response.getStatusLine().getStatusCode(); + if (statusCode != 200) { + String result = response.getEntity() == null ? 
null : EntityUtils.toString(response.getEntity()); + LOG.error("upload file {} error, response {}", fileName, result); + throw new SelectdbWriterException("upload file error: " + fileName,true); + } + } + + private String getLoadHost() { + List hostList = options.getLoadUrlList(); + long tmp = pos + hostList.size(); + for (; pos < tmp; pos++) { + String host = new StringBuilder("http://").append(hostList.get((int) (pos % hostList.size()))).toString(); + if (checkConnection(host)) { + return host; + } + } + return null; + } + + private boolean checkConnection(String host) { + try { + URL url = new URL(host); + HttpURLConnection co = (HttpURLConnection) url.openConnection(); + co.setConnectTimeout(5000); + co.connect(); + co.disconnect(); + return true; + } catch (Exception e1) { + e1.printStackTrace(); + return false; + } + } + + + /** + * execute copy into + */ + public void executeCopy(String hostPort, String fileName) throws IOException{ + long start = System.currentTimeMillis(); + CopySQLBuilder copySQLBuilder = new CopySQLBuilder(options, fileName); + String copySQL = copySQLBuilder.buildCopySQL(); + LOG.info("build copy SQL is {}", copySQL); + Map params = new HashMap<>(); + params.put("sql", copySQL); + if(StringUtils.isNotBlank(options.getClusterName())){ + params.put("cluster",options.getClusterName()); + } + HttpPostBuilder postBuilder = new HttpPostBuilder(); + postBuilder.setUrl(String.format(COMMIT_PATTERN, hostPort)) + .baseAuth(options.getUsername(), options.getPassword()) + .setEntity(new StringEntity(OBJECT_MAPPER.writeValueAsString(params))); + + CloseableHttpResponse response = httpClient.execute(postBuilder.build()); + final int statusCode = response.getStatusLine().getStatusCode(); + final String reasonPhrase = response.getStatusLine().getReasonPhrase(); + String loadResult = ""; + if (statusCode != 200) { + LOG.warn("commit failed with status {} {}, reason {}", statusCode, hostPort, reasonPhrase); + throw new SelectdbWriterException("commit error with file: " + fileName,true); + } else if (response.getEntity() != null){ + loadResult = EntityUtils.toString(response.getEntity()); + boolean success = handleCommitResponse(loadResult); + if(success){ + LOG.info("commit success cost {}ms, response is {}", System.currentTimeMillis() - start, loadResult); + }else{ + throw new SelectdbWriterException("commit fail",true); + } + } + } + + public boolean handleCommitResponse(String loadResult) throws IOException { + BaseResponse baseResponse = OBJECT_MAPPER.readValue(loadResult, new TypeReference>(){}); + if(baseResponse.getCode() == SUCCESS){ + CopyIntoResp dataResp = baseResponse.getData(); + if(FAIL.equals(dataResp.getDataCode())){ + LOG.error("copy into execute failed, reason:{}", loadResult); + return false; + }else{ + Map result = dataResp.getResult(); + if(!result.get("state").equals("FINISHED") && !isCommitted(result.get("msg"))){ + LOG.error("copy into load failed, reason:{}", loadResult); + return false; + }else{ + return true; + } + } + }else{ + LOG.error("commit failed, reason:{}", loadResult); + return false; + } + } + + public static boolean isCommitted(String msg) { + return COMMITTED_PATTERN.matcher(msg).matches(); + } + + + public void close() throws IOException { + if (null != httpClient) { + try { + httpClient.close(); + } catch (IOException e) { + LOG.error("Closing httpClient failed.", e); + throw new RuntimeException("Closing httpClient failed.", e); + } + } + } +} diff --git 
a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbCsvCodec.java b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbCsvCodec.java new file mode 100644 index 00000000..57cad84d --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbCsvCodec.java @@ -0,0 +1,27 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + +import com.alibaba.datax.common.element.Record; + +public class SelectdbCsvCodec extends SelectdbBaseCodec implements SelectdbCodec { + + private static final long serialVersionUID = 1L; + + private final String columnSeparator; + + public SelectdbCsvCodec ( String sp) { + this.columnSeparator = DelimiterParser.parse(sp, "\t"); + } + + @Override + public String codec( Record row) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < row.getColumnNumber(); i++) { + String value = convertionField(row.getColumn(i)); + sb.append(null == value ? "\\N" : value); + if (i < row.getColumnNumber() - 1) { + sb.append(columnSeparator); + } + } + return sb.toString(); + } +} diff --git a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbJsonCodec.java b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbJsonCodec.java new file mode 100644 index 00000000..8b1a3760 --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbJsonCodec.java @@ -0,0 +1,33 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.fastjson2.JSON; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class SelectdbJsonCodec extends SelectdbBaseCodec implements SelectdbCodec { + + private static final long serialVersionUID = 1L; + + private final List fieldNames; + + public SelectdbJsonCodec ( List fieldNames) { + this.fieldNames = fieldNames; + } + + @Override + public String codec( Record row) { + if (null == fieldNames) { + return ""; + } + Map rowMap = new HashMap<> (fieldNames.size()); + int idx = 0; + for (String fieldName : fieldNames) { + rowMap.put(fieldName, convertionField(row.getColumn(idx))); + idx++; + } + return JSON.toJSONString(rowMap); + } +} diff --git a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbUtil.java b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbUtil.java new file mode 100644 index 00000000..6cfcc8bf --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbUtil.java @@ -0,0 +1,113 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.util.RdbmsException; +import com.alibaba.datax.plugin.rdbms.writer.Constant; +import com.alibaba.druid.sql.parser.ParserException; +import com.google.common.base.Strings; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * jdbc util + */ +public class SelectdbUtil { + private static final Logger LOG = LoggerFactory.getLogger(SelectdbUtil.class); + + private SelectdbUtil() {} + + public static List getDorisTableColumns( Connection conn, String 
databaseName, String tableName) { + String currentSql = String.format("SELECT COLUMN_NAME FROM `information_schema`.`COLUMNS` WHERE `TABLE_SCHEMA` = '%s' AND `TABLE_NAME` = '%s' ORDER BY `ORDINAL_POSITION` ASC;", databaseName, tableName); + List columns = new ArrayList<> (); + ResultSet rs = null; + try { + rs = DBUtil.query(conn, currentSql); + while (DBUtil.asyncResultSetNext(rs)) { + String colName = rs.getString("COLUMN_NAME"); + columns.add(colName); + } + return columns; + } catch (Exception e) { + throw RdbmsException.asQueryException(DataBaseType.MySql, e, currentSql, null, null); + } finally { + DBUtil.closeDBResources(rs, null, null); + } + } + + public static List renderPreOrPostSqls(List preOrPostSqls, String tableName) { + if (null == preOrPostSqls) { + return Collections.emptyList(); + } + List renderedSqls = new ArrayList<>(); + for (String sql : preOrPostSqls) { + if (! Strings.isNullOrEmpty(sql)) { + renderedSqls.add(sql.replace(Constant.TABLE_NAME_PLACEHOLDER, tableName)); + } + } + return renderedSqls; + } + + public static void executeSqls(Connection conn, List sqls) { + Statement stmt = null; + String currentSql = null; + try { + stmt = conn.createStatement(); + for (String sql : sqls) { + currentSql = sql; + DBUtil.executeSqlWithoutResultSet(stmt, sql); + } + } catch (Exception e) { + throw RdbmsException.asQueryException(DataBaseType.MySql, e, currentSql, null, null); + } finally { + DBUtil.closeDBResources(null, stmt, null); + } + } + + public static void preCheckPrePareSQL( Keys options) { + String table = options.getTable(); + List preSqls = options.getPreSqlList(); + List renderedPreSqls = SelectdbUtil.renderPreOrPostSqls(preSqls, table); + if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) { + LOG.info("Begin to preCheck preSqls:[{}].", String.join(";", renderedPreSqls)); + for (String sql : renderedPreSqls) { + try { + DBUtil.sqlValid(sql, DataBaseType.MySql); + } catch ( ParserException e) { + throw RdbmsException.asPreSQLParserException(DataBaseType.MySql,e,sql); + } + } + } + } + + public static void preCheckPostSQL( Keys options) { + String table = options.getTable(); + List postSqls = options.getPostSqlList(); + List renderedPostSqls = SelectdbUtil.renderPreOrPostSqls(postSqls, table); + if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) { + LOG.info("Begin to preCheck postSqls:[{}].", String.join(";", renderedPostSqls)); + for(String sql : renderedPostSqls) { + try { + DBUtil.sqlValid(sql, DataBaseType.MySql); + } catch (ParserException e){ + throw RdbmsException.asPostSQLParserException(DataBaseType.MySql,e,sql); + } + } + } + } + + public static T checkNotNull(T reference) { + if (reference == null) { + throw new NullPointerException(); + } else { + return reference; + } + } +} diff --git a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbWriter.java b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbWriter.java new file mode 100644 index 00000000..2b91f122 --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbWriter.java @@ -0,0 +1,149 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.spi.Writer; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import 
com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.util.ArrayList; +import java.util.List; + +/** + * doris data writer + */ +public class SelectdbWriter extends Writer { + + public static class Job extends Writer.Job { + + private static final Logger LOG = LoggerFactory.getLogger(Job.class); + private Configuration originalConfig = null; + private Keys options; + + @Override + public void init() { + this.originalConfig = super.getPluginJobConf(); + options = new Keys (super.getPluginJobConf()); + options.doPretreatment(); + } + + @Override + public void preCheck(){ + this.init(); + SelectdbUtil.preCheckPrePareSQL(options); + SelectdbUtil.preCheckPostSQL(options); + } + + @Override + public void prepare() { + String username = options.getUsername(); + String password = options.getPassword(); + String jdbcUrl = options.getJdbcUrl(); + List renderedPreSqls = SelectdbUtil.renderPreOrPostSqls(options.getPreSqlList(), options.getTable()); + if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) { + Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password); + LOG.info("Begin to execute preSqls:[{}]. context info:{}.", String.join(";", renderedPreSqls), jdbcUrl); + SelectdbUtil.executeSqls(conn, renderedPreSqls); + DBUtil.closeDBResources(null, null, conn); + } + } + + @Override + public List split(int mandatoryNumber) { + List configurations = new ArrayList<>(mandatoryNumber); + for (int i = 0; i < mandatoryNumber; i++) { + configurations.add(originalConfig); + } + return configurations; + } + + @Override + public void post() { + String username = options.getUsername(); + String password = options.getPassword(); + String jdbcUrl = options.getJdbcUrl(); + List renderedPostSqls = SelectdbUtil.renderPreOrPostSqls(options.getPostSqlList(), options.getTable()); + if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) { + Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password); + LOG.info("Start to execute preSqls:[{}]. context info:{}.", String.join(";", renderedPostSqls), jdbcUrl); + SelectdbUtil.executeSqls(conn, renderedPostSqls); + DBUtil.closeDBResources(null, null, conn); + } + } + + @Override + public void destroy() { + } + + } + + public static class Task extends Writer.Task { + private SelectdbWriterManager writerManager; + private Keys options; + private SelectdbCodec rowCodec; + + @Override + public void init() { + options = new Keys (super.getPluginJobConf()); + if (options.isWildcardColumn()) { + Connection conn = DBUtil.getConnection(DataBaseType.MySql, options.getJdbcUrl(), options.getUsername(), options.getPassword()); + List columns = SelectdbUtil.getDorisTableColumns(conn, options.getDatabase(), options.getTable()); + options.setInfoCchemaColumns(columns); + } + writerManager = new SelectdbWriterManager(options); + rowCodec = SelectdbCodecFactory.createCodec(options); + } + + @Override + public void prepare() { + } + + public void startWrite(RecordReceiver recordReceiver) { + try { + Record record; + while ((record = recordReceiver.getFromReader()) != null) { + if (record.getColumnNumber() != options.getColumns().size()) { + throw DataXException + .asDataXException( + DBUtilErrorCode.CONF_ERROR, + String.format( + "There is an error in the column configuration information. 
" + + "This is because you have configured a task where the number of fields to be read from the source:%s " + + "is not equal to the number of fields to be written to the destination table:%s. " + + "Please check your configuration and make changes.", + record.getColumnNumber(), + options.getColumns().size())); + } + writerManager.writeRecord(rowCodec.codec(record)); + } + } catch (Exception e) { + throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e); + } + } + + @Override + public void post() { + try { + writerManager.close(); + } catch (Exception e) { + throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e); + } + } + + @Override + public void destroy() {} + + @Override + public boolean supportFailOver(){ + return false; + } + } + + +} diff --git a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbWriterException.java b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbWriterException.java new file mode 100644 index 00000000..f85a06d1 --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbWriterException.java @@ -0,0 +1,39 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + + +public class SelectdbWriterException extends RuntimeException { + + private boolean reCreateLabel; + + + public SelectdbWriterException() { + super(); + } + + public SelectdbWriterException(String message) { + super(message); + } + + public SelectdbWriterException(String message, boolean reCreateLabel) { + super(message); + this.reCreateLabel = reCreateLabel; + } + + public SelectdbWriterException(String message, Throwable cause) { + super(message, cause); + } + + public SelectdbWriterException(Throwable cause) { + super(cause); + } + + protected SelectdbWriterException(String message, Throwable cause, + boolean enableSuppression, + boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + + public boolean needReCreateLabel() { + return reCreateLabel; + } +} diff --git a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbWriterManager.java b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbWriterManager.java new file mode 100644 index 00000000..e8b22b7f --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/SelectdbWriterManager.java @@ -0,0 +1,196 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + +import com.google.common.base.Strings; +import org.apache.commons.lang3.concurrent.BasicThreadFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; + +public class SelectdbWriterManager { + + private static final Logger LOG = LoggerFactory.getLogger(SelectdbWriterManager.class); + + private final SelectdbCopyIntoObserver visitor; + private final Keys options; + private final List buffer = new ArrayList<>(); + private int batchCount = 0; + private long batchSize = 0; + private volatile boolean closed = false; + private volatile Exception flushException; + private final LinkedBlockingDeque flushQueue; + 
private ScheduledExecutorService scheduler; + private ScheduledFuture scheduledFuture; + + public SelectdbWriterManager(Keys options) { + this.options = options; + this.visitor = new SelectdbCopyIntoObserver(options); + flushQueue = new LinkedBlockingDeque<>(options.getFlushQueueLength()); + this.startScheduler(); + this.startAsyncFlushing(); + } + + public void startScheduler() { + stopScheduler(); + this.scheduler = Executors.newScheduledThreadPool(1, new BasicThreadFactory.Builder().namingPattern("Doris-interval-flush").daemon(true).build()); + this.scheduledFuture = this.scheduler.schedule(() -> { + synchronized (SelectdbWriterManager.this) { + if (!closed) { + try { + String label = createBatchLabel(); + LOG.info(String.format("Selectdb interval Sinking triggered: label[%s].", label)); + if (batchCount == 0) { + startScheduler(); + } + flush(label, false); + } catch (Exception e) { + flushException = e; + } + } + } + }, options.getFlushInterval(), TimeUnit.MILLISECONDS); + } + + public void stopScheduler() { + if (this.scheduledFuture != null) { + scheduledFuture.cancel(false); + this.scheduler.shutdown(); + } + } + + public final synchronized void writeRecord(String record) throws IOException { + checkFlushException(); + try { + byte[] bts = record.getBytes(StandardCharsets.UTF_8); + buffer.add(bts); + batchCount++; + batchSize += bts.length; + if (batchCount >= options.getBatchRows() || batchSize >= options.getBatchSize()) { + String label = createBatchLabel(); + if(LOG.isDebugEnabled()){ + LOG.debug(String.format("buffer Sinking triggered: rows[%d] label [%s].", batchCount, label)); + } + flush(label, false); + } + } catch (Exception e) { + throw new SelectdbWriterException("Writing records to selectdb failed.", e); + } + } + + public synchronized void flush(String label, boolean waitUtilDone) throws Exception { + checkFlushException(); + if (batchCount == 0) { + if (waitUtilDone) { + waitAsyncFlushingDone(); + } + return; + } + flushQueue.put(new WriterTuple(label, batchSize, new ArrayList<>(buffer))); + if (waitUtilDone) { + // wait the last flush + waitAsyncFlushingDone(); + } + buffer.clear(); + batchCount = 0; + batchSize = 0; + } + + public synchronized void close() throws IOException { + if (!closed) { + closed = true; + try { + String label = createBatchLabel(); + if (batchCount > 0) { + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("Selectdb Sink is about to close: label[%s].", label)); + } + } + flush(label, true); + } catch (Exception e) { + throw new RuntimeException("Writing records to Selectdb failed.", e); + } + } + checkFlushException(); + } + + public String createBatchLabel() { + StringBuilder sb = new StringBuilder(); + if (!Strings.isNullOrEmpty(options.getLabelPrefix())) { + sb.append(options.getLabelPrefix()); + } + return sb.append(UUID.randomUUID().toString()) + .toString(); + } + + private void startAsyncFlushing() { + // start flush thread + Thread flushThread = new Thread(new Runnable() { + public void run() { + while (true) { + try { + asyncFlush(); + } catch (Exception e) { + flushException = e; + } + } + } + }); + flushThread.setDaemon(true); + flushThread.start(); + } + + private void waitAsyncFlushingDone() throws InterruptedException { + // wait previous flushings + for (int i = 0; i <= options.getFlushQueueLength(); i++) { + flushQueue.put(new WriterTuple("", 0l, null)); + } + checkFlushException(); + } + + private void asyncFlush() throws Exception { + WriterTuple flushData = flushQueue.take(); + if 
(Strings.isNullOrEmpty(flushData.getLabel())) { + return; + } + stopScheduler(); + for (int i = 0; i <= options.getMaxRetries(); i++) { + try { + // copy into + visitor.streamLoad(flushData); + startScheduler(); + break; + } catch (Exception e) { + LOG.warn("Failed to flush batch data to selectdb, retry times = {}", i, e); + if (i >= options.getMaxRetries()) { + throw new RuntimeException(e); + } + if (e instanceof SelectdbWriterException && ((SelectdbWriterException)e).needReCreateLabel()) { + String newLabel = createBatchLabel(); + LOG.warn(String.format("Batch label changed from [%s] to [%s]", flushData.getLabel(), newLabel)); + flushData.setLabel(newLabel); + } + try { + Thread.sleep(1000l * Math.min(i + 1, 100)); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Unable to flush, interrupted while doing another attempt", e); + } + } + } + } + + private void checkFlushException() { + if (flushException != null) { + throw new RuntimeException("Writing records to selectdb failed.", flushException); + } + } +} diff --git a/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/WriterTuple.java b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/WriterTuple.java new file mode 100644 index 00000000..483ade05 --- /dev/null +++ b/selectdbwriter/src/main/java/com/alibaba/datax/plugin/writer/selectdbwriter/WriterTuple.java @@ -0,0 +1,22 @@ +package com.alibaba.datax.plugin.writer.selectdbwriter; + +import java.util.List; + +public class WriterTuple { + private String label; + private Long bytes; + private List rows; + + + public WriterTuple ( String label, Long bytes, List rows){ + this.label = label; + this.rows = rows; + this.bytes = bytes; + } + + public String getLabel() { return label; } + public void setLabel(String label) { this.label = label; } + public Long getBytes() { return bytes; } + public List getRows() { return rows; } + +} diff --git a/selectdbwriter/src/main/resources/plugin.json b/selectdbwriter/src/main/resources/plugin.json new file mode 100644 index 00000000..4b84a945 --- /dev/null +++ b/selectdbwriter/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "selectdbwriter", + "class": "com.alibaba.datax.plugin.writer.selectdbwriter.SelectdbWriter", + "description": "selectdb writer plugin", + "developer": "selectdb" +} \ No newline at end of file diff --git a/selectdbwriter/src/main/resources/plugin_job_template.json b/selectdbwriter/src/main/resources/plugin_job_template.json new file mode 100644 index 00000000..c603b7e0 --- /dev/null +++ b/selectdbwriter/src/main/resources/plugin_job_template.json @@ -0,0 +1,19 @@ +{ + "name": "selectdbwriter", + "parameter": { + "username": "", + "password": "", + "column": [], + "preSql": [], + "postSql": [], + "loadUrl": [], + "loadProps": {}, + "connection": [ + { + "jdbcUrl": "", + "selectedDatabase": "", + "table": [] + } + ] + } +} \ No newline at end of file diff --git a/sqlserverreader/pom.xml b/sqlserverreader/pom.xml index 5372a057..326f1ce5 100755 --- a/sqlserverreader/pom.xml +++ b/sqlserverreader/pom.xml @@ -31,10 +31,7 @@ com.microsoft.sqlserver sqljdbc4 4.0 - system - ${basedir}/src/main/lib/sqljdbc4-4.0.jar - com.alibaba.datax plugin-rdbms-util diff --git a/sqlserverreader/src/main/assembly/package.xml b/sqlserverreader/src/main/assembly/package.xml index 55fbdc0b..6180fbc0 100755 --- a/sqlserverreader/src/main/assembly/package.xml +++ b/sqlserverreader/src/main/assembly/package.xml @@ -16,13 +16,6 @@ 
plugin/reader/sqlserverreader - - src/main/lib - - sqljdbc4-4.0.jar - - plugin/reader/sqlserverreader/libs - target/ diff --git a/sqlserverreader/src/main/lib/sqljdbc4-4.0.jar b/sqlserverreader/src/main/lib/sqljdbc4-4.0.jar deleted file mode 100644 index d6b7f6da..00000000 Binary files a/sqlserverreader/src/main/lib/sqljdbc4-4.0.jar and /dev/null differ diff --git a/sqlserverwriter/doc/sqlserverwriter.md b/sqlserverwriter/doc/sqlserverwriter.md index 255834c6..7d786292 100644 --- a/sqlserverwriter/doc/sqlserverwriter.md +++ b/sqlserverwriter/doc/sqlserverwriter.md @@ -69,6 +69,7 @@ SqlServerWriter 通过 DataX 框架获取 Reader 生成的协议数据,根据 "jdbcUrl": "jdbc:sqlserver://[HOST_NAME]:PORT;DatabaseName=[DATABASE_NAME]" } ], + "session": ["SET IDENTITY_INSERT TABLE_NAME ON"], "preSql": [ "delete from @table where db_id = -1;" ], @@ -127,7 +128,7 @@ SqlServerWriter 通过 DataX 框架获取 Reader 生成的协议数据,根据 * **column** - * 描述:目的表需要写入数据的字段,字段之间用英文逗号分隔。例如: "column": ["id","name","age"]。如果要依次写入全部列,使用*表示, 例如: "column": ["*"] + * 描述:目的表需要写入数据的字段,字段之间用英文逗号分隔。例如: "column": ["id","name","age"]。如果要依次写入全部列,使用*表示, 例如: "column": ["\*"] **column配置项必须指定,不能留空!** @@ -139,6 +140,14 @@ SqlServerWriter 通过 DataX 框架获取 Reader 生成的协议数据,根据 * 默认值:否
+* **session** + + * 描述:DataX在获取 seqlserver 连接时,执行session指定的SQL语句,修改当前connection session属性
+ +  * 描述:DataX在获取 sqlserver 连接时,执行session指定的SQL语句,修改当前connection session属性
+ + * 默认值:无
+ * **preSql** * 描述:写入数据到目的表前,会先执行这里的标准语句。如果 Sql 中有你需要操作到的表名称,请使用 `@table` 表示,这样在实际执行 Sql 语句时,会对变量按照实际表名称进行替换。
diff --git a/sqlserverwriter/pom.xml b/sqlserverwriter/pom.xml index d2b1eea1..6f52c14c 100644 --- a/sqlserverwriter/pom.xml +++ b/sqlserverwriter/pom.xml @@ -35,8 +35,6 @@ com.microsoft.sqlserver sqljdbc4 4.0 - system - ${basedir}/src/main/lib/sqljdbc4-4.0.jar
com.alibaba.datax diff --git a/sqlserverwriter/src/main/assembly/package.xml b/sqlserverwriter/src/main/assembly/package.xml index 761dffcd..f8f26298 100755 --- a/sqlserverwriter/src/main/assembly/package.xml +++ b/sqlserverwriter/src/main/assembly/package.xml @@ -16,13 +16,6 @@ plugin/writer/sqlserverwriter - - src/main/lib - - sqljdbc4-4.0.jar - - plugin/writer/sqlserverwriter/libs - target/ diff --git a/sqlserverwriter/src/main/lib/sqljdbc4-4.0.jar b/sqlserverwriter/src/main/lib/sqljdbc4-4.0.jar deleted file mode 100644 index d6b7f6da..00000000 Binary files a/sqlserverwriter/src/main/lib/sqljdbc4-4.0.jar and /dev/null differ diff --git a/starrocksreader/pom.xml b/starrocksreader/pom.xml new file mode 100644 index 00000000..a8b049ea --- /dev/null +++ b/starrocksreader/pom.xml @@ -0,0 +1,95 @@ + + + + datax-all + com.alibaba.datax + 0.0.1-SNAPSHOT + + 4.0.0 + starrocksreader + starrocksreader + jar + + + 8 + 8 + + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + org.slf4j + slf4j-api + + + ch.qos.logback + logback-classic + + + + com.alibaba.datax + plugin-rdbms-util + ${datax-project-version} + + + + mysql + mysql-connector-java + 5.1.46 + + + + + + + + src/main/java + + **/*.properties + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + \ No newline at end of file diff --git a/starrocksreader/src/main/assembly/package.xml b/starrocksreader/src/main/assembly/package.xml new file mode 100644 index 00000000..c126c107 --- /dev/null +++ b/starrocksreader/src/main/assembly/package.xml @@ -0,0 +1,35 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/reader/starrocksreader + + + target/ + + starrocksreader-0.0.1-SNAPSHOT.jar + + plugin/reader/starrocksreader + + + + + + false + plugin/reader/starrocksreader/libs + runtime + + + diff --git a/starrocksreader/src/main/java/com/alibaba/datax/plugin/reader/starrocksreader/StarRocksReader.java b/starrocksreader/src/main/java/com/alibaba/datax/plugin/reader/starrocksreader/StarRocksReader.java new file mode 100644 index 00000000..d4bf3437 --- /dev/null +++ b/starrocksreader/src/main/java/com/alibaba/datax/plugin/reader/starrocksreader/StarRocksReader.java @@ -0,0 +1,116 @@ +package com.alibaba.datax.plugin.reader.starrocksreader; + +import java.util.List; + +import com.alibaba.datax.common.plugin.RecordSender; +import com.alibaba.datax.common.spi.Reader; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.reader.CommonRdbmsReader; +import com.alibaba.datax.plugin.rdbms.reader.Constant; +import com.alibaba.datax.plugin.rdbms.reader.Key; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; + +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class StarRocksReader extends Reader { + + private static final DataBaseType DATABASE_TYPE = DataBaseType.StarRocks; + + public static class Job extends Reader.Job { + private static final Logger LOG = LoggerFactory + .getLogger(Job.class); + + private Configuration originalConfig = null; + private CommonRdbmsReader.Job commonRdbmsReaderJob; + + @Override + public void init() { + this.originalConfig = super.getPluginJobConf(); + int fetchSize = this.originalConfig.getInt(Constant.FETCH_SIZE, + Integer.MIN_VALUE); + 
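            // Defaulting to Integer.MIN_VALUE keeps the MySQL-protocol driver in row-streaming mode;
            // write the resolved fetch size back into the job config so each Task picks up the same value.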
this.originalConfig.set(Constant.FETCH_SIZE, fetchSize); + + this.commonRdbmsReaderJob = new CommonRdbmsReader.Job(DATABASE_TYPE); + this.commonRdbmsReaderJob.init(this.originalConfig); + } + + @Override + public void preCheck(){ + init(); + this.commonRdbmsReaderJob.preCheck(this.originalConfig,DATABASE_TYPE); + + } + + @Override + public void prepare() { + } + + @Override + public List split(int adviceNumber) { + LOG.info("split() begin..."); + List splitResult = this.commonRdbmsReaderJob.split(this.originalConfig, adviceNumber); + /** + * 在日志中告知用户,为什么实际datax切分跑的channel数会小于用户配置的channel数 + */ + if(splitResult.size() < adviceNumber){ + // 如果用户没有配置切分主键splitPk + if(StringUtils.isBlank(this.originalConfig.getString(Key.SPLIT_PK, null))){ + LOG.info("User has not configured splitPk."); + }else{ + // 用户配置了切分主键,但是切分主键可能重复太多,或者要同步的表的记录太少,无法切分成adviceNumber个task + LOG.info("User has configured splitPk. But the number of task finally split is smaller than that user has configured. " + + "The possible reasons are: 1) too many repeated splitPk values, 2) too few records."); + } + } + LOG.info("split() ok and end..."); + return splitResult; + } + + @Override + public void post() { + this.commonRdbmsReaderJob.post(this.originalConfig); + } + + @Override + public void destroy() { + this.commonRdbmsReaderJob.destroy(this.originalConfig); + } + + } + + public static class Task extends Reader.Task { + + private Configuration readerSliceConfig; + private CommonRdbmsReader.Task commonRdbmsReaderTask; + + @Override + public void init() { + this.readerSliceConfig = super.getPluginJobConf(); + this.commonRdbmsReaderTask = new CommonRdbmsReader.Task(DATABASE_TYPE, super.getTaskGroupId(), super.getTaskId()); + this.commonRdbmsReaderTask.init(this.readerSliceConfig); + + } + + @Override + public void startRead(RecordSender recordSender) { + int fetchSize = this.readerSliceConfig.getInt(Constant.FETCH_SIZE); + + this.commonRdbmsReaderTask.startRead(this.readerSliceConfig, recordSender, + super.getTaskPluginCollector(), fetchSize); + } + + @Override + public void post() { + this.commonRdbmsReaderTask.post(this.readerSliceConfig); + } + + @Override + public void destroy() { + this.commonRdbmsReaderTask.destroy(this.readerSliceConfig); + } + + } +} diff --git a/starrocksreader/src/main/resources/plugin.json b/starrocksreader/src/main/resources/plugin.json new file mode 100644 index 00000000..b0d6e039 --- /dev/null +++ b/starrocksreader/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "starrocksreader", + "class": "com.alibaba.datax.plugin.reader.starrocksreader.StarRocksReader", + "description": "useScene: prod. mechanism: Jdbc connection using the database, execute select sql, retrieve data from the ResultSet. 
warn: The more you know about the database, the less problems you encounter.", + "developer": "alibaba" +} \ No newline at end of file diff --git a/starrockswriter/doc/starrockswriter.md b/starrockswriter/doc/starrockswriter.md new file mode 100644 index 00000000..6ebe3681 --- /dev/null +++ b/starrockswriter/doc/starrockswriter.md @@ -0,0 +1,222 @@ +# DataX StarRocksWriter + + +--- + + +## 1 快速介绍 + +StarRocksWriter 插件实现了写入数据到 StarRocks 主库的目的表的功能。在底层实现上, StarRocksWriter 通过Streamload以csv格式导入数据至StarRocks。 + + +## 2 实现原理 + + StarRocksWriter 通过Streamload以csv格式导入数据至StarRocks, 内部将`reader`读取的数据进行缓存后批量导入至StarRocks,以提高写入性能。 + + +## 3 功能说明 + +### 3.1 配置样例 + +* 这里使用一份从内存Mysql读取数据后导入至StarRocks。 + +```json +{ + "job": { + "setting": { + "speed": { + "channel": 1 + }, + "errorLimit": { + "record": 0, + "percentage": 0 + } + }, + "content": [ + { + "reader": { + "name": "mysqlreader", + "parameter": { + "username": "xxxx", + "password": "xxxx", + "column": [ "k1", "k2", "v1", "v2" ], + "connection": [ + { + "table": [ "table1", "table2" ], + "jdbcUrl": [ + "jdbc:mysql://127.0.0.1:3306/datax_test1" + ] + }, + { + "table": [ "table3", "table4" ], + "jdbcUrl": [ + "jdbc:mysql://127.0.0.1:3306/datax_test2" + ] + } + ] + } + }, + "writer": { + "name": "starrockswriter", + "parameter": { + "username": "xxxx", + "password": "xxxx", + "column": ["k1", "k2", "v1", "v2"], + "preSql": [], + "postSql": [], + "connection": [ + { + "table": ["xxx"], + "jdbcUrl": "jdbc:mysql://172.28.17.100:9030/", + "selectedDatabase": "xxxx" + } + ], + "loadUrl": ["172.28.17.100:8030", "172.28.17.100:8030"], + "loadProps": {} + } + } + } + ] + } +} + +``` + + +### 3.2 参数说明 + +* **username** + + * 描述:StarRocks数据库的用户名
+ + * 必选:是
+ + * 默认值:无
+ +* **password** + + * 描述:StarRocks数据库的密码
+ + * 必选:是
+ + * 默认值:无
+ +* **selectedDatabase** + + * 描述:StarRocks表的数据库名称。 + + * 必选:是
+ + * 默认值:无
+ +* **table** + + * 描述:StarRocks表的表名称。 + + * 必选:是
+ + * 默认值:无
+ +* **loadUrl** + + * 描述:StarRocks FE的地址用于Streamload,可以为多个fe地址,`fe_ip:fe_http_port`。 + + * 必选:是
+ + * 默认值:无
+ +* **column** + + * 描述:目的表需要写入数据的字段,字段之间用英文逗号分隔。例如: "column": ["id","name","age"]。 + + **column配置项必须指定,不能留空!** + + 注意:我们强烈不推荐你这样配置,因为当你目的表字段个数、类型等有改动时,你的任务可能运行不正确或者失败 + + * 必选:是
+ + * 默认值:否
+ +* **preSql** + + * 描述:写入数据到目的表前,会先执行这里的标准语句。
+ + * 必选:否
+ + * 默认值:无
+ +* **postSql** + + * 描述:写入数据到目的表后,会执行这里的标准语句。
+ + * 必选:否
+ + * 默认值:无
+ +* **jdbcUrl** + + * 描述:目的数据库的 JDBC 连接信息,用于执行`preSql`及`postSql`。
+ + * 必选:否
+ + * 默认值:无
+ +* **maxBatchRows** + + * 描述:单次StreamLoad导入的最大行数
+ + * 必选:否
+ + * 默认值:500000 (50W)
+ +* **maxBatchSize** + + * 描述:单次StreamLoad导入的最大字节数。
+ + * 必选:否
+ + * 默认值:104857600 (100M) + +* **flushInterval** + + * 描述:上一次StreamLoad结束至下一次开始的时间间隔(单位:ms)。
+ + * 必选:否
+ + * 默认值:300000 (ms) + +* **loadProps** + + * 描述:StreamLoad 的请求参数,详情参照StreamLoad介绍页面。
+ + * 必选:否
+ + * 默认值:无
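+以下给出 writer `parameter` 中批量导入相关可选项的一个示意片段(数值仅作示例,请按实际数据量与导入频率调整):
+
+```json
+"maxBatchRows": 500000,
+"maxBatchSize": 104857600,
+"flushInterval": 300000,
+"loadProps": {}
+```
+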
+ + +### 3.3 类型转换 + +默认传入的数据均会被转为字符串,并以`\t`作为列分隔符,`\n`作为行分隔符,组成`csv`文件进行StreamLoad导入操作。 +如需更改列分隔符, 则正确配置 `loadProps` 即可: +```json +"loadProps": { + "column_separator": "\\x01", + "row_delimiter": "\\x02" +} +``` + +如需更改导入格式为`json`, 则正确配置 `loadProps` 即可: +```json +"loadProps": { + "format": "json", + "strip_outer_array": true +} +``` + +## 4 性能报告 + + +## 5 约束限制 + + +## FAQ diff --git a/starrockswriter/pom.xml b/starrockswriter/pom.xml new file mode 100755 index 00000000..73a51422 --- /dev/null +++ b/starrockswriter/pom.xml @@ -0,0 +1,155 @@ + + 4.0.0 + + com.alibaba.datax + datax-all + 0.0.1-SNAPSHOT + + starrockswriter + starrockswriter + 1.1.0 + jar + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + org.slf4j + slf4j-api + + + ch.qos.logback + logback-classic + + + com.alibaba.datax + plugin-rdbms-util + ${datax-project-version} + + + commons-codec + commons-codec + 1.9 + + + org.apache.commons + commons-lang3 + 3.12.0 + + + commons-logging + commons-logging + 1.1.1 + + + org.apache.httpcomponents + httpcore + 4.4.6 + + + org.apache.httpcomponents + httpclient + 4.5.3 + + + com.alibaba.fastjson2 + fastjson2 + + + mysql + mysql-connector-java + 5.1.46 + + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + org.apache.maven.plugins + maven-shade-plugin + 3.0.0 + + + + package + + shade + + + true + + + org.apache.http + com.starrocks.shade.org.apache.http + + + org.apache.commons + com.starrocks.shade.org.apache.commons + + + + + org.apache.commons:commons-lang3 + commons-codec:commons-codec + commons-logging:* + org.apache.httpcomponents:httpclient + org.apache.httpcomponents:httpcore + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + diff --git a/starrockswriter/src/main/assembly/package.xml b/starrockswriter/src/main/assembly/package.xml new file mode 100755 index 00000000..c63845b4 --- /dev/null +++ b/starrockswriter/src/main/assembly/package.xml @@ -0,0 +1,35 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/writer/starrockswriter + + + target/ + + starrockswriter-1.1.0.jar + + plugin/writer/starrockswriter + + + + + + false + plugin/writer/starrockswriter/libs + runtime + + + diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriter.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriter.java new file mode 100755 index 00000000..75b2df3a --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriter.java @@ -0,0 +1,151 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.spi.Writer; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.starrocks.connector.datax.plugin.writer.starrockswriter.manager.StarRocksWriterManager; +import 
com.starrocks.connector.datax.plugin.writer.starrockswriter.row.StarRocksISerializer; +import com.starrocks.connector.datax.plugin.writer.starrockswriter.row.StarRocksSerializerFactory; +import com.starrocks.connector.datax.plugin.writer.starrockswriter.util.StarRocksWriterUtil; + +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.util.ArrayList; +import java.util.List; + +public class StarRocksWriter extends Writer { + + public static class Job extends Writer.Job { + + private static final Logger LOG = LoggerFactory.getLogger(Job.class); + private Configuration originalConfig = null; + private StarRocksWriterOptions options; + + @Override + public void init() { + this.originalConfig = super.getPluginJobConf(); + String selectedDatabase = super.getPluginJobConf().getString(StarRocksWriterOptions.KEY_SELECTED_DATABASE); + if(StringUtils.isBlank(this.originalConfig.getString(StarRocksWriterOptions.KEY_DATABASE)) && StringUtils.isNotBlank(selectedDatabase)){ + this.originalConfig.set(StarRocksWriterOptions.KEY_DATABASE, selectedDatabase); + } + options = new StarRocksWriterOptions(super.getPluginJobConf()); + options.doPretreatment(); + } + + @Override + public void preCheck(){ + this.init(); + StarRocksWriterUtil.preCheckPrePareSQL(options); + StarRocksWriterUtil.preCheckPostSQL(options); + } + + @Override + public void prepare() { + String username = options.getUsername(); + String password = options.getPassword(); + String jdbcUrl = options.getJdbcUrl(); + List renderedPreSqls = StarRocksWriterUtil.renderPreOrPostSqls(options.getPreSqlList(), options.getTable()); + if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) { + Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password); + LOG.info("Begin to execute preSqls:[{}]. context info:{}.", String.join(";", renderedPreSqls), jdbcUrl); + StarRocksWriterUtil.executeSqls(conn, renderedPreSqls); + DBUtil.closeDBResources(null, null, conn); + } + } + + @Override + public List split(int mandatoryNumber) { + List configurations = new ArrayList<>(mandatoryNumber); + for (int i = 0; i < mandatoryNumber; i++) { + configurations.add(originalConfig); + } + return configurations; + } + + @Override + public void post() { + String username = options.getUsername(); + String password = options.getPassword(); + String jdbcUrl = options.getJdbcUrl(); + List renderedPostSqls = StarRocksWriterUtil.renderPreOrPostSqls(options.getPostSqlList(), options.getTable()); + if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) { + Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password); + LOG.info("Begin to execute preSqls:[{}]. 
context info:{}.", String.join(";", renderedPostSqls), jdbcUrl); + StarRocksWriterUtil.executeSqls(conn, renderedPostSqls); + DBUtil.closeDBResources(null, null, conn); + } + } + + @Override + public void destroy() { + } + + } + + public static class Task extends Writer.Task { + private StarRocksWriterManager writerManager; + private StarRocksWriterOptions options; + private StarRocksISerializer rowSerializer; + + @Override + public void init() { + options = new StarRocksWriterOptions(super.getPluginJobConf()); + if (options.isWildcardColumn()) { + Connection conn = DBUtil.getConnection(DataBaseType.MySql, options.getJdbcUrl(), options.getUsername(), options.getPassword()); + List columns = StarRocksWriterUtil.getStarRocksColumns(conn, options.getDatabase(), options.getTable()); + options.setInfoCchemaColumns(columns); + } + writerManager = new StarRocksWriterManager(options); + rowSerializer = StarRocksSerializerFactory.createSerializer(options); + } + + @Override + public void prepare() { + } + + public void startWrite(RecordReceiver recordReceiver) { + try { + Record record; + while ((record = recordReceiver.getFromReader()) != null) { + if (record.getColumnNumber() != options.getColumns().size()) { + throw DataXException + .asDataXException( + DBUtilErrorCode.CONF_ERROR, + String.format( + "Column configuration error. The number of reader columns %d and the number of writer columns %d are not equal.", + record.getColumnNumber(), + options.getColumns().size())); + } + writerManager.writeRecord(rowSerializer.serialize(record)); + } + } catch (Exception e) { + throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e); + } + } + + @Override + public void post() { + try { + writerManager.close(); + } catch (Exception e) { + throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e); + } + } + + @Override + public void destroy() {} + + @Override + public boolean supportFailOver(){ + return false; + } + } +} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java new file mode 100644 index 00000000..5c6ddacd --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriterOptions.java @@ -0,0 +1,199 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter; + +import java.io.Serializable; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; +import org.apache.commons.lang3.StringUtils; + +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class StarRocksWriterOptions implements Serializable { + + private static final long serialVersionUID = 1l; + private static final long KILO_BYTES_SCALE = 1024l; + private static final long MEGA_BYTES_SCALE = KILO_BYTES_SCALE * KILO_BYTES_SCALE; + private static final int MAX_RETRIES = 1; + private static final int BATCH_ROWS = 500000; + private static final long BATCH_BYTES = 5 * MEGA_BYTES_SCALE; + private static final long FLUSH_INTERVAL = 300000; + + private static final String KEY_LOAD_PROPS_FORMAT = "format"; + public enum StreamLoadFormat { + CSV, JSON; + } + + public static final String KEY_USERNAME = "username"; + public static final String KEY_PASSWORD = "password"; + public static final 
String KEY_DATABASE = "database"; + public static final String KEY_SELECTED_DATABASE = "selectedDatabase"; + public static final String KEY_TABLE = "table"; + public static final String KEY_COLUMN = "column"; + public static final String KEY_PRE_SQL = "preSql"; + public static final String KEY_POST_SQL = "postSql"; + public static final String KEY_JDBC_URL = "jdbcUrl"; + public static final String KEY_LABEL_PREFIX = "labelPrefix"; + public static final String KEY_MAX_BATCH_ROWS = "maxBatchRows"; + public static final String KEY_MAX_BATCH_SIZE = "maxBatchSize"; + public static final String KEY_FLUSH_INTERVAL = "flushInterval"; + public static final String KEY_LOAD_URL = "loadUrl"; + public static final String KEY_FLUSH_QUEUE_LENGTH = "flushQueueLength"; + public static final String KEY_LOAD_PROPS = "loadProps"; + public static final String CONNECTION_JDBC_URL = "connection[0].jdbcUrl"; + public static final String CONNECTION_TABLE_NAME = "connection[0].table[0]"; + public static final String CONNECTION_SELECTED_DATABASE = "connection[0].selectedDatabase"; + + private final Configuration options; + private List infoCchemaColumns; + private List userSetColumns; + private boolean isWildcardColumn; + + public StarRocksWriterOptions(Configuration options) { + this.options = options; + // database + String database = this.options.getString(CONNECTION_SELECTED_DATABASE); + if (StringUtils.isBlank(database)) { + database = this.options.getString(KEY_SELECTED_DATABASE); + } + if (StringUtils.isNotBlank(database)) { + this.options.set(KEY_DATABASE, database); + } + // jdbcUrl + String jdbcUrl = this.options.getString(CONNECTION_JDBC_URL); + if (StringUtils.isNotBlank(jdbcUrl)) { + this.options.set(KEY_JDBC_URL, jdbcUrl); + } + // table + String table = this.options.getString(CONNECTION_TABLE_NAME); + if (StringUtils.isNotBlank(table)) { + this.options.set(KEY_TABLE, table); + } + // column + this.userSetColumns = options.getList(KEY_COLUMN, String.class).stream().map(str -> str.replace("`", "")).collect(Collectors.toList()); + if (1 == options.getList(KEY_COLUMN, String.class).size() && "*".trim().equals(options.getList(KEY_COLUMN, String.class).get(0))) { + this.isWildcardColumn = true; + } + } + + public void doPretreatment() { + validateRequired(); + validateStreamLoadUrl(); + } + + public String getJdbcUrl() { + return options.getString(KEY_JDBC_URL); + } + + public String getDatabase() { + return options.getString(KEY_DATABASE); + } + + public String getTable() { + return options.getString(KEY_TABLE); + } + + public String getUsername() { + return options.getString(KEY_USERNAME); + } + + public String getPassword() { + return options.getString(KEY_PASSWORD); + } + + public String getLabelPrefix() { + return options.getString(KEY_LABEL_PREFIX); + } + + public List getLoadUrlList() { + return options.getList(KEY_LOAD_URL, String.class); + } + + public List getColumns() { + if (isWildcardColumn) { + return this.infoCchemaColumns; + } + return this.userSetColumns; + } + + public boolean isWildcardColumn() { + return this.isWildcardColumn; + } + + public void setInfoCchemaColumns(List cols) { + this.infoCchemaColumns = cols; + } + + public List getPreSqlList() { + return options.getList(KEY_PRE_SQL, String.class); + } + + public List getPostSqlList() { + return options.getList(KEY_POST_SQL, String.class); + } + + public Map getLoadProps() { + return options.getMap(KEY_LOAD_PROPS); + } + + public int getMaxRetries() { + return MAX_RETRIES; + } + + public int getBatchRows() { + Integer rows = 
options.getInt(KEY_MAX_BATCH_ROWS); + return null == rows ? BATCH_ROWS : rows; + } + + public long getBatchSize() { + Long size = options.getLong(KEY_MAX_BATCH_SIZE); + return null == size ? BATCH_BYTES : size; + } + + public long getFlushInterval() { + Long interval = options.getLong(KEY_FLUSH_INTERVAL); + return null == interval ? FLUSH_INTERVAL : interval; + } + + public int getFlushQueueLength() { + Integer len = options.getInt(KEY_FLUSH_QUEUE_LENGTH); + return null == len ? 1 : len; + } + + public StreamLoadFormat getStreamLoadFormat() { + Map loadProps = getLoadProps(); + if (null == loadProps) { + return StreamLoadFormat.CSV; + } + if (loadProps.containsKey(KEY_LOAD_PROPS_FORMAT) + && StreamLoadFormat.JSON.name().equalsIgnoreCase(String.valueOf(loadProps.get(KEY_LOAD_PROPS_FORMAT)))) { + return StreamLoadFormat.JSON; + } + return StreamLoadFormat.CSV; + } + + private void validateStreamLoadUrl() { + List urlList = getLoadUrlList(); + for (String host : urlList) { + if (host.split(":").length < 2) { + throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR, + "The format of loadUrl is illegal, please input `fe_ip:fe_http_ip;fe_ip:fe_http_ip`."); + } + } + } + + private void validateRequired() { + final String[] requiredOptionKeys = new String[]{ + KEY_USERNAME, + KEY_DATABASE, + KEY_TABLE, + KEY_COLUMN, + KEY_LOAD_URL + }; + for (String optionKey : requiredOptionKeys) { + options.getNecessaryValue(optionKey, DBUtilErrorCode.REQUIRED_VALUE); + } + } +} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksFlushTuple.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksFlushTuple.java new file mode 100644 index 00000000..5c939f9b --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksFlushTuple.java @@ -0,0 +1,21 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter.manager; + +import java.util.List; + +public class StarRocksFlushTuple { + + private String label; + private Long bytes; + private List rows; + + public StarRocksFlushTuple(String label, Long bytes, List rows) { + this.label = label; + this.bytes = bytes; + this.rows = rows; + } + + public String getLabel() { return label; } + public void setLabel(String label) { this.label = label; } + public Long getBytes() { return bytes; } + public List getRows() { return rows; } +} \ No newline at end of file diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadFailedException.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadFailedException.java new file mode 100644 index 00000000..4eb47048 --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadFailedException.java @@ -0,0 +1,33 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter.manager; + +import java.io.IOException; +import java.util.Map; + + +public class StarRocksStreamLoadFailedException extends IOException { + + static final long serialVersionUID = 1L; + + private final Map response; + private boolean reCreateLabel; + + public StarRocksStreamLoadFailedException(String message, Map response) { + super(message); + this.response = response; + } + + public StarRocksStreamLoadFailedException(String message, Map 
response, boolean reCreateLabel) { + super(message); + this.response = response; + this.reCreateLabel = reCreateLabel; + } + + public Map getFailedResponse() { + return response; + } + + public boolean needReCreateLabel() { + return reCreateLabel; + } + +} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java new file mode 100644 index 00000000..b3671556 --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksStreamLoadVisitor.java @@ -0,0 +1,304 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter.manager; + +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; + +import com.alibaba.fastjson2.JSON; +import com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriterOptions; +import com.starrocks.connector.datax.plugin.writer.starrockswriter.row.StarRocksDelimiterParser; + +import org.apache.commons.codec.binary.Base64; +import org.apache.http.HttpEntity; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPut; +import org.apache.http.entity.ByteArrayEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.DefaultRedirectStrategy; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.util.EntityUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + + +public class StarRocksStreamLoadVisitor { + + private static final Logger LOG = LoggerFactory.getLogger(StarRocksStreamLoadVisitor.class); + + private final StarRocksWriterOptions writerOptions; + private long pos; + private static final String RESULT_FAILED = "Fail"; + private static final String RESULT_LABEL_EXISTED = "Label Already Exists"; + private static final String LAEBL_STATE_VISIBLE = "VISIBLE"; + private static final String LAEBL_STATE_COMMITTED = "COMMITTED"; + private static final String RESULT_LABEL_PREPARE = "PREPARE"; + private static final String RESULT_LABEL_ABORTED = "ABORTED"; + private static final String RESULT_LABEL_UNKNOWN = "UNKNOWN"; + + public StarRocksStreamLoadVisitor(StarRocksWriterOptions writerOptions) { + this.writerOptions = writerOptions; + } + + public void doStreamLoad(StarRocksFlushTuple flushData) throws IOException { + String host = getAvailableHost(); + if (null == host) { + throw new IOException("None of the host in `load_url` could be connected."); + } + String loadUrl = new StringBuilder(host) + .append("/api/") + .append(writerOptions.getDatabase()) + .append("/") + .append(writerOptions.getTable()) + .append("/_stream_load") + .toString(); + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("Start to join batch data: rows[%d] bytes[%d] label[%s].", flushData.getRows().size(), flushData.getBytes(), flushData.getLabel())); + } + Map loadResult = doHttpPut(loadUrl, flushData.getLabel(), joinRows(flushData.getRows(), 
flushData.getBytes().intValue())); + final String keyStatus = "Status"; + if (null == loadResult || !loadResult.containsKey(keyStatus)) { + LOG.error("unknown result status. {}", loadResult); + throw new IOException("Unable to flush data to StarRocks: unknown result status. " + loadResult); + } + if (LOG.isDebugEnabled()) { + LOG.debug(new StringBuilder("StreamLoad response:\n").append(JSON.toJSONString(loadResult)).toString()); + } + if (RESULT_FAILED.equals(loadResult.get(keyStatus))) { + StringBuilder errorBuilder = new StringBuilder("Failed to flush data to StarRocks.\n"); + if (loadResult.containsKey("Message")) { + errorBuilder.append(loadResult.get("Message")); + errorBuilder.append('\n'); + } + if (loadResult.containsKey("ErrorURL")) { + LOG.error("StreamLoad response: {}", loadResult); + try { + errorBuilder.append(doHttpGet(loadResult.get("ErrorURL").toString())); + errorBuilder.append('\n'); + } catch (IOException e) { + LOG.warn("Get Error URL failed. {} ", loadResult.get("ErrorURL"), e); + } + } else { + errorBuilder.append(JSON.toJSONString(loadResult)); + errorBuilder.append('\n'); + } + throw new IOException(errorBuilder.toString()); + } else if (RESULT_LABEL_EXISTED.equals(loadResult.get(keyStatus))) { + LOG.debug(new StringBuilder("StreamLoad response:\n").append(JSON.toJSONString(loadResult)).toString()); + // has to block-checking the state to get the final result + checkLabelState(host, flushData.getLabel()); + } + } + + private String getAvailableHost() { + List hostList = writerOptions.getLoadUrlList(); + long tmp = pos + hostList.size(); + for (; pos < tmp; pos++) { + String host = new StringBuilder("http://").append(hostList.get((int) (pos % hostList.size()))).toString(); + if (tryHttpConnection(host)) { + return host; + } + } + return null; + } + + private boolean tryHttpConnection(String host) { + try { + URL url = new URL(host); + HttpURLConnection co = (HttpURLConnection) url.openConnection(); + co.setConnectTimeout(1000); + co.connect(); + co.disconnect(); + return true; + } catch (Exception e1) { + LOG.warn("Failed to connect to address:{}", host, e1); + return false; + } + } + + private byte[] joinRows(List rows, int totalBytes) { + if (StarRocksWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) { + Map props = (writerOptions.getLoadProps() == null ? new HashMap<>() : writerOptions.getLoadProps()); + byte[] lineDelimiter = StarRocksDelimiterParser.parse((String)props.get("row_delimiter"), "\n").getBytes(StandardCharsets.UTF_8); + ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size() * lineDelimiter.length); + for (byte[] row : rows) { + bos.put(row); + bos.put(lineDelimiter); + } + return bos.array(); + } + + if (StarRocksWriterOptions.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) { + ByteBuffer bos = ByteBuffer.allocate(totalBytes + (rows.isEmpty() ? 
2 : rows.size() + 1)); + bos.put("[".getBytes(StandardCharsets.UTF_8)); + byte[] jsonDelimiter = ",".getBytes(StandardCharsets.UTF_8); + boolean isFirstElement = true; + for (byte[] row : rows) { + if (!isFirstElement) { + bos.put(jsonDelimiter); + } + bos.put(row); + isFirstElement = false; + } + bos.put("]".getBytes(StandardCharsets.UTF_8)); + return bos.array(); + } + throw new RuntimeException("Failed to join rows data, unsupported `format` from stream load properties:"); + } + + @SuppressWarnings("unchecked") + private void checkLabelState(String host, String label) throws IOException { + int idx = 0; + while(true) { + try { + TimeUnit.SECONDS.sleep(Math.min(++idx, 5)); + } catch (InterruptedException ex) { + break; + } + try (CloseableHttpClient httpclient = HttpClients.createDefault()) { + HttpGet httpGet = new HttpGet(new StringBuilder(host).append("/api/").append(writerOptions.getDatabase()).append("/get_load_state?label=").append(label).toString()); + httpGet.setHeader("Authorization", getBasicAuthHeader(writerOptions.getUsername(), writerOptions.getPassword())); + httpGet.setHeader("Connection", "close"); + + try (CloseableHttpResponse resp = httpclient.execute(httpGet)) { + HttpEntity respEntity = getHttpEntity(resp); + if (respEntity == null) { + throw new IOException(String.format("Failed to flush data to StarRocks, Error " + + "could not get the final state of label[%s].\n", label), null); + } + Map result = (Map)JSON.parse(EntityUtils.toString(respEntity)); + String labelState = (String)result.get("state"); + if (null == labelState) { + throw new IOException(String.format("Failed to flush data to StarRocks, Error " + + "could not get the final state of label[%s]. response[%s]\n", label, EntityUtils.toString(respEntity)), null); + } + LOG.info(String.format("Checking label[%s] state[%s]\n", label, labelState)); + switch(labelState) { + case LAEBL_STATE_VISIBLE: + case LAEBL_STATE_COMMITTED: + return; + case RESULT_LABEL_PREPARE: + continue; + case RESULT_LABEL_ABORTED: + throw new StarRocksStreamLoadFailedException(String.format("Failed to flush data to StarRocks, Error " + + "label[%s] state[%s]\n", label, labelState), null, true); + case RESULT_LABEL_UNKNOWN: + default: + throw new IOException(String.format("Failed to flush data to StarRocks, Error " + + "label[%s] state[%s]\n", label, labelState), null); + } + } + } + } + } + + @SuppressWarnings("unchecked") + private Map doHttpPut(String loadUrl, String label, byte[] data) throws IOException { + LOG.info(String.format("Executing stream load to: '%s', size: '%s'", loadUrl, data.length)); + final HttpClientBuilder httpClientBuilder = HttpClients.custom() + .setRedirectStrategy(new DefaultRedirectStrategy() { + @Override + protected boolean isRedirectable(String method) { + return true; + } + }); + try (CloseableHttpClient httpclient = httpClientBuilder.build()) { + HttpPut httpPut = new HttpPut(loadUrl); + List cols = writerOptions.getColumns(); + if (null != cols && !cols.isEmpty() && StarRocksWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) { + httpPut.setHeader("columns", String.join(",", cols.stream().map(f -> String.format("`%s`", f)).collect(Collectors.toList()))); + } + if (null != writerOptions.getLoadProps()) { + for (Map.Entry entry : writerOptions.getLoadProps().entrySet()) { + httpPut.setHeader(entry.getKey(), String.valueOf(entry.getValue())); + } + } + httpPut.setHeader("Expect", "100-continue"); + httpPut.setHeader("label", label); + httpPut.setHeader("Content-Type", 
"application/x-www-form-urlencoded"); + httpPut.setHeader("Authorization", getBasicAuthHeader(writerOptions.getUsername(), writerOptions.getPassword())); + httpPut.setEntity(new ByteArrayEntity(data)); + httpPut.setConfig(RequestConfig.custom().setRedirectsEnabled(true).build()); + try (CloseableHttpResponse resp = httpclient.execute(httpPut)) { + int code = resp.getStatusLine().getStatusCode(); + if (200 != code) { + String errorText; + try { + HttpEntity respEntity = resp.getEntity(); + errorText = EntityUtils.toString(respEntity); + } catch (Exception err) { + errorText = "find errorText failed: " + err.getMessage(); + } + LOG.warn("Request failed with code:{}, err:{}", code, errorText); + Map errorMap = new HashMap<>(); + errorMap.put("Status", "Fail"); + errorMap.put("Message", errorText); + return errorMap; + } + HttpEntity respEntity = resp.getEntity(); + if (null == respEntity) { + LOG.warn("Request failed with empty response."); + return null; + } + return (Map)JSON.parse(EntityUtils.toString(respEntity)); + } + } + } + + private String getBasicAuthHeader(String username, String password) { + String auth = username + ":" + password; + byte[] encodedAuth = Base64.encodeBase64(auth.getBytes(StandardCharsets.UTF_8)); + return new StringBuilder("Basic ").append(new String(encodedAuth)).toString(); + } + + private HttpEntity getHttpEntity(CloseableHttpResponse resp) { + int code = resp.getStatusLine().getStatusCode(); + if (200 != code) { + LOG.warn("Request failed with code:{}", code); + return null; + } + HttpEntity respEntity = resp.getEntity(); + if (null == respEntity) { + LOG.warn("Request failed with empty response."); + return null; + } + return respEntity; + } + + private String doHttpGet(String getUrl) throws IOException { + LOG.info("Executing GET from {}.", getUrl); + try (CloseableHttpClient httpclient = buildHttpClient()) { + HttpGet httpGet = new HttpGet(getUrl); + try (CloseableHttpResponse resp = httpclient.execute(httpGet)) { + HttpEntity respEntity = resp.getEntity(); + if (null == respEntity) { + LOG.warn("Request failed with empty response."); + return null; + } + return EntityUtils.toString(respEntity); + } + } + } + + private CloseableHttpClient buildHttpClient(){ + final HttpClientBuilder httpClientBuilder = HttpClients.custom() + .setRedirectStrategy(new DefaultRedirectStrategy() { + @Override + protected boolean isRedirectable(String method) { + return true; + } + }); + return httpClientBuilder.build(); + } + +} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java new file mode 100644 index 00000000..a0cb1f8b --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/manager/StarRocksWriterManager.java @@ -0,0 +1,203 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter.manager; + +import org.apache.commons.lang3.concurrent.BasicThreadFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; + 
+import com.google.common.base.Strings; +import com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriterOptions; + +public class StarRocksWriterManager { + + private static final Logger LOG = LoggerFactory.getLogger(StarRocksWriterManager.class); + + private final StarRocksStreamLoadVisitor starrocksStreamLoadVisitor; + private final StarRocksWriterOptions writerOptions; + + private final List buffer = new ArrayList<>(); + private int batchCount = 0; + private long batchSize = 0; + private volatile boolean closed = false; + private volatile Exception flushException; + private final LinkedBlockingDeque flushQueue; + private ScheduledExecutorService scheduler; + private ScheduledFuture scheduledFuture; + + public StarRocksWriterManager(StarRocksWriterOptions writerOptions) { + this.writerOptions = writerOptions; + this.starrocksStreamLoadVisitor = new StarRocksStreamLoadVisitor(writerOptions); + flushQueue = new LinkedBlockingDeque<>(writerOptions.getFlushQueueLength()); + this.startScheduler(); + this.startAsyncFlushing(); + } + + public void startScheduler() { + stopScheduler(); + this.scheduler = Executors.newScheduledThreadPool(1, new BasicThreadFactory.Builder().namingPattern("starrocks-interval-flush").daemon(true).build()); + this.scheduledFuture = this.scheduler.schedule(() -> { + synchronized (StarRocksWriterManager.this) { + if (!closed) { + try { + String label = createBatchLabel(); + LOG.info(String.format("StarRocks interval Sinking triggered: label[%s].", label)); + if (batchCount == 0) { + startScheduler(); + } + flush(label, false); + } catch (Exception e) { + flushException = e; + } + } + } + }, writerOptions.getFlushInterval(), TimeUnit.MILLISECONDS); + } + + public void stopScheduler() { + if (this.scheduledFuture != null) { + scheduledFuture.cancel(false); + this.scheduler.shutdown(); + } + } + + public final synchronized void writeRecord(String record) throws IOException { + checkFlushException(); + try { + byte[] bts = record.getBytes(StandardCharsets.UTF_8); + buffer.add(bts); + batchCount++; + batchSize += bts.length; + if (batchCount >= writerOptions.getBatchRows() || batchSize >= writerOptions.getBatchSize()) { + String label = createBatchLabel(); + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("StarRocks buffer Sinking triggered: rows[%d] label[%s].", batchCount, label)); + } + flush(label, false); + } + } catch (Exception e) { + throw new IOException("Writing records to StarRocks failed.", e); + } + } + + public synchronized void flush(String label, boolean waitUtilDone) throws Exception { + checkFlushException(); + if (batchCount == 0) { + if (waitUtilDone) { + waitAsyncFlushingDone(); + } + return; + } + flushQueue.put(new StarRocksFlushTuple(label, batchSize, new ArrayList<>(buffer))); + if (waitUtilDone) { + // wait the last flush + waitAsyncFlushingDone(); + } + buffer.clear(); + batchCount = 0; + batchSize = 0; + } + + public synchronized void close() { + if (!closed) { + closed = true; + try { + String label = createBatchLabel(); + if (batchCount > 0) { + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("StarRocks Sink is about to close: label[%s].", label)); + } + } + flush(label, true); + } catch (Exception e) { + throw new RuntimeException("Writing records to StarRocks failed.", e); + } + } + checkFlushException(); + } + + public String createBatchLabel() { + StringBuilder sb = new StringBuilder(); + if (!Strings.isNullOrEmpty(writerOptions.getLabelPrefix())) { + sb.append(writerOptions.getLabelPrefix()); + } + return 
sb.append(UUID.randomUUID().toString()) + .toString(); + } + + private void startAsyncFlushing() { + // start flush thread + Thread flushThread = new Thread(new Runnable(){ + public void run() { + while(true) { + try { + asyncFlush(); + } catch (Exception e) { + flushException = e; + } + } + } + }); + flushThread.setDaemon(true); + flushThread.start(); + } + + private void waitAsyncFlushingDone() throws InterruptedException { + // wait previous flushings + for (int i = 0; i <= writerOptions.getFlushQueueLength(); i++) { + flushQueue.put(new StarRocksFlushTuple("", 0l, null)); + } + checkFlushException(); + } + + private void asyncFlush() throws Exception { + StarRocksFlushTuple flushData = flushQueue.take(); + if (Strings.isNullOrEmpty(flushData.getLabel())) { + return; + } + stopScheduler(); + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("Async stream load: rows[%d] bytes[%d] label[%s].", flushData.getRows().size(), flushData.getBytes(), flushData.getLabel())); + } + for (int i = 0; i <= writerOptions.getMaxRetries(); i++) { + try { + // flush to StarRocks with stream load + starrocksStreamLoadVisitor.doStreamLoad(flushData); + LOG.info(String.format("Async stream load finished: label[%s].", flushData.getLabel())); + startScheduler(); + break; + } catch (Exception e) { + LOG.warn("Failed to flush batch data to StarRocks, retry times = {}", i, e); + if (i >= writerOptions.getMaxRetries()) { + throw new IOException(e); + } + if (e instanceof StarRocksStreamLoadFailedException && ((StarRocksStreamLoadFailedException)e).needReCreateLabel()) { + String newLabel = createBatchLabel(); + LOG.warn(String.format("Batch label changed from [%s] to [%s]", flushData.getLabel(), newLabel)); + flushData.setLabel(newLabel); + } + try { + Thread.sleep(1000l * Math.min(i + 1, 10)); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new IOException("Unable to flush, interrupted while doing another attempt", e); + } + } + } + } + + private void checkFlushException() { + if (flushException != null) { + throw new RuntimeException("Writing records to StarRocks failed.", flushException); + } + } +} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksBaseSerializer.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksBaseSerializer.java new file mode 100644 index 00000000..a7ad499d --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksBaseSerializer.java @@ -0,0 +1,26 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter.row; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Column.Type; + +public class StarRocksBaseSerializer { + + protected String fieldConvertion(Column col) { + if (null == col.getRawData() || Type.NULL == col.getType()) { + return null; + } + if (Type.BOOL == col.getType()) { + return String.valueOf(col.asLong()); + } + if (Type.BYTES == col.getType()) { + byte[] bts = (byte[])col.getRawData(); + long value = 0; + for (int i = 0; i < bts.length; i++) { + value += (bts[bts.length - i - 1] & 0xffL) << (8 * i); + } + return String.valueOf(value); + } + return col.asString(); + } + +} \ No newline at end of file diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksCsvSerializer.java 
b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksCsvSerializer.java new file mode 100644 index 00000000..1366d570 --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksCsvSerializer.java @@ -0,0 +1,32 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter.row; + +import java.io.StringWriter; + +import com.alibaba.datax.common.element.Record; + +import com.google.common.base.Strings; + +public class StarRocksCsvSerializer extends StarRocksBaseSerializer implements StarRocksISerializer { + + private static final long serialVersionUID = 1L; + + private final String columnSeparator; + + public StarRocksCsvSerializer(String sp) { + this.columnSeparator = StarRocksDelimiterParser.parse(sp, "\t"); + } + + @Override + public String serialize(Record row) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < row.getColumnNumber(); i++) { + String value = fieldConvertion(row.getColumn(i)); + sb.append(null == value ? "\\N" : value); + if (i < row.getColumnNumber() - 1) { + sb.append(columnSeparator); + } + } + return sb.toString(); + } + +} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksDelimiterParser.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksDelimiterParser.java new file mode 100644 index 00000000..04301e0f --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksDelimiterParser.java @@ -0,0 +1,55 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter.row; + +import java.io.StringWriter; + +import com.google.common.base.Strings; + +public class StarRocksDelimiterParser { + + private static final String HEX_STRING = "0123456789ABCDEF"; + + public static String parse(String sp, String dSp) throws RuntimeException { + if (Strings.isNullOrEmpty(sp)) { + return dSp; + } + if (!sp.toUpperCase().startsWith("\\X")) { + return sp; + } + String hexStr = sp.substring(2); + // check hex str + if (hexStr.isEmpty()) { + throw new RuntimeException("Failed to parse delimiter: `Hex str is empty`"); + } + if (hexStr.length() % 2 != 0) { + throw new RuntimeException("Failed to parse delimiter: `Hex str length error`"); + } + for (char hexChar : hexStr.toUpperCase().toCharArray()) { + if (HEX_STRING.indexOf(hexChar) == -1) { + throw new RuntimeException("Failed to parse delimiter: `Hex str format error`"); + } + } + // transform to separator + StringWriter writer = new StringWriter(); + for (byte b : hexStrToBytes(hexStr)) { + writer.append((char) b); + } + return writer.toString(); + } + + private static byte[] hexStrToBytes(String hexStr) { + String upperHexStr = hexStr.toUpperCase(); + int length = upperHexStr.length() / 2; + char[] hexChars = upperHexStr.toCharArray(); + byte[] bytes = new byte[length]; + for (int i = 0; i < length; i++) { + int pos = i * 2; + bytes[i] = (byte) (charToByte(hexChars[pos]) << 4 | charToByte(hexChars[pos + 1])); + } + return bytes; + } + + private static byte charToByte(char c) { + return (byte) HEX_STRING.indexOf(c); + } + +} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksISerializer.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksISerializer.java new file mode 100644 index 
00000000..7bcb8973 --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksISerializer.java @@ -0,0 +1,11 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter.row; + +import java.io.Serializable; + +import com.alibaba.datax.common.element.Record; + +public interface StarRocksISerializer extends Serializable { + + String serialize(Record row); + +} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksJsonSerializer.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksJsonSerializer.java new file mode 100644 index 00000000..f235a08d --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksJsonSerializer.java @@ -0,0 +1,34 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter.row; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.fastjson2.JSON; + +public class StarRocksJsonSerializer extends StarRocksBaseSerializer implements StarRocksISerializer { + + private static final long serialVersionUID = 1L; + + private final List fieldNames; + + public StarRocksJsonSerializer(List fieldNames) { + this.fieldNames = fieldNames; + } + + @Override + public String serialize(Record row) { + if (null == fieldNames) { + return ""; + } + Map rowMap = new HashMap<>(fieldNames.size()); + int idx = 0; + for (String fieldName : fieldNames) { + rowMap.put(fieldName, fieldConvertion(row.getColumn(idx))); + idx++; + } + return JSON.toJSONString(rowMap); + } + +} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksSerializerFactory.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksSerializerFactory.java new file mode 100644 index 00000000..85f446cd --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/row/StarRocksSerializerFactory.java @@ -0,0 +1,22 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter.row; + +import java.util.Map; + +import com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriterOptions; + +public class StarRocksSerializerFactory { + + private StarRocksSerializerFactory() {} + + public static StarRocksISerializer createSerializer(StarRocksWriterOptions writerOptions) { + if (StarRocksWriterOptions.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) { + Map props = writerOptions.getLoadProps(); + return new StarRocksCsvSerializer(null == props || !props.containsKey("column_separator") ? 
null : String.valueOf(props.get("column_separator"))); + } + if (StarRocksWriterOptions.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) { + return new StarRocksJsonSerializer(writerOptions.getColumns()); + } + throw new RuntimeException("Failed to create row serializer, unsupported `format` from stream load properties."); + } + +} diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/util/StarRocksWriterUtil.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/util/StarRocksWriterUtil.java new file mode 100755 index 00000000..8de4ad60 --- /dev/null +++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/util/StarRocksWriterUtil.java @@ -0,0 +1,102 @@ +package com.starrocks.connector.datax.plugin.writer.starrockswriter.util; + +import com.alibaba.datax.plugin.rdbms.util.DBUtil; +import com.alibaba.datax.plugin.rdbms.util.DataBaseType; +import com.alibaba.datax.plugin.rdbms.util.RdbmsException; +import com.alibaba.datax.plugin.rdbms.writer.Constant; +import com.alibaba.druid.sql.parser.ParserException; +import com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriterOptions; +import com.google.common.base.Strings; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.Statement; +import java.util.*; + +public final class StarRocksWriterUtil { + private static final Logger LOG = LoggerFactory.getLogger(StarRocksWriterUtil.class); + + private StarRocksWriterUtil() {} + + public static List getStarRocksColumns(Connection conn, String databaseName, String tableName) { + String currentSql = String.format("SELECT COLUMN_NAME FROM `information_schema`.`COLUMNS` WHERE `TABLE_SCHEMA` = '%s' AND `TABLE_NAME` = '%s' ORDER BY `ORDINAL_POSITION` ASC;", databaseName, tableName); + List columns = new ArrayList<>(); + ResultSet rs = null; + try { + rs = DBUtil.query(conn, currentSql); + while (DBUtil.asyncResultSetNext(rs)) { + String colName = rs.getString("COLUMN_NAME"); + columns.add(colName); + } + return columns; + } catch (Exception e) { + throw RdbmsException.asQueryException(DataBaseType.MySql, e, currentSql, null, null); + } finally { + DBUtil.closeDBResources(rs, null, null); + } + } + + public static List renderPreOrPostSqls(List preOrPostSqls, String tableName) { + if (null == preOrPostSqls) { + return Collections.emptyList(); + } + List renderedSqls = new ArrayList<>(); + for (String sql : preOrPostSqls) { + if (!Strings.isNullOrEmpty(sql)) { + renderedSqls.add(sql.replace(Constant.TABLE_NAME_PLACEHOLDER, tableName)); + } + } + return renderedSqls; + } + + public static void executeSqls(Connection conn, List sqls) { + Statement stmt = null; + String currentSql = null; + try { + stmt = conn.createStatement(); + for (String sql : sqls) { + currentSql = sql; + DBUtil.executeSqlWithoutResultSet(stmt, sql); + } + } catch (Exception e) { + throw RdbmsException.asQueryException(DataBaseType.MySql, e, currentSql, null, null); + } finally { + DBUtil.closeDBResources(null, stmt, null); + } + } + + public static void preCheckPrePareSQL(StarRocksWriterOptions options) { + String table = options.getTable(); + List preSqls = options.getPreSqlList(); + List renderedPreSqls = StarRocksWriterUtil.renderPreOrPostSqls(preSqls, table); + if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) { + LOG.info("Begin to preCheck preSqls:[{}].", String.join(";", 
renderedPreSqls)); + for (String sql : renderedPreSqls) { + try { + DBUtil.sqlValid(sql, DataBaseType.MySql); + } catch (ParserException e) { + throw RdbmsException.asPreSQLParserException(DataBaseType.MySql,e,sql); + } + } + } + } + + public static void preCheckPostSQL(StarRocksWriterOptions options) { + String table = options.getTable(); + List postSqls = options.getPostSqlList(); + List renderedPostSqls = StarRocksWriterUtil.renderPreOrPostSqls(postSqls, table); + if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) { + LOG.info("Begin to preCheck postSqls:[{}].", String.join(";", renderedPostSqls)); + for(String sql : renderedPostSqls) { + try { + DBUtil.sqlValid(sql, DataBaseType.MySql); + } catch (ParserException e){ + throw RdbmsException.asPostSQLParserException(DataBaseType.MySql,e,sql); + } + } + } + } +} diff --git a/starrockswriter/src/main/resources/plugin.json b/starrockswriter/src/main/resources/plugin.json new file mode 100755 index 00000000..8edec1e0 --- /dev/null +++ b/starrockswriter/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "starrockswriter", + "class": "com.starrocks.connector.datax.plugin.writer.starrockswriter.StarRocksWriter", + "description": "useScene: prod. mechanism: StarRocksStreamLoad. warn: The more you know about the database, the less problems you encounter.", + "developer": "starrocks" +} \ No newline at end of file diff --git a/starrockswriter/src/main/resources/plugin_job_template.json b/starrockswriter/src/main/resources/plugin_job_template.json new file mode 100644 index 00000000..06c075bc --- /dev/null +++ b/starrockswriter/src/main/resources/plugin_job_template.json @@ -0,0 +1,18 @@ +{ + "name": "starrockswriter", + "parameter": { + "username": "", + "password": "", + "column": [], + "preSql": [], + "postSql": [], + "loadUrl": [], + "connection": [ + { + "jdbcUrl": "", + "selectedDatabase": "", + "table": [] + } + ] + } +} \ No newline at end of file diff --git a/streamreader/src/main/java/com/alibaba/datax/plugin/reader/streamreader/StreamReader.java b/streamreader/src/main/java/com/alibaba/datax/plugin/reader/streamreader/StreamReader.java index e3b86659..6b8c55bc 100755 --- a/streamreader/src/main/java/com/alibaba/datax/plugin/reader/streamreader/StreamReader.java +++ b/streamreader/src/main/java/com/alibaba/datax/plugin/reader/streamreader/StreamReader.java @@ -5,7 +5,7 @@ import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.RecordSender; import com.alibaba.datax.common.spi.Reader; import com.alibaba.datax.common.util.Configuration; -import com.alibaba.fastjson.JSONObject; +import com.alibaba.fastjson2.JSONObject; import org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.lang3.RandomUtils; diff --git a/tdenginereader/doc/tdenginereader-CN.md b/tdenginereader/doc/tdenginereader-CN.md new file mode 100644 index 00000000..e9502756 --- /dev/null +++ b/tdenginereader/doc/tdenginereader-CN.md @@ -0,0 +1,195 @@ +# DataX TDengineReader + +## 1 快速介绍 + +TDengineReader 插件实现了 TDengine 读取数据的功能。 + +## 2 实现原理 + +TDengineReader 通过 TDengine 的 JDBC driver 查询获取数据。 + +## 3 功能说明 + +### 3.1 配置样例 + +* 配置一个从 TDengine 抽取数据作业: + +```json +{ + "job": { + "content": [ + { + "reader": { + "name": "tdenginereader", + "parameter": { + "username": "root", + "password": "taosdata", + "connection": [ + { + "table": [ + "meters" + ], + "jdbcUrl": [ + "jdbc:TAOS-RS://192.168.56.105:6041/test?timestampFormat=TIMESTAMP" + ] + } + ], + "column": [ + "ts", + "current", + "voltage", + "phase" + ], + 
"where": "ts>=0", + "beginDateTime": "2017-07-14 10:40:00", + "endDateTime": "2017-08-14 10:40:00" + } + }, + "writer": { + "name": "streamwriter", + "parameter": { + "encoding": "UTF-8", + "print": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} +``` + +* 配置一个自定义 SQL 的数据抽取作业: + +```json +{ + "job": { + "content": [ + { + "reader": { + "name": "tdenginereader", + "parameter": { + "user": "root", + "password": "taosdata", + "connection": [ + { + "querySql": [ + "select * from test.meters" + ], + "jdbcUrl": [ + "jdbc:TAOS-RS://192.168.56.105:6041/test?timestampFormat=TIMESTAMP" + ] + } + ] + } + }, + "writer": { + "name": "streamwriter", + "parameter": { + "encoding": "UTF-8", + "print": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} +``` + +### 3.2 参数说明 + +* **username** + * 描述:TDengine 实例的用户名
+ * 必选:是
+ * 默认值:无
+* **password** + * 描述:TDengine 实例的密码
+ * 必选:是
+ * 默认值:无
+* **jdbcUrl** + * 描述:TDengine 数据库的 JDBC 连接信息。注意,jdbcUrl 必须包含在 connection 配置单元中。jdbcUrl 的具体格式请参看 TDengine 官方文档。 + * 必选:是
+ * 默认值:无
+* **querySql** + * 描述:在有些业务场景下,where 这一配置项不足以描述所筛选的条件,用户可以通过该配置项来自定义筛选 SQL。当用户配置了 querySql 后,TDengineReader 就会忽略 table, column, + where, beginDateTime, endDateTime 这些配置项,直接使用这个配置项的内容对数据进行筛选。例如需要进行多表 join 后同步数据,使用 select a,b from table_a join + table_b on table_a.id = table_b.id
+ * 必选:否
+ * 默认值:无
+* **table** + * 描述:所选取的需要同步的表。使用 JSON 的数组描述,因此支持多张表同时抽取。当配置为多张表时,用户自己需保证多张表是同一 schema 结构, TDengineReader不予检查表是否同一逻辑表。注意,table必须包含在 + connection 配置单元中。
+ * 必选:是
+ * 默认值:无
+* **where** + * 描述:筛选条件中的 where 子句,TDengineReader 根据指定的 column, table, where, beginDateTime, endDateTime 条件拼接 SQL,并根据这个 SQL + 进行数据抽取(拼接方式可参考本节末尾的示意代码)。
+ * 必选:否
+ * 默认值:无
+* **beginDateTime** + * 描述:数据的开始时间,Job 迁移从 beginDateTime 到 endDateTime 的数据,格式为 yyyy-MM-dd HH:mm:ss
+ * 必选:否
+ * 默认值:无
+* **endDateTime** + * 描述:数据的结束时间,Job 迁移从 beginDateTime 到 endDateTime 的数据,格式为 yyyy-MM-dd HH:mm:ss
+ * 必选:否
+ * 默认值:无
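上述 column、table、where、beginDateTime、endDateTime 的组合方式,可以用下面的示意性 Java 片段说明(非插件源码,类名与示例取值均为虚构,仅演示拼接思路;实际行为以 TDengineReader 的实现为准):

```java
import java.util.Arrays;
import java.util.List;

// 示意代码:按照 column/table/where/beginDateTime/endDateTime 拼接查询 SQL
public class TdReaderSqlSketch {

    static String buildQuerySql(List<String> columns, String table, String where,
                                String beginDateTime, String endDateTime) {
        StringBuilder sb = new StringBuilder();
        sb.append("select ").append(String.join(",", columns))
          .append(" from ").append(table)
          .append(" where ").append(where);
        // beginDateTime / endDateTime 作用在时间戳主键 _c0 上
        if (beginDateTime != null && !beginDateTime.trim().isEmpty()) {
            sb.append(" and _c0 >= '").append(beginDateTime).append("'");
        }
        if (endDateTime != null && !endDateTime.trim().isEmpty()) {
            sb.append(" and _c0 < '").append(endDateTime).append("'");
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        String sql = buildQuerySql(
                Arrays.asList("ts", "current", "voltage", "phase"),
                "meters", "ts>=0",
                "2017-07-14 10:40:00", "2017-08-14 10:40:00");
        // 输出:select ts,current,voltage,phase from meters where ts>=0
        //       and _c0 >= '2017-07-14 10:40:00' and _c0 < '2017-08-14 10:40:00'
        System.out.println(sql);
    }
}
```

若配置了 querySql,则不会走上述拼接逻辑,插件会直接执行 querySql 中给出的语句。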
+ +### 3.3 类型转换 + +| TDengine 数据类型 | DataX 内部类型 | +| --------------- | ------------- | +| TINYINT | Long | +| SMALLINT | Long | +| INTEGER | Long | +| BIGINT | Long | +| FLOAT | Double | +| DOUBLE | Double | +| BOOLEAN | Bool | +| TIMESTAMP | Date | +| BINARY | Bytes | +| NCHAR | String | + +## 4 性能报告 + +### 4.1 环境准备 + +#### 4.1.1 数据特征 + +#### 4.1.2 机器参数 + +#### 4.1.3 DataX jvm 参数 + + -Xms1024m -Xmx1024m -XX:+HeapDumpOnOutOfMemoryError + +### 4.2 测试报告 + +#### 4.2.1 单表测试报告 + +| 通道数| DataX速度(Rec/s)|DataX流量(MB/s)| DataX机器网卡流出流量(MB/s)|DataX机器运行负载|DB网卡进入流量(MB/s)|DB运行负载|DB TPS| +|--------| --------|--------|--------|--------|--------|--------|--------| +|1| | | | | | | | +|4| | | | | | | | +|8| | | | | | | | +|16| | | | | | | | +|32| | | | | | | | + +说明: + +#### 4.2.4 性能测试小结 + +1. +2. + +## 5 约束限制 + +## FAQ \ No newline at end of file diff --git a/tdenginereader/pom.xml b/tdenginereader/pom.xml new file mode 100644 index 00000000..075a2789 --- /dev/null +++ b/tdenginereader/pom.xml @@ -0,0 +1,123 @@ + + + + datax-all + com.alibaba.datax + 0.0.1-SNAPSHOT + + 4.0.0 + + tdenginereader + + + 8 + 8 + + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + + com.alibaba.datax.tdenginewriter + tdenginewriter + 0.0.1-SNAPSHOT + compile + + + + com.taosdata.jdbc + taos-jdbcdriver + 2.0.39 + + + + junit + junit + ${junit-version} + test + + + com.alibaba.datax + plugin-rdbms-util + 0.0.1-SNAPSHOT + compile + + + + com.alibaba.datax + datax-core + 0.0.1-SNAPSHOT + test + + + + + + + + + + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.12.4 + + + + **/*Test.java + + + + + true + + + + + + + \ No newline at end of file diff --git a/tdenginereader/src/main/assembly/package.xml b/tdenginereader/src/main/assembly/package.xml new file mode 100755 index 00000000..b52f20fb --- /dev/null +++ b/tdenginereader/src/main/assembly/package.xml @@ -0,0 +1,34 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/reader/tdenginereader + + + target/ + + tdenginereader-0.0.1-SNAPSHOT.jar + + plugin/reader/tdenginereader + + + + + + false + plugin/reader/tdenginereader/libs + runtime + + + diff --git a/tdenginereader/src/main/java/com/alibaba/datax/plugin/reader/TDengineReader.java b/tdenginereader/src/main/java/com/alibaba/datax/plugin/reader/TDengineReader.java new file mode 100644 index 00000000..4ec42d9e --- /dev/null +++ b/tdenginereader/src/main/java/com/alibaba/datax/plugin/reader/TDengineReader.java @@ -0,0 +1,266 @@ +package com.alibaba.datax.plugin.reader; + +import com.alibaba.datax.common.element.*; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordSender; +import com.alibaba.datax.common.spi.Reader; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.writer.tdenginewriter.Key; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.UnsupportedEncodingException; +import java.sql.*; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class TDengineReader extends Reader { + + private static final 
String DATETIME_FORMAT = "yyyy-MM-dd HH:mm:ss"; + + public static class Job extends Reader.Job { + private static final Logger LOG = LoggerFactory.getLogger(Job.class); + private Configuration originalConfig; + + @Override + public void init() { + this.originalConfig = super.getPluginJobConf(); + // check username + String username = this.originalConfig.getString(Key.USERNAME); + if (StringUtils.isBlank(username)) + throw DataXException.asDataXException(TDengineReaderErrorCode.REQUIRED_VALUE, + "The parameter [" + Key.USERNAME + "] is not set."); + + // check password + String password = this.originalConfig.getString(Key.PASSWORD); + if (StringUtils.isBlank(password)) + throw DataXException.asDataXException(TDengineReaderErrorCode.REQUIRED_VALUE, + "The parameter [" + Key.PASSWORD + "] is not set."); + + // check connection + List connectionList = this.originalConfig.getListConfiguration(Key.CONNECTION); + if (connectionList == null || connectionList.isEmpty()) + throw DataXException.asDataXException(TDengineReaderErrorCode.REQUIRED_VALUE, + "The parameter [" + Key.CONNECTION + "] is not set."); + for (int i = 0; i < connectionList.size(); i++) { + Configuration conn = connectionList.get(i); + // check jdbcUrl + List jdbcUrlList = conn.getList(Key.JDBC_URL); + if (jdbcUrlList == null || jdbcUrlList.isEmpty()) { + throw DataXException.asDataXException(TDengineReaderErrorCode.REQUIRED_VALUE, + "The parameter [" + Key.JDBC_URL + "] of connection[" + (i + 1) + "] is not set."); + } + // check table/querySql + List querySqlList = conn.getList(Key.QUERY_SQL); + if (querySqlList == null || querySqlList.isEmpty()) { + String querySql = conn.getString(Key.QUERY_SQL); + if (StringUtils.isBlank(querySql)) { + List table = conn.getList(Key.TABLE); + if (table == null || table.isEmpty()) + throw DataXException.asDataXException(TDengineReaderErrorCode.REQUIRED_VALUE, + "The parameter [" + Key.TABLE + "] of connection[" + (i + 1) + "] is not set."); + } + } + } + + SimpleDateFormat format = new SimpleDateFormat(DATETIME_FORMAT); + // check beginDateTime + String beginDatetime = this.originalConfig.getString(Key.BEGIN_DATETIME); + long start = Long.MIN_VALUE; + if (!StringUtils.isBlank(beginDatetime)) { + try { + start = format.parse(beginDatetime).getTime(); + } catch (ParseException e) { + throw DataXException.asDataXException(TDengineReaderErrorCode.ILLEGAL_VALUE, + "The parameter [" + Key.BEGIN_DATETIME + "] needs to conform to the [" + DATETIME_FORMAT + "] format."); + } + } + // check endDateTime + String endDatetime = this.originalConfig.getString(Key.END_DATETIME); + long end = Long.MAX_VALUE; + if (!StringUtils.isBlank(endDatetime)) { + try { + end = format.parse(endDatetime).getTime(); + } catch (ParseException e) { + throw DataXException.asDataXException(TDengineReaderErrorCode.ILLEGAL_VALUE, + "The parameter [" + Key.END_DATETIME + "] needs to conform to the [" + DATETIME_FORMAT + "] format."); + } + } + if (start >= end) + throw DataXException.asDataXException(TDengineReaderErrorCode.ILLEGAL_VALUE, + "The parameter [" + Key.BEGIN_DATETIME + "] should be less than the parameter [" + Key.END_DATETIME + "]."); + + } + + @Override + public void destroy() { + + } + + @Override + public List split(int adviceNumber) { + List configurations = new ArrayList<>(); + + List connectionList = this.originalConfig.getListConfiguration(Key.CONNECTION); + for (Configuration conn : connectionList) { + List jdbcUrlList = conn.getList(Key.JDBC_URL, String.class); + for (String jdbcUrl : jdbcUrlList) { + 
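// one task per jdbcUrl: clone the job configuration and flatten the connection block into top-level jdbcUrl/table/querySql keys +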
Configuration clone = this.originalConfig.clone(); + clone.set(Key.JDBC_URL, jdbcUrl); + clone.set(Key.TABLE, conn.getList(Key.TABLE)); + clone.set(Key.QUERY_SQL, conn.getList(Key.QUERY_SQL)); + clone.remove(Key.CONNECTION); + configurations.add(clone); + } + } + + LOG.info("Configuration: {}", configurations); + return configurations; + } + } + + public static class Task extends Reader.Task { + private static final Logger LOG = LoggerFactory.getLogger(Task.class); + + private Configuration readerSliceConfig; + private String mandatoryEncoding; + private Connection conn; + + private List tables; + private List columns; + private String startTime; + private String endTime; + private String where; + private List querySql; + + static { + try { + Class.forName("com.taosdata.jdbc.TSDBDriver"); + Class.forName("com.taosdata.jdbc.rs.RestfulDriver"); + } catch (ClassNotFoundException ignored) { + LOG.warn(ignored.getMessage(), ignored); + } + } + + @Override + public void init() { + this.readerSliceConfig = super.getPluginJobConf(); + + String user = readerSliceConfig.getString(Key.USERNAME); + String password = readerSliceConfig.getString(Key.PASSWORD); + + String url = readerSliceConfig.getString(Key.JDBC_URL); + try { + this.conn = DriverManager.getConnection(url, user, password); + } catch (SQLException e) { + throw DataXException.asDataXException(TDengineReaderErrorCode.CONNECTION_FAILED, + "The parameter [" + Key.JDBC_URL + "] : " + url + " failed to connect since: " + e.getMessage(), e); + } + + this.tables = readerSliceConfig.getList(Key.TABLE, String.class); + this.columns = readerSliceConfig.getList(Key.COLUMN, String.class); + this.startTime = readerSliceConfig.getString(Key.BEGIN_DATETIME); + this.endTime = readerSliceConfig.getString(Key.END_DATETIME); + this.where = readerSliceConfig.getString(Key.WHERE, "_c0 > " + Long.MIN_VALUE); + this.querySql = readerSliceConfig.getList(Key.QUERY_SQL, String.class); + this.mandatoryEncoding = readerSliceConfig.getString(Key.MANDATORY_ENCODING, "UTF-8"); + } + + @Override + public void destroy() { + try { + if (conn != null) + conn.close(); + } catch (SQLException e) { + LOG.error(e.getMessage(), e); + } + } + + @Override + public void startRead(RecordSender recordSender) { + List sqlList = new ArrayList<>(); + + if (querySql == null || querySql.isEmpty()) { + for (String table : tables) { + StringBuilder sb = new StringBuilder(); + sb.append("select ").append(StringUtils.join(columns, ",")).append(" from ").append(table).append(" "); + sb.append("where ").append(where); + if (!StringUtils.isBlank(startTime)) { + sb.append(" and _c0 >= '").append(startTime).append("'"); + } + if (!StringUtils.isBlank(endTime)) { + sb.append(" and _c0 < '").append(endTime).append("'"); + } + String sql = sb.toString().trim(); + sqlList.add(sql); + } + } else { + sqlList.addAll(querySql); + } + + for (String sql : sqlList) { + try (Statement stmt = conn.createStatement()) { + ResultSet rs = stmt.executeQuery(sql); + while (rs.next()) { + Record record = buildRecord(recordSender, rs, mandatoryEncoding); + recordSender.sendToWriter(record); + } + } catch (SQLException e) { + LOG.error(e.getMessage(), e); + } + } + } + + private Record buildRecord(RecordSender recordSender, ResultSet rs, String mandatoryEncoding) { + Record record = recordSender.createRecord(); + try { + ResultSetMetaData metaData = rs.getMetaData(); + for (int i = 1; i <= metaData.getColumnCount(); i++) { + int columnType = metaData.getColumnType(i); + switch (columnType) { + case Types.SMALLINT: + 
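// TDengine integer types (TINYINT / SMALLINT / INTEGER / BIGINT) are all widened to DataX LongColumn +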
case Types.TINYINT: + case Types.INTEGER: + case Types.BIGINT: + record.addColumn(new LongColumn(rs.getString(i))); + break; + case Types.FLOAT: + case Types.DOUBLE: + record.addColumn(new DoubleColumn(rs.getString(i))); + break; + case Types.BOOLEAN: + record.addColumn(new BoolColumn(rs.getBoolean(i))); + break; + case Types.TIMESTAMP: + record.addColumn(new DateColumn(rs.getTimestamp(i))); + break; + case Types.BINARY: + record.addColumn(new BytesColumn(rs.getBytes(i))); + break; + case Types.NCHAR: + String rawData; + if (StringUtils.isBlank(mandatoryEncoding)) { + rawData = rs.getString(i); + } else { + rawData = new String((rs.getBytes(i) == null ? new byte[0] : rs.getBytes(i)), mandatoryEncoding); + } + record.addColumn(new StringColumn(rawData)); + break; + } + } + } catch (SQLException e) { + throw DataXException.asDataXException(TDengineReaderErrorCode.ILLEGAL_VALUE, "database query error!", e); + } catch (UnsupportedEncodingException e) { + throw DataXException.asDataXException(TDengineReaderErrorCode.ILLEGAL_VALUE, "illegal mandatoryEncoding", e); + } + return record; + } + } + + +} diff --git a/tdenginereader/src/main/java/com/alibaba/datax/plugin/reader/TDengineReaderErrorCode.java b/tdenginereader/src/main/java/com/alibaba/datax/plugin/reader/TDengineReaderErrorCode.java new file mode 100644 index 00000000..b784ab06 --- /dev/null +++ b/tdenginereader/src/main/java/com/alibaba/datax/plugin/reader/TDengineReaderErrorCode.java @@ -0,0 +1,34 @@ +package com.alibaba.datax.plugin.reader; + +import com.alibaba.datax.common.spi.ErrorCode; + +public enum TDengineReaderErrorCode implements ErrorCode { + + REQUIRED_VALUE("TDengineReader-00", "parameter value is missing"), + ILLEGAL_VALUE("TDengineReader-01", "invalid parameter value"), + CONNECTION_FAILED("TDengineReader-02", "connection error"), + RUNTIME_EXCEPTION("TDengineWriter-03", "runtime exception"); + + private final String code; + private final String description; + + TDengineReaderErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.description; + } + + @Override + public String toString() { + return String.format("Code:[%s], Description:[%s]. ", this.code, this.description); + } +} diff --git a/tdenginereader/src/main/resources/plugin.json b/tdenginereader/src/main/resources/plugin.json new file mode 100755 index 00000000..7ccdbe63 --- /dev/null +++ b/tdenginereader/src/main/resources/plugin.json @@ -0,0 +1,9 @@ +{ + "name": "tdenginereader", + "class": "com.alibaba.datax.plugin.reader.TDengineReader", + "description": { + "useScene": "data migration from tdengine", + "mechanism": "use JDBC to read data from tdengine." 
+ }, + "developer": "zyyang-taosdata" +} \ No newline at end of file diff --git a/tdenginereader/src/main/resources/plugin_job_template.json b/tdenginereader/src/main/resources/plugin_job_template.json new file mode 100644 index 00000000..934fe96a --- /dev/null +++ b/tdenginereader/src/main/resources/plugin_job_template.json @@ -0,0 +1,23 @@ +{ + "name": "tdenginereader", + "parameter": { + "user": "", + "password": "", + "connection": [ + { + "table": [ + "" + ], + "jdbcUrl": [ + "" + ] + } + ], + "column": [ + "" + ], + "beginDateTime": "", + "endDateTime": "", + "where": "" + } +} \ No newline at end of file diff --git a/tdenginereader/src/test/java/com/alibaba/datax/plugin/reader/TDengine2DMTest.java b/tdenginereader/src/test/java/com/alibaba/datax/plugin/reader/TDengine2DMTest.java new file mode 100644 index 00000000..e1064717 --- /dev/null +++ b/tdenginereader/src/test/java/com/alibaba/datax/plugin/reader/TDengine2DMTest.java @@ -0,0 +1,86 @@ +package com.alibaba.datax.plugin.reader; + +import com.alibaba.datax.core.Engine; +import org.junit.Ignore; +import org.junit.Test; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Random; + +@Ignore +public class TDengine2DMTest { + private static final String host1 = "192.168.56.105"; + private static final String host2 = "192.168.0.72"; + + private final Random random = new Random(System.currentTimeMillis()); + + @Test + public void t2dm_case01() throws Throwable { + // given + createSupTable("ms"); + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/t2dm.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + @Test + public void t2dm_case02() throws Throwable { + // given + createSupTable("us"); + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/t2dm.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + @Test + public void t2dm_case03() throws Throwable { + // given + createSupTable("ns"); + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/t2dm.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + private void createSupTable(String precision) throws SQLException { + final String url = "jdbc:TAOS-RS://" + host1 + ":6041/"; + try (Connection conn = DriverManager.getConnection(url, "root", "taosdata")) { + Statement stmt = conn.createStatement(); + + stmt.execute("drop database if exists db1"); + stmt.execute("create database if not exists db1 precision '" + precision + "'"); + stmt.execute("create table db1.stb1(ts timestamp, f1 tinyint, f2 smallint, f3 int, f4 bigint, f5 float, " + + "f6 double, f7 bool, f8 binary(100), f9 nchar(100)) tags(t1 timestamp, t2 tinyint, t3 smallint, " + + "t4 int, t5 bigint, t6 float, t7 double, t8 bool, t9 binary(100), t10 nchar(100))"); + + for (int i = 1; i <= 10; i++) { + stmt.execute("insert into db1.tb" + i + " using db1.stb1 tags(now, " + random.nextInt(10) + "," + + random.nextInt(10) + "," + random.nextInt(10) + "," + random.nextInt(10) + "," + + random.nextFloat() + "," + random.nextDouble() + "," + random.nextBoolean() + ",'abcABC123'," + + "'北京朝阳望京') values(now+" + i + "s, " + random.nextInt(10) + "," + random.nextInt(10) + "," + + +random.nextInt(10) + "," + random.nextInt(10) + "," + random.nextFloat() + "," + + 
random.nextDouble() + "," + random.nextBoolean() + ",'abcABC123','北京朝阳望京')"); + } + stmt.close(); + } + + final String url2 = "jdbc:dm://" + host2 + ":5236"; + try (Connection conn = DriverManager.getConnection(url2, "TESTUSER", "test123456")) { + conn.setAutoCommit(true); + Statement stmt = conn.createStatement(); + stmt.execute("drop table if exists stb2"); + stmt.execute("create table stb2(ts timestamp, f1 tinyint, f2 smallint, f3 int, f4 bigint, f5 float, " + + "f6 double, f7 BIT, f8 VARCHAR(100), f9 VARCHAR2(200), t1 timestamp, t2 tinyint, t3 smallint, " + + "t4 int, t5 bigint, t6 float, t7 double, t8 BIT, t9 VARCHAR(100), t10 VARCHAR2(200))"); + } + } + +} diff --git a/tdenginereader/src/test/java/com/alibaba/datax/plugin/reader/TDengine2StreamTest.java b/tdenginereader/src/test/java/com/alibaba/datax/plugin/reader/TDengine2StreamTest.java new file mode 100644 index 00000000..f628a648 --- /dev/null +++ b/tdenginereader/src/test/java/com/alibaba/datax/plugin/reader/TDengine2StreamTest.java @@ -0,0 +1,66 @@ +package com.alibaba.datax.plugin.reader; + +import com.alibaba.datax.core.Engine; +import org.junit.Ignore; +import org.junit.Test; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Random; + +@Ignore +public class TDengine2StreamTest { + + private static final String host = "192.168.56.105"; + private static final Random random = new Random(System.currentTimeMillis()); + + @Test + public void case01() throws Throwable { + // given + prepare("ms"); + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/t2stream-1.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + @Test + public void case02() throws Throwable { + // given + prepare("ms"); + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/t2stream-2.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + + private void prepare(String precision) throws SQLException { + final String url = "jdbc:TAOS-RS://" + host + ":6041/"; + try (Connection conn = DriverManager.getConnection(url, "root", "taosdata")) { + Statement stmt = conn.createStatement(); + + stmt.execute("drop database if exists db1"); + stmt.execute("create database if not exists db1 precision '" + precision + "'"); + stmt.execute("create table db1.stb1(ts timestamp, f1 tinyint, f2 smallint, f3 int, f4 bigint, f5 float, " + + "f6 double, f7 bool, f8 binary(100), f9 nchar(100)) tags(t1 timestamp, t2 tinyint, t3 smallint, " + + "t4 int, t5 bigint, t6 float, t7 double, t8 bool, t9 binary(100), t10 nchar(100))"); + + for (int i = 1; i <= 10; i++) { + stmt.execute("insert into db1.tb" + i + " using db1.stb1 tags(now, " + random.nextInt(10) + "," + + random.nextInt(10) + "," + random.nextInt(10) + "," + random.nextInt(10) + "," + + random.nextFloat() + "," + random.nextDouble() + "," + random.nextBoolean() + ",'abcABC123'," + + "'北京朝阳望京') values(now+" + i + "s, " + random.nextInt(10) + "," + random.nextInt(10) + "," + + +random.nextInt(10) + "," + random.nextInt(10) + "," + random.nextFloat() + "," + + random.nextDouble() + "," + random.nextBoolean() + ",'abcABC123','北京朝阳望京')"); + } + stmt.close(); + } + } + + +} diff --git a/tdenginereader/src/test/java/com/alibaba/datax/plugin/reader/TDengineReaderTest.java b/tdenginereader/src/test/java/com/alibaba/datax/plugin/reader/TDengineReaderTest.java new file 
mode 100644 index 00000000..491ddbaf --- /dev/null +++ b/tdenginereader/src/test/java/com/alibaba/datax/plugin/reader/TDengineReaderTest.java @@ -0,0 +1,153 @@ +package com.alibaba.datax.plugin.reader; + +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.writer.tdenginewriter.Key; +import org.junit.Assert; +import org.junit.Test; + +import java.util.List; + +public class TDengineReaderTest { + + @Test + public void jobInit_case01() { + // given + TDengineReader.Job job = new TDengineReader.Job(); + Configuration configuration = Configuration.from("{" + + "\"username\": \"root\"," + + "\"password\": \"taosdata\"," + + "\"connection\": [{\"table\":[\"weather\"],\"jdbcUrl\":[\"jdbc:TAOS-RS://master:6041/test\"]}]," + + "\"column\": [\"ts\",\"current\",\"voltage\",\"phase\"]," + + "\"where\":\"_c0 > 0\"," + + "\"beginDateTime\": \"2021-01-01 00:00:00\"," + + "\"endDateTime\": \"2021-01-01 12:00:00\"" + + "}"); + job.setPluginJobConf(configuration); + + // when + job.init(); + + // assert + Configuration conf = job.getPluginJobConf(); + + Assert.assertEquals("root", conf.getString(Key.USERNAME)); + Assert.assertEquals("taosdata", conf.getString("password")); + Assert.assertEquals("weather", conf.getString("connection[0].table[0]")); + Assert.assertEquals("jdbc:TAOS-RS://master:6041/test", conf.getString("connection[0].jdbcUrl[0]")); + Assert.assertEquals("2021-01-01 00:00:00", conf.getString("beginDateTime")); + Assert.assertEquals("2021-01-01 12:00:00", conf.getString("endDateTime")); + Assert.assertEquals("_c0 > 0", conf.getString("where")); + } + + + @Test + public void jobInit_case02() { + // given + TDengineReader.Job job = new TDengineReader.Job(); + Configuration configuration = Configuration.from("{" + + "\"username\": \"root\"," + + "\"password\": \"taosdata\"," + + "\"connection\": [{\"querySql\":[\"select * from weather\"],\"jdbcUrl\":[\"jdbc:TAOS-RS://master:6041/test\"]}]," + + "}"); + job.setPluginJobConf(configuration); + + // when + job.init(); + + // assert + Configuration conf = job.getPluginJobConf(); + + Assert.assertEquals("root", conf.getString(Key.USERNAME)); + Assert.assertEquals("taosdata", conf.getString("password")); + Assert.assertEquals("jdbc:TAOS-RS://master:6041/test", conf.getString("connection[0].jdbcUrl[0]")); + Assert.assertEquals("select * from weather", conf.getString("connection[0].querySql[0]")); + } + + @Test + public void jobSplit_case01() { + // given + TDengineReader.Job job = new TDengineReader.Job(); + Configuration configuration = Configuration.from("{" + + "\"username\": \"root\"," + + "\"password\": \"taosdata\"," + + "\"connection\": [{\"table\":[\"weather\"],\"jdbcUrl\":[\"jdbc:TAOS-RS://master:6041/test\"]}]," + + "\"column\": [\"ts\",\"current\",\"voltage\",\"phase\"]," + + "\"where\":\"_c0 > 0\"," + + "\"beginDateTime\": \"2021-01-01 00:00:00\"," + + "\"endDateTime\": \"2021-01-01 12:00:00\"" + + "}"); + job.setPluginJobConf(configuration); + + // when + job.init(); + List configurationList = job.split(1); + + // assert + Assert.assertEquals(1, configurationList.size()); + Configuration conf = configurationList.get(0); + Assert.assertEquals("root", conf.getString("username")); + Assert.assertEquals("taosdata", conf.getString("password")); + Assert.assertEquals("_c0 > 0", conf.getString("where")); + Assert.assertEquals("weather", conf.getString("table[0]")); + Assert.assertEquals("jdbc:TAOS-RS://master:6041/test", conf.getString("jdbcUrl")); + + } + + @Test + public void jobSplit_case02() { + // given + 
TDengineReader.Job job = new TDengineReader.Job(); + Configuration configuration = Configuration.from("{" + + "\"username\": \"root\"," + + "\"password\": \"taosdata\"," + + "\"connection\": [{\"querySql\":[\"select * from weather\"],\"jdbcUrl\":[\"jdbc:TAOS-RS://master:6041/test\"]}]," + + "\"column\": [\"ts\",\"current\",\"voltage\",\"phase\"]," + + "}"); + job.setPluginJobConf(configuration); + + // when + job.init(); + List configurationList = job.split(1); + + // assert + Assert.assertEquals(1, configurationList.size()); + Configuration conf = configurationList.get(0); + Assert.assertEquals("root", conf.getString("username")); + Assert.assertEquals("taosdata", conf.getString("password")); + Assert.assertEquals("select * from weather", conf.getString("querySql[0]")); + Assert.assertEquals("jdbc:TAOS-RS://master:6041/test", conf.getString("jdbcUrl")); + } + + @Test + public void jobSplit_case03() { + // given + TDengineReader.Job job = new TDengineReader.Job(); + Configuration configuration = Configuration.from("{" + + "\"username\": \"root\"," + + "\"password\": \"taosdata\"," + + "\"connection\": [{\"querySql\":[\"select * from weather\",\"select * from test.meters\"],\"jdbcUrl\":[\"jdbc:TAOS-RS://master:6041/test\", \"jdbc:TAOS://master:6030/test\"]}]," + + "\"column\": [\"ts\",\"current\",\"voltage\",\"phase\"]," + + "}"); + job.setPluginJobConf(configuration); + + // when + job.init(); + List configurationList = job.split(1); + + // assert + Assert.assertEquals(2, configurationList.size()); + Configuration conf = configurationList.get(0); + Assert.assertEquals("root", conf.getString("username")); + Assert.assertEquals("taosdata", conf.getString("password")); + Assert.assertEquals("select * from weather", conf.getString("querySql[0]")); + Assert.assertEquals("jdbc:TAOS-RS://master:6041/test", conf.getString("jdbcUrl")); + + Configuration conf1 = configurationList.get(1); + Assert.assertEquals("root", conf1.getString("username")); + Assert.assertEquals("taosdata", conf1.getString("password")); + Assert.assertEquals("select * from weather", conf1.getString("querySql[0]")); + Assert.assertEquals("select * from test.meters", conf1.getString("querySql[1]")); + Assert.assertEquals("jdbc:TAOS://master:6030/test", conf1.getString("jdbcUrl")); + } + +} \ No newline at end of file diff --git a/tdenginereader/src/test/resources/t2dm.json b/tdenginereader/src/test/resources/t2dm.json new file mode 100644 index 00000000..d87ade0c --- /dev/null +++ b/tdenginereader/src/test/resources/t2dm.json @@ -0,0 +1,52 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "tdenginereader", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "*" + ], + "connection": [ + { + "table": [ + "stb1" + ], + "jdbcUrl": [ + "jdbc:TAOS-RS://192.168.56.105:6041/db1" + ] + } + ] + } + }, + "writer": { + "name": "rdbmswriter", + "parameter": { + "connection": [ + { + "table": [ + "stb2" + ], + "jdbcUrl": "jdbc:dm://192.168.0.72:5236" + } + ], + "username": "TESTUSER", + "password": "test123456", + "table": "stb2", + "column": [ + "*" + ] + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginereader/src/test/resources/t2stream-1.json b/tdenginereader/src/test/resources/t2stream-1.json new file mode 100644 index 00000000..183ab7e2 --- /dev/null +++ b/tdenginereader/src/test/resources/t2stream-1.json @@ -0,0 +1,47 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "tdenginereader", + "parameter": { + "username": 
"root", + "password": "taosdata", + "column": [ + "ts", + "f1", + "f2", + "t1", + "t2" + ], + "connection": [ + { + "table": [ + "stb1" + ], + "jdbcUrl": [ + "jdbc:TAOS-RS://192.168.56.105:6041/db1" + ] + } + ], + "where": "t10 = '北京朝阳望京'", + "beginDateTime": "2022-03-07 12:00:00", + "endDateTime": "2022-03-07 19:00:00" + } + }, + "writer": { + "name": "streamwriter", + "parameter": { + "encoding": "UTF-8", + "print": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginereader/src/test/resources/t2stream-2.json b/tdenginereader/src/test/resources/t2stream-2.json new file mode 100644 index 00000000..15bfe9be --- /dev/null +++ b/tdenginereader/src/test/resources/t2stream-2.json @@ -0,0 +1,37 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "tdenginereader", + "parameter": { + "username": "root", + "password": "taosdata", + "connection": [ + { + "querySql": [ + "select * from stb1 where t10 = '北京朝阳望京' and _c0 >= '2022-03-07 12:00:00' and _c0 < '2022-03-07 19:00:00'" + ], + "jdbcUrl": [ + "jdbc:TAOS-RS://192.168.56.105:6041/db1" + ] + } + ] + } + }, + "writer": { + "name": "streamwriter", + "parameter": { + "encoding": "UTF-8", + "print": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginewriter/doc/tdenginewriter-CN.md b/tdenginewriter/doc/tdenginewriter-CN.md new file mode 100644 index 00000000..3d115fb7 --- /dev/null +++ b/tdenginewriter/doc/tdenginewriter-CN.md @@ -0,0 +1,245 @@ +# DataX TDengineWriter + +简体中文| [English](./tdenginewriter.md) + +## 1 快速介绍 + +TDengineWriter插件实现了写入数据到TDengine数据库目标表的功能。底层实现上,TDengineWriter通过JDBC连接TDengine,按照TDengine的SQL语法,执行insert语句/schemaless语句,将数据写入TDengine。 + +TDengineWriter可以作为数据迁移工具供DBA将其它数据库的数据导入到TDengine。 + + + +## 2 实现原理 + +TDengineWriter 通过 DataX 框架获取 Reader生成的协议数据,通过JDBC Driver连接TDengine,执行insert语句/schemaless语句,将数据写入TDengine。 + +在TDengine中,table可以分成超级表、子表、普通表三种类型,超级表和子表包括colum和tag,子表的tag列的值为固定值,普通表与关系型数据库中表的概念一致。(详细请参考:[数据模型](https://www.taosdata.com/docs/cn/v2.0/architecture#model) ) + +TDengineWriter支持向超级表、子表、普通表中写入数据,按照table的类型和column参数中是否包含tbname,使用以下方法进行写入: + +1. table为超级表,column中指定tbname:使用自动建表的insert语句,使用tbname作为子表的名称。 +2. table为超级表,column中未指定tbname:使用schemaless写入,TDengine会根据超级表名、tag值计算一个子表名称。 +3. table为子表:使用insert语句写入,ignoreTagUnmatched参数为true时,忽略record中tag值与table的tag值不一致的数据。 +4. 
table为普通表:使用insert语句写入。 + + + +## 3 功能说明 +### 3.1 配置样例 + +配置一个写入TDengine的作业 +先在TDengine上创建超级表: + +```sql +create database if not exists test; +create table test.weather (ts timestamp, temperature int, humidity double) tags(is_normal bool, device_id binary(100), address nchar(100)); +``` + +使用下面的Job配置,将数据写入TDengine: + +```json +{ + "job": { + "content": [ + { + "reader": { + "name": "streamreader", + "parameter": { + "column": [ + { + "type": "string", + "value": "tb1" + }, + { + "type": "date", + "value": "2022-02-20 12:00:01" + }, + { + "type": "long", + "random": "0, 10" + }, + { + "type": "double", + "random": "0, 10" + }, + { + "type": "bool", + "random": "0, 50" + }, + { + "type": "bytes", + "value": "abcABC123" + }, + { + "type": "string", + "value": "北京朝阳望京" + } + ], + "sliceRecordCount": 1 + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "tbname", + "ts", + "temperature", + "humidity", + "is_normal", + "device_id", + "address" + ], + "connection": [ + { + "table": [ + "weather" + ], + "jdbcUrl": "jdbc:TAOS-RS://192.168.56.105:6041/test" + } + ], + "batchSize": 100, + "ignoreTagsUnmatched": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} +``` + +### 3.2 参数说明 + +* jdbcUrl + * 描述:数据源的JDBC连接信息,TDengine的JDBC信息请参考:[Java连接器的使用](https://www.taosdata.com/docs/cn/v2.0/connector/java#url) + * 必选:是 + * 默认值:无 +* username + * 描述:用户名 + * 必选:是 + * 默认值:无 +* password + * 描述:用户名的密码 + * 必选:是 + * 默认值:无 +* table + * 描述:表名的集合,table应该包含column参数中的所有列(tbname除外)。注意,column中的tbname会被当作TDengine中子表名使用。 + * 必选:是 + * 默认值:无 +* column + * 描述:字段名的集合,字段的顺序应该与record中column的 + * 必选:是 + * 默认值:无 +* batchSize + * 描述:每batchSize条record为一个batch进行写入 + * 必选:否 + * 默认值:1 +* ignoreTagsUnmatched + * 描述:当table为TDengine中的一张子表,table具有tag值。如果数据的tag值与table的tag值不想等,数据不写入到table中。 + * 必选:否 + * 默认值:false + + +### 3.3 类型转换 + +datax中的数据类型,可以映射到TDengine的数据类型 + +| DataX 内部类型 | TDengine 数据类型 | +| -------------- | ----------------------------------------- | +| INT | TINYINT, SMALLINT, INT | +| LONG | TIMESTAMP, TINYINT, SMALLINT, INT, BIGINT | +| DOUBLE | FLOAT, DOUBLE | +| STRING | TIMESTAMP, BINARY, NCHAR | +| BOOL | BOOL | +| DATE | TIMESTAMP | +| BYTES | BINARY | + + + +### 3.4 各数据源到TDengine的参考示例 + +下面是一些数据源到TDengine进行数据迁移的示例 + +| 数据迁移示例 | 配置的示例 | +| ------------------ | ------------------------------------------------------------ | +| TDengine到TDengine | [超级表到超级表,指定tbname](../src/test/resources/t2t-1.json) | +| TDengine到TDengine | [超级表到超级表,不指定tbname](../src/test/resources/t2t-2.json) | +| TDengine到TDengine | [超级表到子表](../src/test/resources/t2t-3.json) | +| TDengine到TDengine | [普通表到普通表](../src/test/resources/t2t-4.json) | +| RDBMS到TDengine | [普通表到超级表,指定tbname](../src/test/resources/dm2t-1.json) | +| RDBMS到TDengine | [普通表到超级表,不指定tbname](../src/test/resources/dm2t-3.json) | +| RDBMS到TDengine | [普通表到子表](../src/test/resources/dm2t-2.json) | +| RDBMS到TDengine | [普通表到普通表](../src/test/resources/dm2t-4.json) | +| OpenTSDB到TDengine | [metric到普通表](../src/test/resources/o2t-1.json) | + + + + +## 4 性能报告 + +### 4.1 环境准备 + +#### 4.1.1 数据特征 + +建表语句: + +单行记录类似于: + +#### 4.1.2 机器参数 + +* 执行DataX的机器参数为: + 1. cpu: + 2. mem: + 3. net: 千兆双网卡 + 4. disc: DataX 数据不落磁盘,不统计此项 + +* TDengine数据库机器参数为: + 1. cpu: + 2. mem: + 3. net: 千兆双网卡 + 4. 
disc: + +#### 4.1.3 DataX jvm 参数 + + -Xms1024m -Xmx1024m -XX:+HeapDumpOnOutOfMemoryError + +### 4.2 测试报告 + +#### 4.2.1 单表测试报告 + +| 通道数 | DataX速度(Rec/s) | DataX流量(MB/s) | DataX机器网卡流出流量(MB/s) | DataX机器运行负载 | DB网卡进入流量(MB/s) | DB运行负载 | DB TPS | +| ------ | ---------------- | --------------- | --------------------------- | ----------------- | -------------------- | ---------- | ------ | +| 1 | | | | | | | | +| 4 | | | | | | | | +| 8 | | | | | | | | +| 16 | | | | | | | | +| 32 | | | | | | | | + +说明: + +1. + +#### 4.2.4 性能测试小结 + + + + +## 5 约束限制 + +1. + + + +## FAQ + +### 源表和目标表的字段顺序一致吗? + +是的,TDengineWriter按照column中字段的顺序解析来自datax的数据。 diff --git a/tdenginewriter/doc/tdenginewriter.md b/tdenginewriter/doc/tdenginewriter.md new file mode 100644 index 00000000..ba20fdb7 --- /dev/null +++ b/tdenginewriter/doc/tdenginewriter.md @@ -0,0 +1,196 @@ +# DataX TDengineWriter + +[简体中文](./tdenginewriter-CN.md) | English + +## 1 Quick Introduction + +The TDengineWriter plugin enables writing data to the target table of the TDengine database. At the bottom level, TDengineWriter connects TDengine through JDBC, executes insert statement /schemaless statement according to TDengine SQL syntax, and writes data to TDengine. + +TDengineWriter can be used as a data migration tool for DBAs to import data from other databases into TDengine. + + + +## 2 Implementation + +TDengineWriter obtains the protocol data generated by Reader through DataX framework, connects to TDengine through JDBC Driver, executes insert statement /schemaless statement, and writes the data to TDengine. + +In TDengine, table can be divided into super table, sub-table and ordinary table. Super table and sub-table include Colum and Tag. The value of tag column of sub-table is fixed value. (details please refer to: [data model](https://www.taosdata.com/docs/cn/v2.0/architecture#model)) + +The TDengineWriter can write data to super tables, sub-tables, and ordinary tables using the following methods based on the type of the table and whether the column parameter contains TBName: + +1. Table is a super table and column specifies tbname: use the automatic insert statement to create the table and use tbname as the name of the sub-table. + +2. Table is a super table and column does not contain tbname: use schemaless to write the table. TDengine will auto-create a tbname based on the super table name and tag value. + +3. Table is a sub-table: Use insert statement to write, ignoreTagUnmatched parameter is true, ignore data in record whose tag value is inconsistent with that of table. + +4. Table is a common table: use insert statement to write data. 
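As a minimal sketch of the first two write paths above (this is not the plugin's source code; the table name, columns and values are invented for illustration), the generated statements differ roughly as follows:

```java
// Rough sketch only: the shape of what the writer produces for a super table,
// depending on whether the record carries a tbname column.
public class SuperTableWriteSketch {
    public static void main(String[] args) {
        // 1. column list contains "tbname": build an auto-create-table insert,
        //    using the record's tbname value as the sub-table name
        String tbname = "tb1";
        String insertSql = "insert into " + tbname
                + " using weather tags(true, 'dev001', 'addr01')"
                + " (ts, temperature, humidity)"
                + " values('2022-02-20 12:00:01', 23, 0.65)";

        // 2. no "tbname" column: write via the schemaless line protocol and let TDengine
        //    derive the sub-table name from the super table name and the tag values
        //    (shape only; exact escaping and type suffixes follow TDengine's protocol)
        String schemalessLine = "weather,is_normal=true,device_id=dev001"
                + " temperature=23i32,humidity=0.65 1645329601000";

        System.out.println(insertSql);
        System.out.println(schemalessLine);
    }
}
```

In both cases the rows end up in a sub-table of the super table; the difference is only whether the sub-table name is taken from the record's tbname column or computed by TDengine from the tag values.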
+ + +## 3 Features Introduction +### 3.1 Sample +Configure a job to write to TDengine + +Create a supertable on TDengine: + +```sql +create database if not exists test; +create table test.weather (ts timestamp, temperature int, humidity double) tags(is_normal bool, device_id binary(100), address nchar(100)); +``` + +Write data to TDengine using the following Job configuration: + +```json +{ + "job": { + "content": [ + { + "reader": { + "name": "streamreader", + "parameter": { + "column": [ + { + "type": "string", + "value": "tb1" + }, + { + "type": "date", + "value": "2022-02-20 12:00:01" + }, + { + "type": "long", + "random": "0, 10" + }, + { + "type": "double", + "random": "0, 10" + }, + { + "type": "bool", + "random": "0, 50" + }, + { + "type": "bytes", + "value": "abcABC123" + }, + { + "type": "string", + "value": "北京朝阳望京" + } + ], + "sliceRecordCount": 1 + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "tbname", + "ts", + "temperature", + "humidity", + "is_normal", + "device_id", + "address" + ], + "connection": [ + { + "table": [ + "weather" + ], + "jdbcUrl": "jdbc:TAOS-RS://192.168.56.105:6041/test" + } + ], + "batchSize": 100, + "ignoreTagsUnmatched": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} +``` + +### 3.2 Configuration + +* jdbcUrl + * Descrption: Data source JDBC connection information, TDengine JDBC information please refer to: [Java connector](https://www.taosdata.com/docs/cn/v2.0/connector/java#url) + * Required: yes + * Default: none +* username + * Descrption: username + * Required: yes + * Default: none + +* password + * Descrption: password of username + * Required: yes + * Default: none + +* table + * Descrption: A list of table names that should contain all of the columns in the column parameter (except tbname). Note that tbname in column is used as the TDengine sub-table name. + * Required: yes + * Default: none + +* column + * Descrption: A list of field names, the order of the fields should be the column in the record + * Required: yes + * Default: none + +* batchSize + * Descrption: Each batchSize record is written to a batch + * Required: no + * Default: 1 + +* ignoreTagsUnmatched + * Descrption: When table is a sub-table in TDengine, table has a tag value. If the tag value of the data and the tag value of the table are not equal, the data is not written to the table. 
+ * Required: no + * Default: false + + +#### 3.3 Type Convert + +Data types in datax that can be mapped to data types in TDengine + +| DataX Type | TDengine Type | +| ---------- | ----------------------------------------- | +| INT | TINYINT, SMALLINT, INT | +| LONG | TIMESTAMP, TINYINT, SMALLINT, INT, BIGINT | +| DOUBLE | FLOAT, DOUBLE | +| STRING | TIMESTAMP, BINARY, NCHAR | +| BOOL | BOOL | +| DATE | TIMESTAMP | +| BYTES | BINARY | + +### 3.2 From MongoDB to TDengine + +Here are some examples of data sources migrating to TDengine + +| Sample | Configuration | +| -------------------- | ------------------------------------------------------------ | +| TDengine to TDengine | [super table to super table with tbname](../src/test/resources/t2t-1.json) | +| TDengine to TDengine | [super table to super table without tbname](../src/test/resources/t2t-2.json) | +| TDengine to TDengine | [super table to sub-table](../src/test/resources/t2t-3.json) | +| TDengine to TDengine | [table to table](../src/test/resources/t2t-4.json) | +| RDBMS to TDengine | [table to super table with tbname](../src/test/resources/dm2t-1.json) | +| RDBMS to TDengine | [table to super table without tbname](../src/test/resources/dm2t-2.json) | +| RDBMS to TDengine | [table to sub-table](../src/test/resources/dm2t-3.json) | +| RDBMS to TDengine | [table to table](../src/test/resources/dm2t-4.json) | +| OpenTSDB to TDengine | [metric to table](../src/test/resources/o2t-1.json) | + +## 4 Restriction + + + +## FAQ + +### Do columns in source table and columns in target table must be in the same order? + +Yes, TDengineWriter parses the data from the Datax in the order of the fields in the column. diff --git a/tdenginewriter/pom.xml b/tdenginewriter/pom.xml new file mode 100644 index 00000000..a7564e6b --- /dev/null +++ b/tdenginewriter/pom.xml @@ -0,0 +1,127 @@ + + + + datax-all + com.alibaba.datax + 0.0.1-SNAPSHOT + + 4.0.0 + + com.alibaba.datax.tdenginewriter + tdenginewriter + 0.0.1-SNAPSHOT + + + 8 + 8 + + + + + + com.taosdata.jdbc + taos-jdbcdriver + 2.0.39 + + + + org.apache.commons + commons-lang3 + ${commons-lang3-version} + + + + com.alibaba.datax + datax-common + ${datax-project-version} + + + slf4j-log4j12 + org.slf4j + + + + + + junit + junit + ${junit-version} + test + + + com.alibaba.datax + datax-core + 0.0.1-SNAPSHOT + test + + + mysql + mysql-connector-java + 5.1.49 + test + + + + + + + + + + + + + + + + + + + maven-compiler-plugin + + ${jdk-version} + ${jdk-version} + ${project-sourceEncoding} + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + datax + + + + dwzip + package + + single + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.12.4 + + + + **/*Test.java + + + + + true + + + + + + + \ No newline at end of file diff --git a/tdenginewriter/src/main/assembly/package.xml b/tdenginewriter/src/main/assembly/package.xml new file mode 100644 index 00000000..d3b75ea2 --- /dev/null +++ b/tdenginewriter/src/main/assembly/package.xml @@ -0,0 +1,34 @@ + + + + dir + + false + + + src/main/resources + + plugin.json + plugin_job_template.json + + plugin/writer/tdenginewriter + + + target/ + + tdenginewriter-0.0.1-SNAPSHOT.jar + + plugin/writer/tdenginewriter + + + + + + false + plugin/writer/tdenginewriter/libs + runtime + + + diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/ColumnMeta.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/ColumnMeta.java new file mode 100644 index 00000000..5c77eccd --- /dev/null +++ 
b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/ColumnMeta.java @@ -0,0 +1,24 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +public class ColumnMeta { + String field; + String type; + int length; + String note; + boolean isTag; + boolean isPrimaryKey; + Object value; + + @Override + public String toString() { + return "ColumnMeta{" + + "field='" + field + '\'' + + ", type='" + type + '\'' + + ", length=" + length + + ", note='" + note + '\'' + + ", isTag=" + isTag + + ", isPrimaryKey=" + isPrimaryKey + + ", value=" + value + + '}'; + } +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/Constants.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/Constants.java new file mode 100644 index 00000000..d62c8f32 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/Constants.java @@ -0,0 +1,8 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +public class Constants { + public static final String DEFAULT_USERNAME = "root"; + public static final String DEFAULT_PASSWORD = "taosdata"; + public static final int DEFAULT_BATCH_SIZE = 1; + public static final boolean DEFAULT_IGNORE_TAGS_UNMATCHED = false; +} \ No newline at end of file diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DataHandler.java new file mode 100644 index 00000000..f22d4d6c --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DataHandler.java @@ -0,0 +1,8 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.plugin.TaskPluginCollector; + +public interface DataHandler { + int handle(RecordReceiver lineReceiver, TaskPluginCollector collector); +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandler.java new file mode 100644 index 00000000..27ade382 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandler.java @@ -0,0 +1,569 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.plugin.TaskPluginCollector; +import com.alibaba.datax.common.util.Configuration; +import com.taosdata.jdbc.SchemalessWriter; +import com.taosdata.jdbc.enums.SchemalessProtocolType; +import com.taosdata.jdbc.enums.SchemalessTimestampType; +import com.taosdata.jdbc.utils.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.*; +import java.util.*; +import java.util.Date; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +public class DefaultDataHandler implements DataHandler { + private static final Logger LOG = LoggerFactory.getLogger(DefaultDataHandler.class); + + static { + try { + Class.forName("com.taosdata.jdbc.TSDBDriver"); + Class.forName("com.taosdata.jdbc.rs.RestfulDriver"); + } catch (ClassNotFoundException e) { + LOG.error(e.getMessage(), e); + } + } + + private final TaskPluginCollector 
taskPluginCollector; + private String username; + private String password; + private String jdbcUrl; + private int batchSize; + private boolean ignoreTagsUnmatched; + + private List tables; + private List columns; + + private Map tableMetas; + private SchemaManager schemaManager; + + public void setTableMetas(Map tableMetas) { + this.tableMetas = tableMetas; + } + + public void setTbnameColumnMetasMap(Map> tbnameColumnMetasMap) { + this.tbnameColumnMetasMap = tbnameColumnMetasMap; + } + + public void setSchemaManager(SchemaManager schemaManager) { + this.schemaManager = schemaManager; + } + + private Map> tbnameColumnMetasMap; + + public DefaultDataHandler(Configuration configuration, TaskPluginCollector taskPluginCollector) { + this.username = configuration.getString(Key.USERNAME, Constants.DEFAULT_USERNAME); + this.password = configuration.getString(Key.PASSWORD, Constants.DEFAULT_PASSWORD); + this.jdbcUrl = configuration.getString(Key.JDBC_URL); + this.batchSize = configuration.getInt(Key.BATCH_SIZE, Constants.DEFAULT_BATCH_SIZE); + this.tables = configuration.getList(Key.TABLE, String.class); + this.columns = configuration.getList(Key.COLUMN, String.class); + this.ignoreTagsUnmatched = configuration.getBool(Key.IGNORE_TAGS_UNMATCHED, Constants.DEFAULT_IGNORE_TAGS_UNMATCHED); + this.taskPluginCollector = taskPluginCollector; + } + + @Override + public int handle(RecordReceiver lineReceiver, TaskPluginCollector collector) { + int count = 0; + int affectedRows = 0; + + try (Connection conn = DriverManager.getConnection(jdbcUrl, username, password)) { + LOG.info("connection[ jdbcUrl: " + jdbcUrl + ", username: " + username + "] established."); + // prepare table_name -> table_meta + this.schemaManager = new SchemaManager(conn); + this.tableMetas = schemaManager.loadTableMeta(tables); + // prepare table_name -> column_meta + this.tbnameColumnMetasMap = schemaManager.loadColumnMetas(tables); + + List recordBatch = new ArrayList<>(); + Record record; + for (int i = 1; (record = lineReceiver.getFromReader()) != null; i++) { + if (i % batchSize != 0) { + recordBatch.add(record); + } else { + try { + recordBatch.add(record); + affectedRows += writeBatch(conn, recordBatch); + } catch (SQLException e) { + LOG.warn("use one row insert. because:" + e.getMessage()); + affectedRows += writeEachRow(conn, recordBatch); + } + recordBatch.clear(); + } + count++; + } + + if (!recordBatch.isEmpty()) { + try { + affectedRows += writeBatch(conn, recordBatch); + } catch (SQLException e) { + LOG.warn("use one row insert. because:" + e.getMessage()); + affectedRows += writeEachRow(conn, recordBatch); + } + recordBatch.clear(); + } + } catch (SQLException e) { + throw DataXException.asDataXException(TDengineWriterErrorCode.RUNTIME_EXCEPTION, e.getMessage()); + } + + if (affectedRows != count) { + LOG.error("write record missing or incorrect happened, affectedRows: " + affectedRows + ", total: " + count); + } + + return affectedRows; + } + + private int writeEachRow(Connection conn, List recordBatch) { + int affectedRows = 0; + for (Record record : recordBatch) { + List recordList = new ArrayList<>(); + recordList.add(record); + try { + affectedRows += writeBatch(conn, recordList); + } catch (SQLException e) { + LOG.error(e.getMessage()); + this.taskPluginCollector.collectDirtyRecord(record, e); + } + } + return affectedRows; + } + + /** + * table: [ "stb1", "stb2", "tb1", "tb2", "t1" ] + * stb1[ts,f1,f2] tags:[t1] + * stb2[ts,f1,f2,f3] tags:[t1,t2] + * 1. 
tables 表的的类型分成:stb(super table)/tb(sub table)/t(original table) + * 2. 对于stb,自动建表/schemaless + * 2.1: data中有tbname字段, 例如:data: [ts, f1, f2, f3, t1, t2, tbname] tbColumn: [ts, f1, f2, t1] => insert into tbname using stb1 tags(t1) values(ts, f1, f2) + * 2.2: data中没有tbname字段,例如:data: [ts, f1, f2, f3, t1, t2] tbColumn: [ts, f1, f2, t1] => schemaless: stb1,t1=t1 f1=f1,f2=f2 ts, 没有批量写 + * 3. 对于tb,拼sql,例如:data: [ts, f1, f2, f3, t1, t2] tbColumn: [ts, f1, f2, t1] => insert into tb(ts, f1, f2) values(ts, f1, f2) + * 4. 对于t,拼sql,例如:data: [ts, f1, f2, f3, t1, t2] tbColumn: [ts, f1, f2, f3, t1, t2] insert into t(ts, f1, f2, f3, t1, t2) values(ts, f1, f2, f3, t1, t2) + */ + public int writeBatch(Connection conn, List recordBatch) throws SQLException { + int affectedRows = 0; + for (String table : tables) { + TableMeta tableMeta = tableMetas.get(table); + switch (tableMeta.tableType) { + case SUP_TABLE: { + if (columns.contains("tbname")) { + affectedRows += writeBatchToSupTableBySQL(conn, table, recordBatch); + } else { + Map tag2Tbname = schemaManager.loadTagTableNameMap(table); + affectedRows += writeBatchToSupTableWithoutTbname(conn, table, recordBatch, tag2Tbname); + } + } + break; + case SUB_TABLE: + affectedRows += writeBatchToSubTable(conn, table, recordBatch); + break; + case NML_TABLE: + default: + affectedRows += writeBatchToNormalTable(conn, table, recordBatch); + } + } + return affectedRows; + } + + private int writeBatchToSupTableWithoutTbname(Connection conn, String table, List recordBatch, Map tag2Tbname) throws SQLException { + List columnMetas = tbnameColumnMetasMap.get(table); + List subTableExist = filterSubTableExistRecords(recordBatch, columnMetas, tag2Tbname); + List subTableNotExist = filterSubTableNotExistRecords(recordBatch, columnMetas, tag2Tbname); + + int affectedRows = 0; + Map> subTableRecordsMap = splitRecords(subTableExist, columnMetas, tag2Tbname); + + List subTables = new ArrayList<>(subTableRecordsMap.keySet()); + this.tbnameColumnMetasMap.putAll(schemaManager.loadColumnMetas(subTables)); + + for (String subTable : subTableRecordsMap.keySet()) { + List subTableRecords = subTableRecordsMap.get(subTable); + affectedRows += writeBatchToNormalTable(conn, subTable, subTableRecords); + } + if (!subTableNotExist.isEmpty()) + affectedRows += writeBatchToSupTableBySchemaless(conn, table, subTableNotExist); + return affectedRows; + } + + private List filterSubTableExistRecords(List recordBatch, List columnMetas, Map tag2Tbname) { + return recordBatch.stream().filter(record -> { + String tagStr = getTagString(columnMetas, record); + return tag2Tbname.containsKey(tagStr); + }).collect(Collectors.toList()); + } + + private List filterSubTableNotExistRecords(List recordBatch, List columnMetas, Map tag2Tbname) { + return recordBatch.stream().filter(record -> { + String tagStr = getTagString(columnMetas, record); + return !tag2Tbname.containsKey(tagStr); + }).collect(Collectors.toList()); + } + + private Map> splitRecords(List subTableExist, List columnMetas, Map tag2Tbname) { + Map> ret = new HashMap<>(); + for (Record record : subTableExist) { + String tagstr = getTagString(columnMetas, record); + String tbname = tag2Tbname.get(tagstr); + if (ret.containsKey(tbname)) { + ret.get(tbname).add(record); + } else { + List list = new ArrayList<>(); + list.add(record); + ret.put(tbname, list); + } + } + return ret; + } + + private String getTagString(List columnMetas, Record record) { + return IntStream.range(0, columnMetas.size()).mapToObj(colIndex -> { + ColumnMeta columnMeta = 
columnMetas.get(colIndex); + if (columnMeta.isTag) { + Column column = record.getColumn(colIndex); + switch (columnMeta.type) { + case "TINYINT": + case "SMALLINT": + case "INT": + case "BIGINT": + return column.asLong().toString(); + default: + return column.asString(); + } + } + return ""; + }).collect(Collectors.joining()); + } + + /** + * insert into record[idx(tbname)] using table tags(record[idx(t1)]) (ts, f1, f2, f3) values(record[idx(ts)], record[idx(f1)], ) + * record[idx(tbname)] using table tags(record[idx(t1)]) (ts, f1, f2, f3) values(record[idx(ts)], record[idx(f1)], ) + * record[idx(tbname)] using table tags(record[idx(t1)]) (ts, f1, f2, f3) values(record[idx(ts)], record[idx(f1)], ) + */ + private int writeBatchToSupTableBySQL(Connection conn, String table, List recordBatch) throws SQLException { + List columnMetas = this.tbnameColumnMetasMap.get(table); + + StringBuilder sb = new StringBuilder("insert into"); + for (Record record : recordBatch) { + sb.append(" ").append(record.getColumn(indexOf("tbname")).asString()) + .append(" using ").append(table) + .append(" tags") + .append(columnMetas.stream().filter(colMeta -> columns.contains(colMeta.field)).filter(colMeta -> { + return colMeta.isTag; + }).map(colMeta -> { + return buildColumnValue(colMeta, record); + }).collect(Collectors.joining(",", "(", ")"))) + .append(" ") + .append(columnMetas.stream().filter(colMeta -> columns.contains(colMeta.field)).filter(colMeta -> { + return !colMeta.isTag; + }).map(colMeta -> { + return colMeta.field; + }).collect(Collectors.joining(",", "(", ")"))) + .append(" values") + .append(columnMetas.stream().filter(colMeta -> columns.contains(colMeta.field)).filter(colMeta -> { + return !colMeta.isTag; + }).map(colMeta -> { + return buildColumnValue(colMeta, record); + }).collect(Collectors.joining(",", "(", ")"))); + } + String sql = sb.toString(); + + return executeUpdate(conn, sql); + } + + private int executeUpdate(Connection conn, String sql) throws SQLException { + int count; + try (Statement stmt = conn.createStatement()) { + LOG.debug(">>> " + sql); + count = stmt.executeUpdate(sql); + } + return count; + } + + private String buildColumnValue(ColumnMeta colMeta, Record record) { + Column column = record.getColumn(indexOf(colMeta.field)); + TimestampPrecision timestampPrecision = schemaManager.loadDatabasePrecision(); + switch (column.getType()) { + case DATE: { + Date value = column.asDate(); + switch (timestampPrecision) { + case MILLISEC: + return "" + (value.getTime()); + case MICROSEC: + return "" + (value.getTime() * 1000); + case NANOSEC: + return "" + (value.getTime() * 1000_000); + default: + return "'" + column.asString() + "'"; + } + } + case BYTES: + case STRING: + if (colMeta.type.equals("TIMESTAMP")) + return "\"" + column.asString() + "\""; + String value = column.asString(); + if (value == null) + return "NULL"; + return "\'" + Utils.escapeSingleQuota(value) + "\'"; + case NULL: + case BAD: + return "NULL"; + case BOOL: + case DOUBLE: + case INT: + case LONG: + default: + return column.asString(); + } + } + + /** + * table: ["stb1"], column: ["ts", "f1", "f2", "t1"] + * data: [ts, f1, f2, f3, t1, t2] tbColumn: [ts, f1, f2, t1] => schemaless: stb1,t1=t1 f1=f1,f2=f2 ts + */ + private int writeBatchToSupTableBySchemaless(Connection conn, String table, List recordBatch) throws SQLException { + int count = 0; + TimestampPrecision timestampPrecision = schemaManager.loadDatabasePrecision(); + + List columnMetaList = this.tbnameColumnMetasMap.get(table); + ColumnMeta ts = 
columnMetaList.stream().filter(colMeta -> colMeta.isPrimaryKey).findFirst().get(); + + List lines = new ArrayList<>(); + for (Record record : recordBatch) { + StringBuilder sb = new StringBuilder(); + sb.append(table).append(",") + .append(columnMetaList.stream().filter(colMeta -> columns.contains(colMeta.field)).filter(colMeta -> { + return colMeta.isTag; + }).map(colMeta -> { + String value = record.getColumn(indexOf(colMeta.field)).asString(); + if (value.contains(" ")) + value = value.replace(" ", "\\ "); + return colMeta.field + "=" + value; + }).collect(Collectors.joining(","))) + .append(" ") + .append(columnMetaList.stream().filter(colMeta -> columns.contains(colMeta.field)).filter(colMeta -> { + return !colMeta.isTag && !colMeta.isPrimaryKey; + }).map(colMeta -> { + return colMeta.field + "=" + buildSchemalessColumnValue(colMeta, record); +// return colMeta.field + "=" + record.getColumn(indexOf(colMeta.field)).asString(); + }).collect(Collectors.joining(","))) + .append(" "); + // timestamp + Column column = record.getColumn(indexOf(ts.field)); + Object tsValue = column.getRawData(); + if (column.getType() == Column.Type.DATE && tsValue instanceof Date) { + long time = column.asDate().getTime(); + switch (timestampPrecision) { + case NANOSEC: + sb.append(time * 1000000); + break; + case MICROSEC: + sb.append(time * 1000); + break; + case MILLISEC: + default: + sb.append(time); + } + } else if (column.getType() == Column.Type.STRING) { + sb.append(Utils.parseTimestamp(column.asString())); + } else { + sb.append(column.asLong()); + } + String line = sb.toString(); + LOG.debug(">>> " + line); + lines.add(line); + count++; + } + + SchemalessWriter writer = new SchemalessWriter(conn); + SchemalessTimestampType timestampType; + switch (timestampPrecision) { + case NANOSEC: + timestampType = SchemalessTimestampType.NANO_SECONDS; + break; + case MICROSEC: + timestampType = SchemalessTimestampType.MICRO_SECONDS; + break; + case MILLISEC: + timestampType = SchemalessTimestampType.MILLI_SECONDS; + break; + default: + timestampType = SchemalessTimestampType.NOT_CONFIGURED; + } + + writer.write(lines, SchemalessProtocolType.LINE, timestampType); + + LOG.warn("schemalessWriter does not return affected rows!"); + return count; + } + + private long dateAsLong(Column column) { + TimestampPrecision timestampPrecision = schemaManager.loadDatabasePrecision(); + long time = column.asDate().getTime(); + switch (timestampPrecision) { + case NANOSEC: + return time * 1000000; + case MICROSEC: + return time * 1000; + case MILLISEC: + default: + return time; + } + } + + private String buildSchemalessColumnValue(ColumnMeta colMeta, Record record) { + Column column = record.getColumn(indexOf(colMeta.field)); + switch (column.getType()) { + case DATE: + if (colMeta.type.equals("TIMESTAMP")) + return dateAsLong(column) + "i64"; + return "L'" + column.asString() + "'"; + case NULL: + case BAD: + return "NULL"; + case DOUBLE: { + if (colMeta.type.equals("FLOAT")) + return column.asString() + "f32"; + if (colMeta.type.equals("DOUBLE")) + return column.asString() + "f64"; + } + case INT: + case LONG: { + if (colMeta.type.equals("TINYINT")) + return column.asString() + "i8"; + if (colMeta.type.equals("SMALLINT")) + return column.asString() + "i16"; + if (colMeta.type.equals("INT")) + return column.asString() + "i32"; + if (colMeta.type.equals("BIGINT")) + return column.asString() + "i64"; + } + case BYTES: + case STRING: + if (colMeta.type.equals("TIMESTAMP")) + return column.asString() + "i64"; + String value = 
column.asString(); + value = value.replace("\"", "\\\""); + if (colMeta.type.startsWith("BINARY")) + return "\"" + value + "\""; + if (colMeta.type.startsWith("NCHAR")) + return "L\"" + value + "\""; + case BOOL: + default: + return column.asString(); + } + } + + /** + * table: ["tb1"], column: [tbname, ts, f1, f2, t1] + * if contains("tbname") and tbname != tb1 continue; + * else if t1 != record[idx(t1)] or t2 != record[idx(t2)]... continue; + * else + * insert into tb1 (ts, f1, f2) values( record[idx(ts)], record[idx(f1)], record[idx(f2)]) + */ + private int writeBatchToSubTable(Connection conn, String table, List recordBatch) throws SQLException { + List columnMetas = this.tbnameColumnMetasMap.get(table); + + StringBuilder sb = new StringBuilder(); + sb.append("insert into ").append(table).append(" ") + .append(columnMetas.stream().filter(colMeta -> columns.contains(colMeta.field)).filter(colMeta -> { + return !colMeta.isTag; + }).map(colMeta -> { + return colMeta.field; + }).collect(Collectors.joining(",", "(", ")"))) + .append(" values"); + int validRecords = 0; + for (Record record : recordBatch) { + if (columns.contains("tbname") && !table.equals(record.getColumn(indexOf("tbname")).asString())) + continue; + + boolean tagsAllMatch = columnMetas.stream().filter(colMeta -> columns.contains(colMeta.field)).filter(colMeta -> { + return colMeta.isTag; + }).allMatch(colMeta -> { + Column column = record.getColumn(indexOf(colMeta.field)); + boolean equals = equals(column, colMeta); + return equals; + }); + + if (ignoreTagsUnmatched && !tagsAllMatch) + continue; + + sb.append(columnMetas.stream().filter(colMeta -> columns.contains(colMeta.field)).filter(colMeta -> { + return !colMeta.isTag; + }).map(colMeta -> { + return buildColumnValue(colMeta, record); + }).collect(Collectors.joining(", ", "(", ") "))); + validRecords++; + } + + if (validRecords == 0) { + LOG.warn("no valid records in this batch"); + return 0; + } + + String sql = sb.toString(); + return executeUpdate(conn, sql); + } + + private boolean equals(Column column, ColumnMeta colMeta) { + switch (column.getType()) { + case BOOL: + return column.asBoolean().equals(Boolean.valueOf(colMeta.value.toString())); + case INT: + case LONG: + return column.asLong().equals(Long.valueOf(colMeta.value.toString())); + case DOUBLE: + return column.asDouble().equals(Double.valueOf(colMeta.value.toString())); + case NULL: + return colMeta.value == null; + case DATE: + return column.asDate().getTime() == ((Timestamp) colMeta.value).getTime(); + case BAD: + case BYTES: + return Arrays.equals(column.asBytes(), (byte[]) colMeta.value); + case STRING: + default: + return column.asString().equals(colMeta.value.toString()); + } + } + + /** + * table: ["weather"], column: ["ts, f1, f2, f3, t1, t2"] + * sql: insert into weather (ts, f1, f2, f3, t1, t2) values( record[idx(ts), record[idx(f1)], ...) 
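+ * e.g. (illustrative values, ms precision) two records become: insert into weather (ts,f1,f2,f3,t1,t2) values (1648432611249,1,2,3,1,1)(1648432612249,1,2,3,1,2)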
+ */ + private int writeBatchToNormalTable(Connection conn, String table, List recordBatch) throws SQLException { + List columnMetas = this.tbnameColumnMetasMap.get(table); + + StringBuilder sb = new StringBuilder(); + sb.append("insert into ").append(table) + .append(" ") + .append(columnMetas.stream().filter(colMeta -> !colMeta.isTag).filter(colMeta -> columns.contains(colMeta.field)).map(colMeta -> { + return colMeta.field; + }).collect(Collectors.joining(",", "(", ")"))) + .append(" values "); + + for (Record record : recordBatch) { + sb.append(columnMetas.stream().filter(colMeta -> !colMeta.isTag).filter(colMeta -> columns.contains(colMeta.field)).map(colMeta -> { + return buildColumnValue(colMeta, record); + }).collect(Collectors.joining(",", "(", ")"))); + } + + String sql = sb.toString(); + return executeUpdate(conn, sql); + } + + private int indexOf(String colName) throws DataXException { + for (int i = 0; i < columns.size(); i++) { + if (columns.get(i).equals(colName)) + return i; + } + throw DataXException.asDataXException(TDengineWriterErrorCode.RUNTIME_EXCEPTION, + "cannot find col: " + colName + " in columns: " + columns); + } + +} \ No newline at end of file diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/Key.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/Key.java new file mode 100644 index 00000000..1a9358db --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/Key.java @@ -0,0 +1,18 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +public class Key { + public static final String USERNAME = "username"; + public static final String PASSWORD = "password"; + public static final String CONNECTION = "connection"; + public static final String BATCH_SIZE = "batchSize"; + public static final String TABLE = "table"; + public static final String JDBC_URL = "jdbcUrl"; + public static final String COLUMN = "column"; + public static final String IGNORE_TAGS_UNMATCHED = "ignoreTagsUnmatched"; + + public static final String BEGIN_DATETIME = "beginDateTime"; + public static final String END_DATETIME = "endDateTime"; + public static final String WHERE = "where"; + public static final String QUERY_SQL = "querySql"; + public static final String MANDATORY_ENCODING = "mandatoryEncoding"; +} \ No newline at end of file diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/OpentsdbDataHandler.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/OpentsdbDataHandler.java new file mode 100644 index 00000000..2fb5a98f --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/OpentsdbDataHandler.java @@ -0,0 +1,99 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.plugin.TaskPluginCollector; +import com.alibaba.datax.common.util.Configuration; +import com.taosdata.jdbc.SchemalessWriter; +import com.taosdata.jdbc.enums.SchemalessProtocolType; +import com.taosdata.jdbc.enums.SchemalessTimestampType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; + +public class OpentsdbDataHandler implements DataHandler { + private 
static final Logger LOG = LoggerFactory.getLogger(OpentsdbDataHandler.class); + private SchemalessWriter writer; + + private String jdbcUrl; + private String user; + private String password; + int batchSize; + + public OpentsdbDataHandler(Configuration config) { + // opentsdb json protocol use JNI and schemaless API to write + this.jdbcUrl = config.getString(Key.JDBC_URL); + this.user = config.getString(Key.USERNAME, "root"); + this.password = config.getString(Key.PASSWORD, "taosdata"); + this.batchSize = config.getInt(Key.BATCH_SIZE, Constants.DEFAULT_BATCH_SIZE); + } + + @Override + public int handle(RecordReceiver lineReceiver, TaskPluginCollector collector) { + int count = 0; + try (Connection conn = DriverManager.getConnection(jdbcUrl, user, password);) { + LOG.info("connection[ jdbcUrl: " + jdbcUrl + ", username: " + user + "] established."); + writer = new SchemalessWriter(conn); + count = write(lineReceiver, batchSize); + } catch (Exception e) { + throw DataXException.asDataXException(TDengineWriterErrorCode.RUNTIME_EXCEPTION, e); + } + + return count; + } + + private int write(RecordReceiver lineReceiver, int batchSize) throws DataXException { + int recordIndex = 1; + try { + Record record; + StringBuilder sb = new StringBuilder(); + while ((record = lineReceiver.getFromReader()) != null) { + if (batchSize == 1) { + String jsonData = recordToString(record); + LOG.debug(">>> " + jsonData); + writer.write(jsonData, SchemalessProtocolType.JSON, SchemalessTimestampType.NOT_CONFIGURED); + } else if (recordIndex % batchSize == 1) { + sb.append("[").append(recordToString(record)).append(","); + } else if (recordIndex % batchSize == 0) { + sb.append(recordToString(record)).append("]"); + String jsonData = sb.toString(); + LOG.debug(">>> " + jsonData); + writer.write(jsonData, SchemalessProtocolType.JSON, SchemalessTimestampType.NOT_CONFIGURED); + sb.delete(0, sb.length()); + } else { + sb.append(recordToString(record)).append(","); + } + recordIndex++; + } + if (sb.length() != 0 && sb.charAt(0) == '[') { + String jsonData = sb.deleteCharAt(sb.length() - 1).append("]").toString(); + System.err.println(jsonData); + LOG.debug(">>> " + jsonData); + writer.write(jsonData, SchemalessProtocolType.JSON, SchemalessTimestampType.NOT_CONFIGURED); + } + } catch (Exception e) { + throw DataXException.asDataXException(TDengineWriterErrorCode.RUNTIME_EXCEPTION, e); + } + return recordIndex - 1; + } + + private String recordToString(Record record) { + int recordLength = record.getColumnNumber(); + if (0 == recordLength) { + return ""; + } + Column column; + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < recordLength; i++) { + column = record.getColumn(i); + sb.append(column.asString()).append("\t"); + } + sb.setLength(sb.length() - 1); + return sb.toString(); + } +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java new file mode 100644 index 00000000..fc0c002d --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManager.java @@ -0,0 +1,206 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.common.exception.DataXException; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.*; +import java.util.*; +import java.util.stream.Collectors; + +public class SchemaManager { + private static 
final Logger LOG = LoggerFactory.getLogger(SchemaManager.class); +// private static final String TAG_TABLE_NAME_MAP_KEY_SPLITTER = "_"; + private static final String TAG_TABLE_NAME_MAP_KEY_SPLITTER = ""; + + private final Connection conn; + private TimestampPrecision precision; + private Map> tags2tbnameMaps = new HashMap<>(); + + public SchemaManager(Connection conn) { + this.conn = conn; + } + + public TimestampPrecision loadDatabasePrecision() throws DataXException { + if (this.precision != null) + return this.precision; + + try (Statement stmt = conn.createStatement()) { + ResultSet rs = stmt.executeQuery("select database()"); + String dbname = null; + while (rs.next()) { + dbname = rs.getString("database()"); + } + if (dbname == null) + throw DataXException.asDataXException(TDengineWriterErrorCode.RUNTIME_EXCEPTION, + "Database not specified or available"); + + rs = stmt.executeQuery("show databases"); + while (rs.next()) { + String name = rs.getString("name"); + if (!name.equalsIgnoreCase(dbname)) + continue; + String precision = rs.getString("precision"); + switch (precision) { + case "ns": + this.precision = TimestampPrecision.NANOSEC; + break; + case "us": + this.precision = TimestampPrecision.MICROSEC; + break; + case "ms": + default: + this.precision = TimestampPrecision.MILLISEC; + } + } + } catch (SQLException e) { + throw DataXException.asDataXException(TDengineWriterErrorCode.RUNTIME_EXCEPTION, e.getMessage()); + } + return this.precision; + } + + public Map loadTableMeta(List tables) throws DataXException { + Map tableMetas = new HashMap(); + + try (Statement stmt = conn.createStatement()) { + ResultSet rs = stmt.executeQuery("show stables"); + while (rs.next()) { + TableMeta tableMeta = buildSupTableMeta(rs); + if (!tables.contains(tableMeta.tbname)) + continue; + tableMetas.put(tableMeta.tbname, tableMeta); + } + + rs = stmt.executeQuery("show tables"); + while (rs.next()) { + TableMeta tableMeta = buildSubTableMeta(rs); + if (!tables.contains(tableMeta.tbname)) + continue; + tableMetas.put(tableMeta.tbname, tableMeta); + } + + for (String tbname : tables) { + if (!tableMetas.containsKey(tbname)) { + throw DataXException.asDataXException(TDengineWriterErrorCode.RUNTIME_EXCEPTION, "table metadata of " + tbname + " is empty!"); + } + } + } catch (SQLException e) { + throw DataXException.asDataXException(TDengineWriterErrorCode.RUNTIME_EXCEPTION, e.getMessage()); + } + return tableMetas; + } + + public Map> loadColumnMetas(List tables) throws DataXException { + Map> ret = new HashMap<>(); + + for (String table : tables) { + List columnMetaList = new ArrayList<>(); + try (Statement stmt = conn.createStatement()) { + ResultSet rs = stmt.executeQuery("describe " + table); + for (int i = 0; rs.next(); i++) { + ColumnMeta columnMeta = buildColumnMeta(rs, i == 0); + columnMetaList.add(columnMeta); + } + } catch (SQLException e) { + throw DataXException.asDataXException(TDengineWriterErrorCode.RUNTIME_EXCEPTION, e.getMessage()); + } + + if (columnMetaList.isEmpty()) { + LOG.error("column metadata of " + table + " is empty!"); + continue; + } + + columnMetaList.stream().filter(colMeta -> colMeta.isTag).forEach(colMeta -> { + String sql = "select " + colMeta.field + " from " + table; + Object value = null; + try (Statement stmt = conn.createStatement()) { + ResultSet rs = stmt.executeQuery(sql); + for (int i = 0; rs.next(); i++) { + value = rs.getObject(colMeta.field); + if (i > 0) { + value = null; + break; + } + } + } catch (SQLException e) { + e.printStackTrace(); + } + 
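+ // the probe above keeps a tag value only when the query returned a single row;
+ // with more rows it is reset to null, and this cached value is what
+ // writeBatchToSubTable() later compares incoming records against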
colMeta.value = value; + }); + + LOG.debug("load column metadata of " + table + ": " + Arrays.toString(columnMetaList.toArray())); + ret.put(table, columnMetaList); + } + return ret; + } + + private TableMeta buildSupTableMeta(ResultSet rs) throws SQLException { + TableMeta tableMeta = new TableMeta(); + tableMeta.tableType = TableType.SUP_TABLE; + tableMeta.tbname = rs.getString("name"); + tableMeta.columns = rs.getInt("columns"); + tableMeta.tags = rs.getInt("tags"); + tableMeta.tables = rs.getInt("tables"); + + LOG.debug("load table metadata of " + tableMeta.tbname + ": " + tableMeta); + return tableMeta; + } + + private TableMeta buildSubTableMeta(ResultSet rs) throws SQLException { + TableMeta tableMeta = new TableMeta(); + String stable_name = rs.getString("stable_name"); + tableMeta.tableType = StringUtils.isBlank(stable_name) ? TableType.NML_TABLE : TableType.SUB_TABLE; + tableMeta.tbname = rs.getString("table_name"); + tableMeta.columns = rs.getInt("columns"); + tableMeta.stable_name = StringUtils.isBlank(stable_name) ? null : stable_name; + + LOG.debug("load table metadata of " + tableMeta.tbname + ": " + tableMeta); + return tableMeta; + } + + private ColumnMeta buildColumnMeta(ResultSet rs, boolean isPrimaryKey) throws SQLException { + ColumnMeta columnMeta = new ColumnMeta(); + columnMeta.field = rs.getString("Field"); + columnMeta.type = rs.getString("Type"); + columnMeta.length = rs.getInt("Length"); + columnMeta.note = rs.getString("Note"); + columnMeta.isTag = columnMeta.note != null && columnMeta.note.equals("TAG"); + columnMeta.isPrimaryKey = isPrimaryKey; + return columnMeta; + } + + public Map loadTagTableNameMap(String table) throws SQLException { + if (tags2tbnameMaps.containsKey(table)) + return tags2tbnameMaps.get(table); + Map tags2tbname = new HashMap<>(); + try (Statement stmt = conn.createStatement()) { + // describe table + List tags = new ArrayList<>(); + ResultSet rs = stmt.executeQuery("describe " + table); + while (rs.next()) { + String note = rs.getString("Note"); + if ("TAG".equals(note)) { + tags.add(rs.getString("Field")); + } + } + // select distinct tbname, t1, t2 from stb + rs = stmt.executeQuery("select distinct " + String.join(",", tags) + ",tbname from " + table); + while (rs.next()) { + ResultSet finalRs = rs; + String tagStr = tags.stream().map(t -> { + try { + return finalRs.getString(t); + } catch (SQLException e) { + LOG.error(e.getMessage(), e); + } + return "NULL"; + }).collect(Collectors.joining(TAG_TABLE_NAME_MAP_KEY_SPLITTER)); + String tbname = rs.getString("tbname"); + tags2tbname.put(tagStr, tbname); + } + } + tags2tbnameMaps.put(table, tags2tbname); + return tags2tbname; + } +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriter.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriter.java new file mode 100644 index 00000000..73982744 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriter.java @@ -0,0 +1,114 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.common.plugin.RecordReceiver; +import com.alibaba.datax.common.plugin.TaskPluginCollector; +import com.alibaba.datax.common.spi.Writer; +import com.alibaba.datax.common.util.Configuration; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import 
java.util.List; + +public class TDengineWriter extends Writer { + + private static final String PEER_PLUGIN_NAME = "peerPluginName"; + + public static class Job extends Writer.Job { + + private Configuration originalConfig; + private static final Logger LOG = LoggerFactory.getLogger(Job.class); + + @Override + public void init() { + this.originalConfig = super.getPluginJobConf(); + this.originalConfig.set(PEER_PLUGIN_NAME, getPeerPluginName()); + + // check username + String user = this.originalConfig.getString(Key.USERNAME); + if (StringUtils.isBlank(user)) + throw DataXException.asDataXException(TDengineWriterErrorCode.REQUIRED_VALUE, "The parameter [" + + Key.USERNAME + "] is not set."); + + // check password + String password = this.originalConfig.getString(Key.PASSWORD); + if (StringUtils.isBlank(password)) + throw DataXException.asDataXException(TDengineWriterErrorCode.REQUIRED_VALUE, "The parameter [" + + Key.PASSWORD + "] is not set."); + + // check connection + List connection = this.originalConfig.getList(Key.CONNECTION); + if (connection == null || connection.isEmpty()) + throw DataXException.asDataXException(TDengineWriterErrorCode.REQUIRED_VALUE, "The parameter [" + + Key.CONNECTION + "] is not set."); + if (connection.size() > 1) + LOG.warn("connection.size is " + connection.size() + " and only connection[0] will be used."); + Configuration conn = Configuration.from(connection.get(0).toString()); + String jdbcUrl = conn.getString(Key.JDBC_URL); + if (StringUtils.isBlank(jdbcUrl)) + throw DataXException.asDataXException(TDengineWriterErrorCode.REQUIRED_VALUE, "The parameter [" + + Key.JDBC_URL + "] of connection is not set."); + + // check column + } + + @Override + public void destroy() { + + } + + @Override + public List split(int mandatoryNumber) { + List writerSplitConfigs = new ArrayList<>(); + + List conns = this.originalConfig.getList(Key.CONNECTION); + for (int i = 0; i < mandatoryNumber; i++) { + Configuration clone = this.originalConfig.clone(); + Configuration conf = Configuration.from(conns.get(0).toString()); + String jdbcUrl = conf.getString(Key.JDBC_URL); + clone.set(Key.JDBC_URL, jdbcUrl); + clone.set(Key.TABLE, conf.getList(Key.TABLE)); + clone.remove(Key.CONNECTION); + writerSplitConfigs.add(clone); + } + + return writerSplitConfigs; + } + } + + public static class Task extends Writer.Task { + private static final Logger LOG = LoggerFactory.getLogger(Task.class); + + private Configuration writerSliceConfig; + private TaskPluginCollector taskPluginCollector; + + @Override + public void init() { + this.writerSliceConfig = getPluginJobConf(); + this.taskPluginCollector = super.getTaskPluginCollector(); + } + + @Override + public void destroy() { + + } + + @Override + public void startWrite(RecordReceiver lineReceiver) { + String peerPluginName = this.writerSliceConfig.getString(PEER_PLUGIN_NAME); + LOG.debug("start to handle record from: " + peerPluginName); + + DataHandler handler; + if (peerPluginName.equals("opentsdbreader")) + handler = new OpentsdbDataHandler(this.writerSliceConfig); + else + handler = new DefaultDataHandler(this.writerSliceConfig, this.taskPluginCollector); + + long records = handler.handle(lineReceiver, getTaskPluginCollector()); + LOG.debug("handle data finished, records: " + records); + } + + } +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriterErrorCode.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriterErrorCode.java new file mode 
100644 index 00000000..469449e6 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriterErrorCode.java @@ -0,0 +1,34 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.common.spi.ErrorCode; + +public enum TDengineWriterErrorCode implements ErrorCode { + + REQUIRED_VALUE("TDengineWriter-00", "缺失必要的值"), + ILLEGAL_VALUE("TDengineWriter-01", "值非法"), + RUNTIME_EXCEPTION("TDengineWriter-02", "运行时异常"), + TYPE_ERROR("TDengineWriter-03", "Datax类型无法正确映射到TDengine类型"); + + private final String code; + private final String description; + + TDengineWriterErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return this.code; + } + + @Override + public String getDescription() { + return this.description; + } + + @Override + public String toString() { + return String.format("Code:[%s], Description:[%s]. ", this.code, this.description); + } +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TableMeta.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TableMeta.java new file mode 100644 index 00000000..2a6b0536 --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TableMeta.java @@ -0,0 +1,22 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +public class TableMeta { + TableType tableType; + String tbname; + int columns; + int tags; + int tables; + String stable_name; + + @Override + public String toString() { + return "TableMeta{" + + "tableType=" + tableType + + ", tbname='" + tbname + '\'' + + ", columns=" + columns + + ", tags=" + tags + + ", tables=" + tables + + ", stable_name='" + stable_name + '\'' + + '}'; + } +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TableType.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TableType.java new file mode 100644 index 00000000..6c97ac3f --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TableType.java @@ -0,0 +1,5 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +public enum TableType { + SUP_TABLE, SUB_TABLE, NML_TABLE +} diff --git a/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TimestampPrecision.java b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TimestampPrecision.java new file mode 100644 index 00000000..46564d6a --- /dev/null +++ b/tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/TimestampPrecision.java @@ -0,0 +1,5 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +public enum TimestampPrecision { + MILLISEC, MICROSEC, NANOSEC +} diff --git a/tdenginewriter/src/main/resources/plugin.json b/tdenginewriter/src/main/resources/plugin.json new file mode 100644 index 00000000..10d8e2cf --- /dev/null +++ b/tdenginewriter/src/main/resources/plugin.json @@ -0,0 +1,9 @@ +{ + "name": "tdenginewriter", + "class": "com.alibaba.datax.plugin.writer.tdenginewriter.TDengineWriter", + "description": { + "useScene": "data migration to tdengine", + "mechanism": "use taos-jdbcdriver to write data." 
+ }, + "developer": "support@taosdata.com" +} \ No newline at end of file diff --git a/tdenginewriter/src/main/resources/plugin_job_template.json b/tdenginewriter/src/main/resources/plugin_job_template.json new file mode 100644 index 00000000..3d303305 --- /dev/null +++ b/tdenginewriter/src/main/resources/plugin_job_template.json @@ -0,0 +1,20 @@ +{ + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "" + ], + "connection": [ + { + "table": [ + "" + ], + "jdbcUrl": "" + } + ], + "batchSize": 1000, + "ignoreTagsUnmatched": true + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/Csv2TDengineTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/Csv2TDengineTest.java new file mode 100644 index 00000000..7352c3ca --- /dev/null +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/Csv2TDengineTest.java @@ -0,0 +1,41 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.core.Engine; +import org.junit.Ignore; +import org.junit.Test; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.sql.Statement; + +@Ignore +public class Csv2TDengineTest { + + private static final String host = "192.168.56.105"; + + @Test + public void case01() throws Throwable { + // given + prepareTable(); + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/csv2t.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + public void prepareTable() throws SQLException { + final String url = "jdbc:TAOS-RS://" + host + ":6041"; + try (Connection conn = DriverManager.getConnection(url, "root", "taosdata")) { + Statement stmt = conn.createStatement(); + + stmt.execute("drop database if exists test"); + stmt.execute("create database if not exists test"); + stmt.execute("create table test.weather (ts timestamp, temperature bigint, humidity double, is_normal bool) " + + "tags(device_id binary(10),address nchar(10))"); + } + } + + +} diff --git a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/DM2TDengineTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/DM2TDengineTest.java new file mode 100644 index 00000000..15f6b1bc --- /dev/null +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/DM2TDengineTest.java @@ -0,0 +1,122 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.core.Engine; +import org.junit.Before; +import org.junit.Test; + +import java.sql.*; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Random; + +public class DM2TDengineTest { + + private String host1 = "192.168.0.72"; + private String host2 = "192.168.1.93"; + private final Random random = new Random(System.currentTimeMillis()); + + @Test + public void dm2t_case01() throws Throwable { + // given + createSupTable(); + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/dm2t-1.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + @Test + public void dm2t_case02() throws Throwable { + // given + createSupAndSubTable(); + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/dm2t-2.json"}; + 
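+ // -mode standalone runs the job JSON in-process via Engine.entry();
+ // datax.home is assumed to point at a locally assembled DataX distribution (../target/datax/datax)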
System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + @Test + public void dm2t_case03() throws Throwable { + // given + createTable(); + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/dm2t-3.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + @Test + public void dm2t_case04() throws Throwable { + // given + createSupTable(); + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/dm2t-4.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + private void createSupTable() throws SQLException { + final String url2 = "jdbc:TAOS-RS://" + host2 + ":6041"; + try (Connection conn = DriverManager.getConnection(url2, "root", "taosdata")) { + Statement stmt = conn.createStatement(); + stmt.execute("drop database if exists db2"); + stmt.execute("create database if not exists db2"); + stmt.execute("create table db2.stb2(ts timestamp, f2 smallint, f4 bigint,f5 float, " + + "f6 double, f7 double, f8 bool, f9 nchar(100), f10 nchar(200)) tags(f1 tinyint,f3 int)"); + stmt.close(); + } + } + + private void createSupAndSubTable() throws SQLException { + final String url2 = "jdbc:TAOS-RS://" + host2 + ":6041"; + try (Connection conn = DriverManager.getConnection(url2, "root", "taosdata")) { + Statement stmt = conn.createStatement(); + stmt.execute("drop database if exists db2"); + stmt.execute("create database if not exists db2"); + stmt.execute("create table db2.stb2(ts timestamp, f2 smallint, f4 bigint,f5 float, " + + "f6 double, f7 double, f8 bool, f9 nchar(100), f10 nchar(200)) tags(f1 tinyint,f3 int)"); + for (int i = 0; i < 10; i++) { + stmt.execute("create table db2.t" + (i + 1) + "_" + i + " using db2.stb2 tags(" + (i + 1) + "," + i + ")"); + } + stmt.close(); + } + } + + private void createTable() throws SQLException { + final String url2 = "jdbc:TAOS-RS://" + host2 + ":6041"; + try (Connection conn = DriverManager.getConnection(url2, "root", "taosdata")) { + Statement stmt = conn.createStatement(); + stmt.execute("drop database if exists db2"); + stmt.execute("create database if not exists db2"); + stmt.execute("create table db2.stb2(ts timestamp, f1 tinyint, f2 smallint, f3 int, f4 bigint,f5 float, " + + "f6 double, f7 double, f8 bool, f9 nchar(100), f10 nchar(200))"); + stmt.close(); + } + } + + @Before + public void before() throws SQLException { + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); + long ts = System.currentTimeMillis(); + + final String url = "jdbc:dm://" + host1 + ":5236"; + try (Connection conn = DriverManager.getConnection(url, "TESTUSER", "test123456")) { + conn.setAutoCommit(true); + Statement stmt = conn.createStatement(); + stmt.execute("drop table if exists stb1"); + stmt.execute("create table stb1(ts timestamp, f1 tinyint, f2 smallint, f3 int, f4 bigint, f5 float, " + + "f6 double, f7 NUMERIC(10,2), f8 BIT, f9 VARCHAR(100), f10 VARCHAR2(200))"); + for (int i = 0; i < 10; i++) { + String sql = "insert into stb1 values('" + sdf.format(new Date(ts + i * 1000)) + "'," + (i + 1) + "," + + random.nextInt(100) + "," + i + ",4,5.55,6.666,7.77," + (random.nextBoolean() ? 
1 : 0) + + ",'abcABC123','北京朝阳望京DM')"; + stmt.execute(sql); + } + } + } + +} diff --git a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandlerTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandlerTest.java new file mode 100644 index 00000000..46e601ad --- /dev/null +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandlerTest.java @@ -0,0 +1,301 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.common.element.DateColumn; +import com.alibaba.datax.common.element.LongColumn; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.element.StringColumn; +import com.alibaba.datax.common.plugin.TaskPluginCollector; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.core.transport.record.DefaultRecord; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +public class DefaultDataHandlerTest { + + private static final String host = "192.168.1.93"; + private static Connection conn; + + private final TaskPluginCollector taskPluginCollector = new TDengineWriter.Task().getTaskPluginCollector(); + + @Test + public void writeSupTableBySQL() throws SQLException { + // given + createSupAndSubTable(); + Configuration configuration = Configuration.from("{" + + "\"username\": \"root\"," + + "\"password\": \"taosdata\"," + + "\"column\": [\"tbname\", \"ts\", \"f1\", \"f2\", \"t1\"]," + + "\"table\":[\"stb1\"]," + + "\"jdbcUrl\":\"jdbc:TAOS-RS://" + host + ":6041/test\"," + + "\"batchSize\": \"1000\"" + + "}"); + long current = System.currentTimeMillis(); + List recordList = IntStream.range(1, 11).mapToObj(i -> { + Record record = new DefaultRecord(); + record.addColumn(new StringColumn("tb" + (i + 10))); + record.addColumn(new DateColumn(current + 1000 * i)); + record.addColumn(new LongColumn(1)); + record.addColumn(new LongColumn(2)); + record.addColumn(new LongColumn(i)); + return record; + }).collect(Collectors.toList()); + + + // when + DefaultDataHandler handler = new DefaultDataHandler(configuration, taskPluginCollector); + List tables = configuration.getList("table", String.class); + SchemaManager schemaManager = new SchemaManager(conn); + Map tableMetas = schemaManager.loadTableMeta(tables); + Map> columnMetas = schemaManager.loadColumnMetas(tables); + handler.setTableMetas(tableMetas); + handler.setTbnameColumnMetasMap(columnMetas); + handler.setSchemaManager(schemaManager); + + int count = handler.writeBatch(conn, recordList); + + // then + Assert.assertEquals(10, count); + } + + @Test + public void writeSupTableBySQL_2() throws SQLException { + // given + createSupAndSubTable(); + Configuration configuration = Configuration.from("{" + + "\"username\": \"root\"," + + "\"password\": \"taosdata\"," + + "\"column\": [\"tbname\", \"ts\", \"f1\", \"t1\"]," + + "\"table\":[\"stb1\"]," + + "\"jdbcUrl\":\"jdbc:TAOS-RS://" + host + ":6041/test\"," + + "\"batchSize\": \"1000\"" + + "}"); + long current = System.currentTimeMillis(); + List recordList = IntStream.range(1, 11).mapToObj(i -> { + Record record = new DefaultRecord(); + record.addColumn(new StringColumn("tb" + (i + 10))); + 
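+ // columns are added in the same order as the "column" config above ([tbname, ts, f1, f2, t1]),
+ // because DefaultDataHandler.indexOf() resolves record values by position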
record.addColumn(new DateColumn(current + 1000 * i)); + record.addColumn(new LongColumn(1)); + record.addColumn(new LongColumn(i)); + return record; + }).collect(Collectors.toList()); + + // when + DefaultDataHandler handler = new DefaultDataHandler(configuration, taskPluginCollector); + List tables = configuration.getList("table", String.class); + SchemaManager schemaManager = new SchemaManager(conn); + Map tableMetas = schemaManager.loadTableMeta(tables); + Map> columnMetas = schemaManager.loadColumnMetas(tables); + handler.setTableMetas(tableMetas); + handler.setTbnameColumnMetasMap(columnMetas); + handler.setSchemaManager(schemaManager); + + int count = handler.writeBatch(conn, recordList); + + // then + Assert.assertEquals(10, count); + } + + @Test + public void writeSupTableBySchemaless() throws SQLException { + // given + createSupTable(); + Configuration configuration = Configuration.from("{" + + "\"username\": \"root\"," + + "\"password\": \"taosdata\"," + + "\"column\": [\"ts\", \"f1\", \"f2\", \"t1\"]," + + "\"table\":[\"stb1\"]," + + "\"jdbcUrl\":\"jdbc:TAOS://" + host + ":6030/scm_test\"," + + "\"batchSize\": \"1000\"" + + "}"); + String jdbcUrl = configuration.getString("jdbcUrl"); + Connection connection = DriverManager.getConnection(jdbcUrl, "root", "taosdata"); + long current = System.currentTimeMillis(); + List recordList = IntStream.range(1, 11).mapToObj(i -> { + Record record = new DefaultRecord(); + record.addColumn(new DateColumn(current + 1000 * i)); + record.addColumn(new LongColumn(1)); + record.addColumn(new LongColumn(2)); + record.addColumn(new StringColumn("t" + i + " 22")); + return record; + }).collect(Collectors.toList()); + + // when + DefaultDataHandler handler = new DefaultDataHandler(configuration, taskPluginCollector); + List tables = configuration.getList("table", String.class); + SchemaManager schemaManager = new SchemaManager(connection); + Map tableMetas = schemaManager.loadTableMeta(tables); + Map> columnMetas = schemaManager.loadColumnMetas(tables); + handler.setTableMetas(tableMetas); + handler.setTbnameColumnMetasMap(columnMetas); + handler.setSchemaManager(schemaManager); + + int count = handler.writeBatch(connection, recordList); + + // then + Assert.assertEquals(10, count); + } + + @Test + public void writeSubTableWithTableName() throws SQLException { + // given + createSupAndSubTable(); + Configuration configuration = Configuration.from("{" + + "\"username\": \"root\"," + + "\"password\": \"taosdata\"," + + "\"column\": [\"tbname\", \"ts\", \"f1\", \"f2\", \"t1\"]," + + "\"table\":[\"tb1\"]," + + "\"jdbcUrl\":\"jdbc:TAOS-RS://" + host + ":6041/test\"," + + "\"batchSize\": \"1000\"" + + "}"); + long current = System.currentTimeMillis(); + List recordList = IntStream.range(1, 11).mapToObj(i -> { + Record record = new DefaultRecord(); + record.addColumn(new StringColumn("tb" + i)); + record.addColumn(new DateColumn(current + 1000 * i)); + record.addColumn(new LongColumn(1)); + record.addColumn(new LongColumn(2)); + record.addColumn(new LongColumn(i)); + return record; + }).collect(Collectors.toList()); + + // when + DefaultDataHandler handler = new DefaultDataHandler(configuration, taskPluginCollector); + List tables = configuration.getList("table", String.class); + SchemaManager schemaManager = new SchemaManager(conn); + Map tableMetas = schemaManager.loadTableMeta(tables); + Map> columnMetas = schemaManager.loadColumnMetas(tables); + handler.setTableMetas(tableMetas); + handler.setTbnameColumnMetasMap(columnMetas); + 
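+ // table/column metas and the SchemaManager are injected by hand because this test calls
+ // writeBatch() directly; a normal run lets DefaultDataHandler.handle() load them from the connection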
handler.setSchemaManager(schemaManager); + + int count = handler.writeBatch(conn, recordList); + + // then + Assert.assertEquals(1, count); + } + + @Test + public void writeSubTableWithoutTableName() throws SQLException { + // given + createSupAndSubTable(); + Configuration configuration = Configuration.from("{" + + "\"username\": \"root\"," + + "\"password\": \"taosdata\"," + + "\"column\": [\"ts\", \"f1\", \"f2\", \"t1\"]," + + "\"table\":[\"tb1\"]," + + "\"jdbcUrl\":\"jdbc:TAOS-RS://" + host + ":6041/test\"," + + "\"batchSize\": \"1000\"," + + "\"ignoreTagsUnmatched\": \"true\"" + + "}"); + long current = System.currentTimeMillis(); + List recordList = IntStream.range(1, 11).mapToObj(i -> { + Record record = new DefaultRecord(); + record.addColumn(new DateColumn(current + 1000 * i)); + record.addColumn(new LongColumn(1)); + record.addColumn(new LongColumn(2)); + record.addColumn(new LongColumn(i)); + return record; + }).collect(Collectors.toList()); + + // when + DefaultDataHandler handler = new DefaultDataHandler(configuration, taskPluginCollector); + List tables = configuration.getList("table", String.class); + SchemaManager schemaManager = new SchemaManager(conn); + Map tableMetas = schemaManager.loadTableMeta(tables); + Map> columnMetas = schemaManager.loadColumnMetas(tables); + handler.setTableMetas(tableMetas); + handler.setTbnameColumnMetasMap(columnMetas); + handler.setSchemaManager(schemaManager); + + int count = handler.writeBatch(conn, recordList); + + // then + Assert.assertEquals(1, count); + } + + @Test + public void writeNormalTable() throws SQLException { + // given + createSupAndSubTable(); + Configuration configuration = Configuration.from("{" + + "\"username\": \"root\"," + + "\"password\": \"taosdata\"," + + "\"column\": [\"ts\", \"f1\", \"f2\", \"t1\"]," + + "\"table\":[\"weather\"]," + + "\"jdbcUrl\":\"jdbc:TAOS-RS://" + host + ":6041/test\"," + + "\"batchSize\": \"1000\"," + + "\"ignoreTagsUnmatched\": \"true\"" + + "}"); + long current = System.currentTimeMillis(); + List recordList = IntStream.range(1, 11).mapToObj(i -> { + Record record = new DefaultRecord(); + record.addColumn(new DateColumn(current + 1000 * i)); + record.addColumn(new LongColumn(1)); + record.addColumn(new LongColumn(2)); + record.addColumn(new LongColumn(i)); + return record; + }).collect(Collectors.toList()); + + // when + DefaultDataHandler handler = new DefaultDataHandler(configuration, taskPluginCollector); + List tables = configuration.getList("table", String.class); + SchemaManager schemaManager = new SchemaManager(conn); + Map tableMetas = schemaManager.loadTableMeta(tables); + Map> columnMetas = schemaManager.loadColumnMetas(tables); + handler.setTableMetas(tableMetas); + handler.setTbnameColumnMetasMap(columnMetas); + handler.setSchemaManager(schemaManager); + + int count = handler.writeBatch(conn, recordList); + + // then + Assert.assertEquals(10, count); + } + + private void createSupAndSubTable() throws SQLException { + try (Statement stmt = conn.createStatement()) { + stmt.execute("drop database if exists scm_test"); + stmt.execute("create database if not exists scm_test"); + stmt.execute("use scm_test"); + stmt.execute("create table stb1(ts timestamp, f1 int, f2 int) tags(t1 nchar(32))"); + stmt.execute("create table stb2(ts timestamp, f1 int, f2 int, f3 int) tags(t1 int, t2 int)"); + stmt.execute("create table tb1 using stb1 tags(1)"); + stmt.execute("create table tb2 using stb1 tags(2)"); + stmt.execute("create table tb3 using stb2 tags(1,1)"); + stmt.execute("create table 
tb4 using stb2 tags(2,2)"); + stmt.execute("create table weather(ts timestamp, f1 int, f2 int, f3 int, t1 int, t2 int)"); + } + } + + private void createSupTable() throws SQLException { + try (Statement stmt = conn.createStatement()) { + stmt.execute("drop database if exists scm_test"); + stmt.execute("create database if not exists scm_test"); + stmt.execute("use scm_test"); + stmt.execute("create table stb1(ts timestamp, f1 int, f2 int) tags(t1 nchar(32))"); + } + } + + @BeforeClass + public static void beforeClass() throws SQLException { + conn = DriverManager.getConnection("jdbc:TAOS-RS://" + host + ":6041", "root", "taosdata"); + } + + @AfterClass + public static void afterClass() throws SQLException { + if (conn != null) { + conn.close(); + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/Mongo2TDengineTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/Mongo2TDengineTest.java new file mode 100644 index 00000000..2356b6f8 --- /dev/null +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/Mongo2TDengineTest.java @@ -0,0 +1,16 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.core.Engine; +import org.junit.Test; + +public class Mongo2TDengineTest { + + @Test + public void case01() throws Throwable { + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/mongo2t.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/Mysql2TDengineTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/Mysql2TDengineTest.java new file mode 100644 index 00000000..4a662711 --- /dev/null +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/Mysql2TDengineTest.java @@ -0,0 +1,70 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.core.Engine; +import org.junit.Before; +import org.junit.Test; + +import java.sql.*; +import java.text.SimpleDateFormat; +import java.util.Random; + +public class Mysql2TDengineTest { + + private static final String host1 = "192.168.56.105"; + private static final String host2 = "192.168.1.93"; + private static final Random random = new Random(System.currentTimeMillis()); + + @Test + public void mysql2tdengine() throws Throwable { + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/m2t-1.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + @Before + public void before() throws SQLException { + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); + String ts = sdf.format(new Date(System.currentTimeMillis())); + + final String url = "jdbc:mysql://" + host1 + ":3306/?useSSL=false&useUnicode=true&charset=UTF-8&generateSimpleParameterMetadata=true"; + try (Connection conn = DriverManager.getConnection(url, "root", "123456")) { + Statement stmt = conn.createStatement(); + + stmt.execute("drop database if exists db1"); + stmt.execute("create database if not exists db1"); + stmt.execute("use db1"); + stmt.execute("create table stb1(id int primary key AUTO_INCREMENT, " + + "f1 tinyint, f2 smallint, f3 int, f4 bigint, " + + "f5 float, f6 double, " + + "ts timestamp, dt datetime," + + "f7 nchar(100), f8 varchar(100))"); 
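+ // seed 10 rows covering every source type (tinyint..double, timestamp/datetime, nchar/varchar)
+ // so the migration into the TDengine target table created below is exercised end to end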
+ for (int i = 1; i <= 10; i++) { + String sql = "insert into stb1(f1, f2, f3, f4, f5, f6, ts, dt, f7, f8) values(" + + i + "," + random.nextInt(100) + "," + random.nextInt(100) + "," + random.nextInt(100) + "," + + random.nextFloat() + "," + random.nextDouble() + ", " + + "'" + ts + "', '" + ts + "', " + + "'中国北京朝阳望京abc', '中国北京朝阳望京adc')"; + stmt.execute(sql); + } + + stmt.close(); + } + + final String url2 = "jdbc:TAOS-RS://" + host2 + ":6041/"; + try (Connection conn = DriverManager.getConnection(url2, "root", "taosdata")) { + Statement stmt = conn.createStatement(); + + stmt.execute("drop database if exists db2"); + stmt.execute("create database if not exists db2"); + stmt.execute("create table db2.stb2(" + + "ts timestamp, dt timestamp, " + + "f1 tinyint, f2 smallint, f3 int, f4 bigint, " + + "f5 float, f6 double, " + + "f7 nchar(100), f8 nchar(100))"); + + stmt.close(); + } + + } + +} diff --git a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/Opentsdb2TDengineTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/Opentsdb2TDengineTest.java new file mode 100644 index 00000000..ad326f7e --- /dev/null +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/Opentsdb2TDengineTest.java @@ -0,0 +1,36 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.core.Engine; +import org.junit.Assert; +import org.junit.Test; + +import java.sql.*; + +public class Opentsdb2TDengineTest { + + @Test + public void opentsdb2tdengine() throws SQLException { + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/o2t-1.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + try { + Engine.entry(params); + } catch (Throwable e) { + e.printStackTrace(); + } + + // assert + String jdbcUrl = "jdbc:TAOS://192.168.56.105:6030/test?timestampFormat=TIMESTAMP"; + try (Connection conn = DriverManager.getConnection(jdbcUrl, "root", "taosdata")) { + Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery("select count(*) from weather_temperature"); + int rows = 0; + while (rs.next()) { + rows = rs.getInt("count(*)"); + } + Assert.assertEquals(5, rows); + stmt.close(); + } + } + +} diff --git a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManagerTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManagerTest.java new file mode 100644 index 00000000..3708e6f9 --- /dev/null +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/SchemaManagerTest.java @@ -0,0 +1,107 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +public class SchemaManagerTest { + + private static Connection conn; + + @Test + public void loadTableMeta() throws SQLException { + // given + SchemaManager schemaManager = new SchemaManager(conn); + List tables = Arrays.asList("stb1", "stb2", "tb1", "tb3", "weather"); + + // when + Map tableMetaMap = schemaManager.loadTableMeta(tables); + + // then + TableMeta stb1 = tableMetaMap.get("stb1"); + Assert.assertEquals(TableType.SUP_TABLE, stb1.tableType); + Assert.assertEquals("stb1", stb1.tbname); + 
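+ // the expected counts follow from the beforeClass() fixture: stb1 = (ts, f1, f2) plus tag t1,
+ // with two sub tables (tb1, tb2) created via "insert ... using stb1 tags(...)"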
Assert.assertEquals(3, stb1.columns); + Assert.assertEquals(1, stb1.tags); + Assert.assertEquals(2, stb1.tables); + + TableMeta tb3 = tableMetaMap.get("tb3"); + Assert.assertEquals(TableType.SUB_TABLE, tb3.tableType); + Assert.assertEquals("tb3", tb3.tbname); + Assert.assertEquals(4, tb3.columns); + Assert.assertEquals("stb2", tb3.stable_name); + + TableMeta weather = tableMetaMap.get("weather"); + Assert.assertEquals(TableType.NML_TABLE, weather.tableType); + Assert.assertEquals("weather", weather.tbname); + Assert.assertEquals(6, weather.columns); + Assert.assertNull(weather.stable_name); + } + + @Test + public void loadColumnMetas() { + // given + SchemaManager schemaManager = new SchemaManager(conn); + List tables = Arrays.asList("stb1", "stb2", "tb1", "tb3", "weather"); + + // when + Map> columnMetaMap = schemaManager.loadColumnMetas(tables); + + // then + List stb1 = columnMetaMap.get("stb1"); + Assert.assertEquals(4, stb1.size()); + } + + @Test + public void loadTagTableNameMap() throws SQLException { + // given + SchemaManager schemaManager = new SchemaManager(conn); + String table = "stb3"; + + // when + Map tagTableMap = schemaManager.loadTagTableNameMap(table); + + // then + Assert.assertEquals(2, tagTableMap.keySet().size()); + Assert.assertTrue(tagTableMap.containsKey("11.1abc")); + Assert.assertTrue(tagTableMap.containsKey("22.2defg")); + Assert.assertEquals("tb5", tagTableMap.get("11.1abc")); + Assert.assertEquals("tb6", tagTableMap.get("22.2defg")); + } + + @BeforeClass + public static void beforeClass() throws SQLException { + conn = DriverManager.getConnection("jdbc:TAOS-RS://192.168.56.105:6041", "root", "taosdata"); + try (Statement stmt = conn.createStatement()) { + stmt.execute("drop database if exists scm_test"); + stmt.execute("create database if not exists scm_test"); + stmt.execute("use scm_test"); + stmt.execute("create table stb1(ts timestamp, f1 int, f2 int) tags(t1 int)"); + stmt.execute("create table stb2(ts timestamp, f1 int, f2 int, f3 int) tags(t1 int, t2 int)"); + stmt.execute("insert into tb1 using stb1 tags(1) values(now, 1, 2)"); + stmt.execute("insert into tb2 using stb1 tags(2) values(now, 1, 2)"); + stmt.execute("insert into tb3 using stb2 tags(1,1) values(now, 1, 2, 3)"); + stmt.execute("insert into tb4 using stb2 tags(2,2) values(now, 1, 2, 3)"); + stmt.execute("create table weather(ts timestamp, f1 int, f2 int, f3 int, t1 int, t2 int)"); + stmt.execute("create table stb3(ts timestamp, f1 int) tags(t1 int, t2 float, t3 nchar(32))"); + stmt.execute("insert into tb5 using stb3 tags(1,1.1,'abc') values(now, 1)"); + stmt.execute("insert into tb6 using stb3 tags(2,2.2,'defg') values(now, 2)"); + } + } + + @AfterClass + public static void afterClass() throws SQLException { + if (conn != null) { + conn.close(); + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/Stream2TDengineTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/Stream2TDengineTest.java new file mode 100644 index 00000000..e54bcbde --- /dev/null +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/Stream2TDengineTest.java @@ -0,0 +1,69 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.core.Engine; +import org.junit.Before; +import org.junit.Test; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.sql.Statement; + +public class Stream2TDengineTest { + + 
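    // These cases assume a TDengine instance reachable at host2 with its REST service on port 6041
+    // (the jdbc:TAOS-RS URLs below). createSupTable() recreates db2.stb2, and the streamreader job in
+    // src/test/resources/defaultJob.json then writes its generated rows into that table; adjust host2
+    // to match the local test environment.
+    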
private String host2 = "192.168.56.105"; + + @Test + public void s2t_case1() throws Throwable { + // given + createSupTable("ms"); + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/defaultJob.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + @Test + public void s2t_case2() throws Throwable { + // given + createSupTable("us"); + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/defaultJob.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + @Test + public void s2t_case3() throws Throwable { + // given + createSupTable("ns"); + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/defaultJob.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + void createSupTable(String precision) throws SQLException { + + final String url = "jdbc:TAOS-RS://" + host2 + ":6041/"; + try (Connection conn = DriverManager.getConnection(url, "root", "taosdata")) { + Statement stmt = conn.createStatement(); + + stmt.execute("drop database if exists db2"); + stmt.execute("create database if not exists db2 precision '" + precision + "'"); + stmt.execute("create table db2.stb2(ts1 timestamp, ts2 timestamp,ts3 timestamp,ts4 timestamp,ts5 timestamp," + + "ts6 timestamp,ts7 timestamp, ts8 timestamp, ts9 timestamp, ts10 timestamp, f1 tinyint, f2 smallint," + + "f3 int, f4 bigint, f5 float, f6 double," + + "f7 bool, f8 binary(100), f9 nchar(100)) tags(t1 timestamp,t2 timestamp,t3 timestamp,t4 timestamp," + + "t5 timestamp,t6 timestamp,t7 timestamp, t8 tinyint, t9 smallint, t10 int, t11 bigint, t12 float," + + "t13 double, t14 bool, t15 binary(100), t16 nchar(100))"); + + stmt.close(); + } + + } + +} diff --git a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengine2TDengineTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengine2TDengineTest.java new file mode 100644 index 00000000..9e954633 --- /dev/null +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengine2TDengineTest.java @@ -0,0 +1,127 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.core.Engine; +import org.junit.Before; +import org.junit.Test; + +import java.sql.*; +import java.text.SimpleDateFormat; +import java.util.Random; + +public class TDengine2TDengineTest { + + private static final String host1 = "192.168.56.105"; + private static final String host2 = "192.168.1.93"; + private static final Random random = new Random(System.currentTimeMillis()); + + @Test + public void case_01() throws Throwable { + // given + createSupTable(); + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/t2t-1.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + @Test + public void case_02() throws Throwable { + // given + createSupTable(); + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/t2t-2.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + @Test + public void case_03() throws Throwable { + // given + createSupAndSubTable(); + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/t2t-3.json"}; + 
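        // "datax.home" must point to an assembled DataX distribution so the engine can locate its
+        // conf/ and plugin/ directories; the relative path below assumes the tests run from the
+        // tdenginewriter module after the project has been packaged.
+        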
System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + @Test + public void case_04() throws Throwable { + // given + createTable(); + + // when + String[] params = {"-mode", "standalone", "-jobid", "-1", "-job", "src/test/resources/t2t-4.json"}; + System.setProperty("datax.home", "../target/datax/datax"); + Engine.entry(params); + } + + private void createTable() throws SQLException { + final String url2 = "jdbc:TAOS-RS://" + host2 + ":6041"; + try (Connection conn = DriverManager.getConnection(url2, "root", "taosdata")) { + Statement stmt = conn.createStatement(); + stmt.execute("drop database if exists db2"); + stmt.execute("create database if not exists db2"); + stmt.execute("create table db2.weather (ts timestamp, f1 tinyint, f2 smallint, f3 int, f4 bigint, " + + "f5 float, f6 double, f7 bool, f8 binary(100), f9 nchar(100))"); + stmt.close(); + } + } + + private void createSupTable() throws SQLException { + final String url2 = "jdbc:TAOS-RS://" + host2 + ":6041"; + try (Connection conn = DriverManager.getConnection(url2, "root", "taosdata")) { + Statement stmt = conn.createStatement(); + stmt.execute("drop database if exists db2"); + stmt.execute("create database if not exists db2"); + stmt.execute("create table db2.stb2 (ts timestamp, f1 tinyint, f2 smallint, f3 int, f4 bigint," + + " f5 float, f6 double, f7 bool, f8 binary(100), f9 nchar(100)) tags(t1 timestamp, t2 tinyint, " + + "t3 smallint, t4 int, t5 bigint, t6 float, t7 double, t8 bool, t9 binary(100), t10 nchar(1000))"); + stmt.close(); + } + } + + private void createSupAndSubTable() throws SQLException { + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); + final String ts = sdf.format(new Date(System.currentTimeMillis())); + + final String url2 = "jdbc:TAOS-RS://" + host2 + ":6041"; + try (Connection conn = DriverManager.getConnection(url2, "root", "taosdata")) { + Statement stmt = conn.createStatement(); + stmt.execute("drop database if exists db2"); + stmt.execute("create database if not exists db2"); + stmt.execute("create table db2.stb2 (ts timestamp, f1 tinyint, f2 smallint, f3 int, f4 bigint," + + " f5 float, f6 double, f7 bool, f8 binary(100), f9 nchar(100)) tags(t1 timestamp, t2 tinyint, " + + "t3 smallint, t4 int, t5 bigint, t6 float, t7 double, t8 bool, t9 binary(100), t10 nchar(1000))"); + + stmt.execute("create table db2.t1 using db2.stb2 tags('" + ts + "',1,2,3,4,5.0,6.0,true,'abc123ABC','北京朝阳望京')"); + stmt.close(); + } + } + + @Before + public void before() throws SQLException { + final String url = "jdbc:TAOS-RS://" + host1 + ":6041"; + try (Connection conn = DriverManager.getConnection(url, "root", "taosdata")) { + Statement stmt = conn.createStatement(); + + stmt.execute("drop database if exists db1"); + stmt.execute("create database if not exists db1"); + stmt.execute("create table db1.stb1 (ts timestamp, f1 tinyint, f2 smallint, f3 int, f4 bigint," + + " f5 float, f6 double, f7 bool, f8 binary(100), f9 nchar(100)) tags(t1 timestamp, t2 tinyint, " + + "t3 smallint, t4 int, t5 bigint, t6 float, t7 double, t8 bool, t9 binary(100), t10 nchar(1000))"); + for (int i = 0; i < 10; i++) { + String sql = "insert into db1.t" + (i + 1) + " using db1.stb1 tags(now+" + i + "s," + + random.nextInt(100) + "," + random.nextInt(100) + "," + random.nextInt(100) + "," + + random.nextInt(100) + "," + random.nextFloat() + "," + random.nextDouble() + "," + + random.nextBoolean() + ",'abc123ABC','北京朝阳望京') values(now+" + i + "s, " + + random.nextInt(100) + "," + 
random.nextInt(100) + "," + random.nextInt(100) + "," + + random.nextInt(100) + "," + random.nextFloat() + "," + random.nextDouble() + "," + + random.nextBoolean() + ",'abc123ABC','北京朝阳望京')"; + stmt.execute(sql); + } + } + } +} diff --git a/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriterTest.java b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriterTest.java new file mode 100644 index 00000000..813f6131 --- /dev/null +++ b/tdenginewriter/src/test/java/com/alibaba/datax/plugin/writer/tdenginewriter/TDengineWriterTest.java @@ -0,0 +1,62 @@ +package com.alibaba.datax.plugin.writer.tdenginewriter; + +import com.alibaba.datax.common.util.Configuration; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.List; + +public class TDengineWriterTest { + + TDengineWriter.Job job; + + @Before + public void before() { + job = new TDengineWriter.Job(); + Configuration configuration = Configuration.from("{" + + "\"username\": \"root\"," + + "\"password\": \"taosdata\"," + + "\"column\": [\"ts\", \"f1\", \"f2\", \"t1\"]," + + "\"connection\": [{\"table\":[\"weather\"],\"jdbcUrl\":\"jdbc:TAOS-RS://master:6041/test\"}]," + + "\"batchSize\": \"1000\"" + + "}"); + job.setPluginJobConf(configuration); + } + + @Test + public void jobInit() { + // when + job.init(); + + // assert + Configuration conf = job.getPluginJobConf(); + + Assert.assertEquals("root", conf.getString("username")); + Assert.assertEquals("taosdata", conf.getString("password")); + Assert.assertEquals("jdbc:TAOS-RS://master:6041/test", conf.getString("connection[0].jdbcUrl")); + Assert.assertEquals(new Integer(1000), conf.getInt("batchSize")); + Assert.assertEquals("ts", conf.getString("column[0]")); + Assert.assertEquals("f2", conf.getString("column[2]")); + } + + @Test + public void jobSplit() { + // when + job.init(); + List configurationList = job.split(10); + + // assert + Assert.assertEquals(10, configurationList.size()); + for (Configuration conf : configurationList) { + Assert.assertEquals("root", conf.getString("username")); + Assert.assertEquals("taosdata", conf.getString("password")); + Assert.assertEquals("jdbc:TAOS-RS://master:6041/test", conf.getString("jdbcUrl")); + Assert.assertEquals(new Integer(1000), conf.getInt("batchSize")); + Assert.assertEquals("ts", conf.getString("column[0]")); + Assert.assertEquals("f2", conf.getString("column[2]")); + + } + } + +} \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/csv2t.json b/tdenginewriter/src/test/resources/csv2t.json new file mode 100644 index 00000000..ef5c4d04 --- /dev/null +++ b/tdenginewriter/src/test/resources/csv2t.json @@ -0,0 +1,80 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "txtfilereader", + "parameter": { + "path": [ + "/Users/yangzy/IdeaProjects/DataX/tdenginewriter/src/test/resources/weather.csv" + ], + "encoding": "UTF-8", + "column": [ + { + "index": 0, + "type": "string" + }, + { + "index": 1, + "type": "date", + "format": "yyy-MM-dd HH:mm:ss.SSS" + }, + { + "index": 2, + "type": "long" + }, + { + "index": 3, + "type": "double" + }, + { + "index": 4, + "type": "long" + }, + { + "index": 5, + "type": "string" + }, + { + "index": 6, + "type": "String" + } + ], + "fieldDelimiter": "," + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "tbname", + "ts", + "temperature", + "humidity", + "is_normal", + "device_id", + "address" + 
], + "connection": [ + { + "table": [ + "weather" + ], + "jdbcUrl": "jdbc:TAOS-RS://192.168.56.105:6041/test" + } + ], + "batchSize": 100, + "ignoreTagsUnmatched": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/defaultJob.json b/tdenginewriter/src/test/resources/defaultJob.json new file mode 100644 index 00000000..2a36dfd0 --- /dev/null +++ b/tdenginewriter/src/test/resources/defaultJob.json @@ -0,0 +1,226 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "streamreader", + "parameter": { + "column": [ + { + "type": "string", + "value": "tb1" + }, + { + "type": "date", + "value": "2022-02-20 12:00:01" + }, + { + "type": "date", + "value": "2022-02-20 12:00:02.123", + "dateFormat": "yyyy-MM-dd HH:mm:ss.SSS" + }, + { + "type": "date", + "value": "2022-02-20 12:00:03.123456", + "dateFormat": "yyyy-MM-dd HH:mm:ss.SSSSSS" + }, + { + "type": "date", + "value": "2022-02-20 12:00:04.123456789", + "dateFormat": "yyyy-MM-dd HH:mm:ss.SSSSSSSSS" + }, + { + "type": "string", + "value": "2022-02-20 12:00:05.123" + }, + { + "type": "string", + "value": "2022-02-20 12:00:06.123456" + }, + { + "type": "string", + "value": "2022-02-20 12:00:07.123456789" + }, + { + "type": "long", + "value": 1645329608000 + }, + { + "type": "long", + "value": 1645329609000000 + }, + { + "type": "long", + "value": 1645329610000000000 + }, + { + "type": "long", + "random": "0, 10" + }, + { + "type": "long", + "random": "0, 100" + }, + { + "type": "long", + "random": "0, 1000" + }, + { + "type": "long", + "random": "0, 10000" + }, + { + "type": "double", + "random": "0, 10" + }, + { + "type": "double", + "random": "10, 20" + }, + { + "type": "bool", + "random": "0, 50" + }, + { + "type": "bytes", + "random": "0, 10" + }, + { + "type": "string", + "random": "10, 50" + }, + { + "type": "date", + "value": "2022-02-20 12:00:01" + }, + { + "type": "date", + "value": "2022-02-20 12:00:02.123", + "dateFormat": "yyyy-MM-dd HH:mm:ss.SSS" + }, + { + "type": "date", + "value": "2022-02-20 12:00:03.123456", + "dateFormat": "yyyy-MM-dd HH:mm:ss.SSSSSS" + }, + { + "type": "date", + "value": "2022-02-20 12:00:04.123456789", + "dateFormat": "yyyy-MM-dd HH:mm:ss.SSSSSSSSS" + }, + { + "type": "string", + "value": "2022-02-20 12:00:05.123" + }, + { + "type": "string", + "value": "2022-02-20 12:00:06.123456" + }, + { + "type": "string", + "value": "2022-02-20 12:00:07.123456789" + }, + { + "type": "long", + "value": 1 + }, + { + "type": "long", + "value": 2 + }, + { + "type": "long", + "value": 3 + }, + { + "type": "long", + "value": 4 + }, + { + "type": "double", + "value": 5.55 + }, + { + "type": "double", + "value": 6.666666 + }, + { + "type": "bool", + "value": true + }, + { + "type": "bytes", + "value": "abcABC123" + }, + { + "type": "string", + "value": "北京朝阳望京" + } + ], + "sliceRecordCount": 10 + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "tbname", + "ts1", + "ts2", + "ts3", + "ts4", + "ts5", + "ts6", + "ts7", + "ts8", + "ts9", + "ts10", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9", + "t1", + "t2", + "t3", + "t4", + "t5", + "t6", + "t7", + "t8", + "t9", + "t10", + "t11", + "t12", + "t13", + "t14", + "t15", + "t16" + ], + "connection": [ + { + "table": [ + "stb2" + ], + "jdbcUrl": "jdbc:TAOS-RS://192.168.56.105:6041/db2" + } + ], + "batchSize": 100, + "ignoreTagsUnmatched": true + } + } + } + ], + "setting": { + 
"speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/dm-schema.sql b/tdenginewriter/src/test/resources/dm-schema.sql new file mode 100644 index 00000000..d9985b1b --- /dev/null +++ b/tdenginewriter/src/test/resources/dm-schema.sql @@ -0,0 +1,30 @@ +select tablespace_name +from dba_data_files; + +create +tablespace test datafile '/home/dmdba/dmdbms/data/DAMENG/test.dbf' size 32 autoextend on next 1 maxsize 1024; + +create +user TESTUSER identified by test123456 default tablespace test; + +grant dba to TESTUSER; + +select * +from user_tables; + +drop table if exists stb1; + +create table stb1 +( + ts timestamp, + f1 tinyint, + f2 smallint, + f3 int, + f4 bigint, + f5 float, + f6 double, + f7 NUMERIC(10, 2), + f8 BIT, + f9 VARCHAR(100), + f10 VARCHAR2(200) +); diff --git a/tdenginewriter/src/test/resources/dm2t-1.json b/tdenginewriter/src/test/resources/dm2t-1.json new file mode 100644 index 00000000..183786bf --- /dev/null +++ b/tdenginewriter/src/test/resources/dm2t-1.json @@ -0,0 +1,62 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "rdbmsreader", + "parameter": { + "username": "TESTUSER", + "password": "test123456", + "connection": [ + { + "querySql": [ + "select concat(concat(concat('t', f1), '_'),f3) as tbname,* from stb1;" + ], + "jdbcUrl": [ + "jdbc:dm://192.168.0.72:5236" + ] + } + ], + "fetchSize": 1024 + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "tbname", + "ts", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9", + "f10" + ], + "connection": [ + { + "table": [ + "stb2" + ], + "jdbcUrl": "jdbc:TAOS-RS://192.168.1.93:6041/db2" + } + ], + "batchSize": 1000, + "ignoreTagsUnmatched": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/dm2t-2.json b/tdenginewriter/src/test/resources/dm2t-2.json new file mode 100644 index 00000000..dfea82bf --- /dev/null +++ b/tdenginewriter/src/test/resources/dm2t-2.json @@ -0,0 +1,62 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "rdbmsreader", + "parameter": { + "username": "TESTUSER", + "password": "test123456", + "connection": [ + { + "querySql": [ + "select concat(concat(concat('t', f1), '_'),f3) as tbname,* from stb1;" + ], + "jdbcUrl": [ + "jdbc:dm://192.168.0.72:5236" + ] + } + ], + "fetchSize": 1024, + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "tbname", + "ts", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9", + "f10" + ], + "connection": [ + { + "table": [ + "t1_0" + ], + "jdbcUrl": "jdbc:TAOS-RS://192.168.1.93:6041/db2" + } + ], + "batchSize": 1000, + "ignoreTagsUnmatched": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/dm2t-3.json b/tdenginewriter/src/test/resources/dm2t-3.json new file mode 100644 index 00000000..cd96a536 --- /dev/null +++ b/tdenginewriter/src/test/resources/dm2t-3.json @@ -0,0 +1,76 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "rdbmsreader", + "parameter": { + "username": "TESTUSER", + "password": "test123456", + "column": [ + "ts", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9", + "f10" + ], + "splitPk": "f1", + "connection": [ + { + "table": [ + "stb1" + ], + "jdbcUrl": [ + 
"jdbc:dm://192.168.0.72:5236" + ] + } + ], + "fetchSize": 1024, + "where": "1 = 1" + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "ts", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9", + "f10" + ], + "connection": [ + { + "table": [ + "stb2" + ], + "jdbcUrl": "jdbc:TAOS-RS://192.168.1.93:6041/db2" + } + ], + "batchSize": 1000, + "ignoreTagsUnmatched": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/dm2t-4.json b/tdenginewriter/src/test/resources/dm2t-4.json new file mode 100644 index 00000000..5f169d5b --- /dev/null +++ b/tdenginewriter/src/test/resources/dm2t-4.json @@ -0,0 +1,61 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "rdbmsreader", + "parameter": { + "username": "TESTUSER", + "password": "test123456", + "connection": [ + { + "querySql": [ + "select * from stb1" + ], + "jdbcUrl": [ + "jdbc:dm://192.168.0.72:5236" + ] + } + ], + "fetchSize": 1024 + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "ts", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9", + "f10" + ], + "connection": [ + { + "table": [ + "stb2" + ], + "jdbcUrl": "jdbc:TAOS://192.168.1.93:6030/db2" + } + ], + "batchSize": 1000, + "ignoreTagsUnmatched": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/incremental_sync/clean_env.sh b/tdenginewriter/src/test/resources/incremental_sync/clean_env.sh new file mode 100755 index 00000000..f3dca7c1 --- /dev/null +++ b/tdenginewriter/src/test/resources/incremental_sync/clean_env.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +datax_home_dir=$(dirname $(readlink -f "$0")) + +curl -H 'Authorization: Basic cm9vdDp0YW9zZGF0YQ==' -d 'drop table if exists db2.stb2;' 192.168.1.93:6041/rest/sql +curl -H 'Authorization: Basic cm9vdDp0YW9zZGF0YQ==' -d 'create table if not exists db2.stb2 (`ts` TIMESTAMP,`f2` SMALLINT,`f4` BIGINT,`f5` FLOAT,`f6` DOUBLE,`f7` DOUBLE,`f8` BOOL,`f9` NCHAR(100),`f10` NCHAR(200)) TAGS (`f1` TINYINT,`f3` INT);' 192.168.1.93:6041/rest/sql + +rm -f ${datax_home_dir}/log/* +rm -f ${datax_home_dir}/job/*.csv \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/incremental_sync/csv2t-jni.json b/tdenginewriter/src/test/resources/incremental_sync/csv2t-jni.json new file mode 100644 index 00000000..625c3801 --- /dev/null +++ b/tdenginewriter/src/test/resources/incremental_sync/csv2t-jni.json @@ -0,0 +1,106 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "txtfilereader", + "parameter": { + "path": [ + "/root/workspace/tmp/a.txt" + ], + "encoding": "UTF-8", + "column": [ + { + "index": 0, + "type": "date", + "format": "yyyy-MM-dd HH:mm:ss.SSS" + }, + { + "index": 1, + "type": "long" + }, + { + "index": 2, + "type": "long" + }, + { + "index": 3, + "type": "long" + }, + { + "index": 4, + "type": "long" + }, + { + "index": 5, + "type": "double" + }, + { + "index": 6, + "type": "double" + }, + { + "index": 7, + "type": "boolean" + }, + { + "index": 8, + "type": "string" + }, + { + "index": 9, + "type": "string" + }, + { + "index": 10, + "type": "date", + "format": "yyyy-MM-dd HH:mm:ss.SSS" + }, + { + "index": 11, + "type": "string" + } + ], + "fieldDelimiter": "," + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": 
{ + "username": "root", + "password": "taosdata", + "column": [ + "ts", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9", + "t1", + "tbname" + ], + "connection": [ + { + "table": [ + "stb2" + ], + "jdbcUrl": "jdbc:TAOS://192.168.1.93:6030/db2" + } + ], + "batchSize": 1000, + "ignoreTagsUnmatched": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/incremental_sync/csv2t-restful.json b/tdenginewriter/src/test/resources/incremental_sync/csv2t-restful.json new file mode 100644 index 00000000..d852e2e2 --- /dev/null +++ b/tdenginewriter/src/test/resources/incremental_sync/csv2t-restful.json @@ -0,0 +1,57 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "txtfilereader", + "parameter": { + "path": [ + "/root/workspace/tmp/a.txt" + ], + "encoding": "UTF-8", + "column": [ + "*" + ], + "fieldDelimiter": "," + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "ts", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9", + "t1", + "tbname" + ], + "connection": [ + { + "table": [ + "stb2" + ], + "jdbcUrl": "jdbc:TAOS-RS://192.168.1.93:6041/db2" + } + ], + "batchSize": 1000, + "ignoreTagsUnmatched": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/incremental_sync/dm2t-jni.json b/tdenginewriter/src/test/resources/incremental_sync/dm2t-jni.json new file mode 100644 index 00000000..3e86bb8d --- /dev/null +++ b/tdenginewriter/src/test/resources/incremental_sync/dm2t-jni.json @@ -0,0 +1,62 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "rdbmsreader", + "parameter": { + "username": "TESTUSER", + "password": "test123456", + "connection": [ + { + "querySql": [ + "select concat(concat(concat('t', f1), '_'),f3) as tbname,* from stb1;" + ], + "jdbcUrl": [ + "jdbc:dm://192.168.0.72:5236" + ] + } + ], + "fetchSize": 1024 + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "tbname", + "ts", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9", + "f10" + ], + "connection": [ + { + "table": [ + "stb2" + ], + "jdbcUrl": "jdbc:TAOS://192.168.1.93:6030/db2" + } + ], + "batchSize": 1000, + "ignoreTagsUnmatched": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/incremental_sync/dm2t-restful.json b/tdenginewriter/src/test/resources/incremental_sync/dm2t-restful.json new file mode 100644 index 00000000..183786bf --- /dev/null +++ b/tdenginewriter/src/test/resources/incremental_sync/dm2t-restful.json @@ -0,0 +1,62 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "rdbmsreader", + "parameter": { + "username": "TESTUSER", + "password": "test123456", + "connection": [ + { + "querySql": [ + "select concat(concat(concat('t', f1), '_'),f3) as tbname,* from stb1;" + ], + "jdbcUrl": [ + "jdbc:dm://192.168.0.72:5236" + ] + } + ], + "fetchSize": 1024 + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "tbname", + "ts", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9", + "f10" + ], + "connection": [ + { + "table": [ + "stb2" + ], + "jdbcUrl": "jdbc:TAOS-RS://192.168.1.93:6041/db2" 
+              }
+            ],
+            "batchSize": 1000,
+            "ignoreTagsUnmatched": true
+          }
+        }
+      }
+    ],
+    "setting": {
+      "speed": {
+        "channel": 1
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/tdenginewriter/src/test/resources/incremental_sync/dm2t-update.json b/tdenginewriter/src/test/resources/incremental_sync/dm2t-update.json
new file mode 100644
index 00000000..d9285b23
--- /dev/null
+++ b/tdenginewriter/src/test/resources/incremental_sync/dm2t-update.json
@@ -0,0 +1,63 @@
+{
+  "job": {
+    "content": [
+      {
+        "reader": {
+          "name": "rdbmsreader",
+          "parameter": {
+            "username": "TESTUSER",
+            "password": "test123456",
+            "connection": [
+              {
+                "querySql": [
+                  "select concat(concat(concat('t', f1), '_'),f3) as tbname,* from stb1"
+                ],
+                "jdbcUrl": [
+                  "jdbc:dm://192.168.0.72:5236"
+                ]
+              }
+            ],
+            "where": "1=1",
+            "fetchSize": 1024
+          }
+        },
+        "writer": {
+          "name": "tdenginewriter",
+          "parameter": {
+            "username": "root",
+            "password": "taosdata",
+            "column": [
+              "tbname",
+              "ts",
+              "f1",
+              "f2",
+              "f3",
+              "f4",
+              "f5",
+              "f6",
+              "f7",
+              "f8",
+              "f9",
+              "f10"
+            ],
+            "connection": [
+              {
+                "table": [
+                  "stb2"
+                ],
+                "jdbcUrl": "jdbc:TAOS-RS://192.168.1.93:6041/db2"
+              }
+            ],
+            "batchSize": 1000,
+            "ignoreTagsUnmatched": true
+          }
+        }
+      }
+    ],
+    "setting": {
+      "speed": {
+        "channel": 1
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/tdenginewriter/src/test/resources/incremental_sync/dm2t_sync.sh b/tdenginewriter/src/test/resources/incremental_sync/dm2t_sync.sh
new file mode 100755
index 00000000..426c6233
--- /dev/null
+++ b/tdenginewriter/src/test/resources/incremental_sync/dm2t_sync.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+set -e
+#set -x
+
+datax_home_dir=$(dirname $(readlink -f "$0"))
+table_name="stb1"
+update_key="ts"
+
+while getopts "hd:t:k:" arg; do
+  case $arg in
+  d)
+    datax_home_dir=$(echo $OPTARG)
+    ;;
+  t)
+    table_name=$(echo $OPTARG)
+    ;;
+  k)
+    update_key=$(echo $OPTARG)
+    ;;
+  h)
+    echo "Usage: $(basename $0) -d [datax_home_dir] -t [table_name] -k [update_key]"
+    echo "       -h help"
+    exit 0
+    ;;
+  ?) # unknown option
+    echo "unknown argument"
+    exit 1
+    ;;
+  esac
+done
+
+if [[ -e ${datax_home_dir}/job/${table_name}.csv ]]; then
+  MAX_TIME=$(cat ${datax_home_dir}/job/${table_name}.csv)
+else
+  MAX_TIME="null"
+fi
+current_datetime=$(date +"%Y-%m-%d %H:%M:%S")
+current_timestamp=$(date +%s)
+
+if [ "$MAX_TIME" != "null" ]; then
+  WHERE="${update_key} >= '$MAX_TIME' and ${update_key} < '$current_datetime'"
+  sed "s/1=1/$WHERE/g" ${datax_home_dir}/job/dm2t-update.json >${datax_home_dir}/job/dm2t_${current_timestamp}.json
+  echo "incremental data synchronization, from '${MAX_TIME}' to '${current_datetime}'"
+  python ${datax_home_dir}/bin/datax.py ${datax_home_dir}/job/dm2t_${current_timestamp}.json 1> /dev/null 2>&1
+else
+  echo "full data synchronization, to '${current_datetime}'"
+  python ${datax_home_dir}/bin/datax.py ${datax_home_dir}/job/dm2t-update.json 1> /dev/null 2>&1
+fi
+
+if [[ $?
-ne 0 ]]; then
+  echo "datax migration job failed"
+else
+  echo ${current_datetime} >$datax_home_dir/job/${table_name}.csv
+  echo "datax migration job success"
+fi
+
+rm -rf ${datax_home_dir}/job/dm2t_${current_timestamp}.json
+
+#while true; do ./dm2t_sync.sh; sleep 5s; done
\ No newline at end of file
diff --git a/tdenginewriter/src/test/resources/incremental_sync/t2dm-jni.json b/tdenginewriter/src/test/resources/incremental_sync/t2dm-jni.json
new file mode 100644
index 00000000..341f6293
--- /dev/null
+++ b/tdenginewriter/src/test/resources/incremental_sync/t2dm-jni.json
@@ -0,0 +1,50 @@
+{
+  "job": {
+    "content": [
+      {
+        "reader": {
+          "name": "tdenginereader",
+          "parameter": {
+            "username": "root",
+            "password": "taosdata",
+            "column": [
+              "*"
+            ],
+            "connection": [
+              {
+                "table": [
+                  "stb1"
+                ],
+                "jdbcUrl": "jdbc:TAOS://192.168.56.105:6030/db1"
+              }
+            ]
+          }
+        },
+        "writer": {
+          "name": "rdbmswriter",
+          "parameter": {
+            "connection": [
+              {
+                "table": [
+                  "stb2"
+                ],
+                "jdbcUrl": "jdbc:dm://192.168.0.72:5236"
+              }
+            ],
+            "username": "TESTUSER",
+            "password": "test123456",
+            "table": "stb2",
+            "column": [
+              "*"
+            ]
+          }
+        }
+      }
+    ],
+    "setting": {
+      "speed": {
+        "channel": 1
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/tdenginewriter/src/test/resources/incremental_sync/t2dm-restful.json b/tdenginewriter/src/test/resources/incremental_sync/t2dm-restful.json
new file mode 100644
index 00000000..b2cf91e2
--- /dev/null
+++ b/tdenginewriter/src/test/resources/incremental_sync/t2dm-restful.json
@@ -0,0 +1,50 @@
+{
+  "job": {
+    "content": [
+      {
+        "reader": {
+          "name": "tdenginereader",
+          "parameter": {
+            "username": "root",
+            "password": "taosdata",
+            "column": [
+              "*"
+            ],
+            "connection": [
+              {
+                "table": [
+                  "stb1"
+                ],
+                "jdbcUrl": "jdbc:TAOS-RS://192.168.56.105:6041/db1"
+              }
+            ]
+          }
+        },
+        "writer": {
+          "name": "rdbmswriter",
+          "parameter": {
+            "connection": [
+              {
+                "table": [
+                  "stb2"
+                ],
+                "jdbcUrl": "jdbc:dm://192.168.0.72:5236"
+              }
+            ],
+            "username": "TESTUSER",
+            "password": "test123456",
+            "table": "stb2",
+            "column": [
+              "*"
+            ]
+          }
+        }
+      }
+    ],
+    "setting": {
+      "speed": {
+        "channel": 1
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/tdenginewriter/src/test/resources/incremental_sync/upload.sh b/tdenginewriter/src/test/resources/incremental_sync/upload.sh
new file mode 100755
index 00000000..388d275b
--- /dev/null
+++ b/tdenginewriter/src/test/resources/incremental_sync/upload.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+scp t2dm-restful.json root@192.168.56.105:/root/workspace/tmp/datax/job
+scp t2dm-jni.json root@192.168.56.105:/root/workspace/tmp/datax/job
+scp dm2t-restful.json root@192.168.56.105:/root/workspace/tmp/datax/job
+scp dm2t-jni.json root@192.168.56.105:/root/workspace/tmp/datax/job
+scp dm2t-update.json root@192.168.56.105:/root/workspace/tmp/datax/job
+scp csv2t-restful.json root@192.168.56.105:/root/workspace/tmp/datax/job
+scp csv2t-jni.json root@192.168.56.105:/root/workspace/tmp/datax/job
+
+
+scp dm2t_sync.sh root@192.168.56.105:/root/workspace/tmp/datax
+scp clean_env.sh root@192.168.56.105:/root/workspace/tmp/datax
\ No newline at end of file
diff --git a/tdenginewriter/src/test/resources/m2t-1.json b/tdenginewriter/src/test/resources/m2t-1.json
new file mode 100644
index 00000000..dcacb4b2
--- /dev/null
+++ b/tdenginewriter/src/test/resources/m2t-1.json
@@ -0,0 +1,72 @@
+{
+  "job": {
+    "content": [
+      {
+        "reader": {
+          "name": "mysqlreader",
+          "parameter": {
+            "username": "root",
+            "password": "123456",
+            "column": [
+              "ts",
+              "dt",
+              "f1",
+              "f2",
+ "f3", + "f4", + "f5", + "f6", + "f7", + "f8" + ], + "splitPk": "id", + "connection": [ + { + "table": [ + "stb1" + ], + "jdbcUrl": [ + "jdbc:mysql://192.168.56.105:3306/db1?useSSL=false&useUnicode=true&characterEncoding=utf8" + ] + } + ] + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "ts", + "dt", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8" + ], + "connection": [ + { + "table": [ + "stb2" + ], + "jdbcUrl": "jdbc:TAOS-RS://192.168.1.93:6041/db2" + } + ], + "batchSize": 1000, + "ignoreTagsUnmatched": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/mongo2t.json b/tdenginewriter/src/test/resources/mongo2t.json new file mode 100644 index 00000000..902e6f7c --- /dev/null +++ b/tdenginewriter/src/test/resources/mongo2t.json @@ -0,0 +1,66 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "mongodbreader", + "parameter": { + "address": [ + "192.168.1.213:27017" + ], + "userName": "", + "userPassword": "", + "dbName": "testdb", + "collectionName": "monitor_data", + "column": [ + { + "name": "ct", + "type": "date" + }, + { + "name": "pv", + "type": "float" + }, + { + "name": "tv", + "type": "float" + }, + { + "name": "pid", + "type": "float" + } + ] + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "hmdata", + "column": [ + "ts", + "pressure", + "temperature", + "position_id" + ], + "connection": [ + { + "table": [ + "pipeline_data" + ], + "jdbcUrl": "jdbc:TAOS-RS://192.168.1.213:6041/mongo3040" + } + ], + "batchSize": 1000, + "ignoreTagsUnmatched": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/o2t-1.json b/tdenginewriter/src/test/resources/o2t-1.json new file mode 100644 index 00000000..11264c98 --- /dev/null +++ b/tdenginewriter/src/test/resources/o2t-1.json @@ -0,0 +1,36 @@ +{ + "job":{ + "content":[{ + "reader": { + "name": "opentsdbreader", + "parameter": { + "endpoint": "http://192.168.56.105:4242", + "column": ["weather_temperature"], + "beginDateTime": "2021-01-01 00:00:00", + "endDateTime": "2021-01-01 01:00:00" + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "connection": [ + { + "table": [ + "meters" + ], + "jdbcUrl": "jdbc:TAOS://192.168.56.105:6030/test?timestampFormat=TIMESTAMP" + } + ], + "batchSize": 1000 + } + } + }], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/t2t-1.json b/tdenginewriter/src/test/resources/t2t-1.json new file mode 100644 index 00000000..5ca04d9a --- /dev/null +++ b/tdenginewriter/src/test/resources/t2t-1.json @@ -0,0 +1,94 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "tdenginereader", + "parameter": { + "username": "root", + "password": "taosdata", + "connection": [ + { + "table": [ + "stb1" + ], + "jdbcUrl": "jdbc:TAOS-RS://192.168.56.105:6041/db1?timestampFormat=TIMESTAMP" + } + ], + "column": [ + "tbname", + "ts", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9", + "t1", + "t2", + "t3", + "t4", + "t5", + "t6", + "t7", + "t8", + "t9", + "t10" + ], + "beginDateTime": "2022-02-15 00:00:00", + "endDateTime": "2022-02-16 00:00:00", + "splitInterval": "1d" + } + }, + "writer": { + "name": 
"tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "tbname", + "ts", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9", + "t1", + "t2", + "t3", + "t4", + "t5", + "t6", + "t7", + "t8", + "t9", + "t10" + ], + "connection": [ + { + "table": [ + "stb2" + ], + "jdbcUrl": "jdbc:TAOS-RS://192.168.1.93:6041/db2?timestampFormat=TIMESTAMP" + } + ], + "batchSize": 1000, + "ignoreTagsUnmatched": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/t2t-2.json b/tdenginewriter/src/test/resources/t2t-2.json new file mode 100644 index 00000000..18130b6e --- /dev/null +++ b/tdenginewriter/src/test/resources/t2t-2.json @@ -0,0 +1,92 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "tdenginereader", + "parameter": { + "username": "root", + "password": "taosdata", + "connection": [ + { + "table": [ + "stb1" + ], + "jdbcUrl": "jdbc:TAOS-RS://192.168.56.105:6041/db1?timestampFormat=TIMESTAMP" + } + ], + "column": [ + "ts", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9", + "t1", + "t2", + "t3", + "t4", + "t5", + "t6", + "t7", + "t8", + "t9", + "t10" + ], + "beginDateTime": "2022-02-15 00:00:00", + "endDateTime": "2022-02-16 00:00:00", + "splitInterval": "1d" + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "ts", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9", + "t1", + "t2", + "t3", + "t4", + "t5", + "t6", + "t7", + "t8", + "t9", + "t10" + ], + "connection": [ + { + "table": [ + "stb2" + ], + "jdbcUrl": "jdbc:TAOS://192.168.1.93:6030/db2" + } + ], + "batchSize": 1000, + "ignoreTagsUnmatched": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/t2t-3.json b/tdenginewriter/src/test/resources/t2t-3.json new file mode 100644 index 00000000..e0a22959 --- /dev/null +++ b/tdenginewriter/src/test/resources/t2t-3.json @@ -0,0 +1,92 @@ +{ + "job": { + "content": [ + { + "reader": { + "name": "tdenginereader", + "parameter": { + "username": "root", + "password": "taosdata", + "connection": [ + { + "table": [ + "stb1" + ], + "jdbcUrl": "jdbc:TAOS-RS://192.168.56.105:6041/db1?timestampFormat=TIMESTAMP" + } + ], + "column": [ + "ts", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9", + "t1", + "t2", + "t3", + "t4", + "t5", + "t6", + "t7", + "t8", + "t9", + "t10" + ], + "beginDateTime": "2022-02-15 00:00:00", + "endDateTime": "2022-02-16 00:00:00", + "splitInterval": "1d" + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "ts", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9", + "t1", + "t2", + "t3", + "t4", + "t5", + "t6", + "t7", + "t8", + "t9", + "t10" + ], + "connection": [ + { + "table": [ + "t1" + ], + "jdbcUrl": "jdbc:TAOS://192.168.1.93:6030/db2?timestampFormat=TIMESTAMP" + } + ], + "batchSize": 1000, + "ignoreTagsUnmatched": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/t2t-4.json b/tdenginewriter/src/test/resources/t2t-4.json new file mode 100644 index 00000000..b7716363 --- /dev/null +++ b/tdenginewriter/src/test/resources/t2t-4.json @@ -0,0 +1,72 @@ +{ + "job": { + "content": [ 
+ { + "reader": { + "name": "tdenginereader", + "parameter": { + "username": "root", + "password": "taosdata", + "connection": [ + { + "table": [ + "stb1" + ], + "jdbcUrl": "jdbc:TAOS-RS://192.168.56.105:6041/db1?timestampFormat=TIMESTAMP" + } + ], + "column": [ + "ts", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9" + ], + "beginDateTime": "2022-02-15 00:00:00", + "endDateTime": "2022-02-16 00:00:00", + "splitInterval": "1d" + } + }, + "writer": { + "name": "tdenginewriter", + "parameter": { + "username": "root", + "password": "taosdata", + "column": [ + "ts", + "f1", + "f2", + "f3", + "f4", + "f5", + "f6", + "f7", + "f8", + "f9" + ], + "connection": [ + { + "table": [ + "weather" + ], + "jdbcUrl": "jdbc:TAOS-RS://192.168.1.93:6041/db2" + } + ], + "batchSize": 1000, + "ignoreTagsUnmatched": true + } + } + } + ], + "setting": { + "speed": { + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/tdenginewriter/src/test/resources/weather.csv b/tdenginewriter/src/test/resources/weather.csv new file mode 100644 index 00000000..21c4a1aa --- /dev/null +++ b/tdenginewriter/src/test/resources/weather.csv @@ -0,0 +1,10 @@ +tb1,2022-02-20 04:05:59.255,5,8.591868744,1,abcABC123,北京朝阳望京 +tb1,2022-02-20 04:58:47.068,3,1.489693641,1,abcABC123,北京朝阳望京 +tb1,2022-02-20 06:31:09.408,1,4.026500719,1,abcABC123,北京朝阳望京 +tb1,2022-02-20 08:08:00.336,1,9.606400360,1,abcABC123,北京朝阳望京 +tb1,2022-02-20 08:28:58.053,9,7.872178184,1,abcABC123123,北京朝阳望京 +tb1,2022-02-20 10:23:20.836,9,2.699478524,1,abcABC123,北京朝阳望京 +tb1,2022-02-20 11:09:59.739,7,7.906723716,1,abcABC123,北京朝阳望京 +tb1,2022-02-20 19:08:29.315,1,5.852338895,1,abcABC123,北京朝阳望京 +tb1,2022-02-20 22:10:06.243,10,5.535007901,1,abcABC123,北京朝阳望京 +tb1,2022-02-20 23:52:43.683,10,10.642013185,1,abcABC123,北京朝阳望京 diff --git a/transformer/doc/transformer.md b/transformer/doc/transformer.md index 247ab39b..260c0fb6 100644 --- a/transformer/doc/transformer.md +++ b/transformer/doc/transformer.md @@ -59,7 +59,17 @@ dx_replace(1,"5","10","****") column 1的value为“dataxTest”=>"data****" dx_filter(1,"like","dataTest") dx_filter(1,">=","10") ``` -5. dx_groovy +5. dx_digest +* 参数:3个 + * 第一个参数:字段编号,对应record中第几个字段。 + * 第二个参数:hash类型,md5、sha1 + * 第三个参数:hash值大小写 toUpperCase(大写)、toLowerCase(小写) +* 返回: 返回指定类型的hashHex,如果字段为空,则转为空字符串,再返回对应hashHex +* 举例: +``` +dx_digest(1,"md5","toUpperCase"), column 1的值为 xyzzzzz => 9CDFFC4FA4E45A99DB8BBCD762ACFFA2 +``` +6. 
dx_groovy * 参数。 * 第一个参数: groovy code * 第二个参数(列表或者为空):extraPackage @@ -67,7 +77,9 @@ dx_filter(1,">=","10") * dx_groovy只能调用一次。不能多次调用。 * groovy code中支持java.lang, java.util的包,可直接引用的对象有record,以及element下的各种column(BoolColumn.class,BytesColumn.class,DateColumn.class,DoubleColumn.class,LongColumn.class,StringColumn.class)。不支持其他包,如果用户有需要用到其他包,可设置extraPackage,注意extraPackage不支持第三方jar包。 * groovy code中,返回更新过的Record(比如record.setColumn(columnIndex, new StringColumn(newValue));),或者null。返回null表示过滤此行。 - * 用户可以直接调用静态的Util方式(GroovyTransformerStaticUtil),目前GroovyTransformerStaticUtil的方法列表 (按需补充): + * 用户可以直接调用静态的Util方式(GroovyTransformerStaticUtil),目前GroovyTransformerStaticUtil的方法列表: + * md5(String):String + * sha1(String):String * 举例: ``` groovy 实现的subStr: @@ -109,7 +121,7 @@ String code3 = "Column column = record.getColumn(1);\n" + ``` ## Job定义 -* 本例中,配置3个UDF。 +* 本例中,配置4个UDF。 ``` { @@ -176,6 +188,14 @@ String code3 = "Column column = record.getColumn(1);\n" + "paras":["3","4","****"] } }, + { + "name": "dx_digest", + "parameter": + { + "columnIndex":3, + "paras":["md5", "toLowerCase"] + } + }, { "name": "dx_groovy", "parameter": diff --git a/tsdbreader/pom.xml b/tsdbreader/pom.xml index 0f990234..4b3f58c6 100644 --- a/tsdbreader/pom.xml +++ b/tsdbreader/pom.xml @@ -24,9 +24,6 @@ 4.5 2.4 - - 1.2.28 - 4.13.1 @@ -44,10 +41,6 @@ slf4j-log4j12 org.slf4j - - fastjson - com.alibaba - commons-math3 org.apache.commons @@ -89,9 +82,8 @@ - com.alibaba - fastjson - ${fastjson.version} + com.alibaba.fastjson2 + fastjson2 diff --git a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/Constant.java b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/Constant.java index e42dedc0..f5069dc9 100644 --- a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/Constant.java +++ b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/Constant.java @@ -16,6 +16,8 @@ public final class Constant { static final String DEFAULT_DATA_FORMAT = "yyyy-MM-dd HH:mm:ss"; public static final String METRIC_SPECIFY_KEY = "__metric__"; + public static final String METRIC_SPECIFY_KEY_PREFIX = METRIC_SPECIFY_KEY + "."; + public static final int METRIC_SPECIFY_KEY_PREFIX_LENGTH = METRIC_SPECIFY_KEY_PREFIX.length(); public static final String TS_SPECIFY_KEY = "__ts__"; public static final String VALUE_SPECIFY_KEY = "__value__"; diff --git a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/Key.java b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/Key.java index 14ee7e41..c8a3d7ae 100644 --- a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/Key.java +++ b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/Key.java @@ -17,14 +17,19 @@ public class Key { // RDB for MySQL / ADB etc. 
static final String SINK_DB_TYPE = "sinkDbType"; static final String ENDPOINT = "endpoint"; + static final String USERNAME = "username"; + static final String PASSWORD = "password"; static final String COLUMN = "column"; static final String METRIC = "metric"; static final String FIELD = "field"; static final String TAG = "tag"; + static final String COMBINE = "combine"; static final String INTERVAL_DATE_TIME = "splitIntervalMs"; static final String BEGIN_DATE_TIME = "beginDateTime"; static final String END_DATE_TIME = "endDateTime"; + static final String HINT = "hint"; + static final Boolean COMBINE_DEFAULT_VALUE = false; static final Integer INTERVAL_DATE_TIME_DEFAULT_VALUE = 60; static final String TYPE_DEFAULT_VALUE = "TSDB"; static final Set TYPE_SET = new HashSet<>(); diff --git a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/TSDBReader.java b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/TSDBReader.java index 04b931c7..1f8c3d18 100755 --- a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/TSDBReader.java +++ b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/TSDBReader.java @@ -6,7 +6,7 @@ import com.alibaba.datax.common.spi.Reader; import com.alibaba.datax.common.util.Configuration; import com.alibaba.datax.plugin.reader.tsdbreader.conn.TSDBConnection; import com.alibaba.datax.plugin.reader.tsdbreader.util.TimeUtils; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; import org.apache.commons.lang3.StringUtils; import org.joda.time.DateTime; import org.slf4j.Logger; @@ -60,6 +60,15 @@ public class TSDBReader extends Reader { "The parameter [" + Key.ENDPOINT + "] is not set."); } + String username = originalConfig.getString(Key.USERNAME, null); + if (StringUtils.isBlank(username)) { + LOG.warn("The parameter [" + Key.USERNAME + "] is blank."); + } + String password = originalConfig.getString(Key.PASSWORD, null); + if (StringUtils.isBlank(password)) { + LOG.warn("The parameter [" + Key.PASSWORD + "] is blank."); + } + // tagK / field could be empty if ("TSDB".equals(type)) { List columns = originalConfig.getList(Key.COLUMN, String.class); @@ -76,7 +85,14 @@ public class TSDBReader extends Reader { "The parameter [" + Key.COLUMN + "] is not set."); } for (String specifyKey : Constant.MUST_CONTAINED_SPECIFY_KEYS) { - if (!columns.contains(specifyKey)) { + boolean containSpecifyKey = false; + for (String column : columns) { + if (column.startsWith(specifyKey)) { + containSpecifyKey = true; + break; + } + } + if (!containSpecifyKey) { throw DataXException.asDataXException( TSDBReaderErrorCode.ILLEGAL_VALUE, "The parameter [" + Key.COLUMN + "] should contain " @@ -99,6 +115,8 @@ public class TSDBReader extends Reader { "The parameter [" + Key.INTERVAL_DATE_TIME + "] should be great than zero."); } + Boolean isCombine = originalConfig.getBool(Key.COMBINE, Key.COMBINE_DEFAULT_VALUE); + SimpleDateFormat format = new SimpleDateFormat(Constant.DEFAULT_DATA_FORMAT); String startTime = originalConfig.getString(Key.BEGIN_DATE_TIME); Long startDate; @@ -168,14 +186,14 @@ public class TSDBReader extends Reader { startTime = format.parse(originalConfig.getString(Key.BEGIN_DATE_TIME)).getTime(); } catch (ParseException e) { throw DataXException.asDataXException( - TSDBReaderErrorCode.ILLEGAL_VALUE, "解析[" + Key.BEGIN_DATE_TIME + "]失败.", e); + TSDBReaderErrorCode.ILLEGAL_VALUE, "Analysis [" + Key.BEGIN_DATE_TIME + "] failed.", e); } long endTime; try { endTime = 
format.parse(originalConfig.getString(Key.END_DATE_TIME)).getTime(); } catch (ParseException e) { throw DataXException.asDataXException( - TSDBReaderErrorCode.ILLEGAL_VALUE, "解析[" + Key.END_DATE_TIME + "]失败.", e); + TSDBReaderErrorCode.ILLEGAL_VALUE, "Analysis [" + Key.END_DATE_TIME + "] failed.", e); } if (TimeUtils.isSecond(startTime)) { startTime *= 1000; @@ -186,13 +204,14 @@ public class TSDBReader extends Reader { DateTime startDateTime = new DateTime(TimeUtils.getTimeInHour(startTime)); DateTime endDateTime = new DateTime(TimeUtils.getTimeInHour(endTime)); + final Boolean isCombine = originalConfig.getBool(Key.COMBINE, Key.COMBINE_DEFAULT_VALUE); + if ("TSDB".equals(type)) { - // split by metric - for (String column : columns4TSDB) { + if (isCombine) { // split by time in hour while (startDateTime.isBefore(endDateTime)) { Configuration clone = this.originalConfig.clone(); - clone.set(Key.COLUMN, Collections.singletonList(column)); + clone.set(Key.COLUMN, columns4TSDB); clone.set(Key.BEGIN_DATE_TIME, startDateTime.getMillis()); startDateTime = startDateTime.plusMillis(splitIntervalMs); @@ -202,15 +221,30 @@ public class TSDBReader extends Reader { LOG.info("Configuration: {}", JSON.toJSONString(clone)); } + } else { + // split by time in hour + while (startDateTime.isBefore(endDateTime)) { + // split by metric + for (String column : columns4TSDB) { + Configuration clone = this.originalConfig.clone(); + clone.set(Key.COLUMN, Collections.singletonList(column)); + + clone.set(Key.BEGIN_DATE_TIME, startDateTime.getMillis()); + startDateTime = startDateTime.plusMillis(splitIntervalMs); + // Make sure the time interval is [start, end). + clone.set(Key.END_DATE_TIME, startDateTime.getMillis() - 1); + configurations.add(clone); + + LOG.info("Configuration: {}", JSON.toJSONString(clone)); + } + } } } else { - // split by metric - for (String metric : metrics) { - // split by time in hour + if (isCombine) { while (startDateTime.isBefore(endDateTime)) { Configuration clone = this.originalConfig.clone(); clone.set(Key.COLUMN, columns4RDB); - clone.set(Key.METRIC, Collections.singletonList(metric)); + clone.set(Key.METRIC, metrics); clone.set(Key.BEGIN_DATE_TIME, startDateTime.getMillis()); startDateTime = startDateTime.plusMillis(splitIntervalMs); @@ -220,6 +254,24 @@ public class TSDBReader extends Reader { LOG.info("Configuration: {}", JSON.toJSONString(clone)); } + } else { + // split by time in hour + while (startDateTime.isBefore(endDateTime)) { + // split by metric + for (String metric : metrics) { + Configuration clone = this.originalConfig.clone(); + clone.set(Key.COLUMN, columns4RDB); + clone.set(Key.METRIC, Collections.singletonList(metric)); + + clone.set(Key.BEGIN_DATE_TIME, startDateTime.getMillis()); + startDateTime = startDateTime.plusMillis(splitIntervalMs); + // Make sure the time interval is [start, end). 
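+                        // (END_DATE_TIME appears to be treated as an inclusive bound downstream, so one
+                        // millisecond is subtracted to keep adjacent slices from overlapping - an
+                        // assumption based on this split logic rather than on the query implementation.)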
+ clone.set(Key.END_DATE_TIME, startDateTime.getMillis() - 1); + configurations.add(clone); + + LOG.info("Configuration: {}", JSON.toJSONString(clone)); + } + } } } return configurations; @@ -247,6 +299,8 @@ public class TSDBReader extends Reader { private TSDBConnection conn; private Long startTime; private Long endTime; + private Boolean isCombine; + private Map hint; @Override public void init() { @@ -265,11 +319,16 @@ public class TSDBReader extends Reader { this.tags = readerSliceConfig.getMap(Key.TAG); String address = readerSliceConfig.getString(Key.ENDPOINT); + String username = readerSliceConfig.getString(Key.USERNAME); + String password = readerSliceConfig.getString(Key.PASSWORD); - conn = new TSDBConnection(address); + conn = new TSDBConnection(address, username, password); this.startTime = readerSliceConfig.getLong(Key.BEGIN_DATE_TIME); this.endTime = readerSliceConfig.getLong(Key.END_DATE_TIME); + + this.isCombine = readerSliceConfig.getBool(Key.COMBINE, Key.COMBINE_DEFAULT_VALUE); + this.hint = readerSliceConfig.getMap(Key.HINT); } @Override @@ -283,29 +342,35 @@ public class TSDBReader extends Reader { if ("TSDB".equals(type)) { for (String metric : columns4TSDB) { final Map tags = this.tags == null ? - null : (Map) this.tags.get(metric); + null : (Map) this.tags.get(metric); if (fields == null || !fields.containsKey(metric)) { - conn.sendDPs(metric, tags, this.startTime, this.endTime, recordSender); + conn.sendDPs(metric, tags, this.startTime, this.endTime, recordSender, hint); } else { conn.sendDPs(metric, (List) fields.get(metric), - tags, this.startTime, this.endTime, recordSender); + tags, this.startTime, this.endTime, recordSender, hint); } } } else { - for (String metric : metrics) { + if (isCombine) { final Map tags = this.tags == null ? - null : (Map) this.tags.get(metric); - if (fields == null || !fields.containsKey(metric)) { - conn.sendRecords(metric, tags, startTime, endTime, columns4RDB, recordSender); - } else { - conn.sendRecords(metric, (List) fields.get(metric), - tags, startTime, endTime, columns4RDB, recordSender); + null : (Map) this.tags.get(metrics.get(0)); + conn.sendRecords(metrics, tags, startTime, endTime, columns4RDB, recordSender, hint); + } else { + for (String metric : metrics) { + final Map tags = this.tags == null ? + null : (Map) this.tags.get(metric); + if (fields == null || !fields.containsKey(metric)) { + conn.sendRecords(metric, tags, startTime, endTime, columns4RDB, isCombine, recordSender, hint); + } else { + conn.sendRecords(metric, (List) fields.get(metric), + tags, startTime, endTime, columns4RDB, recordSender, hint); + } } } } } catch (Exception e) { throw DataXException.asDataXException( - TSDBReaderErrorCode.ILLEGAL_VALUE, "获取或发送数据点的过程中出错!", e); + TSDBReaderErrorCode.ILLEGAL_VALUE, "Error in getting or sending data point!", e); } } diff --git a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/Connection4TSDB.java b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/Connection4TSDB.java index 500894bb..96cb7f9d 100644 --- a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/Connection4TSDB.java +++ b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/Connection4TSDB.java @@ -22,6 +22,20 @@ public interface Connection4TSDB { */ String address(); + /** + * Get the address of Database. + * + * @return host+ip + */ + String username(); + + /** + * Get the address of Database. 
+ * + * @return host+ip + */ + String password(); + /** * Get the version of Database. * @@ -46,22 +60,27 @@ public interface Connection4TSDB { /** * Send data points for TSDB with single field. */ - void sendDPs(String metric, Map tags, Long start, Long end, RecordSender recordSender) throws Exception; + void sendDPs(String metric, Map tags, Long start, Long end, RecordSender recordSender, Map hint) throws Exception; /** * Send data points for TSDB with multi fields. */ - void sendDPs(String metric, List fields, Map tags, Long start, Long end, RecordSender recordSender) throws Exception; + void sendDPs(String metric, List fields, Map tags, Long start, Long end, RecordSender recordSender, Map hint) throws Exception; /** * Send data points for RDB with single field. */ - void sendRecords(String metric, Map tags, Long start, Long end, List columns4RDB, RecordSender recordSender) throws Exception; + void sendRecords(String metric, Map tags, Long start, Long end, List columns4RDB, Boolean isCombine, RecordSender recordSender, Map hint) throws Exception; /** * Send data points for RDB with multi fields. */ - void sendRecords(String metric, List fields, Map tags, Long start, Long end, List columns4RDB, RecordSender recordSender) throws Exception; + void sendRecords(String metric, List fields, Map tags, Long start, Long end, List columns4RDB, RecordSender recordSender, Map hint) throws Exception; + + /** + * Send data points for RDB with single fields on combine mode. + */ + void sendRecords(List metrics, Map tags, Long start, Long end, List columns4RDB, RecordSender recordSender, Map hint) throws Exception; /** * Put data point. diff --git a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/DataPoint4MultiFieldsTSDB.java b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/DataPoint4MultiFieldsTSDB.java index 5b380c73..3e8d43d4 100644 --- a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/DataPoint4MultiFieldsTSDB.java +++ b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/DataPoint4MultiFieldsTSDB.java @@ -1,6 +1,6 @@ package com.alibaba.datax.plugin.reader.tsdbreader.conn; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; import java.util.Map; diff --git a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/DataPoint4TSDB.java b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/DataPoint4TSDB.java index 5c5c1349..8724bfbb 100644 --- a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/DataPoint4TSDB.java +++ b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/DataPoint4TSDB.java @@ -1,6 +1,6 @@ package com.alibaba.datax.plugin.reader.tsdbreader.conn; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; import java.util.Map; diff --git a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/TSDBConnection.java b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/TSDBConnection.java index 5426ab49..479c16c1 100644 --- a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/TSDBConnection.java +++ b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/TSDBConnection.java @@ -2,7 +2,7 @@ package com.alibaba.datax.plugin.reader.tsdbreader.conn; import com.alibaba.datax.common.plugin.RecordSender; import com.alibaba.datax.plugin.reader.tsdbreader.util.TSDBUtils; -import com.alibaba.fastjson.JSON; +import 
com.alibaba.fastjson2.JSON; import org.apache.commons.lang3.StringUtils; import java.util.List; @@ -19,9 +19,13 @@ import java.util.Map; public class TSDBConnection implements Connection4TSDB { private String address; + private String username; + private String password; - public TSDBConnection(String address) { + public TSDBConnection(String address, String username, String password) { this.address = address; + this.username = username; + this.password = password; } @Override @@ -29,14 +33,24 @@ public class TSDBConnection implements Connection4TSDB { return address; } + @Override + public String username() { + return username; + } + + @Override + public String password() { + return password; + } + @Override public String version() { - return TSDBUtils.version(address); + return TSDBUtils.version(address, username, password); } @Override public String config() { - return TSDBUtils.config(address); + return TSDBUtils.config(address, username, password); } @Override @@ -45,23 +59,28 @@ public class TSDBConnection implements Connection4TSDB { } @Override - public void sendDPs(String metric, Map tags, Long start, Long end, RecordSender recordSender) throws Exception { - TSDBDump.dump4TSDB(this, metric, tags, start, end, recordSender); + public void sendDPs(String metric, Map tags, Long start, Long end, RecordSender recordSender, Map hint) throws Exception { + TSDBDump.dump4TSDB(this, metric, tags, start, end, recordSender, hint); } @Override - public void sendDPs(String metric, List fields, Map tags, Long start, Long end, RecordSender recordSender) throws Exception { - TSDBDump.dump4TSDB(this, metric, fields, tags, start, end, recordSender); + public void sendDPs(String metric, List fields, Map tags, Long start, Long end, RecordSender recordSender, Map hint) throws Exception { + TSDBDump.dump4TSDB(this, metric, fields, tags, start, end, recordSender, hint); } @Override - public void sendRecords(String metric, Map tags, Long start, Long end, List columns4RDB, RecordSender recordSender) throws Exception { - TSDBDump.dump4RDB(this, metric, tags, start, end, columns4RDB, recordSender); + public void sendRecords(String metric, Map tags, Long start, Long end, List columns4RDB, Boolean isCombine, RecordSender recordSender, Map hint) throws Exception { + TSDBDump.dump4RDB(this, metric, tags, start, end, columns4RDB, recordSender, hint); } @Override - public void sendRecords(String metric, List fields, Map tags, Long start, Long end, List columns4RDB, RecordSender recordSender) throws Exception { - TSDBDump.dump4RDB(this, metric, fields, tags, start, end, columns4RDB, recordSender); + public void sendRecords(List metrics, Map tags, Long start, Long end, List columns4RDB, RecordSender recordSender, Map hint) throws Exception { + TSDBDump.dump4RDB(this, metrics, tags, start, end, columns4RDB, recordSender, hint); + } + + @Override + public void sendRecords(String metric, List fields, Map tags, Long start, Long end, List columns4RDB, RecordSender recordSender, Map hint) throws Exception { + TSDBDump.dump4RDB(this, metric, fields, tags, start, end, columns4RDB, recordSender, hint); } @Override diff --git a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/TSDBDump.java b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/TSDBDump.java index 8bae3a70..05b9c5c2 100644 --- a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/TSDBDump.java +++ b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/TSDBDump.java @@ -4,15 
+4,16 @@ import com.alibaba.datax.common.element.*; import com.alibaba.datax.common.plugin.RecordSender; import com.alibaba.datax.plugin.reader.tsdbreader.Constant; import com.alibaba.datax.plugin.reader.tsdbreader.util.HttpUtils; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.parser.Feature; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONReader; +import com.alibaba.fastjson2.JSONReader.Feature; +import com.alibaba.fastjson2.JSONWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; +import java.util.*; + +import static com.alibaba.datax.plugin.reader.tsdbreader.Constant.METRIC_SPECIFY_KEY_PREFIX_LENGTH; /** * Copyright @ 2019 alibaba.com @@ -30,17 +31,17 @@ final class TSDBDump { private static final String QUERY_MULTI_FIELD = "/api/mquery"; static { - JSON.DEFAULT_PARSER_FEATURE &= ~Feature.UseBigDecimal.getMask(); + JSON.config(Feature.UseBigDecimalForDoubles); } private TSDBDump() { } static void dump4TSDB(TSDBConnection conn, String metric, Map tags, - Long start, Long end, RecordSender sender) throws Exception { + Long start, Long end, RecordSender sender, Map hint) throws Exception { LOG.info("conn address: {}, metric: {}, start: {}, end: {}", conn.address(), metric, start, end); - String res = queryRange4SingleField(conn, metric, tags, start, end); + String res = queryRange4SingleField(conn, metric, tags, start, end, hint); List dps = getDps4TSDB(metric, res); if (dps == null || dps.isEmpty()) { return; @@ -49,10 +50,10 @@ final class TSDBDump { } static void dump4TSDB(TSDBConnection conn, String metric, List fields, Map tags, - Long start, Long end, RecordSender sender) throws Exception { + Long start, Long end, RecordSender sender, Map hint) throws Exception { LOG.info("conn address: {}, metric: {}, start: {}, end: {}", conn.address(), metric, start, end); - String res = queryRange4MultiFields(conn, metric, fields, tags, start, end); + String res = queryRange4MultiFields(conn, metric, fields, tags, start, end, hint); List dps = getDps4TSDB(metric, fields, res); if (dps == null || dps.isEmpty()) { return; @@ -61,10 +62,10 @@ final class TSDBDump { } static void dump4RDB(TSDBConnection conn, String metric, Map tags, - Long start, Long end, List columns4RDB, RecordSender sender) throws Exception { + Long start, Long end, List columns4RDB, RecordSender sender, Map hint) throws Exception { LOG.info("conn address: {}, metric: {}, start: {}, end: {}", conn.address(), metric, start, end); - String res = queryRange4SingleField(conn, metric, tags, start, end); + String res = queryRange4SingleField(conn, metric, tags, start, end, hint); List dps = getDps4RDB(metric, res); if (dps == null || dps.isEmpty()) { return; @@ -92,12 +93,71 @@ final class TSDBDump { } } + public static void dump4RDB(TSDBConnection conn, List metrics, Map tags, Long start, Long end, List columns4RDB, RecordSender sender, Map hint) throws Exception { + LOG.info("conn address: {}, metric: {}, start: {}, end: {}", conn.address(), metrics, start, end); + + List dps = new LinkedList<>(); + for (String metric : metrics) { + String res = queryRange4SingleField(conn, metric, tags, start, end, hint); + final List dpList = getDps4RDB(metric, res); + if (dpList == null || dpList.isEmpty()) { + continue; + } + dps.addAll(dpList); + } + if (dps.isEmpty()) { + return; + } + Map> dpsCombinedByTs = new LinkedHashMap<>(); + for (DataPoint4TSDB dp : dps) { + final long ts = 
dp.getTimestamp(); + final Map dpsWithSameTs = dpsCombinedByTs.computeIfAbsent(ts, k -> new LinkedHashMap<>()); + dpsWithSameTs.put(dp.getMetric(), dp); + } + + for (Map.Entry> entry : dpsCombinedByTs.entrySet()) { + final Long ts = entry.getKey(); + final Map metricAndDps = entry.getValue(); + final Record record = sender.createRecord(); + DataPoint4TSDB tmpDp = null; + + for (final String column : columns4RDB) { + if (column.startsWith(Constant.METRIC_SPECIFY_KEY)) { + final String m = column.substring(METRIC_SPECIFY_KEY_PREFIX_LENGTH); + tmpDp = metricAndDps.get(m); + if (tmpDp == null) { + continue; + } + record.addColumn(getColumn(tmpDp.getValue())); + } else if (Constant.TS_SPECIFY_KEY.equals(column)) { + record.addColumn(new LongColumn(ts)); + } else if (Constant.VALUE_SPECIFY_KEY.equals(column)) { + // combine 模式下,不应该定义 __value__ 字段,因为 __metric__.xxx 字段会输出对应的 value 值 + throw new RuntimeException("The " + Constant.VALUE_SPECIFY_KEY + + " column should not be specified in combine mode!"); + } else { + // combine 模式下,应该确保 __metric__.xxx 字段的定义,放在 column 数组的最前面,以保证获取到 metric + if (tmpDp == null) { + throw new RuntimeException("These " + Constant.METRIC_SPECIFY_KEY_PREFIX + + " column should be placed first in the column array in combine mode!"); + } + final Object tagv = tmpDp.getTags().get(column); + if (tagv == null) { + continue; + } + record.addColumn(getColumn(tagv)); + } + } + sender.sendToWriter(record); + } + } + static void dump4RDB(TSDBConnection conn, String metric, List fields, Map tags, Long start, Long end, - List columns4RDB, RecordSender sender) throws Exception { + List columns4RDB, RecordSender sender, Map hint) throws Exception { LOG.info("conn address: {}, metric: {}, start: {}, end: {}", conn.address(), metric, start, end); - String res = queryRange4MultiFields(conn, metric, fields, tags, start, end); + String res = queryRange4MultiFields(conn, metric, fields, tags, start, end, hint); List dps = getDps4RDB(metric, fields, res); if (dps == null || dps.isEmpty()) { return; @@ -131,14 +191,16 @@ final class TSDBDump { valueColumn = new LongColumn((Long) value); } else if (value instanceof String) { valueColumn = new StringColumn((String) value); + } else if (value instanceof Integer) { + valueColumn = new LongColumn(((Integer)value).longValue()); } else { - throw new Exception(String.format("value 不支持类型: [%s]", value.getClass().getSimpleName())); + throw new Exception(String.format("value not supported type: [%s]", value.getClass().getSimpleName())); } return valueColumn; } private static String queryRange4SingleField(TSDBConnection conn, String metric, Map tags, - Long start, Long end) throws Exception { + Long start, Long end, Map hint) throws Exception { String tagKV = getFilterByTags(tags); String body = "{\n" + " \"start\": " + start + ",\n" + @@ -148,14 +210,15 @@ final class TSDBDump { " \"aggregator\": \"none\",\n" + " \"metric\": \"" + metric + "\"\n" + (tagKV == null ? "" : tagKV) + + (hint == null ? 
"" : (", \"hint\": " + JSON.toJSONString(hint))) + " }\n" + " ]\n" + "}"; - return HttpUtils.post(conn.address() + QUERY, body); + return HttpUtils.post(conn.address() + QUERY, conn.username(), conn.password(), body); } private static String queryRange4MultiFields(TSDBConnection conn, String metric, List fields, - Map tags, Long start, Long end) throws Exception { + Map tags, Long start, Long end, Map hint) throws Exception { // fields StringBuilder fieldBuilder = new StringBuilder(); fieldBuilder.append("\"fields\":["); @@ -177,10 +240,11 @@ final class TSDBDump { " \"metric\": \"" + metric + "\",\n" + fieldBuilder.toString() + (tagKV == null ? "" : tagKV) + + (hint == null ? "" : (", \"hint\": " + JSON.toJSONString(hint))) + " }\n" + " ]\n" + "}"; - return HttpUtils.post(conn.address() + QUERY_MULTI_FIELD, body); + return HttpUtils.post(conn.address() + QUERY_MULTI_FIELD, conn.username(), conn.password(), body); } private static String getFilterByTags(Map tags) { diff --git a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/util/HttpUtils.java b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/util/HttpUtils.java index 3e0be854..af81988c 100644 --- a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/util/HttpUtils.java +++ b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/util/HttpUtils.java @@ -1,11 +1,13 @@ package com.alibaba.datax.plugin.reader.tsdbreader.util; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; +import org.apache.commons.lang3.StringUtils; import org.apache.http.client.fluent.Content; import org.apache.http.client.fluent.Request; import org.apache.http.entity.ContentType; import java.nio.charset.StandardCharsets; +import java.util.Base64; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -22,13 +24,18 @@ public final class HttpUtils { public final static int CONNECT_TIMEOUT_DEFAULT_IN_MILL = (int) TimeUnit.SECONDS.toMillis(60); public final static int SOCKET_TIMEOUT_DEFAULT_IN_MILL = (int) TimeUnit.SECONDS.toMillis(60); + private static final String CREDENTIALS_FORMAT = "%s:%s"; + private static final String BASIC_AUTHENTICATION_FORMAT = "Basic %s"; + private HttpUtils() { } - public static String get(String url) throws Exception { - Content content = Request.Get(url) + public static String get(String url, String username, String password) throws Exception { + final Request request = Request.Get(url) .connectTimeout(CONNECT_TIMEOUT_DEFAULT_IN_MILL) - .socketTimeout(SOCKET_TIMEOUT_DEFAULT_IN_MILL) + .socketTimeout(SOCKET_TIMEOUT_DEFAULT_IN_MILL); + addAuth(request, username, password); + Content content = request .execute() .returnContent(); if (content == null) { @@ -37,24 +44,21 @@ public final class HttpUtils { return content.asString(StandardCharsets.UTF_8); } - public static String post(String url, Map params) throws Exception { - return post(url, JSON.toJSONString(params), CONNECT_TIMEOUT_DEFAULT_IN_MILL, SOCKET_TIMEOUT_DEFAULT_IN_MILL); + public static String post(String url, String username, String password, Map params) throws Exception { + return post(url, username, password, JSON.toJSONString(params), CONNECT_TIMEOUT_DEFAULT_IN_MILL, SOCKET_TIMEOUT_DEFAULT_IN_MILL); } - public static String post(String url, String params) throws Exception { - return post(url, params, CONNECT_TIMEOUT_DEFAULT_IN_MILL, SOCKET_TIMEOUT_DEFAULT_IN_MILL); + public static String post(String url, String username, String password, String params) throws Exception { + return 
post(url, username, password, params, CONNECT_TIMEOUT_DEFAULT_IN_MILL, SOCKET_TIMEOUT_DEFAULT_IN_MILL); } - public static String post(String url, Map params, + public static String post(String url, String username, String password, String params, int connectTimeoutInMill, int socketTimeoutInMill) throws Exception { - return post(url, JSON.toJSONString(params), connectTimeoutInMill, socketTimeoutInMill); - } - - public static String post(String url, String params, - int connectTimeoutInMill, int socketTimeoutInMill) throws Exception { - Content content = Request.Post(url) + Request request = Request.Post(url) .connectTimeout(connectTimeoutInMill) - .socketTimeout(socketTimeoutInMill) + .socketTimeout(socketTimeoutInMill); + addAuth(request, username, password); + Content content = request .addHeader("Content-Type", "application/json") .bodyString(params, ContentType.APPLICATION_JSON) .execute() @@ -64,4 +68,20 @@ public final class HttpUtils { } return content.asString(StandardCharsets.UTF_8); } + + private static void addAuth(Request request, String username, String password) { + String authorization = generateHttpAuthorization(username, password); + if (authorization != null) { + request.setHeader("Authorization", authorization); + } + } + + private static String generateHttpAuthorization(String username, String password) { + if (StringUtils.isBlank(username) || StringUtils.isBlank(password)) { + return null; + } + String credentials = String.format(CREDENTIALS_FORMAT, username, password); + credentials = Base64.getEncoder().encodeToString(credentials.getBytes()); + return String.format(BASIC_AUTHENTICATION_FORMAT, credentials); + } } diff --git a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/util/TSDBUtils.java b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/util/TSDBUtils.java index bb7b4b87..d91c3557 100644 --- a/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/util/TSDBUtils.java +++ b/tsdbreader/src/main/java/com/alibaba/datax/plugin/reader/tsdbreader/util/TSDBUtils.java @@ -1,11 +1,5 @@ package com.alibaba.datax.plugin.reader.tsdbreader.util; -import com.alibaba.datax.plugin.reader.tsdbreader.conn.DataPoint4TSDB; -import com.alibaba.fastjson.JSON; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.List; /** * Copyright @ 2019 alibaba.com @@ -17,52 +11,28 @@ import java.util.List; */ public final class TSDBUtils { - private static final Logger LOGGER = LoggerFactory.getLogger(TSDBUtils.class); - private TSDBUtils() { } - public static String version(String address) { + public static String version(String address, String username, String password) { String url = String.format("%s/api/version", address); String rsp; try { - rsp = HttpUtils.get(url); + rsp = HttpUtils.get(url, username, password); } catch (Exception e) { throw new RuntimeException(e); } return rsp; } - public static String config(String address) { + public static String config(String address, String username, String password) { String url = String.format("%s/api/config", address); String rsp; try { - rsp = HttpUtils.get(url); + rsp = HttpUtils.get(url, username, password); } catch (Exception e) { throw new RuntimeException(e); } return rsp; } - - public static boolean put(String address, List dps) { - return put(address, JSON.toJSON(dps)); - } - - public static boolean put(String address, DataPoint4TSDB dp) { - return put(address, JSON.toJSON(dp)); - } - - private static boolean put(String address, Object o) { - String url = 
String.format("%s/api/put", address); - String rsp; - try { - rsp = HttpUtils.post(url, o.toString()); - // If successful, the returned content should be null. - assert rsp == null; - } catch (Exception e) { - LOGGER.error("Address: {}, DataPoints: {}", url, o); - throw new RuntimeException(e); - } - return true; - } } diff --git a/tsdbreader/src/main/resources/plugin.json b/tsdbreader/src/main/resources/plugin.json index f2dbb1f0..3b10d228 100755 --- a/tsdbreader/src/main/resources/plugin.json +++ b/tsdbreader/src/main/resources/plugin.json @@ -6,5 +6,5 @@ "mechanism": "通过 /api/query 接口查询出符合条件的数据点", "warn": "指定起止时间会自动忽略分钟和秒,转为整点时刻,例如 2019-4-18 的 [3:35, 4:55) 会被转为 [3:00, 4:00)" }, - "developer": "Benedict Jin" + "developer": "alibaba" } diff --git a/tsdbreader/src/test/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/TSDBConnectionTest.java b/tsdbreader/src/test/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/TSDBConnectionTest.java index e4544088..6be291e8 100644 --- a/tsdbreader/src/test/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/TSDBConnectionTest.java +++ b/tsdbreader/src/test/java/com/alibaba/datax/plugin/reader/tsdbreader/conn/TSDBConnectionTest.java @@ -19,12 +19,12 @@ public class TSDBConnectionTest { @Test public void testVersion() { - String version = new TSDBConnection(TSDB_ADDRESS).version(); + String version = new TSDBConnection(TSDB_ADDRESS,null,null).version(); Assert.assertNotNull(version); } @Test public void testIsSupported() { - Assert.assertTrue(new TSDBConnection(TSDB_ADDRESS).isSupported()); + Assert.assertTrue(new TSDBConnection(TSDB_ADDRESS,null,null).isSupported()); } } diff --git a/tsdbreader/src/test/java/com/alibaba/datax/plugin/reader/tsdbreader/util/HttpUtilsTest.java b/tsdbreader/src/test/java/com/alibaba/datax/plugin/reader/tsdbreader/util/HttpUtilsTest.java deleted file mode 100644 index 12a2660a..00000000 --- a/tsdbreader/src/test/java/com/alibaba/datax/plugin/reader/tsdbreader/util/HttpUtilsTest.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.alibaba.datax.plugin.reader.tsdbreader.util; - -import org.junit.Assert; -import org.junit.Ignore; -import org.junit.Test; - -import java.util.HashMap; -import java.util.Map; - -/** - * Copyright @ 2019 alibaba.com - * All right reserved. 
- * Function:HttpUtils Test
- *
- * @author Benedict Jin
- * @since 2019-10-21
- */
-@Ignore
-public class HttpUtilsTest {
-
- @Test
- public void testSimpleCase() throws Exception {
- String url = "https://httpbin.org/post";
- Map params = new HashMap<>();
- params.put("foo", "bar");
-
- String rsp = HttpUtils.post(url, params);
- System.out.println(rsp);
- Assert.assertNotNull(rsp);
- }
-
- @Test
- public void testGet() throws Exception {
- String url = String.format("%s/api/version", Const.TSDB_ADDRESS);
- String rsp = HttpUtils.get(url);
- System.out.println(rsp);
- Assert.assertNotNull(rsp);
- }
-}
diff --git a/tsdbwriter/doc/tsdbhttpwriter.md b/tsdbwriter/doc/tsdbhttpwriter.md
index c723a360..c0b13654 100644
--- a/tsdbwriter/doc/tsdbhttpwriter.md
+++ b/tsdbwriter/doc/tsdbhttpwriter.md
@@ -6,24 +6,57 @@ ___
 ## 1 快速介绍
-TSDBWriter 插件实现了将数据点写入到阿里巴巴自研 TSDB 数据库中(后续简称 TSDB)。
+TSDBWriter 插件实现了将数据点写入到阿里巴巴云原生多模数据库Lindorm TSDB数据库中(后续简称 TSDB)。
-时间序列数据库(Time Series Database , 简称 TSDB)是一种高性能,低成本,稳定可靠的在线时序数据库服务;提供高效读写,高压缩比存储、时序数据插值及聚合计算,广泛应用于物联网(IoT)设备监控系统 ,企业能源管理系统(EMS),生产安全监控系统,电力检测系统等行业场景。 TSDB 提供百万级时序数据秒级写入,高压缩比低成本存储、预降采样、插值、多维聚合计算,查询结果可视化功能;解决由于设备采集点数量巨大,数据采集频率高,造成的存储成本高,写入和查询分析效率低的问题。更多关于 TSDB 的介绍,详见[阿里云 TSDB 官网](https://help.aliyun.com/product/54825.html)。
+时间序列数据库(Time Series Database , 简称 TSDB)是一种高性能,低成本,稳定可靠的在线时序数据库服务;提供高效读写,高压缩比存储、时序数据插值及聚合计算,广泛应用于物联网(IoT)设备监控系统 ,企业能源管理系统(EMS),生产安全监控系统,电力检测系统等行业场景。 TSDB 提供千万级时序数据秒级写入,高压缩比低成本存储、预降采样、插值、多维聚合计算,查询结果可视化功能;解决由于设备采集点数量巨大,数据采集频率高,造成的存储成本高,写入和查询分析效率低的问题。更多关于 TSDB 的介绍,详见[阿里云 Lindorm TSDB 官网](https://help.aliyun.com/document_detail/174600.html)。
+注意:阿里巴巴自研HiTSDB已全新升级为云原生多模数据库Lindorm TSDB。Lindorm TSDB兼容大部分HiTSDB的HTTP API并提供原生SQL能力,TSDBWriter插件使用HTTP API方式写入,要使用原生SQL能力需要提前在Lindorm TSDB进行建表。详细参见[与旧版TSDB的比较](https://help.aliyun.com/document_detail/387477.html)
 ## 2 实现原理
-通过 HTTP 连接 TSDB 实例,并通过 `/api/put` 接口将数据点写入。关于写入接口详见 TSDB 的[接口说明文档](https://help.aliyun.com/document_detail/59939.html)。
-
-
+通过TSDB客户端 hitsdb-client 连接 TSDB 实例,并将数据点通过HTTP API方式写入。关于写入接口详见TSDB 的[SDK 参考](https://help.aliyun.com/document_detail/61587.html)。
 ## 3 功能说明
 ### 3.1 配置样例
-* 配置一个从 OpenTSDB 数据库同步抽取数据到 TSDB:
+* 配置TSDB Writer:
+```json
+{
+  "name": "tsdbwriter",
+  "parameter": {
+    "endpoint": "http://localhost:8242",
+    "sourceDbType": "RDB",
+    "batchSize": 256,
+    "columnType": [
+      "tag",
+      "tag",
+      "field_string",
+      "field_double",
+      "timestamp",
+      "field_bool"
+    ],
+    "column": [
+      "tag1",
+      "tag2",
+      "field1",
+      "field2",
+      "timestamp",
+      "field3"
+    ],
+    "multiField":"true",
+    "table":"testmetric",
+    "username":"xxx",
+    "password":"xxx",
+    "ignoreWriteError":"false",
+    "database":"default"
+  }
+}
+```
+
+* 配置一个从支持 OpenTSDB 协议的数据库同步抽取数据到 TSDB:
 ```json
 {
@@ -42,7 +75,7 @@ TSDBWriter 插件实现了将数据点写入到阿里巴巴自研 TSDB 数据库
       }
     },
     "writer": {
-      "name": "tsdbhttpwriter",
+      "name": "tsdbwriter",
       "parameter": {
         "endpoint": "http://localhost:8242"
       }
@@ -58,7 +91,37 @@ TSDBWriter 插件实现了将数据点写入到阿里巴巴自研 TSDB 数据库
   }
 }
 ```
-
+* 使用 OpenTSDB (单值)协议写入TSDB(不推荐):
+```json
+{
+  "name": "tsdbwriter",
+  "parameter": {
+    "endpoint": "http://localhost:8242",
+    "sourceDbType": "RDB",
+    "batchSize": 256,
+    "columnType": [
+      "tag",
+      "tag",
+      "field_string",
+      "field_double",
+      "timestamp",
+      "field_bool"
+    ],
+    "column": [
+      "tag1",
+      "tag2",
+      "field_metric_1",
+      "field_metric_2",
+      "timestamp",
+      "field_metric_3"
+    ],
+    "username":"tsdb",
+    "password":"enxU^",
+    "ignoreWriteError":"false"
+  }
+}
+```
+转换到的TSDB 表名(metric)由column中field对应的列名决定:对于上述配置,一行关系型数据将会写入三个metric(field_metric_1,field_metric_2,field_metric_3)。
 ### 3.2 参数说明
@@ -74,23 +137,85 @@ TSDBWriter 插件实现了将数据点写入到阿里巴巴自研 TSDB 数据库
 * 格式:http://IP:Port
 * 默认值:无
+* **sourceDbType**
+ * 描述:源端数据类型
+ * 必选:否
+ * 格式:string [RDB或者TSDB]
+ * 默认值:TSDB
+
+* **multiField**
+ * 描述:使用HTTP API多值(多个field)方式写入,目前TSDB版本使用多值写入,需要指定为true
+ * 必选:是
+ * 格式:bool
+ * 默认值:false (单值)
+ * 说明: 如果使用Lindorm TSDB原生SQL能力访问HTTP API方式写入的数据,需要在TSDB进行预建表,否则只能使用HiTSDB HTTP API方式[查询数据](https://help.aliyun.com/document_detail/107576.html)。
+
+* **column**
+ * 描述:关系型数据库中表的字段名
+ * 必选:当sourceDbType为RDB时必选
+ * 格式:string
+ * 默认值:无
+ * 说明: 此处的字段顺序,需要和Reader插件中配置的column字段的顺序保持一致。
+
+* **columnType**
+ * 描述:关系型数据库中表字段,映射到TSDB中的类型。支持的类型如下所示:
+ * timestamp:该字段为时间戳
+ * tag:该字段为tag
+ * field_string: 该Field的value是字符串类型
+ * field_double: 该Field的value是数值类型
+ * field_bool: 该Field的value是布尔类型
+ * 必选:当sourceDbType为RDB时必选
+ * 格式:string
+ * 默认值: 无
+ * 说明: 此处的字段顺序,需要和column配置中的字段顺序保持一致
+
+* **table**
+ * 描述:TSDB对应表名(metric)
+ * 必选:当sourceDbType为RDB时且multiField为true时必选
+ * 格式:string
+ * 默认值:无
+ * 说明: 要导入的TSDB表名,如果multiField为false,不需要填写,对应的metric需要写到column字段
+
 * **batchSize**
 * 描述:每次批量数据的条数
 * 必选:否
 * 格式:int,需要保证大于 0
 * 默认值:100
+* **ignoreWriteError**
+ * 描述:如果设置为 true,则忽略写入错误,继续写入;否则,多次重试后仍写入失败的话,则会终止写入任务
+ * 必选:否
+ * 格式:bool
+ * 默认值:false
+
+* **username**
+ * 描述:数据库用户名
+ * 必选:否
+ * 格式:string
+ * 默认值:无
+ * 说明: TSDB配置了鉴权需要填写
+
+
+* **password**
+ * 描述:数据库密码
+ * 必选:否
+ * 格式:string
+ * 默认值:无
+ * 说明: TSDB配置了鉴权需要填写
+
+* **database**
+ * 描述:导入的数据库
+ * 必选:否
+ * 格式:string
+ * 默认值:default
+ * 说明: TSDB需要提前创建数据库
+
 * **maxRetryTime**
 * 描述:失败后重试的次数
 * 必选:否
 * 格式:int,需要保证大于 1
 * 默认值:3
-* **ignoreWriteError**
- * 描述:如果设置为 true,则忽略写入错误,继续写入;否则,多次重试后仍写入失败的话,则会终止写入任务
- * 必选:否
- * 格式:bool
- * 默认值:false
@@ -171,7 +296,7 @@ HBase 机型: 8C16G * 5
 ## 5 约束限制
-### 5.1 目前只支持兼容 TSDB 2.4.x 及以上版本
+### 5.1 目前支持Lindorm TSDB全部版本 以及 HiTSDB 2.4.x 及以上版本
 其他版本暂不保证兼容
diff --git a/tsdbwriter/pom.xml b/tsdbwriter/pom.xml
index 1fb7c1e0..9f997123 100644
--- a/tsdbwriter/pom.xml
+++ b/tsdbwriter/pom.xml
@@ -24,9 +24,6 @@ 4.5 2.4 - - 1.2.28 - 4.13.1
@@ -41,10 +38,6 @@ slf4j-log4j12 org.slf4j - - fastjson - com.alibaba - commons-math3 org.apache.commons
@@ -86,9 +79,15 @@ - com.alibaba - fastjson - ${fastjson.version} + com.alibaba.fastjson2 + fastjson2 + + + + + com.aliyun + hitsdb-client + 0.3.7
diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/Connection4TSDB.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/Connection4TSDB.java
index 8119348d..ecb30055 100644
--- a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/Connection4TSDB.java
+++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/Connection4TSDB.java
@@ -21,6 +21,28 @@ public interface Connection4TSDB {
 */
 String address();
+ /**
+ * Get the configured database name.
+ *
+ * @return database
+ */
+ String database();
+
+
+ /**
+ * Get the username of Database.
+ *
+ * @return username
+ */
+ String username();
+
+ /**
+ * Get the password of Database.
+ *
+ * @return password
+ */
+ String password();
+
 /**
 * Get the version of Database.
 *
@@ -69,17 +91,25 @@ public interface Connection4TSDB {
 boolean put(List dps);
 /**
- * Put data points.
+ * Put data points with single field.
 *
 * @param dps data points
 * @return whether the data point is written successfully
 */
 boolean put(String dps);
+ /**
+ * Put data points with multi fields.
+ *
+ * @param dps data points
+ * @return whether the data point is written successfully
+ */
+ boolean mput(String dps);
+
 /**
 * Whether current version is supported.
* * @return true: supported; false: not yet! */ boolean isSupported(); -} +} \ No newline at end of file diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/DataPoint4TSDB.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/DataPoint4TSDB.java index fee012df..b6e2d309 100644 --- a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/DataPoint4TSDB.java +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/DataPoint4TSDB.java @@ -1,6 +1,6 @@ package com.alibaba.datax.plugin.writer.conn; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; import java.util.Map; diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/TSDBConnection.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/TSDBConnection.java index e4ebad7d..5266f5d9 100644 --- a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/TSDBConnection.java +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/conn/TSDBConnection.java @@ -2,7 +2,7 @@ package com.alibaba.datax.plugin.writer.conn; import com.alibaba.datax.common.plugin.RecordSender; import com.alibaba.datax.plugin.writer.util.TSDBUtils; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; import org.apache.commons.lang3.StringUtils; import java.util.List; @@ -18,12 +18,18 @@ import java.util.List; public class TSDBConnection implements Connection4TSDB { private String address; + private String username; + private String password; + private String database; - public TSDBConnection(String address) { + public TSDBConnection(String address, String database, String username, String password) { if (StringUtils.isBlank(address)) { throw new RuntimeException("TSDBConnection init failed because address is blank!"); } this.address = address; + this.database = database; + this.username = username; + this.password = password; } @Override @@ -31,14 +37,29 @@ public class TSDBConnection implements Connection4TSDB { return address; } + @Override + public String username() { + return username; + } + + @Override + public String database() { + return database; + } + + @Override + public String password() { + return password; + } + @Override public String version() { - return TSDBUtils.version(address); + return TSDBUtils.version(address, username, password); } @Override public String config() { - return TSDBUtils.config(address); + return TSDBUtils.config(address, username, password); } @Override @@ -53,17 +74,22 @@ public class TSDBConnection implements Connection4TSDB { @Override public boolean put(DataPoint4TSDB dp) { - return TSDBUtils.put(address, dp); + return TSDBUtils.put(address, database, username, password, dp); } @Override public boolean put(List dps) { - return TSDBUtils.put(address, dps); + return TSDBUtils.put(address, database, username, password, dps); } @Override public boolean put(String dps) { - return TSDBUtils.put(address, dps); + return TSDBUtils.put(address, database, username, password, dps); + } + + @Override + public boolean mput(String dps) { + return TSDBUtils.mput(address, database, username, password, dps); } @Override diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/Key.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/Key.java index 2cc3f671..6cb239ec 100755 --- a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/Key.java +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/Key.java @@ -10,8 +10,22 @@ package 
com.alibaba.datax.plugin.writer.tsdbwriter; */ public class Key { + static final String SOURCE_DB_TYPE = "sourceDbType"; + static final String MULTI_FIELD = "multiField"; + + // common static final String ENDPOINT = "endpoint"; + static final String USERNAME = "username"; + static final String PASSWORD = "password"; + static final String IGNORE_WRITE_ERROR = "ignoreWriteError"; + static final String DATABASE = "database"; + + // for tsdb static final String BATCH_SIZE = "batchSize"; static final String MAX_RETRY_TIME = "maxRetryTime"; - static final String IGNORE_WRITE_ERROR = "ignoreWriteError"; + + // for rdb + static final String COLUMN = "column"; + static final String COLUMN_TYPE = "columnType"; + static final String TABLE = "table"; } diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/SourceDBType.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/SourceDBType.java new file mode 100644 index 00000000..792806a6 --- /dev/null +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/SourceDBType.java @@ -0,0 +1,5 @@ +package com.alibaba.datax.plugin.writer.tsdbwriter; + +public enum SourceDBType { + TSDB, RDB +} diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBConverter.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBConverter.java new file mode 100644 index 00000000..9bde0c9e --- /dev/null +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBConverter.java @@ -0,0 +1,96 @@ +package com.alibaba.datax.plugin.writer.tsdbwriter; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.fastjson2.JSON; +import com.aliyun.hitsdb.client.value.request.MultiFieldPoint; +import com.aliyun.hitsdb.client.value.request.Point; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +class TSDBConverter { + + private static final Logger LOG = LoggerFactory.getLogger(TSDBConverter.class); + + private List columnName; + private List columnType; + + TSDBConverter(List columnName, List columnType) { + this.columnName = columnName; + this.columnType = columnType; + LOG.info("columnName: {}, columnType: {}", JSON.toJSONString(columnName), JSON.toJSONString(columnType)); + } + + List transRecord2Point(List records) { + List dps = new ArrayList(); + for (Record record : records) { + List metricBuilders = new ArrayList(); + Map tags = new HashMap(); + Long time = 0L; + + for (int i = 0; i < columnType.size(); i++) { + String type = columnType.get(i); + String name = columnName.get(i); + Column column = record.getColumn(i); + if (TSDBModel.TSDB_TAG.equals(type)) { + tags.put(name, column.asString()); + } else if (TSDBModel.TSDB_FIELD_DOUBLE.equals(type)) { + metricBuilders.add(new Point.MetricBuilder(name).value(column.asDouble())); + } else if (TSDBModel.TSDB_FIELD_STRING.equals(type)) { + metricBuilders.add(new Point.MetricBuilder(name).value(column.asString())); + } else if (TSDBModel.TSDB_FIELD_BOOL.equals(type)) { + metricBuilders.add(new Point.MetricBuilder(name).value(column.asBoolean())); + } else if (TSDBModel.TSDB_TIMESTAMP.equals(type)) { + time = column.asLong(); + } else if (TSDBModel.TSDB_METRIC_NUM.equals(type)) { + // compatible with previous usage of TSDB_METRIC_NUM + metricBuilders.add(new Point.MetricBuilder(name).value(column.asDouble())); + } else if 
(TSDBModel.TSDB_METRIC_STRING.equals(type)) { + // compatible with previous usage of TSDB_METRIC_STRING + metricBuilders.add(new Point.MetricBuilder(name).value(column.asString())); + } + } + for (Point.MetricBuilder metricBuilder : metricBuilders) { + dps.add(metricBuilder.tag(tags).timestamp(time).build(false)); + } + } + return dps; + } + + List transRecord2MultiFieldPoint(List records, String tableName) { + List dps = new ArrayList(); + for (Record record : records) { + MultiFieldPoint.MetricBuilder builder = MultiFieldPoint.metric(tableName); + for (int i = 0; i < columnType.size(); i++) { + String type = columnType.get(i); + String name = columnName.get(i); + Column column = record.getColumn(i); + if (TSDBModel.TSDB_TAG.equals(type)) { + builder.tag(name, column.asString()); + } else if (TSDBModel.TSDB_FIELD_DOUBLE.equals(type)) { + builder.field(name, column.asDouble()); + } else if (TSDBModel.TSDB_FIELD_STRING.equals(type)) { + builder.field(name, column.asString()); + } else if (TSDBModel.TSDB_FIELD_BOOL.equals(type)) { + builder.field(name, column.asBoolean()); + } else if (TSDBModel.TSDB_TIMESTAMP.equals(type)) { + builder.timestamp(column.asLong()); + } else if (TSDBModel.TSDB_METRIC_NUM.equals(type)) { + // compatible with previous usage of TSDB_METRIC_NUM + builder.field(name, column.asDouble()); + } else if (TSDBModel.TSDB_METRIC_STRING.equals(type)) { + // compatible with previous usage of TSDB_METRIC_STRING + builder.field(name, column.asString()); + } + } + MultiFieldPoint point = builder.build(false); + dps.add(point); + } + return dps; + } +} diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBModel.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBModel.java new file mode 100644 index 00000000..ead0e2cc --- /dev/null +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBModel.java @@ -0,0 +1,11 @@ +package com.alibaba.datax.plugin.writer.tsdbwriter; + +class TSDBModel { + static final String TSDB_METRIC_NUM = "metric_num"; + static final String TSDB_METRIC_STRING = "metric_string"; + static final String TSDB_TAG = "tag"; + static final String TSDB_TIMESTAMP = "timestamp"; + static final String TSDB_FIELD_DOUBLE = "field_double"; + static final String TSDB_FIELD_STRING = "field_string"; + static final String TSDB_FIELD_BOOL = "field_bool"; +} \ No newline at end of file diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriter.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriter.java index e410b2ba..433527da 100755 --- a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriter.java +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriter.java @@ -5,14 +5,23 @@ import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.RecordReceiver; import com.alibaba.datax.common.spi.Writer; import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.ConfigurationUtil; import com.alibaba.datax.common.util.RetryUtil; import com.alibaba.datax.plugin.writer.conn.TSDBConnection; +import com.aliyun.hitsdb.client.TSDB; +import com.aliyun.hitsdb.client.TSDBClientFactory; +import com.aliyun.hitsdb.client.TSDBConfig; +import com.aliyun.hitsdb.client.value.request.MultiFieldPoint; +import com.aliyun.hitsdb.client.value.request.Point; +import com.aliyun.hitsdb.client.value.response.batch.IgnoreErrorsResult; +import 
com.aliyun.hitsdb.client.value.response.batch.MultiFieldIgnoreErrorsResult; +import com.aliyun.hitsdb.client.value.response.batch.SummaryResult; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.List; +import java.io.IOException; +import java.util.*; import java.util.concurrent.Callable; /** @@ -26,6 +35,9 @@ import java.util.concurrent.Callable; @SuppressWarnings("unused") public class TSDBWriter extends Writer { + private static SourceDBType DB_TYPE; + private static TSDB tsdb = null; + public static class Job extends Writer.Job { private static final Logger LOG = LoggerFactory.getLogger(Job.class); @@ -34,33 +46,100 @@ public class TSDBWriter extends Writer { @Override public void init() { - this.originalConfig = super.getPluginJobConf(); + originalConfig = super.getPluginJobConf(); - String address = this.originalConfig.getString(Key.ENDPOINT); - if (StringUtils.isBlank(address)) { + // check source db type + String sourceDbType = originalConfig.getString(Key.SOURCE_DB_TYPE); + if (StringUtils.isBlank(sourceDbType)) { + sourceDbType = SourceDBType.TSDB.name(); + originalConfig.set(Key.SOURCE_DB_TYPE, sourceDbType); + LOG.info("The parameter [" + Key.SOURCE_DB_TYPE + "] will be default value: " + SourceDBType.TSDB); + } + try { + DB_TYPE = SourceDBType.valueOf(sourceDbType); + } catch (Exception e) { throw DataXException.asDataXException(TSDBWriterErrorCode.REQUIRED_VALUE, - "The parameter [" + Key.ENDPOINT + "] is not set."); + "The parameter [" + Key.SOURCE_DB_TYPE + + "] is invalid, which should be one of [" + Arrays.toString(SourceDBType.values()) + "]."); } - Integer batchSize = this.originalConfig.getInt(Key.BATCH_SIZE); - if (batchSize == null || batchSize < 1) { - originalConfig.set(Key.BATCH_SIZE, Constant.DEFAULT_BATCH_SIZE); - LOG.info("The parameter [" + Key.BATCH_SIZE + - "] will be default value: " + Constant.DEFAULT_BATCH_SIZE); - } + // for tsdb + if (DB_TYPE == SourceDBType.TSDB) { + String address = originalConfig.getString(Key.ENDPOINT); + if (StringUtils.isBlank(address)) { + throw DataXException.asDataXException(TSDBWriterErrorCode.REQUIRED_VALUE, + "The parameter [" + Key.ENDPOINT + "] is not set."); + } - Integer retrySize = this.originalConfig.getInt(Key.MAX_RETRY_TIME); - if (retrySize == null || retrySize < 0) { - originalConfig.set(Key.MAX_RETRY_TIME, Constant.DEFAULT_TRY_SIZE); - LOG.info("The parameter [" + Key.MAX_RETRY_TIME + - "] will be default value: " + Constant.DEFAULT_TRY_SIZE); - } + String username = originalConfig.getString(Key.USERNAME, null); + if (StringUtils.isBlank(username)) { + LOG.warn("The parameter [" + Key.USERNAME + "] is blank."); + } + String password = originalConfig.getString(Key.PASSWORD, null); + if (StringUtils.isBlank(password)) { + LOG.warn("The parameter [" + Key.PASSWORD + "] is blank."); + } + + Integer batchSize = originalConfig.getInt(Key.BATCH_SIZE); + if (batchSize == null || batchSize < 1) { + originalConfig.set(Key.BATCH_SIZE, Constant.DEFAULT_BATCH_SIZE); + LOG.info("The parameter [" + Key.BATCH_SIZE + + "] will be default value: " + Constant.DEFAULT_BATCH_SIZE); + } + + Integer retrySize = originalConfig.getInt(Key.MAX_RETRY_TIME); + if (retrySize == null || retrySize < 0) { + originalConfig.set(Key.MAX_RETRY_TIME, Constant.DEFAULT_TRY_SIZE); + LOG.info("The parameter [" + Key.MAX_RETRY_TIME + + "] will be default value: " + Constant.DEFAULT_TRY_SIZE); + } + + Boolean ignoreWriteError = 
originalConfig.getBool(Key.IGNORE_WRITE_ERROR); + if (ignoreWriteError == null) { + originalConfig.set(Key.IGNORE_WRITE_ERROR, Constant.DEFAULT_IGNORE_WRITE_ERROR); + LOG.info("The parameter [" + Key.IGNORE_WRITE_ERROR + + "] will be default value: " + Constant.DEFAULT_IGNORE_WRITE_ERROR); + } + } else if (DB_TYPE == SourceDBType.RDB) { + // for rdb + originalConfig.getNecessaryValue(Key.ENDPOINT, TSDBWriterErrorCode.REQUIRED_VALUE); + originalConfig.getNecessaryValue(Key.COLUMN_TYPE, TSDBWriterErrorCode.REQUIRED_VALUE); + originalConfig.getNecessaryValue(Key.COLUMN, TSDBWriterErrorCode.REQUIRED_VALUE); + String endpoint = originalConfig.getString(Key.ENDPOINT); + String[] split = endpoint.split(":"); + if (split.length != 3) { + throw DataXException.asDataXException(TSDBWriterErrorCode.REQUIRED_VALUE, + "The parameter [" + Key.ENDPOINT + "] is invalid, which should be [http://IP:Port]."); + } + String ip = split[1].substring(2); + int port = Integer.parseInt(split[2]); + + String username = originalConfig.getString(Key.USERNAME, null); + if (StringUtils.isBlank(username)) { + LOG.warn("The parameter [" + Key.USERNAME + "] is blank."); + } + + String password = originalConfig.getString(Key.PASSWORD, null); + if (StringUtils.isBlank(password)) { + LOG.warn("The parameter [" + Key.PASSWORD + "] is blank."); + } + + if (!StringUtils.isBlank(password) && !StringUtils.isBlank(username)) { + tsdb = TSDBClientFactory.connect(TSDBConfig.address(ip, port).basicAuth(username, password).config()); + } else { + tsdb = TSDBClientFactory.connect(TSDBConfig.address(ip, port).config()); + } + + String database = originalConfig.getString(Key.DATABASE, null); + if (StringUtils.isBlank(database)) { + LOG.info("The parameter [" + Key.DATABASE + "] is blank."); + } else { + LOG.warn("The parameter [" + Key.DATABASE + "] : {} is ignored."); + // tsdb.useDatabase(database); + } + + LOG.info("Tsdb config: {}", ConfigurationUtil.filterSensitive(originalConfig).toJSON()); - Boolean ignoreWriteError = this.originalConfig.getBool(Key.IGNORE_WRITE_ERROR); - if (ignoreWriteError == null) { - originalConfig.set(Key.IGNORE_WRITE_ERROR, Constant.DEFAULT_IGNORE_WRITE_ERROR); - LOG.info("The parameter [" + Key.IGNORE_WRITE_ERROR + - "] will be default value: " + Constant.DEFAULT_IGNORE_WRITE_ERROR); } } @@ -72,7 +151,7 @@ public class TSDBWriter extends Writer { public List split(int mandatoryNumber) { ArrayList configurations = new ArrayList(mandatoryNumber); for (int i = 0; i < mandatoryNumber; i++) { - configurations.add(this.originalConfig.clone()); + configurations.add(originalConfig.clone()); } return configurations; } @@ -83,6 +162,14 @@ public class TSDBWriter extends Writer { @Override public void destroy() { + if (DB_TYPE == SourceDBType.RDB) { + if (tsdb != null) { + try { + tsdb.close(); + } catch (IOException ignored) { + } + } + } } } @@ -91,18 +178,87 @@ public class TSDBWriter extends Writer { private static final Logger LOG = LoggerFactory.getLogger(Task.class); private TSDBConnection conn; + private boolean multiField; private int batchSize; private int retrySize; private boolean ignoreWriteError; + private String tableName; + private TSDBConverter tsdbConverter; @Override public void init() { Configuration writerSliceConfig = getPluginJobConf(); - String address = writerSliceConfig.getString(Key.ENDPOINT); - this.conn = new TSDBConnection(address); - this.batchSize = writerSliceConfig.getInt(Key.BATCH_SIZE); - this.retrySize = writerSliceConfig.getInt(Key.MAX_RETRY_TIME); + + // single field | multi 
fields + this.multiField = writerSliceConfig.getBool(Key.MULTI_FIELD, false); this.ignoreWriteError = writerSliceConfig.getBool(Key.IGNORE_WRITE_ERROR); + + // for tsdb + if (DB_TYPE == SourceDBType.TSDB) { + String address = writerSliceConfig.getString(Key.ENDPOINT); + String database = writerSliceConfig.getString(Key.DATABASE); + String username = writerSliceConfig.getString(Key.USERNAME); + String password = writerSliceConfig.getString(Key.PASSWORD); + this.conn = new TSDBConnection(address, database, username, password); + this.batchSize = writerSliceConfig.getInt(Key.BATCH_SIZE); + this.retrySize = writerSliceConfig.getInt(Key.MAX_RETRY_TIME); + + } else if (DB_TYPE == SourceDBType.RDB) { + // for rdb + int timeSize = 0; + int fieldSize = 0; + int tagSize = 0; + batchSize = writerSliceConfig.getInt(Key.BATCH_SIZE, 100); + List columnName = writerSliceConfig.getList(Key.COLUMN, String.class); + List columnType = writerSliceConfig.getList(Key.COLUMN_TYPE, String.class); + Set typeSet = new HashSet(columnType); + if (columnName.size() != columnType.size()) { + throw DataXException.asDataXException(TSDBWriterErrorCode.ILLEGAL_VALUE, + "The parameter [" + Key.COLUMN_TYPE + "] should has same length with [" + Key.COLUMN + "]."); + } + + for (String type : columnType) { + if (TSDBModel.TSDB_TAG.equals(type)) { + tagSize ++; + } else if (TSDBModel.TSDB_FIELD_DOUBLE.equals(type) || TSDBModel.TSDB_FIELD_STRING.equals(type) + || TSDBModel.TSDB_FIELD_BOOL.equals(type)) { + fieldSize++; + } else if (TSDBModel.TSDB_TIMESTAMP.equals(type)) { + timeSize++; + } + } + + if (fieldSize == 0) { + // compatible with previous usage of TSDB_METRIC_NUM and TSDB_METRIC_STRING + if (!typeSet.contains(TSDBModel.TSDB_METRIC_NUM) && !typeSet.contains(TSDBModel.TSDB_METRIC_STRING)) { + throw DataXException.asDataXException(TSDBWriterErrorCode.ILLEGAL_VALUE, + "The parameter [" + Key.COLUMN_TYPE + "] is invalid, must set at least one of " + + TSDBModel.TSDB_FIELD_DOUBLE + ", " + TSDBModel.TSDB_FIELD_STRING + " or " + TSDBModel.TSDB_FIELD_BOOL + "."); + } + } + + if (tagSize == 0) { + throw DataXException.asDataXException(TSDBWriterErrorCode.ILLEGAL_VALUE, + "The parameter [" + Key.COLUMN_TYPE + "] is invalid, must set " + TSDBModel.TSDB_TAG + ". 
"); + } + + if (timeSize != 1) { + throw DataXException.asDataXException(TSDBWriterErrorCode.ILLEGAL_VALUE, + "The parameter [" + Key.COLUMN_TYPE + "] is invalid, must set one and only one " + + TSDBModel.TSDB_TIMESTAMP + "."); + } + + if (multiField) { + // check source db type + tableName = writerSliceConfig.getString(Key.TABLE); + if (StringUtils.isBlank(tableName)) { + throw DataXException.asDataXException(TSDBWriterErrorCode.ILLEGAL_VALUE, + "The parameter [" + Key.TABLE + "] h must set when use multi field input."); + } + } + tsdbConverter = new TSDBConverter(columnName, columnType); + + } } @Override @@ -111,30 +267,52 @@ public class TSDBWriter extends Writer { @Override public void startWrite(RecordReceiver recordReceiver) { - try { - Record lastRecord = null; - Record record; - int count = 0; - StringBuilder dps = new StringBuilder(); - while ((record = recordReceiver.getFromReader()) != null) { - final int recordLength = record.getColumnNumber(); - for (int i = 0; i < recordLength; i++) { - dps.append(record.getColumn(i).asString()); - dps.append(","); - count++; - if (count == batchSize) { - count = 0; - batchPut(record, "[" + dps.substring(0, dps.length() - 1) + "]"); - dps = new StringBuilder(); + // for tsdb + if (DB_TYPE == SourceDBType.TSDB) { + try { + Record lastRecord = null; + Record record; + int count = 0; + StringBuilder dps = new StringBuilder(); + while ((record = recordReceiver.getFromReader()) != null) { + final int recordLength = record.getColumnNumber(); + for (int i = 0; i < recordLength; i++) { + dps.append(record.getColumn(i).asString()); + dps.append(","); + count++; + if (count == batchSize) { + count = 0; + batchPut(record, "[" + dps.substring(0, dps.length() - 1) + "]"); + dps = new StringBuilder(); + } } + lastRecord = record; } - lastRecord = record; + if (StringUtils.isNotBlank(dps.toString())) { + batchPut(lastRecord, "[" + dps.substring(0, dps.length() - 1) + "]"); + } + } catch (Exception e) { + throw DataXException.asDataXException(TSDBWriterErrorCode.RUNTIME_EXCEPTION, e); } - if (StringUtils.isNotBlank(dps.toString())) { - batchPut(lastRecord, "[" + dps.substring(0, dps.length() - 1) + "]"); + } else if (DB_TYPE == SourceDBType.RDB) { + // for rdb + List writerBuffer = new ArrayList(this.batchSize); + Record record; + long total = 0; + while ((record = recordReceiver.getFromReader()) != null) { + writerBuffer.add(record); + if (writerBuffer.size() >= this.batchSize) { + total += doBatchInsert(writerBuffer); + writerBuffer.clear(); + } } - } catch (Exception e) { - throw DataXException.asDataXException(TSDBWriterErrorCode.RUNTIME_EXCEPTION, e); + if (!writerBuffer.isEmpty()) { + total += doBatchInsert(writerBuffer); + writerBuffer.clear(); + } + getTaskPluginCollector().collectMessage("write size", total + ""); + LOG.info("Task finished, write size: {}", total); + } } @@ -143,12 +321,13 @@ public class TSDBWriter extends Writer { RetryUtil.executeWithRetry(new Callable() { @Override public Integer call() { - if (!conn.put(dps)) { - getTaskPluginCollector().collectDirtyRecord(record, "Put data points failed!"); - throw DataXException.asDataXException(TSDBWriterErrorCode.RUNTIME_EXCEPTION, - "Put data points failed!"); + final boolean success = multiField ? 
conn.mput(dps) : conn.put(dps); + if (success) { + return 0; } - return 0; + getTaskPluginCollector().collectDirtyRecord(record, "Put data points failed!"); + throw DataXException.asDataXException(TSDBWriterErrorCode.RUNTIME_EXCEPTION, + "Put data points failed!"); } }, retrySize, 60000L, true); } catch (Exception e) { @@ -160,6 +339,47 @@ public class TSDBWriter extends Writer { } } + private long doBatchInsert(final List writerBuffer) { + int size; + if (ignoreWriteError) { + if (multiField) { + List points = tsdbConverter.transRecord2MultiFieldPoint(writerBuffer, tableName); + size = points.size(); + MultiFieldIgnoreErrorsResult ignoreErrorsResult = tsdb.multiFieldPutSync(points, MultiFieldIgnoreErrorsResult.class); + if (ignoreErrorsResult == null) { + LOG.error("Unexpected inner error for insert"); + } else if (ignoreErrorsResult.getFailed() > 0) { + LOG.error("write TSDB failed num:" + ignoreErrorsResult.getFailed()); + } + } else { + List points = tsdbConverter.transRecord2Point(writerBuffer); + size = points.size(); + IgnoreErrorsResult ignoreErrorsResult = tsdb.putSync(points, IgnoreErrorsResult.class); + if (ignoreErrorsResult == null) { + LOG.error("Unexpected inner error for insert"); + } else if (ignoreErrorsResult.getFailed() > 0) { + LOG.error("write TSDB failed num:" + ignoreErrorsResult.getFailed()); + } + } + } else { + SummaryResult summaryResult; + if (multiField) { + List points = tsdbConverter.transRecord2MultiFieldPoint(writerBuffer, tableName); + size = points.size(); + summaryResult = tsdb.multiFieldPutSync(points, SummaryResult.class); + } else { + List points = tsdbConverter.transRecord2Point(writerBuffer); + size = points.size(); + summaryResult = tsdb.putSync(points, SummaryResult.class); + } + if (summaryResult.getFailed() > 0) { + LOG.error("write TSDB failed num:" + summaryResult.getFailed()); + throw DataXException.asDataXException(TSDBWriterErrorCode.RUNTIME_EXCEPTION, "Write TSDB failed", new Exception()); + } + } + return size; + } + @Override public void post() { } diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriterErrorCode.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriterErrorCode.java index f907fb67..ab4c3894 100755 --- a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriterErrorCode.java +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/tsdbwriter/TSDBWriterErrorCode.java @@ -13,6 +13,7 @@ import com.alibaba.datax.common.spi.ErrorCode; public enum TSDBWriterErrorCode implements ErrorCode { REQUIRED_VALUE("TSDBWriter-00", "Missing the necessary value"), + ILLEGAL_VALUE("TSDBWriter-01", "Illegal value"), RUNTIME_EXCEPTION("TSDBWriter-01", "Runtime exception"), RETRY_WRITER_EXCEPTION("TSDBWriter-02", "After repeated attempts, the write still fails"); diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/util/HttpUtils.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/util/HttpUtils.java index b81512f7..97055adc 100644 --- a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/util/HttpUtils.java +++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/util/HttpUtils.java @@ -1,11 +1,14 @@ package com.alibaba.datax.plugin.writer.util; -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson2.JSON; +import org.apache.commons.lang3.StringUtils; import org.apache.http.client.fluent.Content; import org.apache.http.client.fluent.Request; import org.apache.http.entity.ContentType; import 
java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Base64; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -19,43 +22,44 @@ import java.util.concurrent.TimeUnit; */ public final class HttpUtils { - public final static Charset UTF_8 = Charset.forName("UTF-8"); public final static int CONNECT_TIMEOUT_DEFAULT_IN_MILL = (int) TimeUnit.SECONDS.toMillis(60); public final static int SOCKET_TIMEOUT_DEFAULT_IN_MILL = (int) TimeUnit.SECONDS.toMillis(60); + private static final String CREDENTIALS_FORMAT = "%s:%s"; + private static final String BASIC_AUTHENTICATION_FORMAT = "Basic %s"; + private HttpUtils() { } - public static String get(String url) throws Exception { - Content content = Request.Get(url) + public static String get(String url, String username, String password) throws Exception { + final Request request = Request.Get(url) .connectTimeout(CONNECT_TIMEOUT_DEFAULT_IN_MILL) - .socketTimeout(SOCKET_TIMEOUT_DEFAULT_IN_MILL) + .socketTimeout(SOCKET_TIMEOUT_DEFAULT_IN_MILL); + addAuth(request, username, password); + Content content = request .execute() .returnContent(); if (content == null) { return null; } - return content.asString(UTF_8); + return content.asString(StandardCharsets.UTF_8); } - public static String post(String url, Map params) throws Exception { - return post(url, JSON.toJSONString(params), CONNECT_TIMEOUT_DEFAULT_IN_MILL, SOCKET_TIMEOUT_DEFAULT_IN_MILL); + public static String post(String url, String username, String password, Map params) throws Exception { + return post(url, username, password, JSON.toJSONString(params), CONNECT_TIMEOUT_DEFAULT_IN_MILL, SOCKET_TIMEOUT_DEFAULT_IN_MILL); } - public static String post(String url, String params) throws Exception { - return post(url, params, CONNECT_TIMEOUT_DEFAULT_IN_MILL, SOCKET_TIMEOUT_DEFAULT_IN_MILL); + public static String post(String url, String username, String password, String params) throws Exception { + return post(url, username, password, params, CONNECT_TIMEOUT_DEFAULT_IN_MILL, SOCKET_TIMEOUT_DEFAULT_IN_MILL); } - public static String post(String url, Map params, + public static String post(String url, String username, String password, String params, int connectTimeoutInMill, int socketTimeoutInMill) throws Exception { - return post(url, JSON.toJSONString(params), connectTimeoutInMill, socketTimeoutInMill); - } - - public static String post(String url, String params, - int connectTimeoutInMill, int socketTimeoutInMill) throws Exception { - Content content = Request.Post(url) + Request request = Request.Post(url) .connectTimeout(connectTimeoutInMill) - .socketTimeout(socketTimeoutInMill) + .socketTimeout(socketTimeoutInMill); + addAuth(request, username, password); + Content content = request .addHeader("Content-Type", "application/json") .bodyString(params, ContentType.APPLICATION_JSON) .execute() @@ -63,6 +67,22 @@ public final class HttpUtils { if (content == null) { return null; } - return content.asString(UTF_8); + return content.asString(StandardCharsets.UTF_8); + } + + private static void addAuth(Request request, String username, String password) { + String authorization = generateHttpAuthorization(username, password); + if (authorization != null) { + request.setHeader("Authorization", authorization); + } + } + + private static String generateHttpAuthorization(String username, String password) { + if (StringUtils.isBlank(username) || StringUtils.isBlank(password)) { + return null; + } + String credentials = String.format(CREDENTIALS_FORMAT, username, password); 
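+ // Base64-encode the "username:password" pair; addAuth() sends the result as an HTTP Basic Authorization header ("Basic <encoded>").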
diff --git a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/util/TSDBUtils.java b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/util/TSDBUtils.java
index ed01d877..83250b32 100644
--- a/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/util/TSDBUtils.java
+++ b/tsdbwriter/src/main/java/com/alibaba/datax/plugin/writer/util/TSDBUtils.java
@@ -1,7 +1,8 @@
 package com.alibaba.datax.plugin.writer.util;

 import com.alibaba.datax.plugin.writer.conn.DataPoint4TSDB;
-import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson2.JSON;
+import org.apache.commons.lang3.StringUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@@ -22,45 +23,56 @@ public final class TSDBUtils {
     private TSDBUtils() {
     }

-    public static String version(String address) {
+    public static String version(String address, String username, String password) {
         String url = String.format("%s/api/version", address);
         String rsp;
         try {
-            rsp = HttpUtils.get(url);
+            rsp = HttpUtils.get(url, username, password);
         } catch (Exception e) {
             throw new RuntimeException(e);
         }
         return rsp;
     }

-    public static String config(String address) {
+    public static String config(String address, String username, String password) {
         String url = String.format("%s/api/config", address);
         String rsp;
         try {
-            rsp = HttpUtils.get(url);
+            rsp = HttpUtils.get(url, username, password);
         } catch (Exception e) {
             throw new RuntimeException(e);
         }
         return rsp;
     }

-    public static boolean put(String address, List dps) {
-        return put(address, JSON.toJSON(dps));
+    public static boolean put(String address, String database, String username, String password, List dps) {
+        return put(address, database, username, password, JSON.toJSON(dps));
     }

-    public static boolean put(String address, DataPoint4TSDB dp) {
-        return put(address, JSON.toJSON(dp));
+    public static boolean put(String address, String database, String username, String password, DataPoint4TSDB dp) {
+        return put(address, database, username, password, JSON.toJSON(dp));
     }

-    private static boolean put(String address, Object o) {
-        return put(address, o.toString());
+    private static boolean put(String address, String database, String username, String password, Object o) {
+        return put(address, database, username, password, o.toString());
     }

-    public static boolean put(String address, String s) {
-        String url = String.format("%s/api/put", address);
+    public static boolean put(String address, String database, String username, String password, String s) {
+        return put(address, database, username, password, s, false);
+    }
+
+    public static boolean mput(String address, String database, String username, String password, String s) {
+        return put(address, database, username, password, s, true);
+    }
+
+    public static boolean put(String address, String database, String username, String password, String s, boolean multiField) {
+        String url = address + (multiField ? "/api/mput" : "/api/put");
+        if (!StringUtils.isBlank(database)) {
+            url = url.concat("?db=" + database);
+        }
         String rsp;
         try {
-            rsp = HttpUtils.post(url, s);
+            rsp = HttpUtils.post(url, username, password, s);
             // If successful, the returned content should be null.
             assert rsp == null;
         } catch (Exception e) {
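Besides forwarding the credentials to `HttpUtils`, the widened `put()` overloads now encode the write routing: `multiField` switches the endpoint from `/api/put` to `/api/mput`, and a non-blank `database` is appended as a `db` query parameter before the body is posted. A self-contained sketch of that URL construction (the address and database name are placeholders; `StringUtils.isBlank` is approximated with a plain emptiness check):

```java
public class TsdbPutUrlSketch {

    // Mirrors the routing in the new TSDBUtils.put(address, database, username, password, s, multiField).
    static String buildPutUrl(String address, String database, boolean multiField) {
        String url = address + (multiField ? "/api/mput" : "/api/put");
        if (database != null && !database.trim().isEmpty()) { // stands in for !StringUtils.isBlank(database)
            url = url + "?db=" + database;
        }
        return url;
    }

    public static void main(String[] args) {
        // Placeholder address; a real job would use the configured TSDB endpoint.
        System.out.println(buildPutUrl("http://localhost:8242", null, false));   // http://localhost:8242/api/put
        System.out.println(buildPutUrl("http://localhost:8242", "datax", true)); // http://localhost:8242/api/mput?db=datax
    }
}
```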
diff --git a/tsdbwriter/src/main/resources/plugin.json b/tsdbwriter/src/main/resources/plugin.json
index 78c8273f..26f927c2 100755
--- a/tsdbwriter/src/main/resources/plugin.json
+++ b/tsdbwriter/src/main/resources/plugin.json
@@ -6,5 +6,5 @@
         "mechanism": "调用 TSDB 的 /api/put 接口,实现数据点的写入",
         "warn": ""
     },
-    "developer": "Benedict Jin"
+    "developer": "alibaba"
 }
diff --git a/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/conn/TSDBConnectionTest.java b/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/conn/TSDBConnectionTest.java
index 455f4ce6..fada706e 100644
--- a/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/conn/TSDBConnectionTest.java
+++ b/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/conn/TSDBConnectionTest.java
@@ -19,12 +19,12 @@ public class TSDBConnectionTest {

     @Test
     public void testVersion() {
-        String version = new TSDBConnection(TSDB_ADDRESS).version();
+        String version = new TSDBConnection(TSDB_ADDRESS, null, null, null).version();
         Assert.assertNotNull(version);
     }

     @Test
     public void testIsSupported() {
-        Assert.assertTrue(new TSDBConnection(TSDB_ADDRESS).isSupported());
+        Assert.assertTrue(new TSDBConnection(TSDB_ADDRESS, null, null, null).isSupported());
     }
 }
diff --git a/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/HttpUtilsTest.java b/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/HttpUtilsTest.java
index 69f26b80..1f8fb870 100644
--- a/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/HttpUtilsTest.java
+++ b/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/HttpUtilsTest.java
@@ -24,7 +24,7 @@ public class HttpUtilsTest {
         Map params = new HashMap();
         params.put("foo", "bar");

-        String rsp = HttpUtils.post(url, params);
+        String rsp = HttpUtils.post(url, null, null, params);
         System.out.println(rsp);
         Assert.assertNotNull(rsp);
     }
@@ -32,7 +32,7 @@
     @Test
     public void testGet() throws Exception {
         String url = String.format("%s/api/version", Const.OPENTSDB_ADDRESS);
-        String rsp = HttpUtils.get(url);
+        String rsp = HttpUtils.get(url, null, null);
         System.out.println(rsp);
         Assert.assertNotNull(rsp);
     }
diff --git a/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/TSDBTest.java b/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/TSDBTest.java
index 7d22bb72..8debf406 100644
--- a/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/TSDBTest.java
+++ b/tsdbwriter/src/test/java/com/alibaba/datax/plugin/writer/util/TSDBTest.java
@@ -17,11 +17,11 @@ public class TSDBTest {

     @Test
     public void testVersion() {
-        String version = TSDBUtils.version(Const.TSDB_ADDRESS);
+        String version = TSDBUtils.version(Const.TSDB_ADDRESS, null, null);
         Assert.assertNotNull(version);
         System.out.println(version);

-        version = TSDBUtils.version(Const.OPENTSDB_ADDRESS);
+        version = TSDBUtils.version(Const.OPENTSDB_ADDRESS, null, null);
         Assert.assertNotNull(version);
         System.out.println(version);
     }
diff --git a/txtfilereader/src/main/java/com/alibaba/datax/plugin/reader/txtfilereader/TxtFileReader.java b/txtfilereader/src/main/java/com/alibaba/datax/plugin/reader/txtfilereader/TxtFileReader.java
index 914305c6..a74ef8fc 100755
--- a/txtfilereader/src/main/java/com/alibaba/datax/plugin/reader/txtfilereader/TxtFileReader.java
+++ b/txtfilereader/src/main/java/com/alibaba/datax/plugin/reader/txtfilereader/TxtFileReader.java
@@ -182,6 +182,7 @@ public class TxtFileReader extends Reader {
                                     delimiterInStr));
                 }
+
             UnstructuredStorageReaderUtil.validateCsvReaderConfig(this.originConfig);
         }

         @Override
diff --git a/userGuid.md b/userGuid.md
index 153c8111..876bae99 100644
--- a/userGuid.md
+++ b/userGuid.md
@@ -10,14 +10,14 @@ DataX本身作为数据同步框架,将不同数据源的同步抽象为从源

 - Linux
 - [JDK(1.8以上,推荐1.8) ](http://www.oracle.com/technetwork/cn/java/javase/downloads/index.html)
-- [Python(推荐Python2.6.X) ](https://www.python.org/downloads/)
+- [Python(2或3都可以) ](https://www.python.org/downloads/)
 - [Apache Maven 3.x](https://maven.apache.org/download.cgi) (Compile DataX)

 # Quick Start

 * 工具部署

-  * 方法一、直接下载DataX工具包:[DataX下载地址](http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz)
+  * 方法一、直接下载DataX工具包:[DataX下载地址](https://datax-opensource.oss-cn-hangzhou.aliyuncs.com/202210/datax.tar.gz)

     下载后解压至本地某个目录,进入bin目录,即可运行同步作业: