mirror of
https://github.com/alibaba/DataX.git
synced 2025-05-02 04:59:51 +08:00
add obhbase reader and writer plugin by cjyyz64
This commit is contained in:
parent
f1c20abc7d
commit
0ec767730e
@ -2,11 +2,10 @@
|
|||||||
"job": {
|
"job": {
|
||||||
"setting": {
|
"setting": {
|
||||||
"speed": {
|
"speed": {
|
||||||
"channel":1
|
"channel": 2
|
||||||
},
|
},
|
||||||
"errorLimit": {
|
"errorLimit": {
|
||||||
"record": 0,
|
"record": 0
|
||||||
"percentage": 0.02
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"content": [
|
"content": [
|
||||||
@ -14,17 +13,17 @@
|
|||||||
"reader": {
|
"reader": {
|
||||||
"name": "streamreader",
|
"name": "streamreader",
|
||||||
"parameter": {
|
"parameter": {
|
||||||
"column" : [
|
"column": [
|
||||||
{
|
{
|
||||||
"value": "DataX",
|
"value": "DataX",
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"value": 19890604,
|
"value": 1724154616370,
|
||||||
"type": "long"
|
"type": "long"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"value": "1989-06-04 00:00:00",
|
"value": "2024-01-01 00:00:00",
|
||||||
"type": "date"
|
"type": "date"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -32,11 +31,11 @@
|
|||||||
"type": "bool"
|
"type": "bool"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"value": "test",
|
"value": "TestRawData",
|
||||||
"type": "bytes"
|
"type": "bytes"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"sliceRecordCount": 100000
|
"sliceRecordCount": 100
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"writer": {
|
"writer": {
|
||||||
|
@ -36,8 +36,6 @@ DorisWriter 通过Doris原生支持Stream load方式导入数据, DorisWriter
|
|||||||
"name": "doriswriter",
|
"name": "doriswriter",
|
||||||
"parameter": {
|
"parameter": {
|
||||||
"loadUrl": ["172.16.0.13:8030"],
|
"loadUrl": ["172.16.0.13:8030"],
|
||||||
"loadProps": {
|
|
||||||
},
|
|
||||||
"column": ["emp_no", "birth_date", "first_name","last_name","gender","hire_date"],
|
"column": ["emp_no", "birth_date", "first_name","last_name","gender","hire_date"],
|
||||||
"username": "root",
|
"username": "root",
|
||||||
"password": "xxxxxx",
|
"password": "xxxxxx",
|
||||||
|
@ -168,78 +168,3 @@
|
|||||||
* 描述: 不使用datax的mappings,使用es自己的自动mappings
|
* 描述: 不使用datax的mappings,使用es自己的自动mappings
|
||||||
* 必选: 否
|
* 必选: 否
|
||||||
* 默认值: false
|
* 默认值: false
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## 4 性能报告
|
|
||||||
|
|
||||||
### 4.1 环境准备
|
|
||||||
|
|
||||||
* 总数据量 1kw条数据, 每条0.1kb
|
|
||||||
* 1个shard, 0个replica
|
|
||||||
* 不加id,这样默认是append_only模式,不检查版本,插入速度会有20%左右的提升
|
|
||||||
|
|
||||||
#### 4.1.1 输入数据类型(streamreader)
|
|
||||||
|
|
||||||
```
|
|
||||||
{"value": "1.1.1.1", "type": "string"},
|
|
||||||
{"value": 19890604.0, "type": "double"},
|
|
||||||
{"value": 19890604, "type": "long"},
|
|
||||||
{"value": 19890604, "type": "long"},
|
|
||||||
{"value": "hello world", "type": "string"},
|
|
||||||
{"value": "hello world", "type": "string"},
|
|
||||||
{"value": "41.12,-71.34", "type": "string"},
|
|
||||||
{"value": "2017-05-25", "type": "string"},
|
|
||||||
```
|
|
||||||
|
|
||||||
#### 4.1.2 输出数据类型(eswriter)
|
|
||||||
|
|
||||||
```
|
|
||||||
{ "name": "col_ip","type": "ip" },
|
|
||||||
{ "name": "col_double","type": "double" },
|
|
||||||
{ "name": "col_long","type": "long" },
|
|
||||||
{ "name": "col_integer","type": "integer" },
|
|
||||||
{ "name": "col_keyword", "type": "keyword" },
|
|
||||||
{ "name": "col_text", "type": "text"},
|
|
||||||
{ "name": "col_geo_point", "type": "geo_point" },
|
|
||||||
{ "name": "col_date", "type": "date"}
|
|
||||||
```
|
|
||||||
|
|
||||||
#### 4.1.2 机器参数
|
|
||||||
|
|
||||||
1. cpu: 32 Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz
|
|
||||||
2. mem: 128G
|
|
||||||
3. net: 千兆双网卡
|
|
||||||
|
|
||||||
#### 4.1.3 DataX jvm 参数
|
|
||||||
|
|
||||||
-Xms1024m -Xmx1024m -XX:+HeapDumpOnOutOfMemoryError
|
|
||||||
|
|
||||||
### 4.2 测试报告
|
|
||||||
|
|
||||||
| 通道数| 批量提交行数| DataX速度(Rec/s)|DataX流量(MB/s)|
|
|
||||||
|--------|--------| --------|--------|
|
|
||||||
| 4| 256| 11013| 0.828|
|
|
||||||
| 4| 1024| 19417| 1.43|
|
|
||||||
| 4| 4096| 23923| 1.76|
|
|
||||||
| 4| 8172| 24449| 1.80|
|
|
||||||
| 8| 256| 21459| 1.58|
|
|
||||||
| 8| 1024| 37037| 2.72|
|
|
||||||
| 8| 4096| 45454| 3.34|
|
|
||||||
| 8| 8172| 45871| 3.37|
|
|
||||||
| 16| 1024| 67567| 4.96|
|
|
||||||
| 16| 4096| 78125| 5.74|
|
|
||||||
| 16| 8172| 77519| 5.69|
|
|
||||||
| 32| 1024| 94339| 6.93|
|
|
||||||
| 32| 4096| 96153| 7.06|
|
|
||||||
| 64| 1024| 91743| 6.74|
|
|
||||||
|
|
||||||
### 4.3 测试总结
|
|
||||||
|
|
||||||
* 最好的结果是32通道,每次传4096,如果单条数据很大, 请适当减少批量数,防止oom
|
|
||||||
* 当然这个很容易水平扩展,而且es也是分布式的,多设置几个shard也可以水平扩展
|
|
||||||
|
|
||||||
## 5 约束限制
|
|
||||||
|
|
||||||
* 如果导入id,这样数据导入失败也会重试,重新导入也仅仅是覆盖,保证数据一致性
|
|
||||||
* 如果不导入id,就是append_only模式,elasticsearch自动生成id,速度会提升20%左右,但数据无法修复,适合日志型数据(对数据精度要求不高的)
|
|
178
obhbasereader/doc/obhbasereader.md
Normal file
178
obhbasereader/doc/obhbasereader.md
Normal file
@ -0,0 +1,178 @@
|
|||||||
|
OceanBase的table api为应用提供了ObHBase的访问接口,因此,OceanBase的table api的reader与HBase Reader的结构和配置方法类似。
|
||||||
|
obhbasereader插件支持sql和hbase api两种读取方式,两种方式存在如下区别:
|
||||||
|
|
||||||
|
1. sql方式可以按照分区或者K值进行数据切片,而hbase api方式的数据切片需要用户手动设置。
|
||||||
|
2. sql方式会将从obhbase读取的kqtv形式的数据转换为单一横行,而hbase api则不做行列转换,直接以kqtv形式将数据传递给下游。
|
||||||
|
3. sql方式需要配置column属性,hbase api则不需要配置,数据均为固定的kqtv四列。
|
||||||
|
4. sql方式仅支持获取获得最新或者最旧版本的数据,而hbase api支持获得多版本数据。
|
||||||
|
#### 脚本配置
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"job": {
|
||||||
|
"setting": {
|
||||||
|
"speed": {
|
||||||
|
"channel": 3,
|
||||||
|
"byte": 104857600
|
||||||
|
},
|
||||||
|
"errorLimit": {
|
||||||
|
"record": 10
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"reader": {
|
||||||
|
"name": "obhbasereader",
|
||||||
|
"parameter": {
|
||||||
|
"username": "username",
|
||||||
|
"password": "password",
|
||||||
|
"encoding": "utf8",
|
||||||
|
"column": [
|
||||||
|
{
|
||||||
|
"name": "f1:column1_1",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "f1:column2_2",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "f1:column1_1",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "f1:column2_2",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"range": [
|
||||||
|
{
|
||||||
|
"startRowkey": "aaa",
|
||||||
|
"endRowkey": "ccc",
|
||||||
|
"isBinaryRowkey": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"startRowkey": "eee",
|
||||||
|
"endRowkey": "zzz",
|
||||||
|
"isBinaryRowkey": false
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"mode": "normal",
|
||||||
|
"readByPartition": "true",
|
||||||
|
"scanCacheSize": "",
|
||||||
|
"readerHint": "",
|
||||||
|
"readBatchSize": "1000",
|
||||||
|
"connection": [
|
||||||
|
{
|
||||||
|
"table": [
|
||||||
|
"htable1",
|
||||||
|
"htable2"
|
||||||
|
],
|
||||||
|
"jdbcUrl": [
|
||||||
|
"||_dsc_ob10_dsc_||集群:租户||_dsc_ob10_dsc_||jdbc:mysql://ip:port/dbName1"
|
||||||
|
],
|
||||||
|
"username": "username",
|
||||||
|
"password": "password"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"table": [
|
||||||
|
"htable1",
|
||||||
|
"htable2"
|
||||||
|
],
|
||||||
|
"jdbcUrl": [
|
||||||
|
"jdbc:mysql://ip:port/database"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"writer": {
|
||||||
|
"name": "txtfilewriter",
|
||||||
|
"parameter": {
|
||||||
|
"path": "/Users/xujing/datax/txtfile",
|
||||||
|
"charset": "UTF-8",
|
||||||
|
"fieldDelimiter": ",",
|
||||||
|
"fileName": "hbase",
|
||||||
|
"nullFormat": "null",
|
||||||
|
"writeMode": "truncate"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
##### 参数解释
|
||||||
|
|
||||||
|
- **connection**
|
||||||
|
- 描述:配置分库分表的jdbcUrl和分表名。如果一个分库中有多个分表可以用逗号隔开,也可以写成表名[起始序号-截止序号]
|
||||||
|
- 必须:是
|
||||||
|
- 默认值:无
|
||||||
|
- **jdbcUrl**
|
||||||
|
- 描述:连接ob使用的jdbc url,支持如下两种格式:
|
||||||
|
- jdbc:mysql://obproxyIp:obproxyPort/db
|
||||||
|
- 此格式下username需要写成三段式格式
|
||||||
|
- ||_dsc_ob10_dsc_||集群名:租户名||_dsc_ob10_dsc_||jdbc:mysql://obproxyIp:obproxyPort/db
|
||||||
|
- 此格式下username仅填写用户名本身,无需三段式写法
|
||||||
|
|
||||||
|
- 必选:是
|
||||||
|
- 默认值:无
|
||||||
|
- **table**
|
||||||
|
- 描述:所选取的需要同步的表。使用JSON的数组描述,因此支持多张表同时抽取。当配置为多张表时,用户自己需保证多张表是同一schema结构,obhbasereader不予检查表是否同一逻辑表。注意,table必须包含在connection配置单元中。
|
||||||
|
- 必选:是
|
||||||
|
- 默认值:无
|
||||||
|
- **readByPartition**
|
||||||
|
- 描述:使用sql方式读取时,配置**仅**按照分区进行切片。
|
||||||
|
- 必须:否
|
||||||
|
- 默认值:false
|
||||||
|
- **partitionName**
|
||||||
|
- 描述:使用sql方式读取时,标识仅读取指定分区名的数据,用户需要保证配置的分区名在表结构中真实存在(要求严格大小写)。
|
||||||
|
- 必须:否
|
||||||
|
- 默认值:无
|
||||||
|
- **readBatchSize**
|
||||||
|
- 描述:使用sql方式读取时,分页大小。
|
||||||
|
- 必须:否
|
||||||
|
- 默认值:10w
|
||||||
|
- **fetchSize**
|
||||||
|
- 描述:使用sql方式读取时,控制每次读取数据时从结果集中获取的数据行数。
|
||||||
|
- 必须:否
|
||||||
|
- 默认值:-2147483648
|
||||||
|
- **scanCacheSize**
|
||||||
|
- 描述:使用hbase api读取时,每次rpc从服务器端读取的行数
|
||||||
|
- 必须:否
|
||||||
|
- 默认值:256
|
||||||
|
- **readerHint**
|
||||||
|
- 描述:obhbasereader使用sql方式读取时使用的hint
|
||||||
|
- 必须:否
|
||||||
|
- 默认值:/*+READ_CONSISTENCY(weak),QUERY_TIMEOUT(86400000000)*/
|
||||||
|
- **column**
|
||||||
|
- 描述:使用sql方式读取数据时,所配置的表中需要同步的列名集合,使用JSON的数组描述字段信息。
|
||||||
|
- 支持列裁剪,即列可以挑选部分列进行导出。
|
||||||
|
```
|
||||||
|
支持列换序,即列可以不按照表schema信息进行导出,同时支持通配符*,在使用之前需仔细核对列信息。
|
||||||
|
```
|
||||||
|
|
||||||
|
- 必选:sql方式读取时必选
|
||||||
|
- 默认值:无
|
||||||
|
- **range**
|
||||||
|
- 描述**:**指定hbasereader读取的rowkey范围
|
||||||
|
- 必须:否
|
||||||
|
- 默认值:无
|
||||||
|
- **username**
|
||||||
|
- 描述:访问OceanBase的用户名
|
||||||
|
- 必选:是
|
||||||
|
- 默认值:无
|
||||||
|
- **mode**
|
||||||
|
- 描述:读取obhbase的模式,normal 模式,即仅读取一个版本的数据。
|
||||||
|
- 必选:是
|
||||||
|
- 默认值:normal
|
||||||
|
- **version**
|
||||||
|
- 描述:读取obhbase的版本,当前支持oldest、latest模式,分别表示读取最旧和最新的数据。
|
||||||
|
- 必须:是
|
||||||
|
- 默认值:oldest
|
||||||
|
|
||||||
|
一些注意点:
|
||||||
|
注:如果配置了**partitionName**,则无需再配置readByPartition,即便配置了也会忽略readByPartition选项,而是仅会读取指定分区的数据。
|
||||||
|
注:如果配置了**readByPartition**,任务将仅按照分区切分任务,而不会再按照K值进行切分。如果是非分区表,则整张表会被当作一个任务而不会再切分。
|
||||||
|
|
||||||
|
|
||||||
|
|
151
obhbasereader/pom.xml
Executable file
151
obhbasereader/pom.xml
Executable file
@ -0,0 +1,151 @@
|
|||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
<parent>
|
||||||
|
<groupId>com.alibaba.datax</groupId>
|
||||||
|
<artifactId>datax-all</artifactId>
|
||||||
|
<version>0.0.1-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<artifactId>obhbasereader</artifactId>
|
||||||
|
<groupId>com.alibaba.datax</groupId>
|
||||||
|
<name>obhbasereader</name>
|
||||||
|
<version>0.0.1-SNAPSHOT</version>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.alibaba.datax</groupId>
|
||||||
|
<artifactId>datax-core</artifactId>
|
||||||
|
<version>${datax-project-version}</version>
|
||||||
|
<scope>provided</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.alibaba.datax</groupId>
|
||||||
|
<artifactId>oceanbasev10reader</artifactId>
|
||||||
|
<version>0.0.1-SNAPSHOT</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.zookeeper</groupId>
|
||||||
|
<artifactId>zookeeper</artifactId>
|
||||||
|
<version>3.3.2</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<artifactId>log4j</artifactId>
|
||||||
|
<groupId>log4j</groupId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-collections</groupId>
|
||||||
|
<artifactId>commons-collections</artifactId>
|
||||||
|
<version>3.2.1</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- <dependency>-->
|
||||||
|
<!-- <groupId>com.oceanbase</groupId>-->
|
||||||
|
<!-- <artifactId>shade-obkv-table-client</artifactId>-->
|
||||||
|
<!-- <version>1.2.6-RELEASE</version>-->
|
||||||
|
<!-- </dependency>-->
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.oceanbase</groupId>
|
||||||
|
<artifactId>obkv-hbase-client</artifactId>
|
||||||
|
<version>0.1.4.2</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
<version>${guava-version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.alibaba.toolkit.common</groupId>
|
||||||
|
<artifactId>toolkit-common-logging</artifactId>
|
||||||
|
<version>1.14</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.json</groupId>
|
||||||
|
<artifactId>json</artifactId>
|
||||||
|
<version>20160810</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>junit</groupId>
|
||||||
|
<artifactId>junit</artifactId>
|
||||||
|
<version>4.11</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.powermock</groupId>
|
||||||
|
<artifactId>powermock-module-junit4</artifactId>
|
||||||
|
<version>1.4.10</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.powermock</groupId>
|
||||||
|
<artifactId>powermock-api-mockito</artifactId>
|
||||||
|
<version>1.4.10</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.mockito</groupId>
|
||||||
|
<artifactId>mockito-core</artifactId>
|
||||||
|
<version>1.8.5</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<resources>
|
||||||
|
<resource>
|
||||||
|
<directory>src/main/java</directory>
|
||||||
|
<includes>
|
||||||
|
<include>**/*.properties</include>
|
||||||
|
</includes>
|
||||||
|
</resource>
|
||||||
|
</resources>
|
||||||
|
<plugins>
|
||||||
|
<!-- compiler plugin -->
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
|
<configuration>
|
||||||
|
<source>${jdk-version}</source>
|
||||||
|
<target>${jdk-version}</target>
|
||||||
|
<encoding>${project-sourceEncoding}</encoding>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
<!-- assembly plugin -->
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-assembly-plugin</artifactId>
|
||||||
|
<configuration>
|
||||||
|
<descriptors>
|
||||||
|
<descriptor>src/main/assembly/package.xml</descriptor>
|
||||||
|
</descriptors>
|
||||||
|
<finalName>datax</finalName>
|
||||||
|
</configuration>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>dwzip</id>
|
||||||
|
<phase>package</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>single</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
|
||||||
|
</project>
|
35
obhbasereader/src/main/assembly/package.xml
Executable file
35
obhbasereader/src/main/assembly/package.xml
Executable file
@ -0,0 +1,35 @@
|
|||||||
|
<assembly
|
||||||
|
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
|
||||||
|
<id></id>
|
||||||
|
<formats>
|
||||||
|
<format>dir</format>
|
||||||
|
</formats>
|
||||||
|
<includeBaseDirectory>false</includeBaseDirectory>
|
||||||
|
<fileSets>
|
||||||
|
<fileSet>
|
||||||
|
<directory>src/main/resources</directory>
|
||||||
|
<includes>
|
||||||
|
<include>plugin.json</include>
|
||||||
|
<include>plugin_job_template.json</include>
|
||||||
|
</includes>
|
||||||
|
<outputDirectory>plugin/reader/obhbasereader</outputDirectory>
|
||||||
|
</fileSet>
|
||||||
|
<fileSet>
|
||||||
|
<directory>target/</directory>
|
||||||
|
<includes>
|
||||||
|
<include>obhbasereader-0.0.1-SNAPSHOT.jar</include>
|
||||||
|
</includes>
|
||||||
|
<outputDirectory>plugin/reader/obhbasereader</outputDirectory>
|
||||||
|
</fileSet>
|
||||||
|
</fileSets>
|
||||||
|
|
||||||
|
<dependencySets>
|
||||||
|
<dependencySet>
|
||||||
|
<useProjectArtifact>false</useProjectArtifact>
|
||||||
|
<outputDirectory>plugin/reader/obhbasereader/libs</outputDirectory>
|
||||||
|
<scope>runtime</scope>
|
||||||
|
</dependencySet>
|
||||||
|
</dependencySets>
|
||||||
|
</assembly>
|
@ -0,0 +1,34 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader;
|
||||||
|
|
||||||
|
import ch.qos.logback.classic.Level;
|
||||||
|
|
||||||
|
public final class Constant {
|
||||||
|
public static final String ROWKEY_FLAG = "rowkey";
|
||||||
|
public static final int DEFAULT_SCAN_CACHE = 256;
|
||||||
|
public static final int DEFAULT_FETCH_SIZE = Integer.MIN_VALUE;
|
||||||
|
public static final int DEFAULT_READ_BATCH_SIZE = 100000;
|
||||||
|
// timeout:24 * 3600 = 86400s
|
||||||
|
public static final String OB_READ_HINT = "/*+READ_CONSISTENCY(weak),QUERY_TIMEOUT(86400000000)*/";
|
||||||
|
public static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
|
||||||
|
public static final String DEFAULT_ENCODING = "UTF-8";
|
||||||
|
public static final String DEFAULT_TIMEZONE = "UTC";
|
||||||
|
public static final boolean DEFAULT_USE_SQLREADER = true;
|
||||||
|
public static final boolean DEFAULT_USE_ODPMODE = true;
|
||||||
|
public static final String OB_TABLE_CLIENT_PROPERTY = "logging.path.com.alipay.oceanbase-table-client";
|
||||||
|
public static final String OB_TABLE_HBASE_PROPERTY = "logging.path.com.alipay.oceanbase-table-hbase";
|
||||||
|
public static final String OB_TABLE_CLIENT_LOG_LEVEL = "logging.level.oceanbase-table-client";
|
||||||
|
public static final String OB_TABLE_HBASE_LOG_LEVEL = "logging.level.oceanbase-table-hbase";
|
||||||
|
public static final String OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL = "logging.level.com.alipay.oceanbase-table-client";
|
||||||
|
public static final String OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL = "logging.level.com.alipay.oceanbase-table-hbase";
|
||||||
|
public static final String OB_HBASE_LOG_PATH = System.getProperty("datax.home") + "/log/";
|
||||||
|
public static final String DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL = Level.OFF.toString();
|
||||||
|
public static final String DEFAULT_OB_TABLE_HBASE_LOG_LEVEL = Level.OFF.toString();
|
||||||
|
public static final String OBMYSQL_KEYWORDS =
|
||||||
|
"CUME_DIST,DENSE_RANK,EMPTY,FIRST_VALUE,GROUPING,GROUPS,INTERSECT,JSON_TABLE,LAG,LAST_VALUE,LATERAL,LEAD,NTH_VALUE,NTILE,OF,OVER,PERCENT_RANK,RANK,RECURSIVE,ROW_NUMBER,SYSTEM,WINDOW,ACCESSIBLE,ACCOUNT,ACTION,ADD,AFTER,AGAINST,AGGREGATE,ALGORITHM,ALL,ALTER,ALWAYS,ANALYSE,AND,ANY,AS,ASC,ASCII,ASENSITIVE,AT,AUTO_INCREMENT,AUTOEXTEND_SIZE,AVG,AVG_ROW_LENGTH,BACKUP,BEFORE,BEGIN,BETWEEN,BIGINT,BINARY,BINLOG,BIT,BLOB,BLOCK,BOOL,BOOLEAN,BOTH,BTREE,BY,BYTE,CACHE,CALL,CASCADE,CASCADED,CASE,CATALOG_NAME,CHAIN,CHANGE,CHANGED,CHANNEL,CHAR,CHARACTER,CHARSET,CHECK,CHECKSUM,CIPHER,CLASS_ORIGIN,CLIENT,CLOSE,COALESCE,CODE,COLLATE,COLLATION,COLUMN,COLUMN_FORMAT,COLUMN_NAME,COLUMNS,COMMENT,COMMIT,COMMITTED,COMPACT,COMPLETION,COMPRESSED,COMPRESSION,CONCURRENT,CONDITION,CONNECTION,CONSISTENT,CONSTRAINT,CONSTRAINT_CATALOG,CONSTRAINT_NAME,CONSTRAINT_SCHEMA,CONTAINS,CONTEXT,CONTINUE,CONVERT,CPU,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR,"
|
||||||
|
+ "CURSOR_NAME,DATA,DATABASE,DATABASES,DATAFILE,DATE,DATETIME,DAY,DAY_HOUR,DAY_MICROSECOND,DAY_MINUTE,DAY_SECOND,DEALLOCATE,DEC,DECIMAL,DECLARE,DEFAULT,DEFAULT_AUTH,DEFINER,DELAY_KEY_WRITE,DELAYED,DELETE,DES_KEY_FILE,DESC,DESCRIBE,DETERMINISTIC,DIAGNOSTICS,DIRECTORY,DISABLE,DISCARD,DISK,DISTINCT,DISTINCTROW,DIV,DO,DOUBLE,DROP,DUAL,DUMPFILE,DUPLICATE,DYNAMIC,EACH,ELSE,ELSEIF,ENABLE,ENCLOSED,ENCRYPTION,END,ENDS,ENGINE,ENGINES,ENUM,ERROR,ERRORS,ESCAPE,ESCAPED,EVENT,EVENTS,EVERY,EXCHANGE,EXECUTE,EXISTS,EXIT,EXPANSION,EXPIRE,EXPLAIN,EXPORT,EXTENDED,EXTENT_SIZE,FAST,FAULTS,FETCH,FIELDS,FILE,FILE_BLOCK_SIZE,FILTER,FIRST,FIXED,FLOAT,FLOAT4,FLOAT8,FLUSH,FOLLOWS,FOR,FORCE,FOREIGN,FORMAT,FOUND,FROM,FULL,FULLTEXT,FUNCTION,GENERAL,GENERATED,GEOMETRY,GEOMETRYCOLLECTION,GET,GET_FORMAT,GLOBAL,GRANT,GRANTS,GROUP,GROUP_REPLICATION,HANDLER,HASH,HAVING,HELP,HIGH_PRIORITY,HOST,HOSTS,HOUR,HOUR_MICROSECOND,HOUR_MINUTE,HOUR_SECOND,IDENTIFIED,IF,IGNORE,IGNORE_SERVER_IDS,IMPORT,IN,INDEX,"
|
||||||
|
+ "INDEXES," + "INFILE,INITIAL_SIZE,INNER,INOUT,INSENSITIVE,INSERT,INSERT_METHOD,INSTALL,INSTANCE,INT,INT1,INT2,INT3,INT4,INT8,INTEGER,INTERVAL,INTO,INVOKE,INVOKER,IO,IO_AFTER_GTIDS,IO_BEFORE_GTIDS,IO_THREAD,IPC,IS,ISOLATION,ISSUER,ITERATE,JOIN,JSON,KEY,KEY_BLOCK_SIZE,KEYS,KILL,LANGUAGE,LAST,LEADING,LEAVE,LEAVES,LEFT,LESS,LEVEL,LIKE,LIMIT,LINEAR,LINES,LINESTRING,LIST,LOAD,LOCAL,LOCALTIME,LOCALTIMESTAMP,LOCK,LOCKS,LOGFILE,LOGS,LONG,LONGBLOB,LONGTEXT,LOOP,LOW_PRIORITY,MASTER,MASTER_AUTO_POSITION,MASTER_BIND,MASTER_CONNECT_RETRY,MASTER_DELAY,MASTER_HEARTBEAT_PERIOD,MASTER_HOST,MASTER_LOG_FILE,MASTER_LOG_POS,MASTER_PASSWORD,MASTER_PORT,MASTER_RETRY_COUNT,MASTER_SERVER_ID,MASTER_SSL,MASTER_SSL_CA,MASTER_SSL_CAPATH,MASTER_SSL_CERT,MASTER_SSL_CIPHER,MASTER_SSL_CRL,MASTER_SSL_CRLPATH,MASTER_SSL_KEY,MASTER_SSL_VERIFY_SERVER_CERT,MASTER_TLS_VERSION,MASTER_USER,MATCH,MAX_CONNECTIONS_PER_HOUR,MAX_QUERIES_PER_HOUR,MAX_ROWS,MAX_SIZE,MAX_STATEMENT_TIME,MAX_UPDATES_PER_HOUR,"
|
||||||
|
+ "MAX_USER_CONNECTIONS,"
|
||||||
|
+ "MAXVALUE,MEDIUM,MEDIUMBLOB,MEDIUMINT,MEDIUMTEXT,MEMORY,MERGE,MESSAGE_TEXT,MICROSECOND,MIDDLEINT,MIGRATE,MIN_ROWS,MINUTE,MINUTE_MICROSECOND,MINUTE_SECOND,MOD,MODE,MODIFIES,MODIFY,MONTH,MULTILINESTRING,MULTIPOINT,MULTIPOLYGON,MUTEX,MYSQL_ERRNO,NAME,NAMES,NATIONAL,NATURAL,NCHAR,NDB,NDBCLUSTER,NEVER,NEW,NEXT,NO,NO_WAIT,NO_WRITE_TO_BINLOG,NODEGROUP,NONBLOCKING,NONE,NOT,NUMBER,NUMERIC,NVARCHAR,OFFSET,OLD_PASSWORD,ON,ONE,ONLY,OPEN,OPTIMIZE,OPTIMIZER_COSTS,OPTION,OPTIONALLY,OPTIONS,OR,ORDER,OUT,OUTER,OUTFILE,OWNER,PACK_KEYS,PAGE,PARSE_GCOL_EXPR,PARSER,PARTIAL,PARTITION,PARTITIONING,PARTITIONS,PASSWORD,PHASE,PLUGIN,PLUGIN_DIR,PLUGINS,POINT,POLYGON,PORT,PRECEDES,PRECISION,PREPARE,PRESERVE,PREV,PRIMARY,PRIVILEGES,PROCEDURE,PROCESSLIST,PROFILE,PROFILES,PROXY,PURGE,QUARTER,QUERY,QUICK,RANGE,READ,READ_ONLY,READ_WRITE,READS,REAL,REBUILD,RECOVER,REDO_BUFFER_SIZE,REDOFILE,REDUNDANT,REFERENCES,REGEXP,RELAY,RELAY_LOG_FILE,RELAY_LOG_POS,RELAY_THREAD,RELAYLOG,RELEASE,RELOAD,REMOVE,"
|
||||||
|
+ "RENAME,REORGANIZE,REPAIR,REPEAT,REPEATABLE,REPLACE,REPLICATE_DO_DB,REPLICATE_DO_TABLE,REPLICATE_IGNORE_DB,REPLICATE_IGNORE_TABLE,REPLICATE_REWRITE_DB,REPLICATE_WILD_DO_TABLE,REPLICATE_WILD_IGNORE_TABLE,REPLICATION,REQUIRE,RESET,RESIGNAL,RESTORE,RESTRICT,RESUME,RETURN,RETURNED_SQLSTATE,RETURNS,REVERSE,REVOKE,RIGHT,RLIKE,ROLLBACK,ROLLUP,ROTATE,ROUTINE,ROW,ROW_COUNT,ROW_FORMAT,ROWS,RTREE,SAVEPOINT,SCHEDULE,SCHEMA,SCHEMA_NAME,SCHEMAS,SECOND,SECOND_MICROSECOND,SECURITY,SELECT,SENSITIVE,SEPARATOR,SERIAL,SERIALIZABLE,SERVER,SESSION,SET,SHARE,SHOW,SHUTDOWN,SIGNAL,SIGNED,SIMPLE,SLAVE,SLOW,SMALLINT,SNAPSHOT,SOCKET,SOME,SONAME,SOUNDS,SOURCE,SPATIAL,SPECIFIC,SQL,SQL_AFTER_GTIDS,SQL_AFTER_MTS_GAPS,SQL_BEFORE_GTIDS,SQL_BIG_RESULT,SQL_BUFFER_RESULT,SQL_CACHE,SQL_CALC_FOUND_ROWS,SQL_NO_CACHE,SQL_SMALL_RESULT,SQL_THREAD,SQL_TSI_DAY,SQL_TSI_HOUR,SQL_TSI_MINUTE,SQL_TSI_MONTH,SQL_TSI_QUARTER,SQL_TSI_SECOND,SQL_TSI_WEEK,SQL_TSI_YEAR,SQLEXCEPTION,SQLSTATE,SQLWARNING,SSL,STACKED,"
|
||||||
|
+ "START," + "STARTING,STARTS,STATS_AUTO_RECALC,STATS_PERSISTENT,STATS_SAMPLE_PAGES,STATUS,STOP,STORAGE,STORED,STRAIGHT_JOIN,STRING,SUBCLASS_ORIGIN,SUBJECT,SUBPARTITION,SUBPARTITIONS,SUPER,SUSPEND,SWAPS,SWITCHES,TABLE,TABLE_CHECKSUM,TABLE_NAME,TABLES,TABLESPACE,TEMPORARY,TEMPTABLE,TERMINATED,TEXT,THAN,THEN,TIME,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TINYBLOB,TINYINT,TINYTEXT,TO,TRAILING,TRANSACTION,TRIGGER,TRIGGERS,TRUNCATE,TYPE,TYPES,UNCOMMITTED,UNDEFINED,UNDO,UNDO_BUFFER_SIZE,UNDOFILE,UNICODE,UNINSTALL,UNION,UNIQUE,UNKNOWN,UNLOCK,UNSIGNED,UNTIL,UPDATE,UPGRADE,USAGE,USE,USE_FRM,USER,USER_RESOURCES,USING,UTC_DATE,UTC_TIME,UTC_TIMESTAMP,VALIDATION,VALUE,VALUES,VARBINARY,VARCHAR,VARCHARACTER,VARIABLES,VARYING,VIEW,VIRTUAL,WAIT,WARNINGS,WEEK,WEIGHT_STRING,WHEN,WHERE,WHILE,WITH,WITHOUT,WORK,WRAPPER,WRITE,X509,XA,XID,XML,XOR,YEAR,YEAR_MONTH,ZEROFILL,FALSE,TRUE";
|
||||||
|
}
|
@ -0,0 +1,19 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader;
|
||||||
|
|
||||||
|
import com.alipay.oceanbase.hbase.OHTable;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public final class HTableManager {
|
||||||
|
|
||||||
|
public static OHTable createHTable(Configuration config, String tableName) throws IOException {
|
||||||
|
return new OHTable(config, tableName);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void closeHTable(OHTable hTable) throws IOException {
|
||||||
|
if (hTable != null) {
|
||||||
|
hTable.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,124 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.base.BaseObject;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.enums.ColumnType;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.commons.lang3.Validate;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 描述 hbasereader 插件中,column 配置中的一个单元项实体
|
||||||
|
*/
|
||||||
|
public class HbaseColumnCell extends BaseObject {
|
||||||
|
private ColumnType columnType;
|
||||||
|
|
||||||
|
// columnName 格式为:列族:列名
|
||||||
|
private String columnName;
|
||||||
|
|
||||||
|
private byte[] cf;
|
||||||
|
private byte[] qualifier;
|
||||||
|
|
||||||
|
//对于常量类型,其常量值放到 columnValue 里
|
||||||
|
private String columnValue;
|
||||||
|
|
||||||
|
//当配置了 columnValue 时,isConstant=true(这个成员变量是用于方便使用本类的地方判断是否是常量类型字段)
|
||||||
|
private boolean isConstant;
|
||||||
|
|
||||||
|
// 只在类型是时间类型时,才会设置该值,无默认值。形式如:yyyy-MM-dd HH:mm:ss
|
||||||
|
private String dateformat;
|
||||||
|
|
||||||
|
private HbaseColumnCell(Builder builder) {
|
||||||
|
this.columnType = builder.columnType;
|
||||||
|
|
||||||
|
//columnName 和 columnValue 必须有一个为 null
|
||||||
|
Validate.isTrue(builder.columnName == null || builder.columnValue == null, "In obhbasereader, column cannot configure both column name and column value. Choose one of them.");
|
||||||
|
|
||||||
|
//columnName 和 columnValue 不能都为 null
|
||||||
|
Validate.isTrue(builder.columnName != null || builder.columnValue != null, "In obhbasereader, column cannot configure both column name and column value. Choose one of them.");
|
||||||
|
|
||||||
|
if (builder.columnName != null) {
|
||||||
|
this.isConstant = false;
|
||||||
|
this.columnName = builder.columnName;
|
||||||
|
|
||||||
|
// 如果 columnName 不是 rowkey,则必须配置为:列族:列名 格式
|
||||||
|
if (!ObHbaseReaderUtil.isRowkeyColumn(this.columnName)) {
|
||||||
|
|
||||||
|
String promptInfo = "In obhbasereader, the column configuration format of column should be: 'family:column'. The column you configured is wrong:" + this.columnName;
|
||||||
|
String[] cfAndQualifier = this.columnName.split(":");
|
||||||
|
Validate.isTrue(cfAndQualifier.length == 2 && StringUtils.isNotBlank(cfAndQualifier[0]) && StringUtils.isNotBlank(cfAndQualifier[1]), promptInfo);
|
||||||
|
|
||||||
|
this.cf = Bytes.toBytes(cfAndQualifier[0].trim());
|
||||||
|
this.qualifier = Bytes.toBytes(cfAndQualifier[1].trim());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
this.isConstant = true;
|
||||||
|
this.columnValue = builder.columnValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (builder.dateformat != null) {
|
||||||
|
this.dateformat = builder.dateformat;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public ColumnType getColumnType() {
|
||||||
|
return columnType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getColumnName() {
|
||||||
|
return columnName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte[] getCf() {
|
||||||
|
return cf;
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte[] getQualifier() {
|
||||||
|
return qualifier;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDateformat() {
|
||||||
|
return dateformat;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getColumnValue() {
|
||||||
|
return columnValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isConstant() {
|
||||||
|
return isConstant;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 内部 builder 类
|
||||||
|
public static class Builder {
|
||||||
|
private ColumnType columnType;
|
||||||
|
private String columnName;
|
||||||
|
private String columnValue;
|
||||||
|
|
||||||
|
private String dateformat;
|
||||||
|
|
||||||
|
public Builder(ColumnType columnType) {
|
||||||
|
this.columnType = columnType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder columnName(String columnName) {
|
||||||
|
this.columnName = columnName;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder columnValue(String columnValue) {
|
||||||
|
this.columnValue = columnValue;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder dateformat(String dateformat) {
|
||||||
|
this.dateformat = dateformat;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public HbaseColumnCell build() {
|
||||||
|
return new HbaseColumnCell(this);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,36 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.spi.ErrorCode;
|
||||||
|
|
||||||
|
public enum HbaseReaderErrorCode implements ErrorCode {
|
||||||
|
REQUIRED_VALUE("ObHbaseReader-00", "Missing required parameters."),
|
||||||
|
ILLEGAL_VALUE("ObHbaseReader-01", "Illegal configuration."),
|
||||||
|
PREPAR_READ_ERROR("ObHbaseReader-02", "Preparing to read ObHBase error."),
|
||||||
|
SPLIT_ERROR("ObHbaseReader-03", "Splitting ObHBase table error."),
|
||||||
|
INIT_TABLE_ERROR("ObHbaseReader-04", "Initializing ObHBase extraction table error"),
|
||||||
|
PARSE_COLUMN_ERROR("ObHbaseReader-05", "Parse column failed."),
|
||||||
|
READ_ERROR("ObHbaseReader-06", "Read ObHBase error.");
|
||||||
|
|
||||||
|
private final String code;
|
||||||
|
private final String description;
|
||||||
|
|
||||||
|
private HbaseReaderErrorCode(String code, String description) {
|
||||||
|
this.code = code;
|
||||||
|
this.description = description;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getCode() {
|
||||||
|
return this.code;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getDescription() {
|
||||||
|
return this.description;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format("Code:[%s], Description:[%s]. ", this.code, this.description);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,103 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader;
|
||||||
|
|
||||||
|
/**
 * Configuration key names (plus a few default values) recognized by the
 * ObHBase reader plugin. All keys are looked up on the DataX job/task
 * {@code Configuration}.
 */
public final class Key {

    // Pass-through hbase client configuration block.
    public final static String HBASE_CONFIG = "hbaseConfig";

    /**
     * mode takes one of three values: normal, multiVersionFixedColumn or
     * multiVersionDynamicColumn; there is no default.
     * <p/>
     * normal is used together with a Map-shaped column.
     * <p/>
     * multiVersionFixedColumn is used together with maxVersion, tetradType and a List-shaped column.
     * <p/>
     * multiVersionDynamicColumn is used together with maxVersion, tetradType and a List-shaped columnFamily.
     */
    public final static String MODE = "mode";

    /**
     * Used when mode = multiVersion*: how many versions to read; no default.
     * -1 means read all versions.
     * 0 and 1 are not allowed.
     * &gt;1 reads at most that many versions (must not exceed Integer.MAX_VALUE).
     */
    public final static String MAX_VERSION = "maxVersion";

    /**
     * In multi-version mode the tetrad types (rowkey, column, timestamp, value)
     * must be configured.
     */
    public final static String TETRAD_TYPE = "tetradType";

    /**
     * Character encoding; defaults to utf8.
     */
    public final static String ENCODING = "encoding";

    public final static String TABLE = "table";

    public final static String USERNAME = "username";

    // sys-tenant user used when querying cluster metadata (OCP mode).
    public final static String OB_SYS_USERNAME = "obSysUser";

    // OB config-server URL (OCP mode).
    public final static String CONFIG_URL = "obConfigUrl";

    // ODP (OceanBase Database Proxy) endpoint, used when useOdpMode is true.
    public final static String ODP_HOST = "odpHost";

    public final static String ODP_PORT = "odpPort";

    public final static String DB_NAME = "dbName";

    public final static String PASSWORD = "password";

    public final static String OB_SYS_PASSWORD = "obSysPassword";

    public final static String COLUMN_FAMILY = "columnFamily";

    public final static String COLUMN = "column";

    // Scan range boundaries for rowkey-based splitting.
    public final static String START_ROWKEY = "startRowkey";

    public final static String END_ROWKEY = "endRowkey";

    public final static String IS_BINARY_ROWKEY = "isBinaryRowkey";

    public final static String SCAN_CACHE = "scanCache";

    // Root-server list URL.
    public final static String RS_URL = "rsUrl";

    public final static String MAX_ACTIVE_CONNECTION = "maxActiveConnection";

    public final static int DEFAULT_MAX_ACTIVE_CONNECTION = 2000;

    public final static String TIMEOUT = "timeout";

    // NOTE(review): unit of this default (seconds vs ms) is not visible here — confirm against the consumer.
    public final static long DEFAULT_TIMEOUT = 30;

    public final static String PARTITION_NAME = "partitionName";

    public final static String JDBC_URL = "jdbcUrl";

    public final static String TIMEZONE = "timezone";

    public final static String FETCH_SIZE = "fetchSize";

    public final static String READ_BATCH_SIZE = "readBatchSize";

    public final static String SESSION = "session";

    public final static String READER_HINT = "readerHint";

    public final static String QUERY_SQL = "querySql";

    public final static String SAMPLE_PERCENTAGE = "samplePercentage";

    // Whether to use a dedicated (per-connection) password.
    public final static String USE_SPECIAL_SECRET = "useSpecialSecret";

    // Read through SQL (JDBC) instead of the HBase-style scan API.
    public final static String USE_SQL_READER = "useSqlReader";

    // Connect through ODP instead of resolving servers via the config URL.
    public final static String USE_ODP_MODE = "useOdpMode";

    public final static String RANGE = "range";

    public final static String READ_BY_PARTITION = "readByPartition";
}
|
@ -0,0 +1,445 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader;
|
||||||
|
|
||||||
|
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL;
|
||||||
|
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_OB_TABLE_HBASE_LOG_LEVEL;
|
||||||
|
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_USE_ODPMODE;
|
||||||
|
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL;
|
||||||
|
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL;
|
||||||
|
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_HBASE_LOG_PATH;
|
||||||
|
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_TABLE_CLIENT_LOG_LEVEL;
|
||||||
|
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_TABLE_CLIENT_PROPERTY;
|
||||||
|
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_TABLE_HBASE_LOG_LEVEL;
|
||||||
|
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_TABLE_HBASE_PROPERTY;
|
||||||
|
import static org.apache.commons.lang3.StringUtils.EMPTY;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.element.Record;
|
||||||
|
import com.alibaba.datax.common.exception.DataXException;
|
||||||
|
import com.alibaba.datax.common.plugin.RecordSender;
|
||||||
|
import com.alibaba.datax.common.spi.Reader;
|
||||||
|
import com.alibaba.datax.common.util.Configuration;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.reader.Constant;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.reader.util.ObVersion;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.util.DBUtil;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.util.TableExpandUtil;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.enums.ModeType;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.ext.ServerConnectInfo;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.task.AbstractHbaseTask;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.task.SQLNormalModeReader;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.task.ScanMultiVersionReader;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.task.ScanNormalModeReader;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.util.HbaseSplitUtil;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.util.SqlReaderSplitUtil;
|
||||||
|
import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ObReaderUtils;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import java.sql.PreparedStatement;
|
||||||
|
import java.sql.ResultSet;
|
||||||
|
import org.apache.commons.collections.CollectionUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.commons.lang3.Validate;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ObHbaseReader 支持分库分表
|
||||||
|
* 仅支持ob3.x及以上版本
|
||||||
|
*/
|
||||||
|
public class ObHbaseReader extends Reader {

    /**
     * Job-side half of the reader: validates/normalizes the job configuration
     * (possibly spanning several sharded connections) and splits it into task
     * configurations.
     */
    public static class Job extends Reader.Job {
        // Substring of the driver error raised on bad credentials; used to fail fast instead of retrying.
        static private final String ACCESS_DENIED_ERROR = "Access denied for user";
        private static Logger LOG = LoggerFactory.getLogger(ObHbaseReader.class);
        // The full job configuration; mutated in place as connections are normalized.
        private Configuration originalConfig;

        @Override
        public void init() {
            // Seed ob-table-client / ob-table-hbase log path and levels via system
            // properties, but only when the operator has not set them already.
            if (System.getProperty(OB_TABLE_CLIENT_PROPERTY) == null) {
                LOG.info(OB_TABLE_CLIENT_PROPERTY + " not set");
                System.setProperty(OB_TABLE_CLIENT_PROPERTY, OB_HBASE_LOG_PATH);
            }
            if (System.getProperty(OB_TABLE_HBASE_PROPERTY) == null) {
                LOG.info(OB_TABLE_HBASE_PROPERTY + " not set");
                System.setProperty(OB_TABLE_HBASE_PROPERTY, OB_HBASE_LOG_PATH);
            }
            if (System.getProperty(OB_TABLE_CLIENT_LOG_LEVEL) == null) {
                LOG.info(OB_TABLE_CLIENT_LOG_LEVEL + " not set");
                System.setProperty(OB_TABLE_CLIENT_LOG_LEVEL, DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL);
            }
            if (System.getProperty(OB_TABLE_HBASE_LOG_LEVEL) == null) {
                LOG.info(OB_TABLE_HBASE_LOG_LEVEL + " not set");
                System.setProperty(OB_TABLE_HBASE_LOG_LEVEL, DEFAULT_OB_TABLE_HBASE_LOG_LEVEL);
            }
            if (System.getProperty(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL) == null) {
                LOG.info(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL + " not set");
                System.setProperty(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL, DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL);
            }
            if (System.getProperty(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL) == null) {
                LOG.info(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL + " not set");
                System.setProperty(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL, DEFAULT_OB_TABLE_HBASE_LOG_LEVEL);
            }

            LOG.info("{} is set to {}, {} is set to {}",
                    OB_TABLE_CLIENT_PROPERTY, OB_HBASE_LOG_PATH, OB_TABLE_HBASE_PROPERTY, OB_HBASE_LOG_PATH);
            this.originalConfig = super.getPluginJobConf();
            ObHbaseReaderUtil.doPretreatment(originalConfig);
            List<Object> conns = originalConfig.getList(Constant.CONN_MARK, Object.class);
            // Logical (sharded) table configuration: one entry per connection.
            Preconditions.checkArgument(CollectionUtils.isNotEmpty(conns), "connection information is empty.");
            dealLogicConnAndTable(conns);
            if (LOG.isDebugEnabled()) {
                LOG.debug("After init(), now originalConfig is:\n{}\n", this.originalConfig);
            }
        }

        @Override
        public void destroy() {
        }

        /**
         * Normalizes each connection entry: resolves per-connection credentials
         * (falling back to job-level ones), validates/chooses the jdbcUrl,
         * expands table patterns, quotes OB MySQL reserved words, and records
         * the total table count under TABLE_NUMBER_MARK.
         */
        private void dealLogicConnAndTable(List<Object> conns) {
            String unifiedUsername = originalConfig.getString(Key.USERNAME);
            String unifiedPassword = originalConfig.getString(Key.PASSWORD);
            boolean useSqlReader = originalConfig.getBool(Key.USE_SQL_READER, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_USE_SQLREADER);
            boolean checkSlave = originalConfig.getBool(com.alibaba.datax.plugin.rdbms.reader.Key.CHECK_SLAVE, false);
            // Reserved-word list for OB's MySQL mode; matching is done on the upper-cased table name.
            Set<String> keywords = Arrays.stream(com.alibaba.datax.plugin.reader.obhbasereader.Constant.OBMYSQL_KEYWORDS.split(",")).collect(Collectors.toSet());
            List<String> preSql = originalConfig.getList(com.alibaba.datax.plugin.rdbms.reader.Key.PRE_SQL, String.class);

            int tableNum = 0;

            for (int i = 0, len = conns.size(); i < len; i++) {
                Configuration connConf = Configuration.from(conns.get(i).toString());
                String curUsername = connConf.getString(Key.USERNAME, unifiedUsername);
                Preconditions.checkArgument(StringUtils.isNotEmpty(curUsername), "username is empty.");
                String curPassword = connConf.getString(Key.PASSWORD, unifiedPassword);

                // Write the resolved credentials back onto connection[i].
                originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.USERNAME), curUsername);
                originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.PASSWORD), curPassword);

                List<String> jdbcUrls = connConf.getList(Key.JDBC_URL, new ArrayList<>(), String.class);
                String jdbcUrl;
                if (useSqlReader) {
                    // In SQL mode jdbcUrl is mandatory; the address is only checked in this mode.
                    Preconditions.checkArgument(CollectionUtils.isNotEmpty(jdbcUrls), "if using sql mode, jdbcUrl is needed");
                    jdbcUrl = DBUtil.chooseJdbcUrlWithoutRetry(DataBaseType.MySql, jdbcUrls, curUsername, curPassword, preSql, checkSlave);
                    jdbcUrl = DataBaseType.MySql.appendJDBCSuffixForReader(jdbcUrl);
                    // Write the chosen url back to connection[i].jdbcUrl.
                    originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.JDBC_URL), jdbcUrl);
                    LOG.info("Available jdbcUrl:{}.", jdbcUrl);
                } else {
                    // NOTE(review): get(0) throws IndexOutOfBoundsException when jdbcUrl is
                    // absent in non-SQL mode — confirm upstream validation guarantees it.
                    jdbcUrl = jdbcUrls.get(0);
                    jdbcUrl = StringUtils.isNotBlank(jdbcUrl) ? DataBaseType.MySql.appendJDBCSuffixForReader(jdbcUrl) : EMPTY;
                    checkAndSetHbaseConnConf(jdbcUrl, curUsername, curPassword, connConf, i);
                }

                // Table-list style configuration: expand each pattern on this connection
                // (backtick quoting is applied below where needed).
                List<String> tables = connConf.getList(Key.TABLE, String.class);

                List<String> expandedTables = TableExpandUtil.expandTableConf(DataBaseType.MySql, tables);

                if (expandedTables.isEmpty()) {
                    throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE, "The specified table list is empty.");
                }

                // Quote table names that collide with OB MySQL reserved words.
                for (int ti = 0; ti < expandedTables.size(); ti++) {
                    String tableName = expandedTables.get(ti);
                    if (keywords.contains(tableName.toUpperCase())) {
                        expandedTables.set(ti, "`" + tableName + "`");
                    }
                }
                tableNum += expandedTables.size();
                originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.TABLE), expandedTables);
            }

            if (tableNum == 0) {
                // Sharded read matched no extractable table.
                LOG.error("sharding rule result is empty.");
                // NOTE(review): relies on the single-String asDataXException overload — confirm it exists in this datax-common version.
                throw DataXException.asDataXException("No tables were matched");
            }
            originalConfig.set(Constant.TABLE_NUMBER_MARK, tableNum);
        }

        /**
         * In public cloud, only odp mode can be used.
         * In private cloud, both odp mode and ocp mode can be used.
         *
         * Resolves and writes back the per-connection server info: OCP mode needs
         * sysUser/sysPass/configUrl (queried from the cluster when absent); ODP
         * mode needs odpHost/odpPort. dbName is always written back.
         *
         * @param jdbcUrl resolved jdbc url for this connection (may be empty)
         * @param curUsername connection user
         * @param curPassword connection password
         * @param connConf this connection's sub-configuration
         * @param curIndex index of the connection entry being rewritten
         */
        private void checkAndSetHbaseConnConf(String jdbcUrl, String curUsername, String curPassword, Configuration connConf, int curIndex) {
            ServerConnectInfo serverConnectInfo = new ServerConnectInfo(jdbcUrl, curUsername, curPassword);
            if (!originalConfig.getBool(Key.USE_ODP_MODE, false)) {
                // Normally, only need to query at first time.
                // In ocp mode, dbName, configUrl, sysUser and sysPass are needed.
                String sysUser = connConf.getString(Key.OB_SYS_USERNAME, originalConfig.getString(Key.OB_SYS_USERNAME));
                String sysPass = connConf.getString(Key.OB_SYS_PASSWORD, originalConfig.getString(Key.OB_SYS_PASSWORD));
                serverConnectInfo.setSysUser(sysUser);
                serverConnectInfo.setSysPass(sysPass);
                String configUrl = connConf.getString(Key.CONFIG_URL, originalConfig.getString(Key.CONFIG_URL));
                if (StringUtils.isBlank(configUrl)) {
                    // Fall back to asking the cluster itself for obconfig_url.
                    configUrl = queryRsUrl(serverConnectInfo);
                }
                originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.USERNAME), curUsername);
                originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.OB_SYS_USERNAME), serverConnectInfo.sysUser);
                originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.OB_SYS_PASSWORD), serverConnectInfo.sysPass);
                originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.CONFIG_URL), configUrl);
            } else {
                // In odp mode, dbName, odp host and odp port are needed.
                String odpHost = connConf.getString(Key.ODP_HOST, serverConnectInfo.host);
                String odpPort = connConf.getString(Key.ODP_PORT, serverConnectInfo.port);
                originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.ODP_HOST), odpHost);
                originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.ODP_PORT), odpPort);
            }
            originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.DB_NAME), serverConnectInfo.databaseName);
        }

        /**
         * Queries the cluster's root-service config URL ("obconfig_url") via the
         * sys tenant, retrying up to 3 times with exponential backoff. Requires
         * OB >= 3.x. The resolved url is cached into originalConfig.
         *
         * @throws DataXException (REQUIRED_VALUE) when the url is neither
         *         configured nor obtainable from the cluster
         */
        private String queryRsUrl(ServerConnectInfo serverInfo) {
            Preconditions.checkArgument(checkVersionAfterV3(serverInfo.jdbcUrl, serverInfo.getFullUserName(), serverInfo.password), "ob before 3.x is not supported.");
            String configUrl = originalConfig.getString(Key.CONFIG_URL, null);
            if (configUrl == null) {
                try {
                    Connection conn = null;
                    int retry = 0;
                    // Connect to the sys tenant's "oceanbase" database instead of the user database.
                    final String sysJDBCUrl = serverInfo.jdbcUrl.replace(serverInfo.databaseName, "oceanbase");
                    do {
                        try {
                            if (retry > 0) {
                                // Exponential backoff in seconds; with retry < 3 the 500 branch is unreachable.
                                int sleep = retry > 9 ? 500 : 1 << retry;
                                try {
                                    TimeUnit.SECONDS.sleep(sleep);
                                } catch (InterruptedException e) {
                                }
                                LOG.warn("retry fetch RsUrl the {} times", retry);
                            }
                            conn = DBUtil.getConnection(DataBaseType.OceanBase, sysJDBCUrl, serverInfo.sysUser, serverInfo.sysPass);
                            String sql = "show parameters like 'obconfig_url'";
                            LOG.info("query param: {}", sql);
                            PreparedStatement stmt = conn.prepareStatement(sql);
                            ResultSet result = stmt.executeQuery();
                            if (result.next()) {
                                configUrl = result.getString("Value");
                            }
                            if (StringUtils.isNotBlank(configUrl)) {
                                break;
                            }
                        } catch (Exception e) {
                            ++retry;
                            LOG.warn("fetch root server list(rsList) error {}", e.getMessage());
                        } finally {
                            // NOTE(review): stmt/result are not closed explicitly; only the connection is.
                            DBUtil.closeDBResources(null, conn);
                        }
                    } while (retry < 3);

                    LOG.info("configure url is: " + configUrl);
                    originalConfig.set(Key.CONFIG_URL, configUrl);
                } catch (Exception e) {
                    LOG.error("Fail to get configure url: {}", e.getMessage(), e);
                    throw DataXException.asDataXException(HbaseReaderErrorCode.REQUIRED_VALUE, "未配置obConfigUrl，且无法获取obConfigUrl");
                }
            }
            return configUrl;
        }

        @Override
        public void prepare() {
        }

        @Override
        public void post() {
        }

        /**
         * Splits the job into task configurations. Validates that columns and a
         * single column family are configured, then delegates per-connection
         * splitting to {@link #splitLogicTables}.
         */
        @Override
        public List<Configuration> split(int adviceNumber) {
            Map<String, HbaseColumnCell> hbaseColumnCells = ObHbaseReaderUtil.parseColumn(originalConfig.getList(Key.COLUMN, Map.class));
            if (hbaseColumnCells.size() == 0) {
                LOG.error("no column cells specified.");
                throw new RuntimeException("no column cells specified");
            }
            String columnFamily = ObHbaseReaderUtil.parseColumnFamily(hbaseColumnCells.values());
            Preconditions.checkArgument(StringUtils.isNotEmpty(columnFamily), "column family is empty.");
            List<Object> conns = originalConfig.getList(Constant.CONN_MARK, Object.class);
            Preconditions.checkArgument(conns != null && !conns.isEmpty(), "connection information is necessary.");
            return splitLogicTables(adviceNumber, conns, columnFamily);
        }

        /**
         * Produces one slice configuration per (connection, table, split) using
         * either SQL-based splitting or rowkey-range (HBase scan) splitting.
         */
        private List<Configuration> splitLogicTables(int adviceNumber, List<Object> conns, String columnFamily) {
            // adviceNumber is the channel count, i.e. the number of concurrent DataX tasks.
            // eachTableShouldSplittedNumber is how many slices a single table should yield.
            int eachTableShouldSplittedNumber = (int) Math.ceil(1.0 * adviceNumber / originalConfig.getInt(Constant.TABLE_NUMBER_MARK));
            boolean useSqlReader = originalConfig.getBool(Key.USE_SQL_READER, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_USE_SQLREADER);
            boolean odpMode = originalConfig.getBool(Key.USE_ODP_MODE, DEFAULT_USE_ODPMODE);
            boolean readByPartition = originalConfig.getBool(Key.READ_BY_PARTITION, false);
            List<Configuration> splittedConfigs = new ArrayList<>();

            for (int i = 0, len = conns.size(); i < len; i++) {
                Configuration sliceConfig = originalConfig.clone();
                Configuration connConf = Configuration.from(conns.get(i).toString());
                copyConnConfByMode(useSqlReader, odpMode, sliceConfig, connConf);
                // Table-list configuration: already expanded and backtick-quoted in init(),
                // so the names can be used as-is here.
                List<String> tables = connConf.getList(Key.TABLE, String.class);
                Validate.isTrue(null != tables && !tables.isEmpty(), "error in your configuration for the reading database table.");
                int tempEachTableShouldSplittedNumber = eachTableShouldSplittedNumber;
                if (tables.size() == 1) {
                    // A single table gets extra splits (splitFactor) to keep all channels busy.
                    Integer splitFactor = originalConfig.getInt(com.alibaba.datax.plugin.rdbms.reader.Key.SPLIT_FACTOR, Constant.SPLIT_FACTOR);
                    tempEachTableShouldSplittedNumber = eachTableShouldSplittedNumber * splitFactor;
                }
                for (String table : tables) {
                    Configuration tempSlice;
                    tempSlice = sliceConfig.clone();
                    tempSlice.set(Key.TABLE, table);
                    splittedConfigs.addAll(
                            useSqlReader ? SqlReaderSplitUtil.splitSingleTable(tempSlice, table, columnFamily, tempEachTableShouldSplittedNumber, readByPartition) : HbaseSplitUtil.split(tempSlice));
                }
            }
            return splittedConfigs;
        }

        /**
         * Copies the connection-level settings required by the chosen mode
         * (SQL / ODP / OCP) from the connection entry up to the slice's
         * top level, then drops the connection array from the slice.
         */
        private void copyConnConfByMode(boolean useSqlReader, boolean odpMode, Configuration targetConf, Configuration sourceConnConf) {
            String username = sourceConnConf.getNecessaryValue(Key.USERNAME, DBUtilErrorCode.REQUIRED_VALUE);
            targetConf.set(Key.USERNAME, username);
            String password = sourceConnConf.getNecessaryValue(Key.PASSWORD, DBUtilErrorCode.REQUIRED_VALUE);
            targetConf.set(Key.PASSWORD, password);

            if (useSqlReader) {
                String jdbcUrl = sourceConnConf.getNecessaryValue(Key.JDBC_URL, DBUtilErrorCode.REQUIRED_VALUE);
                targetConf.set(Key.JDBC_URL, jdbcUrl);
            } else if (odpMode) {
                String dbName = sourceConnConf.getNecessaryValue(Key.DB_NAME, DBUtilErrorCode.REQUIRED_VALUE);
                targetConf.set(Key.DB_NAME, dbName);
                String odpHost = sourceConnConf.getNecessaryValue(Key.ODP_HOST, DBUtilErrorCode.REQUIRED_VALUE);
                targetConf.set(Key.ODP_HOST, odpHost);
                String odpPort = sourceConnConf.getNecessaryValue(Key.ODP_PORT, DBUtilErrorCode.REQUIRED_VALUE);
                targetConf.set(Key.ODP_PORT, odpPort);
            } else {
                String dbName = sourceConnConf.getNecessaryValue(Key.DB_NAME, DBUtilErrorCode.REQUIRED_VALUE);
                targetConf.set(Key.DB_NAME, dbName);
                String sysUser = sourceConnConf.getNecessaryValue(Key.OB_SYS_USERNAME, DBUtilErrorCode.REQUIRED_VALUE);
                targetConf.set(Key.OB_SYS_USERNAME, sysUser);
                // sys password is optional (may legitimately be empty).
                String sysPass = sourceConnConf.getString(Key.OB_SYS_PASSWORD);
                targetConf.set(Key.OB_SYS_PASSWORD, sysPass);
            }
            targetConf.remove(Constant.CONN_MARK);
        }

        /**
         * Returns true when the target OceanBase version is >= 3.x.
         * Retries up to 4 connection attempts (1s apart); an access-denied
         * error is rethrown immediately since retrying cannot help.
         */
        private boolean checkVersionAfterV3(String jdbcUrl, String username, String password) {
            int retryLimit = 3;
            int retryCount = 0;
            Connection conn = null;
            while (retryCount++ <= retryLimit) {
                try {
                    conn = DBUtil.getConnectionWithoutRetry(DataBaseType.MySql, jdbcUrl, username, password);
                    ObVersion obVersion = ObReaderUtils.getObVersion(conn);
                    return ObVersion.V3.compareTo(obVersion) <= 0;
                } catch (Exception e) {
                    LOG.error("fail to check ob version, will retry: " + e.getMessage());
                    // NOTE(review): e.getMessage() may be null here, which would NPE — confirm driver behavior.
                    if (e.getMessage().contains(ACCESS_DENIED_ERROR)) {
                        throw new RuntimeException(e);
                    }
                    try {
                        TimeUnit.SECONDS.sleep(1);
                    } catch (Exception ex) {
                        LOG.error("interrupted while waiting for retry.");
                    }
                } finally {
                    DBUtil.closeDBResources(null, conn);
                }
            }
            return false;
        }
    }

    /**
     * Task-side half of the reader: instantiates the concrete reading strategy
     * from the slice configuration and pumps records to the framework.
     */
    public static class Task extends Reader.Task {
        private static Logger LOG = LoggerFactory.getLogger(Task.class);
        // This task's slice configuration produced by Job.split().
        private Configuration taskConfig;
        // Concrete read strategy (SQL or scan based); created in init().
        private AbstractHbaseTask hbaseTaskProxy;

        @Override
        public void init() {
            this.taskConfig = super.getPluginJobConf();

            String mode = this.taskConfig.getString(Key.MODE);
            ModeType modeType = ModeType.getByTypeName(mode);
            boolean useSqlReader = this.taskConfig.getBool(Key.USE_SQL_READER, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_USE_SQLREADER);
            LOG.info("init reader with mode: " + modeType);

            // NOTE: MultiVersionDynamicColumn falls into default and is rejected here.
            switch (modeType) {
                case Normal:
                    this.hbaseTaskProxy = useSqlReader ? new SQLNormalModeReader(this.taskConfig) : new ScanNormalModeReader(this.taskConfig);
                    break;
                case MultiVersionFixedColumn:
                    this.hbaseTaskProxy = new ScanMultiVersionReader(this.taskConfig);
                    break;
                default:
                    throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, "This type of mode is not supported by hbasereader:" + modeType);
            }
        }

        @Override
        public void destroy() {
            if (this.hbaseTaskProxy != null) {
                try {
                    this.hbaseTaskProxy.close();
                } catch (Exception e) {
                    // best-effort close; errors during shutdown are deliberately ignored
                }
            }
        }

        @Override
        public void prepare() {
            try {
                this.hbaseTaskProxy.prepare();
            } catch (Exception e) {
                throw DataXException.asDataXException(HbaseReaderErrorCode.PREPAR_READ_ERROR, e);
            }
        }

        @Override
        public void post() {
            super.post();
        }

        /**
         * Pulls records one at a time from the strategy until it reports no more
         * lines. A failed fetch is recorded as a dirty record and retried with a
         * fresh record; after maxRetryTimes consecutive failures the task aborts.
         */
        @Override
        public void startRead(RecordSender recordSender) {
            Record record = recordSender.createRecord();
            boolean fetchOK;
            int retryTimes = 0;
            int maxRetryTimes = 3;
            while (true) {
                try {
                    // TODO check exception
                    fetchOK = this.hbaseTaskProxy.fetchLine(record);
                } catch (Exception e) {
                    // NOTE(review): logged at info level although it is a failure — consider warn.
                    LOG.info("fetch record failed. reason: {}.", e.getMessage(), e);
                    super.getTaskPluginCollector().collectDirtyRecord(record, e);
                    if (retryTimes++ > maxRetryTimes) {
                        throw DataXException.asDataXException(HbaseReaderErrorCode.READ_ERROR, "read from obhbase failed", e);
                    }
                    record = recordSender.createRecord();
                    continue;
                }
                if (fetchOK) {
                    recordSender.sendToWriter(record);
                    record = recordSender.createRecord();
                } else {
                    break;
                }
            }
            recordSender.flush();
        }
    }
}
|
@ -0,0 +1,44 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader.enums;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.exception.DataXException;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 只对 normal 模式读取时有用,多版本读取时,不存在列类型的
|
||||||
|
*/
|
||||||
|
public enum ColumnType {
|
||||||
|
STRING("string"),
|
||||||
|
BINARY_STRING("binarystring"),
|
||||||
|
BYTES("bytes"),
|
||||||
|
BOOLEAN("boolean"),
|
||||||
|
SHORT("short"),
|
||||||
|
INT("int"),
|
||||||
|
LONG("long"),
|
||||||
|
FLOAT("float"),
|
||||||
|
DOUBLE("double"),
|
||||||
|
DATE("date");
|
||||||
|
|
||||||
|
private String typeName;
|
||||||
|
|
||||||
|
ColumnType(String typeName) {
|
||||||
|
this.typeName = typeName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ColumnType getByTypeName(String typeName) {
|
||||||
|
for (ColumnType columnType : values()) {
|
||||||
|
if (columnType.typeName.equalsIgnoreCase(typeName)) {
|
||||||
|
return columnType;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE,
|
||||||
|
String.format("The type %s is not supported by hbasereader, currently supported type is:%s .", typeName, Arrays.asList(values())));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return this.typeName;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,28 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader.enums;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.exception.DataXException;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
public enum FetchVersion {
|
||||||
|
|
||||||
|
OLDEST("oldest"), LATEST("latest");
|
||||||
|
|
||||||
|
private final String version;
|
||||||
|
|
||||||
|
FetchVersion(String version) {
|
||||||
|
this.version = version;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static FetchVersion getByDesc(String name) {
|
||||||
|
Optional<FetchVersion> result = Stream.of(values()).filter(v -> v.version.equalsIgnoreCase(name))
|
||||||
|
.findFirst();
|
||||||
|
return result.orElseThrow(() -> {
|
||||||
|
return DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE,
|
||||||
|
String.format("obHBasereader 不支持该类型:%s, 目前支持的类型是:%s", name, Arrays.asList(values())));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,30 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader.enums;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.exception.DataXException;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
public enum ModeType {
|
||||||
|
Normal("normal"),
|
||||||
|
MultiVersionFixedColumn("multiVersionFixedColumn"),
|
||||||
|
MultiVersionDynamicColumn("multiVersionDynamicColumn"),
|
||||||
|
;
|
||||||
|
|
||||||
|
private String mode;
|
||||||
|
|
||||||
|
ModeType(String mode) {
|
||||||
|
this.mode = mode.toLowerCase();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ModeType getByTypeName(String modeName) {
|
||||||
|
for (ModeType modeType : values()) {
|
||||||
|
if (modeType.mode.equalsIgnoreCase(modeName)) {
|
||||||
|
return modeType;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
throw DataXException.asDataXException(
|
||||||
|
HbaseReaderErrorCode.ILLEGAL_VALUE, String.format("The mode type is not supported by hbasereader:%s, and the currently supported mode type is:%s", modeName, Arrays.asList(values())));
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,146 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader.ext;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import static org.apache.commons.lang3.StringUtils.EMPTY;
|
||||||
|
|
||||||
|
public class ServerConnectInfo {
|
||||||
|
|
||||||
|
public String clusterName;
|
||||||
|
public String tenantName;
|
||||||
|
// userName doesn't contain tenantName or clusterName
|
||||||
|
public String userName;
|
||||||
|
public String password;
|
||||||
|
public String databaseName;
|
||||||
|
public String ipPort;
|
||||||
|
public String jdbcUrl;
|
||||||
|
public String host;
|
||||||
|
public String port;
|
||||||
|
public boolean publicCloud;
|
||||||
|
public int rpcPort;
|
||||||
|
public String sysUser;
|
||||||
|
public String sysPass;
|
||||||
|
|
||||||
|
/**
 * Convenience constructor for connections without separate sys-tenant
 * credentials; delegates with null sysUser/sysPass.
 *
 * @param jdbcUrl format is jdbc:oceanbase//ip:port
 * @param username format is cluster:tenant:username or username@tenant#cluster or user@tenant or user
 * @param password connection password
 */
public ServerConnectInfo(final String jdbcUrl, final String username, final String password) {
    this(jdbcUrl, username, password, null, null);
}
|
||||||
|
|
||||||
|
public ServerConnectInfo(final String jdbcUrl, final String username, final String password, final String sysUser, final String sysPass) {
|
||||||
|
if (jdbcUrl.startsWith(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING)) {
|
||||||
|
String[] ss = jdbcUrl.split(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING_PATTERN);
|
||||||
|
Preconditions.checkArgument(ss.length == 3, "jdbc url format is not correct:" + jdbcUrl);
|
||||||
|
this.userName = username;
|
||||||
|
this.clusterName = ss[1].trim().split(":")[0];
|
||||||
|
this.tenantName = ss[1].trim().split(":")[1];
|
||||||
|
this.jdbcUrl = ss[2];
|
||||||
|
} else {
|
||||||
|
this.jdbcUrl = jdbcUrl;
|
||||||
|
}
|
||||||
|
this.password = password;
|
||||||
|
this.sysUser = sysUser;
|
||||||
|
this.sysPass = sysPass;
|
||||||
|
parseJdbcUrl(jdbcUrl);
|
||||||
|
parseFullUserName(username);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void parseJdbcUrl(final String jdbcUrl) {
|
||||||
|
Pattern pattern = Pattern.compile("//([\\w\\.\\-]+:\\d+)/([\\w-]+)\\?");
|
||||||
|
Matcher matcher = pattern.matcher(jdbcUrl);
|
||||||
|
if (matcher.find()) {
|
||||||
|
String ipPort = matcher.group(1);
|
||||||
|
String dbName = matcher.group(2);
|
||||||
|
this.ipPort = ipPort;
|
||||||
|
String[] hostPort = ipPort.split(":");
|
||||||
|
this.host = hostPort[0];
|
||||||
|
this.port = hostPort[1];
|
||||||
|
this.databaseName = dbName;
|
||||||
|
this.publicCloud = host.endsWith("aliyuncs.com");
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException("Invalid argument:" + jdbcUrl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void parseFullUserName(final String fullUserName) {
|
||||||
|
int tenantIndex = fullUserName.indexOf("@");
|
||||||
|
int clusterIndex = fullUserName.indexOf("#");
|
||||||
|
// 适用于jdbcUrl以||_dsc_ob10_dsc_开头的场景
|
||||||
|
if (fullUserName.contains(":") && tenantIndex < 0) {
|
||||||
|
String[] names = fullUserName.split(":");
|
||||||
|
if (names.length != 3) {
|
||||||
|
throw new RuntimeException("invalid argument: " + fullUserName);
|
||||||
|
} else {
|
||||||
|
this.clusterName = names[0];
|
||||||
|
this.tenantName = names[1];
|
||||||
|
this.userName = names[2];
|
||||||
|
}
|
||||||
|
} else if (tenantIndex < 0) {
|
||||||
|
// 适用于short jdbcUrl,且username中不含租户名(主要是公有云场景,此场景下不计算分区)
|
||||||
|
this.userName = fullUserName;
|
||||||
|
this.clusterName = EMPTY;
|
||||||
|
this.tenantName = EMPTY;
|
||||||
|
} else {
|
||||||
|
// 适用于short jdbcUrl,且username中含租户名
|
||||||
|
this.userName = fullUserName.substring(0, tenantIndex);
|
||||||
|
if (clusterIndex < 0) {
|
||||||
|
this.clusterName = EMPTY;
|
||||||
|
this.tenantName = fullUserName.substring(tenantIndex + 1);
|
||||||
|
} else {
|
||||||
|
this.clusterName = fullUserName.substring(clusterIndex + 1);
|
||||||
|
this.tenantName = fullUserName.substring(tenantIndex + 1, clusterIndex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "ServerConnectInfo{" +
|
||||||
|
"clusterName='" + clusterName + '\'' +
|
||||||
|
", tenantName='" + tenantName + '\'' +
|
||||||
|
", userName='" + userName + '\'' +
|
||||||
|
", password='" + password + '\'' +
|
||||||
|
", databaseName='" + databaseName + '\'' +
|
||||||
|
", ipPort='" + ipPort + '\'' +
|
||||||
|
", jdbcUrl='" + jdbcUrl + '\'' +
|
||||||
|
", publicCloud=" + publicCloud +
|
||||||
|
", rpcPort=" + rpcPort +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFullUserName() {
|
||||||
|
StringBuilder builder = new StringBuilder();
|
||||||
|
builder.append(userName);
|
||||||
|
if (publicCloud || (rpcPort != 0 && EMPTY.equals(clusterName))) {
|
||||||
|
return builder.toString();
|
||||||
|
}
|
||||||
|
if (!EMPTY.equals(tenantName)) {
|
||||||
|
builder.append("@").append(tenantName);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!EMPTY.equals(clusterName)) {
|
||||||
|
builder.append("#").append(clusterName);
|
||||||
|
}
|
||||||
|
if (EMPTY.equals(this.clusterName) && EMPTY.equals(this.tenantName)) {
|
||||||
|
return this.userName;
|
||||||
|
}
|
||||||
|
return builder.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRpcPort(int rpcPort) {
|
||||||
|
this.rpcPort = rpcPort;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSysUser(String sysUser) {
|
||||||
|
this.sysUser = sysUser;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSysPass(String sysPass) {
|
||||||
|
this.sysPass = sysPass;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,41 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader.task;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.element.Column;
|
||||||
|
import com.alibaba.datax.common.element.Record;
|
||||||
|
import com.alibaba.datax.common.util.Configuration;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.Constant;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.Key;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.enums.ModeType;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public abstract class AbstractHbaseTask {
|
||||||
|
protected String encoding;
|
||||||
|
protected String timezone = null;
|
||||||
|
protected Map<String, HbaseColumnCell> hbaseColumnCellMap;
|
||||||
|
// 常量字段
|
||||||
|
protected Map<String, Column> constantMap;
|
||||||
|
protected ModeType modeType;
|
||||||
|
|
||||||
|
public AbstractHbaseTask() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public AbstractHbaseTask(Configuration configuration) {
|
||||||
|
this.timezone = configuration.getString(Key.TIMEZONE, Constant.DEFAULT_TIMEZONE);
|
||||||
|
this.encoding = configuration.getString(Key.ENCODING, Constant.DEFAULT_ENCODING);
|
||||||
|
String mode = configuration.getString(Key.MODE, "Normal");
|
||||||
|
this.modeType = ModeType.getByTypeName(mode);
|
||||||
|
this.constantMap = new HashMap<>();
|
||||||
|
this.hbaseColumnCellMap = ObHbaseReaderUtil.parseColumn(configuration.getList(Key.COLUMN, Map.class), constantMap, encoding, timezone);
|
||||||
|
}
|
||||||
|
|
||||||
|
public abstract void prepare() throws Exception;
|
||||||
|
|
||||||
|
public abstract boolean fetchLine(Record record) throws Exception;
|
||||||
|
|
||||||
|
public abstract void close() throws IOException;
|
||||||
|
}
|
@ -0,0 +1,99 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader.task;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.util.Configuration;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.Constant;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.HTableManager;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.Key;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil;
|
||||||
|
|
||||||
|
import com.alipay.oceanbase.hbase.OHTable;
|
||||||
|
import org.apache.hadoop.hbase.client.Result;
|
||||||
|
import org.apache.hadoop.hbase.client.ResultScanner;
|
||||||
|
import org.apache.hadoop.hbase.client.Scan;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public abstract class AbstractScanReader extends AbstractHbaseTask {
|
||||||
|
private static Logger LOG = LoggerFactory.getLogger(AbstractScanReader.class);
|
||||||
|
protected OHTable ohtable;
|
||||||
|
protected Result lastResult = null;
|
||||||
|
protected Scan scan;
|
||||||
|
protected ResultScanner resultScanner;
|
||||||
|
protected int maxVersion;
|
||||||
|
private int scanCache;
|
||||||
|
private byte[] startKey = null;
|
||||||
|
private byte[] endKey = null;
|
||||||
|
|
||||||
|
public AbstractScanReader(Configuration configuration) {
|
||||||
|
super(configuration);
|
||||||
|
this.maxVersion = configuration.getInt(Key.MAX_VERSION, 1);
|
||||||
|
this.scanCache = configuration.getInt(Key.SCAN_CACHE, Constant.DEFAULT_SCAN_CACHE);
|
||||||
|
this.ohtable = ObHbaseReaderUtil.initOHtable(configuration);
|
||||||
|
this.startKey = ObHbaseReaderUtil.convertInnerStartRowkey(configuration);
|
||||||
|
this.endKey = ObHbaseReaderUtil.convertInnerEndRowkey(configuration);
|
||||||
|
LOG.info("The task set startRowkey=[{}], endRowkey=[{}].", Bytes.toStringBinary(this.startKey), Bytes.toStringBinary(this.endKey));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void prepare() throws Exception {
|
||||||
|
this.scan = new Scan();
|
||||||
|
this.scan.setSmall(false);
|
||||||
|
this.scan.setCacheBlocks(false);
|
||||||
|
this.scan.setStartRow(startKey);
|
||||||
|
this.scan.setStopRow(endKey);
|
||||||
|
LOG.info("The task set startRowkey=[{}], endRowkey=[{}].", Bytes.toStringBinary(this.startKey), Bytes.toStringBinary(this.endKey));
|
||||||
|
this.scan.setCaching(this.scanCache);
|
||||||
|
if (this.maxVersion == -1 || this.maxVersion == Integer.MAX_VALUE) {
|
||||||
|
this.scan.setMaxVersions();
|
||||||
|
} else {
|
||||||
|
this.scan.setMaxVersions(this.maxVersion);
|
||||||
|
}
|
||||||
|
initScanColumns();
|
||||||
|
this.resultScanner = this.ohtable.getScanner(this.scan);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
if (this.resultScanner != null) {
|
||||||
|
this.resultScanner.close();
|
||||||
|
}
|
||||||
|
HTableManager.closeHTable(this.ohtable);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void initScanColumns() {
|
||||||
|
boolean isConstant;
|
||||||
|
boolean isRowkeyColumn;
|
||||||
|
for (HbaseColumnCell cell : this.hbaseColumnCellMap.values()) {
|
||||||
|
isConstant = cell.isConstant();
|
||||||
|
isRowkeyColumn = ObHbaseReaderUtil.isRowkeyColumn(cell.getColumnName());
|
||||||
|
if (!isConstant && !isRowkeyColumn) {
|
||||||
|
LOG.info("columnFamily: " + new String(cell.getCf()) + ", qualifier: " + new String(cell.getQualifier()));
|
||||||
|
this.scan.addColumn(cell.getCf(), cell.getQualifier());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Result getNextHbaseRow() throws Exception {
|
||||||
|
Result result = null;
|
||||||
|
try {
|
||||||
|
result = resultScanner.next();
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.error("failed to get result", e);
|
||||||
|
if (lastResult != null) {
|
||||||
|
scan.setStartRow(lastResult.getRow());
|
||||||
|
}
|
||||||
|
resultScanner = this.ohtable.getScanner(scan);
|
||||||
|
result = resultScanner.next();
|
||||||
|
if (lastResult != null && Bytes.equals(lastResult.getRow(), result.getRow())) {
|
||||||
|
result = resultScanner.next();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lastResult = result;
|
||||||
|
// may be null
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,257 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader.task;
|
||||||
|
|
||||||
|
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_READ_HINT;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.element.Column;
|
||||||
|
import com.alibaba.datax.common.element.Record;
|
||||||
|
import com.alibaba.datax.common.exception.DataXException;
|
||||||
|
import com.alibaba.datax.common.util.Configuration;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.util.DBUtil;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.Constant;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.Key;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.enums.FetchVersion;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil;
|
||||||
|
import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ObReaderUtils;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.PreparedStatement;
|
||||||
|
import java.sql.ResultSet;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.function.Predicate;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
public class SQLNormalModeReader extends AbstractHbaseTask {
|
||||||
|
private final static String QUERY_SQL_TEMPLATE = "select %s K, Q, T, V, hex(K) as `hex` from %s %s";
|
||||||
|
private static Logger LOG = LoggerFactory.getLogger(SQLNormalModeReader.class);
|
||||||
|
private final Map<String, byte[]> columnMap;
|
||||||
|
private final Map<String, Long> versionMap;
|
||||||
|
private final FetchVersion fetchVersion;
|
||||||
|
private Set<String> columnNames;
|
||||||
|
private boolean noMoreData = false;
|
||||||
|
private String querySQL = null;
|
||||||
|
private Connection conn = null;
|
||||||
|
private PreparedStatement stmt = null;
|
||||||
|
private ResultSet rs = null;
|
||||||
|
private String jdbcUrl = null;
|
||||||
|
private String columnFamily = null;
|
||||||
|
private String username = null;
|
||||||
|
private String password = null;
|
||||||
|
private int fetchSize = com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_FETCH_SIZE;
|
||||||
|
private long readBatchSize = com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_READ_BATCH_SIZE;
|
||||||
|
private Configuration configuration;
|
||||||
|
private boolean hasRange = false;
|
||||||
|
private String[] savepoint = new String[3];
|
||||||
|
// only used by unit test
|
||||||
|
protected boolean reuseConn = false;
|
||||||
|
|
||||||
|
public SQLNormalModeReader(Configuration configuration) {
|
||||||
|
this.configuration = configuration;
|
||||||
|
this.hbaseColumnCellMap = ObHbaseReaderUtil.parseColumn(configuration.getList(Key.COLUMN, Map.class));
|
||||||
|
if (hbaseColumnCellMap.size() == 0) {
|
||||||
|
LOG.error("no column cells specified.");
|
||||||
|
throw new RuntimeException("no column cells specified");
|
||||||
|
}
|
||||||
|
columnFamily = ObHbaseReaderUtil.parseColumnFamily(hbaseColumnCellMap.values());
|
||||||
|
this.columnNames =
|
||||||
|
hbaseColumnCellMap.keySet().stream().map(e -> ObHbaseReaderUtil.isRowkeyColumn(e) ? Constant.ROWKEY_FLAG : e.substring((columnFamily + ":").length())).collect(Collectors.toSet());
|
||||||
|
|
||||||
|
String partInfo = "";
|
||||||
|
String partName = configuration.getString(Key.PARTITION_NAME, null);
|
||||||
|
if (partName != null) {
|
||||||
|
partInfo = "partition(" + partName + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
String tableName = configuration.getString(Key.TABLE, null);
|
||||||
|
String hint = configuration.getString(Key.READER_HINT, OB_READ_HINT);
|
||||||
|
this.hasRange = !StringUtils.isEmpty(configuration.getString(Key.RANGE, null));
|
||||||
|
this.querySQL = String.format(QUERY_SQL_TEMPLATE, hint, tableName + "$" + columnFamily, partInfo);
|
||||||
|
if (hasRange) {
|
||||||
|
this.querySQL = querySQL + " where (" + configuration.getString(Key.RANGE) + ")";
|
||||||
|
}
|
||||||
|
this.jdbcUrl = configuration.getString(Key.JDBC_URL, null);
|
||||||
|
this.username = configuration.getString(Key.USERNAME, null);
|
||||||
|
this.password = configuration.getString(Key.PASSWORD, null);
|
||||||
|
this.columnMap = Maps.newHashMap();
|
||||||
|
this.versionMap = Maps.newHashMap();
|
||||||
|
this.fetchVersion = FetchVersion.getByDesc(configuration.getString("version", FetchVersion.LATEST.name()));
|
||||||
|
this.timezone = configuration.getString(Key.TIMEZONE, "UTC");
|
||||||
|
this.encoding = configuration.getString(Key.ENCODING, Constant.DEFAULT_ENCODING);
|
||||||
|
this.fetchSize = configuration.getInt(Key.FETCH_SIZE, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_FETCH_SIZE);
|
||||||
|
this.readBatchSize = configuration.getLong(Key.READ_BATCH_SIZE, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_READ_BATCH_SIZE);
|
||||||
|
LOG.info("read from jdbcUrl {} with fetchSize {}, readBatchSize {}", jdbcUrl, fetchSize, readBatchSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean notFinished(String currentKey) throws SQLException {
|
||||||
|
boolean updateSuccess = updateResultSet();
|
||||||
|
if (updateSuccess) {
|
||||||
|
String newKey = rs.getString("K");
|
||||||
|
return newKey.equals(currentKey);
|
||||||
|
} else {
|
||||||
|
noMoreData = true;
|
||||||
|
Arrays.fill(savepoint, null);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean updateResultSet() throws SQLException {
|
||||||
|
if (rs != null && rs.next()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (savepoint[0] != null) {
|
||||||
|
int retryLimit = 10;
|
||||||
|
int retryCount = 0;
|
||||||
|
String tempQuery = querySQL + (hasRange ? " and " : " where ") + "(K,Q,T) > (unhex(?),?,?) order by K,Q,T limit " + readBatchSize;
|
||||||
|
while (retryCount < retryLimit) {
|
||||||
|
retryCount++;
|
||||||
|
try {
|
||||||
|
resetConnection();
|
||||||
|
DBUtil.closeDBResources(rs, stmt, null);
|
||||||
|
stmt = conn.prepareStatement(tempQuery, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
|
||||||
|
stmt.setFetchSize(fetchSize);
|
||||||
|
for (int i = 0; i < savepoint.length; i++) {
|
||||||
|
stmt.setObject(i + 1, savepoint[i]);
|
||||||
|
}
|
||||||
|
rs = stmt.executeQuery();
|
||||||
|
if (rs.next()) {
|
||||||
|
LOG.info("execute sql: {}, savepoint:[{}]", tempQuery, Arrays.stream(savepoint).map(e -> "'" + e + "'").collect(Collectors.joining(",")));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// All data in this task are read
|
||||||
|
break;
|
||||||
|
} catch (Exception ex) {
|
||||||
|
LOG.error("failed to query sql, will retry {} times", retryCount, ex);
|
||||||
|
DBUtil.closeDBResources(rs, stmt, conn);
|
||||||
|
if (retryCount > retryLimit) {
|
||||||
|
LOG.error("Sql: [{}] executed failed, savepoint:[{}], reason: {}", tempQuery, Arrays.stream(savepoint).map(e -> "'" + e + "'").collect(Collectors.joining(",")),
|
||||||
|
ex.getMessage());
|
||||||
|
throw new RuntimeException(ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void prepare() {
|
||||||
|
int retryLimit = 10;
|
||||||
|
int retryCount = 0;
|
||||||
|
while (true) {
|
||||||
|
retryCount++;
|
||||||
|
try {
|
||||||
|
resetConnection();
|
||||||
|
String tempQuery = querySQL + " order by K,Q,T limit " + readBatchSize;
|
||||||
|
stmt = conn.prepareStatement(tempQuery, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
|
||||||
|
stmt.setFetchSize(fetchSize);
|
||||||
|
LOG.info("execute sql : {}", tempQuery);
|
||||||
|
rs = stmt.executeQuery();
|
||||||
|
if (!rs.next()) {
|
||||||
|
noMoreData = true;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.error("failed to query sql, will retry {} times", retryCount, e);
|
||||||
|
DBUtil.closeDBResources(rs, stmt, conn);
|
||||||
|
if (retryCount > retryLimit) {
|
||||||
|
LOG.error("Sql: [{}] executed failed, reason: {}", querySQL, e.getMessage());
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean fetchLine(Record record) throws Exception {
|
||||||
|
try {
|
||||||
|
if (noMoreData) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
String currentKey = rs.getString("K");
|
||||||
|
savepoint[0] = rs.getString("hex");
|
||||||
|
columnMap.put(Constant.ROWKEY_FLAG, currentKey.getBytes());
|
||||||
|
do {
|
||||||
|
String columnName = rs.getString("Q");
|
||||||
|
savepoint[1] = columnName;
|
||||||
|
if (!this.columnNames.contains(columnName)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Long version = rs.getLong("T");
|
||||||
|
savepoint[2] = String.valueOf(version);
|
||||||
|
byte[] value = rs.getBytes("V");
|
||||||
|
Predicate<Long> predicate;
|
||||||
|
switch (this.fetchVersion) {
|
||||||
|
case OLDEST:
|
||||||
|
predicate = v -> v.compareTo(versionMap.getOrDefault(columnName, Long.MIN_VALUE)) > 0;
|
||||||
|
break;
|
||||||
|
case LATEST:
|
||||||
|
predicate = v -> v.compareTo(versionMap.getOrDefault(columnName, Long.MAX_VALUE)) < 0;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, "Not support version: " + this.fetchVersion);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (predicate.test(version)) {
|
||||||
|
versionMap.put(columnName, version);
|
||||||
|
columnMap.put(columnName, value);
|
||||||
|
}
|
||||||
|
} while (notFinished(currentKey));
|
||||||
|
|
||||||
|
for (HbaseColumnCell cell : this.hbaseColumnCellMap.values()) {
|
||||||
|
Column column = null;
|
||||||
|
if (cell.isConstant()) {
|
||||||
|
// 对常量字段的处理
|
||||||
|
column = this.constantMap.get(cell.getColumnName());
|
||||||
|
} else {
|
||||||
|
String columnName = ObHbaseReaderUtil.isRowkeyColumn(cell.getColumnName()) ? Constant.ROWKEY_FLAG : cell.getColumnName().substring((columnFamily + ":").length());
|
||||||
|
byte[] value = null;
|
||||||
|
if (!columnMap.containsKey(columnName)) {
|
||||||
|
LOG.debug("{} is not contained in the record with K value={}. consider this record as null record.", columnName, currentKey);
|
||||||
|
} else {
|
||||||
|
value = columnMap.get(columnName);
|
||||||
|
}
|
||||||
|
column = ObHbaseReaderUtil.buildColumn(value, cell.getColumnType(), encoding, cell.getDateformat(), timezone);
|
||||||
|
}
|
||||||
|
record.addColumn(column);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
this.columnMap.clear();
|
||||||
|
this.versionMap.clear();
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
DBUtil.closeDBResources(rs, stmt, conn);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void resetConnection() throws SQLException {
|
||||||
|
if (reuseConn && conn != null && !conn.isClosed()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// set ob_query_timeout and ob_trx_timeout to a large time in case timeout
|
||||||
|
int queryTimeoutSeconds = 60 * 60 * 48;
|
||||||
|
String setQueryTimeout = "set ob_query_timeout=" + (queryTimeoutSeconds * 1000 * 1000L);
|
||||||
|
String setTrxTimeout = "set ob_trx_timeout=" + ((queryTimeoutSeconds + 5) * 1000 * 1000L);
|
||||||
|
List<String> newSessionConfig = Lists.newArrayList(setQueryTimeout, setTrxTimeout);
|
||||||
|
List<String> sessionConfig = configuration.getList(Key.SESSION, new ArrayList<>(), String.class);
|
||||||
|
newSessionConfig.addAll(sessionConfig);
|
||||||
|
configuration.set(Key.SESSION, newSessionConfig);
|
||||||
|
conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, this.username, this.password);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,98 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader.task;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.element.Column;
|
||||||
|
import com.alibaba.datax.common.element.LongColumn;
|
||||||
|
import com.alibaba.datax.common.element.Record;
|
||||||
|
import com.alibaba.datax.common.exception.DataXException;
|
||||||
|
import com.alibaba.datax.common.util.Configuration;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.Constant;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.enums.ColumnType;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.KeyValue;
|
||||||
|
import org.apache.hadoop.hbase.client.Result;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class ScanMultiVersionReader extends AbstractScanReader {
|
||||||
|
private final static Logger LOG = LoggerFactory.getLogger(ScanMultiVersionReader.class);
|
||||||
|
private static byte[] COLON_BYTE;
|
||||||
|
private List<KeyValue> kvList = new ArrayList<>();
|
||||||
|
private int currentReadPosition = 0;
|
||||||
|
|
||||||
|
// rowKey类型
|
||||||
|
private ColumnType rowkeyReadoutType = null;
|
||||||
|
|
||||||
|
public ScanMultiVersionReader(Configuration configuration) {
|
||||||
|
super(configuration);
|
||||||
|
HbaseColumnCell rowKey = hbaseColumnCellMap.get(Constant.ROWKEY_FLAG);
|
||||||
|
if (rowKey != null && rowKey.getColumnType() != null) {
|
||||||
|
this.rowkeyReadoutType = rowKey.getColumnType();
|
||||||
|
} else {
|
||||||
|
this.rowkeyReadoutType = ColumnType.BYTES;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
ScanMultiVersionReader.COLON_BYTE = ":".getBytes(encoding);
|
||||||
|
} catch (UnsupportedEncodingException e) {
|
||||||
|
throw DataXException.asDataXException(HbaseReaderErrorCode.PREPAR_READ_ERROR, "Failed to get binary of column family and column name colon separator inside the system.", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void convertKVToLine(KeyValue keyValue, Record record) throws Exception {
|
||||||
|
byte[] rawRowkey = keyValue.getRow();
|
||||||
|
long timestamp = keyValue.getTimestamp();
|
||||||
|
byte[] cfAndQualifierName = Bytes.add(keyValue.getFamily(), ScanMultiVersionReader.COLON_BYTE, keyValue.getQualifier());
|
||||||
|
|
||||||
|
record.addColumn(convertBytesToAssignType(this.rowkeyReadoutType, rawRowkey));
|
||||||
|
|
||||||
|
record.addColumn(convertBytesToAssignType(ColumnType.STRING, cfAndQualifierName));
|
||||||
|
|
||||||
|
// 直接忽略了用户配置的 timestamp 的类型
|
||||||
|
record.addColumn(new LongColumn(timestamp));
|
||||||
|
|
||||||
|
String cfAndQualifierNameStr = Bytes.toString(cfAndQualifierName);
|
||||||
|
HbaseColumnCell currentCell = hbaseColumnCellMap.get(cfAndQualifierNameStr);
|
||||||
|
ColumnType valueReadoutType = currentCell != null ? currentCell.getColumnType() : ColumnType.BYTES;
|
||||||
|
String dateFormat = currentCell != null ? currentCell.getDateformat() : null;
|
||||||
|
record.addColumn(convertBytesToAssignType(valueReadoutType, keyValue.getValue(), dateFormat));
|
||||||
|
}
|
||||||
|
|
||||||
|
private Column convertBytesToAssignType(ColumnType columnType, byte[] byteArray) throws Exception {
|
||||||
|
return convertBytesToAssignType(columnType, byteArray, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Column convertBytesToAssignType(ColumnType columnType, byte[] byteArray, String dateFormat) throws Exception {
|
||||||
|
return ObHbaseReaderUtil.buildColumn(byteArray, columnType, encoding, dateFormat, timezone);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean fetchLine(Record record) throws Exception {
|
||||||
|
Result result;
|
||||||
|
if (this.kvList.size() == this.currentReadPosition) {
|
||||||
|
result = getNextHbaseRow();
|
||||||
|
if (result == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
this.kvList = result.list();
|
||||||
|
if (this.kvList == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
this.currentReadPosition = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
KeyValue keyValue = this.kvList.get(this.currentReadPosition);
|
||||||
|
convertKVToLine(keyValue, record);
|
||||||
|
} finally {
|
||||||
|
this.currentReadPosition++;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,65 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader.task;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.element.Column;
|
||||||
|
import com.alibaba.datax.common.element.Record;
|
||||||
|
import com.alibaba.datax.common.element.StringColumn;
|
||||||
|
import com.alibaba.datax.common.util.Configuration;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.enums.ColumnType;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.client.Result;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
public class ScanNormalModeReader extends AbstractScanReader {
|
||||||
|
private static Logger LOG = LoggerFactory.getLogger(ScanNormalModeReader.class);
|
||||||
|
|
||||||
|
public ScanNormalModeReader(Configuration configuration) {
|
||||||
|
super(configuration);
|
||||||
|
this.maxVersion = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean fetchLine(Record record) throws Exception {
|
||||||
|
Result result = getNextHbaseRow();
|
||||||
|
if (null == result) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
byte[] hbaseColumnValue;
|
||||||
|
String columnName;
|
||||||
|
ColumnType columnType;
|
||||||
|
|
||||||
|
byte[] cf;
|
||||||
|
byte[] qualifier;
|
||||||
|
|
||||||
|
for (HbaseColumnCell cell : this.hbaseColumnCellMap.values()) {
|
||||||
|
columnType = cell.getColumnType();
|
||||||
|
Column column = null;
|
||||||
|
if (cell.isConstant()) {
|
||||||
|
// 对常量字段的处理
|
||||||
|
column = constantMap.get(cell.getColumnName());
|
||||||
|
} else {
|
||||||
|
// 根据列名称获取值
|
||||||
|
columnName = cell.getColumnName();
|
||||||
|
if (ObHbaseReaderUtil.isRowkeyColumn(columnName)) {
|
||||||
|
hbaseColumnValue = result.getRow();
|
||||||
|
} else {
|
||||||
|
cf = cell.getCf();
|
||||||
|
qualifier = cell.getQualifier();
|
||||||
|
hbaseColumnValue = result.getValue(cf, qualifier);
|
||||||
|
}
|
||||||
|
column = ObHbaseReaderUtil.buildColumn(hbaseColumnValue, columnType, super.encoding, cell.getDateformat(), timezone);
|
||||||
|
}
|
||||||
|
record.addColumn(column);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
// 注意,这里catch的异常,期望是byte数组转换失败的情况。而实际上,string的byte数组,转成整数类型是不容易报错的。但是转成double类型容易报错。
|
||||||
|
record.setColumn(0, new StringColumn(Bytes.toStringBinary(result.getRow())));
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,154 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader.util;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.exception.DataXException;
|
||||||
|
import com.alibaba.datax.common.util.Configuration;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.Key;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import org.apache.commons.collections.CollectionUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.apache.hadoop.hbase.util.Pair;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public final class HbaseSplitUtil {
|
||||||
|
private final static Logger LOG = LoggerFactory.getLogger(HbaseSplitUtil.class);
|
||||||
|
|
||||||
|
public static List<Configuration> split(Configuration configuration) {
|
||||||
|
final List<Configuration> ranges = configuration.getListConfiguration(Key.RANGE);
|
||||||
|
if (CollectionUtils.isEmpty(ranges)) {
|
||||||
|
return Lists.newArrayList(configuration);
|
||||||
|
}
|
||||||
|
|
||||||
|
//TODO(yuez) 后续hbase api具备查询region的功能后,这里需要添加查询table region的逻辑,并且取table region和用户指定的range的交集
|
||||||
|
List<Configuration> sliceConfs = new ArrayList<>(ranges.size());
|
||||||
|
for (Configuration range : ranges) {
|
||||||
|
byte[] startRowKey = convertUserRowkey(range, true);
|
||||||
|
byte[] endRowKey = convertUserRowkey(range, false);
|
||||||
|
if (startRowKey.length != 0 && endRowKey.length != 0 && Bytes.compareTo(startRowKey, endRowKey) > 0) {
|
||||||
|
throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, "The startRowkey in obhbasereader must not be greater than the endRowkey.");
|
||||||
|
}
|
||||||
|
Configuration sliceConf = configuration.clone();
|
||||||
|
sliceConf.remove(Key.RANGE);
|
||||||
|
String startKeyStr = Bytes.toStringBinary(startRowKey);
|
||||||
|
String endRowKeyStr = Bytes.toStringBinary(endRowKey);
|
||||||
|
sliceConf.set(Key.START_ROWKEY, startKeyStr);
|
||||||
|
sliceConf.set(Key.END_ROWKEY, endRowKeyStr);
|
||||||
|
sliceConfs.add(sliceConf);
|
||||||
|
}
|
||||||
|
return sliceConfs;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static byte[] convertUserRowkey(Configuration configuration, boolean isStart) {
|
||||||
|
String keyName = isStart ? Key.START_ROWKEY : Key.END_ROWKEY;
|
||||||
|
String startRowkey = configuration.getString(keyName);
|
||||||
|
if (StringUtils.isBlank(startRowkey)) {
|
||||||
|
return HConstants.EMPTY_BYTE_ARRAY;
|
||||||
|
} else {
|
||||||
|
boolean isBinaryRowkey = configuration.getBool(Key.IS_BINARY_ROWKEY, false);
|
||||||
|
return stringToBytes(startRowkey, isBinaryRowkey);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static byte[] stringToBytes(String rowkey, boolean isBinaryRowkey) {
|
||||||
|
if (isBinaryRowkey) {
|
||||||
|
return Bytes.toBytesBinary(rowkey);
|
||||||
|
} else {
|
||||||
|
return Bytes.toBytes(rowkey);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 后续hbase api具备查询region的功能后才用得到此方法
|
||||||
|
*
|
||||||
|
* @param config
|
||||||
|
* @param startRowkeyByte
|
||||||
|
* @param endRowkeyByte
|
||||||
|
* @param regionRanges
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
private static List<Configuration> doSplit(Configuration config, byte[] startRowkeyByte, byte[] endRowkeyByte, Pair<byte[][], byte[][]> regionRanges) {
|
||||||
|
|
||||||
|
List<Configuration> configurations = new ArrayList<Configuration>();
|
||||||
|
|
||||||
|
for (int i = 0; i < regionRanges.getFirst().length; i++) {
|
||||||
|
|
||||||
|
byte[] regionStartKey = regionRanges.getFirst()[i];
|
||||||
|
byte[] regionEndKey = regionRanges.getSecond()[i];
|
||||||
|
|
||||||
|
// 当前的region为最后一个region
|
||||||
|
// 如果最后一个region的start Key大于用户指定的userEndKey,则最后一个region,应该不包含在内
|
||||||
|
// 注意如果用户指定userEndKey为"",则此判断应该不成立。userEndKey为""表示取得最大的region
|
||||||
|
if (Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) == 0 && (endRowkeyByte.length != 0 && (Bytes.compareTo(regionStartKey, endRowkeyByte) > 0))) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 如果当前的region不是最后一个region,
|
||||||
|
// 用户配置的userStartKey大于等于region的endkey,则这个region不应该含在内
|
||||||
|
if ((Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) != 0) && (Bytes.compareTo(startRowkeyByte, regionEndKey) >= 0)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 如果用户配置的userEndKey小于等于 region的startkey,则这个region不应该含在内
|
||||||
|
// 注意如果用户指定的userEndKey为"",则次判断应该不成立。userEndKey为""表示取得最大的region
|
||||||
|
if (endRowkeyByte.length != 0 && (Bytes.compareTo(endRowkeyByte, regionStartKey) <= 0)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
String thisStartKey = getStartKey(startRowkeyByte, regionStartKey);
|
||||||
|
String thisEndKey = getEndKey(endRowkeyByte, regionEndKey);
|
||||||
|
Configuration p = config.clone();
|
||||||
|
p.set(Key.START_ROWKEY, thisStartKey);
|
||||||
|
p.set(Key.END_ROWKEY, thisEndKey);
|
||||||
|
LOG.debug("startRowkey:[{}], endRowkey:[{}] .", thisStartKey, thisEndKey);
|
||||||
|
configurations.add(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
return configurations;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String getEndKey(byte[] endRowkeyByte, byte[] regionEndKey) {
|
||||||
|
if (endRowkeyByte == null) { // 由于之前处理过,所以传入的userStartKey不可能为null
|
||||||
|
throw new IllegalArgumentException("userEndKey should not be null!");
|
||||||
|
}
|
||||||
|
|
||||||
|
byte[] tempEndRowkeyByte;
|
||||||
|
|
||||||
|
if (endRowkeyByte.length == 0) {
|
||||||
|
tempEndRowkeyByte = regionEndKey;
|
||||||
|
} else if (Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) == 0) {
|
||||||
|
// 为最后一个region
|
||||||
|
tempEndRowkeyByte = endRowkeyByte;
|
||||||
|
} else {
|
||||||
|
if (Bytes.compareTo(endRowkeyByte, regionEndKey) > 0) {
|
||||||
|
tempEndRowkeyByte = regionEndKey;
|
||||||
|
} else {
|
||||||
|
tempEndRowkeyByte = endRowkeyByte;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Bytes.toStringBinary(tempEndRowkeyByte);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String getStartKey(byte[] startRowkeyByte, byte[] regionStarKey) {
|
||||||
|
if (startRowkeyByte == null) { // 由于之前处理过,所以传入的userStartKey不可能为null
|
||||||
|
throw new IllegalArgumentException("userStartKey should not be null!");
|
||||||
|
}
|
||||||
|
|
||||||
|
byte[] tempStartRowkeyByte;
|
||||||
|
|
||||||
|
if (Bytes.compareTo(startRowkeyByte, regionStarKey) < 0) {
|
||||||
|
tempStartRowkeyByte = regionStarKey;
|
||||||
|
} else {
|
||||||
|
tempStartRowkeyByte = startRowkeyByte;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Bytes.toStringBinary(tempStartRowkeyByte);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,293 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader.util;
|
||||||
|
|
||||||
|
import static com.alibaba.datax.plugin.reader.obhbasereader.enums.ModeType.MultiVersionFixedColumn;
|
||||||
|
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_DATABASE;
|
||||||
|
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_FULL_USER_NAME;
|
||||||
|
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_ODP_ADDR;
|
||||||
|
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_ODP_MODE;
|
||||||
|
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_ODP_PORT;
|
||||||
|
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_PARAM_URL;
|
||||||
|
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_PASSWORD;
|
||||||
|
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_SYS_PASSWORD;
|
||||||
|
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_SYS_USER_NAME;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.element.BoolColumn;
|
||||||
|
import com.alibaba.datax.common.element.BytesColumn;
|
||||||
|
import com.alibaba.datax.common.element.Column;
|
||||||
|
import com.alibaba.datax.common.element.DateColumn;
|
||||||
|
import com.alibaba.datax.common.element.DoubleColumn;
|
||||||
|
import com.alibaba.datax.common.element.LongColumn;
|
||||||
|
import com.alibaba.datax.common.element.StringColumn;
|
||||||
|
import com.alibaba.datax.common.exception.DataXException;
|
||||||
|
import com.alibaba.datax.common.util.Configuration;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.Constant;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.HTableManager;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.Key;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.enums.ColumnType;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.enums.ModeType;
|
||||||
|
import com.alibaba.fastjson.JSON;
|
||||||
|
import com.alibaba.fastjson.TypeReference;
|
||||||
|
|
||||||
|
import com.alipay.oceanbase.hbase.OHTable;
|
||||||
|
import org.apache.commons.collections.MapUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.commons.lang3.Validate;
|
||||||
|
import org.apache.commons.lang3.time.DateUtils;
|
||||||
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
public final class ObHbaseReaderUtil {
|
||||||
|
private static Logger LOG = LoggerFactory.getLogger(ObHbaseReaderUtil.class);
|
||||||
|
|
||||||
|
public static void doPretreatment(Configuration originalConfig) {
|
||||||
|
String mode = ObHbaseReaderUtil.dealMode(originalConfig);
|
||||||
|
originalConfig.set(Key.MODE, mode);
|
||||||
|
|
||||||
|
String encoding = originalConfig.getString(Key.ENCODING, Constant.DEFAULT_ENCODING);
|
||||||
|
if (!Charset.isSupported(encoding)) {
|
||||||
|
throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, String.format("The encoding you configured is not supported by hbasereader:[%s]", encoding));
|
||||||
|
}
|
||||||
|
originalConfig.set(Key.ENCODING, encoding);
|
||||||
|
|
||||||
|
// 此处增强一个检查:isBinaryRowkey 配置不能出现在与 hbaseConfig 等配置平级地位
|
||||||
|
Boolean isBinaryRowkey = originalConfig.getBool(Key.IS_BINARY_ROWKEY);
|
||||||
|
if (isBinaryRowkey != null) {
|
||||||
|
throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, String.format("%s cannot be configured here. It should be configured in range.", Key.IS_BINARY_ROWKEY));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 对模式以及与模式进行配对的配置进行检查
|
||||||
|
*/
|
||||||
|
private static String dealMode(Configuration originalConfig) {
|
||||||
|
String mode = originalConfig.getString(Key.MODE);
|
||||||
|
ModeType modeType = ModeType.getByTypeName(mode);
|
||||||
|
List<Map> column = originalConfig.getList(Key.COLUMN, Map.class);
|
||||||
|
if (column == null || column.isEmpty()) {
|
||||||
|
throw DataXException.asDataXException(HbaseReaderErrorCode.REQUIRED_VALUE,
|
||||||
|
"You have configured the normal mode to read the data in HBase, so you must configure the column in the form of:column:[{\"name\": \"cf0:column0\",\"type\": \"string\"},"
|
||||||
|
+ "{\"name\": \"cf1:column1\",\"type\": \"long\"}]");
|
||||||
|
}
|
||||||
|
|
||||||
|
// 通过 parse 进行 column 格式的进一步检查
|
||||||
|
ObHbaseReaderUtil.parseColumn(column);
|
||||||
|
if (MultiVersionFixedColumn.equals(modeType)) {
|
||||||
|
Integer maxVersion = originalConfig.getInt(Key.MAX_VERSION);
|
||||||
|
Validate.notNull(maxVersion, String.format("You have configured thw mode %s to read the data in HBase, so you must configure: maxVersion", mode));
|
||||||
|
|
||||||
|
boolean isMaxVersionValid = maxVersion == -1 || maxVersion > 1;
|
||||||
|
Validate.isTrue(isMaxVersionValid, String.format(
|
||||||
|
"You have configured the mode %s to read the data in HBase, but the configured maxVersion value is wrong. maxVersion specifies that: - 1 is to read all versions, and cannot be "
|
||||||
|
+ "configured as 0 or 1 (because 0 or 1, we think the user wants to read the data in normal mode instead of reading in mode %s, the difference is big). If it is greater "
|
||||||
|
+ "than"
|
||||||
|
+ " 1, it means to read the latest corresponding number of versions.",
|
||||||
|
mode, mode));
|
||||||
|
}
|
||||||
|
return mode;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 注意:convertUserStartRowkey 和 convertInnerStartRowkey,前者会受到 isBinaryRowkey 的影响,只用于第一次对用户配置的 String 类型的 rowkey 转为二进制时使用。而后者约定:切分时得到的二进制的 rowkey 回填到配置中时采用
|
||||||
|
*/
|
||||||
|
public static byte[] convertInnerStartRowkey(Configuration configuration) {
|
||||||
|
String startRowkey = configuration.getString(Key.START_ROWKEY);
|
||||||
|
if (StringUtils.isBlank(startRowkey)) {
|
||||||
|
return HConstants.EMPTY_BYTE_ARRAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Bytes.toBytesBinary(startRowkey);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static byte[] convertInnerEndRowkey(Configuration configuration) {
|
||||||
|
String endRowkey = configuration.getString(Key.END_ROWKEY);
|
||||||
|
if (StringUtils.isBlank(endRowkey)) {
|
||||||
|
return HConstants.EMPTY_BYTE_ARRAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Bytes.toBytesBinary(endRowkey);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void setObHBaseConfig(com.alibaba.datax.common.util.Configuration confFile, org.apache.hadoop.conf.Configuration oHbaseConf) {
|
||||||
|
|
||||||
|
boolean odpMode = confFile.getBool(Key.USE_ODP_MODE);
|
||||||
|
String username = confFile.getString(Key.USERNAME);
|
||||||
|
String password = confFile.getString(Key.PASSWORD);
|
||||||
|
String dbName = confFile.getString(Key.DB_NAME);
|
||||||
|
|
||||||
|
// oHbaseConf.set(RS_LIST_ACQUIRE_CONNECT_TIMEOUT.getKey(), "500");
|
||||||
|
// oHbaseConf.set(RS_LIST_ACQUIRE_READ_TIMEOUT.getKey(), "5000");
|
||||||
|
oHbaseConf.set(HBASE_OCEANBASE_FULL_USER_NAME, username);
|
||||||
|
oHbaseConf.set(HBASE_OCEANBASE_PASSWORD, password);
|
||||||
|
// oHbaseConf.set(HBASE_, META_SCANNER_CACHING);
|
||||||
|
if (odpMode) {
|
||||||
|
oHbaseConf.setBoolean(HBASE_OCEANBASE_ODP_MODE, true);
|
||||||
|
oHbaseConf.set(HBASE_OCEANBASE_DATABASE, dbName);
|
||||||
|
oHbaseConf.set(HBASE_OCEANBASE_ODP_ADDR, confFile.getString(Key.ODP_HOST));
|
||||||
|
oHbaseConf.setInt(HBASE_OCEANBASE_ODP_PORT, confFile.getInt(Key.ODP_PORT));
|
||||||
|
} else {
|
||||||
|
String clusterName = null;
|
||||||
|
final Pattern pattern = Pattern.compile("([\\w]+)@([\\w]+)#([\\w]+)");
|
||||||
|
Matcher matcher = pattern.matcher(username);
|
||||||
|
if (matcher.find()) {
|
||||||
|
clusterName = matcher.group(3);
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException("user name is not in the correct format: user@tenant#cluster");
|
||||||
|
}
|
||||||
|
String configUrl = confFile.getString(Key.CONFIG_URL);
|
||||||
|
if (!configUrl.contains("ObRegion")) {
|
||||||
|
if (configUrl.contains("?")) {
|
||||||
|
configUrl += "&ObRegion=" + clusterName;
|
||||||
|
} else {
|
||||||
|
configUrl += "?ObRegion=" + clusterName;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!configUrl.contains("database")) {
|
||||||
|
configUrl += "&database=" + dbName;
|
||||||
|
}
|
||||||
|
oHbaseConf.set(HBASE_OCEANBASE_PARAM_URL, configUrl);
|
||||||
|
oHbaseConf.set(HBASE_OCEANBASE_SYS_USER_NAME, confFile.getString(Key.OB_SYS_USERNAME));
|
||||||
|
oHbaseConf.set(HBASE_OCEANBASE_SYS_PASSWORD, confFile.getString(Key.OB_SYS_PASSWORD));
|
||||||
|
}
|
||||||
|
|
||||||
|
String hbaseConf = confFile.getString(Key.HBASE_CONFIG);
|
||||||
|
Map<String, String> map = JSON.parseObject(hbaseConf, new TypeReference<Map<String, String>>() {
|
||||||
|
});
|
||||||
|
if (MapUtils.isNotEmpty(map)) {
|
||||||
|
for (Map.Entry<String, String> entry : map.entrySet()) {
|
||||||
|
oHbaseConf.set(entry.getKey(), entry.getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 每次都获取一个新的HTable 注意:HTable 本身是线程不安全的
|
||||||
|
*/
|
||||||
|
public static OHTable initOHtable(com.alibaba.datax.common.util.Configuration configuration) {
|
||||||
|
String tableName = configuration.getString(Key.TABLE);
|
||||||
|
try {
|
||||||
|
org.apache.hadoop.conf.Configuration oHbaseConf = new org.apache.hadoop.conf.Configuration();
|
||||||
|
setObHBaseConfig(configuration, oHbaseConf);
|
||||||
|
return HTableManager.createHTable(oHbaseConf, tableName);
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.error("init ohTable error, reason: {}", e.getMessage(), e);
|
||||||
|
throw DataXException.asDataXException(HbaseReaderErrorCode.INIT_TABLE_ERROR, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean isRowkeyColumn(String columnName) {
|
||||||
|
return Constant.ROWKEY_FLAG.equalsIgnoreCase(columnName);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String parseColumnFamily(Collection<HbaseColumnCell> hbaseColumnCells) {
|
||||||
|
for (HbaseColumnCell columnCell : hbaseColumnCells) {
|
||||||
|
if (ObHbaseReaderUtil.isRowkeyColumn(columnCell.getColumnName())) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (columnCell.getColumnName() == null || columnCell.getColumnName().split(":").length != 2) {
|
||||||
|
LOG.error("column cell format is unknown: {}", columnCell);
|
||||||
|
throw new RuntimeException("Column cell format is unknown: " + columnCell);
|
||||||
|
}
|
||||||
|
return columnCell.getColumnName().split(":")[0];
|
||||||
|
}
|
||||||
|
throw new RuntimeException("parse column family failed.");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 用于解析列配置
|
||||||
|
*/
|
||||||
|
public static LinkedHashMap<String, HbaseColumnCell> parseColumn(List<Map> column) {
|
||||||
|
return parseColumn(column, null, Constant.DEFAULT_ENCODING, Constant.DEFAULT_TIMEZONE);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static LinkedHashMap<String, HbaseColumnCell> parseColumn(List<Map> column, Map<String, Column> constantMap, String encoding, String timezone) {
|
||||||
|
LinkedHashMap<String, HbaseColumnCell> hbaseColumnCells = new LinkedHashMap<>(column.size());
|
||||||
|
boolean cacheConstantValue = constantMap != null;
|
||||||
|
HbaseColumnCell oneColumnCell;
|
||||||
|
try {
|
||||||
|
for (Map<String, String> aColumn : column) {
|
||||||
|
ColumnType type = ColumnType.getByTypeName(aColumn.get("type"));
|
||||||
|
boolean isRowKey = isRowkeyColumn(aColumn.get("name"));
|
||||||
|
String columnName = isRowKey ? Constant.ROWKEY_FLAG : aColumn.get("name");
|
||||||
|
|
||||||
|
String columnValue = aColumn.get("value");
|
||||||
|
String dateFormat = aColumn.getOrDefault("format", Constant.DEFAULT_DATE_FORMAT);
|
||||||
|
Validate.isTrue(StringUtils.isNotBlank(columnName) || StringUtils.isNotBlank(columnValue),
|
||||||
|
"It is either a combination of type + name + format or a combination of type + value + format. Your configuration is neither of the two. Please check and modify it.");
|
||||||
|
if (type == ColumnType.DATE) {
|
||||||
|
if (StringUtils.isBlank(dateFormat)) {
|
||||||
|
LOG.warn("date format for {} is empty, use default date format 'yyyy-MM-dd HH:mm:ss' instead.", columnName);
|
||||||
|
}
|
||||||
|
oneColumnCell = new HbaseColumnCell.Builder(type).columnName(columnName).columnValue(columnValue).dateformat(dateFormat).build();
|
||||||
|
} else {
|
||||||
|
oneColumnCell = new HbaseColumnCell.Builder(type).columnName(columnName).columnValue(columnValue).build();
|
||||||
|
}
|
||||||
|
hbaseColumnCells.put(columnName, oneColumnCell);
|
||||||
|
if (cacheConstantValue && oneColumnCell.isConstant()) {
|
||||||
|
constantMap.put(columnName, buildColumn(columnValue, type, encoding, dateFormat, timezone));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return hbaseColumnCells;
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.error("parse column failed, reason:{}", e.getMessage(), e);
|
||||||
|
throw DataXException.asDataXException(HbaseReaderErrorCode.PARSE_COLUMN_ERROR, e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Column buildColumn(String columnValue, ColumnType columnType, String encoding, String dateformat, String timezone) throws Exception {
|
||||||
|
return buildColumn(columnValue.getBytes(encoding), columnType, encoding, dateformat, timezone);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Column buildColumn(byte[] columnValue, ColumnType columnType, String encoding, String dateformat, String timezone) throws Exception {
|
||||||
|
switch (columnType) {
|
||||||
|
case BOOLEAN:
|
||||||
|
return new BoolColumn(columnValue == null ? null : Bytes.toBoolean(columnValue));
|
||||||
|
case SHORT:
|
||||||
|
return new LongColumn(columnValue == null ? null : String.valueOf(Bytes.toShort(columnValue)));
|
||||||
|
case INT:
|
||||||
|
return new LongColumn(columnValue == null ? null : Bytes.toInt(columnValue));
|
||||||
|
case LONG:
|
||||||
|
return new LongColumn(columnValue == null ? null : Bytes.toLong(columnValue));
|
||||||
|
case BYTES:
|
||||||
|
return new BytesColumn(columnValue == null ? null : columnValue);
|
||||||
|
case FLOAT:
|
||||||
|
return new DoubleColumn(columnValue == null ? null : Bytes.toFloat(columnValue));
|
||||||
|
case DOUBLE:
|
||||||
|
return new DoubleColumn(columnValue == null ? null : Bytes.toDouble(columnValue));
|
||||||
|
case STRING:
|
||||||
|
return new StringColumn(columnValue == null ? null : new String(columnValue, encoding));
|
||||||
|
case BINARY_STRING:
|
||||||
|
return new StringColumn(columnValue == null ? null : Bytes.toStringBinary(columnValue));
|
||||||
|
case DATE:
|
||||||
|
String dateValue = Bytes.toStringBinary(columnValue);
|
||||||
|
String timestamp = null;
|
||||||
|
try {
|
||||||
|
long milliSec = Long.parseLong(dateValue);
|
||||||
|
Date date = new java.util.Date(milliSec);
|
||||||
|
SimpleDateFormat sdf = new java.text.SimpleDateFormat(dateformat);
|
||||||
|
sdf.setTimeZone(java.util.TimeZone.getTimeZone(timezone));
|
||||||
|
timestamp = sdf.format(date);
|
||||||
|
} catch (Exception e) {
|
||||||
|
// this is already formatted timestamp
|
||||||
|
timestamp = dateValue;
|
||||||
|
}
|
||||||
|
return columnValue == null ? null : new DateColumn(DateUtils.parseDate(timestamp, dateformat));
|
||||||
|
default:
|
||||||
|
throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, "obHbasereader 不支持您配置的列类型:" + columnType);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,190 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.obhbasereader.util;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.util.Configuration;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.reader.Constant;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.util.DBUtil;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.util.SplitedSlice;
|
||||||
|
import com.alibaba.datax.plugin.reader.obhbasereader.Key;
|
||||||
|
import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ExecutorTemplate;
|
||||||
|
import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ObReaderUtils;
|
||||||
|
import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.PartInfo;
|
||||||
|
import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.PartitionSplitUtil;
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.ResultSet;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.sql.Statement;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import org.apache.commons.collections.CollectionUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
public class SqlReaderSplitUtil {
|
||||||
|
public static final String SAMPLE_SQL_TEMPLATE = "SELECT `hex` FROM (SELECT `hex`,K , bucket, ROW_NUMBER() OVER (PARTITION BY bucket ORDER BY K) rn FROM(SELECT %s `hex`, K ,NTILE(%s) OVER "
|
||||||
|
+ "(ORDER BY K ) bucket FROM (SELECT hex(K) as `hex`, K FROM %s SAMPLE BLOCK(%s)) a) b) c WHERE rn = 1 GROUP BY K ORDER BY K";
|
||||||
|
public static final String MIDDLE_RANGE_TEMPLATE = "((K) > (unhex('%s'))) AND ((K) <= (unhex('%s')))";
|
||||||
|
public static final String MIN_MAX_RANGE_TEMPLATE = "((K)<= (unhex('%s'))) or ((K) > (unhex('%s')))";
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(SqlReaderSplitUtil.class);
|
||||||
|
|
||||||
|
public static List<Configuration> splitSingleTable(Configuration configuration, String tableName, String columnFamily, int eachTableShouldSplittedNumber, boolean readByPartition) {
|
||||||
|
List<String> partitionList = Lists.newArrayList();
|
||||||
|
String tableNameWithCf = tableName + "$" + columnFamily;
|
||||||
|
PartInfo partInfo = PartitionSplitUtil.getObMySQLPartInfoBySQL(configuration, tableNameWithCf);
|
||||||
|
if (partInfo.isPartitionTable()) {
|
||||||
|
partitionList.addAll(partInfo.getPartList());
|
||||||
|
}
|
||||||
|
// read all partitions and split job only by partition
|
||||||
|
if (readByPartition) {
|
||||||
|
LOG.info("table: [{}] will read only by partition", tableNameWithCf);
|
||||||
|
return splitSingleTableByPartition(configuration, partitionList);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (eachTableShouldSplittedNumber <= 1) {
|
||||||
|
LOG.info("total enable splitted number of table: [{}] is {}, no need to split", tableNameWithCf, eachTableShouldSplittedNumber);
|
||||||
|
return Lists.newArrayList(configuration);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If user specified some partitions to be read,
|
||||||
|
List<String> userSetPartitions = configuration.getList(Key.PARTITION_NAME, String.class);
|
||||||
|
if (CollectionUtils.isNotEmpty(userSetPartitions)) {
|
||||||
|
Set<String> partSet = new HashSet<>(partitionList);
|
||||||
|
// If partition name does not exist in the table, throw exception directly. Case is sensitive.
|
||||||
|
userSetPartitions.forEach(e -> Preconditions.checkArgument(partSet.contains(e), "partition %s does not exist in table: %s", e, tableNameWithCf));
|
||||||
|
partitionList.clear();
|
||||||
|
partitionList.addAll(userSetPartitions);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (partitionList.isEmpty()) {
|
||||||
|
LOG.info("table: [{}] is not partitioned, just split table by rowKey.", tableNameWithCf);
|
||||||
|
List<Configuration> splitConfs = splitSingleTableByRowKey(configuration, tableNameWithCf, eachTableShouldSplittedNumber);
|
||||||
|
LOG.info("total split count of non-partitioned table :[{}] is {}", tableNameWithCf, splitConfs.size());
|
||||||
|
return splitConfs;
|
||||||
|
} else {
|
||||||
|
ExecutorTemplate<List<Configuration>> template = new ExecutorTemplate<>("split-rows-by-rowkey-" + tableNameWithCf + "-", eachTableShouldSplittedNumber);
|
||||||
|
int splitNumPerPartition = (int) Math.ceil(1.0d * eachTableShouldSplittedNumber / partitionList.size());
|
||||||
|
LOG.info("table: [{}] is partitioned, split table by rowKey in parallel. splitNumPerPartition is {}", tableNameWithCf, splitNumPerPartition);
|
||||||
|
for (String partName : partitionList) {
|
||||||
|
try {
|
||||||
|
template.submit(() -> {
|
||||||
|
Configuration tempConf = configuration.clone();
|
||||||
|
tempConf.set(Key.PARTITION_NAME, partName);
|
||||||
|
return splitSingleTableByRowKey(tempConf, tableNameWithCf, splitNumPerPartition);
|
||||||
|
});
|
||||||
|
} catch (Throwable th) {
|
||||||
|
LOG.error("submit split task of table: [{}-{}] failed, reason: {}", tableNameWithCf, partName, th.getMessage(), th);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
List<Configuration> splitConfs = template.waitForResult().stream().flatMap(Collection::stream).collect(Collectors.toList());
|
||||||
|
LOG.info("total split count of partitioned table :[{}] is {}", tableNameWithCf, splitConfs.size());
|
||||||
|
return splitConfs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Configuration> splitSingleTableByPartition(Configuration configuration, List<String> partList) {
|
||||||
|
if (partList == null || partList.isEmpty()) {
|
||||||
|
return Lists.newArrayList(configuration);
|
||||||
|
}
|
||||||
|
List<Configuration> confList = new ArrayList<>();
|
||||||
|
for (String partName : partList) {
|
||||||
|
LOG.info("read sub task: reading from partition " + partName);
|
||||||
|
Configuration conf = configuration.clone();
|
||||||
|
conf.set(Key.PARTITION_NAME, partName);
|
||||||
|
confList.add(conf);
|
||||||
|
}
|
||||||
|
return confList;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param configuration
|
||||||
|
* @param tableNameWithCf
|
||||||
|
* @param eachTableShouldSplittedNumber
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public static List<Configuration> splitSingleTableByRowKey(Configuration configuration, String tableNameWithCf, int eachTableShouldSplittedNumber) {
|
||||||
|
String jdbcURL = configuration.getString(Key.JDBC_URL);
|
||||||
|
String username = configuration.getString(Key.USERNAME);
|
||||||
|
String password = configuration.getString(Key.PASSWORD);
|
||||||
|
String hint = configuration.getString(Key.READER_HINT, com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_READ_HINT);
|
||||||
|
String partInfo = "";
|
||||||
|
String partName = configuration.getString(Key.PARTITION_NAME, null);
|
||||||
|
if (partName != null) {
|
||||||
|
partInfo = " partition(" + partName + ")";
|
||||||
|
}
|
||||||
|
tableNameWithCf += partInfo;
|
||||||
|
int fetchSize = configuration.getInt(Constant.FETCH_SIZE, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_FETCH_SIZE);
|
||||||
|
Double percentage = configuration.getDouble(Key.SAMPLE_PERCENTAGE, 0.1);
|
||||||
|
List<SplitedSlice> slices = new ArrayList<>();
|
||||||
|
List<Configuration> pluginParams = new ArrayList<>();
|
||||||
|
// set ob_query_timeout and ob_trx_timeout to a large time in case timeout
|
||||||
|
int queryTimeoutSeconds = 60 * 60 * 48;
|
||||||
|
try (Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcURL, username, password)) {
|
||||||
|
String setQueryTimeout = "set ob_query_timeout=" + (queryTimeoutSeconds * 1000 * 1000L);
|
||||||
|
String setTrxTimeout = "set ob_trx_timeout=" + ((queryTimeoutSeconds + 5) * 1000 * 1000L);
|
||||||
|
try (Statement stmt = conn.createStatement()) {
|
||||||
|
stmt.execute(setQueryTimeout);
|
||||||
|
stmt.execute(setTrxTimeout);
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.warn("set ob_query_timeout and set ob_trx_timeout failed. reason: {}", e.getMessage(), e);
|
||||||
|
}
|
||||||
|
slices = getSplitSqlBySample(conn, tableNameWithCf, fetchSize, percentage, eachTableShouldSplittedNumber, hint);
|
||||||
|
} catch (Throwable e) {
|
||||||
|
LOG.warn("query rowkey range failed of table: {}. reason: {}. the table will not be splitted.", tableNameWithCf, e.getMessage(), e);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!slices.isEmpty()) {
|
||||||
|
for (SplitedSlice slice : slices) {
|
||||||
|
Configuration tempConfig = configuration.clone();
|
||||||
|
tempConfig.set(Key.RANGE, slice.getRange());
|
||||||
|
pluginParams.add(tempConfig);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Configuration tempConfig = configuration.clone();
|
||||||
|
pluginParams.add(tempConfig);
|
||||||
|
}
|
||||||
|
return pluginParams;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 按照采样方法切分,不能直接顺序切分否则可能导致原本属于一行的数据被切分为两行
|
||||||
|
*
|
||||||
|
* @param conn
|
||||||
|
* @param tableName
|
||||||
|
* @param fetchSize
|
||||||
|
* @param percentage
|
||||||
|
* @param adviceNum
|
||||||
|
* @param hint
|
||||||
|
* @return List<SplitedSlice>
|
||||||
|
* @throws SQLException
|
||||||
|
*/
|
||||||
|
private static List<SplitedSlice> getSplitSqlBySample(Connection conn, String tableName, int fetchSize, double percentage, int adviceNum, String hint) throws SQLException {
|
||||||
|
String splitSql = String.format(SAMPLE_SQL_TEMPLATE, hint, adviceNum, tableName, percentage);
|
||||||
|
LOG.info("split pk [sql={}] is running... ", splitSql);
|
||||||
|
List<String> boundList = new ArrayList<>();
|
||||||
|
try (ResultSet rs = DBUtil.query(conn, splitSql, fetchSize)) {
|
||||||
|
while (rs.next()) {
|
||||||
|
boundList.add(rs.getString(1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (boundList.size() == 0) {
|
||||||
|
return new ArrayList<>();
|
||||||
|
}
|
||||||
|
List<SplitedSlice> rangeSql = new ArrayList<>();
|
||||||
|
for (int i = 0; i < boundList.size() - 1; i++) {
|
||||||
|
String range = String.format(MIDDLE_RANGE_TEMPLATE, boundList.get(i), boundList.get(i + 1));
|
||||||
|
SplitedSlice slice = new SplitedSlice(boundList.get(i), boundList.get(i + 1), range);
|
||||||
|
rangeSql.add(slice);
|
||||||
|
}
|
||||||
|
String range = String.format(MIN_MAX_RANGE_TEMPLATE, boundList.get(0), boundList.get(boundList.size() - 1));
|
||||||
|
SplitedSlice slice = new SplitedSlice(null, null, range);
|
||||||
|
rangeSql.add(slice);
|
||||||
|
return rangeSql;
|
||||||
|
}
|
||||||
|
}
|
6
obhbasereader/src/main/resources/plugin.json
Executable file
6
obhbasereader/src/main/resources/plugin.json
Executable file
@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"name": "obhbasereader",
|
||||||
|
"class": "com.alibaba.datax.plugin.reader.obhbasereader.ObHbaseReader",
|
||||||
|
"description": "useScene: prod. mechanism: Scan to read data.",
|
||||||
|
"developer": "alibaba"
|
||||||
|
}
|
15
obhbasereader/src/main/resources/plugin_job_template.json
Normal file
15
obhbasereader/src/main/resources/plugin_job_template.json
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
{
|
||||||
|
"name": "obhbasereader",
|
||||||
|
"parameter": {
|
||||||
|
"hbaseConfig": {},
|
||||||
|
"table": "",
|
||||||
|
"encoding": "",
|
||||||
|
"mode": "",
|
||||||
|
"column": [],
|
||||||
|
"range": {
|
||||||
|
"startRowkey": "",
|
||||||
|
"endRowkey": ""
|
||||||
|
},
|
||||||
|
"isBinaryRowkey": true
|
||||||
|
}
|
||||||
|
}
|
209
obhbasewriter/doc/obhbasewriter.md
Normal file
209
obhbasewriter/doc/obhbasewriter.md
Normal file
@ -0,0 +1,209 @@
|
|||||||
|
OceanBase的table api为应用提供了ObHBase的访问接口,因此,OceanBase table api的reader与HBase writer的结构和配置方法类似。
|
||||||
|
1 快速介绍
|
||||||
|
obhbaseWriter 插件实现了从向ObHbase中写取数据。在底层实现上,obhbaseWriter 通过 HBase 的 Java 客户端连接远程 HBase 服务,并通过 put 方式写入obHbase。
|
||||||
|
1.1支持功能
|
||||||
|
1、目前obhbasewriter支持的obHbase版本为OceanBase3.x以及4.x版本。
|
||||||
|
2、目前obhbasewriter支持源端多个字段拼接作为ObHbase 表的 rowkey,具体配置参考:rowkeyColumn配置;
|
||||||
|
3、写入obhbase的时间戳(版本)支持:用当前时间作为版本,指定源端列作为版本,指定一个时间 三种方式作为版本;
|
||||||
|
#### 脚本配置
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"job": {
|
||||||
|
"setting": {
|
||||||
|
"speed": {
|
||||||
|
"channel": 5
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"reader": {
|
||||||
|
"name": "txtfilereader",
|
||||||
|
"parameter": {
|
||||||
|
"path": "/normal.txt",
|
||||||
|
"charset": "UTF-8",
|
||||||
|
"column": [
|
||||||
|
{
|
||||||
|
"index": 0,
|
||||||
|
"type": "String"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 1,
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 2,
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 3,
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 4,
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 5,
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 6,
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
|
||||||
|
],
|
||||||
|
"fieldDelimiter": ","
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"writer": {
|
||||||
|
"name": "obhbasewriter",
|
||||||
|
"parameter": {
|
||||||
|
"username": "username",
|
||||||
|
"password": "password",
|
||||||
|
"writerThreadCount": "20",
|
||||||
|
"writeBufferHighMark": "2147483647",
|
||||||
|
"rpcExecuteTimeout": "30000",
|
||||||
|
"useOdpMode": "false",
|
||||||
|
"obSysUser": "root",
|
||||||
|
"obSysPassword": "",
|
||||||
|
"column": [
|
||||||
|
{
|
||||||
|
"index": 0,
|
||||||
|
"name": "family1:c1",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 1,
|
||||||
|
"name": "family1:c2",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 2,
|
||||||
|
"name": "family1:c3",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 3,
|
||||||
|
"name": "family1:c4",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 4,
|
||||||
|
"name": "family1:c5",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 5,
|
||||||
|
"name": "family1:c6",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 6,
|
||||||
|
"name": "family1:c7",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"mode": "normal",
|
||||||
|
"rowkeyColumn": [
|
||||||
|
{
|
||||||
|
"index": 0,
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 3,
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 2,
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 1,
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"table": "htable3",
|
||||||
|
"batchSize": "200",
|
||||||
|
"dbName": "database",
|
||||||
|
"jdbcUrl": "jdbc:mysql://ip:port/database?"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
##### 参数解释
|
||||||
|
|
||||||
|
- **connection**
|
||||||
|
|
||||||
|
公有云和私有云需要配置的信息不同,具体如下:
|
||||||
|
公有云:
|
||||||
|
|
||||||
|
- 数据库用户名;(在外层统一配置)
|
||||||
|
- 用户密码;(在外层统一配置)
|
||||||
|
- proxy的jdbc地址
|
||||||
|
- 数据库名称;
|
||||||
|
|
||||||
|
私有云:
|
||||||
|
|
||||||
|
- 数据库用户名;(在外层统一配置)
|
||||||
|
- 用户密码;(在外层统一配置)
|
||||||
|
- proxy的jdbc地址
|
||||||
|
- obSysUser:sys租户的用户名;
|
||||||
|
- obSysPass:sys租户的密码;
|
||||||
|
- configUrl;
|
||||||
|
- 描述:可以通过show parameters like 'obConfigUrl' 获得。
|
||||||
|
- 必须:是
|
||||||
|
- 默认值:无
|
||||||
|
- **jdbcUrl**
|
||||||
|
- 描述:连接ob使用的jdbc url,支持如下两种格式:
|
||||||
|
- jdbc:mysql://obproxyIp:obproxyPort/db
|
||||||
|
- 此格式下username需要写成三段式格式
|
||||||
|
- ||_dsc_ob10_dsc_||集群名:租户名||_dsc_ob10_dsc_||jdbc:mysql://obproxyIp:obproxyPort/db
|
||||||
|
- 此格式下username仅填写用户名本身,无需三段式写法
|
||||||
|
- 必选:是
|
||||||
|
- 默认值:无
|
||||||
|
- **table**
|
||||||
|
- 描述:所选取的需要同步的表。无需增加列族信息。
|
||||||
|
- 必选:是
|
||||||
|
- 默认值:无
|
||||||
|
- **username**
|
||||||
|
- 描述:访问OceanBase的用户名
|
||||||
|
- 必选:是
|
||||||
|
- 默认值:无
|
||||||
|
- **useOdpMode**
|
||||||
|
- 描述:是否通过proxy连接。无法提供sys租户帐密时需要设置为true
|
||||||
|
- 必须:否
|
||||||
|
- 默认值:false
|
||||||
|
- **column**
|
||||||
|
- 描述:要写入的hbase字段。index:指定该列对应reader端column的索引,从0开始;name:指定hbase表中的列,必须为 列族:列名 的格式;type:指定写入数据类型,用于转换HBase byte[]。配置格式如下:
|
||||||
|
```json
|
||||||
|
"column": [ { "index":1, "name": "cf1:q1", "type": "string" }, { "index":2, "name": "cf1:q2", "type": "string" } ]
|
||||||
|
```
|
||||||
|
|
||||||
|
- 必选:是
|
||||||
|
- 默认值:无
|
||||||
|
- **rowkeyColumn**
|
||||||
|
- 描述:要写入的ObHbase的rowkey列。index:指定该列对应reader端column的索引,从0开始,若为常量index为-1;type:指定写入数据类型,用于转换HBase byte[];value:配置常量,常作为多个字段的拼接符。obhbasewriter会将rowkeyColumn中所有列按照配置顺序进行拼接作为写入hbase的rowkey,不能全为常量。配置格式如下:
|
||||||
|
```json
|
||||||
|
"rowkeyColumn": [ { "index":0, "type":"string" }, { "index":-1, "type":"string", "value":"_" } ]
|
||||||
|
```
|
||||||
|
|
||||||
|
- 必选:是
|
||||||
|
- 默认值:无
|
||||||
|
- **versionColumn**
|
||||||
|
- 描述:指定写入obhbase的时间戳。支持:当前时间、指定时间列,指定时间,三者选一。若不配置表示用当前时间。index:指定对应reader端column的索引,从0开始,需保证能转换为long,若是Date类型,会尝试用yyyy-MM-dd HH:mm:ss和yyyy-MM-dd HH:mm:ss SSS去解析;若为指定时间index为-1;value:指定时间的值,long值。配置格式如下:
|
||||||
|
```json
|
||||||
|
"versionColumn":{ "index":1 }
|
||||||
|
```
|
||||||
|
或者
|
||||||
|
```json
|
||||||
|
"versionColumn":{ "index":-1, "value":123456789 }
|
||||||
|
```
|
||||||
|
|
||||||
|
- 必选:否
|
||||||
|
- 默认值:无
|
||||||
|
|
||||||
|
|
||||||
|
|
185
obhbasewriter/pom.xml
Normal file
185
obhbasewriter/pom.xml
Normal file
@ -0,0 +1,185 @@
|
|||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<parent>
|
||||||
|
<artifactId>datax-all</artifactId>
|
||||||
|
<groupId>com.alibaba.datax</groupId>
|
||||||
|
<version>0.0.1-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<artifactId>obhbasewriter</artifactId>
|
||||||
|
|
||||||
|
<groupId>com.alibaba.datax</groupId>
|
||||||
|
<version>0.0.1-SNAPSHOT</version>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.alibaba.datax</groupId>
|
||||||
|
<artifactId>datax-common</artifactId>
|
||||||
|
<version>${datax-project-version}</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<artifactId>slf4j-log4j12</artifactId>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.alibaba.datax</groupId>
|
||||||
|
<artifactId>plugin-rdbms-util</artifactId>
|
||||||
|
<version>${datax-project-version}</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-api</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>ch.qos.logback</groupId>
|
||||||
|
<artifactId>logback-classic</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.alibaba.datax</groupId>
|
||||||
|
<artifactId>simulator</artifactId>
|
||||||
|
<version>${datax-project-version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.springframework</groupId>
|
||||||
|
<artifactId>spring-test</artifactId>
|
||||||
|
<version>4.0.4.RELEASE</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.taobao.tddl</groupId>
|
||||||
|
<artifactId>tddl-client</artifactId>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>com.taobao.diamond</groupId>
|
||||||
|
<artifactId>diamond-client</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
<version>33.1.0-jre</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.alipay.oceanbase</groupId>
|
||||||
|
<artifactId>oceanbase-connector-java</artifactId>
|
||||||
|
<version>3.2.0</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>log4j</groupId>
|
||||||
|
<artifactId>log4j</artifactId>
|
||||||
|
<version>1.2.16</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.alibaba.toolkit.common</groupId>
|
||||||
|
<artifactId>toolkit-common-logging</artifactId>
|
||||||
|
<version>1.10</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.json</groupId>
|
||||||
|
<artifactId>json</artifactId>
|
||||||
|
<version>20160810</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>junit</groupId>
|
||||||
|
<artifactId>junit</artifactId>
|
||||||
|
<version>4.11</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.powermock</groupId>
|
||||||
|
<artifactId>powermock-module-junit4</artifactId>
|
||||||
|
<version>1.4.10</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.powermock</groupId>
|
||||||
|
<artifactId>powermock-api-mockito</artifactId>
|
||||||
|
<version>1.4.10</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.mockito</groupId>
|
||||||
|
<artifactId>mockito-core</artifactId>
|
||||||
|
<version>1.8.5</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.oceanbase</groupId>
|
||||||
|
<artifactId>obkv-hbase-client</artifactId>
|
||||||
|
<version>0.1.4.2</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-core</artifactId>
|
||||||
|
<version>1.0.3</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<resources>
|
||||||
|
<resource>
|
||||||
|
<directory>src/main/java</directory>
|
||||||
|
<includes>
|
||||||
|
<include>**/*.properties</include>
|
||||||
|
</includes>
|
||||||
|
</resource>
|
||||||
|
</resources>
|
||||||
|
<plugins>
|
||||||
|
<!-- compiler plugin -->
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
|
<configuration>
|
||||||
|
<source>${jdk-version}</source>
|
||||||
|
<target>${jdk-version}</target>
|
||||||
|
<encoding>${project-sourceEncoding}</encoding>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
<!-- assembly plugin -->
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-assembly-plugin</artifactId>
|
||||||
|
<configuration>
|
||||||
|
<descriptors>
|
||||||
|
<descriptor>src/main/assembly/package.xml</descriptor>
|
||||||
|
</descriptors>
|
||||||
|
<finalName>datax</finalName>
|
||||||
|
</configuration>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>dwzip</id>
|
||||||
|
<phase>package</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>single</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
</project>
|
35
obhbasewriter/src/main/assembly/package.xml
Normal file
35
obhbasewriter/src/main/assembly/package.xml
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
<assembly
|
||||||
|
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
|
||||||
|
<id></id>
|
||||||
|
<formats>
|
||||||
|
<format>dir</format>
|
||||||
|
</formats>
|
||||||
|
<includeBaseDirectory>false</includeBaseDirectory>
|
||||||
|
<fileSets>
|
||||||
|
<fileSet>
|
||||||
|
<directory>src/main/resources</directory>
|
||||||
|
<includes>
|
||||||
|
<include>plugin.json</include>
|
||||||
|
<include>plugin_job_template.json</include>
|
||||||
|
</includes>
|
||||||
|
<outputDirectory>plugin/writer/obhbasewriter</outputDirectory>
|
||||||
|
</fileSet>
|
||||||
|
<fileSet>
|
||||||
|
<directory>target/</directory>
|
||||||
|
<includes>
|
||||||
|
<include>obhbasewriter-0.0.1-SNAPSHOT.jar</include>
|
||||||
|
</includes>
|
||||||
|
<outputDirectory>plugin/writer/obhbasewriter</outputDirectory>
|
||||||
|
</fileSet>
|
||||||
|
</fileSets>
|
||||||
|
|
||||||
|
<dependencySets>
|
||||||
|
<dependencySet>
|
||||||
|
<useProjectArtifact>false</useProjectArtifact>
|
||||||
|
<outputDirectory>plugin/writer/obhbasewriter/libs</outputDirectory>
|
||||||
|
<scope>runtime</scope>
|
||||||
|
</dependencySet>
|
||||||
|
</dependencySets>
|
||||||
|
</assembly>
|
@ -0,0 +1,50 @@
|
|||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.exception.DataXException;
|
||||||
|
import com.alibaba.datax.common.util.MessageSource;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 只对 normal 模式读取时有用,多版本读取时,不存在列类型的
|
||||||
|
*/
|
||||||
|
public enum ColumnType {
|
||||||
|
STRING("string"),
|
||||||
|
BINARY_STRING("binarystring"),
|
||||||
|
BYTES("bytes"),
|
||||||
|
BOOLEAN("boolean"),
|
||||||
|
SHORT("short"),
|
||||||
|
INT("int"),
|
||||||
|
LONG("long"),
|
||||||
|
FLOAT("float"),
|
||||||
|
DOUBLE("double"),
|
||||||
|
DATE("date"),
|
||||||
|
BINARY("binary");
|
||||||
|
|
||||||
|
private String typeName;
|
||||||
|
|
||||||
|
ColumnType(String typeName) {
|
||||||
|
this.typeName = typeName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ColumnType getByTypeName(String typeName) {
|
||||||
|
if (StringUtils.isBlank(typeName)) {
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MessageSource.loadResourceBundle(ColumnType.class).message("columntype.1", typeName, Arrays.asList(values())));
|
||||||
|
}
|
||||||
|
for (ColumnType columnType : values()) {
|
||||||
|
if (StringUtils.equalsIgnoreCase(columnType.typeName, typeName.trim())) {
|
||||||
|
return columnType;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MessageSource.loadResourceBundle(ColumnType.class).message("columntype.1", typeName, Arrays.asList(values())));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return this.typeName;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,42 @@
|
|||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter;
|
||||||
|
|
||||||
|
public interface Config {
|
||||||
|
|
||||||
|
String MEMSTORE_THRESHOLD = "memstoreThreshold";
|
||||||
|
|
||||||
|
double DEFAULT_MEMSTORE_THRESHOLD = 0.9d;
|
||||||
|
|
||||||
|
String MEMSTORE_CHECK_INTERVAL_SECOND = "memstoreCheckIntervalSecond";
|
||||||
|
|
||||||
|
long DEFAULT_MEMSTORE_CHECK_INTERVAL_SECOND = 30;
|
||||||
|
|
||||||
|
String FAIL_TRY_COUNT = "failTryCount";
|
||||||
|
|
||||||
|
int DEFAULT_FAIL_TRY_COUNT = 10000;
|
||||||
|
|
||||||
|
String WRITER_THREAD_COUNT = "writerThreadCount";
|
||||||
|
|
||||||
|
int DEFAULT_WRITER_THREAD_COUNT = 5;
|
||||||
|
|
||||||
|
String CONCURRENT_WRITE = "concurrentWrite";
|
||||||
|
|
||||||
|
boolean DEFAULT_CONCURRENT_WRITE = true;
|
||||||
|
|
||||||
|
String RS_URL = "rsUrl";
|
||||||
|
|
||||||
|
String OB_VERSION = "obVersion";
|
||||||
|
|
||||||
|
String TIMEOUT = "timeout";
|
||||||
|
|
||||||
|
String PRINT_COST = "printCost";
|
||||||
|
|
||||||
|
boolean DEFAULT_PRINT_COST = false;
|
||||||
|
|
||||||
|
String COST_BOUND = "costBound";
|
||||||
|
|
||||||
|
long DEFAULT_COST_BOUND = 20;
|
||||||
|
|
||||||
|
String MAX_ACTIVE_CONNECTION = "maxActiveConnection";
|
||||||
|
|
||||||
|
int DEFAULT_MAX_ACTIVE_CONNECTION = 2000;
|
||||||
|
}
|
@ -0,0 +1,78 @@
|
|||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter;
|
||||||
|
|
||||||
|
public final class ConfigKey {
|
||||||
|
|
||||||
|
public final static String HBASE_CONFIG = "hbaseConfig";
|
||||||
|
|
||||||
|
public final static String TABLE = "table";
|
||||||
|
|
||||||
|
public final static String DBNAME = "dbName";
|
||||||
|
|
||||||
|
public final static String OBCONFIG_URL = "obConfigUrl";
|
||||||
|
|
||||||
|
public final static String JDBC_URL = "jdbcUrl";
|
||||||
|
/**
|
||||||
|
* mode 可以取 normal 或者 multiVersionFixedColumn 或者 multiVersionDynamicColumn 三个值,无默认值。
|
||||||
|
* <p/>
|
||||||
|
* normal 配合 column(Map 结构的)使用
|
||||||
|
* <p/>
|
||||||
|
* multiVersion
|
||||||
|
*/
|
||||||
|
public final static String MODE = "mode";
|
||||||
|
|
||||||
|
public final static String ROWKEY_COLUMN = "rowkeyColumn";
|
||||||
|
|
||||||
|
public final static String VERSION_COLUMN = "versionColumn";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 默认为 utf8
|
||||||
|
*/
|
||||||
|
public final static String ENCODING = "encoding";
|
||||||
|
|
||||||
|
public final static String COLUMN = "column";
|
||||||
|
|
||||||
|
public static final String INDEX = "index";
|
||||||
|
|
||||||
|
public static final String NAME = "name";
|
||||||
|
|
||||||
|
public static final String TYPE = "type";
|
||||||
|
|
||||||
|
public static final String VALUE = "value";
|
||||||
|
|
||||||
|
public static final String FORMAT = "format";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 默认为 EMPTY_BYTES
|
||||||
|
*/
|
||||||
|
public static final String NULL_MODE = "nullMode";
|
||||||
|
|
||||||
|
public static final String TRUNCATE = "truncate";
|
||||||
|
|
||||||
|
public static final String AUTO_FLUSH = "autoFlush";
|
||||||
|
|
||||||
|
public static final String WAL_FLAG = "walFlag";
|
||||||
|
|
||||||
|
public static final String WRITE_BUFFER_SIZE = "writeBufferSize";
|
||||||
|
|
||||||
|
public static final String MAX_RETRY_COUNT = "maxRetryCount";
|
||||||
|
|
||||||
|
public static final String USE_ODP_MODE = "useOdpMode";
|
||||||
|
|
||||||
|
public static final String OB_SYS_USER = "obSysUser";
|
||||||
|
|
||||||
|
public static final String OB_SYS_PASSWORD = "obSysPassword";
|
||||||
|
|
||||||
|
public static final String ODP_HOST = "odpHost";
|
||||||
|
|
||||||
|
public static final String ODP_PORT = "odpPort";
|
||||||
|
|
||||||
|
public static final String OBHBASE_HTABLE_CLIENT_WRITE_BUFFER = "obhbaseClientWriteBuffer";
|
||||||
|
|
||||||
|
public static final String OBHBASE_HTABLE_PUT_WRITE_BUFFER_CHECK = "obhbaseHtablePutWriteBufferCheck";
|
||||||
|
|
||||||
|
public static final String WRITE_BUFFER_LOW_MARK = "writeBufferLowMark";
|
||||||
|
|
||||||
|
public static final String WRITE_BUFFER_HIGH_MARK = "writeBufferHighMark";
|
||||||
|
|
||||||
|
public static final String TABLE_CLIENT_RPC_EXECUTE_TIMEOUT = "rpcExecuteTimeout";
|
||||||
|
}
|
@ -0,0 +1,110 @@
|
|||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.exception.DataXException;
|
||||||
|
import com.alibaba.datax.common.util.Configuration;
|
||||||
|
import com.alibaba.datax.common.util.MessageSource;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.writer.Key;
|
||||||
|
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by johnxu.xj on Sept 30 2018
|
||||||
|
*/
|
||||||
|
public class ConfigValidator {
|
||||||
|
private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(ConfigValidator.class);
|
||||||
|
|
||||||
|
public static void validateParameter(com.alibaba.datax.common.util.Configuration originalConfig) {
|
||||||
|
originalConfig.getNecessaryValue(Key.USERNAME, Hbase094xWriterErrorCode.REQUIRED_VALUE);
|
||||||
|
originalConfig.getNecessaryValue(Key.PASSWORD, Hbase094xWriterErrorCode.REQUIRED_VALUE);
|
||||||
|
// originalConfig.getNecessaryValue(ConfigKey.OBCONFIG_URL, Hbase094xWriterErrorCode.REQUIRED_VALUE);
|
||||||
|
originalConfig.getNecessaryValue(ConfigKey.TABLE, Hbase094xWriterErrorCode.REQUIRED_VALUE);
|
||||||
|
originalConfig.getNecessaryValue(ConfigKey.DBNAME, Hbase094xWriterErrorCode.REQUIRED_VALUE);
|
||||||
|
|
||||||
|
ConfigValidator.validateMode(originalConfig);
|
||||||
|
|
||||||
|
String encoding = originalConfig.getString(ConfigKey.ENCODING, Constant.DEFAULT_ENCODING);
|
||||||
|
if (!Charset.isSupported(encoding)) {
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.9", encoding));
|
||||||
|
}
|
||||||
|
originalConfig.set(ConfigKey.ENCODING, encoding);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void validateMode(com.alibaba.datax.common.util.Configuration originalConfig) {
|
||||||
|
String mode = originalConfig.getNecessaryValue(ConfigKey.MODE, Hbase094xWriterErrorCode.REQUIRED_VALUE);
|
||||||
|
ModeType modeType = ModeType.getByTypeName(mode);
|
||||||
|
if (ModeType.Normal.equals(modeType)) {
|
||||||
|
validateRowkeyColumn(originalConfig);
|
||||||
|
validateColumn(originalConfig);
|
||||||
|
validateVersionColumn(originalConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (originalConfig.getBool(ConfigKey.USE_ODP_MODE)) {
|
||||||
|
originalConfig.getNecessaryValue(ConfigKey.ODP_HOST, Hbase094xWriterErrorCode.REQUIRED_VALUE);
|
||||||
|
originalConfig.getNecessaryValue(ConfigKey.ODP_PORT, Hbase094xWriterErrorCode.REQUIRED_VALUE);
|
||||||
|
} else {
|
||||||
|
originalConfig.getNecessaryValue(ConfigKey.OBCONFIG_URL, Hbase094xWriterErrorCode.REQUIRED_VALUE);
|
||||||
|
originalConfig.getNecessaryValue(ConfigKey.OB_SYS_USER, Hbase094xWriterErrorCode.REQUIRED_VALUE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void validateColumn(com.alibaba.datax.common.util.Configuration originalConfig) {
|
||||||
|
List<Configuration> columns = originalConfig.getListConfiguration(ConfigKey.COLUMN);
|
||||||
|
if (columns == null || columns.isEmpty()) {
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.11"));
|
||||||
|
}
|
||||||
|
for (Configuration aColumn : columns) {
|
||||||
|
Integer index = aColumn.getInt(ConfigKey.INDEX);
|
||||||
|
String type = aColumn.getNecessaryValue(ConfigKey.TYPE, Hbase094xWriterErrorCode.REQUIRED_VALUE);
|
||||||
|
String name = aColumn.getNecessaryValue(ConfigKey.NAME, Hbase094xWriterErrorCode.REQUIRED_VALUE);
|
||||||
|
ColumnType.getByTypeName(type);
|
||||||
|
if (name.split(":").length != 2) {
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.12", name));
|
||||||
|
}
|
||||||
|
if (index == null || index < 0) {
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.13"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void validateRowkeyColumn(com.alibaba.datax.common.util.Configuration originalConfig) {
|
||||||
|
List<Configuration> rowkeyColumn = originalConfig.getListConfiguration(ConfigKey.ROWKEY_COLUMN);
|
||||||
|
if (rowkeyColumn == null || rowkeyColumn.isEmpty()) {
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.14"));
|
||||||
|
}
|
||||||
|
int rowkeyColumnSize = rowkeyColumn.size();
|
||||||
|
//包含{"index":0,"type":"string"} 或者 {"index":-1,"type":"string","value":"_"}
|
||||||
|
for (Configuration aRowkeyColumn : rowkeyColumn) {
|
||||||
|
Integer index = aRowkeyColumn.getInt(ConfigKey.INDEX);
|
||||||
|
String type = aRowkeyColumn.getNecessaryValue(ConfigKey.TYPE, Hbase094xWriterErrorCode.REQUIRED_VALUE);
|
||||||
|
ColumnType.getByTypeName(type);
|
||||||
|
if (index == null) {
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.15"));
|
||||||
|
}
|
||||||
|
//不能只有-1列,即rowkey连接串
|
||||||
|
if (rowkeyColumnSize == 1 && index == -1) {
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.16"));
|
||||||
|
}
|
||||||
|
if (index == -1) {
|
||||||
|
aRowkeyColumn.getNecessaryValue(ConfigKey.VALUE, Hbase094xWriterErrorCode.REQUIRED_VALUE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void validateVersionColumn(com.alibaba.datax.common.util.Configuration originalConfig) {
|
||||||
|
Configuration versionColumn = originalConfig.getConfiguration(ConfigKey.VERSION_COLUMN);
|
||||||
|
//为null,表示用当前时间;指定列,需要index
|
||||||
|
if (versionColumn != null) {
|
||||||
|
Integer index = versionColumn.getInt(ConfigKey.INDEX);
|
||||||
|
if (index == null) {
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.17"));
|
||||||
|
}
|
||||||
|
if (index == -1) {
|
||||||
|
//指定时间,需要index=-1,value
|
||||||
|
versionColumn.getNecessaryValue(ConfigKey.VALUE, Hbase094xWriterErrorCode.REQUIRED_VALUE);
|
||||||
|
} else if (index < 0) {
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.18"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,27 @@
|
|||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter;
|
||||||
|
|
||||||
|
import ch.qos.logback.classic.Level;
|
||||||
|
|
||||||
|
public final class Constant {
|
||||||
|
public static final String DEFAULT_ENCODING = "UTF-8";
|
||||||
|
public static final String DEFAULT_DATA_FORMAT = "yyyy-MM-dd HH:mm:ss";
|
||||||
|
public static final String DEFAULT_NULL_MODE = "skip";
|
||||||
|
public static final long DEFAULT_WRITE_BUFFER_SIZE = 8 * 1024 * 1024;
|
||||||
|
public static final long DEFAULT_MEMSTORE_CHECK_INTERVAL_SECOND = 30;
|
||||||
|
public static final double DEFAULT_MEMSTORE_THRESHOLD = 0.9d;
|
||||||
|
public static final int DEFAULT_FAIL_TRY_COUNT = 10000;
|
||||||
|
public static final String OB_TABLE_CLIENT_PROPERTY = "logging.path.com.alipay.oceanbase-table-client";
|
||||||
|
public static final String OB_TABLE_HBASE_PROPERTY = "logging.path.com.alipay.oceanbase-table-hbase";
|
||||||
|
public static final String OB_TABLE_CLIENT_LOG_LEVEL = "logging.level.oceanbase-table-client";
|
||||||
|
public static final String OB_TABLE_HBASE_LOG_LEVEL = "logging.level.oceanbase-table-hbase";
|
||||||
|
public static final String OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL = "logging.level.com.alipay.oceanbase-table-client";
|
||||||
|
public static final String OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL = "logging.level.com.alipay.oceanbase-table-hbase";
|
||||||
|
public static final String OB_HBASE_LOG_PATH = System.getProperty("datax.home") + "/log/";
|
||||||
|
public static final String DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL = Level.OFF.toString();
|
||||||
|
public static final String DEFAULT_OB_TABLE_HBASE_LOG_LEVEL = Level.OFF.toString();
|
||||||
|
public static final String DEFAULT_NETTY_BUFFER_LOW_WATERMARK = Integer.toString(512 * 1024);
|
||||||
|
public static final String DEFAULT_NETTY_BUFFER_HIGH_WATERMARK = Integer.toString(1024 * 1024);
|
||||||
|
public static final String DEFAULT_HBASE_HTABLE_CLIENT_WRITE_BUFFER = "2097152";
|
||||||
|
public static final String DEFAULT_HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK = "10";
|
||||||
|
public static final String DEFAULT_RPC_EXECUTE_TIMEOUT = "3000";
|
||||||
|
}
|
@ -0,0 +1,44 @@
|
|||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.spi.ErrorCode;
|
||||||
|
import com.alibaba.datax.common.util.MessageSource;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by shf on 16/3/8.
|
||||||
|
*/
|
||||||
|
public enum Hbase094xWriterErrorCode implements ErrorCode {
|
||||||
|
REQUIRED_VALUE("Hbasewriter-00", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.required_value")),
|
||||||
|
ILLEGAL_VALUE("Hbasewriter-01", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.illegal_value")),
|
||||||
|
GET_HBASE_CONFIG_ERROR("Hbasewriter-02", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.get_hbase_config_error")),
|
||||||
|
GET_HBASE_TABLE_ERROR("Hbasewriter-03", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.get_hbase_table_error")),
|
||||||
|
CLOSE_HBASE_AMIN_ERROR("Hbasewriter-05", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.close_hbase_amin_error")),
|
||||||
|
CLOSE_HBASE_TABLE_ERROR("Hbasewriter-06", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.close_hbase_table_error")),
|
||||||
|
PUT_HBASE_ERROR("Hbasewriter-07", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.put_hbase_error")),
|
||||||
|
DELETE_HBASE_ERROR("Hbasewriter-08", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.delete_hbase_error")),
|
||||||
|
TRUNCATE_HBASE_ERROR("Hbasewriter-09", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.truncate_hbase_error")),
|
||||||
|
CONSTRUCT_ROWKEY_ERROR("Hbasewriter-10", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.construct_rowkey_error")),
|
||||||
|
CONSTRUCT_VERSION_ERROR("Hbasewriter-11", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.construct_version_error")),
|
||||||
|
INIT_ERROR("Hbasewriter-12", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.init_error"));
|
||||||
|
private final String code;
|
||||||
|
private final String description;
|
||||||
|
|
||||||
|
private Hbase094xWriterErrorCode(String code, String description) {
|
||||||
|
this.code = code;
|
||||||
|
this.description = description;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getCode() {
|
||||||
|
return this.code;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getDescription() {
|
||||||
|
return this.description;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format("Code:[%s], Description:[%s].", this.code, this.description);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,30 @@
|
|||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.exception.DataXException;
|
||||||
|
import com.alibaba.datax.common.util.MessageSource;
|
||||||
|
|
||||||
|
public enum ModeType {
|
||||||
|
Normal("normal"),
|
||||||
|
MultiVersion("multiVersion");
|
||||||
|
|
||||||
|
private String mode;
|
||||||
|
|
||||||
|
ModeType(String mode) {
|
||||||
|
this.mode = mode.toLowerCase();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getMode() {
|
||||||
|
return mode;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ModeType getByTypeName(String modeName) {
|
||||||
|
for (ModeType modeType : values()) {
|
||||||
|
if (modeType.mode.equalsIgnoreCase(modeName)) {
|
||||||
|
return modeType;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MessageSource.loadResourceBundle(ModeType.class).message("modetype.1", modeName, Arrays.asList(values())));
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,30 @@
|
|||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.exception.DataXException;
|
||||||
|
import com.alibaba.datax.common.util.MessageSource;
|
||||||
|
|
||||||
|
public enum NullModeType {
|
||||||
|
Skip("skip"),
|
||||||
|
Empty("empty");
|
||||||
|
|
||||||
|
private String mode;
|
||||||
|
|
||||||
|
NullModeType(String mode) {
|
||||||
|
this.mode = mode.toLowerCase();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getMode() {
|
||||||
|
return mode;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static NullModeType getByTypeName(String modeName) {
|
||||||
|
for (NullModeType modeType : values()) {
|
||||||
|
if (modeType.mode.equalsIgnoreCase(modeName)) {
|
||||||
|
return modeType;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MessageSource.loadResourceBundle(NullModeType.class).message("nullmodetype.1", modeName, Arrays.asList(values())));
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,108 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2021 OceanBase ob-loader-dumper is licensed under Mulan PSL v2. You can use this software according to
|
||||||
|
* the terms and conditions of the Mulan PSL v2. You may obtain a copy of Mulan PSL v2 at:
|
||||||
|
*
|
||||||
|
* http://license.coscl.org.cn/MulanPSL2
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
|
||||||
|
* BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more
|
||||||
|
* details.
|
||||||
|
*/
|
||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.util.Configuration;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.reader.Key;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import org.apache.commons.lang3.tuple.Triple;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author cjyyz
|
||||||
|
* @date 2023/03/24
|
||||||
|
* @since
|
||||||
|
*/
|
||||||
|
public class ObHTableInfo {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 不带列族的表名,用于构建OHTable
|
||||||
|
*/
|
||||||
|
String tableName;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 带列族的表名,用于分区计算
|
||||||
|
*/
|
||||||
|
String fullHbaseTableName;
|
||||||
|
|
||||||
|
NullModeType nullModeType;
|
||||||
|
|
||||||
|
String encoding;
|
||||||
|
|
||||||
|
List<Configuration> columns;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 记录配置文件中的columns的列族名,字段名,字段类型,避免每次执行插入都解析
|
||||||
|
* Triple<String, String, String> left : 列族名;middle : 字段名;right:字段类型
|
||||||
|
*/
|
||||||
|
LinkedHashMap<Integer, Triple<String, String, ColumnType>> indexColumnInfoMap;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 记录配置文件中rowKey的Index,常量值,字段类型,避免每次执行插入都解析
|
||||||
|
* Triple<Integer, String, ColumnType> left : Index;middle : 常量值;right:字段类型
|
||||||
|
*/
|
||||||
|
List<Triple<Integer, String, ColumnType>> rowKeyElementList;
|
||||||
|
|
||||||
|
public ObHTableInfo(Configuration configuration) {
|
||||||
|
this.nullModeType = NullModeType.getByTypeName(configuration.getString(ConfigKey.NULL_MODE, Constant.DEFAULT_NULL_MODE));
|
||||||
|
this.encoding = configuration.getString(ConfigKey.ENCODING, Constant.DEFAULT_ENCODING);
|
||||||
|
this.columns = configuration.getListConfiguration(ConfigKey.COLUMN);
|
||||||
|
this.indexColumnInfoMap = new LinkedHashMap<>();
|
||||||
|
configuration.getListConfiguration(ConfigKey.COLUMN).forEach(e -> {
|
||||||
|
String[] name = e.getString(ConfigKey.NAME).split(":");
|
||||||
|
indexColumnInfoMap.put(e.getInt(ConfigKey.INDEX), Triple.of(name[0], name[1], ColumnType.getByTypeName(e.getString(ConfigKey.TYPE)))
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
this.rowKeyElementList = new ArrayList<>();
|
||||||
|
configuration.getListConfiguration(ConfigKey.ROWKEY_COLUMN).forEach(e -> {
|
||||||
|
Integer index = e.getInt(ConfigKey.INDEX);
|
||||||
|
String constantValue = e.getString(ConfigKey.VALUE);
|
||||||
|
ColumnType columnType = ColumnType.getByTypeName(e.getString(ConfigKey.TYPE));
|
||||||
|
rowKeyElementList.add(Triple.of(index, constantValue, columnType));
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
this.tableName = configuration.getString(Key.TABLE);
|
||||||
|
this.fullHbaseTableName = tableName;
|
||||||
|
if (!fullHbaseTableName.contains("$")) {
|
||||||
|
String name = columns.get(0).getString(ConfigKey.NAME);
|
||||||
|
String familyName = name.split(":")[0];
|
||||||
|
fullHbaseTableName = fullHbaseTableName + "$" + familyName;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTableName() {
|
||||||
|
return tableName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFullHbaseTableName() {
|
||||||
|
return fullHbaseTableName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public NullModeType getNullModeType() {
|
||||||
|
return nullModeType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getEncoding() {
|
||||||
|
return encoding;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<Integer, Triple<String, String, ColumnType>> getIndexColumnInfoMap() {
|
||||||
|
return indexColumnInfoMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Triple<Integer, String, ColumnType>> getRowKeyElementList() {
|
||||||
|
return rowKeyElementList;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,267 @@
|
|||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter;
|
||||||
|
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_OB_TABLE_HBASE_LOG_LEVEL;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_HBASE_LOG_PATH;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_TABLE_CLIENT_LOG_LEVEL;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_TABLE_CLIENT_PROPERTY;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_TABLE_HBASE_LOG_LEVEL;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_TABLE_HBASE_PROPERTY;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.exception.DataXException;
|
||||||
|
import com.alibaba.datax.common.plugin.RecordReceiver;
|
||||||
|
import com.alibaba.datax.common.spi.Writer;
|
||||||
|
import com.alibaba.datax.common.util.Configuration;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.reader.util.ObVersion;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.util.DBUtil;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.writer.Key;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ServerConnectInfo;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.task.ObHBaseWriteTask;
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.PreparedStatement;
|
||||||
|
import java.sql.ResultSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class ObHbaseWriter extends Writer {
|
||||||
|
/**
|
||||||
|
* Job 中的方法仅执行一次,Task 中方法会由框架启动多个 Task 线程并行执行。
|
||||||
|
* <p/>
|
||||||
|
* 整个 Writer 执行流程是:
|
||||||
|
*
|
||||||
|
* <pre>
|
||||||
|
* Job类init-->prepare-->split
|
||||||
|
*
|
||||||
|
* Task类init-->prepare-->startWrite-->post-->destroy
|
||||||
|
* Task类init-->prepare-->startWrite-->post-->destroy
|
||||||
|
*
|
||||||
|
* Job类post-->destroy
|
||||||
|
* </pre>
|
||||||
|
*/
|
||||||
|
public static class Job extends Writer.Job {
|
||||||
|
private Configuration originalConfig = null;
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(Job.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 注意:此方法仅执行一次。 最佳实践:通常在这里对用户的配置进行校验:是否缺失必填项?有无错误值?有没有无关配置项?...
|
||||||
|
* 并给出清晰的报错/警告提示。校验通常建议采用静态工具类进行,以保证本类结构清晰。
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void init() {
|
||||||
|
if (System.getProperty(OB_TABLE_CLIENT_PROPERTY) == null) {
|
||||||
|
LOG.info(OB_TABLE_CLIENT_PROPERTY + " not set");
|
||||||
|
System.setProperty(OB_TABLE_CLIENT_PROPERTY, OB_HBASE_LOG_PATH);
|
||||||
|
}
|
||||||
|
if (System.getProperty(OB_TABLE_HBASE_PROPERTY) == null) {
|
||||||
|
LOG.info(OB_TABLE_HBASE_PROPERTY + " not set");
|
||||||
|
System.setProperty(OB_TABLE_HBASE_PROPERTY, OB_HBASE_LOG_PATH);
|
||||||
|
}
|
||||||
|
if (System.getProperty(OB_TABLE_CLIENT_LOG_LEVEL) == null) {
|
||||||
|
LOG.info(OB_TABLE_CLIENT_LOG_LEVEL + " not set");
|
||||||
|
System.setProperty(OB_TABLE_CLIENT_LOG_LEVEL, DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL);
|
||||||
|
}
|
||||||
|
if (System.getProperty(OB_TABLE_HBASE_LOG_LEVEL) == null) {
|
||||||
|
LOG.info(OB_TABLE_HBASE_LOG_LEVEL + " not set");
|
||||||
|
System.setProperty(OB_TABLE_HBASE_LOG_LEVEL, DEFAULT_OB_TABLE_HBASE_LOG_LEVEL);
|
||||||
|
}
|
||||||
|
if (System.getProperty(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL) == null) {
|
||||||
|
LOG.info(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL + " not set");
|
||||||
|
System.setProperty(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL, DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL);
|
||||||
|
}
|
||||||
|
if (System.getProperty(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL) == null) {
|
||||||
|
LOG.info(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL + " not set");
|
||||||
|
System.setProperty(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL, DEFAULT_OB_TABLE_HBASE_LOG_LEVEL);
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG.info("{} is set to {}, {} is set to {}",
|
||||||
|
OB_TABLE_CLIENT_PROPERTY, OB_HBASE_LOG_PATH, OB_TABLE_HBASE_PROPERTY, OB_HBASE_LOG_PATH);
|
||||||
|
this.originalConfig = super.getPluginJobConf();
|
||||||
|
boolean useOdpMode = originalConfig.getBool(ConfigKey.USE_ODP_MODE, false);
|
||||||
|
String configUrl = originalConfig.getString(ConfigKey.OBCONFIG_URL, null);
|
||||||
|
String jdbcUrl = originalConfig.getString(ConfigKey.JDBC_URL, null);
|
||||||
|
jdbcUrl = DataBaseType.MySql.appendJDBCSuffixForReader(jdbcUrl);
|
||||||
|
String user = originalConfig.getString(Key.USERNAME, null);
|
||||||
|
String password = originalConfig.getString(Key.PASSWORD);
|
||||||
|
ServerConnectInfo serverConnectInfo = new ServerConnectInfo(jdbcUrl, user, password);
|
||||||
|
if (useOdpMode) {
|
||||||
|
originalConfig.set(ConfigKey.ODP_HOST, serverConnectInfo.host);
|
||||||
|
originalConfig.set(ConfigKey.ODP_PORT, serverConnectInfo.port);
|
||||||
|
} else if (StringUtils.isBlank(configUrl)) {
|
||||||
|
serverConnectInfo.setSysUser(originalConfig.getString(ConfigKey.OB_SYS_USER));
|
||||||
|
serverConnectInfo.setSysPass(originalConfig.getString(ConfigKey.OB_SYS_PASSWORD));
|
||||||
|
try {
|
||||||
|
originalConfig.set(ConfigKey.OBCONFIG_URL, queryRsUrl(serverConnectInfo));
|
||||||
|
originalConfig.set(ConfigKey.OB_SYS_USER, serverConnectInfo.sysUser);
|
||||||
|
originalConfig.set(ConfigKey.OB_SYS_PASSWORD, serverConnectInfo.sysPass);
|
||||||
|
LOG.info("fetch configUrl success, configUrl is {}", configUrl);
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.error("fail to get configure url: " + e.getMessage());
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, "Missing obConfigUrl");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (StringUtils.isBlank(originalConfig.getString(ConfigKey.DBNAME))) {
|
||||||
|
originalConfig.set(ConfigKey.DBNAME, serverConnectInfo.databaseName);
|
||||||
|
}
|
||||||
|
ConfigValidator.validateParameter(this.originalConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String queryRsUrl(ServerConnectInfo serverInfo) {
|
||||||
|
String configUrl = originalConfig.getString(ConfigKey.OBCONFIG_URL, null);
|
||||||
|
if (configUrl == null) {
|
||||||
|
try {
|
||||||
|
Connection conn = null;
|
||||||
|
int retry = 0;
|
||||||
|
final String sysJDBCUrl = serverInfo.jdbcUrl.replace(serverInfo.databaseName, "oceanbase");
|
||||||
|
do {
|
||||||
|
try {
|
||||||
|
if (retry > 0) {
|
||||||
|
int sleep = retry > 9 ? 500 : 1 << retry;
|
||||||
|
try {
|
||||||
|
TimeUnit.SECONDS.sleep(sleep);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
}
|
||||||
|
LOG.warn("retry fetch RsUrl the {} times", retry);
|
||||||
|
}
|
||||||
|
conn = DBUtil.getConnection(DataBaseType.OceanBase, sysJDBCUrl, serverInfo.sysUser, serverInfo.sysPass);
|
||||||
|
String sql = "show parameters like 'obconfig_url'";
|
||||||
|
LOG.info("query param: {}", sql);
|
||||||
|
PreparedStatement stmt = conn.prepareStatement(sql);
|
||||||
|
ResultSet result = stmt.executeQuery();
|
||||||
|
if (result.next()) {
|
||||||
|
configUrl = result.getString("Value");
|
||||||
|
}
|
||||||
|
if (StringUtils.isNotBlank(configUrl)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
++retry;
|
||||||
|
LOG.warn("fetch root server list(rsList) error {}", e.getMessage());
|
||||||
|
} finally {
|
||||||
|
DBUtil.closeDBResources(null, conn);
|
||||||
|
}
|
||||||
|
} while (retry < 3);
|
||||||
|
|
||||||
|
LOG.info("configure url is: " + configUrl);
|
||||||
|
originalConfig.set(ConfigKey.OBCONFIG_URL, configUrl);
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.error("Fail to get configure url: {}", e.getMessage(), e);
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, "未配置obConfigUrl,且无法获取obConfigUrl");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return configUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 注意:此方法仅执行一次。 最佳实践:如果 Job 中有需要进行数据同步之前的处理,可以在此处完成,如果没有必要则可以直接去掉。
|
||||||
|
*/
|
||||||
|
// 一般来说,是需要推迟到 task 中进行pre 的执行(单表情况例外)
|
||||||
|
@Override
|
||||||
|
public void prepare() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 注意:此方法仅执行一次。 最佳实践:通常采用工具静态类完成把 Job 配置切分成多个 Task 配置的工作。 这里的
|
||||||
|
* mandatoryNumber 是强制必须切分的份数。
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public List<Configuration> split(int mandatoryNumber) {
|
||||||
|
// This function does not need any change.
|
||||||
|
Configuration simplifiedConf = this.originalConfig;
|
||||||
|
|
||||||
|
List<Configuration> splitResultConfigs = new ArrayList<Configuration>();
|
||||||
|
for (int j = 0; j < mandatoryNumber; j++) {
|
||||||
|
splitResultConfigs.add(simplifiedConf.clone());
|
||||||
|
}
|
||||||
|
return splitResultConfigs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 注意:此方法仅执行一次。 最佳实践:如果 Job 中有需要进行数据同步之后的后续处理,可以在此处完成。
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void post() {
|
||||||
|
// No post supported
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 注意:此方法仅执行一次。 最佳实践:通常配合 Job 中的 post() 方法一起完成 Job 的资源释放。
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void destroy() {
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class Task extends Writer.Task {
|
||||||
|
private Configuration taskConfig;
|
||||||
|
private CommonRdbmsWriter.Task writerTask;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 注意:此方法每个 Task 都会执行一次。 最佳实践:此处通过对 taskConfig 配置的读取,进而初始化一些资源为
|
||||||
|
* startWrite()做准备。
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void init() {
|
||||||
|
this.taskConfig = super.getPluginJobConf();
|
||||||
|
String mode = this.taskConfig.getString(ConfigKey.MODE);
|
||||||
|
ModeType modeType = ModeType.getByTypeName(mode);
|
||||||
|
|
||||||
|
switch (modeType) {
|
||||||
|
case Normal:
|
||||||
|
try {
|
||||||
|
this.writerTask = new ObHBaseWriteTask(this.taskConfig);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.INIT_ERROR, "ObHbase writer init error:" + e.getMessage());
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, "ObHbase not support this mode type:" + modeType);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 注意:此方法每个 Task 都会执行一次。 最佳实践:如果 Task
|
||||||
|
* 中有需要进行数据同步之前的处理,可以在此处完成,如果没有必要则可以直接去掉。
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void prepare() {
|
||||||
|
this.writerTask.prepare(taskConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 注意:此方法每个 Task 都会执行一次。 最佳实践:此处适当封装确保简洁清晰完成数据写入工作。
|
||||||
|
*/
|
||||||
|
public void startWrite(RecordReceiver recordReceiver) {
|
||||||
|
this.writerTask.startWrite(recordReceiver, taskConfig, super.getTaskPluginCollector());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 注意:此方法每个 Task 都会执行一次。 最佳实践:如果 Task 中有需要进行数据同步之后的后续处理,可以在此处完成。
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void post() {
|
||||||
|
this.writerTask.post(taskConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 注意:此方法每个 Task 都会执行一次。 最佳实践:通常配合Task 中的 post() 方法一起完成 Task 的资源释放。
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void destroy() {
|
||||||
|
this.writerTask.destroy(taskConfig);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1 @@
|
|||||||
|
databasewriterbuffer.1=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684table\u4e0d\u5b58\u5728, \u7b97\u51fa\u7684tableName={0},db={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
@ -0,0 +1 @@
|
|||||||
|
databasewriterbuffer.1=The [table] calculated based on the rules does not exist. The calculated [tableName]={0}, [db]={1}. Please check the rules you configured.
|
@ -0,0 +1 @@
|
|||||||
|
databasewriterbuffer.1=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684table\u4e0d\u5b58\u5728, \u7b97\u51fa\u7684tableName={0},db={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
@ -0,0 +1 @@
|
|||||||
|
databasewriterbuffer.1=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684table\u4e0d\u5b58\u5728, \u7b97\u51fa\u7684tableName={0},db={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
@ -0,0 +1 @@
|
|||||||
|
databasewriterbuffer.1=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684table\u4e0d\u5b58\u5728, \u7b97\u51fa\u7684tableName={0},db={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.databasewriterbuffer.1=通過規則計算出來的table不存在, 算出的tableName={0},db={1}, 請檢查您配置的規則.
|
@ -0,0 +1 @@
|
|||||||
|
databasewriterbuffer.1=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684table\u4e0d\u5b58\u5728, \u7b97\u51fa\u7684tableName={0},db={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.databasewriterbuffer.1=通過規則計算出來的table不存在, 算出的tableName={0},db={1}, 請檢查您配置的規則.
|
@ -0,0 +1,30 @@
|
|||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter.ext;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.spi.ErrorCode;
|
||||||
|
|
||||||
|
public enum ObDataSourceErrorCode implements ErrorCode {
|
||||||
|
DESC("ObDataSourceError code", "connect error");
|
||||||
|
|
||||||
|
private final String code;
|
||||||
|
private final String describe;
|
||||||
|
|
||||||
|
private ObDataSourceErrorCode(String code, String describe) {
|
||||||
|
this.code = code;
|
||||||
|
this.describe = describe;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getCode() {
|
||||||
|
return this.code;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getDescription() {
|
||||||
|
return this.describe;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format("Code:[%s], Describe:[%s]. ", this.code, this.describe);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,60 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2021 OceanBase ob-loader-dumper is licensed under Mulan PSL v2. You can use this software according to
|
||||||
|
* the terms and conditions of the Mulan PSL v2. You may obtain a copy of Mulan PSL v2 at:
|
||||||
|
*
|
||||||
|
* http://license.coscl.org.cn/MulanPSL2
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
|
||||||
|
* BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more
|
||||||
|
* details.
|
||||||
|
*/
|
||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter.ext;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.exception.DataXException;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.Hbase094xWriterErrorCode;
|
||||||
|
import com.alipay.oceanbase.hbase.OHTable;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author cjyyz
|
||||||
|
* @date 2023/03/16
|
||||||
|
* @since
|
||||||
|
*/
|
||||||
|
public class ObHbaseTableHolder {
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(ObHbaseTableHolder.class);
|
||||||
|
|
||||||
|
private Configuration configuration;
|
||||||
|
|
||||||
|
private String hbaseTableName;
|
||||||
|
|
||||||
|
private OHTable ohTable;
|
||||||
|
|
||||||
|
public ObHbaseTableHolder(Configuration configuration, String hbaseTableName) {
|
||||||
|
this.configuration = configuration;
|
||||||
|
this.hbaseTableName = hbaseTableName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public OHTable getOhTable() {
|
||||||
|
try {
|
||||||
|
if (ohTable == null) {
|
||||||
|
ohTable = new OHTable(configuration, hbaseTableName);
|
||||||
|
}
|
||||||
|
return ohTable;
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.error("build obHTable: {} failed. reason: {}", hbaseTableName, e.getMessage());
|
||||||
|
throw DataXException.asDataXException(Hbase094xWriterErrorCode.GET_HBASE_TABLE_ERROR, Hbase094xWriterErrorCode.GET_HBASE_TABLE_ERROR.getDescription());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void destroy() {
|
||||||
|
try {
|
||||||
|
if (ohTable != null) {
|
||||||
|
ohTable.close();
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.warn("error in closing htable: {}. Reason: {}", hbaseTableName, e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,146 @@
|
|||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter.ext;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import static org.apache.commons.lang3.StringUtils.EMPTY;
|
||||||
|
|
||||||
|
public class ServerConnectInfo {
|
||||||
|
|
||||||
|
public String clusterName;
|
||||||
|
public String tenantName;
|
||||||
|
// userName doesn't contain tenantName or clusterName
|
||||||
|
public String userName;
|
||||||
|
public String password;
|
||||||
|
public String databaseName;
|
||||||
|
public String ipPort;
|
||||||
|
public String jdbcUrl;
|
||||||
|
public String host;
|
||||||
|
public String port;
|
||||||
|
public boolean publicCloud;
|
||||||
|
public int rpcPort;
|
||||||
|
public String sysUser;
|
||||||
|
public String sysPass;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param jdbcUrl format is jdbc:oceanbase//ip:port
|
||||||
|
* @param username format is cluster:tenant:username or username@tenant#cluster or user@tenant or user
|
||||||
|
* @param password
|
||||||
|
*/
|
||||||
|
public ServerConnectInfo(final String jdbcUrl, final String username, final String password) {
|
||||||
|
this(jdbcUrl, username, password, null, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ServerConnectInfo(final String jdbcUrl, final String username, final String password, final String sysUser, final String sysPass) {
|
||||||
|
if (jdbcUrl.startsWith(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING)) {
|
||||||
|
String[] ss = jdbcUrl.split(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING_PATTERN);
|
||||||
|
Preconditions.checkArgument(ss.length == 3, "jdbc url format is not correct:" + jdbcUrl);
|
||||||
|
this.userName = username;
|
||||||
|
this.clusterName = ss[1].trim().split(":")[0];
|
||||||
|
this.tenantName = ss[1].trim().split(":")[1];
|
||||||
|
this.jdbcUrl = ss[2];
|
||||||
|
} else {
|
||||||
|
this.jdbcUrl = jdbcUrl;
|
||||||
|
}
|
||||||
|
this.password = password;
|
||||||
|
this.sysUser = sysUser;
|
||||||
|
this.sysPass = sysPass;
|
||||||
|
parseJdbcUrl(jdbcUrl);
|
||||||
|
parseFullUserName(username);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void parseJdbcUrl(final String jdbcUrl) {
|
||||||
|
Pattern pattern = Pattern.compile("//([\\w\\.\\-]+:\\d+)/([\\w-]+)\\?");
|
||||||
|
Matcher matcher = pattern.matcher(jdbcUrl);
|
||||||
|
if (matcher.find()) {
|
||||||
|
String ipPort = matcher.group(1);
|
||||||
|
String dbName = matcher.group(2);
|
||||||
|
this.ipPort = ipPort;
|
||||||
|
String[] hostPort = ipPort.split(":");
|
||||||
|
this.host = hostPort[0];
|
||||||
|
this.port = hostPort[1];
|
||||||
|
this.databaseName = dbName;
|
||||||
|
this.publicCloud = host.endsWith("aliyuncs.com");
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException("Invalid argument:" + jdbcUrl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void parseFullUserName(final String fullUserName) {
|
||||||
|
int tenantIndex = fullUserName.indexOf("@");
|
||||||
|
int clusterIndex = fullUserName.indexOf("#");
|
||||||
|
// 适用于jdbcUrl以||_dsc_ob10_dsc_开头的场景
|
||||||
|
if (fullUserName.contains(":") && tenantIndex < 0) {
|
||||||
|
String[] names = fullUserName.split(":");
|
||||||
|
if (names.length != 3) {
|
||||||
|
throw new RuntimeException("invalid argument: " + fullUserName);
|
||||||
|
} else {
|
||||||
|
this.clusterName = names[0];
|
||||||
|
this.tenantName = names[1];
|
||||||
|
this.userName = names[2];
|
||||||
|
}
|
||||||
|
} else if (tenantIndex < 0) {
|
||||||
|
// 适用于short jdbcUrl,且username中不含租户名(主要是公有云场景,此场景下不计算分区)
|
||||||
|
this.userName = fullUserName;
|
||||||
|
this.clusterName = EMPTY;
|
||||||
|
this.tenantName = EMPTY;
|
||||||
|
} else {
|
||||||
|
// 适用于short jdbcUrl,且username中含租户名
|
||||||
|
this.userName = fullUserName.substring(0, tenantIndex);
|
||||||
|
if (clusterIndex < 0) {
|
||||||
|
this.clusterName = EMPTY;
|
||||||
|
this.tenantName = fullUserName.substring(tenantIndex + 1);
|
||||||
|
} else {
|
||||||
|
this.clusterName = fullUserName.substring(clusterIndex + 1);
|
||||||
|
this.tenantName = fullUserName.substring(tenantIndex + 1, clusterIndex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "ServerConnectInfo{" +
|
||||||
|
"clusterName='" + clusterName + '\'' +
|
||||||
|
", tenantName='" + tenantName + '\'' +
|
||||||
|
", userName='" + userName + '\'' +
|
||||||
|
", password='" + password + '\'' +
|
||||||
|
", databaseName='" + databaseName + '\'' +
|
||||||
|
", ipPort='" + ipPort + '\'' +
|
||||||
|
", jdbcUrl='" + jdbcUrl + '\'' +
|
||||||
|
", publicCloud=" + publicCloud +
|
||||||
|
", rpcPort=" + rpcPort +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFullUserName() {
|
||||||
|
StringBuilder builder = new StringBuilder();
|
||||||
|
builder.append(userName);
|
||||||
|
if (publicCloud || (rpcPort != 0 && EMPTY.equals(clusterName))) {
|
||||||
|
return builder.toString();
|
||||||
|
}
|
||||||
|
if (!EMPTY.equals(tenantName)) {
|
||||||
|
builder.append("@").append(tenantName);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!EMPTY.equals(clusterName)) {
|
||||||
|
builder.append("#").append(clusterName);
|
||||||
|
}
|
||||||
|
if (EMPTY.equals(this.clusterName) && EMPTY.equals(this.tenantName)) {
|
||||||
|
return this.userName;
|
||||||
|
}
|
||||||
|
return builder.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRpcPort(int rpcPort) {
|
||||||
|
this.rpcPort = rpcPort;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSysUser(String sysUser) {
|
||||||
|
this.sysUser = sysUser;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSysPass(String sysPass) {
|
||||||
|
this.sysPass = sysPass;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,21 @@
|
|||||||
|
multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
|
||||||
|
multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
|
||||||
|
multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
|
||||||
|
multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301
|
||||||
|
multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539.
|
||||||
|
multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
||||||
|
multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
||||||
|
multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
||||||
|
multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219.
|
||||||
|
multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0},ErrorCode:{1}
|
||||||
|
multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0},ErrorCode:{1}
|
||||||
|
multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0},ErrorCode:{1}
|
||||||
|
multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0},ErrorCode:{1}
|
||||||
|
multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2}
|
||||||
|
multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a:
|
||||||
|
|
||||||
|
|
||||||
|
singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0},ErrorCode:{1}
|
||||||
|
singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0},ErrorCode:{1}
|
||||||
|
singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0},ErrorCode:{1}
|
||||||
|
singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0},ErrorCode:{1}
|
@ -0,0 +1,21 @@
|
|||||||
|
multitablewritertask.1=The configured [tableList] contains multiple tables but no table splitting rules have been configured. Please check your configuration.
|
||||||
|
multitablewritertask.2=There are repeated table names in the multiple tables you configured, but no database or table splitting rules have been configured. Please check your configuration.
|
||||||
|
multitablewritertask.3=All configured tables share the same name, but no database splitting rules have been configured. Please check your configuration.
|
||||||
|
multitablewritertask.4=The configured table and database share the same name. This back-to-source method is not supported.
|
||||||
|
multitablewritertask.5=Error in column configuration information. In your configured tasks, the number of source fields to be read: {0} and the number of fields to be written to the target table: {1} are not equivalent. Please check your configuration and make corrections.
|
||||||
|
multitablewritertask.6=The database that corresponds to the [tableName] calculated based on the rules does not exist. The [tableName]={0}. Please check the rules you configured.
|
||||||
|
multitablewritertask.7=The database and [table] calculated based on the rules do not exist. The calculated [dbName]={0}, and [tableName]={1}. Please check the rules you configured.
|
||||||
|
multitablewritertask.8=The database calculated based on the rules does not exist. The calculated [dbName]={0}. Please check the rules you configured.
|
||||||
|
multitablewritertask.9=The [dbName] [{0}] calculated based on the rules contains multiple sub-tables. Please configure your table splitting rules.
|
||||||
|
multitablewritertask.10=Fatal exception in OB. Roll back this write and hibernate for five minutes. SQLState: {0}. ErrorCode: {1}
|
||||||
|
multitablewritertask.11=Recoverable exception in OB. Roll back this write and hibernate for one minute. SQLState: {0}. ErrorCode: {1}
|
||||||
|
multitablewritertask.12=Exception in OB. Roll back this write and hibernate for one second. Write and submit the records one by one. SQLState: {0}. ErrorCode: {1}
|
||||||
|
multitablewritertask.13=Exception in OB. Roll back this write. Write and submit the records one by one. SQLState: {0}. ErrorCode: {1}
|
||||||
|
multitablewritertask.14=Failed to write to table: [{0}]. Hibernate for [{1}] milliseconds. Data: {2}
|
||||||
|
multitablewritertask.15=writing table [{0}] contains dirty data. Record={1}. Writing exception is:
|
||||||
|
|
||||||
|
|
||||||
|
singletablewritertask.1=Fatal exception in OB. Roll back this write and hibernate for five minutes. SQLState: {0}. ErrorCode: {1}
|
||||||
|
singletablewritertask.2=Recoverable exception in OB. Roll back this write and hibernate for one minute. SQLState: {0}. ErrorCode: {1}
|
||||||
|
singletablewritertask.3=Exception in OB. Roll back this write and hibernate for one second. Write and submit the records one by one. SQLState: {0}. ErrorCode: {1}
|
||||||
|
singletablewritertask.4=Exception in OB. Roll back this write. Write and submit the records one by one. SQLState: {0}. ErrorCode: {1}
|
@ -0,0 +1,21 @@
|
|||||||
|
multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
|
||||||
|
multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
|
||||||
|
multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
|
||||||
|
multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301
|
||||||
|
multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539.
|
||||||
|
multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
||||||
|
multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
||||||
|
multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
||||||
|
multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219.
|
||||||
|
multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0}
|
||||||
|
multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0}
|
||||||
|
multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
|
||||||
|
multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
|
||||||
|
multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2}
|
||||||
|
multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a:
|
||||||
|
|
||||||
|
|
||||||
|
singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0}
|
||||||
|
singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0}
|
||||||
|
singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
|
||||||
|
singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
|
@ -0,0 +1,21 @@
|
|||||||
|
multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
|
||||||
|
multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
|
||||||
|
multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
|
||||||
|
multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301
|
||||||
|
multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539.
|
||||||
|
multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
||||||
|
multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
||||||
|
multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
||||||
|
multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219.
|
||||||
|
multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0}
|
||||||
|
multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0}
|
||||||
|
multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
|
||||||
|
multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
|
||||||
|
multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2}
|
||||||
|
multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a:
|
||||||
|
|
||||||
|
|
||||||
|
singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0}
|
||||||
|
singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0}
|
||||||
|
singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
|
||||||
|
singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
|
@ -0,0 +1,41 @@
|
|||||||
|
multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
|
||||||
|
multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
|
||||||
|
multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
|
||||||
|
multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301
|
||||||
|
multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539.
|
||||||
|
multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
||||||
|
multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
||||||
|
multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
||||||
|
multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219.
|
||||||
|
multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0}
|
||||||
|
multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0}
|
||||||
|
multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
|
||||||
|
multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
|
||||||
|
multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2}
|
||||||
|
multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a:
|
||||||
|
|
||||||
|
|
||||||
|
singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0}
|
||||||
|
singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0}
|
||||||
|
singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
|
||||||
|
singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}multitablewritertask.1=配置的tableList為多表,但未配置分表規則,請檢查您的配置
|
||||||
|
multitablewritertask.2=配置的多庫中的表名有重複的,但未配置分庫規則和分表規則,請檢查您的配置
|
||||||
|
multitablewritertask.3=配置的所有表名都相同,但未配置分庫規則,請檢查您的配置
|
||||||
|
multitablewritertask.4=配置的table和db名稱都相同,此種回流方式不支援
|
||||||
|
multitablewritertask.5=列配置資訊有錯誤. 因為您配置的任務中,源頭讀取欄位數:{0}與 目的表要寫入的欄位數:{1}不相等. 請檢查您的配置並作出修改.
|
||||||
|
multitablewritertask.6=通過規則計算出來的tableName查找對應的db不存在,tableName={0}, 請檢查您配置的規則.
|
||||||
|
multitablewritertask.7=通過規則計算出來的db和table不存在,算出的dbName={0},tableName={1}, 請檢查您配置的規則.
|
||||||
|
multitablewritertask.8=通過規則計算出來的db不存在,算出的dbName={0}, 請檢查您配置的規則.
|
||||||
|
multitablewritertask.9=通過規則計算出來的dbName[{0}], 存在多張分表,請配置您的分表規則.
|
||||||
|
multitablewritertask.10=遇到OB致命異常,回滾此次寫入, 休眠 5分鐘,SQLState:{0}
|
||||||
|
multitablewritertask.11=遇到OB可恢復異常,回滾此次寫入, 休眠 1分鐘,SQLState:{0}
|
||||||
|
multitablewritertask.12=遇到OB異常,回滾此次寫入, 休眠 1秒,採用逐條寫入提交,SQLState:{0}
|
||||||
|
multitablewritertask.13=遇到OB異常,回滾此次寫入, 採用逐條寫入提交,SQLState:{0}
|
||||||
|
multitablewritertask.14=寫入表[{0}]失敗,休眠[{1}]毫秒,數據:{2}
|
||||||
|
multitablewritertask.15=寫入表[{0}]存在髒數據,record={1}, 寫入異常為:
|
||||||
|
|
||||||
|
|
||||||
|
singletablewritertask.1=遇到OB致命異常,回滾此次寫入, 休眠 5分鐘,SQLState:{0}
|
||||||
|
singletablewritertask.2=遇到OB可恢復異常,回滾此次寫入, 休眠 1分鐘,SQLState:{0}
|
||||||
|
singletablewritertask.3=遇到OB異常,回滾此次寫入, 休眠 1秒,採用逐條寫入提交,SQLState:{0}
|
||||||
|
singletablewritertask.4=遇到OB異常,回滾此次寫入, 採用逐條寫入提交,SQLState:{0}
|
@ -0,0 +1,41 @@
|
|||||||
|
multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
|
||||||
|
multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
|
||||||
|
multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
|
||||||
|
multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301
|
||||||
|
multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539.
|
||||||
|
multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
||||||
|
multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
||||||
|
multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
|
||||||
|
multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219.
|
||||||
|
multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0}
|
||||||
|
multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0}
|
||||||
|
multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
|
||||||
|
multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
|
||||||
|
multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2}
|
||||||
|
multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a:
|
||||||
|
|
||||||
|
|
||||||
|
singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0}
|
||||||
|
singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0}
|
||||||
|
singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
|
||||||
|
singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}multitablewritertask.1=配置的tableList為多表,但未配置分表規則,請檢查您的配置
|
||||||
|
multitablewritertask.2=配置的多庫中的表名有重複的,但未配置分庫規則和分表規則,請檢查您的配置
|
||||||
|
multitablewritertask.3=配置的所有表名都相同,但未配置分庫規則,請檢查您的配置
|
||||||
|
multitablewritertask.4=配置的table和db名稱都相同,此種回流方式不支援
|
||||||
|
multitablewritertask.5=列配置資訊有錯誤. 因為您配置的任務中,源頭讀取欄位數:{0}與 目的表要寫入的欄位數:{1}不相等. 請檢查您的配置並作出修改.
|
||||||
|
multitablewritertask.6=通過規則計算出來的tableName查找對應的db不存在,tableName={0}, 請檢查您配置的規則.
|
||||||
|
multitablewritertask.7=通過規則計算出來的db和table不存在,算出的dbName={0},tableName={1}, 請檢查您配置的規則.
|
||||||
|
multitablewritertask.8=通過規則計算出來的db不存在,算出的dbName={0}, 請檢查您配置的規則.
|
||||||
|
multitablewritertask.9=通過規則計算出來的dbName[{0}], 存在多張分表,請配置您的分表規則.
|
||||||
|
multitablewritertask.10=遇到OB致命異常,回滾此次寫入, 休眠 5分鐘,SQLState:{0}
|
||||||
|
multitablewritertask.11=遇到OB可恢復異常,回滾此次寫入, 休眠 1分鐘,SQLState:{0}
|
||||||
|
multitablewritertask.12=遇到OB異常,回滾此次寫入, 休眠 1秒,採用逐條寫入提交,SQLState:{0}
|
||||||
|
multitablewritertask.13=遇到OB異常,回滾此次寫入, 採用逐條寫入提交,SQLState:{0}
|
||||||
|
multitablewritertask.14=寫入表[{0}]失敗,休眠[{1}]毫秒,數據:{2}
|
||||||
|
multitablewritertask.15=寫入表[{0}]存在髒數據,record={1}, 寫入異常為:
|
||||||
|
|
||||||
|
|
||||||
|
singletablewritertask.1=遇到OB致命異常,回滾此次寫入, 休眠 5分鐘,SQLState:{0}
|
||||||
|
singletablewritertask.2=遇到OB可恢復異常,回滾此次寫入, 休眠 1分鐘,SQLState:{0}
|
||||||
|
singletablewritertask.3=遇到OB異常,回滾此次寫入, 休眠 1秒,採用逐條寫入提交,SQLState:{0}
|
||||||
|
singletablewritertask.4=遇到OB異常,回滾此次寫入, 採用逐條寫入提交,SQLState:{0}
|
@ -0,0 +1,12 @@
|
|||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter.task;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.util.Configuration;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TODO(yuez)升级hbase api之后再补充暂时用不到
|
||||||
|
*/
|
||||||
|
public class MultiVersionWriteTask extends ObHBaseWriteTask{
|
||||||
|
public MultiVersionWriteTask(Configuration configuration) throws Exception {
|
||||||
|
super(configuration);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,12 @@
|
|||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter.task;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.util.Configuration;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TODO(yuez) 升级hbase api之后再补充暂时用不到
|
||||||
|
*/
|
||||||
|
public class NormalWriteTask extends ObHBaseWriteTask{
|
||||||
|
public NormalWriteTask(Configuration configuration) throws Exception {
|
||||||
|
super(configuration);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,317 @@
|
|||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter.task;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.element.Record;
|
||||||
|
import com.alibaba.datax.common.exception.DataXException;
|
||||||
|
import com.alibaba.datax.common.plugin.RecordReceiver;
|
||||||
|
import com.alibaba.datax.common.plugin.TaskPluginCollector;
|
||||||
|
import com.alibaba.datax.common.util.Configuration;
|
||||||
|
import com.alibaba.datax.common.util.MessageSource;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.reader.Key;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
|
||||||
|
import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.Config;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.Constant;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.NullModeType;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.ObHTableInfo;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ServerConnectInfo;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.BlockingQueue;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.LinkedBlockingQueue;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
import java.util.concurrent.locks.Condition;
|
||||||
|
import java.util.concurrent.locks.Lock;
|
||||||
|
import java.util.concurrent.locks.ReentrantLock;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
public class ObHBaseWriteTask extends CommonRdbmsWriter.Task {
|
||||||
|
private final static MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(ObHBaseWriteTask.class);
|
||||||
|
private final static Logger LOG = LoggerFactory.getLogger(ObHBaseWriteTask.class);
|
||||||
|
|
||||||
|
public NullModeType nullMode = null;
|
||||||
|
private int maxRetryCount;
|
||||||
|
|
||||||
|
public List<Configuration> columns;
|
||||||
|
public List<Configuration> rowkeyColumn;
|
||||||
|
public Configuration versionColumn;
|
||||||
|
|
||||||
|
public String hbaseTableName;
|
||||||
|
public String encoding;
|
||||||
|
public Boolean walFlag;
|
||||||
|
|
||||||
|
String configUrl;
|
||||||
|
String dbName;
|
||||||
|
String ip;
|
||||||
|
String port;
|
||||||
|
|
||||||
|
String fullUserName;
|
||||||
|
boolean usdOdpMode;
|
||||||
|
String sysUsername;
|
||||||
|
String sysPassword;
|
||||||
|
private ObHTableInfo obHTableInfo;
|
||||||
|
|
||||||
|
private ConcurrentTableWriter concurrentWriter;
|
||||||
|
private boolean allTaskInQueue = false;
|
||||||
|
private long startTime = 0;
|
||||||
|
private String threadName = Thread.currentThread().getName();
|
||||||
|
|
||||||
|
private Lock lock = new ReentrantLock();
|
||||||
|
private Condition condition = lock.newCondition();
|
||||||
|
|
||||||
|
public ObHBaseWriteTask(Configuration configuration) {
|
||||||
|
super(DataBaseType.MySql);
|
||||||
|
init(configuration);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void init(com.alibaba.datax.common.util.Configuration configuration) {
|
||||||
|
this.obHTableInfo = new ObHTableInfo(configuration);
|
||||||
|
this.hbaseTableName = configuration.getString(ConfigKey.TABLE);
|
||||||
|
this.columns = configuration.getListConfiguration(ConfigKey.COLUMN);
|
||||||
|
this.rowkeyColumn = configuration.getListConfiguration(ConfigKey.ROWKEY_COLUMN);
|
||||||
|
this.versionColumn = configuration.getConfiguration(ConfigKey.VERSION_COLUMN);
|
||||||
|
this.encoding = configuration.getString(ConfigKey.ENCODING, Constant.DEFAULT_ENCODING);
|
||||||
|
this.nullMode = NullModeType.getByTypeName(configuration.getString(ConfigKey.NULL_MODE, Constant.DEFAULT_NULL_MODE));
|
||||||
|
// this.memstoreThreshold = configuration.getDouble(Config.MEMSTORE_THRESHOLD, Config.DEFAULT_MEMSTORE_THRESHOLD);
|
||||||
|
this.walFlag = configuration.getBool(ConfigKey.WAL_FLAG, true);
|
||||||
|
this.maxRetryCount = configuration.getInt(ConfigKey.MAX_RETRY_COUNT, 3);
|
||||||
|
|
||||||
|
// default 1000 rows are committed together
|
||||||
|
this.batchSize = com.alibaba.datax.plugin.rdbms.writer.Constant.DEFAULT_BATCH_SIZE;
|
||||||
|
this.batchByteSize = com.alibaba.datax.plugin.rdbms.writer.Constant.DEFAULT_BATCH_BYTE_SIZE;
|
||||||
|
|
||||||
|
this.configUrl = configuration.getString(ConfigKey.OBCONFIG_URL);
|
||||||
|
this.jdbcUrl = configuration.getString(ConfigKey.JDBC_URL);
|
||||||
|
this.username = configuration.getString(Key.USERNAME);
|
||||||
|
this.password = configuration.getString(Key.PASSWORD);
|
||||||
|
this.dbName = configuration.getString(Key.DBNAME);
|
||||||
|
this.usdOdpMode = configuration.getBool(ConfigKey.USE_ODP_MODE);
|
||||||
|
|
||||||
|
ServerConnectInfo connectInfo = new ServerConnectInfo(jdbcUrl, username, password);
|
||||||
|
String clusterName = connectInfo.clusterName;
|
||||||
|
this.fullUserName = connectInfo.getFullUserName();
|
||||||
|
final String[] ipPort = connectInfo.ipPort.split(":");
|
||||||
|
if (usdOdpMode) {
|
||||||
|
this.ip = ipPort[0];
|
||||||
|
this.port = ipPort[1];
|
||||||
|
} else {
|
||||||
|
this.sysUsername = configuration.getString(ConfigKey.OB_SYS_USER);
|
||||||
|
this.sysPassword = configuration.getString(ConfigKey.OB_SYS_PASSWORD);
|
||||||
|
connectInfo.setSysUser(sysUsername);
|
||||||
|
connectInfo.setSysPass(sysPassword);
|
||||||
|
if (!configUrl.contains("ObRegion")) {
|
||||||
|
if (configUrl.contains("?")) {
|
||||||
|
configUrl += "&ObRegion=" + clusterName;
|
||||||
|
} else {
|
||||||
|
configUrl += "?ObRegion=" + clusterName;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!configUrl.contains("database")) {
|
||||||
|
configUrl += "&database=" + dbName;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (null == concurrentWriter) {
|
||||||
|
concurrentWriter = new ConcurrentTableWriter(configuration, connectInfo);
|
||||||
|
allTaskInQueue = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void prepare(Configuration configuration) {
|
||||||
|
concurrentWriter.start();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void startWrite(RecordReceiver recordReceiver, Configuration configuration, TaskPluginCollector taskPluginCollector) {
|
||||||
|
this.taskPluginCollector = taskPluginCollector;
|
||||||
|
int recordCount = 0;
|
||||||
|
int bufferBytes = 0;
|
||||||
|
List<Record> records = new ArrayList<>();
|
||||||
|
try {
|
||||||
|
Record record;
|
||||||
|
while ((record = recordReceiver.getFromReader()) != null) {
|
||||||
|
recordCount++;
|
||||||
|
bufferBytes += record.getMemorySize();
|
||||||
|
records.add(record);
|
||||||
|
// 按照指定的批大小进行批量写入
|
||||||
|
if (records.size() >= batchSize || bufferBytes >= batchByteSize) {
|
||||||
|
concurrentWriter.addBatchRecords(Lists.newArrayList(records));
|
||||||
|
records.clear();
|
||||||
|
bufferBytes = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!records.isEmpty()) {
|
||||||
|
concurrentWriter.addBatchRecords(records);
|
||||||
|
}
|
||||||
|
} catch (Throwable e) {
|
||||||
|
LOG.warn("startWrite error unexpected ", e);
|
||||||
|
throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e);
|
||||||
|
}
|
||||||
|
LOG.info(recordCount + " rows received.");
|
||||||
|
waitTaskFinish();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void waitTaskFinish() {
|
||||||
|
this.allTaskInQueue = true;
|
||||||
|
LOG.info("ConcurrentTableWriter has put all task in queue, queueSize = {}, total = {}, finished = {}",
|
||||||
|
concurrentWriter.getTaskQueueSize(),
|
||||||
|
concurrentWriter.getTotalTaskCount(),
|
||||||
|
concurrentWriter.getFinishTaskCount());
|
||||||
|
|
||||||
|
lock.lock();
|
||||||
|
try {
|
||||||
|
while (!concurrentWriter.checkFinish()) {
|
||||||
|
condition.await(50, TimeUnit.MILLISECONDS);
|
||||||
|
// print statistic
|
||||||
|
LOG.debug("Statistic total task {}, finished {}, queue Size {}",
|
||||||
|
concurrentWriter.getTotalTaskCount(),
|
||||||
|
concurrentWriter.getFinishTaskCount(),
|
||||||
|
concurrentWriter.getTaskQueueSize());
|
||||||
|
concurrentWriter.printStatistics();
|
||||||
|
}
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
LOG.warn("Concurrent table writer wait task finish interrupt");
|
||||||
|
} finally {
|
||||||
|
lock.unlock();
|
||||||
|
}
|
||||||
|
LOG.debug("wait all InsertTask finished ...");
|
||||||
|
}
|
||||||
|
|
||||||
|
// True only when every batch has been queued AND every queued batch has been
// consumed by the put tasks.
public boolean isFinished() {
    return allTaskInQueue && concurrentWriter.checkFinish();
}
|
||||||
|
|
||||||
|
// Wakes up waitTaskFinish(); called by a put task when it observes completion.
// (Method name keeps the original misspelling for caller compatibility.)
public void singalTaskFinish() {
    lock.lock();
    try {
        condition.signal();
    } finally {
        lock.unlock();
    }
}
|
||||||
|
|
||||||
|
// Forwards an unwritable record (plus its cause) to DataX's dirty-record
// collector so it is accounted against the job's errorLimit.
public void collectDirtyRecord(Record record, Throwable throwable) {
    this.taskPluginCollector.collectDirtyRecord(record, throwable);
}
|
||||||
|
|
||||||
|
@Override
public void post(Configuration configuration) {
    // Intentionally empty: no post-processing step is needed for this writer.
}
|
||||||
|
|
||||||
|
@Override
public void destroy(Configuration configuration) {
    // Stop the put tasks and release their HTable resources before the parent
    // cleanup runs. concurrentWriter may be null if init never completed.
    if (concurrentWriter != null) {
        concurrentWriter.destory();
    }
    super.destroy(configuration);
}
|
||||||
|
|
||||||
|
public class ConcurrentTableWriter {
|
||||||
|
private BlockingQueue<List<Record>> queue;
|
||||||
|
private List<PutTask> putTasks;
|
||||||
|
private Configuration config;
|
||||||
|
private AtomicLong totalTaskCount;
|
||||||
|
private AtomicLong finishTaskCount;
|
||||||
|
private ServerConnectInfo connectInfo;
|
||||||
|
private ExecutorService executorService;
|
||||||
|
private final int threadCount;
|
||||||
|
|
||||||
|
public ConcurrentTableWriter(Configuration config, ServerConnectInfo connectInfo) {
|
||||||
|
this.threadCount = config.getInt(Config.WRITER_THREAD_COUNT, Config.DEFAULT_WRITER_THREAD_COUNT);
|
||||||
|
this.queue = new LinkedBlockingQueue<List<Record>>(threadCount << 1);
|
||||||
|
this.putTasks = new ArrayList<PutTask>(threadCount);
|
||||||
|
this.config = config;
|
||||||
|
this.totalTaskCount = new AtomicLong(0);
|
||||||
|
this.finishTaskCount = new AtomicLong(0);
|
||||||
|
this.executorService = Executors.newFixedThreadPool(threadCount);
|
||||||
|
this.connectInfo = connectInfo;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getTotalTaskCount() {
|
||||||
|
return totalTaskCount.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getFinishTaskCount() {
|
||||||
|
return finishTaskCount.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getTaskQueueSize() {
|
||||||
|
return queue.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void increFinishCount() {
|
||||||
|
finishTaskCount.incrementAndGet();
|
||||||
|
}
|
||||||
|
|
||||||
|
// should check after put all the task in the queue
|
||||||
|
public boolean checkFinish() {
|
||||||
|
long finishCount = finishTaskCount.get();
|
||||||
|
long totalCount = totalTaskCount.get();
|
||||||
|
return finishCount == totalCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized void start() {
|
||||||
|
for (int i = 0; i < threadCount; ++i) {
|
||||||
|
LOG.info("start {} insert task.", (i + 1));
|
||||||
|
PutTask putTask = new PutTask(threadName, queue, config, connectInfo, obHTableInfo, ObHBaseWriteTask.this);
|
||||||
|
putTask.setWriter(this);
|
||||||
|
putTasks.add(putTask);
|
||||||
|
}
|
||||||
|
for (PutTask task : putTasks) {
|
||||||
|
executorService.execute(task);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void printStatistics() {
|
||||||
|
long insertTotalCost = 0;
|
||||||
|
long insertTotalCount = 0;
|
||||||
|
for (PutTask task : putTasks) {
|
||||||
|
insertTotalCost += task.getTotalCost();
|
||||||
|
insertTotalCount += task.getPutCount();
|
||||||
|
}
|
||||||
|
long avgCost = 0;
|
||||||
|
if (insertTotalCount != 0) {
|
||||||
|
avgCost = insertTotalCost / insertTotalCount;
|
||||||
|
}
|
||||||
|
ObHBaseWriteTask.LOG.debug("Put {} times, totalCost {} ms, average {} ms",
|
||||||
|
insertTotalCount, insertTotalCost, avgCost);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addBatchRecords(final List<Record> records) throws InterruptedException {
|
||||||
|
boolean isSucc = false;
|
||||||
|
while (!isSucc) {
|
||||||
|
isSucc = queue.offer(records, 5, TimeUnit.MILLISECONDS);
|
||||||
|
}
|
||||||
|
totalTaskCount.incrementAndGet();
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized void destory() {
|
||||||
|
if (putTasks != null) {
|
||||||
|
for (PutTask task : putTasks) {
|
||||||
|
task.setStop();
|
||||||
|
task.destroy();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
destroyExecutor();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void destroyExecutor() {
|
||||||
|
if (executorService != null && !executorService.isShutdown()) {
|
||||||
|
executorService.shutdown();
|
||||||
|
try {
|
||||||
|
executorService.awaitTermination(0L, TimeUnit.SECONDS);
|
||||||
|
} catch (InterruptedException var2) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,325 @@
|
|||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter.task;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.element.DoubleColumn;
|
||||||
|
import com.alibaba.datax.common.element.LongColumn;
|
||||||
|
import com.alibaba.datax.common.element.Record;
|
||||||
|
import com.alibaba.datax.common.exception.DataXException;
|
||||||
|
import com.alibaba.datax.common.util.Configuration;
|
||||||
|
import com.alibaba.datax.common.util.MessageSource;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.ColumnType;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.Config;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.Hbase094xWriterErrorCode;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.ObHTableInfo;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ObHbaseTableHolder;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ServerConnectInfo;
|
||||||
|
import com.alipay.oceanbase.hbase.constants.OHConstants;
|
||||||
|
import com.alipay.oceanbase.rpc.property.Property;
|
||||||
|
|
||||||
|
import com.google.common.base.Stopwatch;
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Queue;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import org.apache.commons.lang3.tuple.Triple;
|
||||||
|
import org.apache.hadoop.hbase.client.HTableInterface;
|
||||||
|
import org.apache.hadoop.hbase.client.Put;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.OBHBASE_HTABLE_CLIENT_WRITE_BUFFER;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.OBHBASE_HTABLE_PUT_WRITE_BUFFER_CHECK;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.TABLE_CLIENT_RPC_EXECUTE_TIMEOUT;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.WRITE_BUFFER_HIGH_MARK;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.WRITE_BUFFER_LOW_MARK;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_HBASE_HTABLE_CLIENT_WRITE_BUFFER;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_NETTY_BUFFER_HIGH_WATERMARK;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_NETTY_BUFFER_LOW_WATERMARK;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_RPC_EXECUTE_TIMEOUT;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.util.ObHbaseWriterUtils.getColumnByte;
|
||||||
|
import static com.alibaba.datax.plugin.writer.obhbasewriter.util.ObHbaseWriterUtils.getRowkey;
|
||||||
|
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_HTABLE_CLIENT_WRITE_BUFFER;
|
||||||
|
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK;
|
||||||
|
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_DATABASE;
|
||||||
|
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_FULL_USER_NAME;
|
||||||
|
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_PARAM_URL;
|
||||||
|
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_PASSWORD;
|
||||||
|
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_SYS_USER_NAME;
|
||||||
|
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_SYS_PASSWORD;
|
||||||
|
|
||||||
|
/**
 * Worker runnable that drains batches of DataX records from a shared queue and
 * writes them to an OB-HBase table via an {@link ObHbaseTableHolder}-managed
 * HTable. A failed batch is retried record-by-record up to {@code failTryCount}
 * times; records that still fail are reported as dirty records.
 */
public class PutTask implements Runnable {

    private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(PutTask.class);

    private static final Logger LOG = LoggerFactory.getLogger(PutTask.class);

    // Owning task; used for finish signaling and dirty-record collection.
    private ObHBaseWriteTask writerTask;
    private ObHBaseWriteTask.ConcurrentTableWriter writer;

    // Cumulative wall time (ms) spent in batchWrite, for statistics.
    private long totalCost = 0;
    // Number of batches this task has processed.
    private long putCount = 0;
    // Cooperative stop flag set by the coordinator on shutdown.
    private boolean isStop;

    private ObHTableInfo obHTableInfo;
    // Optional sub-config selecting which column (or constant) supplies the cell version.
    private final Configuration versionColumn;
    // Maximum per-record retry attempts after a batch write fails.
    private final int failTryCount;

    private String parentThreadName;
    private Queue<List<Record>> queue;
    private Configuration config;
    private ServerConnectInfo connInfo;

    private ObHbaseTableHolder tableHolder;

    // Timestamp parse formats tried in buildTimestamp (ms-precision first).
    private final SimpleDateFormat df_second = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    private final SimpleDateFormat df_ms = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss SSS");

    public PutTask(String parentThreadName, Queue<List<Record>> recordsQueue, Configuration config, ServerConnectInfo connectInfo, ObHTableInfo obHTableInfo, ObHBaseWriteTask writerTask) {
        this.parentThreadName = parentThreadName;
        this.queue = recordsQueue;
        this.config = config;
        this.connInfo = connectInfo;
        this.obHTableInfo = obHTableInfo;
        this.writerTask = writerTask;
        this.versionColumn = config.getConfiguration(ConfigKey.VERSION_COLUMN);
        this.failTryCount = config.getInt(Config.FAIL_TRY_COUNT, Config.DEFAULT_FAIL_TRY_COUNT);
        this.isStop = false;
        initTableHolder();
    }

    /**
     * Builds the obkv-table-client configuration (ODP mode or sys-tenant mode)
     * and creates the table holder. Any failure is fatal for this task.
     */
    private void initTableHolder() {
        try {
            org.apache.hadoop.conf.Configuration c = new org.apache.hadoop.conf.Configuration();
            c.set(HBASE_OCEANBASE_FULL_USER_NAME, writerTask.fullUserName);
            c.set(HBASE_OCEANBASE_PASSWORD, this.connInfo.password);
            c.set(HBASE_OCEANBASE_DATABASE, writerTask.dbName);
            // obkv-table-client is needed the code below
            if (writerTask.usdOdpMode) {
                c.setBoolean(OHConstants.HBASE_OCEANBASE_ODP_MODE, true);
                c.set(OHConstants.HBASE_OCEANBASE_ODP_ADDR, connInfo.host);
                c.set(OHConstants.HBASE_OCEANBASE_ODP_PORT, connInfo.port);
                LOG.info("sysUser and sysPassword is empty, build HTABLE in odp mode.");
            } else {
                c.set(HBASE_OCEANBASE_PARAM_URL, writerTask.configUrl);
                c.set(HBASE_OCEANBASE_SYS_USER_NAME, this.connInfo.sysUser);
                c.set(HBASE_OCEANBASE_SYS_PASSWORD, this.connInfo.sysPass);
                LOG.info("sysUser and sysPassword is not empty, build HTABLE in sys mode.");
            }
            c.set(HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK, config.getString(OBHBASE_HTABLE_PUT_WRITE_BUFFER_CHECK, DEFAULT_HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK));
            c.set(HBASE_HTABLE_CLIENT_WRITE_BUFFER, config.getString(OBHBASE_HTABLE_CLIENT_WRITE_BUFFER, DEFAULT_HBASE_HTABLE_CLIENT_WRITE_BUFFER));

            // Client-side timeout and netty watermark tuning (string values expected by the client).
            c.set(Property.RS_LIST_ACQUIRE_CONNECT_TIMEOUT.getKey(), "500");
            c.set(Property.RS_LIST_ACQUIRE_READ_TIMEOUT.getKey(), "5000");
            c.set(Property.RPC_EXECUTE_TIMEOUT.getKey(), config.getString(TABLE_CLIENT_RPC_EXECUTE_TIMEOUT, DEFAULT_RPC_EXECUTE_TIMEOUT));
            c.set(Property.NETTY_BUFFER_LOW_WATERMARK.getKey(), config.getString(WRITE_BUFFER_LOW_MARK, DEFAULT_NETTY_BUFFER_LOW_WATERMARK));
            c.set(Property.NETTY_BUFFER_HIGH_WATERMARK.getKey(), config.getString(WRITE_BUFFER_HIGH_MARK, DEFAULT_NETTY_BUFFER_HIGH_WATERMARK));
            this.tableHolder = new ObHbaseTableHolder(c, obHTableInfo.getTableName());
        } catch (Exception e) {
            LOG.error("init table holder failed, reason: {}", e.getMessage());
            throw new IllegalStateException(e);
        }

    }

    /**
     * Writes one batch. On any failure after the table was obtained, falls back
     * to per-record writes with retry; if the table itself could not be built,
     * rethrows as a DataX exception. Always counts the batch as finished and
     * closes the HTable handle in the finally block.
     */
    private void batchWrite(final List<Record> buffer) {
        HTableInterface ohTable = null;
        Stopwatch stopwatch = Stopwatch.createStarted();
        try {
            ohTable = this.tableHolder.getOhTable();
            List<Put> puts = buildBatchPutList(buffer);
            ohTable.put(puts);
        } catch (Exception e) {
            if (Objects.isNull(ohTable)) {
                LOG.error("build obHTable: {} failed. reason: {}", obHTableInfo.getTableName(), e.getMessage());
                throw DataXException.asDataXException(Hbase094xWriterErrorCode.GET_HBASE_TABLE_ERROR, Hbase094xWriterErrorCode.GET_HBASE_TABLE_ERROR.getDescription());
            }
            // Batch-level failure: fall through to per-record retry below.
            LOG.error("hbase batch error: " + e);
            // After an error, retry each record of the failed batch one by one.
            for (Record record : buffer) {
                writeOneRecord(ohTable, record);
            }
        } finally {
            // Batch counts as finished regardless of outcome; dirty records
            // were already reported by writeOneRecord where applicable.
            this.writer.increFinishCount();
            putCount++;
            totalCost += stopwatch.elapsed(TimeUnit.MILLISECONDS);
            try {
                if (!Objects.isNull(ohTable)) {
                    ohTable.close();
                }
            } catch (Exception e) {
                LOG.warn("error in closing htable: {}. Reason: {}", obHTableInfo.getFullHbaseTableName(), e.getMessage());
            }
        }
    }

    /**
     * Writes a single record, retrying up to failTryCount times; on the final
     * failure the record is reported as dirty instead of aborting the task.
     */
    private void writeOneRecord(HTableInterface ohTable, Record record) {
        int retryCount = 0;
        while (retryCount < this.failTryCount) {
            try {
                byte[] rowkey = getRowkey(record, obHTableInfo);
                Put put = new Put(rowkey); // row key
                boolean hasValidValue = buildPut(put, record);

                if (hasValidValue) {
                    ohTable.put(put);
                }
                break;
            } catch (Exception e) {
                retryCount++;
                LOG.error("error in writing: " + e.getMessage() + ", retry count: " + retryCount);
                if (retryCount == this.failTryCount) {
                    LOG.warn("ERROR : record {}", record);
                    this.writerTask.collectDirtyRecord(record, e);
                }
            }
        }
    }

    /** Converts a batch of records into HBase Puts, skipping records with no non-null cell. */
    private List<Put> buildBatchPutList(List<Record> buffer) {
        List<Put> puts = new ArrayList<>();
        for (Record record : buffer) {
            byte[] rowkey = getRowkey(record, obHTableInfo);
            Put put = new org.apache.hadoop.hbase.client.Put(rowkey); // row key
            boolean hasValidValue = buildPut(put, record);
            if (hasValidValue) {
                puts.add(put);
            }
        }
        return puts;
    }

    /**
     * Fills the Put with one cell per configured column (family/qualifier/type
     * triple keyed by record index), applying the version timestamp when one is
     * configured. Returns true iff at least one non-null cell was added.
     */
    private boolean buildPut(Put put, Record record) {
        boolean hasValidValue = false;
        long timestamp = buildTimestamp(record);
        for (Map.Entry<Integer, Triple<String, String, ColumnType>> columnInfo : obHTableInfo.getIndexColumnInfoMap().entrySet()) {
            Integer index = columnInfo.getKey();
            if (index >= record.getColumnNumber()) {
                throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE,
                        MESSAGE_SOURCE.message("normaltask.2", record.getColumnNumber(), index));
            }
            ColumnType columnType = columnInfo.getValue().getRight();
            String familyName = columnInfo.getValue().getLeft();
            String columnName = columnInfo.getValue().getMiddle();

            byte[] value = getColumnByte(columnType, record.getColumn(index), obHTableInfo);
            if (value != null) {
                hasValidValue = true;
                if (timestamp == -1) {
                    // -1 means "no version configured": let HBase assign the timestamp.
                    put.add(familyName.getBytes(), // family
                            columnName.getBytes(), // Q
                            value); // V
                } else {
                    put.add(familyName.getBytes(), // family
                            columnName.getBytes(), // Q
                            timestamp, // timestamp/version
                            value); // V
                }
            }
        }

        return hasValidValue;
    }

    /**
     * Resolves the cell version: -1 when no versionColumn is configured; a
     * constant when index == -1; otherwise the value of the designated column.
     */
    private long buildTimestamp(Record record) {
        if (versionColumn == null) {
            return -1;
        }

        int index = versionColumn.getInt(ConfigKey.INDEX);
        long timestamp;
        if (index == -1) {
            // user specified the constant as timestamp
            timestamp = versionColumn.getLong(ConfigKey.VALUE);
            if (timestamp < 0) {
                throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR,
                        MESSAGE_SOURCE.message("normaltask.4"));
            }
        } else {
            // A designated column supplies the version: Long/Double columns use
            // asLong() directly; other types are parsed with
            // "yyyy-MM-dd HH:mm:ss SSS" first, then "yyyy-MM-dd HH:mm:ss".
            if (index >= record.getColumnNumber()) {
                throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR,
                        MESSAGE_SOURCE.message("normaltask.5", record.getColumnNumber(), index));
            }

            if (record.getColumn(index).getRawData() == null) {
                throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR,
                        MESSAGE_SOURCE.message("normaltask.6"));
            }

            if (record.getColumn(index) instanceof LongColumn || record.getColumn(index) instanceof DoubleColumn) {
                timestamp = record.getColumn(index).asLong();
            } else {
                Date date;
                try {
                    date = df_ms.parse(record.getColumn(index).asString());
                } catch (ParseException e) {
                    try {
                        date = df_second.parse(record.getColumn(index).asString());
                    } catch (ParseException e1) {
                        LOG.info(MESSAGE_SOURCE.message("normaltask.7", index));
                        throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR, e1);
                    }
                }
                timestamp = date.getTime();
            }
        }

        return timestamp;
    }

    // Requests cooperative shutdown; run() exits after the current iteration.
    public void setStop() {isStop = true;}

    public long getTotalCost() {return totalCost;}

    public long getPutCount() {return putCount;}

    public void destroy() {
        tableHolder.destroy();
    }

    void setWriterTask(ObHBaseWriteTask writerTask) {
        this.writerTask = writerTask;
    }

    void setWriter(ObHBaseWriteTask.ConcurrentTableWriter writer) {
        this.writer = writer;
    }

    /**
     * Main loop: poll a batch and write it; if the queue is empty and the
     * owning task reports completion, signal it and exit; otherwise sleep 5 ms
     * and poll again.
     */
    @Override
    public void run() {
        String currentThreadName = String.format("%s-putTask-%d", parentThreadName, Thread.currentThread().getId());
        Thread.currentThread().setName(currentThreadName);
        LOG.debug("Task {} start to execute...", currentThreadName);
        int sleepTimes = 0;
        while (!isStop) {
            try {
                List<Record> records = queue.poll();
                if (null != records) {
                    batchWrite(records);
                } else if (writerTask.isFinished()) {
                    writerTask.singalTaskFinish();
                    LOG.debug("not more task, thread exist ...");
                    break;
                } else {
                    TimeUnit.MILLISECONDS.sleep(5);
                    sleepTimes++;
                }
            } catch (InterruptedException e) {
                LOG.debug("TableWriter is interrupt");
            } catch (Exception e) {
                LOG.warn("ERROR UNEXPECTED {}", e);
            }
        }
        LOG.debug("Thread exist...");
        LOG.debug("sleep {} times, total sleep time: {}", sleepTimes, sleepTimes * 5);
    }
}
|
@ -0,0 +1,139 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2021 OceanBase ob-loader-dumper is licensed under Mulan PSL v2. You can use this software according to
|
||||||
|
* the terms and conditions of the Mulan PSL v2. You may obtain a copy of Mulan PSL v2 at:
|
||||||
|
*
|
||||||
|
* http://license.coscl.org.cn/MulanPSL2
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
|
||||||
|
* BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more
|
||||||
|
* details.
|
||||||
|
*/
|
||||||
|
package com.alibaba.datax.plugin.writer.obhbasewriter.util;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.element.Column;
|
||||||
|
import com.alibaba.datax.common.element.Record;
|
||||||
|
import com.alibaba.datax.common.exception.DataXException;
|
||||||
|
import com.alibaba.datax.common.util.MessageSource;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.ColumnType;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.Hbase094xWriterErrorCode;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.ObHTableInfo;
|
||||||
|
import com.alibaba.datax.plugin.writer.obhbasewriter.task.PutTask;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import org.apache.commons.lang3.tuple.Triple;
|
||||||
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author cjyyz
|
||||||
|
* @date 2023/03/23
|
||||||
|
* @since
|
||||||
|
*/
|
||||||
|
/**
 * Static helpers for converting DataX records into HBase byte values: row-key
 * assembly from configured key elements, and per-type column serialization.
 */
public class ObHbaseWriterUtils {

    // NOTE(review): reuses PutTask's resource bundle so message keys resolve
    // from the same bundle — confirm this is intentional rather than a copy-paste.
    private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(PutTask.class);

    /**
     * Concatenates the configured row-key elements into one byte array.
     * An element with index == -1 contributes its constant middle value;
     * otherwise the record column at that index is serialized by type.
     */
    public static byte[] getRowkey(Record record, ObHTableInfo obHTableInfo) {
        byte[] rowkeyBuffer = {};
        for (Triple<Integer, String, ColumnType> rowKeyElement : obHTableInfo.getRowKeyElementList()) {
            Integer index = rowKeyElement.getLeft();
            ColumnType columnType = rowKeyElement.getRight();
            if (index == -1) {
                // Constant element configured directly in the job config.
                String value = rowKeyElement.getMiddle();
                rowkeyBuffer = Bytes.add(rowkeyBuffer, getValueByte(columnType, value, obHTableInfo.getEncoding()));
            } else {
                if (index >= record.getColumnNumber()) {
                    throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_ROWKEY_ERROR, MESSAGE_SOURCE.message("normaltask.3", record.getColumnNumber(), index));
                }
                byte[] value = getColumnByte(columnType, record.getColumn(index), obHTableInfo);
                rowkeyBuffer = Bytes.add(rowkeyBuffer, value);
            }
        }

        return rowkeyBuffer;
    }

    /**
     * Serializes one record column per its declared HBase type. A null raw
     * value (or the literal string "null" for STRING columns) is handled by the
     * configured null mode: Skip returns null (cell omitted), Empty returns an
     * empty byte array.
     */
    public static byte[] getColumnByte(ColumnType columnType, Column column, ObHTableInfo obHTableInfo) {
        byte[] bytes;
        if (column.getRawData() != null && !(columnType == ColumnType.STRING && column.asString().equals("null"))) {
            switch (columnType) {
                case INT:
                    bytes = Bytes.toBytes(column.asLong().intValue());
                    break;
                case LONG:
                    bytes = Bytes.toBytes(column.asLong());
                    break;
                case DOUBLE:
                    bytes = Bytes.toBytes(column.asDouble());
                    break;
                case FLOAT:
                    bytes = Bytes.toBytes(column.asDouble().floatValue());
                    break;
                case SHORT:
                    bytes = Bytes.toBytes(column.asLong().shortValue());
                    break;
                case BOOLEAN:
                    bytes = Bytes.toBytes(column.asBoolean());
                    break;
                case STRING:
                    bytes = getValueByte(columnType, column.asString(), obHTableInfo.getEncoding());
                    break;
                case BINARY:
                    bytes = Bytes.toBytesBinary(column.asString());
                    break;
                default:
                    throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbaseabstracttask.2", columnType));
            }
        } else {
            switch (obHTableInfo.getNullModeType()) {
                case Skip:
                    bytes = null;
                    break;
                case Empty:
                    bytes = HConstants.EMPTY_BYTE_ARRAY;
                    break;
                default:
                    throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbaseabstracttask.3"));
            }
        }
        return bytes;
    }

    /**
     * Serializes a string constant per the target HBase column type; STRING
     * values use the table's configured character encoding. A null input yields
     * an empty byte array.
     *
     * @param columnType target HBase type of the value
     * @param value string form of the value (may be null)
     * @return byte[] serialized value
     */
    private static byte[] getValueByte(ColumnType columnType, String value, String encoding) {
        byte[] bytes;
        if (value != null) {
            switch (columnType) {
                case INT:
                    bytes = Bytes.toBytes(Integer.parseInt(value));
                    break;
                case LONG:
                    bytes = Bytes.toBytes(Long.parseLong(value));
                    break;
                case DOUBLE:
                    bytes = Bytes.toBytes(Double.parseDouble(value));
                    break;
                case FLOAT:
                    bytes = Bytes.toBytes(Float.parseFloat(value));
                    break;
                case SHORT:
                    bytes = Bytes.toBytes(Short.parseShort(value));
                    break;
                case BOOLEAN:
                    bytes = Bytes.toBytes(Boolean.parseBoolean(value));
                    break;
                case STRING:
                    bytes = value.getBytes(Charset.forName(encoding));
                    break;
                default:
                    throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbaseabstracttask.4", columnType));
            }
        } else {
            bytes = HConstants.EMPTY_BYTE_ARRAY;
        }
        return bytes;
    }
}
|
6
obhbasewriter/src/main/resources/plugin.json
Normal file
6
obhbasewriter/src/main/resources/plugin.json
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"name": "obhbasewriter",
|
||||||
|
"class": "com.alibaba.datax.plugin.writer.obhbasewriter.ObHbaseWriter",
|
||||||
|
"description": "适用于: 生产环境. 原理: TODO",
|
||||||
|
"developer": "alibaba"
|
||||||
|
}
|
@ -0,0 +1,287 @@
|
|||||||
|
package com.alibaba.datax.plugin.reader.oceanbasev10reader.util;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.ArrayBlockingQueue;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
import java.util.concurrent.ExecutorCompletionService;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Future;
|
||||||
|
import java.util.concurrent.ThreadFactory;
|
||||||
|
import java.util.concurrent.ThreadPoolExecutor;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
|
public class ExecutorTemplate<T> {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The default thread pool size. Set as the number of available processors by default.
|
||||||
|
*/
|
||||||
|
public static int DEFAULT_POOL_SIZE = Runtime.getRuntime().availableProcessors();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Indicate whether the executor closes automatically.
|
||||||
|
*/
|
||||||
|
private final boolean autoClose;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
private final List<Future<T>> futures;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
private final ExecutorService internalExecutor;
|
||||||
|
|
||||||
|
private final ExecutorCompletionService<T> completionService;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set pool size for ExecutorTemplate.
|
||||||
|
*/
|
||||||
|
public static void setPoolSize(int size) {
|
||||||
|
DEFAULT_POOL_SIZE = size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default: 1024 AutoClose: true
|
||||||
|
*
|
||||||
|
* @param poolName
|
||||||
|
*/
|
||||||
|
public ExecutorTemplate(String poolName) {
|
||||||
|
this(defaultExecutor(poolName), true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default: 1024 AutoClose: true
|
||||||
|
*
|
||||||
|
* @param poolName
|
||||||
|
*/
|
||||||
|
public ExecutorTemplate(String poolName, int poolSize) {
|
||||||
|
this(defaultExecutor(poolName, poolSize), true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ExecutorTemplate(String poolName, int poolSize, boolean autoClose) {
|
||||||
|
this(defaultExecutor(poolName, poolSize), autoClose);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default: 1024
|
||||||
|
*
|
||||||
|
* @param poolName
|
||||||
|
* @param autoClose
|
||||||
|
*/
|
||||||
|
public ExecutorTemplate(String poolName, boolean autoClose) {
|
||||||
|
this(defaultExecutor(poolName), autoClose);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default: 1024 AutoClose: true
|
||||||
|
*
|
||||||
|
* @param executor
|
||||||
|
*/
|
||||||
|
public ExecutorTemplate(ExecutorService executor) {
|
||||||
|
this(executor, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param executor
|
||||||
|
*/
|
||||||
|
public ExecutorTemplate(ExecutorService executor, boolean autoClose) {
|
||||||
|
this.autoClose = autoClose;
|
||||||
|
this.internalExecutor = executor;
|
||||||
|
this.completionService = new ExecutorCompletionService<>(executor);
|
||||||
|
this.futures = Collections.synchronizedList(new ArrayList<>());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param poolName
|
||||||
|
* @return ExecutorService
|
||||||
|
*/
|
||||||
|
public static ExecutorService defaultExecutor(String poolName) {
|
||||||
|
return defaultExecutor(100000, poolName, DEFAULT_POOL_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param poolName
|
||||||
|
* @param poolSize
|
||||||
|
* @return ExecutorService
|
||||||
|
*/
|
||||||
|
public static ExecutorService defaultExecutor(String poolName, int poolSize) {
|
||||||
|
return defaultExecutor(100000, poolName, poolSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param capacity
|
||||||
|
* @param poolName
|
||||||
|
* @return ExecutorService
|
||||||
|
*/
|
||||||
|
public static ExecutorService defaultExecutor(int capacity, String poolName, int poolSize) {
|
||||||
|
return new ThreadPoolExecutor(poolSize, poolSize, 30, TimeUnit.SECONDS, /* */
|
||||||
|
new ArrayBlockingQueue<>(capacity), new NamedThreadFactory(poolName));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Submit a callable task
|
||||||
|
*
|
||||||
|
* @param task
|
||||||
|
*/
|
||||||
|
public void submit(Callable<T> task) {
|
||||||
|
Future<T> f = this.completionService.submit(task);
|
||||||
|
futures.add(f);
|
||||||
|
check(f);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Submit a runnable task
|
||||||
|
*
|
||||||
|
* @param task
|
||||||
|
*/
|
||||||
|
public void submit(Runnable task) {
|
||||||
|
Future<T> f = this.completionService.submit(task, null);
|
||||||
|
futures.add(f);
|
||||||
|
check(f);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wait all the task run finished, and get all the results.
|
||||||
|
*
|
||||||
|
* @return List<T>
|
||||||
|
*/
|
||||||
|
public List<T> waitForResult() {
|
||||||
|
try {
|
||||||
|
int index = 0;
|
||||||
|
Throwable ex = null;
|
||||||
|
List<T> result = new ArrayList<T>();
|
||||||
|
while (index < futures.size()) {
|
||||||
|
try {
|
||||||
|
Future<T> f = this.completionService.take();
|
||||||
|
result.add(f.get());
|
||||||
|
} catch (Throwable e) {
|
||||||
|
ex = getRootCause(e);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
if (ex != null) {
|
||||||
|
cancelAll();
|
||||||
|
throw new RuntimeException(ex);
|
||||||
|
} else {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
clearFutures();
|
||||||
|
if (autoClose) {
|
||||||
|
destroyExecutor();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public void cancelAll() {
|
||||||
|
for (Future<T> f : futures) {
|
||||||
|
if (!f.isDone() && !f.isCancelled()) {
|
||||||
|
f.cancel(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public void clearFutures() {
|
||||||
|
this.futures.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public void destroyExecutor() {
|
||||||
|
if (internalExecutor != null && !internalExecutor.isShutdown()) {
|
||||||
|
this.internalExecutor.shutdown();
|
||||||
|
try {
|
||||||
|
this.internalExecutor.awaitTermination(0, TimeUnit.SECONDS);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fast check the future
|
||||||
|
*
|
||||||
|
* @param f
|
||||||
|
*/
|
||||||
|
private void check(Future<T> f) {
|
||||||
|
if (f != null && f.isDone()) {
|
||||||
|
try {
|
||||||
|
f.get();
|
||||||
|
} catch (Throwable e) {
|
||||||
|
cancelAll();
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param throwable
|
||||||
|
* @return Throwable
|
||||||
|
*/
|
||||||
|
private Throwable getRootCause(Throwable throwable) {
|
||||||
|
final Throwable holder = throwable;
|
||||||
|
final List<Throwable> list = new ArrayList<>();
|
||||||
|
while (throwable != null && !list.contains(throwable)) {
|
||||||
|
list.add(throwable);
|
||||||
|
throwable = throwable.getCause();
|
||||||
|
}
|
||||||
|
return list.size() < 2 ? holder : list.get(list.size() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An internal named thread factory
|
||||||
|
*/
|
||||||
|
static class NamedThreadFactory implements ThreadFactory {
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
private final boolean daemon;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
private final String name;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
private final AtomicInteger seq = new AtomicInteger(0);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param name
|
||||||
|
*/
|
||||||
|
public NamedThreadFactory(String name) {
|
||||||
|
this(name, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param name
|
||||||
|
* @param daemon
|
||||||
|
*/
|
||||||
|
public NamedThreadFactory(String name, boolean daemon) {
|
||||||
|
this.name = name;
|
||||||
|
this.daemon = daemon;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Thread newThread(Runnable r) {
|
||||||
|
Thread t = new Thread(r);
|
||||||
|
t.setDaemon(daemon);
|
||||||
|
t.setPriority(Thread.NORM_PRIORITY);
|
||||||
|
t.setName((name + seq.incrementAndGet()));
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -28,6 +28,16 @@
|
|||||||
<groupId>com.alibaba.datax</groupId>
|
<groupId>com.alibaba.datax</groupId>
|
||||||
<artifactId>plugin-rdbms-util</artifactId>
|
<artifactId>plugin-rdbms-util</artifactId>
|
||||||
<version>${datax-project-version}</version>
|
<version>${datax-project-version}</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>com.alibaba</groupId>
|
||||||
|
<artifactId>druid</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.slf4j</groupId>
|
<groupId>org.slf4j</groupId>
|
||||||
@ -43,13 +53,11 @@
|
|||||||
<version>4.0.4.RELEASE</version>
|
<version>4.0.4.RELEASE</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<!--
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.alipay.oceanbase</groupId>
|
<groupId>com.alibaba</groupId>
|
||||||
<artifactId>oceanbase-partition</artifactId>
|
<artifactId>druid</artifactId>
|
||||||
<version>0.0.5</version>
|
<version>1.2.18</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
-->
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.alipay.oceanbase</groupId>
|
<groupId>com.alipay.oceanbase</groupId>
|
||||||
@ -64,6 +72,19 @@
|
|||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.oceanbase</groupId>
|
||||||
|
<artifactId>oceanbase-client</artifactId>
|
||||||
|
<version>2.4.11</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.oceanbase</groupId>
|
<groupId>com.oceanbase</groupId>
|
||||||
<artifactId>shade-ob-partition-calculator</artifactId>
|
<artifactId>shade-ob-partition-calculator</artifactId>
|
||||||
@ -72,8 +93,13 @@
|
|||||||
<systemPath>${pom.basedir}/src/main/libs/shade-ob-partition-calculator-1.0-SNAPSHOT.jar</systemPath>
|
<systemPath>${pom.basedir}/src/main/libs/shade-ob-partition-calculator-1.0-SNAPSHOT.jar</systemPath>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
<version>27.0-jre</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
<groupId>log4j</groupId>
|
<groupId>log4j</groupId>
|
||||||
<artifactId>log4j</artifactId>
|
<artifactId>log4j</artifactId>
|
||||||
<version>1.2.16</version>
|
<version>1.2.16</version>
|
||||||
@ -89,6 +115,152 @@
|
|||||||
<version>4.11</version>
|
<version>4.11</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.oceanbase</groupId>
|
||||||
|
<artifactId>obkv-table-client</artifactId>
|
||||||
|
<version>1.2.6</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>com.alibaba</groupId>
|
||||||
|
<artifactId>fastjson</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<!-- add dependency in parent project -->
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-api</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<!-- add dependency in parent project -->
|
||||||
|
<groupId>com.oceanbase</groupId>
|
||||||
|
<artifactId>oceanbase-client</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<!-- add dependency in parent project -->
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>commons-lang</groupId>
|
||||||
|
<artifactId>commons-lang</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>com.alipay.sofa.common</groupId>
|
||||||
|
<artifactId>sofa-common-tools</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<!-- codec module -->
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-codec-dns</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-codec-http</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-codec-http2</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-codec-haproxy</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-codec-mqtt</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-codec-memcache</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-codec-redis</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-codec-smtp</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-codec-socks</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-codec-stomp</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-codec-xml</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<!-- handler module -->
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-handler-proxy</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-handler-ssl-ocsp</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<!-- resolver module -->
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-resolver-dns</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-resolver-dns-classes-macos</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-resolver-dns-native-macos</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<!-- transport module -->
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-transport-rxtx</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-transport-udt</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-transport-sctp</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.alipay.sofa.common</groupId>
|
||||||
|
<artifactId>sofa-common-tools</artifactId>
|
||||||
|
<version>1.3.11</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<!-- add dependency in parent project -->
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-api</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<!-- add dependency in parent project -->
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.alibaba</groupId>
|
||||||
|
<artifactId>fastjson</artifactId>
|
||||||
|
<version>1.2.83</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-lang</groupId>
|
||||||
|
<artifactId>commons-lang</artifactId>
|
||||||
|
<version>2.6</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>mysql</groupId>
|
||||||
|
<artifactId>mysql-connector-java</artifactId>
|
||||||
|
<version>${mysql.driver.version}</version>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
14
package.xml
14
package.xml
@ -39,6 +39,13 @@
|
|||||||
</includes>
|
</includes>
|
||||||
<outputDirectory>datax</outputDirectory>
|
<outputDirectory>datax</outputDirectory>
|
||||||
</fileSet>
|
</fileSet>
|
||||||
|
<fileSet>
|
||||||
|
<directory>obhbasereader/target/datax/</directory>
|
||||||
|
<includes>
|
||||||
|
<include>**/*.*</include>
|
||||||
|
</includes>
|
||||||
|
<outputDirectory>datax</outputDirectory>
|
||||||
|
</fileSet>
|
||||||
<fileSet>
|
<fileSet>
|
||||||
<directory>drdsreader/target/datax/</directory>
|
<directory>drdsreader/target/datax/</directory>
|
||||||
<includes>
|
<includes>
|
||||||
@ -476,6 +483,13 @@
|
|||||||
</includes>
|
</includes>
|
||||||
<outputDirectory>datax</outputDirectory>
|
<outputDirectory>datax</outputDirectory>
|
||||||
</fileSet>
|
</fileSet>
|
||||||
|
<fileSet>
|
||||||
|
<directory>obhbasewriter/target/datax/</directory>
|
||||||
|
<includes>
|
||||||
|
<include>**/*.*</include>
|
||||||
|
</includes>
|
||||||
|
<outputDirectory>datax</outputDirectory>
|
||||||
|
</fileSet>
|
||||||
<fileSet>
|
<fileSet>
|
||||||
<directory>gdbwriter/target/datax/</directory>
|
<directory>gdbwriter/target/datax/</directory>
|
||||||
<includes>
|
<includes>
|
||||||
|
@ -33,6 +33,17 @@
|
|||||||
<version>${mysql.driver.version}</version>
|
<version>${mysql.driver.version}</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.oceanbase</groupId>
|
||||||
|
<artifactId>oceanbase-client</artifactId>
|
||||||
|
<version>2.4.11</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.slf4j</groupId>
|
<groupId>org.slf4j</groupId>
|
||||||
<artifactId>slf4j-api</artifactId>
|
<artifactId>slf4j-api</artifactId>
|
||||||
|
@ -16,6 +16,8 @@ public class ObVersion implements Comparable<ObVersion> {
|
|||||||
private int patchNumber;
|
private int patchNumber;
|
||||||
|
|
||||||
public static final ObVersion V2276 = valueOf("2.2.76");
|
public static final ObVersion V2276 = valueOf("2.2.76");
|
||||||
|
public static final ObVersion V2252 = valueOf("2.2.52");
|
||||||
|
public static final ObVersion V3 = valueOf("3.0.0.0");
|
||||||
public static final ObVersion V4000 = valueOf("4.0.0.0");
|
public static final ObVersion V4000 = valueOf("4.0.0.0");
|
||||||
|
|
||||||
private static final ObVersion DEFAULT_VERSION =
|
private static final ObVersion DEFAULT_VERSION =
|
||||||
|
@ -0,0 +1,37 @@
|
|||||||
|
package com.alibaba.datax.plugin.rdbms.util;
|
||||||
|
|
||||||
|
public class SplitedSlice {
|
||||||
|
private String begin;
|
||||||
|
private String end;
|
||||||
|
private String range;
|
||||||
|
|
||||||
|
public SplitedSlice(String begin, String end, String range) {
|
||||||
|
this.begin = begin;
|
||||||
|
this.end = end;
|
||||||
|
this.range = range;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getBegin() {
|
||||||
|
return begin;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setBegin(String begin) {
|
||||||
|
this.begin = begin;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getEnd() {
|
||||||
|
return end;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setEnd(String end) {
|
||||||
|
this.end = end;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRange() {
|
||||||
|
return range;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRange(String range) {
|
||||||
|
this.range = range;
|
||||||
|
}
|
||||||
|
}
|
2
pom.xml
2
pom.xml
@ -55,6 +55,7 @@
|
|||||||
<module>oraclereader</module>
|
<module>oraclereader</module>
|
||||||
<module>cassandrareader</module>
|
<module>cassandrareader</module>
|
||||||
<module>oceanbasev10reader</module>
|
<module>oceanbasev10reader</module>
|
||||||
|
<module>obhbasereader</module>
|
||||||
<module>rdbmsreader</module>
|
<module>rdbmsreader</module>
|
||||||
|
|
||||||
<module>odpsreader</module>
|
<module>odpsreader</module>
|
||||||
@ -93,6 +94,7 @@
|
|||||||
<module>kingbaseeswriter</module>
|
<module>kingbaseeswriter</module>
|
||||||
<module>adswriter</module>
|
<module>adswriter</module>
|
||||||
<module>oceanbasev10writer</module>
|
<module>oceanbasev10writer</module>
|
||||||
|
<module>obhbasewriter</module>
|
||||||
<module>adbpgwriter</module>
|
<module>adbpgwriter</module>
|
||||||
<module>hologresjdbcwriter</module>
|
<module>hologresjdbcwriter</module>
|
||||||
<module>rdbmswriter</module>
|
<module>rdbmswriter</module>
|
||||||
|
@ -1,92 +1,61 @@
|
|||||||
{
|
{
|
||||||
"core":{
|
"core": {
|
||||||
"transport":{
|
"transport": {
|
||||||
"channel":{
|
"channel": {
|
||||||
"speed":{
|
"speed": {
|
||||||
"byte":10485760
|
"byte": 10485760
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"job":{
|
"job": {
|
||||||
"content":[
|
"content": [
|
||||||
{
|
{
|
||||||
"reader":{
|
"reader": {},
|
||||||
"name":"streamreader",
|
"writer": {
|
||||||
"parameter":{
|
"name": "selectdbwriter",
|
||||||
"column":[
|
"parameter": {
|
||||||
{
|
"loadUrl": [
|
||||||
"type":"string",
|
|
||||||
"value":"DataX"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type":"int",
|
|
||||||
"value":19890604
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type":"date",
|
|
||||||
"value":"1989-06-04 00:00:00"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type":"bool",
|
|
||||||
"value":true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type":"string",
|
|
||||||
"value":"test"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"sliceRecordCount":1000000
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"writer":{
|
|
||||||
"name":"selectdbwriter",
|
|
||||||
"parameter":{
|
|
||||||
"loadUrl":[
|
|
||||||
"xxx:35871"
|
"xxx:35871"
|
||||||
],
|
],
|
||||||
"loadProps":{
|
"loadProps": {
|
||||||
"file.type":"json",
|
"file.type": "json",
|
||||||
"file.strip_outer_array":"true"
|
"file.strip_outer_array": "true"
|
||||||
},
|
},
|
||||||
"database":"db1",
|
"database": "db1",
|
||||||
"column":[
|
"column": [
|
||||||
"k1",
|
"k1",
|
||||||
"k2",
|
"k2",
|
||||||
"k3",
|
"k3",
|
||||||
"k4",
|
"k4",
|
||||||
"k5"
|
"k5"
|
||||||
],
|
],
|
||||||
"username":"admin",
|
"username": "admin",
|
||||||
"password":"SelectDB2022",
|
"password": "SelectDB2022",
|
||||||
"postSql":[
|
"postSql": [],
|
||||||
|
"preSql": [],
|
||||||
],
|
"connection": [
|
||||||
"preSql":[
|
|
||||||
|
|
||||||
],
|
|
||||||
"connection":[
|
|
||||||
{
|
{
|
||||||
"jdbcUrl":"jdbc:mysql://xxx:32386/cl_test",
|
"jdbcUrl": "jdbc:mysql://xxx:32386/cl_test",
|
||||||
"table":[
|
"table": [
|
||||||
"test_selectdb"
|
"test_selectdb"
|
||||||
],
|
],
|
||||||
"selectedDatabase":"cl_test"
|
"selectedDatabase": "cl_test"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"maxBatchRows":200000,
|
"maxBatchRows": 200000,
|
||||||
"batchSize":53687091200
|
"batchSize": 53687091200
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"setting":{
|
"setting": {
|
||||||
"errorLimit":{
|
"errorLimit": {
|
||||||
"percentage":0.02,
|
"percentage": 0.02,
|
||||||
"record":0
|
"record": 0
|
||||||
},
|
},
|
||||||
"speed":{
|
"speed": {
|
||||||
"byte":10485760
|
"byte": 10485760
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -47,7 +47,7 @@ dx_replace(1,"5","10","****") column 1的value为“dataxTest”=>"datax****"
|
|||||||
4. dx_filter (关联filter暂不支持,即多个字段的联合判断,函参太过复杂,用户难以使用。)
|
4. dx_filter (关联filter暂不支持,即多个字段的联合判断,函参太过复杂,用户难以使用。)
|
||||||
* 参数:
|
* 参数:
|
||||||
* 第一个参数:字段编号,对应record中第几个字段。
|
* 第一个参数:字段编号,对应record中第几个字段。
|
||||||
* 第二个参数:运算符,支持一下运算符:like, not like, >, =, <, >=, !=, <=
|
* 第二个参数:运算符,支持以下运算符:like, not like, >, =, <, >=, !=, <=
|
||||||
* 第三个参数:正则表达式(java正则表达式)、值。
|
* 第三个参数:正则表达式(java正则表达式)、值。
|
||||||
* 返回:
|
* 返回:
|
||||||
* 如果匹配正则表达式,返回Null,表示过滤该行。不匹配表达式时,表示保留该行。(注意是该行)。对于>=<都是对字段直接compare的结果.
|
* 如果匹配正则表达式,返回Null,表示过滤该行。不匹配表达式时,表示保留该行。(注意是该行)。对于>=<都是对字段直接compare的结果.
|
||||||
@ -145,11 +145,11 @@ String code3 = "Column column = record.getColumn(1);\n" +
|
|||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"value": 19890604,
|
"value": 1724154616370,
|
||||||
"type": "long"
|
"type": "long"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"value": "1989-06-04 00:00:00",
|
"value": "2024-01-01 00:00:00",
|
||||||
"type": "date"
|
"type": "date"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -157,11 +157,11 @@ String code3 = "Column column = record.getColumn(1);\n" +
|
|||||||
"type": "bool"
|
"type": "bool"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"value": "test",
|
"value": "TestRawData",
|
||||||
"type": "bytes"
|
"type": "bytes"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"sliceRecordCount": 100000
|
"sliceRecordCount": 100
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"writer": {
|
"writer": {
|
||||||
@ -174,38 +174,44 @@ String code3 = "Column column = record.getColumn(1);\n" +
|
|||||||
"transformer": [
|
"transformer": [
|
||||||
{
|
{
|
||||||
"name": "dx_substr",
|
"name": "dx_substr",
|
||||||
"parameter":
|
"parameter": {
|
||||||
{
|
"columnIndex": 5,
|
||||||
"columnIndex":5,
|
"paras": [
|
||||||
"paras":["1","3"]
|
"1",
|
||||||
}
|
"3"
|
||||||
|
]
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "dx_replace",
|
"name": "dx_replace",
|
||||||
"parameter":
|
"parameter": {
|
||||||
{
|
"columnIndex": 4,
|
||||||
"columnIndex":4,
|
"paras": [
|
||||||
"paras":["3","4","****"]
|
"3",
|
||||||
}
|
"4",
|
||||||
|
"****"
|
||||||
|
]
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "dx_digest",
|
"name": "dx_digest",
|
||||||
"parameter":
|
"parameter": {
|
||||||
{
|
"columnIndex": 3,
|
||||||
"columnIndex":3,
|
"paras": [
|
||||||
"paras":["md5", "toLowerCase"]
|
"md5",
|
||||||
}
|
"toLowerCase"
|
||||||
|
]
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "dx_groovy",
|
"name": "dx_groovy",
|
||||||
"parameter":
|
"parameter": {
|
||||||
{
|
"code": "//groovy code//",
|
||||||
"code": "//groovy code//",
|
"extraPackage": [
|
||||||
"extraPackage":[
|
"import somePackage1;",
|
||||||
"import somePackage1;",
|
"import somePackage2;"
|
||||||
"import somePackage2;"
|
]
|
||||||
]
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user