add obhbase reader and writer plugin by cjyyz64

TrafalgarLuo 2024-08-20 20:17:56 +08:00
parent f1c20abc7d
commit 0ec767730e
93 changed files with 5633 additions and 186 deletions


@ -2,11 +2,10 @@
 "job": {
 "setting": {
 "speed": {
-"channel":1
+"channel": 2
 },
 "errorLimit": {
-"record": 0,
-"percentage": 0.02
+"record": 0
 }
 },
 "content": [
@ -14,17 +13,17 @@
 "reader": {
 "name": "streamreader",
 "parameter": {
-"column" : [
+"column": [
 {
 "value": "DataX",
 "type": "string"
 },
 {
-"value": 19890604,
+"value": 1724154616370,
 "type": "long"
 },
 {
-"value": "1989-06-04 00:00:00",
+"value": "2024-01-01 00:00:00",
 "type": "date"
 },
 {
@ -32,11 +31,11 @@
 "type": "bool"
 },
 {
-"value": "test",
+"value": "TestRawData",
 "type": "bytes"
 }
 ],
-"sliceRecordCount": 100000
+"sliceRecordCount": 100
 }
 },
 "writer": {
@ -49,4 +48,4 @@
 }
 ]
 }
-}
+}


@ -36,8 +36,6 @@ DorisWriter imports data via Doris's native Stream Load support. DorisWriter
 "name": "doriswriter",
 "parameter": {
 "loadUrl": ["172.16.0.13:8030"],
-"loadProps": {
-},
 "column": ["emp_no", "birth_date", "first_name","last_name","gender","hire_date"],
 "username": "root",
 "password": "xxxxxx",
@ -178,4 +176,4 @@ DorisWriter imports data via Doris's native Stream Load support. DorisWriter
 }
 ```
-For more information, refer to the Doris documentation: [Stream load - Apache Doris](https://doris.apache.org/zh-CN/docs/data-operate/import/import-way/stream-load-manual)
+For more information, refer to the Doris documentation: [Stream load - Apache Doris](https://doris.apache.org/zh-CN/docs/data-operate/import/import-way/stream-load-manual)


@ -167,79 +167,4 @@
* dynamic
* Description: use ES's own automatic mappings instead of the mappings generated by DataX
* Required: no
* Default: false
## 4 Performance report
### 4.1 Test setup
* Total volume: 10 million records, 0.1 KB each
* 1 shard, 0 replicas
* No id column, so inserts run in append_only mode (no version check), which is roughly 20% faster
#### 4.1.1 Input data types (streamreader)
```
{"value": "1.1.1.1", "type": "string"},
{"value": 19890604.0, "type": "double"},
{"value": 19890604, "type": "long"},
{"value": 19890604, "type": "long"},
{"value": "hello world", "type": "string"},
{"value": "hello world", "type": "string"},
{"value": "41.12,-71.34", "type": "string"},
{"value": "2017-05-25", "type": "string"},
```
#### 4.1.2 Output data types (eswriter)
```
{ "name": "col_ip","type": "ip" },
{ "name": "col_double","type": "double" },
{ "name": "col_long","type": "long" },
{ "name": "col_integer","type": "integer" },
{ "name": "col_keyword", "type": "keyword" },
{ "name": "col_text", "type": "text"},
{ "name": "col_geo_point", "type": "geo_point" },
{ "name": "col_date", "type": "date"}
```
#### 4.1.3 Machine specs
1. cpu: 32 cores, Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz
2. mem: 128GB
3. net: dual gigabit NICs
#### 4.1.4 DataX JVM parameters
-Xms1024m -Xmx1024m -XX:+HeapDumpOnOutOfMemoryError
### 4.2 Test results
| Channels | Batch rows | DataX speed (Rec/s) | DataX traffic (MB/s) |
|--------|--------|--------|--------|
| 4| 256| 11013| 0.828|
| 4| 1024| 19417| 1.43|
| 4| 4096| 23923| 1.76|
| 4| 8172| 24449| 1.80|
| 8| 256| 21459| 1.58|
| 8| 1024| 37037| 2.72|
| 8| 4096| 45454| 3.34|
| 8| 8172| 45871| 3.37|
| 16| 1024| 67567| 4.96|
| 16| 4096| 78125| 5.74|
| 16| 8172| 77519| 5.69|
| 32| 1024| 94339| 6.93|
| 32| 4096| 96153| 7.06|
| 64| 1024| 91743| 6.74|
### 4.3 Summary
* The best result was 32 channels with batches of 4096 rows; if individual records are large, reduce the batch size to avoid OOM
* This scales out easily, and since ES itself is distributed, configuring more shards also scales horizontally
## 5 Constraints
* If ids are imported, a failed import can be retried, and re-importing merely overwrites, so data consistency is guaranteed
* Without ids (append_only mode; ES generates ids automatically) speed improves by about 20%, but the data cannot be repaired afterwards; suitable for log-style data where precision is not critical
* Default: false


@ -0,0 +1,178 @@
OceanBase's table api provides applications with an ObHBase access interface, so the reader for the OceanBase table api is similar in structure and configuration to HBase Reader.
The obhbasereader plugin supports two read modes, sql and hbase api, which differ as follows:
1. sql mode can slice the data by partition or by K value; in hbase api mode the slicing has to be configured manually by the user.
2. sql mode converts the KQTV-shaped data read from obhbase into single flat rows; hbase api mode performs no row/column conversion and passes the data downstream in KQTV form.
3. sql mode requires the column property to be configured; hbase api mode does not, because the data always consists of the four fixed columns K, Q, T and V.
4. sql mode can only fetch the newest or the oldest version of the data; hbase api mode can fetch multiple versions.
#### Job configuration
```json
{
"job": {
"setting": {
"speed": {
"channel": 3,
"byte": 104857600
},
"errorLimit": {
"record": 10
}
},
"content": [
{
"reader": {
"name": "obhbasereader",
"parameter": {
"username": "username",
"password": "password",
"encoding": "utf8",
"column": [
{
"name": "f1:column1_1",
"type": "string"
},
{
"name": "f1:column2_2",
"type": "string"
},
{
"name": "f1:column1_1",
"type": "string"
},
{
"name": "f1:column2_2",
"type": "string"
}
],
"range": [
{
"startRowkey": "aaa",
"endRowkey": "ccc",
"isBinaryRowkey": false
},
{
"startRowkey": "eee",
"endRowkey": "zzz",
"isBinaryRowkey": false
}
],
"mode": "normal",
"readByPartition": "true",
"scanCacheSize": "",
"readerHint": "",
"readBatchSize": "1000",
"connection": [
{
"table": [
"htable1",
"htable2"
],
"jdbcUrl": [
"||_dsc_ob10_dsc_||集群:租户||_dsc_ob10_dsc_||jdbc:mysql://ip:port/dbName1"
],
"username": "username",
"password": "password"
},
{
"table": [
"htable1",
"htable2"
],
"jdbcUrl": [
"jdbc:mysql://ip:port/database"
]
}
]
}
},
"writer": {
"name": "txtfilewriter",
"parameter": {
"path": "/Users/xujing/datax/txtfile",
"charset": "UTF-8",
"fieldDelimiter": ",",
"fileName": "hbase",
"nullFormat": "null",
"writeMode": "truncate"
}
}
}
]
}
}
```
##### Parameter reference
- **connection**
  - Description: configures the jdbcUrl and table names of the database/table shards. If one database shard holds several table shards, the tables can be listed comma-separated or written as tableName[startIndex-endIndex].
  - Required: yes
  - Default: none
- **jdbcUrl**
  - Description: the jdbc url used to connect to ob; the following two formats are supported:
    - jdbc:mysql://obproxyIp:obproxyPort/db
      - with this format, username must be written in the three-segment form
    - ||_dsc_ob10_dsc_||cluster:tenant||_dsc_ob10_dsc_||jdbc:mysql://obproxyIp:obproxyPort/db
      - with this format, username is only the user name itself; the three-segment form is not needed
  - Required: yes
  - Default: none
- **table**
  - Description: the tables to synchronize, written as a JSON array, so multiple tables can be extracted at once. When several tables are configured, the user must guarantee they share the same schema; obhbasereader does not check whether they form one logical table. Note that table must sit inside a connection block.
  - Required: yes
  - Default: none
- **readByPartition**
  - Description: when reading in sql mode, slice the data **only** by partition.
  - Required: no
  - Default: false
- **partitionName**
  - Description: when reading in sql mode, read only the data of the named partition. The user must ensure the configured partition name actually exists in the table schema (strictly case-sensitive).
  - Required: no
  - Default: none
- **readBatchSize**
  - Description: page size when reading in sql mode.
  - Required: no
  - Default: 100000
- **fetchSize**
  - Description: when reading in sql mode, the number of rows fetched from the result set at a time.
  - Required: no
  - Default: -2147483648
- **scanCacheSize**
  - Description: when reading via the hbase api, the number of rows each rpc fetches from the server
  - Required: no
  - Default: 256
- **readerHint**
  - Description: the hint obhbasereader uses when reading in sql mode
  - Required: no
  - Default: /*+READ_CONSISTENCY(weak),QUERY_TIMEOUT(86400000000)*/
- **column**
  - Description: the set of columns of the configured table to synchronize in sql mode, described as a JSON array of field definitions.
  - Column pruning is supported: a subset of the columns can be exported.
  - Column reordering is supported: columns need not be exported in schema order. The wildcard * is supported as well; double-check the column list before using it.
  - Required: yes when reading in sql mode
  - Default: none
- **range**
  - Description: the rowkey ranges obhbasereader reads
  - Required: no
  - Default: none
- **username**
  - Description: user name for accessing OceanBase
  - Required: yes
  - Default: none
- **mode**
  - Description: the mode for reading obhbase; normal mode reads only a single version of the data.
  - Required: yes
  - Default: normal
- **version**
  - Description: which version of the data to read; oldest and latest are currently supported, reading the oldest and the newest data respectively.
  - Required: yes
  - Default: oldest
A few notes:
Note: if **partitionName** is configured, readByPartition is unnecessary; even when set it is ignored, and only the named partition is read.
Note: if **readByPartition** is configured, the job is split by partition only and no longer by K value. A non-partitioned table is then treated as a single task and not split further.
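For illustration, here is a minimal reader fragment for the **partitionName** case above. This is a sketch with placeholder values; the keys are the ones documented above, and the omitted setting/writer blocks are the same as in the full example:
```json
"reader": {
    "name": "obhbasereader",
    "parameter": {
        "username": "username",
        "password": "password",
        "mode": "normal",
        "partitionName": "p0",
        "column": [
            {
                "name": "f1:column1_1",
                "type": "string"
            }
        ],
        "connection": [
            {
                "table": ["htable1"],
                "jdbcUrl": ["jdbc:mysql://ip:port/database"]
            }
        ]
    }
}
```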

obhbasereader/pom.xml Executable file

@ -0,0 +1,151 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-all</artifactId>
<version>0.0.1-SNAPSHOT</version>
</parent>
<artifactId>obhbasereader</artifactId>
<groupId>com.alibaba.datax</groupId>
<name>obhbasereader</name>
<version>0.0.1-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-core</artifactId>
<version>${datax-project-version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>oceanbasev10reader</artifactId>
<version>0.0.1-SNAPSHOT</version>
<exclusions>
<exclusion>
<artifactId>guava</artifactId>
<groupId>com.google.guava</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.3.2</version>
<exclusions>
<exclusion>
<artifactId>log4j</artifactId>
<groupId>log4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<version>3.2.1</version>
</dependency>
<!-- <dependency>-->
<!-- <groupId>com.oceanbase</groupId>-->
<!-- <artifactId>shade-obkv-table-client</artifactId>-->
<!-- <version>1.2.6-RELEASE</version>-->
<!-- </dependency>-->
<dependency>
<groupId>com.oceanbase</groupId>
<artifactId>obkv-hbase-client</artifactId>
<version>0.1.4.2</version>
<exclusions>
<exclusion>
<artifactId>guava</artifactId>
<groupId>com.google.guava</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${guava-version}</version>
</dependency>
<dependency>
<groupId>com.alibaba.toolkit.common</groupId>
<artifactId>toolkit-common-logging</artifactId>
<version>1.14</version>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20160810</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-module-junit4</artifactId>
<version>1.4.10</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-api-mockito</artifactId>
<version>1.4.10</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>1.8.5</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<directory>src/main/java</directory>
<includes>
<include>**/*.properties</include>
</includes>
</resource>
</resources>
<plugins>
<!-- compiler plugin -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${jdk-version}</source>
<target>${jdk-version}</target>
<encoding>${project-sourceEncoding}</encoding>
</configuration>
</plugin>
<!-- assembly plugin -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>src/main/assembly/package.xml</descriptor>
</descriptors>
<finalName>datax</finalName>
</configuration>
<executions>
<execution>
<id>dwzip</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>


@ -0,0 +1,35 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id></id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>src/main/resources</directory>
<includes>
<include>plugin.json</include>
<include>plugin_job_template.json</include>
</includes>
<outputDirectory>plugin/reader/obhbasereader</outputDirectory>
</fileSet>
<fileSet>
<directory>target/</directory>
<includes>
<include>obhbasereader-0.0.1-SNAPSHOT.jar</include>
</includes>
<outputDirectory>plugin/reader/obhbasereader</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<outputDirectory>plugin/reader/obhbasereader/libs</outputDirectory>
<scope>runtime</scope>
</dependencySet>
</dependencySets>
</assembly>


@ -0,0 +1,34 @@
package com.alibaba.datax.plugin.reader.obhbasereader;
import ch.qos.logback.classic.Level;
public final class Constant {
public static final String ROWKEY_FLAG = "rowkey";
public static final int DEFAULT_SCAN_CACHE = 256;
public static final int DEFAULT_FETCH_SIZE = Integer.MIN_VALUE;
public static final int DEFAULT_READ_BATCH_SIZE = 100000;
// timeout:24 * 3600 = 86400s
public static final String OB_READ_HINT = "/*+READ_CONSISTENCY(weak),QUERY_TIMEOUT(86400000000)*/";
public static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
public static final String DEFAULT_ENCODING = "UTF-8";
public static final String DEFAULT_TIMEZONE = "UTC";
public static final boolean DEFAULT_USE_SQLREADER = true;
public static final boolean DEFAULT_USE_ODPMODE = true;
public static final String OB_TABLE_CLIENT_PROPERTY = "logging.path.com.alipay.oceanbase-table-client";
public static final String OB_TABLE_HBASE_PROPERTY = "logging.path.com.alipay.oceanbase-table-hbase";
public static final String OB_TABLE_CLIENT_LOG_LEVEL = "logging.level.oceanbase-table-client";
public static final String OB_TABLE_HBASE_LOG_LEVEL = "logging.level.oceanbase-table-hbase";
public static final String OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL = "logging.level.com.alipay.oceanbase-table-client";
public static final String OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL = "logging.level.com.alipay.oceanbase-table-hbase";
public static final String OB_HBASE_LOG_PATH = System.getProperty("datax.home") + "/log/";
public static final String DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL = Level.OFF.toString();
public static final String DEFAULT_OB_TABLE_HBASE_LOG_LEVEL = Level.OFF.toString();
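// Reserved words in OB MySQL mode; table names that collide with one of these are wrapped
// in backticks before being used in SQL (see ObHbaseReader.Job#dealLogicConnAndTable).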
public static final String OBMYSQL_KEYWORDS =
"CUME_DIST,DENSE_RANK,EMPTY,FIRST_VALUE,GROUPING,GROUPS,INTERSECT,JSON_TABLE,LAG,LAST_VALUE,LATERAL,LEAD,NTH_VALUE,NTILE,OF,OVER,PERCENT_RANK,RANK,RECURSIVE,ROW_NUMBER,SYSTEM,WINDOW,ACCESSIBLE,ACCOUNT,ACTION,ADD,AFTER,AGAINST,AGGREGATE,ALGORITHM,ALL,ALTER,ALWAYS,ANALYSE,AND,ANY,AS,ASC,ASCII,ASENSITIVE,AT,AUTO_INCREMENT,AUTOEXTEND_SIZE,AVG,AVG_ROW_LENGTH,BACKUP,BEFORE,BEGIN,BETWEEN,BIGINT,BINARY,BINLOG,BIT,BLOB,BLOCK,BOOL,BOOLEAN,BOTH,BTREE,BY,BYTE,CACHE,CALL,CASCADE,CASCADED,CASE,CATALOG_NAME,CHAIN,CHANGE,CHANGED,CHANNEL,CHAR,CHARACTER,CHARSET,CHECK,CHECKSUM,CIPHER,CLASS_ORIGIN,CLIENT,CLOSE,COALESCE,CODE,COLLATE,COLLATION,COLUMN,COLUMN_FORMAT,COLUMN_NAME,COLUMNS,COMMENT,COMMIT,COMMITTED,COMPACT,COMPLETION,COMPRESSED,COMPRESSION,CONCURRENT,CONDITION,CONNECTION,CONSISTENT,CONSTRAINT,CONSTRAINT_CATALOG,CONSTRAINT_NAME,CONSTRAINT_SCHEMA,CONTAINS,CONTEXT,CONTINUE,CONVERT,CPU,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR,"
+ "CURSOR_NAME,DATA,DATABASE,DATABASES,DATAFILE,DATE,DATETIME,DAY,DAY_HOUR,DAY_MICROSECOND,DAY_MINUTE,DAY_SECOND,DEALLOCATE,DEC,DECIMAL,DECLARE,DEFAULT,DEFAULT_AUTH,DEFINER,DELAY_KEY_WRITE,DELAYED,DELETE,DES_KEY_FILE,DESC,DESCRIBE,DETERMINISTIC,DIAGNOSTICS,DIRECTORY,DISABLE,DISCARD,DISK,DISTINCT,DISTINCTROW,DIV,DO,DOUBLE,DROP,DUAL,DUMPFILE,DUPLICATE,DYNAMIC,EACH,ELSE,ELSEIF,ENABLE,ENCLOSED,ENCRYPTION,END,ENDS,ENGINE,ENGINES,ENUM,ERROR,ERRORS,ESCAPE,ESCAPED,EVENT,EVENTS,EVERY,EXCHANGE,EXECUTE,EXISTS,EXIT,EXPANSION,EXPIRE,EXPLAIN,EXPORT,EXTENDED,EXTENT_SIZE,FAST,FAULTS,FETCH,FIELDS,FILE,FILE_BLOCK_SIZE,FILTER,FIRST,FIXED,FLOAT,FLOAT4,FLOAT8,FLUSH,FOLLOWS,FOR,FORCE,FOREIGN,FORMAT,FOUND,FROM,FULL,FULLTEXT,FUNCTION,GENERAL,GENERATED,GEOMETRY,GEOMETRYCOLLECTION,GET,GET_FORMAT,GLOBAL,GRANT,GRANTS,GROUP,GROUP_REPLICATION,HANDLER,HASH,HAVING,HELP,HIGH_PRIORITY,HOST,HOSTS,HOUR,HOUR_MICROSECOND,HOUR_MINUTE,HOUR_SECOND,IDENTIFIED,IF,IGNORE,IGNORE_SERVER_IDS,IMPORT,IN,INDEX,"
+ "INDEXES," + "INFILE,INITIAL_SIZE,INNER,INOUT,INSENSITIVE,INSERT,INSERT_METHOD,INSTALL,INSTANCE,INT,INT1,INT2,INT3,INT4,INT8,INTEGER,INTERVAL,INTO,INVOKE,INVOKER,IO,IO_AFTER_GTIDS,IO_BEFORE_GTIDS,IO_THREAD,IPC,IS,ISOLATION,ISSUER,ITERATE,JOIN,JSON,KEY,KEY_BLOCK_SIZE,KEYS,KILL,LANGUAGE,LAST,LEADING,LEAVE,LEAVES,LEFT,LESS,LEVEL,LIKE,LIMIT,LINEAR,LINES,LINESTRING,LIST,LOAD,LOCAL,LOCALTIME,LOCALTIMESTAMP,LOCK,LOCKS,LOGFILE,LOGS,LONG,LONGBLOB,LONGTEXT,LOOP,LOW_PRIORITY,MASTER,MASTER_AUTO_POSITION,MASTER_BIND,MASTER_CONNECT_RETRY,MASTER_DELAY,MASTER_HEARTBEAT_PERIOD,MASTER_HOST,MASTER_LOG_FILE,MASTER_LOG_POS,MASTER_PASSWORD,MASTER_PORT,MASTER_RETRY_COUNT,MASTER_SERVER_ID,MASTER_SSL,MASTER_SSL_CA,MASTER_SSL_CAPATH,MASTER_SSL_CERT,MASTER_SSL_CIPHER,MASTER_SSL_CRL,MASTER_SSL_CRLPATH,MASTER_SSL_KEY,MASTER_SSL_VERIFY_SERVER_CERT,MASTER_TLS_VERSION,MASTER_USER,MATCH,MAX_CONNECTIONS_PER_HOUR,MAX_QUERIES_PER_HOUR,MAX_ROWS,MAX_SIZE,MAX_STATEMENT_TIME,MAX_UPDATES_PER_HOUR,"
+ "MAX_USER_CONNECTIONS,"
+ "MAXVALUE,MEDIUM,MEDIUMBLOB,MEDIUMINT,MEDIUMTEXT,MEMORY,MERGE,MESSAGE_TEXT,MICROSECOND,MIDDLEINT,MIGRATE,MIN_ROWS,MINUTE,MINUTE_MICROSECOND,MINUTE_SECOND,MOD,MODE,MODIFIES,MODIFY,MONTH,MULTILINESTRING,MULTIPOINT,MULTIPOLYGON,MUTEX,MYSQL_ERRNO,NAME,NAMES,NATIONAL,NATURAL,NCHAR,NDB,NDBCLUSTER,NEVER,NEW,NEXT,NO,NO_WAIT,NO_WRITE_TO_BINLOG,NODEGROUP,NONBLOCKING,NONE,NOT,NUMBER,NUMERIC,NVARCHAR,OFFSET,OLD_PASSWORD,ON,ONE,ONLY,OPEN,OPTIMIZE,OPTIMIZER_COSTS,OPTION,OPTIONALLY,OPTIONS,OR,ORDER,OUT,OUTER,OUTFILE,OWNER,PACK_KEYS,PAGE,PARSE_GCOL_EXPR,PARSER,PARTIAL,PARTITION,PARTITIONING,PARTITIONS,PASSWORD,PHASE,PLUGIN,PLUGIN_DIR,PLUGINS,POINT,POLYGON,PORT,PRECEDES,PRECISION,PREPARE,PRESERVE,PREV,PRIMARY,PRIVILEGES,PROCEDURE,PROCESSLIST,PROFILE,PROFILES,PROXY,PURGE,QUARTER,QUERY,QUICK,RANGE,READ,READ_ONLY,READ_WRITE,READS,REAL,REBUILD,RECOVER,REDO_BUFFER_SIZE,REDOFILE,REDUNDANT,REFERENCES,REGEXP,RELAY,RELAY_LOG_FILE,RELAY_LOG_POS,RELAY_THREAD,RELAYLOG,RELEASE,RELOAD,REMOVE,"
+ "RENAME,REORGANIZE,REPAIR,REPEAT,REPEATABLE,REPLACE,REPLICATE_DO_DB,REPLICATE_DO_TABLE,REPLICATE_IGNORE_DB,REPLICATE_IGNORE_TABLE,REPLICATE_REWRITE_DB,REPLICATE_WILD_DO_TABLE,REPLICATE_WILD_IGNORE_TABLE,REPLICATION,REQUIRE,RESET,RESIGNAL,RESTORE,RESTRICT,RESUME,RETURN,RETURNED_SQLSTATE,RETURNS,REVERSE,REVOKE,RIGHT,RLIKE,ROLLBACK,ROLLUP,ROTATE,ROUTINE,ROW,ROW_COUNT,ROW_FORMAT,ROWS,RTREE,SAVEPOINT,SCHEDULE,SCHEMA,SCHEMA_NAME,SCHEMAS,SECOND,SECOND_MICROSECOND,SECURITY,SELECT,SENSITIVE,SEPARATOR,SERIAL,SERIALIZABLE,SERVER,SESSION,SET,SHARE,SHOW,SHUTDOWN,SIGNAL,SIGNED,SIMPLE,SLAVE,SLOW,SMALLINT,SNAPSHOT,SOCKET,SOME,SONAME,SOUNDS,SOURCE,SPATIAL,SPECIFIC,SQL,SQL_AFTER_GTIDS,SQL_AFTER_MTS_GAPS,SQL_BEFORE_GTIDS,SQL_BIG_RESULT,SQL_BUFFER_RESULT,SQL_CACHE,SQL_CALC_FOUND_ROWS,SQL_NO_CACHE,SQL_SMALL_RESULT,SQL_THREAD,SQL_TSI_DAY,SQL_TSI_HOUR,SQL_TSI_MINUTE,SQL_TSI_MONTH,SQL_TSI_QUARTER,SQL_TSI_SECOND,SQL_TSI_WEEK,SQL_TSI_YEAR,SQLEXCEPTION,SQLSTATE,SQLWARNING,SSL,STACKED,"
+ "START," + "STARTING,STARTS,STATS_AUTO_RECALC,STATS_PERSISTENT,STATS_SAMPLE_PAGES,STATUS,STOP,STORAGE,STORED,STRAIGHT_JOIN,STRING,SUBCLASS_ORIGIN,SUBJECT,SUBPARTITION,SUBPARTITIONS,SUPER,SUSPEND,SWAPS,SWITCHES,TABLE,TABLE_CHECKSUM,TABLE_NAME,TABLES,TABLESPACE,TEMPORARY,TEMPTABLE,TERMINATED,TEXT,THAN,THEN,TIME,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TINYBLOB,TINYINT,TINYTEXT,TO,TRAILING,TRANSACTION,TRIGGER,TRIGGERS,TRUNCATE,TYPE,TYPES,UNCOMMITTED,UNDEFINED,UNDO,UNDO_BUFFER_SIZE,UNDOFILE,UNICODE,UNINSTALL,UNION,UNIQUE,UNKNOWN,UNLOCK,UNSIGNED,UNTIL,UPDATE,UPGRADE,USAGE,USE,USE_FRM,USER,USER_RESOURCES,USING,UTC_DATE,UTC_TIME,UTC_TIMESTAMP,VALIDATION,VALUE,VALUES,VARBINARY,VARCHAR,VARCHARACTER,VARIABLES,VARYING,VIEW,VIRTUAL,WAIT,WARNINGS,WEEK,WEIGHT_STRING,WHEN,WHERE,WHILE,WITH,WITHOUT,WORK,WRAPPER,WRITE,X509,XA,XID,XML,XOR,YEAR,YEAR_MONTH,ZEROFILL,FALSE,TRUE";
}


@ -0,0 +1,19 @@
package com.alibaba.datax.plugin.reader.obhbasereader;
import com.alipay.oceanbase.hbase.OHTable;
import org.apache.hadoop.conf.Configuration;
import java.io.IOException;
public final class HTableManager {
public static OHTable createHTable(Configuration config, String tableName) throws IOException {
return new OHTable(config, tableName);
}
public static void closeHTable(OHTable hTable) throws IOException {
if (hTable != null) {
hTable.close();
}
}
}


@ -0,0 +1,124 @@
package com.alibaba.datax.plugin.reader.obhbasereader;
import com.alibaba.datax.common.base.BaseObject;
import com.alibaba.datax.plugin.reader.obhbasereader.enums.ColumnType;
import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Describes one cell entity in the column configuration of the hbasereader plugin.
 */
public class HbaseColumnCell extends BaseObject {
private ColumnType columnType;
// columnName is in family:qualifier format
private String columnName;
private byte[] cf;
private byte[] qualifier;
// for constant columns, the constant value goes into columnValue
private String columnValue;
// isConstant=true when columnValue is configured; callers use this flag to check whether the column is a constant field
private boolean isConstant;
// only set for date columns; no default; format like yyyy-MM-dd HH:mm:ss
private String dateformat;
private HbaseColumnCell(Builder builder) {
this.columnType = builder.columnType;
// columnName and columnValue cannot both be set
Validate.isTrue(builder.columnName == null || builder.columnValue == null, "In obhbasereader, column cannot configure both column name and column value. Choose one of them.");
// columnName and columnValue cannot both be null
Validate.isTrue(builder.columnName != null || builder.columnValue != null, "In obhbasereader, column must configure either column name or column value; at least one of them is required.");
if (builder.columnName != null) {
this.isConstant = false;
this.columnName = builder.columnName;
// if columnName is not the rowkey, it must be configured in family:qualifier format
if (!ObHbaseReaderUtil.isRowkeyColumn(this.columnName)) {
String promptInfo = "In obhbasereader, the column configuration format of column should be: 'family:column'. The column you configured is wrong:" + this.columnName;
String[] cfAndQualifier = this.columnName.split(":");
Validate.isTrue(cfAndQualifier.length == 2 && StringUtils.isNotBlank(cfAndQualifier[0]) && StringUtils.isNotBlank(cfAndQualifier[1]), promptInfo);
this.cf = Bytes.toBytes(cfAndQualifier[0].trim());
this.qualifier = Bytes.toBytes(cfAndQualifier[1].trim());
}
} else {
this.isConstant = true;
this.columnValue = builder.columnValue;
}
if (builder.dateformat != null) {
this.dateformat = builder.dateformat;
}
}
public ColumnType getColumnType() {
return columnType;
}
public String getColumnName() {
return columnName;
}
public byte[] getCf() {
return cf;
}
public byte[] getQualifier() {
return qualifier;
}
public String getDateformat() {
return dateformat;
}
public String getColumnValue() {
return columnValue;
}
public boolean isConstant() {
return isConstant;
}
// inner builder
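// Typical usage (values are illustrative):
//   HbaseColumnCell cell = new HbaseColumnCell.Builder(ColumnType.STRING).columnName("f1:col1").build();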
public static class Builder {
private ColumnType columnType;
private String columnName;
private String columnValue;
private String dateformat;
public Builder(ColumnType columnType) {
this.columnType = columnType;
}
public Builder columnName(String columnName) {
this.columnName = columnName;
return this;
}
public Builder columnValue(String columnValue) {
this.columnValue = columnValue;
return this;
}
public Builder dateformat(String dateformat) {
this.dateformat = dateformat;
return this;
}
public HbaseColumnCell build() {
return new HbaseColumnCell(this);
}
}
}


@ -0,0 +1,36 @@
package com.alibaba.datax.plugin.reader.obhbasereader;
import com.alibaba.datax.common.spi.ErrorCode;
public enum HbaseReaderErrorCode implements ErrorCode {
REQUIRED_VALUE("ObHbaseReader-00", "Missing required parameters."),
ILLEGAL_VALUE("ObHbaseReader-01", "Illegal configuration."),
PREPAR_READ_ERROR("ObHbaseReader-02", "Preparing to read ObHBase error."),
SPLIT_ERROR("ObHbaseReader-03", "Splitting ObHBase table error."),
INIT_TABLE_ERROR("ObHbaseReader-04", "Initializing ObHBase extraction table error."),
PARSE_COLUMN_ERROR("ObHbaseReader-05", "Parse column failed."),
READ_ERROR("ObHbaseReader-06", "Read ObHBase error.");
private final String code;
private final String description;
private HbaseReaderErrorCode(String code, String description) {
this.code = code;
this.description = description;
}
@Override
public String getCode() {
return this.code;
}
@Override
public String getDescription() {
return this.description;
}
@Override
public String toString() {
return String.format("Code:[%s], Description:[%s]. ", this.code, this.description);
}
}


@ -0,0 +1,103 @@
package com.alibaba.datax.plugin.reader.obhbasereader;
public final class Key {
public final static String HBASE_CONFIG = "hbaseConfig";
/**
 * mode can be normal, multiVersionFixedColumn or multiVersionDynamicColumn; no default
 * <p/>
 * normal is used together with column (a Map structure)
 * <p/>
 * multiVersionFixedColumn is used together with maxVersion, tetradType and column (a List structure)
 * <p/>
 * multiVersionDynamicColumn is used together with maxVersion, tetradType and columnFamily (a List structure)
 */
public final static String MODE = "mode";
/**
 * Used together with mode = multiVersion; the number of versions to read, no default
 * -1 means read all versions
 * 0 and 1 are not allowed
 * >1 means read at most that many versions (must not exceed Integer.MAX_VALUE)
 */
public final static String MAX_VERSION = "maxVersion";
/**
 * Required in multi-version mode: the types of the four-tuple (rowkey,column,timestamp,value)
 */
public final static String TETRAD_TYPE = "tetradType";
/**
 * Defaults to utf8
 */
public final static String ENCODING = "encoding";
public final static String TABLE = "table";
public final static String USERNAME = "username";
public final static String OB_SYS_USERNAME = "obSysUser";
public final static String CONFIG_URL = "obConfigUrl";
public final static String ODP_HOST = "odpHost";
public final static String ODP_PORT = "odpPort";
public final static String DB_NAME = "dbName";
public final static String PASSWORD = "password";
public final static String OB_SYS_PASSWORD = "obSysPassword";
public final static String COLUMN_FAMILY = "columnFamily";
public final static String COLUMN = "column";
public final static String START_ROWKEY = "startRowkey";
public final static String END_ROWKEY = "endRowkey";
public final static String IS_BINARY_ROWKEY = "isBinaryRowkey";
public final static String SCAN_CACHE = "scanCache";
public final static String RS_URL = "rsUrl";
public final static String MAX_ACTIVE_CONNECTION = "maxActiveConnection";
public final static int DEFAULT_MAX_ACTIVE_CONNECTION = 2000;
public final static String TIMEOUT = "timeout";
public final static long DEFAULT_TIMEOUT = 30;
public final static String PARTITION_NAME = "partitionName";
public final static String JDBC_URL = "jdbcUrl";
public final static String TIMEZONE = "timezone";
public final static String FETCH_SIZE = "fetchSize";
public final static String READ_BATCH_SIZE = "readBatchSize";
public final static String SESSION = "session";
public final static String READER_HINT = "readerHint";
public final static String QUERY_SQL = "querySql";
public final static String SAMPLE_PERCENTAGE = "samplePercentage";
// whether to use a separate password
public final static String USE_SPECIAL_SECRET = "useSpecialSecret";
public final static String USE_SQL_READER = "useSqlReader";
public final static String USE_ODP_MODE = "useOdpMode";
public final static String RANGE = "range";
public final static String READ_BY_PARTITION = "readByPartition";
}


@ -0,0 +1,445 @@
package com.alibaba.datax.plugin.reader.obhbasereader;
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL;
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_OB_TABLE_HBASE_LOG_LEVEL;
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_USE_ODPMODE;
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL;
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL;
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_HBASE_LOG_PATH;
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_TABLE_CLIENT_LOG_LEVEL;
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_TABLE_CLIENT_PROPERTY;
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_TABLE_HBASE_LOG_LEVEL;
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_TABLE_HBASE_PROPERTY;
import static org.apache.commons.lang3.StringUtils.EMPTY;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.common.spi.Reader;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.reader.Constant;
import com.alibaba.datax.plugin.rdbms.reader.util.ObVersion;
import com.alibaba.datax.plugin.rdbms.util.DBUtil;
import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import com.alibaba.datax.plugin.rdbms.util.TableExpandUtil;
import com.alibaba.datax.plugin.reader.obhbasereader.enums.ModeType;
import com.alibaba.datax.plugin.reader.obhbasereader.ext.ServerConnectInfo;
import com.alibaba.datax.plugin.reader.obhbasereader.task.AbstractHbaseTask;
import com.alibaba.datax.plugin.reader.obhbasereader.task.SQLNormalModeReader;
import com.alibaba.datax.plugin.reader.obhbasereader.task.ScanMultiVersionReader;
import com.alibaba.datax.plugin.reader.obhbasereader.task.ScanNormalModeReader;
import com.alibaba.datax.plugin.reader.obhbasereader.util.HbaseSplitUtil;
import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil;
import com.alibaba.datax.plugin.reader.obhbasereader.util.SqlReaderSplitUtil;
import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ObReaderUtils;
import com.google.common.base.Preconditions;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.Connection;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
/**
 * ObHbaseReader supports sharded databases and tables.
 * Only ob3.x and later versions are supported.
 */
public class ObHbaseReader extends Reader {
public static class Job extends Reader.Job {
static private final String ACCESS_DENIED_ERROR = "Access denied for user";
private static Logger LOG = LoggerFactory.getLogger(ObHbaseReader.class);
private Configuration originalConfig;
@Override
public void init() {
if (System.getProperty(OB_TABLE_CLIENT_PROPERTY) == null) {
LOG.info(OB_TABLE_CLIENT_PROPERTY + " not set");
System.setProperty(OB_TABLE_CLIENT_PROPERTY, OB_HBASE_LOG_PATH);
}
if (System.getProperty(OB_TABLE_HBASE_PROPERTY) == null) {
LOG.info(OB_TABLE_HBASE_PROPERTY + " not set");
System.setProperty(OB_TABLE_HBASE_PROPERTY, OB_HBASE_LOG_PATH);
}
if (System.getProperty(OB_TABLE_CLIENT_LOG_LEVEL) == null) {
LOG.info(OB_TABLE_CLIENT_LOG_LEVEL + " not set");
System.setProperty(OB_TABLE_CLIENT_LOG_LEVEL, DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL);
}
if (System.getProperty(OB_TABLE_HBASE_LOG_LEVEL) == null) {
LOG.info(OB_TABLE_HBASE_LOG_LEVEL + " not set");
System.setProperty(OB_TABLE_HBASE_LOG_LEVEL, DEFAULT_OB_TABLE_HBASE_LOG_LEVEL);
}
if (System.getProperty(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL) == null) {
LOG.info(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL + " not set");
System.setProperty(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL, DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL);
}
if (System.getProperty(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL) == null) {
LOG.info(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL + " not set");
System.setProperty(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL, DEFAULT_OB_TABLE_HBASE_LOG_LEVEL);
}
LOG.info("{} is set to {}, {} is set to {}",
OB_TABLE_CLIENT_PROPERTY, OB_HBASE_LOG_PATH, OB_TABLE_HBASE_PROPERTY, OB_HBASE_LOG_PATH);
this.originalConfig = super.getPluginJobConf();
ObHbaseReaderUtil.doPretreatment(originalConfig);
List<Object> conns = originalConfig.getList(Constant.CONN_MARK, Object.class);
// logical table configuration
Preconditions.checkArgument(CollectionUtils.isNotEmpty(conns), "connection information is empty.");
dealLogicConnAndTable(conns);
if (LOG.isDebugEnabled()) {
LOG.debug("After init(), now originalConfig is:\n{}\n", this.originalConfig);
}
}
@Override
public void destroy() {
}
private void dealLogicConnAndTable(List<Object> conns) {
String unifiedUsername = originalConfig.getString(Key.USERNAME);
String unifiedPassword = originalConfig.getString(Key.PASSWORD);
boolean useSqlReader = originalConfig.getBool(Key.USE_SQL_READER, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_USE_SQLREADER);
boolean checkSlave = originalConfig.getBool(com.alibaba.datax.plugin.rdbms.reader.Key.CHECK_SLAVE, false);
Set<String> keywords = Arrays.stream(com.alibaba.datax.plugin.reader.obhbasereader.Constant.OBMYSQL_KEYWORDS.split(",")).collect(Collectors.toSet());
List<String> preSql = originalConfig.getList(com.alibaba.datax.plugin.rdbms.reader.Key.PRE_SQL, String.class);
int tableNum = 0;
for (int i = 0, len = conns.size(); i < len; i++) {
Configuration connConf = Configuration.from(conns.get(i).toString());
String curUsername = connConf.getString(Key.USERNAME, unifiedUsername);
Preconditions.checkArgument(StringUtils.isNotEmpty(curUsername), "username is empty.");
String curPassword = connConf.getString(Key.PASSWORD, unifiedPassword);
originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.USERNAME), curUsername);
originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.PASSWORD), curPassword);
List<String> jdbcUrls = connConf.getList(Key.JDBC_URL, new ArrayList<>(), String.class);
String jdbcUrl;
if (useSqlReader) {
// in sql mode jdbcUrl must be configured; the address is checked only when sql mode is used
Preconditions.checkArgument(CollectionUtils.isNotEmpty(jdbcUrls), "if using sql mode, jdbcUrl is needed");
jdbcUrl = DBUtil.chooseJdbcUrlWithoutRetry(DataBaseType.MySql, jdbcUrls, curUsername, curPassword, preSql, checkSlave);
jdbcUrl = DataBaseType.MySql.appendJDBCSuffixForReader(jdbcUrl);
// write back to connection[i].jdbcUrl
originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.JDBC_URL), jdbcUrl);
LOG.info("Available jdbcUrl:{}.", jdbcUrl);
} else {
jdbcUrl = jdbcUrls.get(0);
jdbcUrl = StringUtils.isNotBlank(jdbcUrl) ? DataBaseType.MySql.appendJDBCSuffixForReader(jdbcUrl) : EMPTY;
checkAndSetHbaseConnConf(jdbcUrl, curUsername, curPassword, connConf, i);
}
// table style
// parse the table items configured on each connection (table names get backtick-quoted below)
List<String> tables = connConf.getList(Key.TABLE, String.class);
List<String> expandedTables = TableExpandUtil.expandTableConf(DataBaseType.MySql, tables);
if (expandedTables.isEmpty()) {
throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE, "The specified table list is empty.");
}
for (int ti = 0; ti < expandedTables.size(); ti++) {
String tableName = expandedTables.get(ti);
if (keywords.contains(tableName.toUpperCase())) {
expandedTables.set(ti, "`" + tableName + "`");
}
}
tableNum += expandedTables.size();
originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.TABLE), expandedTables);
}
if (tableNum == 0) {
// the sharding rule matched no tables to extract
LOG.error("sharding rule result is empty.");
throw DataXException.asDataXException("No tables were matched");
}
originalConfig.set(Constant.TABLE_NUMBER_MARK, tableNum);
}
/**
* In public cloud, only odp mode can be used.
* In private cloud, both odp mode and ocp mode can be used.
*
* @param jdbcUrl
* @param curUsername
* @param curPassword
* @param connConf
* @param curIndex
*/
private void checkAndSetHbaseConnConf(String jdbcUrl, String curUsername, String curPassword, Configuration connConf, int curIndex) {
ServerConnectInfo serverConnectInfo = new ServerConnectInfo(jdbcUrl, curUsername, curPassword);
if (!originalConfig.getBool(Key.USE_ODP_MODE, false)) {
// Normally, only need to query at first time
// In ocp mode, dbName, configUrl, sysUser and sysPass are needed.
String sysUser = connConf.getString(Key.OB_SYS_USERNAME, originalConfig.getString(Key.OB_SYS_USERNAME));
String sysPass = connConf.getString(Key.OB_SYS_PASSWORD, originalConfig.getString(Key.OB_SYS_PASSWORD));
serverConnectInfo.setSysUser(sysUser);
serverConnectInfo.setSysPass(sysPass);
String configUrl = connConf.getString(Key.CONFIG_URL, originalConfig.getString(Key.CONFIG_URL));
if (StringUtils.isBlank(configUrl)) {
configUrl = queryRsUrl(serverConnectInfo);
}
originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.USERNAME), curUsername);
originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.OB_SYS_USERNAME), serverConnectInfo.sysUser);
originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.OB_SYS_PASSWORD), serverConnectInfo.sysPass);
originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.CONFIG_URL), configUrl);
} else {
// In odp mode, dbName, odp host and odp port are needed.
String odpHost = connConf.getString(Key.ODP_HOST, serverConnectInfo.host);
String odpPort = connConf.getString(Key.ODP_PORT, serverConnectInfo.port);
originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.ODP_HOST), odpHost);
originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.ODP_PORT), odpPort);
}
originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, curIndex, Key.DB_NAME), serverConnectInfo.databaseName);
}
private String queryRsUrl(ServerConnectInfo serverInfo) {
Preconditions.checkArgument(checkVersionAfterV3(serverInfo.jdbcUrl, serverInfo.getFullUserName(), serverInfo.password), "ob before 3.x is not supported.");
String configUrl = originalConfig.getString(Key.CONFIG_URL, null);
if (configUrl == null) {
try {
Connection conn = null;
int retry = 0;
final String sysJDBCUrl = serverInfo.jdbcUrl.replace(serverInfo.databaseName, "oceanbase");
do {
try {
if (retry > 0) {
int sleep = retry > 9 ? 500 : 1 << retry;
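// exponential backoff in seconds; the enclosing loop allows at most 3 retries, so the sleep is at most 4s here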
try {
TimeUnit.SECONDS.sleep(sleep);
} catch (InterruptedException e) {
}
LOG.warn("retry fetch RsUrl the {} times", retry);
}
conn = DBUtil.getConnection(DataBaseType.OceanBase, sysJDBCUrl, serverInfo.sysUser, serverInfo.sysPass);
String sql = "show parameters like 'obconfig_url'";
LOG.info("query param: {}", sql);
PreparedStatement stmt = conn.prepareStatement(sql);
ResultSet result = stmt.executeQuery();
if (result.next()) {
configUrl = result.getString("Value");
}
if (StringUtils.isNotBlank(configUrl)) {
break;
}
} catch (Exception e) {
++retry;
LOG.warn("fetch root server list(rsList) error {}", e.getMessage());
} finally {
DBUtil.closeDBResources(null, conn);
}
} while (retry < 3);
LOG.info("configure url is: " + configUrl);
originalConfig.set(Key.CONFIG_URL, configUrl);
} catch (Exception e) {
LOG.error("Fail to get configure url: {}", e.getMessage(), e);
throw DataXException.asDataXException(HbaseReaderErrorCode.REQUIRED_VALUE, "obConfigUrl is not configured and could not be fetched automatically");
}
}
return configUrl;
}
@Override
public void prepare() {
}
@Override
public void post() {
}
@Override
public List<Configuration> split(int adviceNumber) {
Map<String, HbaseColumnCell> hbaseColumnCells = ObHbaseReaderUtil.parseColumn(originalConfig.getList(Key.COLUMN, Map.class));
if (hbaseColumnCells.size() == 0) {
LOG.error("no column cells specified.");
throw new RuntimeException("no column cells specified");
}
String columnFamily = ObHbaseReaderUtil.parseColumnFamily(hbaseColumnCells.values());
Preconditions.checkArgument(StringUtils.isNotEmpty(columnFamily), "column family is empty.");
List<Object> conns = originalConfig.getList(Constant.CONN_MARK, Object.class);
Preconditions.checkArgument(conns != null && !conns.isEmpty(), "connection information is necessary.");
return splitLogicTables(adviceNumber, conns, columnFamily);
}
private List<Configuration> splitLogicTables(int adviceNumber, List<Object> conns, String columnFamily) {
// adviceNumber is the channel count here, i.e. the number of concurrent datax tasks
// eachTableShouldSplittedNumber is the number of slices each single table should be split into
int eachTableShouldSplittedNumber = (int) Math.ceil(1.0 * adviceNumber / originalConfig.getInt(Constant.TABLE_NUMBER_MARK));
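// e.g. adviceNumber = 6 channels over 3 tables gives ceil(6 / 3) = 2 slices per table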
boolean useSqlReader = originalConfig.getBool(Key.USE_SQL_READER, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_USE_SQLREADER);
boolean odpMode = originalConfig.getBool(Key.USE_ODP_MODE, DEFAULT_USE_ODPMODE);
boolean readByPartition = originalConfig.getBool(Key.READ_BY_PARTITION, false);
List<Configuration> splittedConfigs = new ArrayList<>();
for (int i = 0, len = conns.size(); i < len; i++) {
Configuration sliceConfig = originalConfig.clone();
Configuration connConf = Configuration.from(conns.get(i).toString());
copyConnConfByMode(useSqlReader, odpMode, sliceConfig, connConf);
// this is the table-style configuration
// tables were already expanded and backtick-quoted earlier, so they can be used directly
List<String> tables = connConf.getList(Key.TABLE, String.class);
Validate.isTrue(null != tables && !tables.isEmpty(), "error in your configuration for the reading database table.");
int tempEachTableShouldSplittedNumber = eachTableShouldSplittedNumber;
if (tables.size() == 1) {
Integer splitFactor = originalConfig.getInt(com.alibaba.datax.plugin.rdbms.reader.Key.SPLIT_FACTOR, Constant.SPLIT_FACTOR);
tempEachTableShouldSplittedNumber = eachTableShouldSplittedNumber * splitFactor;
}
for (String table : tables) {
Configuration tempSlice;
tempSlice = sliceConfig.clone();
tempSlice.set(Key.TABLE, table);
splittedConfigs.addAll(
useSqlReader ? SqlReaderSplitUtil.splitSingleTable(tempSlice, table, columnFamily, tempEachTableShouldSplittedNumber, readByPartition) : HbaseSplitUtil.split(tempSlice));
}
}
return splittedConfigs;
}
private void copyConnConfByMode(boolean useSqlReader, boolean odpMode, Configuration targetConf, Configuration sourceConnConf) {
String username = sourceConnConf.getNecessaryValue(Key.USERNAME, DBUtilErrorCode.REQUIRED_VALUE);
targetConf.set(Key.USERNAME, username);
String password = sourceConnConf.getNecessaryValue(Key.PASSWORD, DBUtilErrorCode.REQUIRED_VALUE);
targetConf.set(Key.PASSWORD, password);
if (useSqlReader) {
String jdbcUrl = sourceConnConf.getNecessaryValue(Key.JDBC_URL, DBUtilErrorCode.REQUIRED_VALUE);
targetConf.set(Key.JDBC_URL, jdbcUrl);
} else if (odpMode) {
String dbName = sourceConnConf.getNecessaryValue(Key.DB_NAME, DBUtilErrorCode.REQUIRED_VALUE);
targetConf.set(Key.DB_NAME, dbName);
String odpHost = sourceConnConf.getNecessaryValue(Key.ODP_HOST, DBUtilErrorCode.REQUIRED_VALUE);
targetConf.set(Key.ODP_HOST, odpHost);
String odpPort = sourceConnConf.getNecessaryValue(Key.ODP_PORT, DBUtilErrorCode.REQUIRED_VALUE);
targetConf.set(Key.ODP_PORT, odpPort);
} else {
String dbName = sourceConnConf.getNecessaryValue(Key.DB_NAME, DBUtilErrorCode.REQUIRED_VALUE);
targetConf.set(Key.DB_NAME, dbName);
String sysUser = sourceConnConf.getNecessaryValue(Key.OB_SYS_USERNAME, DBUtilErrorCode.REQUIRED_VALUE);
targetConf.set(Key.OB_SYS_USERNAME, sysUser);
String sysPass = sourceConnConf.getString(Key.OB_SYS_PASSWORD);
targetConf.set(Key.OB_SYS_PASSWORD, sysPass);
}
targetConf.remove(Constant.CONN_MARK);
}
private boolean checkVersionAfterV3(String jdbcUrl, String username, String password) {
int retryLimit = 3;
int retryCount = 0;
Connection conn = null;
while (retryCount++ <= retryLimit) {
try {
conn = DBUtil.getConnectionWithoutRetry(DataBaseType.MySql, jdbcUrl, username, password);
ObVersion obVersion = ObReaderUtils.getObVersion(conn);
return ObVersion.V3.compareTo(obVersion) <= 0;
} catch (Exception e) {
LOG.error("fail to check ob version, will retry: " + e.getMessage());
if (e.getMessage().contains(ACCESS_DENIED_ERROR)) {
throw new RuntimeException(e);
}
try {
TimeUnit.SECONDS.sleep(1);
} catch (Exception ex) {
LOG.error("interrupted while waiting for retry.");
}
} finally {
DBUtil.closeDBResources(null, conn);
}
}
return false;
}
}
public static class Task extends Reader.Task {
private static Logger LOG = LoggerFactory.getLogger(Task.class);
private Configuration taskConfig;
private AbstractHbaseTask hbaseTaskProxy;
@Override
public void init() {
this.taskConfig = super.getPluginJobConf();
String mode = this.taskConfig.getString(Key.MODE);
ModeType modeType = ModeType.getByTypeName(mode);
boolean useSqlReader = this.taskConfig.getBool(Key.USE_SQL_READER, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_USE_SQLREADER);
LOG.info("init reader with mode: " + modeType);
switch (modeType) {
case Normal:
this.hbaseTaskProxy = useSqlReader ? new SQLNormalModeReader(this.taskConfig) : new ScanNormalModeReader(this.taskConfig);
break;
case MultiVersionFixedColumn:
this.hbaseTaskProxy = new ScanMultiVersionReader(this.taskConfig);
break;
default:
throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, "This type of mode is not supported by hbasereader:" + modeType);
}
}
@Override
public void destroy() {
if (this.hbaseTaskProxy != null) {
try {
this.hbaseTaskProxy.close();
} catch (Exception e) {
//
}
}
}
@Override
public void prepare() {
try {
this.hbaseTaskProxy.prepare();
} catch (Exception e) {
throw DataXException.asDataXException(HbaseReaderErrorCode.PREPAR_READ_ERROR, e);
}
}
@Override
public void post() {
super.post();
}
@Override
public void startRead(RecordSender recordSender) {
Record record = recordSender.createRecord();
boolean fetchOK;
int retryTimes = 0;
int maxRetryTimes = 3;
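// a failed fetch is collected as dirty data and retried with a fresh record; abort once retries exceed maxRetryTimes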
while (true) {
try {
// TODO check exception
fetchOK = this.hbaseTaskProxy.fetchLine(record);
} catch (Exception e) {
LOG.info("fetch record failed. reason: {}.", e.getMessage(), e);
super.getTaskPluginCollector().collectDirtyRecord(record, e);
if (retryTimes++ > maxRetryTimes) {
throw DataXException.asDataXException(HbaseReaderErrorCode.READ_ERROR, "read from obhbase failed", e);
}
record = recordSender.createRecord();
continue;
}
if (fetchOK) {
recordSender.sendToWriter(record);
record = recordSender.createRecord();
} else {
break;
}
}
recordSender.flush();
}
}
}


@ -0,0 +1,44 @@
package com.alibaba.datax.plugin.reader.obhbasereader.enums;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode;
import java.util.Arrays;
/**
 * Only meaningful for normal-mode reads; multi-version reads have no per-column types
 */
public enum ColumnType {
STRING("string"),
BINARY_STRING("binarystring"),
BYTES("bytes"),
BOOLEAN("boolean"),
SHORT("short"),
INT("int"),
LONG("long"),
FLOAT("float"),
DOUBLE("double"),
DATE("date");
private String typeName;
ColumnType(String typeName) {
this.typeName = typeName;
}
public static ColumnType getByTypeName(String typeName) {
for (ColumnType columnType : values()) {
if (columnType.typeName.equalsIgnoreCase(typeName)) {
return columnType;
}
}
throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE,
String.format("The type %s is not supported by hbasereader, currently supported type is:%s .", typeName, Arrays.asList(values())));
}
@Override
public String toString() {
return this.typeName;
}
}


@ -0,0 +1,28 @@
package com.alibaba.datax.plugin.reader.obhbasereader.enums;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode;
import java.util.Arrays;
import java.util.Optional;
import java.util.stream.Stream;
public enum FetchVersion {
OLDEST("oldest"), LATEST("latest");
private final String version;
FetchVersion(String version) {
this.version = version;
}
public static FetchVersion getByDesc(String name) {
Optional<FetchVersion> result = Stream.of(values()).filter(v -> v.version.equalsIgnoreCase(name))
.findFirst();
return result.orElseThrow(() -> {
return DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE,
String.format("obHBasereader 不支持该类型:%s, 目前支持的类型是:%s", name, Arrays.asList(values())));
});
}
}


@ -0,0 +1,30 @@
package com.alibaba.datax.plugin.reader.obhbasereader.enums;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode;
import java.util.Arrays;
public enum ModeType {
Normal("normal"),
MultiVersionFixedColumn("multiVersionFixedColumn"),
MultiVersionDynamicColumn("multiVersionDynamicColumn"),
;
private String mode;
ModeType(String mode) {
this.mode = mode.toLowerCase();
}
public static ModeType getByTypeName(String modeName) {
for (ModeType modeType : values()) {
if (modeType.mode.equalsIgnoreCase(modeName)) {
return modeType;
}
}
throw DataXException.asDataXException(
HbaseReaderErrorCode.ILLEGAL_VALUE, String.format("The mode type is not supported by hbasereader:%s, and the currently supported mode type is:%s", modeName, Arrays.asList(values())));
}
}


@ -0,0 +1,146 @@
package com.alibaba.datax.plugin.reader.obhbasereader.ext;
import com.google.common.base.Preconditions;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.apache.commons.lang3.StringUtils.EMPTY;
public class ServerConnectInfo {
public String clusterName;
public String tenantName;
// userName doesn't contain tenantName or clusterName
public String userName;
public String password;
public String databaseName;
public String ipPort;
public String jdbcUrl;
public String host;
public String port;
public boolean publicCloud;
public int rpcPort;
public String sysUser;
public String sysPass;
/**
*
* @param jdbcUrl format is jdbc:oceanbase://ip:port
* @param username format is cluster:tenant:username or username@tenant#cluster or user@tenant or user
* @param password
*/
public ServerConnectInfo(final String jdbcUrl, final String username, final String password) {
this(jdbcUrl, username, password, null, null);
}
public ServerConnectInfo(final String jdbcUrl, final String username, final String password, final String sysUser, final String sysPass) {
if (jdbcUrl.startsWith(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING)) {
String[] ss = jdbcUrl.split(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING_PATTERN);
Preconditions.checkArgument(ss.length == 3, "jdbc url format is not correct:" + jdbcUrl);
this.userName = username;
this.clusterName = ss[1].trim().split(":")[0];
this.tenantName = ss[1].trim().split(":")[1];
this.jdbcUrl = ss[2];
} else {
this.jdbcUrl = jdbcUrl;
}
this.password = password;
this.sysUser = sysUser;
this.sysPass = sysPass;
parseJdbcUrl(jdbcUrl);
parseFullUserName(username);
}
private void parseJdbcUrl(final String jdbcUrl) {
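// extracts "host:port" and the database name from urls of the form jdbc:xxx://host:port/dbName?params
// (note the trailing '\?' in the pattern: the url is expected to carry query parameters)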
Pattern pattern = Pattern.compile("//([\\w\\.\\-]+:\\d+)/([\\w-]+)\\?");
Matcher matcher = pattern.matcher(jdbcUrl);
if (matcher.find()) {
String ipPort = matcher.group(1);
String dbName = matcher.group(2);
this.ipPort = ipPort;
String[] hostPort = ipPort.split(":");
this.host = hostPort[0];
this.port = hostPort[1];
this.databaseName = dbName;
this.publicCloud = host.endsWith("aliyuncs.com");
} else {
throw new RuntimeException("Invalid argument:" + jdbcUrl);
}
}
private void parseFullUserName(final String fullUserName) {
int tenantIndex = fullUserName.indexOf("@");
int clusterIndex = fullUserName.indexOf("#");
// used when jdbcUrl starts with ||_dsc_ob10_dsc_||
if (fullUserName.contains(":") && tenantIndex < 0) {
String[] names = fullUserName.split(":");
if (names.length != 3) {
throw new RuntimeException("invalid argument: " + fullUserName);
} else {
this.clusterName = names[0];
this.tenantName = names[1];
this.userName = names[2];
}
} else if (tenantIndex < 0) {
// short jdbcUrl with no tenant name in the username, mainly the public cloud case; partitions are not calculated here
this.userName = fullUserName;
this.clusterName = EMPTY;
this.tenantName = EMPTY;
} else {
// short jdbcUrl with the tenant name contained in the username
this.userName = fullUserName.substring(0, tenantIndex);
if (clusterIndex < 0) {
this.clusterName = EMPTY;
this.tenantName = fullUserName.substring(tenantIndex + 1);
} else {
this.clusterName = fullUserName.substring(clusterIndex + 1);
this.tenantName = fullUserName.substring(tenantIndex + 1, clusterIndex);
}
}
}
@Override
public String toString() {
return "ServerConnectInfo{" +
"clusterName='" + clusterName + '\'' +
", tenantName='" + tenantName + '\'' +
", userName='" + userName + '\'' +
", password='" + password + '\'' +
", databaseName='" + databaseName + '\'' +
", ipPort='" + ipPort + '\'' +
", jdbcUrl='" + jdbcUrl + '\'' +
", publicCloud=" + publicCloud +
", rpcPort=" + rpcPort +
'}';
}
public String getFullUserName() {
StringBuilder builder = new StringBuilder();
builder.append(userName);
if (publicCloud || (rpcPort != 0 && EMPTY.equals(clusterName))) {
return builder.toString();
}
if (!EMPTY.equals(tenantName)) {
builder.append("@").append(tenantName);
}
if (!EMPTY.equals(clusterName)) {
builder.append("#").append(clusterName);
}
if (EMPTY.equals(this.clusterName) && EMPTY.equals(this.tenantName)) {
return this.userName;
}
return builder.toString();
}
public void setRpcPort(int rpcPort) {
this.rpcPort = rpcPort;
}
public void setSysUser(String sysUser) {
this.sysUser = sysUser;
}
public void setSysPass(String sysPass) {
this.sysPass = sysPass;
}
}


@ -0,0 +1,41 @@
package com.alibaba.datax.plugin.reader.obhbasereader.task;
import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.reader.obhbasereader.Constant;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell;
import com.alibaba.datax.plugin.reader.obhbasereader.Key;
import com.alibaba.datax.plugin.reader.obhbasereader.enums.ModeType;
import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
public abstract class AbstractHbaseTask {
protected String encoding;
protected String timezone = null;
protected Map<String, HbaseColumnCell> hbaseColumnCellMap;
// constant columns
protected Map<String, Column> constantMap;
protected ModeType modeType;
public AbstractHbaseTask() {
}
public AbstractHbaseTask(Configuration configuration) {
this.timezone = configuration.getString(Key.TIMEZONE, Constant.DEFAULT_TIMEZONE);
this.encoding = configuration.getString(Key.ENCODING, Constant.DEFAULT_ENCODING);
String mode = configuration.getString(Key.MODE, "Normal");
this.modeType = ModeType.getByTypeName(mode);
this.constantMap = new HashMap<>();
this.hbaseColumnCellMap = ObHbaseReaderUtil.parseColumn(configuration.getList(Key.COLUMN, Map.class), constantMap, encoding, timezone);
}
public abstract void prepare() throws Exception;
public abstract boolean fetchLine(Record record) throws Exception;
public abstract void close() throws IOException;
}


@ -0,0 +1,99 @@
package com.alibaba.datax.plugin.reader.obhbasereader.task;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.reader.obhbasereader.Constant;
import com.alibaba.datax.plugin.reader.obhbasereader.HTableManager;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell;
import com.alibaba.datax.plugin.reader.obhbasereader.Key;
import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil;
import com.alipay.oceanbase.hbase.OHTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
public abstract class AbstractScanReader extends AbstractHbaseTask {
private static Logger LOG = LoggerFactory.getLogger(AbstractScanReader.class);
protected OHTable ohtable;
protected Result lastResult = null;
protected Scan scan;
protected ResultScanner resultScanner;
protected int maxVersion;
private int scanCache;
private byte[] startKey = null;
private byte[] endKey = null;
public AbstractScanReader(Configuration configuration) {
super(configuration);
this.maxVersion = configuration.getInt(Key.MAX_VERSION, 1);
this.scanCache = configuration.getInt(Key.SCAN_CACHE, Constant.DEFAULT_SCAN_CACHE);
this.ohtable = ObHbaseReaderUtil.initOHtable(configuration);
this.startKey = ObHbaseReaderUtil.convertInnerStartRowkey(configuration);
this.endKey = ObHbaseReaderUtil.convertInnerEndRowkey(configuration);
LOG.info("The task set startRowkey=[{}], endRowkey=[{}].", Bytes.toStringBinary(this.startKey), Bytes.toStringBinary(this.endKey));
}
@Override
public void prepare() throws Exception {
this.scan = new Scan();
this.scan.setSmall(false);
this.scan.setCacheBlocks(false);
this.scan.setStartRow(startKey);
this.scan.setStopRow(endKey);
LOG.info("The task set startRowkey=[{}], endRowkey=[{}].", Bytes.toStringBinary(this.startKey), Bytes.toStringBinary(this.endKey));
this.scan.setCaching(this.scanCache);
if (this.maxVersion == -1 || this.maxVersion == Integer.MAX_VALUE) {
this.scan.setMaxVersions();
} else {
this.scan.setMaxVersions(this.maxVersion);
}
initScanColumns();
this.resultScanner = this.ohtable.getScanner(this.scan);
}
@Override
public void close() throws IOException {
if (this.resultScanner != null) {
this.resultScanner.close();
}
HTableManager.closeHTable(this.ohtable);
}
protected void initScanColumns() {
boolean isConstant;
boolean isRowkeyColumn;
for (HbaseColumnCell cell : this.hbaseColumnCellMap.values()) {
isConstant = cell.isConstant();
isRowkeyColumn = ObHbaseReaderUtil.isRowkeyColumn(cell.getColumnName());
if (!isConstant && !isRowkeyColumn) {
LOG.info("columnFamily: " + new String(cell.getCf()) + ", qualifier: " + new String(cell.getQualifier()));
this.scan.addColumn(cell.getCf(), cell.getQualifier());
}
}
}
protected Result getNextHbaseRow() throws Exception {
Result result = null;
try {
result = resultScanner.next();
} catch (Exception e) {
LOG.error("failed to get result", e);
if (lastResult != null) {
scan.setStartRow(lastResult.getRow());
}
resultScanner = this.ohtable.getScanner(scan);
result = resultScanner.next();
if (lastResult != null && Bytes.equals(lastResult.getRow(), result.getRow())) {
result = resultScanner.next();
}
}
lastResult = result;
// may be null
return result;
}
}


@ -0,0 +1,257 @@
package com.alibaba.datax.plugin.reader.obhbasereader.task;
import static com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_READ_HINT;
import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.util.DBUtil;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import com.alibaba.datax.plugin.reader.obhbasereader.Constant;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode;
import com.alibaba.datax.plugin.reader.obhbasereader.Key;
import com.alibaba.datax.plugin.reader.obhbasereader.enums.FetchVersion;
import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil;
import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ObReaderUtils;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
public class SQLNormalModeReader extends AbstractHbaseTask {
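// The table api stores every HBase cell as one (K, Q, T, V) row: rowkey, qualifier, timestamp and value.
// hex(K) is selected alongside K so that binary rowkeys survive the round-trip through the
// string-typed savepoint used to resume an interrupted read.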
private final static String QUERY_SQL_TEMPLATE = "select %s K, Q, T, V, hex(K) as `hex` from %s %s";
private static Logger LOG = LoggerFactory.getLogger(SQLNormalModeReader.class);
private final Map<String, byte[]> columnMap;
private final Map<String, Long> versionMap;
private final FetchVersion fetchVersion;
private Set<String> columnNames;
private boolean noMoreData = false;
private String querySQL = null;
private Connection conn = null;
private PreparedStatement stmt = null;
private ResultSet rs = null;
private String jdbcUrl = null;
private String columnFamily = null;
private String username = null;
private String password = null;
private int fetchSize = com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_FETCH_SIZE;
private long readBatchSize = com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_READ_BATCH_SIZE;
private Configuration configuration;
private boolean hasRange = false;
private String[] savepoint = new String[3];
// only used by unit test
protected boolean reuseConn = false;
public SQLNormalModeReader(Configuration configuration) {
this.configuration = configuration;
this.hbaseColumnCellMap = ObHbaseReaderUtil.parseColumn(configuration.getList(Key.COLUMN, Map.class));
if (hbaseColumnCellMap.size() == 0) {
LOG.error("no column cells specified.");
throw new RuntimeException("no column cells specified");
}
columnFamily = ObHbaseReaderUtil.parseColumnFamily(hbaseColumnCellMap.values());
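// strip the "columnFamily:" prefix from each configured column so that the names can be
// matched directly against the Q (qualifier) values returned by the query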
this.columnNames =
hbaseColumnCellMap.keySet().stream().map(e -> ObHbaseReaderUtil.isRowkeyColumn(e) ? Constant.ROWKEY_FLAG : e.substring((columnFamily + ":").length())).collect(Collectors.toSet());
String partInfo = "";
String partName = configuration.getString(Key.PARTITION_NAME, null);
if (partName != null) {
partInfo = "partition(" + partName + ")";
}
String tableName = configuration.getString(Key.TABLE, null);
String hint = configuration.getString(Key.READER_HINT, OB_READ_HINT);
this.hasRange = !StringUtils.isEmpty(configuration.getString(Key.RANGE, null));
this.querySQL = String.format(QUERY_SQL_TEMPLATE, hint, tableName + "$" + columnFamily, partInfo);
if (hasRange) {
this.querySQL = querySQL + " where (" + configuration.getString(Key.RANGE) + ")";
}
this.jdbcUrl = configuration.getString(Key.JDBC_URL, null);
this.username = configuration.getString(Key.USERNAME, null);
this.password = configuration.getString(Key.PASSWORD, null);
this.columnMap = Maps.newHashMap();
this.versionMap = Maps.newHashMap();
this.fetchVersion = FetchVersion.getByDesc(configuration.getString("version", FetchVersion.LATEST.name()));
this.timezone = configuration.getString(Key.TIMEZONE, "UTC");
this.encoding = configuration.getString(Key.ENCODING, Constant.DEFAULT_ENCODING);
this.fetchSize = configuration.getInt(Key.FETCH_SIZE, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_FETCH_SIZE);
this.readBatchSize = configuration.getLong(Key.READ_BATCH_SIZE, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_READ_BATCH_SIZE);
LOG.info("read from jdbcUrl {} with fetchSize {}, readBatchSize {}", jdbcUrl, fetchSize, readBatchSize);
}
private boolean notFinished(String currentKey) throws SQLException {
boolean updateSuccess = updateResultSet();
if (updateSuccess) {
String newKey = rs.getString("K");
return newKey.equals(currentKey);
} else {
noMoreData = true;
Arrays.fill(savepoint, null);
return false;
}
}
private boolean updateResultSet() throws SQLException {
if (rs != null && rs.next()) {
return true;
}
if (savepoint[0] != null) {
int retryLimit = 10;
int retryCount = 0;
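// resume strictly after the last (K, Q, T) savepoint; unhex(?) restores the binary rowkey
// captured via hex(K), so the resume point is exact even for non-printable rowkeys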
String tempQuery = querySQL + (hasRange ? " and " : " where ") + "(K,Q,T) > (unhex(?),?,?) order by K,Q,T limit " + readBatchSize;
while (retryCount < retryLimit) {
retryCount++;
try {
resetConnection();
DBUtil.closeDBResources(rs, stmt, null);
stmt = conn.prepareStatement(tempQuery, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
stmt.setFetchSize(fetchSize);
for (int i = 0; i < savepoint.length; i++) {
stmt.setObject(i + 1, savepoint[i]);
}
rs = stmt.executeQuery();
if (rs.next()) {
LOG.info("execute sql: {}, savepoint:[{}]", tempQuery, Arrays.stream(savepoint).map(e -> "'" + e + "'").collect(Collectors.joining(",")));
return true;
}
// All data in this task are read
break;
} catch (Exception ex) {
LOG.error("failed to query sql, will retry {} times", retryCount, ex);
DBUtil.closeDBResources(rs, stmt, conn);
if (retryCount > retryLimit) {
LOG.error("Sql: [{}] executed failed, savepoint:[{}], reason: {}", tempQuery, Arrays.stream(savepoint).map(e -> "'" + e + "'").collect(Collectors.joining(",")),
ex.getMessage());
throw new RuntimeException(ex);
}
}
}
}
return false;
}
@Override
public void prepare() {
int retryLimit = 10;
int retryCount = 0;
while (true) {
retryCount++;
try {
resetConnection();
String tempQuery = querySQL + " order by K,Q,T limit " + readBatchSize;
stmt = conn.prepareStatement(tempQuery, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
stmt.setFetchSize(fetchSize);
LOG.info("execute sql : {}", tempQuery);
rs = stmt.executeQuery();
if (!rs.next()) {
noMoreData = true;
}
break;
} catch (Exception e) {
LOG.error("failed to query sql, will retry {} times", retryCount, e);
DBUtil.closeDBResources(rs, stmt, conn);
if (retryCount > retryLimit) {
LOG.error("Sql: [{}] executed failed, reason: {}", querySQL, e.getMessage());
throw new RuntimeException(e);
}
}
}
}
@Override
public boolean fetchLine(Record record) throws Exception {
try {
if (noMoreData) {
return false;
}
String currentKey = rs.getString("K");
savepoint[0] = rs.getString("hex");
columnMap.put(Constant.ROWKEY_FLAG, currentKey.getBytes());
do {
String columnName = rs.getString("Q");
savepoint[1] = columnName;
if (!this.columnNames.contains(columnName)) {
continue;
}
Long version = rs.getLong("T");
savepoint[2] = String.valueOf(version);
byte[] value = rs.getBytes("V");
Predicate<Long> predicate;
switch (this.fetchVersion) {
case OLDEST:
predicate = v -> v.compareTo(versionMap.getOrDefault(columnName, Long.MIN_VALUE)) > 0;
break;
case LATEST:
predicate = v -> v.compareTo(versionMap.getOrDefault(columnName, Long.MAX_VALUE)) < 0;
break;
default:
throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, "Not support version: " + this.fetchVersion);
}
if (predicate.test(version)) {
versionMap.put(columnName, version);
columnMap.put(columnName, value);
}
} while (notFinished(currentKey));
for (HbaseColumnCell cell : this.hbaseColumnCellMap.values()) {
Column column = null;
if (cell.isConstant()) {
// handle constant-valued columns
column = this.constantMap.get(cell.getColumnName());
} else {
String columnName = ObHbaseReaderUtil.isRowkeyColumn(cell.getColumnName()) ? Constant.ROWKEY_FLAG : cell.getColumnName().substring((columnFamily + ":").length());
byte[] value = null;
if (!columnMap.containsKey(columnName)) {
LOG.debug("{} is not contained in the record with K value={}. consider this record as null record.", columnName, currentKey);
} else {
value = columnMap.get(columnName);
}
column = ObHbaseReaderUtil.buildColumn(value, cell.getColumnType(), encoding, cell.getDateformat(), timezone);
}
record.addColumn(column);
}
} finally {
this.columnMap.clear();
this.versionMap.clear();
}
return true;
}
@Override
public void close() throws IOException {
DBUtil.closeDBResources(rs, stmt, conn);
}
private void resetConnection() throws SQLException {
if (reuseConn && conn != null && !conn.isClosed()) {
return;
}
// set ob_query_timeout and ob_trx_timeout to a large time in case timeout
int queryTimeoutSeconds = 60 * 60 * 48;
String setQueryTimeout = "set ob_query_timeout=" + (queryTimeoutSeconds * 1000 * 1000L);
String setTrxTimeout = "set ob_trx_timeout=" + ((queryTimeoutSeconds + 5) * 1000 * 1000L);
List<String> newSessionConfig = Lists.newArrayList(setQueryTimeout, setTrxTimeout);
List<String> sessionConfig = configuration.getList(Key.SESSION, new ArrayList<>(), String.class);
newSessionConfig.addAll(sessionConfig);
configuration.set(Key.SESSION, newSessionConfig);
conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, this.username, this.password);
}
}


@ -0,0 +1,98 @@
package com.alibaba.datax.plugin.reader.obhbasereader.task;
import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.LongColumn;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.reader.obhbasereader.Constant;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode;
import com.alibaba.datax.plugin.reader.obhbasereader.enums.ColumnType;
import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
public class ScanMultiVersionReader extends AbstractScanReader {
private final static Logger LOG = LoggerFactory.getLogger(ScanMultiVersionReader.class);
private final byte[] colonBytes;
private List<KeyValue> kvList = new ArrayList<>();
private int currentReadPosition = 0;
// rowkey column type
private ColumnType rowkeyReadoutType = null;
public ScanMultiVersionReader(Configuration configuration) {
super(configuration);
HbaseColumnCell rowKey = hbaseColumnCellMap.get(Constant.ROWKEY_FLAG);
if (rowKey != null && rowKey.getColumnType() != null) {
this.rowkeyReadoutType = rowKey.getColumnType();
} else {
this.rowkeyReadoutType = ColumnType.BYTES;
}
try {
this.colonBytes = ":".getBytes(encoding);
} catch (UnsupportedEncodingException e) {
throw DataXException.asDataXException(HbaseReaderErrorCode.PREPAR_READ_ERROR, "Failed to get binary of column family and column name colon separator inside the system.", e);
}
}
private void convertKVToLine(KeyValue keyValue, Record record) throws Exception {
byte[] rawRowkey = keyValue.getRow();
long timestamp = keyValue.getTimestamp();
byte[] cfAndQualifierName = Bytes.add(keyValue.getFamily(), this.colonBytes, keyValue.getQualifier());
record.addColumn(convertBytesToAssignType(this.rowkeyReadoutType, rawRowkey));
record.addColumn(convertBytesToAssignType(ColumnType.STRING, cfAndQualifierName));
// the user-configured type for the timestamp column is deliberately ignored; it is always emitted as a long
record.addColumn(new LongColumn(timestamp));
String cfAndQualifierNameStr = Bytes.toString(cfAndQualifierName);
HbaseColumnCell currentCell = hbaseColumnCellMap.get(cfAndQualifierNameStr);
ColumnType valueReadoutType = currentCell != null ? currentCell.getColumnType() : ColumnType.BYTES;
String dateFormat = currentCell != null ? currentCell.getDateformat() : null;
record.addColumn(convertBytesToAssignType(valueReadoutType, keyValue.getValue(), dateFormat));
}
private Column convertBytesToAssignType(ColumnType columnType, byte[] byteArray) throws Exception {
return convertBytesToAssignType(columnType, byteArray, null);
}
private Column convertBytesToAssignType(ColumnType columnType, byte[] byteArray, String dateFormat) throws Exception {
return ObHbaseReaderUtil.buildColumn(byteArray, columnType, encoding, dateFormat, timezone);
}
@Override
public boolean fetchLine(Record record) throws Exception {
Result result;
if (this.kvList.size() == this.currentReadPosition) {
result = getNextHbaseRow();
if (result == null) {
return false;
}
this.kvList = result.list();
if (this.kvList == null) {
return false;
}
this.currentReadPosition = 0;
}
try {
KeyValue keyValue = this.kvList.get(this.currentReadPosition);
convertKVToLine(keyValue, record);
} finally {
this.currentReadPosition++;
}
return true;
}
}


@ -0,0 +1,65 @@
package com.alibaba.datax.plugin.reader.obhbasereader.task;
import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.element.StringColumn;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell;
import com.alibaba.datax.plugin.reader.obhbasereader.enums.ColumnType;
import com.alibaba.datax.plugin.reader.obhbasereader.util.ObHbaseReaderUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class ScanNormalModeReader extends AbstractScanReader {
private static Logger LOG = LoggerFactory.getLogger(ScanNormalModeReader.class);
public ScanNormalModeReader(Configuration configuration) {
super(configuration);
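// normal mode always reads a single (latest) version per cell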
this.maxVersion = 1;
}
@Override
public boolean fetchLine(Record record) throws Exception {
Result result = getNextHbaseRow();
if (null == result) {
return false;
}
try {
byte[] hbaseColumnValue;
String columnName;
ColumnType columnType;
byte[] cf;
byte[] qualifier;
for (HbaseColumnCell cell : this.hbaseColumnCellMap.values()) {
columnType = cell.getColumnType();
Column column = null;
if (cell.isConstant()) {
// handle constant-valued columns
column = constantMap.get(cell.getColumnName());
} else {
// fetch the value by column name
columnName = cell.getColumnName();
if (ObHbaseReaderUtil.isRowkeyColumn(columnName)) {
hbaseColumnValue = result.getRow();
} else {
cf = cell.getCf();
qualifier = cell.getQualifier();
hbaseColumnValue = result.getValue(cf, qualifier);
}
column = ObHbaseReaderUtil.buildColumn(hbaseColumnValue, columnType, super.encoding, cell.getDateformat(), timezone);
}
record.addColumn(column);
}
} catch (Exception e) {
// Note: the exception caught here is expected to come from byte[] conversion failures. In practice,
// converting a string's byte[] to an integer type rarely fails, whereas converting it to a double fails easily.
record.setColumn(0, new StringColumn(Bytes.toStringBinary(result.getRow())));
throw e;
}
return true;
}
}


@ -0,0 +1,154 @@
package com.alibaba.datax.plugin.reader.obhbasereader.util;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode;
import com.alibaba.datax.plugin.reader.obhbasereader.Key;
import com.google.common.collect.Lists;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
public final class HbaseSplitUtil {
private final static Logger LOG = LoggerFactory.getLogger(HbaseSplitUtil.class);
public static List<Configuration> split(Configuration configuration) {
final List<Configuration> ranges = configuration.getListConfiguration(Key.RANGE);
if (CollectionUtils.isEmpty(ranges)) {
return Lists.newArrayList(configuration);
}
//TODO(yuez) once the hbase api supports querying regions, add logic here to query the table regions and intersect them with the user-specified ranges.
List<Configuration> sliceConfs = new ArrayList<>(ranges.size());
for (Configuration range : ranges) {
byte[] startRowKey = convertUserRowkey(range, true);
byte[] endRowKey = convertUserRowkey(range, false);
if (startRowKey.length != 0 && endRowKey.length != 0 && Bytes.compareTo(startRowKey, endRowKey) > 0) {
throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, "The startRowkey in obhbasereader must not be greater than the endRowkey.");
}
Configuration sliceConf = configuration.clone();
sliceConf.remove(Key.RANGE);
String startKeyStr = Bytes.toStringBinary(startRowKey);
String endRowKeyStr = Bytes.toStringBinary(endRowKey);
sliceConf.set(Key.START_ROWKEY, startKeyStr);
sliceConf.set(Key.END_ROWKEY, endRowKeyStr);
sliceConfs.add(sliceConf);
}
return sliceConfs;
}
public static byte[] convertUserRowkey(Configuration configuration, boolean isStart) {
String keyName = isStart ? Key.START_ROWKEY : Key.END_ROWKEY;
String rowkey = configuration.getString(keyName);
if (StringUtils.isBlank(rowkey)) {
return HConstants.EMPTY_BYTE_ARRAY;
} else {
boolean isBinaryRowkey = configuration.getBool(Key.IS_BINARY_ROWKEY, false);
return stringToBytes(rowkey, isBinaryRowkey);
}
}
private static byte[] stringToBytes(String rowkey, boolean isBinaryRowkey) {
if (isBinaryRowkey) {
return Bytes.toBytesBinary(rowkey);
} else {
return Bytes.toBytes(rowkey);
}
}
/**
* Only useful once the hbase api supports querying regions.
*
* @param config
* @param startRowkeyByte
* @param endRowkeyByte
* @param regionRanges
* @return
*/
private static List<Configuration> doSplit(Configuration config, byte[] startRowkeyByte, byte[] endRowkeyByte, Pair<byte[][], byte[][]> regionRanges) {
List<Configuration> configurations = new ArrayList<Configuration>();
for (int i = 0; i < regionRanges.getFirst().length; i++) {
byte[] regionStartKey = regionRanges.getFirst()[i];
byte[] regionEndKey = regionRanges.getSecond()[i];
// The current region is the last region.
// If the start key of the last region is greater than the user-specified userEndKey, the last region should be excluded.
// Note: if the user sets userEndKey to "", this check must not trigger; userEndKey == "" means reading through the largest region.
if (Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) == 0 && (endRowkeyByte.length != 0 && (Bytes.compareTo(regionStartKey, endRowkeyByte) > 0))) {
continue;
}
// If the current region is not the last one and the user-configured userStartKey is greater
// than or equal to the region's end key, this region should be excluded.
if ((Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) != 0) && (Bytes.compareTo(startRowkeyByte, regionEndKey) >= 0)) {
continue;
}
// If the user-configured userEndKey is less than or equal to the region's start key, this region should be excluded.
// Note: if the user sets userEndKey to "", this check must not trigger; userEndKey == "" means reading through the largest region.
if (endRowkeyByte.length != 0 && (Bytes.compareTo(endRowkeyByte, regionStartKey) <= 0)) {
continue;
}
String thisStartKey = getStartKey(startRowkeyByte, regionStartKey);
String thisEndKey = getEndKey(endRowkeyByte, regionEndKey);
Configuration p = config.clone();
p.set(Key.START_ROWKEY, thisStartKey);
p.set(Key.END_ROWKEY, thisEndKey);
LOG.debug("startRowkey:[{}], endRowkey:[{}] .", thisStartKey, thisEndKey);
configurations.add(p);
}
return configurations;
}
private static String getEndKey(byte[] endRowkeyByte, byte[] regionEndKey) {
if (endRowkeyByte == null) { // already validated upstream, so the userEndKey passed in cannot be null
throw new IllegalArgumentException("userEndKey should not be null!");
}
byte[] tempEndRowkeyByte;
if (endRowkeyByte.length == 0) {
tempEndRowkeyByte = regionEndKey;
} else if (Bytes.compareTo(regionEndKey, HConstants.EMPTY_BYTE_ARRAY) == 0) {
// this is the last region
tempEndRowkeyByte = endRowkeyByte;
} else {
if (Bytes.compareTo(endRowkeyByte, regionEndKey) > 0) {
tempEndRowkeyByte = regionEndKey;
} else {
tempEndRowkeyByte = endRowkeyByte;
}
}
return Bytes.toStringBinary(tempEndRowkeyByte);
}
private static String getStartKey(byte[] startRowkeyByte, byte[] regionStartKey) {
if (startRowkeyByte == null) { // already validated upstream, so the userStartKey passed in cannot be null
throw new IllegalArgumentException("userStartKey should not be null!");
}
byte[] tempStartRowkeyByte;
if (Bytes.compareTo(startRowkeyByte, regionStartKey) < 0) {
tempStartRowkeyByte = regionStartKey;
} else {
tempStartRowkeyByte = startRowkeyByte;
}
return Bytes.toStringBinary(tempStartRowkeyByte);
}
}


@ -0,0 +1,293 @@
package com.alibaba.datax.plugin.reader.obhbasereader.util;
import static com.alibaba.datax.plugin.reader.obhbasereader.enums.ModeType.MultiVersionFixedColumn;
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_DATABASE;
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_FULL_USER_NAME;
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_ODP_ADDR;
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_ODP_MODE;
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_ODP_PORT;
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_PARAM_URL;
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_PASSWORD;
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_SYS_PASSWORD;
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_SYS_USER_NAME;
import com.alibaba.datax.common.element.BoolColumn;
import com.alibaba.datax.common.element.BytesColumn;
import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.DateColumn;
import com.alibaba.datax.common.element.DoubleColumn;
import com.alibaba.datax.common.element.LongColumn;
import com.alibaba.datax.common.element.StringColumn;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.reader.obhbasereader.Constant;
import com.alibaba.datax.plugin.reader.obhbasereader.HTableManager;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseColumnCell;
import com.alibaba.datax.plugin.reader.obhbasereader.HbaseReaderErrorCode;
import com.alibaba.datax.plugin.reader.obhbasereader.Key;
import com.alibaba.datax.plugin.reader.obhbasereader.enums.ColumnType;
import com.alibaba.datax.plugin.reader.obhbasereader.enums.ModeType;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import com.alipay.oceanbase.hbase.OHTable;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import org.apache.commons.lang3.time.DateUtils;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.charset.Charset;
import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public final class ObHbaseReaderUtil {
private static Logger LOG = LoggerFactory.getLogger(ObHbaseReaderUtil.class);
public static void doPretreatment(Configuration originalConfig) {
String mode = ObHbaseReaderUtil.dealMode(originalConfig);
originalConfig.set(Key.MODE, mode);
String encoding = originalConfig.getString(Key.ENCODING, Constant.DEFAULT_ENCODING);
if (!Charset.isSupported(encoding)) {
throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, String.format("The encoding you configured is not supported by hbasereader:[%s]", encoding));
}
originalConfig.set(Key.ENCODING, encoding);
// Extra check here: the isBinaryRowkey setting must not appear at the same level as hbaseConfig and the other top-level settings.
Boolean isBinaryRowkey = originalConfig.getBool(Key.IS_BINARY_ROWKEY);
if (isBinaryRowkey != null) {
throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, String.format("%s cannot be configured here. It should be configured in range.", Key.IS_BINARY_ROWKEY));
}
}
/**
* Validates the mode and the settings paired with it.
*/
private static String dealMode(Configuration originalConfig) {
String mode = originalConfig.getString(Key.MODE);
ModeType modeType = ModeType.getByTypeName(mode);
List<Map> column = originalConfig.getList(Key.COLUMN, Map.class);
if (column == null || column.isEmpty()) {
throw DataXException.asDataXException(HbaseReaderErrorCode.REQUIRED_VALUE,
"You have configured the normal mode to read the data in HBase, so you must configure the column in the form of:column:[{\"name\": \"cf0:column0\",\"type\": \"string\"},"
+ "{\"name\": \"cf1:column1\",\"type\": \"long\"}]");
}
// run parseColumn for a further check of the column format
ObHbaseReaderUtil.parseColumn(column);
if (MultiVersionFixedColumn.equals(modeType)) {
Integer maxVersion = originalConfig.getInt(Key.MAX_VERSION);
Validate.notNull(maxVersion, String.format("You have configured the mode %s to read the data in HBase, so you must configure: maxVersion", mode));
boolean isMaxVersionValid = maxVersion == -1 || maxVersion > 1;
Validate.isTrue(isMaxVersionValid, String.format(
"You have configured mode %s to read data from HBase, but the configured maxVersion value is invalid. maxVersion must be -1 (read all versions) or greater than 1 (read that many of "
+ "the latest versions); 0 and 1 are rejected because they indicate the data should be read in normal mode rather than mode %s, and the two behave very differently.",
mode, mode));
}
return mode;
}
/**
* Note the difference between convertUserStartRowkey and convertInnerStartRowkey: the former honors isBinaryRowkey and is used only when the user-configured String rowkey is first converted to binary; the latter is used when the binary rowkey produced by splitting is written back into the configuration.
*/
public static byte[] convertInnerStartRowkey(Configuration configuration) {
String startRowkey = configuration.getString(Key.START_ROWKEY);
if (StringUtils.isBlank(startRowkey)) {
return HConstants.EMPTY_BYTE_ARRAY;
}
return Bytes.toBytesBinary(startRowkey);
}
public static byte[] convertInnerEndRowkey(Configuration configuration) {
String endRowkey = configuration.getString(Key.END_ROWKEY);
if (StringUtils.isBlank(endRowkey)) {
return HConstants.EMPTY_BYTE_ARRAY;
}
return Bytes.toBytesBinary(endRowkey);
}
private static void setObHBaseConfig(com.alibaba.datax.common.util.Configuration confFile, org.apache.hadoop.conf.Configuration oHbaseConf) {
boolean odpMode = confFile.getBool(Key.USE_ODP_MODE, false);
String username = confFile.getString(Key.USERNAME);
String password = confFile.getString(Key.PASSWORD);
String dbName = confFile.getString(Key.DB_NAME);
// oHbaseConf.set(RS_LIST_ACQUIRE_CONNECT_TIMEOUT.getKey(), "500");
// oHbaseConf.set(RS_LIST_ACQUIRE_READ_TIMEOUT.getKey(), "5000");
oHbaseConf.set(HBASE_OCEANBASE_FULL_USER_NAME, username);
oHbaseConf.set(HBASE_OCEANBASE_PASSWORD, password);
// oHbaseConf.set(HBASE_, META_SCANNER_CACHING);
if (odpMode) {
oHbaseConf.setBoolean(HBASE_OCEANBASE_ODP_MODE, true);
oHbaseConf.set(HBASE_OCEANBASE_DATABASE, dbName);
oHbaseConf.set(HBASE_OCEANBASE_ODP_ADDR, confFile.getString(Key.ODP_HOST));
oHbaseConf.setInt(HBASE_OCEANBASE_ODP_PORT, confFile.getInt(Key.ODP_PORT));
} else {
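// direct-connect mode: extract the cluster name from the three-part user@tenant#cluster
// username, then make sure the config url carries the ObRegion (cluster) and database parameters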
String clusterName = null;
final Pattern pattern = Pattern.compile("([\\w]+)@([\\w]+)#([\\w]+)");
Matcher matcher = pattern.matcher(username);
if (matcher.find()) {
clusterName = matcher.group(3);
} else {
throw new RuntimeException("user name is not in the correct format: user@tenant#cluster");
}
String configUrl = confFile.getString(Key.CONFIG_URL);
if (!configUrl.contains("ObRegion")) {
if (configUrl.contains("?")) {
configUrl += "&ObRegion=" + clusterName;
} else {
configUrl += "?ObRegion=" + clusterName;
}
}
if (!configUrl.contains("database")) {
configUrl += "&database=" + dbName;
}
oHbaseConf.set(HBASE_OCEANBASE_PARAM_URL, configUrl);
oHbaseConf.set(HBASE_OCEANBASE_SYS_USER_NAME, confFile.getString(Key.OB_SYS_USERNAME));
oHbaseConf.set(HBASE_OCEANBASE_SYS_PASSWORD, confFile.getString(Key.OB_SYS_PASSWORD));
}
String hbaseConf = confFile.getString(Key.HBASE_CONFIG);
Map<String, String> map = JSON.parseObject(hbaseConf, new TypeReference<Map<String, String>>() {
});
if (MapUtils.isNotEmpty(map)) {
for (Map.Entry<String, String> entry : map.entrySet()) {
oHbaseConf.set(entry.getKey(), entry.getValue());
}
}
}
/**
* Creates a new HTable on every call. Note that HTable itself is not thread-safe.
*/
public static OHTable initOHtable(com.alibaba.datax.common.util.Configuration configuration) {
String tableName = configuration.getString(Key.TABLE);
try {
org.apache.hadoop.conf.Configuration oHbaseConf = new org.apache.hadoop.conf.Configuration();
setObHBaseConfig(configuration, oHbaseConf);
return HTableManager.createHTable(oHbaseConf, tableName);
} catch (Exception e) {
LOG.error("init ohTable error, reason: {}", e.getMessage(), e);
throw DataXException.asDataXException(HbaseReaderErrorCode.INIT_TABLE_ERROR, e);
}
}
public static boolean isRowkeyColumn(String columnName) {
return Constant.ROWKEY_FLAG.equalsIgnoreCase(columnName);
}
public static String parseColumnFamily(Collection<HbaseColumnCell> hbaseColumnCells) {
for (HbaseColumnCell columnCell : hbaseColumnCells) {
if (ObHbaseReaderUtil.isRowkeyColumn(columnCell.getColumnName())) {
continue;
}
if (columnCell.getColumnName() == null || columnCell.getColumnName().split(":").length != 2) {
LOG.error("column cell format is unknown: {}", columnCell);
throw new RuntimeException("Column cell format is unknown: " + columnCell);
}
return columnCell.getColumnName().split(":")[0];
}
throw new RuntimeException("parse column family failed.");
}
/**
* Parses the column configuration.
*/
public static LinkedHashMap<String, HbaseColumnCell> parseColumn(List<Map> column) {
return parseColumn(column, null, Constant.DEFAULT_ENCODING, Constant.DEFAULT_TIMEZONE);
}
public static LinkedHashMap<String, HbaseColumnCell> parseColumn(List<Map> column, Map<String, Column> constantMap, String encoding, String timezone) {
LinkedHashMap<String, HbaseColumnCell> hbaseColumnCells = new LinkedHashMap<>(column.size());
boolean cacheConstantValue = constantMap != null;
HbaseColumnCell oneColumnCell;
try {
for (Map<String, String> aColumn : column) {
ColumnType type = ColumnType.getByTypeName(aColumn.get("type"));
boolean isRowKey = isRowkeyColumn(aColumn.get("name"));
String columnName = isRowKey ? Constant.ROWKEY_FLAG : aColumn.get("name");
String columnValue = aColumn.get("value");
String dateFormat = aColumn.getOrDefault("format", Constant.DEFAULT_DATE_FORMAT);
Validate.isTrue(StringUtils.isNotBlank(columnName) || StringUtils.isNotBlank(columnValue),
"It is either a combination of type + name + format or a combination of type + value + format. Your configuration is neither of the two. Please check and modify it.");
if (type == ColumnType.DATE) {
if (StringUtils.isBlank(dateFormat)) {
LOG.warn("date format for {} is empty, use default date format 'yyyy-MM-dd HH:mm:ss' instead.", columnName);
}
oneColumnCell = new HbaseColumnCell.Builder(type).columnName(columnName).columnValue(columnValue).dateformat(dateFormat).build();
} else {
oneColumnCell = new HbaseColumnCell.Builder(type).columnName(columnName).columnValue(columnValue).build();
}
hbaseColumnCells.put(columnName, oneColumnCell);
if (cacheConstantValue && oneColumnCell.isConstant()) {
constantMap.put(columnName, buildColumn(columnValue, type, encoding, dateFormat, timezone));
}
}
return hbaseColumnCells;
} catch (Exception e) {
LOG.error("parse column failed, reason:{}", e.getMessage(), e);
throw DataXException.asDataXException(HbaseReaderErrorCode.PARSE_COLUMN_ERROR, e.getMessage());
}
}
public static Column buildColumn(String columnValue, ColumnType columnType, String encoding, String dateformat, String timezone) throws Exception {
return buildColumn(columnValue.getBytes(encoding), columnType, encoding, dateformat, timezone);
}
public static Column buildColumn(byte[] columnValue, ColumnType columnType, String encoding, String dateformat, String timezone) throws Exception {
switch (columnType) {
case BOOLEAN:
return new BoolColumn(columnValue == null ? null : Bytes.toBoolean(columnValue));
case SHORT:
return new LongColumn(columnValue == null ? null : String.valueOf(Bytes.toShort(columnValue)));
case INT:
return new LongColumn(columnValue == null ? null : Bytes.toInt(columnValue));
case LONG:
return new LongColumn(columnValue == null ? null : Bytes.toLong(columnValue));
case BYTES:
return new BytesColumn(columnValue == null ? null : columnValue);
case FLOAT:
return new DoubleColumn(columnValue == null ? null : Bytes.toFloat(columnValue));
case DOUBLE:
return new DoubleColumn(columnValue == null ? null : Bytes.toDouble(columnValue));
case STRING:
return new StringColumn(columnValue == null ? null : new String(columnValue, encoding));
case BINARY_STRING:
return new StringColumn(columnValue == null ? null : Bytes.toStringBinary(columnValue));
case DATE:
if (columnValue == null) {
// keep the contract of the other branches: a null value yields a null column
return null;
}
String dateValue = Bytes.toStringBinary(columnValue);
String timestamp;
try {
// the stored value may be epoch milliseconds
long milliSec = Long.parseLong(dateValue);
Date date = new java.util.Date(milliSec);
SimpleDateFormat sdf = new java.text.SimpleDateFormat(dateformat);
sdf.setTimeZone(java.util.TimeZone.getTimeZone(timezone));
timestamp = sdf.format(date);
} catch (Exception e) {
// this is already a formatted timestamp
timestamp = dateValue;
}
return new DateColumn(DateUtils.parseDate(timestamp, dateformat));
default:
throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, "obhbasereader does not support the configured column type: " + columnType);
}
}
}


@ -0,0 +1,190 @@
package com.alibaba.datax.plugin.reader.obhbasereader.util;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.reader.Constant;
import com.alibaba.datax.plugin.rdbms.util.DBUtil;
import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import com.alibaba.datax.plugin.rdbms.util.SplitedSlice;
import com.alibaba.datax.plugin.reader.obhbasereader.Key;
import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ExecutorTemplate;
import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.ObReaderUtils;
import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.PartInfo;
import com.alibaba.datax.plugin.reader.oceanbasev10reader.util.PartitionSplitUtil;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.collections.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class SqlReaderSplitUtil {
public static final String SAMPLE_SQL_TEMPLATE = "SELECT `hex` FROM (SELECT `hex`,K , bucket, ROW_NUMBER() OVER (PARTITION BY bucket ORDER BY K) rn FROM(SELECT %s `hex`, K ,NTILE(%s) OVER "
+ "(ORDER BY K ) bucket FROM (SELECT hex(K) as `hex`, K FROM %s SAMPLE BLOCK(%s)) a) b) c WHERE rn = 1 GROUP BY K ORDER BY K";
public static final String MIDDLE_RANGE_TEMPLATE = "((K) > (unhex('%s'))) AND ((K) <= (unhex('%s')))";
public static final String MIN_MAX_RANGE_TEMPLATE = "((K)<= (unhex('%s'))) or ((K) > (unhex('%s')))";
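// SAMPLE_SQL_TEMPLATE samples SAMPLE BLOCK(percentage) of the rows, buckets the sampled rowkeys
// with NTILE(adviceNum) and keeps the first K of each bucket as a split boundary.
// MIDDLE_RANGE_TEMPLATE then covers (boundary_i, boundary_i+1]; MIN_MAX_RANGE_TEMPLATE covers
// everything at or below the first boundary plus everything above the last one.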
private static final Logger LOG = LoggerFactory.getLogger(SqlReaderSplitUtil.class);
public static List<Configuration> splitSingleTable(Configuration configuration, String tableName, String columnFamily, int eachTableShouldSplittedNumber, boolean readByPartition) {
List<String> partitionList = Lists.newArrayList();
String tableNameWithCf = tableName + "$" + columnFamily;
PartInfo partInfo = PartitionSplitUtil.getObMySQLPartInfoBySQL(configuration, tableNameWithCf);
if (partInfo.isPartitionTable()) {
partitionList.addAll(partInfo.getPartList());
}
// read all partitions and split job only by partition
if (readByPartition) {
LOG.info("table: [{}] will read only by partition", tableNameWithCf);
return splitSingleTableByPartition(configuration, partitionList);
}
if (eachTableShouldSplittedNumber <= 1) {
LOG.info("total enable splitted number of table: [{}] is {}, no need to split", tableNameWithCf, eachTableShouldSplittedNumber);
return Lists.newArrayList(configuration);
}
// If the user specified particular partitions to read, restrict the split to those partitions.
List<String> userSetPartitions = configuration.getList(Key.PARTITION_NAME, String.class);
if (CollectionUtils.isNotEmpty(userSetPartitions)) {
Set<String> partSet = new HashSet<>(partitionList);
// If a partition name does not exist in the table, throw an exception directly. Names are case-sensitive.
userSetPartitions.forEach(e -> Preconditions.checkArgument(partSet.contains(e), "partition %s does not exist in table: %s", e, tableNameWithCf));
partitionList.clear();
partitionList.addAll(userSetPartitions);
}
if (partitionList.isEmpty()) {
LOG.info("table: [{}] is not partitioned, just split table by rowKey.", tableNameWithCf);
List<Configuration> splitConfs = splitSingleTableByRowKey(configuration, tableNameWithCf, eachTableShouldSplittedNumber);
LOG.info("total split count of non-partitioned table :[{}] is {}", tableNameWithCf, splitConfs.size());
return splitConfs;
} else {
ExecutorTemplate<List<Configuration>> template = new ExecutorTemplate<>("split-rows-by-rowkey-" + tableNameWithCf + "-", eachTableShouldSplittedNumber);
int splitNumPerPartition = (int) Math.ceil(1.0d * eachTableShouldSplittedNumber / partitionList.size());
LOG.info("table: [{}] is partitioned, split table by rowKey in parallel. splitNumPerPartition is {}", tableNameWithCf, splitNumPerPartition);
for (String partName : partitionList) {
try {
template.submit(() -> {
Configuration tempConf = configuration.clone();
tempConf.set(Key.PARTITION_NAME, partName);
return splitSingleTableByRowKey(tempConf, tableNameWithCf, splitNumPerPartition);
});
} catch (Throwable th) {
LOG.error("submit split task of table: [{}-{}] failed, reason: {}", tableNameWithCf, partName, th.getMessage(), th);
}
}
List<Configuration> splitConfs = template.waitForResult().stream().flatMap(Collection::stream).collect(Collectors.toList());
LOG.info("total split count of partitioned table :[{}] is {}", tableNameWithCf, splitConfs.size());
return splitConfs;
}
}
private static List<Configuration> splitSingleTableByPartition(Configuration configuration, List<String> partList) {
if (partList == null || partList.isEmpty()) {
return Lists.newArrayList(configuration);
}
List<Configuration> confList = new ArrayList<>();
for (String partName : partList) {
LOG.info("read sub task: reading from partition " + partName);
Configuration conf = configuration.clone();
conf.set(Key.PARTITION_NAME, partName);
confList.add(conf);
}
return confList;
}
/**
* @param configuration
* @param tableNameWithCf
* @param eachTableShouldSplittedNumber
* @return
*/
public static List<Configuration> splitSingleTableByRowKey(Configuration configuration, String tableNameWithCf, int eachTableShouldSplittedNumber) {
String jdbcURL = configuration.getString(Key.JDBC_URL);
String username = configuration.getString(Key.USERNAME);
String password = configuration.getString(Key.PASSWORD);
String hint = configuration.getString(Key.READER_HINT, com.alibaba.datax.plugin.reader.obhbasereader.Constant.OB_READ_HINT);
String partInfo = "";
String partName = configuration.getString(Key.PARTITION_NAME, null);
if (partName != null) {
partInfo = " partition(" + partName + ")";
}
tableNameWithCf += partInfo;
int fetchSize = configuration.getInt(Constant.FETCH_SIZE, com.alibaba.datax.plugin.reader.obhbasereader.Constant.DEFAULT_FETCH_SIZE);
Double percentage = configuration.getDouble(Key.SAMPLE_PERCENTAGE, 0.1);
List<SplitedSlice> slices = new ArrayList<>();
List<Configuration> pluginParams = new ArrayList<>();
// set ob_query_timeout and ob_trx_timeout to a large time in case timeout
int queryTimeoutSeconds = 60 * 60 * 48;
try (Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcURL, username, password)) {
String setQueryTimeout = "set ob_query_timeout=" + (queryTimeoutSeconds * 1000 * 1000L);
String setTrxTimeout = "set ob_trx_timeout=" + ((queryTimeoutSeconds + 5) * 1000 * 1000L);
try (Statement stmt = conn.createStatement()) {
stmt.execute(setQueryTimeout);
stmt.execute(setTrxTimeout);
} catch (Exception e) {
LOG.warn("set ob_query_timeout and set ob_trx_timeout failed. reason: {}", e.getMessage(), e);
}
slices = getSplitSqlBySample(conn, tableNameWithCf, fetchSize, percentage, eachTableShouldSplittedNumber, hint);
} catch (Throwable e) {
LOG.warn("query rowkey range failed of table: {}. reason: {}. the table will not be splitted.", tableNameWithCf, e.getMessage(), e);
}
if (!slices.isEmpty()) {
for (SplitedSlice slice : slices) {
Configuration tempConfig = configuration.clone();
tempConfig.set(Key.RANGE, slice.getRange());
pluginParams.add(tempConfig);
}
} else {
Configuration tempConfig = configuration.clone();
pluginParams.add(tempConfig);
}
return pluginParams;
}
/**
* Split by sampling. The table cannot simply be split sequentially, since cells that belong to one logical row could otherwise be scattered across two slices.
*
* @param conn
* @param tableName
* @param fetchSize
* @param percentage
* @param adviceNum
* @param hint
* @return List<SplitedSlice>
* @throws SQLException
*/
private static List<SplitedSlice> getSplitSqlBySample(Connection conn, String tableName, int fetchSize, double percentage, int adviceNum, String hint) throws SQLException {
String splitSql = String.format(SAMPLE_SQL_TEMPLATE, hint, adviceNum, tableName, percentage);
LOG.info("split pk [sql={}] is running... ", splitSql);
List<String> boundList = new ArrayList<>();
try (ResultSet rs = DBUtil.query(conn, splitSql, fetchSize)) {
while (rs.next()) {
boundList.add(rs.getString(1));
}
}
if (boundList.size() == 0) {
return new ArrayList<>();
}
List<SplitedSlice> rangeSql = new ArrayList<>();
for (int i = 0; i < boundList.size() - 1; i++) {
String range = String.format(MIDDLE_RANGE_TEMPLATE, boundList.get(i), boundList.get(i + 1));
SplitedSlice slice = new SplitedSlice(boundList.get(i), boundList.get(i + 1), range);
rangeSql.add(slice);
}
String range = String.format(MIN_MAX_RANGE_TEMPLATE, boundList.get(0), boundList.get(boundList.size() - 1));
SplitedSlice slice = new SplitedSlice(null, null, range);
rangeSql.add(slice);
return rangeSql;
}
}


@ -0,0 +1,6 @@
{
"name": "obhbasereader",
"class": "com.alibaba.datax.plugin.reader.obhbasereader.ObHbaseReader",
"description": "useScene: prod. mechanism: Scan to read data.",
"developer": "alibaba"
}


@ -0,0 +1,15 @@
{
"name": "obhbasereader",
"parameter": {
"hbaseConfig": {},
"table": "",
"encoding": "",
"mode": "",
"column": [],
"range": {
"startRowkey": "",
"endRowkey": ""
},
"isBinaryRowkey": true
}
}


@ -0,0 +1,209 @@
OceanBase's table api provides applications with an ObHBase access interface, so the obhbase writer built on the OceanBase table api is similar in structure and configuration to the HBase writer.
## 1 Quick Introduction
The obhbasewriter plugin writes data into ObHbase. Under the hood, obhbasewriter connects to the remote HBase service through the HBase Java client and writes to obHbase via put operations.
### 1.1 Supported Features
1. obhbasewriter currently supports obHbase on OceanBase 3.x and 4.x.
2. obhbasewriter supports concatenating multiple source columns into the rowkey of the ObHbase table; see the rowkeyColumn parameter for details.
3. The timestamp (version) written to obhbase can be taken from one of three sources: the current time, a specified source column, or a fixed time; see the versionColumn parameter.
#### Sample Job Configuration
```json
{
"job": {
"setting": {
"speed": {
"channel": 5
}
},
"content": [
{
"reader": {
"name": "txtfilereader",
"parameter": {
"path": "/normal.txt",
"charset": "UTF-8",
"column": [
{
"index": 0,
"type": "String"
},
{
"index": 1,
"type": "string"
},
{
"index": 2,
"type": "string"
},
{
"index": 3,
"type": "string"
},
{
"index": 4,
"type": "string"
},
{
"index": 5,
"type": "string"
},
{
"index": 6,
"type": "string"
}
],
"fieldDelimiter": ","
}
},
"writer": {
"name": "obhbasewriter",
"parameter": {
"username": "username",
"password": "password",
"writerThreadCount": "20",
"writeBufferHighMark": "2147483647",
"rpcExecuteTimeout": "30000",
"useOdpMode": "false",
"obSysUser": "root",
"obSysPassword": "",
"column": [
{
"index": 0,
"name": "family1:c1",
"type": "string"
},
{
"index": 1,
"name": "family1:c2",
"type": "string"
},
{
"index": 2,
"name": "family1:c3",
"type": "string"
},
{
"index": 3,
"name": "family1:c4",
"type": "string"
},
{
"index": 4,
"name": "family1:c5",
"type": "string"
},
{
"index": 5,
"name": "family1:c6",
"type": "string"
},
{
"index": 6,
"name": "family1:c7",
"type": "string"
}
],
"mode": "normal",
"rowkeyColumn": [
{
"index": 0,
"type": "string"
},
{
"index": 3,
"type": "string"
},
{
"index": 2,
"type": "string"
},
{
"index": 1,
"type": "string"
}
],
"table": "htable3",
"batchSize": "200",
"dbName": "database",
"jdbcUrl": "jdbc:mysql://ip:port/database?"
}
}
}
]
}
}
```
##### Parameter Description
- **connection**
  The required settings differ between public cloud and private cloud, as follows:
  Public cloud:
  - database username (configured once at the outer level)
  - user password (configured once at the outer level)
  - jdbc address of the proxy
  - database name
  Private cloud:
  - database username (configured once at the outer level)
  - user password (configured once at the outer level)
  - jdbc address of the proxy
  - obSysUser: username of the sys tenant
  - obSysPassword: password of the sys tenant
  - configUrl
    - Description: can be obtained via show parameters like 'obConfigUrl'.
    - Required: yes
    - Default: none
- **jdbcUrl**
  - Description: the jdbc url used to connect to ob. Two formats are supported:
    - jdbc:mysql://obproxyIp:obproxyPort/db
      - with this format, username must be written in the three-part form user@tenant#cluster
    - ||_dsc_ob10_dsc_||clusterName:tenantName||_dsc_ob10_dsc_||jdbc:mysql://obproxyIp:obproxyPort/db
      - with this format, username is just the plain user name; the three-part form is not needed
  - Required: yes
  - Default: none
- **table**
  - Description: the table to synchronize. Do not include column family information.
  - Required: yes
  - Default: none
- **username**
  - Description: the username for accessing OceanBase.
  - Required: yes
  - Default: none
- **useOdpMode**
  - Description: whether to connect through the proxy. Must be set to true when the sys tenant credentials cannot be provided.
  - Required: no
  - Default: false
- **column**
  - Description: the hbase columns to write. index is the 0-based index of the corresponding reader column; name is the hbase column, which must use the columnFamily:columnName form; type is the data type used to convert the value into an HBase byte[]. Configured as follows:
```json
"column": [ { "index":1, "name": "cf1:q1", "type": "string" }, { "index":2, "name": "cf1:q2", "type": "string" } ]
```
  - Required: yes
  - Default: none
- **rowkeyColumn**
  - Description: the columns that form the rowkey written to ObHbase. index is the 0-based index of the corresponding reader column, or -1 for a constant; type is the data type used for the HBase byte[] conversion; value holds the constant, which is typically used as a separator between fields. obhbasewriter concatenates all rowkeyColumn entries in the configured order to build the rowkey; the entries cannot all be constants. Configured as follows (a worked sketch follows this parameter list):
```json
"rowkeyColumn": [ { "index":0, "type":"string" }, { "index":-1, "type":"string", "value":"_" } ]
```
  - Required: yes
  - Default: none
- **versionColumn**
  - Description: the timestamp written to obhbase. One of three options: the current time, a specified time column, or a fixed time. If omitted, the current time is used. index is the 0-based index of the corresponding reader column and its value must be convertible to long; Date values are parsed with yyyy-MM-dd HH:mm:ss and yyyy-MM-dd HH:mm:ss SSS. For a fixed time, index is -1 and value holds the time as a long. Configured as follows:
```json
"versionColumn":{ "index":1 }
```
  or
```json
"versionColumn":{ "index":-1, "value":123456789 }
```
  - Required: no
  - Default: none
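
To make the rowkey concatenation concrete, here is a minimal standalone sketch. It is illustrative only: the class and method are hypothetical, not plugin code, and it assumes the rowkeyColumn configuration [ {"index":0}, {"index":-1, "value":"_"}, {"index":1} ] with string-typed reader columns.

```java
import java.nio.charset.StandardCharsets;

// Hypothetical illustration of how rowkeyColumn entries are concatenated; not part of the plugin.
public class RowkeySketch {
    // Assumes rowkeyColumn = [ {"index":0}, {"index":-1, "value":"_"}, {"index":1} ]:
    // reader column 0, then the constant separator "_", then reader column 1, in configured order.
    static byte[] buildRowkey(String col0, String col1) {
        return (col0 + "_" + col1).getBytes(StandardCharsets.UTF_8);
    }

    public static void main(String[] args) {
        byte[] rowkey = buildRowkey("user01", "20240101");
        System.out.println(new String(rowkey, StandardCharsets.UTF_8)); // prints user01_20240101
    }
}
```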

obhbasewriter/pom.xml Normal file

@ -0,0 +1,185 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>datax-all</artifactId>
<groupId>com.alibaba.datax</groupId>
<version>0.0.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>obhbasewriter</artifactId>
<groupId>com.alibaba.datax</groupId>
<version>0.0.1-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-common</artifactId>
<version>${datax-project-version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>plugin-rdbms-util</artifactId>
<version>${datax-project-version}</version>
<exclusions>
<exclusion>
<artifactId>guava</artifactId>
<groupId>com.google.guava</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</dependency>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>simulator</artifactId>
<version>${datax-project-version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>
<version>4.0.4.RELEASE</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.taobao.tddl</groupId>
<artifactId>tddl-client</artifactId>
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
<exclusion>
<groupId>com.taobao.diamond</groupId>
<artifactId>diamond-client</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>33.1.0-jre</version>
</dependency>
<dependency>
<groupId>com.alipay.oceanbase</groupId>
<artifactId>oceanbase-connector-java</artifactId>
<version>3.2.0</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.16</version>
</dependency>
<dependency>
<groupId>com.alibaba.toolkit.common</groupId>
<artifactId>toolkit-common-logging</artifactId>
<version>1.10</version>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20160810</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-module-junit4</artifactId>
<version>1.4.10</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-api-mockito</artifactId>
<version>1.4.10</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>1.8.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.oceanbase</groupId>
<artifactId>obkv-hbase-client</artifactId>
<version>0.1.4.2</version>
<exclusions>
<exclusion>
<artifactId>guava</artifactId>
<groupId>com.google.guava</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>1.0.3</version>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<directory>src/main/java</directory>
<includes>
<include>**/*.properties</include>
</includes>
</resource>
</resources>
<plugins>
<!-- compiler plugin -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${jdk-version}</source>
<target>${jdk-version}</target>
<encoding>${project-sourceEncoding}</encoding>
</configuration>
</plugin>
<!-- assembly plugin -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>src/main/assembly/package.xml</descriptor>
</descriptors>
<finalName>datax</finalName>
</configuration>
<executions>
<execution>
<id>dwzip</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>


@ -0,0 +1,35 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id></id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>src/main/resources</directory>
<includes>
<include>plugin.json</include>
<include>plugin_job_template.json</include>
</includes>
<outputDirectory>plugin/writer/obhbasewriter</outputDirectory>
</fileSet>
<fileSet>
<directory>target/</directory>
<includes>
<include>obhbasewriter-0.0.1-SNAPSHOT.jar</include>
</includes>
<outputDirectory>plugin/writer/obhbasewriter</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<outputDirectory>plugin/writer/obhbasewriter/libs</outputDirectory>
<scope>runtime</scope>
</dependencySet>
</dependencySets>
</assembly>


@ -0,0 +1,50 @@
package com.alibaba.datax.plugin.writer.obhbasewriter;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.MessageSource;
import java.util.Arrays;
import org.apache.commons.lang.StringUtils;
/**
* Only meaningful when reading in normal mode; column types do not exist for multi-version reads.
*/
public enum ColumnType {
STRING("string"),
BINARY_STRING("binarystring"),
BYTES("bytes"),
BOOLEAN("boolean"),
SHORT("short"),
INT("int"),
LONG("long"),
FLOAT("float"),
DOUBLE("double"),
DATE("date"),
BINARY("binary");
private String typeName;
ColumnType(String typeName) {
this.typeName = typeName;
}
public static ColumnType getByTypeName(String typeName) {
if (StringUtils.isBlank(typeName)) {
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MessageSource.loadResourceBundle(ColumnType.class).message("columntype.1", typeName, Arrays.asList(values())));
}
for (ColumnType columnType : values()) {
if (StringUtils.equalsIgnoreCase(columnType.typeName, typeName.trim())) {
return columnType;
}
}
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MessageSource.loadResourceBundle(ColumnType.class).message("columntype.1", typeName, Arrays.asList(values())));
}
@Override
public String toString() {
return this.typeName;
}
}


@ -0,0 +1,42 @@
package com.alibaba.datax.plugin.writer.obhbasewriter;
public interface Config {
String MEMSTORE_THRESHOLD = "memstoreThreshold";
double DEFAULT_MEMSTORE_THRESHOLD = 0.9d;
String MEMSTORE_CHECK_INTERVAL_SECOND = "memstoreCheckIntervalSecond";
long DEFAULT_MEMSTORE_CHECK_INTERVAL_SECOND = 30;
String FAIL_TRY_COUNT = "failTryCount";
int DEFAULT_FAIL_TRY_COUNT = 10000;
String WRITER_THREAD_COUNT = "writerThreadCount";
int DEFAULT_WRITER_THREAD_COUNT = 5;
String CONCURRENT_WRITE = "concurrentWrite";
boolean DEFAULT_CONCURRENT_WRITE = true;
String RS_URL = "rsUrl";
String OB_VERSION = "obVersion";
String TIMEOUT = "timeout";
String PRINT_COST = "printCost";
boolean DEFAULT_PRINT_COST = false;
String COST_BOUND = "costBound";
long DEFAULT_COST_BOUND = 20;
String MAX_ACTIVE_CONNECTION = "maxActiveConnection";
int DEFAULT_MAX_ACTIVE_CONNECTION = 2000;
}


@ -0,0 +1,78 @@
package com.alibaba.datax.plugin.writer.obhbasewriter;
public final class ConfigKey {
public final static String HBASE_CONFIG = "hbaseConfig";
public final static String TABLE = "table";
public final static String DBNAME = "dbName";
public final static String OBCONFIG_URL = "obConfigUrl";
public final static String JDBC_URL = "jdbcUrl";
/**
* mode can take one of three values: normal, multiVersionFixedColumn or multiVersionDynamicColumn; there is no default
* <p/>
* normal is used together with column (a Map structure)
* <p/>
* multiVersion
*/
public final static String MODE = "mode";
public final static String ROWKEY_COLUMN = "rowkeyColumn";
public final static String VERSION_COLUMN = "versionColumn";
/**
* Defaults to utf8
*/
public final static String ENCODING = "encoding";
public final static String COLUMN = "column";
public static final String INDEX = "index";
public static final String NAME = "name";
public static final String TYPE = "type";
public static final String VALUE = "value";
public static final String FORMAT = "format";
/**
* Defaults to EMPTY_BYTES
*/
public static final String NULL_MODE = "nullMode";
public static final String TRUNCATE = "truncate";
public static final String AUTO_FLUSH = "autoFlush";
public static final String WAL_FLAG = "walFlag";
public static final String WRITE_BUFFER_SIZE = "writeBufferSize";
public static final String MAX_RETRY_COUNT = "maxRetryCount";
public static final String USE_ODP_MODE = "useOdpMode";
public static final String OB_SYS_USER = "obSysUser";
public static final String OB_SYS_PASSWORD = "obSysPassword";
public static final String ODP_HOST = "odpHost";
public static final String ODP_PORT = "odpPort";
public static final String OBHBASE_HTABLE_CLIENT_WRITE_BUFFER = "obhbaseClientWriteBuffer";
public static final String OBHBASE_HTABLE_PUT_WRITE_BUFFER_CHECK = "obhbaseHtablePutWriteBufferCheck";
public static final String WRITE_BUFFER_LOW_MARK = "writeBufferLowMark";
public static final String WRITE_BUFFER_HIGH_MARK = "writeBufferHighMark";
public static final String TABLE_CLIENT_RPC_EXECUTE_TIMEOUT = "rpcExecuteTimeout";
}


@ -0,0 +1,110 @@
package com.alibaba.datax.plugin.writer.obhbasewriter;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.common.util.MessageSource;
import com.alibaba.datax.plugin.rdbms.writer.Key;
import java.nio.charset.Charset;
import java.util.List;
/**
* Created by johnxu.xj on Sept 30 2018
*/
public class ConfigValidator {
private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(ConfigValidator.class);
public static void validateParameter(com.alibaba.datax.common.util.Configuration originalConfig) {
originalConfig.getNecessaryValue(Key.USERNAME, Hbase094xWriterErrorCode.REQUIRED_VALUE);
originalConfig.getNecessaryValue(Key.PASSWORD, Hbase094xWriterErrorCode.REQUIRED_VALUE);
// originalConfig.getNecessaryValue(ConfigKey.OBCONFIG_URL, Hbase094xWriterErrorCode.REQUIRED_VALUE);
originalConfig.getNecessaryValue(ConfigKey.TABLE, Hbase094xWriterErrorCode.REQUIRED_VALUE);
originalConfig.getNecessaryValue(ConfigKey.DBNAME, Hbase094xWriterErrorCode.REQUIRED_VALUE);
ConfigValidator.validateMode(originalConfig);
String encoding = originalConfig.getString(ConfigKey.ENCODING, Constant.DEFAULT_ENCODING);
if (!Charset.isSupported(encoding)) {
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.9", encoding));
}
originalConfig.set(ConfigKey.ENCODING, encoding);
}
public static void validateMode(Configuration originalConfig) {
String mode = originalConfig.getNecessaryValue(ConfigKey.MODE, Hbase094xWriterErrorCode.REQUIRED_VALUE);
ModeType modeType = ModeType.getByTypeName(mode);
if (ModeType.Normal.equals(modeType)) {
validateRowkeyColumn(originalConfig);
validateColumn(originalConfig);
validateVersionColumn(originalConfig);
}
if (originalConfig.getBool(ConfigKey.USE_ODP_MODE)) {
originalConfig.getNecessaryValue(ConfigKey.ODP_HOST, Hbase094xWriterErrorCode.REQUIRED_VALUE);
originalConfig.getNecessaryValue(ConfigKey.ODP_PORT, Hbase094xWriterErrorCode.REQUIRED_VALUE);
} else {
originalConfig.getNecessaryValue(ConfigKey.OBCONFIG_URL, Hbase094xWriterErrorCode.REQUIRED_VALUE);
originalConfig.getNecessaryValue(ConfigKey.OB_SYS_USER, Hbase094xWriterErrorCode.REQUIRED_VALUE);
}
}
public static void validateColumn(Configuration originalConfig) {
List<Configuration> columns = originalConfig.getListConfiguration(ConfigKey.COLUMN);
if (columns == null || columns.isEmpty()) {
throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.11"));
}
for (Configuration aColumn : columns) {
Integer index = aColumn.getInt(ConfigKey.INDEX);
String type = aColumn.getNecessaryValue(ConfigKey.TYPE, Hbase094xWriterErrorCode.REQUIRED_VALUE);
String name = aColumn.getNecessaryValue(ConfigKey.NAME, Hbase094xWriterErrorCode.REQUIRED_VALUE);
ColumnType.getByTypeName(type);
if (name.split(":").length != 2) {
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.12", name));
}
if (index == null || index < 0) {
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.13"));
}
}
}
public static void validateRowkeyColumn(Configuration originalConfig) {
List<Configuration> rowkeyColumn = originalConfig.getListConfiguration(ConfigKey.ROWKEY_COLUMN);
if (rowkeyColumn == null || rowkeyColumn.isEmpty()) {
throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.14"));
}
int rowkeyColumnSize = rowkeyColumn.size();
//Each element is either {"index":0,"type":"string"} or {"index":-1,"type":"string","value":"_"}
for (Configuration aRowkeyColumn : rowkeyColumn) {
Integer index = aRowkeyColumn.getInt(ConfigKey.INDEX);
String type = aRowkeyColumn.getNecessaryValue(ConfigKey.TYPE, Hbase094xWriterErrorCode.REQUIRED_VALUE);
ColumnType.getByTypeName(type);
if (index == null) {
throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.15"));
}
//The rowkey cannot consist only of index=-1 entries, i.e. a rowkey built purely from constants
if (rowkeyColumnSize == 1 && index == -1) {
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.16"));
}
if (index == -1) {
aRowkeyColumn.getNecessaryValue(ConfigKey.VALUE, Hbase094xWriterErrorCode.REQUIRED_VALUE);
}
}
}
public static void validateVersionColumn(com.alibaba.datax.common.util.Configuration originalConfig) {
Configuration versionColumn = originalConfig.getConfiguration(ConfigKey.VERSION_COLUMN);
//null means the current time is used; to reference a column, index is required
if (versionColumn != null) {
Integer index = versionColumn.getInt(ConfigKey.INDEX);
if (index == null) {
throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.17"));
}
if (index == -1) {
//A fixed timestamp requires index=-1 together with value
versionColumn.getNecessaryValue(ConfigKey.VALUE, Hbase094xWriterErrorCode.REQUIRED_VALUE);
} else if (index < 0) {
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbase094xhelper.18"));
}
}
}
}
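
To illustrate the rowkey rule above (a rowkey made only of index=-1 constants is rejected, since every record would map to the same key), here is a hedged sketch, assuming "string" is a type accepted by ColumnType:

```java
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.writer.obhbasewriter.ConfigValidator;

public class RowkeyRuleSketch {
    public static void main(String[] args) {
        // Valid: record field 0 plus a constant "_" separator.
        Configuration ok = Configuration.from(
                "{\"rowkeyColumn\":[{\"index\":0,\"type\":\"string\"},"
              + "{\"index\":-1,\"type\":\"string\",\"value\":\"_\"}]}");
        ConfigValidator.validateRowkeyColumn(ok); // passes

        // Invalid: constants only, which would produce the same rowkey for every record.
        Configuration bad = Configuration.from(
                "{\"rowkeyColumn\":[{\"index\":-1,\"type\":\"string\",\"value\":\"_\"}]}");
        try {
            ConfigValidator.validateRowkeyColumn(bad);
        } catch (DataXException expected) {
            System.out.println("rejected: " + expected.getMessage());
        }
    }
}
```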

View File

@ -0,0 +1,27 @@
package com.alibaba.datax.plugin.writer.obhbasewriter;
import ch.qos.logback.classic.Level;
public final class Constant {
public static final String DEFAULT_ENCODING = "UTF-8";
public static final String DEFAULT_DATA_FORMAT = "yyyy-MM-dd HH:mm:ss";
public static final String DEFAULT_NULL_MODE = "skip";
public static final long DEFAULT_WRITE_BUFFER_SIZE = 8 * 1024 * 1024;
public static final long DEFAULT_MEMSTORE_CHECK_INTERVAL_SECOND = 30;
public static final double DEFAULT_MEMSTORE_THRESHOLD = 0.9d;
public static final int DEFAULT_FAIL_TRY_COUNT = 10000;
public static final String OB_TABLE_CLIENT_PROPERTY = "logging.path.com.alipay.oceanbase-table-client";
public static final String OB_TABLE_HBASE_PROPERTY = "logging.path.com.alipay.oceanbase-table-hbase";
public static final String OB_TABLE_CLIENT_LOG_LEVEL = "logging.level.oceanbase-table-client";
public static final String OB_TABLE_HBASE_LOG_LEVEL = "logging.level.oceanbase-table-hbase";
public static final String OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL = "logging.level.com.alipay.oceanbase-table-client";
public static final String OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL = "logging.level.com.alipay.oceanbase-table-hbase";
public static final String OB_HBASE_LOG_PATH = System.getProperty("datax.home") + "/log/";
public static final String DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL = Level.OFF.toString();
public static final String DEFAULT_OB_TABLE_HBASE_LOG_LEVEL = Level.OFF.toString();
public static final String DEFAULT_NETTY_BUFFER_LOW_WATERMARK = Integer.toString(512 * 1024);
public static final String DEFAULT_NETTY_BUFFER_HIGH_WATERMARK = Integer.toString(1024 * 1024);
public static final String DEFAULT_HBASE_HTABLE_CLIENT_WRITE_BUFFER = "2097152";
public static final String DEFAULT_HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK = "10";
public static final String DEFAULT_RPC_EXECUTE_TIMEOUT = "3000";
}

View File

@ -0,0 +1,44 @@
package com.alibaba.datax.plugin.writer.obhbasewriter;
import com.alibaba.datax.common.spi.ErrorCode;
import com.alibaba.datax.common.util.MessageSource;
/**
* Created by shf on 16/3/8.
*/
public enum Hbase094xWriterErrorCode implements ErrorCode {
REQUIRED_VALUE("Hbasewriter-00", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.required_value")),
ILLEGAL_VALUE("Hbasewriter-01", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.illegal_value")),
GET_HBASE_CONFIG_ERROR("Hbasewriter-02", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.get_hbase_config_error")),
GET_HBASE_TABLE_ERROR("Hbasewriter-03", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.get_hbase_table_error")),
CLOSE_HBASE_AMIN_ERROR("Hbasewriter-05", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.close_hbase_amin_error")),
CLOSE_HBASE_TABLE_ERROR("Hbasewriter-06", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.close_hbase_table_error")),
PUT_HBASE_ERROR("Hbasewriter-07", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.put_hbase_error")),
DELETE_HBASE_ERROR("Hbasewriter-08", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.delete_hbase_error")),
TRUNCATE_HBASE_ERROR("Hbasewriter-09", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.truncate_hbase_error")),
CONSTRUCT_ROWKEY_ERROR("Hbasewriter-10", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.construct_rowkey_error")),
CONSTRUCT_VERSION_ERROR("Hbasewriter-11", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.construct_version_error")),
INIT_ERROR("Hbasewriter-12", MessageSource.loadResourceBundle(Hbase094xWriterErrorCode.class).message("errorcode.init_error"));
private final String code;
private final String description;
private Hbase094xWriterErrorCode(String code, String description) {
this.code = code;
this.description = description;
}
@Override
public String getCode() {
return this.code;
}
@Override
public String getDescription() {
return this.description;
}
@Override
public String toString() {
return String.format("Code:[%s], Description:[%s].", this.code, this.description);
}
}

View File

@ -0,0 +1,30 @@
package com.alibaba.datax.plugin.writer.obhbasewriter;
import java.util.Arrays;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.MessageSource;
public enum ModeType {
Normal("normal"),
MultiVersion("multiVersion");
private String mode;
ModeType(String mode) {
this.mode = mode.toLowerCase();
}
public String getMode() {
return mode;
}
public static ModeType getByTypeName(String modeName) {
for (ModeType modeType : values()) {
if (modeType.mode.equalsIgnoreCase(modeName)) {
return modeType;
}
}
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MessageSource.loadResourceBundle(ModeType.class).message("modetype.1", modeName, Arrays.asList(values())));
}
}

View File

@ -0,0 +1,30 @@
package com.alibaba.datax.plugin.writer.obhbasewriter;
import java.util.Arrays;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.MessageSource;
public enum NullModeType {
Skip("skip"),
Empty("empty");
private String mode;
NullModeType(String mode) {
this.mode = mode.toLowerCase();
}
public String getMode() {
return mode;
}
public static NullModeType getByTypeName(String modeName) {
for (NullModeType modeType : values()) {
if (modeType.mode.equalsIgnoreCase(modeName)) {
return modeType;
}
}
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MessageSource.loadResourceBundle(NullModeType.class).message("nullmodetype.1", modeName, Arrays.asList(values())));
}
}

View File

@ -0,0 +1,108 @@
/*
* Copyright (c) 2021 OceanBase ob-loader-dumper is licensed under Mulan PSL v2. You can use this software according to
* the terms and conditions of the Mulan PSL v2. You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
* BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more
* details.
*/
package com.alibaba.datax.plugin.writer.obhbasewriter;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.reader.Key;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.tuple.Triple;
/**
* @author cjyyz
* @date 2023/03/24
* @since
*/
public class ObHTableInfo {
/**
* Table name without the column family, used to construct the OHTable
*/
String tableName;
/**
* Table name including the column family, used for partition calculation
*/
String fullHbaseTableName;
NullModeType nullModeType;
String encoding;
List<Configuration> columns;
/**
* Caches the column family, field name, and field type of each configured column so they are not re-parsed on every insert.
* Triple<String, String, ColumnType>: left = column family, middle = field name, right = field type
*/
LinkedHashMap<Integer, Triple<String, String, ColumnType>> indexColumnInfoMap;
/**
* Caches the index, constant value, and field type of each configured rowkey element so they are not re-parsed on every insert.
* Triple<Integer, String, ColumnType>: left = index, middle = constant value, right = field type
*/
List<Triple<Integer, String, ColumnType>> rowKeyElementList;
public ObHTableInfo(Configuration configuration) {
this.nullModeType = NullModeType.getByTypeName(configuration.getString(ConfigKey.NULL_MODE, Constant.DEFAULT_NULL_MODE));
this.encoding = configuration.getString(ConfigKey.ENCODING, Constant.DEFAULT_ENCODING);
this.columns = configuration.getListConfiguration(ConfigKey.COLUMN);
this.indexColumnInfoMap = new LinkedHashMap<>();
configuration.getListConfiguration(ConfigKey.COLUMN).forEach(e -> {
String[] name = e.getString(ConfigKey.NAME).split(":");
indexColumnInfoMap.put(e.getInt(ConfigKey.INDEX), Triple.of(name[0], name[1], ColumnType.getByTypeName(e.getString(ConfigKey.TYPE)))
);
});
this.rowKeyElementList = new ArrayList<>();
configuration.getListConfiguration(ConfigKey.ROWKEY_COLUMN).forEach(e -> {
Integer index = e.getInt(ConfigKey.INDEX);
String constantValue = e.getString(ConfigKey.VALUE);
ColumnType columnType = ColumnType.getByTypeName(e.getString(ConfigKey.TYPE));
rowKeyElementList.add(Triple.of(index, constantValue, columnType));
});
this.tableName = configuration.getString(Key.TABLE);
this.fullHbaseTableName = tableName;
if (!fullHbaseTableName.contains("$")) {
String name = columns.get(0).getString(ConfigKey.NAME);
String familyName = name.split(":")[0];
fullHbaseTableName = fullHbaseTableName + "$" + familyName;
}
}
public String getTableName() {
return tableName;
}
public String getFullHbaseTableName() {
return fullHbaseTableName;
}
public NullModeType getNullModeType() {
return nullModeType;
}
public String getEncoding() {
return encoding;
}
public Map<Integer, Triple<String, String, ColumnType>> getIndexColumnInfoMap() {
return indexColumnInfoMap;
}
public List<Triple<Integer, String, ColumnType>> getRowKeyElementList() {
return rowKeyElementList;
}
}
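
A short sketch of the name derivation in the constructor above: when `table` carries no `$family` suffix, the family of the first configured column is appended. All values below are hypothetical:

```java
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.writer.obhbasewriter.ObHTableInfo;

public class ObHTableInfoSketch {
    public static void main(String[] args) {
        Configuration conf = Configuration.from("{"
                + "\"table\":\"htable1\","
                + "\"column\":[{\"index\":0,\"name\":\"family1:col1\",\"type\":\"string\"}],"
                + "\"rowkeyColumn\":[{\"index\":0,\"type\":\"string\"}]"
                + "}");
        ObHTableInfo info = new ObHTableInfo(conf);
        System.out.println(info.getTableName());          // htable1
        System.out.println(info.getFullHbaseTableName()); // htable1$family1
    }
}
```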

View File

@ -0,0 +1,267 @@
package com.alibaba.datax.plugin.writer.obhbasewriter;
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL;
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_OB_TABLE_HBASE_LOG_LEVEL;
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL;
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL;
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_HBASE_LOG_PATH;
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_TABLE_CLIENT_LOG_LEVEL;
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_TABLE_CLIENT_PROPERTY;
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_TABLE_HBASE_LOG_LEVEL;
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.OB_TABLE_HBASE_PROPERTY;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordReceiver;
import com.alibaba.datax.common.spi.Writer;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.reader.util.ObVersion;
import com.alibaba.datax.plugin.rdbms.util.DBUtil;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter;
import com.alibaba.datax.plugin.rdbms.writer.Key;
import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ServerConnectInfo;
import com.alibaba.datax.plugin.writer.obhbasewriter.task.ObHBaseWriteTask;
import com.google.common.base.Preconditions;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
/**
*
*/
public class ObHbaseWriter extends Writer {
/**
* Methods on Job run exactly once; methods on Task are executed in parallel by multiple Task threads started by the framework.
* <p/>
* The overall Writer execution flow is:
*
* <pre>
* Job: init --> prepare --> split
*
* Task: init --> prepare --> startWrite --> post --> destroy
* Task: init --> prepare --> startWrite --> post --> destroy
*
* Job: post --> destroy
* </pre>
*/
public static class Job extends Writer.Job {
private Configuration originalConfig = null;
private static final Logger LOG = LoggerFactory.getLogger(Job.class);
/**
* 注意此方法仅执行一次 最佳实践通常在这里对用户的配置进行校验是否缺失必填项有无错误值有没有无关配置项...
* 并给出清晰的报错/警告提示校验通常建议采用静态工具类进行以保证本类结构清晰
*/
@Override
public void init() {
if (System.getProperty(OB_TABLE_CLIENT_PROPERTY) == null) {
LOG.info(OB_TABLE_CLIENT_PROPERTY + " not set");
System.setProperty(OB_TABLE_CLIENT_PROPERTY, OB_HBASE_LOG_PATH);
}
if (System.getProperty(OB_TABLE_HBASE_PROPERTY) == null) {
LOG.info(OB_TABLE_HBASE_PROPERTY + " not set");
System.setProperty(OB_TABLE_HBASE_PROPERTY, OB_HBASE_LOG_PATH);
}
if (System.getProperty(OB_TABLE_CLIENT_LOG_LEVEL) == null) {
LOG.info(OB_TABLE_CLIENT_LOG_LEVEL + " not set");
System.setProperty(OB_TABLE_CLIENT_LOG_LEVEL, DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL);
}
if (System.getProperty(OB_TABLE_HBASE_LOG_LEVEL) == null) {
LOG.info(OB_TABLE_HBASE_LOG_LEVEL + " not set");
System.setProperty(OB_TABLE_HBASE_LOG_LEVEL, DEFAULT_OB_TABLE_HBASE_LOG_LEVEL);
}
if (System.getProperty(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL) == null) {
LOG.info(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL + " not set");
System.setProperty(OB_COM_ALIPAY_TABLE_CLIENT_LOG_LEVEL, DEFAULT_OB_TABLE_CLIENT_LOG_LEVEL);
}
if (System.getProperty(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL) == null) {
LOG.info(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL + " not set");
System.setProperty(OB_COM_ALIPAY_TABLE_HBASE_LOG_LEVEL, DEFAULT_OB_TABLE_HBASE_LOG_LEVEL);
}
LOG.info("{} is set to {}, {} is set to {}",
OB_TABLE_CLIENT_PROPERTY, OB_HBASE_LOG_PATH, OB_TABLE_HBASE_PROPERTY, OB_HBASE_LOG_PATH);
this.originalConfig = super.getPluginJobConf();
boolean useOdpMode = originalConfig.getBool(ConfigKey.USE_ODP_MODE, false);
String configUrl = originalConfig.getString(ConfigKey.OBCONFIG_URL, null);
String jdbcUrl = originalConfig.getString(ConfigKey.JDBC_URL, null);
jdbcUrl = DataBaseType.MySql.appendJDBCSuffixForReader(jdbcUrl);
String user = originalConfig.getString(Key.USERNAME, null);
String password = originalConfig.getString(Key.PASSWORD);
ServerConnectInfo serverConnectInfo = new ServerConnectInfo(jdbcUrl, user, password);
if (useOdpMode) {
originalConfig.set(ConfigKey.ODP_HOST, serverConnectInfo.host);
originalConfig.set(ConfigKey.ODP_PORT, serverConnectInfo.port);
} else if (StringUtils.isBlank(configUrl)) {
serverConnectInfo.setSysUser(originalConfig.getString(ConfigKey.OB_SYS_USER));
serverConnectInfo.setSysPass(originalConfig.getString(ConfigKey.OB_SYS_PASSWORD));
try {
originalConfig.set(ConfigKey.OBCONFIG_URL, queryRsUrl(serverConnectInfo));
originalConfig.set(ConfigKey.OB_SYS_USER, serverConnectInfo.sysUser);
originalConfig.set(ConfigKey.OB_SYS_PASSWORD, serverConnectInfo.sysPass);
LOG.info("fetch configUrl success, configUrl is {}", configUrl);
} catch (Exception e) {
LOG.error("fail to get configure url: " + e.getMessage());
throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, "Missing obConfigUrl");
}
}
if (StringUtils.isBlank(originalConfig.getString(ConfigKey.DBNAME))) {
originalConfig.set(ConfigKey.DBNAME, serverConnectInfo.databaseName);
}
ConfigValidator.validateParameter(this.originalConfig);
}
private String queryRsUrl(ServerConnectInfo serverInfo) {
String configUrl = originalConfig.getString(ConfigKey.OBCONFIG_URL, null);
if (configUrl == null) {
try {
Connection conn = null;
int retry = 0;
final String sysJDBCUrl = serverInfo.jdbcUrl.replace(serverInfo.databaseName, "oceanbase");
do {
try {
if (retry > 0) {
int sleep = retry > 9 ? 500 : 1 << retry;
try {
TimeUnit.SECONDS.sleep(sleep);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
LOG.warn("retry fetch RsUrl the {} times", retry);
}
conn = DBUtil.getConnection(DataBaseType.OceanBase, sysJDBCUrl, serverInfo.sysUser, serverInfo.sysPass);
String sql = "show parameters like 'obconfig_url'";
LOG.info("query param: {}", sql);
PreparedStatement stmt = conn.prepareStatement(sql);
ResultSet result = stmt.executeQuery();
if (result.next()) {
configUrl = result.getString("Value");
}
if (StringUtils.isNotBlank(configUrl)) {
break;
}
} catch (Exception e) {
++retry;
LOG.warn("fetch root server list(rsList) error {}", e.getMessage());
} finally {
DBUtil.closeDBResources(null, conn);
}
} while (retry < 3);
LOG.info("configure url is: " + configUrl);
originalConfig.set(ConfigKey.OBCONFIG_URL, configUrl);
} catch (Exception e) {
LOG.error("Fail to get configure url: {}", e.getMessage(), e);
throw DataXException.asDataXException(Hbase094xWriterErrorCode.REQUIRED_VALUE, "未配置obConfigUrl且无法获取obConfigUrl");
}
}
return configUrl;
}
/**
* 注意此方法仅执行一次 最佳实践如果 Job 中有需要进行数据同步之前的处理可以在此处完成如果没有必要则可以直接去掉
*/
// 一般来说是需要推迟到 task 中进行pre 的执行单表情况例外
@Override
public void prepare() {
}
/**
* 注意此方法仅执行一次 最佳实践通常采用工具静态类完成把 Job 配置切分成多个 Task 配置的工作 这里的
* mandatoryNumber 是强制必须切分的份数
*/
@Override
public List<Configuration> split(int mandatoryNumber) {
// This function does not need any change.
Configuration simplifiedConf = this.originalConfig;
List<Configuration> splitResultConfigs = new ArrayList<Configuration>();
for (int j = 0; j < mandatoryNumber; j++) {
splitResultConfigs.add(simplifiedConf.clone());
}
return splitResultConfigs;
}
/**
* 注意此方法仅执行一次 最佳实践如果 Job 中有需要进行数据同步之后的后续处理可以在此处完成
*/
@Override
public void post() {
// No post supported
}
/**
* 注意此方法仅执行一次 最佳实践通常配合 Job 中的 post() 方法一起完成 Job 的资源释放
*/
@Override
public void destroy() {
}
}
public static class Task extends Writer.Task {
private Configuration taskConfig;
private CommonRdbmsWriter.Task writerTask;
/**
* 注意此方法每个 Task 都会执行一次 最佳实践此处通过对 taskConfig 配置的读取进而初始化一些资源为
* startWrite()做准备
*/
@Override
public void init() {
this.taskConfig = super.getPluginJobConf();
String mode = this.taskConfig.getString(ConfigKey.MODE);
ModeType modeType = ModeType.getByTypeName(mode);
switch (modeType) {
case Normal:
try {
this.writerTask = new ObHBaseWriteTask(this.taskConfig);
} catch (Exception e) {
throw DataXException.asDataXException(Hbase094xWriterErrorCode.INIT_ERROR, "ObHbase writer init error:" + e.getMessage());
}
break;
default:
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, "ObHbase not support this mode type:" + modeType);
}
}
/**
* 注意此方法每个 Task 都会执行一次 最佳实践如果 Task
* 中有需要进行数据同步之前的处理可以在此处完成如果没有必要则可以直接去掉
*/
@Override
public void prepare() {
this.writerTask.prepare(taskConfig);
}
/**
* 注意此方法每个 Task 都会执行一次 最佳实践此处适当封装确保简洁清晰完成数据写入工作
*/
@Override
public void startWrite(RecordReceiver recordReceiver) {
this.writerTask.startWrite(recordReceiver, taskConfig, super.getTaskPluginCollector());
}
/**
* 注意此方法每个 Task 都会执行一次 最佳实践如果 Task 中有需要进行数据同步之后的后续处理可以在此处完成
*/
@Override
public void post() {
this.writerTask.post(taskConfig);
}
/**
* 注意此方法每个 Task 都会执行一次 最佳实践通常配合Task 中的 post() 方法一起完成 Task 的资源释放
*/
@Override
public void destroy() {
this.writerTask.destroy(taskConfig);
}
}
}

View File

@ -0,0 +1 @@
databasewriterbuffer.1=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684table\u4e0d\u5b58\u5728, \u7b97\u51fa\u7684tableName={0},db={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.

View File

@ -0,0 +1 @@
databasewriterbuffer.1=The [table] calculated based on the rules does not exist. The calculated [tableName]={0}, [db]={1}. Please check the rules you configured.

View File

@ -0,0 +1 @@
databasewriterbuffer.1=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684table\u4e0d\u5b58\u5728, \u7b97\u51fa\u7684tableName={0},db={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.

View File

@ -0,0 +1 @@
databasewriterbuffer.1=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684table\u4e0d\u5b58\u5728, \u7b97\u51fa\u7684tableName={0},db={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.

View File

@ -0,0 +1 @@
databasewriterbuffer.1=通過規則計算出來的table不存在, 算出的tableName={0},db={1}, 請檢查您配置的規則.

View File

@ -0,0 +1 @@
databasewriterbuffer.1=通過規則計算出來的table不存在, 算出的tableName={0},db={1}, 請檢查您配置的規則.

View File

@ -0,0 +1,30 @@
package com.alibaba.datax.plugin.writer.obhbasewriter.ext;
import com.alibaba.datax.common.spi.ErrorCode;
public enum ObDataSourceErrorCode implements ErrorCode {
DESC("ObDataSourceError code", "connect error");
private final String code;
private final String describe;
private ObDataSourceErrorCode(String code, String describe) {
this.code = code;
this.describe = describe;
}
@Override
public String getCode() {
return this.code;
}
@Override
public String getDescription() {
return this.describe;
}
@Override
public String toString() {
return String.format("Code:[%s], Describe:[%s]. ", this.code, this.describe);
}
}

View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2021 OceanBase ob-loader-dumper is licensed under Mulan PSL v2. You can use this software according to
* the terms and conditions of the Mulan PSL v2. You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
* BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more
* details.
*/
package com.alibaba.datax.plugin.writer.obhbasewriter.ext;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.plugin.writer.obhbasewriter.Hbase094xWriterErrorCode;
import com.alipay.oceanbase.hbase.OHTable;
import org.apache.hadoop.conf.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author cjyyz
* @date 2023/03/16
* @since
*/
public class ObHbaseTableHolder {
private static final Logger LOG = LoggerFactory.getLogger(ObHbaseTableHolder.class);
private Configuration configuration;
private String hbaseTableName;
private OHTable ohTable;
public ObHbaseTableHolder(Configuration configuration, String hbaseTableName) {
this.configuration = configuration;
this.hbaseTableName = hbaseTableName;
}
public OHTable getOhTable() {
try {
if (ohTable == null) {
ohTable = new OHTable(configuration, hbaseTableName);
}
return ohTable;
} catch (Exception e) {
LOG.error("build obHTable: {} failed. reason: {}", hbaseTableName, e.getMessage());
throw DataXException.asDataXException(Hbase094xWriterErrorCode.GET_HBASE_TABLE_ERROR, Hbase094xWriterErrorCode.GET_HBASE_TABLE_ERROR.getDescription());
}
}
public void destroy() {
try {
if (ohTable != null) {
ohTable.close();
}
} catch (Exception e) {
LOG.warn("error in closing htable: {}. Reason: {}", hbaseTableName, e.getMessage());
}
}
}
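
A hedged usage sketch for this holder: the OHTable is created lazily on first access and released through destroy(); the configuration contents and table name below are assumptions:

```java
import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ObHbaseTableHolder;
import com.alipay.oceanbase.hbase.OHTable;
import org.apache.hadoop.conf.Configuration;

public class HolderUsageSketch {
    public static void main(String[] args) {
        // The Configuration would normally carry the OB connection properties.
        Configuration hbaseConf = new Configuration();
        ObHbaseTableHolder holder = new ObHbaseTableHolder(hbaseConf, "htable1$family1");
        try {
            OHTable table = holder.getOhTable(); // built lazily on first access
            // ... issue Put/Delete batches through the table here ...
        } finally {
            holder.destroy(); // closes the underlying OHTable
        }
    }
}
```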

View File

@ -0,0 +1,146 @@
package com.alibaba.datax.plugin.writer.obhbasewriter.ext;
import com.google.common.base.Preconditions;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.apache.commons.lang3.StringUtils.EMPTY;
public class ServerConnectInfo {
public String clusterName;
public String tenantName;
// userName doesn't contain tenantName or clusterName
public String userName;
public String password;
public String databaseName;
public String ipPort;
public String jdbcUrl;
public String host;
public String port;
public boolean publicCloud;
public int rpcPort;
public String sysUser;
public String sysPass;
/**
*
* @param jdbcUrl format is jdbc:oceanbase://ip:port
* @param username format is cluster:tenant:username or username@tenant#cluster or user@tenant or user
* @param password the connection password
*/
public ServerConnectInfo(final String jdbcUrl, final String username, final String password) {
this(jdbcUrl, username, password, null, null);
}
public ServerConnectInfo(final String jdbcUrl, final String username, final String password, final String sysUser, final String sysPass) {
if (jdbcUrl.startsWith(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING)) {
String[] ss = jdbcUrl.split(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING_PATTERN);
Preconditions.checkArgument(ss.length == 3, "jdbc url format is not correct:" + jdbcUrl);
this.userName = username;
this.clusterName = ss[1].trim().split(":")[0];
this.tenantName = ss[1].trim().split(":")[1];
this.jdbcUrl = ss[2];
} else {
this.jdbcUrl = jdbcUrl;
}
this.password = password;
this.sysUser = sysUser;
this.sysPass = sysPass;
parseJdbcUrl(jdbcUrl);
parseFullUserName(username);
}
private void parseJdbcUrl(final String jdbcUrl) {
Pattern pattern = Pattern.compile("//([\\w\\.\\-]+:\\d+)/([\\w-]+)\\?");
Matcher matcher = pattern.matcher(jdbcUrl);
if (matcher.find()) {
String ipPort = matcher.group(1);
String dbName = matcher.group(2);
this.ipPort = ipPort;
String[] hostPort = ipPort.split(":");
this.host = hostPort[0];
this.port = hostPort[1];
this.databaseName = dbName;
this.publicCloud = host.endsWith("aliyuncs.com");
} else {
throw new RuntimeException("Invalid argument:" + jdbcUrl);
}
}
private void parseFullUserName(final String fullUserName) {
int tenantIndex = fullUserName.indexOf("@");
int clusterIndex = fullUserName.indexOf("#");
// Handles jdbcUrl values that start with ||_dsc_ob10_dsc_
if (fullUserName.contains(":") && tenantIndex < 0) {
String[] names = fullUserName.split(":");
if (names.length != 3) {
throw new RuntimeException("invalid argument: " + fullUserName);
} else {
this.clusterName = names[0];
this.tenantName = names[1];
this.userName = names[2];
}
} else if (tenantIndex < 0) {
// Handles a short jdbcUrl whose username carries no tenant name, mainly the public cloud case; partitions are not calculated in this scenario
this.userName = fullUserName;
this.clusterName = EMPTY;
this.tenantName = EMPTY;
} else {
// Handles a short jdbcUrl whose username carries a tenant name
this.userName = fullUserName.substring(0, tenantIndex);
if (clusterIndex < 0) {
this.clusterName = EMPTY;
this.tenantName = fullUserName.substring(tenantIndex + 1);
} else {
this.clusterName = fullUserName.substring(clusterIndex + 1);
this.tenantName = fullUserName.substring(tenantIndex + 1, clusterIndex);
}
}
}
@Override
public String toString() {
return "ServerConnectInfo{" +
"clusterName='" + clusterName + '\'' +
", tenantName='" + tenantName + '\'' +
", userName='" + userName + '\'' +
", password='" + password + '\'' +
", databaseName='" + databaseName + '\'' +
", ipPort='" + ipPort + '\'' +
", jdbcUrl='" + jdbcUrl + '\'' +
", publicCloud=" + publicCloud +
", rpcPort=" + rpcPort +
'}';
}
public String getFullUserName() {
StringBuilder builder = new StringBuilder();
builder.append(userName);
if (publicCloud || (rpcPort != 0 && EMPTY.equals(clusterName))) {
return builder.toString();
}
if (!EMPTY.equals(tenantName)) {
builder.append("@").append(tenantName);
}
if (!EMPTY.equals(clusterName)) {
builder.append("#").append(clusterName);
}
if (EMPTY.equals(this.clusterName) && EMPTY.equals(this.tenantName)) {
return this.userName;
}
return builder.toString();
}
public void setRpcPort(int rpcPort) {
this.rpcPort = rpcPort;
}
public void setSysUser(String sysUser) {
this.sysUser = sysUser;
}
public void setSysPass(String sysPass) {
this.sysPass = sysPass;
}
}
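
To make the username formats documented above concrete, here is a small sketch with hypothetical connection values:

```java
import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ServerConnectInfo;

public class ConnectInfoSketch {
    public static void main(String[] args) {
        // Hypothetical values, used only to show the parsing rules.
        ServerConnectInfo info = new ServerConnectInfo(
                "jdbc:oceanbase://127.0.0.1:2883/test_db?useUnicode=true",
                "user@tenant#cluster",
                "***");
        System.out.println(info.userName);          // user
        System.out.println(info.tenantName);        // tenant
        System.out.println(info.clusterName);       // cluster
        System.out.println(info.databaseName);      // test_db
        System.out.println(info.getFullUserName()); // user@tenant#cluster
    }
}
```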

View File

@ -0,0 +1,21 @@
multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301
multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539.
multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219.
multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0},ErrorCode:{1}
multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0},ErrorCode:{1}
multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0},ErrorCode:{1}
multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0},ErrorCode:{1}
multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2}
multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a:
singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0},ErrorCode:{1}
singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0},ErrorCode:{1}
singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0},ErrorCode:{1}
singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0},ErrorCode:{1}

View File

@ -0,0 +1,21 @@
multitablewritertask.1=The configured [tableList] contains multiple tables but no table splitting rules have been configured. Please check your configuration.
multitablewritertask.2=There are repeated table names in the multiple tables you configured, but no database or table splitting rules have been configured. Please check your configuration.
multitablewritertask.3=All configured tables share the same name, but no database splitting rules have been configured. Please check your configuration.
multitablewritertask.4=The configured table and database share the same name. This back-to-source method is not supported.
multitablewritertask.5=Error in column configuration information. In your configured tasks, the number of source fields to be read: {0} and the number of fields to be written to the target table: {1} are not equivalent. Please check your configuration and make corrections.
multitablewritertask.6=The database that corresponds to the [tableName] calculated based on the rules does not exist. The [tableName]={0}. Please check the rules you configured.
multitablewritertask.7=The database and [table] calculated based on the rules do not exist. The calculated [dbName]={0}, and [tableName]={1}. Please check the rules you configured.
multitablewritertask.8=The database calculated based on the rules does not exist. The calculated [dbName]={0}. Please check the rules you configured.
multitablewritertask.9=The [dbName] [{0}] calculated based on the rules contains multiple sub-tables. Please configure your table splitting rules.
multitablewritertask.10=Fatal exception in OB. Roll back this write and hibernate for five minutes. SQLState: {0}. ErrorCode: {1}
multitablewritertask.11=Recoverable exception in OB. Roll back this write and hibernate for one minute. SQLState: {0}. ErrorCode: {1}
multitablewritertask.12=Exception in OB. Roll back this write and hibernate for one second. Write and submit the records one by one. SQLState: {0}. ErrorCode: {1}
multitablewritertask.13=Exception in OB. Roll back this write. Write and submit the records one by one. SQLState: {0}. ErrorCode: {1}
multitablewritertask.14=Failed to write to table: [{0}]. Hibernate for [{1}] milliseconds. Data: {2}
multitablewritertask.15=Writing table [{0}] contains dirty data. Record={1}. Writing exception is:
singletablewritertask.1=Fatal exception in OB. Roll back this write and hibernate for five minutes. SQLState: {0}. ErrorCode: {1}
singletablewritertask.2=Recoverable exception in OB. Roll back this write and hibernate for one minute. SQLState: {0}. ErrorCode: {1}
singletablewritertask.3=Exception in OB. Roll back this write and hibernate for one second. Write and submit the records one by one. SQLState: {0}. ErrorCode: {1}
singletablewritertask.4=Exception in OB. Roll back this write. Write and submit the records one by one. SQLState: {0}. ErrorCode: {1}

View File

@ -0,0 +1,21 @@
multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301
multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539.
multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219.
multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0}
multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0}
multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2}
multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a:
singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0}
singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0}
singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}

View File

@ -0,0 +1,21 @@
multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301
multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539.
multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219.
multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0}
multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0}
multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2}
multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a:
singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0}
singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0}
singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}

View File

@ -0,0 +1,41 @@
multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301
multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539.
multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219.
multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0}
multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0}
multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2}
multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a:
singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0}
singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0}
singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
multitablewritertask.1=配置的tableList為多表,但未配置分表規則,請檢查您的配置
multitablewritertask.2=配置的多庫中的表名有重複的,但未配置分庫規則和分表規則,請檢查您的配置
multitablewritertask.3=配置的所有表名都相同,但未配置分庫規則,請檢查您的配置
multitablewritertask.4=配置的table和db名稱都相同此種回流方式不支援
multitablewritertask.5=列配置資訊有錯誤. 因為您配置的任務中,源頭讀取欄位數:{0}與 目的表要寫入的欄位數:{1}不相等. 請檢查您的配置並作出修改.
multitablewritertask.6=通過規則計算出來的tableName查找對應的db不存在tableName={0}, 請檢查您配置的規則.
multitablewritertask.7=通過規則計算出來的db和table不存在算出的dbName={0},tableName={1}, 請檢查您配置的規則.
multitablewritertask.8=通過規則計算出來的db不存在算出的dbName={0}, 請檢查您配置的規則.
multitablewritertask.9=通過規則計算出來的dbName[{0}], 存在多張分表,請配置您的分表規則.
multitablewritertask.10=遇到OB致命異常,回滾此次寫入, 休眠 5分鐘,SQLState:{0}
multitablewritertask.11=遇到OB可恢復異常,回滾此次寫入, 休眠 1分鐘,SQLState:{0}
multitablewritertask.12=遇到OB異常,回滾此次寫入, 休眠 1秒,採用逐條寫入提交,SQLState:{0}
multitablewritertask.13=遇到OB異常,回滾此次寫入, 採用逐條寫入提交,SQLState:{0}
multitablewritertask.14=寫入表[{0}]失敗,休眠[{1}]毫秒,數據:{2}
multitablewritertask.15=寫入表[{0}]存在髒數據,record={1}, 寫入異常為:
singletablewritertask.1=遇到OB致命異常,回滾此次寫入, 休眠 5分鐘,SQLState:{0}
singletablewritertask.2=遇到OB可恢復異常,回滾此次寫入, 休眠 1分鐘,SQLState:{0}
singletablewritertask.3=遇到OB異常,回滾此次寫入, 休眠 1秒,採用逐條寫入提交,SQLState:{0}
singletablewritertask.4=遇到OB異常,回滾此次寫入, 採用逐條寫入提交,SQLState:{0}

View File

@ -0,0 +1,41 @@
multitablewritertask.1=\u914d\u7f6e\u7684tableList\u4e3a\u591a\u8868\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
multitablewritertask.2=\u914d\u7f6e\u7684\u591a\u5e93\u4e2d\u7684\u8868\u540d\u6709\u91cd\u590d\u7684\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\u548c\u5206\u8868\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
multitablewritertask.3=\u914d\u7f6e\u7684\u6240\u6709\u8868\u540d\u90fd\u76f8\u540c\uff0c\u4f46\u672a\u914d\u7f6e\u5206\u5e93\u89c4\u5219\uff0c\u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e
multitablewritertask.4=\u914d\u7f6e\u7684table\u548cdb\u540d\u79f0\u90fd\u76f8\u540c\uff0c\u6b64\u79cd\u56de\u6d41\u65b9\u5f0f\u4e0d\u652f\u6301
multitablewritertask.5=\u5217\u914d\u7f6e\u4fe1\u606f\u6709\u9519\u8bef. \u56e0\u4e3a\u60a8\u914d\u7f6e\u7684\u4efb\u52a1\u4e2d\uff0c\u6e90\u5934\u8bfb\u53d6\u5b57\u6bb5\u6570:{0} \u4e0e \u76ee\u7684\u8868\u8981\u5199\u5165\u7684\u5b57\u6bb5\u6570:{1} \u4e0d\u76f8\u7b49. \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4f5c\u51fa\u4fee\u6539.
multitablewritertask.6=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684tableName\u67e5\u627e\u5bf9\u5e94\u7684db\u4e0d\u5b58\u5728\uff0ctableName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
multitablewritertask.7=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u548ctable\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0},tableName={1}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
multitablewritertask.8=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684db\u4e0d\u5b58\u5728\uff0c\u7b97\u51fa\u7684dbName={0}, \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684\u89c4\u5219.
multitablewritertask.9=\u901a\u8fc7\u89c4\u5219\u8ba1\u7b97\u51fa\u6765\u7684dbName[{0}], \u5b58\u5728\u591a\u5f20\u5206\u8868\uff0c\u8bf7\u914d\u7f6e\u60a8\u7684\u5206\u8868\u89c4\u5219.
multitablewritertask.10=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0}
multitablewritertask.11=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0}
multitablewritertask.12=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
multitablewritertask.13=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
multitablewritertask.14=\u5199\u5165\u8868[{0}]\u5931\u8d25,\u4f11\u7720[{1}]\u6beb\u79d2,\u6570\u636e:{2}
multitablewritertask.15=\u5199\u5165\u8868[{0}]\u5b58\u5728\u810f\u6570\u636e,record={1}, \u5199\u5165\u5f02\u5e38\u4e3a:
singletablewritertask.1=\u9047\u5230OB\u81f4\u547d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 5\u5206\u949f,SQLState:{0}
singletablewritertask.2=\u9047\u5230OB\u53ef\u6062\u590d\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u5206\u949f,SQLState:{0}
singletablewritertask.3=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u4f11\u7720 1\u79d2,\u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
singletablewritertask.4=\u9047\u5230OB\u5f02\u5e38,\u56de\u6eda\u6b64\u6b21\u5199\u5165, \u91c7\u7528\u9010\u6761\u5199\u5165\u63d0\u4ea4,SQLState:{0}
multitablewritertask.1=配置的tableList為多表,但未配置分表規則,請檢查您的配置
multitablewritertask.2=配置的多庫中的表名有重複的,但未配置分庫規則和分表規則,請檢查您的配置
multitablewritertask.3=配置的所有表名都相同,但未配置分庫規則,請檢查您的配置
multitablewritertask.4=配置的table和db名稱都相同此種回流方式不支援
multitablewritertask.5=列配置資訊有錯誤. 因為您配置的任務中,源頭讀取欄位數:{0}與 目的表要寫入的欄位數:{1}不相等. 請檢查您的配置並作出修改.
multitablewritertask.6=通過規則計算出來的tableName查找對應的db不存在tableName={0}, 請檢查您配置的規則.
multitablewritertask.7=通過規則計算出來的db和table不存在算出的dbName={0},tableName={1}, 請檢查您配置的規則.
multitablewritertask.8=通過規則計算出來的db不存在算出的dbName={0}, 請檢查您配置的規則.
multitablewritertask.9=通過規則計算出來的dbName[{0}], 存在多張分表,請配置您的分表規則.
multitablewritertask.10=遇到OB致命異常,回滾此次寫入, 休眠 5分鐘,SQLState:{0}
multitablewritertask.11=遇到OB可恢復異常,回滾此次寫入, 休眠 1分鐘,SQLState:{0}
multitablewritertask.12=遇到OB異常,回滾此次寫入, 休眠 1秒,採用逐條寫入提交,SQLState:{0}
multitablewritertask.13=遇到OB異常,回滾此次寫入, 採用逐條寫入提交,SQLState:{0}
multitablewritertask.14=寫入表[{0}]失敗,休眠[{1}]毫秒,數據:{2}
multitablewritertask.15=寫入表[{0}]存在髒數據,record={1}, 寫入異常為:
singletablewritertask.1=遇到OB致命異常,回滾此次寫入, 休眠 5分鐘,SQLState:{0}
singletablewritertask.2=遇到OB可恢復異常,回滾此次寫入, 休眠 1分鐘,SQLState:{0}
singletablewritertask.3=遇到OB異常,回滾此次寫入, 休眠 1秒,採用逐條寫入提交,SQLState:{0}
singletablewritertask.4=遇到OB異常,回滾此次寫入, 採用逐條寫入提交,SQLState:{0}

View File

@ -0,0 +1,12 @@
package com.alibaba.datax.plugin.writer.obhbasewriter.task;
import com.alibaba.datax.common.util.Configuration;
/**
* TODO(yuez)升级hbase api之后再补充暂时用不到
*/
public class MultiVersionWriteTask extends ObHBaseWriteTask{
public MultiVersionWriteTask(Configuration configuration) throws Exception {
super(configuration);
}
}

View File

@ -0,0 +1,12 @@
package com.alibaba.datax.plugin.writer.obhbasewriter.task;
import com.alibaba.datax.common.util.Configuration;
/**
* TODO(yuez): to be filled in after the hbase api upgrade; unused for now
*/
public class NormalWriteTask extends ObHBaseWriteTask{
public NormalWriteTask(Configuration configuration) throws Exception {
super(configuration);
}
}

View File

@ -0,0 +1,317 @@
package com.alibaba.datax.plugin.writer.obhbasewriter.task;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordReceiver;
import com.alibaba.datax.common.plugin.TaskPluginCollector;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.common.util.MessageSource;
import com.alibaba.datax.plugin.rdbms.reader.Key;
import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter;
import com.alibaba.datax.plugin.writer.obhbasewriter.Config;
import com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey;
import com.alibaba.datax.plugin.writer.obhbasewriter.Constant;
import com.alibaba.datax.plugin.writer.obhbasewriter.NullModeType;
import com.alibaba.datax.plugin.writer.obhbasewriter.ObHTableInfo;
import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ServerConnectInfo;
import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class ObHBaseWriteTask extends CommonRdbmsWriter.Task {
private final static MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(ObHBaseWriteTask.class);
private final static Logger LOG = LoggerFactory.getLogger(ObHBaseWriteTask.class);
public NullModeType nullMode = null;
private int maxRetryCount;
public List<Configuration> columns;
public List<Configuration> rowkeyColumn;
public Configuration versionColumn;
public String hbaseTableName;
public String encoding;
public Boolean walFlag;
String configUrl;
String dbName;
String ip;
String port;
String fullUserName;
boolean useOdpMode;
String sysUsername;
String sysPassword;
private ObHTableInfo obHTableInfo;
private ConcurrentTableWriter concurrentWriter;
private boolean allTaskInQueue = false;
private long startTime = 0;
private String threadName = Thread.currentThread().getName();
private Lock lock = new ReentrantLock();
private Condition condition = lock.newCondition();
public ObHBaseWriteTask(Configuration configuration) {
super(DataBaseType.MySql);
init(configuration);
}
@Override
public void init(com.alibaba.datax.common.util.Configuration configuration) {
this.obHTableInfo = new ObHTableInfo(configuration);
this.hbaseTableName = configuration.getString(ConfigKey.TABLE);
this.columns = configuration.getListConfiguration(ConfigKey.COLUMN);
this.rowkeyColumn = configuration.getListConfiguration(ConfigKey.ROWKEY_COLUMN);
this.versionColumn = configuration.getConfiguration(ConfigKey.VERSION_COLUMN);
this.encoding = configuration.getString(ConfigKey.ENCODING, Constant.DEFAULT_ENCODING);
this.nullMode = NullModeType.getByTypeName(configuration.getString(ConfigKey.NULL_MODE, Constant.DEFAULT_NULL_MODE));
// this.memstoreThreshold = configuration.getDouble(Config.MEMSTORE_THRESHOLD, Config.DEFAULT_MEMSTORE_THRESHOLD);
this.walFlag = configuration.getBool(ConfigKey.WAL_FLAG, true);
this.maxRetryCount = configuration.getInt(ConfigKey.MAX_RETRY_COUNT, 3);
// default 1000 rows are committed together
this.batchSize = com.alibaba.datax.plugin.rdbms.writer.Constant.DEFAULT_BATCH_SIZE;
this.batchByteSize = com.alibaba.datax.plugin.rdbms.writer.Constant.DEFAULT_BATCH_BYTE_SIZE;
this.configUrl = configuration.getString(ConfigKey.OBCONFIG_URL);
this.jdbcUrl = configuration.getString(ConfigKey.JDBC_URL);
this.username = configuration.getString(Key.USERNAME);
this.password = configuration.getString(Key.PASSWORD);
this.dbName = configuration.getString(Key.DBNAME);
this.useOdpMode = configuration.getBool(ConfigKey.USE_ODP_MODE);
ServerConnectInfo connectInfo = new ServerConnectInfo(jdbcUrl, username, password);
String clusterName = connectInfo.clusterName;
this.fullUserName = connectInfo.getFullUserName();
final String[] ipPort = connectInfo.ipPort.split(":");
if (useOdpMode) {
this.ip = ipPort[0];
this.port = ipPort[1];
} else {
this.sysUsername = configuration.getString(ConfigKey.OB_SYS_USER);
this.sysPassword = configuration.getString(ConfigKey.OB_SYS_PASSWORD);
connectInfo.setSysUser(sysUsername);
connectInfo.setSysPass(sysPassword);
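// sys mode requires the cluster name (ObRegion) and target database on the config url, so append them when missing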
if (!configUrl.contains("ObRegion")) {
if (configUrl.contains("?")) {
configUrl += "&ObRegion=" + clusterName;
} else {
configUrl += "?ObRegion=" + clusterName;
}
}
if (!configUrl.contains("database")) {
configUrl += "&database=" + dbName;
}
}
if (null == concurrentWriter) {
concurrentWriter = new ConcurrentTableWriter(configuration, connectInfo);
allTaskInQueue = false;
}
}
@Override
public void prepare(Configuration configuration) {
concurrentWriter.start();
}
@Override
public void startWrite(RecordReceiver recordReceiver, Configuration configuration, TaskPluginCollector taskPluginCollector) {
this.taskPluginCollector = taskPluginCollector;
int recordCount = 0;
int bufferBytes = 0;
List<Record> records = new ArrayList<>();
try {
Record record;
while ((record = recordReceiver.getFromReader()) != null) {
recordCount++;
bufferBytes += record.getMemorySize();
records.add(record);
// flush a batch once batchSize rows or batchByteSize bytes have been buffered
if (records.size() >= batchSize || bufferBytes >= batchByteSize) {
concurrentWriter.addBatchRecords(Lists.newArrayList(records));
records.clear();
bufferBytes = 0;
}
}
if (!records.isEmpty()) {
concurrentWriter.addBatchRecords(records);
}
} catch (Throwable e) {
LOG.warn("startWrite error unexpected ", e);
throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e);
}
LOG.info("{} rows received.", recordCount);
waitTaskFinish();
}
public void waitTaskFinish() {
this.allTaskInQueue = true;
LOG.info("ConcurrentTableWriter has put all task in queue, queueSize = {}, total = {}, finished = {}",
concurrentWriter.getTaskQueueSize(),
concurrentWriter.getTotalTaskCount(),
concurrentWriter.getFinishTaskCount());
lock.lock();
try {
while (!concurrentWriter.checkFinish()) {
condition.await(50, TimeUnit.MILLISECONDS);
// print statistic
LOG.debug("Statistic total task {}, finished {}, queue Size {}",
concurrentWriter.getTotalTaskCount(),
concurrentWriter.getFinishTaskCount(),
concurrentWriter.getTaskQueueSize());
concurrentWriter.printStatistics();
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
LOG.warn("interrupted while waiting for all tasks to finish");
} finally {
lock.unlock();
}
LOG.debug("wait all InsertTask finished ...");
}
public boolean isFinished() {
return allTaskInQueue && concurrentWriter.checkFinish();
}
public void signalTaskFinish() {
lock.lock();
try {
condition.signal();
} finally {
lock.unlock();
}
}
public void collectDirtyRecord(Record record, Throwable throwable) {
this.taskPluginCollector.collectDirtyRecord(record, throwable);
}
@Override
public void post(Configuration configuration) {
}
@Override
public void destroy(Configuration configuration) {
if (concurrentWriter != null) {
concurrentWriter.destroy();
}
super.destroy(configuration);
}
public class ConcurrentTableWriter {
private BlockingQueue<List<Record>> queue;
private List<PutTask> putTasks;
private Configuration config;
private AtomicLong totalTaskCount;
private AtomicLong finishTaskCount;
private ServerConnectInfo connectInfo;
private ExecutorService executorService;
private final int threadCount;
public ConcurrentTableWriter(Configuration config, ServerConnectInfo connectInfo) {
this.threadCount = config.getInt(Config.WRITER_THREAD_COUNT, Config.DEFAULT_WRITER_THREAD_COUNT);
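// bound the queue to 2x the thread count so the producer blocks instead of buffering without limit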
this.queue = new LinkedBlockingQueue<List<Record>>(threadCount << 1);
this.putTasks = new ArrayList<PutTask>(threadCount);
this.config = config;
this.totalTaskCount = new AtomicLong(0);
this.finishTaskCount = new AtomicLong(0);
this.executorService = Executors.newFixedThreadPool(threadCount);
this.connectInfo = connectInfo;
}
public long getTotalTaskCount() {
return totalTaskCount.get();
}
public long getFinishTaskCount() {
return finishTaskCount.get();
}
public int getTaskQueueSize() {
return queue.size();
}
public void increFinishCount() {
finishTaskCount.incrementAndGet();
}
// should only be checked after all tasks have been put into the queue
public boolean checkFinish() {
long finishCount = finishTaskCount.get();
long totalCount = totalTaskCount.get();
return finishCount == totalCount;
}
public synchronized void start() {
for (int i = 0; i < threadCount; ++i) {
LOG.info("start {} insert task.", (i + 1));
PutTask putTask = new PutTask(threadName, queue, config, connectInfo, obHTableInfo, ObHBaseWriteTask.this);
putTask.setWriter(this);
putTasks.add(putTask);
}
for (PutTask task : putTasks) {
executorService.execute(task);
}
}
public void printStatistics() {
long insertTotalCost = 0;
long insertTotalCount = 0;
for (PutTask task : putTasks) {
insertTotalCost += task.getTotalCost();
insertTotalCount += task.getPutCount();
}
long avgCost = 0;
if (insertTotalCount != 0) {
avgCost = insertTotalCost / insertTotalCount;
}
ObHBaseWriteTask.LOG.debug("Put {} times, totalCost {} ms, average {} ms",
insertTotalCount, insertTotalCost, avgCost);
}
public void addBatchRecords(final List<Record> records) throws InterruptedException {
boolean isSucc = false;
while (!isSucc) {
isSucc = queue.offer(records, 5, TimeUnit.MILLISECONDS);
}
totalTaskCount.incrementAndGet();
}
public synchronized void destroy() {
if (putTasks != null) {
for (PutTask task : putTasks) {
task.setStop();
task.destroy();
}
}
destroyExecutor();
}
private void destroyExecutor() {
if (executorService != null && !executorService.isShutdown()) {
executorService.shutdown();
try {
// brief bounded wait for in-flight tasks; restore the interrupt flag if interrupted
executorService.awaitTermination(5L, TimeUnit.SECONDS);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
}
}
}
}
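For reference, here is a minimal `obhbasewriter` parameter sketch covering the keys read in `init()` above. The JSON key names are assumptions inferred from the `ConfigKey`/`Key` constant names (their actual string values are not visible in this diff), and the `column`/`rowkeyColumn` entry layout is assumed to follow the usual HBase-writer convention:

```json
{
  "name": "obhbasewriter",
  "parameter": {
    "table": "htable1",
    "username": "user@tenant#cluster",
    "password": "******",
    "dbName": "db1",
    "jdbcUrl": "jdbc:mysql://127.0.0.1:2883/db1",
    "obConfigUrl": "http://127.0.0.1:8080/services?Action=ObRootServiceInfo",
    "useOdpMode": false,
    "obSysUser": "root",
    "obSysPassword": "******",
    "nullMode": "skip",
    "walFlag": true,
    "maxRetryCount": 3,
    "column": [
      { "index": 1, "name": "family1:q1", "type": "string" }
    ],
    "rowkeyColumn": [
      { "index": 0, "type": "string" }
    ],
    "versionColumn": { "index": -1, "value": 1724154616370 }
  }
}
```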

View File

@ -0,0 +1,325 @@
package com.alibaba.datax.plugin.writer.obhbasewriter.task;
import com.alibaba.datax.common.element.DoubleColumn;
import com.alibaba.datax.common.element.LongColumn;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.common.util.MessageSource;
import com.alibaba.datax.plugin.writer.obhbasewriter.ColumnType;
import com.alibaba.datax.plugin.writer.obhbasewriter.Config;
import com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey;
import com.alibaba.datax.plugin.writer.obhbasewriter.Hbase094xWriterErrorCode;
import com.alibaba.datax.plugin.writer.obhbasewriter.ObHTableInfo;
import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ObHbaseTableHolder;
import com.alibaba.datax.plugin.writer.obhbasewriter.ext.ServerConnectInfo;
import com.alipay.oceanbase.hbase.constants.OHConstants;
import com.alipay.oceanbase.rpc.property.Property;
import com.google.common.base.Stopwatch;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Queue;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.tuple.Triple;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.OBHBASE_HTABLE_CLIENT_WRITE_BUFFER;
import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.OBHBASE_HTABLE_PUT_WRITE_BUFFER_CHECK;
import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.TABLE_CLIENT_RPC_EXECUTE_TIMEOUT;
import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.WRITE_BUFFER_HIGH_MARK;
import static com.alibaba.datax.plugin.writer.obhbasewriter.ConfigKey.WRITE_BUFFER_LOW_MARK;
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_HBASE_HTABLE_CLIENT_WRITE_BUFFER;
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK;
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_NETTY_BUFFER_HIGH_WATERMARK;
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_NETTY_BUFFER_LOW_WATERMARK;
import static com.alibaba.datax.plugin.writer.obhbasewriter.Constant.DEFAULT_RPC_EXECUTE_TIMEOUT;
import static com.alibaba.datax.plugin.writer.obhbasewriter.util.ObHbaseWriterUtils.getColumnByte;
import static com.alibaba.datax.plugin.writer.obhbasewriter.util.ObHbaseWriterUtils.getRowkey;
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_HTABLE_CLIENT_WRITE_BUFFER;
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK;
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_DATABASE;
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_FULL_USER_NAME;
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_PARAM_URL;
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_PASSWORD;
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_SYS_USER_NAME;
import static com.alipay.oceanbase.hbase.constants.OHConstants.HBASE_OCEANBASE_SYS_PASSWORD;
public class PutTask implements Runnable {
private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(PutTask.class);
private static final Logger LOG = LoggerFactory.getLogger(PutTask.class);
private ObHBaseWriteTask writerTask;
private ObHBaseWriteTask.ConcurrentTableWriter writer;
private long totalCost = 0;
private long putCount = 0;
private volatile boolean isStop; // set from another thread via setStop()
private ObHTableInfo obHTableInfo;
private final Configuration versionColumn;
// number of retries after a failed write
private final int failTryCount;
private String parentThreadName;
private Queue<List<Record>> queue;
private Configuration config;
private ServerConnectInfo connInfo;
private ObHbaseTableHolder tableHolder;
private final SimpleDateFormat df_second = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
private final SimpleDateFormat df_ms = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss SSS");
public PutTask(String parentThreadName, Queue<List<Record>> recordsQueue, Configuration config, ServerConnectInfo connectInfo, ObHTableInfo obHTableInfo, ObHBaseWriteTask writerTask) {
this.parentThreadName = parentThreadName;
this.queue = recordsQueue;
this.config = config;
this.connInfo = connectInfo;
this.obHTableInfo = obHTableInfo;
this.writerTask = writerTask;
this.versionColumn = config.getConfiguration(ConfigKey.VERSION_COLUMN);
this.failTryCount = config.getInt(Config.FAIL_TRY_COUNT, Config.DEFAULT_FAIL_TRY_COUNT);
this.isStop = false;
initTableHolder();
}
private void initTableHolder() {
try {
org.apache.hadoop.conf.Configuration c = new org.apache.hadoop.conf.Configuration();
c.set(HBASE_OCEANBASE_FULL_USER_NAME, writerTask.fullUserName);
c.set(HBASE_OCEANBASE_PASSWORD, this.connInfo.password);
c.set(HBASE_OCEANBASE_DATABASE, writerTask.dbName);
// the settings below are required by obkv-table-client
if (writerTask.useOdpMode) {
c.setBoolean(OHConstants.HBASE_OCEANBASE_ODP_MODE, true);
c.set(OHConstants.HBASE_OCEANBASE_ODP_ADDR, connInfo.host);
c.set(OHConstants.HBASE_OCEANBASE_ODP_PORT, connInfo.port);
LOG.info("sysUser and sysPassword is empty, build HTABLE in odp mode.");
} else {
c.set(HBASE_OCEANBASE_PARAM_URL, writerTask.configUrl);
c.set(HBASE_OCEANBASE_SYS_USER_NAME, this.connInfo.sysUser);
c.set(HBASE_OCEANBASE_SYS_PASSWORD, this.connInfo.sysPass);
LOG.info("sysUser and sysPassword is not empty, build HTABLE in sys mode.");
}
c.set(HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK, config.getString(OBHBASE_HTABLE_PUT_WRITE_BUFFER_CHECK, DEFAULT_HBASE_HTABLE_PUT_WRITE_BUFFER_CHECK));
c.set(HBASE_HTABLE_CLIENT_WRITE_BUFFER, config.getString(OBHBASE_HTABLE_CLIENT_WRITE_BUFFER, DEFAULT_HBASE_HTABLE_CLIENT_WRITE_BUFFER));
c.set(Property.RS_LIST_ACQUIRE_CONNECT_TIMEOUT.getKey(), "500");
c.set(Property.RS_LIST_ACQUIRE_READ_TIMEOUT.getKey(), "5000");
c.set(Property.RPC_EXECUTE_TIMEOUT.getKey(), config.getString(TABLE_CLIENT_RPC_EXECUTE_TIMEOUT, DEFAULT_RPC_EXECUTE_TIMEOUT));
c.set(Property.NETTY_BUFFER_LOW_WATERMARK.getKey(), config.getString(WRITE_BUFFER_LOW_MARK, DEFAULT_NETTY_BUFFER_LOW_WATERMARK));
c.set(Property.NETTY_BUFFER_HIGH_WATERMARK.getKey(), config.getString(WRITE_BUFFER_HIGH_MARK, DEFAULT_NETTY_BUFFER_HIGH_WATERMARK));
this.tableHolder = new ObHbaseTableHolder(c, obHTableInfo.getTableName());
} catch (Exception e) {
LOG.error("init table holder failed, reason: {}", e.getMessage());
throw new IllegalStateException(e);
}
}
private void batchWrite(final List<Record> buffer) {
HTableInterface ohTable = null;
Stopwatch stopwatch = Stopwatch.createStarted();
try {
ohTable = this.tableHolder.getOhTable();
List<Put> puts = buildBatchPutList(buffer);
ohTable.put(puts);
} catch (Exception e) {
if (Objects.isNull(ohTable)) {
LOG.error("build obHTable: {} failed. reason: {}", obHTableInfo.getTableName(), e.getMessage());
throw DataXException.asDataXException(Hbase094xWriterErrorCode.GET_HBASE_TABLE_ERROR, Hbase094xWriterErrorCode.GET_HBASE_TABLE_ERROR.getDescription());
}
LOG.error("hbase batch put error", e);
// on batch failure, retry each record of the failed batch individually
for (Record record : buffer) {
writeOneRecord(ohTable, record);
}
} finally {
this.writer.increFinishCount();
putCount++;
totalCost += stopwatch.elapsed(TimeUnit.MILLISECONDS);
try {
if (!Objects.isNull(ohTable)) {
ohTable.close();
}
} catch (Exception e) {
LOG.warn("error in closing htable: {}. Reason: {}", obHTableInfo.getFullHbaseTableName(), e.getMessage());
}
}
}
private void writeOneRecord(HTableInterface ohTable, Record record) {
int retryCount = 0;
while (retryCount < this.failTryCount) {
try {
byte[] rowkey = getRowkey(record, obHTableInfo);
Put put = new Put(rowkey); // row key
boolean hasValidValue = buildPut(put, record);
if (hasValidValue) {
ohTable.put(put);
}
break;
} catch (Exception e) {
retryCount++;
LOG.error("error in writing: " + e.getMessage() + ", retry count: " + retryCount);
if (retryCount == this.failTryCount) {
LOG.warn("ERROR : record {}", record);
this.writerTask.collectDirtyRecord(record, e);
}
}
}
}
private List<Put> buildBatchPutList(List<Record> buffer) {
List<Put> puts = new ArrayList<>();
for (Record record : buffer) {
byte[] rowkey = getRowkey(record, obHTableInfo);
Put put = new Put(rowkey); // row key
boolean hasValidValue = buildPut(put, record);
if (hasValidValue) {
puts.add(put);
}
}
return puts;
}
private boolean buildPut(Put put, Record record) {
boolean hasValidValue = false;
long timestamp = buildTimestamp(record);
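// a timestamp of -1 means no version column is configured; such puts fall back to the server-side timestamp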
for (Map.Entry<Integer, Triple<String, String, ColumnType>> columnInfo : obHTableInfo.getIndexColumnInfoMap().entrySet()) {
Integer index = columnInfo.getKey();
if (index >= record.getColumnNumber()) {
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE,
MESSAGE_SOURCE.message("normaltask.2", record.getColumnNumber(), index));
}
ColumnType columnType = columnInfo.getValue().getRight();
String familyName = columnInfo.getValue().getLeft();
String columnName = columnInfo.getValue().getMiddle();
byte[] value = getColumnByte(columnType, record.getColumn(index), obHTableInfo);
if (value != null) {
hasValidValue = true;
if (timestamp == -1) {
put.add(familyName.getBytes(), // family
columnName.getBytes(), // Q
value); // V
} else {
put.add(familyName.getBytes(), // family
columnName.getBytes(), // Q
timestamp, // timestamp/version
value); // V
}
}
}
return hasValidValue;
}
private long buildTimestamp(Record record) {
if (versionColumn == null) {
return -1;
}
int index = versionColumn.getInt(ConfigKey.INDEX);
long timestamp;
if (index == -1) {
// user specified the constant as timestamp
timestamp = versionColumn.getLong(ConfigKey.VALUE);
if (timestamp < 0) {
throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR,
MESSAGE_SOURCE.message("normaltask.4"));
}
} else {
// use the specified column as the version: long/double columns go through record.asLong();
// other types are parsed with yyyy-MM-dd HH:mm:ss SSS first, then yyyy-MM-dd HH:mm:ss
if (index >= record.getColumnNumber()) {
throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR,
MESSAGE_SOURCE.message("normaltask.5", record.getColumnNumber(), index));
}
if (record.getColumn(index).getRawData() == null) {
throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR,
MESSAGE_SOURCE.message("normaltask.6"));
}
if (record.getColumn(index) instanceof LongColumn || record.getColumn(index) instanceof DoubleColumn) {
timestamp = record.getColumn(index).asLong();
} else {
Date date;
try {
date = df_ms.parse(record.getColumn(index).asString());
} catch (ParseException e) {
try {
date = df_second.parse(record.getColumn(index).asString());
} catch (ParseException e1) {
LOG.info(MESSAGE_SOURCE.message("normaltask.7", index));
throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR, e1);
}
}
timestamp = date.getTime();
}
}
return timestamp;
}
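// Example versionColumn configurations handled above (the "index"/"value" key names are assumed
// from ConfigKey.INDEX / ConfigKey.VALUE):
//   {"index": -1, "value": 1724154616370}  -> constant version applied to every put
//   {"index": 2}                           -> version parsed from record column 2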
public void setStop() {
isStop = true;
}
public long getTotalCost() {
return totalCost;
}
public long getPutCount() {
return putCount;
}
public void destroy() {
tableHolder.destroy();
}
void setWriterTask(ObHBaseWriteTask writerTask) {
this.writerTask = writerTask;
}
void setWriter(ObHBaseWriteTask.ConcurrentTableWriter writer) {
this.writer = writer;
}
@Override
public void run() {
String currentThreadName = String.format("%s-putTask-%d", parentThreadName, Thread.currentThread().getId());
Thread.currentThread().setName(currentThreadName);
LOG.debug("Task {} start to execute...", currentThreadName);
int sleepTimes = 0;
while (!isStop) {
try {
List<Record> records = queue.poll();
if (null != records) {
batchWrite(records);
} else if (writerTask.isFinished()) {
writerTask.signalTaskFinish();
LOG.debug("not more task, thread exist ...");
break;
} else {
TimeUnit.MILLISECONDS.sleep(5);
sleepTimes++;
}
} catch (InterruptedException e) {
LOG.debug("TableWriter is interrupt");
} catch (Exception e) {
LOG.warn("ERROR UNEXPECTED {}", e);
}
}
LOG.debug("Thread exist...");
LOG.debug("sleep {} times, total sleep time: {}", sleepTimes, sleepTimes * 5);
}
}

View File

@ -0,0 +1,139 @@
/*
* Copyright (c) 2021 OceanBase ob-loader-dumper is licensed under Mulan PSL v2. You can use this software according to
* the terms and conditions of the Mulan PSL v2. You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
* BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the Mulan PSL v2 for more
* details.
*/
package com.alibaba.datax.plugin.writer.obhbasewriter.util;
import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.MessageSource;
import com.alibaba.datax.plugin.writer.obhbasewriter.ColumnType;
import com.alibaba.datax.plugin.writer.obhbasewriter.Hbase094xWriterErrorCode;
import com.alibaba.datax.plugin.writer.obhbasewriter.ObHTableInfo;
import com.alibaba.datax.plugin.writer.obhbasewriter.task.PutTask;
import java.nio.charset.Charset;
import org.apache.commons.lang3.tuple.Triple;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.util.Bytes;
/**
* @author cjyyz
* @date 2023/03/23
*/
public class ObHbaseWriterUtils {
private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(PutTask.class);
public static byte[] getRowkey(Record record, ObHTableInfo obHTableInfo) {
byte[] rowkeyBuffer = {};
for (Triple<Integer, String, ColumnType> rowKeyElement : obHTableInfo.getRowKeyElementList()) {
Integer index = rowKeyElement.getLeft();
ColumnType columnType = rowKeyElement.getRight();
if (index == -1) {
String value = rowKeyElement.getMiddle();
rowkeyBuffer = Bytes.add(rowkeyBuffer, getValueByte(columnType, value, obHTableInfo.getEncoding()));
} else {
if (index >= record.getColumnNumber()) {
throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_ROWKEY_ERROR, MESSAGE_SOURCE.message("normaltask.3", record.getColumnNumber(), index));
}
byte[] value = getColumnByte(columnType, record.getColumn(index), obHTableInfo);
rowkeyBuffer = Bytes.add(rowkeyBuffer, value);
}
}
return rowkeyBuffer;
}
public static byte[] getColumnByte(ColumnType columnType, Column column, ObHTableInfo obHTableInfo) {
byte[] bytes;
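// treat a raw null, and the literal string "null" for STRING columns, as a null value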
if (column.getRawData() != null && !(columnType == ColumnType.STRING && column.asString().equals("null"))) {
switch (columnType) {
case INT:
bytes = Bytes.toBytes(column.asLong().intValue());
break;
case LONG:
bytes = Bytes.toBytes(column.asLong());
break;
case DOUBLE:
bytes = Bytes.toBytes(column.asDouble());
break;
case FLOAT:
bytes = Bytes.toBytes(column.asDouble().floatValue());
break;
case SHORT:
bytes = Bytes.toBytes(column.asLong().shortValue());
break;
case BOOLEAN:
bytes = Bytes.toBytes(column.asBoolean());
break;
case STRING:
bytes = getValueByte(columnType, column.asString(), obHTableInfo.getEncoding());
break;
case BINARY:
bytes = Bytes.toBytesBinary(column.asString());
break;
default:
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbaseabstracttask.2", columnType));
}
} else {
switch (obHTableInfo.getNullModeType()) {
case Skip:
bytes = null;
break;
case Empty:
bytes = HConstants.EMPTY_BYTE_ARRAY;
break;
default:
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbaseabstracttask.3"));
}
}
return bytes;
}
/**
* Convert a string value to bytes according to the given column type.
*
* @param columnType target column type
* @param value string form of the value
* @param encoding charset used for STRING columns
* @return byte[]
*/
private static byte[] getValueByte(ColumnType columnType, String value, String encoding) {
byte[] bytes;
if (value != null) {
switch (columnType) {
case INT:
bytes = Bytes.toBytes(Integer.parseInt(value));
break;
case LONG:
bytes = Bytes.toBytes(Long.parseLong(value));
break;
case DOUBLE:
bytes = Bytes.toBytes(Double.parseDouble(value));
break;
case FLOAT:
bytes = Bytes.toBytes(Float.parseFloat(value));
break;
case SHORT:
bytes = Bytes.toBytes(Short.parseShort(value));
break;
case BOOLEAN:
bytes = Bytes.toBytes(Boolean.parseBoolean(value));
break;
case STRING:
bytes = value.getBytes(Charset.forName(encoding));
break;
default:
throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("hbaseabstracttask.4", columnType));
}
} else {
bytes = HConstants.EMPTY_BYTE_ARRAY;
}
return bytes;
}
}

View File

@ -0,0 +1,6 @@
{
"name": "obhbasewriter",
"class": "com.alibaba.datax.plugin.writer.obhbasewriter.ObHbaseWriter",
"description": "适用于: 生产环境. 原理: TODO",
"developer": "alibaba"
}

View File

@ -0,0 +1,287 @@
package com.alibaba.datax.plugin.reader.oceanbasev10reader.util;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
public class ExecutorTemplate<T> {
/**
* The default thread pool size. Set as the number of available processors by default.
*/
public static int DEFAULT_POOL_SIZE = Runtime.getRuntime().availableProcessors();
/**
* Indicate whether the executor closes automatically.
*/
private final boolean autoClose;
/**
* Futures of all submitted tasks.
*/
private final List<Future<T>> futures;
/**
* The underlying executor service.
*/
private final ExecutorService internalExecutor;
private final ExecutorCompletionService<T> completionService;
/**
* Set pool size for ExecutorTemplate.
*/
public static void setPoolSize(int size) {
DEFAULT_POOL_SIZE = size;
}
/**
* Uses the default queue capacity (100000) and autoClose = true.
*
* @param poolName name prefix for the pool threads
*/
public ExecutorTemplate(String poolName) {
this(defaultExecutor(poolName), true);
}
/**
* Uses the default queue capacity (100000) and autoClose = true.
*
* @param poolName name prefix for the pool threads
* @param poolSize number of pool threads
*/
public ExecutorTemplate(String poolName, int poolSize) {
this(defaultExecutor(poolName, poolSize), true);
}
public ExecutorTemplate(String poolName, int poolSize, boolean autoClose) {
this(defaultExecutor(poolName, poolSize), autoClose);
}
/**
* Uses the default queue capacity (100000).
*
* @param poolName name prefix for the pool threads
* @param autoClose whether to shut the executor down after waitForResult()
*/
public ExecutorTemplate(String poolName, boolean autoClose) {
this(defaultExecutor(poolName), autoClose);
}
/**
* Wraps the given executor with autoClose = true.
*
* @param executor the executor to wrap
*/
public ExecutorTemplate(ExecutorService executor) {
this(executor, true);
}
/**
* @param executor
*/
public ExecutorTemplate(ExecutorService executor, boolean autoClose) {
this.autoClose = autoClose;
this.internalExecutor = executor;
this.completionService = new ExecutorCompletionService<>(executor);
this.futures = Collections.synchronizedList(new ArrayList<>());
}
/**
* @param poolName
* @return ExecutorService
*/
public static ExecutorService defaultExecutor(String poolName) {
return defaultExecutor(100000, poolName, DEFAULT_POOL_SIZE);
}
/**
* @param poolName
* @param poolSize
* @return ExecutorService
*/
public static ExecutorService defaultExecutor(String poolName, int poolSize) {
return defaultExecutor(100000, poolName, poolSize);
}
/**
* @param capacity
* @param poolName
* @return ExecutorService
*/
public static ExecutorService defaultExecutor(int capacity, String poolName, int poolSize) {
return new ThreadPoolExecutor(poolSize, poolSize, 30, TimeUnit.SECONDS,
new ArrayBlockingQueue<>(capacity), new NamedThreadFactory(poolName));
}
/**
* Submit a callable task
*
* @param task
*/
public void submit(Callable<T> task) {
Future<T> f = this.completionService.submit(task);
futures.add(f);
check(f);
}
/**
* Submit a runnable task
*
* @param task
*/
public void submit(Runnable task) {
Future<T> f = this.completionService.submit(task, null);
futures.add(f);
check(f);
}
/**
* Wait for all submitted tasks to finish and return their results.
*
* @return List<T>
*/
public List<T> waitForResult() {
try {
int index = 0;
Throwable ex = null;
List<T> result = new ArrayList<T>();
while (index < futures.size()) {
try {
Future<T> f = this.completionService.take();
result.add(f.get());
} catch (Throwable e) {
ex = getRootCause(e);
break;
}
index++;
}
if (ex != null) {
cancelAll();
throw new RuntimeException(ex);
} else {
return result;
}
} finally {
clearFutures();
if (autoClose) {
destroyExecutor();
}
}
}
/**
* Cancel all tasks that have not completed yet.
*/
public void cancelAll() {
for (Future<T> f : futures) {
if (!f.isDone() && !f.isCancelled()) {
f.cancel(false);
}
}
}
/**
* Clear all recorded futures.
*/
public void clearFutures() {
this.futures.clear();
}
/**
* Shut down the internal executor service.
*/
public void destroyExecutor() {
if (internalExecutor != null && !internalExecutor.isShutdown()) {
this.internalExecutor.shutdown();
try {
// brief bounded wait; restore the interrupt flag if interrupted
this.internalExecutor.awaitTermination(5, TimeUnit.SECONDS);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
}
}
/**
* Fast check the future
*
* @param f
*/
private void check(Future<T> f) {
if (f != null && f.isDone()) {
try {
f.get();
} catch (Throwable e) {
cancelAll();
throw new RuntimeException(e);
}
}
}
/**
* Walk the cause chain and return the root cause, guarding against cycles.
*
* @param throwable the thrown exception
* @return Throwable
*/
private Throwable getRootCause(Throwable throwable) {
final Throwable holder = throwable;
final List<Throwable> list = new ArrayList<>();
while (throwable != null && !list.contains(throwable)) {
list.add(throwable);
throwable = throwable.getCause();
}
return list.size() < 2 ? holder : list.get(list.size() - 1);
}
/**
* An internal named thread factory
*/
static class NamedThreadFactory implements ThreadFactory {
/**
* Whether created threads are daemon threads.
*/
private final boolean daemon;
/**
* Name prefix for created threads.
*/
private final String name;
/**
* Sequence used to number created threads.
*/
private final AtomicInteger seq = new AtomicInteger(0);
/**
* @param name
*/
public NamedThreadFactory(String name) {
this(name, false);
}
/**
* @param name
* @param daemon
*/
public NamedThreadFactory(String name, boolean daemon) {
this.name = name;
this.daemon = daemon;
}
@Override
public Thread newThread(Runnable r) {
Thread t = new Thread(r);
t.setDaemon(daemon);
t.setPriority(Thread.NORM_PRIORITY);
t.setName(name + seq.incrementAndGet());
return t;
}
}
}
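A minimal usage sketch of the template above (pool name and task bodies are placeholders):

```java
// Submit a few parallel tasks and wait for all of them to finish.
ExecutorTemplate<Integer> template = new ExecutorTemplate<>("demo-pool", 4);
for (int i = 0; i < 8; i++) {
    final int sliceId = i;
    template.submit(() -> {
        // placeholder work: a real caller would read one data slice here
        return sliceId * sliceId;
    });
}
// waitForResult() rethrows the root cause of the first failed task and,
// since autoClose defaults to true here, also shuts the pool down.
List<Integer> results = template.waitForResult();
```

Note that the check() call inside submit() fails fast: if an already-completed future threw, all remaining tasks are cancelled immediately.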

View File

@ -28,6 +28,16 @@
<groupId>com.alibaba.datax</groupId>
<artifactId>plugin-rdbms-util</artifactId>
<version>${datax-project-version}</version>
<exclusions>
<exclusion>
<artifactId>guava</artifactId>
<groupId>com.google.guava</groupId>
</exclusion>
<exclusion>
<groupId>com.alibaba</groupId>
<artifactId>druid</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
@ -43,13 +53,11 @@
<version>4.0.4.RELEASE</version>
<scope>test</scope>
</dependency>
<!--
<dependency>
<groupId>com.alipay.oceanbase</groupId>
<artifactId>oceanbase-partition</artifactId>
<version>0.0.5</version>
<groupId>com.alibaba</groupId>
<artifactId>druid</artifactId>
<version>1.2.18</version>
</dependency>
-->
<dependency>
<groupId>com.alipay.oceanbase</groupId>
@ -64,6 +72,19 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.oceanbase</groupId>
<artifactId>oceanbase-client</artifactId>
<version>2.4.11</version>
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.oceanbase</groupId>
<artifactId>shade-ob-partition-calculator</artifactId>
@ -72,8 +93,13 @@
<systemPath>${pom.basedir}/src/main/libs/shade-ob-partition-calculator-1.0-SNAPSHOT.jar</systemPath>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>27.0-jre</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.16</version>
@ -89,6 +115,152 @@
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.oceanbase</groupId>
<artifactId>obkv-table-client</artifactId>
<version>1.2.6</version>
<exclusions>
<exclusion>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
</exclusion>
<exclusion>
<!-- add dependency in parent project -->
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<!-- add dependency in parent project -->
<groupId>com.oceanbase</groupId>
<artifactId>oceanbase-client</artifactId>
</exclusion>
<exclusion>
<!-- add dependency in parent project -->
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
<exclusion>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
</exclusion>
<exclusion>
<groupId>com.alipay.sofa.common</groupId>
<artifactId>sofa-common-tools</artifactId>
</exclusion>
<!-- codec module -->
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-codec-dns</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-codec-http</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-codec-http2</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-codec-haproxy</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-codec-mqtt</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-codec-memcache</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-codec-redis</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-codec-smtp</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-codec-socks</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-codec-stomp</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-codec-xml</artifactId>
</exclusion>
<!-- handler module -->
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-handler-proxy</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-handler-ssl-ocsp</artifactId>
</exclusion>
<!-- resolver module -->
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-resolver-dns</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-resolver-dns-classes-macos</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-resolver-dns-native-macos</artifactId>
</exclusion>
<!-- transport module -->
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-transport-rxtx</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-transport-udt</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-transport-sctp</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.alipay.sofa.common</groupId>
<artifactId>sofa-common-tools</artifactId>
<version>1.3.11</version>
<exclusions>
<exclusion>
<!-- add dependency in parent project -->
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<!-- add dependency in parent project -->
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.83</version>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>${mysql.driver.version}</version>
</dependency>
</dependencies>
<build>

View File

@ -39,6 +39,13 @@
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>obhbasereader/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>drdsreader/target/datax/</directory>
<includes>
@ -476,6 +483,13 @@
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>obhbasewriter/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>gdbwriter/target/datax/</directory>
<includes>

View File

@ -33,6 +33,17 @@
<version>${mysql.driver.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.oceanbase</groupId>
<artifactId>oceanbase-client</artifactId>
<version>2.4.11</version>
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>

View File

@ -16,6 +16,8 @@ public class ObVersion implements Comparable<ObVersion> {
private int patchNumber;
public static final ObVersion V2276 = valueOf("2.2.76");
public static final ObVersion V2252 = valueOf("2.2.52");
public static final ObVersion V3 = valueOf("3.0.0.0");
public static final ObVersion V4000 = valueOf("4.0.0.0");
private static final ObVersion DEFAULT_VERSION =

View File

@ -0,0 +1,37 @@
package com.alibaba.datax.plugin.rdbms.util;
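/**
 * One slice of a split table read: the begin and end boundary values
 * plus the range expression describing the slice.
 */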
public class SplitedSlice {
private String begin;
private String end;
private String range;
public SplitedSlice(String begin, String end, String range) {
this.begin = begin;
this.end = end;
this.range = range;
}
public String getBegin() {
return begin;
}
public void setBegin(String begin) {
this.begin = begin;
}
public String getEnd() {
return end;
}
public void setEnd(String end) {
this.end = end;
}
public String getRange() {
return range;
}
public void setRange(String range) {
this.range = range;
}
}

View File

@ -55,6 +55,7 @@
<module>oraclereader</module>
<module>cassandrareader</module>
<module>oceanbasev10reader</module>
<module>obhbasereader</module>
<module>rdbmsreader</module>
<module>odpsreader</module>
@ -93,6 +94,7 @@
<module>kingbaseeswriter</module>
<module>adswriter</module>
<module>oceanbasev10writer</module>
<module>obhbasewriter</module>
<module>adbpgwriter</module>
<module>hologresjdbcwriter</module>
<module>rdbmswriter</module>

View File

@ -1,92 +1,61 @@
{
"core":{
"transport":{
"channel":{
"speed":{
"byte":10485760
"core": {
"transport": {
"channel": {
"speed": {
"byte": 10485760
}
}
}
},
"job":{
"content":[
"job": {
"content": [
{
"reader":{
"name":"streamreader",
"parameter":{
"column":[
{
"type":"string",
"value":"DataX"
},
{
"type":"int",
"value":19890604
},
{
"type":"date",
"value":"1989-06-04 00:00:00"
},
{
"type":"bool",
"value":true
},
{
"type":"string",
"value":"test"
}
],
"sliceRecordCount":1000000
}
},
"writer":{
"name":"selectdbwriter",
"parameter":{
"loadUrl":[
"reader": {},
"writer": {
"name": "selectdbwriter",
"parameter": {
"loadUrl": [
"xxx:35871"
],
"loadProps":{
"file.type":"json",
"file.strip_outer_array":"true"
"loadProps": {
"file.type": "json",
"file.strip_outer_array": "true"
},
"database":"db1",
"column":[
"database": "db1",
"column": [
"k1",
"k2",
"k3",
"k4",
"k5"
],
"username":"admin",
"password":"SelectDB2022",
"postSql":[
],
"preSql":[
],
"connection":[
"username": "admin",
"password": "SelectDB2022",
"postSql": [],
"preSql": [],
"connection": [
{
"jdbcUrl":"jdbc:mysql://xxx:32386/cl_test",
"table":[
"jdbcUrl": "jdbc:mysql://xxx:32386/cl_test",
"table": [
"test_selectdb"
],
"selectedDatabase":"cl_test"
"selectedDatabase": "cl_test"
}
],
"maxBatchRows":200000,
"batchSize":53687091200
"maxBatchRows": 200000,
"batchSize": 53687091200
}
}
}
],
"setting":{
"errorLimit":{
"percentage":0.02,
"record":0
"setting": {
"errorLimit": {
"percentage": 0.02,
"record": 0
},
"speed":{
"byte":10485760
"speed": {
"byte": 10485760
}
}
}

View File

@ -47,7 +47,7 @@ dx_replace(1,"5","10","****") column 1's value “dataxTest” => "datax****"
4. dx_filter (correlated filters are not yet supported, i.e. joint conditions over multiple fields: the function parameters would be too complex and hard for users to use)
* Parameters:
* First parameter: the column index, i.e. which field of the record to test.
* Second parameter: an operator; supported operators: like, not like, >, =, <, >=, !=, <=
* Third parameter: a regular expression (Java regex) or a literal value.
* Returns:
* If the value matches the regular expression, null is returned and the whole row is filtered out; if it does not match, the row is kept. Note that it is the entire row that is affected. For >, =, < the comparison is applied directly to the field value.
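For instance, a dx_filter entry that drops every row whose column 0 matches a regex might look like the sketch below; the columnIndex/paras layout is assumed to mirror the other transformers configured later in this document:

```json
{
  "name": "dx_filter",
  "parameter": {
    "columnIndex": 0,
    "paras": ["like", "^data.*"]
  }
}
```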
@ -145,11 +145,11 @@ String code3 = "Column column = record.getColumn(1);\n" +
"type": "string"
},
{
"value": 19890604,
"value": 1724154616370,
"type": "long"
},
{
"value": "1989-06-04 00:00:00",
"value": "2024-01-01 00:00:00",
"type": "date"
},
{
@ -157,11 +157,11 @@ String code3 = "Column column = record.getColumn(1);\n" +
"type": "bool"
},
{
"value": "test",
"value": "TestRawData",
"type": "bytes"
}
],
"sliceRecordCount": 100000
"sliceRecordCount": 100
}
},
"writer": {
@ -174,38 +174,44 @@ String code3 = "Column column = record.getColumn(1);\n" +
"transformer": [
{
"name": "dx_substr",
"parameter":
{
"columnIndex":5,
"paras":["1","3"]
}
"parameter": {
"columnIndex": 5,
"paras": [
"1",
"3"
]
}
},
{
"name": "dx_replace",
"parameter":
{
"columnIndex":4,
"paras":["3","4","****"]
}
"parameter": {
"columnIndex": 4,
"paras": [
"3",
"4",
"****"
]
}
},
{
"name": "dx_digest",
"parameter":
{
"columnIndex":3,
"paras":["md5", "toLowerCase"]
}
"parameter": {
"columnIndex": 3,
"paras": [
"md5",
"toLowerCase"
]
}
},
{
"name": "dx_groovy",
"parameter":
{
"code": "//groovy code//",
"extraPackage":[
"import somePackage1;",
"import somePackage2;"
]
}
"parameter": {
"code": "//groovy code//",
"extraPackage": [
"import somePackage1;",
"import somePackage2;"
]
}
}
]
}