From 24e80b55bba090519e3194e9464374501cfb537c Mon Sep 17 00:00:00 2001 From: XuDaojie Date: Tue, 17 Aug 2021 10:43:33 +0800 Subject: [PATCH 1/2] =?UTF-8?q?feature:=20=E6=96=B0=E5=A2=9EDigestTransfor?= =?UTF-8?q?mer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../transformer/DigestTransformer.java | 87 +++++++++++++++++++ .../GroovyTransformerStaticUtil.java | 8 ++ .../transformer/TransformerRegistry.java | 1 + 3 files changed, 96 insertions(+) create mode 100644 core/src/main/java/com/alibaba/datax/core/transport/transformer/DigestTransformer.java diff --git a/core/src/main/java/com/alibaba/datax/core/transport/transformer/DigestTransformer.java b/core/src/main/java/com/alibaba/datax/core/transport/transformer/DigestTransformer.java new file mode 100644 index 00000000..d2bf1431 --- /dev/null +++ b/core/src/main/java/com/alibaba/datax/core/transport/transformer/DigestTransformer.java @@ -0,0 +1,87 @@ +package com.alibaba.datax.core.transport.transformer; + +import com.alibaba.datax.common.element.Column; +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.element.StringColumn; +import com.alibaba.datax.common.exception.DataXException; +import com.alibaba.datax.transformer.Transformer; + +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.lang.StringUtils; + +import java.util.Arrays; + +/** + * no comments. + * + * @author XuDaojie + * @since 2021-08-16 + */ +public class DigestTransformer extends Transformer { + + private static final String MD5 = "md5"; + private static final String SHA1 = "sha1"; + private static final String TO_UPPER_CASE = "toUpperCase"; + private static final String TO_LOWER_CASE = "toLowerCase"; + + public DigestTransformer() { + setTransformerName("dx_digest"); + } + + @Override + public Record evaluate(Record record, Object... paras) { + + int columnIndex; + String type; + String charType; + + try { + if (paras.length != 3) { + throw new RuntimeException("dx_digest paras length must be 3"); + } + + columnIndex = (Integer) paras[0]; + type = (String) paras[1]; + charType = (String) paras[2]; + + if (!StringUtils.equalsIgnoreCase(MD5, type) && !StringUtils.equalsIgnoreCase(SHA1, type)) { + throw new RuntimeException("dx_digest paras index 1 must be md5 or sha1"); + } + if (!StringUtils.equalsIgnoreCase(TO_UPPER_CASE, charType) && !StringUtils.equalsIgnoreCase(TO_LOWER_CASE, charType)) { + throw new RuntimeException("dx_digest paras index 2 must be toUpperCase or toLowerCase"); + } + } catch (Exception e) { + throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_ILLEGAL_PARAMETER, "paras:" + Arrays.asList(paras) + " => " + e.getMessage()); + } + + Column column = record.getColumn(columnIndex); + + try { + String oriValue = column.asString(); + + // 如果字段为空,作为空字符串处理 + if (oriValue == null) { + oriValue = ""; + } + String newValue; + if (MD5.equals(type)) { + newValue = DigestUtils.md5Hex(oriValue); + } else { + newValue = DigestUtils.sha1Hex(oriValue); + } + + if (TO_UPPER_CASE.equals(charType)) { + newValue = newValue.toUpperCase(); + } else { + newValue = newValue.toLowerCase(); + } + + record.setColumn(columnIndex, new StringColumn(newValue)); + + } catch (Exception e) { + throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_RUN_EXCEPTION, e.getMessage(), e); + } + return record; + } + +} diff --git a/core/src/main/java/com/alibaba/datax/core/transport/transformer/GroovyTransformerStaticUtil.java b/core/src/main/java/com/alibaba/datax/core/transport/transformer/GroovyTransformerStaticUtil.java index 4c872993..487a8be8 100644 --- a/core/src/main/java/com/alibaba/datax/core/transport/transformer/GroovyTransformerStaticUtil.java +++ b/core/src/main/java/com/alibaba/datax/core/transport/transformer/GroovyTransformerStaticUtil.java @@ -1,10 +1,18 @@ package com.alibaba.datax.core.transport.transformer; +import org.apache.commons.codec.digest.DigestUtils; + /** * GroovyTransformer的帮助类,供groovy代码使用,必须全是static的方法 * Created by liqiang on 16/3/4. */ public class GroovyTransformerStaticUtil { + public static String md5(final String data) { + return DigestUtils.md5Hex(data); + } + public static String sha1(final String data) { + return DigestUtils.sha1Hex(data); + } } diff --git a/core/src/main/java/com/alibaba/datax/core/transport/transformer/TransformerRegistry.java b/core/src/main/java/com/alibaba/datax/core/transport/transformer/TransformerRegistry.java index 96a0d988..3c625153 100644 --- a/core/src/main/java/com/alibaba/datax/core/transport/transformer/TransformerRegistry.java +++ b/core/src/main/java/com/alibaba/datax/core/transport/transformer/TransformerRegistry.java @@ -36,6 +36,7 @@ public class TransformerRegistry { registTransformer(new ReplaceTransformer()); registTransformer(new FilterTransformer()); registTransformer(new GroovyTransformer()); + registTransformer(new DigestTransformer()); } public static void loadTransformerFromLocalStorage() { From 9af62e5aa063ace4d845c5044866a40fb80585d9 Mon Sep 17 00:00:00 2001 From: XuDaojie Date: Fri, 20 Aug 2021 10:38:10 +0800 Subject: [PATCH 2/2] update: transformer.md --- transformer/doc/transformer.md | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/transformer/doc/transformer.md b/transformer/doc/transformer.md index 247ab39b..260c0fb6 100644 --- a/transformer/doc/transformer.md +++ b/transformer/doc/transformer.md @@ -59,7 +59,17 @@ dx_replace(1,"5","10","****") column 1的value为“dataxTest”=>"data****" dx_filter(1,"like","dataTest") dx_filter(1,">=","10") ``` -5. dx_groovy +5. dx_digest +* 参数:3个 + * 第一个参数:字段编号,对应record中第几个字段。 + * 第二个参数:hash类型,md5、sha1 + * 第三个参数:hash值大小写 toUpperCase(大写)、toLowerCase(小写) +* 返回: 返回指定类型的hashHex,如果字段为空,则转为空字符串,再返回对应hashHex +* 举例: +``` +dx_digest(1,"md5","toUpperCase"), column 1的值为 xyzzzzz => 9CDFFC4FA4E45A99DB8BBCD762ACFFA2 +``` +6. dx_groovy * 参数。 * 第一个参数: groovy code * 第二个参数(列表或者为空):extraPackage @@ -67,7 +77,9 @@ dx_filter(1,">=","10") * dx_groovy只能调用一次。不能多次调用。 * groovy code中支持java.lang, java.util的包,可直接引用的对象有record,以及element下的各种column(BoolColumn.class,BytesColumn.class,DateColumn.class,DoubleColumn.class,LongColumn.class,StringColumn.class)。不支持其他包,如果用户有需要用到其他包,可设置extraPackage,注意extraPackage不支持第三方jar包。 * groovy code中,返回更新过的Record(比如record.setColumn(columnIndex, new StringColumn(newValue));),或者null。返回null表示过滤此行。 - * 用户可以直接调用静态的Util方式(GroovyTransformerStaticUtil),目前GroovyTransformerStaticUtil的方法列表 (按需补充): + * 用户可以直接调用静态的Util方式(GroovyTransformerStaticUtil),目前GroovyTransformerStaticUtil的方法列表: + * md5(String):String + * sha1(String):String * 举例: ``` groovy 实现的subStr: @@ -109,7 +121,7 @@ String code3 = "Column column = record.getColumn(1);\n" + ``` ## Job定义 -* 本例中,配置3个UDF。 +* 本例中,配置4个UDF。 ``` { @@ -176,6 +188,14 @@ String code3 = "Column column = record.getColumn(1);\n" + "paras":["3","4","****"] } }, + { + "name": "dx_digest", + "parameter": + { + "columnIndex":3, + "paras":["md5", "toLowerCase"] + } + }, { "name": "dx_groovy", "parameter":