This commit is contained in:
thinking24k 2025-04-10 16:20:58 +08:00 committed by GitHub
commit fa666eb76d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 126 additions and 119 deletions

View File

@ -10,116 +10,120 @@ MongoDBReader通过Datax框架从MongoDB并行的读取数据通过主控的J
#### 3 功能说明
* 该示例从MongoDB读一份数据到ODPS。
{
"job": {
"setting": {
"speed": {
"channel": 2
}
},
"content": [
{
"reader": {
"name": "mongodbreader",
"parameter": {
"address": ["127.0.0.1:27017"],
"userName": "",
"userPassword": "",
"dbName": "tag_per_data",
"collectionName": "tag_data12",
"column": [
{
"name": "unique_id",
"type": "string"
},
{
"name": "sid",
"type": "string"
},
{
"name": "user_id",
"type": "string"
},
{
"name": "auction_id",
"type": "string"
},
{
"name": "content_type",
"type": "string"
},
{
"name": "pool_type",
"type": "string"
},
{
"name": "frontcat_id",
"type": "Array",
"spliter": ""
},
{
"name": "categoryid",
"type": "Array",
"spliter": ""
},
{
"name": "gmt_create",
"type": "string"
},
{
"name": "taglist",
"type": "Array",
"spliter": " "
},
{
"name": "property",
"type": "string"
},
{
"name": "scorea",
"type": "int"
},
{
"name": "scoreb",
"type": "int"
},
{
"name": "scorec",
"type": "int"
}
]
}
},
"writer": {
"name": "odpswriter",
"parameter": {
"project": "tb_ai_recommendation",
"table": "jianying_tag_datax_read_test01",
"column": [
"unique_id",
"sid",
"user_id",
"auction_id",
"content_type",
"pool_type",
"frontcat_id",
"categoryid",
"gmt_create",
"taglist",
"property",
"scorea",
"scoreb"
],
"accessId": "**************",
"accessKey": "********************",
"truncate": true,
"odpsServer": "xxx/api",
"tunnelServer": "xxx"
}
}
}
]
}
{
"job": {
"setting": {
"speed": {
"channel": 2
}
},
"content": [
{
"reader": {
"name": "mongodbreader",
"parameter": {
"address": ["127.0.0.1:27017"],
"userName": "",
"userPassword": "",
"dbName": "tag_per_data",
"collectionName": "tag_data12",
"column": [
{
"name": "unique_id",
"type": "string"
},
{
"name": "sid",
"type": "string"
},
{
"name": "user_id",
"type": "string"
},
{
"name": "auction_id",
"type": "string"
},
{
"name": "content_type",
"type": "string"
},
{
"name": "pool_type",
"type": "string"
},
{
"name": "frontcat_id",
"type": "Array",
"splitter": ""
},
{
"name": "categoryid",
"type": "Array",
"splitter": ""
},
{
"name": "gmt_create",
"type": "string"
},
{
"name": "taglist",
"type": "Array",
"splitter": " "
},
{
"name": "property",
"type": "string"
},
{
"name": "scorea",
"type": "int"
},
{
"name": "scoreb",
"type": "int"
},
{
"name": "scorec",
"type": "int"
},
{
"name": "appid_list",
"type": "json"
}
]
}
},
"writer": {
"name": "odpswriter",
"parameter": {
"project": "tb_ai_recommendation",
"table": "jianying_tag_datax_read_test01",
"column": [
"unique_id",
"sid",
"user_id",
"auction_id",
"content_type",
"pool_type",
"frontcat_id",
"categoryid",
"gmt_create",
"taglist",
"property",
"scorea",
"scoreb"
],
"accessId": "**************",
"accessKey": "********************",
"truncate": true,
"odpsServer": "xxx/api",
"tunnelServer": "xxx"
}
}
}
]
}
}
#### 4 参数说明
@ -133,17 +137,18 @@ MongoDBReader通过Datax框架从MongoDB并行的读取数据通过主控的J
* type：Column的类型。【选填】
* splitter：因为MongoDB支持数组类型，但是Datax框架本身不支持数组类型，所以mongoDB读出来的数组类型要通过这个分隔符合并成字符串。【选填】
* query: MongoDB的额外查询条件。【选填】
* json：type 的一种取值（即 `"type": "json"`）。因为MongoDB支持子文档和数组类型子文档，但是Datax框架本身不支持，所以mongoDB读出来的数据通过JSON序列化成字符串。
#### 5 类型转换
| DataX 内部类型| MongoDB 数据类型 |
| -------- | ----- |
| Long | int, Long |
| Double | double |
| String | string, array |
| Date | date |
| Boolean | boolean |
| Bytes | bytes |
| DataX 内部类型 | MongoDB 数据类型 |
|------------|---------------|
| Long | int, Long |
| Double | double |
| String | string, array |
| Date | date |
| Boolean | boolean |
| Bytes | bytes |
| String     | json          |
#### 6 性能报告

View File

@ -173,6 +173,8 @@ public class MongoDBReader extends Reader {
String tempArrayStr = Joiner.on(splitter).join(array);
record.addColumn(new StringColumn(tempArrayStr));
}
}else if ("json".equalsIgnoreCase(column.getString("type"))) {
record.addColumn(new StringColumn(JSON.toJSONString(tempCol)));
} else {
record.addColumn(new StringColumn(tempCol.toString()));
}