From c2027ed102085f6e789f77ef1342a7df1dc750f4 Mon Sep 17 00:00:00 2001 From: dingbo Date: Fri, 19 Nov 2021 16:38:55 +0800 Subject: [PATCH] migrate datax.py to python3, and compatible with python2 --- core/src/main/bin/datax.py | 114 +++++++++++++++++++++---------------- userGuid.md | 2 +- 2 files changed, 65 insertions(+), 51 deletions(-) diff --git a/core/src/main/bin/datax.py b/core/src/main/bin/datax.py index 1099ed3a..4811ae8d 100755 --- a/core/src/main/bin/datax.py +++ b/core/src/main/bin/datax.py @@ -1,23 +1,26 @@ #!/usr/bin/env python # -*- coding:utf-8 -*- -import sys -import os -import signal -import subprocess -import time -import re -import socket -import json -from optparse import OptionParser -from optparse import OptionGroup -from string import Template import codecs +import json +import os import platform +import re +import signal +import socket +import subprocess +import sys +import time +from optparse import OptionGroup +from optparse import OptionParser +from string import Template + +ispy2 = sys.version_info.major == 2 def isWindows(): return platform.system() == 'Windows' + DATAX_HOME = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) DATAX_VERSION = 'DATAX-OPENSOURCE-3.0' @@ -52,13 +55,19 @@ def getLocalIp(): def suicide(signum, e): global child_process - print >> sys.stderr, "[Error] DataX receive unexpected signal %d, starts to suicide." % (signum) + if ispy2: + print >> sys.stderr, "[Error] DataX receive unexpected signal %d, starts to suicide." % (signum) + else: + print("[Error] DataX receive unexpected signal %d, starts to suicide." % (signum), sys.stderr) if child_process: child_process.send_signal(signal.SIGQUIT) time.sleep(1) child_process.kill() - print >> sys.stderr, "DataX Process was killed ! you did ?" + if ispy2: + print >> sys.stderr, "DataX Process was killed ! you did ?" + else: + print("DataX Process was killed ! you did ?", sys.stderr) sys.exit(RET_STATE["KILL"]) @@ -92,10 +101,10 @@ def getOptionParser(): 'if you have mutiple parameters: -p"-DtableName=your-table-name -DcolumnName=your-column-name".' 'Note: you should config in you job tableName with ${tableName}.') prodEnvOptionGroup.add_option("-r", "--reader", metavar="", - action="store", dest="reader",type="string", + action="store", dest="reader", type="string", help='View job config[reader] template, eg: mysqlreader,streamreader') prodEnvOptionGroup.add_option("-w", "--writer", metavar="", - action="store", dest="writer",type="string", + action="store", dest="writer", type="string", help='View job config[writer] template, eg: mysqlwriter,streamwriter') parser.add_option_group(prodEnvOptionGroup) @@ -108,45 +117,50 @@ def getOptionParser(): parser.add_option_group(devEnvOptionGroup) return parser + def generateJobConfigTemplate(reader, writer): - readerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n" % (reader,reader,reader) - writerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n " % (writer,writer,writer) - print readerRef - print writerRef + readerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n" % ( + reader, reader, reader) + writerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n " % ( + writer, writer, writer) + print(readerRef) + print(writerRef) jobGuid = 'Please save the following configuration as a json file and use\n python {DATAX_HOME}/bin/datax.py {JSON_FILE_NAME}.json \nto run the job.\n' - print jobGuid - jobTemplate={ - "job": { - "setting": { - "speed": { - "channel": "" - } - }, - "content": [ - { - "reader": {}, - "writer": {} - } - ] - } + print(jobGuid) + jobTemplate = { + "job": { + "setting": { + "speed": { + "channel": "" + } + }, + "content": [ + { + "reader": {}, + "writer": {} + } + ] + } } - readerTemplatePath = "%s/plugin/reader/%s/plugin_job_template.json" % (DATAX_HOME,reader) - writerTemplatePath = "%s/plugin/writer/%s/plugin_job_template.json" % (DATAX_HOME,writer) + readerTemplatePath = "%s/plugin/reader/%s/plugin_job_template.json" % (DATAX_HOME, reader) + writerTemplatePath = "%s/plugin/writer/%s/plugin_job_template.json" % (DATAX_HOME, writer) try: - readerPar = readPluginTemplate(readerTemplatePath); - except Exception, e: - print "Read reader[%s] template error: can\'t find file %s" % (reader,readerTemplatePath) + readerPar = readPluginTemplate(readerTemplatePath) + except: + print("Read reader[%s] template error: can\'t find file %s" % (reader, readerTemplatePath)) try: - writerPar = readPluginTemplate(writerTemplatePath); - except Exception, e: - print "Read writer[%s] template error: : can\'t find file %s" % (writer,writerTemplatePath) - jobTemplate['job']['content'][0]['reader'] = readerPar; - jobTemplate['job']['content'][0]['writer'] = writerPar; - print json.dumps(jobTemplate, indent=4, sort_keys=True) + writerPar = readPluginTemplate(writerTemplatePath) + except: + print("Read writer[%s] template error: : can\'t find file %s" % (writer, writerTemplatePath)) + jobTemplate['job']['content'][0]['reader'] = readerPar + jobTemplate['job']['content'][0]['writer'] = writerPar + print(json.dumps(jobTemplate, indent=4, sort_keys=True)) + def readPluginTemplate(plugin): with open(plugin, 'r') as f: - return json.load(f) + return json.load(f) + def isUrl(path): if not path: @@ -168,7 +182,7 @@ def buildStartCommand(options, args): if options.remoteDebug: tempJVMCommand = tempJVMCommand + " " + REMOTE_DEBUG_CONFIG - print 'local ip: ', getLocalIp() + print('local ip: ', getLocalIp()) if options.loglevel: tempJVMCommand = tempJVMCommand + " " + ("-Dloglevel=%s" % (options.loglevel)) @@ -198,11 +212,11 @@ def buildStartCommand(options, args): def printCopyright(): - print ''' + print(''' DataX (%s), From Alibaba ! Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved. -''' % DATAX_VERSION +''' % DATAX_VERSION) sys.stdout.flush() @@ -211,7 +225,7 @@ if __name__ == "__main__": parser = getOptionParser() options, args = parser.parse_args(sys.argv[1:]) if options.reader is not None and options.writer is not None: - generateJobConfigTemplate(options.reader,options.writer) + generateJobConfigTemplate(options.reader, options.writer) sys.exit(RET_STATE['OK']) if len(args) != 1: parser.print_help() diff --git a/userGuid.md b/userGuid.md index 153c8111..16771a5e 100644 --- a/userGuid.md +++ b/userGuid.md @@ -10,7 +10,7 @@ DataX本身作为数据同步框架,将不同数据源的同步抽象为从源 - Linux - [JDK(1.8以上,推荐1.8) ](http://www.oracle.com/technetwork/cn/java/javase/downloads/index.html) -- [Python(推荐Python2.6.X) ](https://www.python.org/downloads/) +- [Python(2或3都可以) ](https://www.python.org/downloads/) - [Apache Maven 3.x](https://maven.apache.org/download.cgi) (Compile DataX) # Quick Start