From 6dd6a4fc863074d570919b183fdf5c20e86c5e0b Mon Sep 17 00:00:00 2001 From: Szabolcs Vasas Date: Wed, 31 Oct 2018 16:48:54 +0100 Subject: [PATCH] SQOOP-2949: SQL Syntax error when split-by column is of character type and min or max value has single quote inside it (Fero Szabo via Szabolcs Vasas) --- .../sqoop/mapreduce/db/TextSplitter.java | 17 +- .../sqoop/importjob/SplitByImportTest.java | 153 ++++++++++++++++++ ...ericImportJobSplitByTestConfiguration.java | 78 +++++++++ 3 files changed, 245 insertions(+), 3 deletions(-) create mode 100644 src/test/org/apache/sqoop/importjob/SplitByImportTest.java create mode 100644 src/test/org/apache/sqoop/importjob/configuration/GenericImportJobSplitByTestConfiguration.java diff --git a/src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java b/src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java index 22bbfe68..f8d92287 100644 --- a/src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java +++ b/src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java @@ -167,24 +167,35 @@ public List split(int numSplits, String minString, // Convert the BigDecimal splitPoints into their string representations. for (BigDecimal bd : splitPoints) { - splitStrings.add(commonPrefix + bigDecimalToString(bd)); + splitStrings.add(escapeSingleQuotesInSql(commonPrefix + bigDecimalToString(bd))); } // Make sure that our user-specified boundaries are the first and last // entries in the array. 
if (splitStrings.size() == 0 || !splitStrings.get(0).equals(commonPrefix + minString)) { - splitStrings.add(0, commonPrefix + minString); + splitStrings.add(0, escapeSingleQuotesInSql(commonPrefix + minString)); } if (splitStrings.size() == 1 || !splitStrings.get(splitStrings.size() - 1).equals( commonPrefix + maxString)) { - splitStrings.add(commonPrefix + maxString); + splitStrings.add(escapeSingleQuotesInSql(commonPrefix + maxString)); } return splitStrings; } + /** + * Return string after escaping single quotes + */ + private String escapeSingleQuotesInSql (String val) { + if (val == null) { + return null; + } + + return val.replaceAll("'", "''"); + } + private static final BigDecimal ONE_PLACE = new BigDecimal(65536); // Maximum number of characters to convert. This is to prevent rounding diff --git a/src/test/org/apache/sqoop/importjob/SplitByImportTest.java b/src/test/org/apache/sqoop/importjob/SplitByImportTest.java new file mode 100644 index 00000000..7977c0b0 --- /dev/null +++ b/src/test/org/apache/sqoop/importjob/SplitByImportTest.java @@ -0,0 +1,153 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.importjob; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.sqoop.SqoopOptions; +import org.apache.sqoop.importjob.configuration.GenericImportJobSplitByTestConfiguration; +import org.apache.sqoop.testutil.ArgumentArrayBuilder; +import org.apache.sqoop.testutil.ImportJobTestCase; +import org.apache.sqoop.testutil.adapter.DatabaseAdapter; +import org.apache.sqoop.testutil.adapter.MSSQLServerDatabaseAdapter; +import org.apache.sqoop.testutil.adapter.MySqlDatabaseAdapter; +import org.apache.sqoop.testutil.adapter.OracleDatabaseAdapter; +import org.apache.sqoop.testutil.adapter.PostgresDatabaseAdapter; +import org.apache.sqoop.util.ParquetReader; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.List; + +import static java.util.Arrays.asList; +import static org.junit.Assert.assertEquals; + +@RunWith(Parameterized.class) +public class SplitByImportTest extends ImportJobTestCase { + + public static final Log LOG = LogFactory.getLog(SplitByImportTest.class.getName()); + + private Configuration conf = new Configuration(); + + private final ImportJobTestConfiguration configuration; + private final DatabaseAdapter adapter; + + @Parameters(name = "Adapter: {0}| Config: {1}") + public static 
Iterable<? extends Object[]> testConfigurations() { + GenericImportJobSplitByTestConfiguration testConfiguration = new GenericImportJobSplitByTestConfiguration(); + return asList( + new Object[] {new OracleDatabaseAdapter(), testConfiguration}, + new Object[] {new PostgresDatabaseAdapter(), testConfiguration}, + new Object[] {new MSSQLServerDatabaseAdapter(), testConfiguration}, + new Object[] {new MySqlDatabaseAdapter(), testConfiguration} + ); + } + + public SplitByImportTest(DatabaseAdapter adapter, ImportJobTestConfiguration configuration) { + this.adapter = adapter; + this.configuration = configuration; + } + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Override + protected Configuration getConf() { + return conf; + } + + @Override + protected boolean useHsqldbTestServer() { + return false; + } + + @Override + protected String getConnectString() { + return adapter.getConnectionString(); + } + + @Override + protected SqoopOptions getSqoopOptions(Configuration conf) { + SqoopOptions opts = new SqoopOptions(conf); + adapter.injectConnectionParameters(opts); + return opts; + } + + @Override + protected void dropTableIfExists(String table) throws SQLException { + adapter.dropTableIfExists(table, getManager()); + } + + @Before + public void setUp() { + super.setUp(); + String[] names = configuration.getNames(); + String[] types = configuration.getTypes(); + createTableWithColTypesAndNames(names, types, new String[0]); + List<String[]> inputData = configuration.getSampleData(); + for (String[] input : inputData) { + insertIntoTable(names, types, input); + } + } + + @After + public void tearDown() { + try { + dropTableIfExists(getTableName()); + } catch (SQLException e) { + LOG.warn("Error trying to drop table on tearDown: " + e); + } + super.tearDown(); + } + + private ArgumentArrayBuilder getArgsBuilder() { + return new ArgumentArrayBuilder() + .withCommonHadoopFlags(true) + .withProperty("org.apache.sqoop.splitter.allow_text_splitter","true") + 
.withOption("warehouse-dir", getWarehouseDir()) + .withOption("num-mappers", "2") + .withOption("table", getTableName()) + .withOption("connect", getConnectString()) + .withOption("split-by", GenericImportJobSplitByTestConfiguration.NAME_COLUMN) + .withOption("as-parquetfile"); + } + + @Test + public void testSplitBy() throws IOException { + ArgumentArrayBuilder builder = getArgsBuilder(); + String[] args = builder.build(); + runImport(args); + verifyParquetFile(); + } + + private void verifyParquetFile() { + ParquetReader reader = new ParquetReader(new Path(getWarehouseDir() + "/" + getTableName()), getConf()); + assertEquals(asList(configuration.getExpectedResults()), reader.readAllInCsvSorted()); + } +} diff --git a/src/test/org/apache/sqoop/importjob/configuration/GenericImportJobSplitByTestConfiguration.java b/src/test/org/apache/sqoop/importjob/configuration/GenericImportJobSplitByTestConfiguration.java new file mode 100644 index 00000000..f137b56b --- /dev/null +++ b/src/test/org/apache/sqoop/importjob/configuration/GenericImportJobSplitByTestConfiguration.java @@ -0,0 +1,78 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.sqoop.importjob.configuration; + +import org.apache.commons.lang3.StringUtils; +import org.apache.sqoop.importjob.ImportJobTestConfiguration; + +import java.util.ArrayList; +import java.util.List; + +/** + * Test configuration for SQOOP-2949: the sample data in the split-by column contains single quotes, + * so the split boundary predicates generated by TextSplitter must escape them to remain valid SQL. + */ +public class GenericImportJobSplitByTestConfiguration implements ImportJobTestConfiguration { + + public static final String NAME_COLUMN = "NAME"; + public static final char SEPARATOR = ','; + + List<String[]> data = new ArrayList<>(); + { + data.add(new String[]{"ID_1", "Mr T."}); + data.add(new String[]{"ID_2", "D'Artagnan"}); + data.add(new String[]{"ID_3", "Jean D'Arc"}); + data.add(new String[]{"ID_4", "Jeremy Renner"}); + } + + List<String[]> escapedData = new ArrayList<>(); + { + escapedData.add(new String[]{"'ID_1'", "'Mr T.'"}); + escapedData.add(new String[]{"'ID_2'", "'D''Artagnan'"}); + escapedData.add(new String[]{"'ID_3'", "'Jean D''Arc'"}); + escapedData.add(new String[]{"'ID_4'", "'Jeremy Renner'"}); + } + + @Override + public String[] getTypes() { + return new String[]{"VARCHAR(20)", "VARCHAR(20)"}; + } + + @Override + public String[] getNames() { + return new String[]{"ID", NAME_COLUMN}; + } + + @Override + public List<String[]> getSampleData() { + return new ArrayList<>(escapedData); + } + + @Override + public String[] getExpectedResults() { + return data.stream() + .map(element -> StringUtils.join(element, SEPARATOR)) + .toArray(String[]::new); + } + + @Override + public String toString() { + return getClass().getSimpleName(); + } +}