Mirror of https://github.com/apache/sqoop.git (synced 2025-05-03 02:20:24 +08:00)
SQOOP-435: Avro import should write the Schema to a file
(James Anderson via Jarek Jarcec Cecho)
This commit is contained in:
parent 8407118126
commit a555a1f31a
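With this change, the Avro import path writes the generated Avro schema to a <schemaName>.avsc file: it is first created in the JAR output directory and then moved next to the generated code (see the writeAvroSchema helper in the diff below). As a minimal sketch of how such an emitted schema file could be consumed afterwards, assuming hypothetical file names SomeTable.avsc and part-m-00000.avro (these names are not part of the patch):

import java.io.File;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class ReadImportedAvro {
  public static void main(String[] args) throws IOException {
    // Hypothetical path; the patch names the file schema.getName() + ".avsc".
    Schema schema = new Schema.Parser().parse(new File("SomeTable.avsc"));

    // Use the written schema as the reader schema for an imported Avro data file.
    GenericDatumReader<GenericRecord> datumReader =
        new GenericDatumReader<GenericRecord>(schema);
    DataFileReader<GenericRecord> reader =
        new DataFileReader<GenericRecord>(new File("part-m-00000.avro"), datumReader);
    try {
      for (GenericRecord record : reader) {
        System.out.println(record);
      }
    } finally {
      reader.close();
    }
  }
}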
@@ -18,9 +18,12 @@
package org.apache.sqoop.mapreduce;

import java.io.File;
import java.io.IOException;
import java.sql.SQLException;

import org.apache.avro.Schema;
import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.LongWritable;
@@ -32,6 +35,7 @@
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities;

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.config.ConfigurationHelper;
import com.cloudera.sqoop.lib.LargeObjectLoader;
@@ -83,12 +87,35 @@ protected void configureMapper(Job job, String tableName,
      AvroSchemaGenerator generator = new AvroSchemaGenerator(options,
          connManager, tableName);
      Schema schema = generator.generate();

      try {
        writeAvroSchema(schema);
      } catch (final IOException e) {
        LOG.error("Error while writing Avro schema.", e);
      }

      AvroJob.setMapOutputSchema(job.getConfiguration(), schema);
    }

    job.setMapperClass(getMapperClass());
  }

  private void writeAvroSchema(final Schema schema) throws IOException {
    // Generate schema in JAR output directory.
    final File schemaFile = new File(options.getJarOutputDir(), schema.getName() + ".avsc");

    LOG.info("Writing Avro schema file: " + schemaFile);
    FileUtils.forceMkdir(schemaFile.getParentFile());
    FileUtils.writeStringToFile(schemaFile, schema.toString(true), null);

    // Copy schema to code output directory.
    try {
      FileUtils.moveFileToDirectory(schemaFile, new File(options.getCodeOutputDir()), true);
    } catch (final IOException e) {
      LOG.debug("Could not move Avro schema file to code output directory.", e);
    }
  }

  @Override
  protected Class<? extends Mapper> getMapperClass() {
    if (options.getHCatTableName() != null) {
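A note on the writeAvroSchema helper above: the file name comes from Schema.getName(), the schema is written as pretty-printed JSON via schema.toString(true), and a failed move to the code output directory is only logged at DEBUG level rather than failing the job. The sketch below, using a hypothetical hand-written schema rather than one produced by AvroSchemaGenerator, shows that a schema serialized this way parses back to an equal Schema, which is what the new test in the next file asserts:

import java.io.File;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.commons.io.FileUtils;

public class SchemaRoundTrip {
  public static void main(String[] args) throws IOException {
    // Hypothetical schema; Sqoop builds the real one from the table's column types.
    Schema original = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"FOO\",\"fields\":"
        + "[{\"name\":\"ID\",\"type\":[\"null\",\"int\"]}]}");

    // Same calls as writeAvroSchema: pretty-printed JSON, default platform encoding
    // ((String) null mirrors the null encoding argument in the patch).
    File schemaFile = new File(original.getName() + ".avsc");
    FileUtils.writeStringToFile(schemaFile, original.toString(true), (String) null);

    // Parsing the file back yields an equal Schema.
    Schema reloaded = new Schema.Parser().parse(schemaFile);
    System.out.println(original.equals(reloaded)); // true
  }
}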
@@ -18,6 +18,7 @@
package com.cloudera.sqoop;

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.sql.SQLException;
@@ -157,6 +158,8 @@ private void avroImportTestHelper(String[] extraArgs, String codec)
    if (codec != null) {
      assertEquals(codec, reader.getMetaString(DataFileConstants.CODEC));
    }

    checkSchemaFile(schema);
  }

  public void testOverrideTypeMapping() throws IOException {
@@ -235,4 +238,9 @@ private DataFileReader<GenericRecord> read(Path filename) throws IOException {
    return new DataFileReader<GenericRecord>(fsInput, datumReader);
  }

  private void checkSchemaFile(final Schema schema) throws IOException {
    final File schemaFile = new File(schema.getName() + ".avsc");
    assertTrue(schemaFile.exists());
    assertEquals(schema, new Schema.Parser().parse(schemaFile));
  }
}