@shatestest
Last active May 3, 2019 13:32
Issue with setting JDBC partitionColumn type to Date in Spark 2.4.1
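Reading an Oracle table through the Spark 2.4.1 JDBC source, with a DATE column (mapped to TIMESTAMP via customSchema) as the partitionColumn, fails with the IllegalArgumentException shown below. The effective options map, the read helper, the stack trace, and the calling code follow.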
Effective JDBC options on the DataFrameReader:

Map(driver -> oracle.jdbc.OracleDriver,
    numPartitions -> 2000,
    timestampFormat -> yyyy-mm-dd hh:mm:ss,
    upperBound -> 2019-05-01 23:59:59.0,
    lowerBound -> 2002-03-31 00:00:00.0,
    partitionColumn -> DATA_DATE,
    url -> jdbc:oracle:thin:@,
    user -> user1,
    password -> pwd1,
    customSchema -> DATA_DATE TIMESTAMP, DATA_ITEM_VALUE_NUMERIC DECIMAL(38, 15), MODEL_FAMILY_ID INTEGER, FISCAL_YEAR INTEGER, FISCAL_QUARTER INTEGER,
    fetchsize -> 10000)
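The bound values above are ultimately handed to java.sql.Timestamp.valueOf (the frame at the top of the stack trace below), which accepts only the JDBC escape syntax yyyy-[m]m-[d]d hh:mm:ss[.f...]. A minimal standalone sketch of that parse behavior; the sample strings are chosen for illustration:

import java.sql.Timestamp;

public class BoundParseCheck {
    public static void main(String[] args) {
        // Strings in the JDBC escape syntax parse cleanly:
        System.out.println(Timestamp.valueOf("2002-03-31 00:00:00.0")); // ok
        System.out.println(Timestamp.valueOf("2019-05-01 23:59:59"));   // ok, fraction is optional

        // A date-only string (or an ISO 'T' separator) throws the exact
        // IllegalArgumentException seen in the stack trace below.
        try {
            Timestamp.valueOf("2019-05-01");
        } catch (IllegalArgumentException e) {
            System.out.println(e.getMessage());
            // prints: Timestamp format must be yyyy-mm-dd hh:mm:ss[.fffffffff]
        }
    }
}

Note that the pattern in that error message is the literal JDBC escape syntax, not a SimpleDateFormat pattern, so the lowercase mm/hh in the timestampFormat option above is unrelated to this particular check.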
import java.sql.SQLException;
import java.util.Map;

import org.apache.spark.sql.DataFrameReader;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

public class DbUtils {

    public static Dataset<Row> readOracleData(DataFrameReader ora_df_reader, String schema,
            String partitionColumn, String query, String customSchema) throws SQLException {
        DataFrameReader local_ora_df_reader = ora_df_reader;

        // ConfigUtils is the gist author's helper (not shown); it supplies the
        // shared Oracle options (driver, url, bounds, numPartitions, ...).
        Map<String, String> oraConfOptions = ConfigUtils.getMoreOracleConfigOptions();
        if (!oraConfOptions.isEmpty()) {
            local_ora_df_reader.options(oraConfOptions);
            local_ora_df_reader.option("partitionColumn", partitionColumn);
        }
        if (!customSchema.isEmpty()) {
            local_ora_df_reader.option("customSchema", customSchema);
        }

        // Load the query result as a partitioned JDBC read.
        // NB: "inferSchema" and "schema" are CSV-style options; the JDBC
        // source does not appear to use them.
        Dataset<Row> ss = local_ora_df_reader
                .option("inferSchema", true)
                .option("schema", schema)
                .option("dbtable", query)
                .load();
        return ss;
    }
}
java.lang.IllegalArgumentException: Timestamp format must be yyyy-mm-dd hh:mm:ss[.fffffffff]
at java.sql.Timestamp.valueOf(Timestamp.java:204)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation$.toInternalBoundValue(JDBCRelation.scala:179)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation$.columnPartition(JDBCRelation.scala:88)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:36)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:318)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:223)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:211)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:167)
at com.snp.utils.DbUtils.readOracleData(DbUtils.java:62)
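Judging from the frames above, JDBCRelation.toInternalBoundValue in Spark 2.4.x passes the raw lowerBound/upperBound strings straight to Timestamp.valueOf when the partition column resolves to a timestamp type, so the failure happens before any query is sent to Oracle. A small defensive check run before load() can surface the offending string directly; BoundValidator and requireJdbcTimestamp are hypothetical names for this sketch:

import java.sql.Timestamp;

// Hypothetical helper: validate the bound strings the same way the trace
// shows Spark 2.4.x does, before calling load().
final class BoundValidator {
    static void requireJdbcTimestamp(String name, String value) {
        try {
            Timestamp.valueOf(value); // the same parse the JDBC relation performs
        } catch (IllegalArgumentException e) {
            throw new IllegalArgumentException(
                    name + " must be yyyy-[m]m-[d]d hh:mm:ss[.f...] but was: '" + value + "'", e);
        }
    }
}

For example, BoundValidator.requireJdbcTimestamp("lowerBound", oraConfOptions.get("lowerBound")) would fail fast with exactly the bound string Spark would otherwise choke on.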
class Extractor {

    public void extract(DataFrameReader oraOptionDfConfig, SparkSession sparkSession, String keyspace,
            String columnFamilyName, String fromDate, String toDate) throws SQLException {

        String o_query = getQuery(fromDate, toDate);
        System.out.println(" dynamic query : " + o_query);
        logger.info(" dynamic query : " + o_query);

        /*
         * EXTRACTION PHASE
         * Load the data from Oracle for the given schema and query.
         */
        oraOptionDfConfig.option("lowerBound", "2002-03-31 00:00:00");
        oraOptionDfConfig.option("upperBound", "2019-05-01 23:59:59");
        oraOptionDfConfig.option("numPartitions", 240);
        // oraOptionDfConfig.option("timestampFormat", "yyyy-MM-dd HH:mm:ss"); // tried this too

        Dataset<Row> ora_raw_benchmark_model_vals_df = DbUtils.readOracleData(oraOptionDfConfig, ORACLE_SCHEMA_RSCS,
                PARTITION_COLUMN, o_query, BENCHMARK_MODEL_VALS_CUSTOM_SCHEMA);
    }
}
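Two things stand out when reading extract() together with DbUtils.readOracleData. First, timestampFormat is, as far as I can tell, a CSV/JSON source option that the JDBC source ignores, which would explain why the commented-out attempt above changed nothing. Second, extract() sets lowerBound/upperBound before calling readOracleData, which then applies the whole oraConfOptions map; if that map carries its own bounds (the printed options at the top suggest it does, with a trailing .0), they silently overwrite the values set here. A sketch of applying the shared map first and pinning the partitioning options last, so the exact strings reaching Spark are known; the names and literal bounds are taken from the snippets above:

import java.util.Map;
import org.apache.spark.sql.DataFrameReader;

public final class JdbcBounds {
    // Sketch: apply the shared option map first, then set the partitioning
    // options last, so map entries that carry their own lowerBound/upperBound
    // cannot overwrite the values intended here.
    public static DataFrameReader withPinnedBounds(DataFrameReader reader,
            Map<String, String> sharedOptions, String partitionColumn) {
        return reader
                .options(sharedOptions)
                .option("partitionColumn", partitionColumn)
                .option("lowerBound", "2002-03-31 00:00:00")  // must satisfy Timestamp.valueOf
                .option("upperBound", "2019-05-01 23:59:59")
                .option("numPartitions", 240);
    }
}

When the read does go through, the resulting DataFrame has the schema and sample rows shown below: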
root
|-- MODEL_FAMILY_ID: integer (nullable = true)
|-- DATA_DATE: timestamp (nullable = true)
|-- DATA_ITEM_CODE: string (nullable = true)
|-- FISCAL_YEAR: integer (nullable = true)
|-- FISCAL_QUARTER: integer (nullable = true)
|-- CREATE_DATE: timestamp (nullable = true)
|-- LAST_UPDATE_DATE: timestamp (nullable = true)
+---------------+-------------------+--------------+-----------+--------------+-------------------+----------------+
|MODEL_FAMILY_ID| DATA_DATE|DATA_ITEM_CODE|FISCAL_YEAR|FISCAL_QUARTER| CREATE_DATE|LAST_UPDATE_DATE|
+---------------+-------------------+--------------+-----------+--------------+-------------------+----------------+
| 3|2012-06-12 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
| 3|2012-06-12 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
| 3|2012-06-12 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
| 3|2012-06-12 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
| 3|2012-06-12 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
| 3|2012-06-05 00:00:00| PD_4YR| null| null|2016-10-15 14:19:03| null|
| 3|2012-06-05 00:00:00| PD_4YR| null| null|2016-10-15 14:19:03| null|
| 3|2012-06-07 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
| 3|2012-06-25 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
| 3|2012-06-25 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
| 3|2012-06-25 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null|
+---------------+-------------------+--------------+-----------+--------------+-------------------+----------------+
only showing top 20 rows