Last active
May 3, 2019 13:32
-
-
Save shatestest/34c6f7fda3e24499080b6d03e2bbb45d to your computer and use it in GitHub Desktop.
Issue with setting the JDBC partitionColumn type to Date in Spark 2.4.1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Map(driver -> oracle.jdbc.OracleDriver, | |
numPartitions -> 2000, | |
timestampFormat -> yyyy-mm-dd hh:mm:ss, | |
upperBound -> 2019-05-01 23:59:59.0, | |
lowerBound -> 2002-03-31 00:00:00.0, | |
partitionColumn -> DATA_DATE, | |
url -> jdbc:oracle:thin:@, | |
user -> user1, password -> pwd1, | |
customSchema -> DATA_DATE TIMESTAMP, DATA_ITEM_VALUE_NUMERIC DECIMAL(38, 15) , MODEL_FAMILY_ID INTEGER , FISCAL_YEAR INTEGER , FISCAL_QUARTER INTEGER, | |
fetchsize -> 10000) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class DbUtils{ | |
public static Dataset<Row> readOracleData( DataFrameReader ora_df_reader, String schema , String partitionColumn, String query, String customSchema) throws SQLException{ | |
DataFrameReader local_ora_df_reader = ora_df_reader; | |
Map<String,String> oraConfOptions = ConfigUtils.getMoreOracleConfigOptions(); | |
if(! oraConfOptions.isEmpty()){ | |
local_ora_df_reader.options(oraConfOptions); | |
local_ora_df_reader.option("partitionColumn", partitionColumn); | |
} | |
if(!customSchema.isEmpty()){ | |
local_ora_df_reader.option("customSchema", customSchema ); | |
} | |
Dataset<Row> ss = local_ora_df_reader | |
.option("inferSchema", true) | |
.option("schema",schema) | |
.option("dbtable", query) | |
.load(); | |
return ss; | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
java.lang.IllegalArgumentException: Timestamp format must be yyyy-mm-dd hh:mm:ss[.fffffffff] | |
at java.sql.Timestamp.valueOf(Timestamp.java:204) | |
at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation$.toInternalBoundValue(JDBCRelation.scala:179) | |
at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation$.columnPartition(JDBCRelation.scala:88) | |
at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:36) | |
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:318) | |
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:223) | |
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:211) | |
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:167) | |
at com.snp.utils.DbUtils.readOracleData(DbUtils.java:62) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Extractor{ | |
public void extract( DataFrameReader oraOptionDfConfig, SparkSession sparkSession , String keyspace, | |
String columnFamilyName , String fromDate , String toDate ) { | |
String o_query = getQuery(fromDate , toDate); | |
System.out.println(" dynamic query : " + o_query); | |
logger.info(" dynamic query : " + o_query); | |
/* | |
* EXTRACTION - PHASE | |
* Load the data from oracle for given schema and query. | |
*/ | |
oraOptionDfConfig.option("lowerBound", "2002-03-31 00:00:00" ); | |
oraOptionDfConfig.option("upperBound", "2019-05-01 23:59:59"); | |
oraOptionDfConfig.option("numPartitions", 240); | |
// oraOptionDfConfig.option("timestampFormat", "yyyy-MM-dd HH:mm:ss"); // tried with this too | |
Dataset<Row> ora_raw_benchmark_model_vals_df = DbUtils.readOracleData(oraOptionDfConfig, ORACLE_SCHEMA_RSCS , | |
PARTITION_COLUMN, o_query , BENCHMARK_MODEL_VALS_CUSTOM_SCHEMA); | |
} | |
} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
root | |
|-- MODEL_FAMILY_ID: integer (nullable = true) | |
|-- DATA_DATE: timestamp (nullable = true) | |
|-- DATA_ITEM_CODE: string (nullable = true) | |
|-- FISCAL_YEAR: integer (nullable = true) | |
|-- FISCAL_QUARTER: integer (nullable = true) | |
|-- CREATE_DATE: timestamp (nullable = true) | |
|-- LAST_UPDATE_DATE: timestamp (nullable = true) | |
+---------------+-------------------+--------------+-----------+--------------+-------------------+----------------+ | |
|MODEL_FAMILY_ID| DATA_DATE|DATA_ITEM_CODE|FISCAL_YEAR|FISCAL_QUARTER| CREATE_DATE|LAST_UPDATE_DATE| | |
+---------------+-------------------+--------------+-----------+--------------+-------------------+----------------+ | |
| 3|2012-06-12 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
| 3|2012-06-12 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
| 3|2012-06-12 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
| 3|2012-06-12 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
| 3|2012-06-12 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
| 3|2012-06-05 00:00:00| PD_4YR| null| null|2016-10-15 14:19:03| null| | |
| 3|2012-06-05 00:00:00| PD_4YR| null| null|2016-10-15 14:19:03| null| | |
| 3|2012-06-07 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
| 3|2012-06-25 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
| 3|2012-06-25 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
| 3|2012-06-25 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
| 3|2012-06-28 00:00:00| PD_4YR| null| null|2016-10-15 11:04:31| null| | |
+---------------+-------------------+--------------+-----------+--------------+-------------------+----------------+ | |
only showing top 20 rows |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment