Skip to content

Instantly share code, notes, and snippets.

View dmateusp's full-sized avatar
:octocat:

Daniel Mateus Pires dmateusp

:octocat:
View GitHub Profile
@dmateusp
dmateusp / settings.json
Created May 15, 2026 10:36
[~/.claude/settings.json] Terminal notifications for Claude Code + VSCode on macOS using terminal-notifier: focuses the VSCode window that Claude notified from and adds the git branch to the notification
{
"effortLevel": "medium",
"model": "sonnet[1m]",
"hooks": {
"PreToolUse": [
{
"matcher": "AskUserQuestion",
"hooks": [
{
"type": "command",
@dmateusp
dmateusp / RLESpark.scala
Created October 10, 2022 20:31
Sorting a Dataframe before writing it out in Spark to compare file sizes
package main
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.SaveMode
/** A user record.
  *
  * @param id     numeric identifier of the user
  * @param `type` string label for the user; backticks are required because `type` is a Scala keyword
  */
case class User(
  id: Int,
  `type`: String
)
object Main {
val spark = SparkSession
.builder()
@dmateusp
dmateusp / cloudSettings
Created September 2, 2020 12:11
Visual Studio Code Settings Sync Gist
{"lastUpload":"2020-09-02T12:11:43.678Z","extensionVersion":"v3.4.3"}
@dmateusp
dmateusp / AfterRefactor.scala
Last active August 12, 2022 11:00
DataFrame.transform - Spark Function Composition
// load me with :load AfterRefactor.scala
// Alias for a DataFrame-to-DataFrame function; used as the return type of the transform builders in this file.
type Transform = DataFrame => DataFrame
/** Builds a transform that groups a DataFrame by the given columns and sums its "amount" column.
  *
  * The aggregate column is not aliased, so it keeps the name Spark generates for it.
  *
  * @param by grouping columns, forwarded to `groupBy`
  * @return a function from the input DataFrame to the aggregated DataFrame
  */
def sumAmounts(by: Column*): Transform = { input =>
  val totalAmount = sum(col("amount"))
  input.groupBy(by: _*).agg(totalAmount)
}
def extractPayerBeneficiary(columnName: String): Transform =
df =>
df.withColumn(
@dmateusp
dmateusp / FinalExampleTransform.scala
Created June 4, 2019 07:03
DataFrame.transform - Spark Function Composition - final functions examples
// Variant 1: one transform call per step
dfTransactions
  .transform(extractPayerBeneficiary("details"))
  .transform(
    sumAmounts(date_trunc("day", col("ts")), col("details_beneficiary"))
  )
// Variant 2: fuse both steps into a single function with andThen, then apply it once
dfTransactions.transform(
  extractPayerBeneficiary("details")
    .andThen(sumAmounts(date_trunc("day", col("ts")), col("details_beneficiary")))
)
// compose
@dmateusp
dmateusp / ExampleWithTransform.scala
Last active June 4, 2019 06:49
DataFrame.transform - Spark Function Composition - Methods with transform
// Each function receives the DataFrame explicitly through a lambda passed to transform;
// the result is then filtered on the generated aggregate column and printed.
dfTransactions
  .transform(df => extractPayerBeneficiary("details", df))
  .transform(df => sumAmounts(df, date_trunc("day", col("ts")), col("details_beneficiary")))
  .filter(col("sum(amount)") > 25)
  .show()
@dmateusp
dmateusp / ExampleTransformLibRefactor.scala
Last active June 5, 2019 19:14
DataFrame.transform - Spark Function Composition - Functions post refactor
/** Returns a function that groups its input by `by` and sums the "amount" column.
  *
  * @param by grouping columns, forwarded to `groupBy`
  * @return a DataFrame => DataFrame function suitable for `DataFrame.transform`
  */
def sumAmounts(by: Column*): DataFrame => DataFrame =
  (df: DataFrame) => {
    val grouped = df.groupBy(by: _*)
    grouped.agg(sum(col("amount")))
  }
def extractPayerBeneficiary(columnName: String): DataFrame => DataFrame =
df =>
df.withColumn(
s"${columnName}_payer",
regexp_extract(
col(columnName),
"paid by ([A-Z])",
@dmateusp
dmateusp / ExampleTransformLibPreRefactorUsage.scala
Last active June 5, 2019 19:12
DataFrame.transform - Spark Function Composition - Functions pre refactor usage
// Nested-call usage: the innermost call runs first, and its result is passed as the
// DataFrame argument of sumAmounts. The aggregate is then filtered and printed.
sumAmounts(
  extractPayerBeneficiary("details", dfTransactions),
  col("details_beneficiary"),
  date_trunc("day", col("ts"))
).filter(col("sum(amount)") > 25)
  .show()
@dmateusp
dmateusp / ExampleTransformLibPreRefactor.scala
Last active June 5, 2019 19:16
DataFrame.transform - Spark Function Composition - Functions pre refactor
/** Groups `df` by the given columns and sums its "amount" column.
  *
  * @param df input DataFrame; must contain an "amount" column
  * @param by grouping columns, forwarded to `groupBy`
  * @return the aggregated DataFrame
  */
def sumAmounts(df: DataFrame, by: Column*): DataFrame = {
  val amountTotal = sum(col("amount"))
  df.groupBy(by: _*).agg(amountTotal)
}
def extractPayerBeneficiary(columnName: String, df: DataFrame): DataFrame =
df.withColumn(
s"${columnName}_payer",
regexp_extract(
col(columnName),
"paid by ([A-Z])",
1
@dmateusp
dmateusp / Transactions.scala
Last active June 3, 2019 10:07
DataFrame.transform - Spark Function Composition - Transactions
/** A transaction record.
  *
  * @param details free-text description of the transaction
  * @param amount  transaction amount
  * @param ts      timestamp of the transaction
  */
case class Transaction(
  details: String,
  amount: Int,
  ts: Timestamp
)
// Four Transaction rows converted to a DataFrame.
// Timestamps are written as strings and parsed with Timestamp.valueOf when each row is built.
val dfTransactions: DataFrame = {
  val rawRows = Seq(
    ("paid by A to X", 100, "2018-01-05 08:00:00"),
    ("paid by B to X", 10, "2018-01-05 11:00:00"),
    ("paid by C to Y", 15, "2018-01-06 12:00:00"),
    ("paid by D to Z", 50, "2018-01-06 15:00:00")
  )
  val transactions = rawRows.map { case (details, amount, ts) =>
    Transaction(details, amount, Timestamp.valueOf(ts))
  }
  transactions.toDF
}