tdfirth · April 8, 2024 03:27 · tdfirth · Feb 22, 2024
diff --git a/.Asof SQL Queries b/.Asof SQL Queries
 How to think about asof queries.
diff --git a/1_orders.csv b/1_orders.csv
diff --git a/2_post_views.csv b/2_post_views.csv
diff --git a/3_window_query.sql b/3_window_query.sql
 -- For every order, find the latest post view by the same user that happened
 -- at or before the order time (an "as-of" lookup written with a window function).
 with candidates as (
   -- Pair each order with all of that user's post views up to the order time,
   -- then number them from most recent to oldest within each order.
   -- p.id is a tie-breaker: when several views share a timestamp the row that
   -- gets rn = 1 would otherwise be arbitrary.
   select
     o.user_id as user_id,
     o.id as order_id,
     p.id as post_id,
     o.timestamp as order_time,
     p.timestamp as post_view_time,
     row_number() over (
       partition by o.id
       order by p.timestamp desc, p.id desc
     ) as rn
   from "orders.csv" as o
   -- inner join: an order with no post view at or before it yields no row.
   inner join "post_views.csv" as p
     on o.user_id = p.user_id
     and o.timestamp >= p.timestamp
 )
 -- Keep only the top-ranked (most recent) post view per order.
 select
   user_id,
   order_id,
   post_id,
   order_time,
   post_view_time,
   rn
 from candidates
 where rn = 1
 order by order_time desc;
diff --git a/4_window_output.csv b/4_window_output.csv
diff --git a/5_asof_query.sql b/5_asof_query.sql
 -- For every order, attach a post view by the same user whose timestamp is at
 -- or before the order time, using an ASOF join instead of a window function.
 select
   o.user_id,
   o.id as order_id,
   p.id as post_id,
   o.timestamp as order_time,
   p.timestamp as post_view_time
 from "orders.csv" as o
 asof join "post_views.csv" as p
   -- equality condition: only compare rows belonging to the same user
   on o.user_id = p.user_id
   -- inequality condition: the post view must not be later than the order
   and o.timestamp >= p.timestamp
 order by order_time desc;
diff --git a/6_asof_output.csv b/6_asof_output.csv
diff --git a/7_window_plan.txt b/7_window_plan.txt
 ┌─────────────────────────────┐
 │┌───────────────────────────┐│
 ││       Physical Plan       ││
 │└───────────────────────────┘│
 └─────────────────────────────┘
 ┌───────────────────────────┐                             
 │          ORDER_BY         │                             
 │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │                             
 │          ORDERS:          │                             
 │ candidates.order_time DESC│                             
 └─────────────┬─────────────┘                                                          
 ┌─────────────┴─────────────┐                             
 │         PROJECTION        │                             
 │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │                             
 │          user_id          │                             
 │          order_id         │                             
 │          post_id          │                             
 │         order_time        │                             
 │       post_view_time      │                             
 │             rn            │                             
 └─────────────┬─────────────┘                                                          
 ┌─────────────┴─────────────┐                             
 │         PROJECTION        │                             
 │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │                             
 │             #0            │                             
 │             #1            │                             
 │             #2            │                             
 │             #4            │                             
 │             #5            │                             
 │             #6            │                             
 └─────────────┬─────────────┘                                                          
 ┌─────────────┴─────────────┐                             
 │           FILTER          │                             
 │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │                             
 │          (rn = 1)         │                             
 │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │                             
 │           EC: 23          │                             
 └─────────────┬─────────────┘                                                          
 ┌─────────────┴─────────────┐                             
 │           WINDOW          │                             
 │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │                             
 │     ROW_NUMBER() OVER     │                             
 │(PARTITION BY id ORDER...  │                             
 │      DESC NULLS LAST)     │                             
 └─────────────┬─────────────┘                                                          
 ┌─────────────┴─────────────┐                             
 │         HASH_JOIN         │                             
 │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │                             
 │           INNER           │                             
 │     user_id = user_id     ├──────────────┐              
 │   timestamp <= timestamp  │              │              
 │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │              │              
 │           EC: 23          │              │              
 └─────────────┬─────────────┘              │                                           
 ┌─────────────┴─────────────┐┌─────────────┴─────────────┐
 │READ_CSV_AUTO (MULTI-T...  ││READ_CSV_AUTO (MULTI-T...  │
 │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   ││   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
 │          user_id          ││          user_id          │
 │         timestamp         ││         timestamp         │
 │             id            ││             id            │
 │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   ││   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
 │           EC: 23          ││           EC: 7           │
 └───────────────────────────┘└───────────────────────────┘
diff --git a/8_asof_plan.txt b/8_asof_plan.txt
 ┌─────────────────────────────┐
 │┌───────────────────────────┐│
 ││       Physical Plan       ││
 │└───────────────────────────┘│
 └─────────────────────────────┘
 ┌───────────────────────────┐                             
 │          ORDER_BY         │                             
 │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │                             
 │          ORDERS:          │                             
 │     o."timestamp" DESC    │                             
 └─────────────┬─────────────┘                                                          
 ┌─────────────┴─────────────┐                             
 │         PROJECTION        │                             
 │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │                             
 │          user_id          │                             
 │          order_id         │                             
 │          post_id          │                             
 │         order_time        │                             
 │       post_view_time      │                             
 └─────────────┬─────────────┘                                                          
 ┌─────────────┴─────────────┐                             
 │         ASOF_JOIN         │                             
 │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │                             
 │           INNER           │                             
 │     user_id = user_id     ├──────────────┐              
 │   timestamp >= timestamp  │              │              
 │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │              │              
 │           EC: 23          │              │              
 └─────────────┬─────────────┘              │                                           
 ┌─────────────┴─────────────┐┌─────────────┴─────────────┐
 │READ_CSV_AUTO (MULTI-T...  ││READ_CSV_AUTO (MULTI-T...  │
 │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   ││   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
 │          user_id          ││          user_id          │
 │         timestamp         ││         timestamp         │
 │             id            ││             id            │
 │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   ││   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
 │           EC: 7           ││           EC: 23          │
 └───────────────────────────┘└───────────────────────────┘
id	user_id	timestamp
1	1	2/21/2024
2	1	2/17/2024
3	2	2/20/2024
4	3	2/19/2024
5	3	2/15/2024
6	4	2/18/2024
id	user_id	post	timestamp
1	1	1	2/22/2024
2	1	2	2/20/2024
3	1	3	2/18/2024
4	1	4	2/16/2024
5	1	5	2/14/2024
6	2	1	2/22/2024
7	2	2	2/20/2024
8	2	3	2/18/2024
9	2	4	2/16/2024
10	2	5	2/14/2024
11	3	1	2/22/2024
12	3	2	2/20/2024
13	3	3	2/18/2024
14	3	4	2/16/2024
15	3	5	2/14/2024
16	4	1	2/22/2024
17	4	2	2/20/2024
18	4	3	2/18/2024
19	4	4	2/16/2024
20	4	5	2/14/2024
21	5	1	2/22/2024
22	5	2	2/20/2024
23	5	3	2/18/2024
24	5	4	2/16/2024
25	5	5	2/14/2024
	-- For every order, find the latest post view by the same user that happened
	-- at or before the order time (an "as-of" lookup written with a window function).
	with candidates as (
	  -- Pair each order with all of that user's post views up to the order time,
	  -- then number them from most recent to oldest within each order.
	  -- p.id is a tie-breaker: when several views share a timestamp the row that
	  -- gets rn = 1 would otherwise be arbitrary.
	  select
	    o.user_id as user_id,
	    o.id as order_id,
	    p.id as post_id,
	    o.timestamp as order_time,
	    p.timestamp as post_view_time,
	    row_number() over (
	      partition by o.id
	      order by p.timestamp desc, p.id desc
	    ) as rn
	  from "orders.csv" as o
	  -- inner join: an order with no post view at or before it yields no row.
	  inner join "post_views.csv" as p
	    on o.user_id = p.user_id
	    and o.timestamp >= p.timestamp
	)
	-- Keep only the top-ranked (most recent) post view per order.
	select
	  user_id,
	  order_id,
	  post_id,
	  order_time,
	  post_view_time,
	  rn
	from candidates
	where rn = 1
	order by order_time desc;
user_id	order_id	post_id	order_time	post_view_time	rn
1	1	2	2024-02-21	2024-02-20	1
2	3	7	2024-02-20	2024-02-20	1
3	4	13	2024-02-19	2024-02-18	1
4	6	18	2024-02-18	2024-02-18	1
1	2	4	2024-02-17	2024-02-16	1
3	5	15	2024-02-15	2024-02-14	1
	┌─────────────────────────────┐
	│┌───────────────────────────┐│
	││       Physical Plan       ││
	│└───────────────────────────┘│
	└─────────────────────────────┘
	┌───────────────────────────┐
	│          ORDER_BY         │
	│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
	│          ORDERS:          │
	│ candidates.order_time DESC│
	└─────────────┬─────────────┘
	┌─────────────┴─────────────┐
	│         PROJECTION        │
	│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
	│          user_id          │
	│          order_id         │
	│          post_id          │
	│         order_time        │
	│       post_view_time      │
	│             rn            │
	└─────────────┬─────────────┘
	┌─────────────┴─────────────┐
	│         PROJECTION        │
	│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
	│             #0            │
	│             #1            │
	│             #2            │
	│             #4            │
	│             #5            │
	│             #6            │
	└─────────────┬─────────────┘
	┌─────────────┴─────────────┐
	│           FILTER          │
	│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
	│          (rn = 1)         │
	│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
	│           EC: 23          │
	└─────────────┬─────────────┘
	┌─────────────┴─────────────┐
	│           WINDOW          │
	│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
	│     ROW_NUMBER() OVER     │
	│(PARTITION BY id ORDER...  │
	│      DESC NULLS LAST)     │
	└─────────────┬─────────────┘
	┌─────────────┴─────────────┐
	│         HASH_JOIN         │
	│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
	│           INNER           │
	│     user_id = user_id     ├──────────────┐
	│   timestamp <= timestamp  │              │
	│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │              │
	│           EC: 23          │              │
	└─────────────┬─────────────┘              │
	┌─────────────┴─────────────┐┌─────────────┴─────────────┐
	│READ_CSV_AUTO (MULTI-T...  ││READ_CSV_AUTO (MULTI-T...  │
	│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   ││   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
	│          user_id          ││          user_id          │
	│         timestamp         ││         timestamp         │
	│             id            ││             id            │
	│   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   ││   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
	│           EC: 23          ││           EC: 7           │
	└───────────────────────────┘└───────────────────────────┘