import pandas as pddf = pd.DataFrame({'x': [1, 1, 1, 2, 2, 3],| data = pd.DataFrame({'id':[1,1,1,2,2,2],'sales':[4,1,2,7,6,7],'views':[3,1,2,8,6,7]}) |
| # data is from DuckDB's github repo | |
| # https://github.com/duckdb/duckdb/tree/master/benchmark/micro/join | |
| import pandas as pd; import numpy as np; import janitor as jn; import duckdb | |
| query = """SELECT SETSEED(0.8675309); | |
| CREATE TABLE events AS ( | |
| SELECT *, | |
| "start" + INTERVAL (CASE WHEN random() < 0.1 THEN 120 ELSE (5 + round(random() * 50, 0)::BIGINT) END) MINUTE | |
| AS "end" | |
| FROM ( |
| from datatable import dt, f | |
| from typing import Pattern, NamedTuple, Union | |
| from collections import Counter, defaultdict | |
| from itertools import compress, chain | |
| import re | |
| import numpy as np | |
| class measure(NamedTuple): | |
| """reshape either with a separator or a regular expression.""" | |
| column_names:Union[str, list] |
import pandas as pddf = pd.DataFrame({'x': [1, 1, 1, 2, 2, 3],import pandas as pd
import numpy as np
import janitor