This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Iterable, cast | |
import polars as pl | |
from polars.testing import assert_frame_equal | |
import numpy as np | |
from datetime import timedelta | |
import datetime | |
import asyncio | |
import time | |
from io import StringIO |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#[tokio::main] | |
async fn main() { | |
let mut client = make_postgres().await; // tokio_postgres::connect with spawn | |
let transaction = client.transaction().await.unwrap(); | |
// make an example df | |
let utc=PlSmallStr::from_str("UTC"); | |
let ava = AnyValue::Datetime(Utc | |
.with_ymd_and_hms(2010, 1, 1, 1, 0, 0) | |
.unwrap() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import inspect | |
import json | |
import os | |
from pyarrow import compute as pc | |
from types import FunctionType | |
import pyarrow as pa | |
from datetime import datetime | |
import multiprocessing as mp | |
import sys |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import polars as pl | |
""" | |
This method is a work around for two annoyances. | |
1. If one wants to reuse an earlier column definition then the | |
walrus operator can be used but then it looks awkward because it | |
has the python variable on the left but then still needs an | |
alias at the end such as | |
`with_columns(a:=(pl.col('b')+1).alias('a'), (a*2).alias('c'))` |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import polars as pl | |
import pyarrow.compute as pc | |
# Example df | |
df = pl.DataFrame( | |
[ | |
pl.Series("a", [1, 2, 3], dtype=pl.Int64), | |
] | |
) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import httpx | |
import asyncio | |
from bs4 import BeautifulSoup | |
import os | |
import geopandas as gpd | |
import pandas as pd | |
from pathlib import Path | |
from geoarrow.rust.core import ( | |
GeoTable, | |
write_parquet, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def parse_dtypes(df, exclude=[]): | |
str_cols = [x for x, y in df.schema.items() if y == pl.String and x not in exclude] | |
try_casts = df.select( | |
pl.struct(pl.all()).alias("original"), | |
pl.struct( | |
pl.coalesce( | |
pl.col(col).str.strptime(pl.Datetime, x, strict=False) | |
for x in ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S"] | |
) | |
for col in str_cols |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pathlib import Path | |
import re | |
rootpath = Path("./polars/crates") | |
for p in rootpath.rglob("*.rs"): | |
with p.open() as f: | |
filestr = f.read() | |
if filestr.find("fn") == -1: | |
continue |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import polars as pl | |
import numpy as np | |
from itertools import product | |
import time | |
from datetime import datetime | |
import json | |
def gen_long_string(str_len=10, n_rows=10_000_000): | |
rng = np.random.default_rng() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def pl_cal_sheet( | |
wb: CalamineWorkbook, | |
sheet: str, | |
header_rows: int | None = None, | |
header_merge_char: str = "_", | |
skip_rows: int = 0, | |
infer_schema_length: int = 1000, | |
infer_schema_minrow: int = 10, | |
column_dupe_name_seperator: str = "_", | |
) -> pl.DataFrame: |