Skip to content

Instantly share code, notes, and snippets.

@ikegami-yukino
Last active October 19, 2024 17:29
Show Gist options
  • Save ikegami-yukino/f631b67b98405883dcb81257af70fcff to your computer and use it in GitHub Desktop.
Save ikegami-yukino/f631b67b98405883dcb81257af70fcff to your computer and use it in GitHub Desktop.
PyLadies Tokyo 10th Anniversary Party
import os
import time
import urllib.request
import zipfile
import pandas as pd
from pandarallel import pandarallel
# Remote zip archive; the script below extracts TARGET_CSV_NAME from it.
URL = "https://www.post.japanpost.jp/zipcode/dl/utf/zip/utf_ken_all.zip"
# Local file name for the downloaded archive: everything after the last "/" in URL.
ZIP_FILE_NAME = URL.rpartition("/")[2]
# Name of the CSV member inside the archive (and of the extracted file on disk).
TARGET_CSV_NAME = "utf_ken_all.csv"
# Number of timed repetitions per benchmark; the reported time is the mean over these.
ITERATIONS = 20
# Initialise pandarallel with its default settings. This runs at module level,
# before the benchmark section uses ``parallel_map`` / ``parallel_apply``.
pandarallel.initialize()
def to_hiragana_map(x):
    """Convert a single string to hiragana.

    The value is first passed through ``jaconv.h2z`` (half-width to
    full-width) and the result through ``jaconv.kata2hira`` (katakana to
    hiragana). Intended as the element-wise callback for ``Series.map`` /
    ``Series.parallel_map``.

    Args:
        x: The string to convert.

    Returns:
        The converted string.
    """
    # NOTE(review): the import is kept inside the function as in the original,
    # presumably so the callable is self-contained when pandarallel ships it
    # to worker processes -- confirm before hoisting it to module level.
    import jaconv

    full_width = jaconv.h2z(x)
    hiragana = jaconv.kata2hira(full_width)
    return hiragana
def to_hiragana_apply(x):
    """Convert the entries labelled 3, 4 and 5 of one row to hiragana.

    Each of the three entries is passed through ``jaconv.h2z`` (half-width to
    full-width) and then ``jaconv.kata2hira`` (katakana to hiragana).
    Intended as the row-wise callback for ``DataFrame.apply(..., axis=1)`` /
    ``DataFrame.parallel_apply(..., axis=1)``.

    Args:
        x: One row; must support ``copy()`` and item access by the integer
            labels 3, 4 and 5 (a ``pandas.Series`` when called via ``apply``).

    Returns:
        A copy of ``x`` with entries 3-5 converted. The object passed in is
        left unmodified.
    """
    # NOTE(review): the import is kept inside the function as in the original,
    # presumably so the callable is self-contained when pandarallel ships it
    # to worker processes -- confirm before hoisting it to module level.
    import jaconv

    # pandas documents that a function passed to ``apply`` must not mutate the
    # object it receives (the row can share memory with the frame being
    # iterated), and recommends copying first. The original assigned into the
    # incoming row directly.
    x = x.copy()
    for i in range(3, 6):
        x[i] = jaconv.kata2hira(jaconv.h2z(x[i]))
    return x
def _map_columns(frame):
    """Convert columns 3, 4 and 5 of ``frame`` in place with ``Series.map``."""
    for column in range(3, 6):
        frame[column] = frame[column].map(to_hiragana_map)


def _parallel_map_columns(frame):
    """Convert columns 3, 4 and 5 of ``frame`` in place with ``Series.parallel_map``."""
    for column in range(3, 6):
        frame[column] = frame[column].parallel_map(to_hiragana_map)


def _apply_rows(frame):
    """Run the row-wise conversion over ``frame`` with ``DataFrame.apply``."""
    # The result is discarded: every timed run gets its own scratch frame, so
    # nothing needs to be carried over to the next run.
    frame.apply(to_hiragana_apply, axis=1)


def _parallel_apply_rows(frame):
    """Run the row-wise conversion over ``frame`` with ``DataFrame.parallel_apply``."""
    frame.parallel_apply(to_hiragana_apply, axis=1)


def _benchmark(label, convert, source):
    """Time ``convert`` ITERATIONS times and print the mean elapsed time.

    Args:
        label: Name shown in square brackets in the printed result line.
        convert: Callable taking a DataFrame; only its run time is measured.
        source: The unconverted DataFrame. It is never handed to ``convert``
            directly, so every run of every method starts from the same data.
    """
    elapsed_times = []
    for _ in range(ITERATIONS):
        # Fresh scratch copy per run, made outside the timed region. The
        # original overwrote the shared frame, so only the very first run
        # converted the original data; all later runs were timed on
        # already-converted input.
        frame = source.copy()
        start_time = time.monotonic()
        convert(frame)
        elapsed_times.append(time.monotonic() - start_time)
    print(f"[{label}] Average elapsed time: {sum(elapsed_times) / ITERATIONS}")


if __name__ == "__main__":
    # Fetch the archive only when the CSV still has to be extracted from it;
    # an already-extracted CSV needs neither the download nor the zip file.
    if not os.path.exists(TARGET_CSV_NAME):
        if not os.path.exists(ZIP_FILE_NAME):
            urllib.request.urlretrieve(URL, ZIP_FILE_NAME)
        with zipfile.ZipFile(ZIP_FILE_NAME) as zf:
            zf.extract(TARGET_CSV_NAME, "./")

    # No header row in the file; read every column as object (strings).
    df = pd.read_csv(TARGET_CSV_NAME, header=None, dtype="object")
    # Inflate the data volume tenfold.
    df = pd.concat([df] * 10)

    # Same order and same output labels as the original four timing loops.
    benchmarks = [
        ("map", _map_columns),
        ("parallel_map", _parallel_map_columns),
        ("apply", _apply_rows),
        ("parallel_apply", _parallel_apply_rows),
    ]
    for label, convert in benchmarks:
        _benchmark(label, convert, df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment