Skip to content

Instantly share code, notes, and snippets.

View konverner's full-sized avatar

Konstantin Verner konverner

View GitHub Profile
@konverner
konverner / subprocess_with_timeout.py
Created October 12, 2024 09:33
Run a script once for each of M arguments, with an N-second timeout per run
import subprocess
import time
timeout = 40 # Set timeout to 40 seconds
with open('items.txt', 'r') as file:
items = file.read().strip().split('\n')
for item in items:
item = item.strip() # Clean up any extra whitespace
@konverner
konverner / extract_json_block.py
Created September 23, 2024 19:42
Extract json content from markdown content
import json
def extract_json_from_text(text):
# Find the first '{' character
start = text.find('{')
if start == -1:
print("No JSON block found in the text.")
return None
# Use a stack to track nested braces
@konverner
konverner / load_env.py
Last active September 20, 2024 21:54
Load environment variables from .env in Python
# pip install python-dotenv
import os
from dotenv import find_dotenv, load_dotenv
# Locate the nearest .env file (searching from the current working
# directory) and load its entries into the process environment.
load_dotenv(dotenv_path=find_dotenv(usecwd=True))

# Read the variable back; the result is None when it is not set.
MY_VAR = os.environ.get("MY_VAR")
@konverner
konverner / create_keyfile_dict.py
Last active December 22, 2024 09:42
A function that generates the JSON key file used by the Google API from environment variables
"""
A function that generates the JSON key file used by the Google API from environment variables. Example `.env` file:
```
TYPE="my_service_account"
PROJECT_ID="my_project_id"
PRIVATE_KEY_ID="my_private_key_id"
PRIVATE_KEY="-----BEGIN PRIVATE KEY-----
my_private_key_content
-----END PRIVATE KEY-----
@konverner
konverner / spans_to_conll.py
Last active February 5, 2024 12:47
Convert span-based NER annotations to CoNLL BIO format
import json
from typing import Any, Dict, List
import spacy
from spacy.training.iob_utils import biluo_to_iob, doc_to_biluo_tags
from tqdm import tqdm
def spans_to_conll(
samples: List[Dict[str, Any]],
@konverner
konverner / fix_ner_spans.py
Created February 1, 2024 00:00
fix corrupted spans in NER annotation
def fix_span(text: str, span: dict):
# let us check that spans are correctly extracted
fixed_span = span.copy()
# span starts with a space or a punctuation
while text[fixed_span["start"]] in [" ", ".", ",", ";", ":", "!", "?"]:
fixed_span["start"] += 1
# span is cut off at the beginning: e.g. "ashington DC"
@konverner
konverner / generate_ssh.sh
Created December 18, 2023 21:06
generate ssh key on linux
# Generate an RSA SSH key pair on Linux and print the public key.

# Make sure the per-user SSH directory exists.
mkdir -p ~/.ssh
# Record GitHub's RSA host key so the first connection does not prompt.
ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts
# Create the key pair. Replace <email> with your address; the placeholder is
# quoted because a bare <email> is parsed by the shell as redirections.
ssh-keygen -t rsa -C "<email>"
# Print the public key (assumes the key was saved to ~/.ssh/id_rsa).
cat ~/.ssh/id_rsa.pub
@konverner
konverner / ols_template.py
Last active November 10, 2023 21:03
OLS method with statsmodels
import numpy as np
import statsmodels.api as sm
X = np.array([[1, 85, 5],
[1, 177, 6],
[1, 100, 9],
[1, 110, 8],
[1, 90, 7.5],
[1, 144, 5.5]])
@konverner
konverner / changePython.sh
Created July 8, 2023 21:20
Change python version in Colab
# Switch the python3 interpreter in a Colab runtime (each line is a `!` shell escape).

# Register /usr/bin/python3.8 as an alternative for python3 with priority 1.
!sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1
# Choose which registered alternative python3 should point to.
!sudo update-alternatives --config python3
# Install pip for the python3 interpreter.
!sudo apt install python3-pip
# Print the python3 version to confirm the switch.
!python3 --version
@konverner
konverner / stacked_barplot.py
Created June 17, 2023 20:42
stacked barplot from dataframe
import pandas as pd
# Example data: one row per store, with its district and category.
df = pd.DataFrame(
    {
        "name": ["Store A", "Store B", "Store C", "Store D"],
        "district": ["I", "II", "I", "III"],
        # Category labels must be strings; bare X, Y, Z are undefined names.
        "category": ["X", "X", "Y", "Z"],
    }
)

# Count stores per (district, category): one row per district, one column per
# category. Combinations with no store are left as NaN.
district_category_pivot_table = df.pivot_table(
    values="name", index="district", columns="category", aggfunc="count"
)