$ sudo apt install stockfish
$ sudo apt install --only-upgrade stockfish
$ pip install chess
import chess.pgn
import chess.engine

# Load the PGN file
pgn = open("./file.pgn")

# Start Stockfish as a UCI engine (assumes the stockfish binary is on the PATH)
engine = chess.engine.SimpleEngine.popen_uci("stockfish")
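A minimal sketch of how these pieces fit together, assuming ./file.pgn contains at least one game: read it, replay the mainline, and ask Stockfish to score each position.

game = chess.pgn.read_game(pgn)
board = game.board()
for move in game.mainline_moves():
    board.push(move)
    # shallow fixed-depth search; raise the depth for slower, stronger evaluations
    info = engine.analyse(board, chess.engine.Limit(depth=12))
    print(board.fullmove_number, info["score"])
engine.quit()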

Spark Listener Demo

This demonstrates Spark Job, Stage, and Task listeners.

1) Start spark-shell

(spark-shell prints its Welcome banner with the Spark version, then drops to the scala> prompt)
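The listener classes themselves (SparkListener and friends) live on the JVM side and are registered from Scala/Java in spark-shell. From PySpark, a related way to observe running jobs and stages is the StatusTracker; a minimal sketch, not the listener API itself:

from pyspark.sql import SparkSession
import threading, time

spark = SparkSession.builder.master("local[*]").appName("status-demo").getOrCreate()
sc = spark.sparkContext

# run a job in the background so there is something to observe
job = threading.Thread(target=lambda: sc.parallelize(range(10_000_000)).count())
job.start()
time.sleep(0.5)

tracker = sc.statusTracker()
for job_id in tracker.getActiveJobsIds():
    info = tracker.getJobInfo(job_id)
    print(f"job {job_id}: status={info.status}, stages={info.stageIds}")
job.join()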
Both rowsBetween and rangeBetween take two arguments, the start and the end of the frame, which can be specified as follows:
- Window.unboundedPreceding, Window.unboundedFollowing — the entire window, from its first row to its last
- Window.unboundedPreceding, Window.currentRow — from the beginning of the window up to the current row; this is the frame used for a cumulative sum
- numeric values — 0 means the current row, but the meaning of other values depends on the framing function: rowsBetween counts row offsets, while rangeBetween measures offsets in the values of the ordering column. A self-contained example follows the snippet below.
df.withColumn('activity_sum', sum('activity').over(w))
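The snippet above assumes a window spec w already exists. A self-contained version (the user_id, date, and activity column names are illustrative):

from pyspark.sql import SparkSession, Window
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame(
    [("u1", "2024-01-01", 2), ("u1", "2024-01-02", 3), ("u2", "2024-01-01", 5)],
    ["user_id", "date", "activity"],
)

# cumulative sum per user: the frame runs from the start of the partition to the current row
w = (Window.partitionBy("user_id")
     .orderBy("date")
     .rowsBetween(Window.unboundedPreceding, Window.currentRow))

df.withColumn("activity_sum", F.sum("activity").over(w)).show()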
Illustration: https://miro.medium.com/max/1400/1*WYO-zRP1SlrzGqT4S_5Jvw.webp
// Hadoop
Download winutils.exe and hadoop.dll: https://github.com/kontext-tech/winutils
Put them in the folder C:\hadoop\bin.
Add the environment variables hadoop.home.dir and HADOOP_HOME, both with the value C:\hadoop.
Add %HADOOP_HOME%\bin to the PATH.
Copy hadoop.dll to C:\Windows\System32.
Make sure your JVM is 64-bit.
// Spark
Download Spark from https://spark.apache.org/downloads.html.
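Once both are in place, a quick sanity check is a local PySpark job that writes to the local filesystem, since that path exercises winutils; a minimal sketch (the output directory is arbitrary):

from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[*]").appName("winutils-smoke-test").getOrCreate()
# writing to the local FS on Windows goes through the winutils/hadoop.dll native layer
spark.range(5).write.mode("overwrite").csv("C:/tmp/spark-smoke-test")
spark.stop()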
// FetchFailedException or MetadataFetchFailedException (e.g. when a BroadcastNestedLoopJoin overwhelms executors): give executors more off-heap headroom
- spark.executor.memoryOverhead=1g
- spark.kubernetes.memoryOverheadFactor=0.2
// mitigate join skew, Spark >= 3.0 (part of adaptive query execution, so AQE must be on)
- spark.sql.adaptive.enabled=true
- spark.sql.adaptive.skewJoin.enabled=true
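These can also be set when building the session; for example, in PySpark (values are illustrative):

from pyspark.sql import SparkSession

spark = (SparkSession.builder
         .config("spark.executor.memoryOverhead", "1g")
         .config("spark.sql.adaptive.enabled", "true")           # AQE must be enabled
         .config("spark.sql.adaptive.skewJoin.enabled", "true")  # split skewed shuffle partitions
         .getOrCreate())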
// When the join key is skewed, salting spreads the hot keys across partitions:
import org.apache.spark.sql.{Column, DataFrame}

def saltedJoin(df: DataFrame, buildDf: DataFrame, joinExpression: Column, joinType: String, salt: Int): DataFrame = {
  import org.apache.spark.sql.functions._
  // replicate every build-side row `salt` times, tagged 0..salt-1
  val tmpDf = buildDf.withColumn("slt_range", array(Range(0, salt).toList.map(lit): _*))
  val tableDf = tmpDf.withColumn("slt_ratio_s", explode(tmpDf("slt_range"))).drop("slt_range")
  // tag every stream-side row with a deterministic pseudo-salt in 0..salt-1
  val streamDf = df.withColumn("slt_ratio", monotonically_increasing_id() % salt)
  // join on the original expression plus the matching salt, then drop the helper columns
  val saltedExpr = streamDf("slt_ratio") === tableDf("slt_ratio_s") && joinExpression
  streamDf.join(tableDf, saltedExpr, joinType).drop("slt_ratio_s").drop("slt_ratio")
}
// Cucumber DataTable to Spark DataFrame
import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable`
import io.cucumber.datatable.DataTable
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.col

// assumes a sparkSession is in scope
def dataTableToDataframe(table: DataTable): DataFrame = {
  import sparkSession.implicits._
  // the first row of the table holds the column names
  val columns: Seq[String] = table.cells().head.toSeq
  val data = table.cells().drop(1).toSeq.map(r => r.toList)
  // each row becomes one array column named "value"; split it into named columns
  data.toDF().select(columns.indices.map(i => col("value")(i).alias(columns(i))): _*)
}
// styles.scss
@import '~@angular/material/prebuilt-themes/indigo-pink.css';
// HTML
<mat-table class="lessons-table mat-elevation-z8" [dataSource]="dataSource">
  <ng-container matColumnDef="id">
    <mat-header-cell *matHeaderCellDef>#</mat-header-cell>
    <mat-cell *matCellDef="let customer">{{customer.id}}</mat-cell>
  </ng-container>
  <mat-header-row *matHeaderRowDef="displayedColumns"></mat-header-row> <!-- displayedColumns, e.g. ['id'], lives in the component -->
  <mat-row *matRowDef="let row; columns: displayedColumns"></mat-row>
</mat-table>
import numpy as np
from PIL import ImageGrab
import cv2

def draw_detections(img, rects, thickness=1):
    for x, y, w, h in rects:
        # shrink the boxes: HOG returns rectangles a bit larger than the person
        pad_w, pad_h = int(0.15 * w), int(0.05 * h)
        cv2.rectangle(img, (x + pad_w, y + pad_h), (x + w - pad_w, y + h - pad_h), (0, 255, 0), thickness)

hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
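A minimal sketch tying the pieces together: grab the screen with ImageGrab and run the people detector on each frame.

while True:
    frame = np.array(ImageGrab.grab())  # screenshot as an RGB array
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    rects, _weights = hog.detectMultiScale(gray, winStride=(8, 8), padding=(8, 8), scale=1.05)
    draw_detections(frame, rects)
    cv2.imshow("detections", cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cv2.destroyAllWindows()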
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.impute import SimpleImputer  # sklearn.preprocessing.Imputer was removed in scikit-learn 0.22
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.linear_model import LinearRegression

# Importing the dataset (read_csv already returns a DataFrame)
df = pd.read_csv('train.csv')
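A minimal sketch of the usual next steps, assuming df has numeric feature columns with missing values and a numeric target column named y (hypothetical; adjust to the real schema of train.csv):

# fill missing numeric values with the column mean, then fit a baseline model
X = df.drop(columns=["y"]).select_dtypes("number")
imputer = SimpleImputer(strategy="mean")
X_imputed = imputer.fit_transform(X)

model = LinearRegression().fit(X_imputed, df["y"])
print(model.score(X_imputed, df["y"]))  # R^2 on the training data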