Starting Vim
vim [file1] [file2] ...
""" | |
The schemas that Spark produces for DataFrames are typically | |
nested, and these nested schemas are quite difficult to work with | |
interactively. In many cases, it's possible to flatten a schema | |
into a single level of column names. | |
""" | |
import typing as T | |
import cytoolz.curried as tz |
# A simple cheat sheet of Spark Dataframe syntax | |
# Current for Spark 1.6.1 | |
# import statements | |
#from pyspark.sql import SQLContext | |
#from pyspark.sql.types import * | |
#from pyspark.sql.functions import * | |
from pyspark.sql import functions as F | |
#SparkContext available as sc, HiveContext available as sqlContext. |
""" | |
Convert Pandas DFs in an HDFStore to parquet files for better compatibility | |
with Spark. | |
Run from the command line with: | |
spark-submit --driver-memory 4g --master 'local[*]' hdf5_to_parquet.py | |
""" | |
import pandas as pd |
#!/bin/bash | |
##################################################### | |
# Name: Bash CheatSheet for Mac OSX | |
# | |
# A little overview of the Bash basics | |
# | |
# Usage: | |
# | |
# Author: J. Le Coupanec | |
# Date: 2014/11/04 |
class BSTnode(object): | |
""" | |
Representation of a node in a binary search tree. | |
Has a left child, right child, and key value, and stores its subtree size. | |
""" | |
def __init__(self, parent, t): | |
"""Create a new leaf with key t.""" | |
self.key = t | |
self.parent = parent | |
self.left = None |
from collections import defaultdict | |
from heapq import * | |
def dijkstra(edges, f, t): | |
g = defaultdict(list) | |
for l,r,c in edges: | |
g[l].append((c,r)) | |
q, seen, mins = [(0,f,())], set(), {f: 0} | |
while q: |
import sys | |
from pyspark.context import SparkContext | |
from numpy import array, random as np_random | |
from sklearn import linear_model as lm | |
from sklearn.base import copy | |
N = 10000 # Number of data points
D = 10 # Number of dimensions
ITERATIONS = 5