Derrick flrngel

Pretraining

A Map for Studying Pre-training in LLMs

To set up key bindings in Xcode that match the ones in VSCode

Add this to /Applications/Xcode.app/Contents/Frameworks/IDEKit.framework/Versions/A/Resources/IDETextKeyBindingSet.plist. Thanks to: https://gist.github.com/emotality/b1bcb2bb8a07921f9c8cad1c969daedf

<key>Duplication</key>

	{
	"tools": [
	{
	"type": "function",
	"function": {
	"name": "codebase_search",
	"description": "Find snippets of code from the codebase most relevant to the search query.\nThis is a semantic search tool, so the query should ask for something semantically matching what is needed.\nIf it makes sense to only search in particular directories, please specify them in the target_directories field.\nUnless there is a clear reason to use your own search query, please just reuse the user's exact query with their wording.\nTheir exact wording/phrasing can often be helpful for the semantic search query. Keeping the same exact question format can also be helpful.",
	"parameters": {
	"type": "object",
	"properties": {

	# Swift Language Fundamentals

	Swift is a modern programming language for Apple platforms (iOS, macOS, etc.) with these key characteristics:

	1. Core Features:
	- Type inference for automatic type detection
	- Optionals for safe handling of missing values
	- Closures for flexible function passing
	- Memory safety by design
	- Built-in error handling

	from diffusers import FluxPipeline, AutoencoderKL
	from diffusers.image_processor import VaeImageProcessor
	from transformers import T5EncoderModel, T5TokenizerFast, CLIPTokenizer, CLIPTextModel
	import torch
	import gc


	def flush():
		"""Free memory that is no longer referenced, on the host and on the GPU.

		Runs Python's garbage collector first, then asks PyTorch to release
		its cached CUDA memory. The order matters: objects must be collected
		before the cache is emptied, otherwise the blocks they still hold
		cannot be returned.

		Returns:
			None.
		"""
		gc.collect()
		torch.cuda.empty_cache()

	'''
	https://arxiv.org/abs/2312.00858
	1. put this file in ComfyUI/custom_nodes
	2. load node from <loaders>

	start_step, end_step: apply this method when the timestep is between start_step and end_step
	cache_interval: interval of caching (1 means no caching)
	cache_depth: depth of caching
	'''

	# This is a modified version of TRL's `SFTTrainer` example (https://github.com/huggingface/trl/blob/main/examples/scripts/sft_trainer.py),
	# adapted to run with DeepSpeed ZeRO-3 and Mistral-7B-V1.0. The settings below were run on 1 node of 8 x A100 (80GB) GPUs.
	#
	# Usage:
	# - Install the latest transformers & accelerate versions: `pip install -U transformers accelerate`
	# - Install deepspeed: `pip install deepspeed==0.9.5`
	# - Install TRL from main: pip install git+https://github.com/huggingface/trl.git
	# - Clone the repo: git clone https://github.com/huggingface/trl.git
	# - Copy this Gist into trl/examples/scripts
	# - Run from root of trl repo with: accelerate launch --config_file=examples/accelerate_configs/deepspeed_zero3.yaml --gradient_accumulation_steps 8 examples/scripts/sft_trainer.py

	import Darwin
	import Foundation
	import UIKit

	// https://github.com/xybp888/iOS-SDKs/blob/master/iPhoneOS17.1.sdk/System/Library/PrivateFrameworks/CoreSVG.framework/CoreSVG.tbd
	// https://developer.limneos.net/index.php?ios=17.1&framework=UIKitCore.framework&header=UIImage.h

	// Empty NSObject subclass marked @objc; it declares no members of its own.
	// NOTE(review): presumably a stand-in for the private CoreSVG type of the
	// same name referenced by the links above — confirm against its usage.
	@objc
	class CGSVGDocument: NSObject { }

	import functools
	import numpy as np
	import tensorflow.compat.v1 as tf
	from tensorflow.python.tpu import tpu_function


	# Module-level batch-normalization settings; their use is outside this view.
	# NOTE(review): presumably the moving-average decay and the small constant
	# added to the variance for numerical stability — confirm at the call sites.
	BATCH_NORM_DECAY = 0.9
	BATCH_NORM_EPSILON = 1e-5