Skip to content

Instantly share code, notes, and snippets.

@deejayy
Created February 6, 2026 10:52
Show Gist options
  • Select an option

  • Save deejayy/958054b7105ea51f5ee1c70ba08893a4 to your computer and use it in GitHub Desktop.

Select an option

Save deejayy/958054b7105ea51f5ee1c70ba08893a4 to your computer and use it in GitHub Desktop.
TOTP, assembly, win64
; ---------------------------------------------------------------------
; TOTP code generator using HMAC-SHA1.
; Syntax: NASM (Intel operand order). Target: x86-64, Microsoft x64
; calling convention (args in rcx, rdx, r8, r9; 32-byte shadow space).
; All static data is addressed RIP-relative (DEFAULT REL).
; ---------------------------------------------------------------------
BITS 64
DEFAULT REL
; --- imports (link with bcrypt.lib & kernel32.lib & shell32.lib) ---
extern GetSystemTimeAsFileTime
extern BCryptOpenAlgorithmProvider
extern BCryptCreateHash
extern BCryptHashData
extern BCryptFinishHash
extern BCryptDestroyHash
extern BCryptCloseAlgorithmProvider
extern ExitProcess
extern GetStdHandle
extern WriteFile
extern GetCommandLineW
extern CommandLineToArgvW
; --- constants ---
STD_OUTPUT_HANDLE equ -11 ; argument passed to GetStdHandle
BCRYPT_ALG_HANDLE_HMAC_FLAG equ 8 ; dwFlags for BCryptOpenAlgorithmProvider (keyed/HMAC mode)
TOTP_DIGITS equ 6 ; number of decimal digits printed
TOTP_PERIOD equ 30 ; seconds per time step
SHA1_HASH_SIZE equ 20 ; bytes requested from BCryptFinishHash
; Epoch difference: 116444736000000000 (100-ns intervals from 1601 to 1970).
; mainCRTStartup uses this value as the literal 0x019DB1DED53E8000.
section .data
algId db 'S',0,'H',0,'A',0,'1',0,0,0 ; L"SHA1" (UTF-16LE), NUL-terminated
section .bss
hAlg resq 1 ; algorithm handle (0 until the provider is opened)
hHash resq 1 ; hash handle (0 until the hash object is created)
hashValue resb 20 ; SHA1 output (20 bytes)
timeCounter resq 1 ; 8-byte time-step counter, stored big-endian for TOTP
decodedKey resb 64 ; 64-byte buffer receiving the decoded base32 key
keyLen resq 1 ; length of decoded key in bytes
outBuf resb 16 ; output buffer; TOTP_DIGITS digits + newline are written
filetime resq 1 ; FILETIME storage (filled by GetSystemTimeAsFileTime)
written resd 1 ; bytes written (DWORD out-parameter of WriteFile)
hStdOut resq 1 ; stdout handle
argc resd 1 ; argument count (out-parameter of CommandLineToArgvW)
exitCode resd 1 ; process exit code (32-bit; passed to ExitProcess)
section .text
global mainCRTStartup
mainCRTStartup:
;-----------------------------------------------------------------------
; Process entry point: print the current 6-digit TOTP for the base32
; secret given as argv[1], followed by a newline, then exit.
; ABI:   Microsoft x64. Never returns (ends in ExitProcess).
; Exit:  0 = code written to stdout, 1 = any failure (missing/empty
;        secret, command-line parse failure, BCrypt or WriteFile error).
; Stack: rsp % 16 == 8 on entry; 104 bytes are reserved
;        (32 shadow + 3 stack-argument slots + padding), which makes
;        rsp 16-byte aligned at every call below. rsp is not touched
;        again, so every call site sees the same aligned frame.
;-----------------------------------------------------------------------
        sub     rsp, 104
        mov     dword [exitCode], 1     ; assume failure; cleared only after a successful write

        ; --- stdout handle ---
        mov     ecx, STD_OUTPUT_HANDLE
        call    GetStdHandle
        mov     [hStdOut], rax

        ; --- fetch argv[1] (the base32 secret, UTF-16) ---
        call    GetCommandLineW
        mov     rcx, rax                ; lpCmdLine
        lea     rdx, [argc]             ; pNumArgs
        call    CommandLineToArgvW
        test    rax, rax                ; NULL => parsing failed, argc is not valid
        jz      .exit
        cmp     dword [argc], 2
        jl      .exit                   ; no secret supplied
        mov     rsi, [rax + 8]          ; rsi = argv[1]

        ; --- decode the secret into decodedKey ---
        lea     rdi, [decodedKey]
        call    base32_decode
        mov     [keyLen], rax
        test    rax, rax
        jz      .exit                   ; nothing decoded => unusable key

        ; --- time-step counter = unix_seconds / TOTP_PERIOD ---
        lea     rcx, [filetime]
        call    GetSystemTimeAsFileTime
        mov     rax, [filetime]         ; 100-ns ticks since 1601-01-01
        mov     rcx, 0x019DB1DED53E8000 ; 116444736000000000 = ticks from 1601 to 1970
        sub     rax, rcx
        xor     edx, edx
        mov     ecx, 10000000           ; ticks per second
        div     rcx                     ; rax = Unix time in seconds
        xor     edx, edx
        mov     ecx, TOTP_PERIOD
        div     rcx                     ; rax = time-step counter
        bswap   rax                     ; HMAC input is the counter as 8 big-endian bytes
        mov     [timeCounter], rax

        ; --- BCryptOpenAlgorithmProvider(&hAlg, L"SHA1", NULL, HMAC flag) ---
        lea     rcx, [hAlg]             ; phAlgorithm
        lea     rdx, [algId]            ; pszAlgId
        xor     r8d, r8d                ; pszImplementation = NULL
        mov     r9d, BCRYPT_ALG_HANDLE_HMAC_FLAG
        call    BCryptOpenAlgorithmProvider
        test    eax, eax                ; NTSTATUS: non-zero is treated as failure
        jnz     .cleanup_alg

        ; --- BCryptCreateHash(hAlg, &hHash, NULL, 0, key, keyLen, 0) ---
        mov     rcx, [hAlg]             ; hAlgorithm
        lea     rdx, [hHash]            ; phHash
        xor     r8d, r8d                ; pbHashObject = NULL
        xor     r9d, r9d                ; cbHashObject = 0
        lea     rax, [decodedKey]
        mov     [rsp + 32], rax         ; pbSecret
        mov     rax, [keyLen]
        mov     [rsp + 40], rax         ; cbSecret
        mov     qword [rsp + 48], 0     ; dwFlags
        call    BCryptCreateHash
        test    eax, eax
        jnz     .cleanup_alg

        ; --- BCryptHashData(hHash, &timeCounter, 8, 0) ---
        mov     rcx, [hHash]
        lea     rdx, [timeCounter]      ; pbInput
        mov     r8d, 8                  ; cbInput
        xor     r9d, r9d                ; dwFlags
        call    BCryptHashData
        test    eax, eax
        jnz     .cleanup_hash

        ; --- BCryptFinishHash(hHash, hashValue, 20, 0) ---
        mov     rcx, [hHash]
        lea     rdx, [hashValue]        ; pbOutput
        mov     r8d, SHA1_HASH_SIZE     ; cbOutput
        xor     r9d, r9d                ; dwFlags
        call    BCryptFinishHash
        test    eax, eax
        jnz     .cleanup_hash

        ; --- dynamic truncation (RFC 4226) ---
        lea     rsi, [hashValue]
        movzx   eax, byte [rsi + 19]
        and     eax, 0x0F               ; offset = 0..15, so offset+3 <= 18 stays inside the hash
        mov     ecx, [rsi + rax]        ; 4 bytes at offset, big-endian in memory
        bswap   ecx
        and     ecx, 0x7FFFFFFF         ; drop the sign bit

        ; code = truncated mod 10^6
        mov     eax, ecx
        xor     edx, edx
        mov     ecx, 1000000            ; 10^TOTP_DIGITS
        div     ecx
        mov     eax, edx                ; eax = OTP value

        ; --- render as TOTP_DIGITS zero-padded digits, filled right to left ---
        lea     rdi, [outBuf + TOTP_DIGITS]
        mov     byte [rdi], 10          ; trailing newline
        mov     ecx, TOTP_DIGITS
        mov     r8d, 10                 ; loop-invariant divisor
.digit_loop:
        dec     rdi
        xor     edx, edx
        div     r8d                     ; eax = quotient, edx = next digit
        add     dl, '0'
        mov     [rdi], dl
        dec     ecx
        jnz     .digit_loop

        ; --- WriteFile(hStdOut, outBuf, digits + 1, &written, NULL) ---
        mov     rcx, [hStdOut]          ; hFile
        lea     rdx, [outBuf]           ; lpBuffer
        mov     r8d, TOTP_DIGITS + 1    ; nNumberOfBytesToWrite
        lea     r9, [written]           ; lpNumberOfBytesWritten
        mov     qword [rsp + 32], 0     ; lpOverlapped = NULL
        call    WriteFile
        test    eax, eax                ; BOOL: zero means the write failed
        jz      .cleanup_hash
        mov     dword [exitCode], 0     ; success

        ; exitCode lives in memory, so nothing has to be saved across the
        ; cleanup calls and rsp stays aligned with the shadow space intact.
.cleanup_hash:
        mov     rcx, [hHash]
        test    rcx, rcx
        jz      .cleanup_alg            ; hash object was never created
        call    BCryptDestroyHash
.cleanup_alg:
        mov     rcx, [hAlg]
        test    rcx, rcx
        jz      .exit                   ; provider was never opened
        xor     edx, edx                ; dwFlags = 0
        call    BCryptCloseAlgorithmProvider
.exit:
        mov     ecx, [exitCode]
        call    ExitProcess
; ============================================================
; base32_decode: decode an RFC 4648 base32 string (UTF-16LE) to binary
; Input:  RSI = pointer to NUL-terminated wide string
;         RDI = output buffer, at least B32_MAX_OUTPUT bytes
; Output: RAX = number of bytes decoded;
;               0 if nothing was decoded or if the input would
;               produce more than B32_MAX_OUTPUT bytes
; Clobbers: RAX, RCX, RSI, flags. RDI and R12-R15 are preserved.
; Accepts 'A'-'Z', 'a'-'z' and '2'-'7'. Padding '=' and any other
; character are skipped. The full 16-bit code unit is examined, so
; non-ASCII characters are never mistaken for alphabet symbols.
; Trailing bits that do not fill a whole byte are discarded.
; ============================================================
B32_MAX_OUTPUT equ 64                   ; output cap; must not exceed the caller's buffer size
base32_decode:
        push    r12
        push    r13
        push    r14
        push    r15
        mov     r12, rdi                ; r12 = output base
        xor     r13d, r13d              ; r13d = bit accumulator (never holds more than 12 bits)
        xor     r14d, r14d              ; r14d = number of valid bits in the accumulator
        xor     r15d, r15d              ; r15  = bytes written so far
.b32_loop:
        movzx   eax, word [rsi]         ; eax = next UTF-16 code unit
        add     rsi, 2
        test    eax, eax
        jz      .b32_done               ; NUL terminator
        cmp     eax, '='
        je      .b32_loop               ; padding carries no data

        ; Map the character to its 5-bit value: A-Z/a-z -> 0..25, 2-7 -> 26..31
        cmp     eax, 'A'
        jb      .try_digit              ; below 'A': only '2'..'7' can be valid
        cmp     eax, 'Z'
        ja      .try_lower              ; above 'Z': only 'a'..'z' can be valid
        sub     eax, 'A'
        jmp     .b32_got_value
.try_lower:
        cmp     eax, 'a'
        jb      .b32_loop               ; skip invalid
        cmp     eax, 'z'
        ja      .b32_loop               ; skip invalid (includes all non-ASCII units)
        sub     eax, 'a'
        jmp     .b32_got_value
.try_digit:
        cmp     eax, '2'
        jb      .b32_loop               ; skip invalid
        cmp     eax, '7'
        ja      .b32_loop               ; skip invalid
        sub     eax, '2'
        add     eax, 26
.b32_got_value:
        shl     r13d, 5
        or      r13d, eax               ; append 5 bits
        add     r14d, 5
.b32_check_output:
        cmp     r14d, 8
        jb      .b32_loop               ; fewer than 8 bits buffered: need more input
        cmp     r15d, B32_MAX_OUTPUT
        jae     .b32_overflow           ; one more byte would exceed the output cap
        sub     r14d, 8                 ; r14d = bits left after taking one byte
        mov     ecx, r14d
        mov     eax, r13d
        shr     eax, cl                 ; al = the 8 most significant buffered bits
        mov     [r12 + r15], al
        inc     r15d
        mov     eax, 1
        shl     eax, cl
        dec     eax                     ; mask selecting the remaining low bits
        and     r13d, eax
        jmp     .b32_check_output
.b32_overflow:
        xor     r15d, r15d              ; report failure rather than a truncated key
.b32_done:
        mov     rax, r15                ; return byte count
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment