Skip to content

Instantly share code, notes, and snippets.

@AnatolyShirykalov
Last active February 10, 2018 21:30
Show Gist options
  • Save AnatolyShirykalov/8da7c123a5097b9825d586800ec354a1 to your computer and use it in GitHub Desktop.
Save AnatolyShirykalov/8da7c123a5097b9825d586800ec354a1 to your computer and use it in GitHub Desktop.
nasm sse sin
;-----------------------------------------------------------------------
; qsin -- sin(x) from its Taylor series, two terms per step (SSE2 only)
;
; In:    xmm0 (low lane) = x, double precision
; Out:   xmm0 (low lane) = x - x^3/3! + x^5/5! - ... - x^47/47!
; Clobb: rax, rcx, xmm1-xmm5, flags, high lane of xmm0
; NOTE(review): looks like a `double qsin(double)` leaf routine for the
;   usual x86-64 C ABIs -- confirm against the callers.
;
; Lane notation in the comments is high:low.  Terms with a '+' sign
; (x, x^5/5!, ...) accumulate in the high lane, terms with a '-' sign
; (x^3/3!, x^7/7!, ...) in the low lane; the last subtraction merges
; them.  There is no argument reduction, so accuracy degrades as |x|
; grows.
;-----------------------------------------------------------------------
qsin:
        ; ---- xmm1 = x : x^3 (first term of each lane, undivided) -----
        movapd   xmm1, xmm0             ; xmm1 = ? : x
        mov      eax, 1                 ; rax = 1 (upper 32 bits zeroed)
        cvtsi2sd xmm2, rax              ; xmm2 = ? : 1.0 (only low lane written)
        shufpd   xmm1, xmm2, 0          ; xmm1 = 1 : x   (high <- xmm2 low)
        mulpd    xmm1, xmm1             ; xmm1 = 1 : x^2
        shufpd   xmm0, xmm0, 0          ; xmm0 = x : x
        mulpd    xmm1, xmm0             ; xmm1 = x : x^3

        ; ---- xmm2 = x^4 : x^4 (per-step power increment) -------------
        movapd   xmm2, xmm0             ; xmm2 = x : x
        mulpd    xmm2, xmm2             ; xmm2 = x^2 : x^2
        mulpd    xmm2, xmm2             ; xmm2 = x^4 : x^4

        ; ---- xmm3 = 1.0 : 1.0 (divisor increment) --------------------
        cvtsi2sd xmm3, rax              ; rax is still 1 -> low lane = 1.0
        shufpd   xmm3, xmm3, 0          ; xmm3 = 1 : 1

        ; ---- first two series terms: xmm0 = x : x^3/6 ----------------
        ; cvtsi2sd leaves bits 127:64 of its destination untouched, so
        ; xmm4 must be cleared first; otherwise the caller's leftover
        ; high lane would leak into the divisors built below.
        xorpd    xmm4, xmm4             ; xmm4 = 0 : 0
        mov      eax, 5
        cvtsi2sd xmm4, rax              ; xmm4 = 0 : 5
        movapd   xmm5, xmm3             ; xmm5 = 1 : 1
        addpd    xmm5, xmm4             ; xmm5 = 1 : 6
        divpd    xmm1, xmm5             ; xmm1 = x : x^3/3!
        movapd   xmm0, xmm1             ; running sums = x : x^3/3!

        ; ---- xmm4 = 1 : 3 (last factorial factor used per lane) ------
        mov      eax, 2
        cvtsi2sd xmm4, rax              ; xmm4 = 0 : 2 (high lane still 0)
        addpd    xmm4, xmm3             ; xmm4 = 1 : 3

        ; rcx runs 10, 9, ..., 0; dec/jge leaves only once rcx reaches
        ; -1, so the body executes 11 times (last terms: x^45, x^47).
        mov      rcx, 10
qsin_loop:
        ; Extend each lane's factorial by four factors: n!/(n-4)!.
        addpd    xmm4, xmm3
        divpd    xmm1, xmm4
        addpd    xmm4, xmm3
        divpd    xmm1, xmm4
        addpd    xmm4, xmm3
        divpd    xmm1, xmm4
        addpd    xmm4, xmm3
        divpd    xmm1, xmm4
        mulpd    xmm1, xmm2             ; raise the power by x^4 -> next term
        addpd    xmm0, xmm1             ; accumulate in both lanes
        dec      rcx
        jge      qsin_loop

        ; ---- result = high-lane sum - low-lane sum -------------------
        movq     xmm1, xmm0             ; xmm1 = 0 : (x^3/3! + x^7/7! + ...)
        shufpd   xmm0, xmm0, 1          ; swap lanes: low = x + x^5/5! + ...
        subsd    xmm0, xmm1             ; low lane = sin(x)
        ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment