eric-hemasystems · July 26, 2023 19:15
diff --git a/js.rb b/js.rb
 # A module to execute JS in a safe sandbox. The JS engine is Spidermonkey
 # (the engine used in Firefox). The sandbox is wasmer.
 #
 # This module assumes the compiled wasm spidermonkey binary is in the same
 # directory. Use one of the binaries from Mozilla directly listed here:
 #
 # https://github.com/mozilla-spidermonkey/sm-wasi-demo/blob/main/data.json
 #
 # The one on wapm is very out-dated and will not work with this module.
 #
 # For performance and to avoid excessive allocation and de-allocation the JS
 # engine is cached in memory. The binary is almost 9MB but obviously that is
 # just the binary. The runtime environment to execute the JS is higher than
 # that. I attempted to measure this by running the following loop:
 #
 #   1000.times { JS.execute('1+1') }
 #
 # With that test my machines show it to increase the size of the process by
 # about 150MB-200MB of memory usage basically from having that module always
 # loaded. Each execution will take more memory but that is reclaimed after the
 # execution.
 #
 # Ideas for future improvement are:
 #
 # 1. There are a gazillion memory related switches that can be given to the
 #    JS engine. Perhaps one of them is appropriate for our usage here and could
 #    reduce memory consumption.
 # 2. My use case is to run a variety of JS scripts and these JS scripts don't
 #    have high performance demands. If you are running the same scripts over
 #    and over and need more performance the `selfhosted-xdr-path` and
 #    `selfhosted-xdr-mode` might be useful. I think it caches the JIT bytecode
 #    information.
 class JS

  # The main interface for executing some JS code.
  #
  # Generally just one script is given but if multiple are given they are
  # executed as if concatenated together (but each argument must be a
  # syntactically valid script on its own). The multiple code arguments might
  # be useful to prepend or add additional boilerplate that you want every
  # script to have. Examples:
  #
  #   JS.execute 'console.log(1+1)'
  #   JS.execute 'a = 1', 'console.log(a)'
  #
  # Keyword arguments become variables in the script. Example:
  #
  #   JS.execute 'console.log(a.length)', a: ['x', 'y', 'z']
  #
  # This would return 3. Anything that can be converted to JSON can be passed to
  # the script. Whatever comes back from stdout (i.e. console.log calls) is
  # returned.
  #
  # If a syntax or execution error occurs an exception will be raised which will
  # include any stderr and stdout information.
  def self.execute *code, **arguments
    # Each keyword argument becomes a JS assignment whose value is the JSON
    # encoding of the Ruby value.
    assignments = arguments.map { |name, value| "#{name.to_s.parameterize.underscore} = #{value.to_json}" }

    # Assignments are placed ahead of the scripts so the variables are defined
    # before any of the given code runs.
    new(assignments + code).call
  end

  # Exception raised with either a syntax or execution error
  class ScriptError < StandardError
    # Output captured from the failed run
    attr_reader :stdout, :stderr

    # Keeps the captured streams; +msg+ is handed to StandardError.
    def initialize stdout, stderr, msg=nil # :nodoc:
      super(msg)
      @stdout, @stderr = stdout, stderr
    end

    # Renders both captured streams, one labelled line each.
    def to_s # :nodoc:
      "Standard Out: #{@stdout}\nStandard Error: #{@stderr}\n"
    end
  end

  # Wraps the ordered list of script sources that #call will run.
  def initialize(code)
    @code = code
  end
  # Instances are only built from inside the class (see JS.execute).
  private_class_method :new

  # Writes every script to its own `.js` temp file inside a fresh temp
  # directory, hands the directory and files to #execute and returns whatever
  # #execute returns. The files are closed and removed afterwards.
  def call # :nodoc:
    Dir.mktmpdir do |dir|
      # Assigned before any file is created so the `ensure` below always has an
      # array to walk. Previously a failure while creating/writing a file left
      # this nil and the cleanup raised NoMethodError, hiding the real error.
      scripts = []

      @code.each do |code|
        file = Tempfile.create(['', '.js'], dir)
        scripts << file # tracked first so it is cleaned up even if the write fails
        file.write code
        file.flush
      end

      silence_warnings { execute dir, scripts }
    ensure
      scripts.each do |script|
        script.close
        File.unlink script.path
      end
    end
  end

  # Path of the compiled JS engine binary (`js.wasm`), expected to sit in the
  # same directory as this file. Kept private to the class.
  WASM_BIN_PATH = "#{__dir__}/js.wasm"
  private_constant :WASM_BIN_PATH

  class << self
    # Wasmer store shared by the module and the import objects. Wrapped with
    # `memoize` (presumably so it is built once and reused — confirm against
    # the memoize implementation in use).
    def store
      Wasmer::Store.new
    end
    memoize :store

    # The engine binary loaded into the shared store.
    def module_
      Wasmer::Module.new(store, bin)
    end
    memoize :module_

    # WASI version reported by Wasmer for the loaded module.
    def version
      Wasmer::Wasi.get_version(module_, true)
    end
    memoize :version

    # Forces store, module and version to be built now, with warnings silenced.
    def preload!
      silence_warnings { version }
    end

    private

    # Raw bytes of the engine binary, read in binary mode.
    def bin
      File.read(WASM_BIN_PATH, mode: 'rb')
    end
  end

  # Load module into memory so it's ready to go when we want to execute. This
  # also helps ensure that the memory taken by different threads of execution
  # is shared due to copy-on-write
  preload!

  private

  # Runs the given script files with the engine, mapping +dir+ as the guest's
  # root directory, and returns what OutputCapturer#call returns.
  def execute dir, scripts
    wasi_env = env(dir, arguments(scripts))
    imports = wasi_env.generate_import_object(store, version)

    capturer = OutputCapturer.new do
      instance = Wasmer::Instance.new(module_, imports)
      instance.exports._start.call
      # NOTE(review): this one-second pause runs after `_start` returns; its
      # purpose is not evident from this file — confirm before changing it.
      sleep 1
    end
    capturer.call
  end

  # Builds the WASI state for one run: program name `js.wasm`, the given
  # command line arguments, and +dir+ mapped as the guest's `/`.
  def env dir, arguments
    builder = Wasmer::Wasi::StateBuilder.new('js.wasm')
    builder = builder.arguments(arguments)
    builder = builder.map_directory('/', dir)
    builder.finalize
  end

  # Command line for the engine: a `-f <file name>` pair per script, in order.
  # Only the base name is passed; the directory part of each path is dropped.
  def arguments(scripts)
    scripts.flat_map { |script| ['-f', File.basename(script.path)] }
  end

  # Make the class-level store/module_/version readers callable from instance
  # methods (as private methods).
  delegate :store, :module_, :version, to: :class, private: true

  # Can execute a block of Ruby code and capture its stdout and stderr. If an
  # exception occurs in that block the raised exception will contain both the
  # stdout and stderr.
  #
  # Most importantly it doesn't matter how the data arrives on stdout or stderr.
  # Could be via Ruby `puts`, could be a native ext, could be a shell command.
  # Also is thread-safe.
  class OutputCapturer

    # Initialize with block to run
    def initialize &blk
      @blk = blk
    end

    # Runs the block in a forked child whose stdout/stderr are redirected to
    # temp files, then waits for that child.
    #
    # Returns the captured stdout as a String when the child exits
    # successfully. Raises ScriptError carrying the captured stdout and stderr
    # when the block raised or the child did not exit normally.
    def call
      # Define the new IO before forking so both the parent and the child know
      # which files hold the output.
      with_new_io do |stdout, stderr|
        pid = fork do
          # Rebind within child process so parent process stdio is not affected
          rebind_io stdout, stderr
          @blk.()

          # Explicit exit of fork with `!` to prevent any `at_exit` callback
          # from running. Since the block executed correctly we return a
          # `true` status code to indicate all good.
          Process.exit! true
        rescue
          # Some sort of error occurred. Still call `exit!` to prevent any
          # `at_exit` handler but this time no argument to signal a problem
          Process.exit!
        end

        # Wait for *this* child only. A bare `Process.wait` reaps whichever
        # child finishes first, so concurrent callers could pick up each
        # other's exit status (or find no child left to wait for).
        _, status = Process.wait2 pid

        # `success?` is nil when the child did not exit normally (e.g. it was
        # killed by a signal); that is treated as a failure as well.
        raise ScriptError.new(stdout.read, stderr.read) unless status.success?

        # No failure so only return stdout
        stdout.read
      end
    end

    private

    # Yields two temp files (stdout first, stderr second) that exist only for
    # the duration of the block.
    def with_new_io
      Tempfile.create do |stdout|
        Tempfile.create do |stderr|
          yield stdout, stderr
        end
      end
    end

    # Points the process-wide $stdout/$stderr at the given files and turns off
    # buffering so output is written immediately.
    def rebind_io stdout, stderr
      $stdout.reopen stdout, 'w+'
      $stdout.sync = true

      $stderr.reopen stderr, 'w+'
      $stderr.sync = true
    end
  end
  private_constant :OutputCapturer
 end
diff --git a/js_spec.rb b/js_spec.rb
 describe JS do
  it 'executes JS' do
    expect( JS.execute('console.log("Hello World")') ).to eq "Hello World\n"
  end

  it 'executes multiple scripts' do
    expect( JS.execute('a = "Hello World"', 'console.log(a)') ).to eq "Hello World\n"
  end

  it 'executes with thread safety' do
    threads = Array.new(10) do
      Thread.new do
        expect( JS.execute('console.log("Hello World")') ).to eq "Hello World\n"
      end
    end
  ensure
    # Joining re-raises anything a thread raised, so a failed expectation
    # inside a thread fails the example.
    threads.each(&:join)
  end

  describe 'passing arguments' do
    it 'encodes basic data' do
      expect( JS.execute('console.log(a)', a: 'Hello World') ).to eq "Hello World\n"
    end

    it "encodes content with a '" do
      expect( JS.execute('console.log(a)', a: "O'Toole") ).to eq "O'Toole\n"
    end

    it 'encodes content with a "' do
      expect( JS.execute('console.log(a)', a: 'Some "quote" content') ).to eq "Some \"quote\" content\n"
    end
  end

  describe 'errors' do
    # Shared check: running `code` must raise JS::ScriptError.
    shared_examples 'error handling' do
      it 'indicates an error code' do
        expect { JS.execute code }.to raise_error JS::ScriptError
      end
    end

    context 'syntax' do
      let(:code) { 'a +' }

      include_examples 'error handling'
    end

    context 'execution' do
      let(:code) do
        <<~JS
          console.log('truncated output')
          throw "error"
        JS
      end

      include_examples 'error handling'

      it 'provides stdout up to the point of error' do
        # The block only runs when the expected error was raised, so a missing
        # error still fails the example.
        expect { JS.execute code }.to raise_error(JS::ScriptError) { |error|
          expect( error.stdout ).to eq "truncated output\n"
        }
      end
    end
  end
 end
	# A module to execute JS in a safe sandbox. The JS engine is Spidermonkey
	# (the engine used in Firefox). The sandbox is wasmer.
	#
	# This module assumes the compiled wasm spidermonkey binary is in the same
	# directory. Use one of the binaries from Mozilla directly listed here:
	#
	# https://github.com/mozilla-spidermonkey/sm-wasi-demo/blob/main/data.json
	#
	# The one on wapm is very out-dated and will not work with this module.
	#
	# For performance and to avoid excessive allocation and de-allocation the JS
	# engine is cached in memory. The binary is almost 9MB but obviously that is
	# just the binary. The runtime environment to execute the JS is higher than
	# that. I attempted to measure this by running the following loop:
	#
	# 1000.times { JS.execute('1+1') }
	#
	# With that test my machines show it to increase the size of the process by
	# about 150MB-200MB of memory usage basically from having that module always
	# loaded. Each execution will take more memory but that is reclaimed after the
	# execution.
	#
	# Ideas for future improvement are:
	#
	# 1. There are a gazillion memory related switches that can be given to the
	# JS engine. Perhaps one of them is appropriate for our usage here and could
	# reduce memory consumption.
	# 2. My use case is to run a variety of JS scripts and these JS scripts don't
	# have high performance demands. If you are running the same scripts over
	# and over and need more performance the `selfhosted-xdr-path` and
	# `selfhosted-xdr-mode` might be useful. I think it caches the JIT bytecode
	# information.
	class JS

	# The main interface for executing some JS code.
	#
	# Generally just one script is given but if multiple are given they are
	# executed as if concatenated together (but each argument must be a
	# syntactically valid script on its own). The multiple code arguments might
	# be useful to prepend or add additional boilerplate that you want every
	# script to have. Examples:
	#
	# JS.execute 'console.log(1+1)'
	# JS.execute 'a = 1', 'console.log(a)'
	#
	# Keyword arguments become variables in the script. Example:
	#
	# JS.execute 'console.log(a.length)', a: ['x', 'y', 'z']
	#
	# This would return 3. Anything that can be converted to JSON can be passed to
	# the script. Whatever comes back from stdout (i.e. console.log calls) is
	# returned.
	#
	# If a syntax or execution error occurs an exception will be raised which will
	# include any stderr and stdout information.
	# Restored signature: any number of scripts plus keyword arguments, as the
	# examples above describe (the splats and block pipes had been garbled).
	def self.execute *code, **arguments
	  # Each keyword argument becomes a JS assignment whose value is the JSON
	  # encoding of the Ruby value.
	  arguments = arguments.map do |(var, val)|
	    "#{var.to_s.parameterize.underscore} = #{val.to_json}"
	  end

	  # Assignments come first so the variables exist before the scripts run.
	  new(arguments + code).()
	end

	# Exception raised with either a syntax or execution error
	class ScriptError < StandardError
	  # Output captured from the failed run
	  attr_reader :stdout, :stderr

	  # Keeps the captured streams; +msg+ is handed to StandardError.
	  def initialize stdout, stderr, msg=nil # :nodoc:
	    super(msg)
	    @stdout, @stderr = stdout, stderr
	  end

	  # Renders both captured streams, one labelled line each.
	  def to_s # :nodoc:
	    "Standard Out: #{@stdout}\nStandard Error: #{@stderr}\n"
	  end
	end

	# Wraps the ordered list of script sources that #call will run.
	def initialize(code)
	  @code = code
	end
	# Instances are only built from inside the class (see JS.execute).
	private_class_method :new

	# Writes every script to its own `.js` temp file inside a fresh temp
	# directory, hands the directory and files to #execute and returns whatever
	# #execute returns. The files are closed and removed afterwards.
	def call # :nodoc:
	  Dir.mktmpdir do |dir|
	    # Assigned before any file is created so the `ensure` below always has
	    # an array to walk; otherwise a failure while creating/writing a file
	    # leaves it nil and the cleanup raises NoMethodError over the real error.
	    scripts = []

	    @code.each do |code|
	      file = Tempfile.create(['', '.js'], dir)
	      scripts << file # tracked first so it is cleaned up even if the write fails
	      file.write code
	      file.flush
	    end

	    silence_warnings { execute dir, scripts }
	  ensure
	    scripts.each do |script|
	      script.close
	      File.unlink script.path
	    end
	  end
	end

	# Path of the compiled JS engine binary (`js.wasm`), expected to sit in the
	# same directory as this file. Kept private to the class.
	WASM_BIN_PATH = "#{__dir__}/js.wasm"
	private_constant :WASM_BIN_PATH

	class << self
	  # Wasmer store shared by the module and the import objects. Wrapped with
	  # `memoize` (presumably so it is built once and reused — confirm against
	  # the memoize implementation in use).
	  def store
	    Wasmer::Store.new
	  end
	  memoize :store

	  # The engine binary loaded into the shared store.
	  def module_
	    Wasmer::Module.new(store, bin)
	  end
	  memoize :module_

	  # WASI version reported by Wasmer for the loaded module.
	  def version
	    Wasmer::Wasi.get_version(module_, true)
	  end
	  memoize :version

	  # Forces store, module and version to be built now, with warnings silenced.
	  def preload!
	    silence_warnings { version }
	  end

	  private

	  # Raw bytes of the engine binary, read in binary mode.
	  def bin
	    File.read(WASM_BIN_PATH, mode: 'rb')
	  end
	end

	# Load module into memory so it's ready to go when we want to execute. This
	# also helps ensure that the memory taken by different threads of execution
	# is shared due to copy-on-write
	preload!

	private

	# Runs the given script files with the engine, mapping +dir+ as the guest's
	# root directory, and returns what OutputCapturer#call returns.
	def execute dir, scripts
	  wasi_env = env(dir, arguments(scripts))
	  imports = wasi_env.generate_import_object(store, version)

	  capturer = OutputCapturer.new do
	    instance = Wasmer::Instance.new(module_, imports)
	    instance.exports._start.call
	    # NOTE(review): this one-second pause runs after `_start` returns; its
	    # purpose is not evident from this file — confirm before changing it.
	    sleep 1
	  end
	  capturer.call
	end

	# Builds the WASI state for one run: program name `js.wasm`, the given
	# command line arguments, and +dir+ mapped as the guest's `/`.
	def env dir, arguments
	  builder = Wasmer::Wasi::StateBuilder.new('js.wasm')
	  builder = builder.arguments(arguments)
	  builder = builder.map_directory('/', dir)
	  builder.finalize
	end

	# Command line for the engine: a `-f <file name>` pair per script, in order.
	# Only the base name is passed; the directory part of each path is dropped.
	def arguments(scripts)
	  scripts.flat_map { |script| ['-f', File.basename(script.path)] }
	end

	# Make the class-level store/module_/version readers callable from instance
	# methods (as private methods).
	delegate :store, :module_, :version, to: :class, private: true

	# Can execute a block of Ruby code and capture its stdout and stderr. If an
	# exception occurs in that block the raised exception will contain both the
	# stdout and stderr.
	#
	# Most importantly it doesn't matter how the data arrives on stdout or stderr.
	# Could be via Ruby `puts`, could be a native ext, could be a shell command.
	# Also is thread-safe.
	class OutputCapturer

	  # Initialize with block to run
	  def initialize &blk
	    @blk = blk
	  end

	  # Runs the block in a forked child whose stdout/stderr are redirected to
	  # temp files, then waits for that child.
	  #
	  # Returns the captured stdout as a String when the child exits
	  # successfully. Raises ScriptError carrying the captured stdout and stderr
	  # when the block raised or the child did not exit normally.
	  def call
	    # Define the new IO before forking so both the parent and the child know
	    # which files hold the output.
	    with_new_io do |stdout, stderr|
	      pid = fork do
	        # Rebind within child process so parent process stdio is not affected
	        rebind_io stdout, stderr
	        @blk.()

	        # Explicit exit of fork with `!` to prevent any `at_exit` callback
	        # from running. Since the block executed correctly we return a
	        # `true` status code to indicate all good.
	        Process.exit! true
	      rescue
	        # Some sort of error occurred. Still call `exit!` to prevent any
	        # `at_exit` handler but this time no argument to signal a problem
	        Process.exit!
	      end

	      # Wait for *this* child only. A bare `Process.wait` reaps whichever
	      # child finishes first, so concurrent callers could pick up each
	      # other's exit status (or find no child left to wait for).
	      _, status = Process.wait2 pid

	      # `success?` is nil when the child did not exit normally (e.g. it was
	      # killed by a signal); that is treated as a failure as well.
	      raise ScriptError.new(stdout.read, stderr.read) unless status.success?

	      # No failure so only return stdout
	      stdout.read
	    end
	  end

	  private

	  # Yields two temp files (stdout first, stderr second) that exist only for
	  # the duration of the block.
	  def with_new_io
	    Tempfile.create do |stdout|
	      Tempfile.create do |stderr|
	        yield stdout, stderr
	      end
	    end
	  end

	  # Points the process-wide $stdout/$stderr at the given files and turns off
	  # buffering so output is written immediately.
	  def rebind_io stdout, stderr
	    $stdout.reopen stdout, 'w+'
	    $stdout.sync = true

	    $stderr.reopen stderr, 'w+'
	    $stderr.sync = true
	  end
	end
	private_constant :OutputCapturer
	end
	describe JS do
	  it 'executes JS' do
	    expect( JS.execute('console.log("Hello World")') ).to eq "Hello World\n"
	  end

	  it 'executes multiple scripts' do
	    expect( JS.execute('a = "Hello World"', 'console.log(a)') ).to eq "Hello World\n"
	  end

	  it 'executes with thread safety' do
	    threads = Array.new(10) do
	      Thread.new do
	        expect( JS.execute('console.log("Hello World")') ).to eq "Hello World\n"
	      end
	    end
	  ensure
	    # Joining re-raises anything a thread raised, so a failed expectation
	    # inside a thread fails the example.
	    threads.each(&:join)
	  end

	  describe 'passing arguments' do
	    it 'encodes basic data' do
	      expect( JS.execute('console.log(a)', a: 'Hello World') ).to eq "Hello World\n"
	    end

	    it "encodes content with a '" do
	      expect( JS.execute('console.log(a)', a: "O'Toole") ).to eq "O'Toole\n"
	    end

	    it 'encodes content with a "' do
	      expect( JS.execute('console.log(a)', a: 'Some "quote" content') ).to eq "Some \"quote\" content\n"
	    end
	  end

	  describe 'errors' do
	    # Shared check: running `code` must raise JS::ScriptError.
	    shared_examples 'error handling' do
	      it 'indicates an error code' do
	        expect { JS.execute code }.to raise_error JS::ScriptError
	      end
	    end

	    context 'syntax' do
	      let(:code) { 'a +' }

	      include_examples 'error handling'
	    end

	    context 'execution' do
	      let(:code) do
	        <<~JS
	          console.log('truncated output')
	          throw "error"
	        JS
	      end

	      include_examples 'error handling'

	      it 'provides stdout up to the point of error' do
	        # The block only runs when the expected error was raised, so a
	        # missing error still fails the example.
	        expect { JS.execute code }.to raise_error(JS::ScriptError) { |error|
	          expect( error.stdout ).to eq "truncated output\n"
	        }
	      end
	    end
	  end
	end