Thomascountz · July 25, 2025 12:36
diff --git a/apple_memos_transcription_extractor.rb b/apple_memos_transcription_extractor.rb
 require "json"

 # Reads the transcript stored in the 'tsrp' atom of a Voice Memos M4A file.
 #
 # The file is walked as a sequence of size/type-prefixed atoms; only the
 # container types listed in PATH_TO_TSRP are descended into.
 class AppleMemosTranscriptionExtractor
  # Raised by #extract_transcript when no 'tsrp' atom is found.
  class TsrpAtomNotFoundError < StandardError; end

  # Raised when the 'tsrp' payload is not JSON of the expected shape.
  class TranscriptDataInvalidError < StandardError; end

  # Raised when an atom header is truncated or its size is inconsistent.
  class MalformedAtomError < StandardError; end

  RECORDINGS_PATH = "#{Dir.home}/Library/Group Containers/group.com.apple.VoiceMemos.shared/Recordings".freeze

  # Compact header: 4-byte size + 4-byte type.
  COMPACT_HEADER_SIZE = 8
  # Extended header: compact header followed by an 8-byte size.
  EXTENDED_HEADER_SIZE = 16

  # Path of parent atoms leading to the 'udta' atom typically containing 'tsrp'.
  # This is an optimization based on observed Voice Memo M4A structure.
  # A more generic parser might traverse all container atoms.
  PATH_TO_TSRP = %w[moov trak mdia udta].freeze
  TSRP_ATOM_TYPE = "tsrp".freeze

  # Extracts the transcript text embedded in the given file.
  #
  # @param file_path [String] path to the M4A file
  # @return [String] the text segments of the transcript joined together
  # @raise [TsrpAtomNotFoundError] if the file contains no 'tsrp' atom
  # @raise [TranscriptDataInvalidError] if the payload is not the expected JSON
  # @raise [MalformedAtomError] if an atom header is truncated or inconsistent
  def extract_transcript(file_path)
    tsrp_payload = find_tsrp_atom_payload_in_file(file_path)

    if tsrp_payload.nil?
      raise TsrpAtomNotFoundError, "Could not find 'tsrp' atom in #{file_path}"
    end

    parse_payload_and_extract_text(tsrp_payload, file_path)
  end

  private

  # Opens the file in binary mode and searches it from start to end.
  #
  # @return [String, nil] raw 'tsrp' payload bytes, or nil when not found
  def find_tsrp_atom_payload_in_file(file_path)
    File.open(file_path, "rb") do |io|
      search_for_tsrp_recursively(io, io.size)
    end
  end

  # Scans sibling atoms from the current position up to search_end_offset,
  # descending into the container types listed in PATH_TO_TSRP.
  #
  # @param io [IO] stream positioned at the start of an atom
  # @param search_end_offset [Integer] absolute offset where this level ends
  # @return [String, nil] payload of the first 'tsrp' atom found, or nil
  # @raise [MalformedAtomError] if an atom's header does not fit in its bounds
  def search_for_tsrp_recursively(io, search_end_offset)
    while io.pos < search_end_offset
      atom_start = io.pos
      atom_header = read_atom_header(io, search_end_offset)

      # No complete header fits in what is left of this search area.
      return nil unless atom_header

      atom_size, atom_type, header_size = atom_header

      io.seek(atom_start + header_size)

      # A declared size is never allowed to reach past the enclosing area.
      atom_end = [atom_start + atom_size, search_end_offset].min
      payload_size = atom_end - io.pos

      if payload_size < 0
        raise MalformedAtomError, "Negative payload size for atom '#{atom_type}' at offset #{atom_start}"
      end

      return io.read(payload_size) if atom_type == TSRP_ATOM_TYPE

      if PATH_TO_TSRP.include?(atom_type)
        found_payload = search_for_tsrp_recursively(io, atom_end)
        return found_payload if found_payload
      end

      # Skip whatever remains of this atom and move on to its next sibling.
      io.seek(atom_end)
    end

    nil
  end

  # Reads one atom header at the current position.
  #
  # See: https://developer.apple.com/documentation/quicktime-file-format/atoms#Atom-structure
  #
  # @param io [IO] stream positioned at the start of an atom
  # @param search_end_offset [Integer] absolute offset where the enclosing area ends
  # @return [Array(Integer, String, Integer), nil] [atom_size, atom_type, header_size],
  #   or nil when fewer than COMPACT_HEADER_SIZE bytes remain
  # @raise [MalformedAtomError] if the extended size is truncated or the
  #   declared size is smaller than the header itself
  def read_atom_header(io, search_end_offset)
    atom_start = io.pos

    # Leftover bytes too short for a header (for example a 4-byte zero
    # terminator at the end of a container) must not be read as an atom:
    # doing so would consume bytes belonging to the next sibling.
    return nil if search_end_offset - atom_start < COMPACT_HEADER_SIZE

    initial_header = io.read(COMPACT_HEADER_SIZE)
    return nil if initial_header.nil? || initial_header.length < COMPACT_HEADER_SIZE

    size_field = initial_header[0, 4].unpack1("N") # 32-bit unsigned integer, network (big-endian)
    atom_type = initial_header[4, 4]

    if size_field == 1 # Extended size: 64-bit unsigned integer, big-endian
      extended_size_length = EXTENDED_HEADER_SIZE - COMPACT_HEADER_SIZE
      extended_size_bytes = io.read(extended_size_length)

      # IO#read returns nil or a short string at end of file.
      if extended_size_bytes.nil? || extended_size_bytes.length < extended_size_length
        raise MalformedAtomError, "Truncated extended size for atom '#{atom_type}' at offset #{atom_start}"
      end

      atom_size = extended_size_bytes.unpack1("Q>")
      header_size = EXTENDED_HEADER_SIZE
    elsif size_field == 0 # Atom extends to end of search area
      atom_size = search_end_offset - atom_start
      header_size = COMPACT_HEADER_SIZE
    else # Compact atom header (standard)
      atom_size = size_field
      header_size = COMPACT_HEADER_SIZE
    end

    # A size of exactly 0 is let through here; the caller's payload-size
    # check rejects it because the atom would end before its own header.
    if atom_size < header_size && atom_size != 0
      raise MalformedAtomError, "Malformed atom: type '#{atom_type}', size #{atom_size} < header_size #{header_size} at offset #{atom_start}"
    end

    [atom_size, atom_type, header_size]
  end

  # Parses the 'tsrp' payload as JSON and joins its text segments.
  #
  # Every even-indexed element of 'attributedString.runs' is treated as a
  # text segment; the element following each one is ignored.
  #
  # @param payload_string [String] raw payload bytes (re-tagged as UTF-8 in place)
  # @param file_path [String] currently unused
  # @return [String] concatenated text segments
  # @raise [TranscriptDataInvalidError] if the payload is not valid JSON or
  #   'attributedString.runs' is not an array
  def parse_payload_and_extract_text(payload_string, file_path)
    begin
      parsed_json = JSON.parse(payload_string.force_encoding("UTF-8").scrub)
    rescue JSON::ParserError => e
      raise TranscriptDataInvalidError, "Invalid JSON in transcript data: #{e.message}"
    end

    unless parsed_json.is_a?(Hash)
      raise TranscriptDataInvalidError, "Transcript data root is not a JSON object"
    end

    # Hash#dig raises TypeError when an intermediate value is not diggable,
    # so step through manually to keep every shape error in one exception type.
    attributed_string = parsed_json["attributedString"]
    runs_data = attributed_string["runs"] if attributed_string.is_a?(Hash)

    unless runs_data.is_a?(Array)
      raise TranscriptDataInvalidError, "Expected 'attributedString.runs' to be an array"
    end

    runs_data.each_slice(2).map(&:first).join
  end
 end

 # Command-line entry point; runs only when this file is executed directly.
 # Prints the transcript of the file named by the first argument.
 # Exit statuses: 1 usage error or unexpected exception, 2 no 'tsrp' atom,
 # 3 invalid transcript data or malformed atom, 4 file not found,
 # 5 permission denied.
 if $PROGRAM_NAME == __FILE__
  if ARGV.empty?
    warn(
      "Usage: ruby #{$PROGRAM_NAME} <path_to_m4a_file>",
      "Example: ruby #{$PROGRAM_NAME} \"#{AppleMemosTranscriptionExtractor::RECORDINGS_PATH}/My Recording.m4a\""
    )
    exit 1
  end

  file_path = ARGV.first

  begin
    puts AppleMemosTranscriptionExtractor.new.extract_transcript(file_path)
  rescue StandardError => e
    # Branches are tested top to bottom, so the specific cases come first.
    case e
    when AppleMemosTranscriptionExtractor::TsrpAtomNotFoundError
      warn "Error: #{e.message}"
      exit 2
    when AppleMemosTranscriptionExtractor::TranscriptDataInvalidError,
         AppleMemosTranscriptionExtractor::MalformedAtomError
      warn "Error: #{e.message}"
      exit 3
    when Errno::ENOENT
      warn "Error: File not found at '#{file_path}'"
      exit 4
    when Errno::EACCES
      warn "Error: Permission denied for file '#{file_path}'"
      exit 5
    else
      trace = e.backtrace
      warn "An unexpected error occurred: #{e.class} - #{e.message}"
      warn "Backtrace (run with DEBUG=1 for more details):"
      warn trace.first(5).join("\n")
      warn "\nFull Backtrace:\n#{trace.join("\n")}" if ENV["DEBUG"]
      exit 1
    end
  end
 end

 __END__

 MIT License

 Copyright (c) 2025 Thomas Countz

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
	require "json"

	# Reads the transcript stored in the 'tsrp' atom of a Voice Memos M4A file.
	#
	# The file is walked as a sequence of size/type-prefixed atoms; only the
	# container types listed in PATH_TO_TSRP are descended into.
	class AppleMemosTranscriptionExtractor
	  # Raised by #extract_transcript when no 'tsrp' atom is found.
	  class TsrpAtomNotFoundError < StandardError; end

	  # Raised when the 'tsrp' payload is not JSON of the expected shape.
	  class TranscriptDataInvalidError < StandardError; end

	  # Raised when an atom header is truncated or its size is inconsistent.
	  class MalformedAtomError < StandardError; end

	  RECORDINGS_PATH = "#{Dir.home}/Library/Group Containers/group.com.apple.VoiceMemos.shared/Recordings".freeze

	  # Compact header: 4-byte size + 4-byte type.
	  COMPACT_HEADER_SIZE = 8
	  # Extended header: compact header followed by an 8-byte size.
	  EXTENDED_HEADER_SIZE = 16

	  # Path of parent atoms leading to the 'udta' atom typically containing 'tsrp'.
	  # This is an optimization based on observed Voice Memo M4A structure.
	  # A more generic parser might traverse all container atoms.
	  PATH_TO_TSRP = %w[moov trak mdia udta].freeze
	  TSRP_ATOM_TYPE = "tsrp".freeze

	  # Extracts the transcript text embedded in the given file.
	  #
	  # @param file_path [String] path to the M4A file
	  # @return [String] the text segments of the transcript joined together
	  # @raise [TsrpAtomNotFoundError] if the file contains no 'tsrp' atom
	  # @raise [TranscriptDataInvalidError] if the payload is not the expected JSON
	  # @raise [MalformedAtomError] if an atom header is truncated or inconsistent
	  def extract_transcript(file_path)
	    tsrp_payload = find_tsrp_atom_payload_in_file(file_path)

	    if tsrp_payload.nil?
	      raise TsrpAtomNotFoundError, "Could not find 'tsrp' atom in #{file_path}"
	    end

	    parse_payload_and_extract_text(tsrp_payload, file_path)
	  end

	  private

	  # Opens the file in binary mode and searches it from start to end.
	  #
	  # @return [String, nil] raw 'tsrp' payload bytes, or nil when not found
	  def find_tsrp_atom_payload_in_file(file_path)
	    File.open(file_path, "rb") do |io|
	      search_for_tsrp_recursively(io, io.size)
	    end
	  end

	  # Scans sibling atoms from the current position up to search_end_offset,
	  # descending into the container types listed in PATH_TO_TSRP.
	  #
	  # @param io [IO] stream positioned at the start of an atom
	  # @param search_end_offset [Integer] absolute offset where this level ends
	  # @return [String, nil] payload of the first 'tsrp' atom found, or nil
	  # @raise [MalformedAtomError] if an atom's header does not fit in its bounds
	  def search_for_tsrp_recursively(io, search_end_offset)
	    while io.pos < search_end_offset
	      atom_start = io.pos
	      atom_header = read_atom_header(io, search_end_offset)

	      # No complete header fits in what is left of this search area.
	      return nil unless atom_header

	      atom_size, atom_type, header_size = atom_header

	      io.seek(atom_start + header_size)

	      # A declared size is never allowed to reach past the enclosing area.
	      atom_end = [atom_start + atom_size, search_end_offset].min
	      payload_size = atom_end - io.pos

	      if payload_size < 0
	        raise MalformedAtomError, "Negative payload size for atom '#{atom_type}' at offset #{atom_start}"
	      end

	      return io.read(payload_size) if atom_type == TSRP_ATOM_TYPE

	      if PATH_TO_TSRP.include?(atom_type)
	        found_payload = search_for_tsrp_recursively(io, atom_end)
	        return found_payload if found_payload
	      end

	      # Skip whatever remains of this atom and move on to its next sibling.
	      io.seek(atom_end)
	    end

	    nil
	  end

	  # Reads one atom header at the current position.
	  #
	  # See: https://developer.apple.com/documentation/quicktime-file-format/atoms#Atom-structure
	  #
	  # @param io [IO] stream positioned at the start of an atom
	  # @param search_end_offset [Integer] absolute offset where the enclosing area ends
	  # @return [Array(Integer, String, Integer), nil] [atom_size, atom_type, header_size],
	  #   or nil when fewer than COMPACT_HEADER_SIZE bytes remain
	  # @raise [MalformedAtomError] if the extended size is truncated or the
	  #   declared size is smaller than the header itself
	  def read_atom_header(io, search_end_offset)
	    atom_start = io.pos

	    # Leftover bytes too short for a header (for example a 4-byte zero
	    # terminator at the end of a container) must not be read as an atom:
	    # doing so would consume bytes belonging to the next sibling.
	    return nil if search_end_offset - atom_start < COMPACT_HEADER_SIZE

	    initial_header = io.read(COMPACT_HEADER_SIZE)
	    return nil if initial_header.nil? || initial_header.length < COMPACT_HEADER_SIZE

	    size_field = initial_header[0, 4].unpack1("N") # 32-bit unsigned integer, network (big-endian)
	    atom_type = initial_header[4, 4]

	    if size_field == 1 # Extended size: 64-bit unsigned integer, big-endian
	      extended_size_length = EXTENDED_HEADER_SIZE - COMPACT_HEADER_SIZE
	      extended_size_bytes = io.read(extended_size_length)

	      # IO#read returns nil or a short string at end of file.
	      if extended_size_bytes.nil? || extended_size_bytes.length < extended_size_length
	        raise MalformedAtomError, "Truncated extended size for atom '#{atom_type}' at offset #{atom_start}"
	      end

	      atom_size = extended_size_bytes.unpack1("Q>")
	      header_size = EXTENDED_HEADER_SIZE
	    elsif size_field == 0 # Atom extends to end of search area
	      atom_size = search_end_offset - atom_start
	      header_size = COMPACT_HEADER_SIZE
	    else # Compact atom header (standard)
	      atom_size = size_field
	      header_size = COMPACT_HEADER_SIZE
	    end

	    # A size of exactly 0 is let through here; the caller's payload-size
	    # check rejects it because the atom would end before its own header.
	    if atom_size < header_size && atom_size != 0
	      raise MalformedAtomError, "Malformed atom: type '#{atom_type}', size #{atom_size} < header_size #{header_size} at offset #{atom_start}"
	    end

	    [atom_size, atom_type, header_size]
	  end

	  # Parses the 'tsrp' payload as JSON and joins its text segments.
	  #
	  # Every even-indexed element of 'attributedString.runs' is treated as a
	  # text segment; the element following each one is ignored.
	  #
	  # @param payload_string [String] raw payload bytes (re-tagged as UTF-8 in place)
	  # @param file_path [String] currently unused
	  # @return [String] concatenated text segments
	  # @raise [TranscriptDataInvalidError] if the payload is not valid JSON or
	  #   'attributedString.runs' is not an array
	  def parse_payload_and_extract_text(payload_string, file_path)
	    begin
	      parsed_json = JSON.parse(payload_string.force_encoding("UTF-8").scrub)
	    rescue JSON::ParserError => e
	      raise TranscriptDataInvalidError, "Invalid JSON in transcript data: #{e.message}"
	    end

	    unless parsed_json.is_a?(Hash)
	      raise TranscriptDataInvalidError, "Transcript data root is not a JSON object"
	    end

	    # Hash#dig raises TypeError when an intermediate value is not diggable,
	    # so step through manually to keep every shape error in one exception type.
	    attributed_string = parsed_json["attributedString"]
	    runs_data = attributed_string["runs"] if attributed_string.is_a?(Hash)

	    unless runs_data.is_a?(Array)
	      raise TranscriptDataInvalidError, "Expected 'attributedString.runs' to be an array"
	    end

	    runs_data.each_slice(2).map(&:first).join
	  end
	end

	# Command-line entry point; runs only when this file is executed directly.
	# Prints the transcript of the file named by the first argument.
	# Exit statuses: 1 usage error or unexpected exception, 2 no 'tsrp' atom,
	# 3 invalid transcript data or malformed atom, 4 file not found,
	# 5 permission denied.
	if $PROGRAM_NAME == __FILE__
	  if ARGV.empty?
	    warn(
	      "Usage: ruby #{$PROGRAM_NAME} <path_to_m4a_file>",
	      "Example: ruby #{$PROGRAM_NAME} \"#{AppleMemosTranscriptionExtractor::RECORDINGS_PATH}/My Recording.m4a\""
	    )
	    exit 1
	  end

	  file_path = ARGV.first

	  begin
	    puts AppleMemosTranscriptionExtractor.new.extract_transcript(file_path)
	  rescue StandardError => e
	    # Branches are tested top to bottom, so the specific cases come first.
	    case e
	    when AppleMemosTranscriptionExtractor::TsrpAtomNotFoundError
	      warn "Error: #{e.message}"
	      exit 2
	    when AppleMemosTranscriptionExtractor::TranscriptDataInvalidError,
	         AppleMemosTranscriptionExtractor::MalformedAtomError
	      warn "Error: #{e.message}"
	      exit 3
	    when Errno::ENOENT
	      warn "Error: File not found at '#{file_path}'"
	      exit 4
	    when Errno::EACCES
	      warn "Error: Permission denied for file '#{file_path}'"
	      exit 5
	    else
	      trace = e.backtrace
	      warn "An unexpected error occurred: #{e.class} - #{e.message}"
	      warn "Backtrace (run with DEBUG=1 for more details):"
	      warn trace.first(5).join("\n")
	      warn "\nFull Backtrace:\n#{trace.join("\n")}" if ENV["DEBUG"]
	      exit 1
	    end
	  end
	end

	__END__

	MIT License

	Copyright (c) 2025 Thomas Countz

	Permission is hereby granted, free of charge, to any person obtaining a copy
	of this software and associated documentation files (the "Software"), to deal
	in the Software without restriction, including without limitation the rights
	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	copies of the Software, and to permit persons to whom the Software is
	furnished to do so, subject to the following conditions:

	The above copyright notice and this permission notice shall be included in all
	copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	SOFTWARE.
No results found