|
require 'dropbox-sign' |
|
require 'fileutils' |
|
require 'json' |
|
|
|
# Paths of the JSON files used to cache the fetched signature requests and to
# track download progress between runs. The strings are frozen so that these
# shared constants cannot be mutated by accident.
CACHE_FILE = "./signature_requests_cache.json".freeze
COMPLETED_FILE = "./completed_downloads.json".freeze
FAILED_FILE = "./failed_downloads.json".freeze
|
|
|
# Configure the Dropbox Sign (formerly HelloSign) API credentials.
#
# The API key is taken from the DROPBOX_SIGN_API_KEY environment variable so
# that the secret does not have to live in the source file. When the variable
# is not set, the original "API KEY" placeholder is used as before.
Dropbox::Sign.configure do |config|
  # HTTP basic authorization: the API key is supplied as the username.
  config.username = ENV.fetch("DROPBOX_SIGN_API_KEY", "API KEY")
end
|
|
|
|
|
# Makes a piece of text safe to embed in a file name.
#
# Every character that is not an ASCII letter, digit, dot, underscore, or
# hyphen is replaced with an underscore. The argument is coerced with +to_s+
# first, so +nil+ (or any other non-String value) yields a String instead of
# raising NoMethodError.
#
# @param str [#to_s] the raw text
# @return [String] the sanitized text
def sanitize_filename(str)
  str.to_s.gsub(/[^0-9A-Za-z.\-_]/, '_')
end
|
|
|
|
|
# Fetches one page of completed signature requests, retrying when the API
# answers with HTTP 429 (Too Many Requests).
#
# @param signature_request_api [#signature_request_list] the API client
# @param page_size [Integer] number of requests per page
# @param page_number [Integer] 1-based page to fetch
# @param retries [Integer] how many additional attempts are allowed after a
#   429 response; each retry is preceded by a 20 second pause
# @return [Object] the response of +signature_request_list+ (callers read its
#   +list_info+ and +signature_requests+)
# @raise [StandardError] the original error, once retries are exhausted or
#   when the failure is not a 429
def fetch_signature_request_page(signature_request_api, page_size, page_number, retries = 3)
  wait_time = 20 # seconds to pause before retrying after a 429

  begin
    result = signature_request_api.signature_request_list({
      account_id: 'all',
      page_size: page_size,
      page: page_number,
      # The list endpoint filters through its `query` search string; a bare
      # `complete:` option is not one of its documented parameters, so the
      # "complete only" restriction has to be expressed as a search term.
      query: 'complete:true'
    })
  rescue StandardError => e
    # Prefer the structured HTTP status when the error exposes one; matching
    # "429" anywhere in the message can also hit unrelated text such as ids.
    status = e.respond_to?(:code) ? e.code : nil
    rate_limited = status.is_a?(Integer) ? status == 429 : e.message.include?("429")

    if rate_limited && retries > 0
      puts "Received 429 error on page #{page_number}. Retrying in #{wait_time} seconds..."
      sleep(wait_time)
      retries -= 1
      retry
    end

    puts "Error fetching page #{page_number}: #{e.message}"
    raise
  end

  # Debug: print the page info from the API response if available.
  list_info = result.list_info
  if list_info.respond_to?(:page)
    puts "DEBUG: Received page #{list_info.page} of #{list_info.num_pages}."
  else
    puts "DEBUG: Fetched page #{page_number} (API did not return explicit page info)."
  end

  result
end
|
|
|
|
|
# Retrieves all signature requests using pagination, with a file cache.
#
# When CACHE_FILE exists, its parsed JSON content is returned and no API call
# is made. Otherwise every page is fetched via fetch_signature_request_page,
# each request is reduced to a plain hash, duplicates (same
# signature_request_id) are skipped, and the resulting list is written to
# CACHE_FILE before being returned.
#
# @return [Array<Hash>] one hash per unique request with the string keys
#   "signature_request_id", "title", "requester_email_address" and
#   "recipient_email_address" (the first signer's address, or "unknown")
def get_all_signature_requests
  if File.exist?(CACHE_FILE)
    puts "Loading cached signature requests from #{CACHE_FILE}..."
    data = JSON.parse(File.read(CACHE_FILE))
    puts "Loaded #{data.size} signature request(s) from cache."
    return data
  end

  signature_request_api = Dropbox::Sign::SignatureRequestApi.new
  page_size = 100
  signature_requests = []
  seen_ids = {} # signature_request_id => true, for constant-time duplicate checks

  # The first page also carries the pagination info. Keep it and reuse it in
  # the loop below instead of spending a second API call on page 1.
  first_page = fetch_signature_request_page(signature_request_api, page_size, 1)
  total_pages = first_page.list_info.num_pages
  puts "Total pages to process: #{total_pages}"

  (1..total_pages).each do |page_number|
    puts "\nProcessing page #{page_number} of #{total_pages}..."
    result = if page_number == 1
               first_page
             else
               fetch_signature_request_page(signature_request_api, page_size, page_number)
             end

    # Treat a missing list as an empty page rather than crashing on nil.
    page_requests = result.signature_requests || []
    current_page_count = page_requests.size
    puts "DEBUG: Processing page #{page_number}, received #{current_page_count} requests."

    page_requests.each do |req|
      signatures = req.respond_to?(:signatures) ? req.signatures : nil
      recipient = if signatures && !signatures.empty?
                    signatures.first.signer_email_address
                  else
                    "unknown"
                  end

      current_req = {
        "signature_request_id" => req.signature_request_id,
        "title" => req.title,
        "requester_email_address" => req.requester_email_address,
        "recipient_email_address" => recipient
      }

      request_id = current_req["signature_request_id"]
      if seen_ids.key?(request_id)
        puts "DEBUG: Duplicate encountered on page #{page_number}: #{current_req["signature_request_id"]} (title: #{current_req["title"]}, requester: #{current_req["requester_email_address"]}, recipient: #{current_req["recipient_email_address"]})"
      else
        seen_ids[request_id] = true
        signature_requests << current_req
      end
    end

    puts "  Retrieved #{current_page_count} signature request(s) from page #{page_number}."
    puts "  Running total (unique complete requests so far): #{signature_requests.size}"
  end

  # The list is deduplicated by signature_request_id, so downstream processing
  # produces one file per unique signature request.
  puts "\nFinished retrieving signature requests."
  puts "Total unique complete signature requests retrieved: #{signature_requests.size}"
  File.write(CACHE_FILE, JSON.pretty_generate(signature_requests))
  puts "Cached signature requests to #{CACHE_FILE}."
  signature_requests
end
|
|
|
|
|
# Downloads the PDF of one signature request to +file_path+, retrying on
# HTTP 429 (Too Many Requests).
#
# The API hands back a temporary file; it is copied to +file_path+ and then
# always closed and removed, including when the copy itself fails. On a 429
# the method waits 20 seconds and tries again until +max_attempts+ is reached.
# Any other error is reported and ends the download without raising.
#
# @param signature_request_api [#signature_request_files] the API client
# @param req [Hash] request record; only "signature_request_id" is read
# @param file_path [String] destination path of the PDF
# @param attempt [Integer] number of the first attempt (1-based)
# @param max_attempts [Integer] highest attempt number that may be tried
# @return [Boolean] true when the file was copied, false otherwise
def download_with_retry(signature_request_api, req, file_path, attempt = 1, max_attempts = 3)
  request_id = req["signature_request_id"]
  wait_time = 20 # seconds to pause before retrying after a 429

  loop do
    file_bin = nil
    begin
      file_bin = signature_request_api.signature_request_files(request_id, { file_type: "pdf" })

      # Copy the file from the temporary location to the destination.
      FileUtils.cp(file_bin.path, file_path)
      return true
    rescue StandardError => e
      # Prefer the structured HTTP status when the error exposes one and fall
      # back to the message text otherwise.
      status = e.respond_to?(:code) ? e.code : nil
      rate_limited = status.is_a?(Integer) ? status == 429 : e.message.include?("HTTP status code: 429")

      unless rate_limited
        puts "Error downloading request #{request_id}: #{e.message}"
        return false
      end

      if attempt >= max_attempts
        puts "Exceeded maximum retry attempts for request #{request_id}."
        return false
      end

      puts "Received 429 for request #{request_id} (attempt #{attempt} of #{max_attempts}). Retrying in #{wait_time} seconds..."
    ensure
      # Runs on success, failure and retry alike, so no temp file is leaked.
      discard_download_tempfile(file_bin)
    end

    sleep(wait_time)
    attempt += 1
  end
end

# Closes and removes the temporary download file, if there is one.
# Best effort: a cleanup problem is reported but never fails the download.
def discard_download_tempfile(file_bin)
  return if file_bin.nil?

  file_bin.close if file_bin.respond_to?(:close)
  if file_bin.respond_to?(:unlink)
    file_bin.unlink
  elsif file_bin.respond_to?(:delete)
    file_bin.delete
  end
rescue StandardError => e
  puts "Warning: could not remove temporary file: #{e.message}"
end
|
|
|
|
|
# Downloads the PDF of every pending signature request into ./files, in
# batches of 25, and keeps progress on disk so the run can be resumed.
#
# Requests whose id is blank or already listed in COMPLETED_FILE are skipped.
# After each successful download the id is appended to COMPLETED_FILE and any
# failure record for that id is removed from FAILED_FILE. After each failed
# download the request is recorded once in FAILED_FILE. Between batches the
# method sleeps so that a batch takes at least 60 seconds.
#
# @param requests [Array<Hash>] request records with the string keys
#   "signature_request_id", "title", "requester_email_address" and
#   "recipient_email_address"
# @return [nil]
def download_requests(requests)
  signature_request_api = Dropbox::Sign::SignatureRequestApi.new
  batch_size = 25

  # Ensure the output directory exists.
  FileUtils.mkdir_p('./files')

  completed = load_json_list(COMPLETED_FILE)
  failures = load_json_list(FAILED_FILE)

  # Index the completed ids once so filtering is a hash lookup per request
  # instead of a scan of the whole completed list.
  completed_ids = completed.each_with_object({}) { |done_id, index| index[done_id] = true }

  # Skip requests that are already downloaded or have no usable id.
  requests_to_download = requests.reject do |req|
    id = req["signature_request_id"].to_s.strip
    id.empty? || completed_ids.key?(id)
  end

  total_requests = requests_to_download.size
  total_batches = (total_requests / batch_size.to_f).ceil
  processed_files_count = 0

  puts "\nStarting downloads..."
  puts "Total requests to download (skipping completed ones): #{total_requests}"

  requests_to_download.each_slice(batch_size).with_index do |batch, batch_index|
    current_batch_number = batch_index + 1
    # Monotonic clock: unaffected by wall-clock adjustments during the batch.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    puts "\nProcessing batch #{current_batch_number} of #{total_batches}..."

    batch.each do |req|
      request_id = req["signature_request_id"]
      file_path = download_file_path(req)

      if download_with_retry(signature_request_api, req, file_path)
        processed_files_count += 1
        puts "  Successfully downloaded request #{request_id} to:"
        puts "    #{file_path} (Total downloaded in this run: #{processed_files_count})"
        completed << request_id
        File.write(COMPLETED_FILE, JSON.pretty_generate(completed))

        # A request that failed in an earlier run has now succeeded, so its
        # failure record is stale. reject! returns nil when nothing matched.
        if failures.reject! { |f| f["signature_request_id"] == request_id }
          File.write(FAILED_FILE, JSON.pretty_generate(failures))
        end
      else
        puts "  Failed to download request #{request_id} after retries."

        # Record the failure only once per request.
        unless failures.any? { |f| f["signature_request_id"] == request_id }
          failures << {
            "signature_request_id" => request_id,
            "title" => req["title"],
            "requester_email_address" => req["requester_email_address"],
            "recipient_email_address" => req["recipient_email_address"]
          }
          File.write(FAILED_FILE, JSON.pretty_generate(failures))
        end
      end
    end

    files_remaining = total_requests - processed_files_count
    batches_remaining = total_batches - current_batch_number
    puts "Completed batch #{current_batch_number}/#{total_batches}: #{batch.size} file(s) processed in this batch."
    puts "Total files downloaded so far: #{processed_files_count} of #{total_requests}."
    puts "Batches remaining: #{batches_remaining} | Files remaining: #{files_remaining}"

    # Enforce the rate limit for high-tier endpoints (25 requests per minute).
    # No pause is needed after the last batch.
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    sleep_time = 60 - elapsed
    if sleep_time > 0 && current_batch_number < total_batches
      puts "Sleeping for #{sleep_time.round} seconds to respect rate limit..."
      sleep(sleep_time)
    end
  end

  puts "\nDownload process completed."
  puts "Failed requests are logged in '#{FAILED_FILE}' (#{failures.size} failures recorded)."
end

# Returns the parsed JSON content of +path+, or an empty list when the file
# does not exist yet.
def load_json_list(path)
  File.exist?(path) ? JSON.parse(File.read(path)) : []
end

# Builds the destination path for a request's PDF from the sanitized sender
# email, recipient email and title, followed by the signature request id.
def download_file_path(req)
  sanitized_sender = sanitize_filename(req["requester_email_address"] || "unknown")
  sanitized_recipient = sanitize_filename(req["recipient_email_address"] || "unknown")
  sanitized_title = sanitize_filename(req["title"] || "untitled")
  "./files/#{sanitized_sender}_#{sanitized_recipient}_#{sanitized_title}_#{req["signature_request_id"]}.pdf"
end
|
|
|
# Script entry point: obtain the signature requests (from the cache file or,
# failing that, from the API) and pass them straight to the downloader, which
# handles only the ones that are still pending.
download_requests(get_all_signature_requests)