Last active
August 23, 2024 19:02
-
-
Save Mapagmataas1331/86f4cf9f2ad1f9dfd60e85f00ba4bb88 to your computer and use it in GitHub Desktop.
This script scans directories, visualizes their structure, reads file contents, and splits them based on GPT model token limits. It saves the processed output in a text file, making it ideal for preparing data for language models.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import tiktoken | |
# Map of model name -> token budget used when splitting file contents
# (option 3 in main()). Only one entry is active; the rest are kept as
# commented-out alternatives. NOTE(review): "gpt-4o" is capped at 4000
# rather than the commented 4096 — presumably headroom below the hard
# context limit; TODO confirm the intent.
GPT_MODELS = {
    # "gpt-3.5-turbo": 4096,
    # "gpt-3.5-turbo-16k": 16384,
    # "gpt-4o": 4096,
    "gpt-4o": 4000,
    # "gpt-4": 8192,
    # "gpt-4-32k": 32768,
    # "GPT-4-turbo": 128000
}
def scan_files_recursive(directory):
    """Recursively collect every file path under *directory*.

    Args:
        directory: Root directory to walk.

    Returns:
        list[str]: Paths (joined from the walk root) of all regular files
        found, in ``os.walk`` traversal order.
    """
    # A flat comprehension replaces the manual append loop (same order).
    return [
        os.path.join(root, file_name)
        for root, _, files in os.walk(directory)
        for file_name in files
    ]
def print_directory_tree(base_path, included_paths):
    """Print the included files as an indented tree rooted at *base_path*.

    Each path is split into components relative to *base_path*; directories
    are printed with a trailing slash and children indented two spaces.
    """
    root = {}
    for file_path in included_paths:
        components = os.path.relpath(file_path, base_path).split(os.sep)
        cursor = root
        for component in components:
            # setdefault creates the child dict on first sight, then descends.
            cursor = cursor.setdefault(component, {})

    def _render(subtree, prefix=""):
        for name, children in subtree.items():
            if children:
                print(f"{prefix}{name}/")
                _render(children, prefix + "  ")
            else:
                # A leaf (empty dict) represents a file.
                print(f"{prefix}{name}")

    _render(root)
def read_file_content(file_path):
    """Return the UTF-8 text of *file_path*, or None if it cannot be read.

    Any failure (missing file, permission, decode error) is reported to
    stdout instead of being raised.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            text = handle.read()
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return None
    return text
def split_content(content, max_tokens, model):
    """Split *content* into chunks of at most *max_tokens* tokens each.

    Encodes the text with tiktoken's encoder for *model*, then walks the
    token stream in windows of *max_tokens*, preferring to cut at the last
    single token that decodes to a newline inside the window so chunks end
    at line boundaries when possible.

    Args:
        content: Text to split.
        max_tokens: Maximum number of tokens per chunk (assumed > 0;
            a value of 0 would loop forever — TODO confirm callers).
        model: Model name accepted by tiktoken.encoding_for_model.

    Returns:
        list[str]: Decoded chunks whose concatenation equals *content*.
    """
    encoding = tiktoken.encoding_for_model(model)
    tokens = encoding.encode(content)
    parts = []
    start = 0
    while start < len(tokens):
        end = start + max_tokens
        if end >= len(tokens):
            # The remainder fits within the limit; emit it and stop.
            part = encoding.decode(tokens[start:])
            parts.append(part)
            break
        # Scan backwards through the window for a token that decodes to
        # exactly "\n"; cut just after it. (Multi-character tokens that
        # merely contain a newline are not matched.)
        split_point = end
        for i in range(end, start, -1):
            if encoding.decode(tokens[i - 1:i]) == "\n":
                split_point = i
                break
        if split_point == start:
            # Defensive fallback to a hard cut at the window edge.
            split_point = end
        part = encoding.decode(tokens[start:split_point])
        parts.append(part)
        start = split_point
    return parts
def save_output(output_path, global_path, output, total_files):
    """Write *output* to *output_path*, prefixed with a two-line header.

    The header records the project root and the number of selected files.
    Failures are reported to stdout rather than raised.
    """
    try:
        with open(output_path, 'w', encoding='utf-8') as out_file:
            # One writelines call replaces the three sequential writes.
            out_file.writelines([
                f"Global Project Path: {global_path}\n",
                f"Total Files: {total_files}\n\n",
                output,
            ])
        print(f"Output saved to {output_path}")
    except Exception as e:
        print(f"Error saving output: {e}")
def normalize_path(path):
    """Return *path* in a canonical form for equality comparison.

    normpath collapses redundant separators and '.' segments; normcase
    lower-cases on Windows (a no-op on POSIX) so lookups match regardless
    of how the user typed the path.
    """
    collapsed = os.path.normpath(path)
    return os.path.normcase(collapsed)
def main():
    """Interactive CLI: select files under a project root, write output.txt.

    Flow:
      1. Ask for an output mode (1 = paths only, 2 = full contents,
         3 = contents split by a GPT model's token limit).
      2. For mode 3, ask which model from GPT_MODELS to use.
      3. Ask for the project root, then loop letting the user add or
         remove files/folders (re-entering an already-included selection
         removes it).
      4. Build the requested output and save it as <root>/output.txt.
    """
    print("\nChoose an option:")
    print("1. Scan and print file paths.")
    print("2. Scan and print file contents (no splitting).")
    print("3. Scan and print file contents (split by GPT token limits).")
    option = input("\nEnter 1, 2, or 3: ").strip()
    if option not in {"1", "2", "3"}:
        print("\nInvalid option.\n")
        return
    if option == "3":
        # Mode 3 needs a model so split_content knows the token budget.
        print("\nAvailable GPT Models:")
        for idx, (model_name, token_limit) in enumerate(GPT_MODELS.items(), 1):
            print(f"{idx}. {model_name} (Token limit: {token_limit})")
        model_choice = input("\nSelect a model by number: ").strip()
        try:
            model_choice = int(model_choice)
            if 1 <= model_choice <= len(GPT_MODELS):
                # Menu numbers are 1-based indices into insertion order.
                selected_model = list(GPT_MODELS.keys())[model_choice - 1]
                max_tokens = GPT_MODELS[selected_model]
                print(f"Selected model: {selected_model} (Token limit: {max_tokens})")
            else:
                print("\nInvalid selection.\n")
                return
        except ValueError:
            print("\nInvalid input.\n")
            return
    global_path = input("\nEnter the global project path: ").strip()
    if not os.path.exists(global_path):
        print("\nThe provided path does not exist.\n")
        return
    included_paths = []
    while True:
        print("\nIncluded paths:")
        print_directory_tree(global_path, included_paths)
        user_input = input(
            "\nEnter more files/folders to include/remove, type '.' for all, or type 'STOP' to finish: ").strip()
        if user_input.lower() == "stop":
            break
        # Entries are interpreted relative to the project root; '.' expands
        # to the whole tree via the isdir branch below.
        user_input_path = normalize_path(os.path.join(global_path, user_input))
        if os.path.exists(user_input_path):
            if os.path.isdir(user_input_path):
                file_paths = [normalize_path(
                    path) for path in scan_files_recursive(user_input_path)]
            else:
                file_paths = [user_input_path]
            # Never include the script's own output file in the selection.
            output_file_path = normalize_path(
                os.path.join(global_path, "output.txt"))
            file_paths = [path for path in file_paths if path != output_file_path]
            # Toggle semantics: if every file of this selection is already
            # included, remove them all; otherwise add the missing ones.
            if all(path in included_paths for path in file_paths):
                for path in file_paths:
                    included_paths.remove(path)
                print(f"Removed '{user_input}' from the list.")
            else:
                for path in file_paths:
                    if path not in included_paths:
                        included_paths.append(path)
                print(f"Added '{user_input}' to the list.")
        else:
            print(f"Path '{user_input_path}' does not exist.")
    if not included_paths:
        print("No files selected.")
        return
    output_content = ""
    if option == "1":
        # Mode 1: one relative path per line, no contents.
        for file_path in included_paths:
            relative_path = os.path.relpath(file_path, global_path)
            output_content += relative_path + "\n"
    else:
        for file_path in included_paths:
            content = read_file_content(file_path)
            if content is None:
                # Unreadable files are skipped (but still counted in
                # total_files below).
                continue
            relative_path = os.path.relpath(file_path, global_path)
            if option == "2":
                output_content += f"Path: {relative_path}\nContent:\n{content}\n\n-=-=-=-=-=-=-=-=-=-=-=\n\n"
            elif option == "3":
                parts = split_content(content, max_tokens, selected_model)
                if len(parts) == 1:
                    output_content += f"Path: {relative_path}\nContent:\n{parts[0]}\n\n-=-=-=-=-=-=-=-=-=-=-=\n\n"
                else:
                    # Multi-part files get a "(Part N)" suffix per chunk.
                    for idx, part in enumerate(parts):
                        output_content += f"Path: {relative_path} (Part {idx + 1})\nContent:\n{part}\n\n-=-=-=-=-=-=-=-=-=-=-=\n\n"
    total_files = len(included_paths)
    output_path = os.path.join(global_path, "output.txt")
    save_output(output_path, global_path, output_content, total_files)
# Run the interactive CLI only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment