### ---------- SETUP ---------- ###

# python 3.12+ (not extensively tested on pre-3.12 versions!)
# pip install bencodepy

## AUTHORED BY zakkarry with help from Q

### ---------- SETUP ---------- ###


### ---------- INSTRUCTIONS ---------- ###
### ---------- INSTRUCTIONS ---------- ###
### ---------- INSTRUCTIONS ---------- ###
#
# PLEASE READ THE WHOLE INSTRUCTIONS BLOCK AND DOCUMENTATION PRIOR TO USING THIS!
# THIS IS ENTIRELY OPTIONAL, AND IF YOU DON'T FOLLOW THESE INSTRUCTIONS YOU RISK
# DATA LOSS!!!
#
# THIS IS NOT REQUIRED AND IS ENTIRELY OPTIONAL FOR LINKING STRUCTURE.
# If you have an exotic setup, this script MAY NOT work for you. Please be sure to make and keep your own backups
# in case anything goes wrong. The script will create its own backups, but you should have your own anyway!
#
# If a run with DRY_RUN set to True produces no errors or bad-looking paths in the logs, the real run should complete without issues.
#
# DO NOT RUN WITHOUT READING ALL OF THE INSTRUCTIONS FIRST AND VALIDATING DRY RUNS !!!!!!

# This script will convert all cross-seed torrents in your client
# to link at LINK_DIR/TrackerName, as v6 `flatLinking: false` does in normal operation.
# Torrents whose save paths are already inside the associated TrackerName folder will be ignored.
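#
# For illustration only (made-up paths): with LINK_DIR = "/data/linkdir" and a tracker mapped to
# "MyTracker", a cross-seed torrent saved at "/data/downloads/Some.Release" would have its save path
# rewritten to "/data/linkdir/MyTracker" and its files linked as
# "/data/linkdir/MyTracker/Some.Release" -> "/data/downloads/Some.Release".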
#
# WARNING: You will likely need to clean up orphaned files following this script
#
# This script does not remove the non-linked/abandoned "original" torrent's data files.
# Personally, I used this script to link everything that wasn't already in tracker folders, moved the
# old files/folders into an "OLD_CROSSEEDS" directory until I was sure everything was running correctly
# and working, and only then removed the old structure once I knew it was no longer needed.
#
# Torrents that are missing any/all of their files will
# immediately error in Deluge upon restarting, so when you restart after running this you will see
# right away if files are missing.

# ## INSTRUCTIONS ##
# ##
# ## PLEASE READ ALL OF THESE ONCE OR TWICE BEFORE DOING ANYTHING ELSE!
# ##
# 1. Populate ONLY the options in the CONFIG section below according to your cross-seed setup
#        THE PATHS USED BELOW ARE HOW CROSS-SEED AND YOUR TORRENT CLIENT KNOW THE PATHS
#        IF YOU ARE USING DOCKER, THESE ARE YOUR CONTAINER PATHS, NOT THE HOST SYSTEM'S PATHS

# 2. If you are using docker, you'll need to get a shell/console inside the container and stop the deluged process
#
#               To do this, use "docker exec -it deluge /bin/sh"
#
#               Replace "deluge" in this command with your container name if it is not default!
#
#                         - linuxserver (lscr) containers need to get a shell in the container and run the command
#                                 s6-svc -d /run/service/svc-deluged
#
#                         - binhex containers need to run the command
#                                kill -15 $(ps aux | grep /usr/bin/deluged | awk '{print $2}')
#
#                BEFORE PROCEEDING - VALIDATE THAT THE deluged (core) PROCESS HAS STOPPED AND HAS NOT AUTOMATICALLY RESTARTED!
#                USE "ps aux | grep deluged" to confirm, in addition to checking your webui - the webui should stay running for docker;
#                native users can just stop the entire Deluge "suite"
#
# 3. Non-docker users need to STOP Deluge in a way that it will not restart automatically (e.g. systemctl, service, or a regular exit)
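#
#               For example (the service/unit name varies by distro and setup):
#
#                         systemctl stop deluged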
#
# 4. Once Deluge has exited, make a manual _FULL_ backup of the torrents.state and torrents.fastresume files
#       (this script will backup before modifying files as well)
#
# 5. Run the script with DRY_RUN = True (it is set to True as downloaded) and check the console output or log
#       for the reported paths to ensure they are correct for your setup (see the sample output below)
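#
#       Each dry-run candidate is logged roughly like this (all values below are made up):
#
#               2024-01-01 12:00:00.000 DEBUG: (LINKING) tracker.example.org in Some.Release (01234...34567)
#               2024-01-01 12:00:00.001 DEBUG:          (downloaded: 0MB/1400MB) - 0.0%
#               2024-01-01 12:00:00.001 DEBUG:          (save_path: /data/downloads/Some.Release -> /data/linkdir/MyTracker)
#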
# 6. After verifying everything is good, run the script with DRY_RUN = False to create the links and modify your torrent state file.
#       It will make a backup as well
#
# 7. After the non-dry run, restart Deluge (or restart the container) and check whether any torrents are in an Error state, check the
#       paths of a few of the changed torrents, and pause and recheck a couple for verification
# 8. If anything is wrong, STOP Deluge (or the container) entirely, replace the .state and .fastresume files from either backup, and start Deluge.
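#       For example, with the default TORRENT_DIR (adjust to your setup), the script's own backup of the
#       state file can be restored with: cp /config/state/torrents.state.old /config/state/torrents.state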
# 9. You may need to re-run the script a second time. There is no risk in retrying if something appears wrong. Just keep backups of the original files!
#        Always make sure to exit deluged beforehand.
#
# 10. If everything is correct, you will NEED to clean up/move the orphaned files as they are no longer being used by your client.
#       This script does not remove files, only creates links to the new location
#
#   PLEASE NOTE: If you have any concerns, questions, or hesitation, please contact the cross-seed discord for support.

### ---------- END INSTRUCTIONS ---------- ###
### ---------- END INSTRUCTIONS ---------- ###
### ---------- END INSTRUCTIONS ---------- ###


### ---------- CONFIG ----------
### ---------- CONFIG ----------
### ---------- CONFIG ----------

# Set to True to only log what would be done
DRY_RUN = True

# This must be the path from the perspective of this script
# It may differ from the path you have in cross-seed
# THIS WILL BE WHERE YOUR .torrent and .state and .fastresume for deluge reside!
# QUIT Deluge and BACKUP torrents.state and torrents.fastresume MANUALLY before running this script
# KEEP THE BACKUPS OF THE FILES SAFE FOR NOW!
TORRENT_DIR = "/config/state"

# this is either "symlink" or "hardlink" ONLY
LINK_TYPE = "hardlink"
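# NOTE: hardlinks require LINK_DIR to be on the same filesystem/volume as your torrent data;
# symlinks work across filesystems but will break if the original files are later moved or removed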

# This must be the path from the perspective of this script
# It MAY differ from the path you have in cross-seed
# If the script reports lots of files as not found, this path may be the cause
LINK_DIR = "/data/linkdir"

# The key (left side of ":") is a unique portion of the tracker url or announce id,
# the value (right side of ":") is the folder name for torrents associated with that tracker
# The url is for the tracker, NOT the website. Check a torrent's tracker (announce) urls in your client for the correct url
# You don't need the entire url, just a unique part of it
# Any torrents without one of these will be ignored
# Some example items are pre-filled with a tracker url and an announce id
TRACKER_NAME_URL = {
    "identifiable-url-snippet.com": "tracker_linking_subfolder",
    "thepiratebay.com": "tpb",
    "l33tx": "leetx",
}
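# For illustration (made-up values): a torrent announcing to "https://announce.example-tracker.net:2710/a1b2c3/announce"
# would match an entry like "example-tracker.net": "ExampleTracker" and be linked under LINK_DIR/ExampleTracker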

### ---------- END CONFIG ----------
### ---------- END CONFIG ----------
### ---------- END CONFIG ----------
import logging
import os
import pickle
import re
import shutil
import sys
from collections import OrderedDict
from pathlib import Path
from typing import Any, NewType
from urllib.parse import urlparse

try:
    import bencodepy  # type: ignore
except ModuleNotFoundError:
    print(r"bencode module missing. please run 'pip install bencodepy'")
    exit()
except KeyboardInterrupt:
    exit()

# types
FastResume = NewType("FastResume", OrderedDict[bytes, Any])

assert sys.version_info >= (3, 12), "Python 3.12+ required"
assert isinstance(
    DRY_RUN, bool
), "DRY_RUN must be a boolean: DRY_RUN = True or DRY_RUN = False"
assert isinstance(
    TORRENT_DIR, str
), 'TORRENT_DIR must be a string like: TORRENT_DIR = "/path/to/folder"'
TORRENT_DIR_PATH = Path(TORRENT_DIR)
assert (
    TORRENT_DIR_PATH.exists() and TORRENT_DIR_PATH.is_dir()
), f"TORRENT_DIR does not exist or is not a folder: {TORRENT_DIR}"
assert LINK_TYPE in [
    "hardlink",
    "symlink",
], 'LINK_TYPE must be either "hardlink" or "symlink": LINK_TYPE = "hardlink"'
assert isinstance(
    LINK_DIR, str
), 'LINK_DIR must be a string like: LINK_DIR = "/path/to/folder"'
LINK_DIR_PATH = Path(LINK_DIR)
assert (
    LINK_DIR_PATH.exists() and LINK_DIR_PATH.is_dir()
), f"LINK_DIR does not exist or is not a folder: {LINK_DIR}"
assert (
    len(TRACKER_NAME_URL) > 0
), 'TRACKER_NAME_URL must be a dictionary like: TRACKER_NAME_URL = {"https://tracker.url": "tracker_name"}'

# Logging
LOG_NAME = f"{Path(__file__).stem}.log"
LOG_FORMAT_DATE = "%Y-%m-%d %H:%M:%S"
LOG_FORMAT_STREAM = "%(asctime)s.%(msecs)03d %(levelname)s: %(message)s"
LOG_FORMAT_FILE = "%(asctime)s.%(msecs)03d %(levelname)s: %(message)s"
LOG_HANDLER_STREAM = logging.StreamHandler(sys.stdout)
LOG_HANDLER_FILE = logging.FileHandler(LOG_NAME)
LOG_HANDLER_STREAM.setFormatter(logging.Formatter(LOG_FORMAT_STREAM, LOG_FORMAT_DATE))
LOG_HANDLER_FILE.setFormatter(logging.Formatter(LOG_FORMAT_FILE, LOG_FORMAT_DATE))
log = logging.getLogger(LOG_NAME)
log.setLevel(logging.DEBUG)
LOG_HANDLER_STREAM.setLevel(logging.DEBUG)
LOG_HANDLER_FILE.setLevel(logging.DEBUG)
log.addHandler(LOG_HANDLER_STREAM)
log.addHandler(LOG_HANDLER_FILE)


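# Shortens 40-character infohashes for log output; any other string (e.g. a torrent name) is returned
# unchanged. For example, sanitize_infohash("0123456789abcdef0123456789abcdef01234567") returns "01234...34567".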
def sanitize_infohash(infohash: str):
    if not re.compile(r"^[a-z0-9]{40}$").match(infohash):
        return infohash
    san_string = infohash[:5] + "..." + infohash[35:]
    return san_string


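# Reads the total payload size in bytes from the bencoded "info" dict of a .torrent file
# (single-file torrents store one "length"; multi-file torrents sum the per-file "length" values).
# The loop below converts the result to MB.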
def get_torrent_size(torrent_path):
    with open(torrent_path, "rb") as f:
        torrent_data = bencodepy.decode(f.read())

    info = torrent_data[b"info"]

    if b"length" in info:
        # Single-file torrent
        return info[b"length"]
    elif b"files" in info:
        # Multi-file torrent
        total_size = sum(file_sizes[b"length"] for file_sizes in info[b"files"])
        return total_size
    else:
        return 0


def get_torrent_name(torrent_path):
    with open(torrent_path, "rb") as f:
        torrent_data = bencodepy.decode(f.read())

    info = torrent_data[b"info"]

    if b"name" in info:
        return info[b"name"].decode("utf-8")  # Decode bytes to string


# Backup TORRENT_STATE_FILE
TORRENT_STATE_FILE = TORRENT_DIR_PATH / "torrents.state"
TORRENT_STATE_OLD = TORRENT_STATE_FILE.with_suffix(".state.old")
TORRENT_FAST_RESUME = TORRENT_DIR_PATH / "torrents.fastresume"

assert (
    not TORRENT_STATE_OLD.exists()
), f"TORRENT_STATE_OLD already exists, move it somewhere safe before rerunning this script: {TORRENT_STATE_OLD}"
log.info(
    f"{'(DRY RUN) ' if DRY_RUN else ''}Backing up TORRENT_STATE_FILE to TORRENT_DIR: {TORRENT_STATE_FILE}"
)
if not DRY_RUN:
    shutil.copyfile(TORRENT_STATE_FILE, TORRENT_STATE_OLD)
    assert (
        TORRENT_STATE_OLD.exists()
    ), f"Failed to copy TORRENT_STATE_FILE state file to TORRENT_STATE_OLD: {TORRENT_STATE_FILE} -> {TORRENT_STATE_OLD}"

TORRENT_CATEGORY_SUFFIX = ".cross-seed"  # Not bytes

log.info(
    f"{'(DRY RUN) ' if DRY_RUN else ''}Processing cross-seed torrents in TORRENT_DIR: {TORRENT_DIR}"
)
total_linked: int = 0

state_file = open(TORRENT_STATE_FILE, "rb")
state = pickle.load(state_file)
state_file.close()
state_modified = False

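# Deluge's torrents.fastresume is itself bencoded: a dict keyed by infohash whose values are each
# torrent's still-bencoded resume data, which is why every entry is decoded a second time in the loop below.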
fastresume: FastResume = bencodepy.decode_from_file(TORRENT_FAST_RESUME)  # type: ignore
fastresume_decoded = {k.decode(): v for k, v in fastresume.items()}
for torrent in state.torrents:
    migrated_full_path: None | Path = None
    migrated_path: None | str = None
    torrent_infohash: str = torrent.torrent_id
    torrent_name: str = get_torrent_name(
        TORRENT_DIR_PATH / f"{torrent_infohash}.torrent"
    )
    torrent_tracker_count: int = len(torrent.trackers)
    torrent_tracker_urls: list[str] = [
        torrent.trackers[x].get("url", None) for x in range(torrent_tracker_count)
    ]
    torrent_is_finished: bool = torrent.is_finished
    torrent_save_path: str = torrent.save_path
    torrent_total_size: int = int(
        round(
            get_torrent_size(TORRENT_DIR_PATH / f"{torrent_infohash}.torrent")
            / 1024
            / 1024,
            1,
        )
    )
    to_decode_infohash = None
    if torrent_infohash in fastresume_decoded:
        to_decode_infohash = fastresume_decoded.get(torrent_infohash, None)

    if isinstance(to_decode_infohash, bytes):
        torrent_fast_resume = bencodepy.decode(to_decode_infohash)
    else:
        continue

    torrent_is_renamed = torrent_fast_resume.get(b"mapped_files", None)
    if torrent_is_renamed:
        continue
    torrent_total_downloaded: int = int(
        round(torrent_fast_resume.get(b"total_downloaded", 0) / 1024 / 1024, 1)
    )
    torrent_download_percent: float = (
        round(torrent_total_downloaded / torrent_total_size, 3)
        if torrent_total_size
        else 0.0
    )

    if (
        torrent_save_path
        and len(torrent_tracker_urls) >= 1
        and (val in torrent_save_path for val in TRACKER_NAME_URL.values())
    ):
        results: dict[str, bool] = {
            key: any(key in url for url in torrent_tracker_urls)
            for key in TRACKER_NAME_URL
        }
        for key, result in results.items():
            if not result:
                continue
            else:
                migrated_path = TRACKER_NAME_URL.get(key, None)
                migrated_full_path = LINK_DIR_PATH / migrated_path
                break

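        # skip if no configured tracker matched, if the torrent already lives in its tracker folder,
        # or if it actually downloaded most of its data (a real download rather than a cross-seed)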
        if (
            not migrated_path
            or migrated_path in torrent_save_path
            or (torrent_download_percent > 0.8)
        ):
            continue
        print("\n")
        log.debug(
            f"(LINKING) {urlparse(torrent_tracker_urls[0]).netloc} in {sanitize_infohash(torrent_name)} ({sanitize_infohash(torrent_infohash)})"
        )
        log.debug(
            f"\t\t(downloaded: {torrent_total_downloaded}MB/{torrent_total_size}MB) - {round(torrent_download_percent * 100, 1)}%"
        )
        log.debug(f"\t\t(save_path: {torrent_save_path} -> {migrated_full_path})")
        state_modified = True

        torrent.save_path = torrent.save_path.replace(
            torrent_save_path, f"{migrated_full_path}"
        )
        torrent_src_path = Path(torrent_save_path) / torrent_name
        torrent_dest_path = Path(migrated_full_path) / torrent_name

        if torrent_src_path.is_dir():
            for root, dirs, files in os.walk(torrent_src_path):
                rel_path = Path(os.path.relpath(root, torrent_src_path))
                dest_dir = Path(os.path.join(torrent_dest_path, rel_path))
                if not os.path.exists(dest_dir):
                    if not DRY_RUN:
                        dest_dir.mkdir(parents=True, exist_ok=True)
                    log.debug(f"(MKDIR) making parent directory -> {dest_dir}")
                file_count = 1
                for file in files:
                    source_file = Path(os.path.join(root, file))
                    dest_file = Path(os.path.join(dest_dir, file))
                    torrent_type = (
                        "of a multi-file" if len(files) > 1 else "single-file"
                    )
                    torrent_file_count = (
                        f"({file_count}/{len(files)}) " if len(files) > 1 else ""
                    )

                    log.debug(
                        f"({LINK_TYPE.upper()}) linking {torrent_file_count}{torrent_type} torrent ({sanitize_infohash(torrent_infohash)})\n\t\t\t\t\tSRC: {source_file}\n\t\t\t\t\tDEST: {dest_file}"
                    )
                    if not DRY_RUN:
                        if not dest_file.exists():
                            if LINK_TYPE == "hardlink":
                                dest_file.hardlink_to(source_file)
                            else:
                                dest_file.symlink_to(source_file)
                        file_count += 1
            total_linked += 1
        else:
            log.debug(
                f"({LINK_TYPE.upper()}) linking single-file torrent ({sanitize_infohash(torrent_infohash)})\n\t\t\t\t\tSRC: {torrent_src_path}\n\t\t\t\t\tDEST: {torrent_dest_path}"
            )
            if not DRY_RUN:
                if not torrent_dest_path.exists():
                    if LINK_TYPE == "hardlink":
                        torrent_dest_path.hardlink_to(torrent_src_path)
                    else:
                        torrent_dest_path.symlink_to(torrent_src_path)
            total_linked += 1
print()
if state_modified:
    if not DRY_RUN:
        log.debug(f"YOU ARE NOT RUNNING IN DRY RUN!!!\n")
        shutil.copyfile(TORRENT_STATE_FILE, TORRENT_STATE_OLD)
        state_file = open(TORRENT_STATE_FILE, "wb")
        pickle.dump(state, state_file)
        state_file.close()
    else:
        log.debug(f"YOU ARE RUNNING IN DRY RUN!!!\n")
else:
    log.debug("No modifications were determined to need to be done!\n")

log.info(
    f"{'(DRY RUN) ' if DRY_RUN else ''}Total newly linked torrents: {total_linked}"
)