import threading
from pathlib import Path
from typing import TypeVar

from loguru import logger

from imbue_core.concurrency_group import ConcurrencyGroup
from imbue_core.constants import ExceptionPriority
from imbue_core.pydantic_serialization import MutableModel
from imbue_core.thread_utils import ObservableThread
from imbue_core.thread_utils import log_exception
from sculptor.interfaces.agents.agent import DockerEnvironment
from sculptor.interfaces.agents.agent import LocalSyncSetupStep
from sculptor.interfaces.environments.base import Environment
from sculptor.services.git_repo_service.default_implementation import LocalWritableGitRepo
from sculptor.services.git_repo_service.ref_namespace_stasher import checkout_branch_maybe_stashing_as_we_go
from sculptor.services.local_sync_service._debounce_and_watchdog_helpers import SlightlySaferObserver
from sculptor.services.local_sync_service._misc_utils_and_constants import ConcurrencyGroupController
from sculptor.services.local_sync_service._misc_utils_and_constants import join_background_thread_and_log_exceptions
from sculptor.services.local_sync_service._periodic_health_checker import LocalSyncHealthChecker
from sculptor.services.local_sync_service._watchmedo_via_environment import (
    hack_watchmedo_watcher_into_watchdog_event_queue,
)
from sculptor.services.local_sync_service.api import LocalSyncSessionState
from sculptor.services.local_sync_service.api import SyncSessionInfo
from sculptor.services.local_sync_service.errors import ExpectedStartupBlocker
from sculptor.services.local_sync_service.errors import ExpectedSyncStartupError
from sculptor.services.local_sync_service.errors import MutagenSyncError
from sculptor.services.local_sync_service.errors import SyncCleanupError
from sculptor.services.local_sync_service.git_branch_sync import RepoBranchSyncReconciler
from sculptor.services.local_sync_service.local_sync_update_messenger import LocalSyncUpdateMessengerAPI
from sculptor.services.local_sync_service.mutagen_filetree_sync import LocalSyncGitStateGuardian
from sculptor.services.local_sync_service.mutagen_filetree_sync import MutagenSyncSession
from sculptor.services.local_sync_service.mutagen_filetree_sync import MutagenSyncSessionReconciler
from sculptor.services.local_sync_service.mutagen_filetree_sync import create_bidirectional_user_prioritized_sync
from sculptor.services.local_sync_service.mutagen_filetree_sync import overwrite_local_with_remote_once
from sculptor.services.local_sync_service.path_batch_scheduler import DEFAULT_LOCAL_SYNC_DEBOUNCE_SECONDS
from sculptor.services.local_sync_service.path_batch_scheduler import DEFAULT_LOCAL_SYNC_MAX_DEBOUNCE_SECONDS
from sculptor.services.local_sync_service.path_batch_scheduler import LocalSyncPathBatchScheduler
from sculptor.services.local_sync_service.path_batch_scheduler import LocalSyncPathBatchSchedulerStatus
from sculptor.services.local_sync_service.path_batch_scheduler import register_batch_scheduler_with_observer
from sculptor.utils.timeout import log_runtime

# Generic type variable restricted to Exception subclasses.
# NOTE(review): not referenced anywhere in the visible portion of this module - confirm it is still needed.
ExceptionT = TypeVar("ExceptionT", bound=Exception)


def _validate_branches_are_safely_syncable(
    syncer: "RepoBranchSyncReconciler", info: SyncSessionInfo, is_stashing_ok: bool
) -> None:
    """
    Check that the user and agent repos are in a state where starting a sync cannot lose work.

    Every problem found is collected (rather than failing on the first one) so the user
    sees all of them in a single error and needs fewer round-trips.

    Raises an ExpectedSyncStartupError if any of the following hold:
      - the agent branch is missing from the agent's repo
      - the agent's repo does not have the agent branch checked out
      - the user is a fast-forward ahead of the agent branch (their commits would get clobbered)
      - the user and agent histories have diverged
      - the user's local git state is mid merge / rebase / cherry-pick
      - the user's checkout is dirty (or has untracked files) and stashing is not possible

    Returns None when no blocker was found.
    """
    branch_name = syncer.branch_name
    agent_branch_exists = syncer.does_agent_branch_exist()
    if agent_branch_exists:
        # No message/blocker for this one: a failure here is undefined behavior (maybe git corruption).
        syncer.ensure_branch_is_mirrored_locally_or_fail()

    messages = []
    blockers = []

    # --- branch relationship checks (at most one blocker comes out of this section) ---
    if not syncer.is_agent_branch_checked_out():
        if agent_branch_exists:
            messages.append(f"Agent's repo must be in {branch_name} branch.")
            blockers.append(ExpectedStartupBlocker.AGENT_REPO_WRONG_BRANCH)
        else:
            messages.append(f"Agent branch {branch_name} not found in agent's repo.")
            blockers.append(ExpectedStartupBlocker.AGENT_BRANCH_MISSING)
    elif not (syncer.is_user_head_equal_to_agent_head() or syncer.is_agent_a_fastforward_ahead_of_user()):
        # Heads differ and the agent is not simply ahead: either the user is ahead or we diverged.
        if syncer.is_user_a_fastforward_ahead_of_agent():
            messages.append(f"Must push to agent: There are local commits to {branch_name} that would be lost.")
            blockers.append(ExpectedStartupBlocker.USER_BRANCH_AHEAD_OF_AGENT)
        else:
            messages.append(f"Must merge into agent: local and agent histories have diverged for {branch_name}.")
            blockers.append(ExpectedStartupBlocker.BRANCHES_DIVERGED)

    # --- user working-tree checks ---
    user_status = syncer.user_repo.repo.get_current_status()
    if user_status.is_in_intermediate_state:
        messages.append("Local git state cannot have a merge, rebase, or cherry-pick in progress when starting sync.")
        blockers.append(ExpectedStartupBlocker.USER_GIT_STATE_UNSTASHABLE)

    # Stashing is only possible when allowed AND the single stash slot is still free
    # (similar to is_singleton_stash_slot_available).
    stash_slot_usable = is_stashing_ok and info.stash is None
    if not (stash_slot_usable or user_status.files.are_clean_including_untracked):
        dirty_reason = "local git state is dirty or has untracked files"
        if not is_stashing_ok:
            stash_message = f"Cannot sync without stashing if {dirty_reason}."
        elif info.is_carried_forward_from_previous_sync:
            stash_message = "Cannot start new sync: Already have a stash and git state changed since prior sync."
        else:
            stash_message = (
                f"Cannot start new sync (unexpected logic path): Already have a stash and {dirty_reason}. "
                "Contact support if you see this twice and aren't sure why."
            )
        messages.append(stash_message)
        blockers.append(ExpectedStartupBlocker.USER_GIT_STATE_STASHING_PREVENTED)

    if not blockers:
        return

    # When the problem is the user's working tree, include the status so they can see what is dirty.
    working_tree_blockers = (
        ExpectedStartupBlocker.USER_GIT_STATE_UNSTASHABLE,
        ExpectedStartupBlocker.USER_GIT_STATE_STASHING_PREVENTED,
    )
    if any(blocker in blockers for blocker in working_tree_blockers):
        messages.append(f"Current status:\n{user_status.describe()}")

    raise ExpectedSyncStartupError(
        "Cannot start Pairing Mode: " + " Also: ".join(messages),
        blockers,
        task_branch=branch_name,
    )


class LocalSyncSession(MutableModel):
    """Container for all event messaging, threads (watchdog), sidecar daemons (mutagen) involved in synchronization.

    DOES NOT handle the handling of git and untracked files at the beginning or end of a sync

    Handles constructing the underlying watchers and registering them with the observer,
    while retaining reference to the underlying reconciler (our scheduler) for extracting and handling pause state notices.

    All Reconcilers do initial verification and first sync on build.

    NOTE:
    * This is getting a bit tangled, and should probably be refactored later esp if we migrate to watchman (as we probably should)
    * sculptor/docs/proposals/local_sync_lifecycle.md refers to NoSync, ActiveSync, PausedSync, which is represented in HighLevelStatus.
    * implementation-wise, the observer STARTS and STOPs, while the LocalSyncPathBatchScheduler PAUSES.
    """

    session_info: SyncSessionInfo
    messenger: LocalSyncUpdateMessengerAPI  # added for caching last message
    observer: SlightlySaferObserver
    cg: ConcurrencyGroupController
    watchmedo_over_ssh_thread: ObservableThread

    # debounces events into batches, reports notices for pausing (and nonblocking, ie mutagen conflicts), and handles automatic restarting.
    scheduler: LocalSyncPathBatchScheduler

    mutagen_session: MutagenSyncSession

    # None when LocalSyncHealthChecker.build produced no checker for this environment.
    healthchecker: LocalSyncHealthChecker | None

    @property
    def _all_background_observable_threads(self) -> tuple[ObservableThread, ...]:
        """The watchmedo thread plus the healthchecker's background threads (when a healthchecker exists)."""
        if self.healthchecker is not None:
            return (
                self.watchmedo_over_ssh_thread,
                *self.healthchecker.background_threads,
            )
        return (self.watchmedo_over_ssh_thread,)

    @property
    def _all_background_threads(self) -> tuple[threading.Thread, ...]:
        """All observable background threads followed by the watchdog observer itself."""
        return (*self._all_background_observable_threads, self.observer)

    @property
    def state(self) -> LocalSyncSessionState | None:
        """Snapshot built by LocalSyncSessionState.build_if_sensible from the current info, observer, last message and scheduler status."""
        return LocalSyncSessionState.build_if_sensible(
            info=self.session_info,
            observer=self.observer,
            last_sent_message=self.messenger.last_sent_message,
            scheduler_status=self.scheduler.status,
        )

    @staticmethod
    def _initial_validation_and_sync_operations(
        agent_environment: Environment,
        session_info: SyncSessionInfo,
        user_repo_path: Path,
        messenger: LocalSyncUpdateMessengerAPI,
        concurrency_group: ConcurrencyGroup,
        is_stashing_ok: bool,
    ) -> tuple[RepoBranchSyncReconciler, LocalSyncGitStateGuardian, MutagenSyncSession]:
        """
        Initial operations that establish sync state:
        1. Setup git reconciler which attempts to validate some ref files
        2. _validate_branches_are_safely_syncable
        3. fetch_and_reset_mixed_on_branch from the agent repo, then checkout the branch (stashing if ok and necessary)
        4. overwrite_local_with_remote_once
        5. create_bidirectional_user_prioritized_sync

        NOTE: mutates session_info.stash when the stashing checkout path is taken.

        Returns the git reconciler, the git state guardian and the created mutagen session.

        Any exception from steps 4-5 is re-raised after a best-effort cleanup of the user's working directory.
        """
        branch_name = session_info.sync_branch
        git_sync_reconciler = RepoBranchSyncReconciler.build(
            branch_name=branch_name,
            user_repo=LocalWritableGitRepo(repo_path=user_repo_path, concurrency_group=concurrency_group),
            agent_environment=agent_environment,
        )
        messenger.on_setup_update(next_step=LocalSyncSetupStep.VALIDATE_GIT_STATE_SAFETY)
        with log_runtime("LOCAL_SYNC._validate_branches_are_safely_syncable"):
            _validate_branches_are_safely_syncable(git_sync_reconciler, session_info, is_stashing_ok=is_stashing_ok)

        messenger.on_setup_update(next_step=LocalSyncSetupStep.MIRROR_AGENT_INTO_LOCAL_REPO)

        guardian = LocalSyncGitStateGuardian.build(
            user_repo=git_sync_reconciler.user_repo.repo,
            agent_repo=git_sync_reconciler.agent_repo.repo,
            branch_name=session_info.sync_branch,
            concurrency_group=concurrency_group,
        )
        # redundant with current checks in build_and_establish_safety_and_readiness
        # guardian.validate_state_is_acceptable()

        # TODO: Better encapsulate environment context
        remote_mutagen_url = agent_environment.get_repo_url_for_mutagen()
        guard = agent_environment.get_snapshot_guard() if isinstance(agent_environment, DockerEnvironment) else None

        # One-way git fast-forward from agent to user
        user_sync_repo_helper = git_sync_reconciler.user_repo
        user_sync_repo_helper.fetch_and_reset_mixed_on_branch(from_remote_repo=git_sync_reconciler.agent_repo.repo)

        # Checkout branch, stashing if ok and necessary
        user_repo = user_sync_repo_helper.repo
        is_singleton_stash_slot_available = session_info.stash is None

        # Reasoning as to why we don't check user_status.files.are_clean_including_untracked here is that
        # I wanted the logical code path / sequence of git actions to be invariant when starting a new sync with is_stashing_ok
        #
        # TODO for clarity: also connected to _unsync_from_task logic in that that code has to git reset
        # so that the working tree is clean if there's a stash already
        #
        # There's a kinda odd relationship when switching atm:
        # 1. if there's no stash, then if the state becomes dirty between _unsync_from_task and here, we will create a new stash while switching
        # 2. if there is a stash in the same scenario, we should go on the is_switching_branches route and maybe fail with a dirty index
        if is_stashing_ok and is_singleton_stash_slot_available:
            with log_runtime("LOCAL_SYNC.checkout_branch_maybe_stashing_as_we_go"):
                # TODO TODO: mutates session_info, not obvious from rest of code
                # NOTE: I considered gating this on a status we could get back from _validate... ,
                # but we already know by now that it is safe WRT our business logic,
                # & git should blow up at us if it has managed to get into an intermediate state in the last half second or w/e
                stash_singleton = checkout_branch_maybe_stashing_as_we_go(
                    session_info.project_id, user_repo, branch_name
                )
                session_info.stash = stash_singleton.stash if stash_singleton else None
        elif session_info.is_switching_branches:
            user_repo.git_checkout_branch(branch_name)

        try:
            with log_runtime("LOCAL_SYNC.overwrite_local_with_remote_once"):
                overwrite_local_with_remote_once(
                    local_path=user_repo_path,
                    remote_mutagen_url=remote_mutagen_url,
                    session_name=f"{session_info.sync_name}-init",
                    snapshot_guard=guard,
                    concurrency_group=concurrency_group,
                )

            messenger.on_setup_update(next_step=LocalSyncSetupStep.BEGIN_TWO_WAY_CONTROLLED_SYNC)

            with log_runtime("LOCAL_SYNC.create_bidirectional_user_prioritized_sync"):
                mutagen_session = create_bidirectional_user_prioritized_sync(
                    local_path=user_repo_path,
                    remote_mutagen_url=remote_mutagen_url,
                    session_name=session_info.sync_name,
                    snapshot_guard=guard,
                    concurrency_group=concurrency_group,
                    is_flush_immediately=True,
                )
        except Exception:
            status = user_repo.get_current_status()
            if status.is_in_intermediate_state:
                logger.error("Skipping cleanup! Entered intermediate state during initial sync: {}", status.describe())
            elif not status.files.are_clean_including_untracked:
                # Only a dirty tree needs resetting: a failed/partial mutagen sync may have left
                # half-written files behind. A clean tree has nothing to clean up.
                logger.info("cleaning via git reset after failed/partial initial mutagen sync: {}", status.describe())
                user_repo.reset_working_directory()
            raise

        return (git_sync_reconciler, guardian, mutagen_session)

    @classmethod
    def build_and_start(
        cls,
        agent_environment: Environment,
        session_info: SyncSessionInfo,
        user_repo_path: Path,
        messenger: LocalSyncUpdateMessengerAPI,
        concurrency_group: ConcurrencyGroup,
        is_stashing_ok: bool,
        debounce_seconds: float = DEFAULT_LOCAL_SYNC_DEBOUNCE_SECONDS,
        max_debounce_seconds: float = DEFAULT_LOCAL_SYNC_MAX_DEBOUNCE_SECONDS,
    ) -> "LocalSyncSession":
        """
        Builds and starts a LocalSyncSession, including starting all background threads and mutagen sync.

        We use a single ConcurrencyGroup child of the DefaultLocalSyncService concurrency_group here.
        This means we _should_ be attempting to treat environment failures as recoverable,
        but the code is still likely brittle to unhandled container restarts in any sub-threads/processes that access the environment directly.

        This includes logic in _watchmedo_via_environment and the similar _pipe_healthcheck_signals_from_environment_into_sink in _periodic_health_checker,
        which will probably both error out with broken pipes on a restart.

        The work happens in three phases, each with its own cleanup on failure:
        1. initial validation + first sync (on failure: stop the concurrency group controller)
        2. wiring up observer / reconcilers / scheduler (on failure: also terminate mutagen)
        3. starting the observer and background threads (on failure: also join the observer)
        Every failure is re-raised to the caller after cleanup.
        """
        cg = ConcurrencyGroupController(concurrency_group=concurrency_group)
        cg.start()
        # TODO: Make it possible for environments to accept cleanup operations so that we can tie
        # mutagen (and potentially other resource) cleanup to the environment lifecycle.
        try:
            git_sync_reconciler, guardian, mutagen_session = cls._initial_validation_and_sync_operations(
                agent_environment=agent_environment,
                session_info=session_info,
                user_repo_path=user_repo_path,
                messenger=messenger,
                concurrency_group=concurrency_group,
                is_stashing_ok=is_stashing_ok,
            )
        except Exception:
            cg.stop()
            raise

        # Not expected to fail - this should mostly be pure class-hierarchy setup.
        # Just want to be very certain we terminate mutagen if anything from here down is borked.
        try:
            # NOTE: It seems to me like the spaghetti-ness of this passing the stopped_event around could be made more declarative.
            # Really everything in our context needs to know about it.
            # TODO: should we hack a child ShutdownEvent in here?
            observer = SlightlySaferObserver(name="watchdog_observer")
            messenger.hacked_in_stop_event = observer.stopped_event
            mutagen_reconciler = MutagenSyncSessionReconciler(
                session=mutagen_session,
                guardian=guardian,
                stop_event=observer.stopped_event,
            )
            healthchecker = LocalSyncHealthChecker.build(observer.threading_context.stop_event, agent_environment)
            scheduler = LocalSyncPathBatchScheduler(
                threading_context=observer.threading_context,
                lifecycle_callbacks=messenger,
                subpath_reconcilers=(git_sync_reconciler, mutagen_reconciler),
                debounce_seconds=debounce_seconds,
                max_debounce_seconds=max_debounce_seconds,
                healthchecker=healthchecker,
                # note: ugly that this conditional is duplicated but is being reworked elsewhere anyways
                environment_interaction_lock=agent_environment.get_snapshot_guard()
                if isinstance(agent_environment, DockerEnvironment)
                else None,
            )

            register_batch_scheduler_with_observer(observer, scheduler)
            # needs to be registered after because we're piggy-backing on the event emitter
            watchmedo_over_ssh_thread = hack_watchmedo_watcher_into_watchdog_event_queue(
                observer=observer,
                agent_environment=agent_environment,
                environment_dirs_to_watch=scheduler.top_level_environment_dirs_to_register,
            )
            session = cls(
                session_info=session_info,
                messenger=messenger,
                observer=observer,
                scheduler=scheduler,
                mutagen_session=mutagen_session,
                watchmedo_over_ssh_thread=watchmedo_over_ssh_thread,
                healthchecker=healthchecker,
                cg=cg,
            )
        except Exception:
            mutagen_session.terminate(is_skipped_if_uncreated=True)
            cg.stop()
            raise

        # Now we start everything, and have references for our except handling
        try:
            observer.start()
            for thread in session._all_background_observable_threads:
                concurrency_group.start_thread(thread)
            messenger.on_setup_complete()
        except Exception as e:
            # TODO: consider sending an error message here and having /enable kick-off enable sequence without blocking for completion
            log_exception(
                e,
                "local_sync_session: attempting mutagen cleanup after failed start. {session_info}",
                session_info=session_info,
            )
            # Tear down mutagen and the concurrency group controller, then wait briefly for the observer thread.
            # NOTE(review): unlike ensure_strands_cleaned_up, the observer is not explicitly asked to stop
            # before the join here - confirm cg.stop() is enough to make it exit.
            mutagen_session.terminate(is_skipped_if_uncreated=True)
            cg.stop()
            join_background_thread_and_log_exceptions(observer, join_timeout=5)
            raise
        logger.info(
            "started sync for task {}, branch {} (watchdog observers)", session_info.task_id, session_info.sync_branch
        )
        return session

    def ensure_strands_cleaned_up(self) -> None:
        """Stop the concurrency group controller and the observer, then verify the background threads have exited.

        Raises:
            SyncCleanupError: if any of the observable background threads is still alive afterwards.
        """
        logger.trace("Ensuring observer is stopped and joined.")
        try:
            self.cg.stop()
        except Exception as e:
            # Deliberately best-effort: log and carry on so the observer still gets stopped below.
            log_exception(e, "LOCAL_SYNC cg retained exception, implying misuse!", ExceptionPriority.HIGH_PRIORITY)
        self.observer.ensure_stopped(source="session.ensure_strands_cleaned_up")
        join_background_thread_and_log_exceptions(self.observer, join_timeout=5)

        threads = self._all_background_observable_threads
        exited = [t.name for t in threads if not t.is_alive()]
        failed = [t.name for t in threads if t.is_alive()]
        # One placeholder per argument, otherwise the surplus argument never reaches the log line.
        logger.trace("local sync session joined threads: exited={}, failed={}", exited, failed)
        if len(failed) == 0:
            return

        logger.error("Failed to cleanly stop local sync session threads: {}", failed)
        raise SyncCleanupError(
            f"some background threads encountered errors during run or did not stop cleanly {exited=}, {failed=}!",
            task_id=self.session_info.task_id,
            cleanup_step="observer_cleanup",
        )

    @property
    def is_fully_stopped(self) -> bool:
        """True once the concurrency group's shutdown event is set.

        Side effect: when that is the case, the observer's stopped_event is set as well.
        """
        if self.cg.concurrency_group.shutdown_event.is_set():
            # in our logic this always happens first, but if somehow we're asking this and it isn't we'll just fix it here
            self.observer.stopped_event.set()
            return True
        return False

    def stop(self) -> LocalSyncPathBatchSchedulerStatus:
        """Stop the session: flush what can be flushed, terminate mutagen, and clean up threads.

        Returns:
            The last observed scheduler status (captured before mutagen is terminated).

        Raises:
            SyncCleanupError: if the final batch did not flush within the timeout, or (from
                ensure_strands_cleaned_up) if background threads did not stop cleanly.
            MutagenSyncError: if the final mutagen flush failed. Like the flush-timeout error, it is
                raised only after termination and thread cleanup have been attempted.
        """
        # We want this so children (ie mutagen reconciler) will know not to undo any shutdown hard-kills,
        # but we can't always get the watchdog observer to stop cleanly without hard-killing the mutagen session first if necessary.
        # idk why exactly, the watchdog internals are kinda hairball-y.
        #
        # TODO: am bypassing the lifecycle system as seemed to be messing with stuff more
        self.observer.stopped_event.set()

        # This waits for the scheduler lock, ensuring any pending batch has been flushed before we go killing mutagen.
        #
        # We really want mutagen to flush cleanly, but the user could be intentionally trying to kill a bloated/off-the-rails sync session,
        # ie syncing a my_big_data/ dir.
        #
        # So, we have to balance these possibilities for now until we can inspect the mutagen state more precisely
        timeout = 15
        with log_runtime("LOCAL_SYNC.LocalSyncSession.stop.wait_for_final_batch_for_graceful_shutdown"):
            is_fully_flushed = self.scheduler.wait_for_final_batch_for_graceful_shutdown(timeout=timeout)

        flush_error = None
        if not is_fully_flushed:
            message = (
                f"Terminating mutagen in sync teardown after wait_for_final_batch_for_graceful_shutdown timeout of {timeout}s.",
                "This means the final batch of changes may not have fully flushed to the agent,",
                "though it was likely in a bad state or syncing something suspiciously large regardless.",
            )
            # TODO raise to user?
            flush_error = SyncCleanupError(
                " ".join(message), task_id=self.session_info.task_id, cleanup_step="wait_for_final_batch"
            )
            # log_exception takes (exception, message, priority), matching the other calls in this class.
            log_exception(
                flush_error,
                "LOCAL_SYNC: final batch did not flush before timeout. Continuing termination and will reraise if no other errors are encountered",
                ExceptionPriority.MEDIUM_PRIORITY,
            )
        elif not self.scheduler.status.is_paused:
            # Do one last mutagen flush because unsynced local changes could be lost irrecoverably.
            #
            # TODO: Here we're being extra careful and checking that no issues have arisen since the final batch.
            # But that might be more time than we care to spend here, given we've either:
            # 1. Just reconciled the last batch and so checked for pause seconds ago
            # 2. Never scheduled a batch, in which case a syncable file change would have had to happen in the last few seconds.
            # Which is all to say that maybe this method call + 2x check is overkill
            with log_runtime("LOCAL_SYNC.LocalSyncSession.stop.refresh_notices_by_tag"):
                self.scheduler.refresh_notices_by_tag()
            if not self.scheduler.status.is_paused:
                try:
                    self.mutagen_session.flush()
                except MutagenSyncError as e:
                    flush_error = e
                    log_exception(
                        flush_error,
                        "LOCAL_SYNC: final mutagen flush error from unpaused state. Continuing termination and will reraise if no other errors are encountered",
                        ExceptionPriority.MEDIUM_PRIORITY,
                    )
        final_status = self.scheduler.status

        self.mutagen_session.terminate()
        self.ensure_strands_cleaned_up()

        # Deferred until teardown has been attempted, so a flush problem never prevents cleanup.
        if flush_error is not None:
            raise flush_error

        logger.info("LOCAL_SYNC: Session stopped cleanly ({}), final_status={}", self.session_info, final_status)
        return final_status
