swarm repositories / source
summaryrefslogtreecommitdiff
path: root/crates/ra-mcp-domain/src/fault.rs
blob: 6d404ab12dfbb76448089e2746af1cb5582b40ba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
//! Fault taxonomy and recovery guidance.

use crate::types::Generation;
use serde::{Deserialize, Serialize};
use thiserror::Error;

/// Logical fault class.
///
/// This is the coarse half of a [`Fault`]'s classification; the fine-grained
/// half is [`FaultCode`]. [`Fault::directive`] matches on the two together to
/// pick a [`RecoveryDirective`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum FaultClass {
    /// Underlying I/O or transport channel failure.
    Transport,
    /// Child process startup, liveness, or exit failure.
    Process,
    /// Malformed or unexpected protocol payloads.
    Protocol,
    /// Deadline exceeded.
    Timeout,
    /// Internal resource budget exhaustion.
    ///
    /// [`Fault::directive`] maps this class to
    /// [`RecoveryDirective::AbortRequest`] whatever the accompanying code is.
    Resource,
}

/// Fine-grained fault code.
///
/// Nothing in this type ties a code to a particular [`FaultClass`]; a
/// [`Fault`] can be built with any class/code combination. [`Fault::directive`]
/// only returns a non-abort directive for the specific pairings it lists.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum FaultCode {
    /// Pipe write failed with `EPIPE`.
    BrokenPipe,
    /// Pipe reached EOF.
    UnexpectedEof,
    /// Child process exited unexpectedly.
    ChildExited,
    /// Child process failed to spawn.
    SpawnFailed,
    /// Startup sequence exceeded its deadline.
    StartupTimedOut,
    /// Request exceeded its deadline.
    RequestTimedOut,
    /// Received an invalid protocol frame.
    InvalidFrame,
    /// Received invalid JSON.
    InvalidJson,
    /// Response could not be correlated with a pending request.
    UnknownResponseId,
}

/// Recovery strategy for a fault.
///
/// Returned by [`Fault::directive`] as the default reaction to a fault.
// NOTE(review): how each directive is acted upon is decided by callers that
// are not visible in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RecoveryDirective {
    /// Retry the request on the same process.
    RetryInPlace,
    /// Restart the worker process and retry once.
    RestartAndReplay,
    /// Fail fast and bubble the fault up to the caller.
    AbortRequest,
}

/// Structured fault event.
///
/// Bundles where a fault happened (`generation`), how it is classified
/// (`class` and `code`), and a message for the caller (`detail`). All fields
/// are public; [`Fault::new`] stores them without validation.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Fault {
    /// Generation in which this fault happened.
    // NOTE(review): `Generation` comes from `crate::types`; presumably it
    // identifies a worker incarnation -- confirm against that module.
    pub generation: Generation,
    /// Broad fault class.
    pub class: FaultClass,
    /// Specific fault code.
    pub code: FaultCode,
    /// Caller-facing context.
    pub detail: FaultDetail,
}

impl Fault {
    /// Constructs a new fault.
    #[must_use]
    pub fn new(
        generation: Generation,
        class: FaultClass,
        code: FaultCode,
        detail: FaultDetail,
    ) -> Self {
        Self {
            generation,
            class,
            code,
            detail,
        }
    }

    /// Returns the default recovery directive for this fault.
    #[must_use]
    pub fn directive(&self) -> RecoveryDirective {
        match (self.class, self.code) {
            (FaultClass::Transport, FaultCode::BrokenPipe)
            | (FaultClass::Transport, FaultCode::UnexpectedEof)
            | (FaultClass::Process, FaultCode::ChildExited)
            | (FaultClass::Process, FaultCode::SpawnFailed)
            | (FaultClass::Timeout, FaultCode::StartupTimedOut) => {
                RecoveryDirective::RestartAndReplay
            }
            (FaultClass::Timeout, FaultCode::RequestTimedOut) => {
                RecoveryDirective::RestartAndReplay
            }
            (FaultClass::Protocol, FaultCode::UnknownResponseId) => RecoveryDirective::RetryInPlace,
            (FaultClass::Protocol, FaultCode::InvalidFrame)
            | (FaultClass::Protocol, FaultCode::InvalidJson)
            | (FaultClass::Resource, _) => RecoveryDirective::AbortRequest,
            _ => RecoveryDirective::AbortRequest,
        }
    }
}

/// Typed detail payload for a fault.
///
/// Currently a thin wrapper around a single message string; build one with
/// [`FaultDetail::new`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FaultDetail {
    /// Human-consumable context.
    pub message: String,
}

impl FaultDetail {
    /// Wraps `message` in a detail payload.
    ///
    /// Anything convertible into a `String` is accepted and converted once.
    #[must_use]
    pub fn new(message: impl Into<String>) -> Self {
        let message = message.into();
        Self { message }
    }
}

/// Domain fault conversion error.
///
/// Its `Display` output is `fault conversion failure: ` followed by the
/// wrapped string, as set by the `#[error]` attribute below.
// NOTE(review): the inner field is private and no constructor or `From` impl
// is visible in this part of the file -- confirm how values are created.
#[derive(Debug, Error)]
#[error("fault conversion failure: {0}")]
pub struct FaultConversionError(String);