diff --git a/src/gps_denied/testing/metrics.py b/src/gps_denied/testing/metrics.py
new file mode 100644
index 0000000..3fd6ae2
--- /dev/null
+++ b/src/gps_denied/testing/metrics.py
@@ -0,0 +1,60 @@
+"""Trajectory comparison metrics — RMSE, Absolute Trajectory Error, Relative Pose Error.
+
+All inputs are Nx3 numpy arrays of ENU or local-tangent-plane positions in metres.
+Callers convert from lat/lon via gps_denied.core.coordinates.CoordinateTransformer
+before passing trajectories to these functions.
+"""
+
+from __future__ import annotations
+
+import numpy as np
+
+
+def trajectory_rmse(estimated: np.ndarray, ground_truth: np.ndarray) -> float:
+    """Root-mean-square position error between two aligned trajectories.
+
+    Both arrays must have the same shape (N, 3).
+    No alignment is performed — caller is responsible for time synchronisation
+    and (if needed) SE(3) alignment.
+    """
+    if estimated.shape != ground_truth.shape:
+        raise ValueError(
+            f"Shape mismatch: estimated={estimated.shape}, ground_truth={ground_truth.shape}"
+        )
+    diff = estimated - ground_truth
+    sq = np.sum(diff * diff, axis=1)  # per-frame squared position error
+    return float(np.sqrt(np.mean(sq)))
+
+
+def absolute_trajectory_error(
+    estimated: np.ndarray, ground_truth: np.ndarray
+) -> dict[str, float]:
+    """ATE: per-frame position error statistics."""
+    if estimated.shape != ground_truth.shape:
+        raise ValueError("shape mismatch")
+    err = np.linalg.norm(estimated - ground_truth, axis=1)
+    return {
+        "rmse": float(np.sqrt(np.mean(err * err))),
+        "mean": float(np.mean(err)),
+        "median": float(np.median(err)),
+        "max": float(np.max(err)),
+        "std": float(np.std(err)),
+    }
+
+
+def relative_pose_error(
+    estimated: np.ndarray, ground_truth: np.ndarray, delta: int = 1
+) -> dict[str, float]:
+    """RPE: error in relative motion over windows of `delta` frames."""
+    if estimated.shape != ground_truth.shape:
+        raise ValueError("shape mismatch")
+    if delta < 1 or delta >= len(estimated):
+        raise ValueError(f"delta must be in [1, N-1]; got delta={delta}, N={len(estimated)}")
+    est_delta = estimated[delta:] - estimated[:-delta]
+    gt_delta = ground_truth[delta:] - ground_truth[:-delta]
+    err = np.linalg.norm(est_delta - gt_delta, axis=1)
+    return {
+        "rmse": float(np.sqrt(np.mean(err * err))),
+        "mean": float(np.mean(err)),
+        "max": float(np.max(err)),
+    }
diff --git a/tests/e2e/test_metrics.py b/tests/e2e/test_metrics.py
new file mode 100644
index 0000000..20cc442
--- /dev/null
+++ b/tests/e2e/test_metrics.py
@@ -0,0 +1,45 @@
+"""Trajectory comparison metrics."""
+
+import numpy as np
+import pytest
+
+from gps_denied.testing.metrics import (
+    trajectory_rmse,
+    absolute_trajectory_error,
+    relative_pose_error,
+)
+
+
+def test_rmse_identical_trajectories_is_zero():
+    est = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [2.0, 0.0, 0.0]])
+    gt = est.copy()
+    assert trajectory_rmse(est, gt) == pytest.approx(0.0)
+
+
+def test_rmse_constant_offset():
+    # Constant 1m offset in x
+    est = np.array([[1.0, 0.0, 0.0], [2.0, 0.0, 0.0]])
+    gt = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]])
+    assert trajectory_rmse(est, gt) == pytest.approx(1.0)
+
+
+def test_rmse_rejects_length_mismatch():
+    est = np.zeros((3, 3))
+    gt = np.zeros((4, 3))
+    with pytest.raises(ValueError):
+        trajectory_rmse(est, gt)
+
+
+def test_ate_returns_summary_stats():
+    est = np.array([[1.0, 0.0, 0.0], [2.0, 0.0, 0.0], [3.0, 0.0, 0.0]])
+    gt = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [2.0, 0.0, 0.0]])
+    result = absolute_trajectory_error(est, gt)
+    assert result["rmse"] == pytest.approx(1.0)
+    assert result["mean"] == pytest.approx(1.0)
+    assert result["max"] == pytest.approx(1.0)
+
+
+def test_rpe_identical_trajectories_is_zero():
+    traj = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [2.0, 0.0, 0.0]])
+    result = relative_pose_error(traj, traj, delta=1)
+    assert result["rmse"] == pytest.approx(0.0)