From d778e38b12476e7ba27d04fb4f0885c4a15cf402 Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Wed, 3 Jun 2026 12:19:48 -0400
Subject: [PATCH] refactor(errors)!: a lean, idiomatic DataRetrievalError
 taxonomy

Every request failure raises a subclass of DataRetrievalError, so a caller can
handle any of them with a single `except dataretrieval.DataRetrievalError`. The
taxonomy stays small -- it adds only what the underlying httpx exceptions can't
express:

  DataRetrievalError(Exception)         # .status_code / .retry_after / .retryable
  |- HTTPError                   # .status_code -- the server returned an error status
  |   '- TransientError          # .retry_after -- retryable (429 / 5xx)
  |       |- RateLimited         #   429
  |       '- ServiceUnavailable  #   5xx
  |- RequestTooLarge             # the request can't fit
  |   |- URLTooLong              #   414 / client-side over-long URL
  |   '- Unchunkable             #   the Water Data chunker can't split the call
  |- NetworkError                # no response: timeout / DNS / refused connection
  '- NoSitesError                # no-data on the legacy nwis path (see below)

One factory -- error_for_status(status, message, *, retry_after) -- maps a
status to its type, and every request path routes through it (the legacy
`query` path, the Water Data chunker, nldi, nadp, streamstats), so a given
status surfaces as the same type everywhere. A fatal 4xx is a generic HTTPError
carrying .status_code (inspect the code rather than a class per code). The
chunker keys retry/resume on TransientError.

Every DataRetrievalError exposes three read-anywhere fields -- .status_code
(None when there is no HTTP status), .retry_after, and .retryable -- so a single
`except DataRetrievalError as e` clause can branch on the status or drive a
backoff loop without importing or isinstance-checking the concrete subclass.

Connection-level failures (no HTTP response: timeout, DNS, refused connection)
are wrapped as NetworkError, with the underlying httpx exception on __cause__,
so one `except DataRetrievalError` truly spans every failure. The single-shot
paths route their GETs through a thin `utils._get` wrapper that does the
translation; the chunker keeps its own client and wraps transport failures as
resumable interruptions instead. NetworkError carries no .status_code but is
.retryable; with TransientError it forms the retryable set.

A no-data result is not an error: the modern getters (waterdata, wqp, nldi)
return an empty DataFrame when nothing matches. Only the deprecated nwis
(waterservices) path still raises NoSitesError on no data.

The typed errors are picklable via the standard __getstate__/__setstate__
protocol, so they survive a pickle / deepcopy back from a multiprocessing /
lithops worker. A chunk-interruption error sheds its live resume handle (.call)
on that trip -- keeping the diagnostic counts and partial frame/response --
while in-process callers still get full `exc.call.resume()`.

A too-long-URL status (413 / 414) on the legacy `query` path keeps the
actionable "split your query" remediation message (the same one the client-side
over-long-URL case raises), rather than degrading to a bare HTTP-status line.

BREAKING CHANGES
- Request failures raise typed DataRetrievalError subclasses instead of bare
  ValueError / RuntimeError / httpx.HTTPStatusError. The exceptions root only at
  DataRetrievalError(Exception) and no longer also inherit ValueError /
  RuntimeError -- catch DataRetrievalError (or a subclass), not the builtins.
  This now also covers ChunkInterrupted (previously a RuntimeError) and
  mid-pagination failures (previously a bare RuntimeError).
- Connection-level failures are wrapped as NetworkError instead of surfacing as
  raw httpx exceptions on the single-shot paths -- catch NetworkError (or
  DataRetrievalError); the httpx exception is preserved on __cause__.
- A fatal 4xx raises HTTPError (read .status_code); there are no per-code types.

Also adds a dataretrieval.exceptions API docs page, a "Handling errors" user
guide, and a NEWS.md changelog entry.

ruff clean (pre-commit hooks); mypy --strict and the full pytest suite are
re-verified by CI on push.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 NEWS.md                              |   2 +
 dataretrieval/__init__.py            |  13 +-
 dataretrieval/exceptions.py          | 309 +++++++++++++++++++--------
 dataretrieval/nadp.py                |   8 +-
 dataretrieval/nldi.py                |  49 +----
 dataretrieval/streamstats.py         |  10 +-
 dataretrieval/utils.py               |  75 ++++---
 dataretrieval/waterdata/api.py       |   5 +-
 dataretrieval/waterdata/chunking.py  |  27 ++-
 dataretrieval/waterdata/ratings.py   |  24 +--
 dataretrieval/waterdata/utils.py     |  88 ++++----
 docs/source/reference/exceptions.rst |   8 +
 docs/source/reference/index.rst      |   1 +
 docs/source/userguide/errors.rst     | 103 +++++++++
 docs/source/userguide/index.rst      |   1 +
 tests/nldi_test.py                   |  25 ++-
 tests/nwis_test.py                   |   6 +-
 tests/utils_test.py                  | 146 ++++++++++---
 tests/waterdata_chunking_test.py     |  99 +++++++--
 tests/waterdata_utils_test.py        |  47 ++--
 tests/waterservices_test.py          |   4 +-
 21 files changed, 726 insertions(+), 324 deletions(-)
 create mode 100644 docs/source/reference/exceptions.rst
 create mode 100644 docs/source/userguide/errors.rst

diff --git a/NEWS.md b/NEWS.md
index 7519c62f..755ae58a 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,5 @@
+**06/03/2026:** The request-error hierarchy is now unified. Every module (`nwis`, `wqp`, `nldi`, `waterdata`, `nadp`, `streamstats`) raises a subclass of `dataretrieval.DataRetrievalError` on a failed request, so a single `except dataretrieval.DataRetrievalError` spans them all. An HTTP error status surfaces as an `HTTPError` carrying `.status_code` (inspect it to branch on a specific code); the retryable 429/5xx subset is `TransientError` (`RateLimited` / `ServiceUnavailable`, carrying `.retry_after`); and a request too large to satisfy is a `RequestTooLarge` (`URLTooLong` for an over-long single request, `Unchunkable` when the Water Data chunker cannot split a call small enough). Connection-level failures (timeouts, DNS, refused connections) are wrapped as a `NetworkError`, with the underlying `httpx` exception on `__cause__`. Every `DataRetrievalError` also exposes `.status_code` (`None` when there is no HTTP status), `.retry_after`, and `.retryable`, so a single `except dataretrieval.DataRetrievalError as e` clause can branch on the status or retry transient failures without knowing the concrete subclass. **Breaking change:** these exceptions no longer multiply-inherit a built-in — code that caught request failures with `except ValueError` or `except RuntimeError` should switch to `except dataretrieval.DataRetrievalError` (or a specific subclass). A no-data result is **not** an error: the modern getters (`waterdata`, `wqp`, `nldi`) return an empty DataFrame when nothing matches. Only the deprecated `nwis` (waterservices) path still raises `NoSitesError` on no data.
+
 **05/17/2026:** The OGC `waterdata` getters (`get_daily`, `get_continuous`, `get_field_measurements`, and the rest of the multi-value-capable functions) now transparently chunk requests whose URLs would otherwise exceed the server's ~8 KB byte limit.
 
 **05/16/2026:** Fixed silent truncation in the paginated `waterdata` request loops (`_walk_pages` and `get_stats_data`). Mid-pagination failures (HTTP 429, 5xx, network error) were previously swallowed — pagination would quietly stop and the function would return whatever rows it had collected, leaving callers with truncated DataFrames they had no way to detect. The loops now status-check every page like the initial request and raise `RuntimeError` on any failure, with the upstream exception chained as `__cause__` and a short menu of recovery actions (wait and retry, reduce the request, or obtain an API token) in the message. **Behavior change**: callers that previously consumed partial DataFrames on transient upstream blips will now see an exception; retry the call (possibly with a smaller `limit` or narrower query).
diff --git a/dataretrieval/__init__.py b/dataretrieval/__init__.py
index 4b58247e..9dfb1991 100644
--- a/dataretrieval/__init__.py
+++ b/dataretrieval/__init__.py
@@ -17,8 +17,9 @@
 ``nldi`` requires geopandas (``pip install dataretrieval[nldi]``) and is
 imported on demand: ``from dataretrieval import nldi``.
 
-Every request failure raises a subclass of :class:`dataretrieval.DataRetrievalError`;
-the taxonomy lives in ``dataretrieval.exceptions``.
+A failed request raises a subclass of :class:`dataretrieval.DataRetrievalError`
+(the taxonomy lives in ``dataretrieval.exceptions``); connection-level failures
+(timeouts, DNS) are wrapped as :class:`dataretrieval.NetworkError`.
 """
 
 from importlib.metadata import PackageNotFoundError, version
@@ -29,10 +30,10 @@
     __version__ = "version-unknown"
 
 from dataretrieval.exceptions import (
-    BadRequestError,
     DataRetrievalError,
+    HTTPError,
+    NetworkError,
     NoSitesError,
-    NotFoundError,
     RateLimited,
     RequestTooLarge,
     ServiceUnavailable,
@@ -64,10 +65,10 @@
     # error taxonomy (canonical home: ``dataretrieval.exceptions``), re-exported
     # so callers can ``except dataretrieval.DataRetrievalError``
     "exceptions",
-    "BadRequestError",
     "DataRetrievalError",
+    "HTTPError",
+    "NetworkError",
     "NoSitesError",
-    "NotFoundError",
     "RateLimited",
     "RequestTooLarge",
     "ServiceUnavailable",
diff --git a/dataretrieval/exceptions.py b/dataretrieval/exceptions.py
index 2bb955c3..294621f2 100644
--- a/dataretrieval/exceptions.py
+++ b/dataretrieval/exceptions.py
@@ -1,89 +1,214 @@
 """Exception taxonomy for ``dataretrieval``.
 
-A failed request from any service module (``nwis``, ``wqp``, ``waterdata``,
-``nldi``, ...) raises a subclass of :class:`DataRetrievalError`, so a caller can
-handle any request failure with a single ``except dataretrieval.DataRetrievalError``.
-
-The tree has two intermediate bases a caller can catch to span a whole family:
-:class:`RequestTooLarge` (the request can't fit, however it was issued) and
-:class:`TransientError` (a temporary failure worth retrying).
-
-This module deliberately has no third-party dependencies, so any module can
-import it without pulling in pandas/httpx.
+Every service module (``nwis``, ``wqp``, ``nldi``, ``waterdata``, ``nadp``,
+``streamstats``) raises a subclass of :class:`DataRetrievalError` when a request
+fails, so one ``except dataretrieval.DataRetrievalError`` catches them all --
+including connection-level failures (timeouts, DNS, refused connections), which
+are wrapped as :class:`NetworkError` with the underlying ``httpx`` exception on
+``__cause__``.
+
+Most failures are an :class:`HTTPError` carrying the response ``.status_code``,
+of which :class:`TransientError` (429 / 5xx) is the retryable subset. The rest
+aren't a plain status: :class:`RequestTooLarge` (with :class:`URLTooLong` /
+:class:`Unchunkable`), :class:`NetworkError` (a failed connection, per above),
+and :class:`NoSitesError`. :func:`error_for_status` maps a status to its type.
+
+This module has no third-party runtime dependencies -- ``httpx`` is imported only
+for type checking -- so any module can import it without pulling in pandas / httpx
+and without risking an import cycle.
 """
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any, ClassVar
 
 if TYPE_CHECKING:
     import httpx
 
 __all__ = [
     "DataRetrievalError",
-    "BadRequestError",
-    "NotFoundError",
+    "HTTPError",
+    "TransientError",
+    "RateLimited",
+    "ServiceUnavailable",
     "RequestTooLarge",
     "URLTooLong",
     "Unchunkable",
+    "NetworkError",
     "NoSitesError",
-    "TransientError",
-    "RateLimited",
-    "ServiceUnavailable",
+    "error_for_status",
 ]
 
 
 class DataRetrievalError(Exception):
-    """Base class for errors raised when a request to a USGS or EPA web
-    service fails.
+    """Base class for every failed-request error in ``dataretrieval``.
 
-    Every service module (``nwis``, ``wqp``, ``waterdata``, ``nldi``, ...)
-    raises a subclass of this when a request fails, so a caller can handle any
-    request failure uniformly::
+    Catch it to handle any USGS or EPA service failure uniformly, and branch on
+    the read-anywhere fields below without needing the concrete subclass::
 
         try:
-            df, md = dataretrieval.wqp.get_results(...)
-        except dataretrieval.DataRetrievalError:
-            ...
-
-    Subclasses also inherit from the built-in exception this package has
-    historically raised for the condition's *kind* -- :class:`ValueError` for a
-    request that can't succeed as written (bad params, too large), and
-    :class:`RuntimeError` for a transient transport failure -- so existing
-    ``except ValueError`` / ``except RuntimeError`` handlers keep working.
+            df, md = dataretrieval.waterdata.get_daily(...)
+        except dataretrieval.DataRetrievalError as e:
+            if e.retryable:  # 429 / 5xx / connection failure
+                time.sleep(e.retry_after or backoff)
+                ...  # re-issue the request
+            elif e.status_code == 404:  # ``None`` unless an HTTP status error
+                ...
+            else:
+                raise
+
+    Connection-level failures (timeouts, DNS) are wrapped as
+    :class:`NetworkError`, so this single clause covers them too.
+    """
+
+    #: HTTP status that triggered the error, or ``None`` for errors without one
+    #: (connection failure, too-long URL, no data). Set by :class:`HTTPError`.
+    status_code: int | None = None
+    #: Seconds the server asked us to wait before retrying (its ``Retry-After``
+    #: header), or ``None`` when it gave no hint. Set by :class:`TransientError`.
+    retry_after: float | None = None
+    #: Whether re-issuing the same request might succeed -- ``True`` for the
+    #: transient HTTP statuses (429 / 5xx, :class:`TransientError`) and for
+    #: connection failures (:class:`NetworkError`); ``False`` otherwise.
+    retryable: ClassVar[bool] = False
+
+    # These errors get pickled back across process boundaries (a lithops /
+    # multiprocessing worker returns whatever it raises). Default ``BaseException``
+    # pickling rebuilds via ``cls(*args)``, which these subclasses can't survive --
+    # keyword-only constructor fields, and ``ChunkInterrupted`` builds its message
+    # internally. So reconstruct via ``__new__`` + the standard getstate/setstate
+    # protocol, bypassing ``__init__``; a subclass drops unpicklable state by
+    # overriding ``__getstate__`` (see ``ChunkInterrupted``).
+    def __reduce__(self) -> tuple[Any, ...]:
+        return (_new_error, (self.__class__,), self.__getstate__())
+
+    def __getstate__(self) -> dict[str, Any]:
+        return {"args": self.args, **self.__dict__}
+
+    def __setstate__(self, state: dict[str, Any] | None) -> None:
+        state = state or {}
+        self.args = state.pop("args", ())
+        self.__dict__.update(state)
+
+
+def _new_error(cls: type[DataRetrievalError]) -> DataRetrievalError:
+    """Build a blank :class:`DataRetrievalError` for unpickling, bypassing
+    ``__init__``; pickle then calls ``__setstate__`` to restore its state."""
+    return cls.__new__(cls)
+
+
+# --- HTTP status errors --------------------------------------------------
+
+
+class HTTPError(DataRetrievalError):
+    """The service returned an error HTTP status.
+
+    The numeric status is on :attr:`status_code`; branch on it, e.g.
+    ``except HTTPError as e: ... if e.status_code == 404``. :class:`TransientError`
+    (429 / 5xx) is the retryable subset, and is itself an ``HTTPError``. The one
+    exception to "a status is an ``HTTPError``" is a request the service rejects
+    as too long: it surfaces as :class:`URLTooLong` (a :class:`RequestTooLarge`),
+    *not* an ``HTTPError`` -- so catch :class:`DataRetrievalError` to be certain
+    of spanning every failure. See :func:`error_for_status` for the full mapping.
+
+    Parameters
+    ----------
+    message : str
+        Human-readable error message.
+    status_code : int
+        The HTTP status the service returned.
+    """
+
+    def __init__(self, message: str, *, status_code: int) -> None:
+        super().__init__(message)
+        self.status_code = status_code
+
+
+class TransientError(HTTPError):
+    """A 429 or 5xx the server may serve on a later try -- :class:`RateLimited`
+    for 429, :class:`ServiceUnavailable` for 5xx.
+
+    This only classifies the condition; it does not itself retry. Whether to
+    retry is up to the calling path: a single-shot request raises it for the
+    caller to handle (e.g. wait :attr:`retry_after` seconds, then re-issue),
+    while the Water Data chunker retries and resumes automatically.
+
+    Parameters
+    ----------
+    message : str
+        Human-readable error message.
+    status_code : int, optional
+        The HTTP status the service returned. Defaults to the leaf's canonical
+        code (429 / 503) when omitted; :func:`error_for_status` always passes the
+        real status.
+    retry_after : float, optional
+        Seconds to wait before retrying, parsed from the ``Retry-After`` response
+        header; ``None`` when the header is absent or unparseable.
     """
 
+    retryable: ClassVar[bool] = True
+
+    #: Canonical status a concrete transient stamps when built without an
+    #: explicit ``status_code`` (:class:`RateLimited` = 429,
+    #: :class:`ServiceUnavailable` = 503). ``TransientError`` itself is abstract
+    #: and sets none, so constructing it bare requires ``status_code``.
+    _DEFAULT_STATUS: ClassVar[int]
+
+    def __init__(
+        self,
+        message: str,
+        *,
+        status_code: int | None = None,
+        retry_after: float | None = None,
+    ) -> None:
+        if status_code is None:
+            status_code = getattr(self, "_DEFAULT_STATUS", None)
+        if status_code is None:
+            raise TypeError(
+                f"{type(self).__name__} requires status_code "
+                "(only the RateLimited / ServiceUnavailable leaves default it)"
+            )
+        super().__init__(message, status_code=status_code)
+        self.retry_after = retry_after
+
+
+class RateLimited(TransientError):
+    """A request was rejected with HTTP 429 (too many requests)."""
+
+    _DEFAULT_STATUS = 429
 
-# --- Fatal client errors -------------------------------------------------
-# The request can't succeed as written; retrying it unchanged won't help. Each
-# is also a ``ValueError`` -- the built-in the legacy ``query`` path has always
-# raised -- so existing ``except ValueError`` handlers keep working.
 
+class ServiceUnavailable(TransientError):
+    """A request was rejected with a server error (HTTP 5xx).
+
+    Raised by both the legacy ``query`` path and the Water Data path, so a 5xx
+    surfaces as one type whichever subsystem issued the request. ``.status_code``
+    holds the actual 5xx; it falls back to 503 only on a bare hand-construction.
+    """
 
-class BadRequestError(DataRetrievalError, ValueError):
-    """The service rejected the request parameters (HTTP 400)."""
+    _DEFAULT_STATUS = 503
 
 
-class NotFoundError(DataRetrievalError, ValueError):
-    """The requested resource was not found; often an empty query (HTTP 404)."""
+# --- Request can't fit (not necessarily an HTTP status) ------------------
 
 
-class RequestTooLarge(DataRetrievalError, ValueError):
+class RequestTooLarge(DataRetrievalError):
     """The request is too large for the service to satisfy.
 
-    A base for the two ways a request can exceed what the service accepts;
-    catch it to handle either. The concrete subclasses are :class:`URLTooLong`
-    (a single request the server rejected) and :class:`Unchunkable` (the Water
-    Data chunker could not split the call small enough to fit).
+    Base for the two ways that happens; catch it to handle either:
+    :class:`URLTooLong` (a single request rejected for length) and
+    :class:`Unchunkable` (a Water Data call the chunker could not split small
+    enough to fit).
     """
 
 
 class URLTooLong(RequestTooLarge):
-    """A single request URL exceeded the service's limit (HTTP 414, or rejected
-    client-side before it was sent).
+    """A single request URL was too long for the service.
 
-    Raised by the legacy ``query`` path, which issues one request without
-    chunking. Remediation: query fewer sites, or split the call manually.
+    Raised on the legacy ``query`` path (which sends one un-chunked request),
+    whether the URL is rejected client-side before sending or by the server
+    (see :func:`error_for_status`). Remediation: query fewer sites, or split the
+    call manually.
     """
 
 
@@ -99,56 +224,72 @@ class Unchunkable(RequestTooLarge):
     """
 
 
-class NoSitesError(DataRetrievalError):
-    """The selection criteria matched no sites/data."""
+# --- Connection failure (no HTTP response) -------------------------------
 
-    def __init__(self, url: httpx.URL) -> None:
-        self.url = url
 
-    def __str__(self) -> str:
-        return (
-            "No sites/data found using the selection criteria specified in "
-            f"url: {self.url}"
-        )
+class NetworkError(DataRetrievalError):
+    """The request never completed a round-trip to the service -- a DNS
+    failure, refused connection, or timeout -- so no HTTP response arrived to
+    classify.
 
+    Wraps the underlying ``httpx`` transport exception, preserved on
+    ``__cause__``. Worth retrying (:attr:`~DataRetrievalError.retryable` is
+    ``True``), but carries no ``.status_code`` because no response came back.
+    """
 
-# --- Transient transport errors ------------------------------------------
-# The service was reachable but temporarily refused the request; the same call
-# may succeed if retried. Each is also a ``RuntimeError`` (the built-in the
-# waterdata path has always raised). The Water Data chunker recognizes them via
-# ``isinstance(exc, TransientError)`` and wraps them as resumable
-# ``ChunkInterrupted`` subclasses.
+    retryable: ClassVar[bool] = True
 
 
-class TransientError(DataRetrievalError, RuntimeError):
-    """Base for transient HTTP failures that are worth an automatic retry.
+# --- Empty result --------------------------------------------------------
 
-    One subclass per recoverable HTTP status family (429 -> :class:`RateLimited`,
-    5xx -> :class:`ServiceUnavailable`); the Water Data chunker recognizes them
-    by this shared base and wraps them as resumable interruptions.
 
-    Parameters
-    ----------
-    message : str
-        Human-readable error message.
-    retry_after : float, optional
-        Seconds to wait before retrying, parsed from the ``Retry-After``
-        response header; stored on the :attr:`retry_after` attribute (``None``
-        when the header is absent or unparseable).
+class NoSitesError(DataRetrievalError):
+    """A request succeeded (HTTP 200) but matched no sites/data.
+
+    A no-data result is normally **not** an error: the modern getters
+    (``waterdata``, ``wqp``, ``nldi``) return an empty ``DataFrame``. Only the
+    deprecated ``nwis`` (waterservices) path still raises this.
     """
 
-    def __init__(self, message: str, *, retry_after: float | None = None) -> None:
-        super().__init__(message)
-        self.retry_after = retry_after
+    def __init__(self, url: httpx.URL) -> None:
+        self.url = url
+
+    def __str__(self) -> str:
+        return (
+            "No sites/data found using the selection criteria specified in "
+            f"url: {self.url}"
+        )
 
 
-class RateLimited(TransientError):
-    """A request was rejected with HTTP 429 (too many requests)."""
+def error_for_status(
+    status: int, message: str, *, retry_after: float | None = None
+) -> DataRetrievalError:
+    """Return the typed :class:`DataRetrievalError` for an HTTP error *status*.
 
+    The one status-to-type mapping every request path shares (the legacy
+    ``query`` path, ``waterdata``, ``nadp`` / ``streamstats``), so a given status
+    becomes the same type everywhere:
 
-class ServiceUnavailable(TransientError):
-    """A request was rejected with a server error (HTTP 5xx).
+    * **413, 414** -> :class:`URLTooLong` (a :class:`RequestTooLarge`) -- the
+      "too long" semantic is more actionable than a bare status, and it matches
+      the client-side over-long-URL case
+    * **429** -> :class:`RateLimited`
+    * **5xx** -> :class:`ServiceUnavailable`
+    * **anything else** -> :class:`HTTPError`
 
-    Raised by both the legacy ``query`` path and the Water Data path, so a 5xx
-    surfaces as one type regardless of which subsystem issued the request.
+    ``message`` is used verbatim; ``retry_after`` is attached only to the
+    transient (:class:`TransientError`) types. *status* must be an error status
+    (``>= 400``) -- classifying a success or redirect is a usage error and raises
+    :class:`ValueError`.
     """
+    if status < 400:
+        raise ValueError(
+            f"error_for_status expects an HTTP error status (>= 400), got {status}"
+        )
+    if status in (413, 414):
+        return URLTooLong(message)
+    if status == 429:
+        return RateLimited(message, status_code=status, retry_after=retry_after)
+    if 500 <= status < 600:
+        return ServiceUnavailable(message, status_code=status, retry_after=retry_after)
+    return HTTPError(message, status_code=status)
diff --git a/dataretrieval/nadp.py b/dataretrieval/nadp.py
index a02b6671..4966fa92 100644
--- a/dataretrieval/nadp.py
+++ b/dataretrieval/nadp.py
@@ -43,9 +43,7 @@
 import warnings
 import zipfile
 
-import httpx
-
-from dataretrieval.utils import HTTPX_DEFAULTS
+from dataretrieval.utils import HTTPX_DEFAULTS, _get, _raise_for_status
 
 _DEPRECATION_MESSAGE = (
     "The `nadp` module is deprecated and will be removed from `dataretrieval` "
@@ -229,8 +227,8 @@ def get_zip(url: str, filename: str) -> NADP_ZipFile:
     """
     _warn_deprecated()
 
-    req = httpx.get(url + filename, **HTTPX_DEFAULTS)
-    req.raise_for_status()
+    req = _get(url + filename, **HTTPX_DEFAULTS)
+    _raise_for_status(req)
 
     # z = zipfile.ZipFile(io.BytesIO(req.content))
     z = NADP_ZipFile(io.BytesIO(req.content))
diff --git a/dataretrieval/nldi.py b/dataretrieval/nldi.py
index cb4e2488..9a169414 100644
--- a/dataretrieval/nldi.py
+++ b/dataretrieval/nldi.py
@@ -19,13 +19,11 @@
 def _query_nldi(
     url: str,
     query_params: dict[str, str],
-    error_message: str,
 ) -> dict[str, Any] | list[Any]:
-    # A helper function to query the NLDI API
+    # A helper function to query the NLDI API. ``query()`` already raises a
+    # typed ``DataRetrievalError`` for any HTTP error response, so a returned
+    # response is a success that we only need to parse.
     response = query(url, payload=query_params)
-    if response.status_code != 200:
-        raise ValueError(f"{error_message}. Error reason: {response.reason_phrase}")
-
     response_data: dict[str, Any] | list[Any] = {}
     try:
         response_data = response.json()
@@ -112,15 +110,7 @@ def get_flowlines(
     if stop_comid is not None:
         query_params["stopComid"] = str(stop_comid)
 
-    if feature_source:
-        err_msg = (
-            f"Error getting flowlines for feature source '{feature_source}'"
-            f" and feature_id '{feature_id}'"
-        )
-    else:
-        err_msg = f"Error getting flowlines for comid '{comid}'"
-
-    feature_collection = cast("dict[str, Any]", _query_nldi(url, query_params, err_msg))
+    feature_collection = cast("dict[str, Any]", _query_nldi(url, query_params))
     if as_json:
         return feature_collection
     gdf = _features_to_gdf(feature_collection)
@@ -172,11 +162,7 @@ def get_basin(
         "simplified": simplified_str,
         "splitCatchment": split_catchment_str,
     }
-    err_msg = (
-        f"Error getting basin for feature source '{feature_source}' and "
-        f"feature_id '{feature_id}'"
-    )
-    feature_collection = cast("dict[str, Any]", _query_nldi(url, query_params, err_msg))
+    feature_collection = cast("dict[str, Any]", _query_nldi(url, query_params))
     if as_json:
         return feature_collection
     gdf = _features_to_gdf(feature_collection)
@@ -266,7 +252,6 @@ def get_features(
             )
         url = f"{NLDI_API_BASE_URL}/comid/position"
         query_params = {"coords": f"POINT({long} {lat})"}
-        err_msg = f"Error getting features for lat '{lat}' and long '{long}'"
     else:
         if (comid is not None or data_source is not None) and navigation_mode is None:
             raise ValueError(
@@ -290,9 +275,8 @@ def get_features(
         else:
             url = f"{NLDI_API_BASE_URL}/{feature_source}/{feature_id}"
             query_params = {}
-        err_msg = _features_err_msg(feature_source, feature_id, comid, data_source)
 
-    feature_collection = cast("dict[str, Any]", _query_nldi(url, query_params, err_msg))
+    feature_collection = cast("dict[str, Any]", _query_nldi(url, query_params))
     if as_json:
         return feature_collection
     gdf = _features_to_gdf(feature_collection)
@@ -327,8 +311,7 @@ def get_features_by_data_source(data_source: str) -> gpd.GeoDataFrame:
     # validate the data source
     _validate_data_source(data_source)
     url = f"{NLDI_API_BASE_URL}/{data_source}"
-    err_msg = f"Error getting features for data source '{data_source}'"
-    feature_collection = cast("dict[str, Any]", _query_nldi(url, {}, err_msg))
+    feature_collection = cast("dict[str, Any]", _query_nldi(url, {}))
     gdf = _features_to_gdf(feature_collection)
     return gdf
 
@@ -477,9 +460,7 @@ def _validate_data_source(data_source: str) -> None:
     # get the available data/feature sources - if not already cached
     if _AVAILABLE_DATA_SOURCES is None:
         url = f"{NLDI_API_BASE_URL}/"
-        available_data_sources = _query_nldi(
-            url, {}, "Error getting available data sources"
-        )
+        available_data_sources = _query_nldi(url, {})
         if not isinstance(available_data_sources, list) or not all(
             isinstance(ds, dict) and "source" in ds for ds in available_data_sources
         ):
@@ -498,20 +479,6 @@ def _validate_data_source(data_source: str) -> None:
         raise ValueError(err_msg)
 
 
-def _features_err_msg(
-    feature_source: str | None,
-    feature_id: str | None,
-    comid: int | None,
-    data_source: str | None,
-) -> str:
-    if feature_source is not None:
-        return (
-            f"Error getting features for feature source '{feature_source}'"
-            f" and feature_id '{feature_id}', and data source '{data_source}'"
-        )
-    return f"Error getting features for comid '{comid}' and data source '{data_source}'"
-
-
 def _validate_navigation_mode(navigation_mode: str | None) -> str:
     if navigation_mode is None:
         raise ValueError(
diff --git a/dataretrieval/streamstats.py b/dataretrieval/streamstats.py
index d17aa7df..1458494a 100644
--- a/dataretrieval/streamstats.py
+++ b/dataretrieval/streamstats.py
@@ -12,7 +12,7 @@
 
 import httpx
 
-from dataretrieval.utils import HTTPX_DEFAULTS
+from dataretrieval.utils import HTTPX_DEFAULTS, _get, _raise_for_status
 
 
 def download_workspace(workspaceID: str, format: str = "") -> httpx.Response:
@@ -37,9 +37,9 @@ def download_workspace(workspaceID: str, format: str = "") -> httpx.Response:
     payload = {"workspaceID": workspaceID, "format": format}
     url = "https://streamstats.usgs.gov/streamstatsservices/download"
 
-    r = httpx.get(url, params=payload, **HTTPX_DEFAULTS)
+    r = _get(url, params=payload, **HTTPX_DEFAULTS)
 
-    r.raise_for_status()
+    _raise_for_status(r)
     return r
     # data = r.raw.read()
 
@@ -144,9 +144,9 @@ def get_watershed(
     }
     url = "https://streamstats.usgs.gov/streamstatsservices/watershed.geojson"
 
-    r = httpx.get(url, params=payload, **HTTPX_DEFAULTS)
+    r = _get(url, params=payload, **HTTPX_DEFAULTS)
 
-    r.raise_for_status()
+    _raise_for_status(r)
 
     if format == "geojson":
         return r
diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py
index 9751b269..f0cec64b 100644
--- a/dataretrieval/utils.py
+++ b/dataretrieval/utils.py
@@ -14,11 +14,10 @@
 import dataretrieval
 from dataretrieval.codes import tz
 from dataretrieval.exceptions import (
-    BadRequestError,
+    NetworkError,
     NoSitesError,
-    NotFoundError,
-    ServiceUnavailable,
     URLTooLong,
+    error_for_status,
 )
 
 # Typed as ``dict[str, Any]`` (not the inferred ``dict[str, object]``) so that
@@ -289,32 +288,45 @@ def _url_too_long_error(detail: str) -> URLTooLong:
     )
 
 
+def _network_error(url: str | httpx.URL, exc: httpx.TransportError) -> NetworkError:
+    """Build the :class:`~dataretrieval.exceptions.NetworkError` for a failed
+    round-trip ``exc`` (no HTTP response: timeout, DNS, refused connection)."""
+    # Some httpx transport errors stringify empty (e.g. ``ConnectTimeout()``);
+    # fall back to the class name so the message is always informative.
+    detail = str(exc) or type(exc).__name__
+    return NetworkError(f"Could not reach the service at {url}: {detail}")
+
+
+def _get(url: str | httpx.URL, **kwargs: Any) -> httpx.Response:
+    """``httpx.get`` for the single-shot paths, surfacing a transport failure as
+    a typed :class:`~dataretrieval.exceptions.NetworkError` (the chunker wraps its
+    own as resumable interruptions, so it stays off this wrapper)."""
+    try:
+        return httpx.get(url, **kwargs)
+    except httpx.TransportError as exc:
+        raise _network_error(url, exc) from exc
+
+
 def _raise_for_status(response: httpx.Response) -> None:
-    """Map an unsuccessful HTTP status to a typed :class:`DataRetrievalError`;
+    """Raise the typed :class:`DataRetrievalError` for an HTTP error response;
     return ``None`` on success.
 
-    Shared by the legacy :func:`query` path. The 4xx types stay
-    :class:`ValueError`-compatible (this path's historical contract), but a 5xx
-    raises the transient :class:`ServiceUnavailable` (a :class:`RuntimeError`),
-    since a server failure is retryable rather than a bad request.
+    Shared by the legacy :func:`query` path (and ``nadp`` / ``streamstats``).
+    Delegates the status-to-type mapping to
+    :func:`dataretrieval.exceptions.error_for_status`, except a too-long-URL
+    status (413 / 414): that gets the same actionable "split your query"
+    remediation as the client-side over-long-URL case below, rather than a bare
+    ``HTTP 414`` (both still raise :class:`~dataretrieval.exceptions.URLTooLong`).
     """
     status = response.status_code
-    if status == 400:
-        raise BadRequestError(
-            f"Bad Request, check that your parameters are correct. URL: {response.url}"
-        )
-    elif status == 404:
-        raise NotFoundError(
-            "Page Not Found Error. May be the result of an empty query. "
-            f"URL: {response.url}"
-        )
-    elif status == 414:
+    if status < 400:
+        return
+    if status in (413, 414):
         raise _url_too_long_error(f"API response reason: {response.reason_phrase}")
-    elif 500 <= status < 600:
-        raise ServiceUnavailable(
-            f"Service Unavailable: {status} {response.reason_phrase}. "
-            f"The service at {response.url} may be down or experiencing issues."
-        )
+    raise error_for_status(
+        status,
+        f"HTTP {status} {response.reason_phrase}".rstrip() + f" (URL: {response.url})",
+    )
 
 
 def query(
@@ -348,13 +360,12 @@ def query(
     Raises
     ------
     DataRetrievalError
-        On failure: :class:`~dataretrieval.exceptions.BadRequestError` (400),
-        :class:`~dataretrieval.exceptions.NotFoundError` (404),
-        :class:`~dataretrieval.exceptions.URLTooLong` (414 or a client-side
-        over-long URL), :class:`~dataretrieval.exceptions.ServiceUnavailable`
-        (5xx), or :class:`~dataretrieval.exceptions.NoSitesError` (no sites/data
-        matched). The 4xx types are also :class:`ValueError`;
-        ``ServiceUnavailable`` is a :class:`RuntimeError`.
+        On an HTTP error response, the typed subclass for the status (see
+        :func:`dataretrieval.exceptions.error_for_status` for the mapping); or
+        :class:`~dataretrieval.exceptions.NoSitesError` when a 200 response
+        reports no data matched; or :class:`~dataretrieval.exceptions.NetworkError`
+        on a connection-level failure (timeout, DNS), with the underlying
+        ``httpx`` exception on ``__cause__``.
     """
 
     for key, value in payload.items():
@@ -366,7 +377,7 @@ def query(
     user_agent = {"user-agent": f"python-dataretrieval/{dataretrieval.__version__}"}
 
     try:
-        response = httpx.get(
+        response = _get(
             url,
             params=payload,
             headers=user_agent,
@@ -378,6 +389,8 @@ def query(
 
     _raise_for_status(response)
 
+    # USGS waterservices signals an empty result with a 200 whose body starts
+    # "No sites/data ..." (its legacy wording); surface it as NoSitesError.
     if response.text.startswith("No sites/data"):
         raise NoSitesError(response.url)
 
diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py
index 5c2a4657..f4ecba47 100644
--- a/dataretrieval/waterdata/api.py
+++ b/dataretrieval/waterdata/api.py
@@ -20,6 +20,7 @@
     HTTPX_DEFAULTS,
     BaseMetadata,
     _attach_datetime_columns,
+    _get,
     to_str,
 )
 from dataretrieval.waterdata.filters import FILTER_LANG
@@ -2126,7 +2127,7 @@ def get_codes(code_service: CODE_SERVICES) -> tuple[pd.DataFrame, BaseMetadata]:
 
     url = f"{SAMPLES_URL}/codeservice/{code_service}?mimeType=application%2Fjson"
 
-    response = httpx.get(url, headers=_default_headers(), **HTTPX_DEFAULTS)
+    response = _get(url, headers=_default_headers(), **HTTPX_DEFAULTS)
 
     _raise_for_non_200(response)
 
@@ -2150,7 +2151,7 @@ def _get_samples_csv(
     as metadata and applies any per-getter post-step.
     """
     logger.debug("Request: %s", httpx.URL(url).copy_merge_params(params))
-    response = httpx.get(
+    response = _get(
         url,
         params=params,
         verify=ssl_check,
diff --git a/dataretrieval/waterdata/chunking.py b/dataretrieval/waterdata/chunking.py
index 82f5a37e..6626aba9 100644
--- a/dataretrieval/waterdata/chunking.py
+++ b/dataretrieval/waterdata/chunking.py
@@ -390,7 +390,7 @@ def _passthrough_result(
     return frame, response
 
 
-class ChunkInterrupted(DataRetrievalError, RuntimeError):
+class ChunkInterrupted(DataRetrievalError):
     """
     Base class for mid-stream chunk failures whose completed work is
     preserved and resumable.
@@ -496,6 +496,15 @@ def __init__(
             self.partial_frame = call.partial_frame.copy()
             self.partial_response = call.partial_response
 
+    def __getstate__(self) -> dict[str, Any]:
+        # Drop the live ChunkedCall before pickling: its ``.fetch`` is an
+        # undecorated module function pickle can't reference by name, so the
+        # interruption can't cross a process boundary with ``.call`` attached.
+        # The degraded ``call=None`` form keeps the counts, retry hint, and
+        # partial frame / response; only ``.resume()`` is lost (cross-process
+        # resume was never possible anyway).
+        return {**super().__getstate__(), "call": None}
+
 
 class QuotaExhausted(ChunkInterrupted):
     """
@@ -1034,18 +1043,18 @@ def _classify_chunk_error(
 
     Notes
     -----
-    ``_walk_pages`` re-wraps mid-pagination failures as
-    ``RuntimeError`` with the typed transport exception linked as
+    ``_walk_pages`` re-wraps mid-pagination failures as a base
+    ``DataRetrievalError`` with the typed transport exception linked as
     ``__cause__``, so this function must walk the chain rather than
     just ``isinstance`` the top-level exception.
 
     Bare ``httpx.HTTPError`` (``ConnectError``, ``TimeoutException``,
     etc.) and ``httpx.InvalidURL`` (server-supplied cursor URL too
     long, oversize follow-up) are also treated as transport failures
-    and wrapped as :class:`ServiceInterrupted` — these don't inherit
-    from ``RuntimeError`` (and ``InvalidURL`` doesn't even inherit
-    from ``HTTPError``), so without explicit handling they would
-    escape the chunker's catch with no resumable handle.
+    and wrapped as :class:`ServiceInterrupted` — they aren't one of the
+    typed status errors above (and ``InvalidURL`` doesn't even inherit
+    from ``httpx.HTTPError``), so without explicit handling they would
+    escape classification with no resumable handle.
     """
     cur: BaseException | None = exc
     while cur is not None:
@@ -1067,8 +1076,8 @@ def _retryable(exc: BaseException) -> tuple[bool, float | None]:
     :func:`_classify_chunk_error`, which walks the ``__cause__`` chain.
     The distinction matters because ``_paginate`` raises an
     initial-request transient (429 / 5xx / :class:`httpx.TransportError`)
-    *raw*, but wraps a mid-pagination failure as a ``RuntimeError``. So a
-    raw transient means a sub-request that made no progress and is cheap to
+    *raw*, but wraps a mid-pagination failure as a base ``DataRetrievalError``.
+    So a raw transient means a sub-request that made no progress and is cheap to
     re-issue, whereas a mid-pagination failure is left to escalate to a
     resumable :class:`ChunkInterrupted` rather than re-walked from page 1
     (which would re-spend the quota just exhausted). ``httpx.InvalidURL``
diff --git a/dataretrieval/waterdata/ratings.py b/dataretrieval/waterdata/ratings.py
index c0f870c1..2ffe5089 100644
--- a/dataretrieval/waterdata/ratings.py
+++ b/dataretrieval/waterdata/ratings.py
@@ -17,8 +17,9 @@
 import httpx
 import pandas as pd
 
+from dataretrieval.exceptions import DataRetrievalError
 from dataretrieval.rdb import extract_rdb_comment, read_rdb
-from dataretrieval.utils import HTTPX_DEFAULTS
+from dataretrieval.utils import HTTPX_DEFAULTS, _get
 
 from .utils import (
     _DURATION_RE,
@@ -184,17 +185,16 @@ def get_ratings(
         fid = feature["id"]
         try:
             out[fid] = _download_and_parse(feature, file_path, ssl_check)
-        # _download_and_parse can raise the module's typed errors via
-        # _raise_for_non_200 (RateLimited / ServiceUnavailable / RuntimeError —
-        # all RuntimeError subclasses), and a feature missing its data asset
-        # raises LookupError. Catch those too so one bad feature is logged and
-        # skipped rather than aborting the whole multi-site batch.
+        # One bad feature shouldn't abort the batch: log and skip the module's
+        # typed errors (DataRetrievalError, e.g. an HTTPError from
+        # _raise_for_non_200) plus the transport / parse / file / missing-asset
+        # errors a single download can raise.
         except (
+            DataRetrievalError,
             httpx.HTTPError,
-            RuntimeError,
-            ValueError,
-            OSError,
             LookupError,
+            OSError,
+            ValueError,
         ) as e:
             logger.warning("Failed to download / parse %s: %s", fid, e)
 
@@ -260,7 +260,7 @@ def _search(
     params: dict[str, Any] | None = query_params
     features: list[dict[str, Any]] = []
     while url is not None:
-        response = httpx.get(
+        response = _get(
             url,
             params=params,
             headers=_default_headers(),
@@ -288,9 +288,7 @@ def _download_and_parse(
 ) -> pd.DataFrame:
     """Fetch the feature's data asset, parse RDB, optionally persist to disk."""
     url = feature["assets"]["data"]["href"]
-    response = httpx.get(
-        url, headers=_default_headers(), verify=ssl_check, **HTTPX_DEFAULTS
-    )
+    response = _get(url, headers=_default_headers(), verify=ssl_check, **HTTPX_DEFAULTS)
     _raise_for_non_200(response)
 
     if file_path is not None:
diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py
index a4706c15..7a2c0d5c 100644
--- a/dataretrieval/waterdata/utils.py
+++ b/dataretrieval/waterdata/utils.py
@@ -27,8 +27,8 @@
 from anyio.from_thread import start_blocking_portal
 
 from dataretrieval import __version__
-from dataretrieval.exceptions import RateLimited, ServiceUnavailable
-from dataretrieval.utils import HTTPX_DEFAULTS, BaseMetadata
+from dataretrieval.exceptions import DataRetrievalError, RateLimited, error_for_status
+from dataretrieval.utils import HTTPX_DEFAULTS, BaseMetadata, _get, _network_error
 from dataretrieval.waterdata import _progress, chunking
 from dataretrieval.waterdata.chunking import (
     _QUOTA_HEADER,
@@ -424,15 +424,15 @@ def _check_ogc_requests(
     ------
     ValueError
         If req_type is not "queryables" or "schema".
-    RateLimited, ServiceUnavailable, RuntimeError
-        From :func:`_raise_for_non_200` on any non-200 — same typed
-        contract as the main data path so callers can use one
-        ``except`` clause everywhere.
+    DataRetrievalError
+        From :func:`_raise_for_non_200` on any non-200 (the typed subclass for
+        the status) — same typed contract as the main data path so callers can
+        use one ``except`` clause everywhere.
     """
     if req_type not in ("queryables", "schema"):
         raise ValueError(f"req_type must be 'queryables' or 'schema', got {req_type!r}")
     url = f"{OGC_API_URL}/collections/{endpoint}/{req_type}"
-    resp = httpx.get(url, headers=_default_headers(), **HTTPX_DEFAULTS)
+    resp = _get(url, headers=_default_headers(), **HTTPX_DEFAULTS)
     _raise_for_non_200(resp)
     # ``Response.json`` is typed ``Any``; the OGC queryables/schema endpoints
     # return a JSON object, and callers index it as a dict.
@@ -537,26 +537,24 @@ def _raise_for_non_200(resp: httpx.Response) -> None:
 
     Raises
     ------
-    RateLimited
-        On HTTP 429 — typed so ``ChunkedCall`` can wrap as a resumable
-        :class:`~dataretrieval.waterdata.chunking.QuotaExhausted`.
-    ServiceUnavailable
-        On HTTP 5xx — typed so ``ChunkedCall`` can wrap as a resumable
-        :class:`~dataretrieval.waterdata.chunking.ServiceInterrupted`.
-    RuntimeError
-        On any other non-200 (4xx other than 429) — these are
-        programmer errors that retry won't fix.
+    DataRetrievalError
+        The typed subclass for the status (see
+        :func:`dataretrieval.exceptions.error_for_status` for the mapping). The
+        transient types (:class:`~dataretrieval.exceptions.TransientError`) are
+        distinguished so ``ChunkedCall`` can wrap them as a resumable
+        :class:`~dataretrieval.waterdata.chunking.QuotaExhausted` /
+        :class:`~dataretrieval.waterdata.chunking.ServiceInterrupted`; a fatal
+        :class:`~dataretrieval.exceptions.HTTPError` (not a ``TransientError``)
+        the chunker won't resume.
     """
     status = resp.status_code
-    if status == 200:
+    if status < 400:
         return
-    body = _error_body(resp)
-    retry_after = _parse_retry_after(resp.headers.get("Retry-After"))
-    if status == 429:
-        raise RateLimited(body, retry_after=retry_after)
-    if 500 <= status < 600:
-        raise ServiceUnavailable(body, retry_after=retry_after)
-    raise RuntimeError(body)
+    raise error_for_status(
+        status,
+        _error_body(resp),
+        retry_after=_parse_retry_after(resp.headers.get("Retry-After")),
+    )
 
 
 def _paginated_failure_message(pages_collected: int, cause: BaseException) -> str:
@@ -578,7 +576,7 @@ def _paginated_failure_message(pages_collected: int, cause: BaseException) -> st
     Returns
     -------
     str
-        A message suitable for the ``RuntimeError`` that
+        A message suitable for the ``DataRetrievalError`` that
         ``_walk_pages`` and ``get_stats_data`` raise from the
         original exception.
     """
@@ -1066,7 +1064,7 @@ async def _paginate(
     :func:`get_stats_data`: send the initial request, then loop calling
     ``follow_up`` until ``parse_response`` reports a ``None`` cursor,
     accumulating frames and elapsed time. Any mid-pagination failure
-    raises ``RuntimeError`` wrapping the cause — the API exposes no
+    raises ``DataRetrievalError`` wrapping the cause — the API exposes no
     resume cursor, so the caller's only recovery is to retry the whole
     call. Issuing HTTP asynchronously lets the multiple sub-requests of a
     chunked call run concurrently under
@@ -1101,15 +1099,14 @@ async def _paginate(
 
     Raises
     ------
-    RuntimeError
-        On a non-200 initial response (typed
-        :class:`~dataretrieval.exceptions.RateLimited` /
-        :class:`~dataretrieval.exceptions.ServiceUnavailable`
-        for 429/5xx, otherwise plain ``RuntimeError`` from
-        :func:`_error_body`), on an initial-page parse failure
-        (wrapped via :func:`_paginated_failure_message` with the
-        original exception on ``__cause__``), or any failure on a
-        subsequent page (same wrapping).
+    DataRetrievalError
+        On a non-200 initial response, the typed subclass for the status from
+        :func:`_raise_for_non_200` (a
+        :class:`~dataretrieval.exceptions.TransientError` for a retryable
+        429 / 5xx, otherwise a fatal :class:`~dataretrieval.exceptions.HTTPError`);
+        or, on an initial-page parse failure or any subsequent-page failure, a
+        base ``DataRetrievalError`` wrapping the cause (built by
+        :func:`_paginated_failure_message`, original exception on ``__cause__``).
     httpx.HTTPError
         Network-level failures on the *initial* request (e.g.
         ``ConnectError``, ``TimeoutException``) propagate unmodified
@@ -1132,7 +1129,7 @@ async def _paginate(
             # treatment as follow-up failures so callers see a consistent
             # diagnostic regardless of which page broke.
             logger.warning("Initial response parse failed.")
-            raise RuntimeError(_paginated_failure_message(0, e)) from e
+            raise DataRetrievalError(_paginated_failure_message(0, e)) from e
         dfs = [df]
         # Stop following ``next`` links once the optional row cap is reached
         # (see :func:`_row_cap`); ``None`` means uncapped. The concatenation
@@ -1164,7 +1161,7 @@ async def _paginate(
                     "Request failed at cursor %r. Data download interrupted.",
                     cursor,
                 )
-                raise RuntimeError(_paginated_failure_message(len(dfs), e)) from e
+                raise DataRetrievalError(_paginated_failure_message(len(dfs), e)) from e
 
         # Aggregate headers / elapsed onto a COPY of the initial
         # response so the user's caller never sees an in-place
@@ -1231,7 +1228,7 @@ async def _walk_pages(
 
     Raises
     ------
-    RuntimeError
+    DataRetrievalError
         See :func:`_paginate`.
     httpx.HTTPError
         See :func:`_paginate`.
@@ -1740,7 +1737,12 @@ def _run_sync(
     """
     with _progress.progress_context(service=service):
         with start_blocking_portal() as portal:
-            return portal.call(make_coro)
+            try:
+                return portal.call(make_coro)
+            except httpx.TransportError as exc:
+                # The initial-request connection failure ``_paginate`` lets
+                # through raw; mid-pagination failures are already typed.
+                raise _network_error(OGC_API_URL, exc) from exc
 
 
 def get_stats_data(
@@ -1785,6 +1787,14 @@ def get_stats_data(
         A DataFrame containing the retrieved and processed statistical data.
     BaseMetadata
         A metadata object containing request information including URL and query time.
+
+    Raises
+    ------
+    DataRetrievalError
+        The typed subclass for an HTTP error response (see :func:`_paginate`);
+        or :class:`~dataretrieval.exceptions.NetworkError` if the initial request
+        can't reach the service (timeout / DNS), the ``httpx`` exception chained
+        on ``__cause__``.
     """
 
     url = f"{STATISTICS_API_URL}/{service}"
diff --git a/docs/source/reference/exceptions.rst b/docs/source/reference/exceptions.rst
new file mode 100644
index 00000000..1d8de47e
--- /dev/null
+++ b/docs/source/reference/exceptions.rst
@@ -0,0 +1,8 @@
+.. _exceptions:
+
+dataretrieval.exceptions
+------------------------
+
+.. automodule:: dataretrieval.exceptions
+    :members:
+    :show-inheritance:
diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst
index ec1eec43..43def275 100644
--- a/docs/source/reference/index.rst
+++ b/docs/source/reference/index.rst
@@ -7,6 +7,7 @@ API reference
 .. toctree::
     :maxdepth: 1
 
+    exceptions
     nadp
     nldi
     nwis
diff --git a/docs/source/userguide/errors.rst b/docs/source/userguide/errors.rst
new file mode 100644
index 00000000..cd81f546
--- /dev/null
+++ b/docs/source/userguide/errors.rst
@@ -0,0 +1,103 @@
+.. _handling-errors:
+
+===============
+Handling errors
+===============
+
+Every failed request raises a subclass of
+:class:`~dataretrieval.exceptions.DataRetrievalError`, so a single ``except``
+clause handles any failure regardless of which service you called:
+
+.. code-block:: python
+
+    import dataretrieval
+
+    try:
+        df, md = dataretrieval.waterdata.get_daily(
+            monitoring_location_id="USGS-05427718"
+        )
+    except dataretrieval.DataRetrievalError:
+        ...  # any request failure: error status, connection loss, too-large, ...
+
+Connection-level failures (timeouts, DNS, refused connections) are wrapped as
+:class:`~dataretrieval.exceptions.NetworkError`, so the clause above covers them
+too -- you never have to catch an ``httpx`` exception. A *no-data* result is **not** an
+error: the modern getters return an empty ``DataFrame`` when nothing matches, so
+check ``df.empty`` rather than catching anything.
+
+Branch without knowing the concrete type
+=========================================
+
+Every :class:`~dataretrieval.exceptions.DataRetrievalError` exposes three
+read-anywhere fields, so you rarely need to import the specific subclasses:
+
+* ``.status_code`` -- the HTTP status, or ``None`` when the failure carried no
+  response (a connection error, an over-long URL, ...).
+* ``.retry_after`` -- seconds the server asked you to wait (its ``Retry-After``
+  header), or ``None``.
+* ``.retryable`` -- ``True`` when re-issuing the same request might succeed (a
+  429 / 5xx, or a connection failure); ``False`` otherwise.
+
+.. code-block:: python
+
+    except dataretrieval.DataRetrievalError as e:
+        if e.status_code == 404:
+            ...            # not found
+        elif e.retryable:
+            ...            # transient -- see the retry recipe below
+        else:
+            raise
+
+Retry transient failures with backoff
+=====================================
+
+``.retryable`` and ``.retry_after`` make a backoff loop type-agnostic -- it
+covers rate limits (429), server errors (5xx), and connection failures alike,
+honoring the server's ``Retry-After`` hint when present:
+
+.. code-block:: python
+
+    import time
+    import dataretrieval
+
+    for attempt in range(5):
+        try:
+            df, md = dataretrieval.waterdata.get_continuous(
+                monitoring_location_id=sites
+            )
+            break
+        except dataretrieval.DataRetrievalError as e:
+            if not e.retryable or attempt == 4:
+                raise
+            time.sleep(e.retry_after or 2 ** attempt)
+
+Resume a large Water Data request
+=================================
+
+The Water Data getters transparently split an over-large request into chunks.
+When a transient failure interrupts one mid-stream, the work already completed
+is preserved: catch ``ChunkInterrupted`` and call ``exc.call.resume()`` once the
+condition clears -- only the unfinished sub-requests are re-issued.
+
+.. code-block:: python
+
+    import time
+    from dataretrieval.waterdata import get_daily
+    from dataretrieval.waterdata.chunking import ChunkInterrupted
+
+    try:
+        df, md = get_daily(monitoring_location_id=long_list_of_sites)
+    except ChunkInterrupted as exc:
+        while True:
+            time.sleep(exc.retry_after or 5 * 60)
+            try:
+                df, md = exc.call.resume()
+                break
+            except ChunkInterrupted as again:
+                exc = again
+
+The full taxonomy
+=================
+
+See :doc:`/reference/exceptions` for the complete class tree and per-type
+details.
diff --git a/docs/source/userguide/index.rst b/docs/source/userguide/index.rst
index 2ba5a93a..3cc4748a 100644
--- a/docs/source/userguide/index.rst
+++ b/docs/source/userguide/index.rst
@@ -13,5 +13,6 @@ Contents
 .. toctree::
     :maxdepth: 1
 
+    errors
     timeconventions
     dataportals
diff --git a/tests/nldi_test.py b/tests/nldi_test.py
index 2249e4f2..9092bbf3 100644
--- a/tests/nldi_test.py
+++ b/tests/nldi_test.py
@@ -341,12 +341,11 @@ def test_validate_navigation_mode_normalizes_lowercase():
     assert _validate_navigation_mode("um") == "UM"
 
 
-def test_query_nldi_non_200_surfaces_reason_phrase(httpx_mock):
-    """``_query_nldi`` must include the response's reason phrase in
-    the raised ``ValueError``. Pre-fix this crashed with
-    ``AttributeError: 'Response' object has no attribute 'reason'``
-    because the migration to httpx renamed ``.reason`` →
-    ``.reason_phrase`` but missed this call site."""
+def test_query_nldi_non_200_raises_typed_error(httpx_mock):
+    """A non-200 NLDI response surfaces a typed ``DataRetrievalError`` (here a
+    429 → ``RateLimited``, raised by the shared ``query`` path)."""
+    from dataretrieval.exceptions import RateLimited
+
     httpx_mock.add_response(
         method="GET",
         url=f"{NLDI_API_BASE_URL}/WQP/USGS-MISSING/basin"
@@ -354,7 +353,7 @@ def test_query_nldi_non_200_surfaces_reason_phrase(httpx_mock):
         status_code=429,
     )
     mock_request_data_sources(httpx_mock)
-    with pytest.raises(ValueError, match="Error reason:"):
+    with pytest.raises(RateLimited, match="429"):
         nldi.get_basin(feature_source="WQP", feature_id="USGS-MISSING")
 
 
@@ -374,15 +373,15 @@ def test_validate_data_source_rejects_malformed_catalog(httpx_mock, monkeypatch)
 
 
 def test_query_504_raises_service_unavailable(httpx_mock):
-    """``utils.query`` must classify 504 Gateway Timeout as a 5xx failure
-    (the transient ``ServiceUnavailable``). Pre-fix: the membership check
-    ``[500, 502, 503]`` missed 504 and returned the response unchanged,
-    leading downstream callers (e.g. ``_query_nldi``) to silently swallow
-    the failure as an empty dict via JSONDecodeError."""
+    """``utils.query`` classifies any 5xx (here 504 Gateway Timeout) as the
+    transient ``ServiceUnavailable`` -- the whole 5xx range, not an enumerated
+    subset of codes."""
     from dataretrieval.exceptions import ServiceUnavailable
     from dataretrieval.utils import query
 
     url = "https://example.invalid/x"
     httpx_mock.add_response(method="GET", url=f"{url}?a=1", status_code=504)
-    with pytest.raises(ServiceUnavailable, match="Service Unavailable: 504"):
+    # Match on the status number — robust against the exact message, which the
+    # legacy query path renders verbatim as "HTTP 504 <reason> (URL: ...)".
+    with pytest.raises(ServiceUnavailable, match="504"):
         query(url, {"a": "1"})
diff --git a/tests/nwis_test.py b/tests/nwis_test.py
index f343f26e..dab46d6a 100644
--- a/tests/nwis_test.py
+++ b/tests/nwis_test.py
@@ -9,6 +9,7 @@
 import pandas as pd
 import pytest
 
+from dataretrieval.exceptions import DataRetrievalError
 from dataretrieval.nwis import (
     NWIS_Metadata,
     _read_rdb,
@@ -73,8 +74,9 @@ def test_nwis_service_live():
     try:
         # Minimal query: just most recent record
         get_iv(sites=site)
-    except ValueError as e:
-        # Catch known transient service failures surfaced as ValueError
+    except (DataRetrievalError, ValueError) as e:
+        # Catch known transient service failures: a typed DataRetrievalError
+        # (e.g. ServiceUnavailable on a 5xx, a RuntimeError) or a legacy ValueError
         error_text = str(e)
         if any(
             err in error_text
diff --git a/tests/utils_test.py b/tests/utils_test.py
index 00cec52e..d90821ae 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -15,16 +15,14 @@ def test_url_too_long(self):
         """Test to confirm error when query URL too long.
 
         Test based on GitHub Issue #64.
-        The server may respond with a 414 (converted to ValueError by query())
-        or abruptly close the connection (ConnectionError). Both are valid
-        responses to an excessively long URL.
+        The server may respond with a 414 (converted to URLTooLong by query())
+        or abruptly close the connection (a transport error, now wrapped as
+        NetworkError). Both are valid responses to an excessively long URL.
         """
-        import httpx
-
         # all sites in MD
         sites, _ = nwis.what_sites(stateCd="MD")
         # raise error by trying to query them all, so URL is way too long
-        with pytest.raises((ValueError, httpx.ConnectError)):
+        with pytest.raises((exceptions.URLTooLong, exceptions.NetworkError)):
             nwis.get_iv(sites=sites.site_no.values.tolist())
 
     def test_header(self):
@@ -45,35 +43,62 @@ def test_header(self):
 class Test_error_taxonomy:
     """The unified request-error hierarchy.
 
-    Every module's request failures are catchable as ``DataRetrievalError``,
-    while remaining backward-compatible with the built-in type each path
-    historically raised (``ValueError`` for the legacy ``query`` path,
-    ``RuntimeError`` for the waterdata retryable types).
+    Every module's request failure is catchable as ``DataRetrievalError``.
+    A status error is an ``HTTPError`` carrying ``.status_code`` (the retryable
+    429 / 5xx subset is ``TransientError``); a connection failure is a
+    ``NetworkError``. The sole base is ``DataRetrievalError`` -- no builtin
+    (``ValueError`` / ``RuntimeError``) mixins.
     """
 
     @pytest.mark.parametrize(
-        "status, exc_name, match, builtin",
+        "status, exc_name",
         [
-            (400, "BadRequestError", "Bad Request", ValueError),
-            (404, "NotFoundError", "Page Not Found", ValueError),
-            (414, "URLTooLong", "Request URL too long", ValueError),
-            (503, "ServiceUnavailable", "Service Unavailable: 503", RuntimeError),
+            (400, "HTTPError"),
+            (403, "HTTPError"),
+            (404, "HTTPError"),
+            (429, "RateLimited"),
+            (503, "ServiceUnavailable"),
         ],
     )
-    def test_query_maps_status_to_typed_error(
-        self, httpx_mock, status, exc_name, match, builtin
-    ):
-        """``query`` maps each HTTP status family to a typed error that is both a
-        ``DataRetrievalError`` (new, unified) and the built-in this path
-        historically raised for that kind of failure -- ``ValueError`` for a bad
-        request, ``RuntimeError`` for a transient 5xx -- with the message kept."""
+    def test_query_maps_status_to_typed_error(self, httpx_mock, status, exc_name):
+        """``query`` maps each HTTP status to the right typed ``DataRetrievalError``:
+        a generic ``HTTPError`` (carrying ``.status_code``) for a fatal 4xx, and
+        the transient ``RateLimited`` / ``ServiceUnavailable`` for 429 / 5xx. The
+        too-long-URL statuses (413 / 414) are covered separately because their
+        message is the actionable remediation, not the bare status number."""
         exc_cls = getattr(exceptions, exc_name)
         url = "https://example.invalid/x"
         httpx_mock.add_response(method="GET", url=f"{url}?a=1", status_code=status)
-        with pytest.raises(exc_cls, match=match) as excinfo:
+        with pytest.raises(exc_cls, match=str(status)) as excinfo:
             utils.query(url, {"a": "1"})
         assert isinstance(excinfo.value, exceptions.DataRetrievalError)
-        assert isinstance(excinfo.value, builtin)  # backward compatibility
+        if isinstance(excinfo.value, exceptions.HTTPError):
+            assert excinfo.value.status_code == status
+
+    @pytest.mark.parametrize("status", [413, 414])
+    def test_query_too_long_url_gives_actionable_message(self, httpx_mock, status):
+        """A server 413 / 414 surfaces as ``URLTooLong`` carrying the actionable
+        "Modify your query" remediation (the same message as the client-side
+        over-long-URL path), not a bare ``HTTP 414`` status line."""
+        url = "https://example.invalid/x"
+        httpx_mock.add_response(method="GET", url=f"{url}?a=1", status_code=status)
+        with pytest.raises(exceptions.URLTooLong, match="Modify your query") as excinfo:
+            utils.query(url, {"a": "1"})
+        assert isinstance(excinfo.value, exceptions.RequestTooLarge)
+
+    def test_transport_error_wrapped_as_network_error(self, httpx_mock):
+        """A connection-level failure (no HTTP response) surfaces as the typed
+        ``NetworkError`` -- catchable via ``except DataRetrievalError`` like the
+        response-based errors, with the original ``httpx`` exception on
+        ``__cause__`` -- rather than leaking a raw ``httpx`` exception."""
+        import httpx
+
+        httpx_mock.add_exception(httpx.ConnectError("name resolution failed"))
+        with pytest.raises(exceptions.NetworkError) as excinfo:
+            utils.query("https://example.invalid/x", {"a": "1"})
+        assert isinstance(excinfo.value, exceptions.DataRetrievalError)
+        assert not isinstance(excinfo.value, exceptions.HTTPError)  # no status
+        assert isinstance(excinfo.value.__cause__, httpx.ConnectError)
 
     def test_query_failure_catchable_as_base(self, httpx_mock):
         """A bare ``except DataRetrievalError`` catches a legacy query failure."""
@@ -82,16 +107,72 @@ def test_query_failure_catchable_as_base(self, httpx_mock):
         with pytest.raises(exceptions.DataRetrievalError):
             utils.query(url, {"a": "1"})
 
+    def test_uniform_retry_attributes_readable_on_every_error(self):
+        """Every error exposes ``.status_code`` / ``.retry_after`` / ``.retryable``
+        so a base ``except DataRetrievalError as e`` can branch and retry without
+        an ``AttributeError`` on the types that lack a status (URLTooLong,
+        NetworkError, NoSitesError, ...). ``.retryable`` marks the 429/5xx and
+        connection failures."""
+        import httpx
+
+        # (error, status_code, retry_after, retryable)
+        cases = [
+            (exceptions.error_for_status(404, "x"), 404, None, False),
+            (exceptions.error_for_status(429, "x", retry_after=5.0), 429, 5.0, True),
+            (exceptions.error_for_status(503, "x"), 503, None, True),
+            (exceptions.error_for_status(414, "x"), None, None, False),  # URLTooLong
+            (exceptions.NetworkError("x"), None, None, True),
+            (exceptions.NoSitesError(httpx.URL("https://x/y")), None, None, False),
+            (exceptions.Unchunkable("x"), None, None, False),
+        ]
+        for err, status, retry_after, retryable in cases:
+            assert err.status_code == status, err
+            assert err.retry_after == retry_after, err
+            assert err.retryable is retryable, err
+
     def test_no_sites_error_is_data_retrieval_error(self):
-        """``NoSitesError`` joins the root (was a bare ``Exception``)."""
+        """``NoSitesError`` (the legacy nwis no-data signal) roots at
+        ``DataRetrievalError`` and is not a builtin ``ValueError``, so it is
+        caught by the unified ``except dataretrieval.DataRetrievalError``."""
         assert issubclass(exceptions.NoSitesError, exceptions.DataRetrievalError)
-        assert not issubclass(exceptions.NoSitesError, ValueError)  # unchanged
+        assert not issubclass(exceptions.NoSitesError, ValueError)
+        import dataretrieval
+
+        assert dataretrieval.NoSitesError is exceptions.NoSitesError
+
+    def test_typed_errors_survive_pickle_and_deepcopy(self):
+        """Typed errors round-trip through pickle/deepcopy -- they get pickled
+        back from multiprocessing / lithops workers, and their constructor fields
+        (status_code, retry_after, url) must survive the trip."""
+        import copy
+        import pickle
+
+        import httpx
+
+        samples = [
+            exceptions.error_for_status(404, "not found"),  # bare HTTPError
+            exceptions.error_for_status(429, "slow down", retry_after=5.0),
+            exceptions.error_for_status(503, "down"),
+            exceptions.TransientError("boom", status_code=502, retry_after=1.5),
+            exceptions.NoSitesError(httpx.URL("https://example.invalid/x?a=1")),
+            exceptions.NetworkError("could not reach the service"),
+        ]
+        for err in samples:
+            for revived in (pickle.loads(pickle.dumps(err)), copy.deepcopy(err)):
+                assert type(revived) is type(err)
+                assert str(revived) == str(err)
+                if isinstance(err, exceptions.HTTPError):
+                    assert revived.status_code == err.status_code
+                if isinstance(err, exceptions.TransientError):
+                    assert revived.retry_after == err.retry_after
+                if isinstance(err, exceptions.NoSitesError):
+                    assert revived.url == err.url
 
     def test_waterdata_exceptions_share_the_root(self):
         """waterdata's typed exceptions are ``DataRetrievalError`` too, so one
-        ``except`` clause spans the legacy and waterdata subsystems — while
-        keeping their historical ``RuntimeError`` / ``ValueError`` bases and the
-        shared family bases (``TransientError``, ``RequestTooLarge``)."""
+        ``except`` clause spans the legacy and waterdata subsystems, and they
+        slot under the shared family bases (``HTTPError`` / ``TransientError`` /
+        ``RequestTooLarge``)."""
         from dataretrieval.waterdata.chunking import (
             ChunkInterrupted,
             RateLimited,
@@ -101,13 +182,12 @@ def test_waterdata_exceptions_share_the_root(self):
 
         for cls in (RateLimited, ServiceUnavailable, Unchunkable, ChunkInterrupted):
             assert issubclass(cls, exceptions.DataRetrievalError)
-        # Transient transport failures: RuntimeError, under TransientError.
+        # Transient 429/5xx: an HTTPError-with-status, under TransientError.
         assert issubclass(RateLimited, exceptions.TransientError)
         assert issubclass(ServiceUnavailable, exceptions.TransientError)
-        assert issubclass(ServiceUnavailable, RuntimeError)
-        # "Too large" failures: ValueError, under RequestTooLarge.
+        assert issubclass(ServiceUnavailable, exceptions.HTTPError)
+        # "Too large" failures slot under RequestTooLarge.
         assert issubclass(Unchunkable, exceptions.RequestTooLarge)
-        assert issubclass(Unchunkable, ValueError)
 
     def test_base_exported_at_top_level(self):
         """Users can write ``except dataretrieval.DataRetrievalError``."""
diff --git a/tests/waterdata_chunking_test.py b/tests/waterdata_chunking_test.py
index 4ee4b555..0857da4a 100644
--- a/tests/waterdata_chunking_test.py
+++ b/tests/waterdata_chunking_test.py
@@ -31,6 +31,7 @@
 if sys.version_info < (3, 10):
     pytest.skip("Skip entire module on Python < 3.10", allow_module_level=True)
 
+from dataretrieval.exceptions import DataRetrievalError
 from dataretrieval.waterdata import chunking as _chunking
 from dataretrieval.waterdata import utils as _utils
 from dataretrieval.waterdata.chunking import (
@@ -38,6 +39,7 @@
     _NEVER_CHUNK,
     _OR_SEP,
     _QUOTA_HEADER,
+    ChunkedCall,
     ChunkInterrupted,
     ChunkPlan,
     QuotaExhausted,
@@ -455,11 +457,11 @@ async def fetch(args):
         state["i"] += 1
         if i == 2:
             # Match _walk_pages's wrapping: a generic mid-pagination
-            # RuntimeError with the typed RateLimited as __cause__.
+            # DataRetrievalError with the typed RateLimited as __cause__.
             try:
                 raise RateLimited("429: Too many requests made.")
             except RateLimited as cause:
-                raise RuntimeError(
+                raise DataRetrievalError(
                     "Paginated request failed after collecting 0 page(s): "
                     "429: Too many requests made."
                 ) from cause
@@ -687,7 +689,7 @@ async def fetch(args):
             try:
                 raise ServiceUnavailable("503: Service unavailable.")
             except ServiceUnavailable as cause:
-                raise RuntimeError(str(cause)) from cause
+                raise DataRetrievalError(str(cause)) from cause
         return (
             pd.DataFrame({"sites": list(args["sites"])}),
             _quota_response(500),
@@ -717,10 +719,72 @@ def test_chunk_interrupted_base_class_catches_both():
     and ``ServiceInterrupted`` must both subclass it."""
     assert issubclass(QuotaExhausted, ChunkInterrupted)
     assert issubclass(ServiceInterrupted, ChunkInterrupted)
-    # Sanity: ``ChunkInterrupted`` is itself a ``RuntimeError`` so
-    # bare ``except RuntimeError`` callers don't suddenly miss the
-    # wrapped failures after this refactor.
-    assert issubclass(ChunkInterrupted, RuntimeError)
+    # ``ChunkInterrupted`` roots at ``DataRetrievalError`` like the rest of the
+    # taxonomy (no ``RuntimeError`` mixin), so one ``except DataRetrievalError``
+    # spans chunked and single-shot failures alike.
+    assert issubclass(ChunkInterrupted, DataRetrievalError)
+    assert not issubclass(ChunkInterrupted, RuntimeError)
+
+
+def test_chunk_interrupted_pickles_as_degraded_across_process_boundary():
+    """A real ChunkInterrupted carries a live ChunkedCall whose ``fetch`` is not
+    stdlib-picklable, so a worker raising it inside a multiprocessing /
+    ProcessPoolExecutor pool could not ship it back. ``__getstate__`` drops
+    ``.call`` and pickles the documented degraded ``call=None`` state -- counts
+    and retry hint preserved, ``.resume()`` gone (un-resumable cross-process)."""
+    import pickle
+
+    plan = ChunkPlan(
+        {"monitoring_location_id": ["A", "B", "C"]}, _fake_build, url_limit=8000
+    )
+    # A local function isn't picklable by reference -- mirrors production, where
+    # ChunkedCall.fetch is the undecorated _fetch_once shadowed by its wrapper.
+    call = ChunkedCall(plan, lambda args: (pd.DataFrame(), None))
+    exc = call.wrap_failure(RateLimited("429: too many requests", retry_after=12.0))
+    assert isinstance(exc, QuotaExhausted) and exc.call is call
+    # the live fetch handle alone can't pickle (the whole point of the override)
+    with pytest.raises((pickle.PicklingError, AttributeError)):
+        pickle.dumps(exc.call.fetch)
+
+    revived = pickle.loads(pickle.dumps(exc))
+    assert isinstance(revived, QuotaExhausted)
+    assert revived.call is None  # degraded: no cross-process resume handle
+    assert revived.completed_chunks == exc.completed_chunks
+    assert revived.total_chunks == exc.total_chunks
+    assert revived.retry_after == 12.0
+    assert str(revived) == str(exc)
+
+
+def test_chunk_interrupted_with_partial_data_pickles_intact():
+    """The degrade drops only the live ``.call``; the captured *partial work*
+    must still cross the boundary so a worker can report what it salvaged.
+    Exercises the path the no-completed-chunks case above doesn't: a real
+    ``partial_frame`` (rows) and ``partial_response`` (a live ``httpx.Response``,
+    which must itself remain picklable)."""
+    import pickle
+
+    plan = ChunkPlan(
+        {"monitoring_location_id": ["A", "B", "C"]}, _fake_build, url_limit=8000
+    )
+    call = ChunkedCall(plan, lambda args: (pd.DataFrame(), None))
+    # One sub-request completed before the failure: a real frame + response.
+    call._chunks[0] = (
+        pd.DataFrame({"id": ["A"]}),
+        httpx.Response(
+            200,
+            request=httpx.Request("GET", "https://example.invalid/a"),
+            json={"features": []},
+        ),
+    )
+    exc = call.wrap_failure(ServiceUnavailable("503: down"))
+    assert exc.completed_chunks == 1
+    assert not exc.partial_frame.empty and exc.partial_response is not None
+
+    revived = pickle.loads(pickle.dumps(exc))
+    assert revived.call is None
+    assert revived.partial_frame["id"].tolist() == ["A"]
+    assert isinstance(revived.partial_response, httpx.Response)
+    assert revived.partial_response.status_code == 200
 
 
 def test_connection_error_wrapped_as_service_interrupted():
@@ -810,7 +874,7 @@ async def fetch(args):
             try:
                 raise ServiceUnavailable("503: Service unavailable.")
             except ServiceUnavailable as cause:
-                raise RuntimeError(str(cause)) from cause
+                raise DataRetrievalError(str(cause)) from cause
         return (
             pd.DataFrame({"sites": list(args["sites"])}),
             _quota_response(500),
@@ -1000,7 +1064,7 @@ async def fetch(args):
             try:
                 raise RateLimited("429: Too many requests.", retry_after=42.0)
             except RateLimited as cause:
-                raise RuntimeError(str(cause)) from cause
+                raise DataRetrievalError(str(cause)) from cause
         return (
             pd.DataFrame({"sites": list(args["sites"])}),
             _quota_response(500),
@@ -1525,12 +1589,12 @@ def test_combine_chunk_responses_does_not_mutate_input_urls():
 
 
 def _wrap_cause(transport_exc):
-    """Wrap ``transport_exc`` the way ``_walk_pages`` does — a
-    ``RuntimeError`` with the typed transport error on ``__cause__`` — so
+    """Wrap ``transport_exc`` the way ``_walk_pages`` does — a base
+    ``DataRetrievalError`` with the typed transport error on ``__cause__`` — so
     chain-walking is exercised realistically."""
     try:
-        raise RuntimeError("Paginated request failed") from transport_exc
-    except RuntimeError as wrapped:
+        raise DataRetrievalError("Paginated request failed") from transport_exc
+    except DataRetrievalError as wrapped:
         return wrapped
 
 
@@ -1603,18 +1667,23 @@ def test_retry_policy_from_env_honors_monkeypatched_constants(monkeypatch):
 
 
 def test_retryable_taxonomy():
+    from dataretrieval.exceptions import HTTPError
+
     assert _retryable(RateLimited("429", retry_after=5.0)) == (True, 5.0)
     assert _retryable(ServiceUnavailable("503")) == (True, None)
     assert _retryable(httpx.ReadTimeout("slow")) == (True, None)
     assert _retryable(httpx.ConnectError("down")) == (True, None)
     # InvalidURL is resumable but NOT retryable (a too-long cursor won't fix).
     assert _retryable(httpx.InvalidURL("too long")) == (False, None)
-    # Plain non-transient (e.g. a 4xx programmer error wrapped as RuntimeError).
+    # A fatal HTTP error (a plain HTTPError, not a TransientError) is never
+    # retried; nor is a bare RuntimeError.
+    assert _retryable(HTTPError("400", status_code=400)) == (False, None)
+    assert _retryable(HTTPError("403", status_code=403)) == (False, None)
     assert _retryable(RuntimeError("400")) == (False, None)
 
 
 def test_retryable_skips_wrapped_midpagination_transient():
-    # A transient surfaced mid-pagination is re-wrapped as RuntimeError by
+    # A transient surfaced mid-pagination is re-wrapped as DataRetrievalError by
     # _paginate; it must NOT be auto-retried (re-walking from page 1
     # would re-spend quota) — it escalates to the resumable handle instead.
     # Only the raw, top-level (initial-request) transient is retryable.
diff --git a/tests/waterdata_utils_test.py b/tests/waterdata_utils_test.py
index 01012d73..10733773 100644
--- a/tests/waterdata_utils_test.py
+++ b/tests/waterdata_utils_test.py
@@ -9,6 +9,7 @@
 import pytest
 
 import dataretrieval.waterdata.utils as _utils_module
+from dataretrieval.exceptions import DataRetrievalError, HTTPError, TransientError
 from dataretrieval.waterdata.chunking import RateLimited, ServiceUnavailable
 from dataretrieval.waterdata.utils import (
     OGC_API_URL,
@@ -240,7 +241,7 @@ def test_walk_pages_raises_on_connection_error_mid_pagination():
     """A connection error mid-pagination must raise with the upstream cause
     chained, and the wrapper message must include recovery guidance that
     is NOT rate-limit-specific (no quota window involved)."""
-    with pytest.raises(RuntimeError, match="Paginated request failed") as excinfo:
+    with pytest.raises(DataRetrievalError, match="Paginated request failed") as excinfo:
         _walk_pages_with_failure(httpx.ConnectError("boom"))
 
     msg = str(excinfo.value)
@@ -254,7 +255,7 @@ def test_walk_pages_raises_with_class_name_when_cause_stringifies_empty():
     """Some ``httpx`` exceptions (e.g. ``TimeoutException("")``)
     stringify to ``""``. The wrapper must still produce an informative
     message — fall back to the exception class name."""
-    with pytest.raises(RuntimeError, match="Paginated request failed") as excinfo:
+    with pytest.raises(DataRetrievalError, match="Paginated request failed") as excinfo:
         _walk_pages_with_failure(httpx.TimeoutException(""))
 
     msg = str(excinfo.value)
@@ -276,7 +277,7 @@ def test_walk_pages_raises_on_5xx_mid_pagination():
     }
     page2_503.url = "https://example.com/page2"
 
-    with pytest.raises(RuntimeError, match="Paginated request failed") as excinfo:
+    with pytest.raises(DataRetrievalError, match="Paginated request failed") as excinfo:
         _walk_pages_with_failure(page2_503)
 
     msg = str(excinfo.value)
@@ -291,7 +292,7 @@ def test_walk_pages_raises_on_mid_pagination_429():
     page2_429.status_code = 429
     page2_429.url = "https://example.com/page2"
 
-    with pytest.raises(RuntimeError, match="Paginated request failed") as excinfo:
+    with pytest.raises(DataRetrievalError, match="Paginated request failed") as excinfo:
         _walk_pages_with_failure(page2_429)
 
     msg = str(excinfo.value)
@@ -320,7 +321,7 @@ def test_walk_pages_wraps_initial_page_parse_error():
     mock_req.headers = {}
     mock_req.url = "https://example.com/page1"
 
-    with pytest.raises(RuntimeError, match="Paginated request failed") as excinfo:
+    with pytest.raises(DataRetrievalError, match="Paginated request failed") as excinfo:
         _run_walk_pages(geopd=False, req=mock_req, client=mock_client)
 
     # The JSONDecodeError causing it is on __cause__ so callers can drill in.
@@ -445,7 +446,7 @@ def test_get_stats_data_raises_on_mid_pagination_failure(monkeypatch):
     exercised by the ``_walk_pages`` triplet above. This single
     ``get_stats_data`` mid-pagination case proves the stats-specific
     follow-up callback is wired into ``_paginate`` correctly."""
-    with pytest.raises(RuntimeError, match="Paginated request failed") as excinfo:
+    with pytest.raises(DataRetrievalError, match="Paginated request failed") as excinfo:
         _run_get_stats_data_with_failure(
             httpx.ConnectError("stats-boom"),
             monkeypatch,
@@ -468,7 +469,7 @@ def test_get_stats_data_warning_includes_next_token(caplog, monkeypatch):
         "description": "upstream timeout",
     }
 
-    with pytest.raises(RuntimeError):
+    with pytest.raises(DataRetrievalError):
         _run_get_stats_data_with_failure(page2_503, monkeypatch)
 
     warnings_ = [r.getMessage() for r in caplog.records if r.levelno == logging.WARNING]
@@ -791,9 +792,9 @@ def test_parse_retry_after_returns_none_for_unparseable():
 
 
 def test_raise_for_non_200_raises_service_unavailable_for_5xx():
-    """5xx must surface as the typed ``ServiceUnavailable`` (not bare
-    ``RuntimeError``) so the chunker can wrap it as a resumable
-    ``ServiceInterrupted`` rather than treating it as a fatal error."""
+    """5xx must surface as the typed ``ServiceUnavailable`` so the chunker can
+    wrap it as a resumable ``ServiceInterrupted`` rather than treating it as a
+    fatal error."""
     resp = _make_response(503, "", reason="Service Unavailable")
     resp.headers["Retry-After"] = "120"
     with pytest.raises(ServiceUnavailable) as excinfo:
@@ -812,22 +813,22 @@ def test_raise_for_non_200_attaches_retry_after_to_rate_limited():
     assert excinfo.value.retry_after == 60.0
 
 
-def test_raise_for_non_200_still_raises_bare_runtimeerror_for_other_4xx():
-    """4xx other than 429 (e.g. 400 Bad Request) is a programmer error
-    that retry won't fix. Must remain bare ``RuntimeError`` so the
-    chunker's classifier doesn't wrap it as resumable."""
+def test_raise_for_non_200_400_raises_http_error():
+    """400 raises a fatal ``HTTPError`` (status_code=400) the chunker won't
+    resume. It must NOT be a ``TransientError`` so the chunker's classifier
+    treats it as fatal rather than wrapping it as resumable."""
     resp = _make_response(
         400,
         '{"code": "BadRequest", "description": "missing parameter"}',
         reason="Bad Request",
         content_type="application/json",
     )
-    with pytest.raises(RuntimeError) as excinfo:
+    with pytest.raises(HTTPError) as excinfo:
         _raise_for_non_200(resp)
-    # Must be exactly RuntimeError — not RateLimited, not
-    # ServiceUnavailable. Both subclass RuntimeError, so a plain
-    # ``pytest.raises(RuntimeError)`` would match either.
-    assert type(excinfo.value) is RuntimeError
+    assert excinfo.value.status_code == 400
+    # Fatal, not transient: the chunker keys off ``isinstance(_, TransientError)``
+    # to decide whether to wrap a failure as a resumable ChunkInterrupted.
+    assert not isinstance(excinfo.value, TransientError)
 
 
 def test_next_req_url_rejects_cross_host():
@@ -848,11 +849,9 @@ def test_next_req_url_rejects_cross_host():
 
 
 def test_check_ogc_requests_raises_typed_on_5xx(httpx_mock):
-    """``_check_ogc_requests`` previously called ``resp.raise_for_status()``,
-    which leaks raw ``httpx.HTTPStatusError``. Now routes through
-    ``_raise_for_non_200`` so callers see ``ServiceUnavailable`` /
-    ``RateLimited`` / ``RuntimeError`` — the same typed contract as
-    the main data path."""
+    """``_check_ogc_requests`` routes a non-200 through ``_raise_for_non_200``,
+    so a 5xx surfaces as the typed ``ServiceUnavailable`` — the same typed
+    contract as the main data path, not a raw ``httpx`` error."""
     httpx_mock.add_response(
         method="GET",
         url=f"{OGC_API_URL}/collections/daily/schema",
diff --git a/tests/waterservices_test.py b/tests/waterservices_test.py
index 874c2a0e..4b30456e 100644
--- a/tests/waterservices_test.py
+++ b/tests/waterservices_test.py
@@ -3,6 +3,7 @@
 import pytest
 from pandas import DataFrame
 
+from dataretrieval.exceptions import HTTPError, NoSitesError
 from dataretrieval.nwis import (
     get_discharge_peaks,
     get_dv,
@@ -15,7 +16,6 @@
     query_waterservices,
     what_sites,
 )
-from dataretrieval.utils import NoSitesError
 
 try:
     import geopandas as gpd
@@ -62,7 +62,7 @@ def test_query_validation(httpx_mock):
         "https://waterservices.usgs.gov/nwis/stat?sites=bad_site_id&format=rdb"
     )
     httpx_mock.add_response(method="GET", url=request_url, status_code=400)
-    with pytest.raises(ValueError) as type_error:
+    with pytest.raises(HTTPError) as type_error:
         get_stats(sites="bad_site_id")
     assert request_url in str(type_error)