commit c3719ec86f
kalvinparker 2025-11-14 15:12:51 +00:00 committed by GitHub
21 changed files with 1010 additions and 17 deletions

37
.github/workflows/race.yml vendored Normal file
View file

@@ -0,0 +1,37 @@
name: Race Detector
on:
workflow_dispatch: {}
pull_request:
branches:
- main
jobs:
race:
name: Run tests with race detector
runs-on: ubuntu-latest
strategy:
matrix:
go-version: [1.20.x]
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Go
uses: actions/setup-go@v4
with:
go-version: ${{ matrix.go-version }}
- name: Install build tools (for cgo / race detector)
run: |
sudo apt-get update
sudo apt-get install -y build-essential
- name: Ensure CGO enabled
run: echo "CGO_ENABLED=1" >> $GITHUB_ENV
- name: Run tests with race detector
run: |
go test -race ./... -v

14
CHANGELOG.md Normal file
View file

@@ -0,0 +1,14 @@
# Changelog
All notable changes to this project will be documented in this file.
## [Unreleased]
- Add `--registry-ca-validate` flag: when supplied together with `--registry-ca`, Watchtower validates the provided CA bundle on startup and fails fast on misconfiguration. Prefer this over `--insecure-registry` in production.
- Security: registry TLS verification is now secure-by-default for internal HEAD/token requests; `--insecure-registry` is opt-in for testing.
- Registry CA support: add `--registry-ca` to provide a PEM bundle merged into system roots, and `--registry-ca-validate` to fail-fast on invalid bundles.
- Registry token caching: in-memory, concurrent-safe token cache added for registry auth tokens (honors `expires_in`), with deterministic and concurrency unit tests.
- Testability: refactored registry transport construction and exposed test helpers; added an injectable `now` variable for deterministic time-dependent tests.
- Docs: added detailed update flow docs, diagrams, and a developer guide (`docs/update-flow*.md`, PlantUML, and rendered SVG).
- CI: added a GitHub Actions workflow to run `go test -race ./...` with CGO enabled; recommended containerized `-race` run steps added to the developer guide.

View file

@@ -33,6 +33,21 @@ $ docker run --detach \
Watchtower is intended to be used in homelabs, media centers, local dev environments, and similar. We do **not** recommend using Watchtower in a commercial or production environment. If that is you, you should be looking into using Kubernetes. If that feels like too big a step for you, please look into solutions like [MicroK8s](https://microk8s.io/) and [k3s](https://k3s.io/) that take away a lot of the toil of running a Kubernetes cluster.
### Using a custom registry CA (private registries)
If you run Watchtower against a private registry that uses a custom TLS certificate, provide the CA bundle and enable validation at startup so Watchtower fails fast on misconfiguration:
```bash
$ docker run --detach \
--name watchtower \
--volume /var/run/docker.sock:/var/run/docker.sock \
containrrr/watchtower \
--registry-ca /etc/ssl/certs/my-registry-ca.pem \
--registry-ca-validate=true
```
Prefer providing a CA bundle and enabling `--registry-ca-validate` over disabling TLS verification with `--insecure-registry` in production environments.
## Documentation
The full documentation is available at https://containrrr.dev/watchtower.

View file

@@ -20,6 +20,7 @@ import (
"github.com/containrrr/watchtower/pkg/container"
"github.com/containrrr/watchtower/pkg/filters"
"github.com/containrrr/watchtower/pkg/metrics"
"github.com/containrrr/watchtower/pkg/registry"
"github.com/containrrr/watchtower/pkg/notifications"
t "github.com/containrrr/watchtower/pkg/types"
"github.com/robfig/cron"
@@ -118,6 +119,30 @@ func PreRun(cmd *cobra.Command, _ []string) {
removeVolumes, _ := f.GetBool("remove-volumes")
warnOnHeadPullFailed, _ := f.GetString("warn-on-head-failure")
// Configure TLS verification for registry HEAD/token requests. Default is secure (verify certs).
insecureRegistry, _ := f.GetBool("insecure-registry")
registry.InsecureSkipVerify = insecureRegistry
if insecureRegistry {
log.Warn("TLS certificate verification for registry requests is disabled (insecure). This should only be used for testing.)")
}
registryCABundle, _ := f.GetString("registry-ca")
if registryCABundle != "" {
registry.RegistryCABundle = registryCABundle
log.Debugf("Using registry CA bundle: %s", registryCABundle)
}
// Optionally validate CA bundle at startup
validateCABundle, _ := f.GetBool("registry-ca-validate")
if validateCABundle && registry.RegistryCABundle != "" {
if pool := registry.GetRegistryCertPool(); pool == nil {
log.Fatalf("Failed to validate registry CA bundle at %s", registry.RegistryCABundle)
}
log.Info("Registry CA bundle validated successfully")
} else if validateCABundle && registry.RegistryCABundle == "" {
log.Fatalf("--registry-ca-validate was set but no --registry-ca was provided")
}
if monitorOnly && noPull {
log.Warn("Using `WATCHTOWER_NO_PULL` and `WATCHTOWER_MONITOR_ONLY` simultaneously might lead to no action being taken at all. If this is intentional, you may safely ignore this message.")
}

View file

@@ -0,0 +1,29 @@
# Summary Checkpoint
This file marks a checkpoint for summarizing repository changes.
All future requests that ask to "summarise all the changes thus far" should consider
only changes made after this checkpoint was created.
Checkpoint timestamp (UTC): 2025-11-13T12:00:00Z
Notes:
- Purpose: act as a stable anchor so that subsequent "summarise all the changes thus far"
requests will include only modifications after this point.
- Location: `docs/SUMMARY_CHECKPOINT.md`
Recent delta (since previous checkpoint):
- Added CLI flags and wiring: `--registry-ca` and `--registry-ca-validate` (startup validation).
- Implemented secure-by-default registry transport behavior and support for a custom CA bundle.
- Introduced an in-memory bearer token cache (honors `expires_in`) and refactored time usage
to allow deterministic tests via an injectable `now` function.
- Added deterministic unit tests for the token cache (`pkg/registry/auth/auth_cache_test.go`).
- Added quickstart documentation snippets to `README.md`, `docs/index.md`, and
`docs/private-registries.md` showing `--registry-ca` + `--registry-ca-validate`.
- Created `CHANGELOG.md` with an Unreleased entry for the new `--registry-ca-validate` flag.
- Ran package tests locally: `pkg/registry/auth` and `pkg/registry/digest` — tests passed
(some integration tests were skipped due to missing credentials).
If you want the next checkpoint after more changes (e.g., mapping the update call chain,
documenting data shapes, or adding concurrency tests), request another summary break.

View file

@@ -460,8 +460,34 @@ Alias for:
--notification-report
--notification-template porcelain.VERSION.summary-no-log
Argument: --porcelain, -P
Environment Variable: WATCHTOWER_PORCELAIN
Possible values: v1
Default: -
```
## Registry TLS options
Options to configure TLS verification when Watchtower talks to image registries for HEAD/manifest and token requests.
Disables TLS certificate verification for registry requests. Insecure; use only for testing.
```text
Argument: --insecure-registry
Environment Variable: WATCHTOWER_INSECURE_REGISTRY
Type: Boolean
Default: false
```
Path to a PEM-encoded CA certificate bundle inside the container to trust for private registries. The bundle is merged with the system roots.
```text
Argument: --registry-ca
Environment Variable: WATCHTOWER_REGISTRY_CA
Type: String (path to PEM bundle inside container)
Default: -
```
Fail to start if the provided registry CA bundle cannot be loaded or parsed. Requires `--registry-ca`.
```text
Argument: --registry-ca-validate
Environment Variable: WATCHTOWER_REGISTRY_CA_VALIDATE
Type: Boolean
Default: false
```

Rendered SVG diagram added (22 KiB); file diff suppressed because one or more lines are too long.

78
docs/developer-guide.md Normal file
View file

@@ -0,0 +1,78 @@
<!-- Developer guide: local dev and test commands -->
# Developer Guide — Running tests & race detector
This short guide covers how to run unit tests locally and how to run the race-enabled test suite in a Linux container (recommended for Windows hosts).
## Prerequisites
- Go toolchain (a version compatible with the project's `go.mod`). To run `go test` locally, ensure `go` is in your PATH.
- Docker (for running a Linux container to execute `-race` with CGO enabled)
- Optional: GitHub CLI `gh` to open PRs from the command line.
## Run unit tests locally
From the repository root:
PowerShell
```powershell
go test ./... -v
```
If you only want to run the tests for a single package, run:
```powershell
go test ./pkg/registry/auth -v
```
## Run race detector (recommended via container on Windows)
The Go race detector requires cgo and a C toolchain. On Linux runners this is usually available; on Windows it's simplest to run tests inside a Linux container.
Example (PowerShell):
```powershell
docker run --rm -v "${PWD}:/work" -w /work -e CGO_ENABLED=1 golang:1.20 bash -lc "apt-get update && apt-get install -y build-essential ; /usr/local/go/bin/go test -race ./... -v"
```
Notes:
- The command mounts the current working directory into the container and installs `build-essential` to provide a C toolchain so `-race` works.
- If you prefer a faster run, run `go test -run TestName ./pkg/yourpkg -race`.
## Render PlantUML diagrams (local)
To render PlantUML into SVG using Docker (no Java/PlantUML install required):
```powershell
docker run --rm -v "${PWD}:/work" -w /work plantuml/plantuml -tsvg docs/diagrams/update-flow.puml
```
Move the generated SVG into the docs assets folder:
```powershell
mkdir docs/assets/images -Force
Move-Item docs/diagrams/update-flow.svg docs/assets/images/update-flow.svg -Force
```
## Create a branch and PR (example)
Example git commands:
```powershell
git checkout -b docs/update-flow
git add docs/update-flow.md docs/diagrams/update-flow.puml docs/developer-guide.md docs/assets/images/update-flow.svg
git commit -m "docs: add update flow docs, diagrams and developer guide"
git push -u origin docs/update-flow
```
If you have the GitHub CLI installed you can open a PR with:
```powershell
gh pr create --title "docs: update flow + diagrams" --body "Adds update flow documentation, a PlantUML diagram and developer guide." --base main
```
If `gh` is not installed you can open a PR via GitHub web UI after pushing the branch.

View file

@@ -0,0 +1,46 @@
@startuml
title Watchtower Update Flow
actor User as CLI
participant "cmd (root)" as CMD
participant "internal/actions.Update" as ACT
participant "container.Client" as CLIENT
participant "pkg/registry/digest" as DIG
participant "pkg/registry/auth" as AUTH
participant "pkg/registry" as REG
database "Docker Engine" as DOCKER
CLI -> CMD: trigger runUpdatesWithNotifications()
CMD -> ACT: Update(client, UpdateParams)
ACT -> CLIENT: ListContainers(filter)
loop per container
ACT -> CLIENT: IsContainerStale(container, params)
CLIENT -> CLIENT: PullImage (maybe)
CLIENT -> DIG: CompareDigest(container, registryAuth)
DIG -> AUTH: GetToken(challenge)
AUTH -> AUTH: getCachedToken / storeToken
DIG -> REG: newTransport() (uses --insecure-registry / --registry-ca)
DIG -> DOCKER: HEAD manifest with token
alt digest matches
CLIENT --> ACT: no pull needed
else
CLIENT -> DOCKER: ImagePull(image)
end
CLIENT --> ACT: HasNewImage -> stale/newestImage
end
ACT -> ACT: SortByDependencies
ACT -> CLIENT: StopContainer / StartContainer (with lifecycle hooks)
ACT -> CLIENT: RemoveImageByID (cleanup)
ACT --> CMD: progress.Report()
note right of AUTH
Tokens are cached by auth URL (realm+service+scope)
ExpiresIn (seconds) sets TTL when provided
end note
note left of REG
TLS is secure-by-default
`--registry-ca` provides PEM bundle
`--registry-ca-validate` fails startup on invalid bundle
end note
@enduml

View file

@@ -63,3 +63,17 @@ the following command:
volumes:
- /var/run/docker.sock:/var/run/docker.sock
```
Quick note: if your registry uses a custom TLS certificate, mount the CA bundle and enable startup validation so Watchtower fails fast on misconfiguration:
```bash
docker run --detach \
--name watchtower \
--volume /var/run/docker.sock:/var/run/docker.sock \
--volume /etc/ssl/private-certs:/certs \
containrrr/watchtower \
--registry-ca /certs/my-registry-ca.pem \
--registry-ca-validate=true
```
Prefer this over `--insecure-registry` for production.

View file

@@ -205,3 +205,45 @@ A few additional notes:
4. An alternative to adding the various variables is to create a ~/.aws/config and ~/.aws/credentials files and
place the settings there, then mount the ~/.aws directory to / in the container.
## Token caching and required scopes
Watchtower attempts to minimize calls to registry auth endpoints by caching short-lived bearer tokens when available.
- Token cache: When Watchtower requests a bearer token from a registry auth endpoint, it will cache the token in-memory keyed by the auth realm + service + scope. If the token response includes an `expires_in` field, Watchtower will honor it and refresh the token only after expiry. This reduces load and rate-limit pressure on registry auth servers.
- Required scope: Watchtower requests tokens with the following scope format: `repository:<image-path>:pull`. This is sufficient for read-only operations required by Watchtower (HEAD or pull). For registries enforcing fine-grained scopes, ensure the provided credentials can request tokens with `pull` scope for the repositories you want to monitor.
- Credential sources: Watchtower supports these sources (in priority order):
1. Environment variables: `REPO_USER` and `REPO_PASS`.
2. Docker config file (`DOCKER_CONFIG` path or default location, typically `/root/.docker/config.json` when running in container) including support for credential helpers and native stores.
When possible, prefer using short-lived tokens or credential helpers and avoid embedding long-lived plaintext credentials in environment variables.
### Providing a custom CA bundle
For private registries using certificates signed by an internal CA, prefer providing a PEM-encoded CA bundle instead of disabling certificate verification. Use the `--registry-ca` flag or the `WATCHTOWER_REGISTRY_CA` environment variable to point to a file inside the container with one or more PEM-encoded certificates. Watchtower will merge the provided bundle with the system roots and validate registry certificates accordingly.
Example (docker run):
```bash
docker run -v /etc/ssl/private-certs:/certs -e WATCHTOWER_REGISTRY_CA=/certs/my-registry-ca.pem containrrr/watchtower
```
This is the recommended approach instead of `--insecure-registry` for production deployments.
#### Quick example: validate CA at startup
If you want Watchtower to fail fast when the provided CA bundle is invalid or missing, mount the CA into the container and enable validation:
```bash
docker run --detach \
--name watchtower \
--volume /var/run/docker.sock:/var/run/docker.sock \
--volume /etc/ssl/private-certs:/certs \
containrrr/watchtower \
--registry-ca /certs/my-registry-ca.pem \
--registry-ca-validate=true
```
This makes misconfiguration explicit during startup and is recommended for unattended deployments.

View file

@@ -0,0 +1,186 @@
# Watchtower — Detailed Update Flow & Data Shapes
This file provides a precise, developer-oriented mapping of the update call chain and full data-shape details with file references to help maintenance and debugging.
Note: file paths are relative to the repository root.
## Entry points
- `main()` in `main.go`
- Sets default log level and calls `cmd.Execute()`.
- `cmd.Execute()` / Cobra root command — `cmd/root.go`
- `PreRun` configures flags, creates `container.Client`, sets registry flags (`registry.InsecureSkipVerify`, `registry.RegistryCABundle`) and may validate CA bundle.
- `runUpdatesWithNotifications` constructs `types.UpdateParams` and calls `internal/actions.Update`.
## Primary orchestration
- `internal/actions.Update(client container.Client, params types.UpdateParams) (types.Report, error)` in `internal/actions/update.go`
- High level steps:
1. Optional pre-checks: `pkg/lifecycle.ExecutePreChecks(client, params)` if `params.LifecycleHooks`.
2. Container discovery: `client.ListContainers(params.Filter)` (wrapper in `pkg/container/client.go`).
3. For each container:
- `client.IsContainerStale(container, params)` — defined in `pkg/container/client.go`.
- Pull logic: `client.PullImage(ctx, container)` (may skip via `container.IsNoPull(params)`).
- Digest optimization: `pkg/registry/digest.CompareDigest(container, registryAuth)`.
- Token flow: `pkg/registry/auth.GetToken` -> `GetBearerHeader` -> `GetAuthURL`.
- Token cache: see `pkg/registry/auth/auth.go` (`getCachedToken`, `storeToken`).
- HEAD request: `pkg/registry/digest.GetDigest` constructs `http.Client` with `digest.newTransport()`.
- `client.HasNewImage(ctx, container)` compares local and remote image IDs.
- `container.VerifyConfiguration()` to ensure image/container metadata is sufficient to recreate the container.
- Mark progress via `session.Progress` (`AddScanned`, `AddSkipped`), call `containers[i].SetStale(stale)`.
4. Sort by dependencies: `sorter.SortByDependencies(containers)`.
5. `UpdateImplicitRestart(containers)` sets `LinkedToRestarting` flags for dependent containers.
6. Build `containersToUpdate` (non-monitor-only) and mark for update in `Progress`.
7. Update execution:
- Rolling restart (`params.RollingRestart`): `performRollingRestart` stops and restarts each marked container in reverse order.
- Normal: `stopContainersInReversedOrder` then `restartContainersInSortedOrder`.
- Stop: `stopStaleContainer` optionally runs `lifecycle.ExecutePreUpdateCommand` and `client.StopContainer`.
- Restart: `restartStaleContainer` may `client.RenameContainer` (if self), `client.StartContainer`, then `lifecycle.ExecutePostUpdateCommand`.
8. Optional `cleanupImages(client, imageIDs)` when `params.Cleanup`.
9. Optional post-checks: `pkg/lifecycle.ExecutePostChecks(client, params)`.
10. Return `progress.Report()`.
## File-level locations (key functions)
- `internal/actions/update.go`
- `Update`, `performRollingRestart`, `stopContainersInReversedOrder`, `stopStaleContainer`, `restartContainersInSortedOrder`, `restartStaleContainer`, `UpdateImplicitRestart`.
- `pkg/container/client.go`
- `dockerClient.IsContainerStale`, `PullImage`, `HasNewImage`, `ListContainers`, `GetContainer`, `StopContainer`, `StartContainer`, `RenameContainer`, `RemoveImageByID`, `ExecuteCommand`.
- `pkg/container/container.go`
- Concrete `Container` struct and implementation of `types.Container`.
- `pkg/registry/auth/auth.go`
- `GetToken`, `GetBearerHeader`, token cache functions `getCachedToken` and `storeToken`.
- `pkg/registry/digest/digest.go`
- `CompareDigest`, `GetDigest`, `newTransport` (transport respects `registry.InsecureSkipVerify` and `registry.GetRegistryCertPool()`), `NewTransportForTest`.
- `pkg/registry/registry.go`
- `InsecureSkipVerify` (bool), `RegistryCABundle` (string), and `GetRegistryCertPool()`.
- `pkg/lifecycle/lifecycle.go`
- `ExecutePreChecks`, `ExecutePostChecks`, `ExecutePreUpdateCommand`, `ExecutePostUpdateCommand`.
- `pkg/session/progress.go` and `pkg/session/container_status.go`
- `Progress` (map) and `ContainerStatus` with fields and state enum.
## Data shapes — full details
Below are the main data shapes used in the update flow with fields and brief descriptions.
### types.UpdateParams (file: `pkg/types/update_params.go`)
```go
type UpdateParams struct {
Filter Filter // Filter applied to container selection
Cleanup bool // Whether to remove old images after update
NoRestart bool // Skip restarting containers
Timeout time.Duration // Timeout used when stopping containers / exec
MonitorOnly bool // Global monitor-only flag
NoPull bool // Global no-pull flag
LifecycleHooks bool // Enable lifecycle hook commands
RollingRestart bool // Use rolling restart strategy
LabelPrecedence bool // Prefers container labels over CLI flags
}
```
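As a purely illustrative sketch (the `exampleParams` helper and its values are not part of the codebase; `cmd` derives the real values from CLI flags and environment variables, and `Filter` is normally built by `pkg/filters`, so it is left at its zero value here):
```go
package main

import (
	"time"

	"github.com/containrrr/watchtower/pkg/types"
)

func exampleParams() types.UpdateParams {
	// Illustrative values only; see cmd/root.go for the real wiring.
	return types.UpdateParams{
		Cleanup:        true,             // remove old images after a successful update
		Timeout:        30 * time.Second, // grace period when stopping containers
		LifecycleHooks: true,             // run pre/post update commands
	}
}
```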
### container.Client interface (file: `pkg/container/client.go`)
Methods (signatures):
- `ListContainers(Filter) ([]types.Container, error)` — discover containers
- `GetContainer(containerID types.ContainerID) (types.Container, error)` — inspect container
- `StopContainer(types.Container, time.Duration) error`
- `StartContainer(types.Container) (types.ContainerID, error)`
- `RenameContainer(types.Container, string) error`
- `IsContainerStale(types.Container, types.UpdateParams) (bool, types.ImageID, error)`
- `ExecuteCommand(containerID types.ContainerID, command string, timeout int) (SkipUpdate bool, err error)`
- `RemoveImageByID(types.ImageID) error`
- `WarnOnHeadPullFailed(types.Container) bool`
### types.Container interface (file: `pkg/types/container.go`)
Key methods used during update: (method signatures only)
- `ContainerInfo() *types.ContainerJSON`
- `ID() ContainerID`
- `IsRunning() bool`
- `Name() string`
- `ImageID() ImageID`
- `SafeImageID() ImageID`
- `ImageName() string`
- `Enabled() (bool, bool)`
- `IsMonitorOnly(UpdateParams) bool`
- `Scope() (string, bool)`
- `Links() []string`
- `ToRestart() bool`
- `IsWatchtower() bool`
- `StopSignal() string`
- `HasImageInfo() bool`
- `ImageInfo() *types.ImageInspect`
- `GetLifecyclePreCheckCommand() string`
- `GetLifecyclePostCheckCommand() string`
- `GetLifecyclePreUpdateCommand() string`
- `GetLifecyclePostUpdateCommand() string`
- `VerifyConfiguration() error`
- `SetStale(bool)` / `IsStale() bool`
- `IsNoPull(UpdateParams) bool`
- `SetLinkedToRestarting(bool)` / `IsLinkedToRestarting() bool`
- `PreUpdateTimeout() int` / `PostUpdateTimeout() int`
- `IsRestarting() bool`
- `GetCreateConfig() *dockercontainer.Config` / `GetCreateHostConfig() *dockercontainer.HostConfig`
Concrete `Container` fields (file: `pkg/container/container.go`):
- `LinkedToRestarting bool`
- `Stale bool`
- `containerInfo *types.ContainerJSON`
- `imageInfo *types.ImageInspect`
### session.ContainerStatus (file: `pkg/session/container_status.go`)
Fields:
- `containerID types.ContainerID`
- `oldImage types.ImageID`
- `newImage types.ImageID`
- `containerName string`
- `imageName string`
- `error` (embedded error)
- `state session.State` (enum: Skipped/Scanned/Updated/Failed/Fresh/Stale)
`session.Progress` is `map[types.ContainerID]*ContainerStatus` and exposes helper methods: `AddScanned`, `AddSkipped`, `MarkForUpdate`, `UpdateFailed`, and `Report()` which returns a `types.Report`.
### types.TokenResponse (defined in `pkg/types`, used by `pkg/registry/auth`)
- `Token string`
- `ExpiresIn int` (seconds)
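For reference, this is the struct added in `pkg/types` by this change:
```go
// TokenResponse is returned by the registry on successful authentication
type TokenResponse struct {
	Token     string `json:"token"`
	ExpiresIn int    `json:"expires_in,omitempty"`
}
```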
### Registry TLS configuration (file: `pkg/registry/registry.go`)
- `var InsecureSkipVerify bool` — when true, `digest.newTransport()` sets `tls.Config{InsecureSkipVerify: true}`
- `var RegistryCABundle string` — path to PEM bundle; `GetRegistryCertPool()` reads/merges it into system roots
### Token cache (file: `pkg/registry/auth/auth.go`)
Implementation details:
- `type cachedToken struct { token string; expiresAt time.Time }`
- `var tokenCache = map[string]cachedToken{}` protected by `tokenCacheMu *sync.Mutex`
- `var now = time.Now` (overridable in tests)
- `getCachedToken(key string) string` returns token if present and not expired (deletes expired entries)
- `storeToken(key, token string, ttl int)` stores token with TTL (seconds), ttl<=0 => no expiry
- Cache key: full auth URL string (realm+service+scope)
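A condensed sketch of how the injectable `now` keeps expiry tests deterministic; the test name here is illustrative, and the full versions live in `pkg/registry/auth/auth_cache_test.go`:
```go
package auth

import (
	"testing"
	"time"
)

func TestTokenExpirySketch(t *testing.T) {
	// Swap the package-level clock for a controllable one and restore it afterwards.
	origNow := now
	defer func() { now = origNow }()
	current := time.Date(2025, time.November, 13, 12, 0, 0, 0, time.UTC)
	now = func() time.Time { return current }

	storeToken("sketch-key", "tok", 1) // 1 second TTL
	if got := getCachedToken("sketch-key"); got != "tok" {
		t.Fatalf("expected cache hit before expiry, got %q", got)
	}

	// Advance the fake clock past the TTL; no sleeping required.
	current = current.Add(2 * time.Second)
	if got := getCachedToken("sketch-key"); got != "" {
		t.Fatalf("expected token to be expired, got %q", got)
	}
}
```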
## Transport behavior for digest HEAD & token requests
- `pkg/registry/digest.newTransport()` builds a `*http.Transport` that:
- Uses `http.ProxyFromEnvironment` and sane defaults for timeouts and connection pooling.
- If `registry.InsecureSkipVerify` is true, sets `TLSClientConfig = &tls.Config{InsecureSkipVerify: true}`.
- Else, if `registry.GetRegistryCertPool()` returns a non-nil pool, sets `TLSClientConfig = &tls.Config{RootCAs: pool}` (merges system roots + bundle).
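A simplified sketch of that TLS selection; `newTransportSketch` is illustrative only, and the real `newTransport` in `pkg/registry/digest/digest.go` additionally sets the proxy, dialer, timeout, and connection-pool options listed above:
```go
package digest

import (
	"crypto/tls"
	"net/http"

	"github.com/containrrr/watchtower/pkg/registry"
)

// newTransportSketch shows only the TLS decision described above.
func newTransportSketch() *http.Transport {
	tr := &http.Transport{Proxy: http.ProxyFromEnvironment}
	if registry.InsecureSkipVerify {
		// Opt-in insecure mode: certificate verification is disabled entirely.
		tr.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
	} else if pool := registry.GetRegistryCertPool(); pool != nil {
		// A custom CA bundle was provided: trust system roots plus the bundle.
		tr.TLSClientConfig = &tls.Config{RootCAs: pool}
	}
	return tr
}
```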
## Edge cases and behavior notes
- If `container.VerifyConfiguration()` fails, container is marked skipped with the error logged and the update continues for other containers.
- If `lifecycle.ExecutePreUpdateCommand` returns `skipUpdate` (exit code 75), the container update is skipped.
- Watchtower self-update: the current watchtower container is renamed before starting the new container so the new container can reclaim the original name.
- Digest HEAD failures fall back to full `docker pull` and may log at `Warn` depending on `WarnOnHeadPullFailed`.
- Tokens are scoped per `repository:<path>:pull` — this prevents accidental reuse across repositories.
## How to use this doc
- Use the file references above to jump to implementations when changing behavior (e.g., token caching or TLS transport changes).
- For any change that affects pull/token behavior, update `pkg/registry/auth` tests and `pkg/registry/digest` tests, and run race-enabled tests.

166
docs/update-flow.md Normal file
View file

@@ -0,0 +1,166 @@
<!--
DO NOT EDIT: Generated documentation describing the Watchtower update flow.
This file contains the end-to-end flow, data shapes, and a mermaid diagram.
-->
# Watchtower Update Flow
This document explains the end-to-end update flow in the Watchtower codebase, including the main function call chain, the key data shapes, and diagrams (Mermaid & PlantUML).
## Quick Summary
- Trigger: CLI (`watchtower` start / scheduler / HTTP API update) constructs `types.UpdateParams` and calls `internal/actions.Update`.
- `internal/actions.Update` orchestrates discovery, stale detection, lifecycle hooks, stopping/restarting containers, cleanup and reporting.
- Image pull optimization uses a digest HEAD request (`pkg/registry/digest`) and a token flow (`pkg/registry/auth`) with an in-memory token cache.
- TLS for HEAD/token requests is secure-by-default and configurable via `--insecure-registry`, `--registry-ca`, and `--registry-ca-validate`.
---
## Call Chain (step-by-step)
1. CLI start / scheduler / HTTP API
- Entry points: `main()` -> `cmd.Execute()` -> Cobra command `Run` / `PreRun`.
- `cmd.PreRun` reads flags and config, sets `registry.InsecureSkipVerify` and `registry.RegistryCABundle`.
2. Run update
- `cmd.runUpdatesWithNotifications` builds `types.UpdateParams` and calls `internal/actions.Update(client, updateParams)`.
3. Orchestration: `internal/actions.Update`
- If `params.LifecycleHooks` -> `lifecycle.ExecutePreChecks(client, params)`
- Discover containers: `client.ListContainers(params.Filter)`
- For each container:
- `client.IsContainerStale(container, params)`
- calls `client.PullImage(ctx, container)` unless `container.IsNoPull(params)` is true
- `PullImage` obtains `types.ImagePullOptions` via `pkg/registry.GetPullOptions(image)`
- tries digest optimization: `pkg/registry/digest.CompareDigest(container, opts.RegistryAuth)`
- `auth.GetToken(container, registryAuth)` obtains a token:
- sends GET to the challenge URL (`/v2/`), inspects `WWW-Authenticate`
- for `Bearer`: constructs auth URL with `realm`, `service`, and `scope` (`repository:<path>:pull`)
- checks the in-memory cache (`auth.getCachedToken(cacheKey)`) keyed by the auth URL (see the condensed sketch after this call chain)
- if missing, requests token from auth URL (Basic header if Docker cred present), parses `types.TokenResponse` and calls `auth.storeToken(cacheKey, token, ExpiresIn)`
- `digest.GetDigest(manifestURL, token)` performs an HTTP `HEAD` using a transport created by `digest.newTransport()`
- transport respects `registry.InsecureSkipVerify` and uses `registry.GetRegistryCertPool()` when a CA bundle is provided
- If remote digest matches a local digest, `PullImage` skips the pull
- `client.HasNewImage(ctx, container)` compares local image ID with remote image ID
- `targetContainer.VerifyConfiguration()` (fail/skip logic)
- Mark scanned/skipped in `session.Progress` and set `container.SetStale(stale)`
- Sort containers: `sorter.SortByDependencies(containers)`
- `UpdateImplicitRestart(containers)` sets `LinkedToRestarting` flags
- Build `containersToUpdate` and mark them for update in `Progress`
- Update strategy:
- Rolling restart: `performRollingRestart(containersToUpdate, client, params)`
- `stopStaleContainer(c)` -> `restartStaleContainer(c)` per container
- Normal: `stopContainersInReversedOrder(...)` -> `restartContainersInSortedOrder(...)`
- `stopStaleContainer` runs `lifecycle.ExecutePreUpdateCommand` and `client.StopContainer`
- `restartStaleContainer` may `client.RenameContainer` (watchtower self), `client.StartContainer` and `lifecycle.ExecutePostUpdateCommand`
- If `params.Cleanup` -> `cleanupImages(client, imageIDs)` calls `client.RemoveImageByID`
- If `params.LifecycleHooks` -> `lifecycle.ExecutePostChecks(client, params)`
- Return `progress.Report()` (a `types.Report` implemented from `session.Progress`)
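Below is a condensed, self-contained sketch of the token-cache interaction referenced above; `bearerFromCacheSketch` and the injected `fetchToken` are illustrative stand-ins for the real logic in `pkg/registry/auth.GetBearerHeader`:
```go
package auth

import (
	"fmt"
	"net/url"
)

// bearerFromCacheSketch: the fully composed auth URL (realm + service + scope)
// keys the in-memory token cache; a fresh token is only fetched on a miss.
func bearerFromCacheSketch(authURL *url.URL, fetchToken func() (token string, expiresIn int, err error)) (string, error) {
	cacheKey := authURL.String()
	if token := getCachedToken(cacheKey); token != "" {
		return fmt.Sprintf("Bearer %s", token), nil
	}
	token, expiresIn, err := fetchToken()
	if err != nil {
		return "", err
	}
	if token != "" {
		storeToken(cacheKey, token, expiresIn)
	}
	return fmt.Sprintf("Bearer %s", token), nil
}
```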
---
## Key data shapes
- `types.UpdateParams` (created in `cmd/runUpdatesWithNotifications`)
- `Filter` (types.Filter)
- `Cleanup bool`
- `NoRestart bool`
- `Timeout time.Duration`
- `MonitorOnly bool`
- `NoPull bool`
- `LifecycleHooks bool`
- `RollingRestart bool`
- `LabelPrecedence bool`
- `container.Client` interface (in `pkg/container/client.go`) — used by `actions.Update`
- `ListContainers(Filter) ([]types.Container, error)`
- `GetContainer(containerID) (types.Container, error)`
- `StopContainer(types.Container, time.Duration) error`
- `StartContainer(types.Container) (types.ContainerID, error)`
- `RenameContainer(types.Container, string) error`
- `IsContainerStale(types.Container, types.UpdateParams) (bool, types.ImageID, error)`
- `ExecuteCommand(containerID types.ContainerID, command string, timeout int) (SkipUpdate bool, err error)`
- `RemoveImageByID(types.ImageID) error`
- `WarnOnHeadPullFailed(types.Container) bool`
- `types.Container` interface (in `pkg/types/container.go`) — methods used include:
- `ID(), Name(), ImageName(), ImageID(), SafeImageID(), IsRunning(), IsRestarting()`
- `VerifyConfiguration() error`, `HasImageInfo() bool`, `ImageInfo() *types.ImageInspect`
- lifecycle hooks: `GetLifecyclePreUpdateCommand(), GetLifecyclePostUpdateCommand(), PreUpdateTimeout(), PostUpdateTimeout()`
- flags: `IsNoPull(UpdateParams), IsMonitorOnly(UpdateParams), ToRestart(), IsWatchtower()`
- `session.Progress` and `session.ContainerStatus` (reporting)
- `Progress` is a map `map[types.ContainerID]*ContainerStatus`
- `ContainerStatus` fields: `containerID, containerName, imageName, oldImage, newImage, error, state`
- `Progress.Report()` returns a `types.Report` implementation
- `types.TokenResponse` (used by `pkg/registry/auth`) contains `Token string` and `ExpiresIn int` (seconds)
---
## Diagrams
Mermaid sequence diagram (embedded):
```mermaid
sequenceDiagram
participant CLI as CLI / Scheduler / HTTP API
participant CMD as cmd
participant ACT as internal/actions.Update
participant CLIENT as container.Client (docker wrapper)
participant DIG as pkg/registry/digest
participant AUTH as pkg/registry/auth
participant REG as pkg/registry (TLS config)
participant DOCKER as Docker Engine
CLI->>CMD: trigger runUpdatesWithNotifications()
CMD->>ACT: Update(client, UpdateParams)
ACT->>CLIENT: ListContainers(filter)
loop per container
ACT->>CLIENT: IsContainerStale(container, params)
CLIENT->>CLIENT: PullImage (maybe)
CLIENT->>DIG: CompareDigest(container, registryAuth)
DIG->>AUTH: GetToken(challenge)
AUTH->>AUTH: getCachedToken / storeToken
DIG->>REG: newTransport() (uses --insecure-registry / --registry-ca)
DIG->>DOCKER: HEAD manifest with token
alt digest matches
CLIENT-->>ACT: no pull needed
else
CLIENT->>DOCKER: ImagePull(image)
end
CLIENT-->>ACT: HasNewImage -> stale/newestImage
end
ACT->>ACT: SortByDependencies
ACT->>CLIENT: StopContainer / StartContainer (with lifecycle hooks)
ACT->>CLIENT: RemoveImageByID (cleanup)
ACT-->>CMD: progress.Report()
```
For reference, a PlantUML source for the same sequence is available in `docs/diagrams/update-flow.puml`.
---
## Security & operational notes
- TLS: registry HEAD and token requests are secure-by-default. Use `--registry-ca` to add private CAs, and `--registry-ca-validate` to fail fast on bad bundles. Avoid `--insecure-registry` except for testing.
- Token cache: tokens are cached per auth URL (realm+service+scope). Tokens with `ExpiresIn` are cached for that TTL. No persistent or distributed cache is provided.
- Digest HEAD optimization avoids pulls and unnecessary rate consumption when possible. DockerHub/GHCR may rate-limit HEAD or behave differently; the code includes a `WarnOnAPIConsumption` heuristic.
---
## Where to look in the code
- Orchestration: `internal/actions/update.go`
- CLI wiring: `cmd/root.go`, `internal/flags/flags.go`
- Container wrapper: `pkg/container/client.go`, `pkg/container/container.go`
- Digest & transport: `pkg/registry/digest/digest.go`
- Token & auth handling: `pkg/registry/auth/auth.go`
- TLS helpers: `pkg/registry/registry.go`
- Lifecycle hooks: `pkg/lifecycle/lifecycle.go`
- Session/reporting: `pkg/session/*`, `pkg/types/report.go`
---
End of document.

View file

@@ -385,6 +385,21 @@ Should only be used for testing.`)
envString("WATCHTOWER_WARN_ON_HEAD_FAILURE"),
"When to warn about HEAD pull requests failing. Possible values: always, auto or never")
flags.Bool(
"insecure-registry",
envBool("WATCHTOWER_INSECURE_REGISTRY"),
"Disable TLS verification when contacting registries for HEAD/manifest requests (INSECURE; use only for testing)")
flags.String(
"registry-ca",
envString("WATCHTOWER_REGISTRY_CA"),
"Path to a PEM encoded CA certificate bundle to trust for private registries")
flags.Bool(
"registry-ca-validate",
envBool("WATCHTOWER_REGISTRY_CA_VALIDATE"),
"If set, watchtower will fail to start if the provided registry CA bundle cannot be loaded or parsed")
flags.Bool(
"notification-log-stdout",
envBool("WATCHTOWER_NOTIFICATION_LOG_STDOUT"),

View file

@@ -8,6 +8,8 @@ import (
"net/http"
"net/url"
"strings"
"sync"
"time"
"github.com/containrrr/watchtower/pkg/registry/helpers"
"github.com/containrrr/watchtower/pkg/types"
@@ -75,12 +77,20 @@ func GetChallengeRequest(URL url.URL) (*http.Request, error) {
// GetBearerHeader tries to fetch a bearer token from the registry based on the challenge instructions
func GetBearerHeader(challenge string, imageRef ref.Named, registryAuth string) (string, error) {
client := http.Client{}
authURL, err := GetAuthURL(challenge, imageRef)
if err != nil {
return "", err
}
// Build cache key from the auth realm, service and scope
cacheKey := authURL.String()
// Check cache first
if token := getCachedToken(cacheKey); token != "" {
return fmt.Sprintf("Bearer %s", token), nil
}
var r *http.Request
if r, err = http.NewRequest("GET", authURL.String(), nil); err != nil {
return "", err
@@ -88,8 +98,6 @@ func GetBearerHeader(challenge string, imageRef ref.Named, registryAuth string)
if registryAuth != "" {
logrus.Debug("Credentials found.")
// CREDENTIAL: Uncomment to log registry credentials
// logrus.Tracef("Credentials: %v", registryAuth)
r.Header.Add("Authorization", fmt.Sprintf("Basic %s", registryAuth))
} else {
logrus.Debug("No credentials found.")
@@ -99,6 +107,7 @@ func GetBearerHeader(challenge string, imageRef ref.Named, registryAuth string)
if authResponse, err = client.Do(r); err != nil {
return "", err
}
defer authResponse.Body.Close()
body, _ := io.ReadAll(authResponse.Body)
tokenResponse := &types.TokenResponse{}
@@ -108,9 +117,54 @@ func GetBearerHeader(challenge string, imageRef ref.Named, registryAuth string)
return "", err
}
// Cache token if ExpiresIn provided
if tokenResponse.Token != "" {
storeToken(cacheKey, tokenResponse.Token, tokenResponse.ExpiresIn)
}
return fmt.Sprintf("Bearer %s", tokenResponse.Token), nil
}
// token cache implementation
type cachedToken struct {
token string
expiresAt time.Time
}
var (
tokenCache = map[string]cachedToken{}
tokenCacheMu = &sync.Mutex{}
)
// now is a package-level function returning current time. It is a variable so tests
// can override it for deterministic behavior.
var now = time.Now
// getCachedToken returns token string if present and not expired, otherwise empty
func getCachedToken(key string) string {
tokenCacheMu.Lock()
defer tokenCacheMu.Unlock()
if ct, ok := tokenCache[key]; ok {
if ct.expiresAt.IsZero() || now().Before(ct.expiresAt) {
return ct.token
}
// expired
delete(tokenCache, key)
}
return ""
}
// storeToken stores token with optional ttl (seconds). ttl<=0 means no expiry.
func storeToken(key, token string, ttl int) {
tokenCacheMu.Lock()
defer tokenCacheMu.Unlock()
ct := cachedToken{token: token}
if ttl > 0 {
ct.expiresAt = now().Add(time.Duration(ttl) * time.Second)
}
tokenCache[key] = ct
}
// GetAuthURL from the instructions in the challenge
func GetAuthURL(challenge string, imageRef ref.Named) (*url.URL, error) {
loweredChallenge := strings.ToLower(challenge)

View file

@@ -0,0 +1,101 @@
package auth
import (
"sync"
"testing"
"time"
)
// Test concurrent stores and gets to ensure the mutex protects the cache
func TestTokenCacheConcurrentStoreAndGet(t *testing.T) {
// reset cache safely
tokenCacheMu.Lock()
tokenCache = map[string]cachedToken{}
tokenCacheMu.Unlock()
origNow := now
defer func() { now = origNow }()
now = time.Now
key := "concurrent-key"
token := "tok-concurrent"
var wg sync.WaitGroup
storers := 50
getters := 50
iters := 100
for i := 0; i < storers; i++ {
wg.Add(1)
go func() {
defer wg.Done()
for j := 0; j < iters; j++ {
storeToken(key, token, 0)
}
}()
}
for i := 0; i < getters; i++ {
wg.Add(1)
go func() {
defer wg.Done()
for j := 0; j < iters; j++ {
_ = getCachedToken(key)
}
}()
}
wg.Wait()
if got := getCachedToken(key); got != token {
t.Fatalf("expected token %q, got %q", token, got)
}
}
// Test concurrent access while token expires: readers run while time is advanced
func TestTokenCacheConcurrentExpiry(t *testing.T) {
// reset cache safely
tokenCacheMu.Lock()
tokenCache = map[string]cachedToken{}
tokenCacheMu.Unlock()
// Make now controllable and thread-safe
origNow := now
defer func() { now = origNow }()
base := time.Now()
var mu sync.Mutex
current := base
now = func() time.Time {
mu.Lock()
defer mu.Unlock()
return current
}
key := "concurrent-expire"
storeToken(key, "t", 1)
var wg sync.WaitGroup
readers := 100
for i := 0; i < readers; i++ {
wg.Add(1)
go func() {
defer wg.Done()
for j := 0; j < 100; j++ {
_ = getCachedToken(key)
}
}()
}
// advance time beyond ttl
mu.Lock()
current = current.Add(2 * time.Second)
mu.Unlock()
wg.Wait()
if got := getCachedToken(key); got != "" {
t.Fatalf("expected token to be expired, got %q", got)
}
}

View file

@@ -0,0 +1,54 @@
package auth
import (
"testing"
"time"
)
func TestTokenCacheStoreAndGetHitAndMiss(t *testing.T) {
// save and restore original now
origNow := now
defer func() { now = origNow }()
// deterministic fake time
base := time.Date(2025, time.November, 13, 12, 0, 0, 0, time.UTC)
now = func() time.Time { return base }
key := "https://auth.example.com/?service=example&scope=repository:repo:pull"
// ensure empty at start
if got := getCachedToken(key); got != "" {
t.Fatalf("expected empty cache initially, got %q", got)
}
// store with no expiry (ttl <= 0)
storeToken(key, "tok-123", 0)
if got := getCachedToken(key); got != "tok-123" {
t.Fatalf("expected token tok-123, got %q", got)
}
}
func TestTokenCacheExpiry(t *testing.T) {
// save and restore original now
origNow := now
defer func() { now = origNow }()
// deterministic fake time that can be moved forward
base := time.Date(2025, time.November, 13, 12, 0, 0, 0, time.UTC)
current := base
now = func() time.Time { return current }
key := "https://auth.example.com/?service=example&scope=repository:repo2:pull"
// store with short ttl (1 second)
storeToken(key, "short-tok", 1)
if got := getCachedToken(key); got != "short-tok" {
t.Fatalf("expected token short-tok immediately after store, got %q", got)
}
// advance time beyond ttl
current = current.Add(2 * time.Second)
if got := getCachedToken(key); got != "" {
t.Fatalf("expected token to be expired and removed, got %q", got)
}
}

View file

@@ -12,6 +12,7 @@ import (
"time"
"github.com/containrrr/watchtower/internal/meta"
"github.com/containrrr/watchtower/pkg/registry"
"github.com/containrrr/watchtower/pkg/registry/auth"
"github.com/containrrr/watchtower/pkg/registry/manifest"
"github.com/containrrr/watchtower/pkg/types"
@@ -76,19 +77,7 @@ func TransformAuth(registryAuth string) string {
// GetDigest from registry using a HEAD request to prevent rate limiting
func GetDigest(url string, token string) (string, error) {
tr := &http.Transport{
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
}).DialContext,
ForceAttemptHTTP2: true,
MaxIdleConns: 100,
IdleConnTimeout: 90 * time.Second,
TLSHandshakeTimeout: 10 * time.Second,
ExpectContinueTimeout: 1 * time.Second,
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
}
tr := newTransport()
client := &http.Client{Transport: tr}
req, _ := http.NewRequest("HEAD", url, nil)
@@ -124,3 +113,35 @@ func GetDigest(url string, token string) (string, error) {
}
return res.Header.Get(ContentDigestHeader), nil
}
// newTransport constructs an *http.Transport used for registry HEAD/token requests.
// It respects the package-level `registry.InsecureSkipVerify` toggle and the optional CA pool from `registry.GetRegistryCertPool()`.
func newTransport() *http.Transport {
tr := &http.Transport{
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
}).DialContext,
ForceAttemptHTTP2: true,
MaxIdleConns: 100,
IdleConnTimeout: 90 * time.Second,
TLSHandshakeTimeout: 10 * time.Second,
ExpectContinueTimeout: 1 * time.Second,
}
certPool := registry.GetRegistryCertPool()
if registry.InsecureSkipVerify {
// Insecure mode requested: disable verification entirely
tr.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
} else if certPool != nil {
// Create TLS config with custom root CAs merged into system pool
tr.TLSClientConfig = &tls.Config{RootCAs: certPool}
}
return tr
}
// NewTransportForTest exposes the transport construction for unit tests.
func NewTransportForTest() *http.Transport {
return newTransport()
}

View file

@@ -0,0 +1,27 @@
package digest_test
import (
"github.com/containrrr/watchtower/pkg/registry"
"github.com/containrrr/watchtower/pkg/registry/digest"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
var _ = Describe("Digest transport configuration", func() {
AfterEach(func() {
// Reset to default after each test
registry.InsecureSkipVerify = false
})
It("should have nil TLSClientConfig by default", func() {
registry.InsecureSkipVerify = false
tr := digest.NewTransportForTest()
Expect(tr.TLSClientConfig).To(BeNil())
})
It("should set TLSClientConfig when insecure flag is true", func() {
registry.InsecureSkipVerify = true
tr := digest.NewTransportForTest()
Expect(tr.TLSClientConfig).ToNot(BeNil())
})
})

View file

@@ -1,6 +1,9 @@
package registry
import (
"crypto/x509"
"io/ioutil"
"github.com/containrrr/watchtower/pkg/registry/helpers"
watchtowerTypes "github.com/containrrr/watchtower/pkg/types"
ref "github.com/distribution/reference"
@@ -8,6 +11,18 @@ import (
log "github.com/sirupsen/logrus"
)
// InsecureSkipVerify controls whether registry HTTPS connections used for
// manifest HEAD/token requests disable certificate verification. Default is false.
// This is exposed so callers (e.g. CLI flag handling) can toggle it.
var InsecureSkipVerify = false
// RegistryCABundle is an optional filesystem path to a PEM bundle that will be
// used as additional trusted CAs when validating registry TLS certificates.
var RegistryCABundle string
// registryCertPool caches the loaded cert pool when RegistryCABundle is set
var registryCertPool *x509.CertPool
// GetPullOptions creates a struct with all options needed for pulling images from a registry
func GetPullOptions(imageName string) (types.ImagePullOptions, error) {
auth, err := EncodedAuth(imageName)
@@ -59,3 +74,29 @@ func WarnOnAPIConsumption(container watchtowerTypes.Container) bool {
return false
}
// GetRegistryCertPool returns a cert pool that includes system roots plus any
// additional CAs provided via RegistryCABundle. The resulting pool is cached.
func GetRegistryCertPool() *x509.CertPool {
if RegistryCABundle == "" {
return nil
}
if registryCertPool != nil {
return registryCertPool
}
// Try to load file
data, err := os.ReadFile(RegistryCABundle)
if err != nil {
log.WithField("path", RegistryCABundle).Errorf("Failed to load registry CA bundle: %v", err)
return nil
}
pool, err := x509.SystemCertPool()
if err != nil || pool == nil {
pool = x509.NewCertPool()
}
if ok := pool.AppendCertsFromPEM(data); !ok {
log.WithField("path", RegistryCABundle).Error("No certs appended from registry CA bundle; file may be empty or invalid PEM")
return nil
}
registryCertPool = pool
return registryCertPool
}

View file

@@ -2,5 +2,6 @@ package types
// TokenResponse is returned by the registry on successful authentication
type TokenResponse struct {
Token string `json:"token"`
Token string `json:"token"`
ExpiresIn int `json:"expires_in,omitempty"`
}