From e1f67fc3d091ee6047cd42d38cf3c0ae6b5e0e4d Mon Sep 17 00:00:00 2001 From: kalvinparker <106995826+kalvinparker@users.noreply.github.com> Date: Fri, 14 Nov 2025 14:30:37 +0000 Subject: [PATCH] feat(registry): add support for custom CA certificates and TLS validation - Introduced `--registry-ca` and `--registry-ca-validate` flags for configuring TLS verification with private registries. - Implemented in-memory token caching with expiration handling. - Updated documentation to reflect new CLI options and usage examples. - Added tests for token cache concurrency and expiry behavior. --- .github/workflows/race.yml | 37 ++++ CHANGELOG.md | 7 + README.md | 15 ++ cmd/root.go | 25 +++ docs/SUMMARY_CHECKPOINT.md | 29 +++ docs/arguments.md | 26 +++ docs/diagrams/update-flow.puml | 46 +++++ docs/index.md | 14 ++ docs/private-registries.md | 42 +++++ docs/update-flow.md | 166 ++++++++++++++++++ internal/flags/flags.go | 15 ++ pkg/registry/auth/auth.go | 60 ++++++- .../auth/auth_cache_concurrency_test.go | 101 +++++++++++ pkg/registry/auth/auth_cache_test.go | 54 ++++++ pkg/registry/digest/digest.go | 47 +++-- pkg/registry/digest/digest_transport_test.go | 27 +++ pkg/registry/registry.go | 41 +++++ pkg/types/token_response.go | 3 +- 18 files changed, 738 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/race.yml create mode 100644 CHANGELOG.md create mode 100644 docs/SUMMARY_CHECKPOINT.md create mode 100644 docs/diagrams/update-flow.puml create mode 100644 docs/update-flow.md create mode 100644 pkg/registry/auth/auth_cache_concurrency_test.go create mode 100644 pkg/registry/auth/auth_cache_test.go create mode 100644 pkg/registry/digest/digest_transport_test.go diff --git a/.github/workflows/race.yml b/.github/workflows/race.yml new file mode 100644 index 0000000..24537f1 --- /dev/null +++ b/.github/workflows/race.yml @@ -0,0 +1,37 @@ +name: Race Detector + +on: + workflow_dispatch: {} + pull_request: + branches: + - main + +jobs: + race: + name: Run tests 
with race detector + runs-on: ubuntu-latest + strategy: + matrix: + go-version: [1.20.x] + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: ${{ matrix.go-version }} + + - name: Install build tools (for cgo / race detector) + run: | + sudo apt-get update + sudo apt-get install -y build-essential + + - name: Ensure CGO enabled + run: echo "CGO_ENABLED=1" >> $GITHUB_ENV + + - name: Run tests with race detector + run: | + go test -race ./... -v diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..d4b3dec --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,7 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +## [Unreleased] + +- Add `--registry-ca-validate` flag: when supplied with `--registry-ca`, Watchtower can validate the provided CA bundle on startup and fail fast on misconfiguration. Prefer using this over `--insecure-registry` in production. diff --git a/README.md b/README.md index f550302..1a39cad 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,21 @@ $ docker run --detach \ Watchtower is intended to be used in homelabs, media centers, local dev environments, and similar. We do **not** recommend using Watchtower in a commercial or production environment. If that is you, you should be looking into using Kubernetes. If that feels like too big a step for you, please look into solutions like [MicroK8s](https://microk8s.io/) and [k3s](https://k3s.io/) that take away a lot of the toil of running a Kubernetes cluster. 
+### Using a custom registry CA (private registries) + +If you run Watchtower against a private registry that uses a custom TLS certificate, provide the CA bundle and enable validation at startup so Watchtower fails fast on misconfiguration: + +``` +$ docker run --detach \ + --name watchtower \ + --volume /var/run/docker.sock:/var/run/docker.sock \ + containrrr/watchtower \ + --registry-ca /etc/ssl/certs/my-registry-ca.pem \ + --registry-ca-validate=true +``` + +Prefer providing a CA bundle and enabling `--registry-ca-validate` over disabling TLS verification with `--insecure-registry` in production environments. + ## Documentation The full documentation is available at https://containrrr.dev/watchtower. diff --git a/cmd/root.go b/cmd/root.go index eef13ce..c744dcd 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -20,6 +20,7 @@ import ( "github.com/containrrr/watchtower/pkg/container" "github.com/containrrr/watchtower/pkg/filters" "github.com/containrrr/watchtower/pkg/metrics" + "github.com/containrrr/watchtower/pkg/registry" "github.com/containrrr/watchtower/pkg/notifications" t "github.com/containrrr/watchtower/pkg/types" "github.com/robfig/cron" @@ -118,6 +119,30 @@ func PreRun(cmd *cobra.Command, _ []string) { removeVolumes, _ := f.GetBool("remove-volumes") warnOnHeadPullFailed, _ := f.GetString("warn-on-head-failure") + // Configure TLS verification for registry HEAD/token requests. Default is secure (verify certs). + insecureRegistry, _ := f.GetBool("insecure-registry") + registry.InsecureSkipVerify = insecureRegistry + if insecureRegistry { + log.Warn("TLS certificate verification for registry requests is disabled (insecure). 
This should only be used for testing.") + } + + registryCABundle, _ := f.GetString("registry-ca") + if registryCABundle != "" { + registry.RegistryCABundle = registryCABundle + log.Debugf("Using registry CA bundle: %s", registryCABundle) + } + + // Optionally validate CA bundle at startup + validateCABundle, _ := f.GetBool("registry-ca-validate") + if validateCABundle && registry.RegistryCABundle != "" { + if pool := registry.GetRegistryCertPool(); pool == nil { + log.Fatalf("Failed to validate registry CA bundle at %s", registry.RegistryCABundle) + } + log.Info("Registry CA bundle validated successfully") + } else if validateCABundle && registry.RegistryCABundle == "" { + log.Fatalf("--registry-ca-validate was set but no --registry-ca was provided") + } + if monitorOnly && noPull { log.Warn("Using `WATCHTOWER_NO_PULL` and `WATCHTOWER_MONITOR_ONLY` simultaneously might lead to no action being taken at all. If this is intentional, you may safely ignore this message.") } diff --git a/docs/SUMMARY_CHECKPOINT.md b/docs/SUMMARY_CHECKPOINT.md new file mode 100644 index 0000000..0c6c52c --- /dev/null +++ b/docs/SUMMARY_CHECKPOINT.md @@ -0,0 +1,29 @@ +# Summary Checkpoint + +This file marks a checkpoint for summarizing repository changes. + +All future requests that ask to "summarise all the changes thus far" should consider +only changes made after this checkpoint was created. + +Checkpoint timestamp (UTC): 2025-11-13T12:00:00Z + +Notes: +- Purpose: act as a stable anchor so that subsequent "summarise all the changes thus far" + requests will include only modifications after this point. +- Location: `docs/SUMMARY_CHECKPOINT.md` + +Recent delta (since previous checkpoint): + +- Added CLI flags and wiring: `--registry-ca` and `--registry-ca-validate` (startup validation). +- Implemented secure-by-default registry transport behavior and support for a custom CA bundle. 
+- Introduced an in-memory bearer token cache (honors `expires_in`) and refactored time usage + to allow deterministic tests via an injectable `now` function. +- Added deterministic unit tests for the token cache (`pkg/registry/auth/auth_cache_test.go`). +- Added quickstart documentation snippets to `README.md`, `docs/index.md`, and + `docs/private-registries.md` showing `--registry-ca` + `--registry-ca-validate`. +- Created `CHANGELOG.md` with an Unreleased entry for the new `--registry-ca-validate` flag. +- Ran package tests locally: `pkg/registry/auth` and `pkg/registry/digest` — tests passed + (some integration tests were skipped due to missing credentials). + +If you want the next checkpoint after more changes (e.g., mapping the update call chain, +documenting data shapes, or adding concurrency tests), request another summary break. diff --git a/docs/arguments.md b/docs/arguments.md index d7ed0b0..1cf7c2d 100644 --- a/docs/arguments.md +++ b/docs/arguments.md @@ -460,8 +460,34 @@ Alias for: --notification-report --notification-template porcelain.VERSION.summary-no-log + Argument: --porcelain, -P Environment Variable: WATCHTOWER_PORCELAIN Possible values: v1 Default: - ``` + +## Registry TLS options + +Options to configure TLS verification when Watchtower talks to image registries. 
+ +```text + Argument: --insecure-registry +Environment Variable: WATCHTOWER_INSECURE_REGISTRY + Type: Boolean + Default: false +``` + +```text + Argument: --registry-ca +Environment Variable: WATCHTOWER_REGISTRY_CA + Type: String (path to PEM bundle inside container) + Default: - +``` + +```text + Argument: --registry-ca-validate +Environment Variable: WATCHTOWER_REGISTRY_CA_VALIDATE + Type: Boolean + Default: false +``` diff --git a/docs/diagrams/update-flow.puml b/docs/diagrams/update-flow.puml new file mode 100644 index 0000000..44ed6e7 --- /dev/null +++ b/docs/diagrams/update-flow.puml @@ -0,0 +1,46 @@ +@startuml +title Watchtower Update Flow +actor User as CLI +participant "cmd (root)" as CMD +participant "internal/actions.Update" as ACT +participant "container.Client" as CLIENT +participant "pkg/registry/digest" as DIG +participant "pkg/registry/auth" as AUTH +participant "pkg/registry" as REG +database "Docker Engine" as DOCKER + +CLI -> CMD: trigger runUpdatesWithNotifications() +CMD -> ACT: Update(client, UpdateParams) +ACT -> CLIENT: ListContainers(filter) +loop per container + ACT -> CLIENT: IsContainerStale(container, params) + CLIENT -> CLIENT: PullImage (maybe) + CLIENT -> DIG: CompareDigest(container, registryAuth) + DIG -> AUTH: GetToken(challenge) + AUTH -> AUTH: getCachedToken / storeToken + DIG -> REG: newTransport() (uses --insecure-registry / --registry-ca) + DIG -> DOCKER: HEAD manifest with token + alt digest matches + CLIENT --> ACT: no pull needed + else + CLIENT -> DOCKER: ImagePull(image) + end + CLIENT --> ACT: HasNewImage -> stale/newestImage +end +ACT -> ACT: SortByDependencies +ACT -> CLIENT: StopContainer / StartContainer (with lifecycle hooks) +ACT -> CLIENT: RemoveImageByID (cleanup) +ACT --> CMD: progress.Report() + +note right of AUTH + Tokens are cached by auth URL (realm+service+scope) + ExpiresIn (seconds) sets TTL when provided +end note + +note left of REG + TLS is secure-by-default + `--registry-ca` provides PEM bundle + 
`--registry-ca-validate` fails startup on invalid bundle +end note + +@enduml diff --git a/docs/index.md b/docs/index.md index 1d0b2cc..7a16638 100644 --- a/docs/index.md +++ b/docs/index.md @@ -63,3 +63,17 @@ the following command: volumes: - /var/run/docker.sock:/var/run/docker.sock ``` + +Quick note: if your registry uses a custom TLS certificate, mount the CA bundle and enable startup validation so Watchtower fails fast on misconfiguration: + +```bash +docker run --detach \ + --name watchtower \ + --volume /var/run/docker.sock:/var/run/docker.sock \ + --volume /etc/ssl/private-certs:/certs \ + containrrr/watchtower \ + --registry-ca /certs/my-registry-ca.pem \ + --registry-ca-validate=true +``` + +Prefer this over `--insecure-registry` for production. diff --git a/docs/private-registries.md b/docs/private-registries.md index 5367a8c..d4cff94 100644 --- a/docs/private-registries.md +++ b/docs/private-registries.md @@ -205,3 +205,45 @@ A few additional notes: 4. An alternative to adding the various variables is to create a ~/.aws/config and ~/.aws/credentials files and place the settings there, then mount the ~/.aws directory to / in the container. + +## Token caching and required scopes + +Watchtower attempts to minimize calls to registry auth endpoints by caching short-lived bearer tokens when available. + +- Token cache: When Watchtower requests a bearer token from a registry auth endpoint, it will cache the token in-memory keyed by the auth realm + service + scope. If the token response includes an `expires_in` field, Watchtower will honor it and refresh the token only after expiry. This reduces load and rate-limit pressure on registry auth servers. + +- Required scope: Watchtower requests tokens with the following scope format: `repository:<image>:pull`. This is sufficient for read-only operations required by Watchtower (HEAD or pull). 
For registries enforcing fine-grained scopes, ensure the provided credentials can request tokens with `pull` scope for the repositories you want to monitor. + +- Credential sources: Watchtower supports these sources (in priority order): + 1. Environment variables: `REPO_USER` and `REPO_PASS`. + 2. Docker config file (`DOCKER_CONFIG` path or default location, typically `/root/.docker/config.json` when running in container) including support for credential helpers and native stores. + +When possible, prefer using short-lived tokens or credential helpers and avoid embedding long-lived plaintext credentials in environment variables. + +### Providing a custom CA bundle + +For private registries using certificates signed by an internal CA, prefer providing a PEM-encoded CA bundle over bypassing certificate verification. Use the `--registry-ca` flag or the `WATCHTOWER_REGISTRY_CA` environment variable to point to a file inside the container with one or more PEM encoded certificates. Watchtower will merge the provided bundle with system roots and validate registry certificates accordingly. + +Example (docker run): + +```bash +docker run -v /etc/ssl/private-certs:/certs -e WATCHTOWER_REGISTRY_CA=/certs/my-registry-ca.pem containrrr/watchtower +``` + +This is the recommended approach instead of `--insecure-registry` for production deployments. + +#### Quick example: validate CA at startup + +If you want Watchtower to fail fast when the provided CA bundle is invalid or missing, mount the CA into the container and enable validation: + +```bash +docker run --detach \ + --name watchtower \ + --volume /var/run/docker.sock:/var/run/docker.sock \ + --volume /etc/ssl/private-certs:/certs \ + containrrr/watchtower \ + --registry-ca /certs/my-registry-ca.pem \ + --registry-ca-validate=true +``` + +This makes misconfiguration explicit during startup and is recommended for unattended deployments. 
diff --git a/docs/update-flow.md b/docs/update-flow.md new file mode 100644 index 0000000..8832352 --- /dev/null +++ b/docs/update-flow.md @@ -0,0 +1,166 @@ + +# Watchtower Update Flow + +This document explains the end-to-end update flow in the Watchtower codebase, including the main function call chain, the key data shapes, and diagrams (Mermaid & PlantUML). + +## Quick Summary + +- Trigger: CLI (`watchtower` start / scheduler / HTTP API update) constructs `types.UpdateParams` and calls `internal/actions.Update`. +- `internal/actions.Update` orchestrates discovery, stale detection, lifecycle hooks, stopping/restarting containers, cleanup and reporting. +- Image pull optimization uses a digest HEAD request (`pkg/registry/digest`) and a token flow (`pkg/registry/auth`) with an in-memory token cache. +- TLS for HEAD/token requests is secure-by-default and configurable via `--insecure-registry`, `--registry-ca`, and `--registry-ca-validate`. + +--- + +## Call Chain (step-by-step) + +1. CLI start / scheduler / HTTP API + - Entry points: `main()` -> `cmd.Execute()` -> Cobra command `Run` / `PreRun`. + - `cmd.PreRun` reads flags and config, sets `registry.InsecureSkipVerify` and `registry.RegistryCABundle`. + +2. Run update + - `cmd.runUpdatesWithNotifications` builds `types.UpdateParams` and calls `internal/actions.Update(client, updateParams)`. + +3. 
Orchestration: `internal/actions.Update` + - If `params.LifecycleHooks` -> `lifecycle.ExecutePreChecks(client, params)` + - Discover containers: `client.ListContainers(params.Filter)` + - For each container: + - `client.IsContainerStale(container, params)` + - calls `client.PullImage(ctx, container)` unless `container.IsNoPull(params)` is true + - `PullImage` obtains `types.ImagePullOptions` via `pkg/registry.GetPullOptions(image)` + - tries digest optimization: `pkg/registry/digest.CompareDigest(container, opts.RegistryAuth)` + - `auth.GetToken(container, registryAuth)` obtains a token: + - sends GET to the challenge URL (`/v2/`), inspects `WWW-Authenticate` + - for `Bearer`: constructs auth URL with `realm`, `service`, and `scope` (`repository::pull`) + - checks in-memory cache (`auth.getCachedToken(cacheKey)`) keyed by the auth URL + - if missing, requests token from auth URL (Basic header if Docker cred present), parses `types.TokenResponse` and calls `auth.storeToken(cacheKey, token, ExpiresIn)` + - `digest.GetDigest(manifestURL, token)` performs an HTTP `HEAD` using a transport created by `digest.newTransport()` + - transport respects `registry.InsecureSkipVerify` and uses `registry.GetRegistryCertPool()` when a CA bundle is provided + - If remote digest matches a local digest, `PullImage` skips the pull + - `client.HasNewImage(ctx, container)` compares local image ID with remote image ID + - `targetContainer.VerifyConfiguration()` (fail/skip logic) + - Mark scanned/skipped in `session.Progress` and set `container.SetStale(stale)` + - Sort containers: `sorter.SortByDependencies(containers)` + - `UpdateImplicitRestart(containers)` sets `LinkedToRestarting` flags + - Build `containersToUpdate` and mark them for update in `Progress` + - Update strategy: + - Rolling restart: `performRollingRestart(containersToUpdate, client, params)` + - `stopStaleContainer(c)` -> `restartStaleContainer(c)` per container + - Normal: `stopContainersInReversedOrder(...)` -> 
`restartContainersInSortedOrder(...)` + - `stopStaleContainer` runs `lifecycle.ExecutePreUpdateCommand` and `client.StopContainer` + - `restartStaleContainer` may `client.RenameContainer` (watchtower self), `client.StartContainer` and `lifecycle.ExecutePostUpdateCommand` + - If `params.Cleanup` -> `cleanupImages(client, imageIDs)` calls `client.RemoveImageByID` + - If `params.LifecycleHooks` -> `lifecycle.ExecutePostChecks(client, params)` + - Return `progress.Report()` (a `types.Report` implemented from `session.Progress`) + +--- + +## Key data shapes + +- `types.UpdateParams` (created in `cmd/runUpdatesWithNotifications`) + - `Filter` (types.Filter) + - `Cleanup bool` + - `NoRestart bool` + - `Timeout time.Duration` + - `MonitorOnly bool` + - `NoPull bool` + - `LifecycleHooks bool` + - `RollingRestart bool` + - `LabelPrecedence bool` + +- `container.Client` interface (in `pkg/container/client.go`) — used by `actions.Update` + - `ListContainers(Filter) ([]types.Container, error)` + - `GetContainer(containerID) (types.Container, error)` + - `StopContainer(types.Container, time.Duration) error` + - `StartContainer(types.Container) (types.ContainerID, error)` + - `RenameContainer(types.Container, string) error` + - `IsContainerStale(types.Container, types.UpdateParams) (bool, types.ImageID, error)` + - `ExecuteCommand(containerID types.ContainerID, command string, timeout int) (SkipUpdate bool, err error)` + - `RemoveImageByID(types.ImageID) error` + - `WarnOnHeadPullFailed(types.Container) bool` + +- `types.Container` interface (in `pkg/types/container.go`) — methods used include: + - `ID(), Name(), ImageName(), ImageID(), SafeImageID(), IsRunning(), IsRestarting()` + - `VerifyConfiguration() error`, `HasImageInfo() bool`, `ImageInfo() *types.ImageInspect` + - lifecycle hooks: `GetLifecyclePreUpdateCommand(), GetLifecyclePostUpdateCommand(), PreUpdateTimeout(), PostUpdateTimeout()` + - flags: `IsNoPull(UpdateParams), IsMonitorOnly(UpdateParams), ToRestart(), 
IsWatchtower()` + +- `session.Progress` and `session.ContainerStatus` (reporting) + - `Progress` is a map `map[types.ContainerID]*ContainerStatus` + - `ContainerStatus` fields: `containerID, containerName, imageName, oldImage, newImage, error, state` + - `Progress.Report()` returns a `types.Report` implementation + +- `types.TokenResponse` (used by `pkg/registry/auth`) contains `Token string` and `ExpiresIn int` (seconds) + +--- + +## Diagrams + +Mermaid sequence diagram (embedded): + +```mermaid +sequenceDiagram + participant CLI as CLI / Scheduler / HTTP API + participant CMD as cmd + participant ACT as internal/actions.Update + participant CLIENT as container.Client (docker wrapper) + participant DIG as pkg/registry/digest + participant AUTH as pkg/registry/auth + participant REG as pkg/registry (TLS config) + participant DOCKER as Docker Engine + + CLI->>CMD: trigger runUpdatesWithNotifications() + CMD->>ACT: Update(client, UpdateParams) + ACT->>CLIENT: ListContainers(filter) + loop per container + ACT->>CLIENT: IsContainerStale(container, params) + CLIENT->>CLIENT: PullImage (maybe) + CLIENT->>DIG: CompareDigest(container, registryAuth) + DIG->>AUTH: GetToken(challenge) + AUTH->>AUTH: getCachedToken / storeToken + DIG->>REG: newTransport() (uses --insecure-registry / --registry-ca) + DIG->>DOCKER: HEAD manifest with token + alt digest matches + CLIENT-->>ACT: no pull needed + else + CLIENT->>DOCKER: ImagePull(image) + end + CLIENT-->>ACT: HasNewImage -> stale/ newestImage + end + ACT->>ACT: SortByDependencies + ACT->>CLIENT: StopContainer / StartContainer (with lifecycle hooks) + ACT->>CLIENT: RemoveImageByID (cleanup) + ACT-->>CMD: progress.Report() +``` + +For reference, a PlantUML source for the same sequence is available in `docs/diagrams/update-flow.puml`. + +--- + +## Security & operational notes + +- TLS: registry HEAD and token requests are secure-by-default. 
Use `--registry-ca` to add private CAs, and `--registry-ca-validate` to fail fast on bad bundles. Avoid `--insecure-registry` except for testing. +- Token cache: tokens are cached per auth URL (realm+service+scope). Tokens with `ExpiresIn` are cached for that TTL. No persistent or distributed cache is provided. +- Digest HEAD optimization avoids pulls and unnecessary rate consumption when possible. DockerHub/GHCR may rate-limit HEAD or behave differently; the code includes a `WarnOnAPIConsumption` heuristic. + +--- + +## Where to look in the code + +- Orchestration: `internal/actions/update.go` +- CLI wiring: `cmd/root.go`, `internal/flags/flags.go` +- Container wrapper: `pkg/container/client.go`, `pkg/container/container.go` +- Digest & transport: `pkg/registry/digest/digest.go` +- Token & auth handling: `pkg/registry/auth/auth.go` +- TLS helpers: `pkg/registry/registry.go` +- Lifecycle hooks: `pkg/lifecycle/lifecycle.go` +- Session/reporting: `pkg/session/*`, `pkg/types/report.go` + +--- + +If you'd like, I can also open a branch and create a PR with these files, or convert the PlantUML into an SVG and add it to the docs site. + +End of document. diff --git a/internal/flags/flags.go b/internal/flags/flags.go index c11cdae..415a524 100644 --- a/internal/flags/flags.go +++ b/internal/flags/flags.go @@ -385,6 +385,21 @@ Should only be used for testing.`) envString("WATCHTOWER_WARN_ON_HEAD_FAILURE"), "When to warn about HEAD pull requests failing. 
Possible values: always, auto or never") + flags.Bool( + "insecure-registry", + envBool("WATCHTOWER_INSECURE_REGISTRY"), + "Disable TLS verification when contacting registries for HEAD/manifest requests (INSECURE; use only for testing)") + + flags.String( + "registry-ca", + envString("WATCHTOWER_REGISTRY_CA"), + "Path to a PEM encoded CA certificate bundle to trust for private registries") + + flags.Bool( + "registry-ca-validate", + envBool("WATCHTOWER_REGISTRY_CA_VALIDATE"), + "If set, watchtower will fail to start if the provided registry CA bundle cannot be loaded or parsed") + flags.Bool( "notification-log-stdout", envBool("WATCHTOWER_NOTIFICATION_LOG_STDOUT"), diff --git a/pkg/registry/auth/auth.go b/pkg/registry/auth/auth.go index 99b05c9..8e6c294 100644 --- a/pkg/registry/auth/auth.go +++ b/pkg/registry/auth/auth.go @@ -8,6 +8,8 @@ import ( "net/http" "net/url" "strings" + "sync" + "time" "github.com/containrrr/watchtower/pkg/registry/helpers" "github.com/containrrr/watchtower/pkg/types" @@ -75,12 +77,20 @@ func GetChallengeRequest(URL url.URL) (*http.Request, error) { // GetBearerHeader tries to fetch a bearer token from the registry based on the challenge instructions func GetBearerHeader(challenge string, imageRef ref.Named, registryAuth string) (string, error) { client := http.Client{} - authURL, err := GetAuthURL(challenge, imageRef) + authURL, err := GetAuthURL(challenge, imageRef) if err != nil { return "", err } + // Build cache key from the auth realm, service and scope + cacheKey := authURL.String() + + // Check cache first + if token := getCachedToken(cacheKey); token != "" { + return fmt.Sprintf("Bearer %s", token), nil + } + var r *http.Request if r, err = http.NewRequest("GET", authURL.String(), nil); err != nil { return "", err @@ -88,8 +98,6 @@ func GetBearerHeader(challenge string, imageRef ref.Named, registryAuth string) if registryAuth != "" { logrus.Debug("Credentials found.") - // CREDENTIAL: Uncomment to log registry credentials - // 
logrus.Tracef("Credentials: %v", registryAuth) r.Header.Add("Authorization", fmt.Sprintf("Basic %s", registryAuth)) } else { logrus.Debug("No credentials found.") @@ -99,6 +107,7 @@ func GetBearerHeader(challenge string, imageRef ref.Named, registryAuth string) if authResponse, err = client.Do(r); err != nil { return "", err } + defer authResponse.Body.Close() body, _ := io.ReadAll(authResponse.Body) tokenResponse := &types.TokenResponse{} @@ -108,9 +117,54 @@ func GetBearerHeader(challenge string, imageRef ref.Named, registryAuth string) return "", err } + // Cache the token only when the response declares a lifetime (expires_in); without one it would never be evicted and could be reused after the registry expired it + if tokenResponse.Token != "" && tokenResponse.ExpiresIn > 0 { + storeToken(cacheKey, tokenResponse.Token, tokenResponse.ExpiresIn) + } + return fmt.Sprintf("Bearer %s", tokenResponse.Token), nil } +// token cache implementation +type cachedToken struct { + token string + expiresAt time.Time +} + +var ( + tokenCache = map[string]cachedToken{} + tokenCacheMu = &sync.Mutex{} +) + +// now is a package-level function returning current time. It is a variable so tests +// can override it for deterministic behavior. +var now = time.Now + +// getCachedToken returns token string if present and not expired, otherwise empty +func getCachedToken(key string) string { + tokenCacheMu.Lock() + defer tokenCacheMu.Unlock() + if ct, ok := tokenCache[key]; ok { + if ct.expiresAt.IsZero() || now().Before(ct.expiresAt) { + return ct.token + } + // expired + delete(tokenCache, key) + } + return "" +} + +// storeToken stores token with optional ttl (seconds). ttl<=0 means no expiry. 
+func storeToken(key, token string, ttl int) { + tokenCacheMu.Lock() + defer tokenCacheMu.Unlock() + ct := cachedToken{token: token} + if ttl > 0 { + ct.expiresAt = now().Add(time.Duration(ttl) * time.Second) + } + tokenCache[key] = ct +} + // GetAuthURL from the instructions in the challenge func GetAuthURL(challenge string, imageRef ref.Named) (*url.URL, error) { loweredChallenge := strings.ToLower(challenge) diff --git a/pkg/registry/auth/auth_cache_concurrency_test.go b/pkg/registry/auth/auth_cache_concurrency_test.go new file mode 100644 index 0000000..0bb2e47 --- /dev/null +++ b/pkg/registry/auth/auth_cache_concurrency_test.go @@ -0,0 +1,101 @@ +package auth + +import ( + "sync" + "testing" + "time" +) + +// Test concurrent stores and gets to ensure the mutex protects the cache +func TestTokenCacheConcurrentStoreAndGet(t *testing.T) { + // reset cache safely + tokenCacheMu.Lock() + tokenCache = map[string]cachedToken{} + tokenCacheMu.Unlock() + + origNow := now + defer func() { now = origNow }() + now = time.Now + + key := "concurrent-key" + token := "tok-concurrent" + + var wg sync.WaitGroup + storeers := 50 + getters := 50 + iters := 100 + + for i := 0; i < storeers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < iters; j++ { + storeToken(key, token, 0) + } + }() + } + + for i := 0; i < getters; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < iters; j++ { + _ = getCachedToken(key) + } + }() + } + + wg.Wait() + + if got := getCachedToken(key); got != token { + t.Fatalf("expected token %q, got %q", token, got) + } +} + +// Test concurrent access while token expires: readers run while time is advanced +func TestTokenCacheConcurrentExpiry(t *testing.T) { + // reset cache safely + tokenCacheMu.Lock() + tokenCache = map[string]cachedToken{} + tokenCacheMu.Unlock() + + // Make now controllable and thread-safe + origNow := now + defer func() { now = origNow }() + + base := time.Now() + var mu sync.Mutex + current := base + 
now = func() time.Time { + mu.Lock() + defer mu.Unlock() + return current + } + + key := "concurrent-expire" + storeToken(key, "t", 1) + + var wg sync.WaitGroup + readers := 100 + + for i := 0; i < readers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < 100; j++ { + _ = getCachedToken(key) + } + }() + } + + // advance time beyond ttl + mu.Lock() + current = current.Add(2 * time.Second) + mu.Unlock() + + wg.Wait() + + if got := getCachedToken(key); got != "" { + t.Fatalf("expected token to be expired, got %q", got) + } +} diff --git a/pkg/registry/auth/auth_cache_test.go b/pkg/registry/auth/auth_cache_test.go new file mode 100644 index 0000000..d23211d --- /dev/null +++ b/pkg/registry/auth/auth_cache_test.go @@ -0,0 +1,54 @@ +package auth + +import ( + "testing" + "time" +) + +func TestTokenCacheStoreAndGetHitAndMiss(t *testing.T) { + // save and restore original now + origNow := now + defer func() { now = origNow }() + + // deterministic fake time + base := time.Date(2025, time.November, 13, 12, 0, 0, 0, time.UTC) + now = func() time.Time { return base } + + key := "https://auth.example.com/?service=example&scope=repository:repo:pull" + // ensure empty at start + if got := getCachedToken(key); got != "" { + t.Fatalf("expected empty cache initially, got %q", got) + } + + // store with no expiry (ttl <= 0) + storeToken(key, "tok-123", 0) + if got := getCachedToken(key); got != "tok-123" { + t.Fatalf("expected token tok-123, got %q", got) + } +} + +func TestTokenCacheExpiry(t *testing.T) { + // save and restore original now + origNow := now + defer func() { now = origNow }() + + // deterministic fake time that can be moved forward + base := time.Date(2025, time.November, 13, 12, 0, 0, 0, time.UTC) + current := base + now = func() time.Time { return current } + + key := "https://auth.example.com/?service=example&scope=repository:repo2:pull" + // store with short ttl (1 second) + storeToken(key, "short-tok", 1) + + if got := getCachedToken(key); got 
!= "short-tok" { + t.Fatalf("expected token short-tok immediately after store, got %q", got) + } + + // advance time beyond ttl + current = current.Add(2 * time.Second) + + if got := getCachedToken(key); got != "" { + t.Fatalf("expected token to be expired and removed, got %q", got) + } +} diff --git a/pkg/registry/digest/digest.go b/pkg/registry/digest/digest.go index e569599..ea2af71 100644 --- a/pkg/registry/digest/digest.go +++ b/pkg/registry/digest/digest.go @@ -12,6 +12,7 @@ import ( "time" "github.com/containrrr/watchtower/internal/meta" + "github.com/containrrr/watchtower/pkg/registry" "github.com/containrrr/watchtower/pkg/registry/auth" "github.com/containrrr/watchtower/pkg/registry/manifest" "github.com/containrrr/watchtower/pkg/types" @@ -76,19 +77,7 @@ func TransformAuth(registryAuth string) string { // GetDigest from registry using a HEAD request to prevent rate limiting func GetDigest(url string, token string) (string, error) { - tr := &http.Transport{ - Proxy: http.ProxyFromEnvironment, - DialContext: (&net.Dialer{ - Timeout: 30 * time.Second, - KeepAlive: 30 * time.Second, - }).DialContext, - ForceAttemptHTTP2: true, - MaxIdleConns: 100, - IdleConnTimeout: 90 * time.Second, - TLSHandshakeTimeout: 10 * time.Second, - ExpectContinueTimeout: 1 * time.Second, - TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, - } + tr := newTransport() client := &http.Client{Transport: tr} req, _ := http.NewRequest("HEAD", url, nil) @@ -124,3 +113,35 @@ func GetDigest(url string, token string) (string, error) { } return res.Header.Get(ContentDigestHeader), nil } + +// newTransport constructs an *http.Transport used for registry HEAD/token requests. +// It respects the package-level `registry.InsecureSkipVerify` toggle. 
+func newTransport() *http.Transport { + tr := &http.Transport{ + Proxy: http.ProxyFromEnvironment, + DialContext: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + }).DialContext, + ForceAttemptHTTP2: true, + MaxIdleConns: 100, + IdleConnTimeout: 90 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + } + + certPool := registry.GetRegistryCertPool() + if registry.InsecureSkipVerify { + // Insecure mode requested: disable verification entirely + tr.TLSClientConfig = &tls.Config{InsecureSkipVerify: true} + } else if certPool != nil { + // Create TLS config with custom root CAs merged into system pool + tr.TLSClientConfig = &tls.Config{RootCAs: certPool} + } + return tr +} + +// NewTransportForTest exposes the transport construction for unit tests. +func NewTransportForTest() *http.Transport { + return newTransport() +} diff --git a/pkg/registry/digest/digest_transport_test.go b/pkg/registry/digest/digest_transport_test.go new file mode 100644 index 0000000..b11d112 --- /dev/null +++ b/pkg/registry/digest/digest_transport_test.go @@ -0,0 +1,27 @@ +package digest_test + +import ( + "github.com/containrrr/watchtower/pkg/registry" + "github.com/containrrr/watchtower/pkg/registry/digest" + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("Digest transport configuration", func() { + AfterEach(func() { + // Reset to default after each test + registry.InsecureSkipVerify = false + }) + + It("should have nil TLSClientConfig by default", func() { + registry.InsecureSkipVerify = false + tr := digest.NewTransportForTest() + Expect(tr.TLSClientConfig).To(BeNil()) + }) + + It("should set TLSClientConfig when insecure flag is true", func() { + registry.InsecureSkipVerify = true + tr := digest.NewTransportForTest() + Expect(tr.TLSClientConfig).ToNot(BeNil()) + }) +}) diff --git a/pkg/registry/registry.go b/pkg/registry/registry.go index 430b401..07bd5e3 100644 --- a/pkg/registry/registry.go +++ b/pkg/registry/registry.go @@ -1,6 +1,9 @@ package registry import ( + "crypto/x509" + "io/ioutil" + "github.com/containrrr/watchtower/pkg/registry/helpers" watchtowerTypes "github.com/containrrr/watchtower/pkg/types" ref "github.com/distribution/reference" @@ -8,6 +11,18 @@ import ( log "github.com/sirupsen/logrus" ) +// InsecureSkipVerify controls whether registry HTTPS connections used for +// manifest HEAD/token requests disable certificate verification. Default is false. +// This is exposed so callers (e.g. CLI flag handling) can toggle it. +var InsecureSkipVerify = false + +// RegistryCABundle is an optional filesystem path to a PEM bundle that will be +// used as additional trusted CAs when validating registry TLS certificates. 
+var RegistryCABundle string + +// registryCertPool caches the loaded cert pool when RegistryCABundle is set +var registryCertPool *x509.CertPool + // GetPullOptions creates a struct with all options needed for pulling images from a registry func GetPullOptions(imageName string) (types.ImagePullOptions, error) { auth, err := EncodedAuth(imageName) @@ -59,3 +74,29 @@ func WarnOnAPIConsumption(container watchtowerTypes.Container) bool { return false } + +// GetRegistryCertPool returns a cert pool that includes system roots plus any +// additional CAs provided via RegistryCABundle. The resulting pool is cached. +func GetRegistryCertPool() *x509.CertPool { + if RegistryCABundle == "" { + return nil + } + if registryCertPool != nil { + return registryCertPool + } + // Try to load file + data, err := ioutil.ReadFile(RegistryCABundle) + if err != nil { + log.WithField("path", RegistryCABundle).Errorf("Failed to load registry CA bundle: %v", err) + return nil + } + pool, err := x509.SystemCertPool() + if err != nil || pool == nil { + pool = x509.NewCertPool() + } + if ok := pool.AppendCertsFromPEM(data); !ok { + log.WithField("path", RegistryCABundle).Warn("No certs appended from registry CA bundle; file may be empty or invalid PEM") + } + registryCertPool = pool + return registryCertPool +} diff --git a/pkg/types/token_response.go b/pkg/types/token_response.go index 722dde8..bb3e870 100644 --- a/pkg/types/token_response.go +++ b/pkg/types/token_response.go @@ -2,5 +2,6 @@ package types // TokenResponse is returned by the registry on successful authentication type TokenResponse struct { - Token string `json:"token"` + Token string `json:"token"` + ExpiresIn int `json:"expires_in,omitempty"` }