Skip to content
33 changes: 32 additions & 1 deletion docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,42 @@ The option `imageCopyPolicy` (default: `delayed`) defines the image copy strateg

## ImageCopyDeadline

## Cache Configuration

When caching is enabled, k8s-image-swapper caches the existence of images to reduce strain on the target registry.
This means that if an image is deleted from the target registry, k8s-image-swapper will continue to think it exists until the cache expires.
There are two settings that control this behavior:

### Cache TTL

The option `cacheTtlMinutes` (default: `1440` - 24 hours) defines how long image existence information is cached. Set to `0` to disable caching entirely.

### Cache Jitter

The option `cacheJitterMaxMinutes` (default: `180` - 3 hours) defines the maximum random time added to the TTL to prevent a cache stampede. When many cache entries expire at the same time, it can cause a sudden spike in registry requests. Adding random jitter helps spread these requests out.

!!! example
```yaml
# Cache for 4 hours (240 minutes) with up to 30 minutes of random jitter
cacheTtlMinutes: 240
cacheJitterMaxMinutes: 30

# Disable caching completely
cacheTtlMinutes: 0
cacheJitterMaxMinutes: 0

# Default behavior if not specified:
# cacheTtlMinutes: 1440 # 24 hours
# cacheJitterMaxMinutes: 180 # 3 hours
```

!!! note
The actual cache duration for each entry will be: `cacheTtlMinutes + random(0 to cacheJitterMaxMinutes)` minutes

The option `imageCopyDeadline` (default: `8s`) defines the duration after which the image copy if aborted.

This option only applies for `immediate` and `force` image copy strategies.


## Source

This section configures details about the image source.
Expand Down
12 changes: 8 additions & 4 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,12 @@ type Config struct {

ListenAddress string

DryRun bool `yaml:"dryRun"`
ImageSwapPolicy string `yaml:"imageSwapPolicy" validate:"oneof=always exists"`
ImageCopyPolicy string `yaml:"imageCopyPolicy" validate:"oneof=delayed immediate force none"`
ImageCopyDeadline time.Duration `yaml:"imageCopyDeadline"`
DryRun bool `yaml:"dryRun"`
ImageSwapPolicy string `yaml:"imageSwapPolicy" validate:"oneof=always exists"`
ImageCopyPolicy string `yaml:"imageCopyPolicy" validate:"oneof=delayed immediate force none"`
ImageCopyDeadline time.Duration `yaml:"imageCopyDeadline"`
CacheTtlMinutes int `yaml:"cacheTtlMinutes"`
CacheJitterMaxMinutes int `yaml:"cacheJitterMaxMinutes"`

Source Source `yaml:"source"`
Target Registry `yaml:"target"`
Expand Down Expand Up @@ -169,4 +171,6 @@ func SetViperDefaults(v *viper.Viper) {
v.SetDefault("Target.AWS.ECROptions.ImageScanningConfiguration.ImageScanOnPush", true)
v.SetDefault("Target.AWS.ECROptions.ImageTagMutability", "MUTABLE")
v.SetDefault("Target.AWS.ECROptions.EncryptionConfiguration.EncryptionType", "AES256")
v.SetDefault("CacheTtlMinutes", 1440) // 24 hours
v.SetDefault("CacheJitterMaxMinutes", 180) // 3 hours
}
60 changes: 60 additions & 0 deletions pkg/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ func TestConfigParses(t *testing.T) {
name: "should render empty config with defaults",
cfg: "",
expCfg: Config{
CacheTtlMinutes: 1440,
CacheJitterMaxMinutes: 180,
Target: Registry{
Type: "aws",
AWS: AWS{
Expand All @@ -46,6 +48,8 @@ source:
- jmespath: "obj.metadata.namespace != 'playground'"
`,
expCfg: Config{
CacheTtlMinutes: 1440,
CacheJitterMaxMinutes: 180,
Target: Registry{
Type: "aws",
AWS: AWS{
Expand Down Expand Up @@ -85,6 +89,8 @@ target:
value: B
`,
expCfg: Config{
CacheTtlMinutes: 1440,
CacheJitterMaxMinutes: 180,
Target: Registry{
Type: "aws",
AWS: AWS{
Expand Down Expand Up @@ -129,6 +135,8 @@ source:
region: "us-east-1"
`,
expCfg: Config{
CacheTtlMinutes: 1440,
CacheJitterMaxMinutes: 180,
Target: Registry{
Type: "aws",
AWS: AWS{
Expand Down Expand Up @@ -178,6 +186,8 @@ target:
value: B
`,
expCfg: Config{
CacheTtlMinutes: 1440,
CacheJitterMaxMinutes: 180,
Target: Registry{
Type: "aws",
AWS: AWS{
Expand Down Expand Up @@ -207,6 +217,56 @@ target:
},
},
},
{
name: "should render custom cache settings",
cfg: `
cacheTtlMinutes: 60
cacheJitterMaxMinutes: 20
`,
expCfg: Config{
CacheTtlMinutes: 60,
CacheJitterMaxMinutes: 20,
Target: Registry{
Type: "aws",
AWS: AWS{
ECROptions: ECROptions{
ImageTagMutability: "MUTABLE",
ImageScanningConfiguration: ImageScanningConfiguration{
ImageScanOnPush: true,
},
EncryptionConfiguration: EncryptionConfiguration{
EncryptionType: "AES256",
},
},
},
},
},
},
{
name: "should allow disabling cache",
cfg: `
cacheTtlMinutes: 0 # Disable cache
cacheJitterMaxMinutes: 0 # No jitter needed when cache is disabled
`,
expCfg: Config{
CacheTtlMinutes: 0,
CacheJitterMaxMinutes: 0,
Target: Registry{
Type: "aws",
AWS: AWS{
ECROptions: ECROptions{
ImageTagMutability: "MUTABLE",
ImageScanningConfiguration: ImageScanningConfiguration{
ImageScanOnPush: true,
},
EncryptionConfiguration: EncryptionConfiguration{
EncryptionType: "AES256",
},
},
},
},
},
},
}

for _, test := range tests {
Expand Down
46 changes: 29 additions & 17 deletions pkg/registry/ecr.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,19 @@ import (
"github.com/estahn/k8s-image-swapper/pkg/config"
"github.com/go-co-op/gocron"
"github.com/rs/zerolog/log"
"github.com/spf13/viper"
)

type ECRClient struct {
client ecriface.ECRAPI
ecrDomain string
authToken []byte
cache *ristretto.Cache
scheduler *gocron.Scheduler
targetAccount string
options config.ECROptions
client ecriface.ECRAPI
ecrDomain string
authToken []byte
cache *ristretto.Cache
scheduler *gocron.Scheduler
targetAccount string
options config.ECROptions
cacheTtlMinutes int
cacheJitterMaxMinutes int
}

func NewECRClient(clientConfig config.AWS) (*ECRClient, error) {
Expand Down Expand Up @@ -78,12 +81,14 @@ func NewECRClient(clientConfig config.AWS) (*ECRClient, error) {
scheduler.StartAsync()

client := &ECRClient{
client: ecrClient,
ecrDomain: ecrDomain,
cache: cache,
scheduler: scheduler,
targetAccount: clientConfig.AccountID,
options: clientConfig.ECROptions,
client: ecrClient,
ecrDomain: ecrDomain,
cache: cache,
scheduler: scheduler,
targetAccount: clientConfig.AccountID,
options: clientConfig.ECROptions,
cacheTtlMinutes: viper.GetInt("CacheTtlMinutes"),
cacheJitterMaxMinutes: viper.GetInt("CacheJitterMaxMinutes"),
}

if err := client.scheduleTokenRenewal(); err != nil {
Expand Down Expand Up @@ -242,9 +247,11 @@ func (e *ECRClient) PutImage() error {

func (e *ECRClient) ImageExists(ctx context.Context, imageRef ctypes.ImageReference) bool {
ref := imageRef.DockerReference().String()
if _, found := e.cache.Get(ref); found {
log.Ctx(ctx).Trace().Str("ref", ref).Msg("found in cache")
return true
if e.cacheTtlMinutes > 0 {
if _, found := e.cache.Get(ref); found {
log.Ctx(ctx).Trace().Str("ref", ref).Msg("found in cache")
return true
}
}

app := "skopeo"
Expand All @@ -263,7 +270,12 @@ func (e *ECRClient) ImageExists(ctx context.Context, imageRef ctypes.ImageRefere

log.Ctx(ctx).Trace().Str("ref", ref).Msg("found in target repository")

e.cache.SetWithTTL(ref, "", 1, 24*time.Hour+time.Duration(rand.Intn(180))*time.Minute)
if e.cacheTtlMinutes > 0 {
// Add random jitter to prevent cache stampede
jitter := time.Duration(rand.Intn(e.cacheJitterMaxMinutes)) * time.Minute
cacheTtl := time.Duration(e.cacheTtlMinutes) * time.Minute
e.cache.SetWithTTL(ref, "", 1, cacheTtl+jitter)
}

return true
}
Expand Down
57 changes: 57 additions & 0 deletions pkg/registry/ecr_test.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
package registry

import (
"context"
"encoding/base64"
"testing"
"time"

"github.com/containers/image/v5/transports/alltransports"
"github.com/dgraph-io/ristretto"

"github.com/estahn/k8s-image-swapper/pkg/config"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -51,3 +54,57 @@ func TestECRIsOrigin(t *testing.T) {
assert.Equal(t, testcase.expected, result)
}
}

func TestEcrImageExistsCaching(t *testing.T) {
// Setup a test cache
cache, err := ristretto.NewCache(&ristretto.Config{
NumCounters: 1e7, // number of keys to track frequency of (10M).
MaxCost: 1 << 30, // maximum cost of cache (1GB).
BufferItems: 64, // number of keys per Get buffer.
})
assert.NoError(t, err)

tests := []struct {
name string
cacheTtlMinutes int
expectCached bool
}{
{
name: "cache disabled when TTL is 0",
cacheTtlMinutes: 0,
expectCached: false,
},
{
name: "cache enabled with TTL and jitter",
cacheTtlMinutes: 60,
expectCached: true,
},
}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
ctx := context.Background()
client := NewDummyECRClient("us-east-1", "12345678912", "", config.ECROptions{}, []byte(""))

// Setup cache
client.cache = cache
client.cacheTtlMinutes = tc.cacheTtlMinutes

// Create a test image reference and add to cache. Use 100ms as TTL
imageRef, err := alltransports.ParseImageName("docker://12345678912.dkr.ecr.us-east-1.amazonaws.com/test-project/repo/test-image:latest")
cache.SetWithTTL(imageRef.DockerReference().String(), true, 1, 100*time.Millisecond)
assert.NoError(t, err)

// Cache should be a hit
exists := client.ImageExists(ctx, imageRef)
assert.Equal(t, tc.expectCached, exists)

if tc.expectCached {
// Verify cache expiry
time.Sleep(time.Duration(150 * time.Millisecond)) // Use milliseconds for testing
_, found := client.cache.Get(imageRef.DockerReference().String())
assert.False(t, found, "cache entry should have expired")
}
})
}
}
38 changes: 25 additions & 13 deletions pkg/registry/gar.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,19 @@ import (
"google.golang.org/api/transport"

"github.com/rs/zerolog/log"
"github.com/spf13/viper"
)

type GARAPI interface{}

type GARClient struct {
client GARAPI
garDomain string
cache *ristretto.Cache
scheduler *gocron.Scheduler
authToken []byte
client GARAPI
garDomain string
cache *ristretto.Cache
scheduler *gocron.Scheduler
authToken []byte
cacheTtlMinutes int
cacheJitterMaxMinutes int
}

func NewGARClient(clientConfig config.GCP) (*GARClient, error) {
Expand All @@ -46,10 +49,12 @@ func NewGARClient(clientConfig config.GCP) (*GARClient, error) {
scheduler.StartAsync()

client := &GARClient{
client: nil,
garDomain: clientConfig.GarDomain(),
cache: cache,
scheduler: scheduler,
client: nil,
garDomain: clientConfig.GarDomain(),
cache: cache,
scheduler: scheduler,
cacheTtlMinutes: viper.GetInt("CacheTtlMinutes"),
cacheJitterMaxMinutes: viper.GetInt("CacheJitterMaxMinutes"),
}

if err := client.scheduleTokenRenewal(); err != nil {
Expand Down Expand Up @@ -132,9 +137,11 @@ func (e *GARClient) PutImage() error {

func (e *GARClient) ImageExists(ctx context.Context, imageRef ctypes.ImageReference) bool {
ref := imageRef.DockerReference().String()
if _, found := e.cache.Get(ref); found {
log.Ctx(ctx).Trace().Str("ref", ref).Msg("found in cache")
return true
if e.cacheTtlMinutes > 0 {
if _, found := e.cache.Get(ref); found {
log.Ctx(ctx).Trace().Str("ref", ref).Msg("found in cache")
return true
}
}

app := "skopeo"
Expand All @@ -153,7 +160,12 @@ func (e *GARClient) ImageExists(ctx context.Context, imageRef ctypes.ImageRefere

log.Ctx(ctx).Trace().Str("ref", ref).Msg("found in target repository")

e.cache.SetWithTTL(ref, "", 1, 24*time.Hour+time.Duration(rand.Intn(180))*time.Minute)
if e.cacheTtlMinutes > 0 {
// Add random jitter to prevent cache stampede
jitter := time.Duration(rand.Intn(e.cacheJitterMaxMinutes)) * time.Minute
cacheTtl := time.Duration(e.cacheTtlMinutes) * time.Minute
e.cache.SetWithTTL(ref, "", 1, cacheTtl+jitter)
}

return true
}
Expand Down
Loading
Loading