Rate-limiting (`rate-limit`)

The rate-limit component enables developers to limit access to resources such as API endpoints and background workers.

The component implements the token bucket algorithm, which provides high performance and high flexibility.

Configuring the rate limiter

When using the rate-limit component, you pass the desired configuration on every call, giving you full flexibility to dynamically configure how you enforce limits.

The configuration lets you configure:

Capacity: the maximum number of rate-limit tokens this rate-limiter can hold.
Refill rate: how many tokens to refill on every interval.
Refill interval: how often to refill the amount configured in refill rate (default is one second).

Here is an example configuration:


import { Temporal } from "temporal-polyfill-lite";
 
// Capacity: 10, refill rate: 1, refill interval: 1 second.
const config = {
    capacity: 10,
    refillAmount: 1,
    refillInterval: Temporal.Duration.from({ seconds: 1 }),
};


from datetime import timedelta
from diom.models import RateLimitConfig
 
# Capacity: 10, refill rate: 1, refill interval: 1 second.
config = RateLimitConfig(
    capacity=10,
    refill_amount=1,
    refill_interval_ms=timedelta(seconds=1),
)


use diom::models::RateLimitConfig;
use std::time::Duration;
 
// Capacity: 10, refill rate: 1, refill interval: 1 second.
let config = RateLimitConfig::new(10, 1)
    .with_refill_interval(Duration::from_secs(1));


import diom "diom.com/go/diom"
 
// Capacity: 10, refill rate: 1, refill interval: 1 second.
refillInterval := diom.DurationMs(1_000)
config := diom.RateLimitConfig{
    Capacity:       10,
    RefillAmount:   1,
    RefillInterval: &refillInterval,
}


import com.svix.diom.models.RateLimitConfig;
import java.time.Duration;
 
// Capacity: 10, refill rate: 1, refill interval: 1 second.
RateLimitConfig config = new RateLimitConfig()
    .capacity(10L)
    .refillAmount(1L)
    .refillInterval(Duration.ofSeconds(1));


# Config is passed inline with each command as JSON
# {"capacity": 10, "refill_amount": 1, "refill_interval_ms": 1000}

Using the rate limiter (`rate-limit.limit`)

To use the rate limiter you call the rate-limit.limit operation with a configuration like so:


import { Diom } from "diom";
import { Temporal } from "temporal-polyfill-lite";
 
const client = new Diom("AUTH_TOKEN");
const key = "user:42:api";
const config = {
    capacity: 10,
    refillAmount: 1,
    refillInterval: Temporal.Duration.from({ seconds: 1 }),
};
 
const out = await client.rateLimit.limit({ key, config });
 
if (out.allowed) {
    console.log(`Request allowed, ${out.remaining} tokens remaining`);
} else {
    const retryMs = out.retryAfter?.total("millisecond") ?? 0;
    console.log(`Request denied, retry after ${retryMs}ms`);
}


from datetime import timedelta
from diom import Diom
from diom.models import RateLimitCheckIn, RateLimitConfig
 
client = Diom("AUTH_TOKEN")
key = "user:42:api"
config = RateLimitConfig(
    capacity=10,
    refill_amount=1,
    refill_interval_ms=timedelta(seconds=1),
)
 
out = client.rate_limit.limit(RateLimitCheckIn(key=key, config=config))
 
if out.allowed:
    print(f"Request allowed, {out.remaining} tokens remaining")
else:
    retry_ms = out.retry_after.total_seconds() * 1000 if out.retry_after else 0
    print(f"Request denied, retry after {retry_ms:.0f}ms")


use diom::{DiomClient, models::{RateLimitCheckIn, RateLimitConfig}};
use std::time::Duration;
 
let client = DiomClient::new("AUTH_TOKEN".to_string(), None);
let key = "user:42:api".to_string();
let config = RateLimitConfig::new(10, 1)
    .with_refill_interval(Duration::from_secs(1));
 
let out = client.rate_limit()
    .limit(RateLimitCheckIn::new(key.clone(), config.clone()))
    .await?;
 
if out.allowed {
    println!("Request allowed, {} tokens remaining", out.remaining);
} else {
    let retry_ms = out.retry_after.unwrap_or_default().as_millis();
    println!("Request denied, retry after {retry_ms}ms");
}


import diom "diom.com/go/diom"
 
client, _ := diom.New("AUTH_TOKEN", nil)
refillInterval := diom.DurationMs(1_000)
out, err := client.RateLimit().Limit(ctx, diom.RateLimitCheckIn{
    Key: "user:42:api",
    Config: diom.RateLimitConfig{
        Capacity:       10,
        RefillAmount:   1,
        RefillInterval: &refillInterval,
    },
})
if err != nil {
    log.Fatal(err)
}
 
if out.Allowed {
    fmt.Printf("Request allowed, %d tokens remaining\n", out.Remaining)
} else {
    retryMs := uint64(0)
    if out.RetryAfter != nil {
        retryMs = out.RetryAfter.Milliseconds()
    }
    fmt.Printf("Request denied, retry after %dms\n", retryMs)
}


import com.svix.diom.Diom;
import com.svix.diom.models.RateLimitCheckIn;
import com.svix.diom.models.RateLimitCheckOut;
import com.svix.diom.models.RateLimitConfig;
import java.time.Duration;
 
Diom diom = new Diom("AUTH_TOKEN");
RateLimitCheckOut out = diom.rateLimit().limit(new RateLimitCheckIn()
    .key("user:42:api")
    .config(new RateLimitConfig()
        .capacity(10L)
        .refillAmount(1L)
        .refillInterval(Duration.ofSeconds(1))));
 
if (out.getAllowed()) {
    System.out.println("Request allowed, " + out.getRemaining() + " tokens remaining");
} else {
    long retryMs = out.getRetryAfter() != null ? out.getRetryAfter().toMillis() : 0;
    System.out.println("Request denied, retry after " + retryMs + "ms");
}


export DIOM_AUTH_TOKEN='AUTH_TOKEN'
diom rate-limit limit '{
  "key": "user:42:api",
  "config": {"capacity": 10, "refill_amount": 1, "refill_interval_ms": 1000}
}'

You can optionally consume more than 1 token per request:


const out = await client.rateLimit.limit({ key, config, tokens: 10 });


out = client.rate_limit.limit(RateLimitCheckIn(key=key, config=config, tokens=10))


let out = client.rate_limit()
    .limit(RateLimitCheckIn::new(key.clone(), config.clone()).with_tokens(10))
    .await?;


tokens := uint64(10)
out, _ := client.RateLimit().Limit(ctx, diom.RateLimitCheckIn{
    Key:    "user:42:api",
    Config: config,
    Tokens: &tokens,
})


RateLimitCheckOut out = diom.rateLimit().limit(new RateLimitCheckIn()
    .key("user:42:api")
    .config(config)
    .tokens(10L));


diom rate-limit limit '{
  "key": "user:42:api",
  "tokens": 10,
  "config": {"capacity": 10, "refill_amount": 1, "refill_interval_ms": 1000}
}'

This is useful when different requests have different “weights” (some are more expensive than others).

Get remaining tokens (`rate-limit.get-remaining`)

You can use the rate-limit.get-remaining operation to get the number of tokens remaining without consuming any.


// Peek at the remaining tokens without consuming any.
const remaining = await client.rateLimit.getRemaining({ key, config });
const retryMs = remaining.retryAfter?.total("millisecond") ?? 0;
console.log(`Tokens remaining ${remaining.remaining}, retry after ${retryMs}ms`);


from diom.models import RateLimitGetRemainingIn
 
# Peek at the remaining tokens without consuming any.
remaining = client.rate_limit.get_remaining(
    RateLimitGetRemainingIn(key=key, config=config)
)
retry_ms = remaining.retry_after.total_seconds() * 1000 if remaining.retry_after else 0
print(f"Tokens remaining {remaining.remaining}, retry after {retry_ms:.0f}ms")


// Peek at the remaining tokens without consuming any.
let remaining = client.rate_limit()
    .get_remaining(RateLimitGetRemainingIn::new(key.clone(), config.clone()))
    .await?;
// Retry after tells you when you'll have sufficient tokens available.
let retry_ms = remaining.retry_after.unwrap_or_default().as_millis();
println!("Tokens remaining {}, retry after {retry_ms}ms", remaining.remaining);


// Peek at the remaining tokens without consuming any.
remaining, _ := client.RateLimit().GetRemaining(ctx, diom.RateLimitGetRemainingIn{
    Key:    key,
    Config: config,
})
retryMs := uint64(0)
if remaining.RetryAfter != nil {
    retryMs = remaining.RetryAfter.Milliseconds()
}
fmt.Printf("Tokens remaining %d, retry after %dms\n", remaining.Remaining, retryMs)


import com.svix.diom.models.RateLimitGetRemainingIn;
import com.svix.diom.models.RateLimitGetRemainingOut;
 
// Peek at the remaining tokens without consuming any.
RateLimitGetRemainingOut remaining = diom.rateLimit().getRemaining(
    new RateLimitGetRemainingIn().key("user:42:api").config(config));
long retryMs = remaining.getRetryAfter() != null ? remaining.getRetryAfter().toMillis() : 0;
System.out.println("Tokens remaining " + remaining.getRemaining() + ", retry after " + retryMs + "ms");


diom rate-limit get-remaining '{
  "key": "user:42:api",
  "config": {"capacity": 10, "refill_amount": 1, "refill_interval_ms": 1000}
}'

Reset the bucket to capacity (`rate-limit.reset`)

To reset the bucket back to full capacity, call the rate-limit.reset operation:


// Reset the bucket back to full capacity.
await client.rateLimit.reset({ key, config });


from diom.models import RateLimitResetIn
 
# Reset the bucket back to full capacity.
client.rate_limit.reset(RateLimitResetIn(key=key, config=config))


// Reset the bucket back to full capacity.
client.rate_limit().reset(RateLimitResetIn::new(key.clone(), config.clone()))
    .await?;


// Reset the bucket back to full capacity.
client.RateLimit().Reset(ctx, diom.RateLimitResetIn{
    Key:    key,
    Config: config,
})


import com.svix.diom.models.RateLimitResetIn;
 
// Reset the bucket back to full capacity.
diom.rateLimit().reset(new RateLimitResetIn().key("user:42:api").config(config));


diom rate-limit reset '{
  "key": "user:42:api",
  "config": {"capacity": 10, "refill_amount": 1, "refill_interval_ms": 1000}
}'

Examples

Here are a few examples of common use-cases when implementing rate-limiting.

Allow temporary bursts over the limit

A common use-case with rate-limiting is to allow temporary bursts (that go over the allowed rate-limit), while still enforcing the rate-limit on an ongoing basis.

To achieve it you would want to configure the capacity to the maximum spike capacity, and the refill rate to your desired limit. For example, if you want to allow for 100 tokens per second with spikes of 200 tokens, you can configure capacity to be 200 and refill rate to 100.

This works because it lets users consume 200 tokens, but the refill happens much more slowly, which means that under load they would be limited to 100 tokens per interval, but if the tokens had enough time to replenish to capacity, they would be able to go over the limit.

Example configuration:


const config = {
    capacity: 200,
    refillAmount: 100,
    refillInterval: Temporal.Duration.from({ seconds: 1 }),
};


config = RateLimitConfig(capacity=200, refill_amount=100, refill_interval_ms=timedelta(seconds=1))


RateLimitConfig::new(200, 100)
    .with_refill_interval(Duration::from_secs(1));


refillInterval := diom.DurationMs(1_000)
config := diom.RateLimitConfig{Capacity: 200, RefillAmount: 100, RefillInterval: &refillInterval}


new RateLimitConfig().capacity(200L).refillAmount(100L).refillInterval(Duration.ofSeconds(1));


# {"capacity": 200, "refill_amount": 100, "refill_interval_ms": 1000}

Tiered rate-limits

A common pattern is to have different rate-limits depending on the pricing tier. Because the rate-limit configuration is passed on each request, to achieve it with Diom you can just pass a different configuration based on the tier.

For example:


const config = tier === "free"
    ? { capacity: 50, refillAmount: 50, refillInterval: Temporal.Duration.from({ seconds: 1 }) }
    : { capacity: 200, refillAmount: 100, refillInterval: Temporal.Duration.from({ seconds: 1 }) };
 
// Now use it with rateLimit.limit...


if tier == "free":
    config = RateLimitConfig(capacity=50, refill_amount=50, refill_interval_ms=timedelta(seconds=1))
else:
    config = RateLimitConfig(capacity=200, refill_amount=100, refill_interval_ms=timedelta(seconds=1))
 
# Now use it with rate_limit.limit...


let config = match tier {
    Tier::Free =>
        RateLimitConfig::new(50, 50)
            .with_refill_interval(Duration::from_secs(1)),
    Tier::Professional =>
        RateLimitConfig::new(200, 100)
            .with_refill_interval(Duration::from_secs(1)),
};
 
// Now use it with `rate-limit.limit`...


refillInterval := diom.DurationMs(1_000)
var config diom.RateLimitConfig
if tier == "free" {
    config = diom.RateLimitConfig{Capacity: 50, RefillAmount: 50, RefillInterval: &refillInterval}
} else {
    config = diom.RateLimitConfig{Capacity: 200, RefillAmount: 100, RefillInterval: &refillInterval}
}
 
// Now use it with RateLimit().Limit()...


RateLimitConfig config = tier.equals("free")
    ? new RateLimitConfig().capacity(50L).refillAmount(50L).refillInterval(Duration.ofSeconds(1))
    : new RateLimitConfig().capacity(200L).refillAmount(100L).refillInterval(Duration.ofSeconds(1));
 
// Now use it with rateLimit().limit()...


# Free tier: {"capacity": 50, "refill_amount": 50, "refill_interval_ms": 1000}
# Pro tier:  {"capacity": 200, "refill_amount": 100, "refill_interval_ms": 1000}

Multiple rate-limits

Sometimes you may want to have multiple rate-limits checked for a specific request. For example, you may have user-wide and organization-wide rate-limits in your service. Diom will soon add a built-in operation to support that, but until then you can use multiple rate-limit calls to emulate this behavior.


// Check user rate limit first
const userOut = await client.rateLimit.limit({ key: userKey, config: userConfig });
if (!userOut.allowed) return;
 
// Then check org rate limit
const orgOut = await client.rateLimit.limit({ key: orgKey, config: orgConfig });
if (!orgOut.allowed) return;
 
// Request allowed!


# Check user rate limit first
user_out = client.rate_limit.limit(RateLimitCheckIn(key=user_key, config=user_config))
if not user_out.allowed:
    return
 
# Then check org rate limit
org_out = client.rate_limit.limit(RateLimitCheckIn(key=org_key, config=org_config))
if not org_out.allowed:
    return
 
# Request allowed!


// Check user rate limit first
let out = client.rate_limit()
    .limit(RateLimitCheckIn::new(user_key.clone(), user_config.clone()))
    .await?;
 
if !out.allowed {
    return;
}
 
let out = client.rate_limit()
    .limit(RateLimitCheckIn::new(org_key.clone(), org_config.clone()))
    .await?;
 
if !out.allowed {
    return;
}
 
// Request allowed!


// Check user rate limit first
userOut, _ := client.RateLimit().Limit(ctx, diom.RateLimitCheckIn{Key: userKey, Config: userConfig})
if !userOut.Allowed {
    return
}
 
// Then check org rate limit
orgOut, _ := client.RateLimit().Limit(ctx, diom.RateLimitCheckIn{Key: orgKey, Config: orgConfig})
if !orgOut.Allowed {
    return
}
 
// Request allowed!


// Check user rate limit first
RateLimitCheckOut userOut = diom.rateLimit().limit(
    new RateLimitCheckIn().key(userKey).config(userConfig));
if (!userOut.getAllowed()) return;
 
// Then check org rate limit
RateLimitCheckOut orgOut = diom.rateLimit().limit(
    new RateLimitCheckIn().key(orgKey).config(orgConfig));
if (!orgOut.getAllowed()) return;
 
// Request allowed!


diom rate-limit limit '{"key": "user:42", "config": {...}}'
diom rate-limit limit '{"key": "org:7", "config": {...}}'

Smoothening rate-limiting (avoiding spikes)

A common issue with rate-limiting implementations is that traffic tends to be very spiky. The tokens are refilled every time an interval passes, which means that under load, most of the requests will happen at the window edges.

This can be solved using the token bucket algorithm by setting the configuration a bit differently. For example, if you want a rate-limit with 100 tokens that refills 100 tokens every 1 second, you can instead configure it to refill 1 token every 10 milliseconds. This still gives you a rate of 100 tokens per second, but the refill happens more smoothly throughout the desired one-second interval.


// Do this:
const smooth = { capacity: 100, refillAmount: 1, refillInterval: Temporal.Duration.from({ milliseconds: 10 }) };
 
// Instead of this:
const spiky = { capacity: 100, refillAmount: 100, refillInterval: Temporal.Duration.from({ seconds: 1 }) };


# Do this:
smooth = RateLimitConfig(capacity=100, refill_amount=1, refill_interval_ms=timedelta(milliseconds=10))
 
# Instead of this:
spiky = RateLimitConfig(capacity=100, refill_amount=100, refill_interval_ms=timedelta(seconds=1))


// Do this:
RateLimitConfig::new(100, 1)
    .with_refill_interval(Duration::from_millis(10));
 
// Instead of this:
RateLimitConfig::new(100, 100)
    .with_refill_interval(Duration::from_secs(1));


ri10ms := diom.DurationMs(10)
ri1s := diom.DurationMs(1_000)
 
// Do this:
smooth := diom.RateLimitConfig{Capacity: 100, RefillAmount: 1, RefillInterval: &ri10ms}
 
// Instead of this:
spiky := diom.RateLimitConfig{Capacity: 100, RefillAmount: 100, RefillInterval: &ri1s}


// Do this:
new RateLimitConfig().capacity(100L).refillAmount(1L).refillInterval(Duration.ofMillis(10));
 
// Instead of this:
new RateLimitConfig().capacity(100L).refillAmount(100L).refillInterval(Duration.ofSeconds(1));


# Do this:    {"capacity": 100, "refill_amount": 1,   "refill_interval_ms": 10}
# Not this:   {"capacity": 100, "refill_amount": 100, "refill_interval_ms": 1000}

Approximating the fixed window algorithm

The token bucket algorithm allows you to implement an equivalent of the fixed window algorithm. All you need to do is to set the capacity to the same value as the refill amount, and set the refill interval to the window size.