Skip to content

Commit 4db2505

Browse files
committed
make static cache policy explicit, disable caching for objects >100 MiB
1 parent efcedce commit 4db2505

File tree

5 files changed

+100
-43
lines changed

5 files changed

+100
-43
lines changed

src/web/cache.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ use tracing::error;
1818
/// This enables us to use the fastly "soft purge" for everything.
1919
pub const SURROGATE_KEY_ALL: SurrogateKey = SurrogateKey::from_static("all");
2020

21+
/// cache policy for static assets like rustdoc files or build assets.
22+
pub const STATIC_ASSET_CACHE_POLICY: CachePolicy = CachePolicy::ForeverInCdnAndBrowser;
23+
2124
#[derive(Debug, Clone, PartialEq)]
2225
pub struct ResponseCacheHeaders {
2326
pub cache_control: Option<HeaderValue>,

src/web/file.rs

Lines changed: 74 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ use axum_extra::{
1818
};
1919
use std::time::SystemTime;
2020
use tokio_util::io::ReaderStream;
21+
use tracing::warn;
22+
23+
// https://docs.fastly.com/products/compute-resource-limits#default-limits
24+
// https://www.fastly.com/documentation/guides/full-site-delivery/performance/failure-modes-with-large-objects/
25+
// https://www.fastly.com/documentation/guides/full-site-delivery/caching/segmented-caching/
26+
const FASTLY_CACHE_MAX_OBJECT_SIZE: usize = 100 * 1024 * 1024; // 100 MiB
2127

2228
#[derive(Debug)]
2329
pub(crate) struct File(pub(crate) Blob);
@@ -41,9 +47,13 @@ impl File {
4147

4248
#[cfg(test)]
4349
impl File {
44-
pub fn into_response(self, if_none_match: Option<&IfNoneMatch>) -> AxumResponse {
50+
pub fn into_response(
51+
self,
52+
if_none_match: Option<&IfNoneMatch>,
53+
cache_policy: CachePolicy,
54+
) -> AxumResponse {
4555
let streaming_blob: StreamingBlob = self.0.into();
46-
StreamingFile(streaming_blob).into_response(if_none_match)
56+
StreamingFile(streaming_blob).into_response(if_none_match, cache_policy)
4757
}
4858
}
4959

@@ -56,8 +66,40 @@ impl StreamingFile {
5666
Ok(StreamingFile(storage.get_stream(path).await?))
5767
}
5868

59-
pub fn into_response(self, if_none_match: Option<&IfNoneMatch>) -> AxumResponse {
60-
const CACHE_POLICY: CachePolicy = CachePolicy::ForeverInCdnAndBrowser;
69+
pub fn into_response(
70+
self,
71+
if_none_match: Option<&IfNoneMatch>,
72+
mut cache_policy: CachePolicy,
73+
) -> AxumResponse {
74+
// by default Fastly can only cache objects up to 100 MiB.
75+
// Since we're streaming the response via chunked encoding, fastly itself doesn't know
76+
// the object size until the streamed data size is > 100 MiB. In this case fastly just
77+
// cuts the connection.
78+
// To avoid issues with caching large files, we disable CDN caching for files that are too
79+
// big.
80+
//
81+
// See:
82+
// https://docs.fastly.com/products/compute-resource-limits#default-limits
83+
// https://www.fastly.com/documentation/guides/full-site-delivery/performance/failure-modes-with-large-objects/
84+
// https://www.fastly.com/documentation/guides/full-site-delivery/caching/segmented-caching/
85+
//
86+
// For now I use the `NoStoreMustRevalidate` policy, the important cache-control statement
87+
// is only the `no-store` part.
88+
//
89+
// Future optimization could be:
90+
// * only forbid fastly to store the object, while browsers still could.
91+
// * implement segmented caching for large files somehow.
92+
if self.0.content_length > FASTLY_CACHE_MAX_OBJECT_SIZE
93+
&& !matches!(cache_policy, CachePolicy::NoStoreMustRevalidate)
94+
{
95+
warn!(
96+
storage_path = self.0.path,
97+
content_length = self.0.content_length,
98+
"Disabling CDN caching for large file"
99+
);
100+
cache_policy = CachePolicy::NoStoreMustRevalidate;
101+
}
102+
61103
let last_modified = LastModified::from(SystemTime::from(self.0.date_updated));
62104

63105
if let Some(if_none_match) = if_none_match
@@ -69,7 +111,7 @@ impl StreamingFile {
69111
// it's generally recommended to repeat caching headers on 304 responses
70112
TypedHeader(etag.clone()),
71113
TypedHeader(last_modified),
72-
Extension(CACHE_POLICY),
114+
Extension(cache_policy),
73115
)
74116
.into_response()
75117
} else {
@@ -81,7 +123,7 @@ impl StreamingFile {
81123
TypedHeader(ContentType::from(self.0.mime)),
82124
TypedHeader(last_modified),
83125
self.0.etag.map(TypedHeader),
84-
Extension(CACHE_POLICY),
126+
Extension(cache_policy),
85127
Body::from_stream(stream),
86128
)
87129
.into_response()
@@ -92,12 +134,18 @@ impl StreamingFile {
92134
#[cfg(test)]
93135
mod tests {
94136
use super::*;
95-
use crate::{storage::CompressionAlgorithm, test::TestEnvironment, web::headers::compute_etag};
137+
use crate::{
138+
storage::CompressionAlgorithm,
139+
test::TestEnvironment,
140+
web::{cache::STATIC_ASSET_CACHE_POLICY, headers::compute_etag},
141+
};
96142
use axum_extra::headers::{ETag, HeaderMapExt as _};
97143
use chrono::Utc;
98144
use http::header::{CACHE_CONTROL, ETAG, LAST_MODIFIED};
99145
use std::{io, rc::Rc};
100146

147+
const CONTENT: &[u8] = b"Hello, world!";
148+
101149
fn streaming_blob(
102150
content: impl Into<Vec<u8>>,
103151
alg: Option<CompressionAlgorithm>,
@@ -114,13 +162,28 @@ mod tests {
114162
}
115163
}
116164

165+
#[test]
166+
fn test_big_file_stream_drops_cache_policy() {
167+
let mut stream = streaming_blob(CONTENT, None);
168+
stream.content_length = FASTLY_CACHE_MAX_OBJECT_SIZE + 1;
169+
170+
let response =
171+
StreamingFile(stream).into_response(None, CachePolicy::ForeverInCdnAndBrowser);
172+
// even though we passed a cache policy in `into_response`, it should be overridden to
173+
// `NoStoreMustRevalidate` due to the large size of the file.
174+
let cache = response
175+
.extensions()
176+
.get::<CachePolicy>()
177+
.expect("missing cache response extension");
178+
assert!(matches!(cache, CachePolicy::NoStoreMustRevalidate));
179+
}
180+
117181
#[tokio::test]
118182
async fn test_stream_into_response() -> Result<()> {
119-
const CONTENT: &[u8] = b"Hello, world!";
120183
let etag: ETag = {
121184
// first request normal
122185
let stream = StreamingFile(streaming_blob(CONTENT, None));
123-
let resp = stream.into_response(None);
186+
let resp = stream.into_response(None, STATIC_ASSET_CACHE_POLICY);
124187
assert!(resp.status().is_success());
125188
assert!(resp.headers().get(CACHE_CONTROL).is_none());
126189
let cache = resp
@@ -138,7 +201,7 @@ mod tests {
138201
{
139202
// cached request
140203
let stream = StreamingFile(streaming_blob(CONTENT, None));
141-
let resp = stream.into_response(Some(&if_none_match));
204+
let resp = stream.into_response(Some(&if_none_match), STATIC_ASSET_CACHE_POLICY);
142205
assert_eq!(resp.status(), StatusCode::NOT_MODIFIED);
143206

144207
// cache related headers are repeated on the not-modified response
@@ -172,7 +235,7 @@ mod tests {
172235

173236
file.0.date_updated = now;
174237

175-
let resp = file.into_response(None);
238+
let resp = file.into_response(None, STATIC_ASSET_CACHE_POLICY);
176239
assert!(resp.status().is_success());
177240
assert!(resp.headers().get(CACHE_CONTROL).is_none());
178241
let cache = resp

src/web/rustdoc.rs

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use crate::{
1111
utils::{self, Dependency},
1212
web::{
1313
MetaData, ReqVersion, axum_cached_redirect,
14-
cache::CachePolicy,
14+
cache::{CachePolicy, STATIC_ASSET_CACHE_POLICY},
1515
crate_details::CrateDetails,
1616
csp::Csp,
1717
error::{AxumNope, AxumResult},
@@ -228,7 +228,7 @@ async fn try_serve_legacy_toolchain_asset(
228228
// which is reached via the new handler.
229229
Ok(StreamingFile::from_path(&storage, &path)
230230
.await?
231-
.into_response(if_none_match))
231+
.into_response(if_none_match, STATIC_ASSET_CACHE_POLICY))
232232
}
233233

234234
/// Handler called for `/:crate` and `/:crate/:version` URLs. Automatically redirects to the docs
@@ -344,7 +344,8 @@ pub(crate) async fn rustdoc_redirector_handler(
344344
)
345345
.await
346346
{
347-
Ok(blob) => Ok(StreamingFile(blob).into_response(if_none_match.as_deref())),
347+
Ok(blob) => Ok(StreamingFile(blob)
348+
.into_response(if_none_match.as_deref(), STATIC_ASSET_CACHE_POLICY)),
348349
Err(err) => {
349350
if !matches!(err.downcast_ref(), Some(AxumNope::ResourceNotFound))
350351
&& !matches!(err.downcast_ref(), Some(crate::storage::PathNotFoundError))
@@ -763,7 +764,9 @@ pub(crate) async fn rustdoc_html_server_handler(
763764
// default asset caching behaviour is `CachePolicy::ForeverInCdnAndBrowser`.
764765
// This is an edge-case when we serve invocation specific static assets under `/latest/`:
765766
// https://github.com/rust-lang/docs.rs/issues/1593
766-
return Ok(StreamingFile(blob).into_response(if_none_match.as_deref()));
767+
return Ok(
768+
StreamingFile(blob).into_response(if_none_match.as_deref(), STATIC_ASSET_CACHE_POLICY)
769+
);
767770
}
768771

769772
let latest_release = krate.latest_release()?;
@@ -982,9 +985,11 @@ pub(crate) async fn json_download_handler(
982985
Some(wanted_compression),
983986
);
984987

988+
let cache_policy = CachePolicy::ForeverInCdn(krate.name.clone().into());
989+
985990
let (mut response, updated_storage_path) = match storage.get_raw_stream(&storage_path).await {
986991
Ok(file) => (
987-
StreamingFile(file).into_response(if_none_match.as_deref()),
992+
StreamingFile(file).into_response(if_none_match.as_deref(), cache_policy),
988993
None,
989994
),
990995
Err(err) if matches!(err.downcast_ref(), Some(crate::storage::PathNotFoundError)) => {
@@ -1003,7 +1008,7 @@ pub(crate) async fn json_download_handler(
10031008
// redirect to that as fallback
10041009
(
10051010
StreamingFile(storage.get_raw_stream(&storage_path).await?)
1006-
.into_response(if_none_match.as_deref()),
1011+
.into_response(if_none_match.as_deref(), cache_policy),
10071012
Some(storage_path),
10081013
)
10091014
} else {
@@ -1013,13 +1018,6 @@ pub(crate) async fn json_download_handler(
10131018
Err(err) => return Err(err.into()),
10141019
};
10151020

1016-
// StreamingFile::into_response automatically set the default cache-policy for
1017-
// static assets (ForeverInCdnAndBrowser).
1018-
// Here we override it with the standard policy for build output.
1019-
response
1020-
.extensions_mut()
1021-
.insert(CachePolicy::ForeverInCdn(krate.name.clone().into()));
1022-
10231021
// set content-disposition to attachment to trigger download in browsers
10241022
// For the attachment filename we can use just the filename without the path,
10251023
// since that already contains all the info.
@@ -1059,15 +1057,10 @@ pub(crate) async fn download_handler(
10591057
let version = &matched_release.release.version;
10601058
let archive_path = rustdoc_archive_path(params.name(), version);
10611059

1062-
let mut response = StreamingFile(storage.get_raw_stream(&archive_path).await?)
1063-
.into_response(if_none_match.as_deref());
1064-
1065-
// StreamingFile::into_response automatically set the default cache-policy for
1066-
// static assets (ForeverInCdnAndBrowser).
1067-
// Here we override it with the standard policy for build output.
1068-
response
1069-
.extensions_mut()
1070-
.insert(CachePolicy::ForeverInCdn(matched_release.name.into()));
1060+
let mut response = StreamingFile(storage.get_raw_stream(&archive_path).await?).into_response(
1061+
if_none_match.as_deref(),
1062+
CachePolicy::ForeverInCdn(matched_release.name.into()),
1063+
);
10711064

10721065
// set content-disposition to attachment to trigger download in browsers
10731066
response.headers_mut().insert(
@@ -1092,7 +1085,7 @@ pub(crate) async fn static_asset_handler(
10921085

10931086
Ok(StreamingFile::from_path(&storage, &storage_path)
10941087
.await?
1095-
.into_response(if_none_match.as_deref()))
1088+
.into_response(if_none_match.as_deref(), STATIC_ASSET_CACHE_POLICY))
10961089
}
10971090

10981091
#[cfg(test)]

src/web/source.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,14 @@ use crate::{
55
storage::PathNotFoundError,
66
web::{
77
MetaData, ReqVersion,
8-
cache::CachePolicy,
8+
cache::{CachePolicy, STATIC_ASSET_CACHE_POLICY},
99
error::{AxumNope, AxumResult},
1010
extractors::{
1111
DbConnection,
1212
rustdoc::{PageKind, RustdocParams},
1313
},
1414
file::StreamingFile,
15-
headers::CanonicalUrl,
16-
headers::IfNoneMatch,
15+
headers::{CanonicalUrl, IfNoneMatch},
1716
match_version,
1817
page::templates::{RenderBrands, RenderRegular, RenderSolid, filters},
1918
},
@@ -285,7 +284,8 @@ pub(crate) async fn source_browser_handler(
285284
let is_text = stream.mime.type_() == mime::TEXT || stream.mime == mime::APPLICATION_JSON;
286285
if !is_text {
287286
// if the file isn't text, serve it directly to the client
288-
let mut response = StreamingFile(stream).into_response(if_none_match.as_deref());
287+
let mut response = StreamingFile(stream)
288+
.into_response(if_none_match.as_deref(), STATIC_ASSET_CACHE_POLICY);
289289
response.headers_mut().typed_insert(canonical_url);
290290
response
291291
.extensions_mut()

src/web/statics.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use super::{
22
cache::CachePolicy, headers::IfNoneMatch, metrics::request_recorder, routes::get_static,
33
};
4-
use crate::db::mimes::APPLICATION_OPENSEARCH_XML;
4+
use crate::{db::mimes::APPLICATION_OPENSEARCH_XML, web::cache::STATIC_ASSET_CACHE_POLICY};
55
use axum::{
66
Router as AxumRouter,
77
extract::{Extension, Request},
@@ -24,13 +24,11 @@ const RUSTDOC_2021_12_05_CSS: &str =
2424
const RUSTDOC_2025_08_20_CSS: &str =
2525
include_str!(concat!(env!("OUT_DIR"), "/rustdoc-2025-08-20.css"));
2626

27-
const STATIC_CACHE_POLICY: CachePolicy = CachePolicy::ForeverInCdnAndBrowser;
28-
2927
include!(concat!(env!("OUT_DIR"), "/static_etag_map.rs"));
3028

3129
fn build_static_css_response(content: &'static str) -> impl IntoResponse {
3230
(
33-
Extension(STATIC_CACHE_POLICY),
31+
Extension(STATIC_ASSET_CACHE_POLICY),
3432
TypedHeader(ContentType::from(mime::TEXT_CSS)),
3533
content,
3634
)
@@ -43,7 +41,7 @@ async fn set_needed_static_headers(req: Request, next: Next) -> Response {
4341
let mut response = next.run(req).await;
4442

4543
if response.status().is_success() {
46-
response.extensions_mut().insert(STATIC_CACHE_POLICY);
44+
response.extensions_mut().insert(STATIC_ASSET_CACHE_POLICY);
4745
}
4846

4947
if is_opensearch_xml {

0 commit comments

Comments
 (0)