@@ -18,6 +18,12 @@ use axum_extra::{
1818} ;
1919use std:: time:: SystemTime ;
2020use tokio_util:: io:: ReaderStream ;
21+ use tracing:: warn;
22+
23+ // https://docs.fastly.com/products/compute-resource-limits#default-limits
24+ // https://www.fastly.com/documentation/guides/full-site-delivery/performance/failure-modes-with-large-objects/
25+ // https://www.fastly.com/documentation/guides/full-site-delivery/caching/segmented-caching/
26+ const FASTLY_CACHE_MAX_OBJECT_SIZE : usize = 100 * 1024 * 1024 ; // 100 MiB
2127
2228#[ derive( Debug ) ]
2329pub ( crate ) struct File ( pub ( crate ) Blob ) ;
@@ -41,9 +47,13 @@ impl File {
4147
4248#[ cfg( test) ]
4349impl File {
44- pub fn into_response ( self , if_none_match : Option < & IfNoneMatch > ) -> AxumResponse {
50+ pub fn into_response (
51+ self ,
52+ if_none_match : Option < & IfNoneMatch > ,
53+ cache_policy : CachePolicy ,
54+ ) -> AxumResponse {
4555 let streaming_blob: StreamingBlob = self . 0 . into ( ) ;
46- StreamingFile ( streaming_blob) . into_response ( if_none_match)
56+ StreamingFile ( streaming_blob) . into_response ( if_none_match, cache_policy )
4757 }
4858}
4959
@@ -56,8 +66,40 @@ impl StreamingFile {
5666 Ok ( StreamingFile ( storage. get_stream ( path) . await ?) )
5767 }
5868
59- pub fn into_response ( self , if_none_match : Option < & IfNoneMatch > ) -> AxumResponse {
60- const CACHE_POLICY : CachePolicy = CachePolicy :: ForeverInCdnAndBrowser ;
69+ pub fn into_response (
70+ self ,
71+ if_none_match : Option < & IfNoneMatch > ,
72+ mut cache_policy : CachePolicy ,
73+ ) -> AxumResponse {
74+ // by default Fastly can only cache objects up to 100 MiB.
75+ // Since we're streaming the response via chunked encoding, fastly itself doesn't know
76+ // the object size until the streamed data size is > 100 MiB. In this case fastly just
77+ // cuts the connection.
78+ // To avoid issues with caching large files, we disable CDN caching for files that are too
79+ // big.
80+ //
81+ // See:
82+ // https://docs.fastly.com/products/compute-resource-limits#default-limits
83+ // https://www.fastly.com/documentation/guides/full-site-delivery/performance/failure-modes-with-large-objects/
84+ // https://www.fastly.com/documentation/guides/full-site-delivery/caching/segmented-caching/
85+ //
86+ // For now I use the `NoStoreMustRevalidate` policy, the important cache-control statement
87+ // is only the `no-store` part.
88+ //
89+ // Future optimization could be:
90+ // * only forbid Fastly to store the object, while browsers still could.
91+ // * implement segmented caching for large files somehow.
92+ if self . 0 . content_length > FASTLY_CACHE_MAX_OBJECT_SIZE
93+ && !matches ! ( cache_policy, CachePolicy :: NoStoreMustRevalidate )
94+ {
95+ warn ! (
96+ storage_path = self . 0 . path,
97+ content_length = self . 0 . content_length,
98+ "Disabling CDN caching for large file"
99+ ) ;
100+ cache_policy = CachePolicy :: NoStoreMustRevalidate ;
101+ }
102+
61103 let last_modified = LastModified :: from ( SystemTime :: from ( self . 0 . date_updated ) ) ;
62104
63105 if let Some ( if_none_match) = if_none_match
@@ -69,7 +111,7 @@ impl StreamingFile {
69111 // it's generally recommended to repeat caching headers on 304 responses
70112 TypedHeader ( etag. clone ( ) ) ,
71113 TypedHeader ( last_modified) ,
72- Extension ( CACHE_POLICY ) ,
114+ Extension ( cache_policy ) ,
73115 )
74116 . into_response ( )
75117 } else {
@@ -81,7 +123,7 @@ impl StreamingFile {
81123 TypedHeader ( ContentType :: from ( self . 0 . mime ) ) ,
82124 TypedHeader ( last_modified) ,
83125 self . 0 . etag . map ( TypedHeader ) ,
84- Extension ( CACHE_POLICY ) ,
126+ Extension ( cache_policy ) ,
85127 Body :: from_stream ( stream) ,
86128 )
87129 . into_response ( )
@@ -92,12 +134,18 @@ impl StreamingFile {
92134#[ cfg( test) ]
93135mod tests {
94136 use super :: * ;
95- use crate :: { storage:: CompressionAlgorithm , test:: TestEnvironment , web:: headers:: compute_etag} ;
137+ use crate :: {
138+ storage:: CompressionAlgorithm ,
139+ test:: TestEnvironment ,
140+ web:: { cache:: STATIC_ASSET_CACHE_POLICY , headers:: compute_etag} ,
141+ } ;
96142 use axum_extra:: headers:: { ETag , HeaderMapExt as _} ;
97143 use chrono:: Utc ;
98144 use http:: header:: { CACHE_CONTROL , ETAG , LAST_MODIFIED } ;
99145 use std:: { io, rc:: Rc } ;
100146
147+ const CONTENT : & [ u8 ] = b"Hello, world!" ;
148+
101149 fn streaming_blob (
102150 content : impl Into < Vec < u8 > > ,
103151 alg : Option < CompressionAlgorithm > ,
@@ -114,13 +162,28 @@ mod tests {
114162 }
115163 }
116164
165+ #[ test]
166+ fn test_big_file_stream_drops_cache_policy ( ) {
167+ let mut stream = streaming_blob ( CONTENT , None ) ;
168+ stream. content_length = FASTLY_CACHE_MAX_OBJECT_SIZE + 1 ;
169+
170+ let response =
171+ StreamingFile ( stream) . into_response ( None , CachePolicy :: ForeverInCdnAndBrowser ) ;
172+ // even though we passed a cache policy in `into_response`, it should be overridden to
173+ // `NoStoreMustRevalidate` due to the large size of the file.
174+ let cache = response
175+ . extensions ( )
176+ . get :: < CachePolicy > ( )
177+ . expect ( "missing cache response extension" ) ;
178+ assert ! ( matches!( cache, CachePolicy :: NoStoreMustRevalidate ) ) ;
179+ }
180+
117181 #[ tokio:: test]
118182 async fn test_stream_into_response ( ) -> Result < ( ) > {
119- const CONTENT : & [ u8 ] = b"Hello, world!" ;
120183 let etag: ETag = {
121184 // first request normal
122185 let stream = StreamingFile ( streaming_blob ( CONTENT , None ) ) ;
123- let resp = stream. into_response ( None ) ;
186+ let resp = stream. into_response ( None , STATIC_ASSET_CACHE_POLICY ) ;
124187 assert ! ( resp. status( ) . is_success( ) ) ;
125188 assert ! ( resp. headers( ) . get( CACHE_CONTROL ) . is_none( ) ) ;
126189 let cache = resp
@@ -138,7 +201,7 @@ mod tests {
138201 {
139202 // cached request
140203 let stream = StreamingFile ( streaming_blob ( CONTENT , None ) ) ;
141- let resp = stream. into_response ( Some ( & if_none_match) ) ;
204+ let resp = stream. into_response ( Some ( & if_none_match) , STATIC_ASSET_CACHE_POLICY ) ;
142205 assert_eq ! ( resp. status( ) , StatusCode :: NOT_MODIFIED ) ;
143206
144207 // cache related headers are repeated on the not-modified response
@@ -172,7 +235,7 @@ mod tests {
172235
173236 file. 0 . date_updated = now;
174237
175- let resp = file. into_response ( None ) ;
238+ let resp = file. into_response ( None , STATIC_ASSET_CACHE_POLICY ) ;
176239 assert ! ( resp. status( ) . is_success( ) ) ;
177240 assert ! ( resp. headers( ) . get( CACHE_CONTROL ) . is_none( ) ) ;
178241 let cache = resp
0 commit comments