@@ -21,7 +21,6 @@ use crate::{
2121 metrics:: otel:: AnyMeterProvider ,
2222 utils:: spawn_blocking,
2323} ;
24- use anyhow:: anyhow;
2524use axum_extra:: headers;
2625use chrono:: { DateTime , Utc } ;
2726use dashmap:: DashMap ;
@@ -40,13 +39,11 @@ use std::{
4039 path:: { Path , PathBuf } ,
4140 str:: FromStr ,
4241 sync:: Arc ,
43- time:: Duration ,
4442} ;
4543use tokio:: {
4644 io:: { AsyncBufRead , AsyncBufReadExt } ,
4745 runtime,
4846 sync:: Mutex ,
49- time:: sleep,
5047} ;
5148use tracing:: { debug, error, info_span, instrument, trace, warn} ;
5249use walkdir:: WalkDir ;
@@ -383,19 +380,29 @@ impl AsyncStorage {
383380 latest_build_id : Option < BuildId > ,
384381 path : & str ,
385382 ) -> Result < bool > {
386- match self
387- . find_in_archive_index ( archive_path, latest_build_id, path)
388- . await
389- {
390- Ok ( file_info) => Ok ( file_info. is_some ( ) ) ,
391- Err ( err) => {
392- if err. downcast_ref :: < PathNotFoundError > ( ) . is_some ( ) {
393- Ok ( false )
394- } else {
395- Err ( err)
383+ for attempt in 0 ..2 {
384+ match self
385+ . find_in_archive_index ( archive_path, latest_build_id, path)
386+ . await
387+ {
388+ Ok ( file_info) => return Ok ( file_info. is_some ( ) ) ,
389+ Err ( err) if err. downcast_ref :: < PathNotFoundError > ( ) . is_some ( ) => {
390+ return Ok ( false ) ;
391+ }
392+ Err ( err) if attempt == 0 => {
393+ warn ! (
394+ ?err,
395+ "error fetching range from archive, purging local index cache and retrying once"
396+ ) ;
397+ self . purge_archive_index_cache ( archive_path, latest_build_id)
398+ . await ?;
399+
400+ continue ;
396401 }
402+ Err ( err) => return Err ( err) ,
397403 }
398404 }
405+ unreachable ! ( "exists_in_archive retry loop exited unexpectedly" ) ;
399406 }
400407
401408 /// get, decompress and materialize an object from store
@@ -557,54 +564,24 @@ impl AsyncStorage {
557564 }
558565
559566 let lock = self . local_index_cache_lock ( & local_index_path) ;
567+ let write_guard = lock. lock ( ) . await ;
560568
561- // At this point we know the index is missing or broken.
562- // Try to become the "downloader" without queueing as a writer.
563- if let Ok ( write_guard) = lock. try_lock ( ) {
564- // Double-check: maybe someone fixed it between our first failure and now.
565- if let Ok ( res) = archive_index:: find_in_file ( & local_index_path, path_in_archive) . await {
566- return Ok ( res) ;
567- }
568-
569- let remote_index_path = format ! ( "{archive_path}.{ARCHIVE_INDEX_FILE_EXTENSION}" ) ;
570-
571- // We are the repairer: download fresh index into place.
572- self . download_archive_index ( & local_index_path, & remote_index_path)
573- . await ?;
574-
575- // Write lock is dropped here (end of scope), so others can proceed.
576- drop ( write_guard) ;
577-
578- // Final attempt: if this still fails, bubble the error.
579- return archive_index:: find_in_file ( local_index_path, path_in_archive) . await ;
569+ // Double-check: maybe someone fixed it between our first failure and now.
570+ if let Ok ( res) = archive_index:: find_in_file ( & local_index_path, path_in_archive) . await {
571+ return Ok ( res) ;
580572 }
581573
582- // Someone else is already downloading/repairing. Don't queue on write(); just wait
583- // a bit and poll the fast path until it becomes readable or we give up.
584- const STEP_MS : u64 = 10 ;
585- const ATTEMPTS : u64 = 50 ; // = 500ms total wait
586- const TOTAL_WAIT_MS : u64 = STEP_MS * ATTEMPTS ;
574+ let remote_index_path = format ! ( "{archive_path}.{ARCHIVE_INDEX_FILE_EXTENSION}" ) ;
587575
588- let mut last_err = None ;
589-
590- for _ in 0 ..ATTEMPTS {
591- sleep ( Duration :: from_millis ( STEP_MS ) ) . await ;
576+ // We are the repairer: download fresh index into place.
577+ self . download_archive_index ( & local_index_path, & remote_index_path)
578+ . await ?;
592579
593- match archive_index:: find_in_file ( local_index_path. clone ( ) , path_in_archive) . await {
594- Ok ( res) => return Ok ( res) ,
595- Err ( err) => {
596- // keep waiting; repair may still be in progress
597- last_err = Some ( err) ;
598- }
599- }
600- }
580+ // Write lock is dropped here (end of scope), so others can proceed.
581+ drop ( write_guard) ;
601582
602- // Still not usable after waiting: return the last error we saw.
603- Err ( last_err
604- . unwrap_or_else ( || anyhow ! ( "archive index unavailable after repair wait" ) )
605- . context ( format ! (
606- "no archive index after waiting for {TOTAL_WAIT_MS}ms"
607- ) ) )
583+ // Final attempt: if this still fails, bubble the error.
584+ return archive_index:: find_in_file ( local_index_path, path_in_archive) . await ;
608585 }
609586
610587 #[ instrument]
0 commit comments