 // limitations under the License.

 use std::collections::{HashMap, HashSet};
-use std::ops::Bound;
+use std::ops::{Bound, ControlFlow};
 use std::path::PathBuf;
 use std::str::FromStr;
 use std::sync::{Arc, Mutex, RwLock};
@@ -942,11 +942,6 @@ fn is_simple_all_query(search_request: &SearchRequest) -> bool {
         return false;
     }

-    // TODO: Update the logic to handle start_timestamp end_timestamp ranges
-    if search_request.start_timestamp.is_some() || search_request.end_timestamp.is_some() {
-        return false;
-    }
-
     let Ok(query_ast) = serde_json::from_str(&search_request.query_ast) else {
         return false;
     };
@@ -1000,6 +995,26 @@ impl CanSplitDoBetter {
         }
     }

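+    /// Returns whether `split` is fully contained in the request's time range.
+    ///
+    /// For each bound present on the request, the split must carry the matching
+    /// timestamp metadata and satisfy it (`timestamp_start >= start_timestamp`,
+    /// `timestamp_end < end_timestamp`).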
+    fn is_contained(split: &SplitIdAndFooterOffsets, search_request: &SearchRequest) -> bool {
+        if let Some(start) = search_request.start_timestamp {
+            let Some(split_start) = split.timestamp_start else {
+                return false;
+            };
+            if split_start < start {
+                return false;
+            }
+        }
+        if let Some(end) = search_request.end_timestamp {
+            let Some(split_end) = split.timestamp_end else {
+                return false;
+            };
+            if split_end >= end {
+                return false;
+            }
+        }
+        true
+    }
+
     /// Optimize the order in which splits will get processed based on how it can skip the most
     /// splits.
     ///
@@ -1009,18 +1024,29 @@ impl CanSplitDoBetter {
     /// are the most likely to fill our Top K.
     /// In the future, as split get more metadata per column, we may be able to do this more than
     /// just for timestamp and "unsorted" request.
-    fn optimize_split_order(&self, splits: &mut [SplitIdAndFooterOffsets]) {
+    ///
+    /// To skip splits in time-ranged queries, we sort the splits first by whether they are
+    /// fully contained in the search request's time range.
+    fn optimize_split_order(
+        &self,
+        splits: &mut [SplitIdAndFooterOffsets],
+        search_request: &SearchRequest,
+    ) {
         match self {
             CanSplitDoBetter::SplitIdHigher(_) => {
                 splits.sort_unstable_by(|a, b| b.split_id.cmp(&a.split_id))
             }
             CanSplitDoBetter::SplitTimestampHigher(_)
             | CanSplitDoBetter::FindTraceIdsAggregation(_) => {
-                splits.sort_unstable_by_key(|split| std::cmp::Reverse(split.timestamp_end()))
-            }
-            CanSplitDoBetter::SplitTimestampLower(_) => {
-                splits.sort_unstable_by_key(|split| split.timestamp_start())
+                splits.sort_unstable_by_key(|split| {
+                    let contained = Self::is_contained(split, search_request);
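+                    // contained splits sort first (`!contained` is `false` for them),
+                    // then most recent `timestamp_end` first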
+                    (!contained, std::cmp::Reverse(split.timestamp_end()))
+                })
             }
+            CanSplitDoBetter::SplitTimestampLower(_) => splits.sort_unstable_by_key(|split| {
+                let contained = Self::is_contained(split, search_request);
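+                // contained splits sort first, then earliest `timestamp_start` first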
+                (!contained, split.timestamp_start())
+            }),
             CanSplitDoBetter::Uninformative => (),
         }
     }
@@ -1034,7 +1060,7 @@ impl CanSplitDoBetter {
         request: Arc<SearchRequest>,
         mut splits: Vec<SplitIdAndFooterOffsets>,
     ) -> Result<Vec<(SplitIdAndFooterOffsets, SearchRequest)>, SearchError> {
-        self.optimize_split_order(&mut splits);
+        self.optimize_split_order(&mut splits, &request);

         if !is_simple_all_query(&request) {
             // no optimization opportunity here.
@@ -1047,17 +1073,33 @@ impl CanSplitDoBetter {
         let num_requested_docs = request.start_offset + request.max_hits;

         // Calculate the number of splits which are guaranteed to deliver enough documents.
-        let min_required_splits = splits
-            .iter()
-            .map(|split| split.num_docs)
-            // computing the partial sum
-            .scan(0u64, |partial_sum: &mut u64, num_docs_in_split: u64| {
-                *partial_sum += num_docs_in_split;
-                Some(*partial_sum)
-            })
-            .take_while(|partial_sum| *partial_sum < num_requested_docs)
-            .count()
-            + 1;
+        let min_required_splits = {
+            let mut partial_sum = 0u64;
+
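+            // `try_fold` short-circuits with `Break` carrying the number of splits needed
+            // once the cumulative doc count reaches `num_requested_docs`; ending on
+            // `Continue` means even all contained splits are not enough.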
+            match splits
+                .iter()
+                // splits are sorted by whether they are contained in the request time range
+                .filter(|split| Self::is_contained(split, &request))
+                .map(|split| split.num_docs)
+                .try_fold(0usize, |count, num_docs_in_split| {
+                    partial_sum += num_docs_in_split;
+
+                    if partial_sum >= num_requested_docs {
+                        ControlFlow::Break(count + 1)
+                    } else {
+                        ControlFlow::Continue(count + 1)
+                    }
+                }) {
+                ControlFlow::Break(required) => required,
+                ControlFlow::Continue(_) => {
+                    // didn't reach num_requested_docs, nothing to optimize.
+                    return Ok(splits
+                        .into_iter()
+                        .map(|split| (split, (*request).clone()))
+                        .collect::<Vec<_>>());
+                }
+            }
+        };

         // TODO: we maybe want here some deduplication + Cow logic
         let mut split_with_req = splits