@@ -240,14 +240,87 @@ def _compress_messages(self, messages: List[Dict[str, Any]], llm_model: str, max
240
240
logger .info (f"_compress_messages: { uncompressed_total_token_count } -> { compressed_token_count } " ) # Log the token compression for debugging later
241
241
242
242
if max_iterations <= 0 :
243
- logger .warning (f"_compress_messages: Max iterations reached" )
243
+ logger .warning (f"_compress_messages: Max iterations reached, omitting messages" )
244
+ result = self ._compress_messages_by_omitting_messages (messages , llm_model , max_tokens )
244
245
return result
245
246
246
247
if (compressed_token_count > max_tokens ):
247
248
logger .warning (f"Further token compression is needed: { compressed_token_count } > { max_tokens } " )
248
249
result = self ._compress_messages (messages , llm_model , max_tokens , int (token_threshold / 2 ), max_iterations - 1 )
249
250
250
251
return result
252
+
253
+ def _compress_messages_by_omitting_messages (
254
+ self ,
255
+ messages : List [Dict [str , Any ]],
256
+ llm_model : str ,
257
+ max_tokens : Optional [int ] = 41000 ,
258
+ removal_batch_size : int = 10 ,
259
+ min_messages_to_keep : int = 10
260
+ ) -> List [Dict [str , Any ]]:
261
+ """Compress the messages by omitting messages from the middle.
262
+
263
+ Args:
264
+ messages: List of messages to compress
265
+ llm_model: Model name for token counting
266
+ max_tokens: Maximum allowed tokens
267
+ removal_batch_size: Number of messages to remove per iteration
268
+ min_messages_to_keep: Minimum number of messages to preserve
269
+ """
270
+ if not messages :
271
+ return messages
272
+
273
+ result = messages
274
+ result = self ._remove_meta_messages (result )
275
+
276
+ # Early exit if no compression needed
277
+ initial_token_count = token_counter (model = llm_model , messages = result )
278
+ max_allowed_tokens = max_tokens or (100 * 1000 )
279
+
280
+ if initial_token_count <= max_allowed_tokens :
281
+ return result
282
+
283
+ # Separate system message (assumed to be first) from conversation messages
284
+ system_message = messages [0 ] if messages and messages [0 ].get ('role' ) == 'system' else None
285
+ conversation_messages = result [1 :] if system_message else result
286
+
287
+ safety_limit = 500
288
+ current_token_count = initial_token_count
289
+
290
+ while current_token_count > max_allowed_tokens and safety_limit > 0 :
291
+ safety_limit -= 1
292
+
293
+ if len (conversation_messages ) <= min_messages_to_keep :
294
+ logger .warning (f"Cannot compress further: only { len (conversation_messages )} messages remain (min: { min_messages_to_keep } )" )
295
+ break
296
+
297
+ # Calculate removal strategy based on current message count
298
+ if len (conversation_messages ) > (removal_batch_size * 2 ):
299
+ # Remove from middle, keeping recent and early context
300
+ middle_start = len (conversation_messages ) // 2 - (removal_batch_size // 2 )
301
+ middle_end = middle_start + removal_batch_size
302
+ conversation_messages = conversation_messages [:middle_start ] + conversation_messages [middle_end :]
303
+ else :
304
+ # Remove from earlier messages, preserving recent context
305
+ messages_to_remove = min (removal_batch_size , len (conversation_messages ) // 2 )
306
+ if messages_to_remove > 0 :
307
+ conversation_messages = conversation_messages [messages_to_remove :]
308
+ else :
309
+ # Can't remove any more messages
310
+ break
311
+
312
+ # Recalculate token count
313
+ messages_to_count = ([system_message ] + conversation_messages ) if system_message else conversation_messages
314
+ current_token_count = token_counter (model = llm_model , messages = messages_to_count )
315
+
316
+ # Prepare final result
317
+ final_messages = ([system_message ] + conversation_messages ) if system_message else conversation_messages
318
+ final_token_count = token_counter (model = llm_model , messages = final_messages )
319
+
320
+ logger .info (f"_compress_messages_by_omitting_messages: { initial_token_count } -> { final_token_count } tokens ({ len (messages )} -> { len (final_messages )} messages)" )
321
+
322
+ return final_messages
323
+
251
324
252
325
def add_tool (self , tool_class : Type [Tool ], function_names : Optional [List [str ]] = None , ** kwargs ):
253
326
"""Add a tool to the ThreadManager."""
0 commit comments