Skip to content

Commit 7266113

Browse files
authored
Merge pull request #770 from tnfssc/fix/context-window-finale
2 parents e02ba93 + e8dbc55 commit 7266113

File tree

1 file changed

+74
-1
lines changed

1 file changed

+74
-1
lines changed

backend/agentpress/thread_manager.py

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,14 +240,87 @@ def _compress_messages(self, messages: List[Dict[str, Any]], llm_model: str, max
240240
logger.info(f"_compress_messages: {uncompressed_total_token_count} -> {compressed_token_count}") # Log the token compression for debugging later
241241

242242
if max_iterations <= 0:
243-
logger.warning(f"_compress_messages: Max iterations reached")
243+
logger.warning(f"_compress_messages: Max iterations reached, omitting messages")
244+
result = self._compress_messages_by_omitting_messages(messages, llm_model, max_tokens)
244245
return result
245246

246247
if (compressed_token_count > max_tokens):
247248
logger.warning(f"Further token compression is needed: {compressed_token_count} > {max_tokens}")
248249
result = self._compress_messages(messages, llm_model, max_tokens, int(token_threshold / 2), max_iterations - 1)
249250

250251
return result
252+
253+
def _compress_messages_by_omitting_messages(
    self,
    messages: List[Dict[str, Any]],
    llm_model: str,
    max_tokens: Optional[int] = 41000,
    removal_batch_size: int = 10,
    min_messages_to_keep: int = 10
) -> List[Dict[str, Any]]:
    """Compress the messages by omitting messages from the middle.

    Strips meta messages first, then — if the token count still exceeds the
    limit — repeatedly drops batches of conversation messages from the middle
    (preserving the system message, the earliest context, and the most recent
    messages) until the conversation fits or no further removal is possible.

    Args:
        messages: List of messages to compress
        llm_model: Model name for token counting
        max_tokens: Maximum allowed tokens; ``None`` falls back to 100k
        removal_batch_size: Number of messages to remove per iteration
        min_messages_to_keep: Minimum number of conversation messages to preserve

    Returns:
        The meta-stripped message list, shortened as needed to fit the limit.
    """
    if not messages:
        return messages

    result = self._remove_meta_messages(messages)

    # Early exit if no compression needed
    initial_token_count = token_counter(model=llm_model, messages=result)
    max_allowed_tokens = max_tokens or (100 * 1000)

    if initial_token_count <= max_allowed_tokens:
        return result

    # Separate system message (assumed to be first) from conversation messages.
    # FIX: inspect `result` (post meta-removal) rather than the original
    # `messages`, so the role check and the `result[1:]` slice agree on
    # which list — and which head element — they refer to.
    system_message = result[0] if result and result[0].get('role') == 'system' else None
    conversation_messages = result[1:] if system_message else result

    safety_limit = 500  # hard iteration cap so the loop always terminates
    current_token_count = initial_token_count

    while current_token_count > max_allowed_tokens and safety_limit > 0:
        safety_limit -= 1

        if len(conversation_messages) <= min_messages_to_keep:
            logger.warning(f"Cannot compress further: only {len(conversation_messages)} messages remain (min: {min_messages_to_keep})")
            break

        # Calculate removal strategy based on current message count
        if len(conversation_messages) > (removal_batch_size * 2):
            # Remove from middle, keeping recent and early context
            middle_start = len(conversation_messages) // 2 - (removal_batch_size // 2)
            middle_end = middle_start + removal_batch_size
            conversation_messages = conversation_messages[:middle_start] + conversation_messages[middle_end:]
        else:
            # Remove from earlier messages, preserving recent context.
            # FIX: also cap by the distance to min_messages_to_keep, so the
            # fallback branch cannot shrink the conversation below the
            # documented minimum (e.g. 11 messages no longer drops to 6).
            messages_to_remove = min(
                removal_batch_size,
                len(conversation_messages) // 2,
                len(conversation_messages) - min_messages_to_keep,
            )
            if messages_to_remove > 0:
                conversation_messages = conversation_messages[messages_to_remove:]
            else:
                # Can't remove any more messages
                break

        # Recalculate token count
        messages_to_count = ([system_message] + conversation_messages) if system_message else conversation_messages
        current_token_count = token_counter(model=llm_model, messages=messages_to_count)

    # Prepare final result
    final_messages = ([system_message] + conversation_messages) if system_message else conversation_messages
    final_token_count = token_counter(model=llm_model, messages=final_messages)

    logger.info(f"_compress_messages_by_omitting_messages: {initial_token_count} -> {final_token_count} tokens ({len(messages)} -> {len(final_messages)} messages)")

    return final_messages
323+
251324

252325
def add_tool(self, tool_class: Type[Tool], function_names: Optional[List[str]] = None, **kwargs):
253326
"""Add a tool to the ThreadManager."""

0 commit comments

Comments
 (0)