"""Token counting utilities for monitoring LLM usage.""" import tiktoken def count_tokens(text: str, model: str = "gpt-3.5-turbo") -> int: """ Count tokens in a text string using tiktoken. Args: text: Text to count tokens for model: Model name for tokenizer (default: gpt-3.5-turbo) Returns: Number of tokens in the text """ try: encoding = tiktoken.encoding_for_model(model) return len(encoding.encode(text)) except Exception as e: # Fallback: rough estimate (1 token ≈ 4 characters) print(f"Token counting error: {e}, using fallback") return len(text) // 4 def log_token_usage( conversation_id: str, system_tokens: int, history_tokens: int, user_tokens: int, total_tokens: int, output_mode: str, ) -> None: """ Log token usage for monitoring and optimization. Args: conversation_id: UUID of the conversation system_tokens: Tokens in system context history_tokens: Tokens in message history user_tokens: Tokens in current user message total_tokens: Total tokens in prompt output_mode: Current output mode """ log_entry = ( f"[TOKEN_USAGE] conv={conversation_id[:8]} | " f"mode={output_mode} | " f"system={system_tokens} | " f"history={history_tokens} | " f"user={user_tokens} | " f"total={total_tokens}" ) print(log_entry)