| """Token counting utilities for monitoring LLM usage.""" | |
| import tiktoken | |
| def count_tokens(text: str, model: str = "gpt-3.5-turbo") -> int: | |
| """ | |
| Count tokens in a text string using tiktoken. | |
| Args: | |
| text: Text to count tokens for | |
| model: Model name for tokenizer (default: gpt-3.5-turbo) | |
| Returns: | |
| Number of tokens in the text | |
| """ | |
| try: | |
| encoding = tiktoken.encoding_for_model(model) | |
| return len(encoding.encode(text)) | |
| except Exception as e: | |
| # Fallback: rough estimate (1 token ≈ 4 characters) | |
| print(f"Token counting error: {e}, using fallback") | |
| return len(text) // 4 | |
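
# Usage sketch (not part of the original module): exact counts depend on the
# installed tiktoken version. Under cl100k_base, the encoding tiktoken maps
# gpt-3.5-turbo to, "Hello, world!" splits into four tokens
# ("Hello", ",", " world", "!"):
#
#     >>> count_tokens("Hello, world!")
#     4
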
def log_token_usage(
    conversation_id: str,
    system_tokens: int,
    history_tokens: int,
    user_tokens: int,
    total_tokens: int,
    output_mode: str,
) -> None:
    """
    Log token usage for monitoring and optimization.

    Args:
        conversation_id: UUID of the conversation
        system_tokens: Tokens in system context
        history_tokens: Tokens in message history
        user_tokens: Tokens in current user message
        total_tokens: Total tokens in prompt
        output_mode: Current output mode
    """
    log_entry = (
        f"[TOKEN_USAGE] conv={conversation_id[:8]} | "
        f"mode={output_mode} | "
        f"system={system_tokens} | "
        f"history={history_tokens} | "
        f"user={user_tokens} | "
        f"total={total_tokens}"
    )
    print(log_entry)
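

if __name__ == "__main__":
    # Minimal end-to-end sketch (not in the original module): count each
    # prompt component separately, then log the breakdown. The conversation
    # ID and output mode below are hypothetical example values.
    system = "You are a helpful assistant."
    history = "User: hi\nAssistant: hello!"
    user = "Summarize our chat."

    sys_t = count_tokens(system)
    hist_t = count_tokens(history)
    user_t = count_tokens(user)

    log_token_usage(
        conversation_id="0f8fad5b-d9cb-469f-a165-70867728950e",  # example UUID
        system_tokens=sys_t,
        history_tokens=hist_t,
        user_tokens=user_t,
        total_tokens=sys_t + hist_t + user_t,
        output_mode="concise",  # hypothetical mode name
    )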