Open-AutoGLM/scripts/check_deployment_en.py

import argparse
import json
import os
import sys

from openai import OpenAI
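
# Requires the OpenAI Python SDK v1+ ("pip install openai"), which provides the
# OpenAI client class used below.
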
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Tool for checking if model deployment is successful",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Usage examples:
  python scripts/check_deployment_en.py --base-url http://localhost:8000/v1 --apikey your-key --model autoglm-phone-9b
  python scripts/check_deployment_en.py --base-url http://localhost:8000/v1 --apikey your-key --model autoglm-phone-9b --messages-file custom.json
""",
    )
    parser.add_argument(
        "--base-url",
        type=str,
        required=True,
        help="Base URL of the API service, e.g.: http://localhost:8000/v1",
    )
    parser.add_argument(
        "--apikey", type=str, default="EMPTY", help="API key (default: EMPTY)"
    )
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Name of the model to test, e.g.: autoglm-phone-9b",
    )
    parser.add_argument(
        "--messages-file",
        type=str,
        default="scripts/sample_messages_en.json",
        help="Path to JSON file containing test messages (default: scripts/sample_messages_en.json)",
    )
    parser.add_argument(
        "--max-tokens",
        type=int,
        default=3000,
        help="Maximum generation tokens (default: 3000)",
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=0.0,
        help="Sampling temperature (default: 0.0)",
    )
    parser.add_argument(
        "--top_p",
        type=float,
        default=0.85,
        help="Nucleus sampling parameter (default: 0.85)",
    )
    parser.add_argument(
        "--frequency_penalty",
        type=float,
        default=0.2,
        help="Frequency penalty parameter (default: 0.2)",
    )
    args = parser.parse_args()

    # Read test messages
    if not os.path.exists(args.messages_file):
        print(f"Error: Message file {args.messages_file} does not exist")
        sys.exit(1)

    with open(args.messages_file, encoding="utf-8") as f:
        messages = json.load(f)
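
    # The file is passed straight to chat.completions.create, so it must follow
    # the OpenAI chat-completions schema: a JSON array of {"role", "content"}
    # objects. A minimal hypothetical example (the bundled
    # scripts/sample_messages_en.json may differ):
    #
    #   [
    #     {"role": "system", "content": "You are a helpful assistant."},
    #     {"role": "user", "content": "Open the settings app."}
    #   ]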

    base_url = args.base_url
    api_key = args.apikey
    model = args.model

    print("Starting model inference test...")
    print(f"Base URL: {base_url}")
    print(f"Model: {model}")
    print(f"Messages file: {args.messages_file}")
    print("=" * 80)

    try:
        client = OpenAI(
            base_url=base_url,
            api_key=api_key,
        )
        # Send the test messages as a single non-streaming chat completion request
        response = client.chat.completions.create(
            messages=messages,
            model=model,
            max_tokens=args.max_tokens,
            temperature=args.temperature,
            top_p=args.top_p,
            frequency_penalty=args.frequency_penalty,
            stream=False,
        )
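
        # With stream=False the SDK returns one complete ChatCompletion object.
        # For long generations a streaming variant is possible; a sketch using
        # the standard OpenAI SDK streaming interface (not needed for this check):
        #
        #   stream = client.chat.completions.create(..., stream=True)
        #   for chunk in stream:
        #       delta = chunk.choices[0].delta.content
        #       if delta:
        #           print(delta, end="", flush=True)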
print("\nModel inference result:")
print("=" * 80)
print(response.choices[0].message.content)
print("=" * 80)
if response.usage:
print(f"\nStatistics:")
print(f" - Prompt tokens: {response.usage.prompt_tokens}")
print(f" - Completion tokens: {response.usage.completion_tokens}")
print(f" - Total tokens: {response.usage.total_tokens}")
print(
f"\nPlease evaluate the above inference result to determine if the model deployment meets expectations."
)
except Exception as e:
print(f"\nError occurred while calling API:")
print(f"Error type: {type(e).__name__}")
print(f"Error message: {str(e)}")
print(
"\nTip: Please check if base_url, api_key and model parameters are correct, and if the service is running."
)
exit(1)
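
    # If the request fails, a lighter connectivity probe can help separate
    # network/configuration issues from model issues. A minimal sketch, assuming
    # the server exposes the standard OpenAI-compatible /models endpoint
    # (vLLM and most compatible servers do):
    #
    #   probe = OpenAI(base_url=base_url, api_key=api_key)
    #   for m in probe.models.list().data:
    #       print(m.id)  # the deployed model name should be listed here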