add en deployment check
README_en.md (+20 lines)
@@ -141,6 +141,26 @@ python3 -m vllm.entrypoints.openai.api_server \

After successful startup, the model service will be accessible at `http://localhost:8000/v1`. If you deploy the model on a remote server, access it using that server's IP address.

### 4. Check Model Deployment

After starting the model service, you can use the following command to verify the deployment:

```bash
python scripts/check_deployment_en.py --base-url http://localhost:8000/v1 --model autoglm-phone-9b-multilingual
```

If using a third-party model service:

```bash
# Novita AI
python scripts/check_deployment_en.py --base-url https://api.novita.ai/openai --model zai-org/autoglm-phone-9b-multilingual --apikey your-novita-api-key

# Parasail
python scripts/check_deployment_en.py --base-url https://api.parasail.io/v1 --model parasail-auto-glm-9b-multilingual --apikey your-parasail-api-key
```

Upon successful execution, the script will display the model's inference result and token statistics, helping you confirm whether the model deployment is working correctly.
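
For reference, a successful run prints output of roughly the following shape (this mirrors the print statements in `scripts/check_deployment_en.py` below; the response text and token counts are placeholders, not real results):

```
Starting model inference test...
Base URL: http://localhost:8000/v1
Model: autoglm-phone-9b-multilingual
Messages file: scripts/sample_messages_en.json
================================================================================

Model inference result:
================================================================================
<model response text>
================================================================================

Statistics:
 - Prompt tokens: <count>
 - Completion tokens: <count>
 - Total tokens: <count>

Please evaluate the above inference result to determine if the model deployment meets expectations.
```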

## Using AutoGLM

### Command Line

scripts/check_deployment_en.py (new file, +115 lines)
@@ -0,0 +1,115 @@
import argparse
import json
import os

from openai import OpenAI

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Tool for checking if model deployment is successful",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Usage examples:
  python scripts/check_deployment_en.py --base-url http://localhost:8000/v1 --apikey your-key --model autoglm-phone-9b
  python scripts/check_deployment_en.py --base-url http://localhost:8000/v1 --apikey your-key --model autoglm-phone-9b --messages-file custom.json
""",
    )

    parser.add_argument(
        "--base-url",
        type=str,
        required=True,
        help="Base URL of the API service, e.g.: http://localhost:8000/v1",
    )
    parser.add_argument(
        "--apikey", type=str, default="EMPTY", help="API key (default: EMPTY)"
    )
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Name of the model to test, e.g.: autoglm-phone-9b",
    )
    parser.add_argument(
        "--messages-file",
        type=str,
        default="scripts/sample_messages_en.json",
        help="Path to JSON file containing test messages (default: scripts/sample_messages_en.json)",
    )
    parser.add_argument(
        "--max-tokens", type=int, default=3000, help="Maximum generation tokens (default: 3000)"
    )
    parser.add_argument(
        "--temperature", type=float, default=0.0, help="Sampling temperature (default: 0.0)"
    )
    parser.add_argument(
        "--top_p", type=float, default=0.85, help="Nucleus sampling parameter (default: 0.85)"
    )
    parser.add_argument(
        "--frequency_penalty", type=float, default=0.2, help="Frequency penalty parameter (default: 0.2)"
    )

    args = parser.parse_args()

    # Read test messages from the JSON file
    if not os.path.exists(args.messages_file):
        print(f"Error: Message file {args.messages_file} does not exist")
        exit(1)

    with open(args.messages_file) as f:
        messages = json.load(f)

    base_url = args.base_url
    api_key = args.apikey
    model = args.model

    print("Starting model inference test...")
    print(f"Base URL: {base_url}")
    print(f"Model: {model}")
    print(f"Messages file: {args.messages_file}")
    print("=" * 80)

    try:
        # The OpenAI SDK works against any OpenAI-compatible endpoint
        # (local vLLM, Novita AI, Parasail, ...).
        client = OpenAI(
            base_url=base_url,
            api_key=api_key,
        )

        # Single non-streaming chat completion with the requested sampling parameters
        response = client.chat.completions.create(
            messages=messages,
            model=model,
            max_tokens=args.max_tokens,
            temperature=args.temperature,
            top_p=args.top_p,
            frequency_penalty=args.frequency_penalty,
            stream=False,
        )

        print("\nModel inference result:")
        print("=" * 80)
        print(response.choices[0].message.content)
        print("=" * 80)

        if response.usage:
            print("\nStatistics:")
            print(f" - Prompt tokens: {response.usage.prompt_tokens}")
            print(f" - Completion tokens: {response.usage.completion_tokens}")
            print(f" - Total tokens: {response.usage.total_tokens}")

        print("\nPlease evaluate the above inference result to determine if the model deployment meets expectations.")

    except Exception as e:
        print("\nError occurred while calling API:")
        print(f"Error type: {type(e).__name__}")
        print(f"Error message: {str(e)}")
        print(
            "\nTip: Please check if base_url, api_key and model parameters are correct, and if the service is running."
        )
        exit(1)
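
The script above is a thin wrapper around a single chat-completions call. If the `openai` package is unavailable, a raw-HTTP request can serve the same sanity check; the following is a minimal sketch, assuming the local vLLM deployment from the README and the third-party `requests` package (neither the prompt text nor this snippet is part of the commit):

```python
# Sketch: raw-HTTP version of the same deployment check, assuming an
# OpenAI-compatible server (e.g. the vLLM deployment above).
import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    headers={"Authorization": "Bearer EMPTY"},  # local vLLM accepts a placeholder key
    json={
        "model": "autoglm-phone-9b-multilingual",
        "messages": [{"role": "user", "content": "Reply with the single word: OK"}],
        "max_tokens": 16,
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```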

scripts/sample_messages_en.json (new file, +21 lines)
File diff suppressed because one or more lines are too long
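
Although the sample file is too long to render here, `check_deployment_en.py` loads it with `json.load` and passes the result straight to `client.chat.completions.create`, so any OpenAI-style chat message list works as a replacement. A minimal sketch for writing a custom test file (the filename `custom.json` and the message contents are illustrative, not the repository's actual samples):

```python
# Illustrative only: the real scripts/sample_messages_en.json is suppressed in
# this diff. Any OpenAI-style chat message list is accepted by the checker.
import json

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Reply with the single word: OK"},
]

with open("custom.json", "w") as f:
    json.dump(messages, f, indent=2, ensure_ascii=False)
```

Then point the checker at it: `python scripts/check_deployment_en.py --base-url http://localhost:8000/v1 --model autoglm-phone-9b-multilingual --messages-file custom.json`.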