Merge pull request #129 from zai-org/update-check-deployment-en

add en deployment check
2025-12-12 13:04:22 +08:00
parent 1fa7348905 d4fc4dd2ad
commit b2e985a790
3 changed files with 156 additions and 0 deletions
--- a/README_en.md
+++ b/README_en.md
@@ -161,6 +161,26 @@ python3 -m vllm.entrypoints.openai.api_server \

 - After successful startup, the model service will be accessible at `http://localhost:8000/v1`. If you deploy the model on a remote server, access it using that server's IP address.

+### 4. Check Model Deployment
+
+After starting the model service, you can use the following command to verify the deployment:
+
+```bash
+python scripts/check_deployment_en.py --base-url http://localhost:8000/v1 --model autoglm-phone-9b-multilingual
+```
+
+If using a third-party model service:
+
+```bash
+# Novita AI
+python scripts/check_deployment_en.py --base-url https://api.novita.ai/openai --model zai-org/autoglm-phone-9b-multilingual --apikey your-novita-api-key
+
+# Parasail
+python scripts/check_deployment_en.py --base-url https://api.parasail.io/v1 --model parasail-auto-glm-9b-multilingual --apikey your-parasail-api-key
+```
+
+Upon successful execution, the script will display the model's inference result and token statistics, helping you confirm whether the model deployment is working correctly.
+
 ## Using AutoGLM

 ### Command Line
--- a/scripts/check_deployment_en.py
+++ b/scripts/check_deployment_en.py
@@ -0,0 +1,115 @@
+import argparse
+import json
+import os
+
+from openai import OpenAI
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Tool for checking if model deployment is successful",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Usage examples:
+  python scripts/check_deployment_en.py --base-url http://localhost:8000/v1 --apikey your-key --model autoglm-phone-9b
+  python scripts/check_deployment_en.py --base-url http://localhost:8000/v1 --apikey your-key --model autoglm-phone-9b --messages-file custom.json
+        """,
+    )
+
+    parser.add_argument(
+        "--base-url",
+        type=str,
+        required=True,
+        help="Base URL of the API service, e.g.: http://localhost:8000/v1",
+    )
+
+    parser.add_argument(
+        "--apikey", type=str, default="EMPTY", help="API key (default: EMPTY)"
+    )
+
+    parser.add_argument(
+        "--model",
+        type=str,
+        required=True,
+        help="Name of the model to test, e.g.: autoglm-phone-9b",
+    )
+
+    parser.add_argument(
+        "--messages-file",
+        type=str,
+        default="scripts/sample_messages_en.json",
+        help="Path to JSON file containing test messages (default: scripts/sample_messages_en.json)",
+    )
+
+    parser.add_argument(
+        "--max-tokens", type=int, default=3000, help="Maximum generation tokens (default: 3000)"
+    )
+
+    parser.add_argument(
+        "--temperature", type=float, default=0.0, help="Sampling temperature (default: 0.0)"
+    )
+
+    parser.add_argument(
+        "--top_p", type=float, default=0.85, help="Nucleus sampling parameter (default: 0.85)"
+    )
+
+    parser.add_argument(
+        "--frequency_penalty", type=float, default=0.2, help="Frequency penalty parameter (default: 0.2)"
+    )
+
+    args = parser.parse_args()
+
+    # Read test messages
+    if not os.path.exists(args.messages_file):
+        print(f"Error: Message file {args.messages_file} does not exist")
+        exit(1)
+
+    with open(args.messages_file) as f:
+        messages = json.load(f)
+
+    base_url = args.base_url
+    api_key = args.apikey
+    model = args.model
+
+    print(f"Starting model inference test...")
+    print(f"Base URL: {base_url}")
+    print(f"Model: {model}")
+    print(f"Messages file: {args.messages_file}")
+    print("=" * 80)
+
+    try:
+        client = OpenAI(
+            base_url=base_url,
+            api_key=api_key,
+        )
+
+        response = client.chat.completions.create(
+            messages=messages,
+            model=model,
+            max_tokens=args.max_tokens,
+            temperature=args.temperature,
+            top_p=args.top_p,
+            frequency_penalty=args.frequency_penalty,
+            stream=False,
+        )
+
+        print("\nModel inference result:")
+        print("=" * 80)
+        print(response.choices[0].message.content)
+        print("=" * 80)
+
+        if response.usage:
+            print(f"\nStatistics:")
+            print(f"  - Prompt tokens: {response.usage.prompt_tokens}")
+            print(f"  - Completion tokens: {response.usage.completion_tokens}")
+            print(f"  - Total tokens: {response.usage.total_tokens}")
+
+        print(f"\nPlease evaluate the above inference result to determine if the model deployment meets expectations.")
+
+    except Exception as e:
+        print(f"\nError occurred while calling API:")
+        print(f"Error type: {type(e).__name__}")
+        print(f"Error message: {str(e)}")
+        print(
+            "\nTip: Please check if base_url, api_key and model parameters are correct, and if the service is running."
+        )
+        exit(1)
--- a/scripts/sample_messages_en.json
+++ b/scripts/sample_messages_en.json