This tutorial shows you how to use DeepSeek-v3.2 API with Python, from basic calls to production-ready patterns. All examples use the OpenAI SDK, so if you've used GPT-4 before, you already know 90% of this.
pip install openai
from openai import OpenAI

# Point the OpenAI SDK at the DeepSeek-compatible endpoint.
client = OpenAI(
    api_key="nvai-your-api-key",
    base_url="https://aiapi-pro.com/v1",
)

# One-shot chat completion: the system message sets the assistant's role,
# the user message carries the actual request.
response = client.chat.completions.create(
    model="deepseek-v3.2",
    messages=[
        {"role": "system", "content": "You are a helpful coding assistant."},
        {"role": "user", "content": "Write a Python function to merge two sorted lists"},
    ],
    temperature=0.7,   # moderate creativity
    max_tokens=2048,   # cap on the reply length
)

print(response.choices[0].message.content)
print(f"Tokens used: {response.usage.total_tokens}")
Streaming is essential for chat applications. It shows output token-by-token instead of waiting for the full response:
# Request a streamed response: the API yields deltas as tokens are generated.
stream = client.chat.completions.create(
    model="deepseek-v3.2",
    messages=[{"role": "user", "content": "Explain async/await in Python"}],
    stream=True,
)

# Print each token fragment as it arrives instead of waiting for the full reply.
for chunk in stream:
    # Some chunks (e.g. the final bookkeeping chunk) can carry no choices.
    if not chunk.choices:
        continue
    content = chunk.choices[0].delta.content
    if content:
        print(content, end="", flush=True)
If you're building a FastAPI or async web app, use the async client:
import asyncio

from openai import AsyncOpenAI

# Async client for use inside FastAPI handlers or any asyncio application.
client = AsyncOpenAI(
    api_key="nvai-your-api-key",
    base_url="https://aiapi-pro.com/v1",
)


async def ask_deepseek(question: str) -> str:
    """Send a single question to the model and return the reply text."""
    response = await client.chat.completions.create(
        model="deepseek-v3.2",
        messages=[{"role": "user", "content": question}],
    )
    return response.choices[0].message.content


# Run multiple requests concurrently
async def main() -> None:
    tasks = [
        ask_deepseek("What is a binary tree?"),
        ask_deepseek("What is a hash map?"),
        ask_deepseek("What is dynamic programming?"),
    ]
    # gather() awaits all three requests at once; total latency is roughly
    # that of the slowest request, not the sum of all three.
    results = await asyncio.gather(*tasks)
    for r in results:
        print(r[:100], "...\n")


asyncio.run(main())
import time

from openai import OpenAI, APIError, RateLimitError, APIConnectionError

client = OpenAI(api_key="nvai-your-key", base_url="https://aiapi-pro.com/v1")


def safe_completion(messages, retries=3):
    """Call the chat API with automatic retries.

    Retries on rate limits (exponential backoff) and on transient connection
    errors (short fixed pause); gives up immediately on any other API error.
    Returns the response object, or None if every attempt failed.
    """
    for attempt in range(retries):
        try:
            return client.chat.completions.create(
                model="deepseek-v3.2",
                messages=messages,
                timeout=30,  # seconds; avoids hanging forever on a stuck request
            )
        except RateLimitError:
            time.sleep(2 ** attempt)  # Exponential backoff: 1s, 2s, 4s, ...
        except APIConnectionError:
            time.sleep(1)  # Transient network hiccup — brief pause, then retry
        except APIError as e:
            # Non-retryable API error (bad request, auth, server-side fault):
            # report it and stop retrying.
            print(f"API error: {e}")
            break
    return None
Use different models for different tasks through the same client:
# Same client, different models based on task

def code_review(code):
    """Ask the strongest coding model to review a code snippet."""
    return client.chat.completions.create(
        model="deepseek-v3.2",  # Best for code — $0.20/1M
        messages=[{"role": "user", "content": f"Review this code:\n{code}"}],
    )


def quick_classify(text):
    """Cheap, fast classification for high-volume workloads."""
    return client.chat.completions.create(
        model="qwen-turbo",  # Cheapest — $0.06/1M
        messages=[{"role": "user", "content": f"Classify: {text}"}],
    )


def free_test(prompt):
    """Zero-cost model, handy for experimenting before paying for tokens."""
    return client.chat.completions.create(
        model="glm-4.6v-flash",  # Completely FREE
        messages=[{"role": "user", "content": prompt}],
    )


# List all available models
models = client.models.list()
for m in models.data:
    print(f"{m.id:25s} by {m.owned_by}")
Free signup. Free model for testing. All code examples on this page work out of the box.
Sign Up Free →