Stage 03 — Building LLM-Powered Apps
Building LLM-Powered Apps · Comprehensive Technical Training · ⏱ 8–10 hours
Learning Objectives
By the end of this stage you will be able to:
- Call Claude, OpenAI, and Gemini APIs from Python
- Handle streaming responses for real-time output
- Use function calling (tools) to extend LLM capabilities
- Implement system prompts and multi-turn conversations
- Process images with vision models
- Build error handling and exponential backoff retry logic
- Estimate token usage and API costs
- Create a reusable multi-provider client abstraction
Section 1: API Setup and Authentication
OpenAI API
import os
from openai import OpenAI
# Set OPENAI_API_KEY environment variable; the key is read from it here.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Send a single user message and print the text of the first choice.
response = client.chat.completions.create(
    messages=[{"role": "user", "content": "Hello!"}],
    model="gpt-4o",
)
print(response.choices[0].message.content)
Anthropic Claude API
import os
from anthropic import Anthropic
# The key is read from the ANTHROPIC_API_KEY environment variable.
client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))

# Send a single user message; max_tokens is passed as the output limit.
response = client.messages.create(
    messages=[{"role": "user", "content": "Hello!"}],
    max_tokens=1024,
    model="claude-3-5-sonnet-20241022",
)
# Print the text of the first content block in the reply.
print(response.content[0].text)
Google Gemini API
import os
import google.generativeai as genai
# The key is read from the GOOGLE_API_KEY environment variable.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# Create a model handle, send one prompt, and print the reply text.
model = genai.GenerativeModel("gemini-1.5-pro")
response = model.generate_content("Hello!")
print(response.text)
Section 2: Chat Completions and Streaming
Basic Chat with Message History
def chat_with_memory():
    """Run an interactive chat loop that keeps the whole conversation.

    Every user turn and every assistant reply is appended to one list,
    and that full list is sent with each request so the model sees the
    prior turns. Typing ``quit`` (any letter case) ends the loop.
    """
    history = []
    while True:
        user_text = input("You: ")
        if user_text.lower() == "quit":
            return

        history.append({"role": "user", "content": user_text})
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=history,
            temperature=0.7,
        )

        # Store the reply so the next request includes it as context.
        reply = completion.choices[0].message.content
        history.append({"role": "assistant", "content": reply})
        print(f"Assistant: {reply}")
Streaming Responses
def stream_response(prompt):
    """Stream a chat completion for ``prompt`` and print it as it arrives.

    Args:
        prompt: Text sent as the single user message of the request.

    With ``stream=True`` the OpenAI client yields chunk objects rather
    than one finished response; the newly generated text of each chunk is
    in ``chunk.choices[0].delta.content``. (``text_stream`` is part of
    Anthropic's streaming helper and does not exist on this stream.)
    """
    with client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    ) as stream:
        for chunk in stream:
            # Some chunks carry no choices (e.g. usage-only chunks).
            if not chunk.choices:
                continue
            text = chunk.choices[0].delta.content
            # The content delta can be None/empty, e.g. on the final chunk.
            if text:
                print(text, end="", flush=True)
    print()  # newline
# Real-time output appears as tokens arrive
stream_response("Write a haiku about programming")
Section 3: Function Calling (Tool Use)
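Function calling lets an LLM request that your application run a named function with structured arguments; your code executes the function and returns the result to the model, enabling dynamic tool integration.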
Defining Tools
# Tool catalogue passed to the chat completion request via ``tools=``.
# Each entry describes one callable function and a JSON Schema for its
# arguments.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City name",
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                    },
                },
                # Only the location must be supplied; unit is optional.
                "required": ["location"],
            },
        },
    },
]
Handling Tool Calls
def get_weather(location, unit="celsius"):
    """Return a canned weather report for ``location``.

    Args:
        location: Place name inserted into the report.
        unit: Unit label inserted into the report.

    Returns:
        A fixed report string; no real weather lookup happens.
    """
    # Simplified stub
    report = f"20 degrees {unit} in {location}, sunny"
    return report
def process_tool_call(tool_name, tool_input):
    """Run the local function that matches a tool request.

    Args:
        tool_name: Name of the requested tool.
        tool_input: Mapping of keyword arguments for that tool.

    Returns:
        Whatever the matching tool function returns.

    Raises:
        ValueError: If ``tool_name`` is not a known tool.
    """
    # Guard clause: reject anything other than the one supported tool.
    if tool_name != "get_weather":
        raise ValueError(f"Unknown tool: {tool_name}")
    return get_weather(**tool_input)
import json  # needed to decode the JSON-encoded tool arguments below

# Call API with tools
messages = [{"role": "user", "content": "What's the weather in Paris?"}]
response = client.chat.completions.create(
    model="gpt-4o",
    messages=messages,
    tools=tools,
)

# If model requested a tool call
assistant_message = response.choices[0].message
if assistant_message.tool_calls:
    # Echo the assistant turn back exactly once, including its tool_calls:
    # each tool result below refers to one of these calls by id.
    messages.append({
        "role": "assistant",
        "content": assistant_message.content,
        "tool_calls": [
            {
                "id": tool_call.id,
                "type": "function",
                "function": {
                    "name": tool_call.function.name,
                    "arguments": tool_call.function.arguments,
                },
            }
            for tool_call in assistant_message.tool_calls
        ],
    })

    # Run every requested tool and add one result message per call.
    for tool_call in assistant_message.tool_calls:
        result = process_tool_call(
            tool_call.function.name,
            json.loads(tool_call.function.arguments),
        )
        # OpenAI chat completions expect tool output as a "tool" role
        # message keyed by tool_call_id (the "tool_result"/"tool_use_id"
        # content-block shape belongs to the Anthropic API).
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": result,
        })
Section 4: Error Handling and Retry Logic
import time
from openai import RateLimitError, APIConnectionError
def call_with_retries(prompt, max_retries=3, base_delay=1):
    """Send ``prompt`` to the chat API, retrying transient failures.

    Args:
        prompt: Text sent as the single user message.
        max_retries: Total number of attempts; must be at least 1.
        base_delay: Seconds to wait after the first failure; the wait
            doubles after each further failure.

    Returns:
        The message content of the first choice in the response.

    Raises:
        ValueError: If ``max_retries`` is less than 1. Without this check
            the loop would never run and the function would return None
            without ever calling the API.
        RateLimitError, APIConnectionError: Re-raised unchanged when the
            final attempt fails.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="gpt-4o",
                messages=[{"role": "user", "content": prompt}],
            )
            return response.choices[0].message.content
        except (RateLimitError, APIConnectionError) as e:
            # Out of attempts: bare raise keeps the original traceback.
            if attempt == max_retries - 1:
                raise
            delay = base_delay * (2 ** attempt)  # Exponential backoff
            if isinstance(e, RateLimitError):
                reason = "Rate limited"
            else:
                reason = "Connection error"
            print(f"{reason}. Waiting {delay}s...")
            time.sleep(delay)
# Never use bare except or lose error context
result = call_with_retries("Generate a poem")
Section 5: Vision API Calls
import base64
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    Args:
        image_path: Path of the file to read in binary mode.

    Returns:
        The standard-alphabet base64 encoding of the file's bytes,
        decoded to a ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.standard_b64encode(raw_bytes)
    return encoded.decode("utf-8")
def analyze_image(image_path, prompt="Describe this image"):
    """Ask the vision model a question about a local image file.

    Args:
        image_path: Path of the image to send.
        prompt: Instruction or question sent alongside the image.

    Returns:
        The message content of the first choice in the response.
    """
    import mimetypes  # local import keeps this snippet self-contained

    base64_image = encode_image(image_path)

    # Label the data URL with the file's real type (e.g. image/png for
    # "chart.png") instead of always claiming JPEG; keep image/jpeg as the
    # fallback when the extension is unknown or not an image type.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}",
                        },
                    },
                ],
            }
        ],
    )
    return response.choices[0].message.content
# Analyze screenshot or diagram
description = analyze_image("chart.png", "What does this graph show?")
print(description)
Section 6: Token Counting and Cost Estimation
import tiktoken
def count_tokens(text, model="gpt-4o"):
    """Count the tokens in ``text`` using the tokenizer for ``model``.

    Args:
        text: String to tokenize.
        model: Model name used to select the tiktoken encoding.

    Returns:
        Number of tokens produced by encoding ``text``. For a model name
        tiktoken cannot map to an encoding, the gpt-4o encoding is used,
        so the count is only an approximation for that model.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown model name: fall back to the gpt-4o tokenizer, the same
        # default estimate_cost uses for models missing from its table.
        encoding = tiktoken.encoding_for_model("gpt-4o")
    return len(encoding.encode(text))
def estimate_cost(prompt_tokens, completion_tokens, model="gpt-4o"):
    """Estimate the dollar cost of one request from its token counts.

    Args:
        prompt_tokens: Number of input tokens.
        completion_tokens: Number of output tokens.
        model: Model name. An exact table entry is preferred; otherwise
            the longest table entry that ``model`` extends with a
            ``-suffix`` is used (e.g. "claude-3-5-sonnet-20241022" is
            priced as "claude-3-5-sonnet"). Anything else is priced as
            "gpt-4o".

    Returns:
        Estimated cost in dollars as a float.
    """
    # Approximate pricing (check current rates)
    pricing = {
        "gpt-4o": {"prompt": 0.005, "completion": 0.015},  # per 1K tokens
        "gpt-4o-mini": {"prompt": 0.00015, "completion": 0.0006},
        "claude-3-5-sonnet": {"prompt": 0.003, "completion": 0.015},
    }

    rates = pricing.get(model)
    if rates is None:
        # Longest match wins so "gpt-4o-mini-..." is not priced as "gpt-4o".
        candidates = [name for name in pricing if model.startswith(name + "-")]
        if candidates:
            rates = pricing[max(candidates, key=len)]
        else:
            rates = pricing["gpt-4o"]

    # Rates are per 1K tokens, hence the division by 1000.
    cost = (prompt_tokens * rates["prompt"] + completion_tokens * rates["completion"]) / 1000
    return cost
# Estimate before making expensive calls
prompt = "Write a 5000-word essay on..."
tokens = count_tokens(prompt)
print(f"This prompt uses ~{tokens} tokens")
print(f"Estimated cost: ${estimate_cost(tokens, 5000):.2f}")
What's Next
Stage 4 focuses on RAG: building retrieval-augmented generation systems that ground LLM responses in your own documents and data sources, reducing hallucinations through real-time retrieval.
Lock In Founding Member Access
Get full access to every course on TechNodeX — AI, cybersecurity, Python, and everything we build next. $9/month, price locked forever.
Become a Founding Member →