Stage 03 — Building LLM-Powered Apps

Building LLM-Powered Apps · Comprehensive Technical Training · ⏱ 8–10 hours

Learning Objectives

By the end of this stage you will be able to:

Call Claude, OpenAI, and Gemini APIs from Python
Handle streaming responses for real-time output
Use function calling (tools) to extend LLM capabilities
Implement system prompts and multi-turn conversations
Process images with vision models
Build error handling and exponential backoff retry logic
Estimate token usage and API costs
Create a reusable multi-provider client abstraction

Section 1: API Setup and Authentication

OpenAI API

import os
from openai import OpenAI

# The API key is read from the OPENAI_API_KEY environment variable
openai_api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=openai_api_key)

# Send a single user message and print the text of the first choice
greeting = [{"role": "user", "content": "Hello!"}]
response = client.chat.completions.create(model="gpt-4o", messages=greeting)
first_choice = response.choices[0]
print(first_choice.message.content)

Anthropic Claude API

import os
from anthropic import Anthropic

# Use a provider-specific name here. The later sections call
# client.chat.completions.create(...) on the OpenAI client, so rebinding
# `client` to an Anthropic instance would break them.
anthropic_client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

# Send a single user message; the reply text is read from the first
# content block of the response.
response = anthropic_client.messages.create(
    model="claude-3-5-sonnet-20241022",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.content[0].text)

Google Gemini API

import os
import google.generativeai as genai

# Configure the SDK with the key read from GOOGLE_API_KEY
google_api_key = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=google_api_key)

# Create a model handle, send one prompt, and print the reply text
model = genai.GenerativeModel("gemini-1.5-pro")
response = model.generate_content("Hello!")
print(response.text)

Section 2: Chat Completions and Streaming

Basic Chat with Message History

def chat_with_memory():
    """Run an interactive chat loop that keeps the whole conversation as context.

    Every user turn and assistant reply is appended to a local ``messages``
    list, and the full list is sent with each request so the model sees the
    earlier turns.

    Type ``quit`` (any case, surrounding whitespace ignored) to leave the
    loop. End-of-input or Ctrl-C at the prompt also ends the loop cleanly.
    Blank lines are ignored rather than sent to the API.
    """
    messages = []

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: leave quietly instead of with a traceback.
            print()
            break

        if user_input.lower() == "quit":
            break
        if not user_input:
            # Nothing to send; do not spend an API call on an empty turn.
            continue

        messages.append({"role": "user", "content": user_input})

        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            temperature=0.7,
        )

        # Store the reply so the next request includes it as history.
        assistant_message = response.choices[0].message.content
        messages.append({"role": "assistant", "content": assistant_message})

        print(f"Assistant: {assistant_message}")

Streaming Responses

def stream_response(prompt):
    """Send ``prompt`` to gpt-4o and print the reply incrementally as it streams.

    With ``stream=True`` the chat completions call returns an iterable of
    chunk objects. Each chunk carries the newly generated text in
    ``choices[0].delta.content``.

    Args:
        prompt: Text of the single user message to send.
    """
    stream = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )
    for chunk in stream:
        # Some chunks carry no choices or no text (e.g. role-only or final
        # chunks), so skip anything without content.
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        if text:
            print(text, end="", flush=True)
    print()  # newline

# Output is printed piece by piece as tokens arrive
haiku_prompt = "Write a haiku about programming"
stream_response(haiku_prompt)

Section 3: Function Calling (Tool Use)

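Function calling lets an LLM request that your application run a function: the model returns the function name and JSON arguments, your code executes the function, and you send the result back to the model. This is how you connect a model to external tools and data.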

Defining Tools

# JSON Schema for the arguments of get_weather: a required city name and an
# optional temperature unit restricted to two values.
_weather_parameters = {
    "type": "object",
    "properties": {
        "location": {"type": "string", "description": "City name"},
        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
    },
    "required": ["location"],
}

# Tool list passed as the `tools` argument of a chat completion request.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather for a location",
            "parameters": _weather_parameters,
        },
    },
]

Handling Tool Calls

def get_weather(location, unit="celsius"):
    """Return a canned weather report for ``location`` (stub, no real lookup).

    Args:
        location: Place name inserted into the report.
        unit: Unit label inserted into the report; defaults to "celsius".

    Returns:
        A fixed-format report string.
    """
    report = "20 degrees {unit} in {location}, sunny"
    return report.format(unit=unit, location=location)

def process_tool_call(tool_name, tool_input):
    """Run the local function that implements the requested tool.

    Args:
        tool_name: Name of the tool the model asked for.
        tool_input: Dict of keyword arguments for that tool.

    Returns:
        Whatever the tool function returns.

    Raises:
        ValueError: If ``tool_name`` is not a known tool.
    """
    # Guard clause: reject anything that is not the one supported tool.
    if tool_name != "get_weather":
        raise ValueError(f"Unknown tool: {tool_name}")
    return get_weather(**tool_input)

import json  # tool arguments arrive as a JSON string

# Call API with tools
messages = [{"role": "user", "content": "What's the weather in Paris?"}]
response = client.chat.completions.create(
    model="gpt-4o",
    messages=messages,
    tools=tools,
)

assistant_message = response.choices[0].message

# If model requested a tool call
if assistant_message.tool_calls:
    # Record the assistant turn once, including its tool_calls. Each tool
    # result below refers back to one of these calls by id.
    messages.append({
        "role": "assistant",
        "content": assistant_message.content,
        "tool_calls": [
            {
                "id": tool_call.id,
                "type": "function",
                "function": {
                    "name": tool_call.function.name,
                    "arguments": tool_call.function.arguments,
                },
            }
            for tool_call in assistant_message.tool_calls
        ],
    })

    for tool_call in assistant_message.tool_calls:
        result = process_tool_call(
            tool_call.function.name,
            json.loads(tool_call.function.arguments),
        )

        # Chat Completions expects each result as a "tool" message linked
        # to its call through tool_call_id.
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": result,
        })

    # `messages` now holds the full exchange. Send it in another
    # chat.completions.create call to get the model's final answer.

Section 4: Error Handling and Retry Logic

import time
from openai import RateLimitError, APIConnectionError

def call_with_retries(prompt, max_retries=3, base_delay=1):
    """Send ``prompt`` to gpt-4o, retrying transient failures with exponential backoff.

    Args:
        prompt: Text of the single user message to send.
        max_retries: Total number of attempts; must be at least 1.
        base_delay: Seconds to wait after the first failed attempt. The wait
            doubles after each further failure.

    Returns:
        The text content of the first choice in the response.

    Raises:
        ValueError: If ``max_retries`` is less than 1. Without this check the
            loop would not run and the function would silently return None.
        RateLimitError, APIConnectionError: Re-raised unchanged when the
            final attempt fails.
    """
    if max_retries < 1:
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="gpt-4o",
                messages=[{"role": "user", "content": prompt}],
            )
        except (RateLimitError, APIConnectionError) as exc:
            if attempt == max_retries - 1:
                # Out of attempts: bare raise keeps the original traceback.
                raise
            delay = base_delay * (2 ** attempt)  # Exponential backoff
            reason = "Rate limited" if isinstance(exc, RateLimitError) else "Connection error"
            print(f"{reason}. Waiting {delay}s...")
            time.sleep(delay)
        else:
            return response.choices[0].message.content

# Catch specific exception types: a bare except hides the error context
poem_prompt = "Generate a poem"
result = call_with_retries(poem_prompt)

Section 5: Vision API Calls

import base64

def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read in binary mode.

    Returns:
        The file contents encoded with the standard base64 alphabet, as str.
    """
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def analyze_image(image_path, prompt="Describe this image"):
    """Ask gpt-4o a question about a local image file.

    The image is embedded in the request as a base64 data URL. The MIME type
    in that URL is guessed from the file name, so a ``.png`` file is labelled
    ``image/png``. If the type cannot be guessed, or is not an image type,
    ``image/jpeg`` is used.

    Args:
        image_path: Path of the image file to send.
        prompt: Question or instruction sent with the image.

    Returns:
        The text content of the first choice in the response.
    """
    import mimetypes  # local import keeps this snippet self-contained

    base64_image = encode_image(image_path)

    mime_type, _ = mimetypes.guess_type(str(image_path))
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}",
                        },
                    },
                ],
            }
        ],
    )
    return response.choices[0].message.content

# Example: ask a question about a screenshot or diagram
description = analyze_image("chart.png", prompt="What does this graph show?")
print(description)

Section 6: Token Counting and Cost Estimation

import tiktoken

def count_tokens(text, model="gpt-4o"):
    """Return how many tokens ``text`` encodes to for ``model``.

    Args:
        text: String to tokenize.
        model: Model name used to look up the tiktoken encoding.

    Returns:
        The length of the encoded token sequence.
    """
    tokenizer = tiktoken.encoding_for_model(model)
    token_ids = tokenizer.encode(text)
    return len(token_ids)

def estimate_cost(prompt_tokens, completion_tokens, model="gpt-4o"):
    """Estimate the dollar cost of one request from its token counts.

    Rates are looked up by exact model name first. If there is no exact
    match, the longest table key that ``model`` starts with is used, so a
    dated id such as ``claude-3-5-sonnet-20241022`` is billed at its family's
    rate. Models that match nothing fall back to the ``gpt-4o`` rates.

    Args:
        prompt_tokens: Number of input tokens.
        completion_tokens: Number of output tokens.
        model: Model name or dated model id.

    Returns:
        Estimated cost in dollars as a float.
    """
    # Approximate pricing (check current rates)
    pricing = {
        "gpt-4o": {"prompt": 0.005, "completion": 0.015},  # per 1K tokens
        "gpt-4o-mini": {"prompt": 0.00015, "completion": 0.0006},
        "claude-3-5-sonnet": {"prompt": 0.003, "completion": 0.015},
    }

    rates = pricing.get(model)
    if rates is None:
        # Longest prefix wins so "gpt-4o-mini-..." resolves to "gpt-4o-mini"
        # rather than to the shorter "gpt-4o".
        family = max(
            (name for name in pricing
             if isinstance(model, str) and model.startswith(name)),
            key=len,
            default="gpt-4o",
        )
        rates = pricing[family]

    cost = (prompt_tokens * rates["prompt"] + completion_tokens * rates["completion"]) / 1000
    return cost

# Check the likely price before sending an expensive request
prompt = "Write a 5000-word essay on..."
tokens = count_tokens(prompt)
print(f"This prompt uses ~{tokens} tokens")
estimated = estimate_cost(tokens, 5000)
print(f"Estimated cost: ${estimated:.2f}")

What's Next

Stage 4 focuses on RAG: building retrieval-augmented generation systems that ground LLM responses in your own documents and data sources, reducing hallucinations through real-time retrieval.

Lock In Founding Member Access

Get full access to every course on TechNodeX — AI, cybersecurity, Python, and everything we build next. $9/month, price locked forever.

Become a Founding Member →

← Previous Stage Stage 3 of 6 Next: RAG (Retrieval-Augmented Generation) →