
LLMs and Real-World Applications: From Chatbots to AI Agents

Explore Large Language Models and how to apply them to real-world business problems

11 min read
NhiTuyen Tech Blog Team

Large Language Models (LLMs) are changing the way we work and create. From ChatGPT to Claude, from customer support to code generation, let's explore how to put LLMs to work in practice!


What Is an LLM?

Large Language Models

An LLM is an AI model trained on billions of words, able to do all of the following (a minimal API call is sketched after this list):

  • 🧠 Understand context and meaning
  • 💬 Generate natural-sounding text
  • 🎯 Perform complex tasks
  • 🌍 Work across many languages
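Under the hood, all of these capabilities are reached through a simple chat API. A minimal sketch using the OpenAI Python SDK (assuming OPENAI_API_KEY is set in your environment):

import os
from openai import OpenAI

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# One prompt in, one completion out - the primitive behind every use case below
response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Summarize what an LLM is in one sentence."}]
)
print(response.choices[0].message.content)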

Popular LLMs

# Top LLM models (Dec 2025)
llm_models = {
    "GPT-4": {
        "provider": "OpenAI",
        "parameters": "1.76T",
        "context": "128K tokens",
        "strengths": ["Reasoning", "Coding", "Creative writing"]
    },
    "Claude 3.5 Sonnet": {
        "provider": "Anthropic",
        "parameters": "Unknown",
        "context": "200K tokens",
        "strengths": ["Long context", "Analysis", "Safety"]
    },
    "Gemini Pro": {
        "provider": "Google",
        "parameters": "Unknown",
        "context": "1M tokens",
        "strengths": ["Multimodal", "Search integration"]
    },
    "Llama 3": {
        "provider": "Meta",
        "parameters": "70B",
        "context": "8K tokens",
        "strengths": ["Open source", "Fine-tuning"]
    }
}


Use Case 1: Customer Support Chatbot

Building a Smart Chatbot with OpenAI

from openai import OpenAI
import os

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

class SupportBot:
    def __init__(self):
        self.conversation_history = []
        self.system_prompt = """
        Bạn là trợ lý hỗ trợ khách hàng chuyên nghiệp.
        - Luôn lịch sự và thân thiện
        - Giải quyết vấn đề một cách chi tiết
        - Nếu không biết, hãy thừa nhận và chuyển sang human agent
        """
    
    def chat(self, user_message):
        # Add user message
        self.conversation_history.append({
            "role": "user",
            "content": user_message
        })
        
        # Call GPT-4
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": self.system_prompt},
                *self.conversation_history
            ],
            temperature=0.7,
            max_tokens=500
        )
        
        ai_response = response.choices[0].message.content
        
        # Add to history
        self.conversation_history.append({
            "role": "assistant",
            "content": ai_response
        })
        
        return ai_response

# Usage
bot = SupportBot()
print(bot.chat("Làm sao để reset password?"))
# Response: "Để reset password, bạn có thể làm theo các bước sau..."

# Context được giữ nguyên!
print(bot.chat("Còn cách nào khác không?"))
# Bot nhớ câu hỏi trước! 🎯


Streaming Responses

def stream_chat(user_message):
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": user_message}],
        stream=True  # Enable streaming! ⚡
    )
    
    print("Bot: ", end="", flush=True)
    for chunk in response:
        if chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)
    print()

# Real-time responses, just like ChatGPT! 💬
stream_chat("Explain quantum computing")

Use Case 2: Code Assistant

AI Pair Programmer

import os
import anthropic

# Use a distinct name so the OpenAI client from earlier isn't shadowed
anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

def code_review(code):
    """Review code và suggest improvements"""
    
    message = anthropic_client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=2000,
        messages=[{
            "role": "user",
            "content": f"""
            Review this code and suggest improvements:
            
            ```python
            {code}
            ```
            
            Focus on:
            1. Performance
            2. Security
            3. Best practices
            4. Potential bugs
            """
        }]
    )
    
    return message.content[0].text

# Example
bad_code = """
def get_user(id):
    query = f"SELECT * FROM users WHERE id = {id}"
    return db.execute(query)
"""

review = code_review(bad_code)
print(review)
# Output: "⚠️ SQL Injection vulnerability detected!
#          Use parameterized queries instead..."


Generate Code from Description

def generate_code(description, language="python"):
    """Generate code from natural language"""
    
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{
            "role": "user",
            "content": f"""
            Write {language} code for:
            {description}
            
            Requirements:
            - Clean, readable code
            - Include comments
            - Handle errors
            - Follow best practices
            """
        }],
        temperature=0.3  # Lower temp = more deterministic
    )
    
    return response.choices[0].message.content

# Usage
code = generate_code(
    "FastAPI endpoint để upload image, resize về 800x600, save to S3"
)
print(code)
# Generates complete, working code! 🚀

Use Case 3: Document Analysis

RAG (Retrieval-Augmented Generation)

RAG grounds the model in your own data: documents are split into chunks and embedded into a vector store, then the chunks most relevant to a question are retrieved and handed to the LLM together with that question.

# Note: newer LangChain releases move these imports to the
# langchain_community and langchain_openai packages
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

class DocumentQA:
    def __init__(self, pdf_path):
        # Load document
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
        
        # Split into chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200
        )
        texts = text_splitter.split_documents(documents)
        
        # Create embeddings and vector store
        embeddings = OpenAIEmbeddings()
        self.vectorstore = Chroma.from_documents(
            texts, 
            embeddings
        )
        
        # Create QA chain
        self.qa = RetrievalQA.from_chain_type(
            llm=OpenAI(temperature=0),
            chain_type="stuff",
            retriever=self.vectorstore.as_retriever()
        )
    
    def ask(self, question):
        return self.qa.run(question)

# Usage
doc_qa = DocumentQA("company_handbook.pdf")

# Ask questions about document
print(doc_qa.ask("Vacation policy là gì?"))
print(doc_qa.ask("Làm sao để request remote work?"))

# The AI finds the relevant sections and answers! 🎯

Multi-Document Search with Memory

The same pattern scales to several documents at once, with conversation memory so follow-up questions keep their context:

from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

class SmartDocSearch:
    def __init__(self, document_paths):
        # Load multiple documents
        all_docs = []
        for path in document_paths:
            loader = PyPDFLoader(path)
            all_docs.extend(loader.load())
        
        # Chunking
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200
        )
        chunks = splitter.split_documents(all_docs)
        
        # Vector store
        embeddings = OpenAIEmbeddings()
        self.vectorstore = Chroma.from_documents(chunks, embeddings)
        
        # Memory for conversation
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True
        )
        
        # QA with memory
        self.chain = ConversationalRetrievalChain.from_llm(
            llm=OpenAI(temperature=0),
            retriever=self.vectorstore.as_retriever(),
            memory=memory
        )
    
    def search(self, query):
        result = self.chain({"question": query})
        return result["answer"]

# Search across multiple docs
search = SmartDocSearch([
    "policies.pdf",
    "procedures.pdf",
    "guidelines.pdf"
])

print(search.search("Quy trình onboarding mới?"))
# Searches all docs và synthesizes answer! 🔍

Use Case 4: Content Generation

Blog Post Writer

class ContentGenerator:
    def __init__(self):
        self.client = OpenAI()
    
    def generate_blog_post(self, topic, tone="professional"):
        prompt = f"""
        Write a comprehensive blog post about: {topic}
        
        Requirements:
        - Tone: {tone}
        - Length: 1000-1500 words
        - Include: Introduction, 3-4 main sections, conclusion
        - SEO optimized
        - Engaging and informative
        """
        
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are an expert content writer."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.8  # More creative
        )
        
        return response.choices[0].message.content
    
    def generate_social_posts(self, blog_content):
        """Generate social media posts from blog"""
        
        prompt = f"""
        From this blog post, create:
        1. Tweet (280 chars max)
        2. LinkedIn post (engaging, professional)
        3. Facebook post (casual, friendly)
        
        Blog content:
        {blog_content[:1000]}...
        """
        
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}]
        )
        
        return response.choices[0].message.content

# Generate content pipeline
generator = ContentGenerator()

# 1. Generate blog
blog = generator.generate_blog_post("AI in Healthcare", tone="professional")

# 2. Generate social posts
social = generator.generate_social_posts(blog)

print("Blog:", blog)
print("\nSocial Media:", social)

# Complete content marketing pipeline! 📝


Use Case 5: Data Analysis Assistant

SQL Query Generator

class SQLAssistant:
    def __init__(self, schema_info):
        self.client = OpenAI()
        self.schema = schema_info
    
    def natural_to_sql(self, question):
        """Convert natural language to SQL"""
        
        prompt = f"""
        Database schema:
        {self.schema}
        
        User question: {question}
        
        Generate SQL query to answer this question.
        Return only the SQL query, nothing else.
        """
        
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1  # Very deterministic for SQL
        )
        
        return response.choices[0].message.content
    
    def explain_query(self, sql_query):
        """Explain SQL query in plain English"""
        
        prompt = f"""
        Explain this SQL query in simple terms:
        
        {sql_query}
        """
        
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}]
        )
        
        return response.choices[0].message.content

# Setup
schema = """
users (id, name, email, created_at)
orders (id, user_id, total, status, created_at)
products (id, name, price, category)
order_items (order_id, product_id, quantity)
"""

assistant = SQLAssistant(schema)

# Natural language → SQL
query = assistant.natural_to_sql(
    "Show top 10 customers by total order value in 2025"
)
print("SQL:", query)
# Output: SELECT u.name, SUM(o.total) as total_value
#         FROM users u JOIN orders o ON u.id = o.user_id
#         WHERE YEAR(o.created_at) = 2025
#         GROUP BY u.id ORDER BY total_value DESC LIMIT 10

# Explain query
explanation = assistant.explain_query(query)
print("\nExplanation:", explanation)


Use Case 6: AI Agents

Autonomous Task Executor

from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.llms import OpenAI
from langchain.utilities import GoogleSearchAPIWrapper

class AIAgent:
    def __init__(self):
        # Define tools agent can use
        search = GoogleSearchAPIWrapper()
        
        tools = [
            Tool(
                name="Search",
                func=search.run,
                description="Search Google for current information"
            ),
            Tool(
                name="Calculator",
                func=self.calculate,
                description="Calculate mathematical expressions"
            ),
            Tool(
                name="Weather",
                func=self.get_weather,
                description="Get current weather for a location"
            )
        ]
        
        # Initialize agent
        self.agent = initialize_agent(
            tools,
            OpenAI(temperature=0),
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True
        )
    
    def calculate(self, expression):
        # Warning: eval is unsafe on untrusted input - use a dedicated math parser in production
        return eval(expression)
    
    def get_weather(self, location):
        # Call weather API
        return f"Weather in {location}: Sunny, 25°C"
    
    def run(self, task):
        return self.agent.run(task)

# Create agent
agent = AIAgent()

# Give complex task
result = agent.run("""
What's the weather in Hanoi today? 
If it's above 30°C, calculate how many bottles of water 
a person should drink (formula: (temp - 20) / 2).
Also search for tips to stay cool.
""")

# Agent autonomously:
# 1. Calls weather tool
# 2. Uses calculator
# 3. Searches Google
# 4. Synthesizes answer! 🤖
print(result)


Best Practices

1. Prompt Engineering

# ❌ Bad prompt
"Write code"

# ✅ Good prompt
"""
Write Python code that:
1. Reads CSV file
2. Filters rows where age > 18
3. Calculates average salary by department
4. Exports to Excel

Requirements:
- Use pandas
- Handle errors gracefully
- Include comments
- Add logging
"""

2. Token Management

from tiktoken import encoding_for_model

def count_tokens(text, model="gpt-4"):
    encoding = encoding_for_model(model)
    return len(encoding.encode(text))

def truncate_to_limit(text, max_tokens=4000, model="gpt-4"):
    """Truncate text to fit token limit"""
    encoding = encoding_for_model(model)
    tokens = encoding.encode(text)
    
    if len(tokens) <= max_tokens:
        return text
    
    # Truncate
    truncated_tokens = tokens[:max_tokens]
    return encoding.decode(truncated_tokens)

# Usage
long_text = "..." * 10000
safe_text = truncate_to_limit(long_text, max_tokens=2000)
# Prevents token limit errors! ✅

3. Cost Optimization

class CostOptimizedLLM:
    def __init__(self):
        self.cache = {}
    
    def call_llm(self, prompt, model="gpt-3.5-turbo"):
        # Check cache first
        cache_key = f"{model}:{hash(prompt)}"
        if cache_key in self.cache:
            return self.cache[cache_key]
        
        # Use cheaper model for simple tasks
        if len(prompt) < 100:
            model = "gpt-3.5-turbo"  # Cheaper!
        
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}]
        )
        
        result = response.choices[0].message.content
        
        # Cache result
        self.cache[cache_key] = result
        
        return result

# Save costs with caching! 💰
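A quick usage sketch: the second identical call is served from the cache, so it costs nothing:

llm = CostOptimizedLLM()
print(llm.call_llm("Explain caching in one sentence"))
print(llm.call_llm("Explain caching in one sentence"))  # cache hit - no API call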


4. Safety & Moderation

def moderate_content(text):
    """Check if content is safe"""
    
    response = client.moderations.create(input=text)
    result = response.results[0]
    
    if result.flagged:
        # categories is a pydantic model in the v1 SDK; dump it to a dict to iterate
        categories = [
            cat for cat, flagged in result.categories.model_dump().items()
            if flagged
        ]
        return False, f"Content flagged: {', '.join(categories)}"
    
    return True, "Content is safe"

# Usage
user_input = "..."
is_safe, message = moderate_content(user_input)

if not is_safe:
    print(f"⚠️ {message}")
else:
    # Process with LLM
    response = call_llm(user_input)

Challenges & Solutions

1. Hallucination

def verify_facts(text):
    """Ask LLM to verify its own output"""
    
    verification_prompt = f"""
    Review this text and identify any potentially false statements:
    
    {text}
    
    For each claim, indicate confidence level (high/medium/low).
    """
    
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": verification_prompt}]
    )
    
    return response.choices[0].message.content
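For example, screening a generated answer before showing it to users (the claim below is a well-known myth, so the model should flag it):

draft = "The Great Wall of China is visible from the Moon with the naked eye."
print(verify_facts(draft))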

2. Context Window Limits

def chunk_conversation(messages, max_tokens=4000):
    """Keep only recent messages that fit"""
    
    total_tokens = 0
    kept_messages = []
    
    # Reverse iterate (keep recent first)
    for msg in reversed(messages):
        msg_tokens = count_tokens(msg["content"])
        
        if total_tokens + msg_tokens > max_tokens:
            break
        
        kept_messages.insert(0, msg)
        total_tokens += msg_tokens
    
    return kept_messages
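A usage sketch, trimming an overlong history before the next API call (message contents are placeholders):

history = [
    {"role": "user", "content": "First question..."},
    {"role": "assistant", "content": "First answer..."},
    {"role": "user", "content": "Latest question"}
]
trimmed = chunk_conversation(history, max_tokens=4000)
# Only the most recent messages that fit the token budget are sent to the model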

Conclusion

LLMs are revolutionizing the way we work, powering:

  • 💬 Intelligent chatbots
  • 👨‍💻 AI coding assistants
  • 📊 Data analysis automation
  • 📝 Content generation
  • 🤖 Autonomous agents

# The future is now
future = {
    "ai_assistants": "everywhere",
    "productivity": "10x increase",
    "creativity": "unlimited",
    "possibilities": "endless"
}

print("Start building with LLMs today! 🚀")



What are you building with LLMs? Share your ideas! 💬

Tags

#AI #LLM #MachineLearning #GPT4 #ChatGPT #Claude #NLP #Automation

