Bot WA AI Cost Optimization
Cara menghemat biaya AI untuk WhatsApp bot. Model selection, caching, token optimization. Panduan lengkap!
AI bisa mahal jika tidak dikelola!
Tanpa optimasi, biaya AI bisa membengkak dengan cepat. Panduan ini membantu kamu tetap hemat tanpa mengorbankan kualitas.
Memahami AI Pricing
💰 PRICING STRUCTURE:
OPENAI:
┌─────────────────┬──────────┬───────────┐
│ Model │ Input │ Output │
├─────────────────┼──────────┼───────────┤
│ GPT-4o │ $2.50/M │ $10.00/M │
│ GPT-4o-mini │ $0.15/M │ $0.60/M │
│ GPT-3.5-turbo │ $0.50/M │ $1.50/M │
└─────────────────┴──────────┴───────────┘
M = 1 Million tokens
ANTHROPIC (Claude):
┌─────────────────┬──────────┬───────────┐
│ Model │ Input │ Output │
├─────────────────┼──────────┼───────────┤
│ Claude 3.5 Son. │ $3.00/M │ $15.00/M │
│ Claude 3 Haiku │ $0.25/M │ $1.25/M │
└─────────────────┴──────────┴───────────┘
GOOGLE (Gemini):
- Gemini Pro: Free tier generous
- Gemini 1.5: $0.35-$1.05/M
Strategy 1: Model Selection
Tiered Model Approach:
javascript
// Route each task to the cheapest model that can handle it well.
const MODEL_TIERS = {
  simple: 'gpt-4o-mini',   // greetings, FAQ lookups
  standard: 'gpt-4o-mini', // everyday conversations
  complex: 'gpt-4o',       // multi-step reasoning
  creative: 'gpt-4o'       // creative writing
};

/**
 * Pick a model tier for one request.
 * @param {string} taskType - result of classifyTask(), e.g. 'faq', 'complex'
 * @param {number} messageLength - user message length in characters
 * @param {number} conversationDepth - number of prior messages in context
 * @returns {string} model identifier from MODEL_TIERS
 */
function selectModel(taskType, messageLength, conversationDepth) {
  // Cheapest tier for FAQs and very short messages.
  const isSimple = taskType === 'faq' || messageLength < 50;
  if (isSimple) {
    return MODEL_TIERS.simple;
  }

  // Deep multi-turn threads or explicitly complex tasks get the big model.
  const isComplex = conversationDepth > 5 || taskType === 'complex';
  return isComplex ? MODEL_TIERS.complex : MODEL_TIERS.standard;
}
/**
 * Answer one user message, routing to the cheapest suitable model based
 * on task type, message length, and conversation depth.
 * NOTE(review): relies on getConversationContext, classifyTask and an
 * `openai` client defined elsewhere in this project.
 * @param {string} userId
 * @param {string} message - raw user message text
 * @returns {Promise<object>} raw chat-completion response
 */
async function chat(userId, message) {
  const context = await getConversationContext(userId);
  const kind = classifyTask(message);
  const chosenModel = selectModel(kind, message.length, context.messages.length);

  return openai.chat.completions.create({
    model: chosenModel,
    messages: context.messages
  });
}
// Task Classification:
javascript
// Quick classification without any API call — cheap regex heuristics.
/**
 * Classify a user message so the router can pick a model tier.
 * @param {string} message - raw user message
 * @returns {'simple'|'complex'|'standard'}
 */
function classifyTask(message) {
  const text = message.toLowerCase();

  // Greetings, opening-hours/address lookups, short price questions, acks.
  const SIMPLE = [
    /^(hai|halo|hi|hello)/,
    /jam (buka|operasional)/,
    /alamat/,
    /harga .{0,20}$/,
    /^(ya|ok|oke|siap|makasih)/
  ];

  // Comparisons, detailed explanations, analysis, long recommendation asks.
  const COMPLEX = [
    /bandingkan|compare/,
    /jelaskan.*detail/,
    /analisis|analisa/,
    /rekomendasi.{20,}/
  ];

  for (const pattern of SIMPLE) {
    if (pattern.test(text)) return 'simple';
  }
  for (const pattern of COMPLEX) {
    if (pattern.test(text)) return 'complex';
  }
  return 'standard';
}
// Strategy 2: Caching
Response Caching:
javascript
// Node's crypto module — used below to hash conversation contents into keys.
const crypto = require('crypto');
// Third-party in-memory cache; every entry expires 1 hour (3600 s) after insert.
const NodeCache = require('node-cache');
const responseCache = new NodeCache({ stdTTL: 3600 });
/**
 * Build a deterministic cache key by hashing all message contents.
 * MD5 is acceptable here: this is a cache key, not a security boundary.
 * @param {{content: string}[]} messages - full conversation so far
 * @returns {string} 32-character hex digest
 */
function generateCacheKey(messages) {
  const joined = messages.map((m) => m.content).join('|');
  return crypto.createHash('md5').update(joined).digest('hex');
}
/**
 * Chat completion with an exact-match cache in front of the API.
 * NOTE(review): uses the module-level `responseCache` and an `openai`
 * client defined elsewhere.
 * @param {{role: string, content: string}[]} messages
 * @returns {Promise<string>} assistant reply text
 */
async function cachedChat(messages) {
  const cacheKey = generateCacheKey(messages);

  // Serve instantly (and for free) if this exact conversation was seen before.
  const cached = responseCache.get(cacheKey);
  if (cached) {
    console.log('Cache hit!');
    return cached;
  }

  const response = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    messages
  });
  const result = response.choices[0].message.content;

  responseCache.set(cacheKey, result);
  return result;
}
// Semantic Caching:
javascript
// Cache similar questions, not just exact matches.
/**
 * Semantic cache: reuse a stored answer when a previous query is nearly
 * identical in embedding space (similarity score >= 0.95); otherwise
 * generate a fresh answer and cache it with its embedding.
 * NOTE(review): relies on createEmbedding, generateResponse and a `db`
 * handle defined elsewhere; assumes a MongoDB Atlas vector index.
 * @param {string} userMessage
 * @returns {Promise<string>} cached or freshly generated response
 */
async function semanticCache(userMessage) {
  const embedding = await createEmbedding(userMessage);

  const similar = await db.queryCache.aggregate([
    {
      $vectorSearch: {
        index: 'cache_vector_index',
        path: 'embedding',
        queryVector: embedding,
        numCandidates: 10,
        limit: 1
      }
    },
    // BUG FIX: $vectorSearch does not emit a `score` field on its own, so
    // the original $match on `score` could never match. The score must be
    // surfaced explicitly via $meta before filtering on it.
    { $addFields: { score: { $meta: 'vectorSearchScore' } } },
    { $match: { score: { $gte: 0.95 } } } // very high similarity only
  ]).toArray();

  if (similar.length > 0) {
    console.log('Semantic cache hit!');
    return similar[0].response;
  }

  // Cache miss: generate a new response and store it with its embedding.
  const response = await generateResponse(userMessage);

  await db.queryCache.insertOne({
    query: userMessage,
    embedding,
    response,
    createdAt: new Date()
  });

  return response;
}
// Strategy 3: Token Optimization
Shorter System Prompts:
javascript
// ❌ BAD: a long, verbose system prompt burns input tokens on every call.
const BAD_PROMPT = `
Kamu adalah customer service AI yang sangat helpful dan ramah
untuk toko online fashion wanita bernama [BRAND]. Kamu harus
selalu menjawab dengan sopan dan menggunakan bahasa Indonesia
yang baik dan benar. Kamu juga harus menggunakan emoji yang
sesuai untuk membuat percakapan lebih friendly. Jangan lupa
untuk selalu menawarkan bantuan di akhir setiap response...
[500+ words]
`;

// ✅ GOOD: a concise prompt that carries the same instructions cheaply.
const GOOD_PROMPT = `CS AI untuk [BRAND] (fashion wanita).
Bahasa: Indonesia casual, pakai "Kak".
Tone: Ramah, helpful, emoji secukupnya.
Rules: Jawab singkat, akurat, tawarkan bantuan lain.`;
// Trim Conversation History:
javascript
/**
 * Trim a conversation to an approximate token budget.
 * Keeps the system prompt (if any) FIRST, then as many of the most
 * recent messages as fit, preserving chronological order.
 * Token estimate: ~1 token per 4 characters (rough for Indonesian text).
 * @param {{role: string, content: string}[]} messages
 * @param {number} [maxTokens=2000] - approximate token budget
 * @returns {{role: string, content: string}[]} trimmed message list
 */
function trimConversationHistory(messages, maxTokens = 2000) {
  const estimateTokens = (text) => Math.ceil(text.length / 4);

  let totalTokens = 0;
  const kept = [];

  // The system prompt is always retained and counted against the budget.
  const systemMessage = messages.find((m) => m.role === 'system');
  if (systemMessage) {
    totalTokens += estimateTokens(systemMessage.content);
  }

  // Walk newest → oldest, keeping messages until the budget is exhausted.
  const history = messages.filter((m) => m.role !== 'system').reverse();
  for (const msg of history) {
    const msgTokens = estimateTokens(msg.content);
    if (totalTokens + msgTokens > maxTokens) {
      break;
    }
    totalTokens += msgTokens;
    kept.unshift(msg); // restore chronological order
  }

  // BUG FIX: the original unshift-ed history into an array that already
  // held the system message, so the system prompt ended up LAST in the
  // returned array. Chat APIs expect it first.
  return systemMessage ? [systemMessage, ...kept] : kept;
}
// Summarize Long Contexts:
javascript
/**
 * Compress a long conversation: summarize everything except the last four
 * messages using the cheap model, and prepend the summary as system context.
 * Conversations shorter than 10 messages are returned untouched.
 * NOTE(review): relies on an `openai` client defined elsewhere.
 * @param {{role: string, content: string}[]} messages
 * @returns {Promise<{role: string, content: string}[]>}
 */
async function summarizeContext(messages) {
  if (messages.length < 10) return messages;

  const recentMessages = messages.slice(-4);
  const olderMessages = messages.slice(0, -4);
  const transcript = olderMessages
    .map((m) => `${m.role}: ${m.content}`)
    .join('\n');

  const summary = await openai.chat.completions.create({
    model: 'gpt-4o-mini', // the cheap model is plenty for summarization
    messages: [{
      role: 'user',
      content: `Summarize this conversation in 2-3 sentences:
${transcript}`
    }],
    max_tokens: 150
  });

  return [
    { role: 'system', content: 'Previous context: ' + summary.choices[0].message.content },
    ...recentMessages
  ];
}
// Strategy 4: Hybrid Approach
javascript
// Use rules for simple queries, AI only for the rest.
/**
 * Cost-tiered responder: keyword rules (free) → semantic cache (free on
 * hit) → FAQ search (embedding cost only) → full AI call.
 * NOTE(review): relies on semanticCache, searchFAQ and generateAIResponse
 * defined elsewhere. As written above, semanticCache always returns a
 * response (it generates on a miss), so steps 3-4 may be unreachable —
 * confirm its contract.
 * @param {string} userId
 * @param {string} message
 * @returns {Promise<string>} reply text
 */
async function hybridChat(userId, message) {
  // 1. Exact keyword rules — zero cost.
  const keywordResponse = checkKeywordRules(message);
  if (keywordResponse) {
    return keywordResponse;
  }

  // 2. Semantic cache — zero marginal cost on a hit.
  const cachedResponse = await semanticCache(message);
  if (cachedResponse) {
    return cachedResponse;
  }

  // 3. FAQ database — only the embedding call is paid for.
  const faqResponse = await searchFAQ(message);
  if (faqResponse && faqResponse.confidence > 0.9) {
    return faqResponse.answer;
  }

  // 4. Full AI fallback.
  const taskType = classifyTask(message);
  // BUG FIX: the original called selectModel(taskType) with no length or
  // depth, so `undefined < 50` was always false and short messages never
  // reached the cheap tier. Pass the message length; depth is unknown here.
  const model = selectModel(taskType, message.length, 0);
  return generateAIResponse(userId, message, model);
}
/**
 * Answer common questions with canned replies — no AI cost at all.
 * @param {string} message - raw user message
 * @returns {string|null} canned reply, or null when no keyword matches
 */
function checkKeywordRules(message) {
  const rules = {
    'jam buka': 'Kami buka setiap hari jam 08:00-21:00 WIB 🕐',
    'alamat': 'Alamat kami: Jl. Contoh No. 123, Jakarta 📍',
    'transfer ke': 'Transfer ke BCA 1234567890 a/n [BRAND] 🏦'
    // ... more rules
  };

  const haystack = message.toLowerCase();
  // First matching keyword (insertion order) wins.
  const hit = Object.entries(rules).find(([keyword]) => haystack.includes(keyword));
  return hit ? hit[1] : null;
}
// Strategy 5: Rate Limiting & Quotas
javascript
// In-memory per-user usage counters; reset logic lives in checkQuota.
const userQuotas = new Map();

// Daily AI-call allowance per subscription tier.
const DAILY_QUOTA = {
  free: 20,
  basic: 100,
  premium: 500
};
/**
 * Check (and consume) one unit of a user's daily AI quota.
 * Counters live in the in-memory `userQuotas` map and reset when the
 * calendar day changes. NOTE(review): counters are lost on restart —
 * persist them if strict enforcement is required.
 * @param {string} userId
 * @returns {Promise<{allowed: boolean, message?: string}>}
 */
async function checkQuota(userId) {
  // BUG FIX: the original query was garbled (`findOne({ oderId userId })`,
  // a syntax error); the intended filter is by userId.
  const user = await db.users.findOne({ userId });
  const tier = user?.tier || 'free';
  const quota = DAILY_QUOTA[tier];

  const today = new Date().toDateString();
  const userUsage = userQuotas.get(userId) || { date: today, count: 0 };

  // Reset the counter when a new day starts.
  if (userUsage.date !== today) {
    userUsage.date = today;
    userUsage.count = 0;
  }

  if (userUsage.count >= quota) {
    return {
      allowed: false,
      message: 'Kuota harian tercapai. Upgrade ke premium untuk unlimited!'
    };
  }

  userUsage.count++;
  userQuotas.set(userId, userUsage);
  return { allowed: true };
}
// Cost Monitoring
javascript
// Track costs per user / model.
/**
 * Record one API call's token usage and estimated USD cost.
 * Rates are USD per 1K tokens; unknown models fall back to the
 * gpt-4o-mini rate.
 * NOTE(review): relies on a `db` handle defined elsewhere.
 * @param {string} userId
 * @param {string} model
 * @param {number} inputTokens
 * @param {number} outputTokens
 */
async function trackAPIUsage(userId, model, inputTokens, outputTokens) {
  // USD per 1K tokens (matches the per-million prices in the table above / 1000).
  const costs = {
    'gpt-4o': { input: 0.0025, output: 0.01 },
    'gpt-4o-mini': { input: 0.00015, output: 0.0006 },
    'claude-3-5-sonnet': { input: 0.003, output: 0.015 }
  };

  const rate = costs[model] || costs['gpt-4o-mini'];
  const totalCost = (inputTokens * rate.input + outputTokens * rate.output) / 1000;

  await db.usageLogs.insertOne({
    userId,
    model,
    inputTokens,
    outputTokens,
    cost: totalCost,
    timestamp: new Date()
  });
}
// Daily cost report.
/**
 * Aggregate today's usage logs into per-model totals:
 * cost, call count, input tokens and output tokens.
 * NOTE(review): relies on a `db` handle defined elsewhere.
 * @returns {Promise<object[]>} one document per model
 */
async function getDailyCostReport() {
  const startOfDay = new Date();
  startOfDay.setHours(0, 0, 0, 0); // local midnight

  return db.usageLogs.aggregate([
    { $match: { timestamp: { $gte: startOfDay } } },
    {
      $group: {
        _id: '$model',
        totalCost: { $sum: '$cost' },
        totalCalls: { $sum: 1 },
        totalInputTokens: { $sum: '$inputTokens' },
        totalOutputTokens: { $sum: '$outputTokens' }
      }
    }
  ]).toArray();
}
// Cost Comparison
💰 CONTOH BIAYA BULANAN (1000 chats/hari):
TANPA OPTIMASI:
- All GPT-4o
- No caching
- Long prompts
- Full history
≈ $300-500/bulan 😱
DENGAN OPTIMASI:
- Tiered models
- Semantic caching (50% hit)
- Short prompts
- Trimmed history
- Hybrid rules
≈ $30-50/bulan 🎉
SAVINGS: 90%!
Best Practices
DO ✅
- Use cheapest model that works
- Cache aggressively
- Trim conversation history
- Monitor costs daily
- Set user quotas
- Use rules for simple queries
DON'T ❌
- Always use most expensive model
- No caching
- Keep full history forever
- Ignore cost monitoring
- Unlimited usage
- AI for everything
FAQ
GPT-4o-mini cukup bagus?
Untuk 80% use cases, ya! Complex reasoning pakai GPT-4o.
Berapa budget reasonable?
Mulai $50-100/bulan untuk small business dengan 500-1000 chats/hari.
Kesimpulan
Optimasi = Same quality, 90% cheaper!
| No Optimization | Optimized |
|---|---|
| $300-500/mo | $30-50/mo |
| Single model | Tiered |
| No caching | Smart caching |