Bot WA AI Testing & QA
Cara testing bot AI WhatsApp sebelum launch. Unit test, integration test, user acceptance testing. Panduan lengkap!
Test sebelum launch = Avoid disasters!
Bot yang tidak di-test bisa mempermalukan brand, membuat customer frustrasi, dan menghilangkan sales. Testing adalah investasi yang worth it.
Testing Levels
🧪 TESTING PYRAMID:
  ┌─────────┐
  │   E2E   │  ← Paling sedikit
  │  Tests  │
 ┌┴─────────┴┐
 │Integration│
 │   Tests   │
┌┴───────────┴┐
│ Unit Tests  │  ← Paling banyak
└─────────────┘
COVERAGE TARGET:
- Unit: 80%+
- Integration: 60%+
- E2E: Core flows
Unit Testing
Test Utility Functions:
javascript
// utils.test.js
const {
normalizePhoneNumber,
extractKeywords,
formatPrice,
classifyIntent
} = require('./utils');
// normalizePhoneNumber: every accepted way of writing the sample Indonesian
// number must collapse to the same "62…" string; a too-short input yields null.
describe('normalizePhoneNumber', () => {
  const canonical = '6281234567890';

  test('should normalize Indonesian number with 0', () => {
    const actual = normalizePhoneNumber('081234567890');
    expect(actual).toBe(canonical);
  });

  test('should handle +62 format', () => {
    const actual = normalizePhoneNumber('+6281234567890');
    expect(actual).toBe(canonical);
  });

  test('should handle number with spaces', () => {
    const actual = normalizePhoneNumber('0812 3456 7890');
    expect(actual).toBe(canonical);
  });

  test('should reject invalid number', () => {
    const actual = normalizePhoneNumber('12345');
    expect(actual).toBeNull();
  });
});
describe('classifyIntent', () => {
test('should classify price inquiry', () => {
expect(classifyIntent('berapa harga dress ini?')).toBe('price_inquiry');
expect(classifyIntent('brp duit?')).toBe('price_inquiry');
});
test('should classify order intent', () => {
expect(classifyIntent('mau pesan 2')).toBe('order');
expect(classifyIntent('order dong')).toBe('order');
});
test('should classify complaint', () => {
expect(classifyIntent('produk rusak!')).toBe('complaint');
expect(classifyIntent('mau refund')).toBe('complaint');
});
test('should return unknown for unclear intent', () => {
expect(classifyIntent('halo')).toBe('greeting');
expect(classifyIntent('asdfghjkl')).toBe('unknown');
});
});Test AI Prompt Building:
javascript
// prompt.test.js
const { buildSystemPrompt, buildContextPrompt } = require('./prompt');
// buildSystemPrompt: the generated prompt must mention the brand and carry
// the three section markers checked below.
describe('buildSystemPrompt', () => {
  test('should include brand name', () => {
    const systemPrompt = buildSystemPrompt({ brandName: 'TestBrand' });

    expect(systemPrompt).toContain('TestBrand');
  });

  test('should include all required sections', () => {
    const systemPrompt = buildSystemPrompt({ brandName: 'Test' });

    for (const section of ['TUGAS', 'ATURAN', 'TONE']) {
      expect(systemPrompt).toContain(section);
    }
  });
});
describe('buildContextPrompt', () => {
test('should include user history', () => {
const context = {
userName: 'Budi',
previousOrders: [{ id: '001', product: 'Dress' }]
};
const prompt = buildContextPrompt(context);
expect(prompt).toContain('Budi');
expect(prompt).toContain('Dress');
});
});Integration Testing
Test Message Handler:
javascript
// messageHandler.test.js
const { handleMessage } = require('./messageHandler');
const { mockOpenAI, mockDatabase } = require('./testUtils');
describe('handleMessage', () => {
beforeEach(() => {
mockOpenAI.reset();
mockDatabase.reset();
});
test('should handle greeting', async () => {
mockOpenAI.mockResponse('Hai Kak! Ada yang bisa dibantu?');
const response = await handleMessage('user123', 'halo');
expect(response).toContain('Hai');
expect(mockOpenAI.calls).toBe(1);
});
test('should handle product inquiry', async () => {
mockOpenAI.mockResponse('Dress Brukat harganya Rp 350.000 kak!');
mockDatabase.mockProduct({ name: 'Dress Brukat', price: 350000 });
const response = await handleMessage('user123', 'harga dress brukat berapa?');
expect(response).toContain('350.000');
});
test('should fallback on API error', async () => {
mockOpenAI.mockError(new Error('Rate limit exceeded'));
const response = await handleMessage('user123', 'halo');
expect(response).toContain('coba lagi');
});
test('should escalate complaints', async () => {
const response = await handleMessage('user123', 'produk saya rusak! mau refund!');
expect(mockDatabase.escalatedTickets).toHaveLength(1);
expect(response).toContain('tim kami');
});
});Test Conversation Flow:
javascript
// conversationFlow.test.js
describe('Order Flow', () => {
let userId;
beforeEach(() => {
userId = 'test-user-' + Date.now();
});
test('should complete order flow', async () => {
// Step 1: Greeting
let response = await handleMessage(userId, 'halo');
expect(response).toContain('Ada yang bisa dibantu');
// Step 2: Product inquiry
response = await handleMessage(userId, 'mau lihat dress');
expect(response).toContain('katalog');
// Step 3: Select product
response = await handleMessage(userId, 'order dress brukat size M');
expect(response).toContain('alamat');
// Step 4: Provide address
response = await handleMessage(userId, 'Jl. Test No. 123, Jakarta');
expect(response).toContain('konfirmasi');
// Step 5: Confirm
response = await handleMessage(userId, 'ok');
expect(response).toContain('terima kasih');
expect(response).toContain('order');
});
test('should handle order cancellation mid-flow', async () => {
await handleMessage(userId, 'mau order dress');
await handleMessage(userId, 'dress brukat M');
const response = await handleMessage(userId, 'batal deh');
expect(response).toContain('dibatalkan');
// Verify state reset
const state = await getConversationState(userId);
expect(state.currentFlow).toBeNull();
});
});AI Response Testing
Test Response Quality:
javascript
// aiQuality.test.js
// Table-driven quality checks: for each customer question the answer must
// contain every expected phrase and none of the forbidden ones. Matching is
// case-insensitive on both sides.
describe('AI Response Quality', () => {
  const testCases = [
    {
      input: 'jam buka toko',
      expectedContains: ['08:00', '21:00'],
      notContains: ['sorry', 'tidak tahu'],
    },
    {
      input: 'bisa cod ga?',
      expectedContains: ['COD', 'Jabodetabek'],
      notContains: ['tidak bisa'],
    },
    {
      input: 'gimana cara retur?',
      expectedContains: ['7 hari', 'retur'],
      notContains: ['tidak ada'],
    },
  ];

  for (const { input, expectedContains, notContains } of testCases) {
    test(`should answer "${input}" correctly`, async () => {
      const answer = (await getAIResponse(input)).toLowerCase();

      for (const required of expectedContains) {
        expect(answer).toContain(required.toLowerCase());
      }
      for (const forbidden of notContains) {
        expect(answer).not.toContain(forbidden.toLowerCase());
      }
    });
  }
});
// Test for hallucination
describe('Hallucination Prevention', () => {
test('should not invent products', async () => {
const response = await getAIResponse('ada produk XYZ123?');
// Should not confirm non-existent product
expect(response).not.toMatch(/ada|tersedia|punya/i);
expect(response).toMatch(/tidak ada|belum tersedia|cek/i);
});
test('should not invent prices', async () => {
const response = await getAIResponse('harga tas branded berapa?');
// Should not make up a price for unknown product
expect(response).not.toMatch(/Rp \d+\.\d{3}/);
});
});Test Edge Cases:
javascript
// edgeCases.test.js
describe('Edge Cases', () => {
test('should handle empty message', async () => {
const response = await handleMessage('user', '');
expect(response).toBeTruthy();
});
test('should handle very long message', async () => {
const longMessage = 'a'.repeat(5000);
const response = await handleMessage('user', longMessage);
expect(response).toBeTruthy();
});
test('should handle special characters', async () => {
const response = await handleMessage('user', 'π Halo! @#$%');
expect(response).toBeTruthy();
});
test('should handle rapid messages', async () => {
const promises = Array(10).fill().map((_, i) =>
handleMessage('user', `message ${i}`)
);
const responses = await Promise.all(promises);
responses.forEach(r => expect(r).toBeTruthy());
});
test('should handle language mixing', async () => {
const response = await handleMessage('user', 'Hi mau tanya price list dong thanks');
expect(response).toBeTruthy();
});
});Load Testing
javascript
// loadTest.js
const autocannon = require('autocannon');
async function runLoadTest() {
const result = await autocannon({
url: 'http://localhost:3000/webhook',
connections: 10,
duration: 60,
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({
message: 'halo mau tanya harga',
userId: 'loadtest-user'
})
});
console.log('Load Test Results:');
console.log(`Requests/sec: ${result.requests.average}`);
console.log(`Latency avg: ${result.latency.average}ms`);
console.log(`Latency p99: ${result.latency.p99}ms`);
console.log(`Errors: ${result.errors}`);
// Assertions
expect(result.requests.average).toBeGreaterThan(50);
expect(result.latency.p99).toBeLessThan(5000);
expect(result.errors).toBe(0);
}UAT Checklist
✅ USER ACCEPTANCE TESTING:
BASIC FUNCTIONS:
☐ Bot responds to greeting
☐ Bot shows menu/options
☐ Bot answers FAQ correctly
☐ Bot handles typos gracefully
PRODUCT FUNCTIONS:
☐ Product search works
☐ Price inquiry accurate
☐ Stock check accurate
☐ Recommendations relevant
ORDER FLOW:
☐ Order creation works
☐ Address input works
☐ Payment instructions correct
☐ Order confirmation sent
SHIPPING:
☐ Tracking inquiry works
☐ Status updates accurate
☐ Delivery notifications sent
ERROR HANDLING:
☐ Unknown queries handled
☐ Error messages friendly
☐ Human escalation works
EDGE CASES:
☐ Long messages handled
☐ Special characters handled
☐ Rapid messages handled
☐ Session timeout handled
Automated Test Pipeline
yaml
# .github/workflows/test.yml
# Runs the unit, integration and AI-quality suites on every push and pull
# request, then uploads coverage.
name: Bot Tests
on: [push, pull_request]
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Setup Node.js
        uses: actions/setup-node@v3
        with:
          node-version: '18'
      - name: Install dependencies
        run: npm ci
      - name: Run unit tests
        run: npm run test:unit
      - name: Run integration tests
        run: npm run test:integration
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY_TEST }}
      - name: Run AI quality tests
        run: npm run test:ai-quality
        # The AI quality suite appears to call the model without a mock, so it
        # needs the same test key as the integration step - confirm.
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY_TEST }}
      - name: Upload coverage
        uses: codecov/codecov-action@v3
Test Data Management
javascript
// testData.js
// Fixture users: one without history, one with a previous order, one VIP.
const testUsers = [
  {
    id: 'new-user',
    history: [],
  },
  {
    id: 'returning-user',
    history: [{ orderId: '001' }],
  },
  {
    id: 'vip-user',
    tier: 'vip',
    totalSpent: 5000000,
  },
];

// Sample customer messages, grouped by category name.
const testMessages = {
  greetings: [
    'halo',
    'hi',
    'hai kak',
    'selamat siang',
  ],
  priceInquiry: [
    'berapa harganya',
    'harga dong',
    'brp duit',
  ],
  orderIntent: [
    'mau pesan',
    'order dong',
    'beli',
  ],
  complaints: [
    'produk rusak',
    'mau refund',
    'kecewa',
  ],
};
function getRandomTestMessage(category) {
const messages = testMessages[category];
return messages[Math.floor(Math.random() * messages.length)];
}Best Practices
DO ✅
- Test before every deploy
- Cover happy path + edge cases
- Test AI response quality
- Automate regression tests
- Load test before scale
- UAT with real users
DON'T ❌
- Deploy untested code
- Only test happy path
- Assume AI always correct
- Manual testing only
- Skip load testing
- Skip user testing
FAQ
Berapa coverage yang ideal?
80%+ untuk unit tests. Core flows harus 100% covered.
Perlu test AI responses?
Ya! AI bisa berubah behavior. Test quality dan accuracy regularly.
Kesimpulan
Testing = Confidence to deploy!
| No Testing | With Testing |
|---|---|
| Bugs in production | Bugs caught early |
| User complaints | Smooth experience |
| Hotfixes | Stable releases |