WhatsApp Bot AI

Bot WA AI Testing & QA

Cara testing bot AI WhatsApp sebelum launch. Unit test, integration test, user acceptance testing. Panduan lengkap!

Balas WA

17 Feb 2026 • 5 min read

Bot WA AI Testing & QA

Test sebelum launch = Avoid disasters!

Bot yang tidak di-test bisa mempermalukan brand, membuat customer frustrasi, dan menyebabkan kehilangan sales. Testing adalah investasi yang worth it.

Testing Levels

🧪 TESTING PYRAMID:

        ┌─────────┐
        │  E2E    │  ← Paling sedikit
        │  Tests  │
       ┌┴─────────┴┐
       │Integration│
       │   Tests   │
      ┌┴───────────┴┐
      │  Unit Tests │  ← Paling banyak
      └─────────────┘

COVERAGE TARGET:
- Unit: 80%+
- Integration: 60%+
- E2E: Core flows

Unit Testing

Test Utility Functions:

javascript

// utils.test.js
const { 
    normalizePhoneNumber, 
    extractKeywords, 
    formatPrice,
    classifyIntent
} = require('./utils');

/**
 * normalizePhoneNumber: every accepted way of writing the same number must
 * come out as the identical 62-prefixed digit string; an invalid number
 * must yield null.
 */
describe('normalizePhoneNumber', () => {
    const CANONICAL = '6281234567890';

    // [test title, raw input] - each input is the same number written differently.
    const acceptedFormats = [
        ['should normalize Indonesian number with 0', '081234567890'],
        ['should handle +62 format', '+6281234567890'],
        ['should handle number with spaces', '0812 3456 7890'],
    ];

    for (const [title, rawInput] of acceptedFormats) {
        test(title, () => {
            expect(normalizePhoneNumber(rawInput)).toBe(CANONICAL);
        });
    }

    test('should reject invalid number', () => {
        const result = normalizePhoneNumber('12345');
        expect(result).toBeNull();
    });
});

/**
 * classifyIntent: maps a raw customer message to an intent label.
 * Each test pins exactly one label so that a failure names the intent
 * that regressed.
 */
describe('classifyIntent', () => {
    test('should classify price inquiry', () => {
        expect(classifyIntent('berapa harga dress ini?')).toBe('price_inquiry');
        expect(classifyIntent('brp duit?')).toBe('price_inquiry');
    });

    test('should classify order intent', () => {
        expect(classifyIntent('mau pesan 2')).toBe('order');
        expect(classifyIntent('order dong')).toBe('order');
    });

    test('should classify complaint', () => {
        expect(classifyIntent('produk rusak!')).toBe('complaint');
        expect(classifyIntent('mau refund')).toBe('complaint');
    });

    // Kept separate from the "unknown" case: a greeting is a recognised intent,
    // so a failure here must not be reported under the "unknown" test title.
    test('should classify greeting', () => {
        expect(classifyIntent('halo')).toBe('greeting');
    });

    test('should return unknown for unclear intent', () => {
        expect(classifyIntent('asdfghjkl')).toBe('unknown');
    });
});

Test AI Prompt Building:

javascript

// prompt.test.js
const { buildSystemPrompt, buildContextPrompt } = require('./prompt');

/**
 * buildSystemPrompt: the generated system prompt must mention the configured
 * brand and contain each of the section headings checked below.
 */
describe('buildSystemPrompt', () => {
    test('should include brand name', () => {
        const brandName = 'TestBrand';
        const systemPrompt = buildSystemPrompt({ brandName });

        expect(systemPrompt).toContain(brandName);
    });

    test('should include all required sections', () => {
        const systemPrompt = buildSystemPrompt({ brandName: 'Test' });

        // Checked in this order; the first missing heading fails the test.
        for (const heading of ['TUGAS', 'ATURAN', 'TONE']) {
            expect(systemPrompt).toContain(heading);
        }
    });
});

/**
 * buildContextPrompt: the context prompt must surface the customer's name
 * and the product of a previous order.
 */
describe('buildContextPrompt', () => {
    test('should include user history', () => {
        const previousOrders = [{ id: '001', product: 'Dress' }];
        const contextPrompt = buildContextPrompt({ userName: 'Budi', previousOrders });

        expect(contextPrompt).toContain('Budi');
        expect(contextPrompt).toContain('Dress');
    });
});

Integration Testing

Test Message Handler:

javascript

// messageHandler.test.js
const { handleMessage } = require('./messageHandler');
const { mockOpenAI, mockDatabase } = require('./testUtils');

/**
 * handleMessage integration tests, run against the mocked OpenAI client and
 * the mocked database from ./testUtils. Both mocks are reset before each test.
 */
describe('handleMessage', () => {
    const USER_ID = 'user123';

    beforeEach(() => {
        mockOpenAI.reset();
        mockDatabase.reset();
    });

    test('should handle greeting', async () => {
        mockOpenAI.mockResponse('Hai Kak! Ada yang bisa dibantu?');

        const reply = await handleMessage(USER_ID, 'halo');

        expect(reply).toContain('Hai');
        // One incoming message is expected to trigger exactly one model call.
        expect(mockOpenAI.calls).toBe(1);
    });

    test('should handle product inquiry', async () => {
        mockOpenAI.mockResponse('Dress Brukat harganya Rp 350.000 kak!');
        mockDatabase.mockProduct({ name: 'Dress Brukat', price: 350000 });

        const reply = await handleMessage(USER_ID, 'harga dress brukat berapa?');

        expect(reply).toContain('350.000');
    });

    test('should fallback on API error', async () => {
        const apiFailure = new Error('Rate limit exceeded');
        mockOpenAI.mockError(apiFailure);

        const reply = await handleMessage(USER_ID, 'halo');

        // The customer should get a retry hint, not a raw error.
        expect(reply).toContain('coba lagi');
    });

    test('should escalate complaints', async () => {
        const reply = await handleMessage(USER_ID, 'produk saya rusak! mau refund!');

        expect(mockDatabase.escalatedTickets).toHaveLength(1);
        expect(reply).toContain('tim kami');
    });
});

Test Conversation Flow:

javascript

// conversationFlow.test.js
const { randomUUID } = require('node:crypto');

/**
 * Multi-turn conversation tests for the order flow.
 * Every test talks to the bot as a fresh user so that conversation state
 * from one test cannot influence another.
 */
describe('Order Flow', () => {
    let userId;

    beforeEach(() => {
        // A millisecond timestamp can repeat when two tests start within the
        // same millisecond (easy with mocked, fast handlers); a UUID cannot.
        userId = `test-user-${randomUUID()}`;
    });

    test('should complete order flow', async () => {
        // Step 1: Greeting
        let response = await handleMessage(userId, 'halo');
        expect(response).toContain('Ada yang bisa dibantu');

        // Step 2: Product inquiry
        response = await handleMessage(userId, 'mau lihat dress');
        expect(response).toContain('katalog');

        // Step 3: Select product
        response = await handleMessage(userId, 'order dress brukat size M');
        expect(response).toContain('alamat');

        // Step 4: Provide address
        response = await handleMessage(userId, 'Jl. Test No. 123, Jakarta');
        expect(response).toContain('konfirmasi');

        // Step 5: Confirm
        response = await handleMessage(userId, 'ok');
        expect(response).toContain('terima kasih');
        expect(response).toContain('order');
    });

    test('should handle order cancellation mid-flow', async () => {
        await handleMessage(userId, 'mau order dress');
        await handleMessage(userId, 'dress brukat M');

        const response = await handleMessage(userId, 'batal deh');

        expect(response).toContain('dibatalkan');

        // Verify state reset: after cancelling, no flow may remain active.
        const state = await getConversationState(userId);
        expect(state.currentFlow).toBeNull();
    });
});

AI Response Testing

Test Response Quality:

javascript

// aiQuality.test.js
/**
 * Table-driven checks of AI answers to common questions.
 * For each scenario the answer must contain every `expectedContains` phrase
 * and none of the `notContains` phrases; matching is case-insensitive.
 */
describe('AI Response Quality', () => {
    const scenarios = [
        {
            input: 'jam buka toko',
            expectedContains: ['08:00', '21:00'],
            notContains: ['sorry', 'tidak tahu'],
        },
        {
            input: 'bisa cod ga?',
            expectedContains: ['COD', 'Jabodetabek'],
            notContains: ['tidak bisa'],
        },
        {
            input: 'gimana cara retur?',
            expectedContains: ['7 hari', 'retur'],
            notContains: ['tidak ada'],
        },
    ];

    for (const scenario of scenarios) {
        test(`should answer "${scenario.input}" correctly`, async () => {
            const rawAnswer = await getAIResponse(scenario.input);
            // Lower-case once; both phrase lists are compared in lower case.
            const answer = rawAnswer.toLowerCase();

            for (const phrase of scenario.expectedContains) {
                expect(answer).toContain(phrase.toLowerCase());
            }

            for (const phrase of scenario.notContains) {
                expect(answer).not.toContain(phrase.toLowerCase());
            }
        });
    }
});

// Test for hallucination
/**
 * Guards against the model inventing products or prices that are not in the
 * catalogue.
 */
describe('Hallucination Prevention', () => {
    // An availability word counts as a confirmation only when it stands alone
    // as a word and is NOT directly preceded by a negation. Without the
    // lookbehind, the denial "tidak ada" would itself trip the "ada" check and
    // contradict the denial assertion below; without the word boundaries,
    // unrelated words such as "pada" or "adalah" would match too.
    const CONFIRMS_AVAILABILITY = /(?<!\b(?:tidak|belum)\s+)\b(?:ada|tersedia|punya)\b/i;

    // The answer must deny availability or offer to check.
    const DENIES_OR_DEFERS = /tidak ada|belum tersedia|cek/i;

    // Any "Rp" followed by a digit is a quoted price, whatever the formatting:
    // "Rp 350.000", "Rp350.000", "Rp. 350.000", "Rp 350000".
    const QUOTES_PRICE = /\bRp\.?\s*\d/i;

    test('should not invent products', async () => {
        const response = await getAIResponse('ada produk XYZ123?');

        // Should not confirm non-existent product
        expect(response).not.toMatch(CONFIRMS_AVAILABILITY);
        expect(response).toMatch(DENIES_OR_DEFERS);
    });

    test('should not invent prices', async () => {
        const response = await getAIResponse('harga tas branded berapa?');

        // Should not make up a price for unknown product
        expect(response).not.toMatch(QUOTES_PRICE);
    });
});

Test Edge Cases:

javascript

// edgeCases.test.js
/**
 * Robustness checks: for unusual input the handler must still produce a
 * truthy reply.
 */
describe('Edge Cases', () => {
    const USER_ID = 'user';

    test('should handle empty message', async () => {
        const reply = await handleMessage(USER_ID, '');
        expect(reply).toBeTruthy();
    });

    test('should handle very long message', async () => {
        const oversized = 'a'.repeat(5000);
        const reply = await handleMessage(USER_ID, oversized);
        expect(reply).toBeTruthy();
    });

    test('should handle special characters', async () => {
        const reply = await handleMessage(USER_ID, '👋 Halo! @#$%');
        expect(reply).toBeTruthy();
    });

    test('should handle rapid messages', async () => {
        // Fire all ten messages before awaiting any of them.
        const pending = Array.from({ length: 10 }, (_, i) =>
            handleMessage(USER_ID, `message ${i}`)
        );

        const replies = await Promise.all(pending);
        for (const reply of replies) {
            expect(reply).toBeTruthy();
        }
    });

    test('should handle language mixing', async () => {
        const reply = await handleMessage(USER_ID, 'Hi mau tanya price list dong thanks');
        expect(reply).toBeTruthy();
    });
});

Load Testing

javascript

// loadTest.js
const strictAssert = require('node:assert/strict');
const autocannon = require('autocannon');

/**
 * Runs a 60-second load test with 10 connections against the local webhook,
 * prints a summary and verifies the performance budget.
 *
 * This file is a plain Node script, so the checks use node:assert rather
 * than Jest's `expect`, which is only defined inside a Jest run.
 *
 * @returns {Promise<object>} the autocannon result object
 * @throws {AssertionError} when throughput, p99 latency or the error counts
 *     are outside the budget
 */
async function runLoadTest() {
    const result = await autocannon({
        url: 'http://localhost:3000/webhook',
        connections: 10,
        duration: 60,
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify({
            message: 'halo mau tanya harga',
            userId: 'loadtest-user'
        })
    });

    console.log('Load Test Results:');
    console.log(`Requests/sec: ${result.requests.average}`);
    console.log(`Latency avg: ${result.latency.average}ms`);
    console.log(`Latency p99: ${result.latency.p99}ms`);
    console.log(`Errors: ${result.errors}`);
    console.log(`Non-2xx responses: ${result.non2xx}`);

    // Assertions
    strictAssert.ok(
        result.requests.average > 50,
        `expected more than 50 requests/sec, got ${result.requests.average}`
    );
    strictAssert.ok(
        result.latency.p99 < 5000,
        `expected p99 latency below 5000ms, got ${result.latency.p99}ms`
    );
    strictAssert.equal(result.errors, 0, `expected 0 errors, got ${result.errors}`);
    // `errors` only counts connection-level failures; HTTP error statuses are
    // reported separately, so a webhook answering 500 would otherwise pass.
    strictAssert.equal(result.non2xx, 0, `expected 0 non-2xx responses, got ${result.non2xx}`);

    return result;
}

// Execute only when run directly (`node loadTest.js`), not when required.
if (require.main === module) {
    runLoadTest().catch((err) => {
        console.error(err);
        process.exitCode = 1;
    });
}

UAT Checklist

✅ USER ACCEPTANCE TESTING:

BASIC FUNCTIONS:
☐ Bot responds to greeting
☐ Bot shows menu/options
☐ Bot answers FAQ correctly
☐ Bot handles typos gracefully

PRODUCT FUNCTIONS:
☐ Product search works
☐ Price inquiry accurate
☐ Stock check accurate
☐ Recommendations relevant

ORDER FLOW:
☐ Order creation works
☐ Address input works
☐ Payment instructions correct
☐ Order confirmation sent

SHIPPING:
☐ Tracking inquiry works
☐ Status updates accurate
☐ Delivery notifications sent

ERROR HANDLING:
☐ Unknown queries handled
☐ Error messages friendly
☐ Human escalation works

EDGE CASES:
☐ Long messages handled
☐ Special characters handled
☐ Rapid messages handled
☐ Session timeout handled

Automated Test Pipeline

yaml

# .github/workflows/test.yml
name: Bot Tests

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          # Node 18 is end-of-life; use a maintained LTS release.
          node-version: '22'
          cache: npm

      - name: Install dependencies
        run: npm ci

      - name: Run unit tests
        run: npm run test:unit

      - name: Run integration tests
        run: npm run test:integration
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY_TEST }}

      # NOTE(review): the AI quality suite presumably calls the real model, so
      # it is given the same test key as the integration step - confirm.
      - name: Run AI quality tests
        run: npm run test:ai-quality
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY_TEST }}

      - name: Upload coverage
        uses: codecov/codecov-action@v4
        with:
          # v4 of the action uploads with a repository token.
          token: ${{ secrets.CODECOV_TOKEN }}

Test Data Management

javascript

// testData.js
/** Fixture users: a new user, a returning user with one order, and a VIP. */
const testUsers = [
    { id: 'new-user', history: [] },
    { id: 'returning-user', history: [{ orderId: '001' }] },
    { id: 'vip-user', tier: 'vip', totalSpent: 5000000 }
];

/** Sample customer messages, grouped by the intent they represent. */
const testMessages = {
    greetings: ['halo', 'hi', 'hai kak', 'selamat siang'],
    priceInquiry: ['berapa harganya', 'harga dong', 'brp duit'],
    orderIntent: ['mau pesan', 'order dong', 'beli'],
    complaints: ['produk rusak', 'mau refund', 'kecewa']
};

/**
 * Picks one random sample message from a category of `testMessages`.
 *
 * @param {string} category - a key of `testMessages`, e.g. 'greetings'
 * @returns {string} one of the messages listed under that category
 * @throws {Error} when `category` is not a key of `testMessages`
 */
function getRandomTestMessage(category) {
    // Own-key check: a misspelled category would otherwise fail with an opaque
    // TypeError, and an inherited name such as 'toString' would silently
    // return undefined.
    if (!Object.hasOwn(testMessages, category)) {
        const known = Object.keys(testMessages).join(', ');
        throw new Error(
            `Unknown test message category "${String(category)}". Expected one of: ${known}`
        );
    }

    const messages = testMessages[category];
    return messages[Math.floor(Math.random() * messages.length)];
}

Best Practices

DO ✅

- Test before every deploy
- Cover happy path + edge cases
- Test AI response quality
- Automate regression tests
- Load test before scale
- UAT with real users

DON'T ❌

- Deploy untested code
- Only test happy path
- Assume AI always correct
- Manual testing only
- Skip load testing
- Skip user testing

FAQ

Berapa coverage yang ideal?

80%+ untuk unit tests. Core flows harus 100% covered.

Perlu test AI responses?

Ya! AI bisa berubah behavior. Test quality dan accuracy regularly.

Kesimpulan

Testing = Confidence to deploy!

No Testing	With Testing
Bugs in production	Bugs caught early
User complaints	Smooth experience
Hotfixes	Stable releases

Build Tested Bot →

Testing Levels

Unit Testing

Test Utility Functions:

Test AI Prompt Building:

Integration Testing

Test Message Handler:

Test Conversation Flow:

AI Response Testing

Test Response Quality:

Test Edge Cases:

Load Testing

UAT Checklist

Automated Test Pipeline

Test Data Management

Best Practices

DO ✅

DON'T ❌

FAQ

Berapa coverage yang ideal?

Perlu test AI responses?

Kesimpulan

Artikel Terkait