Indexes & Performance
Database indexing strategies and performance optimization
Strategic indexing is crucial for Smart Shelf's performance, especially as inventory data grows. This section covers our indexing strategy and performance optimization techniques.
Index Types and Usage
Single Column Indexes
Primary indexes for frequently queried columns:
-- Product identification and search
CREATE INDEX idx_products_sku ON products(sku);
CREATE INDEX idx_products_name ON products(name);
CREATE INDEX idx_products_category ON products(category_id);
CREATE INDEX idx_products_active ON products(is_active);
-- User management
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_users_role ON users(role);
CREATE INDEX idx_users_active ON users(is_active);
-- Inventory tracking
CREATE INDEX idx_inventory_product ON inventory(product_id);
CREATE INDEX idx_inventory_warehouse ON inventory(warehouse_id);
CREATE INDEX idx_inventory_available ON inventory(quantity_available);
-- Order management
CREATE INDEX idx_purchase_orders_status ON purchase_orders(status);
CREATE INDEX idx_purchase_orders_supplier ON purchase_orders(supplier_id);
CREATE INDEX idx_sales_orders_customer ON sales_orders(customer_id);
CREATE INDEX idx_sales_orders_status ON sales_orders(status);
Composite Indexes
Multi-column indexes for complex query patterns:
-- Inventory queries by product and warehouse
CREATE INDEX idx_inventory_product_warehouse ON inventory(product_id, warehouse_id);
-- Stock movements with date filtering
CREATE INDEX idx_stock_movements_product_date ON stock_movements(product_id, created_at);
CREATE INDEX idx_stock_movements_warehouse_date ON stock_movements(warehouse_id, created_at);
-- Order items relationships
CREATE INDEX idx_purchase_order_items_order_product ON purchase_order_items(purchase_order_id, product_id);
CREATE INDEX idx_sales_order_items_order_product ON sales_order_items(sales_order_id, product_id);
-- User sessions for security
CREATE INDEX idx_user_sessions_user_expires ON user_sessions(user_id, expires_at);
-- Audit trail queries
CREATE INDEX idx_audit_logs_table_record ON audit_logs(table_name, record_id);
CREATE INDEX idx_audit_logs_user_date ON audit_logs(user_id, created_at);
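As an example of the query pattern these serve, a movement-history lookup that filters on product and date range can be satisfied by idx_stock_movements_product_date for both the filter and the sort. A minimal sketch (the UUID literal is a placeholder):
-- Movement history for one product over the last 90 days; the composite index
-- (product_id, created_at) covers the equality filter, the range filter, and the sort.
SELECT sm.id, sm.movement_type, sm.quantity, sm.created_at
FROM stock_movements sm
WHERE sm.product_id = '00000000-0000-0000-0000-000000000001'  -- placeholder UUID
  AND sm.created_at >= now() - INTERVAL '90 days'
ORDER BY sm.created_at DESC
LIMIT 50;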
Partial Indexes
Conditional indexes for specific use cases:
-- Low stock alerts
CREATE INDEX idx_inventory_low_stock ON inventory(quantity_on_hand, quantity_allocated)
WHERE quantity_on_hand <= quantity_allocated;
-- Active products only
CREATE INDEX idx_active_products_name ON products(name)
WHERE is_active = true AND deleted_at IS NULL;
-- Pending orders
CREATE INDEX idx_pending_purchase_orders ON purchase_orders(created_at)
WHERE status IN ('draft', 'sent', 'confirmed');
-- Unpaid invoices (the due_date < CURRENT_DATE filter must be applied at query time:
-- partial-index predicates may only use immutable expressions, and CURRENT_DATE is not immutable)
CREATE INDEX idx_overdue_invoices ON invoices(due_date, customer_id)
WHERE status NOT IN ('paid', 'cancelled');
-- Recent stock movements: CURRENT_DATE cannot appear in a partial-index predicate,
-- so use a fixed cutoff and rebuild the index periodically
CREATE INDEX idx_recent_stock_movements ON stock_movements(product_id, created_at)
WHERE created_at >= DATE '2024-01-01';
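The planner only uses a partial index when the query's WHERE clause implies the index predicate, so the filtered conditions must be repeated in the query. An illustrative sketch against idx_active_products_name (the product name is a placeholder value):
-- Can use idx_active_products_name because the WHERE clause implies the
-- index predicate (is_active = true AND deleted_at IS NULL).
SELECT id, name, sku
FROM products
WHERE is_active = true
  AND deleted_at IS NULL
  AND name = 'Bluetooth Widget';  -- placeholder value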
Full-Text Search Indexes
GIN indexes for text search capabilities:
-- Product search across name and description
CREATE INDEX idx_products_search ON products
USING GIN(to_tsvector('english', name || ' ' || COALESCE(description, '')));
-- Customer search
CREATE INDEX idx_customers_search ON customers
USING GIN(to_tsvector('english', name || ' ' || COALESCE(email, '')));
-- Supplier search
CREATE INDEX idx_suppliers_search ON suppliers
USING GIN(to_tsvector('english', name || ' ' || COALESCE(contact_name, '')));
-- Example usage:
-- SELECT * FROM products
-- WHERE to_tsvector('english', name || ' ' || COALESCE(description, ''))
-- @@ plainto_tsquery('english', 'widget bluetooth');
JSONB Indexes
GIN indexes for JSON data queries:
-- Product tags and custom fields
CREATE INDEX idx_products_tags ON products USING GIN(tags);
CREATE INDEX idx_products_custom_fields ON products USING GIN(custom_fields);
-- User preferences
CREATE INDEX idx_users_preferences ON users USING GIN(preferences);
-- Warehouse operating hours
CREATE INDEX idx_warehouses_hours ON warehouses USING GIN(operating_hours);
-- Example queries:
-- SELECT * FROM products WHERE tags @> '["electronics"]';
-- SELECT * FROM products WHERE custom_fields @> '{"color": "red"}';
-- SELECT * FROM users WHERE preferences @> '{"theme": "dark"}';
Performance Optimization Strategies
Table Partitioning
Time-Based Partitioning
Large transaction tables partitioned by time:
-- Stock movements partitioned by month
CREATE TABLE stock_movements (
id UUID NOT NULL DEFAULT gen_random_uuid(),
product_id UUID NOT NULL,
warehouse_id UUID NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-- other columns
) PARTITION BY RANGE (created_at);
-- Create monthly partitions
CREATE TABLE stock_movements_y2024m01 PARTITION OF stock_movements
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
CREATE TABLE stock_movements_y2024m02 PARTITION OF stock_movements
FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');
-- Automatic partition creation function
CREATE OR REPLACE FUNCTION create_monthly_partition(table_name text, start_date date)
RETURNS void AS $$
DECLARE
partition_name text;
end_date date;
BEGIN
partition_name := table_name || '_y' || to_char(start_date, 'YYYY') || 'm' || to_char(start_date, 'MM');
end_date := start_date + interval '1 month';
EXECUTE format('CREATE TABLE %I PARTITION OF %I FOR VALUES FROM (%L) TO (%L)',
partition_name, table_name, start_date, end_date);
EXECUTE format('CREATE INDEX idx_%s_created_at ON %I (created_at)',
partition_name, partition_name);
END;
$$ LANGUAGE plpgsql;
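Hypothetical usage, for example from a scheduled job that creates the next month's partition ahead of time:
-- Create the March 2024 partition for stock_movements (works for any table
-- range-partitioned on created_at)
SELECT create_monthly_partition('stock_movements', DATE '2024-03-01');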
Audit Log Partitioning
-- Audit logs partitioned by month
CREATE TABLE audit_logs (
id UUID NOT NULL DEFAULT gen_random_uuid(),
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-- other columns
) PARTITION BY RANGE (created_at);
-- Create partitions with appropriate indexes
CREATE TABLE audit_logs_y2024m01 PARTITION OF audit_logs
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
CREATE INDEX idx_audit_logs_y2024m01_table_record
ON audit_logs_y2024m01(table_name, record_id);
CREATE INDEX idx_audit_logs_y2024m01_user
ON audit_logs_y2024m01(user_id);
Materialized Views
Pre-computed views for complex analytics:
-- Product performance summary
CREATE MATERIALIZED VIEW product_performance_summary AS
SELECT
p.id,
p.name,
p.sku,
p.category_id,
c.name AS category_name,
-- Current inventory
COALESCE(SUM(i.quantity_on_hand), 0) AS total_stock,
COALESCE(SUM(i.quantity_available), 0) AS available_stock,
COALESCE(AVG(i.average_cost), 0) AS avg_cost,
COUNT(DISTINCT i.warehouse_id) AS warehouses_stocked,
-- Sales performance (last 12 months)
COALESCE(sales_data.total_sold, 0) AS total_sold_12m,
COALESCE(sales_data.revenue, 0) AS revenue_12m,
sales_data.last_sale_date,
-- Purchase performance (last 12 months)
COALESCE(purchase_data.total_purchased, 0) AS total_purchased_12m,
COALESCE(purchase_data.purchase_cost, 0) AS purchase_cost_12m,
purchase_data.last_purchase_date,
-- Status indicators
CASE
WHEN COALESCE(SUM(i.quantity_on_hand), 0) = 0 THEN 'out_of_stock'
WHEN COALESCE(SUM(i.quantity_available), 0) <= p.reorder_point THEN 'low_stock'
ELSE 'in_stock'
END AS stock_status,
now() AS last_updated
FROM products p
LEFT JOIN categories c ON p.category_id = c.id
LEFT JOIN inventory i ON p.id = i.product_id
LEFT JOIN (
-- Sales data subquery
SELECT
soi.product_id,
SUM(soi.quantity_shipped) AS total_sold,
SUM(soi.quantity_shipped * soi.unit_price) AS revenue,
MAX(so.actual_ship_date) AS last_sale_date
FROM sales_order_items soi
JOIN sales_orders so ON soi.sales_order_id = so.id
WHERE so.created_at >= CURRENT_DATE - INTERVAL '12 months'
AND so.status NOT IN ('cancelled', 'draft')
GROUP BY soi.product_id
) sales_data ON p.id = sales_data.product_id
LEFT JOIN (
-- Purchase data subquery
SELECT
poi.product_id,
SUM(poi.quantity_received) AS total_purchased,
SUM(poi.quantity_received * poi.unit_cost) AS purchase_cost,
MAX(po.delivery_date) AS last_purchase_date
FROM purchase_order_items poi
JOIN purchase_orders po ON poi.purchase_order_id = po.id
WHERE po.created_at >= CURRENT_DATE - INTERVAL '12 months'
AND po.status NOT IN ('cancelled', 'draft')
GROUP BY poi.product_id
) purchase_data ON p.id = purchase_data.product_id
WHERE p.is_active = true AND p.deleted_at IS NULL
GROUP BY p.id, p.name, p.sku, p.category_id, c.name, p.reorder_point,
sales_data.total_sold, sales_data.revenue, sales_data.last_sale_date,
purchase_data.total_purchased, purchase_data.purchase_cost, purchase_data.last_purchase_date;
-- Indexes for materialized view
CREATE UNIQUE INDEX idx_product_performance_summary_id
ON product_performance_summary(id);
CREATE INDEX idx_product_performance_summary_category
ON product_performance_summary(category_id);
CREATE INDEX idx_product_performance_summary_stock_status
ON product_performance_summary(stock_status);
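Reorder reports and dashboards can then read the pre-computed rows instead of re-aggregating on every request. A sketch of a typical consumer query:
-- Products flagged as low on stock, highest sellers first
SELECT id, name, sku, available_stock, total_sold_12m
FROM product_performance_summary
WHERE stock_status = 'low_stock'
ORDER BY total_sold_12m DESC;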
Warehouse Performance Summary
-- Warehouse performance metrics
CREATE MATERIALIZED VIEW warehouse_performance_summary AS
SELECT
w.id,
w.name,
w.code,
-- Inventory metrics
COUNT(DISTINCT i.product_id) AS unique_products,
COALESCE(SUM(i.quantity_on_hand), 0) AS total_units,
COALESCE(SUM(i.quantity_on_hand * i.average_cost), 0) AS inventory_value,
-- Capacity utilization
w.capacity_limit,
CASE
WHEN w.capacity_limit IS NOT NULL AND w.capacity_limit > 0
THEN ROUND((COALESCE(SUM(i.quantity_on_hand), 0) * 100.0 / w.capacity_limit), 2)
ELSE NULL
END AS capacity_utilization_pct,
-- Activity metrics (last 30 days)
COALESCE(activity.movements_in, 0) AS movements_in_30d,
COALESCE(activity.movements_out, 0) AS movements_out_30d,
COALESCE(activity.adjustments, 0) AS adjustments_30d,
-- Order metrics
COALESCE(orders.pending_shipments, 0) AS pending_shipments,
COALESCE(orders.pending_receipts, 0) AS pending_receipts,
now() AS last_updated
FROM warehouses w
LEFT JOIN inventory i ON w.id = i.warehouse_id
LEFT JOIN (
SELECT
warehouse_id,
SUM(CASE WHEN movement_type = 'in' THEN quantity ELSE 0 END) AS movements_in,
SUM(CASE WHEN movement_type = 'out' THEN quantity ELSE 0 END) AS movements_out,
SUM(CASE WHEN movement_type = 'adjustment' THEN ABS(quantity) ELSE 0 END) AS adjustments
FROM stock_movements
WHERE created_at >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY warehouse_id
) activity ON w.id = activity.warehouse_id
LEFT JOIN (
SELECT
w2.id AS warehouse_id,
COUNT(DISTINCT so.id) AS pending_shipments,
COUNT(DISTINCT po.id) AS pending_receipts
FROM warehouses w2
LEFT JOIN sales_orders so ON w2.id = so.warehouse_id
AND so.status IN ('confirmed', 'picking', 'packed')
LEFT JOIN purchase_orders po ON w2.id = po.warehouse_id
AND po.status IN ('sent', 'confirmed', 'partial')
GROUP BY w2.id
) orders ON w.id = orders.warehouse_id
WHERE w.is_active = true
GROUP BY w.id, w.name, w.code, w.capacity_limit,
activity.movements_in, activity.movements_out, activity.adjustments,
orders.pending_shipments, orders.pending_receipts;
-- Indexes for warehouse performance
CREATE UNIQUE INDEX idx_warehouse_performance_summary_id
ON warehouse_performance_summary(id);
CREATE INDEX idx_warehouse_performance_summary_utilization
ON warehouse_performance_summary(capacity_utilization_pct);
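A capacity check against this view might look like the following sketch (the 90% threshold is an arbitrary example):
-- Warehouses approaching their configured capacity limit
SELECT name, code, capacity_utilization_pct, pending_receipts
FROM warehouse_performance_summary
WHERE capacity_utilization_pct >= 90
ORDER BY capacity_utilization_pct DESC;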
Refresh Strategies
Automated refresh for materialized views:
-- Function to refresh materialized views
CREATE OR REPLACE FUNCTION refresh_performance_views()
RETURNS void AS $$
BEGIN
REFRESH MATERIALIZED VIEW CONCURRENTLY product_performance_summary;
REFRESH MATERIALIZED VIEW CONCURRENTLY warehouse_performance_summary;
-- Log refresh completion
INSERT INTO system_logs (log_level, message, created_at)
VALUES ('INFO', 'Performance views refreshed successfully', now());
EXCEPTION WHEN OTHERS THEN
INSERT INTO system_logs (log_level, message, details, created_at)
VALUES ('ERROR', 'Failed to refresh performance views', SQLERRM, now());
RAISE;
END;
$$ LANGUAGE plpgsql;
-- Schedule refresh (example for pg_cron extension)
-- SELECT cron.schedule('refresh-views', '0 */4 * * *', 'SELECT refresh_performance_views();');
Query Optimization
Query Analysis Tools
-- Analyze query performance
EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON)
SELECT p.name, i.quantity_on_hand, w.name
FROM products p
JOIN inventory i ON p.id = i.product_id
JOIN warehouses w ON i.warehouse_id = w.id
WHERE p.is_active = true
AND i.quantity_on_hand < p.reorder_point;
-- Index usage statistics
SELECT
schemaname,
relname AS tablename,
indexrelname AS indexname,
idx_scan,
idx_tup_read,
idx_tup_fetch
FROM pg_stat_user_indexes
WHERE idx_scan > 0
ORDER BY idx_scan DESC;
-- Table access patterns
SELECT
schemaname,
relname AS tablename,
seq_scan,
seq_tup_read,
idx_scan,
idx_tup_fetch,
n_tup_ins,
n_tup_upd,
n_tup_del
FROM pg_stat_user_tables
ORDER BY seq_scan DESC;
Common Query Patterns
Optimized queries for frequent operations:
-- Efficient low stock query
SELECT
p.id,
p.name,
p.sku,
p.reorder_point,
i.quantity_on_hand,
i.quantity_available,
w.name AS warehouse_name
FROM products p
JOIN inventory i ON p.id = i.product_id
JOIN warehouses w ON i.warehouse_id = w.id
WHERE p.is_active = true
AND i.quantity_available <= p.reorder_point
ORDER BY (i.quantity_available::float / NULLIF(p.reorder_point, 0)) ASC;
-- Efficient order fulfillment query
SELECT
so.id,
so.order_number,
so.customer_id,
c.name AS customer_name,
COUNT(soi.id) AS total_items,
SUM(soi.quantity_ordered) AS total_quantity,
SUM(soi.quantity_shipped) AS shipped_quantity,
bool_and(soi.quantity_shipped >= soi.quantity_ordered) AS fully_shipped
FROM sales_orders so
JOIN customers c ON so.customer_id = c.id
JOIN sales_order_items soi ON so.id = soi.sales_order_id
WHERE so.status IN ('confirmed', 'picking', 'packed')
GROUP BY so.id, so.order_number, so.customer_id, c.name
HAVING NOT bool_and(soi.quantity_shipped >= soi.quantity_ordered)
ORDER BY so.promised_ship_date ASC;
Performance Monitoring
Key Metrics to Track
-- Database size monitoring
SELECT
pg_size_pretty(pg_database_size(current_database())) AS database_size;
-- Table sizes
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size,
pg_size_pretty(pg_relation_size(schemaname||'.'||tablename)) AS table_size,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename) - pg_relation_size(schemaname||'.'||tablename)) AS index_size
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC;
-- Index effectiveness
SELECT
t.tablename,
indexname,
c.reltuples AS num_rows,
pg_size_pretty(pg_relation_size(quote_ident(t.schemaname)||'.'||quote_ident(t.tablename))) AS table_size,
pg_size_pretty(pg_relation_size(quote_ident(t.schemaname)||'.'||quote_ident(indexrelname))) AS index_size,
CASE WHEN indisunique THEN 'Y' ELSE 'N' END AS is_unique,
idx_scan AS number_of_scans,
idx_tup_read AS tuples_read,
idx_tup_fetch AS tuples_fetched
FROM pg_tables t
LEFT OUTER JOIN pg_class c ON c.relname = t.tablename
LEFT OUTER JOIN pg_indexes i ON c.relname = i.tablename
LEFT OUTER JOIN pg_stat_user_indexes ui ON ui.indexrelname = i.indexname
LEFT OUTER JOIN pg_index ix ON ix.indrelid = c.oid AND ix.indexrelid = ui.indexrelid
WHERE t.schemaname = 'public'
ORDER BY pg_relation_size(quote_ident(t.schemaname)||'.'||quote_ident(indexrelname)) DESC;
Automated Performance Alerts
-- Create monitoring views for alerts
CREATE VIEW performance_alerts AS
SELECT
'slow_queries' AS alert_type,
'Queries with mean time > 1000ms: ' || count(*) AS message,
'high' AS severity
FROM pg_stat_statements
WHERE mean_exec_time > 1000 -- named mean_time on PostgreSQL 12 and earlier
UNION ALL
SELECT
'unused_indexes' AS alert_type,
'Unused indexes: ' || count(*) AS message,
'medium' AS severity
FROM pg_stat_user_indexes
WHERE idx_scan = 0
AND indexrelname NOT LIKE '%pkey'
UNION ALL
SELECT
'large_sequential_scans' AS alert_type,
'Tables with high sequential scan ratio: ' || count(*) AS message,
'medium' AS severity
FROM pg_stat_user_tables
WHERE seq_scan > idx_scan * 10
AND n_tup_ins + n_tup_upd + n_tup_del > 1000;
Best Practices
Index Maintenance
- Regular Analysis: Run ANALYZE after bulk operations (see the sketch after this list)
- Index Monitoring: Track index usage and remove unused indexes
- Partial Indexes: Use for filtered queries to reduce index size
- Composite Index Order: Put the most selective column first
- Avoid Over-Indexing: Balance query performance against write performance
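A minimal maintenance sketch covering the first two points (table and index names are illustrative; confirm an index is genuinely unused, including on replicas, before dropping it):
-- Refresh planner statistics after a bulk load
ANALYZE stock_movements;
-- Candidate indexes that have never been scanned since statistics were last reset
SELECT schemaname, relname AS tablename, indexrelname AS indexname,
       pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY pg_relation_size(indexrelid) DESC;
-- Drop only after verifying the index is not needed (example name)
-- DROP INDEX CONCURRENTLY IF EXISTS idx_products_name;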
Query Optimization
- Use EXPLAIN ANALYZE: Understand query execution plans
- Limit Result Sets: Use LIMIT and pagination for large datasets
- Avoid SELECT *: Select only needed columns
- Use EXISTS vs IN: Prefer EXISTS over IN for subqueries with large result sets (example below)
- Index Foreign Keys: Ensure all foreign key columns are indexed
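For example, finding customers with open orders is often better expressed with EXISTS than with IN over a large subquery result. A sketch using the tables above:
-- EXISTS can stop at the first matching order per customer, whereas
-- IN (SELECT ...) may materialize the full subquery result first
SELECT c.id, c.name
FROM customers c
WHERE EXISTS (
    SELECT 1
    FROM sales_orders so
    WHERE so.customer_id = c.id
      AND so.status IN ('confirmed', 'picking', 'packed')
);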
Monitoring and Maintenance
- Regular VACUUM: Prevent table bloat
- Update Statistics: Keep query planner statistics current
- Monitor Slow Queries: Use the pg_stat_statements extension (see the query sketch below)
- Partition Large Tables: Use time-based or logical partitioning
- Archive Old Data: Move historical data to separate tables/databases
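As a starting point for the slow-query monitoring mentioned above, assuming the pg_stat_statements extension is installed (the column is mean_exec_time on PostgreSQL 13+ and mean_time on older versions):
-- Top 10 statements by average execution time
SELECT round(mean_exec_time::numeric, 2) AS avg_ms,
       calls,
       left(query, 80) AS query_preview
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 10;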