Indexes & Performance
Database indexing strategies and performance optimization
Strategic indexing is crucial for Smart Shelf's performance, especially as inventory data grows. This section covers our indexing strategy and performance optimization techniques.
Index Types and Usage
Single Column Indexes
Primary indexes for frequently queried columns:
-- Product identification and search
CREATE INDEX idx_products_sku ON products(sku);
CREATE INDEX idx_products_name ON products(name);
CREATE INDEX idx_products_category ON products(category_id);
CREATE INDEX idx_products_active ON products(is_active);
-- User management
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_users_role ON users(role);
CREATE INDEX idx_users_active ON users(is_active);
-- Inventory tracking
CREATE INDEX idx_inventory_product ON inventory(product_id);
CREATE INDEX idx_inventory_warehouse ON inventory(warehouse_id);
CREATE INDEX idx_inventory_available ON inventory(quantity_available);
-- Order management
CREATE INDEX idx_purchase_orders_status ON purchase_orders(status);
CREATE INDEX idx_purchase_orders_supplier ON purchase_orders(supplier_id);
CREATE INDEX idx_sales_orders_customer ON sales_orders(customer_id);
CREATE INDEX idx_sales_orders_status ON sales_orders(status);
Composite Indexes
Multi-column indexes for complex query patterns:
-- Inventory queries by product and warehouse
CREATE INDEX idx_inventory_product_warehouse ON inventory(product_id, warehouse_id);
-- Stock movements with date filtering
CREATE INDEX idx_stock_movements_product_date ON stock_movements(product_id, created_at);
CREATE INDEX idx_stock_movements_warehouse_date ON stock_movements(warehouse_id, created_at);
-- Order items relationships
CREATE INDEX idx_purchase_order_items_order_product ON purchase_order_items(purchase_order_id, product_id);
CREATE INDEX idx_sales_order_items_order_product ON sales_order_items(sales_order_id, product_id);
-- User sessions for security
CREATE INDEX idx_user_sessions_user_expires ON user_sessions(user_id, expires_at);
-- Audit trail queries
CREATE INDEX idx_audit_logs_table_record ON audit_logs(table_name, record_id);
CREATE INDEX idx_audit_logs_user_date ON audit_logs(user_id, created_at);
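As an example of the query pattern these serve, a movement-history lookup that filters on product and date range can be satisfied by idx_stock_movements_product_date for both the filter and the sort. A minimal sketch (the UUID literal is a placeholder):
-- Movement history for one product over the last 90 days; the composite index
-- (product_id, created_at) covers the equality filter, the range filter, and the sort.
SELECT sm.id, sm.movement_type, sm.quantity, sm.created_at
FROM stock_movements sm
WHERE sm.product_id = '00000000-0000-0000-0000-000000000001'  -- placeholder UUID
  AND sm.created_at >= now() - INTERVAL '90 days'
ORDER BY sm.created_at DESC
LIMIT 50;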
Partial Indexes
Conditional indexes for specific use cases:
-- Low stock alerts
CREATE INDEX idx_inventory_low_stock ON inventory(quantity_on_hand, quantity_allocated)
WHERE quantity_on_hand <= quantity_allocated;
-- Active products only
CREATE INDEX idx_active_products_name ON products(name)
WHERE is_active = true AND deleted_at IS NULL;
-- Pending orders
CREATE INDEX idx_pending_purchase_orders ON purchase_orders(created_at)
WHERE status IN ('draft', 'sent', 'confirmed');
-- Unpaid invoices (the due_date < CURRENT_DATE filter must be applied at query time:
-- partial-index predicates may only use immutable expressions, and CURRENT_DATE is not immutable)
CREATE INDEX idx_overdue_invoices ON invoices(due_date, customer_id)
WHERE status NOT IN ('paid', 'cancelled');
-- Recent stock movements: CURRENT_DATE cannot appear in a partial-index predicate,
-- so use a fixed cutoff and rebuild the index periodically
CREATE INDEX idx_recent_stock_movements ON stock_movements(product_id, created_at)
WHERE created_at >= DATE '2024-01-01';
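The planner only uses a partial index when the query's WHERE clause implies the index predicate, so the filtered conditions must be repeated in the query. An illustrative sketch against idx_active_products_name (the product name is a placeholder value):
-- Can use idx_active_products_name because the WHERE clause implies the
-- index predicate (is_active = true AND deleted_at IS NULL).
SELECT id, name, sku
FROM products
WHERE is_active = true
  AND deleted_at IS NULL
  AND name = 'Bluetooth Widget';  -- placeholder value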
Full-Text Search Indexes
GIN indexes for text search capabilities:
-- Product search across name and description
CREATE INDEX idx_products_search ON products
USING GIN(to_tsvector('english', name || ' ' || COALESCE(description, '')));
-- Customer search
CREATE INDEX idx_customers_search ON customers
USING GIN(to_tsvector('english', name || ' ' || COALESCE(email, '')));
-- Supplier search
CREATE INDEX idx_suppliers_search ON suppliers
USING GIN(to_tsvector('english', name || ' ' || COALESCE(contact_name, '')));
-- Example usage:
-- SELECT * FROM products
-- WHERE to_tsvector('english', name || ' ' || COALESCE(description, ''))
-- @@ plainto_tsquery('english', 'widget bluetooth');
JSONB Indexes
GIN indexes for JSON data queries:
-- Product tags and custom fields
CREATE INDEX idx_products_tags ON products USING GIN(tags);
CREATE INDEX idx_products_custom_fields ON products USING GIN(custom_fields);
-- User preferences
CREATE INDEX idx_users_preferences ON users USING GIN(preferences);
-- Warehouse operating hours
CREATE INDEX idx_warehouses_hours ON warehouses USING GIN(operating_hours);
-- Example queries:
-- SELECT * FROM products WHERE tags @> '["electronics"]';
-- SELECT * FROM products WHERE custom_fields @> '{"color": "red"}';
-- SELECT * FROM users WHERE preferences @> '{"theme": "dark"}';
Performance Optimization Strategies
Table Partitioning
Time-Based Partitioning
Large transaction tables partitioned by time:
-- Stock movements partitioned by month
CREATE TABLE stock_movements (
id UUID NOT NULL DEFAULT gen_random_uuid(),
product_id UUID NOT NULL,
warehouse_id UUID NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-- other columns
) PARTITION BY RANGE (created_at);
-- Create monthly partitions
CREATE TABLE stock_movements_y2024m01 PARTITION OF stock_movements
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
CREATE TABLE stock_movements_y2024m02 PARTITION OF stock_movements
FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');
-- Automatic partition creation function
CREATE OR REPLACE FUNCTION create_monthly_partition(table_name text, start_date date)
RETURNS void AS $$
DECLARE
partition_name text;
end_date date;
BEGIN
partition_name := table_name || '_y' || to_char(start_date, 'YYYY') || 'm' || to_char(start_date, 'MM');
end_date := start_date + interval '1 month';
EXECUTE format('CREATE TABLE %I PARTITION OF %I FOR VALUES FROM (%L) TO (%L)',
partition_name, table_name, start_date, end_date);
EXECUTE format('CREATE INDEX idx_%s_created_at ON %I (created_at)',
partition_name, partition_name);
END;
$$ LANGUAGE plpgsql;
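Hypothetical usage, for example from a scheduled job that creates the next month's partition ahead of time:
-- Create the March 2024 partition for stock_movements (works for any table
-- range-partitioned on created_at)
SELECT create_monthly_partition('stock_movements', DATE '2024-03-01');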
Audit Log Partitioning
-- Audit logs partitioned by month
CREATE TABLE audit_logs (
id UUID NOT NULL DEFAULT gen_random_uuid(),
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-- other columns
) PARTITION BY RANGE (created_at);
-- Create partitions with appropriate indexes
CREATE TABLE audit_logs_y2024m01 PARTITION OF audit_logs
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
CREATE INDEX idx_audit_logs_y2024m01_table_record
ON audit_logs_y2024m01(table_name, record_id);
CREATE INDEX idx_audit_logs_y2024m01_user
ON audit_logs_y2024m01(user_id);
Materialized Views
Pre-computed views for complex analytics:
-- Product performance summary
CREATE MATERIALIZED VIEW product_performance_summary AS
SELECT
p.id,
p.name,
p.sku,
p.category_id,
c.name AS category_name,
-- Current inventory
COALESCE(SUM(i.quantity_on_hand), 0) AS total_stock,
COALESCE(SUM(i.quantity_available), 0) AS available_stock,
COALESCE(AVG(i.average_cost), 0) AS avg_cost,
COUNT(DISTINCT i.warehouse_id) AS warehouses_stocked,
-- Sales performance (last 12 months)
COALESCE(sales_data.total_sold, 0) AS total_sold_12m,
COALESCE(sales_data.revenue, 0) AS revenue_12m,
sales_data.last_sale_date,
-- Purchase performance (last 12 months)
COALESCE(purchase_data.total_purchased, 0) AS total_purchased_12m,
COALESCE(purchase_data.purchase_cost, 0) AS purchase_cost_12m,
purchase_data.last_purchase_date,
-- Status indicators
CASE
WHEN COALESCE(SUM(i.quantity_on_hand), 0) = 0 THEN 'out_of_stock'
WHEN COALESCE(SUM(i.quantity_available), 0) <= p.reorder_point THEN 'low_stock'
ELSE 'in_stock'
END AS stock_status,
now() AS last_updated
FROM products p
LEFT JOIN categories c ON p.category_id = c.id
LEFT JOIN inventory i ON p.id = i.product_id
LEFT JOIN (
-- Sales data subquery
SELECT
soi.product_id,
SUM(soi.quantity_shipped) AS total_sold,
SUM(soi.quantity_shipped * soi.unit_price) AS revenue,
MAX(so.actual_ship_date) AS last_sale_date
FROM sales_order_items soi
JOIN sales_orders so ON soi.sales_order_id = so.id
WHERE so.created_at >= CURRENT_DATE - INTERVAL '12 months'
AND so.status NOT IN ('cancelled', 'draft')
GROUP BY soi.product_id
) sales_data ON p.id = sales_data.product_id
LEFT JOIN (
-- Purchase data subquery
SELECT
poi.product_id,
SUM(poi.quantity_received) AS total_purchased,
SUM(poi.quantity_received * poi.unit_cost) AS purchase_cost,
MAX(po.delivery_date) AS last_purchase_date
FROM purchase_order_items poi
JOIN purchase_orders po ON poi.purchase_order_id = po.id
WHERE po.created_at >= CURRENT_DATE - INTERVAL '12 months'
AND po.status NOT IN ('cancelled', 'draft')
GROUP BY poi.product_id
) purchase_data ON p.id = purchase_data.product_id
WHERE p.is_active = true AND p.deleted_at IS NULL
GROUP BY p.id, p.name, p.sku, p.category_id, c.name, p.reorder_point,
sales_data.total_sold, sales_data.revenue, sales_data.last_sale_date,
purchase_data.total_purchased, purchase_data.purchase_cost, purchase_data.last_purchase_date;
-- Indexes for materialized view
CREATE UNIQUE INDEX idx_product_performance_summary_id
ON product_performance_summary(id);
CREATE INDEX idx_product_performance_summary_category
ON product_performance_summary(category_id);
CREATE INDEX idx_product_performance_summary_stock_status
ON product_performance_summary(stock_status);
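Reorder reports and dashboards can then read the pre-computed rows instead of re-aggregating on every request. A sketch of a typical consumer query:
-- Products flagged as low on stock, highest sellers first
SELECT id, name, sku, available_stock, total_sold_12m
FROM product_performance_summary
WHERE stock_status = 'low_stock'
ORDER BY total_sold_12m DESC;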
Warehouse Performance Summary
-- Warehouse performance metrics
CREATE MATERIALIZED VIEW warehouse_performance_summary AS
SELECT
w.id,
w.name,
w.code,
-- Inventory metrics
COUNT(DISTINCT i.product_id) AS unique_products,
COALESCE(SUM(i.quantity_on_hand), 0) AS total_units,
COALESCE(SUM(i.quantity_on_hand * i.average_cost), 0) AS inventory_value,
-- Capacity utilization
w.capacity_limit,
CASE
WHEN w.capacity_limit IS NOT NULL AND w.capacity_limit > 0
THEN ROUND((COALESCE(SUM(i.quantity_on_hand), 0) * 100.0 / w.capacity_limit), 2)
ELSE NULL
END AS capacity_utilization_pct,
-- Activity metrics (last 30 days)
COALESCE(activity.movements_in, 0) AS movements_in_30d,
COALESCE(activity.movements_out, 0) AS movements_out_30d,
COALESCE(activity.adjustments, 0) AS adjustments_30d,
-- Order metrics
COALESCE(orders.pending_shipments, 0) AS pending_shipments,
COALESCE(orders.pending_receipts, 0) AS pending_receipts,
now() AS last_updated
FROM warehouses w
LEFT JOIN inventory i ON w.id = i.warehouse_id
LEFT JOIN (
SELECT
warehouse_id,
SUM(CASE WHEN movement_type = 'in' THEN quantity ELSE 0 END) AS movements_in,
SUM(CASE WHEN movement_type = 'out' THEN quantity ELSE 0 END) AS movements_out,
SUM(CASE WHEN movement_type = 'adjustment' THEN ABS(quantity) ELSE 0 END) AS adjustments
FROM stock_movements
WHERE created_at >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY warehouse_id
) activity ON w.id = activity.warehouse_id
LEFT JOIN (
SELECT
w2.id AS warehouse_id,
COUNT(DISTINCT so.id) AS pending_shipments,
COUNT(DISTINCT po.id) AS pending_receipts
FROM warehouses w2
LEFT JOIN sales_orders so ON w2.id = so.warehouse_id
AND so.status IN ('confirmed', 'picking', 'packed')
LEFT JOIN purchase_orders po ON w2.id = po.warehouse_id
AND po.status IN ('sent', 'confirmed', 'partial')
GROUP BY w2.id
) orders ON w.id = orders.warehouse_id
WHERE w.is_active = true
GROUP BY w.id, w.name, w.code, w.capacity_limit,
activity.movements_in, activity.movements_out, activity.adjustments,
orders.pending_shipments, orders.pending_receipts;
-- Indexes for warehouse performance
CREATE UNIQUE INDEX idx_warehouse_performance_summary_id
ON warehouse_performance_summary(id);
CREATE INDEX idx_warehouse_performance_summary_utilization
ON warehouse_performance_summary(capacity_utilization_pct);
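A capacity check against this view might look like the following sketch (the 90% threshold is an arbitrary example):
-- Warehouses approaching their configured capacity limit
SELECT name, code, capacity_utilization_pct, pending_receipts
FROM warehouse_performance_summary
WHERE capacity_utilization_pct >= 90
ORDER BY capacity_utilization_pct DESC;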
Refresh Strategies
Automated refresh for materialized views:
-- Function to refresh materialized views
CREATE OR REPLACE FUNCTION refresh_performance_views()
RETURNS void AS $$
BEGIN
REFRESH MATERIALIZED VIEW CONCURRENTLY product_performance_summary;
REFRESH MATERIALIZED VIEW CONCURRENTLY warehouse_performance_summary;
-- Log refresh completion
INSERT INTO system_logs (log_level, message, created_at)
VALUES ('INFO', 'Performance views refreshed successfully', now());
EXCEPTION WHEN OTHERS THEN
INSERT INTO system_logs (log_level, message, details, created_at)
VALUES ('ERROR', 'Failed to refresh performance views', SQLERRM, now());
RAISE;
END;
$$ LANGUAGE plpgsql;
-- Schedule refresh (example for pg_cron extension)
-- SELECT cron.schedule('refresh-views', '0 */4 * * *', 'SELECT refresh_performance_views();');
Query Optimization
Query Analysis Tools
-- Analyze query performance
EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON)
SELECT p.name, i.quantity_on_hand, w.name
FROM products p
JOIN inventory i ON p.id = i.product_id
JOIN warehouses w ON i.warehouse_id = w.id
WHERE p.is_active = true
AND i.quantity_on_hand < p.reorder_point;
-- Index usage statistics
SELECT
schemaname,
relname AS tablename,
indexrelname AS indexname,
idx_scan,
idx_tup_read,
idx_tup_fetch
FROM pg_stat_user_indexes
WHERE idx_scan > 0
ORDER BY idx_scan DESC;
-- Table access patterns
SELECT
schemaname,
relname AS tablename,
seq_scan,
seq_tup_read,
idx_scan,
idx_tup_fetch,
n_tup_ins,
n_tup_upd,
n_tup_del
FROM pg_stat_user_tables
ORDER BY seq_scan DESC;
Common Query Patterns
Optimized queries for frequent operations:
-- Efficient low stock query
SELECT
p.id,
p.name,
p.sku,
p.reorder_point,
i.quantity_on_hand,
i.quantity_available,
w.name AS warehouse_name
FROM products p
JOIN inventory i ON p.id = i.product_id
JOIN warehouses w ON i.warehouse_id = w.id
WHERE p.is_active = true
AND i.quantity_available <= p.reorder_point
ORDER BY (i.quantity_available::float / NULLIF(p.reorder_point, 0)) ASC;
-- Efficient order fulfillment query
SELECT
so.id,
so.order_number,
so.customer_id,
c.name AS customer_name,
COUNT(soi.id) AS total_items,
SUM(soi.quantity_ordered) AS total_quantity,
SUM(soi.quantity_shipped) AS shipped_quantity,
bool_and(soi.quantity_shipped >= soi.quantity_ordered) AS fully_shipped
FROM sales_orders so
JOIN customers c ON so.customer_id = c.id
JOIN sales_order_items soi ON so.id = soi.sales_order_id
WHERE so.status IN ('confirmed', 'picking', 'packed')
GROUP BY so.id, so.order_number, so.customer_id, c.name
HAVING NOT bool_and(soi.quantity_shipped >= soi.quantity_ordered)
ORDER BY so.promised_ship_date ASC;
Performance Monitoring
Key Metrics to Track
-- Database size monitoring
SELECT
pg_size_pretty(pg_database_size(current_database())) AS database_size;
-- Table sizes
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size,
pg_size_pretty(pg_relation_size(schemaname||'.'||tablename)) AS table_size,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename) - pg_relation_size(schemaname||'.'||tablename)) AS index_size
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC;
-- Index effectiveness
SELECT
t.tablename,
indexname,
c.reltuples AS num_rows,
pg_size_pretty(pg_relation_size(quote_ident(t.schemaname)||'.'||quote_ident(t.tablename))) AS table_size,
pg_size_pretty(pg_relation_size(quote_ident(t.schemaname)||'.'||quote_ident(indexrelname))) AS index_size,
CASE WHEN indisunique THEN 'Y' ELSE 'N' END AS is_unique,
idx_scan AS number_of_scans,
idx_tup_read AS tuples_read,
idx_tup_fetch AS tuples_fetched
FROM pg_tables t
LEFT OUTER JOIN pg_class c ON c.relname = t.tablename
LEFT OUTER JOIN pg_indexes i ON c.relname = i.tablename
LEFT OUTER JOIN pg_stat_user_indexes ui ON ui.indexrelname = i.indexname
LEFT OUTER JOIN pg_index ix ON ix.indrelid = c.oid AND ix.indexrelid = ui.indexrelid
WHERE t.schemaname = 'public'
ORDER BY pg_relation_size(quote_ident(t.schemaname)||'.'||quote_ident(indexrelname)) DESC;
Automated Performance Alerts
-- Create monitoring views for alerts
CREATE VIEW performance_alerts AS
SELECT
'slow_queries' AS alert_type,
'Queries with mean time > 1000ms: ' || count(*) AS message,
'high' AS severity
FROM pg_stat_statements
WHERE mean_exec_time > 1000 -- named mean_time on PostgreSQL 12 and earlier
UNION ALL
SELECT
'unused_indexes' AS alert_type,
'Unused indexes: ' || count(*) AS message,
'medium' AS severity
FROM pg_stat_user_indexes
WHERE idx_scan = 0
AND indexrelname NOT LIKE '%pkey'
UNION ALL
SELECT
'large_sequential_scans' AS alert_type,
'Tables with high sequential scan ratio: ' || count(*) AS message,
'medium' AS severity
FROM pg_stat_user_tables
WHERE seq_scan > idx_scan * 10
AND n_tup_ins + n_tup_upd + n_tup_del > 1000;
Best Practices
Index Maintenance
- Regular Analysis: Run ANALYZE after bulk operations (see the sketch after this list)
- Index Monitoring: Track index usage and remove unused indexes
- Partial Indexes: Use for filtered queries to reduce index size
- Composite Index Order: Put the most selective column first
- Avoid Over-Indexing: Balance query performance against write performance
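A minimal maintenance sketch covering the first two points (table and index names are illustrative; confirm an index is genuinely unused, including on replicas, before dropping it):
-- Refresh planner statistics after a bulk load
ANALYZE stock_movements;
-- Candidate indexes that have never been scanned since statistics were last reset
SELECT schemaname, relname AS tablename, indexrelname AS indexname,
       pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY pg_relation_size(indexrelid) DESC;
-- Drop only after verifying the index is not needed (example name)
-- DROP INDEX CONCURRENTLY IF EXISTS idx_products_name;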
Query Optimization
- Use EXPLAIN ANALYZE: Understand query execution plans
- Limit Result Sets: Use LIMIT and pagination for large datasets
- Avoid SELECT *: Select only needed columns
- Use EXISTS vs IN: Prefer EXISTS over IN for subqueries with large result sets (example below)
- Index Foreign Keys: Ensure all foreign key columns are indexed
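For example, finding customers with open orders is often better expressed with EXISTS than with IN over a large subquery result. A sketch using the tables above:
-- EXISTS can stop at the first matching order per customer, whereas
-- IN (SELECT ...) may materialize the full subquery result first
SELECT c.id, c.name
FROM customers c
WHERE EXISTS (
    SELECT 1
    FROM sales_orders so
    WHERE so.customer_id = c.id
      AND so.status IN ('confirmed', 'picking', 'packed')
);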
Monitoring and Maintenance
- Regular VACUUM: Prevent table bloat
- Update Statistics: Keep query planner statistics current
- Monitor Slow Queries: Use the pg_stat_statements extension (see the query sketch below)
- Partition Large Tables: Use time-based or logical partitioning
- Archive Old Data: Move historical data to separate tables/databases
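As a starting point for the slow-query monitoring mentioned above, assuming the pg_stat_statements extension is installed (the column is mean_exec_time on PostgreSQL 13+ and mean_time on older versions):
-- Top 10 statements by average execution time
SELECT round(mean_exec_time::numeric, 2) AS avg_ms,
       calls,
       left(query, 80) AS query_preview
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 10;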