-- First, create a tenant
INSERT INTO tenants (id, name) VALUES ('123e4567-e89b-12d3-a456-426614174000', 'Acme Corp');
-- Create a fact table for raw events
CREATE TABLE event_facts (
tenant_id UUID NOT NULL,
event_timestamp TIMESTAMP,
user_id INT,
event_type TEXT
);
-- Create an aggregated table with HLL for efficient unique counting
CREATE TABLE daily_event_stats (
tenant_id UUID NOT NULL,
event_date DATE,
event_type TEXT,
unique_users_hll HLL, -- Stores the set of all unique users for this day and event type
PRIMARY KEY (tenant_id, event_date, event_type)
);
-- Insert some raw events
INSERT INTO event_facts (tenant_id, event_timestamp, user_id, event_type)
VALUES
('123e4567-e89b-12d3-a456-426614174000', '2024-03-01 10:00:00', 1, 'click'),
('123e4567-e89b-12d3-a456-426614174000', '2024-03-01 10:05:00', 2, 'click'),
('123e4567-e89b-12d3-a456-426614174000', '2024-03-01 10:10:00', 3, 'click'),
('123e4567-e89b-12d3-a456-426614174000', '2024-03-01 10:15:00', 1, 'click'), -- duplicate user
('123e4567-e89b-12d3-a456-426614174000', '2024-03-02 10:00:00', 2, 'click'),
('123e4567-e89b-12d3-a456-426614174000', '2024-03-02 10:05:00', 4, 'click'),
('123e4567-e89b-12d3-a456-426614174000', '2024-03-02 10:10:00', 5, 'click');
-- Aggregate the raw events into daily stats using HLL
SET nile.tenant_id = '123e4567-e89b-12d3-a456-426614174000';
INSERT INTO daily_event_stats
SELECT
tenant_id,
date_trunc('day', event_timestamp)::DATE as event_date,
event_type,
hll_add_agg(hll_hash_integer(user_id))
FROM event_facts
GROUP BY tenant_id,date_trunc('day', event_timestamp)::DATE, event_type;