------------------------------------------------------------------------------ -- RT Extreme Database Makeover -- -- By David E. Wheeler and the Portland PostgreSQL User Group. -- -- http://pugs.postgresql.org/node/154 -- http://pugs.postgresql.org/node/161 -- -- This is the SQL I came up with to makeover a subset of the functionality of -- the RT database. The constraints I used to refactor the RT database were: -- -- * Make full use of PostgreSQL features. -- * Ensure relational integrity throughout. -- * Add constraints and triggers to ensure data integrity. -- * Rename columns and tables to maximize legibility. -- * Normalize data for tickets. -- -- This last item was the primary task of the exercise, with the goal to have -- something to share with the RT developers, in case they want to make use of -- any of our ideas. Since the meeting I filled in some gaps, added quite a -- few constraints and triggers, and tested it all on a live database. I've -- also added some notes on what I did and why. -- -- The only other thing I would definitely add at this point would be indexes. -- So far I'e only put them in for constraining purposes, but they likely -- would be needed elsehere, too, depending on query execution. It might also -- be cool to add some tsearch indexes, though to fully integrate full-text -- search. Any takers? ------------------------------------------------------------------------------ -- Create a priority data type. This data type is used in a number of places, -- and so it's useful to break out its definition into a domain. CREATE DOMAIN priority AS SMALLINT CHECK ( VALUE BETWEEN -1 AND 5 ); ------------------------------------------------------------------------------ -- Create a status data type. This just simplifies how this type of data is -- used. Side effects of using an ENUM are speed and ordering. Re-order this -- definition in order to change the ordering of values, as appropriate. CREATE TYPE status AS ENUM ('new', 'open', 'stalled', 'resolved', 'rejected', 'deleted'); ------------------------------------------------------------------------------ -- Create an event_type data type. This may or may not be complete, and if it -- needs to be open-ended, isn't strictly necessary. I added all the types I -- have in my production installation of RT. CREATE DOMAIN event_type AS TEXT CHECK ( VALUE IN ( 'AddLink', 'AddWatcher', 'Comment', 'CommentEmailRecord', 'Correspond', 'Create', 'CustomField', 'DelWather', 'EmailRecord', 'Give', 'Set', 'Status', 'Steal', 'Take') ); ------------------------------------------------------------------------------ -- Create a content_type data type. This is just a cute way to ensure that -- content types are correct. CREATE DOMAIN content_type AS TEXT CHECK ( VALUE ~* '^[[:alnum:]]+/[[:alnum:]]+(/[[:alnum:]]+)?$'); ------------------------------------------------------------------------------ -- Create a timezone data type. This is really fast. See -- http://justatheory.com/computers/databases/postgresql/timezone_validation.html. CREATE OR REPLACE FUNCTION is_timezone( tz TEXT ) RETURNS BOOLEAN as $$ BEGIN PERFORM now() AT TIME ZONE tz; RETURN TRUE; EXCEPTION WHEN invalid_parameter_value THEN RETURN FALSE; END; $$ language plpgsql STABLE; CREATE DOMAIN timezone AS TEXT CHECK ( is_timezone( value ) ); ------------------------------------------------------------------------------ -- Create trigger functions for updating user activity timestamps. These are -- used in a number of different tables, so it pays to have it be consistent. -- The trigger makes sure that they are always updated appropriately. I've -- also renamed the columns so that they are more descriptice ("created_at" -- and "updated_at" instead of "created" and "updated"). CREATE OR REPLACE FUNCTION set_create_times() RETURNS TRIGGER AS $$ BEGIN NEW.created_at := CURRENT_TIMESTAMP; NEW.updated_at := CURRENT_TIMESTAMP; RETURN NEW; END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION set_update_time() RETURNS TRIGGER AS $$ BEGIN NEW.updated_at := CURRENT_TIMESTAMP; RETURN NEW; END; $$ LANGUAGE plpgsql; ------------------------------------------------------------------------------ -- Create the users table. I omitted a bunch of columns that would more -- appropriately appear in separate tables. email_addresses should, too, -- really. Note that I've also added underscores to column names in order to -- make them more legible. The same is done for other tables, too. CREATE TABLE users ( id BIGSERIAL NOT NULL PRIMARY KEY, name TEXT NOT NULL DEFAULT '', real_name TEXT NOT NULL DEFAULT '', nickname TEXT NOT NULL DEFAULT '', email_address TEXT NOT NULL, lang TEXT NOT NULL DEFAULT 'en_US', email_encoding TEXT NOT NULL DEFAULT 'UTF-8', web_encoding TEXT NOT NULL DEFAULT 'UTF-8', timezone timezone NOT NULL DEFAULT 'UTC', created_by_id BIGINT NOT NULL REFERENCES users(id) DEFAULT LASTVAL(), created_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_by_id BIGINT NOT NULL REFERENCES users(id) DEFAULT LASTVAL(), updated_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE UNIQUE INDEX udx_users_email_address ON users(LOWER(email_address)); CREATE TRIGGER set_user_create_times BEFORE INSERT ON users FOR EACH ROW EXECUTE PROCEDURE set_create_times(); CREATE TRIGGER set_user_update_time BEFORE UPDATE ON users FOR EACH ROW EXECUTE PROCEDURE set_update_time(); ------------------------------------------------------------------------------ -- Create the queues table. This is pretty much the same as befor, with just a -- few minor changes. CREATE TABLE queues ( id BIGSERIAL NOT NULL PRIMARY KEY, name TEXT NOT NULL CHECK ( name <> ''), description TEXT NOT NULL DEFAULT '', correspond_address TEXT NOT NULL, comment_address TEXT NOT NULL, initial_priority PRIORITY NOT NULL DEFAULT 0, final_priority PRIORITY NOT NULL DEFAULT 0, default_due_in INTERVAL NOT NULL DEFAULT '00:00:00', created_by_id BIGINT NOT NULL REFERENCES users(id), created_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_by_id BIGINT NOT NULL REFERENCES users(id), updated_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP ); CREATE UNIQUE INDEX udx_queues_name ON queues(LOWER(name)); CREATE TRIGGER set_user_create_times BEFORE INSERT ON queues FOR EACH ROW EXECUTE PROCEDURE set_create_times(); CREATE TRIGGER set_user_update_time BEFORE UPDATE ON queues FOR EACH ROW EXECUTE PROCEDURE set_update_time(); ------------------------------------------------------------------------------ -- Create the tickets table. I've omitted some columns that don't seem to be -- used much. Restore as appropriate. CREATE TABLE tickets ( id BIGSERIAL NOT NULL PRIMARY KEY, queue_id BIGINT NOT NULL REFERENCES queues(id), owner_id BIGINT NOT NULL REFERENCES users(id), status STATUS NOT NULL DEFAULT 'new', name TEXT NOT NULL DEFAULT '', priority PRIORITY NOT NULL DEFAULT 0, initial_priority PRIORITY NOT NULL DEFAULT 0, final_priority PRIORITY NOT NULL DEFAULT 0, time_estimated INTERVAL NOT NULL DEFAULT '00:00:00', time_worked INTERVAL NOT NULL DEFAULT '00:00:00', time_left INTERVAL NOT NULL DEFAULT '00:00:00', started_at TIMESTAMPTZ NULL, due_at TIMESTAMPTZ NULL, resolved_at TIMESTAMPTZ NULL, created_by_id BIGINT NOT NULL REFERENCES users(id), created_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_by_id BIGINT NOT NULL REFERENCES users(id), updated_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, enabled BOOLEAN NOT NULL DEFAULT TRUE ); CREATE TRIGGER set_user_create_times BEFORE INSERT ON tickets FOR EACH ROW EXECUTE PROCEDURE set_create_times(); CREATE TRIGGER set_user_update_time BEFORE UPDATE ON tickets FOR EACH ROW EXECUTE PROCEDURE set_update_time(); ------------------------------------------------------------------------------ -- Create the trigger to update resolved_at. This ensures that it always has -- the correct value. CREATE OR REPLACE FUNCTION resolve_ticket() RETURNS TRIGGER AS $$ BEGIN IF NEW.status = 'resolved' AND OLD.status <> 'resolved' THEN NEW.resolved_at = CURRENT_TIMESTAMP; END IF; RETURN NEW; END; $$ LANGUAGE plpgsql; CREATE TRIGGER resolve_ticket BEFORE UPDATE ON tickets FOR EACH ROW EXECUTE PROCEDURE resolve_ticket(); ------------------------------------------------------------------------------ -- Table for messages replaces special case use of transactions. I added some -- columns for frequently-used headers, too. CREATE TABLE messages ( id BIGSERIAL NOT NULL PRIMARY KEY, ticket_id BIGINT NOT NULL REFERENCES tickets(id), sender_id BIGINT NOT NULL REFERENCES users(id), subject TEXT NOT NULL DEFAULT '', received_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, sent_at TIMESTAMPTZ NOT NULL, "to" TEXT NOT NULL DEFAULT '', reply_to TEXT NOT NULL DEFAULT '', cc TEXT NOT NULL DEFAULT '', bcc TEXT NOT NULL DEFAULT '', msg_id TEXT NOT NULL DEFAULT '', client TEXT NOT NULL DEFAULT '', in_reply_to TEXT NOT NULL DEFAULT '', "references" TEXT NOT NULL DEFAULT '', priority SMALLINT NOT NULL CHECK ( priority BETWEEN 1 AND 5), reply_to_id BIGINT NULL REFERENCES messages(id), headers TEXT NOT NULL DEFAULT '' ); ------------------------------------------------------------------------------ -- The parts table replaces attachments and is linked to the messages table, -- rather than to transactions. CREATE TABLE parts ( id BIGSERIAL NOT NULL PRIMARY KEY, message_id BIGINT NULL REFERENCES messages(id), parent_id BIGINT NULL REFERENCES parts(id), content_type TEXT NOT NULL DEFAULT 'text/plain', content_encoding TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', headers TEXT NOT NULL DEFAULT '', content TEXT NOT NULL DEFAULT '' ); ------------------------------------------------------------------------------ -- ticket_events replaces transactions and applies only to tickets. Dupe as -- appropriate for other types of objects (group_events, user_events, etc.). -- message_id links to a message that was received to trigger the transaction. CREATE TABLE ticket_events ( id BIGSERIAL NOT NULL PRIMARY KEY, ticket_id BIGINT NOT NULL REFERENCES tickets(id), type EVENT_TYPE NOT NULL, message_id BIGINT NULL REFERENCES messages(id), created_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, created_by_id BIGINT NOT NULL REFERENCES users(id) ); ------------------------------------------------------------------------------ -- A single ticket_event can have multiple fields changed. Reference stuff -- does not seem to be used, but if it is, add an identifier column to this -- table to distinguish field changes from reference changes. CREATE TABLE ticket_event_data ( id BIGSERIAL NOT NULL PRIMARY KEY, ticket_event_id INTEGER NOT NULL REFERENCES ticket_events(id), field TEXT NOT NULL DEFAULT '', old_value TEXT NOT NULL DEFAULT '', new_value TEXT NOT NULL DEFAULT '' ); ------------------------------------------------------------------------------ -- Tables not refactored. I'm not sure which are deprecated, and I don't know -- what some of them are for. If the RT developers decide to make use of any -- of this stuff, I'd be happy to help them refactor these tables, too. -- -- * acl -- * attributes -- * cachedgroupmembers -- * customfields -- * customfieldvalues -- * groups -- * groupmembers -- * links -- * objectcustomfields -- * objectcustomfieldvalues -- * principals -- * scripactions -- * scripconditions -- * sessions -- * templates