Skip to content

Commit

Permalink
[HWORKS-262] Remove user email FK from hopsworks schema
Browse files Browse the repository at this point in the history
  • Loading branch information
SirOibaf committed May 27, 2024
1 parent 0786109 commit d74d80c
Show file tree
Hide file tree
Showing 2 changed files with 140 additions and 46 deletions.
70 changes: 25 additions & 45 deletions files/default/sql/ddl/3.8.0__initial_tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -245,17 +245,17 @@ CREATE TABLE `dataset_request` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`dataset` int(11) NOT NULL,
`projectId` int(11) NOT NULL,
`user_email` varchar(150) COLLATE latin1_general_cs NOT NULL,
`uid` INT(11) NOT NULL,
`requested` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`message` varchar(3000) COLLATE latin1_general_cs DEFAULT NULL,
`message_id` int(11) DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `index2` (`dataset`,`projectId`),
KEY `projectId` (`projectId`,`user_email`),
KEY `projectId` (`projectId`,`uid`),
KEY `message_id` (`message_id`),
CONSTRAINT `FK_429_449` FOREIGN KEY (`dataset`) REFERENCES `dataset` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION,
CONSTRAINT `FK_438_452` FOREIGN KEY (`message_id`) REFERENCES `message` (`id`) ON DELETE NO ACTION ON UPDATE NO ACTION,
CONSTRAINT `FK_302_451` FOREIGN KEY (`projectId`,`user_email`) REFERENCES `project_team` (`project_id`,`team_member`) ON DELETE CASCADE ON UPDATE NO ACTION
CONSTRAINT `project_team_fk_ds` FOREIGN KEY (`projectId`,`uid`) REFERENCES `project_team` (`project_id`,`uid`) ON DELETE CASCADE ON UPDATE NO ACTION
) ENGINE=ndbcluster AUTO_INCREMENT=3 DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;
/*!40101 SET character_set_client = @saved_cs_client */;

Expand All @@ -269,7 +269,7 @@ CREATE TABLE `dataset_request` (
CREATE TABLE `executions` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`submission_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`user` varchar(150) COLLATE latin1_general_cs NOT NULL,
`uid` INT(11) NOT NULL,
`state` varchar(128) COLLATE latin1_general_cs NOT NULL,
`execution_start` bigint(20) DEFAULT NULL,
`execution_stop` bigint(20) DEFAULT NULL,
Expand All @@ -285,12 +285,12 @@ CREATE TABLE `executions` (
PRIMARY KEY (`id`),
UNIQUE KEY `app_id` (`app_id`),
KEY `job_id` (`job_id`),
KEY `user` (`user`),
KEY `user` (`uid`),
KEY `submission_time_idx` (`submission_time`,`job_id`),
KEY `state_idx` (`state`,`job_id`),
KEY `finalStatus_idx` (`finalStatus`,`job_id`),
KEY `progress_idx` (`progress`,`job_id`),
CONSTRAINT `FK_262_366` FOREIGN KEY (`user`) REFERENCES `users` (`email`) ON DELETE NO ACTION ON UPDATE NO ACTION
CONSTRAINT `user_fk_executions` FOREIGN KEY (`uid`) REFERENCES `users` (`uid`) ON DELETE NO ACTION ON UPDATE NO ACTION
) ENGINE=ndbcluster AUTO_INCREMENT=23 DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;
/*!40101 SET character_set_client = @saved_cs_client */;

Expand Down Expand Up @@ -645,17 +645,17 @@ CREATE TABLE `jobs` (
`name` varchar(128) COLLATE latin1_general_cs DEFAULT NULL,
`creation_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`project_id` int(11) NOT NULL,
`creator` varchar(150) COLLATE latin1_general_cs NOT NULL,
`uid` INT(11) NOT NULL,
`type` varchar(128) COLLATE latin1_general_cs NOT NULL,
`json_config` varchar(12500) COLLATE latin1_general_cs NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `name_project_idx` (`name`,`project_id`),
KEY `project_id` (`project_id`),
KEY `creator` (`creator`),
KEY `creator_project_idx` (`creator`,`project_id`),
KEY `creator` (`uid`),
KEY `creator_project_idx` (`creator`,`uid`),
KEY `creation_time_project_idx` (`creation_time`,`project_id`),
KEY `type_project_id_idx` (`type`,`project_id`),
CONSTRAINT `FK_262_353` FOREIGN KEY (`creator`) REFERENCES `users` (`email`) ON DELETE NO ACTION ON UPDATE NO ACTION,
CONSTRAINT `user_fk_jobs` FOREIGN KEY (`uid`) REFERENCES `users` (`uid`) ON DELETE NO ACTION ON UPDATE NO ACTION,
CONSTRAINT `FK_284_352` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION
) ENGINE=ndbcluster AUTO_INCREMENT=37 DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;
/*!40101 SET character_set_client = @saved_cs_client */;
Expand Down Expand Up @@ -692,7 +692,7 @@ CREATE TABLE `jupyter_project` (
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `jupyter_settings` (
`project_id` int(11) NOT NULL,
`team_member` varchar(150) COLLATE latin1_general_cs NOT NULL,
`uid` INT(11) COLLATE latin1_general_cs NOT NULL,
`secret` varchar(255) COLLATE latin1_general_cs NOT NULL,
`advanced` tinyint(1) DEFAULT '0',
`shutdown_level` int(11) NOT NULL DEFAULT '6',
Expand All @@ -702,9 +702,8 @@ CREATE TABLE `jupyter_settings` (
`docker_config` varchar(1000) COLLATE latin1_general_cs DEFAULT NULL,
`python_kernel` TINYINT(1) DEFAULT 1,
PRIMARY KEY (`project_id`,`team_member`),
KEY `team_member` (`team_member`),
KEY `secret_idx` (`secret`),
CONSTRAINT `FK_262_309` FOREIGN KEY (`team_member`) REFERENCES `users` (`email`) ON DELETE CASCADE ON UPDATE NO ACTION,
CONSTRAINT `user_fk_jp_settings` FOREIGN KEY (`uid`) REFERENCES `users` (`uid`) ON DELETE CASCADE ON UPDATE NO ACTION,
CONSTRAINT `FK_284_308` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION
) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;
/*!40101 SET character_set_client = @saved_cs_client */;
Expand Down Expand Up @@ -768,8 +767,8 @@ CREATE TABLE `materialized_jwt` (
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `message` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`user_from` varchar(150) COLLATE latin1_general_cs DEFAULT NULL,
`user_to` varchar(150) COLLATE latin1_general_cs NOT NULL,
`uid_from` INT(11) DEFAULT NULL,
`uid_to` INT(11) NOT NULL,
`date_sent` datetime NOT NULL,
`subject` varchar(128) COLLATE latin1_general_cs DEFAULT NULL,
`preview` varchar(128) COLLATE latin1_general_cs DEFAULT NULL,
Expand All @@ -779,31 +778,15 @@ CREATE TABLE `message` (
`path` varchar(600) COLLATE latin1_general_cs DEFAULT NULL,
`reply_to_msg` int(11) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `user_from` (`user_from`),
KEY `user_to` (`user_to`),
KEY `user_from` (`uid_from`),
KEY `user_to` (`uid_to`),
KEY `reply_to_msg` (`reply_to_msg`),
CONSTRAINT `FK_262_441` FOREIGN KEY (`user_from`) REFERENCES `users` (`email`) ON DELETE SET NULL ON UPDATE NO ACTION,
CONSTRAINT `FK_262_442` FOREIGN KEY (`user_to`) REFERENCES `users` (`email`) ON DELETE CASCADE ON UPDATE NO ACTION,
CONSTRAINT `user_fk_msg_from` FOREIGN KEY (`uid_from`) REFERENCES `users` (`uid`) ON DELETE SET NULL ON UPDATE NO ACTION,
CONSTRAINT `user_fk_msg_to` FOREIGN KEY (`uid_to`) REFERENCES `users` (`uid`) ON DELETE CASCADE ON UPDATE NO ACTION,
CONSTRAINT `FK_438_443` FOREIGN KEY (`reply_to_msg`) REFERENCES `message` (`id`) ON DELETE SET NULL ON UPDATE NO ACTION
) ENGINE=ndbcluster AUTO_INCREMENT=3 DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;
/*!40101 SET character_set_client = @saved_cs_client */;

--
-- Table structure for table `message_to_user`
--

/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `message_to_user` (
`message` int(11) NOT NULL,
`user_email` varchar(150) COLLATE latin1_general_cs NOT NULL,
PRIMARY KEY (`message`,`user_email`),
KEY `user_email` (`user_email`),
CONSTRAINT `FK_262_458` FOREIGN KEY (`user_email`) REFERENCES `users` (`email`) ON DELETE CASCADE ON UPDATE NO ACTION,
CONSTRAINT `FK_438_457` FOREIGN KEY (`message`) REFERENCES `message` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION
) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;
/*!40101 SET character_set_client = @saved_cs_client */;

--
-- Table structure for table `oauth_client`
--
Expand Down Expand Up @@ -912,7 +895,7 @@ CREATE TABLE `ops_log` (
CREATE TABLE `project` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`projectname` varchar(100) COLLATE latin1_general_cs NOT NULL,
`username` varchar(150) COLLATE latin1_general_cs NOT NULL,
`uid` varchar(150) COLLATE latin1_general_cs NOT NULL,
`created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`description` varchar(2000) COLLATE latin1_general_cs DEFAULT NULL,
`payment_type` varchar(255) COLLATE latin1_general_cs NOT NULL DEFAULT 'PREPAID',
Expand All @@ -925,8 +908,7 @@ CREATE TABLE `project` (
`online_feature_store_available` tinyint(1) NOT NULL DEFAULT '1',
PRIMARY KEY (`id`),
UNIQUE KEY `projectname` (`projectname`),
KEY `user_idx` (`username`),
CONSTRAINT `FK_262_290` FOREIGN KEY (`username`) REFERENCES `users` (`email`) ON DELETE NO ACTION ON UPDATE NO ACTION
CONSTRAINT `user_fk_project` FOREIGN KEY (`uid`) REFERENCES `users` (`uid`) ON DELETE NO ACTION ON UPDATE NO ACTION
) ENGINE=ndbcluster AUTO_INCREMENT=119 DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;
/*!40101 SET character_set_client = @saved_cs_client */;

Expand Down Expand Up @@ -1002,12 +984,11 @@ CREATE TABLE `project_services` (
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `project_team` (
`project_id` int(11) NOT NULL,
`team_member` varchar(150) COLLATE latin1_general_cs NOT NULL,
`uid` varchar(150) COLLATE latin1_general_cs NOT NULL,
`team_role` varchar(32) COLLATE latin1_general_cs NOT NULL,
`added` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`project_id`,`team_member`),
KEY `team_member` (`team_member`),
CONSTRAINT `FK_262_304` FOREIGN KEY (`team_member`) REFERENCES `users` (`email`) ON DELETE CASCADE ON UPDATE NO ACTION,
PRIMARY KEY (`project_id`,`uid`),
CONSTRAINT `user_fk_team` FOREIGN KEY (`uid`) REFERENCES `users` (`uid`) ON DELETE CASCADE ON UPDATE NO ACTION,
CONSTRAINT `FK_284_303` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION
) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;
/*!40101 SET character_set_client = @saved_cs_client */;
Expand Down Expand Up @@ -1205,7 +1186,7 @@ CREATE TABLE `rstudio_project` (
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `rstudio_settings` (
`project_id` int(11) NOT NULL,
`team_member` varchar(150) COLLATE latin1_general_cs NOT NULL,
`uid` INT(11) NOT NULL,
`num_tf_ps` int(11) DEFAULT '1',
`num_tf_gpus` int(11) DEFAULT '0',
`num_mpi_np` int(11) DEFAULT '1',
Expand All @@ -1229,9 +1210,8 @@ CREATE TABLE `rstudio_settings` (
`spark_params` varchar(6500) COLLATE latin1_general_cs DEFAULT '',
`shutdown_level` int(11) NOT NULL DEFAULT '6',
PRIMARY KEY (`project_id`,`team_member`),
KEY `team_member` (`team_member`),
KEY `secret_idx` (`secret`),
CONSTRAINT `RS_FK_USERS` FOREIGN KEY (`team_member`) REFERENCES `users` (`email`) ON DELETE CASCADE ON UPDATE NO ACTION,
CONSTRAINT `user_fk_rstudio` FOREIGN KEY (`uid`) REFERENCES `users` (`uid`) ON DELETE CASCADE ON UPDATE NO ACTION,
CONSTRAINT `RS_FK_PROJS` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION
) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;
/*!40101 SET character_set_client = @saved_cs_client */;
Expand Down
116 changes: 115 additions & 1 deletion files/default/sql/ddl/updates/3.8.0.sql
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,118 @@ CREATE TABLE IF NOT EXISTS `hopsworks`.`model_link` (

-- FSTORE-920
ALTER TABLE `hopsworks`.`feature_store_jdbc_connector`
ADD `driver_path` VARCHAR(2000) DEFAULT NULL;
ADD `driver_path` VARCHAR(2000) DEFAULT NULL;

-- CHANGES HWORKS-262
DROP TABLE `hopsworks`.`message_to_user`;

-- to avoid repeating the same code 100s of time we create a procedure here
DROP PROCEDURE IF EXISTS REPLACE_EMAIL_FK;

DELIMITER //

CREATE PROCEDURE REPLACE_EMAIL_FK(IN table_name VARCHAR(100),
IN old_column_name VARCHAR(100), IN new_column_name VARCHAR(100),
IN old_fk_ref_table VARCHAR(100),
IN index_name VARCHAR(100), IN fk_name VARCHAR(100))
BEGIN
-- add the new column
SET @s := concat('ALTER TABLE hopsworks.', table_name, ' ADD COLUMN `', new_column_name ,'` INT(11)');
PREPARE stmt1 FROM @s;
EXECUTE stmt1;
DEALLOCATE PREPARE stmt1;

-- update the uid values based on the emails
SET SQL_SAFE_UPDATES = 0;
SET @s := concat('UPDATE hopsworks.', table_name, ' t JOIN `hopsworks`.`users` u ON t.', old_column_name
, ' = u.email SET t.', new_column_name ,'= u.uid');
PREPARE stmt1 FROM @s;
EXECUTE stmt1;
DEALLOCATE PREPARE stmt1;
SET SQL_SAFE_UPDATES = 1;

-- now that the column has been populated, modify it to be not null
-- this is needed to be able to build some unique indices/primary keys for some tables
SET @s := concat('ALTER TABLE hopsworks.', table_name, ' MODIFY COLUMN `', new_column_name ,'` INT(11) NOT NULL');
PREPARE stmt1 FROM @s;
EXECUTE stmt1;
DEALLOCATE PREPARE stmt1;

-- add fk constraint to the users table
SET @s := concat('ALTER TABLE hopsworks.', table_name, ' ADD CONSTRAINT `', fk_name
, '` FOREIGN KEY (`', new_column_name
,'`) REFERENCES `hopsworks`.`users` (`uid`) ON DELETE NO ACTION ON UPDATE NO ACTION');
PREPARE stmt1 FROM @s;
EXECUTE stmt1;
DEALLOCATE PREPARE stmt1;

-- drop the existing foreign key
SET @fk_name = (SELECT k.CONSTRAINT_NAME
FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE k
WHERE k.TABLE_SCHEMA = "hopsworks" AND k.TABLE_NAME = table_name AND k.COLUMN_NAME = old_column_name AND k.REFERENCED_TABLE_NAME=old_fk_ref_table);

SET @s := concat('ALTER TABLE hopsworks.', table_name , ' DROP FOREIGN KEY `', @fk_name, '`');
PREPARE stmt1 FROM @s;
EXECUTE stmt1;
DEALLOCATE PREPARE stmt1;

-- drop the index created by the foreign key
SET @s := concat('ALTER TABLE hopsworks.', table_name, ' DROP KEY `', index_name, '`');
PREPARE stmt1 FROM @s;
EXECUTE stmt1;
DEALLOCATE PREPARE stmt1;

IF table_name = "jobs" THEN
-- need to do it at this stage. earlier and the index is used by the old fk
-- later and we can't drop the column as there is an index assigned to it
ALTER TABLE `hopsworks`.`jobs` DROP KEY `creator_project_idx`;
END IF;

-- drop the original column
SET @s := concat('ALTER TABLE hopsworks.', table_name, ' DROP COLUMN `', old_column_name, '`');
PREPARE stmt1 FROM @s;
EXECUTE stmt1;
DEALLOCATE PREPARE stmt1;

END //

DELIMITER ;

-- welcome to the jungle.

CALL REPLACE_EMAIL_FK('jobs', 'creator', 'uid', 'users', 'creator', 'user_fk_executions');
-- add the index back, but with the UID column
ALTER TABLE `hopsworks`.`jobs` ADD KEY `uid_project_idx`(`uid`, `project_id`);

CALL REPLACE_EMAIL_FK('executions', 'user', 'uid', 'users', 'user', 'user_fk_executions');


-- dataset_request table has a FK to the project_team table that references project_id and email
-- we need to clean up the email from the dataset_request first then migrate the project_team
-- table and then add back the FK to the dataset_request table pointing to the new uid column
CALL REPLACE_EMAIL_FK('dataset_request', 'user_email', 'uid', 'project_team', 'projectId', 'project_team_fk');

-- the primary key for the project team table should be re-created to use uid instead of email
ALTER TABLE `hopsworks`.`project_team` DROP PRIMARY KEY;
-- migrate the column
CALL REPLACE_EMAIL_FK('project_team', 'team_member', 'uid', 'users', 'team_member', 'user_fk_team');
-- add back the primary key using the uid column
ALTER TABLE `hopsworks`.`project_team` ADD PRIMARY KEY(`project_id`, `uid`);

-- drop the foreign key created by the procedure above for dataset_request
-- and the proper one. This is done here to avoid having too much complexity
-- on the stored procedure
ALTER TABLE `hopsworks`.`dataset_request` DROP FOREIGN KEY project_team_fk;
ALTER TABLE `hopsworks`.`dataset_request` DROP KEY project_team_fk;
ALTER TABLE `hopsworks`.`dataset_request`
ADD CONSTRAINT `project_team_fk_ds` FOREIGN KEY (`projectId`,`uid`)
REFERENCES `project_team` (`project_id`,`uid`) ON DELETE CASCADE ON UPDATE NO ACTION;

-- These are the easy ones that should not get tangled
CALL REPLACE_EMAIL_FK('project', 'username', 'uid', 'users', 'user_idx', 'user_fk_project');
CALL REPLACE_EMAIL_FK('jupyter_settings', 'team_member', 'uid', 'users', 'team_member', 'user_fk_jp_settings');
CALL REPLACE_EMAIL_FK('rstudio_settings', 'team_member', 'uid', 'users', 'team_member', 'user_fk_rstudio');
CALL REPLACE_EMAIL_FK('message', 'user_from', 'uid_from', 'users', 'user_from', 'user_fk_msg_from');
CALL REPLACE_EMAIL_FK('message', 'user_to', 'uid_to', 'users', 'user_to', 'user_fk_msg_to');

-- END CHANGES HWORKS-262

0 comments on commit d74d80c

Please sign in to comment.