Last active
January 23, 2019 06:14
-
-
Save theSuiGenerisAakash/709fa432440290f6c9de2c3a868d3c8d to your computer and use it in GitHub Desktop.
A UDF to split strings based on a delimiter in MySQL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Select your database | |
USE sample_database; | |
/* You might need to run this if MySQL says that the below function is non-determininstic (although I've added DETERMINSTIC keyword in it) */ | |
-- SET GLOBAL log_bin_trust_function_creators = 1; | |
DELIMITER $$ | |
DROP FUNCTION IF EXISTS splitString $$ | |
-- Function splitString that returns a substring based on a certain delimiter's position | |
CREATE FUNCTION splitString ( | |
x VARCHAR(255), | |
delim VARCHAR(12), | |
pos INT | |
) | |
RETURNS VARCHAR(255) DETERMINISTIC | |
RETURN REPLACE(SUBSTRING(SUBSTRING_INDEX(x, delim, pos), | |
LENGTH(SUBSTRING_INDEX(x, delim, pos -1)) + 1), | |
delim, '') $$ | |
DROP PROCEDURE IF EXISTS getAllStrings $$ | |
-- Procedure to get all the words from the input string passed to it as argument | |
CREATE PROCEDURE getAllStrings(IN idString varchar(256)) | |
BEGIN | |
CREATE TEMPORARY TABLE IF NOT EXISTS tbl_temp_sep_strings( SPLIT_VALUES VARCHAR(256)); | |
SELECT LENGTH(idString) - LENGTH(REPLACE(idString, ', ', '')) INTO @noOfCommas; | |
IF @noOfCommas = 0 | |
THEN | |
INSERT INTO tbl_temp_sep_strings(SPLIT_VALUES) values(idString); | |
ELSE | |
SET @y = 1; | |
WHILE @y < @noOfCommas DO | |
SELECT splitString(idString, ', ', @y) INTO @tempSeparate; | |
SET @y = @y + 1; | |
INSERT INTO tbl_temp_sep_strings(SPLIT_VALUES) values(@tempSeparate); | |
END WHILE; | |
END IF; | |
END $$ | |
CALL getAllStrings('ab, ac'); -- Call the function like this | |
/* Now, you have the result, i.e., all the split words in the column SPLIT_VALUES of the tbl_temp_sep_strings | |
which you can use by doing a SELECT action. This is helpful as it can be used in SQL IN clauses and other | |
similar places where you might be looking for a tuple of values. | |
Please note that the table is temporary which is good because then it will get deallocated when the connection | |
is closed or the session ends as we wouldn't want a table in our db to persist. You can remove the TEMPORARY keyword | |
otherwise. | |
*/ | |
-- SELECT * FROM sample_database.tbl_temp_sep_strings | |
-- BONUS: If a field in some table has mixed values as in single as well as mixed, comma-separated values then you can use the below procedure | |
DROP PROCEDURE IF EXISTS getIndividualSkills $$ | |
CREATE PROCEDURE cleanMixedUpColumn() | |
BEGIN | |
SELECT COUNT(DISTINCT infected_column) INTO @noOfSkills FROM sample_database.sample_table WHERE Skill_id IS NOT NULL; | |
SET @y1 = 0; | |
WHILE @y1 < @noOfSkills DO | |
SET @sql1 = CONCAT('SELECT DISTINCT infected_column INTO @currentValue FROM sample_database.sample_table WHERE infected_column IS NOT NULL ORDER BY infected_column LIMIT 1 OFFSET ' ,@y1,';'); | |
PREPARE stmt1 FROM @sql1; | |
EXECUTE stmt1; | |
DEALLOCATE PREPARE stmt1; | |
CALL getAllStrings(@currentValue); | |
SET @y1 = @y1 + 1; | |
END WHILE; | |
END $$ | |
DELIMITER ; | |
CALL cleanMixedUpColumn(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment