출처1 : https://www.simple-talk.com/sql/t-sql-programming/tsql-regular-expression-workbench/
출처2 : http://dbxo.egloos.com/2502132
MSSQL Server에서는 Where절의 like부분에 정규식을 사용할 수 있다.
하지만 조회된 데이터에서는 정규식이 불가능하며,
가끔 replace문을 여러번 중첩해야 할 때는 더욱 더 정규식이 필요하다고 느낀다.
그래서 찾다보니 정규식을 함수로 만들어서 사용하는 사람들이 있더라.
우선 Ole Automation Procedures를 활성화 해야한다.
SELECT *
FROM sys.configurations
WHERE NAME = 'Ole Automation Procedures'
--value:0, value_in_use:0
--show advanced options 값이 1인 상태에서만 옵션 수정 가능
sp_configure 'show advanced options', 1;
RECONFIGURE;
GO
--활성화
sp_configure 'Ole Automation Procedures',1;
RECONFIGURE;
GO
--구성 옵션 값 재확인
SELECT *
FROM sys.configurations
WHERE NAME = 'xp_cmdshell'
--value:1, value_in_use:1
--show advanced options 값 원복
sp_configure 'show advanced options', 0;
RECONFIGURE;
GO
그리고 아래 두 개의 함수를 DB에 생성한다.
IF OBJECT_ID (N'dbo.RegexMatch') IS NOT NULL
DROP FUNCTION dbo.RegexMatch
GO
CREATE FUNCTION dbo.RegexMatch
(
@pattern VARCHAR(2000),
@matchstring VARCHAR(MAX)--Varchar(8000) got SQL Server 2000
)
RETURNS INT
AS BEGIN
DECLARE @objRegexExp INT,
@objErrorObject INT,
@strErrorMessage VARCHAR(255),
@hr INT,
@match BIT
SELECT @strErrorMessage = 'creating a regex object'
EXEC @hr= sp_OACreate 'VBScript.RegExp', @objRegexExp OUT
IF @hr = 0
EXEC @hr= sp_OASetProperty @objRegexExp, 'Pattern', @pattern
--Specifying a case-insensitive match
IF @hr = 0
EXEC @hr= sp_OASetProperty @objRegexExp, 'IgnoreCase', 1
--Doing a Test'
IF @hr = 0
EXEC @hr= sp_OAMethod @objRegexExp, 'Test', @match OUT, @matchstring
IF @hr <> 0
BEGIN
RETURN NULL
END
EXEC sp_OADestroy @objRegexExp
RETURN @match
END
GO
-- Test
SELECT dbo.RegexMatch('\b(\w+)\s+\1\b','this has has been repeated')--1
SELECT dbo.RegexMatch('\b(\w+)\s+\1\b','this has not been repeated')--0
SELECT dbo.RegexMatch('\bfor(?:\W+\w+){1,2}?\W+last\b',
'You have failed me for the last time, Admiral')--1
SELECT dbo.RegexMatch('\bfor(?:\W+\w+){1,2}?\W+last\b',
'You have failed me for what could be the last time, Admiral')--0
IF OBJECT_ID(N'dbo.RegexReplace') IS NOT NULL
DROP FUNCTION dbo.RegexReplace
GO
CREATE FUNCTION dbo.RegexReplace
(
@pattern VARCHAR(255),
@replacement VARCHAR(255),
@Subject VARCHAR(MAX),
@global BIT = 1,
@Multiline bit =1
)
RETURNS VARCHAR(MAX)
AS BEGIN
DECLARE @objRegexExp INT,
@objErrorObject INT,
@strErrorMessage VARCHAR(255),
@Substituted VARCHAR(8000),
@hr INT,
@Replace BIT
SELECT @strErrorMessage = 'creating a regex object'
EXEC @hr= sp_OACreate 'VBScript.RegExp', @objRegexExp OUT
IF @hr = 0
SELECT @strErrorMessage = 'Setting the Regex pattern',
@objErrorObject = @objRegexExp
IF @hr = 0
EXEC @hr= sp_OASetProperty @objRegexExp, 'Pattern', @pattern
IF @hr = 0 /*By default, the regular expression is case sensitive. Set the IgnoreCase property to True to make it case insensitive.*/
SELECT @strErrorMessage = 'Specifying the type of match'
IF @hr = 0
EXEC @hr= sp_OASetProperty @objRegexExp, 'IgnoreCase', 1
IF @hr = 0
EXEC @hr= sp_OASetProperty @objRegexExp, 'MultiLine', @Multiline
IF @hr = 0
EXEC @hr= sp_OASetProperty @objRegexExp, 'Global', @global
IF @hr = 0
SELECT @strErrorMessage = 'Doing a Replacement'
IF @hr = 0
EXEC @hr= sp_OAMethod @objRegexExp, 'Replace', @Substituted OUT,
@subject, @Replacement
/*If the RegExp.Global property is False (the default), Replace will return the @subject string with the first regex match (if any) substituted with the replacement text. If RegExp.Global is true, the @Subject string will be returned with all matches replaced.*/
IF @hr <> 0
BEGIN
DECLARE @Source VARCHAR(255),
@Description VARCHAR(255),
@Helpfile VARCHAR(255),
@HelpID INT
EXECUTE sp_OAGetErrorInfo @objErrorObject, @source OUTPUT,
@Description OUTPUT, @Helpfile OUTPUT, @HelpID OUTPUT
SELECT @strErrorMessage = 'Error whilst '
+ COALESCE(@strErrorMessage, 'doing something') + ', '
+ COALESCE(@Description, '')
RETURN @strErrorMessage
END
EXEC sp_OADestroy @objRegexExp
RETURN @Substituted
END
GO
-- Test
-- remove repeated words in text
SELECT dbo.RegexReplace('\b(\w+)(?:\s+\1\b)+', '$1',
'Sometimes I cant help help help stuttering',1, 1)
-- find a #comment and add a TSQL --
SELECT dbo.RegexReplace('#.*','--$&','
# this is a comment
first,second,third,fourth',1,1)
-- replace a url with an HTML anchor
SELECT dbo.RegexReplace(
'\b(https?|ftp|file)://([-A-Z0-9+&@#/%?=~_|!:,.;]*[-A-Z0-9+&@#/%=~_|])',
'<a href="$2">$2</a>',
'There is this amazing site at http://www.simple-talk.com',1,1)
-- strip all HTML elements out of a string
SELECT dbo.RegexReplace('<(?:[^>''"]*|([''"]).*?\1)*>',
'','<a href="http://www.simple-talk.com">Simle Talk is wonderful</a><!--This is a comment --> we all love it',1,1)
-- import delimited text into a database, converting it into insert statements
SELECT dbo.RegexReplace(
'([^\|\r\n]+)[|\r\n]+([^\|\r\n]+)[|\r\n]+([^\|\r\n]+)[|\r\n]+([^\|\r\n]+)[|\r\n]+',
'Insert into MyTable (Firstcol,SecondCol, ThirdCol, Fourthcol)
select $1,$2,$3,$4
','1|white gloves|2435|24565
2|Sports Shoes|285678|0987
3|Stumps|2845|987
4|bat|29862|4875',1,1)
중간 중간 Test 하는 부분이 있는데, 테스트가 되면 잘 된 것이다.