Sqlserver
 sql >> Teknologi Basis Data >  >> RDS >> Sqlserver

Algoritma T-SQL untuk Mengkodekan Karakter HTML yang Tidak Aman sebagai Referensi Entitas Karakter HTML

Mengenkode lima karakter khusus dengan CTE rekursif:

DECLARE 
  @unsafe NVARCHAR(MAX),
  @safe   NVARCHAR(MAX) 

--
-- Create the unsafe html string
-- 
SET @unsafe = N'html''s encoding "method" is <= or >= & 1234 ' + NCHAR(129) 
--
-- Use a recursive CTE to iterate through each character in the string
-- 
;WITH cte AS 
(
  -- 
  -- The first row will contain the original
  -- string, an empty string to be used to 
  -- build the "safe" string, and a position
  -- column to mark the character position
  -- of the loop
  -- 
  SELECT 
    @unsafe AS unsafe_html,
    CONVERT(NVARCHAR(MAX), '') AS safe_html,
    1 AS pos
  WHERE @unsafe IS NOT NULL AND LEN(@unsafe) > 0 
  UNION ALL
  -- 
  -- Create a loop: 
  -- The anchor row starts at position one.
  -- Increment the position by one for each pass.
  -- Stop when the position value is equal to the string lenth.
  -- Evaluate the character in each string
  -- If the ASCII value > 128, use the &# format.
  -- Otherwise, check for 5 special characters: " & ' < >
  -- Use the encoding reference or just the original character
  --
  SELECT 
    @unsafe AS unsafe_html,
    CONVERT(NVARCHAR(MAX), safe_html + 
    CASE WHEN UNICODE(SUBSTRING(unsafe_html, pos, 1)) > 128 
         THEN '&#' + CONVERT(NVARCHAR(10), UNICODE(SUBSTRING(unsafe_html, pos, 1)))  
         ELSE CASE SUBSTRING(unsafe_html, pos, 1)
              WHEN '"'  THEN '&quot' 
              WHEN '&'  THEN '&amp'
              WHEN '''' THEN '&apos'
              WHEN '<'  THEN '&lt'
              WHEN '>'  THEN '&gt'
              ELSE SUBSTRING(unsafe_html, pos, 1)
              END 
         END ) AS safe_html,
    pos + 1 AS pos
  FROM cte
  WHERE pos <= LEN(@unsafe)
) 
--
-- Each pass through the string creates a row in the CTE
-- The last row will have the position value of the string length + 1
-- Use that row as the safe html string
-- SQL Server allows a max recursion of 32767
-- 
SELECT @safe = (
  SELECT safe_html 
  FROM cte
  WHERE pos = LEN(@unsafe) + 1
) 
OPTION (MAXRECURSION 32767) 

SELECT @safe

-- html&aposs encoding &quotmethod&quot is &lt= or &gt= &amp 1234 &#129

Versi awal:

DECLARE @s NVARCHAR(100)

SET @s = '<html>unsafe & safe<html>'
SELECT @s 
SELECT (SELECT @s FOR XML PATH(''))

---------------------------------------
<html>unsafe & safe<html>

-----------------------------------------
&lt;html&gt;unsafe &amp; safe&lt;html&gt;

Encoding lengkap dengan semua referensi resmi:

DECLARE 
    @unsafe  NVARCHAR(MAX),
    @safe NVARCHAR(MAX) 

-- Build string with first 10,000 unicode chars
SELECT @unsafe = COALESCE(@unsafe, '') + NCHAR(number) + ' ' 
FROM (
    SELECT TOP 10000 ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS number
    FROM sys.all_objects s1 CROSS JOIN sys.all_objects s2 
) t

-- Build table variable with character entity references defined in HTML 4.0
-- Reference: http://www.htmlcodetutorial.com/characterentities_famsupp_69.html
DECLARE @t TABLE (
    name NVARCHAR(25) NOT NULL, 
    unicode_val INT NOT NULL PRIMARY KEY 
) 

INSERT @t 
VALUES
('&quot', 34),
('&amp', 38),
('&apos', 39),
('&lt', 60),
('&gt', 62),
('&nbsp', 160),
('&iexcl', 161),
('&cent', 162),
('&pound', 163),
('&curren', 164),
('&yen', 165),
('&brvbar', 166),
('&sect', 167),
('&uml', 168),
('&copy', 169),
('&ordf', 170),
('&laquo', 171),
('&not', 172),
('&shy', 173),
('&reg', 174),
('&macr', 175),
('&deg', 176),
('&plusmn', 177),
('&sup2', 178),
('&sup3', 179),
('&acute', 180),
('&micro', 181),
('&para', 182),
('&middot', 183),
('&cedil', 184),
('&sup1', 185),
('&ordm', 186),
('&raquo', 187),
('&frac14', 188),
('&frac12', 189),
('&frac34', 190),
('&iquest', 191),
('&Agrave', 192),
('&Aacute', 193),
('&Acirc', 194),
('&Atilde', 195),
('&Auml', 196),
('&Aring', 197),
('&AElig', 198),
('&Ccedil', 199),
('&Egrave', 200),
('&Eacute', 201),
('&Ecirc', 202),
('&Euml', 203),
('&Igrave', 204),
('&Iacute', 205),
('&Icirc', 206),
('&Iuml', 207),
('&ETH', 208),
('&Ntilde', 209),
('&Ograve', 210),
('&Oacute', 211),
('&Ocirc', 212),
('&Otilde', 213),
('&Ouml', 214),
('&times', 215),
('&Oslash', 216),
('&Ugrave', 217),
('&Uacute', 218),
('&Ucirc', 219),
('&Uuml', 220),
('&Yacute', 221),
('&THORN', 222),
('&szlig', 223),
('&agrave', 224),
('&aacute', 225),
('&acirc', 226),
('&atilde', 227),
('&auml', 228),
('&aring', 229),
('&aelig', 230),
('&ccedil', 231),
('&egrave', 232),
('&eacute', 233),
('&ecirc', 234),
('&euml', 235),
('&igrave', 236),
('&iacute', 237),
('&icirc', 238),
('&iuml', 239),
('&eth', 240),
('&ntilde', 241),
('&ograve', 242),
('&oacute', 243),
('&ocirc', 244),
('&otilde', 245),
('&ouml', 246),
('&divide', 247),
('&oslash', 248),
('&ugrave', 249),
('&uacute', 250),
('&ucirc', 251),
('&uuml', 252),
('&yacute', 253),
('&thorn', 254),
('&yuml', 255),
('&OElig', 338),
('&oelig', 339),
('&Scaron', 352),
('&scaron', 353),
('&Yuml', 376),
('&fnof', 402),
('&circ', 710),
('&tilde', 732),
('&Alpha', 913),
('&Beta', 914),
('&Gamma', 915),
('&Delta', 916),
('&Epsilon', 917),
('&Zeta', 918),
('&Eta', 919),
('&Theta', 920),
('&Iota', 921),
('&Kappa', 922),
('&Lambda', 923),
('&Mu', 924),
('&Nu', 925),
('&Xi', 926),
('&Omicron', 927),
('&Pi', 928),
('&Rho', 929),
('&Sigma', 931),
('&Tau', 932),
('&Upsilon', 933),
('&Phi', 934),
('&Chi', 935),
('&Psi', 936),
('&Omega', 937),
('&alpha', 945),
('&beta', 946),
('&gamma', 947),
('&delta', 948),
('&epsilon', 949),
('&zeta', 950),
('&eta', 951),
('&theta', 952),
('&iota', 953),
('&kappa', 954),
('&lambda', 955),
('&mu', 956),
('&nu', 957),
('&xi', 958),
('&omicron', 959),
('&pi', 960),
('&rho', 961),
('&sigmaf', 962),
('&sigma', 963),
('&tau', 964),
('&upsilon', 965),
('&phi', 966),
('&chi', 967),
('&psi', 968),
('&omega', 969),
('&thetasym', 977),
('&upsih', 978),
('&piv', 982),
('&ensp', 8194),
('&emsp', 8195),
('&thinsp', 8201),
('&zwnj', 8204),
('&zwj', 8205),
('&lrm', 8206),
('&rlm', 8207),
('&ndash', 8211),
('&mdash', 8212),
('&lsquo', 8216),
('&rsquo', 8217),
('&sbquo', 8218),
('&ldquo', 8220),
('&rdquo', 8221),
('&bdquo', 8222),
('&dagger', 8224),
('&Dagger', 8225),
('&bull', 8226),
('&hellip', 8230),
('&permil', 8240),
('&prime', 8242),
('&Prime', 8243),
('&lsaquo', 8249),
('&rsaquo', 8250),
('&oline', 8254),
('&frasl', 8260),
('&euro', 8364),
('&image', 8465),
('&weierp', 8472),
('&real', 8476),
('&trade', 8482),
('&alefsym', 8501),
('&larr', 8592),
('&uarr', 8593),
('&rarr', 8594),
('&darr', 8595),
('&harr', 8596),
('&crarr', 8629),
('&lArr', 8656),
('&uArr', 8657),
('&rArr', 8658),
('&dArr', 8659),
('&hArr', 8660),
('&forall', 8704),
('&part', 8706),
('&exist', 8707),
('&empty', 8709),
('&nabla', 8711),
('&isin', 8712),
('&notin', 8713),
('&ni', 8715),
('&prod', 8719),
('&sum', 8721),
('&minus', 8722),
('&lowast', 8727),
('&radic', 8730),
('&prop', 8733),
('&infin', 8734),
('&ang', 8736),
('&and', 8743),
('&or', 8744),
('&cap', 8745),
('&cup', 8746),
('&int', 8747),
('&there4', 8756),
('&sim', 8764),
('&cong', 8773),
('&asymp', 8776),
('&ne', 8800),
('&equiv', 8801),
('&le', 8804),
('&ge', 8805),
('&sub', 8834),
('&sup', 8835),
('&nsub', 8836),
('&sube', 8838),
('&supe', 8839),
('&oplus', 8853),
('&otimes', 8855),
('&perp', 8869),
('&sdot', 8901),
('&lceil', 8968),
('&rceil', 8969),
('&lfloor', 8970),
('&rfloor', 8971),
('&lang', 9001),
('&rang', 9002),
('&loz', 9674),
('&spades', 9824),
('&clubs', 9827),
('&hearts', 9829),
('&diams', 9830)

-- Build numbers table to parse the string
DECLARE @numbers TABLE (number INT NOT NULL PRIMARY KEY) 
INSERT @numbers
SELECT TOP (LEN(@unsafe)) ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS number
FROM sys.all_objects s1 CROSS JOIN sys.all_objects s2

-- Use numbers table to parse each character.
-- If a match is found in character entity reference table,
-- then use the safe substitute. Otherwise, if the unicode
-- value is greater than 128, use &#<unicode char value>.
-- Finally, use the original character if nothing else
-- is a match
SELECT @safe = COALESCE(@safe,'') 
       + COALESCE(name, 
         CASE WHEN UNICODE(SUBSTRING(@unsafe, number, 1)) > 128 THEN '&#' 
          + CONVERT(NVARCHAR(10), UNICODE(SUBSTRING(@unsafe, number, 1))) 
          ELSE SUBSTRING(@unsafe, number, 1) END)
FROM @numbers 
LEFT OUTER JOIN @t  
  ON UNICODE(SUBSTRING(@unsafe, number, 1)) = unicode_val

SELECT @safe AS [safe]

Results:  
! &quot # $ % &amp &apos ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; 
&lt = &gt ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 
[ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { 
| } ~   &#129 &#130 &#131 &#132 &#133 &#134 &#135 &#136 &#137 &#138 
&#139 &#140 &#141 &#142 &#143 &#144 &#145 &#146 &#147 &#148 &#149 
&#150 &#151 &#152 &#153 &#154 &#155 &#156 &#157 &#158 &#159 &nbsp 
&iexcl &cent &pound &curren &yen &brvbar &sect &uml &copy &ordf 
&laquo &not &shy &reg &macr &deg &plusmn &sup2 &sup3 &acute &micro 
&para &middot &cedil &sup1 &ordm &raquo &frac14 &frac12 &frac34 
&iquest &Agrave &Aacute &Acirc &Atilde &Auml &Aring &AElig &Ccedil 
&Egrave &Eacute &Ecirc &Euml &Igrave &Iacute &Icirc &Iuml &ETH &Ntilde 
&Ograve &Oacute &Ocirc &Otilde &Ouml &times &Oslash &Ugrave &Uacute 
&Ucirc &Uuml &Yacute &THORN &szlig &agrave &aacute &acirc &atilde &auml 
&aring &aelig &ccedil &egrave &eacute &ecirc &euml &igrave &iacute &icirc 
&iuml &eth &ntilde &ograve &oacute &ocirc &otilde &ouml &divide &oslash 
&ugrave &uacute &ucirc &uuml &yacute &thorn &yuml &#256 &#257 &#258 &#259 
&#260 &#261 &#262 &#263 &#264 &#265 &#266...


  1. Database
  2.   
  3. Mysql
  4.   
  5. Oracle
  6.   
  7. Sqlserver
  8.   
  9. PostgreSQL
  10.   
  11. Access
  12.   
  13. SQLite
  14.   
  15. MariaDB
  1. Temukan Entitas Referensi di SQL Server:sys.dm_sql_referencing_entities()

  2. Menghapus baris duplikat (berdasarkan nilai dari beberapa kolom) dari tabel SQL

  3. Simulasi CONNECT SEBELUMNYA dari Oracle di SQL Server

  4. LEFT JOIN vs. LEFT OUTER JOIN di SQL Server

  5. Jalankan kueri pada Layanan Analisis SQL Server dengan IronPython