XML, UTF-8 and SQL Server

DECLARE @i int
DECLARE @XML varchar(300)
DECLARE @nXML nvarchar(300)
DECLARE @XML_ENCODING varchar(50)
DECLARE @XML_BODY nvarchar(250)
SET @XML_BODY = N'<X>
 <TEST text="Found in XML ®™Âï"/>
 <TEST text="Symbols ®™"/>
 <TEST text="French àâäèéêëîïôœùûüÇç€"/>
 <TEST text="Spanish ÁÉÍÓÚÑÜáñ¿¡€"/>
 <TEST text="German ÄäÖöÜü߀"/>
 <TEST text="CYRILLIC ЛЙЖДФЦЭЮЯйфчьыъщ"/>
</X>'

Now let’s run some SQL specifying different encodings and see what happens. The first encoding we’ll try is Windows-1252. If you are unfamiliar with encodings then your default encoding is likely set to Windows-1252, which contains characters for Western European languages.

PRINT 'Windows-1252'
SET @XML_ENCODING = '<?xml version="1.0" encoding="Windows-1252"?>'
SET @XML = @XML_ENCODING + @XML_BODY
EXEC sp_xml_preparedocument @i OUTPUT, @XML
SELECT    *
FROM       OPENXML (@i, '/X/TEST',1)
 WITH ([text] nvarchar(80))
EXEC sp_xml_removedocument @i

Windows-1252
text
--------------------------------------------------------------------------------
Found in XML ®™Âï
Symbols ®™
French àâäèéêëîïôœùûüÇç€
Spanish ÁÉÍÓÚÑÜáñ¿¡€
German ÄäÖöÜü߀
CYRILLIC ????????????????
PRINT 'Windows-1251'
SET @XML_ENCODING = '<?xml version="1.0" encoding="Windows-1251"?>'
SET @XML = @XML_ENCODING + @XML_BODY
EXEC sp_xml_preparedocument @i OUTPUT, @XML
SELECT    *
FROM       OPENXML (@i, '/X/TEST',1)
 WITH ([text] nvarchar(80))
EXEC sp_xml_removedocument @i

Windows-1251
text
--------------------------------------------------------------------------------
Found in XML ®™Вп
Symbols ®™
French авдийклопфњщыьЗзЂ
Spanish БЙНУЪСЬбсїЎЂ
German ДдЦцЬьЯЂ
CYRILLIC ????????????????
PRINT 'UTF-16'
SET @XML_ENCODING = '<?xml version="1.0" encoding="UTF-16"?>'
SET @nXML = @XML_ENCODING + @XML_BODY
EXEC sp_xml_preparedocument @i OUTPUT, @nXML
SELECT    *
FROM       OPENXML (@i, '/X/TEST',1)
 WITH ([text] nvarchar(80))
EXEC sp_xml_removedocument @i

UTF-16
text
--------------------------------------------------------------------------------
Found in XML ®™Âï
Symbols ®™
French àâäèéêëîïôœùûüÇç€
Spanish ÁÉÍÓÚÑÜáñ¿¡€
German ÄäÖöÜü߀
CYRILLIC ЛЙЖДФЦЭЮЯйфчьыъщ
Advertisements
This entry was posted in Uncategorized. Bookmark the permalink.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s