'------------------------------
' INPUT: UTF-8 string
'------------------------------
' OUTPUT: ERROR UTF2UNICODE < 0
' ---------------------
' OK UTF2UNICODE >= 0 AND UTF2UNICODE <= &H10FFFF
' The recognised Unicode character is removed from the beginning of the argument. This can be turned off (see at the bottom).
'------------------------------
' Decodes the first UTF-8 encoded character of txt$ into its code point.
'
' Reads:   txt$          - byte string holding UTF-8 data
' Writes:  UTF2UNICODE&  - decoded code point (>= 0), or a negative error code:
'                            -1  txt$ is empty (invalid argument)
'                            -2  multi-byte sequence is cut short by the end of txt$
'          txt$          - the consumed bytes are removed from its beginning
'
' A byte that is not a valid head-byte (10xx xxxx or 1111 1xxx) is returned
' unchanged as the result and consumed as a single byte.
' Data-bytes are only masked with &H3F; their 10xx xxxx marker is not validated.

UTF2UNICODE& = -1 'Invalid argument: returned as-is when txt$ is empty

IF LEN(txt$) > 0 THEN 'ASC("") would raise "Illegal function call"

    result = -2 'Unspecified error: kept when the sequence is truncated
    chlen = 1
    hb = ASC(txt$) 'head-byte only

    IF (hb AND &B10000000) = 0 THEN
        ' ? 0xxx xxxx  TRUE = byte is an ASCII character
        result = hb

    ELSEIF (hb AND &B11100000) = &B11000000 THEN
        ' ? 110x xxxx  TRUE = byte is 1st of two bytes
        ' head-byte + data-byte
        ' 110xxxxx     10yyyyyy
        '---------------------
        chlen = 2
        IF LEN(txt$) >= chlen THEN
            result = (hb AND &H1F) * &H40 ' head-byte shifted left 6 places
            ' result | 0000 0000 0000 0000 0000 0xxx xx00 0000 |
            db = ASC(MID$(txt$, 2, 1)) ' data-byte
            result = result OR (db AND &H3F) ' data-byte copied
            ' result | 0000 0000 0000 0000 0000 0xxx xxyy yyyy |
        END IF

    ELSEIF (hb AND &B11110000) = &B11100000 THEN
        ' ? 1110 xxxx  TRUE = byte is 1st of 3 bytes
        ' head-byte + data-byte1 + data-byte2
        ' 1110xxxx     10yyyyyy     10zzzzzz
        '-----------------------------------
        chlen = 3
        IF LEN(txt$) >= chlen THEN
            result = (hb AND &HF) * &H1000 ' head-byte shifted left 12 places
            ' result | 0000 0000 0000 0000 xxxx 0000 0000 0000 |
            db = ASC(MID$(txt$, 2, 1)) ' data-byte1
            result = result OR ((db AND &H3F) * &H40) ' data-byte1 shifted left 6 places
            ' result | 0000 0000 0000 0000 xxxx yyyy yy00 0000 |
            db = ASC(MID$(txt$, 3, 1)) ' data-byte2
            result = result OR (db AND &H3F) ' data-byte2 copied
            ' result | 0000 0000 0000 0000 xxxx yyyy yyzz zzzz |
        END IF

    ELSEIF (hb AND &B11111000) = &B11110000 THEN
        ' ? 1111 0xxx  TRUE = byte is 1st of 4 bytes
        ' head-byte + data-byte1 + data-byte2 + data-byte3
        ' 11110xxx     10yyyyyy     10zzzzzz     10wwwwww
        '------------------------------------------------
        chlen = 4
        IF LEN(txt$) >= chlen THEN
            ' All three payload bits of the head-byte are kept (&H7) and moved
            ' left by 18 places (&H40000 = 2^18) so they sit directly above the
            ' 3 * 6 bits supplied by the data-bytes.
            result = (hb AND &H7) * &H40000 ' head-byte shifted left 18 places
            ' result | 0000 0000 000x xx00 0000 0000 0000 0000 |
            db = ASC(MID$(txt$, 2, 1)) ' data-byte1
            result = result OR ((db AND &H3F) * &H1000) ' data-byte1 shifted left 12 places
            ' result | 0000 0000 000x xxyy yyyy 0000 0000 0000 |
            db = ASC(MID$(txt$, 3, 1)) ' data-byte2
            result = result OR ((db AND &H3F) * &H40) ' data-byte2 shifted left 6 places
            ' result | 0000 0000 000x xxyy yyyy zzzz zz00 0000 |
            db = ASC(MID$(txt$, 4, 1)) ' data-byte3
            result = result OR (db AND &H3F) ' data-byte3 copied
            ' result | 0000 0000 000x xxyy yyyy zzzz zzww wwww |
        END IF

    ELSE
        'Not a head-byte.
        result = hb
    END IF

    ' Drop the consumed bytes. A truncated sequence has chlen >= LEN(txt$),
    ' so the incomplete tail is discarded and txt$ becomes empty.
    ' By commenting this line, function will leave string-argument unchanged.
    IF chlen < LEN(txt$) THEN txt$ = MID$(txt$, chlen + 1) ELSE txt$ = ""

    UTF2UNICODE& = result

END IF