#######################################
# Show how a Unicode code point is encoded as UTF-8.
#
# Prints five lines: the character itself, the code point, the code point's
# bits zero-padded to the payload width of the chosen encoding length
# (8, 11, 16 or 21 bits), the resulting UTF-8 bit pattern, and the UTF-8
# bytes in hex (two digits per byte).
#
# Arguments:
#   $1 - code point in hex, 1-8 digits, case-insensitive, optionally
#        prefixed with "U", "u" and/or "+" (U0024, 20AC, U+1F604).
#        The value must not exceed 10FFFF.
# Outputs:
#   The report on stdout; diagnostics on stderr.
# Returns:
#   0 on success, 1 when the argument is missing, not hex, or out of range.
# Notes:
#   - All bit manipulation is done with shell arithmetic. Formatting the
#     binary digit string through printf's %d would overflow a 64-bit
#     integer once the code point needs 20 or more bits (U+80000 and up).
#   - Surrogate code points (D800-DFFF) are not rejected; they receive the
#     mechanical 3-byte pattern even though that is not well-formed UTF-8.
#   - How the character on the first line is rendered is up to the shell's
#     \U escape handling and the current locale.
#######################################
function cp2hex() {
  local -r hex_re='^[0-9A-F]{1,8}$'
  local codepoint="${1-}"

  # Normalise "U20AC", "u20ac", "U+20AC" and "20AC" to bare upper-case hex.
  codepoint="${codepoint#[Uu]}"
  codepoint="${codepoint#+}"
  codepoint="${codepoint^^}"

  # At most 8 digits: that is what \U consumes, and it keeps the value far
  # below the 64-bit arithmetic limit.
  if [[ ! "${codepoint}" =~ ${hex_re} ]]; then
    printf 'cp2hex: expected a hex code point such as U20AC, got "%s"\n' \
      "${1-}" >&2
    return 1
  fi

  local -i value=$((16#${codepoint}))

  # bytes  = length of the UTF-8 sequence
  # width  = number of payload bits that sequence can carry
  # prefix = marker bits of the lead byte
  local -i bytes=0 width=0
  local prefix=''
  if ((value <= 0x7F)); then
    bytes=1 width=8 prefix=''
  elif ((value <= 0x7FF)); then
    bytes=2 width=11 prefix='110'
  elif ((value <= 0xFFFF)); then
    bytes=3 width=16 prefix='1110'
  elif ((value <= 0x10FFFF)); then
    bytes=4 width=21 prefix='11110'
  else
    printf 'cp2hex: U%s is beyond the Unicode range (max U10FFFF)\n' \
      "${codepoint}" >&2
    return 1
  fi

  # Code point as a bit string, most significant bit first, padded to width.
  local binary=''
  local -i i
  for ((i = width - 1; i >= 0; i--)); do
    binary+=$(( (value >> i) & 1 ))
  done

  # The lead byte takes whatever is left after the 6-bit continuation
  # groups; every continuation byte is "10" followed by the next 6 bits.
  local -i lead_len=$((width - 6 * (bytes - 1)))
  local utf8_binary="${prefix}${binary:0:lead_len}"
  for ((i = lead_len; i < width; i += 6)); do
    utf8_binary+="10${binary:i:6}"
  done

  # %b expands the \U escape from an argument, so nothing derived from user
  # input ever ends up in the format string.
  printf '%d Byte unicode: %b\n' "${bytes}" "\\U${codepoint}"
  printf 'Codepoint: U%s\n' "${codepoint}"
  printf 'Codepoint binary: %s\n' "${binary}"
  printf 'UTF-8 binary: %s\n' "${utf8_binary}"
  printf 'UTF-8 hex: %0*X\n' "$((bytes * 2))" "$((2#${utf8_binary}))"
}
$>cp2hex U0024
1 Byte unicode: $
Codepoint: U0024
Codepoint binary: 00100100
UTF-8 binary: 00100100
UTF-8 hex: 24
$>cp2hex U00A2
2 Byte unicode: ¢
Codepoint: U00A2
Codepoint binary: 00010100010
UTF-8 binary: 1100001010100010
UTF-8 hex: C2A2
$>cp2hex 20AC
3 Byte unicode: €
Codepoint: U20AC
Codepoint binary: 0010000010101100
UTF-8 binary: 111000101000001010101100
UTF-8 hex: E282AC
$>cp2hex U1F604
4 Byte unicode: 😄
Codepoint: U1F604
Codepoint binary: 000011111011000000100
UTF-8 binary: 11110000100111111001100010000100
UTF-8 hex: F09F9884
No comments:
Post a Comment