nixos/lua-lsp/script/encoder/init.lua

124 lines
3.1 KiB
Lua
Raw Blame History

local ansi = require 'encoder.ansi'
local utf16 = require 'encoder.utf16'
local utf16le = utf16('le', utf8.codepoint '<EFBFBD>')
local utf16be = utf16('be', utf8.codepoint '<EFBFBD>')
---@alias encoder.encoding '"utf8"'|'"utf16"'|'"utf16le"'|'"utf16be"'
---@alias encoder.bom '"no"'|'"yes"'|'"auto"'
local m = {}
---@param encoding encoder.encoding
---@param s string
---@param i? integer
---@param j? integer
function m.len(encoding, s, i, j)
i = i or 1
j = j or #s
if encoding == 'utf16'
or encoding == 'utf16' then
local us = utf16le.fromutf8(s:sub(i, j))
return #us // 2
end
if encoding == 'utf16be' then
local us = utf16be.fromutf8(s:sub(i, j))
return #us // 2
end
if encoding == 'utf8' then
return utf8.len(s, i, j, true)
end
log.error('Unsupport len encoding:', encoding)
return j - i + 1
end
---@param encoding encoder.encoding
---@param s string
---@param n integer
---@param i? integer
function m.offset(encoding, s, n, i)
i = i or 1
if encoding == 'utf16'
or encoding == 'utf16le' then
local line = s:match('[^\r\n]*', i)
if not line:find '[\x80-\xff]' then
return n + i - 1
end
local us = utf16le.fromutf8(line)
local os = utf16le.toutf8(us:sub(1, n * 2 - 2))
return #os + i
end
if encoding == 'utf16be' then
local line = s:match('[^\r\n]*', i)
if not line:find '[\x80-\xff]' then
return n + i - 1
end
local us = utf16be.fromutf8(line)
local os = utf16be.toutf8(us:sub(1, n * 2 - 2))
return #os + i
end
if encoding == 'utf8' then
return utf8.offset(s, n, i)
end
log.error('Unsupport offset encoding:', encoding)
return n + i - 1
end
---@param encoding encoder.encoding
---@param text string
---@param bom encoder.bom
---@return string
function m.encode(encoding, text, bom)
if encoding == 'utf8' then
if bom == 'yes' then
text = '\xEF\xBB\xBF' .. text
end
return text
end
if encoding == 'ansi' then
return ansi.fromutf8(text)
end
if encoding == 'utf16'
or encoding == 'utf16le' then
text = utf16le.fromutf8(text)
if bom == 'yes'
or bom == 'auto' then
text = '\xFF\xFE' .. text
end
return text
end
if encoding == 'utf16be' then
text = utf16be.fromutf8(text)
if bom == 'yes'
or bom == 'auto' then
text = '\xFE\xFF' .. text
end
return text
end
log.error('Unsupport encode encoding:', encoding)
return text
end
---@param encoding encoder.encoding
---@param text string
---@return string
function m.decode(encoding, text)
if encoding == 'utf8' then
return text
end
if encoding == 'ansi' then
return ansi.toutf8(text)
end
if encoding == 'utf16'
or encoding == 'utf16le' then
return utf16le.toutf8(text)
end
if encoding == 'utf16be' then
return utf16be.toutf8(text)
end
log.error('Unsupport encode encoding:', encoding)
return text
end
return m