lua如何截取中英文混合字符串
1个回答
展开全部
在 Lua 中,字符串长度按字节计算,UTF-8 编码下一个中文字符通常占 3 个字节,因此中英文混排时直接按字节截取字符串比较麻烦。下面的函数把每个字符(无论中文还是英文)的长度都按 1 来处理。
--- Count the characters in a UTF-8 encoded string.
-- Every character counts as 1 no matter how many bytes it occupies, so a
-- Chinese character and an ASCII letter both contribute 1 to the result.
-- The width of each character is taken from its lead byte only; a stray
-- continuation byte (0x80-0xBF) in lead position is counted as one
-- single-byte character. A sequence truncated at the end of the string is
-- counted as one character.
-- @param str string  UTF-8 encoded text
-- @return number  the number of characters in `str`
function utfstrlen(str)
  -- arr[i] is the smallest lead-byte value that starts an i-byte sequence.
  local arr = {0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}
  local left = #str -- bytes not consumed yet
  local cnt = 0
  -- Use `left > 0`, not `left ~= 0`: a truncated trailing sequence makes
  -- `left` negative, and the loop would then index past the string and fail
  -- when comparing nil with a number.
  while left > 0 do
    local tmp = string.byte(str, -left)
    -- Scan from the widest sequence down; arr[1] == 0 always matches.
    local i = #arr
    while arr[i] do
      if tmp >= arr[i] then
        left = left - i
        break
      end
      i = i - 1
    end
    cnt = cnt + 1
  end
  return cnt
end
--- Extract a substring from a mixed Chinese/English UTF-8 string.
-- Positions and lengths are measured in characters, not bytes: a Chinese
-- character and an ASCII letter each count as 1.
-- Parameters
--   string str    the original string
--   number start  1-based index of the first character to keep; values
--                 below 1 are treated as 1
--   number len    number of characters to keep; nil keeps everything up to
--                 the end of the string, 0 or less yields ""
-- Returns
--   string  the extracted substring ("" when start is past the end)
-- Notes
--   1) Character width is read from the lead byte, so 1- to 6-byte
--      sequences are handled, not only 3-byte Chinese characters.
--   2) Control characters such as "\n" also count as one character.
function subUTF8String(str, start, len)
  -- Byte width of the sequence introduced by lead byte `b`. Anything below
  -- 0xC0 (ASCII or a stray continuation byte) is one byte wide.
  local function charWidth(b)
    if b >= 0xfc then return 6 end
    if b >= 0xf8 then return 5 end
    if b >= 0xf0 then return 4 end
    if b >= 0xe0 then return 3 end
    if b >= 0xc0 then return 2 end
    return 1
  end

  local byteLen = #str
  local toSkip = start - 1

  -- A non-positive length selects nothing.
  if len ~= nil and len <= 0 then
    return ""
  end

  -- Advance past the characters that precede the requested start.
  local pos = 1
  local skipped = 0
  while pos <= byteLen and skipped < toSkip do
    pos = pos + charWidth(string.byte(str, pos))
    skipped = skipped + 1
  end

  -- Walk over the requested number of characters (or to the end of input).
  local first = pos
  local taken = 0
  while pos <= byteLen and (len == nil or taken < len) do
    pos = pos + charWidth(string.byte(str, pos))
    taken = taken + 1
  end

  -- string.sub clamps the end index, so a truncated trailing sequence is
  -- returned as-is instead of reading past the string.
  return string.sub(str, first, pos - 1)
end
例如:
-- Demo: print the three-character slice starting at character 2 of each sample.
local samples = {
  "我爱死你们",
  "abcde",
  "我11爱死你们",
  "我1",
  "我日1爱死你们",
}
for _, sample in ipairs(samples) do
  print(subUTF8String(sample, 2, 3))
end
的结果是
[LUA-print] 爱死你
[LUA-print] bcd
[LUA-print] 11爱
[LUA-print] 1
[LUA-print] 日1爱
--- Count the characters in a UTF-8 encoded string.
-- Every character counts as 1 no matter how many bytes it occupies, so a
-- Chinese character and an ASCII letter both contribute 1 to the result.
-- The width of each character is taken from its lead byte only; a stray
-- continuation byte (0x80-0xBF) in lead position is counted as one
-- single-byte character. A sequence truncated at the end of the string is
-- counted as one character.
-- @param str string  UTF-8 encoded text
-- @return number  the number of characters in `str`
function utfstrlen(str)
  -- arr[i] is the smallest lead-byte value that starts an i-byte sequence.
  local arr = {0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}
  local left = #str -- bytes not consumed yet
  local cnt = 0
  -- Use `left > 0`, not `left ~= 0`: a truncated trailing sequence makes
  -- `left` negative, and the loop would then index past the string and fail
  -- when comparing nil with a number.
  while left > 0 do
    local tmp = string.byte(str, -left)
    -- Scan from the widest sequence down; arr[1] == 0 always matches.
    local i = #arr
    while arr[i] do
      if tmp >= arr[i] then
        left = left - i
        break
      end
      i = i - 1
    end
    cnt = cnt + 1
  end
  return cnt
end
--- Extract a substring from a mixed Chinese/English UTF-8 string.
-- Positions and lengths are measured in characters, not bytes: a Chinese
-- character and an ASCII letter each count as 1.
-- Parameters
--   string str    the original string
--   number start  1-based index of the first character to keep; values
--                 below 1 are treated as 1
--   number len    number of characters to keep; nil keeps everything up to
--                 the end of the string, 0 or less yields ""
-- Returns
--   string  the extracted substring ("" when start is past the end)
-- Notes
--   1) Character width is read from the lead byte, so 1- to 6-byte
--      sequences are handled, not only 3-byte Chinese characters.
--   2) Control characters such as "\n" also count as one character.
function subUTF8String(str, start, len)
  -- Byte width of the sequence introduced by lead byte `b`. Anything below
  -- 0xC0 (ASCII or a stray continuation byte) is one byte wide.
  local function charWidth(b)
    if b >= 0xfc then return 6 end
    if b >= 0xf8 then return 5 end
    if b >= 0xf0 then return 4 end
    if b >= 0xe0 then return 3 end
    if b >= 0xc0 then return 2 end
    return 1
  end

  local byteLen = #str
  local toSkip = start - 1

  -- A non-positive length selects nothing.
  if len ~= nil and len <= 0 then
    return ""
  end

  -- Advance past the characters that precede the requested start.
  local pos = 1
  local skipped = 0
  while pos <= byteLen and skipped < toSkip do
    pos = pos + charWidth(string.byte(str, pos))
    skipped = skipped + 1
  end

  -- Walk over the requested number of characters (or to the end of input).
  local first = pos
  local taken = 0
  while pos <= byteLen and (len == nil or taken < len) do
    pos = pos + charWidth(string.byte(str, pos))
    taken = taken + 1
  end

  -- string.sub clamps the end index, so a truncated trailing sequence is
  -- returned as-is instead of reading past the string.
  return string.sub(str, first, pos - 1)
end
例如:
-- Demo: print the three-character slice starting at character 2 of each sample.
local samples = {
  "我爱死你们",
  "abcde",
  "我11爱死你们",
  "我1",
  "我日1爱死你们",
}
for _, sample in ipairs(samples) do
  print(subUTF8String(sample, 2, 3))
end
的结果是
[LUA-print] 爱死你
[LUA-print] bcd
[LUA-print] 11爱
[LUA-print] 1
[LUA-print] 日1爱
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询