Module:BaseConvert
La documentation de ce module est générée par le modèle {{Documentation module}}.
Les éditeurs peuvent travailler dans le bac à sable (créer).
Voir les statistiques d'appel depuis le wikicode sur l'outil wstat et les appels depuis d'autres modules.
-- Code point ranges that _normalizeNumber folds onto ASCII, written as
-- { first, last, ascii }: code point `first + k` is replaced by `ascii + k`.
local _FOLDED_RANGES = {
    { 0xFF01, 0xFF5E, 0x21 }, -- Fullwidth forms (punctuation, symbols, digits and letters)
    { 0x2074, 0x2079, 0x34 }, -- superscript digits 4-9
    { 0x2080, 0x2089, 0x30 }, -- subscript digits 0-9
}

-- Isolated characters (or small sets of them) with their ASCII replacement.
local _FOLDED_CHARS = {
    { '[⁺₊]', '+' }, -- superscript/subscript plus sign
    { '[⁻₋−]', '-' }, -- superscript/subscript minus sign, and U+2212 minus sign
    { '⁰', '0' }, -- superscript digit 0
    { '¹', '1' }, -- superscript digit 1
    { '²', '2' }, -- superscript digit 2
    { '³', '3' }, -- superscript digit 3
}

-- Code point of the digit zero of each supported decimal digit set; the nine
-- following code points are the digits 1 to 9 of the same set.
local _DECIMAL_ZEROS = {
    0x0660, -- Arabic-Indic
    0x06F0, -- Extended Arabic-Indic
    0x07C0, -- N'Ko
    0x0966, -- Devanagari
    0x09E6, -- Bengali
    0x0A66, -- Gurmukhi
    0x0AE6, -- Gujarati
    0x0B66, -- Oriya
    0x0BE6, -- Tamil
    0x0C66, -- Telugu
    0x0CE6, -- Kannada
    0x0D66, -- Malayalam
    0x0DE6, -- Sinhala
    0x0E50, -- Thai
    0x0ED0, -- Lao
    0x0F20, -- Tibetan
    0x1040, -- Myanmar
    0x1090, -- Myanmar Shan
    0x17E0, -- Khmer
    0x1810, -- Mongolian
    0x1946, -- Limbu
    0x19D0, -- New Tai Lue
    0x1A80, -- Tai Tham Hora
    0x1A90, -- Tai Tham Tham
    0x1B50, -- Balinese
    0x1BB0, -- Sundanese
    0x1C40, -- Lepcha
    0x1C50, -- Ol Chiki
    0xA620, -- Vai
    0xA8D0, -- Saurashtra
    0xA900, -- Kayah Li
    0xA9D0, -- Javanese
    0xA9F0, -- Myanmar Tai Laing
    0xAA50, -- Cham
    0xABF0, -- Meetei Mayek
    -- TODO: decimal digits in supplementary planes (not enabled yet):
    -- 0x1D7CE Mathematical bold, 0x1D7D8 Mathematical double-struck,
    -- 0x1D7E2 Mathematical sans-serif, 0x1D7EC Mathematical sans-serif bold,
    -- 0x1D7F6 Mathematical monospace,
    -- 0x104A0 Osmanya, 0x10D30 Hanifi Rohingya, 0x11066 Brahmi,
    -- 0x110F0 Sora Sompeng, 0x11136 Chakma, 0x111D0 Sharada,
    -- 0x112F0 Khudawadi, 0x11450 Newa, 0x114D0 Tirhuta, 0x11650 Modi,
    -- 0x116C0 Takri, 0x11730 Ahom, 0x118E0 Warang Citi, 0x11C50 Bhaiksuki,
    -- 0x11D50 Masaram Gondi, 0x11DA0 Gunjala, 0x16A60 Mro,
    -- 0x16B50 Pahawh Hmong, 0x1E950 Adlam.
}

-- Convert to a string and normalize to uppercase ASCII.
--
-- s: the value to normalize; it is concatenated to '' first, so a number is
--    accepted as well as a string.
-- Returns a single string with all whitespace removed, ASCII letters in
-- uppercase, and the digit/sign variants listed in the tables above replaced
-- by their ASCII equivalent. Other characters are left as they are.
function _normalizeNumber(s)
    s = '' .. s -- convert to string
    -- ASCII-only input needs none of the Unicode handling below (fast path
    -- for lower memory use): strip whitespace and uppercase the letters.
    if not s:find('[\128-\255]') then
        return s:gsub('%s', ''):upper()
    end

    -- Non-ASCII strings are assumed to be encoded with UTF-8, as used by
    -- the mw.ustring module.

    -- Replace every character of the range first..last by the ASCII
    -- character at the same offset from `ascii`.
    local function foldRange(text, first, last, ascii)
        local pattern = '[' .. mw.ustring.char(first) .. '-' .. mw.ustring.char(last) .. ']'
        -- The parentheses drop the substitution count returned by gsub.
        return (mw.ustring.gsub(text, pattern, function(char)
            return mw.ustring.char(mw.ustring.codepoint(char, 1) - first + ascii)
        end))
    end

    -- All replacements produce ASCII only, so no replacement can feed
    -- another one and the order of the passes does not matter.
    for _, range in ipairs(_FOLDED_RANGES) do
        s = foldRange(s, range[1], range[2], range[3])
    end
    for _, entry in ipairs(_FOLDED_CHARS) do
        s = mw.ustring.gsub(s, entry[1], entry[2])
    end
    for _, zero in ipairs(_DECIMAL_ZEROS) do
        s = foldRange(s, zero, zero + 9, 0x30)
    end

    -- Remove Unicode whitespace, and normalize ASCII letters to uppercase.
    return mw.ustring.gsub(s, '%s', ''):upper()
end
-- Convert the number n, written in base `from`, to its notation in `base`.
--
-- n:       the number to convert (string or number). An optional leading
--          '+' or '-' is accepted, and whitespace and letter case are
--          ignored (see _normalizeNumber).
-- from:    optional input base, 2 to 36. When it is missing, a leading
--          '0X', '0O' or '0B' selects base 16, 8 or 2, else base 10 is used.
-- base:    optional output base, 2 to 36; any other value means "same as
--          the input base".
-- default: optional value returned when n cannot be parsed; defaults to n.
-- fmt:     optional table of formatting options, never modified:
--          prefix, minus, plus, infix, digits, padzero, width, decimalsep,
--          precision, suffix (groupsep, groupby and grouplo are read but
--          not used yet, see the TODO about digit grouping below).
-- Returns the formatted string, or `default` when n is missing or is not a
-- finite, non-negative number (after removing its sign) in base `from`.
function _convert(n, from, base, default, fmt)
    from = tonumber(from) -- optional
    base = tonumber(base) -- if nil, use base from, adjusted below
    default = default or n -- optional, same as input if unspecified
    fmt = fmt or {} -- optional read-only table
    local prefix = fmt.prefix or ''
    local minus = fmt.minus or '-'
    local plus = fmt.plus or ''
    local infix = fmt.infix or ''
    local digits = fmt.digits
    if not digits or digits == '' then
        -- An empty alphabet would also give an empty padding character
        -- and a padding loop that never ends.
        digits = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    end
    local padzero = fmt.padzero
    if not padzero or padzero == '' then
        padzero = digits
    end
    padzero = mw.ustring.sub(padzero, 1, 1)
    -- The three grouping options are not used yet (see TODO below).
    local groupsep = fmt.groupsep or '\226\128\175' -- U+202F, thin non-breaking space
    local groupby = tonumber(fmt.groupby) or 3 -- high groups, 2 in India, 4 in CJK
    local grouplo = tonumber(fmt.grouplo) or groupby -- low groups width, 3 in India
    local width = tonumber(fmt.width) or 0 -- minimum, may be larger
    local decimalsep = fmt.decimalsep or '.'
    local precision = tonumber(fmt.precision) -- minimum, may be nil
    local suffix = fmt.suffix or ''

    -- Nothing to convert.
    if n == nil then
        return default
    end

    -- Capitalize all ASCII lowercase letters and strip all whitespaces, then
    -- check for a leading sign. Do this while the input is still in string
    -- form, because tonumber doesn't support signed numbers in non-10 bases.
    local text = _normalizeNumber(n):gsub('^%+', '')
    local negated
    text, negated = text:gsub('^%-', '')
    local sign = negated > 0 and minus or plus
    -- TODO: Handle sign format in prefix+suffix pairs (e.g. parentheses vs.
    -- spaces).

    -- TODO: Strip off a leading '#' and check the number of hex digits
    -- (1, 2, 3, 4, 6 or 8) for RGB or RGBA, as in CSS, to properly return a
    -- normalized RGBA color with 8 digits, i.e.:
    -- * GRAY '#1' is the same as RGBA '#111111FF' (non-standard),
    -- * GRAY '#12' is the same as RGBA '#121212FF' (non-standard),
    -- * RGB '#123' is the same as RGBA '#112233FF',
    -- * RGBA '#1234' is the same as RGBA '#11223344', and
    -- * RGB '#123456' is the same as RGBA '#123456FF';
    -- * other lengths are invalid/ambiguous input.
    -- Need a special conversion of hex digits (in lengths 3 and 4) to
    -- multiply them by 0x11 and treat them in input base 256. Then when
    -- formatting the output in base 16, use smallest format if there's no
    -- width specified, or use width == 1, 3, 4, 6 or 8 for a fixed format in
    -- that base. For formatting in base 10, 100 or 256, use commas between
    -- channels, and treat output base 100 specially (using renormalized
    -- percentages). Sketch:
    --   if not from or from == 256 then
    --       n, num = n:gsub('^#', '')
    --       if num > 0 then from = 256 end
    --   end

    -- Strip off a leading '0X', '0O' or '0B', but only when the input base
    -- is unknown or agrees with the prefix: 'X', 'O' and 'B' are valid
    -- digits (33, 24 and 11) in larger bases.
    local radixPrefixes = { { '^0X', 16 }, { '^0O', 8 }, { '^0B', 2 } }
    for _, entry in ipairs(radixPrefixes) do
        if not from or from == entry[2] then
            local stripped
            text, stripped = text:gsub(entry[1], '')
            if stripped > 0 then
                from = entry[2]
            end
        end
    end
    from = from or 10 -- default input base if no prefix was matched
    -- tonumber raises an error for a base out of 2..36: reject it here.
    if from < 2 or from > 36 or from ~= math.floor(from) then
        return default
    end

    -- Parse the input in base from, compute a double.
    -- TODO: Need to extract exponent and evaluate the number in relevant
    -- base, then adjust the parsed value with the extracted exponent:
    -- * from == 10: scientific decimal notations '5.2e3' or '5200000e-3',
    --   i.e. a trailing 'E[-+]?[0-9]+' (already capitalized above);
    -- * from == 16: scientific hexadecimal notations '0xA.B01p8' or
    --   '0xAB01p-8', i.e. a trailing 'P[-+]?[0-9]+'.
    local value
    if from == 10 then
        -- Base 10 is the default base of tonumber, which accepts a decimal
        -- part and an exponent in that base.
        value = tonumber(text)
    else
        value = tonumber(text, from)
    end
    -- Also reject NaN, infinities and negative values (a second sign left
    -- in the text): the digit extraction below would never end on them.
    if not value or value ~= value or value < 0 or value == math.huge then
        return default
    end

    -- Handle parameters for output format.
    -- TODO: Handle special bases 100 and 256 for CSS colors.
    if not base or base < 2 or base > 36 or base ~= math.floor(base) then
        base = from -- Default base for output is the same base as for input.
    end

    -- Digit of the given value, taken from the (possibly multibyte) alphabet.
    local function digitAt(digitValue)
        return mw.ustring.sub(digits, digitValue + 1, digitValue + 1)
    end

    -- Decompose the number in parts.
    -- TODO: Decompose the exponent for the relevant output base if needed.
    -- TODO: Round number to the correct precision (using IEEE even rounding
    -- mode).
    -- Decompose the mantissa into integral and fractional parts.
    local whole, fraction = math.modf(value)

    -- Format the integral part, least significant digit first.
    local intPart = ''
    repeat
        intPart = digitAt(whole % base) .. intPart
        whole = math.floor(whole / base)
    until whole == 0
    -- Pad on the left up to the minimum width, counted in characters.
    local intLength = mw.ustring.len(intPart)
    while intLength < width do
        intPart = padzero .. intPart
        intLength = intLength + 1
    end

    -- Format the fractional part: at most `precision` digits, or 10 when no
    -- precision is given.
    local fracDigits = {}
    local maxDigits = precision or 10
    while fraction > 0 and #fracDigits < maxDigits do
        local digitValue
        digitValue, fraction = math.modf(fraction * base)
        fracDigits[#fracDigits + 1] = digitAt(digitValue)
    end
    -- Add trailing zeros if needed, remove them otherwise.
    if precision then
        for _ = #fracDigits + 1, precision do
            fracDigits[#fracDigits + 1] = padzero
        end
    else
        while fracDigits[#fracDigits] == padzero do
            fracDigits[#fracDigits] = nil
        end
    end
    local fracPart = table.concat(fracDigits)
    -- Add the radix point if needed.
    if fracPart ~= '' then
        fracPart = decimalsep .. fracPart
    end

    -- TODO: Group digits in integral and fractional parts using a group
    -- separator.
    -- TODO: Format the exponent part.
    -- TODO: Adjust the sign notation in mantissa and exponent parts.
    -- TODO: Determine the relative position of each part (notably the sign).

    -- Return the composition of all parts.
    return prefix .. sign .. infix .. intPart .. fracPart .. suffix
end
-- Entry point of the module: read the arguments and call _convert.
--
-- frame: either the current frame object (invocation via #invoke), whose
--        `args` table holds the arguments, or directly a table of arguments
--        (call from another module).
-- Recognized arguments: n, from, base, default, and the formatting options
-- prefix, minus, plus, infix, digits, padzero, groupsep, groupby, grouplo,
-- width, decimalsep, precision, suffix.
-- Returns the result of _convert.
function convert(frame)
    -- Allow for invocation via #invoke or directly from another module
    local args = frame
    if frame == mw.getCurrentFrame() then
        args = frame.args
    end
    local fmt = {
        prefix = args.prefix,
        minus = args.minus,
        plus = args.plus,
        infix = args.infix,
        digits = args.digits,
        padzero = args.padzero,
        -- _convert reads fmt.groupsep together with groupby and grouplo.
        groupsep = args.groupsep,
        groupby = args.groupby,
        grouplo = args.grouplo,
        width = args.width,
        decimalsep = args.decimalsep,
        precision = args.precision,
        suffix = args.suffix,
    }
    return _convert(args.n, args.from, args.base, args.default, fmt)
end
-- Public interface of this module.
local exports = {}

-- Entry point for Mediawiki, to call with #invoke:
exports.convert = convert

-- Entry points for Lua callers only, or for local debugging and tests:
exports._normalizeNumber = _normalizeNumber
exports._convert = _convert

return exports