Module:TableTools

From Viki
Jump to navigation Jump to search

Documentation for this module may be created at Module:TableTools/doc

  1 ------------------------------------------------------------------------------------
  2 --                                   TableTools                                   --
  3 --                                                                                --
  4 -- This module includes a number of functions for dealing with Lua tables.        --
  5 -- It is a meta-module, meant to be called from other Lua modules, and should not --
  6 -- be called directly from #invoke.                                               --
  7 ------------------------------------------------------------------------------------
  8 
  9 local libraryUtil = require('libraryUtil')
 10 
 11 local p = {}
 12 
 13 -- Define often-used variables and functions.
 14 local floor = math.floor
 15 local infinity = math.huge
 16 local checkType = libraryUtil.checkType
 17 local checkTypeMulti = libraryUtil.checkTypeMulti
 18 
 19 ------------------------------------------------------------------------------------
 20 -- isPositiveInteger
 21 --
 22 -- This function returns true if the given value is a positive integer, and false
 23 -- if not. Although it doesn't operate on tables, it is included here as it is
 24 -- useful for determining whether a given table key is in the array part or the
 25 -- hash part of a table.
 26 ------------------------------------------------------------------------------------
 27 function p.isPositiveInteger(v)
 28 	return type(v) == 'number' and v >= 1 and floor(v) == v and v < infinity
 29 end
 30 
 31 ------------------------------------------------------------------------------------
 32 -- isNan
 33 --
 34 -- This function returns true if the given number is a NaN value, and false if
 35 -- not. Although it doesn't operate on tables, it is included here as it is useful
 36 -- for determining whether a value can be a valid table key. Lua will generate an
 37 -- error if a NaN is used as a table key.
 38 ------------------------------------------------------------------------------------
 39 function p.isNan(v)
 40 	return type(v) == 'number' and v ~= v
 41 end
 42 
 43 ------------------------------------------------------------------------------------
 44 -- shallowClone
 45 --
 46 -- This returns a clone of a table. The value returned is a new table, but all
 47 -- subtables and functions are shared. Metamethods are respected, but the returned
 48 -- table will have no metatable of its own.
 49 ------------------------------------------------------------------------------------
 50 function p.shallowClone(t)
 51 	checkType('shallowClone', 1, t, 'table')
 52 	local ret = {}
 53 	for k, v in pairs(t) do
 54 		ret[k] = v
 55 	end
 56 	return ret
 57 end
 58 
 59 ------------------------------------------------------------------------------------
 60 -- removeDuplicates
 61 --
 62 -- This removes duplicate values from an array. Non-positive-integer keys are
 63 -- ignored. The earliest value is kept, and all subsequent duplicate values are
 64 -- removed, but otherwise the array order is unchanged.
 65 ------------------------------------------------------------------------------------
 66 function p.removeDuplicates(arr)
 67 	checkType('removeDuplicates', 1, arr, 'table')
 68 	local isNan = p.isNan
 69 	local ret, exists = {}, {}
 70 	for _, v in ipairs(arr) do
 71 		if isNan(v) then
 72 			-- NaNs can't be table keys, and they are also unique, so we don't need to check existence.
 73 			ret[#ret + 1] = v
 74 		else
 75 			if not exists[v] then
 76 				ret[#ret + 1] = v
 77 				exists[v] = true
 78 			end
 79 		end
 80 	end
 81 	return ret
 82 end
 83 
 84 ------------------------------------------------------------------------------------
 85 -- numKeys
 86 --
 87 -- This takes a table and returns an array containing the numbers of any numerical
 88 -- keys that have non-nil values, sorted in numerical order.
 89 ------------------------------------------------------------------------------------
 90 function p.numKeys(t)
 91 	checkType('numKeys', 1, t, 'table')
 92 	local isPositiveInteger = p.isPositiveInteger
 93 	local nums = {}
 94 	for k in pairs(t) do
 95 		if isPositiveInteger(k) then
 96 			nums[#nums + 1] = k
 97 		end
 98 	end
 99 	table.sort(nums)
100 	return nums
101 end
102 
103 ------------------------------------------------------------------------------------
104 -- affixNums
105 --
106 -- This takes a table and returns an array containing the numbers of keys with the
107 -- specified prefix and suffix. For example, for the table
108 -- {a1 = 'foo', a3 = 'bar', a6 = 'baz'} and the prefix "a", affixNums will return
109 -- {1, 3, 6}.
110 ------------------------------------------------------------------------------------
111 function p.affixNums(t, prefix, suffix)
112 	checkType('affixNums', 1, t, 'table')
113 	checkType('affixNums', 2, prefix, 'string', true)
114 	checkType('affixNums', 3, suffix, 'string', true)
115 
116 	local function cleanPattern(s)
117 		-- Cleans a pattern so that the magic characters ()%.[]*+-?^$ are interpreted literally.
118 		return s:gsub('([%(%)%%%.%[%]%*%+%-%?%^%$])', '%%%1')
119 	end
120 
121 	prefix = prefix or ''
122 	suffix = suffix or ''
123 	prefix = cleanPattern(prefix)
124 	suffix = cleanPattern(suffix)
125 	local pattern = '^' .. prefix .. '([1-9]%d*)' .. suffix .. '$'
126 
127 	local nums = {}
128 	for k in pairs(t) do
129 		if type(k) == 'string' then
130 			local num = mw.ustring.match(k, pattern)
131 			if num then
132 				nums[#nums + 1] = tonumber(num)
133 			end
134 		end
135 	end
136 	table.sort(nums)
137 	return nums
138 end
139 
140 ------------------------------------------------------------------------------------
141 -- numData
142 --
143 -- Given a table with keys like {"foo1", "bar1", "foo2", "baz2"}, returns a table
144 -- of subtables in the format
145 -- {[1] = {foo = 'text', bar = 'text'}, [2] = {foo = 'text', baz = 'text'}}.
146 -- Keys that don't end with an integer are stored in a subtable named "other". The
147 -- compress option compresses the table so that it can be iterated over with
148 -- ipairs.
149 ------------------------------------------------------------------------------------
150 function p.numData(t, compress)
151 	checkType('numData', 1, t, 'table')
152 	checkType('numData', 2, compress, 'boolean', true)
153 	local ret = {}
154 	for k, v in pairs(t) do
155 		local prefix, num = mw.ustring.match(tostring(k), '^([^0-9]*)([1-9][0-9]*)$')
156 		if num then
157 			num = tonumber(num)
158 			local subtable = ret[num] or {}
159 			if prefix == '' then
160 				-- Positional parameters match the blank string; put them at the start of the subtable instead.
161 				prefix = 1
162 			end
163 			subtable[prefix] = v
164 			ret[num] = subtable
165 		else
166 			local subtable = ret.other or {}
167 			subtable[k] = v
168 			ret.other = subtable
169 		end
170 	end
171 	if compress then
172 		local other = ret.other
173 		ret = p.compressSparseArray(ret)
174 		ret.other = other
175 	end
176 	return ret
177 end
178 
179 ------------------------------------------------------------------------------------
180 -- compressSparseArray
181 --
182 -- This takes an array with one or more nil values, and removes the nil values
183 -- while preserving the order, so that the array can be safely traversed with
184 -- ipairs.
185 ------------------------------------------------------------------------------------
186 function p.compressSparseArray(t)
187 	checkType('compressSparseArray', 1, t, 'table')
188 	local ret = {}
189 	local nums = p.numKeys(t)
190 	for _, num in ipairs(nums) do
191 		ret[#ret + 1] = t[num]
192 	end
193 	return ret
194 end
195 
196 ------------------------------------------------------------------------------------
197 -- sparseIpairs
198 --
199 -- This is an iterator for sparse arrays. It can be used like ipairs, but can
200 -- handle nil values.
201 ------------------------------------------------------------------------------------
202 function p.sparseIpairs(t)
203 	checkType('sparseIpairs', 1, t, 'table')
204 	local nums = p.numKeys(t)
205 	local i = 0
206 	local lim = #nums
207 	return function ()
208 		i = i + 1
209 		if i <= lim then
210 			local key = nums[i]
211 			return key, t[key]
212 		else
213 			return nil, nil
214 		end
215 	end
216 end
217 
218 ------------------------------------------------------------------------------------
219 -- size
220 --
221 -- This returns the size of a key/value pair table. It will also work on arrays,
222 -- but for arrays it is more efficient to use the # operator.
223 ------------------------------------------------------------------------------------
224 function p.size(t)
225 	checkType('size', 1, t, 'table')
226 	local i = 0
227 	for _ in pairs(t) do
228 		i = i + 1
229 	end
230 	return i
231 end
232 
233 local function defaultKeySort(item1, item2)
234 	-- "number" < "string", so numbers will be sorted before strings.
235 	local type1, type2 = type(item1), type(item2)
236 	if type1 ~= type2 then
237 		return type1 < type2
238 	elseif type1 == 'table' or type1 == 'boolean' or type1 == 'function' then
239 		return tostring(item1) < tostring(item2)
240 	else
241 		return item1 < item2
242 	end
243 end
244 ------------------------------------------------------------------------------------
245 -- keysToList
246 --
247 -- Returns an array of the keys in a table, sorted using either a default
248 -- comparison function or a custom keySort function.
249 ------------------------------------------------------------------------------------
250 function p.keysToList(t, keySort, checked)
251 	if not checked then
252 		checkType('keysToList', 1, t, 'table')
253 		checkTypeMulti('keysToList', 2, keySort, {'function', 'boolean', 'nil'})
254 	end
255 
256 	local arr = {}
257 	local index = 1
258 	for k in pairs(t) do
259 		arr[index] = k
260 		index = index + 1
261 	end
262 
263 	if keySort ~= false then
264 		keySort = type(keySort) == 'function' and keySort or defaultKeySort
265 		table.sort(arr, keySort)
266 	end
267 
268 	return arr
269 end
270 
271 ------------------------------------------------------------------------------------
272 -- sortedPairs
273 --
274 -- Iterates through a table, with the keys sorted using the keysToList function.
275 -- If there are only numerical keys, sparseIpairs is probably more efficient.
276 ------------------------------------------------------------------------------------
277 function p.sortedPairs(t, keySort)
278 	checkType('sortedPairs', 1, t, 'table')
279 	checkType('sortedPairs', 2, keySort, 'function', true)
280 
281 	local arr = p.keysToList(t, keySort, true)
282 
283 	local i = 0
284 	return function ()
285 		i = i + 1
286 		local key = arr[i]
287 		if key ~= nil then
288 			return key, t[key]
289 		else
290 			return nil, nil
291 		end
292 	end
293 end
294 
295 ------------------------------------------------------------------------------------
296 -- isArray
297 --
298 -- Returns true if the given value is a table and all keys are consecutive
299 -- integers starting at 1.
300 ------------------------------------------------------------------------------------
301 function p.isArray(v)
302 	if type(v) ~= 'table' then
303 		return false
304 	end
305 	local i = 0
306 	for _ in pairs(v) do
307 		i = i + 1
308 		if v[i] == nil then
309 			return false
310 		end
311 	end
312 	return true
313 end
314 
315 ------------------------------------------------------------------------------------
316 -- isArrayLike
317 --
318 -- Returns true if the given value is iterable and all keys are consecutive
319 -- integers starting at 1.
320 ------------------------------------------------------------------------------------
321 function p.isArrayLike(v)
322 	if not pcall(pairs, v) then
323 		return false
324 	end
325 	local i = 0
326 	for _ in pairs(v) do
327 		i = i + 1
328 		if v[i] == nil then
329 			return false
330 		end
331 	end
332 	return true
333 end
334 
335 ------------------------------------------------------------------------------------
336 -- invert
337 --
338 -- Transposes the keys and values in an array. For example, {"a", "b", "c"} ->
339 -- {a = 1, b = 2, c = 3}. Duplicates are not supported (result values refer to
340 -- the index of the last duplicate) and NaN values are ignored.
341 ------------------------------------------------------------------------------------
342 function p.invert(arr)
343 	checkType("invert", 1, arr, "table")
344 	local isNan = p.isNan
345 	local map = {}
346 	for i, v in ipairs(arr) do
347 		if not isNan(v) then
348 			map[v] = i
349 		end
350 	end
351 
352 	return map
353 end
354 
355 ------------------------------------------------------------------------------------
356 -- listToSet
357 --
358 -- Creates a set from the array part of the table. Indexing the set by any of the
359 -- values of the array returns true. For example, {"a", "b", "c"} ->
360 -- {a = true, b = true, c = true}. NaN values are ignored as Lua considers them
361 -- never equal to any value (including other NaNs or even themselves).
362 ------------------------------------------------------------------------------------
363 function p.listToSet(arr)
364 	checkType("listToSet", 1, arr, "table")
365 	local isNan = p.isNan
366 	local set = {}
367 	for _, v in ipairs(arr) do
368 		if not isNan(v) then
369 			set[v] = true
370 		end
371 	end
372 
373 	return set
374 end
375 
376 ------------------------------------------------------------------------------------
377 -- deepCopy
378 --
379 -- Recursive deep copy function. Preserves identities of subtables.
380 ------------------------------------------------------------------------------------
381 local function _deepCopy(orig, includeMetatable, already_seen)
382 	-- Stores copies of tables indexed by the original table.
383 	already_seen = already_seen or {}
384 
385 	local copy = already_seen[orig]
386 	if copy ~= nil then
387 		return copy
388 	end
389 
390 	if type(orig) == 'table' then
391 		copy = {}
392 		for orig_key, orig_value in pairs(orig) do
393 			copy[_deepCopy(orig_key, includeMetatable, already_seen)] = _deepCopy(orig_value, includeMetatable, already_seen)
394 		end
395 		already_seen[orig] = copy
396 
397 		if includeMetatable then
398 			local mt = getmetatable(orig)
399 			if mt ~= nil then
400 				local mt_copy = _deepCopy(mt, includeMetatable, already_seen)
401 				setmetatable(copy, mt_copy)
402 				already_seen[mt] = mt_copy
403 			end
404 		end
405 	else -- number, string, boolean, etc
406 		copy = orig
407 	end
408 	return copy
409 end
410 
411 function p.deepCopy(orig, noMetatable, already_seen)
412 	checkType("deepCopy", 3, already_seen, "table", true)
413 	return _deepCopy(orig, not noMetatable, already_seen)
414 end
415 
416 ------------------------------------------------------------------------------------
417 -- sparseConcat
418 --
419 -- Concatenates all values in the table that are indexed by a number, in order.
420 -- sparseConcat{a, nil, c, d}  =>  "acd"
421 -- sparseConcat{nil, b, c, d}  =>  "bcd"
422 ------------------------------------------------------------------------------------
423 function p.sparseConcat(t, sep, i, j)
424 	local arr = {}
425 
426 	local arr_i = 0
427 	for _, v in p.sparseIpairs(t) do
428 		arr_i = arr_i + 1
429 		arr[arr_i] = v
430 	end
431 
432 	return table.concat(arr, sep, i, j)
433 end
434 
435 ------------------------------------------------------------------------------------
436 -- length
437 --
438 -- Finds the length of an array, or of a quasi-array with keys such as "data1",
439 -- "data2", etc., using an exponential search algorithm. It is similar to the
440 -- operator #, but may return a different value when there are gaps in the array
441 -- portion of the table. Intended to be used on data loaded with mw.loadData. For
442 -- other tables, use #.
443 -- Note: #frame.args in frame object always be set to 0, regardless of  the number
444 -- of unnamed template parameters, so use this function for frame.args.
445 ------------------------------------------------------------------------------------
446 function p.length(t, prefix)
447 	-- requiring module inline so that [[Module:Exponential search]] which is
448 	-- only needed by this one function doesn't get millions of transclusions
449 	local expSearch = require("Module:Exponential search")
450 	checkType('length', 1, t, 'table')
451 	checkType('length', 2, prefix, 'string', true)
452 	return expSearch(function (i)
453 		local key
454 		if prefix then
455 			key = prefix .. tostring(i)
456 		else
457 			key = i
458 		end
459 		return t[key] ~= nil
460 	end) or 0
461 end
462 
463 ------------------------------------------------------------------------------------
464 -- inArray
465 --
466 -- Returns true if valueToFind is a member of the array, and false otherwise.
467 ------------------------------------------------------------------------------------
468 function p.inArray(arr, valueToFind)
469 	checkType("inArray", 1, arr, "table")
470 	-- if valueToFind is nil, error?
471 
472 	for _, v in ipairs(arr) do
473 		if v == valueToFind then
474 			return true
475 		end
476 	end
477 	return false
478 end
479 
480 return p