Module:String

From Viki
Jump to navigation Jump to search

Documentation for this module may be created at Module:String/doc

  1 --[[
  2 
  3 This module is intended to provide access to basic string functions.
  4 
  5 Most of the functions provided here can be invoked with named parameters,
  6 unnamed parameters, or a mixture.  If named parameters are used, Mediawiki will
  7 automatically remove any leading or trailing whitespace from the parameter.
  8 Depending on the intended use, it may be advantageous to either preserve or
  9 remove such whitespace.
 10 
 11 Global options
 12     ignore_errors: If set to 'true' or 1, any error condition will result in
 13         an empty string being returned rather than an error message.
 14 
 15     error_category: If an error occurs, specifies the name of a category to
 16         include with the error message.  The default category is
 17         [Category:Errors reported by Module String].
 18 
 19     no_category: If set to 'true' or 1, no category will be added if an error
 20         is generated.
 21 
 22 Unit tests for this module are available at Module:String/tests.
 23 ]]
 24 
 25 local str = {}
 26 
 27 --[[
 28 len
 29 
 30 This function returns the length of the target string.
 31 
 32 Usage:
 33 {{#invoke:String|len|target_string|}}
 34 OR
 35 {{#invoke:String|len|s=target_string}}
 36 
 37 Parameters
 38     s: The string whose length to report
 39 
 40 If invoked using named parameters, Mediawiki will automatically remove any leading or
 41 trailing whitespace from the target string.
 42 ]]
 43 function str.len( frame )
 44 	local new_args = str._getParameters( frame.args, {'s'} )
 45 	local s = new_args['s'] or ''
 46 	return mw.ustring.len( s )
 47 end
 48 
 49 --[[
 50 sub
 51 
 52 This function returns a substring of the target string at specified indices.
 53 
 54 Usage:
 55 {{#invoke:String|sub|target_string|start_index|end_index}}
 56 OR
 57 {{#invoke:String|sub|s=target_string|i=start_index|j=end_index}}
 58 
 59 Parameters
 60     s: The string to return a subset of
 61     i: The first index of the substring to return, defaults to 1.
 62     j: The last index of the string to return, defaults to the last character.
 63 
 64 The first character of the string is assigned an index of 1.  If either i or j
 65 is a negative value, it is interpreted the same as selecting a character by
 66 counting from the end of the string.  Hence, a value of -1 is the same as
 67 selecting the last character of the string.
 68 
 69 If the requested indices are out of range for the given string, an error is
 70 reported.
 71 ]]
 72 function str.sub( frame )
 73 	local new_args = str._getParameters( frame.args, { 's', 'i', 'j' } )
 74 	local s = new_args['s'] or ''
 75 	local i = tonumber( new_args['i'] ) or 1
 76 	local j = tonumber( new_args['j'] ) or -1
 77 
 78 	local len = mw.ustring.len( s )
 79 
 80 	-- Convert negatives for range checking
 81 	if i < 0 then
 82 		i = len + i + 1
 83 	end
 84 	if j < 0 then
 85 		j = len + j + 1
 86 	end
 87 
 88 	if i > len or j > len or i < 1 or j < 1 then
 89 		return str._error( 'String subset index out of range' )
 90 	end
 91 	if j < i then
 92 		return str._error( 'String subset indices out of order' )
 93 	end
 94 
 95 	return mw.ustring.sub( s, i, j )
 96 end
 97 
 98 --[[
 99 This function implements the features of {{str sub old}} and is kept in order
100 to maintain these older templates.
101 ]]
-- Entry point for #invoke:String|sublength (legacy {{str sub old}} support).
--
-- Named parameters read from frame.args:
--     s:   the source string; a missing value is treated as ''
--     i:   zero-based offset of the first character to return, defaults to 0
--     len: number of characters to return; when absent or non-numeric the
--          end index passed to mw.ustring.sub is nil
function str.sublength( frame )
	-- Fall back to '' so that an omitted s produces an empty result instead of
	-- handing nil to mw.ustring.sub as its string argument.
	local s = frame.args.s or ''
	local i = tonumber( frame.args.i ) or 0
	local len = tonumber( frame.args.len )
	-- i is zero-based here, hence the +1 to reach ustring's one-based index.
	return mw.ustring.sub( s, i + 1, len and ( i + len ) )
end
107 
108 --[[
109 _match
110 
111 This function returns a substring from the source string that matches a
112 specified pattern. It is exported for use in other modules
113 
114 Usage:
115 strmatch = require("Module:String")._match
116 sresult = strmatch( s, pattern, start, match, plain, nomatch )
117 
118 Parameters
119     s: The string to search
120     pattern: The pattern or string to find within the string
121     start: The index within the source string to start the search.  The first
122         character of the string has index 1.  Defaults to 1.
123     match: In some cases it may be possible to make multiple matches on a single
124         string.  This specifies which match to return, where the first match is
125         match= 1.  If a negative number is specified then a match is returned
126         counting from the last match.  Hence match = -1 is the same as requesting
127         the last match.  Defaults to 1.
128     plain: A flag indicating that the pattern should be understood as plain
129         text.  Defaults to false.
130     nomatch: If no match is found, output the "nomatch" value rather than an error.
131 
132 For information on constructing Lua patterns, a form of [regular expression], see:
133 
134 * http://www.lua.org/manual/5.1/manual.html#5.4.1
135 * http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns
136 * http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns
137 
138 ]]
139 -- This sub-routine is exported for use in other modules
-- Returns the requested match of pattern within s, the nomatch value when no
-- match exists and nomatch is not nil, or the output of str._error for
-- invalid input / no match.  start and match_index both default to 1.
function str._match( s, pattern, start, match_index, plain_flag, nomatch )
	if s == '' then
		return str._error( 'Target string is empty' )
	end
	if pattern == '' then
		return str._error( 'Pattern string is empty' )
	end
	start = tonumber( start ) or 1
	if math.abs( start ) < 1 or math.abs( start ) > mw.ustring.len( s ) then
		return str._error( 'Requested start is out of range' )
	end
	-- Apply the documented default of 1.  Without this, a caller from another
	-- module that omits match_index reaches the numeric comparison below with
	-- nil and triggers a Lua error instead of getting the first match.
	match_index = tonumber( match_index ) or 1
	if match_index == 0 then
		return str._error( 'Match index is out of range' )
	end
	if plain_flag then
		pattern = str._escapePattern( pattern )
	end

	local result
	if match_index == 1 then
		-- Finding the first match is the simple case
		result = mw.ustring.match( s, pattern, start )
	else
		-- Trim the search text to begin at start.  Negative values count from
		-- the end of the string (as they do in the first-match case above),
		-- so every start other than 1 needs the trim, not only start > 1.
		if start ~= 1 then
			s = mw.ustring.sub( s, start )
		end

		local iterator = mw.ustring.gmatch( s, pattern )
		if match_index > 0 then
			-- Forward search: count matches down until the requested one
			for w in iterator do
				match_index = match_index - 1
				if match_index == 0 then
					result = w
					break
				end
			end
		else
			-- Reverse search: collect every match, then index from the end
			-- (match_index = -1 selects the last collected match)
			local matches = {}
			for w in iterator do
				matches[#matches + 1] = w
			end
			result = matches[#matches + 1 + match_index]
		end
	end

	if result == nil then
		if nomatch == nil then
			return str._error( 'Match not found' )
		end
		return nomatch
	end
	return result
end
200 
201 --[[
202 match
203 
204 This function returns a substring from the source string that matches a
205 specified pattern.
206 
207 Usage:
208 {{#invoke:String|match|source_string|pattern_string|start_index|match_number|plain_flag|nomatch_output}}
209 OR
210 {{#invoke:String|match|s=source_string|pattern=pattern_string|start=start_index
211     |match=match_number|plain=plain_flag|nomatch=nomatch_output}}
212 
213 Parameters
214     s: The string to search
215     pattern: The pattern or string to find within the string
216     start: The index within the source string to start the search.  The first
217         character of the string has index 1.  Defaults to 1.
218     match: In some cases it may be possible to make multiple matches on a single
219         string.  This specifies which match to return, where the first match is
220         match= 1.  If a negative number is specified then a match is returned
221         counting from the last match.  Hence match = -1 is the same as requesting
222         the last match.  Defaults to 1.
223     plain: A flag indicating that the pattern should be understood as plain
224         text.  Defaults to false.
225     nomatch: If no match is found, output the "nomatch" value rather than an error.
226 
227 If invoked using named parameters, Mediawiki will automatically remove any leading or
228 trailing whitespace from each string.  In some circumstances this is desirable, in
229 other cases one may want to preserve the whitespace.
230 
231 If the match_number or start_index are out of range for the string being queried, then
232 this function generates an error.  An error is also generated if no match is found.
233 If one adds the parameter ignore_errors=true, then the error will be suppressed and
234 an empty string will be returned on any failure.
235 
236 For information on constructing Lua patterns, a form of [regular expression], see:
237 
238 * http://www.lua.org/manual/5.1/manual.html#5.4.1
239 * http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns
240 * http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns
241 
242 ]]
243 -- This is the entry point for #invoke:String|match
function str.match( frame )
	-- Collect the six supported parameters (named or positional), apply the
	-- defaults and type conversions, and delegate the search to str._match.
	local params = str._getParameters(
		frame.args,
		{ 's', 'pattern', 'start', 'match', 'plain', 'nomatch' }
	)

	return str._match(
		params.s or '',
		params.pattern or '',
		tonumber( params.start ) or 1,
		-- the match number is truncated downwards to a whole number
		math.floor( tonumber( params.match ) or 1 ),
		str._getBoolean( params.plain or false ),
		params.nomatch
	)
end
255 
256 --[[
257 pos
258 
259 This function returns a single character from the target string at position pos.
260 
261 Usage:
262 {{#invoke:String|pos|target_string|index_value}}
263 OR
264 {{#invoke:String|pos|target=target_string|pos=index_value}}
265 
266 Parameters
267     target: The string to search
268     pos: The index for the character to return
269 
270 If invoked using named parameters, Mediawiki will automatically remove any leading or
271 trailing whitespace from the target string.  In some circumstances this is desirable, in
272 other cases one may want to preserve the whitespace.
273 
274 The first character has an index value of 1.
275 
276 If one requests a negative value, this function will select a character by counting backwards
277 from the end of the string.  In other words pos = -1 is the same as asking for the last character.
278 
279 A requested value of zero, or a value greater than the length of the string returns an error.
280 ]]
-- Entry point for #invoke:String|pos.  Returns the single character found at
-- the requested index of the target string, or the output of str._error when
-- the index is zero, non-numeric or beyond the string length.
function str.pos( frame )
	local params = str._getParameters( frame.args, { 'target', 'pos' } )
	local text = params.target or ''
	-- a missing or non-numeric index becomes 0, which is rejected just below
	local index = tonumber( params.pos ) or 0

	if index == 0 then
		return str._error( 'String index out of range' )
	end
	if math.abs( index ) > mw.ustring.len( text ) then
		return str._error( 'String index out of range' )
	end

	return mw.ustring.sub( text, index, index )
end
292 
293 --[[
294 str_find
295 
296 This function duplicates the behavior of {{str_find}}, including all of its quirks.
297 This is provided in order to support existing templates, but is NOT RECOMMENDED for
298 new code and templates.  New code is recommended to use the "find" function instead.
299 
300 Returns the first index in "source" that is a match to "target".  Indexing is 1-based,
301 and the function returns -1 if the "target" string is not present in "source".
302 
303 Important Note: If the "target" string is empty / missing, this function returns a
304 value of "1", which is generally unexpected behavior, and must be accounted for
305 separately.
306 ]]
-- Entry point for #invoke:String|str_find (legacy behaviour).  Performs a
-- plain-text search for "target" inside "source" starting at index 1.
function str.str_find( frame )
	local params = str._getParameters( frame.args, { 'source', 'target' } )
	local haystack = params.source or ''
	local needle = params.target or ''

	-- Legacy quirk kept on purpose: an empty / missing target reports index 1.
	if needle == '' then
		return 1
	end

	local first = mw.ustring.find( haystack, needle, 1, true )
	-- -1 signals "not found"
	return first or -1
end
323 
324 --[[
325 find
326 
327 This function allows one to search for a target string or pattern within another
328 string.
329 
330 Usage:
331 {{#invoke:String|find|source_str|target_string|start_index|plain_flag}}
332 OR
333 {{#invoke:String|find|source=source_str|target=target_str|start=start_index|plain=plain_flag}}
334 
335 Parameters
336     source: The string to search
337     target: The string or pattern to find within source
338     start: The index within the source string to start the search, defaults to 1
339     plain: Boolean flag indicating that target should be understood as plain
340         text and not as a Lua style regular expression, defaults to true
341 
342 If invoked using named parameters, Mediawiki will automatically remove any leading or
343 trailing whitespace from the parameter.  In some circumstances this is desirable, in
344 other cases one may want to preserve the whitespace.
345 
346 This function returns the first index >= "start" where "target" can be found
347 within "source".  Indices are 1-based.  If "target" is not found, then this
348 function returns 0.  If either "source" or "target" are missing / empty, this
349 function also returns 0.
350 
351 This function should be safe for UTF-8 strings.
352 ]]
-- Entry point for #invoke:String|find.  Returns the index of the first match
-- of "target" in "source" found by mw.ustring.find from "start" onwards, or 0
-- when there is no match or when either string is empty.
function str.find( frame )
	local params = str._getParameters( frame.args, { 'source', 'target', 'start', 'plain' } )
	local haystack = params.source or ''
	local needle = params.target or ''
	local init = tonumber( params.start ) or 1
	-- an absent plain flag means "plain text search"
	local plain_arg = params.plain or true

	if haystack == '' or needle == '' then
		return 0
	end

	local is_plain = str._getBoolean( plain_arg )
	local found_at = mw.ustring.find( haystack, needle, init, is_plain )
	return found_at or 0
end
373 
374 --[[
375 replace
376 
377 This function allows one to replace a target string or pattern within another
378 string.
379 
380 Usage:
381 {{#invoke:String|replace|source_str|pattern_string|replace_string|replacement_count|plain_flag}}
382 OR
383 {{#invoke:String|replace|source=source_string|pattern=pattern_string|replace=replace_string|
384    count=replacement_count|plain=plain_flag}}
385 
386 Parameters
387     source: The string to search
388     pattern: The string or pattern to find within source
389     replace: The replacement text
390     count: The number of occurrences to replace, defaults to all.
391     plain: Boolean flag indicating that pattern should be understood as plain
392         text and not as a Lua style regular expression, defaults to true
393 ]]
-- Entry point for #invoke:String|replace.  Returns the source string with
-- matches of the pattern replaced; the source is returned untouched when the
-- source or the pattern is empty.
function str.replace( frame )
	local params = str._getParameters( frame.args, { 'source', 'pattern', 'replace', 'count', 'plain' } )
	local text = params.source or ''
	local pattern = params.pattern or ''
	local replacement = params.replace or ''
	local limit = tonumber( params.count )
	-- an absent plain flag means "plain text replacement"
	local plain_arg = params.plain or true

	if text == '' or pattern == '' then
		return text
	end

	if str._getBoolean( plain_arg ) then
		pattern = str._escapePattern( pattern )
		-- In the replacement text only "%" is special, so double it.
		replacement = mw.ustring.gsub( replacement, "%%", "%%%%" )
	end

	-- Assign to a local so that only gsub's first result (the new string) is
	-- returned, not the substitution count.
	local replaced
	if limit == nil then
		replaced = mw.ustring.gsub( text, pattern, replacement )
	else
		replaced = mw.ustring.gsub( text, pattern, replacement, limit )
	end
	return replaced
end
422 
423 --[[
424     simple function to pipe string.rep to templates.
425 ]]
-- Entry point for #invoke:String|rep.  Repeats the first positional argument
-- (treated as '' when missing) the number of times given by the second one;
-- a non-numeric second argument yields the output of str._error.
function str.rep( frame )
	local raw_count = frame.args[2]
	local repetitions = tonumber( raw_count )
	if repetitions then
		return string.rep( frame.args[1] or '', repetitions )
	end
	return str._error( 'function rep expects a number as second parameter, received "' .. ( raw_count or '' ) .. '"' )
end
433 
434 --[[
435 escapePattern
436 
437 This function escapes special characters from a Lua string pattern. See [1]
438 for details on how patterns work.
439 
440 [1] https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns
441 
442 Usage:
443 {{#invoke:String|escapePattern|pattern_string}}
444 
445 Parameters
446     pattern_string: The pattern string to escape.
447 ]]
-- Entry point for #invoke:String|escapePattern.  Escapes the first positional
-- argument via str._escapePattern, or returns the output of str._error when
-- that argument is absent.
function str.escapePattern( frame )
	local raw = frame.args[1]
	if raw then
		-- The parentheses keep exactly one return value from the helper.
		return ( str._escapePattern( raw ) )
	end
	return str._error( 'No pattern string specified' )
end
456 
457 --[[
458 count
459 This function counts the number of occurrences of one string in another.
460 ]]
-- Entry point for #invoke:String|count.  Returns how many substitutions
-- mw.ustring.gsub performs when every match of "pattern" in "source" is
-- replaced; the pattern is escaped first unless plain is turned off.
function str.count(frame)
	local params = str._getParameters(frame.args, {'source', 'pattern', 'plain'})
	local haystack = params.source or ''
	local needle = params.pattern or ''

	-- an absent plain flag means "plain text"
	if str._getBoolean(params.plain or true) then
		needle = str._escapePattern(needle)
	end

	-- gsub's second result is the substitution count; the outer parentheses
	-- make sure nothing but that count is returned.
	return (select(2, mw.ustring.gsub(haystack, needle, '')))
end
472 
473 --[[
474 endswith
475 This function determines whether a string ends with another string.
476 ]]
-- Entry point for #invoke:String|endswith.  Returns "yes" when "source" ends
-- with "pattern" (compared as literal text) and "" otherwise.
function str.endswith(frame)
	local params = str._getParameters(frame.args, {'source', 'pattern'})
	local text = params.source or ''
	local suffix = params.pattern or ''

	-- The empty string is a suffix of every string.
	if suffix == '' then
		return "yes"
	end

	-- Take as many trailing characters as the suffix is long and compare.
	local tail = mw.ustring.sub(text, -mw.ustring.len(suffix), -1)
	if tail == suffix then
		return "yes"
	end
	return ""
end
491 
492 --[[
493 join
494 
495 Join all non empty arguments together; the first argument is the separator.
496 Usage:
497 {{#invoke:String|join|sep|one|two|three}}
498 ]]
-- Entry point for #invoke:String|join.  The first positional argument is the
-- separator; the remaining non-empty positional arguments are concatenated
-- with it.  With no arguments at all the result is the empty string.
function str.join(frame)
	local pieces = {}
	local sep
	for _, value in ipairs( frame.args ) do
		if not sep then
			-- nothing stored yet: this value becomes the separator
			sep = value
		elseif value ~= '' then
			pieces[#pieces + 1] = value
		end
	end
	return table.concat( pieces, sep or '' )
end
513 
514 --[[
515 Helper function that populates the argument list given that user may need to use a mix of
516 named and unnamed parameters.  This is relevant because named parameters are not
517 identical to unnamed parameters due to string trimming, and when dealing with strings
518 we sometimes want to either preserve or remove that whitespace depending on the application.
519 ]]
-- Builds a table keyed by the names in arg_list.  Each name is looked up in
-- frame_args first; when it is absent, the next unused positional argument is
-- taken instead.  The positional counter advances only when a positional slot
-- is consulted, so named and unnamed parameters can be mixed.
function str._getParameters( frame_args, arg_list )
	local resolved = {}
	local next_positional = 1

	for _, name in ipairs( arg_list ) do
		local value = frame_args[name]
		if value == nil then
			value = frame_args[next_positional]
			next_positional = next_positional + 1
		end
		resolved[name] = value
	end

	return resolved
end
536 
537 --[[
538 Helper function to handle error messages.
539 ]]
-- Formats error_str as the module's error output.  The options are read from
-- the args of the frame returned by mw.getCurrentFrame():
--     ignore_errors:  when truthy, '' is returned instead of a message
--     error_category: category to attach, with a built-in default
--     no_category:    when truthy, no category link is added
function str._error( error_str )
	local args = mw.getCurrentFrame().args

	if str._getBoolean( args.ignore_errors or false ) then
		return ''
	end

	local message = '<strong class="error">String Module Error: ' .. error_str .. '</strong>'
	local category = args.error_category or 'Errors reported by Module String'
	-- An explicitly empty category also suppresses the category link.
	if category ~= '' and not str._getBoolean( args.no_category or false ) then
		message = '[[Category:' .. category .. ']]' .. message
	end

	return message
end
557 
558 --[[
559 Helper Function to interpret boolean strings
560 ]]
-- Converts a flag value to a real boolean.  Booleans pass through unchanged.
-- Strings are compared case-insensitively: 'false', 'no', '0' and '' are
-- false, every other string is true.  Any other type raises an error.
function str._getBoolean( boolean_str )
	local kind = type( boolean_str )

	if kind == 'boolean' then
		return boolean_str
	end
	if kind ~= 'string' then
		error( 'No boolean value found' )
	end

	local lowered = boolean_str:lower()
	return not ( lowered == 'false' or lowered == 'no' or lowered == '0' or lowered == '' )
end
579 
580 --[[
581 Helper function that escapes all pattern characters so that they will be treated
582 as plain text.
583 ]]
-- Returns pattern_str with every Lua pattern magic character
-- ( ) . % + - * ? [ ^ $ ] prefixed by "%", so the result matches the original
-- text literally when used as a pattern.
function str._escapePattern( pattern_str )
	-- gsub returns the new string followed by the substitution count.  The
	-- outer parentheses drop the count so callers receive only the escaped
	-- string, even when this call is the last item of an argument or return
	-- list.
	return ( mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" ) )
end
587 
588 return str