Module:Citation/CS1/COinS
Jump to navigation
Jump to search
Documentation for this module may be created at Module:Citation/CS1/COinS/doc
1 --[[--------------------------< F O R W A R D D E C L A R A T I O N S >--------------------------------------
2 ]]
3
4 local is_set, in_array, remove_wiki_link, strip_apostrophe_markup; -- functions in Module:Citation/CS1/Utilities
5
6 local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
7
8
--[[--------------------------< M A K E _ C O I N S _ T I T L E >----------------------------------------------

Builds the COinS title from a title and / or its script counterpart (Title and ScriptTitle, or any other
name-script pair).

Each part that is set has its apostrophe markup (bold, italics) stripped so that the metadata does not fill up with
%27%27... sequences.  A leading two-lowercase-letter language prefix ('ja:', 'zh :', ...) is removed from the script
value before it is stripped.  When both parts end up set they are joined with a single space; a part that is not set
contributes an empty string.

]]

local function make_coins_title (title, script)
	local name = '';															-- stays empty when title is not set
	if is_set (title) then
		name = strip_apostrophe_markup (title);									-- drop bold / italic apostrophe markup
	end

	local script_name = '';														-- stays empty when script is not set
	if is_set (script) then
		local unprefixed = script:gsub ('^%l%l%s*:%s*', '');						-- drop language prefix if present (result may be an empty string)
		script_name = strip_apostrophe_markup (unprefixed);						-- drop bold / italic apostrophe markup
	end

	local separator = (is_set (name) and is_set (script_name)) and ' ' or '';		-- a space only when there are two parts to separate
	return name .. separator .. script_name;
end
35
36
--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------

Returns a copy of argument in which every lua magic pattern character ( ^ $ ( ) % . [ ] * + - ? ) is preceded by '%'.

string.gsub() and friends interpret their pattern argument as a pattern, not as a literal string, so text that is to be
matched literally has to be escaped first.

]]

local function escape_lua_magic_chars (argument)
	local escaped = argument:gsub ("[%%%^%$%(%)%.%[%]%*%+%-%?]", "%%%0");			-- '%' is part of the set so one pass covers every magic character
	return escaped;																-- the escaped string only; gsub's match count is dropped
end
48
49
--[[--------------------------< G E T _ C O I N S _ P A G E S >------------------------------------------------

Extracts page numbers from external wikilinks in any of the |page=, |pages=, or |at= parameters for use in COinS.

The url (and the whitespace that follows it) of each external link is removed, then the remaining square brackets
are removed, then endashes and named html entities are replaced with hyphens.  A value that is not set is returned
as is.

]]

local function get_coins_pages (pages)
	if not is_set (pages) then													-- no page numbers; nothing to do
		return pages;
	end

	local url_pattern = "%[(%w*:?//[^ ]+%s+)[%w%d].*%]";							-- captures the url and following space(s) of "[url text]"
	local url = pages:match (url_pattern);
	while nil ~= url do															-- until there are no more urls
		local literal = escape_lua_magic_chars (url);							-- the captured text must be matched literally
		pages = pages:gsub (literal, "");										-- remove every instance of this url
		url = pages:match (url_pattern);
	end

	local cleaned = pages:gsub ("[%[%]]", "");									-- remove the brackets
	cleaned = cleaned:gsub ("–", "-");											-- replace endashes with hyphens
	cleaned = cleaned:gsub ("&%w+;", "-");										-- replace named html entities (&ndash; etc.) with hyphens; numeric entities (&#32; and the like) are not handled
	return cleaned;
end
71
72
--[=[-------------------------< C O I N S _ R E P L A C E _ M A T H _ S T R I P M A R K E R >------------------

There are three options for math markup rendering that depend on the editor's math preference settings.  These
settings are at [[Special:Preferences#mw-prefsection-rendering]] and are
	PNG images
	TeX source
	MathML with SVG or PNG fallback

All three are heavy with html and css which doesn't belong in the metadata.

Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings
of the last editor to save the page.

This function gets the rendered form of an equation according to the editor's preference before the page is saved.  It
then searches the rendering for the text equivalent of the rendered equation and replaces the first math stripmarker
in value with that text, so that the page is saved without extraneous html/css markup and with a reasonably readable
text form of the equation.

Returns two values:
	true and the value with the first math stripmarker replaced, when a replacement is made
	false and the initial value, when value has no math stripmarker or when the rendering is not one of the three
		recognized forms

Only one stripmarker is replaced per call; to replace multiple equations it is necessary to call this function from
within a loop.

]=]

local function coins_replace_math_stripmarker (value)
	local stripmarker = cfg.stripmarkers['math'];
	local marker = value:match (stripmarker);									-- is there a math stripmarker?

	if not marker then															-- when value doesn't have a math stripmarker, abandon this test
		return false, value;
	end

	local rendering = mw.text.unstripNoWiki (marker);							-- convert stripmarker into rendered value

	local math_text = rendering:match ('alt="([^"]+)"')							-- PNG math option: the text is in the image's alt attribute
		or rendering:match ('%$%s+(.+)%s+%$')									-- TeX math option: the text is between $ ... $; a legitimate $ character is escaped as \$
		or rendering:match ('<annotation[^>]+>(.+)</annotation>');				-- MathML math option: the text is in the <annotation> element

	if not math_text then
		return false, value;													-- had math stripmarker but not one of the three defined forms
	end

	-- gsub() treats '%' in a replacement string as a capture escape; math text can contain '%' (TeX \% for example)
	-- so it is doubled here to make the text substitute literally
	local replacement = math_text:gsub ('%%', '%%%%');
	local replaced_value = value:gsub (stripmarker, replacement, 1);				-- first stripmarker only; gsub's match count is dropped
	return true, replaced_value;
end
117
118
--[[--------------------------< C O I N S _ C L E A N U P >----------------------------------------------------

Cleans up a parameter value for the metadata by removing or replacing invisible characters and certain html
entities.  In order:
	math stripmarkers are replaced with a text form of the equation; any that can't be replaced become
		"MATH RENDER ERROR"
	nowiki stripmarkers are replaced with their content
	the markup that {{'}} and {{'s}} produce is replaced with a simple apostrophe or apostrophe-s
	&nbsp; entities and hair spaces are replaced with a plain space
	&zwj; entities, zero-width joiners, zero-width spaces and soft hyphens are removed, unless value matches
		cfg.indic_script
	horizontal tabs, line feeds and carriage returns are replaced with a plain space

The html entities are matched as entity text ('&#39;', '&nbsp;', '&zwj;'), not as the characters that they stand for.

2015-12-10: there is a bug in mw.text.unstripNoWiki ().  It replaces math stripmarkers with the appropriate content
when it shouldn't.  See https://phabricator.wikimedia.org/T121085 and Wikipedia_talk:Lua#stripmarkers_and_mw.text.unstripNoWiki.28.29

TODO: move the replacement patterns and replacement values into a table in /Configuration similar to the invisible
characters table?

]]

local function coins_cleanup (value)
	local replaced = true;														-- default state to get the loop running

	while replaced do															-- loop until all math stripmarkers replaced
		replaced, value = coins_replace_math_stripmarker (value);				-- replace math stripmarker with text representation of the equation
	end

	value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR");			-- one or more couldn't be replaced; insert vague error message

	value = mw.text.unstripNoWiki (value);										-- replace nowiki stripmarkers with their content
	value = value:gsub ('<span class="nowrap" style="padding%-left:0%.1em;">&#39;(s?)</span>', "'%1");	-- replace {{'}} or {{'s}} with simple apostrophe or apostrophe-s
	value = value:gsub ('&nbsp;', ' ');											-- replace &nbsp; entity with plain space
	value = value:gsub ('\226\128\138', ' ');									-- replace hair space with plain space
	if not mw.ustring.find (value, cfg.indic_script) then						-- don't remove zero-width joiner characters from indic script
		value = value:gsub ('&zwj;', '');										-- remove &zwj; entities
		value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', '');	-- remove zero-width joiner, zero-width space, soft hyphen
	end
	value = value:gsub ('[\009\010\013]', ' ');									-- replace horizontal tab, line feed, carriage return with plain space
	return value;
end
151
152
--[[--------------------------< C O I N S >--------------------------------------------------------------------

COinS metadata (see <http://ocoins.info/>) allows automated tools to parse the citation information.

data is a table of metadata parameter values; class selects which metadata format (journal, book, dissertation) and
genre are emitted.  Returns the metadata as a string of key=value pairs joined with '&', with the ctx_ver pair first
and the other pairs in the order in which they were added.  Returns an empty string when data is not a table or is
an empty table.

Only values that are set are emitted.  Every value has its wikilink markup removed and is then url-encoded.

Every member of data, except ID_list and Authors, is replaced in place with its cleaned-up form.

]]

local function COinS (data, class)
	if 'table' ~= type (data) or nil == next (data) then
		return '';
	end

	for name, value in pairs (data) do											-- spin through all of the metadata parameter values
		if 'ID_list' ~= name and 'Authors' ~= name then							-- except the ID_list and Author tables (author names are cleaned when the Author table is processed)
			data[name] = coins_cleanup (value);
		end
	end

	local ctx_ver = "Z39.88-2004";
	local output = {};															-- sequence of 'key=value' strings

	local function add (key, value)												-- append key=value to output, but only when value is set
		if is_set (value) then
			output[#output + 1] = table.concat {key, '=', mw.uri.encode (remove_wiki_link (value))};
		end
	end

	local is_periodical = in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn', 'journal', 'news', 'magazine'})
		or (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set (data.Periodical))
		or ('citation' == class and is_set (data.Periodical) and not is_set (data.Encyclopedia));

	if is_periodical then
		add ("rft_val_fmt", "info:ofi/fmt:kev:mtx:journal");					-- journal metadata identifier
		if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn'}) then		-- set genre according to the type of citation template we are rendering
			add ("rft.genre", "preprint");										-- cite arxiv, cite biorxiv, cite citeseerx, cite ssrn
		elseif 'conference' == class then
			add ("rft.genre", "conference");									-- cite conference (when Periodical set)
		elseif 'web' == class then
			add ("rft.genre", "unknown");										-- cite web (when Periodical set)
		else
			add ("rft.genre", "article");										-- journal and other 'periodical' articles
		end
		add ("rft.jtitle", data.Periodical);									-- journal only
		add ("rft.atitle", data.Title);											-- 'periodical' article titles

																				-- these used only for periodicals
		add ("rft.ssn", data.Season);											-- keywords: winter, spring, summer, fall
		add ("rft.chron", data.Chron);											-- free-form date components
		add ("rft.volume", data.Volume);										-- does not apply to books
		add ("rft.issue", data.Issue);
		add ("rft.pages", data.Pages);											-- also used in book metadata

	elseif 'thesis' ~= class then												-- all others except cite thesis are treated as 'book' metadata; genre distinguishes
		add ("rft_val_fmt", "info:ofi/fmt:kev:mtx:book");						-- book metadata identifier
		if 'report' == class or 'techreport' == class then						-- cite report and cite techreport
			add ("rft.genre", "report");
		elseif 'conference' == class then										-- cite conference when Periodical not set
			add ("rft.genre", "conference");
			add ("rft.atitle", data.Chapter);									-- conference paper as chapter in proceedings (book)
		elseif in_array (class, {'book', 'citation', 'encyclopaedia', 'interview', 'map'}) then
			if is_set (data.Chapter) then
				add ("rft.genre", "bookitem");
				add ("rft.atitle", data.Chapter);								-- book chapter, encyclopedia article, interview in a book, or map title
			elseif 'map' == class or 'interview' == class then
				add ("rft.genre", 'unknown');									-- standalone map or interview
			else
				add ("rft.genre", 'book');										-- book and encyclopedia
			end
		else																	-- every other class
			add ("rft.genre", "unknown");
		end
		add ("rft.btitle", data.Title);											-- book only
		add ("rft.place", data.PublicationPlace);								-- book only
		add ("rft.series", data.Series);										-- book only
		add ("rft.pages", data.Pages);											-- book, journal
		add ("rft.edition", data.Edition);										-- book only
		add ("rft.pub", data.PublisherName);									-- book and dissertation

	else																		-- cite thesis
		add ("rft_val_fmt", "info:ofi/fmt:kev:mtx:dissertation");				-- dissertation metadata identifier
		add ("rft.title", data.Title);											-- dissertation (also patent but that is not yet supported)
		add ("rft.degree", data.Degree);										-- dissertation only
		add ('rft.inst', data.PublisherName);									-- book and dissertation
	end

																				-- and now common parameters (as much as possible)
	add ("rft.date", data.Date);												-- book, journal, dissertation

	for id_name, id_value in pairs (data.ID_list) do							-- what to do about these? For now assume that they are common to all?
		if id_name == 'ISBN' then
			id_value = id_value:gsub ("[^-0-9X]", "");							-- keep only hyphens, digits and X
		end
		local id = cfg.id_handlers[id_name].COinS;
		local id_kind = id or "";
		if string.sub (id_kind, 1, 4) == 'info' then							-- for ids that are in the info:registry
			add ("rft_id", table.concat {id, "/", id_value});
		elseif string.sub (id_kind, 1, 3) == 'rft' then							-- for isbn, issn, eissn, etc that have defined COinS keywords
			add (id, id_value);
		elseif id then															-- when cfg.id_handlers[id_name].COinS is not nil
			add ("rft_id", table.concat {cfg.id_handlers[id_name].prefix, id_value});	-- others; provide a url
		end
	end

	for index, author in ipairs (data.Authors) do
		local last, first = coins_cleanup (author.last), coins_cleanup (author.first or '');	-- replace any nowiki stripmarkers, non-printing or invisible characters
		if is_set (last) then													-- a name without a last part is not emitted
			if not is_set (first) then
				add ("rft.au", last);											-- last name only, whatever the author's position
			elseif 1 == index then												-- first author with both parts gets the split form
				add ("rft.aulast", last);										-- book, journal, dissertation
				add ("rft.aufirst", first);										-- book, journal, dissertation
			else																-- all other authors with both parts
				add ("rft.au", table.concat {last, ", ", first});				-- book, journal, dissertation
			end
		end
	end

	add ("rft_id", data.URL);
	add ("rfr_id", table.concat {"info:sid/", mw.site.server:match ("[^/]*$"), ":", data.RawPage});

	table.insert (output, 1, "ctx_ver=" .. ctx_ver);							-- version string always first; the rest stays in insertion order (not sorted)
	return table.concat (output, "&");
end
280
281
--[[--------------------------< S E T _ S E L E C T E D _ M O D U L E S >--------------------------------------

Points this module's cfg at the supplied configuration table and takes is_set, in_array, remove_wiki_link and
strip_apostrophe_markup from the supplied utilities table, so that this module uses the same (live or sandbox)
Configuration and Utilities as the other modules.

]]

local function set_selected_modules (cfg_table_ptr, utilities_page_ptr)
	cfg = cfg_table_ptr;

	local utilities = utilities_page_ptr;										-- the selected Module:Citation/CS1/Utilities module
	is_set, in_array, remove_wiki_link, strip_apostrophe_markup =
		utilities.is_set,
		utilities.in_array,
		utilities.remove_wiki_link,
		utilities.strip_apostrophe_markup;
end
296
297
--[[--------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------

The functions that this module makes available to the modules that require it.

]]

return {
	COinS = COinS,
	get_coins_pages = get_coins_pages,
	make_coins_title = make_coins_title,
	set_selected_modules = set_selected_modules,
}