Jump to content

Module:Citation/CS1/Date validation and Module:Citation/CS1/Date validation/sandbox: Difference between pages

(Difference between pages)
Page 1
Page 2
Content deleted Content added
No edit summary
 
No edit summary
 
Line 1: Line 1:
--[[
History of changes since last sync: 2025-08-30

2026-03-28: fix date disambiguation letter error; see Help_talk:Citation_Style_1#date_disambiguation_error
2026-03-29: dismissed a couple of TODOs;

]]


--[[--------------------------< F O R W A R D D E C L A R A T I O N S >--------------------------------------
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >--------------------------------------
]]
]]
Line 12: Line 21:
]]
]]


local lang_object = mw.getContentLanguage(); -- used by is_valid_accessdate(), is_valid_year(), date_name_xlate(); TODO: move to ~/Configuration?
local lang_obj = mw.getContentLanguage(); -- used by is_valid_accessdate(), is_valid_year(), date_name_xlate();
local year_limit; -- used by is_valid_year()
local year_limit; -- used by is_valid_year()


Line 39: Line 48:
local access_ts, tomorrow_ts; -- to hold Unix time stamps representing the dates
local access_ts, tomorrow_ts; -- to hold Unix time stamps representing the dates


good1, access_ts = pcall (lang_object.formatDate, lang_object, 'U', accessdate ); -- convert accessdate value to Unix timestamp
good1, access_ts = pcall (lang_obj.formatDate, lang_obj, 'U', accessdate ); -- convert accessdate value to Unix timestamp
good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow
good2, tomorrow_ts = pcall (lang_obj.formatDate, lang_obj, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow
if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand
if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand
access_ts = tonumber (access_ts) or lang_object:parseFormattedNumber (access_ts); -- convert to numbers for the comparison;
access_ts = tonumber (access_ts) or lang_obj:parseFormattedNumber (access_ts); -- convert to numbers for the comparison;
tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);
tomorrow_ts = tonumber (tomorrow_ts) or lang_obj:parseFormattedNumber (tomorrow_ts);
else
else
return false; -- one or both failed to convert to Unix time stamp
return false; -- one or both failed to convert to Unix time stamp
Line 185: Line 194:
end
end


year = tonumber (year) or lang_object:parseFormattedNumber (year); -- convert to number for the comparison
year = tonumber (year) or lang_obj:parseFormattedNumber (year); -- convert to number for the comparison
if year and (100 > year) then -- years less than 100 not supported
if year and (100 > year) then -- years less than 100 not supported
return false;
return false;
Line 324: Line 333:
local date; -- one date or first date in a range
local date; -- one date or first date in a range
local date2 = ''; -- end of range date
local date2 = ''; -- end of range date
input.year = tonumber (input.year) or lang_object:parseFormattedNumber (input.year); -- language-aware tonumber()
input.year = tonumber (input.year) or lang_obj:parseFormattedNumber (input.year); -- language-aware tonumber()
input.year2 = tonumber (input.year2) or lang_object:parseFormattedNumber (input.year2); -- COinS dates are pseudo-ISO 8601 so convert to Arabic numerals
input.year2 = tonumber (input.year2) or lang_obj:parseFormattedNumber (input.year2); -- COinS dates are pseudo-ISO 8601 so convert to Arabic numerals


if ((1582 == input.year) and (10 > tonumber(input.month))) or (1582 > input.year) then -- if a Julian calendar date
if ((1582 == input.year) and (10 > tonumber(input.month))) or (1582 > input.year) then -- if a Julian calendar date
Line 400: Line 409:
['ymd'] = {'^([1-9]%d%d%d)%-(%d%d)%-(%d%d)$', 'y', 'm', 'd'},
['ymd'] = {'^([1-9]%d%d%d)%-(%d%d)%-(%d%d)$', 'y', 'm', 'd'},
-- month-initial: month day, year
-- month-initial: month day, year
['Mdy'] = {'^(%D-) +([1-9]%d?), +(([1-9]%d%d%d?)%a?)$', 'm', 'd', 'a', 'y'},
['Mdy'] = {'^(%D-) +([1-9]%d?), +(([1-9]%d%d%d?)%l?)$', 'm', 'd', 'a', 'y'},
-- month-initial day range: month day–day, year; days are separated by endash
-- month-initial day range: month day–day, year; days are separated by endash
['Md-dy'] = {'^(%D-) +([1-9]%d?)[%-–]([1-9]%d?), +(([1-9]%d%d%d?)%a?)$', 'm', 'd', 'd2', 'a', 'y'},
['Md-dy'] = {'^(%D-) +([1-9]%d?)[%-–]([1-9]%d?), +(([1-9]%d%d%d?)%l?)$', 'm', 'd', 'd2', 'a', 'y'},
-- day-initial: day month year
-- day-initial: day month year
['dMy'] = {'^([1-9]%d?) +(%D-) +(([1-9]%d%d%d?)%a?)$', 'd', 'm', 'a', 'y'},
['dMy'] = {'^([1-9]%d?) +(%D-) +(([1-9]%d%d%d?)%l?)$', 'd', 'm', 'a', 'y'},
-- year-initial: year month day; day: 1 or 2 digits, leading zero allowed; not supported at en.wiki
-- year-initial: year month day; day: 1 or 2 digits, leading zero allowed; not supported at en.wiki
-- ['yMd'] = {'^(([1-9]%d%d%d?)%a?) +(%D-) +(%d%d?)$', 'a', 'y', 'm', 'd'},
-- ['yMd'] = {'^(([1-9]%d%d%d?)%l?) +(%D-) +(%d%d?)$', 'a', 'y', 'm', 'd'},
-- day-range-initial: day–day month year; days are separated by endash
-- day-range-initial: day–day month year; days are separated by endash
['d-dMy'] = {'^([1-9]%d?)[%-–]([1-9]%d?) +(%D-) +(([1-9]%d%d%d?)%a?)$', 'd', 'd2', 'm', 'a', 'y'},
['d-dMy'] = {'^([1-9]%d?)[%-–]([1-9]%d?) +(%D-) +(([1-9]%d%d%d?)%l?)$', 'd', 'd2', 'm', 'a', 'y'},
-- day initial month-day-range: day month - day month year; uses spaced endash
-- day initial month-day-range: day month - day month year; uses spaced endash
['dM-dMy'] = {'^([1-9]%d?) +(%D-) +[%-–] +([1-9]%d?) +(%D-) +(([1-9]%d%d%d?)%a?)$', 'd', 'm', 'd2', 'm2', 'a', 'y'},
['dM-dMy'] = {'^([1-9]%d?) +(%D-) +[%-–] +([1-9]%d?) +(%D-) +(([1-9]%d%d%d?)%l?)$', 'd', 'm', 'd2', 'm2', 'a', 'y'},
-- month initial month-day-range: month day – month day, year; uses spaced endash
-- month initial month-day-range: month day – month day, year; uses spaced endash
['Md-Mdy'] = {'^(%D-) +([1-9]%d?) +[%-–] +(%D-) +([1-9]%d?), +(([1-9]%d%d%d?)%a?)$','m', 'd', 'm2', 'd2', 'a', 'y'},
['Md-Mdy'] = {'^(%D-) +([1-9]%d?) +[%-–] +(%D-) +([1-9]%d?), +(([1-9]%d%d%d?)%l?)$','m', 'd', 'm2', 'd2', 'a', 'y'},
-- day initial month-day-year-range: day month year - day month year; uses spaced endash
-- day initial month-day-year-range: day month year - day month year; uses spaced endash
['dMy-dMy'] = {'^([1-9]%d?) +(%D-) +([1-9]%d%d%d?) +[%-–] +([1-9]%d?) +(%D-) +(([1-9]%d%d%d?)%a?)$', 'd', 'm', 'y', 'd2', 'm2', 'a', 'y2'},
['dMy-dMy'] = {'^([1-9]%d?) +(%D-) +([1-9]%d%d%d?) +[%-–] +([1-9]%d?) +(%D-) +(([1-9]%d%d%d?)%l?)$', 'd', 'm', 'y', 'd2', 'm2', 'a', 'y2'},
-- month initial month-day-year-range: month day, year – month day, year; uses spaced endash
-- month initial month-day-year-range: month day, year – month day, year; uses spaced endash
['Mdy-Mdy'] = {'^(%D-) +([1-9]%d?), +([1-9]%d%d%d?) +[%-–] +(%D-) +([1-9]%d?), +(([1-9]%d%d%d?)%a?)$', 'm', 'd', 'y', 'm2', 'd2', 'a', 'y2'},
['Mdy-Mdy'] = {'^(%D-) +([1-9]%d?), +([1-9]%d%d%d?) +[%-–] +(%D-) +([1-9]%d?), +(([1-9]%d%d%d?)%l?)$', 'm', 'd', 'y', 'm2', 'd2', 'a', 'y2'},


-- these date formats cannot be converted, per se, but month name can be rendered short or long
-- these date formats cannot be converted, per se, but month name can be rendered short or long
-- month/season year - month/season year; separated by spaced endash
-- month/season year - month/season year; separated by spaced endash
['My-My'] = {'^(%D-) +([1-9]%d%d%d?) +[%-–] +(%D-) +(([1-9]%d%d%d?)%a?)$', 'm', 'y', 'm2', 'a', 'y2'},
['My-My'] = {'^(%D-) +([1-9]%d%d%d?) +[%-–] +(%D-) +(([1-9]%d%d%d?)%l?)$', 'm', 'y', 'm2', 'a', 'y2'},
-- month/season range year; months separated by endash
-- month/season range year; months separated by endash
['M-My'] = {'^(%D-)[%-–](%D-) +(([1-9]%d%d%d?)%a?)$', 'm', 'm2', 'a', 'y'},
['M-My'] = {'^(%D-)[%-–](%D-) +(([1-9]%d%d%d?)%l?)$', 'm', 'm2', 'a', 'y'},
-- month/season year or proper-name year; quarter year when First Quarter YYYY etc.
-- month/season year or proper-name year; quarter year when First Quarter YYYY etc.
['My'] = {'^([^%d–]-) +(([1-9]%d%d%d?)%a?)$', 'm', 'a', 'y'}, -- this way because endash is a member of %D; %D- will match January–March 2019 when it shouldn't
['My'] = {'^([^%d–]-) +(([1-9]%d%d%d?)%l?)$', 'm', 'a', 'y'}, -- this way because endash is a member of %D; %D- will match January–March 2019 when it shouldn't


-- these date formats cannot be converted
-- these date formats cannot be converted
['Sy4-y2'] = {'^(%D-) +(([1-9]%d)%d%d)[%-–]((%d%d)%a?)$'}, -- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash
['Sy4-y2'] = {'^(%D-) +(([1-9]%d)%d%d)[%-–]((%d%d)%l?)$'}, -- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash
['Sy-y'] = {'^(%D-) +([1-9]%d%d%d?)[%-–](([1-9]%d%d%d?)%a?)$'}, -- special case Winter/Summer year-year; year separated with unspaced endash
['Sy-y'] = {'^(%D-) +([1-9]%d%d%d?)[%-–](([1-9]%d%d%d?)%l?)$'}, -- special case Winter/Summer year-year; year separated with unspaced endash
['y-y'] = {'^([1-9]%d%d%d?)[%-–](([1-9]%d%d%d?)%a?)$'}, -- year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
['y-y'] = {'^([1-9]%d%d%d?)[%-–](([1-9]%d%d%d?)%l?)$'}, -- year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
['y4-y2'] = {'^(([1-9]%d)%d%d)[%-–]((%d%d)%a?)$'}, -- year range: YYYY–YY; separated by unspaced endash
['y4-y2'] = {'^(([1-9]%d)%d%d)[%-–]((%d%d)%l?)$'}, -- year range: YYYY–YY; separated by unspaced endash
['y'] = {'^(([1-9]%d%d%d?)%a?)$'}, -- year; here accept either YYY or YYYY
['y'] = {'^(([1-9]%d%d%d?)%l?)$'}, -- year; here accept either YYY or YYYY
-- these date formats cannot be converted
['n.d.'] = {'^((n%.d%.)%l?)$'}, -- no date with dots
['nd'] = {'^((nd)%l?)$'}, -- no date
}
}


Line 709: Line 722:
if is_set(v.val) then -- if the parameter has a value
if is_set(v.val) then -- if the parameter has a value
v.val = mw.ustring.gsub(v.val, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9
v.val = mw.ustring.gsub(v.val, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9
if v.val:match("^c%. [1-9]%d%d%d?%a?$") then -- special case for c. year with or without CITEREF disambiguator - only |date= and |year=
if v.val:match("^c%. [1-9]%d%d%d?%l?$") then -- special case for c. year with or without CITEREF disambiguator - only |date= and |year=
local year = v.val:match("c%. ([1-9]%d%d%d?)%a?"); -- get the year portion so it can be tested
local year = v.val:match("c%. ([1-9]%d%d%d?)%l?"); -- get the year portion so it can be tested
if 'date' == k then
if 'date' == k then
anchor_year, COinS_date = v.val:match("((c%. [1-9]%d%d%d?)%a?)"); -- anchor year and COinS_date only from |date= parameter
anchor_year, COinS_date = v.val:match("((c%. [1-9]%d%d%d?)%l?)"); -- anchor year and COinS_date only from |date= parameter
good_date = is_valid_year(year);
good_date = is_valid_year(year);
elseif 'year' == k then
elseif 'year' == k then
Line 720: Line 733:
end
end
elseif 'date' == k then -- if the parameter is |date=
elseif 'date' == k then -- if the parameter is |date=
if v.val:match("^n%.d%.%a?$") then -- ToDo: I18N -- if |date=n.d. with or without a CITEREF disambiguator
if v.val:match (patterns_t['n.d.'][1]) then -- if |date=n.d. with or without a CITEREF disambiguator
good_date, anchor_year, COinS_date = true, v.val:match("((n%.d%.)%a?)"); -- ToDo: I18N -- "n.d."; no error when date parameter is set to no date
good_date, anchor_year, COinS_date = true, v.val:match (patterns_t['n.d.'][1]); -- "n.d."; no error when date parameter is set to no date
elseif v.val:match("^nd%a?$") then -- ToDo: I18N -- if |date=nd with or without a CITEREF disambiguator
elseif v.val:match (patterns_t.nd[1]) then -- if |date=nd with or without a CITEREF disambiguator
good_date, anchor_year, COinS_date = true, v.val:match("((nd)%a?)"); -- ToDo: I18N -- "nd"; no error when date parameter is set to no date
good_date, anchor_year, COinS_date = true, v.val:match (patterns_t.nd[1]); -- "nd"; no error when date parameter is set to no date
else
else
good_date, anchor_year, COinS_date = check_date (v.val, k, tCOinS_date); -- go test the date
good_date, anchor_year, COinS_date = check_date (v.val, k, tCOinS_date); -- go test the date
end
end
elseif 'year' == k then -- if the parameter is |year= it should hold only a year value
elseif 'year' == k then -- if the parameter is |year= it should hold only a year value
if v.val:match("^[1-9]%d%d%d?%a?$") then -- if |year = 3 or 4 digits only with or without a CITEREF disambiguator
if v.val:match("^[1-9]%d%d%d?%l?$") then -- if |year = 3 or 4 digits only with or without a CITEREF disambiguator
good_date, anchor_year, COinS_date = true, v.val:match("((%d+)%a?)");
good_date, anchor_year, COinS_date = true, v.val:match("((%d+)%l?)");
else
else
good_date = false;
good_date = false;