Module:Sandbox/trappist the monk/taxonomy

require('strict');

--[[--< T A X O M A P >

this is a table of sequence tables that each list the first and last taxon name in a particular module. module_select uses these tables to determine which data module 'should' have the data for the taxon name.

[1] first taxon name in the data module [2] last taxon name in the data module [3] suffix appended to the base module name to create: 'Module:Sandbox/trappist the monk/taxonomy <suffix>'

These tables are created by an awb script when it splits much larger raw data file before the splits are uploaded to en.wiki.

]]

-- Range map used by module_select: keyed by the uppercase initial letter of a
-- taxon name; each value is a sequence of {first taxon, last taxon, suffix}
-- triples, one per data module.  Letters with no entry here are not split.
-- created/updated: 2021-10-24
local taxomap_t = {
	A = {
		{'ADA clade', 'Acratus', 'A1'},
		{'Acrecebus', 'Africotriton', 'A2'},
		{'Afrida', 'Alicia (plant)', 'A3'},
		{'Aliciella', 'Ammosperma', 'A4'},
		{'Ammospermophilus', 'Anenthemonae', 'A5'},
		{'Anentome', 'Aorangia', 'A6'},
		{'Aoranthe', 'Archarius', 'A7'},
		{'Archasia', 'Asemonea', 'A8'},
		{'Asemoneinae', 'Aurana', 'A9'},
		{'Auranticarpa', 'Azygopus', 'A10'},
		},
	B = {
		{'BOP clade', 'Bauruoolithus', 'B1'},
		{'Baurusuchia', 'Bloomeria', 'B2'},
		{'Blosnavirus', 'Brasilentulus', 'B3'},
		{'Brasileodactylus', 'Byturus', 'B4'},
		},
	C = {
		{'CRuMs', 'Camillina', 'C1'},
		{'Caminus', 'Cassianellidae', 'C2'},
		{'Cassianopsinae', 'Cereus', 'C3'},
		{'Cereus (anemone)', 'Chernes', 'C4'},
		{'Chernetidae', 'Chrysopida', 'C5'},
		{'Chrysopidae', 'Cnemidophorus', 'C6'},
		{'Cnemidopyge', 'Conopias', 'C7'},
		{'Conopidae', 'Crataegus ser. Intricata', 'C8'},
		{'Crataegus ser. Intricatae', 'Cunninghamella', 'C9'},
		{'Cunninghamellaceae', 'Czekanowskiales', 'C10'},
		},
	D = {
		{'Daanosaurus', 'Dentatherinidae', 'D1'},
		{'Dentectus', 'Digalodon', 'D2'},
		{'Digama', 'Dodecadenia', 'D3'},
		{'Dodecahema', 'Dystrophaeus', 'D4'},
		{'Dytaster', 'Dzungariotherium', 'D5'},
		},
	E = {
		{'Eacles', 'Empidadelpha', 'E1'},
		{'Empididae', 'Epipogiinae', 'E2'},
		{'Epipogium', 'Eucosmodon', 'E3'},
		{'Eucosmodontidae', 'Eutreptiidae', 'E4'},
		{'Eutreptiiida', 'Ezosciadium', 'E5'},
		},
	G = {
		{'Gabara', 'Giardia', 'G1'},
		{'Giardiavirus', 'Gonostomatidae', 'G2'},
		{'Gonostomatoidei', 'Gyroweisia', 'G3'},
		},
	H = {
		{'HTVC010P', 'Hecalini', 'H1'},
		{'Hecamede', 'Hesperonychus', 'H2'},
		{'Hesperoperla', 'Homalattus', 'H3'},
		{'Homalia', 'Hymenasplenium', 'H4'},
		{'Hymenelia', 'Hytrosaviridae', 'H5'},
		},
	L = {
		{'La', 'Lechia', 'L1'},
		{'Lechriaspis', 'Lethiscidae', 'L2'},
		{'Lethiscus', 'Lithophaga', 'L3'},
		{'Lithophane', 'Lycopteridae', 'L4'},
		{'Lycopteriformes', 'Lyttoniidina', 'L5'},
		},
	M = {
		{'Maaqwi', 'Manis (Manis)', 'M1'},
		{'Manis (Paramanis)', 'Megamastax', 'M2'},
		{'Megamelanus', 'Mesotitanina', 'M3'},
		{'Mesovagus', 'Microzercon', 'M4'},
		{'Microzoanthidae', 'Montfortia', 'M5'},
		{'Montfortista', 'Mytilaria', 'M6'},
		{'Mytilarioideae', 'minke whale species complex', 'M7'},
		},
	N = {
		{'Naashoibitosaurus', 'Neocoelidia', 'N1'},
		{'Neocoelidiinae', 'Nichollsemys', 'N2'},
		{'Nichollssaura', 'Nypa', 'N3'},
		{'Nypoideae', 'Nyungwea', 'N4'},
		},
	O = {
		{'OSLEUM', 'Ondigus', 'O1'},
		{'Ondina', 'Orthomorpha', 'O2'},
		{'Orthomorphini', 'Ozyptila', 'O3'},
		},
	P = {
		{'P2virus', 'Papaipema', 'P1'},
		{'Papakula', 'Parasynema', 'P2'},
		{'Parasynthemis', 'Peltandreae', 'P3'},
		{'Peltaria', 'Phaethornithinae', 'P4'},
		{'Phaetusa', 'Phyllocnistinae', 'P5'},
		{'Phyllocnistis', 'Planiliza', 'P6'},
		{'Planipapillus', 'Podothecus', 'P7'},
		{'Podothrombidiidae', 'Praezygaena', 'P8'},
		{'Pragmatodes', 'Protoazin', 'P9'},
		{'Protobalanus', 'Pseudomicrargus', 'P10'},
		{'Pseudomicrocentria', 'Ptychatractidae', 'P11'},
		{'Ptycheulimella', 'Pyxis', 'P12'},
		},
	S = {
		{'SAR', 'Scaptia', 'S1'},
		{'Scaptius', 'Seegeriella', 'S2'},
		{'Seeleyosaurus', 'Sinaivirus', 'S3'},
		{'Sinamma', 'Sphaeriida', 'S4'},
		{'Sphaeriidae', 'Stenogomphurus', 'S5'},
		{'Stenogonum', 'Stylonuroidea', 'S6'},
		{'Stylonuroides', 'Szovitsia', 'S7'},
		},
	T = {
		{'TG3', 'Tenthredo', 'T1'},
		{'Tentoriceps', 'Therizinosaurus', 'T2'},
		{'Therlinya', 'Torovirus', 'T3'},
		{'Torpedinidae', 'Trigonosaurus', 'T4'},
		{'Trigonoscuta', 'Typhlogarra', 'T5'},
		{'Typhlogastrura', 'Tzvelevopyrethrum', 'T6'},
		},
	}

-- Set of ranks whose taxon names are *not* to be italicized.
-- Keys are rank names; every value is true so the table can be used as a set.
local is_not_italic_virus_taxon_t = {
	serotype = true,
	strain = true,
	unranked = true,
	['unranked domain'] = true,
	virus = true,
	['virus group'] = true,
	}

-- Set of ranks whose taxon names are to be italicized; built from a list so
-- that each rank name is written only once.  Lookups yield true or nil.
local is_italic_taxon_t = {};
for _, rank in ipairs ({
	'genus', 'ichnogenus', 'oogenus', 'subgenus', 'ichnosubgenus', 'oosubgenus',
	'supersectio', 'sectio', 'subsectio',
	'series', 'subseries',
	'species complex', 'species group', 'species subgroup',
	'species', 'ichnospecies', 'oospecies', 'subspecies', 'ichnosubspecies', 'oosubspecies',
	}) do
	is_italic_taxon_t[rank] = true;
end

-- Maps a (mostly Latin) rank name to the English label used for display.
-- Values wrapped in '' ... '' carry wikitext italic markup.
-- A lookup for a rank that is not listed here yields nil; callers must cope.
local anglicize_rank_t = {
	-- special cases, alphabetic order
	['alliance'] = "''Alliance''",
	['basic shell type'] = "''Basic shell type''",
	['branch'] = "''Branch''",
	['clade'] = "''Clade''",
	['cladus'] = "''Clade''",
	['form taxon'] = "''Form taxon''",
	['grade'] = "''Grade''",
	['gradus'] = "''Grade''",
	['informal'] = "''Informal group''",
	['informal group'] = "''Informal group''",
	['morphotype'] = "''Morphotype''",
	['node'] = "''Node''",
	['plesion'] = "''Plesion''",
	['plesion-group'] = "''Plesion-group''",
	['possible clade'] = "''Clade?''",
	['realm'] = "''Realm''",
	['species complex'] = "''Species complex''",
	['species group'] = "''Species group''",
	['species subgroup'] = "''Species subgroup''",
	['stem group'] = "''Stem group''",
	['total group'] = "''Total group''",
	['unranked'] = '(unranked)',												-- included here for convenience

	-- virus ranks below species
	['serotype'] = 'Serotype',
	['strain'] = 'Strain',
	['virus'] = 'Virus',
	['virus group'] = 'Group',

	-- Linnaean taxonomy, alphabetic order
	['classis'] = 'Class',
	['cohort'] = 'Cohort',
	['divisio'] = 'Division',
	['domain'] = 'Domain',
	['epifamilia'] = 'Epifamily',
	['familia'] = 'Family',
	['forma'] = 'Form',
	['genus'] = 'Genus',
	['grandordo'] = 'Grandorder',
	['grandordo-mb'] = 'Grandorder',											-- McKenna & Bell version
	['hyperfamilia'] = 'Hyperfamily',
	['infraclassis'] = 'Infraclass',
	['infralegio'] = 'Infralegion',
	['infralegion'] = 'Infralegion',
	['infraordo'] = 'Infraorder',
	['infraphylum'] = 'Infraphylum',
	['infraregnum'] = 'Infrakingdom',
	['infratribus'] = 'Infratribe',
	['legio'] = 'Legion',
	['legion'] = 'Legion',
	['magnordo'] = 'Magnorder',
	['micrordo'] = 'Microrder',
	['microphylum'] = 'Microphylum',
	['mirordo'] = 'Mirorder',													-- FIX: plain rank was only reachable via the malformed key below
	['mirordo-Mirorder'] = 'Mirorder',											-- malformed legacy key; kept so existing lookups do not change
	['mirordo-mb'] = 'Mirorder',												-- McKenna & Bell version
	['nanordo'] = 'Nanorder',
	['nanophylum'] = 'Nanophylum',
	['ordo'] = 'Order',
	['parafamilia'] = 'Parafamily',
	['parvclassis'] = 'Parvclass',
	['parvordo'] = 'Parvorder',
	['phylum'] = 'Phylum',
	['regnum'] = 'Kingdom',
	['sectio'] = 'Section',
	['series'] = 'Series',
	['species'] = 'Species',
	['subclassis'] = 'Subclass',
	['subcohort'] = 'Subcohort',
	['subdivisio'] = 'Subdivision',
	['subfamilia'] = 'Subfamily',
	['subgenus'] = 'Subgenus',
	['sublegio'] = 'Sublegion',
	['sublegion'] = 'Sublegion',
	['subordo'] = 'Suborder',
	['subphylum'] = 'Subphylum',
	['subregnum'] = 'Subkingdom',
	['subsectio'] = 'Subsection',
	['subseries'] = 'Subseries',
	['subspecies'] = 'Subspecies',
	['subterclassis'] = 'Subterclass',											-- used in WoRMS
	['subtribus'] = 'Subtribe',
	['superclassis'] = 'Superclass',
	['supercohort'] = 'Supercohort',
	['superdivisio'] = 'Superdivision',
	['superdomain'] = 'Superdomain',
	['superfamilia'] = 'Superfamily',
	['superlegio'] = 'Superlegion',
	['superlegion'] = 'Superlegion',
	['superordo'] = 'Superorder',
	['superphylum'] = 'Superphylum',
	['superregnum'] = 'Superkingdom',
	['supersectio'] = 'Supersection',
	['supertribus'] = 'Supertribe',
	['tribus'] = 'Tribe',
	['varietas'] = 'Variety',
	['zoodivisio'] = 'Division',
	['zoosectio'] = 'Section',
	['zoosubdivisio'] = 'Subdivision',
	['zoosubsectio'] = 'Subsection',

	-- trace fossil taxonomy, alphabetic order
	['ichnoclassis'] = 'Ichnoclass',
	['ichnocohort'] = 'Ichnocohort',
	['ichnodivisio'] = 'Ichnodivision',
	['ichnofamilia'] = 'Ichnofamily',
	['ichnogenus'] = 'Ichnogenus',
	['ichnograndordo'] = 'Ichnograndorder',
	['ichnograndordo-mb'] = 'Ichnograndorder',									-- McKenna & Bell version
	['ichnoinfraclassis'] = 'Ichnoinfraclass',
	['ichnoinfradivisio'] = 'Ichnoinfradivision',
	['ichnoinfraordo'] = 'Ichnoinfraorder',
	['ichnolegio'] = 'Ichnolegion',
	['ichnolegion'] = 'Ichnolegion',
	['ichnomagnordo'] = 'Ichnomagnorder',
	['ichnomicrordo'] = 'Ichnomicrorder',
	['ichnoordo'] = 'Ichnoorder',
	['ichnoparvordo'] = 'Ichnoparvorder',
	['ichnospecies'] = 'Ichnospecies',
	['ichnostem-group'] = 'Ichnostem-Group',
	['ichnosubclassis'] = 'Ichnosubclass',
	['ichnosubdivisio'] = 'Ichnosubdivision',
	['ichnosubfamilia'] = 'Ichnosubfamily',
	['ichnosublegio'] = 'Ichnosublegion',
	['ichnosublegion'] = 'Ichnosublegion',
	['ichnosubordo'] = 'Ichnosuborder',
	['ichnosuperclassis'] = 'Ichnosuperclass',
	['ichnosupercohort'] = 'Ichnosupercohort',
	['ichnosuperfamilia'] = 'Ichnosuperfamily',
	['ichnosuperordo'] = 'Ichnosuperorder',

	-- fossilized egg taxonomy, alphabetic order
	['ooclassis'] = 'Ooclass',
	['oocohort'] = 'Oocohort',
	['oofamilia'] = 'Oofamily',
	['oogenus'] = 'Oogenus',
	['oomagnordo'] = 'Oomagnorder',
	['oordo'] = 'Oorder',
	['oospecies'] = 'Oospecies',
	['oosubclassis'] = 'Oosubclass',
	['oosubgenus'] = 'Oosubgenus',
	['oosubspecies'] = 'Oosubspecies',
	['oosupercohort'] = 'Oosupercohort',
	['oosuperordo'] = 'Oosuperorder',
	}

-- Set of rank names that _crawl_tree and _crawl_wikidata_tree wrap in bold
-- markup.  Lookups are done with a lowercased rank.
-- NOTE(review): 'virus_group' uses an underscore while the other rank tables
-- in this file spell that rank 'virus group' -- confirm which is intended.
local is_always_displayed_t = {};
for _, rank in ipairs ({
	'virus_group',
	'regnum', 'kingdom',
	'phylum',
	'divisio', 'division',
	'class', 'classis',
	'order', 'ordo',
	'familia', 'family',
	'genus',
	'species',
	}) do
	is_always_displayed_t[rank] = true;
end

-- DEBUG: tally of data modules loaded while crawling the tree, keyed by
-- 'taxonomy <suffix>'; written to the lua log by crawl_tree
local modules_loaded = {};

-- Common prefix of every data module name; the trailing space separates the
-- base name from the suffix that gets appended to it.
local base_data_table_name = "Module:Sandbox/trappist the monk/taxonomy ";

--[[--< M O D U L E _ S E L E C T >

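Select one data module to load that 'should' hold data for <taxon>. Returns an enumerated letter suffix (or the bare uppercase letter, or 'symbols' when <taxon> does not begin with a letter) that will be appended to the base module name to make: 'Module:Sandbox/trappist the monk/taxonomy <suffix>'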

]]

-- Choose the data-module suffix that should hold <taxon>.
--   taxon: taxon name (string)
-- Returns:
--   'symbols' when <taxon> does not begin with a letter;
--   the bare uppercase initial when taxomap_t has no ranges for that letter;
--   the enumerated suffix (e.g. 'A3') of the range that brackets <taxon>;
--   the bare uppercase initial when no range brackets <taxon>, so that callers
--   can always concatenate the result onto the base module name.
local function module_select (taxon)
	local initial = taxon:match ('^%a');										-- first character when it is a letter; nil else
	local letter = (initial and initial:upper ()) or 'symbols';					-- FIX: upper must be called, not merely referenced

	local ranges_t = taxomap_t[letter];
	if not ranges_t then														-- this letter is not split into enumerated modules
		return letter;
	end

	for _, range_t in ipairs (ranges_t) do										-- plain string comparison against each module's first and last names
		if (range_t[1] <= taxon) and (taxon <= range_t[2]) then
			return range_t[3];
		end
	end

	return letter;																-- FIX: was an implicit nil that broke '..' in the callers
end

--[[--< T E M P L A T E _ D A T A _ G E T >

read taxon data from an existing template

template data are read from the template using the embedded template. That template returns '$' delimited string of parameter values: parent$rank$link_target$unnamed parameter$always_display$extinct$same_as$refs This function builds a taxon_t table combining  and into ; the 'boolean' parameter values for  and are converted from case-insensitive 'yes' or 'true' (as strings) to 'true' (a lua boolean); all other values for these parameters are converted to lua nil.

]]

-- Read the data for <taxon> from its taxonomy template.
--   taxon: taxon name (string)
-- Returns nil when #ifexist does not report the template as existing;
-- otherwise a table keyed by parameter name (parent, rank, link,
-- always_display, extinct, same_as, refs) holding the non-empty values.
-- The positional link label ('1') is appended to <link> as 'target|label'.
local function template_data_get (taxon)
	local template_name = 'Template:Taxonomy/' .. taxon .. '/sandbox';			-- DEBUG: make a template name from <taxon>
--	local template_name = 'Template:Taxonomy/' .. taxon;						-- make a template name from <taxon>
	local frame = mw.getCurrentFrame ();										-- FIX: call it; needed to expand the taxonomy template

	-- using #ifexist bumps the expensive parser function counter but keeps nonexistent templates out of
	-- Database reports/Transclusions of non-existent templates; expanding a nonexistent template inside a
	-- pcall, or creating a title object for it, would record a transclusion of the nonexistent title
	if 'true' ~= frame:callParserFunction ({name='#ifexist', args={template_name, 'true'}}) then
		return;																	-- no such template
	end

	local template_string = frame:expandTemplate ({title=template_name, args={['machine code'] = 'all'}});	-- '$' delimited parameter values

	local param_names = {'parent', 'rank', 'link', '1', 'always_display', 'extinct', 'same_as', 'refs'};	-- order of the values in <template_string>
	local is_boolean_param_t = {always_display = true, extinct = true};
	local truthy_t = {['yes'] = true, ['true'] = true};
	local taxon_t = {};

	for i, v in ipairs (mw.text.split (template_string, '$', true)) do			-- plain-text split; '$' is not a pattern here
		local name = param_names[i];											-- nil for any surplus field
		if is_boolean_param_t[name] then
			v = truthy_t[v:lower ()];											-- FIX: call lower; 'yes'/'true' in any case become true, anything else nil
		end

		if name and v and ('' ~= v) then										-- FIX: skip surplus fields as well as nil and empty values
			if '1' == name then													-- the link label positional parameter
				if taxon_t.link then											-- FIX: a label without a link target used to raise on nil concatenation
					taxon_t.link = taxon_t.link .. '|' .. v;
				end
			else
				taxon_t[name] = v;
			end
		end
	end

	mw.log (template_name);														-- DEBUG
	return taxon_t;
end

--[[--< M O D U L E _ D A T A _ G E T >

read taxon data from a data module

data-module data are loaded into package.loaded{} with a pcall wrapped call to require. taxon and same_as data are extracted from that table. Successive calls to a data module do not unload and then reload that module. Because Felis, Felinae, Felidae, and Feliformia are sequential in the taxonomic hierarchy, the 'F' data module is loaded only once for those four taxa. The 'new' data table is consulted before all other data tables, so it is loaded once and not unloaded until _crawl_tree terminates.

]]

-- module scope: name of the last successfully loaded data module other than
-- 'taxonomy new'; nil when none is remembered
local last_loaded_module;

-- Fetch the raw record for <taxon> from the data module <module_name>.
--   taxon:       taxon name (string)
--   module_name: full module name, base_data_table_name .. suffix
-- Returns an empty table for the end-of-tree taxa, nil when the module cannot
-- be loaded, otherwise whatever the loaded module holds under <taxon>
-- (nil when the taxon is not listed there).
local function module_data_get (taxon, module_name)
	if ('Life' == taxon) or ('Veterovata' == taxon) or ('Ichnos' == taxon) then	-- the taxonomy end taxons
		return {};																-- nowhere to go
	end

	local wants_new = (base_data_table_name .. 'new') == module_name;			-- 'new' stays loaded, so it never evicts anything
	if (not wants_new) and last_loaded_module and (last_loaded_module ~= module_name) then
		package.loaded[last_loaded_module] = nil;								-- unload the previous module to conserve memory
		last_loaded_module = nil;
	end

	if not package.loaded[module_name] then										-- not cached yet
		if not pcall (require, module_name) then								-- on success the data lands in package.loaded[module_name]
			return nil;															-- failed to load
		end

		local suffix = module_name:match ('taxonomy (.+)$');
		local log_key = 'taxonomy ' .. suffix;
		modules_loaded[log_key] = (modules_loaded[log_key] or 0) + 1;			-- DEBUG: count loads for the log
		if 'new' ~= suffix then
			last_loaded_module = module_name;									-- 'taxonomy new' does not get remembered
		end
	end

	return package.loaded[module_name][taxon];									-- taxon data if present; nil else
end

--[[--< S O U R C E _ D A T A _ G E T >

chooses which of module_data_get or template_data_get to call based on the state of 

]]

-- Dispatch to the right raw-data reader.
--   taxon:             taxon name
--   taxon_module_name: data module name; nil (or false) selects the template reader
-- Returns whatever the selected reader returns.
local function source_data_get (taxon, taxon_module_name)
	if not taxon_module_name then
		return template_data_get (taxon);										-- raw taxon data from a taxonomy template
	end

	return module_data_get (taxon, taxon_module_name);							-- raw taxon data from a data module
end

--[[--< _ T A X O N _ G E T >--

fetch a taxon table from the appropriate taxonomy Template:Taxonomy/ template where is the taxon name we are looking for

or from '~/taxonomy ' data module where is: the uppercase first letter of name (may have enumerator digits) 'symbols' when the first character of name is not a letter 'new' for the temporary data holding module between new template creation and integration into the whole data set

fills ; follows one 'same_as' if that parameter is present and  is omitted, false, or nil

is taxon name we are looking for  is boolean; true don't follow  taxon if present; false or missing or nil, follow  taxon if present  is module name; used to unload module once we are done with it  is the table to be filled from the taxon data and from the same_as taxon data

returns nothing;  has data on success or is empty table on failure

TODO: better error handling?

]]

-- Fill <taxon_t> with the data for <taxon>, optionally merging in the data of
-- its same_as taxon.
--   taxon:             taxon name to look up
--   no_follow:         truthy: copy same_as like any other key and do not follow it;
--                      falsy: withhold same_as from <taxon_t> and follow it once
--   taxon_module_name: data module to read from; nil reads from the template
--   taxon_t:           table to fill (modified in place)
-- Returns nothing.  <taxon_t> is left untouched when no data are found for
-- <taxon>.  Values already in <taxon_t> are never overwritten by same_as data.
local function _taxon_get (taxon, no_follow, taxon_module_name, taxon_t)
	local raw_taxon_t = source_data_get (taxon, taxon_module_name);				-- for templates taxon_module_name is nil
	if not raw_taxon_t then
		return;																	-- failure; <taxon_t> has not been modified
	end

	local same_as;
	for k, v in pairs (raw_taxon_t) do
		if no_follow or ('same_as' ~= k) then
			taxon_t[k] = v;
		else																	-- same_as present and following is allowed
			same_as = v;														-- save for the second lookup
		end
	end

	if not same_as then
		return;
	end

	local same_as_module_name = taxon_module_name;								-- nil keeps the second lookup on templates
	if taxon_module_name then
		local suffix = module_select (same_as);									-- the same_as taxon may live in a different data module
		if not suffix then														-- FIX: a nil suffix used to raise on concatenation
			return;
		end
		same_as_module_name = base_data_table_name .. suffix;
	end

	local same_as_t = source_data_get (same_as, same_as_module_name);
	if same_as_t then
		for k, v in pairs (same_as_t) do
			if not taxon_t[k] then												-- do not overwrite values already in <taxon_t>
				taxon_t[k] = v;
			end
		end
	end
end

--[[--< T A X O N _ G E T >

fetch a taxon table from the appropriate taxonomy template or '~/taxonomy ' data module where is: the uppercase first letter of name (may have enumerator digits) 'symbols' when the first character of name is not a letter 'new' for the temporary data holding module between new template creation and integration into the whole data set

follow one 'same_as' if that parameter is present

]]

-- Fetch the data table for <taxon>, trying three sources in order: the
-- taxonomy template, the 'new' data module, then the data module chosen by
-- module_select.
--   taxon:     taxon name; nil yields an empty table
--   no_follow: passed through to _taxon_get (truthy: do not follow same_as)
-- Returns the table (possibly empty) and, for data-module sources, the suffix
-- of the module that was consulted ('new' or the module_select result); the
-- second value is nil for template data and when no suffix can be determined.
local function taxon_get (taxon, no_follow)
	if not taxon then
		return {};
	end

	local taxon_t = {};															-- the return table

	_taxon_get (taxon, no_follow, nil, taxon_t);								-- first: the taxonomy template
	if next (taxon_t) then
		return taxon_t;															-- found; no suffix
	end

	_taxon_get (taxon, no_follow, base_data_table_name .. 'new', taxon_t);		-- second: the 'new' data module
	if next (taxon_t) then
		return taxon_t, 'new';
	end

	local suffix = module_select (taxon);										-- third: the regular data module for this name
	if not suffix then															-- FIX: a nil suffix used to raise on concatenation
		return taxon_t;
	end
	_taxon_get (taxon, no_follow, base_data_table_name .. suffix, taxon_t);

	return taxon_t, suffix;
end

--[[--< I S _ S E T >--

Returns true if argument is set; false otherwise. Argument is 'set' when it exists (not nil) and is not an empty string.

]]

-- True when <var> is neither nil nor the empty string; false otherwise.
local function is_set (var)
	return (nil ~= var) and ('' ~= var);
end

--[=[-< M A K E _ W I K I L I N K >

Makes a wikilink; when both link and display text are provided, returns a wikilink in the form [[L|D]]; if only link is provided, returns a wikilink in the form [[L]]; if neither is provided or link is omitted, returns an empty string.

]=]

-- Build wikilink markup.
--   link:    link target; nil or '' yields an empty string
--   display: optional label; when set the result is a piped link
-- Returns '[[link|display]]', '[[link]]', or ''.
local function make_wikilink (link, display)
	if not is_set (link) then
		return '';
	end

	if is_set (display) then
		return table.concat ({'[[', link, '|', display, ']]'});				-- FIX: bracket and pipe literals restored
	end

	return table.concat ({'[[', link, ']]'});									-- FIX: bracket literals restored
end

--[[--< L I N K _ M A K E >

makes a wikilink from the value assigned to 'link' in <taxon_t>

]]

-- Build the wikilink for one taxon from the 'link' value in <taxon_t>.
--   taxon_t: taxon data table; reads .link, .rank and .extinct
--   taxon:   taxon name; may carry a '/variant' tail such as '/skip' or '/?'
-- Returns the wikilink string, or nil when <taxon_t> has no link.
-- The label is italicized through Module:TaxonItalics when the rank is listed
-- in is_italic_taxon_t; extinct taxa get a dagger prefix and '/?' taxa get a
-- trailing question mark unless the link already contains one.
local function link_make (taxon_t, taxon)
	local raw_link = taxon_t.link;
	if not raw_link then
		return nil;
	end

	local link_target, link_label;
	if raw_link:find ('Incertae sedis', 1, true) then
		link_label = '\'\'incertae sedis\'\'';
		link_target = 'Incertae sedis';
	else
		link_target, link_label = raw_link:match ('([^|]+)|(.*)');				-- piped link: target|label
		if not link_target then
			link_label = taxon:match ('([^/]+)/(.*)') or taxon;					-- label is the taxon name without any '/variant' tail
			if ('' ~= raw_link) and not raw_link:find ('|', 1, true) then
				link_target = raw_link;											-- FIX: an unpiped link value is the target; it used to be discarded
			end
		end
	end

	link_target = link_target or link_label;									-- when no target, make it same as the label
	if is_italic_taxon_t[taxon_t.rank] then
		link_label = require ('Module:TaxonItalics').italicizeTaxonName (link_label, false);
	end

	local link = make_wikilink (link_target, link_label);

	if taxon_t.extinct and (not link:find ('†', 1, true)) then
		link = ' † ' .. link;
	end
	if taxon:match ('/%?$') and not link:find ('?', 1, true) then
		link = link .. ' (?) ';
	end

	return link;
end

--[[--< _ C R A W L _ T R E E >

experimental function to see if it is possible / makes sense to replace 87k+ taxonomy templates with lua data modules

for use in Module:Autotaxobox/sandbox call this function with make_tables from taxonomyList

fills tree_t: inverted sequence table of taxa and their ranks

]]

-- Walk from <taxon> up through its parents and record one 'rank: link' line
-- per taxon in <tree_t>.
--   taxon:  name of the taxon to start from
--   tree_t: sequence table to fill; every line is inserted at position 1 so
--           the finished list runs from the most distant ancestor down
-- Returns <tree_t>.  Diagnostic lines are inserted when a taxon has no parent
-- or no rank.  On exit the last remembered data module and the 'new' module
-- are dropped from package.loaded.
local function _crawl_tree (taxon, tree_t)
	local starting_taxon = taxon;												-- save a copy for error messaging
	local taxon_t = taxon_get (taxon);											-- initialize

	while taxon_t and taxon do
		local rank = taxon_t.rank;
		if rank then															-- nil for Taxonomy/Life
			local styled_rank = anglicize_rank_t[rank] or rank;					-- FIX: fall back to the raw rank rather than concatenating nil
			if taxon_t.always_display or is_always_displayed_t[rank:lower ()] then	-- FIX: lower must be called
				styled_rank = '\'\'\'' .. styled_rank .. '\'\'\'';				-- FIX: bold markup literals restored
			end

			local linked_taxon = link_make (taxon_t, taxon) or taxon;			-- FIX: link_make returns nil when there is no link
			local suffix = taxon:match ('%/[%w]+$') or '';						-- get suffix (e.g. /skip, /plantae)
			if '' ~= suffix then
				linked_taxon = linked_taxon .. ' ' .. suffix .. ' ';
			end
			table.insert (tree_t, 1, styled_rank .. ': ' .. linked_taxon);

			if taxon:find ('/skip', 1, true) then								-- mark the gap left by a /skip taxon
				table.insert (tree_t, 1, string.rep ('&middot;', 5) .. ': ' .. string.rep ('&middot;', 5));
			end

			if taxon_t.parent then
				taxon = taxon_t.parent;											-- get the next taxon
				taxon_t = taxon_get (taxon);									-- and get its taxon table

				if nil == taxon_t then											-- defensive: taxon_get as visible in this file always returns a table
					table.insert (tree_t, 1, ' no path to \'Life\' at ' .. taxon .. ' from ' .. starting_taxon .. ' ');
				end
			else
				table.insert (tree_t, 1, ' no parent for taxon ' .. taxon .. ' from ' .. starting_taxon .. ' ');
				taxon = nil;													-- FIX: was 'node = nil', an undeclared global that never ended the loop
			end
		else
			if not (('Life' == taxon) or ('Veterovata' == taxon) or ('Ichnos' == taxon)) then
				table.insert (tree_t, 1, ' no rank or same_as for taxon ' .. taxon .. ' from ' .. starting_taxon .. ' ');
			end

			taxon = nil;														-- no next taxon
		end
	end

	if last_loaded_module then
		package.loaded[last_loaded_module] = nil;								-- unload to conserve memory
	end
	package.loaded[base_data_table_name .. 'new'] = nil;
	return tree_t
end

--[[--< W I K I D A T A _ G E T >--

]]

-- Wikidata property ids passed to wikidata_get.
local TAXON_NAME_P = "P225";		-- taxon name; the datavalue is a string (e.g. Felis)
local TAXON_RANK_P = "P105";		-- taxon rank; the datavalue is an item whose qid is in value.id
local TAXON_PARENT_P = "P171";		-- parent taxon; the datavalue is an item whose qid is in value.id

-- Read the first best statement for <prop> on the wikidata entity <qid>.
--   qid:  entity id
--   prop: property id
-- Returns:
--   the string value when the datavalue is a string (taxon name);
--   the English label and the qid when the datavalue is a table (rank, parent);
--   ' no value ' when the statement has no datavalue;
--   nothing for any other datavalue type.
-- Raises an error when the entity has no statement for <prop>.
local function wikidata_get (qid, prop)
	local statement_t = mw.wikibase.getBestStatements (qid, prop)[1];
	if not statement_t then
		error ('no data for ' .. qid .. ' ' .. prop);							-- some sort of better error handling needed
	end

	local datavalue_t = statement_t.mainsnak.datavalue;
	if not datavalue_t then
		return ' no value ';
	end

	local value = datavalue_t.value;
	local value_type = type (value);

	if 'string' == value_type then
		return value;															-- here for taxon name
	end

	if 'table' == value_type then
		local s_qid = value.id;													-- qid of the rank item or of the parent taxon
		return mw.wikibase.getLabelByLang (s_qid, 'en'), s_qid;					-- label and qid (the qid is used for parent)
	end
end

--[[--< _ C R A W L _ W I K I D A T A _ T R E E >--

this is an experimental function to see if I can figure out how to walk a taxonomy tree in wikidata Wikidata:Project_chat#is_this_possible%3F

Apparently not: Wikidata:Project_chat/Archive/2021/10#is_this_possible?

]]

-- Walk the parent-taxon chain in wikidata starting at <taxon_qid> and render
-- one 'rank: taxon' entry per step.
--   taxon_qid: entity id to start from; nil yields an empty string
-- Returns the entries joined with single spaces, most distant ancestor first.
-- The walk ends when wikidata_get returns no qid for the parent taxon; errors
-- raised by wikidata_get are not caught here.
local function _crawl_wikidata_tree (taxon_qid)
	local out_t = {};

	while taxon_qid do
		local taxon = wikidata_get (taxon_qid, TAXON_NAME_P);
		local rank = wikidata_get (taxon_qid, TAXON_RANK_P);
		local _;
		_, taxon_qid = wikidata_get (taxon_qid, TAXON_PARENT_P);				-- parent name discarded; continue from the parent's qid

		if is_italic_taxon_t[rank] then
			taxon = '\'\'' .. taxon .. '\'\'';									-- FIX: italic markup literals restored
		end

		rank = anglicize_rank_t[rank] or rank:gsub ('(%a)', string.upper, 1);	-- anglicize known ranks; uppercase the first letter of all others
		if is_always_displayed_t[rank:lower ()] then							-- FIX: lower must be called
			rank = '\'\'\'' .. rank .. '\'\'\'';								-- FIX: bold markup literals restored
		end

		table.insert (out_t, 1, rank .. ': ' .. taxon);							-- save at the top of the list
	end

	return table.concat (out_t, ' ');											-- make a big string and done
end

--[[--< C R A W L _ W I K I D A T A _ T R E E >

entry point from

]]

-- Invoke entry point: crawls the wikidata tree for the entity id given as the
-- first positional argument of <frame>.
local function crawl_wikidata_tree (frame)
	local taxon_qid = frame.args[1];
	return _crawl_wikidata_tree (taxon_qid);
end

--[[--< C R A W L _ T R E E >--

]]

-- Invoke entry point: renders a three-column wikitable comparing the tree
-- built by _crawl_tree, the tree built by _crawl_wikidata_tree, and the
-- output of Autotaxobox/sandbox taxonomyList for one taxon.
--   frame: frame object; frame.args[1] is the taxon name, 'Felis' when absent
-- Returns the wikitable markup.  Also writes the per-module load counts and
-- their total to the lua log.
local function crawl_tree (frame)
	local taxon = frame.args[1] or 'Felis';
	local wikidata_id = mw.wikibase.getEntityIdForTitle (taxon);				-- only works when no disambiguation

	local tree_t = _crawl_tree (taxon, {});										-- the list of taxa and their ranks

	local loaded = {};															-- DEBUG: sortable list of module names for the log
	local count = 0;															-- DEBUG: tally of total modules loaded
	for name, tally in pairs (modules_loaded) do
		count = count + tally;
		table.insert (loaded, name);
	end

	-- DEBUG: order by the trailing letter, then by enumerator when both names have one
	local function comp (a, b)
		local letter_a, enum_a = a:match ('(%a)(%d*)$');
		local letter_b, enum_b = b:match ('(%a)(%d*)$');
		enum_a, enum_b = tonumber (enum_a), tonumber (enum_b);					-- nil when there is no enumerator

		if (letter_a == letter_b) and enum_a and enum_b then
			return enum_a < enum_b;
		end
		return letter_a < letter_b;												-- default: compare letters
	end

	table.sort (loaded, comp);
	for _, name in ipairs (loaded) do											-- DEBUG: log module names
		mw.log (name .. ': ' .. modules_loaded[name]);
	end
	mw.log ('total modules loaded: ' .. count);									-- DEBUG: add tally of loaded modules to log

	-- crude side-by-side rendering for comparison
	local out_t = {
		'{| class="wikitable"\n! lua data module experiment !! wikidata experiment !! autotaobox reference\n|-\n|',
		table.concat (tree_t, ' '),
		'\n| ',
		_crawl_wikidata_tree (wikidata_id),
		'\n| ',
		frame:callParserFunction ('#invoke', {'Autotaxobox/sandbox', 'taxonomyList', taxon}),
		'\n|-\n|}',
		};

	return table.concat (out_t);
end

--[[--< _ M A K E _ T A B L E S >--

experimental function to see if it is possible / makes sense to replace 87k+ taxonomy templates with lua data modules

for use in Module:Autotaxobox/sandbox

fills two tables: taxon_tree_t: equivalent to Module:Autotaxobox taxonTable{} – a sequence table where [1] is but also has ['n']=number of taxa listed rank_tree_t: equivalent to Module:Autotaxobox taxonRankTable{} – a sequence table that matches the taxa in taxonTable{}; for 'Life', Veterovata, and Ichnos, empty string

]]

-- Walk from <taxon> up through its parents, recording names and ranks.
--   taxon:        name of the taxon to start from
--   taxon_tree_t: sequence table that receives the taxon names, start taxon
--                 first; its field 'n' is set to the number of names
--   rank_tree_t:  sequence table that receives the matching ranks; '' for the
--                 end taxa Life, Veterovata and Ichnos
-- Returns <taxon_tree_t>, <rank_tree_t>.
local function _make_tables (taxon, taxon_tree_t, rank_tree_t)
	local taxon_t = taxon_get (taxon);											-- initialize

	while taxon_t and taxon do
		if taxon_t.rank then													-- nil for Taxonomy/Life
			table.insert (taxon_tree_t, taxon);									-- add taxon name
			table.insert (rank_tree_t, taxon_t.rank);							-- add taxon rank

			if taxon_t.parent then
				taxon = taxon_t.parent;											-- get the next taxon
				taxon_t = taxon_get (taxon);									-- and get its taxon table
			else
				taxon = nil;													-- FIX: without a parent the same taxon was re-added forever
			end
		else
			if ('Life' == taxon) or ('Veterovata' == taxon) or ('Ichnos' == taxon) then
				table.insert (taxon_tree_t, taxon);								-- add last taxon name
				table.insert (rank_tree_t, '');									-- last taxon name has no rank so add empty string
			end

			taxon = nil;														-- no next taxon
		end
	end

	taxon_tree_t.n = #taxon_tree_t;												-- add the number of taxa in this table
	return taxon_tree_t, rank_tree_t
end

--[[--< M A K E _ T A B L E S >

interface function between Module:Autotaxobox taxonomyList and _make_tables

]]

-- Exported wrapper: hands two fresh, empty tables to _make_tables() for <taxon> and passes its
-- two results (the taxon list and the rank list) straight back to the caller.
local function make_tables (taxon)
	local names_t, ranks_t = _make_tables (taxon, {}, {});						-- crawl the tree to get the taxon list and the rank list
	return names_t, ranks_t;
end

--[[--< D A T A _ T A B L E _ W I K I L I N K _ M A K E >--

<suffix> is the lua data module suffix A1, A2, Q, etc; nil when creating template wikilinks
<taxon> is the taxon name associated with Template:Taxonomy/<taxon>
<caption> is boolean true when creating wikilink for wikitable caption

]]

-- Assembles, piece by piece in <wikilink_t>, the bracketed link markup for a taxon and returns it
-- as a single string (table.concat).
--	suffix  – data module suffix; when both <caption> and <suffix> are set the function returns ''
--	taxon   – taxon name appended to the 'Template:taxonomy/' edit url and to 'Template:Taxonomy/'
--	caption – when set, the right margin is '.5em' and, for templates (no <suffix>), only the edit
--	          link is returned; otherwise the margin is '.04em'
-- The edit link is added only when <suffix> is nil ("don't add 'edit' link for data modules").
-- NOTE(review): in this copy several statements sit behind '--' comments on the same physical
-- line, the 'else' in the second statement group has no visible opening 'if', and some string
-- literals look as if markup was removed from them – presumably line breaks and markup were lost
-- in transit; confirm against the live module before changing any code here.
local function data_table_wikilink_make (suffix, taxon, caption) local wikilink_t = {};

table.insert (wikilink_t, ' <span style="font-weight: normal; float:right; margin-right:');		-- open styling span tag	if caption then																-- if this wikilink is for the table caption		table.insert (wikilink_t, '.5em">'); if suffix then return '';															-- do not display wikilink/edit link at right of caption for data modules else																	-- here when caption and template so show template edit link only table.insert (wikilink_t, '&#x5B; ');		-- open plainlinks span table.insert (wikilink_t, '[https://en.wikipedia.org/w/index.php?action=edit&title=Template:taxonomy/');	-- begin edit link table.insert (wikilink_t, taxon);									-- add the taxon name table.insert (wikilink_t, '&preload=Template:Taxonomy/preload edit]');	-- close the edit link table.insert (wikilink_t, ' &#x5D; ');					-- close the plainlinks span and the styling span return table.concat (wikilink_t);									-- make a big string and done end else table.insert (wikilink_t, '.04em">')	end

table.insert (wikilink_t, '&#x5B;taxonomy ');								-- first part of the wikilink label		table.insert (wikilink_t, suffix);										-- add as last part of the wikilink label	else																		-- here when creating template wikilink		table.insert (wikilink_t, 'Template:Taxonomy/');		table.insert (wikilink_t, taxon);		table.insert (wikilink_t, '|Taxonomy');									-- the wikilink label for template	end	table.insert (wikilink_t, '');											-- close wikilink

if not suffix then															-- don't add 'edit' link for data modules table.insert (wikilink_t, '; ');						-- open plainlinks span table.insert (wikilink_t, '[https://en.wikipedia.org/w/index.php?action=edit&title=Template:taxonomy/');	-- begin edit link table.insert (wikilink_t, taxon);											-- add the taxon name table.insert (wikilink_t, '&preload=Template:Taxonomy/preload edit]');		-- close the edit link table.insert (wikilink_t, ' ');										-- close the plainlinks span end table.insert (wikilink_t, '&#x5D;');										-- close enclosing brackets --	table.insert (wikilink_t, ']]&#x5D;');										-- close wikilink; close enclosing brackets if caption then																-- if this wikilink is for the table caption -- table.insert (wikilink_t, ' ');									-- close styling span tag end table.insert (wikilink_t, ' '); return table.concat (wikilink_t);											-- make a big string and done end

--[[--< _ T E M P L A T E _ S K E L E T O N >--

creates a Template:Taxonomy/<taxon> skeleton prefilled with data from the appropriate module (when data are available); creates a blank skeleton otherwise.

]]

-- Builds the text of a taxonomy-template skeleton for <taxon>.
--	frame – frame object; in the visible text it is used only by the commented-out syntaxhighlight return
--	taxon – taxon name; 'life', 'veterovata' and 'ichnos' (compared in lower case) get an error string
-- Data lookup order: the data module named base_data_table_name .. 'new' first; when that leaves
-- <taxon_t> empty (next() returns nil), the module whose suffix module_select() picks for <taxon>.
-- The result is the lines of <out_t> joined with '\n' and wrapped in a <pre> element.
-- NOTE(review): in this copy 'taxon:lower' has no call parentheses and the statements that fill
-- <out_t> are missing (the text jumps from  table.insert (out_t, "  straight to the return) –
-- presumably lost in transit; confirm against the live module before changing any code here.
local function _template_skeleton (frame, taxon) if ({['life'] = true, ['veterovata'] = true, ['ichnos'] = true})[taxon:lower] then	-- these are the taxonomy end taxons return ' error: taxon: ' .. taxon .. ' ';		-- error return for these; TODO: is this necessary? end

local taxon_t = {}; local out_t = {}

local taxon_module_name = base_data_table_name .. 'new';					-- first look in the '~/Taxonomy new' data module _taxon_get (taxon, true, taxon_module_name, taxon_t);						-- attempts to fill <taxon_t> from '~/Taxonomy new' data module

if not next (taxon_t) then													-- if <taxon_t> empty then taxon not in 'new' local suffix = module_select (taxon);									-- get the suffix to append to the base data module name taxon_module_name = base_data_table_name .. suffix;						-- not in '~/Taxonomy new' data module _taxon_get (taxon, true, taxon_module_name, taxon_t);					-- attempts to fill <taxon_t> from '~/Taxonomy ' data module end table.insert (out_t, "');	return table.concat ({'<pre style="border:none; background:inherit; margin-top:-0.6em; margin-left:-1em">', table.concat (out_t, '\n'), ' '});

--	return frame:callParserFunction ({name='#tag:syntaxhighlight', args={table.concat (out_t, '\n'), lang='moin'}}); end

--[[--< T E M P L A T E _ S K E L E T O N >

creates a Template:Taxonomy/<taxon> skeleton prefilled with data from the appropriate module (when data are available); creates a blank skeleton otherwise.

]]

-- Exported entry point: takes the taxon name from the first positional argument of <frame> and
-- returns whatever _template_skeleton() produces for it.
local function template_skeleton (frame)
	local taxon_name = frame.args[1];											-- first positional parameter is the taxon name
	return _template_skeleton (frame, taxon_name);
end

--[[--< S H O W _ T A X O N _ D A T A >

similar to the table produced by Template:Taxonomy key

]]

-- Renders a wikitable describing one taxon and returns it after frame:preprocess().
-- Rows added to <out_t> in the visible text, in order: Rank, Link, Extinct, Always displayed,
-- Taxonomic references, Parent's taxonomic references, Same as taxon (only when <same_as> is set),
-- notes for names ending in '/skip' or '/?' or containing 'Incertae sedis', and – when <suffix>
-- is set – a 'Template skeleton' row filled by _template_skeleton().
-- NOTE(review): <taxon>, <taxon_t>, <parent_t>, <same_as>, <out_t>, <suffix>, <psuffix> and
-- <ssuffix> are used below but their declarations are not present in this copy; the text jumps
-- from  local code_open_tag =  into the middle of a later statement, 'getCurrentTitle' has no call
-- parentheses, and the quote escapes in the rank markup are malformed – presumably text was lost
-- in transit; confirm against the live module before changing any code here.
local function show_taxon_data (frame) local this_page = mw.title.getCurrentTitle.prefixedText;

local code_open_tag = ' ');										-- close code tag; include space before lua data table wikilink		table.insert (out_t, data_table_wikilink_make (psuffix, taxon_t.parent));	-- add bracketed wikilink to lua data table for this 	end

table.insert (out_t, '\n|-\n|Rank:\n|'); local rank; if taxon_t.rank then rank = anglicize_rank_t[taxon_t.rank]; local rank_t = {}; table.insert (rank_t, code_open_tag);									-- open code tag table.insert (rank_t, taxon_t.rank);									-- insert raw rank from taxon data table.insert (rank_t, ' [displays as: ');						-- start the message local always_display = taxon_t.always_display or is_always_displayed_t[taxon_t.rank]; if is_italic_taxon_t[taxon_t.rank] then									-- for italicized taxon ranks table.insert (rank_t, (always_display and '\'\'\'\'\) or '\'\);	-- open italic markup; bold if always displayed table.insert (rank_t, rank);										-- add anglicized rank table.insert (rank_t, (always_display and '\'\'\'\'\) or '\'\);-- close italic/bold markup else table.insert (rank_t, (always_display and '\'\'\) or );			-- bold if always displayed table.insert (rank_t, rank);										-- add anglicized rank table.insert (rank_t, (always_display and '\'\'\) or );			-- close italic/bold markup end table.insert (rank_t, ']');												-- finish the message rank = table.concat (rank_t);											-- and make a big string

elseif 'Life' == taxon or 'Veterovata' == taxon or 'Ichnos' == taxon then rank = 'none'; else rank = ' – a rank must be supplied '; end table.insert (out_t, rank);

local link = link_make (taxon_t, taxon);

if link then link = table.concat ({code_open_tag, mw.text.nowiki (taxon_t.link), ' [displays as: ', link, ']'}); end

table.insert (out_t, '\n|-\n|Link:\n|'); table.insert (out_t, (link and link) or '–');

-- NOTE(review): <taxon> is treated as a string elsewhere in this function (taxon:find, taxon:match),
-- so 'taxon.extinct' below is presumably meant to be 'taxon_t.extinct' – confirm.
local extinct = (taxon_t.extinct and (code_open_tag .. 'true ')) or 'no'; if not taxon.extinct and parent_t.extinct then extinct = ' parent is marked as extinct '; end

table.insert (out_t, '\n|-\n|Extinct:\n|'); table.insert (out_t, extinct);

table.insert (out_t, '\n|-\n|Always displayed:\n|'); table.insert (out_t, (taxon_t.always_display and (code_open_tag .. 'true ')) or ((is_always_displayed_t[taxon_t.rank] and 'yes (major rank)') or 'no'))

table.insert (out_t, '\n|-\n|Taxonomic references:\n|'); table.insert (out_t, taxon_t.refs or '–');

table.insert (out_t, '\n|-\n|Parent\'s taxonomic references:\n|'); table.insert (out_t, parent_t.refs or '–');

if same_as then table.insert (out_t, '\n|-\n|Same as taxon:\n|'); table.insert (out_t, code_open_tag);									-- open code tag table.insert (out_t, same_as);												-- add same_as taxon name table.insert (out_t, ' ');											-- close code tag; include space before lua data table wikilink table.insert (out_t, data_table_wikilink_make (ssuffix, same_as));		-- add bracketed wikilink to lua data table for this end

if taxon:find ('/skip$') then local skipped_taxon = taxon:match ('([^/]+)/skip$'); table.insert (out_t, '\n|-\n| colspan="2" | For the suffix "/skip", see Skip taxonomy templates. '); table.insert (out_t, 'For the skipped taxa, see ');					-- start the message and open wikilink markup		table.insert (out_t, skipped_taxon);									-- add skipped taxon name without '/skip' suffix		table.insert (out_t, '');												-- close wikimarkup --		table.insert (out_t, 'For the skipped taxa, see  ');										-- close code tag; include space before lua data table wikilink --		table.insert (out_t, data_table_wikilink_make (suffix, skipped_taxon));	-- add bracketed wikilink to lua data table for this or to taxonomy template end

if taxon:find ('/%?$') then table.insert (out_t, '\n|-\n| colspan="2" | For the suffix "/?", see Questionable assignments.'); end

if taxon:find ('Incertae sedis') then table.insert (out_t, '\n|-\n| colspan="2" |'); table.insert (out_t, 'For taxon names with "Incertae sedis", see \'\'Incertae sedis\'\' taxonomy templates.'); end

if suffix then table.insert (out_t, '\n|- style="vertical-align: top;"\n|Template skeleton: data loaded from '); table.insert (out_t, '&#x5B;taxonomy ');										-- first part of the wikilink label		table.insert (out_t, suffix);		table.insert (out_t, '&#x5D;');										-- close enclosing brackets; close wikilink table.insert (out_t, ' to change these data:\n#copy the template skeleton\n#click &#x5B;'); table.insert (out_t, ' [https://en.wikipedia.org/w/index.php?action=edit&title=Template:taxonomy/');	-- begin edit link table.insert (out_t, taxon);											-- add the taxon name table.insert (out_t, '&preload=Template:Taxonomy/preload create] &#x5D;\n#paste, and edit');		-- close the edit link table.insert (out_t, '\n|'); table.insert (out_t, _template_skeleton (frame, taxon)); end

table.insert (out_t, '\n|}');												-- close the wikitable return frame:preprocess (table.concat (out_t)); end

--[[--< D E L E T E _ T A X O N >--

deletes a taxon entry from a taxonomy data module.

Calling this function finds the correct data module, reads it, and removes the specified taxon entry. The output is a copy of the data module, without that entry, that can be copy/pasted into the data module. Yeah, I know, crude but lua can't write wikitext.

TODO: error checking? What if the taxon isn't found?

]]

-- Produces a copy of a taxonomy data module with one taxon entry removed.
--	frame – frame object; frame.args[1] is the name of the taxon to delete
-- Returns 'deleted: <taxon>' followed by the remaining entries (sorted, wrapped in 'return { … }')
-- inside a syntaxhighlight tag, or a 'not found' message when no entry matches <taxon>.
-- A data module page that does not exist (or has no readable content) is treated as 'not found'.
-- assumes module_select() returns a suffix string for <taxon> — TODO confirm it cannot return nil here
local function delete_taxon (frame)
	local taxon = frame.args[1];
	local suffix = module_select (taxon);										-- selects the data module that should hold <taxon>

	local title = mw.title.new (base_data_table_name .. suffix);				-- nil when the title is not valid
	local content = (title and title:getContent ()) or '';						-- getContent() is nil when there is no page; empty string matches nothing below
	local found = false;
	local out_t = {};

	for entry in content:gmatch ('\t*%[\'[^\r\n]+},[\r\n]+') do				-- one ['name'] = {...}, line at a time
		local entry_taxon = entry:match ('^\t*%[\'([^=]+)\'%]%s*=');			-- taxon name that keys this entry
		if entry_taxon == taxon then
			found = true;														-- drop this entry from the output
		else
			table.insert (out_t, entry);
		end
	end

	if not found then
		return ' Taxon: ' .. taxon .. ' not found in Module:Sandbox/trappist the monk/taxonomy ' .. suffix .. '';
	end

	table.sort (out_t)
	table.insert (out_t, 1, 'return {\n')										-- reopen the data table ahead of the sorted entries
	table.insert (out_t, '\t}')													-- and close it

	return 'deleted: ' .. taxon .. '\n\n' .. frame:callParserFunction ({name='#tag:syntaxhighlight', args={table.concat (out_t), lang='lua'}});
end

--[[--< Q I D S _ G E T >--

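<select> (first positional parameter) can be:
	no qids – returns a list of links that do not have qids and associated taxon name
	no links – returns a list of taxons that do not have links
	anything else – returns a list of links, the associated qid, and associated taxon name
the second positional parameter is the suffix appended to 'Module:Sandbox/trappist_the_monk/taxonomy_' to name the data module that is read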

]]

-- Lists, for one taxonomy data module, how its 'link' values relate to wikidata qids.
--	frame.args[1] – which list to return: 'no qids', 'no links', or anything else for links with qids
--	frame.args[2] – suffix appended to 'Module:Sandbox/trappist_the_monk/taxonomy_' to name the module
-- Returns the selected list sorted and joined with newlines, or a 'failed to load' message when the
-- module cannot be required or does not yield a table.  The module is removed from package.loaded
-- before returning.
local function qids_get (frame)
	local list_kind = frame.args[1];											-- not named 'select' so that the built-in select() is not shadowed
	local module_suffix = frame.args[2] or '';									-- missing parameter falls through to the 'failed to load' message
	local module_name = 'Module:Sandbox/trappist_the_monk/taxonomy_' .. module_suffix;

	local loaded_ok = pcall (require, module_name);							-- attempt to load; data will be fetched from package.loaded[module_name]
	local data_t = package.loaded[module_name];
	if not loaded_ok or 'table' ~= type (data_t) then							-- pairs() below needs a table
		package.loaded[module_name] = nil;
		return ' failed to load: Module:Sandbox/trappist_the_monk/taxonomy_' .. module_suffix .. ' ';		-- failed to load
	end

	local qids = {}																-- sequence table of links and their qids
	local no_qids = {};															-- sequence table of links that do not have a qid
	local no_links = {};														-- sequence table of taxons that do not have links

	for taxon, taxon_t in pairs (data_t) do
		local link = taxon_t.link;												-- local copy; rewritten below without touching the module data
		if link then
			link = link:gsub ('(.+)|.*', '%1');									-- remove any link label
			local qid = mw.wikibase.getEntityIdForTitle (link, 'enwiki');		-- qid referred to by link in module data
			if qid then
				table.insert (qids, '*' .. link .. '' .. ': ' .. qid .. ' from ' .. taxon);
			else
				table.insert (no_qids, '*' .. link .. '' .. ' from ' .. taxon);
			end
		else
			table.insert (no_links, taxon);
		end
	end

	package.loaded[module_name] = nil;											-- forget the module once its data have been read

	local selected_t = qids;													-- default list
	if 'no qids' == list_kind then
		selected_t = no_qids;
	elseif 'no links' == list_kind then
		selected_t = no_links;
	end

	table.sort (selected_t);
	return table.concat (selected_t, '\n');
end

--[[--< E X P O R T E D  F U N C T I O N S >--

]]

return {
	-- tree renderers
	crawl_tree = crawl_tree,
	_crawl_tree = _crawl_tree,
	crawl_wikidata_tree = crawl_wikidata_tree,

	-- data access
	make_tables = make_tables,													-- wrapper around _make_tables for use from Module:Autotaxobox
	taxon_get = taxon_get,
	show_taxon_data = show_taxon_data,											-- similar to the table produced by Template:Taxonomy key

	-- maintenance helpers
--	taxomap_t = taxomap_t,
	delete_taxon = delete_taxon,
	template_skeleton = template_skeleton,
	qids_get = qids_get,
	}