-- Module:Sandbox/trappist the monk/Cat tree

--User:Trappist the monk/Algeria cat
require ('strict')																-- Scribunto strict mode: reading or writing an undeclared global raises an error

local file_list = {																-- titles of the pages that main() reads; main() may select one by its index here
	'User:Trappist the monk/Algeria cat',
	'User:Trappist the monk/Algeria cat2',
	'User:Trappist the monk/Algeria cat3',
	}

local lang_obj = mw.language.getContentLanguage();								-- for formatting results; must be called: formatNum() is a method of the returned language object

local categories_t = {};														-- a k/v table where k is cat name; v is sequence where [1] is number of pages plus files and [2] is repeat_count
local containers_t = {};														-- a k/v table of container cat names (no pages or files in cat) and v is repeat count

local algeria_cats_t = {};														-- list of categories that have 'Algeria' or 'Algerian' in the category's name; not referenced by the code visible here
local total_algeria_cats = 0;													-- holds the number of those categories

local total_pages = 0;															-- pages summed over the unique categories
local total_files = 0;															-- files summed over the unique categories
local total_cats = 0;															-- every category entry in the raw tree, repeats included
local total_unique_cats = 0;													-- distinct category names

local total_algerian_pages = 0;													-- holds the number of pages unique to Algeria
local total_algerian_files = 0;													-- holds the number of files unique to Algeria

----< M A I N >

local algeria_pattern = '[Aa]lgerian?';										-- matches 'Algeria' / 'algeria' with an optional trailing 'n'


--[[--------------------------< T A L L Y _ C A T E G O R Y >--------------------------------------------------

Folds one entry of a raw category-tree listing into the module-level tallies.

<category> is the category name captured from the listing.
<parts> is the text captured from the parentheses after the name; it is searched for '<digits> P' (pages) and
'<digits> F' (files), each defaulting to 0 when absent.

Every entry bumps total_cats.  A name already recorded only has its repeat count bumped.  A new name bumps
total_unique_cats and is then recorded in categories_t (has pages and/or files) or in containers_t (has neither).
Recording containers is what keeps a repeated container category from being counted as unique more than once.

]]

local function tally_category (category, parts)
	total_cats = total_cats + 1;												-- tally each category entry, repeats included

	local seen_t = categories_t[category];
	if seen_t then																-- repeat of a category that has pages and/or files
		seen_t[2] = seen_t[2] + 1;
		return;
	end

	if containers_t[category] then												-- repeat of a container category
		containers_t[category] = containers_t[category] + 1;
		return;
	end

	total_unique_cats = total_unique_cats + 1;									-- first time we've seen this category so tally it

	local pages = tonumber (parts:match ('(%d+) P')) or 0;						-- if the category has pages, get the number
	local files = tonumber (parts:match ('(%d+) F')) or 0;						-- if the category has files, get the number

	if 0 == pages and 0 == files then											-- container: remember it so that repeats are recognized
		containers_t[category] = 1;
		return;
	end

	categories_t[category] = {pages + files, 1};								-- [1] pages plus files; [2] seen once so far
	total_pages = total_pages + pages;											-- tally total pages
	total_files = total_files + files;											-- tally total files

	if category:find (algeria_pattern) then										-- 'Algeria' or 'Algerian' in the category name
		total_algerian_pages = total_algerian_pages + pages;					-- tally total 'Algerian' pages
		total_algerian_files = total_algerian_files + files;					-- tally total 'Algerian' files
	end
end


--[[--------------------------< M A I N >----------------------------------------------------------------------

Reads the listing pages named in file_list, tallies the categories found there, and returns a plain-text report.

<selector> is a key into file_list.  When it is nil, or is not a key of file_list, every page in file_list is read.
NOTE(review): when this is reached through {{#invoke:}} the argument is presumably a frame object, which is not a
key of file_list, so every page would be read -- confirm.

Returns the report as a newline-separated string: the totals, then the alpha-sorted list of categories that have
pages and/or files, then the alpha-sorted subset whose names match 'Algeria' / 'Algerian'.  Returns
'error: no content: <title>' for the first page whose title is invalid or whose content cannot be fetched.

The tallies are module-level locals that are only ever incremented, so they accumulate if main() is called more
than once on the same loaded module.

]]

local function main (selector)
	local input_titles = file_list;
	if selector and file_list[selector] then
		input_titles = {file_list[selector]};									-- just the one selected page
	end

	-- \226\128\142 is the UTF-8 encoding of U+200E (left-to-right mark); an entry is: name, LRM, space, '(' parts ')'
	local pattern = '%s*([^\r\n]+)\226\128\142 %(([^\r\n]+)%)';

	for _, input_title in ipairs (input_titles) do
		local title_obj = mw.title.new (input_title);							-- nil when the text is not a valid title
		local content = title_obj and title_obj:getContent();					-- get the content of the user page
		if not content then
			return 'error: no content: ' .. input_title;
		end

		for category, parts in content:gmatch (pattern) do						-- loop through the list of categories
			tally_category (category, parts);
		end
	end

	local max_repeat = 0;														-- count of the most commonly occurring category(ies) in the list
	local algerian_max_repeat = 0;												-- same, restricted to 'Algeria' / 'Algerian' names
	local unique_out_t = {};													-- parts of the rendered output go here
	local algerian_out_t = {};

	for name, v_t in pairs (categories_t) do
		local repeats = v_t[2];
		local entry = name .. ' (' .. repeats .. '×)';

		if repeats > max_repeat then
			max_repeat = repeats;
		end
		table.insert (unique_out_t, entry);

		if name:find (algeria_pattern) then
			table.insert (algerian_out_t, entry);
			total_algeria_cats = total_algeria_cats + 1;
			if repeats > algerian_max_repeat then
				algerian_max_repeat = repeats;
			end
		end
	end

	table.sort (unique_out_t);													-- pairs() order is unspecified so sort for stable output
	table.sort (algerian_out_t);

	return table.concat ({
		'number of categories in the raw tree: ' .. lang_obj:formatNum (total_cats),
		'number of unique categories: ' .. lang_obj:formatNum (total_unique_cats),
		'number of pages in the unique categories: ' .. lang_obj:formatNum (total_pages),
		'number of files in the unique categories: ' .. lang_obj:formatNum (total_files),
		'number of categories with \'Algeria\' or \'Algerian\' in the name: ' .. lang_obj:formatNum (total_algeria_cats),
		'number of pages in \'Algerian\' categories: ' .. lang_obj:formatNum (total_algerian_pages),
		'number of files in \'Algerian\' categories: ' .. lang_obj:formatNum (total_algerian_files),
		'\nList of unique category names. The adjacent number indicates how many times the category appears in the raw category tree.',
		'The most commonly occuring categories appear ' .. max_repeat .. ' times in the raw category tree.  The list is alpha sorted.\n',
		table.concat (unique_out_t, '\n'),
		'\nList of unique category names where the text \'Algeria\' or \'Algerian\' appears in the category name.',
		'The most commonly occuring categories appear ' .. algerian_max_repeat .. ' times in the raw category tree. The list is alpha sorted.\n',
		table.concat (algerian_out_t, '\n'),
		}, '\n');
end

----< E X P O R T S >

-- Public interface of the module: main() is the only function made available to callers.
local export_t = {};
export_t.main = main;
return export_t;