-- Module:Str find word/sandbox

-- 2023-04-17 STABLE wrt basics, quotes "" '' * with base sep; working on resultstring & report
-- todo: report options, more options
-- todo: options count, pattern, out-table, out-htmllist, keepinputordersource
require('strict')

-- Module table; its functions are the public interface
local p = {}

-- Helper modules
local mArgs       = require('Module:Arguments')
local str         = require('Module:String')
local yesno       = require('Module:Yesno')
local tTools      = require('Module:TableTools')
local strDeEnCode = require('Module:DecodeEncode')

-- Maximum number of words read from one word list.
-- alpha-status, Apr2023. when stable, can be higher
local iMaxWords = 12

-- Parsed arguments of the current call; assigned by p._main
local tArgs = {}

-- Report module; stays nil until initiated (when explain=T)
local report = nil

-- Work out whether an explain-report is wanted, and of which type.
-- Reads tArgs.explain and OVERWRITES it with the resulting boolean (report yes/no).
-- Returns the report type:
--   in: nil, false      -> report off; returns false
--   in: true            -> report on;  returns 'preview'
--   in: doc, testcases  -> report on;  returns that same word
--   in: foo, other      -> report off; returns the word as entered
local function parseReportType( tArgs )
	-- The second yesno argument is its default; presumably that is what comes back
	-- for a word yesno does not recognise (to be parsed beyond T/F below).
	local sType = yesno( tArgs.explain, tArgs.explain ) or false
	local bReport = false

	if yesno( sType, false ) == nil then
		-- nil, false: leave as is
	elseif sType == 'testcases' or sType == 'doc' then
		-- persistent report on those pages; type word is kept
		bReport = true
	elseif sType == true then
		sType = 'preview'
		bReport = true
	end
	-- any other (unknown) word: bReport stays false

	tArgs.explain = bReport
	return sType
end

-- Load the report module into the module-level variable `report`.
-- The tArgs parameter is accepted but not used in this function.
local function initReport( tArgs )
	report = require('Module:Str find word/report')
	-- must be a call: a bare field reference is not a valid Lua statement
	report.xpCheckExplain() -- dummy
end

-- Ask Module:If preview for the preview status of the page.
-- Returns whatever ifPreview.main( true, false ) returns.
-- NOTE(review): presumably true in preview mode and false otherwise -- confirm
-- against Module:If preview. Todo (original author): here or in report?
local function isPreview()
	local ifPreview = require('Module:If preview')
	-- alternative: return not ( ifPreview._warning( {'is_preview'} ) == '' )
	return ifPreview.main( true, false )
end

-- Normalise an input string as early as possible:
--   1. decode entities (turn "&#x0041;" into "A" etc.)
--   2. reduce every run of whitespace (%s+) to one single space
--   3. trim the outer whitespace
-- nil in => nil out
local function sDecodeTrim( sText )
	if sText ~= nil then
		local sDecoded = strDeEnCode._decode( sText )
		local sSingleSpaced = mw.ustring.gsub( sDecoded, '%s+', ' ' )
		return mw.text.trim( sSingleSpaced )
	end
	return nil
end

-- Make a word (any character string) safe for use inside a string pattern:
-- the pattern-magic characters get a % prefix.
-- Escaped set, as listed for str._escapePattern: "([%(%)%.%%%+%-%*%?%[%^%$%]])"
-- = 12 characters  ( ) . % + - * ? [ ^ $ ]
local function escape_word( word )
	return str._escapePattern( word )
end

-- Remove one outer pair of quotes ( '..' or ".." ) together with the whitespace
-- outside that pair.
--   s          : the string; nil in => nil out
--   bTrimAfter : when exactly true, the result (= the inner string) is trimmed too
--                (case " abc ")
-- A string that does not start with a quote is returned unchanged, apart from
-- the optional trim.
local function removeOuterQuotes( s, bTrimAfter )
	if s == nil then return nil end

	-- mw.ustring.match returns nil when nothing matches, so the test must be
	-- against nil; a test against '' succeeds for every input.
	if mw.ustring.match( s, "^%s*\'" ) ~= nil then
		s = mw.ustring.gsub( s, "^%s*%\'(.*)%\'%s*$", "%1" )
	elseif mw.ustring.match( s, '^%s*\"' ) ~= nil then
		s = mw.ustring.gsub( mw.text.trim( s ), '^%\"(.*)%\"$', '%1' )
	end
	if bTrimAfter == true then
		s = mw.text.trim( s )
	end
	return s
end

-- separator-in: determine the separator used to split the input word lists.
--   sSep        : separator as entered (may be nil)
--   sDefaultSep : fallback separator
-- All alphanumerics (%w) and whitespace (%s) are removed from sSep; when nothing
-- is left (or sSep is nil) the decoded & trimmed default is returned.
-- todo: check characters '" _ {}; & accept?'
local function setSepIn( sSep, sDefaultSep )
	if sSep == nil then
		return sDecodeTrim( sDefaultSep )
	end
	-- remove all %w (alphanumeric) and %s (WS); the parentheses keep only the
	-- resulting string (gsub also returns a count)
	sSep = ( mw.ustring.gsub( sDecodeTrim( sSep ), '[%w%s]*', '' ) )
	if sSep == '' then
		return sDecodeTrim( sDefaultSep )
	else
		return sSep
	end
end

-- separator-out: determine the separator used when the hit words are joined.
--   sSep        : separator as entered; may be wrapped in quotes to keep spaces
--   sDefaultSep : returned as-is when sSep is nil or ends up empty
local function setSepOut( sSep, sDefaultSep )
	local sOut = sDecodeTrim( sSep )
	if not sOut then
		return sDefaultSep
	end

	-- unquote, but do not trim the inner string
	sOut = removeOuterQuotes( sOut, false )
	if sOut ~= '' then
		return sOut
	end
	return sDefaultSep
end

-- Look up a single word in a word table (simple array of words).
-- The comparison is an exact == on each array element.
-- returns: the word when it is present, nil otherwise
local function findWordInTable( tSource, word )
	local found = nil
	for _, candidate in ipairs( tSource ) do
		if candidate == word then
			found = word
			break
		end
	end
	return found
end

-- Helper for buildWordTable: take the quoted words out of a word string.
--   sWordlist    : word string, already wrapped in separators
--   sQuote       : the quote character, ' or "
--   sPlaceholder : code that replaces each quoted cell in the word string
-- A hit is a quoted string that fills a whole separator-delimited cell
-- (outer whitespace allowed). Replacing it by a code keeps separators that sit
-- inside the quotes away from the plain-word parser.
-- returns: the rewritten word string, and the array of unquoted hits (in order)
local function extractQuotedWords( sWordlist, sQuote, sPlaceholder )
	local sPattern = '%f[^' .. tArgs.sep_pattern .. ']%s*%b' .. sQuote .. sQuote
		.. '%s*%f[' .. tArgs.sep_pattern .. ']'
	local tHits = {}
	local hitWord = sDecodeTrim( mw.ustring.match( sWordlist, sPattern ) ) or ''
	while hitWord ~= '' do
		local sInner = removeOuterQuotes( hitWord, true )
		table.insert( tHits, sInner )
		-- removes the current 1st hit; replace with code
		sWordlist = mw.ustring.gsub( sWordlist, sPattern, sPlaceholder, 1 )
		-- next
		hitWord = sDecodeTrim( mw.ustring.match( sWordlist, sPattern ) ) or ''
	end
	return sWordlist, tHits
end

-- Helper for buildWordTable: put the quoted words back into the word table.
-- The n-th hit replaces the first placeholder still present, which keeps the
-- words in input order. Modifies wordTable in place.
local function restoreQuotedWords( wordTable, tHits, sPlaceholder )
	for _, sQuoted in ipairs( tHits ) do
		for iW, sW in ipairs( wordTable ) do
			if sW == sPlaceholder then
				wordTable[iW] = sQuoted
				break
			end
		end
	end
end

-- Reads and parses a word list and returns a table with words (simple array)
-- words list can be: source, andwords-to-check, orwords-to-check
-- step 1: basic preparation of the csv wordstring
-- step 2: when case-insensitive, turn string into lowercase
-- step 3: read (parse) quoted '..'
-- step 4: read (parse) quoted ".."
-- step 5: read (parse) separator-delimited words (at most iMaxWords cells)
-- step 6: merge quoted wordlists; keep in order
-- step 7: when booleans=T, change boolean words into true/false (module:yesno rules)
-- step 8: replace synonyms (by input "|_nov=November, 11")
-- step 9: remove duplicates from wordtable (rm latest)
-- 		all words returned are trimmed
-- Reads the module-level tArgs (sep, sep_pattern, case, booleans, synonymsTables).
-- return the table (a straight array); empty table for nil or '' input
local function buildWordTable( sWordlist )
	local wordTable = {}
	local cQ1 = '_Q0027_' -- U+0027 = \'
	local cQ2 = '_Q0022_' -- U+0022 = \"
	local tQ1hits -- Q1-hits, reused to restore order
	local tQ2hits -- Q2-hits, reused to restore order

	-- Step 1: prepare sWordList
	-- NOTE(review): the string is not decoded as a whole here; each word is
	-- decoded & trimmed when it is read, so an encoded separator inside a word
	-- does not split that word.
	if sWordlist == '' or sWordlist == nil then
		return wordTable
	end
	sWordlist = tArgs.sep .. sWordlist .. tArgs.sep

	-- Step 2: case sensitive
	if yesno( tArgs.case, true ) == false then
		-- Unicode-aware lowercase; string.lower only handles ASCII letters
		sWordlist = mw.ustring.lower( sWordlist )
	end

	-- Step 3: Q1 read quotes (single quotes '..')
	sWordlist, tQ1hits = extractQuotedWords( sWordlist, '\'', cQ1 )

	-- Step 4: Q2 read quotes (double quotes "..")
	sWordlist, tQ2hits = extractQuotedWords( sWordlist, '\"', cQ2 )

	-- Step 5: parse plain sep-delimited words
	local sPattern = '%f[^' .. tArgs.sep_pattern .. '][^' .. tArgs.sep_pattern
		.. ']+%f[' .. tArgs.sep_pattern .. ']'
	local sNoMoreWords = sDecodeTrim( tArgs.sep ) -- str._match is told to return the separator when nothing is found
	local hitCount = 0
	while hitCount < iMaxWords do
		local hitWord = sDecodeTrim(
			str._match( sWordlist, sPattern, 1, hitCount + 1, false, tArgs.sep )
		) or ''
		if hitWord == sNoMoreWords then
			-- no more words found in the string
			break
		end
		hitCount = hitCount + 1
		if hitWord ~= '' then
			table.insert( wordTable, hitWord )
		end
		-- else: blank word, skipped
		-- (note: but blank quotes as in .., " ", .. are kept = blank cell '')
	end
	-- The report module is only loaded when explain is on; without this guard
	-- reaching the word limit would raise an error on the nil `report`.
	if hitCount >= iMaxWords and report ~= nil then
		report.xpMessage( 'ERR701 wordcount ' .. hitCount .. ' > maxwords' .. iMaxWords )
	end

	-- Step 6: merge quoted words & wordtable, keep order
	restoreQuotedWords( wordTable, tQ1hits, cQ1 )
	restoreQuotedWords( wordTable, tQ2hits, cQ2 )

	-- Step 7: when read as booleans, convert words to true/false
	if tArgs.booleans then
		for i, v in ipairs( wordTable ) do
			local bValue = yesno( v )
			if bValue ~= nil then
				wordTable[i] = tostring( bValue )
			end
		end
	end

	-- Step 8: replace synonyms
	-- Only string keys are synonym targets. The synonym tables can also carry
	-- integer-keyed entries (the list of synonym names); using those here would
	-- replace a word by a number.
	for aka1, tAkas in pairs( tArgs['synonymsTables'] ) do
		if type( aka1 ) == 'string' then
			for iW, w in ipairs( wordTable ) do
				if findWordInTable( tAkas, w ) ~= nil then
					wordTable[iW] = aka1
				end
			end
		end
	end

	-- Step 9: remove duplicates from list
	wordTable = tTools.removeDuplicates( wordTable )

	return wordTable
end

-- AND-logic with ANDwords words: ALL words must be found
--   tWorkf.ANDwords    : array of words to check
--   tWorkf.SOURCEwords : array of source words
-- returns two values: T/F, hittable
-- 		T when *all* AND words are found
-- 		hittable with all hit words
-- note 1: when F, the hittable still contains the words that were found
-- note 2: empty AND-wordlist => True by logic (because: not falsified)
local function checkANDwords( tWorkf )
	local bANDchk = true -- main conclusion; stays true until falsified
	local tHits = {} -- hit table; filled by table.insert only, so it has no holes

	for _, word in ipairs( tWorkf.ANDwords ) do
		local hit = findWordInTable( tWorkf.SOURCEwords, word )
		if not hit then
			-- Falsified! We could break now logically, but we continue
			-- to complete the hit table (feature)
			bANDchk = false
		else
			table.insert( tHits, hit )
		end
	end
	return bANDchk, tHits
end

-- OR-logic with ORwords words: at least one word must be found
--   tWork.ORwords     : array of words to check
--   tWork.SOURCEwords : array of source words
-- returns two values: T/F, hittable
-- 		True when at least one OR word is found
-- 		hittable has all hit words
-- note 1: empty OR-wordlist => True by logic (because: not falsified)
-- note 2: while just one hitword is a True result, the hittable contains all words found
local function checkORwords( tWork )
	local tHits = {} -- filled by table.insert only, so it has no holes
	-- empty list: true by logic; otherwise false until a hit confirms
	local bORchk = ( #tWork.ORwords == 0 )

	for _, word in ipairs( tWork.ORwords ) do
		local hit = findWordInTable( tWork.SOURCEwords, word )
		if hit then
			-- Confirmed! Could break here logically, but the check is
			-- completed to collect all hits
			bORchk = true
			table.insert( tHits, hit )
		end
	end
	return bORchk, tHits
end

-- Determine the requested return value (a string)
--   tResults.resultALL : the logical conclusion (T/F)
--   tResults.tTRUE     : array of hit words (used when the result is True)
-- this function applies the tArgs.out_true / tArgs.out_false return value:
--   result False: tArgs.out_false, or '' when that is not set
--   result True : tArgs.out_true when set (could be '': blank return value);
--                 by default (out_true=nil) the hit words joined with tArgs.out_sep
--- todo add pref, suff
local function yesnoReturnstring( tResults )
	if tResults.resultALL == false then
		-- result False
		return tArgs.out_false or ''
	end
	-- result True
	if tArgs.out_true == nil then
		return table.concat( tResults.tTRUE or {}, tArgs.out_sep )
	end
	-- some |out-true= value is entered, could be ''; returned exactly as entered
	return tArgs.out_true
end

-- Combine the OR-hits and the AND-hits into one new array.
--   tResult.tORhits, tResult.tANDhits : arrays of hit words; either may be nil
-- Order of the result: all OR-hits first, then all AND-hits (not sorted
-- otherwise, duplicates are kept).
-- The input tables are not modified. Never returns nil: when both hit tables
-- are missing the result is an empty table (and ERR921 is reported when the
-- report module is loaded).
local function tCombinedSourceorderedTRUEtables( tResult )
	local tOut = {}

	if tResult.tANDhits == nil and tResult.tORhits == nil then
		-- `report` is only set when explain is on
		if report ~= nil then
			report.xpMessage( 'ERR921 BUG tOut is nil??? - tCombinedSourceorderedTRUEtables' )
		end
		return tOut
	end

	for _, v in ipairs( tResult.tORhits or {} ) do
		table.insert( tOut, v )
	end
	for _, v in ipairs( tResult.tANDhits or {} ) do
		table.insert( tOut, v )
	end
	return tOut
end

-- Join two word strings into one word string.
--   s1, s2 : word strings; a nil one is left out
--   sSep   : (optional) separator to put between them; when omitted the
--            module-level tArgs.sep is used, and ',' when that is not set either.
--            A separator is always needed: without one the last word of s1 and
--            the first word of s2 would merge into a single word.
-- returns: the joined string ('' when both inputs are nil)
local function concatAndLists( s1, s2, sSep )
	local tLists = {}
	if s1 ~= nil then
		table.insert( tLists, s1 )
	end
	if s2 ~= nil then
		table.insert( tLists, s2 )
	end
	return table.concat( tLists, sSep or tArgs.sep or ',' )
end

-- ===== ===== ===== ===== ===== ===== ===== ===== =====
-- PARSE arguments
--   origArgs : table with the arguments as entered
-- returns a new table with the parsed settings:
--   sep, sep_pattern, out_sep, case, booleans, out_true, out_false,
--   prefix, suffix, out_format, explain, explain_type, test,
--   source, sANDlist, sORlist (word strings, still unparsed),
--   synonyms, synonymsTables (the latter is populated by the caller)
-- NOTE(review): the alias `s` is read both for suffix and for source -- confirm
-- which one is intended.
local function parseArgs( origArgs )
	local tNewArgs = {}
	local tDefault = {
		sep      = ',',
		case     = false,
		booleans = false,
		out_sep  = ', ',
	}

	tNewArgs.sep			= setSepIn( origArgs['sep'], tDefault['sep'] )
	tNewArgs.sep_pattern	= escape_word( tNewArgs.sep )
	tNewArgs.out_sep		= setSepOut( origArgs['out-sep'] or origArgs['sep'], tDefault['out_sep'] )
	tNewArgs.case			= yesno( origArgs['case'] or origArgs['casesensitive'] ) or tDefault['case']
	tNewArgs.booleans		= yesno( origArgs['bool'] or origArgs['booleans'] ) or tDefault['booleans']
	-- nil = default so return the result string; keep '' as legal input & return value
	tNewArgs.out_true		= sDecodeTrim( origArgs.out_true )
	tNewArgs.out_false		= sDecodeTrim( origArgs.out_false ) or ''
	tNewArgs.prefix			= sDecodeTrim( origArgs.prefix or origArgs.p ) or ''
	tNewArgs.suffix			= sDecodeTrim( origArgs.suffix or origArgs.s ) or ''
	tNewArgs.out_format		= 'default' -- todo: table, default, htmllisttype, flatlist, first,
	-- TEST17Apr: explain is fixed to false here (origArgs.explain is not read),
	-- so parseReportType always starts from explain=false
	tNewArgs.explain		= false
	tNewArgs.explain_type	= parseReportType( tNewArgs ) or nil
	tNewArgs.test			= origArgs.test

	-- the wordlists:
	tNewArgs['source']		= origArgs['source'] or origArgs['s'] or ''
	-- The two AND-lists are joined with the separator parsed above; without a
	-- separator the last word of one list and the first word of the other
	-- would merge into one word.
	local tAndLists = {}
	local sWord = origArgs['word'] or origArgs['w']
	local sAndWords = origArgs['andwords'] or origArgs['andw']
	if sWord ~= nil then
		table.insert( tAndLists, sWord )
	end
	if sAndWords ~= nil then
		table.insert( tAndLists, sAndWords )
	end
	tNewArgs['sANDlist']	= table.concat( tAndLists, tNewArgs.sep )
	tNewArgs['sORlist']		= origArgs['orwords'] or origArgs['orw'] or ''

	-- Synonyms: every argument whose name starts with "_" (as in |_nov=November, 11).
	-- The name without the "_" is stored twice: appended to the array part
	-- (list of names) and as key with the entered word string as value.
	tNewArgs['synonyms']		= {}
	tNewArgs['synonymsTables']	= {} -- to be populated later
	for k, v in pairs( origArgs ) do
		if type( k ) == 'string' and str._match( k, '^_%S', 1, 1, false, false ) then
			local syn1 = mw.ustring.gsub( k, '^_', '', 1 )
			table.insert( tNewArgs['synonyms'], syn1 )
			tNewArgs['synonyms'][syn1] = v
		end
	end

	if tNewArgs.explain == true then
		initReport( tNewArgs.explain )
		-- parentheses needed: `..` binds tighter than `or`
		report.xpMessage( 'EXPLAIN: ' .. tostring( origArgs.explain ) .. '=>' .. tostring( tNewArgs.explain_type or 'unk' ) )
		report.xpReportSynonyms( tNewArgs )
	end

	return tNewArgs
end

-- ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== =====
-- _main function: check for presence of words in source string
-- Checks and returns:
-- 		when T: the string of all hitwords ( default ), or the out_true input
-- 		when F: empty string '' ( default ), or the out_false input
-- steps:
-- 1. input word strings are prepared ( parsed into an array of words )
-- 2. words checks are made ( applying AND-logic, OR-logic )
-- 3. final conclusion drawn ( T/F )
-- 4. based on T or F status, the return value ( string ) is established and returned
-- note 1: each return value ( out_true, out_false ) can be '' ( nullstring )
-- note 2: the module-level tArgs is (re)assigned here; the helper functions read it
function p._main( origArgs )
	local tWork = {}
	local tResults = {}

	tArgs = parseArgs( origArgs )

	-- make synonyms into tables
	-- 'aka1' = target synonym (= the synonym that remains)
	for aka1, sAkalist in pairs( tArgs['synonyms'] ) do
		tArgs['synonymsTables'][aka1] = buildWordTable( sAkalist )
	end

	-- build the worktables
	tWork['SOURCEwords']	= buildWordTable( tArgs.source )
	tWork['ANDwords']		= buildWordTable( tArgs.sANDlist )
	tWork['ORwords']		= buildWordTable( tArgs.sORlist )

	-- apply logic & conclude
	if ( #tWork.SOURCEwords == 0 ) or ( #tWork.ANDwords + #tWork.ORwords == 0 ) then
		-- No words to check
		tResults.resultALL = false
		-- `report` is only loaded when explain is on
		if report ~= nil and yesno( tArgs.explain, true ) then
			report.xpMessage( 'ERR201 No words to check' )
		end
	else
		tResults['bAND'], tResults['tANDhits']	= checkANDwords( tWork )
		tResults['bOR'], tResults['tORhits']	= checkORwords( tWork )
		tResults.resultALL = ( tResults.bAND ) and ( tResults.bOR )
	end

	if tResults.resultALL == true then
		tResults.tTRUE = tCombinedSourceorderedTRUEtables( tResults ) or {}
	end
	tResults.sRESULTstring = yesnoReturnstring( tResults )

	-- Return the result string only, so that a False result with default
	-- settings is the empty string as described above.
	return tResults.sRESULTstring
end

-- Entry point for #invoke: reads the arguments from the frame with
-- Module:Arguments and hands them to p._main.
function p.main( frame )
	return p._main( mArgs.getArgs( frame ) )
end

return p