-- Module:Sandbox/genewiki/geneboxdev

-- This code is originally from https://en.wikipedia.org/wiki/Module:Sandbox/RexxS/AA
-- and was copied here (9/17/2015) for modification and reusability.
-- Testing for arbitrary access.
-- (The invocation examples that followed "Use :" and "E.g.:" are missing from this copy.)
-- Intended to be:
-- Use :
-- E.g.: - to fetch the value of 'spouse' (P26) from 'Richard Burton' (Q151973)
-- While in sandbox:
-- Use :
-- E.g.: - to fetch the value of 'spouse' (P26) from 'Richard Burton' (Q151973)
-- Added extra parameters for choosing the rank (preferred, normal, truth, deprecated)
-- and a "random" value (just the first in the list).
-- E.g.:

-- Export table: the functions below are attached to it as fields.
local p = {}

-- This is used to get a value, or a comma separated list of them if multiple values exist

-- Returns the value(s) of one property of a Wikidata item as text.
--
-- frame.args:
--   [1] item ID, e.g. "Q151973"
--   [2] property ID, e.g. "P26"
--   [3] "FETCH_WIKIDATA" to read from Wikidata; any other text is returned as-is
--   [4] optional rank name: either a key of mw.wikibase.entity.claimRanks
--       (e.g. "RANK_NORMAL") or its short form (e.g. "normal"). When empty the
--       whole claimRanks table is passed to formatPropertyValues; an
--       unrecognised name passes no rank filter at all.
--   [5] optional; when non-empty only the text before the first comma is returned
--
-- Returns "" when the item or the property cannot be found. When the first
-- claim is an item reference, the labels of all item values are joined with
-- ", " and arguments [4] and [5] are not consulted.
p.getValueFromID = function(frame)
	local itemID = mw.text.trim(frame.args[1] or "")
	local propertyID = mw.text.trim(frame.args[2] or "")
	local input_parm = mw.text.trim(frame.args[3] or "")
	local input_rank = mw.text.trim(frame.args[4] or "")
	local random = mw.text.trim(frame.args[5] or "")

	-- Anything other than the magic word is a locally supplied value.
	if input_parm ~= "FETCH_WIKIDATA" then
		return input_parm
	end

	-- getEntity may yield nothing for an unknown item; do not index nil.
	local entity = mw.wikibase.getEntity(itemID)
	local claims = entity and entity.claims and entity.claims[propertyID]
	if not claims then
		return ""
	end

	local first = claims[1]
	if first and first.mainsnak.snaktype == "value"
		and first.mainsnak.datavalue.type == "wikibase-entityid" then
		-- NOTE(review): the original looked up a sitelink here but emitted the
		-- bare label in both branches; link markup may have been lost from
		-- this copy - confirm before turning the label into a wiki link.
		local out = {}
		-- ipairs keeps the statements in their stored order.
		for _, claim in ipairs(claims) do
			local snak = claim.mainsnak
			-- "somevalue"/"novalue" snaks carry no datavalue; skip them.
			if snak.snaktype == "value" and snak.datavalue then
				local qid = "Q" .. snak.datavalue.value["numeric-id"]
				local label = mw.wikibase.label(qid)
				if label == nil then
					label = qid
				end
				out[#out + 1] = "" .. label .. ""
			end
		end
		return table.concat(out, ", ")
	end

	local results
	if input_rank == "" then
		results = entity:formatPropertyValues(propertyID, mw.wikibase.entity.claimRanks).value
	else
		-- Look the rank up by the *value* of input_rank. The original read the
		-- literal field "input_rank", which does not exist, so the requested
		-- rank was never applied.
		local ranks = mw.wikibase.entity.claimRanks
		local rank = ranks[input_rank] or ranks["RANK_" .. string.upper(input_rank)]
		-- A rank constant may be 0, which is truthy in Lua, so and/or is safe.
		results = entity:formatPropertyValues(propertyID, rank and { rank } or nil).value
	end

	if random == "" then
		return results
	end
	return mw.text.split(results, ",")[1]
end

-- A function to return the QID of a property value, rather than its text label.
-- May be useful for constructing chains of calls to get properties of properties, etc.
-- It returns the QID of only the first property value if there is more than one.
-- Use like this:
-- That will fetch the QID of the first value for the spouse (P26) of Richard Burton (Q151973).
-- Returns an empty string if the value doesn't exist or has no QID.

-- Returns the QID of the first value of a property, or "" when the item, the
-- property or an item-typed first value is missing.
--
-- frame.args:
--   [1] item ID, e.g. "Q151973"
--   [2] property ID, e.g. "P26"
--   [3] accepted for symmetry with the other functions but not used
p.getQIDFromID = function(frame)
	local itemID = mw.text.trim(frame.args[1] or "")
	local propertyID = mw.text.trim(frame.args[2] or "")

	-- Guard every step: an unknown item or an item without claims must not
	-- raise "attempt to index a nil value".
	local entity = mw.wikibase.getEntity(itemID)
	local claims = entity and entity.claims and entity.claims[propertyID]
	local first = claims and claims[1]

	if first and first.mainsnak.snaktype == "value"
		and first.mainsnak.datavalue.type == "wikibase-entityid" then
		return "Q" .. first.mainsnak.datavalue.value["numeric-id"]
	end
	return ""
end

-- Returns the formatted value of a property; for quantity values the commas
-- between digits (thousand separators) are removed.
--
-- frame.args:
--   [1] item ID
--   [2] property ID
--   [3] "FETCH_WIKIDATA" to read from Wikidata; any other text is returned as-is
--
-- Returns "" when the item or the property cannot be found.
p.getRawValueFromID = function(frame)
	local itemID = mw.text.trim(frame.args[1] or "")
	local propertyID = mw.text.trim(frame.args[2] or "")
	local input_parm = mw.text.trim(frame.args[3] or "")

	if input_parm ~= "FETCH_WIKIDATA" then
		return input_parm
	end

	-- An unknown item must give "" rather than a Lua error.
	local entity = mw.wikibase.getEntity(itemID)
	local claims = entity and entity.claims and entity.claims[propertyID]
	if not claims then
		return ""
	end

	local result = entity:formatPropertyValues(propertyID, mw.wikibase.entity.claimRanks).value
	local first = claims[1]
	-- if number type: remove thousand separators
	if first and first.mainsnak.snaktype == "value"
		and first.mainsnak.datavalue.type == "quantity" then
		-- Parentheses drop gsub's second return value (the match count).
		result = (mw.ustring.gsub(result, "(%d),(%d)", "%1%2"))
	end
	return result
end

-- This could be expanded into more levels taking a list in. But the deeper one
-- abstracts the more likely one level won't return a value and error checking
-- could be tedious.
--
-- Follows two properties in a row: takes the first value of a property on the
-- parent item, then returns the QID of the first value of a second property on
-- that item. Returns "" as soon as any step has no item-typed value.
--
-- frame.args:
--   [1] parent item ID (first QID)
--   [2] property ID to read on the parent item
--   [3] property ID to read on the item found in step one
--   [4] accepted but not used
p.getQIDFromID_two_levels = function(frame)
	local parent_itemID = mw.text.trim(frame.args[1] or "")
	local parent_propertyID = mw.text.trim(frame.args[2] or "")
	local child_propertyID = mw.text.trim(frame.args[3] or "")

	-- QID of the first value of propertyID on itemID, or nil when the item,
	-- the property or an item-typed first value is missing.
	local function first_linked_qid(itemID, propertyID)
		local entity = mw.wikibase.getEntity(itemID)
		local claims = entity and entity.claims and entity.claims[propertyID]
		local first = claims and claims[1]
		if first and first.mainsnak.snaktype == "value"
			and first.mainsnak.datavalue.type == "wikibase-entityid" then
			return "Q" .. first.mainsnak.datavalue.value["numeric-id"]
		end
		return nil
	end

	local child_itemID = first_linked_qid(parent_itemID, parent_propertyID)
	if not child_itemID then
		return ""
	end
	-- The original indexed the child's claims without checking that the
	-- property exists there, which raised an error instead of returning "".
	return first_linked_qid(child_itemID, child_propertyID) or ""
end

-- Collects the QIDs found in one qualifier across all claims of a property and
-- returns them joined with ", ".
--
-- frame.args:
--   [1] property ID whose claims are inspected
--   [2] qualifier property ID to read on each claim
--   [3] "FETCH_WIKIDATA" to read from Wikidata; any other text is returned as-is
--   [4] optional item ID; when empty getEntityObject is called without an ID
--
-- Returns "" when the item has no claims for the property.
p.getQualifierID = function(frame)
	local propertyID = mw.text.trim(frame.args[1] or "")
	local qualifierID = mw.text.trim(frame.args[2] or "")
	local input_parm = mw.text.trim(frame.args[3] or "")
	local itemID = mw.text.trim(frame.args[4] or "")

	if input_parm ~= "FETCH_WIKIDATA" then
		return input_parm
	end

	local entity
	if itemID ~= "" then
		entity = mw.wikibase.getEntityObject(itemID)
	else
		-- The original assigned the function itself here (missing call
		-- parentheses), so the next line tried to index a function value.
		entity = mw.wikibase.getEntityObject()
	end

	local claims = entity and entity.claims and entity.claims[propertyID]
	if not claims then
		return ""
	end

	local out = {}
	for _, claim in ipairs(claims) do
		-- A claim may carry no qualifiers at all, or none with this ID.
		local quals = claim.qualifiers and claim.qualifiers[qualifierID]
		if quals then
			for _, qual in ipairs(quals) do
				-- Only item-typed values have a "numeric-id" to report.
				if qual.snaktype == 'value' and qual.datavalue
					and qual.datavalue.type == "wikibase-entityid" then
					out[#out + 1] = "Q" .. qual.datavalue.value["numeric-id"]
				end
			end
		end
	end
	return table.concat(out, ", ")
end

-- Returns the newest "hg" genome build alias (e.g. "hg38") referenced by the
-- P659 qualifiers on the claims of a property.
--
-- frame.args:
--   [1] property ID whose claims carry the qualifier; the original comments
--       say only P644 (genomic start) or P645 (genomic end) make sense
--   [2] accepted but ignored - the qualifier is hard-coded to P659
--   [3] accepted but ignored
--   [4] item ID; per the original comments an ID unknown to Wikidata raises
--       "The ID entered is unknown to the system. Please use a valid entity ID."
--
-- Returns "" when the item has no claims for the property. When claims exist
-- but no "hg" alias is found the result is "hg0", as in the original.
p.getAliasFromGenomeAssembly_hs = function(frame)
	local propertyID = mw.text.trim(frame.args[1] or "")
	local itemID = mw.text.trim(frame.args[4] or "")
	-- will track the different builds pulled from the qualifiers
	local newest_build = "0"

	local entity = mw.wikibase.getEntityObject(itemID)
	local claims = entity and entity.claims and entity.claims[propertyID]
	if not (claims and claims[1]) then
		return ""
	end

	-- Compare build numbers numerically when both parse as numbers, so that
	-- "10" beats "9"; otherwise keep the original string comparison.
	local function is_newer(candidate, current)
		local a, b = tonumber(candidate), tonumber(current)
		if a and b then
			return a > b
		end
		return candidate > current
	end

	for _, claim in ipairs(claims) do
		-- A claim may have no qualifiers at all.
		local quals = claim.qualifiers and claim.qualifiers.P659
		if quals then
			for _, qual in ipairs(quals) do
				if qual.snaktype == "value" and qual.datavalue
					and qual.datavalue.type == "wikibase-entityid" then
					-- Genome builds are items in Wikidata; their English
					-- aliases are used to pull out the version number.
					local build_item = mw.wikibase.getEntityObject("Q" .. qual.datavalue.value["numeric-id"])
					local aliases = build_item and build_item.aliases and build_item.aliases.en
					if aliases then
						for _, alias in ipairs(aliases) do
							if string.match(alias.value, '^hg') then
								-- Parentheses drop gsub's match count.
								local build_no = (alias.value:gsub("hg", ""))
								if is_newer(build_no, newest_build) then
									newest_build = build_no
								end
							end
						end
					end
				end
			end
		end
	end
	return "hg" .. newest_build
end

-- in future could just combine this with getChromosomeLoc once all in one code
--
-- Returns the newest "mm" genome build alias (e.g. "mm10") referenced by the
-- P659 qualifiers on a property of the item that the gene's P684 claim points
-- to (the original comments call that item the mouse gene).
--
-- frame.args:
--   [1] property ID read on the P684 target; the original comments say only
--       P644 (genomic start) or P645 (genomic end) make sense
--   [2] accepted but ignored - the qualifier is hard-coded to P659
--   [3] accepted but ignored
--   [4] item ID of the gene that carries the P684 claim
--
-- Returns "" when the gene, its P684 target or the property is missing. When
-- claims exist but no "mm" alias is found the result is "mm0", as in the
-- original.
p.getAliasFromGenomeAssembly_mm = function(frame)
	local propertyID = mw.text.trim(frame.args[1] or "")
	local itemID = mw.text.trim(frame.args[4] or "")
	-- will track the different builds pulled from the qualifiers
	local newest_build = "0"
	local mouse_propertyID = "P684"

	-- use itemID (QID) to get the QID of the P684 target
	local entity_gene = mw.wikibase.getEntity(itemID)
	local claims_gene = entity_gene and entity_gene.claims and entity_gene.claims[mouse_propertyID]
	local ortholog = claims_gene and claims_gene[1]
	if not (ortholog and ortholog.mainsnak.snaktype == "value"
		and ortholog.mainsnak.datavalue.type == "wikibase-entityid") then
		-- The original fell off the end (returning nothing) when the P684
		-- claim was absent; return "" like every other miss.
		return ""
	end

	local mouse_itemID = "Q" .. ortholog.mainsnak.datavalue.value["numeric-id"]
	local entity_mouse = mw.wikibase.getEntity(mouse_itemID)
	local claims_mouse = entity_mouse and entity_mouse.claims and entity_mouse.claims[propertyID]
	if not (claims_mouse and claims_mouse[1]) then
		return ""
	end

	-- Compare build numbers numerically when both parse as numbers: as
	-- strings "9" > "10", which would rank mm9 above mm10.
	local function is_newer(candidate, current)
		local a, b = tonumber(candidate), tonumber(current)
		if a and b then
			return a > b
		end
		return candidate > current
	end

	for _, claim in ipairs(claims_mouse) do
		-- A claim may have no qualifiers at all.
		local quals = claim.qualifiers and claim.qualifiers.P659
		if quals then
			for _, qual in ipairs(quals) do
				if qual.snaktype == "value" and qual.datavalue
					and qual.datavalue.type == "wikibase-entityid" then
					-- Genome builds are items in Wikidata; their English
					-- aliases are used to pull out the version number.
					local build_item = mw.wikibase.getEntityObject("Q" .. qual.datavalue.value["numeric-id"])
					local aliases = build_item and build_item.aliases and build_item.aliases.en
					if aliases then
						for _, alias in ipairs(aliases) do
							if string.match(alias.value, '^mm') then
								-- Parentheses drop gsub's match count.
								local build_no = (alias.value:gsub("mm", ""))
								if is_newer(build_no, newest_build) then
									newest_build = build_no
								end
							end
						end
					end
				end
			end
		end
	end
	return "mm" .. newest_build
end

---getChromosomeLoc ---input propertyID ie(Genomic start) P644 ---     qualifierID (ie GenLoc Assembly) P659 ---     input_parm (ie FETCH_WIKIDATA) ---     ---output preferred chromosome location start value in this case it would be 49893092

--for debug window -- Q14865053 --frame = mw.getCurrentFrame --frame.args = {"P644","P659","FETCH_WIKIDATA","Q14865053"} --print(p.getChromosomeLoc(frame))

-- Returns the value of the claim that is qualified (P659) with the newest "hg"
-- genome build, i.e. the coordinate for the most recent human assembly.
--
-- frame.args:
--   [1] property ID holding the coordinate; the original comments say only
--       P644 (genomic start) or P645 (genomic end) make sense
--   [2] accepted but ignored - the qualifier is hard-coded to P659
--   [3] accepted but ignored
--   [4] item ID; per the original comments an ID unknown to Wikidata raises
--       "The ID entered is unknown to the system. Please use a valid entity ID."
--
-- A claim without P659 qualifiers overwrites the result with its own value
-- ("might as well return it"). Returns "" when nothing is found.
p.getChromosomeLoc = function(frame)
	local propertyID = mw.text.trim(frame.args[1] or "")
	local itemID = mw.text.trim(frame.args[4] or "")
	-- will contain the value for the requested coordinate
	local output = ""
	-- will track the different builds pulled from the qualifiers
	local newest_build = "0"

	local entity = mw.wikibase.getEntityObject(itemID)
	local claims = entity and entity.claims and entity.claims[propertyID]
	if not (claims and claims[1]) then
		return ""
	end

	-- Compare build numbers numerically when both parse as numbers, so that
	-- "10" beats "9"; otherwise keep the original string comparison.
	local function is_newer(candidate, current)
		local a, b = tonumber(candidate), tonumber(current)
		if a and b then
			return a > b
		end
		return candidate > current
	end

	for _, claim in ipairs(claims) do
		-- "somevalue"/"novalue" claims have no datavalue and hence no location.
		local datavalue = claim.mainsnak.datavalue
		if datavalue then
			local location = datavalue.value
			-- gets the qualifiers linked to the current claim, if any
			local quals = claim.qualifiers and claim.qualifiers.P659
			if quals then
				for _, qual in ipairs(quals) do
					if qual.snaktype == "value" and qual.datavalue
						and qual.datavalue.type == "wikibase-entityid" then
						-- Genome builds are items in Wikidata; their English
						-- aliases are used to pull out the version number.
						local build_item = mw.wikibase.getEntityObject("Q" .. qual.datavalue.value["numeric-id"])
						local aliases = build_item and build_item.aliases and build_item.aliases.en
						if aliases then
							for _, alias in ipairs(aliases) do
								if string.match(alias.value, '^hg') then
									-- Parentheses drop gsub's match count.
									local build_no = (alias.value:gsub("hg", ""))
									-- report only the location associated with the most
									-- recent build; one location per build is enough
									if is_newer(build_no, newest_build) then
										output = location
										newest_build = build_no
									end
								end
							end
						end
					end
				end
			else
				-- in case there are no qualifiers, but there is a location
				output = location
			end
		end
	end
	return output
end

-- Returns the value of the claim that is qualified (P659) with the newest "mm"
-- genome build, read from the item that the gene's P684 claim points to (the
-- original comments call that item the mouse gene).
--
-- frame.args:
--   [1] property ID holding the coordinate on the P684 target; the original
--       comments say only P644 (genomic start) or P645 (genomic end) make sense
--   [2] accepted but ignored - the qualifier is hard-coded to P659
--   [3] accepted but ignored
--   [4] item ID of the gene that carries the P684 claim
--
-- A claim without P659 qualifiers overwrites the result with its own value.
-- Returns "" when the gene, its P684 target or the property is missing.
p.getChromosomeLoc_mm = function(frame)
	local propertyID = mw.text.trim(frame.args[1] or "")
	local itemID = mw.text.trim(frame.args[4] or "")
	-- will contain the value for the requested coordinate
	local output = ""
	-- will track the different builds pulled from the qualifiers
	local newest_build = "0"
	local mouse_propertyID = "P684"

	-- use itemID (QID) to get the QID of the P684 target
	local entity_gene = mw.wikibase.getEntity(itemID)
	local claims_gene = entity_gene and entity_gene.claims and entity_gene.claims[mouse_propertyID]
	local ortholog = claims_gene and claims_gene[1]
	if not (ortholog and ortholog.mainsnak.snaktype == "value"
		and ortholog.mainsnak.datavalue.type == "wikibase-entityid") then
		-- The original fell off the end (returning nothing) when the P684
		-- claim was absent; return "" like every other miss.
		return ""
	end

	local mouse_itemID = "Q" .. ortholog.mainsnak.datavalue.value["numeric-id"]
	local entity_mouse = mw.wikibase.getEntity(mouse_itemID)
	local claims_mouse = entity_mouse and entity_mouse.claims and entity_mouse.claims[propertyID]
	if not (claims_mouse and claims_mouse[1]) then
		return ""
	end

	-- Compare build numbers numerically when both parse as numbers: as
	-- strings "9" > "10", which would prefer the mm9 location over mm10.
	local function is_newer(candidate, current)
		local a, b = tonumber(candidate), tonumber(current)
		if a and b then
			return a > b
		end
		return candidate > current
	end

	for _, claim in ipairs(claims_mouse) do
		-- "somevalue"/"novalue" claims have no datavalue and hence no location.
		local datavalue = claim.mainsnak.datavalue
		if datavalue then
			local location = datavalue.value
			-- gets the qualifiers linked to the current claim, if any
			local quals = claim.qualifiers and claim.qualifiers.P659
			if quals then
				for _, qual in ipairs(quals) do
					if qual.snaktype == "value" and qual.datavalue
						and qual.datavalue.type == "wikibase-entityid" then
						-- Genome builds are items in Wikidata; their English
						-- aliases are used to pull out the version number.
						local build_item = mw.wikibase.getEntityObject("Q" .. qual.datavalue.value["numeric-id"])
						local aliases = build_item and build_item.aliases and build_item.aliases.en
						if aliases then
							for _, alias in ipairs(aliases) do
								if string.match(alias.value, '^mm') then
									-- Parentheses drop gsub's match count.
									local build_no = (alias.value:gsub("mm", ""))
									-- report only the location associated with the most
									-- recent build; one location per build is enough
									if is_newer(build_no, newest_build) then
										output = location
										newest_build = build_no
									end
								end
							end
						end
					end
				end
			else
				-- in case there are no qualifiers, but there is a location
				output = location
			end
		end
	end
	return output
end

--eg:

-- This function is used to generate a list of aliases.
-- To Do: exclude gene symbol that is already displayed
--
-- frame.args:
--   from  ID of the item whose English aliases are listed
--
-- Returns every English alias prefixed with ", " (so the text starts with a
-- separator, exactly as before), or "" when the item or its English aliases
-- are missing. The original returned nothing in that case and leaked its
-- working variables as globals.
function p.get_aliases(frame)
	local entity = mw.wikibase.getEntityObject(frame.args['from'])
	local aliases = entity and entity.aliases and entity.aliases.en
	if not aliases then
		return ""
	end

	-- Collect the pieces and join once instead of growing a string in a loop.
	local parts = {}
	for _, alias in ipairs(aliases) do
		parts[#parts + 1] = ', ' .. alias.value
	end
	return table.concat(parts)
end

-- Strips a leading-style "chromosome " / "mouse chromosome " phrase from the
-- first argument.
--
-- frame.args:
--   [1] text to trim
--
-- Returns the text with every "mouse chromosome " removed when the pattern
-- 'mouse.chromosome.' matches, otherwise with every "chromosome " removed when
-- 'chromosome.' matches, otherwise ''. In both patterns '.' matches any
-- character.
p.trimChromosome = function(frame)
	local raw = mw.text.trim(frame.args[1] or "")

	-- The mouse form is tested first: whenever it matches, the plain form
	-- matches too and the original let the mouse result win.
	if raw:find('mouse.chromosome.') then
		-- Parentheses keep only gsub's first result (the new string).
		return (raw:gsub("mouse chromosome ", ""))
	end

	if raw:find('chromosome.') then
		return (raw:gsub("chromosome ", ""))
	end

	return ''
end

-- Builds a bulleted list of external links for the item values of a property,
-- one "*[url label]" line per value, where the URL ends in the value of a
-- second property read from each linked item.
--
-- frame.args:
--   [1] item ID of the parent item
--   [2] property ID on the parent (per the original comment: molecular,
--       cellular, function)
--   [3] property ID read on each linked item (per the original comment:
--       Gene Ontology ID)
--   [4] "FETCH_WIKIDATA" to read from Wikidata; any other text is returned as-is
--
-- Returns "" when the parent item or property is missing. When the first claim
-- is not an item reference the formatted values of the parent property are
-- returned instead.
p.getGO = function(frame)
	local itemID_parent = mw.text.trim(frame.args[1] or "")
	local propertyID_parent = mw.text.trim(frame.args[2] or "")
	local propertyID_child = mw.text.trim(frame.args[3] or "")
	local input_parm = mw.text.trim(frame.args[4] or "")

	if input_parm ~= "FETCH_WIKIDATA" then
		return input_parm
	end

	local entity = mw.wikibase.getEntity(itemID_parent)
	local claims = entity and entity.claims and entity.claims[propertyID_parent]
	if not claims then
		return ""
	end

	local first = claims[1]
	if not (first and first.mainsnak.snaktype == "value"
		and first.mainsnak.datavalue.type == "wikibase-entityid") then
		-- The original passed an undefined global `propertyID` here.
		return entity:formatPropertyValues(propertyID_parent, mw.wikibase.entity.claimRanks).value
	end

	local link_prefix = "http://amigo.geneontology.org/amigo/term/GO:"
	local out = {}
	for _, claim in ipairs(claims) do
		local snak = claim.mainsnak
		-- "somevalue"/"novalue" snaks carry no datavalue; skip them.
		if snak.snaktype == "value" and snak.datavalue then
			local term_id = "Q" .. snak.datavalue.value["numeric-id"]
			local term = mw.wikibase.getEntityObject(term_id)
			local label = mw.wikibase.label(term_id)
			if label == nil then
				label = term_id
			end
			if term and term.claims and term.claims[propertyID_child] then
				local result_GOID = term:formatPropertyValues(propertyID_child, mw.wikibase.entity.claimRanks).value
				out[#out + 1] = "*[" .. link_prefix .. result_GOID .. " " .. label .. "]\n"
			else
				-- The original returned "" for the whole list as soon as one
				-- term lacked the child property; emit the unlinked bullet
				-- (its previously unreachable fallback format) instead.
				out[#out + 1] = "*" .. label .. "\n"
			end
		end
	end
	return table.concat(out, "")
end

-- Returns the values of a property as external links of the form
-- "[<url><id> <id>]", joined with ", ".
--
-- frame.args:
--   [1] item ID
--   [2] property ID whose values are used as IDs in the link
--   [3] "FETCH_WIKIDATA" to read from Wikidata; any other text is returned as-is
--
-- Returns "" when the item or the property is missing. When the first claim
-- has no value the formatted values of the property are returned instead.
p.getPDB = function(frame)
	local itemID = mw.text.trim(frame.args[1] or "")
	local propertyID = mw.text.trim(frame.args[2] or "")
	local input_parm = mw.text.trim(frame.args[3] or "")

	if input_parm ~= "FETCH_WIKIDATA" then
		return input_parm
	end

	-- An unknown item must give "" rather than a Lua error.
	local entity = mw.wikibase.getEntity(itemID)
	local claims = entity and entity.claims and entity.claims[propertyID]
	if not claims then
		return ""
	end

	local first = claims[1]
	if not (first and first.mainsnak.snaktype == "value") then
		return entity:formatPropertyValues(propertyID, mw.wikibase.entity.claimRanks).value
	end

	local link_prefix = "http://www.rcsb.org/pdb/explore/explore.do?pdbId="
	local out = {}
	-- ipairs keeps the statements in their stored order.
	for _, claim in ipairs(claims) do
		local snak = claim.mainsnak
		-- Later claims may be "somevalue"/"novalue" and carry no datavalue.
		if snak.snaktype == "value" and snak.datavalue then
			local value = snak.datavalue.value
			-- Kept from the original: a label lookup on the raw value, falling
			-- back to the value itself when there is no label.
			local label = mw.wikibase.label(value)
			if label == nil then
				label = value
			end
			out[#out + 1] = "[" .. link_prefix .. label .. " " .. label .. "]"
		end
	end
	return table.concat(out, ", ")
end

-- Debug helper: loads an entity and walks into it along a path of keys.
--
-- Arguments are read from `frame` when it has a first positional argument,
-- otherwise from the parent frame:
--   [1]    item ID; when empty getEntityObject is called without an ID
--   [2...] keys to follow, one nesting level each (numeric strings are also
--          tried as numbers)
--
-- Returns the value reached: tables as pretty-printed JSON, anything else via
-- tostring. Returns nil when the entity or any step of the path is missing.
function p.ViewSomething(frame)
	-- getParent must be *called*; without a parent fall back to frame itself.
	local f = frame.args[1] and frame or frame:getParent() or frame

	local itemID = mw.text.trim(f.args[1] or "")
	if itemID == "" then
		itemID = nil
	end

	local data = mw.wikibase.getEntityObject(itemID)
	if not data then
		return nil
	end

	-- The path starts at the second argument. The original started at 1 and
	-- so used the item ID itself as the first key, which never matched.
	local i = 2
	while true do
		local index = f.args[i]
		if not index then
			if type(data) == "table" then
				return mw.text.jsonEncode(data, mw.text.JSON_PRESERVE_KEYS + mw.text.JSON_PRETTY)
			else
				return tostring(data)
			end
		end

		-- Only tables can be descended into; indexing a number would error.
		if type(data) ~= "table" then
			return
		end
		data = data[index] or data[tonumber(index)]
		if not data then
			return
		end
		i = i + 1
	end
end

-- Return the export table as the module's result.
return p