#!/usr/bin/python
# User:User A1/svgTinker.py
# From Wikipedia, the free encyclopedia


from BeautifulSoup import BeautifulStoneSoup, Tag
import sys
import re

#Split all inkscape style=" " into known attributes for that tag
def splitInkscapeStyle(tag) :
	"""Expand a tag's ``style="a:b;c:d"`` attribute into separate attributes.

	Each ``name:value`` declaration in the style string is written back
	onto *tag* as its own attribute, after which the ``style`` attribute
	itself is removed.

	tag -- any object with ``get``, item assignment and item deletion
	       keyed by attribute name.

	Returns False (leaving the tag untouched) when the tag has no style
	attribute or the attribute is empty, True otherwise.
	"""
	strStyle = tag.get("style")
	if not strStyle:
		return False

	for declaration in strStyle.split(";"):
		#Split on the first colon only, so values that themselves contain
		#a colon (e.g. url(http://...)) are kept whole
		name, colon, value = declaration.partition(":")
		name = name.strip()

		#Skip empty fragments (trailing ";") and malformed declarations
		if not colon or not name:
			continue

		tag[name] = value.strip()

	del tag['style']
	return True

#By Peter Waller, BS: Replacing a tag with its contents, BeautifulSoup mailing list
def tagRemove(tag, tagname):
	"""Replace *tag* with its own children, in order, inside its parent.

	tag     -- the element to dissolve; it must expose ``parent``,
	           ``contents`` and ``extract()``, and its parent must expose
	           ``contents`` and ``insert(position, child)``.
	tagname -- unused; kept so existing callers keep working.

	A tag that has no parent is left untouched.
	"""
	parent = tag.parent
	if parent is None:
		return

	#Locate the tag's position by identity, so that an equal-looking
	#sibling cannot be mistaken for it
	for origIndex, sibling in enumerate(parent.contents):
		if sibling is tag:
			break
	else:
		raise ValueError("tagRemove: tag not found in its parent")

	#Iterate over a snapshot: inserting a child into the parent detaches it
	#from tag.contents, and iterating the live list would skip every other
	#child (which would then be thrown away together with the tag)
	for offset, content in enumerate(list(tag.contents)):
		parent.insert(origIndex + offset, content)

	#Excise the now empty tag
	tag.extract()


def epsilon():
	"""Return a float so small that adding it to 1.0 leaves 1.0 unchanged.

	Starting from 1.0 the value is halved until ``eps + 1.0 > 1.0`` stops
	being true, so the result is the largest power of two that vanishes
	when added to 1.0 (one halving below the smallest that does not).
	"""
	eps = 1.0

	#True division is required here: floor division would turn 1.0 into
	#0.0 on the first pass and the function would always return 0.0
	while eps + 1.0 > 1.0 :
		eps /= 2.0

	return eps


def hasFontFace(tag):
	"""Return True if the tag's text contains an ``@font-face`` rule.

	tag -- any object with a ``string`` attribute holding its text (or
	       None / empty when it has none).
	"""
	text = tag.string
	if not text:
		return False

	#Check for encoded font base64.  A membership test is used because
	#str.find() returns -1 (truthy) on a miss and 0 (falsy) for a hit at
	#the very start of the text.
	return "@font-face" in text

#Takes a stone-soup tag and applies various
#workaround fixes of dubious effectiveness
def fontFix(tag):
	"""Normalise a tag's ``font-family`` that encodes weight/style in its name.

	Three problems are detected in the ``font-family`` attribute:

	* a ``-bold`` suffix (e.g. ``Foo-Bold``): the suffix is removed and
	  ``font-weight`` is set to ``Bold`` unless it already mentions bold;
	* an ``-italic`` suffix: the suffix is removed and ``font-style`` is set
	  to ``Italic`` unless it already mentions italic;
	* a name starting with ``dejavusans`` (optionally preceded by a single
	  quote): the family is replaced by ``DejaVu Sans``.

	All matching is case-insensitive.  Tags without a ``font-family``, or
	with none of the problems above, are left untouched.  An existing
	``font-weight``/``font-style`` that does not mention bold/italic is
	overwritten, because these attributes hold a single keyword.

	tag -- any object with ``get`` and item assignment keyed by attribute
	       name.
	"""
	family = tag.get("font-family")
	if family is None:
		return

	#Flags are passed explicitly rather than inline: an inline (?i) that
	#is not at the very start of a pattern is rejected by newer Pythons
	suffixRe = re.compile("-(?:bold|italic)", re.IGNORECASE)
	bold = re.search("-bold", family, re.IGNORECASE)
	italic = re.search("-italic", family, re.IGNORECASE)
	dejavu = re.match("'?dejavusans", family, re.IGNORECASE)

	#if none of the above apply we can skip
	if not bold and not italic and not dejavu:
		return

	problems = ""
	if bold:
		problems += "bad bolding method "
	if italic:
		problems += "bad italicising method "
	if dejavu:
		problems += "wrong font name"

	print("Fixing tag : " + problems)
	print(tag)

	#Move the weight out of the font name and into font-weight
	if bold:
		weight = tag.get("font-weight")
		if weight is None or not re.search("bold", weight, re.IGNORECASE):
			tag["font-weight"] = "Bold"

	#Move the slant out of the font name and into font-style
	if italic:
		style = tag.get("font-style")
		if style is None or not re.search("italic", style, re.IGNORECASE):
			tag["font-style"] = "Italic"

	if dejavu:
		#Fix dejavu vs Deja Vu
		tag["font-family"] = "DejaVu Sans"
	else:
		#Strip the embedded bold/italic markers from the family name
		tag["font-family"] = suffixRe.sub("", family)



#Check to see if a small font is being used in conjunction with a
#magnifying transform; if so, enlarge the font and shrink the transform
def fontSizeFix(tag):
	"""Trade a magnifying transform on *tag* for a larger ``font-size``.

	The tag's ``transform`` must consist of a single ``matrix(a b c d e f)``
	or ``scale(sx [sy])``.  If the matrix is (nearly) diagonal and its
	dominant diagonal entry is greater than 1, the ``font-size`` found on
	the tag or its nearest ancestor is multiplied by that entry (its unit
	suffix is kept), both diagonal entries are divided by it, and the
	transform is rewritten in ``matrix(...)`` form.

	tag -- any object with ``get``, item access/assignment keyed by
	       attribute name, and a ``parent`` attribute (None at the root).

	Returns True when the tag was rewritten.  Returns False, leaving
	everything untouched, when there is no transform, no font-size can be
	found, the transform is not a lone matrix/scale, the matrix has
	off-diagonal terms, the scale does not magnify, or the font-size is not
	numeric.

	NOTE(review): only the scale entries are compensated; the translation
	entries and any x/y position on the element are not adjusted, and every
	other element inheriting the enlarged font-size is affected as well.
	"""
	#without a transformation there is nothing we can do.  Attribute lookup
	#is used because "in" on a BeautifulSoup tag tests its children.
	transform = tag.get("transform")
	if transform is None:
		return False

	#Find the nearest element (this tag or an ancestor) with a font-size
	parentTag = tag
	while parentTag is not None and parentTag.get("font-size") is None:
		parentTag = parentTag.parent
	if parentTag is None:
		return False

	#Transform should be of the form y=Mx+b
	m = _parseSimpleTransform(transform)
	if m is None:
		return False

	EPSILON = 0.001
	if abs(m[1]) >= EPSILON or abs(m[2]) >= EPSILON:
		#Rotation/skew present: M is not a diagonal matrix
		return False

	if abs(m[0]) > abs(m[3]) :
		factor = m[0]
	else:
		factor = m[3]

	#Only magnifying transforms are folded into the font size; this also
	#rules out zero and negative factors before dividing by them
	if factor <= 1:
		return False

	newSize = _scaleFontSize(parentTag["font-size"], factor)
	if newSize is None:
		return False

	#Pump up the font size by factor, then reduce the matrix to match
	parentTag["font-size"] = newSize
	m[0] = m[0] / factor
	m[3] = m[3] / factor

	tag["transform"] = "matrix(" + " ".join([str(v) for v in m]) + ")"
	return True


def _parseSimpleTransform(transform):
	#Return [a, b, c, d, e, f] as floats for a transform made of exactly one
	#matrix(...) or scale(...), or None for anything else.
	found = re.match(r"\s*(matrix|scale)\s*\(([^()]*)\)\s*$", transform, re.IGNORECASE)
	if not found:
		return None

	try:
		#Arguments may be separated by whitespace and/or commas
		values = [float(v) for v in re.split(r"[\s,]+", found.group(2).strip()) if v]
	except ValueError:
		return None

	if found.group(1).lower() == "matrix":
		if len(values) != 6:
			return None
		return values

	#scale(s) is shorthand for scale(s, s)
	if len(values) == 1:
		values.append(values[0])
	if len(values) != 2:
		return None

	#construct m as a list in Mx+b form
	return [values[0], 0.0, 0.0, values[1], 0.0, 0.0]


def _scaleFontSize(value, factor):
	#Multiply the numeric part of a font-size value by factor, keeping any
	#unit suffix ("px", "pt", "%", ...).  Returns None for non-numeric sizes.
	found = re.match(r"\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)\s*([A-Za-z%]*)\s*$", str(value))
	if not found:
		return None
	return str(float(found.group(1)) * factor) + found.group(2)


#Crappy font substitution routine
def fontSub(tag):
	"""Replace the tag's ``font-family`` using a small substitution table.

	A family starting with ``Arial`` becomes ``DejaVu Sans`` and one
	starting with ``Times new roman`` becomes ``Times``; matching is
	case-insensitive and tolerates one leading single quote.  Tags without
	a ``font-family``, or with a family not in the table, are left alone.

	tag -- any object with ``get`` and item assignment keyed by attribute
	       name.
	"""
	family = tag.get("font-family")
	if family is None:
		return

	#(pattern, replacement) pairs, tried in order; first match wins
	preferredFont = (
		(re.compile("(?i)'?Arial.*"), "DejaVu Sans"),
		(re.compile("(?i)'?Times new roman.*"), "Times"),
	)

	#Substitute fonts from our preferred font table
	for pattern, replacement in preferredFont:
		if pattern.match(family):
			tag["font-family"] = replacement
			break



def main():
	"""Command-line entry point: ``svgTinker.py inputFile outputFile``.

	Reads the input SVG, then in order:

	1. expands every ``style="..."`` attribute into separate attributes;
	2. on ``text`` elements, fixes and substitutes the font family when one
	   is present, otherwise tries the font-size/transform fix when a
	   transform is present;
	3. fixes and substitutes the font family on ``g`` elements;
	4. replaces every ``tspan`` with its children;
	5. removes ``style`` elements that contain ``@font-face`` rules;

	and writes the result to the output file.  Exits with status 1 on a
	usage error or when either file cannot be opened.
	"""
	if len(sys.argv) != 3:
		print("Usage: svgTinker.py inputFile outputFile")
		sys.exit(1)

	#open() raises rather than returning a false value, so failure has to
	#be caught as an exception
	try:
		f = open(sys.argv[1])
	except IOError:
		print("File does not exist or could not be read")
		sys.exit(1)

	with f:
		xmlText = f.read()

	soup = BeautifulStoneSoup(xmlText)

	#find all style="..." tags
	for styled in soup.findAll(style=True):
		splitInkscapeStyle(styled)

	#Correct all font tags
	for text in soup.findAll("text"):
		if text.get("font-family") is not None:
			fontFix(text)
			fontSub(text)
			#NOTE(review): as before, text that has a font-family never
			#reaches fontSizeFix -- confirm whether that is intended
			continue

		if text.get("transform") is not None:
			fontSizeFix(text)

	#Fonts can also be stored in g elements.
	for group in soup.findAll("g"):
		if group.get("font-family") is not None:
			fontFix(group)
			fontSub(group)

	#Nuke the tspans, preserving children
	for tspan in soup.findAll("tspan"):
		tagRemove(tspan, "tspans")

	#Find base64 encoded data and destroy it
	for style in soup.findAll("style"):
		if hasFontFace(style):
			style.extract()

	try:
		out = open(sys.argv[2], 'w')
	except IOError:
		print('Unable to open file for writing. aborting')
		sys.exit(1)

	#save modified svg data.  The document is written as-is: prettify()
	#only returns a reformatted copy, it does not change the soup.
	with out:
		out.write(str(soup))

	print("Wrote file : " + sys.argv[2])



#Run the converter only when executed as a script, not when imported
if __name__ == "__main__":
	main()