#!/usr/bin/python
from BeautifulSoup import BeautifulStoneSoup, Tag
import sys
import re
#Split all inkscape style=" " into known attributes for that tag
def splitInkscapeStyle(tag) :
    """Expand a tag's CSS ``style`` attribute into individual attributes.

    Every ``name:value`` declaration found in ``tag["style"]`` is copied
    onto the tag as an attribute of its own, after which the ``style``
    attribute itself is deleted.

    tag -- element supporting ``get``, item assignment and item deletion
           (a BeautifulSoup Tag in this script; a plain dict also works).

    Returns False when the tag has no style attribute or an empty one (the
    tag is left untouched in that case), otherwise None.
    """
    strStyle = tag.get("style")
    if not strStyle:
        return False
    for declaration in strStyle.split(";"):
        # Split on the FIRST colon only: values such as url(data:...) contain
        # further colons that belong to the value.
        name, colon, value = declaration.partition(":")
        name = name.strip()
        if not colon or not name:
            # Empty fragment (e.g. after a trailing ';') or a declaration
            # without a colon: nothing usable to copy.
            continue
        tag[name] = value.strip()
    del tag["style"]
#By Peter Waller, BS: Replacing a tag with its contents, BeautifulSoup mailing list
def tagRemove(tag, tagname):
    """Replace ``tag`` with its own children, keeping their order.

    The children are inserted into ``tag.parent`` at the position the tag
    occupied, then the emptied tag is extracted from the tree.

    tag     -- element with ``parent``, ``contents`` and ``extract()``; its
               parent must provide ``contents`` and ``insert(index, child)``.
    tagname -- unused; kept so existing callers keep working.

    Returns None. A tag without a parent is left untouched.
    """
    parent = tag.parent
    if parent is None:
        return
    # Locate the tag by identity. list.index() compares with ==, which can
    # pick an earlier sibling if the element type defines content equality.
    for origIndex, sibling in enumerate(parent.contents):
        if sibling is tag:
            break
    else:
        return
    # Iterate over a snapshot: each insert disconnects the child from
    # tag.contents, so walking the live list would skip elements.
    for offset, content in enumerate(list(tag.contents)):
        parent.insert(origIndex + offset, content)
    # Excise the now empty tag
    tag.extract()
def epsilon():
    """Return the machine epsilon of a Python float.

    This is the smallest power of two ``eps`` for which ``1.0 + eps`` still
    compares greater than ``1.0``.
    """
    eps = 1.0
    # Test the NEXT candidate before committing to it, so the value returned
    # is the last one that still changes 1.0. True division is required:
    # floor division would collapse eps to 0.0 on the first pass.
    while 1.0 + eps / 2.0 > 1.0:
        eps /= 2.0
    return eps
def hasFontFace(tag):
    """Return True when the tag's text contains an ``@font-face`` rule.

    tag -- element exposing its text as ``tag.string`` (may be None).

    Returns a bool. A substring test is used rather than ``str.find()``,
    whose -1 "not found" result is truthy and whose 0 "found at start"
    result is falsy.
    """
    text = tag.string
    if not text:
        return False
    #Check for encoded font base64
    return "@font-face" in text
# Style markers embedded in a font-family name, e.g. "Arial-Bold".
_FONTFIX_BOLD_RE = re.compile(r"-bold", re.IGNORECASE)
_FONTFIX_ITALIC_RE = re.compile(r"-italic", re.IGNORECASE)
# "DejaVuSans" (optionally quoted) at the start of the family name.
_FONTFIX_DEJAVU_RE = re.compile(r"'?dejavusans", re.IGNORECASE)


#Takes a stone-soup tag and applies various
#workaround fixes of dubious effectiveness
def fontFix(tag):
    """Normalise style information embedded in a tag's ``font-family``.

    * ``-Bold`` in the family name is removed and ``font-weight`` is set to
      ``Bold`` unless the existing weight already mentions bold.
    * ``-Italic`` is handled the same way through ``font-style``.
    * A family starting with ``DejaVuSans`` is renamed ``DejaVu Sans``.

    tag -- element supporting ``get`` and item assignment (a BeautifulSoup
           Tag in this script; a plain dict also works).

    Returns None. Tags without a font-family, or whose family needs none of
    the fixes, are left untouched. A short report is printed for every tag
    that is changed.
    """
    family = tag.get("font-family")
    if not family:
        return
    bold = _FONTFIX_BOLD_RE.search(family) is not None
    italic = _FONTFIX_ITALIC_RE.search(family) is not None
    dejavu = _FONTFIX_DEJAVU_RE.match(family) is not None
    #if none of the above apply we can skip
    if not (bold or italic or dejavu):
        return

    problems = ""
    if bold:
        problems += "bad bolding method "
    if italic:
        problems += "bad italicising method "
    if dejavu:
        problems += "wrong font name"
    print("Fixing tag : " + problems)
    print(tag)

    if bold:
        weight = tag.get("font-weight")
        # The value is replaced, not appended to with ';': the attribute
        # holds a single keyword.
        if not weight or "bold" not in weight.lower():
            tag["font-weight"] = "Bold"
        family = _FONTFIX_BOLD_RE.sub("", family)
    if italic:
        style = tag.get("font-style")
        if not style or "italic" not in style.lower():
            tag["font-style"] = "Italic"
        family = _FONTFIX_ITALIC_RE.sub("", family)
    #Fix dejavu vs Deja Vu
    if dejavu:
        family = "DejaVu Sans"
    tag["font-family"] = family
# A transform attribute consisting of exactly one matrix(...) or scale(...).
_FONTSIZE_TRANSFORM_RE = re.compile(
    r"^\s*(matrix|scale)\s*\(([^()]*)\)\s*$", re.IGNORECASE)
# A font-size value: a number followed by an optional unit such as "px".
_FONTSIZE_VALUE_RE = re.compile(
    r"^\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)\s*([A-Za-z%]*)\s*$")
# Off-diagonal terms below this magnitude are treated as zero.
_FONTSIZE_EPSILON = 0.001


def _parseFontTransform(transform):
    """Return the six matrix coefficients of a lone matrix()/scale(), else None."""
    match = _FONTSIZE_TRANSFORM_RE.match(transform)
    if match is None:
        return None
    try:
        numbers = [float(part)
                   for part in re.split(r"[\s,]+", match.group(2).strip())
                   if part]
    except ValueError:
        return None
    if match.group(1).lower() == "scale":
        if len(numbers) == 1:
            # scale(s) is shorthand for a uniform scale(s, s)
            numbers = numbers * 2
        if len(numbers) != 2:
            return None
        #construct m as a list in Mx+b form
        return [numbers[0], 0.0, 0.0, numbers[1], 0.0, 0.0]
    if len(numbers) != 6:
        return None
    return numbers


#Check to see if a small font is being used in conjunction with
#a magnifying transform, and fold the magnification into the font size
def fontSizeFix(tag):
    """Move a scaling transform's magnification into the tag's font-size.

    When ``tag["transform"]`` is a single diagonal ``matrix()`` or
    ``scale()`` whose dominant scale factor exceeds 1, the effective
    font-size (taken from the tag or its nearest ancestor that has one) is
    multiplied by that factor and both diagonal terms of the transform are
    divided by it.

    tag -- element supporting ``get`` and item assignment, optionally with a
           ``parent`` attribute linking to its ancestor.

    Returns True when the tag was modified, False otherwise (no transform,
    no font-size found, unsupported transform, or no magnification).
    """
    #without a transformation there is nothing we can do
    transform = tag.get("transform")
    if not transform:
        return False

    #Find the nearest tag (self or ancestor) with the font-size parameter
    fontSize = None
    node = tag
    while node is not None:
        fontSize = node.get("font-size")
        if fontSize is not None:
            break
        node = getattr(node, "parent", None)
    if fontSize is None:
        return False

    # Only a transform that is exactly one matrix()/scale() is handled:
    # rewriting a longer transform list would discard its other entries.
    m = _parseFontTransform(transform)
    if m is None:
        return False

    if abs(m[1]) >= _FONTSIZE_EPSILON or abs(m[2]) >= _FONTSIZE_EPSILON:
        # Not a diagonal matrix (rotation/skew present)
        return False
    if abs(m[0]) > abs(m[3]):
        factor = m[0]
    else:
        factor = m[3]
    if factor <= 1:
        return False

    sizeMatch = _FONTSIZE_VALUE_RE.match(str(fontSize))
    if sizeMatch is None:
        return False

    #Pump up the font size by factor, then reduce the matrix.
    # The new size is written on the tag itself, never on the ancestor it
    # was read from, so other descendants of that ancestor are unaffected
    # and repeated calls do not compound. The unit suffix is preserved.
    tag["font-size"] = str(float(sizeMatch.group(1)) * factor) + sizeMatch.group(2)
    m[0] = m[0] / factor
    m[3] = m[3] / factor
    # NOTE(review): position attributes such as x/y are not rescaled here,
    # so the element may move once the transform shrinks; confirm.
    tag["transform"] = "matrix(" + " ".join([str(value) for value in m]) + ")"
    return True
# Preferred-font table: (pattern matched at the start of font-family,
# replacement family). Compiled once rather than on every call.
_FONTSUB_PREFERRED = (
    (re.compile(r"'?Arial", re.IGNORECASE), "DejaVu Sans"),
    (re.compile(r"'?Times new roman", re.IGNORECASE), "Times"),
)


#Crappy font substitution routine
def fontSub(tag):
    """Replace a tag's ``font-family`` with the preferred substitute, if any.

    A family starting with (optionally quoted) ``Arial`` becomes
    ``DejaVu Sans``; one starting with ``Times new roman`` becomes
    ``Times``. Matching is case-insensitive.

    tag -- element supporting ``get`` and item assignment.

    Returns None. Tags without a font-family, or with an unlisted family,
    are left untouched.
    """
    family = tag.get("font-family")
    if not family:
        return
    #Substitute fonts from our preferred font table
    for pattern, replacement in _FONTSUB_PREFERRED:
        if pattern.match(family):
            tag["font-family"] = replacement
            break
def main():
    """Command-line entry point: clean up the fonts of an SVG file.

    Usage: svgTinker.py inputFile outputFile

    Reads ``sys.argv[1]``, then in order: splits every ``style`` attribute
    into separate attributes, fixes and substitutes font families on
    ``text`` and ``g`` elements, folds scaling transforms into the font size
    of ``text`` elements, replaces ``tspan`` elements by their children,
    removes ``style`` elements flagged by hasFontFace(), and writes the
    result to ``sys.argv[2]``.

    Exits with status 1 on a usage error or when either file cannot be
    opened.
    """
    if len(sys.argv) != 3:
        print("Usage: svgTinker.py inputFile outputFile")
        sys.exit(1)
    inputPath = sys.argv[1]
    outputPath = sys.argv[2]

    # open() raises instead of returning a false value, so the failure has
    # to be caught rather than tested for.
    try:
        inputFile = open(inputPath)
    except IOError:
        print("File does not exist or could not be read")
        sys.exit(1)
    with inputFile:
        xmlText = inputFile.read()

    soup = BeautifulStoneSoup(xmlText)

    #find all style="..." tags
    for styledTag in soup.findAll(style=True):
        splitInkscapeStyle(styledTag)

    #Correct all font tags
    for textTag in soup.findAll("text"):
        # Attribute names are compared case-insensitively; the flags are
        # recomputed for every tag so nothing leaks from a previous one.
        attrNames = set([attr[0].lower() for attr in textTag.attrs])
        if "font-family" in attrNames:
            fontFix(textTag)
            fontSub(textTag)
            # NOTE(review): tags carrying a font-family skip the size fix
            # below, as in the original control flow; confirm intent.
            continue
        if "transform" in attrNames:
            fontSizeFix(textTag)

    #Fonts can also be stored in g elements.
    for groupTag in soup.findAll("g"):
        attrNames = set([attr[0].lower() for attr in groupTag.attrs])
        if "font-family" in attrNames:
            fontFix(groupTag)
            fontSub(groupTag)

    #Nuke the tspans, preserving children
    for tspanTag in soup.findAll("tspan"):
        tagRemove(tspanTag, "tspans")

    #Find base64 encoded data and destroy it
    # extract() removes the element outright, so no placeholder element has
    # to be inserted in its place.
    for styleTag in soup.findAll("style"):
        if hasFontFace(styleTag):
            styleTag.extract()

    try:
        outputFile = open(outputPath, 'w')
    except IOError:
        print('Unable to open file for writing. aborting')
        sys.exit(1)
    #save modified svg data
    # The former bare soup.prettify() call discarded its result and is
    # dropped; the document is written un-prettified, as before.
    with outputFile:
        outputFile.write(str(soup))
    print("Wrote file : " + outputPath)
# Script entry point: main() runs only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
#!/usr/bin/python
from BeautifulSoup import BeautifulStoneSoup, Tag
import sys
import re
#Split all inkscape style=" " into known attributes for that tag
def splitInkscapeStyle(tag) :
    """Expand a tag's CSS ``style`` attribute into individual attributes.

    Every ``name:value`` declaration found in ``tag["style"]`` is copied
    onto the tag as an attribute of its own, after which the ``style``
    attribute itself is deleted.

    tag -- element supporting ``get``, item assignment and item deletion
           (a BeautifulSoup Tag in this script; a plain dict also works).

    Returns False when the tag has no style attribute or an empty one (the
    tag is left untouched in that case), otherwise None.
    """
    strStyle = tag.get("style")
    if not strStyle:
        return False
    for declaration in strStyle.split(";"):
        # Split on the FIRST colon only: values such as url(data:...) contain
        # further colons that belong to the value.
        name, colon, value = declaration.partition(":")
        name = name.strip()
        if not colon or not name:
            # Empty fragment (e.g. after a trailing ';') or a declaration
            # without a colon: nothing usable to copy.
            continue
        tag[name] = value.strip()
    del tag["style"]
#By Peter Waller, BS: Replacing a tag with its contents, BeautifulSoup mailing list
def tagRemove(tag, tagname):
    """Replace ``tag`` with its own children, keeping their order.

    The children are inserted into ``tag.parent`` at the position the tag
    occupied, then the emptied tag is extracted from the tree.

    tag     -- element with ``parent``, ``contents`` and ``extract()``; its
               parent must provide ``contents`` and ``insert(index, child)``.
    tagname -- unused; kept so existing callers keep working.

    Returns None. A tag without a parent is left untouched.
    """
    parent = tag.parent
    if parent is None:
        return
    # Locate the tag by identity. list.index() compares with ==, which can
    # pick an earlier sibling if the element type defines content equality.
    for origIndex, sibling in enumerate(parent.contents):
        if sibling is tag:
            break
    else:
        return
    # Iterate over a snapshot: each insert disconnects the child from
    # tag.contents, so walking the live list would skip elements.
    for offset, content in enumerate(list(tag.contents)):
        parent.insert(origIndex + offset, content)
    # Excise the now empty tag
    tag.extract()
def epsilon():
    """Return the machine epsilon of a Python float.

    This is the smallest power of two ``eps`` for which ``1.0 + eps`` still
    compares greater than ``1.0``.
    """
    eps = 1.0
    # Test the NEXT candidate before committing to it, so the value returned
    # is the last one that still changes 1.0. True division is required:
    # floor division would collapse eps to 0.0 on the first pass.
    while 1.0 + eps / 2.0 > 1.0:
        eps /= 2.0
    return eps
def hasFontFace(tag):
    """Return True when the tag's text contains an ``@font-face`` rule.

    tag -- element exposing its text as ``tag.string`` (may be None).

    Returns a bool. A substring test is used rather than ``str.find()``,
    whose -1 "not found" result is truthy and whose 0 "found at start"
    result is falsy.
    """
    text = tag.string
    if not text:
        return False
    #Check for encoded font base64
    return "@font-face" in text
# Style markers embedded in a font-family name, e.g. "Arial-Bold".
_FONTFIX_BOLD_RE = re.compile(r"-bold", re.IGNORECASE)
_FONTFIX_ITALIC_RE = re.compile(r"-italic", re.IGNORECASE)
# "DejaVuSans" (optionally quoted) at the start of the family name.
_FONTFIX_DEJAVU_RE = re.compile(r"'?dejavusans", re.IGNORECASE)


#Takes a stone-soup tag and applies various
#workaround fixes of dubious effectiveness
def fontFix(tag):
    """Normalise style information embedded in a tag's ``font-family``.

    * ``-Bold`` in the family name is removed and ``font-weight`` is set to
      ``Bold`` unless the existing weight already mentions bold.
    * ``-Italic`` is handled the same way through ``font-style``.
    * A family starting with ``DejaVuSans`` is renamed ``DejaVu Sans``.

    tag -- element supporting ``get`` and item assignment (a BeautifulSoup
           Tag in this script; a plain dict also works).

    Returns None. Tags without a font-family, or whose family needs none of
    the fixes, are left untouched. A short report is printed for every tag
    that is changed.
    """
    family = tag.get("font-family")
    if not family:
        return
    bold = _FONTFIX_BOLD_RE.search(family) is not None
    italic = _FONTFIX_ITALIC_RE.search(family) is not None
    dejavu = _FONTFIX_DEJAVU_RE.match(family) is not None
    #if none of the above apply we can skip
    if not (bold or italic or dejavu):
        return

    problems = ""
    if bold:
        problems += "bad bolding method "
    if italic:
        problems += "bad italicising method "
    if dejavu:
        problems += "wrong font name"
    print("Fixing tag : " + problems)
    print(tag)

    if bold:
        weight = tag.get("font-weight")
        # The value is replaced, not appended to with ';': the attribute
        # holds a single keyword.
        if not weight or "bold" not in weight.lower():
            tag["font-weight"] = "Bold"
        family = _FONTFIX_BOLD_RE.sub("", family)
    if italic:
        style = tag.get("font-style")
        if not style or "italic" not in style.lower():
            tag["font-style"] = "Italic"
        family = _FONTFIX_ITALIC_RE.sub("", family)
    #Fix dejavu vs Deja Vu
    if dejavu:
        family = "DejaVu Sans"
    tag["font-family"] = family
# A transform attribute consisting of exactly one matrix(...) or scale(...).
_FONTSIZE_TRANSFORM_RE = re.compile(
    r"^\s*(matrix|scale)\s*\(([^()]*)\)\s*$", re.IGNORECASE)
# A font-size value: a number followed by an optional unit such as "px".
_FONTSIZE_VALUE_RE = re.compile(
    r"^\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)\s*([A-Za-z%]*)\s*$")
# Off-diagonal terms below this magnitude are treated as zero.
_FONTSIZE_EPSILON = 0.001


def _parseFontTransform(transform):
    """Return the six matrix coefficients of a lone matrix()/scale(), else None."""
    match = _FONTSIZE_TRANSFORM_RE.match(transform)
    if match is None:
        return None
    try:
        numbers = [float(part)
                   for part in re.split(r"[\s,]+", match.group(2).strip())
                   if part]
    except ValueError:
        return None
    if match.group(1).lower() == "scale":
        if len(numbers) == 1:
            # scale(s) is shorthand for a uniform scale(s, s)
            numbers = numbers * 2
        if len(numbers) != 2:
            return None
        #construct m as a list in Mx+b form
        return [numbers[0], 0.0, 0.0, numbers[1], 0.0, 0.0]
    if len(numbers) != 6:
        return None
    return numbers


#Check to see if a small font is being used in conjunction with
#a magnifying transform, and fold the magnification into the font size
def fontSizeFix(tag):
    """Move a scaling transform's magnification into the tag's font-size.

    When ``tag["transform"]`` is a single diagonal ``matrix()`` or
    ``scale()`` whose dominant scale factor exceeds 1, the effective
    font-size (taken from the tag or its nearest ancestor that has one) is
    multiplied by that factor and both diagonal terms of the transform are
    divided by it.

    tag -- element supporting ``get`` and item assignment, optionally with a
           ``parent`` attribute linking to its ancestor.

    Returns True when the tag was modified, False otherwise (no transform,
    no font-size found, unsupported transform, or no magnification).
    """
    #without a transformation there is nothing we can do
    transform = tag.get("transform")
    if not transform:
        return False

    #Find the nearest tag (self or ancestor) with the font-size parameter
    fontSize = None
    node = tag
    while node is not None:
        fontSize = node.get("font-size")
        if fontSize is not None:
            break
        node = getattr(node, "parent", None)
    if fontSize is None:
        return False

    # Only a transform that is exactly one matrix()/scale() is handled:
    # rewriting a longer transform list would discard its other entries.
    m = _parseFontTransform(transform)
    if m is None:
        return False

    if abs(m[1]) >= _FONTSIZE_EPSILON or abs(m[2]) >= _FONTSIZE_EPSILON:
        # Not a diagonal matrix (rotation/skew present)
        return False
    if abs(m[0]) > abs(m[3]):
        factor = m[0]
    else:
        factor = m[3]
    if factor <= 1:
        return False

    sizeMatch = _FONTSIZE_VALUE_RE.match(str(fontSize))
    if sizeMatch is None:
        return False

    #Pump up the font size by factor, then reduce the matrix.
    # The new size is written on the tag itself, never on the ancestor it
    # was read from, so other descendants of that ancestor are unaffected
    # and repeated calls do not compound. The unit suffix is preserved.
    tag["font-size"] = str(float(sizeMatch.group(1)) * factor) + sizeMatch.group(2)
    m[0] = m[0] / factor
    m[3] = m[3] / factor
    # NOTE(review): position attributes such as x/y are not rescaled here,
    # so the element may move once the transform shrinks; confirm.
    tag["transform"] = "matrix(" + " ".join([str(value) for value in m]) + ")"
    return True
# Preferred-font table: (pattern matched at the start of font-family,
# replacement family). Compiled once rather than on every call.
_FONTSUB_PREFERRED = (
    (re.compile(r"'?Arial", re.IGNORECASE), "DejaVu Sans"),
    (re.compile(r"'?Times new roman", re.IGNORECASE), "Times"),
)


#Crappy font substitution routine
def fontSub(tag):
    """Replace a tag's ``font-family`` with the preferred substitute, if any.

    A family starting with (optionally quoted) ``Arial`` becomes
    ``DejaVu Sans``; one starting with ``Times new roman`` becomes
    ``Times``. Matching is case-insensitive.

    tag -- element supporting ``get`` and item assignment.

    Returns None. Tags without a font-family, or with an unlisted family,
    are left untouched.
    """
    family = tag.get("font-family")
    if not family:
        return
    #Substitute fonts from our preferred font table
    for pattern, replacement in _FONTSUB_PREFERRED:
        if pattern.match(family):
            tag["font-family"] = replacement
            break
def main():
    """Command-line entry point: clean up the fonts of an SVG file.

    Usage: svgTinker.py inputFile outputFile

    Reads ``sys.argv[1]``, then in order: splits every ``style`` attribute
    into separate attributes, fixes and substitutes font families on
    ``text`` and ``g`` elements, folds scaling transforms into the font size
    of ``text`` elements, replaces ``tspan`` elements by their children,
    removes ``style`` elements flagged by hasFontFace(), and writes the
    result to ``sys.argv[2]``.

    Exits with status 1 on a usage error or when either file cannot be
    opened.
    """
    if len(sys.argv) != 3:
        print("Usage: svgTinker.py inputFile outputFile")
        sys.exit(1)
    inputPath = sys.argv[1]
    outputPath = sys.argv[2]

    # open() raises instead of returning a false value, so the failure has
    # to be caught rather than tested for.
    try:
        inputFile = open(inputPath)
    except IOError:
        print("File does not exist or could not be read")
        sys.exit(1)
    with inputFile:
        xmlText = inputFile.read()

    soup = BeautifulStoneSoup(xmlText)

    #find all style="..." tags
    for styledTag in soup.findAll(style=True):
        splitInkscapeStyle(styledTag)

    #Correct all font tags
    for textTag in soup.findAll("text"):
        # Attribute names are compared case-insensitively; the flags are
        # recomputed for every tag so nothing leaks from a previous one.
        attrNames = set([attr[0].lower() for attr in textTag.attrs])
        if "font-family" in attrNames:
            fontFix(textTag)
            fontSub(textTag)
            # NOTE(review): tags carrying a font-family skip the size fix
            # below, as in the original control flow; confirm intent.
            continue
        if "transform" in attrNames:
            fontSizeFix(textTag)

    #Fonts can also be stored in g elements.
    for groupTag in soup.findAll("g"):
        attrNames = set([attr[0].lower() for attr in groupTag.attrs])
        if "font-family" in attrNames:
            fontFix(groupTag)
            fontSub(groupTag)

    #Nuke the tspans, preserving children
    for tspanTag in soup.findAll("tspan"):
        tagRemove(tspanTag, "tspans")

    #Find base64 encoded data and destroy it
    # extract() removes the element outright, so no placeholder element has
    # to be inserted in its place.
    for styleTag in soup.findAll("style"):
        if hasFontFace(styleTag):
            styleTag.extract()

    try:
        outputFile = open(outputPath, 'w')
    except IOError:
        print('Unable to open file for writing. aborting')
        sys.exit(1)
    #save modified svg data
    # The former bare soup.prettify() call discarded its result and is
    # dropped; the document is written un-prettified, as before.
    with outputFile:
        outputFile.write(str(soup))
    print("Wrote file : " + outputPath)
# Script entry point: main() runs only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()