source: trunk/LatexTool.py @ 3136

Revision 3136, 22.0 KB checked in by jukka, 9 years ago (diff)

Added chart.apis.google.com as external latex service and adjusted html escaping for latex code.

Line 
1
2# This file is composed from code originally from ZWiki's LatexWiki-plugin:
3#
4#LatexWiki - a patch to ZWiki for rendering embedded LaTeX code
5#Copyright (C) 2001 Open Software Services <info@OpenSoftwareServices.com>
6#Copyright (C) 2003,2004,2005 Bob McElrath <bob+latexwiki@mcelrath.org>
7#Copyright (C) 2006 Simon Michael and Zwiki contributors <http://zwiki.org>
8#All rights reserved, all disclaimers apply, etc.
9#
10#This program is free software; you can redistribute it and/or
11#modify it under the terms of the GNU General Public License
12#as published by the Free Software Foundation; either version 2
13#of the License, or (at your option) any later version.
14#
15#This program is distributed in the hope that it will be useful,
16#but WITHOUT ANY WARRANTY; without even the implied warranty of
17#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18#GNU General Public License for more details.
19#
20#You should have received a copy of the GNU General Public License
21#along with this program; if not, write to the Free Software
22#Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
23
24
25from Globals import InitializeClass
26from OFS.SimpleItem import SimpleItem
27from OFS.PropertyManager import PropertyManager
28from Products.CMFCore.utils import UniqueObject
29from AccessControl import ClassSecurityInfo
30from DocumentTemplate.html_quote import html_quote
31from urllib import quote_plus
32from string import strip, join, replace
33import os, sys, re, zLOG, string, math, popen2, select, htmllib
34from struct import pack, unpack
35from cgi import escape
36from config import LATEX_IMAGES_STORAGE_PATH, EXTERNAL_LATEX_SERVICE
37from Products.CMFCore.utils import getToolByName
38
39#try:
40import fcntl
41from PIL import Image, ImageFile, ImageChops, PngImagePlugin
42#except ImportError:
43#    LATEX_IMAGES_STORAGE_PATH='' # No local latex for you
44
45
# From config we need LATEX_IMAGES_STORAGE_PATH and EXTERNAL_LATEX_SERVICE.
# If LATEX_IMAGES_STORAGE_PATH is set, we assume that the server is capable of rendering LaTeX on its own.
# If EXTERNAL_LATEX_SERVICE is set, we assume it is a web service (for example chart.apis.google.com)
# where you request an image and the image URL contains the LaTeX code.
# If the LeMill site becomes too popular, this will be quite a load for the service provider.
# If both are set, we first try local rendering, then the external service.
# If neither is set, we just give the LaTeX code back.
53
# Directory that holds the TeX binaries.  NOTE: this name is rebound further
# down in this module to the full path of the `latex` executable, so this
# directory value is only visible until that assignment runs.
latexpath='/usr/texbin'

# Splits a LaTeX snippet into its math delimiters and its body.
#   group(1): the opening `$$` or `$` (only set for dollar delimiters); the
#             closing alternative `\1` requires the same token at the end.
#   group(2): everything between the opening and the closing delimiter; this
#             is the group the rest of the module reads.
# Other accepted openers are `\(`, `\[` and `\begin{...}`; accepted closers are
# `\)`, `\]` and `\end{...}`.  The opener and closer are not checked against
# each other except through the `\1` back-reference.
latexRemoveDelim = re.compile(r'^(?:(\$\$|\$(?!\$))|\\\(|\\begin{[^}]*}|\\\[)(.*)(?:\\\]|\\end{[^}]*}|\\\)|\1)$', re.MULTILINE|re.DOTALL)

# HTML fragment with one `%s` slot for the LaTeX error text.
# NOTE(review): not referenced by the code visible in this file.
errorMessage = """\n<hr/><font size="-1" color="red">
Some or all expressions may not have rendered properly,
because Latex returned the following error:<br/><pre>%s</pre></font>"""

# Actual location of images: LATEX_IMAGES_STORAGE_PATH below the CLIENT_HOME
# attribute that is read from the __builtin__ module.
workingDir = '%s/%s' % (sys.modules['__builtin__'].CLIENT_HOME,  LATEX_IMAGES_STORAGE_PATH)
# Default character size, if the user doesn't specify
# NOTE(review): the methods visible in this file default to 17 instead and do
# not read this constant.
defaultcharsizepx = 18

# File extension of the rendered images.
imageExtension = '.png'
68
class LatexSyntaxError(Exception):
    """Raised when the latex run for a snippet reports an error."""


class LatexRenderError(Exception):
    """Raised when a rendered page cannot be post-processed into an image."""


class GhostscriptError(Exception):
    """Raised when the ghostscript command exits with an error."""


class AlignError(Exception):
    """Raised when an image has no usable alignment dot."""
73
74
75def unescape(s):
76    p = htmllib.HTMLParser(None)
77    p.save_bgn()
78    p.feed(s)
79    return p.save_end()
80
def findinpath(exe):
    """Return the first executable file found for *exe*, or None.

    *exe* itself is tried first (so an absolute or relative path works),
    followed by *exe* joined onto every directory listed in the PATH
    environment variable.

    A missing PATH variable is treated as an empty search path, the entries
    are split on ``os.pathsep``, and a candidate must be a regular file as
    well as executable (``os.access(..., os.X_OK)`` alone is also true for
    directories).

    NOTE: the module's default TeX directory ('/usr/texbin') is not part of
    the search; callers supply their own fallback paths.
    """
    search_dirs = os.getenv('PATH', '').split(os.pathsep)
    candidates = [exe] + [os.path.join(directory, exe)
                          for directory in search_dirs]
    for candidate in candidates:
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return candidate
    return None
89
# Locate the external programs.  dvipng and gs stay None when they are not
# found; dvips and latex fall back to their /usr/texbin locations.
dvipngpath = findinpath('dvipng')
gspath = findinpath('gs')
dvipspath = findinpath('dvips')
if not dvipspath:
    dvipspath = '/usr/texbin/dvips'
latexpath = findinpath('latex')
if not latexpath:
    latexpath = '/usr/texbin/latex'
# Echo the resolved locations on stdout when the module is imported.
print(dvipngpath)
print(gspath)
print(latexpath)
print(dvipspath)
99
# Point size written into the default LaTeX template.
charsizept = 10
# TeX and dvipng work with 72.27 points per inch and therefore give the
# best-looking images, while Postscript uses 72 points per inch.  When we
# have to go through ghostscript (no dvipng available) that mismatch puts
# nibs on the tops of letters for many choices of charheightpx, so the
# Postscript value is used in that case.
if dvipngpath is None:
    ptperinch = 72
else:
    ptperinch = 72.27
# Centerline correction in pixels, keyed by character height in px (10..64).
# Positive values move the image up, negative values move it down.  The
# values were determined experimentally; if anyone has a better algorithm to
# align images, please contact the original author.
centerfudge = dict.fromkeys(range(10, 65), 0)
# Only these heights need a non-zero correction.
centerfudge.update({
    11: 1, 15: 2, 23: 2, 31: 1, 33: 1, 39: 1, 41: 1, 42: -1, 47: 1,
    52: -1, 54: -1, 55: 1, 56: -1, 57: 1, 58: 3, 59: -1, 61: -1, 62: -1,
    63: 1, 64: -1,
})
120
# Matches a string that consists only of an opening inline-math delimiter:
# a single `$` (not `$$`) or `\(`.
# NOTE(review): not referenced by the code visible in this file.
latexInlinePattern = r'^(\$(?!\$)|\\\()$'

# This is only used if your wiki does not have a node LatexTemplate.
# In this module it is the template used whenever the caller of
# renderNonexistingImages does not pass a non-empty `latexTemplate` keyword.
# The `%d` slot is filled with charsizept right here at import time; the
# `%%s` therefore becomes a plain `%s` slot, which runLatex later fills with
# the LaTeX snippet.  renderNonexistingImages reads the point size back out
# of the \documentclass options.
defaultLatexTemplate = r"""
\documentclass[%dpt,notitlepage]{article}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage[all]{xy}
\newenvironment{latex}{}{}
\oddsidemargin -86pt
\headheight 0pt
\topmargin -96pt
\nofiles
\begin{document}
\pagestyle{empty}
%%s
\end{document}
"""  % (charsizept)
139# \topmargin -96pt
140
141
class LatexTool(PropertyManager, UniqueObject, SimpleItem):
    """ Tool for LaTex math support """

    # NOTE(review): the methods of this class are documented with comments
    # instead of docstrings on purpose.  Zope's publisher only publishes
    # objects that carry a docstring, so turning these comments into
    # docstrings could make helpers such as runCommand reachable by URL --
    # confirm against the Zope documentation before changing that.

    id = 'latex_tool'
    meta_type = 'LatexTool'
    security = ClassSecurityInfo()
    plone_tool = 1
    toolicon = 'skins/lemill/tool.gif'
    __allow_access_to_unprotected_subobjects__ = 1

    # This is the method you should call.
    #
    # Returns HTML for latexCode:
    #   * an <img> for a locally rendered image when LATEX_IMAGES_STORAGE_PATH
    #     is set and local rendering works,
    #   * otherwise an <img> pointing at EXTERNAL_LATEX_SERVICE when that is
    #     set,
    #   * otherwise latexCode itself, unchanged.
    # Every failure of the local tool chain (file system errors, latex, dvips,
    # ghostscript, alignment) is logged and triggers the fallback instead of
    # aborting the request.
    def getImageFor(self, latexCode, charheightpx=17):
        if LATEX_IMAGES_STORAGE_PATH:
            try:
                return self.getLocalImageFor(latexCode, charheightpx)
            except (EnvironmentError, LatexSyntaxError, LatexRenderError,
                    GhostscriptError, AlignError):
                # sys.exc_info() is used so that this also parses on
                # interpreters without the "except ... as" syntax.
                failure = sys.exc_info()[1]
                self.log(str(failure), 'LatexSyntaxError')
        if EXTERNAL_LATEX_SERVICE:
            return self.getRemoteImageFor(latexCode)
        return latexCode

    # Render latexCode with the local tool chain (unless the image already
    # exists in workingDir) and return an <img> tag with its pixel size.
    # Raises LatexSyntaxError when rendering reported errors, and IOError when
    # the image file cannot be read as a PNG.
    def getLocalImageFor(self, latexCode, charheightpx=17):
        print('using local latex')
        portal_url = getToolByName(self, 'portal_url')
        imageFile = self.fileNameFor(latexCode, charheightpx, imageExtension)
        imagePath = os.path.join(workingDir, imageFile)
        if not os.path.exists(imagePath):
            # alignfudge orig. 0.0
            errors = self.renderNonexistingImages([latexCode], charheightpx, 0.0, 1.03)
            if errors:
                # Report the real problem instead of failing later on the
                # missing image file.
                raise LatexSyntaxError(errors)
        imageUrl = '%s/%s/%s' % (portal_url(), LATEX_IMAGES_STORAGE_PATH, imageFile)
        width, height = self.getPngSize(imagePath)
        alt = html_quote(self._stripDelimiters(latexCode))
        return '<img alt="%s" class="equation" src="%s" width="%s" height="%s"/>' % (
            alt, imageUrl, width, height)

    # Return an <img> tag whose URL is EXTERNAL_LATEX_SERVICE followed by the
    # URL-quoted LaTeX body (HTML entities decoded, math delimiters removed).
    def getRemoteImageFor(self, latexCode):
        print('using remote latex')
        code = self._stripDelimiters(unescape(latexCode))
        alt = html_quote(code)
        src = ''.join((EXTERNAL_LATEX_SERVICE, quote_plus(code)))
        return '<img alt="%s" class="equation" src="%s" border="0" align="middle" />' % (alt, src)

    # Return the body of latexCode without its math delimiters.  Code that
    # carries no recognised delimiters is returned unchanged (the regular
    # expression does not match in that case).
    def _stripDelimiters(self, latexCode):
        match = latexRemoveDelim.match(latexCode)
        if match is None:
            return latexCode
        return match.group(2)

    # File name (without directory) used for a snippet at a given size.
    # NOTE(review): the name is derived from the built-in hash() of the
    # snippet, so two snippets with the same hash share one image and names
    # are only as stable as hash() itself.
    def fileNameFor(self, latexCode, size, extension=''):
        return '%s-%spx%s' % (abs(hash(latexCode)), size, extension)

    # Return (width, height) read from the header of the PNG file fname.
    # Raises IOError when the file is too short or does not start with the
    # PNG signature given by magicBytes.
    def getPngSize(self, fname,
                   magicBytes=pack('!BBBBBBBB', 137, 80, 78, 71, 13, 10, 26, 10)):
        f = open(fname, 'rb')  # binary mode: a PNG is not text
        try:
            header = f.read(24)
        finally:
            f.close()
        if len(header) < 24 or header[:8] != magicBytes:
            raise IOError('in getPngSize, file not a PNG!')
        # Bytes 16..23 hold width and height as big-endian 32-bit integers.
        return tuple(map(int, unpack('!LL', header[16:24])))

    # Write message to the Zope log under the 'LatexWikiDebugLog' subsystem.
    def log(self, message, summary='', severity=0):
        zLOG.LOG('LatexWikiDebugLog', severity, summary, message)

    # Make our file descriptors nonblocking so that reading doesn't hang.
    def makeNonBlocking(self, f):
        fl = fcntl.fcntl(f.fileno(), fcntl.F_GETFL)
        fcntl.fcntl(f.fileno(), fcntl.F_SETFL, fl | os.O_NONBLOCK)

    # Run cmdLine through the shell inside workingDir, optionally feeding
    # `input` to its stdin.  Returns (error, stdout, stderr) where error is
    # true when the command exited with a non-zero status or did not exit
    # normally.
    def runCommand(self, cmdLine, input=None):
        # '&&' so that the command is not run in some other directory when
        # the cd fails.
        program = popen2.Popen3('cd %s && %s' % (workingDir, cmdLine), 1)
        if input:
            program.tochild.write(input)
        program.tochild.close()
        self.makeNonBlocking(program.fromchild)
        self.makeNonBlocking(program.childerr)
        stdout = []
        stderr = []
        collected = {program.fromchild: stdout, program.childerr: stderr}
        pending = [program.fromchild, program.childerr]
        while pending:
            readable = select.select(pending, [], [])[0]
            for stream in readable:
                text = stream.read()
                if text == '':
                    # EOF: stop selecting on this stream, it would otherwise
                    # be reported as readable over and over again.
                    pending.remove(stream)
                else:
                    collected[stream].append(text)
        status = program.wait()
        error = os.WEXITSTATUS(status) or not os.WIFEXITED(status)
        return error, ''.join(stdout), ''.join(stderr)

    # methods from latexWrapper.py begin here

    # True when no image file exists yet for this snippet and size.
    def imageDoesNotExist(self, code, charheightpx):
        return not os.path.exists(os.path.join(workingDir,
            self.fileNameFor(code, charheightpx, imageExtension)))

    # NOTE(review): this is the only method of the class that carries a
    # docstring; the docstring is left untouched so that whatever access it
    # grants today does not change -- confirm that this is intended.
    #
    # Each snippet without an image is rendered on its own (latex, then
    # dvipng/ghostscript) and its first page becomes the final image.
    # Returns the HTML-escaped LaTeX error messages, or '' when everything
    # rendered (or nothing had to be rendered).  alignfudge is accepted for
    # compatibility; image alignment is currently not applied.  A keyword
    # argument `latexTemplate` may replace the default template.
    def renderNonexistingImages(self, latexCodeList, charheightpx, alignfudge, resfudge, **kw):
        """ take a list of strings of latex code, render the
        images that don't already exist.
        """
        print('LATEX: rendering images')
        latexTemplate = kw.get('latexTemplate') or defaultLatexTemplate
        m = re.search(r'\\documentclass\[[^\]]*?(\d+)pt[^\]]*?\]', latexTemplate)
        if m:
            charsizept = int(m.group(1))
        else:
            charsizept = 10
        # float() keeps the division exact when ptperinch is the integer 72.
        res = float(charheightpx) * ptperinch / charsizept * resfudge

        # Remove duplicates (keeping the order) and snippets already rendered.
        codeToRender = []
        for code in latexCodeList:
            if code not in codeToRender and self.imageDoesNotExist(code, charheightpx):
                codeToRender.append(code)
        if not codeToRender:
            return ''

        errors = []
        for code in codeToRender:
            fName = self.fileNameFor(code, charheightpx)
            try:
                message = self._renderOne(code, fName, res, charheightpx, latexTemplate)
            finally:
                # Intermediate files are removed on success and on failure.
                self._cleanupWorkingDir(fName)
            if message:
                errors.append(message)
        return escape('\n'.join(errors))

    # Render a single snippet into its final image file.  Returns '' on
    # success or the (logged) LaTeX error text; problems in the later stages
    # are raised as exceptions by dviPng / os.rename.
    def _renderOne(self, code, fName, res, charheightpx, latexTemplate):
        try:
            self.runLatex(code, res, charheightpx, latexTemplate)
        except LatexSyntaxError:
            message = str(sys.exc_info()[1])
            self.log(message, 'LatexSyntaxError')
            # FIXME translate latex line number to source line number
            return message
        self.dviPng(fName, res)
        # The snippet is the only page of its document, so page 1 is the
        # image.  (The former alignment step caused problems and is disabled.)
        os.rename(os.path.join(workingDir, '%s-%03d.png' % (fName, 1)),
                  os.path.join(workingDir,
                               self.fileNameFor(code, charheightpx, imageExtension)))
        return ''

    # Remove the intermediate files of one rendering job from workingDir:
    # fName.<log|aux|tex|pdf|dvi|ps>, fName-*.ps and the per-page PNGs
    # fName-*.png.  The final image fName.png is kept.  Only files of this
    # job are touched, and no shell is involved.
    def _cleanupWorkingDir(self, fName):
        tempExtensions = ('.log', '.aux', '.tex', '.pdf', '.dvi', '.ps')
        try:
            names = os.listdir(workingDir)
        except OSError:
            return
        for name in names:
            extension = os.path.splitext(name)[1]
            if name.startswith(fName + '.'):
                disposable = extension in tempExtensions
            elif name.startswith(fName + '-'):
                disposable = extension in tempExtensions or extension == '.png'
            else:
                disposable = False
            if not disposable:
                continue
            try:
                os.remove(os.path.join(workingDir, name))
            except OSError:
                # Best effort, like "rm -f": a file that is already gone or
                # cannot be removed must not fail the rendering.
                pass

    # Write the snippet, wrapped in latexTemplate, to a .tex file in
    # workingDir and run latex on it.  Raises LatexSyntaxError with the
    # "! ... ?" part of the latex output (or the whole output when that part
    # cannot be found) when latex fails.
    def runLatex(self, code, res, charheightpx, latexTemplate):
        def ensureWorkingDirectory(path):
            """Ensure this directory exists and is writable."""
            import stat
            if not os.access(path, os.F_OK):
                os.makedirs(path)
            if not os.access(path, os.W_OK):
                os.chmod(path, os.stat(path).st_mode | stat.S_IRWXU)

        texfileName = self.fileNameFor(code, charheightpx, '.tex')
        cmdLine = '%s %s' % (latexpath, texfileName)

        ensureWorkingDirectory(workingDir)
        texFile = open(os.path.join(workingDir, texfileName), 'w')
        try:
            texFile.write(latexTemplate % (code,))
        finally:
            texFile.close()
        failed, stdout, stderr = self.runCommand(cmdLine)

        if failed:
            out = stderr + '\n' + stdout + '\n' + cmdLine
            found = re.search(r'!.*\?', out, re.MULTILINE | re.DOTALL)
            if found:
                out = found.group(0)
            # FIXME translate latex line numbers to source line numbers
            raise LatexSyntaxError(out)

    # Convert fName.dvi in workingDir into fName-NNN.png page images.
    # dvipng is used when available; pages it reports as "not rendered"
    # (or all pages when dvipng is missing or fails otherwise) go through
    # dvips and ghostscript instead.  Raises LatexRenderError when dvips
    # fails and GhostscriptError when ghostscript fails.
    def dviPng(self, fName, res):
        dviName = fName + '.dvi'
        pagePattern = fName + '-%03d.png'
        gsPagePattern = fName + '-gs-%03d.png'
        psBase = fName + '-gs'
        ppopt = ''
        ppredo = []
        if dvipngpath is not None:
            # '--truecolor -bg Transparent' would generate RGB images with a
            # transparent pixel (not an alpha channel), but it's close...
            cmdLine = '%s -bg Transparent -picky -D %f -T tight -v -o %s %s' % (
                dvipngpath, res, pagePattern % 1, dviName)
            failed, stdout, stderr = self.runCommand(cmdLine)
            if not failed:
                return
            # dvipng -picky reports the pages it cannot render (usually due
            # to the use of postscript specials).  For those we fall through
            # to ghostscript.
            ppredo = [m.group(1)
                      for m in re.finditer(r'\[(\d+) not rendered\]', stdout)]
            if ppredo:
                ppopt = '-pp ' + ','.join(ppredo)
        cmdLine = '%s %s -R -D %f -o %s %s' % (dvipspath, ppopt, res, psBase + '.ps', dviName)
        failed, stdout, stderr = self.runCommand(cmdLine)
        if failed:
            self.log('%s\n%s\n%s\n' % (failed, stdout, stderr), 'DVIPSError')
            raise LatexRenderError('DVIPSError %s' % stderr + '\n' + stdout)
        if not ppopt:
            # All pages were converted: dvips lists each one as "[n]".
            ppredo = range(1, len(re.findall(r'\[\d+\]', stderr)) + 1)
        self.runGhostscript(psBase, res, 'pngalpha')
        self.center(psBase, res)
        # Ghostscript numbers its output 1, 2, ...; move each file to the
        # name of the page it stands for.
        gsIndex = 1
        for page in ppredo:
            os.rename(os.path.join(workingDir, gsPagePattern % gsIndex),
                      os.path.join(workingDir, pagePattern % int(page)))
            gsIndex += 1

    # Run ghostscript on fName.ps with the given output device and return
    # what it wrote to stderr.  Raises GhostscriptError when gs is not
    # installed or exits with an error.
    def runGhostscript(self, fName, res, device):
        if gspath is None:
            raise GhostscriptError('ghostscript executable (gs) was not found')
        psName, outPattern = fName + '.ps', fName + '-%03d.png'
        cmdLine = ('%s -dDOINTERPOLATE -dTextAlphaBits=4 '
                   '-dGraphicsAlphaBits=4 -r%f -sDEVICE=%s '
                   '-dBATCH -dNOPAUSE -dQUIT -sOutputFile=%s %s '
                   % (gspath, res, device, outPattern, psName))
        failed, stdout, stderr = self.runCommand(cmdLine)
        if failed:
            self.log('%s\n%s\n%s\n' % (failed, stdout, stderr), 'GhostscriptError')
            raise GhostscriptError(stderr + '\n' + stdout)
        return stderr  # when using bbox, BoundingBox is on stderr

    # Crop every ghostscript page image fName-NNN.png to the bounding box
    # that the 'bbox' device reports for that page and save it as an RGBA
    # PNG.  Assumes the png's are already created.
    def center(self, fName, res):
        report = self.runGhostscript(fName, res, 'bbox')
        # One %%HiResBoundingBox line per page, in page order; any other
        # line in the report is ignored.
        boxes = re.findall(
            r'%%HiResBoundingBox: ([0-9\.]+) ([0-9\.]+) ([0-9\.]+) ([0-9\.]+)',
            report)
        pngfname = fName + '-%03d.png'
        pageNumber = 0
        for box in boxes:
            pageNumber += 1
            pagePath = os.path.join(workingDir, pngfname % pageNumber)
            start_x, start_y, end_x, end_y = [float(value) for value in box]
            # Bounding box size in pixels, at least 1x1.
            xsize = max(1, int(round(((end_x - start_x) * res) / ptperinch)))
            ysize = max(1, int(round(((end_y - start_y) * res) / ptperinch)))
            left = int(round(start_x * res / ptperinch))
            bottom = int(round(start_y * res / ptperinch))
            im = Image.open(pagePath)
            # The bounding box is measured from the bottom of the page, image
            # rows from the top.
            cropdim = (left, im.size[1] - bottom - ysize,
                       left + xsize, im.size[1] - bottom)
            im = im.crop(cropdim)
            im2 = Image.new('RGBA', im.size, (255, 255, 255))
            im2.paste(im, (0, 0))
            if im.mode != 'RGBA':
                # Image should already have an alpha; if not, derive one from
                # the inverted grey levels.
                alpha = ImageChops.invert(im2.convert('L'))
                im3 = Image.new('RGBA', im.size, (0, 0, 0))
                im3.putalpha(alpha)
                im2 = im3
            im2.save(pagePath, "PNG")

    # Locate the alignment dot at the left of an image, cut it off and return
    # a new RGBA image whose height is derived from the dot's centerline.
    # Raises AlignError when the image mode is unsupported or no dot/letter
    # can be found.  NOTE: nothing in this class calls this method at present.
    def align(self, im, charheightpx=0, alignfudge=0):
        dotstartx = -1; dotendx = -1; dotstarty = -1; dotendy = -1
        letterstartx = -1
        if im.mode == 'P':
            white = 0
        elif im.mode == 'RGB':
            white = (255,255,255)
        elif im.mode == 'RGBA':
            white = (254,254,254,0) # as output by ghostscript pngalpha device
        elif im.mode == 'L':
            white = 255 # FIXME I think
        else:
            raise AlignError('Unsupported image mode %s' % im.mode)
        for x in range(0,im.size[0]):  # Try to find the leading dot
            if(dotendy < 0) :
                # Phase 1: find the first non-white pixel (top of the dot)
                # and the row where the dot ends in this column.
                for y in range(0, im.size[1]):
                    pixel=im.getpixel((x,y))
                    if(dotstarty >= 0 and dotendy < 0):
                        if(pixel == white):
                            dotendy = y
                            break
                    if(dotstarty < 0 and pixel != white):
                        dotstartx = x
                        dotstarty = y
                else:
                    if dotstarty >= 0 and dotendy < 0:
                        dotendy = im.size[1]
            elif(dotendx < 0):
                # Phase 2: the dot ends at the first column that is white
                # over the dot's rows.
                maybedotendx = x
                for y in range(dotstarty, dotendy):
                    pixel=im.getpixel((x,y))
                    if pixel != white:
                        maybedotendx = -1
                if maybedotendx > 0:
                    dotendx = x
            else:
                # Phase 3: the first non-white column after the dot is where
                # the actual content starts.
                for y in range(0,im.size[1]):
                    pixel=im.getpixel((x,y))
                    if pixel != white:
                        letterstartx = x
                        break
                if letterstartx>0: break
        else: # failed to find letterstartx
            self.log('dotstartx=%d, dotendx=%d, dotstarty=%d, dotendy=%d, letterstartx=%d\n'
                %(dotstartx, dotendx, dotstarty, dotendy, letterstartx))
            self.log('Unable to find dot. (size=%dx%d)\n'%(im.size[0],im.size[1]), 'renderNonExistingImages')
            raise AlignError('Image appears to be blank or not have an alignment dot.')
        centerline = (dotendy-dotstarty)/2.0    # increase centerline to move char up WRT text
        dotcenter = (dotendy-dotstarty)*7.0/144.0
        centerline += dotcenter
        if charheightpx in centerfudge:
            centerline += centerfudge[charheightpx]/2.0
        # if dot is not pixel-aligned, take that into account
        # compare the top row of the dot with its bottom row
        dottophalf = 0
        for y in range(dotstarty, int(math.ceil(dotstarty+(dotendy-1-dotstarty)/2.0))):
            for x in range(dotstartx, dotendx):
                dottophalf += self.cabs(im.getpixel((x,y)))
            break
        else:
            dottophalf = 1 # dot was 1px high
        dotbottomhalf = 0
        for y in range(dotendy-1, dotstarty+(dotendy-1-dotstarty)//2,-1):
            for x in range(dotstartx, dotendx):
                dotbottomhalf += self.cabs(im.getpixel((x,y)))
            break
        else:
            dotbottomhalf = 1 # dot was 1px high
        if(dottophalf != 0.0 and dotbottomhalf != 0):
            dotpixmiss = float(dottophalf-dotbottomhalf)/(dottophalf+dotbottomhalf)
        else:
            dotpixmiss = 0.0
        centerline += dotpixmiss
        centerline += alignfudge # user parameter -- FIXME remove?
        bottomsize = im.size[1]-centerline               # pixels below midline
        topsize = centerline                             # pixels above midline
        if(topsize > bottomsize):
            newheight = 2*topsize
        else:
            newheight = 2*bottomsize
        chopx = letterstartx-1
        newheight= int(newheight)
        widentop = 0 # widening at the top is disabled (marked broken: SKWM)
        im2 = Image.new('RGBA', (im.size[0]-chopx,newheight), (255,255,255))
        im2.paste(im,(-chopx,widentop,im.size[0]-chopx,im.size[1]-widentop))
        return im2

    # Magnitude of a pixel value: for a tuple, the euclidean length of its
    # first three components, scaled by the fourth (alpha/255) when there are
    # four; any other value is returned as it is.
    def cabs(self, A):
        if not isinstance(A, tuple):
            return A
        sq = 0.0
        for i in range(0,3):
            sq += A[i]*A[i]
        magnitude = math.sqrt(sq)
        if len(A) == 4:
            return magnitude*(A[3]/255.0)
        return magnitude

InitializeClass(LatexTool)
Note: See TracBrowser for help on using the repository browser.