* WGs marked with an asterisk (*) have had at least one new draft made available during the last 5 days

Pyht Tool

Pyht processes HTML files with embedded Python processing directives.
Version: 0.30
Author:
About | About | Changelog | Code | Control | Todo | Download | | Copyright

#!/usr/bin/python -u
# -*- python -*-
#
#	Interpret python processing instructions within HTML.
#
#	-----------------------------------------------------------------
#
#	Copyright 2002 Henrik Levkowetz
#
#	This program is free software; you can redistribute it and/or modify
#	it under the terms of the GNU General Public License as published by
#	the Free Software Foundation; either version 2 of the License, or
#	(at your option) any later version.
#
#	This program is distributed in the hope that it will be useful,
#	but WITHOUT ANY WARRANTY; without even the implied warranty of
#	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#	GNU General Public License for more details.
#
#	You should have received a copy of the GNU General Public License
#	along with this program; if not, write to the Free Software
#	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#	-----------------------------------------------------------------
#
#	The current version of GPL is at http://www.gnu.org/licenses/gpl.txt
#
#	-----------------------------------------------------------------
#
#	The purpose of this program is to read a .pyht file which is
#	expected to contain html with embedded python code, and output
#	the html sections verbatim, while the python code is executed as
#	it is encountered.
#

"""
    PyHt reads a HTML file with embedded XML processing commands calling for
a processor "python", and passes the HTML straight through, while the python
instructions are passed to the python interpreter.  Call it as follows:

    http://example.org/cgi-bin/pyht.py/url/to/python-in-html-file.pyht

Or alternatively, register pyht.py with your http server as a handler for
.pyht files."""


import cgi, os, sys, urllib, re, string, traceback, stat, time

# Pyht release number; printed by --version / --info=version and sent in the
# X-Generator response header.
version = "0.30"

# Becomes 1 once the HTTP header section has been terminated (or immediately
# when not running as a CGI script); header() prints nothing after that.
header_done = 0

def header(*headers):
    global header_done
    if header_done:
        #sys.stdout.write("<!-- Belated headers: %s -->" % repr(headers))
	return
    else:
	for line in headers:
	    print line

def die(str):
    header("Content-type: text/plain")
    print str
    sys.exit(1)

def out(str):
    global indent
    if (str):
        str = string.replace(str, "\n", "\n"+" "*indent)
        try:
            sys.stdout.write(str)
            # if cachefile: cachefile.write(str)
        except IOError, e:
            if e.errno == 32:           # Broken pipe can occur with Apache's worker MSM
                pass
            else:
                raise

def untaint(str):
    """Return *str* with control and shell/HTML-special characters replaced by "_".

    Replaced characters:
      * control characters 0x01-0x08, 0x0B, 0x0C and 0x0E-0x1F
        (tab, newline and carriage return are kept),
      * the punctuation ``! $ & % ` < | >``,
      * 0x7F-0xA0.

    The original pattern ended the control-character range at ``\\031``, which
    is octal for decimal 25, so 0x1A-0x1F (including ESC) slipped through.  The
    range now extends to 0x1F.
    """
    return re.sub("[\x01-\x08\x0b\x0c\x0e-\x1f!$&%`<|>\x7f-\xa0]", "_", str)

def unindent(text):
    """Strip the common leading indentation from an embedded code block.

    Tabs are expanded first.  The indentation of the first line that is
    neither blank nor a comment defines the amount to remove; it is also
    stored in the module-level ``indent`` so that out() can re-indent
    generated output.  If no such line exists, the last line is used.
    From that line onwards, lines indented at least that much lose exactly
    that many columns; less-indented lines are stripped of all leading
    whitespace.  Lines before it are left untouched.

    Returns the adjusted text with the same number of newlines as the input.
    """
    global indent
    lines = text.expandtabs().split("\n")

    # Locate the first real code line; fall back to the last line.
    first = len(lines) - 1
    for pos, candidate in enumerate(lines):
        stripped = candidate.strip()
        if stripped and not stripped.startswith("#"):
            first = pos
            break

    indent = len(lines[first]) - len(lines[first].lstrip())

    for pos in range(first, len(lines)):
        current = lines[pos]
        if current[:indent].strip():
            # Less indented than the reference line: flush left.
            lines[pos] = current.lstrip()
        else:
            lines[pos] = current[indent:]
    return "\n".join(lines)

def filetime(name):
    """Return the modification time of *name* (the ST_MTIME stat field), or 0
    if the path does not exist."""
    if not os.path.exists(name):
        return 0
    return os.stat(name)[stat.ST_MTIME]

def dirtime(dir):
    """Return the newest modification time among *dir* and its direct entries.

    The original stat'ed "." and the bare entry names, which only gave the
    right answer when *dir* was the current working directory (os.listdir
    returns names relative to *dir*).  Entries are now resolved against
    *dir*; the result for dirtime(".") is unchanged.

    Raises OSError (from os.listdir) if *dir* cannot be listed.
    """
    latest = filetime(dir)
    for entry in os.listdir(dir):
        latest = max(latest, filetime(os.path.join(dir, entry)))
    return latest

def timestr(t):
    """Format epoch timestamp *t* as "DD Mon YYYY HH:MM GMT", using UTC."""
    utc_fields = time.gmtime(t)
    return time.strftime("%d %b %Y %H:%M GMT", utc_fields)

def listdir(path, pattern):
    """Return the sorted names in directory *path* that match *pattern*.

    *pattern* is a regular expression applied with re.match, i.e. anchored at
    the start of each name.
    """
    files = []
    for entry in os.listdir(path):
        if re.match(pattern, entry):
            files.append(entry)
    files.sort()
    return files

def filetext(path):
    """Return the full contents of the regular file *path*.

    Returns "" when *path* is not an existing regular file.  The file is
    closed even if reading fails (the original leaked the handle in that
    case).
    """
    if not os.path.isfile(path):
        return ""
    file = open(path)
    try:
        return file.read()
    finally:
        file.close()

def makelocals(filename):
    """Build the local namespace handed to code embedded in *filename*.

    The returned dict contains:
      filename    -- the path as given
      filepath    -- its directory part
      filebase    -- base name without extension
      fileext     -- the extension (including the dot)
      filetime    -- function returning a file's mtime (default: this file)
      filedate    -- function returning that mtime formatted by timestr()
      timenow     -- time.time() at the moment of the call
      datenow     -- timenow formatted by timestr()
      urlname     -- the module-level ``pyhtfile`` value
      interpreter -- sys.argv[0]
    """
    def filetime(name=filename):
        # Same contract as the module-level filetime(), but defaulting to
        # the file currently being processed.
        if not os.path.exists(name):
            return 0
        return os.stat(name)[stat.ST_MTIME]

    def filedate(name=filename):
        return timestr(filetime(name))

    stem, suffix = os.path.splitext(os.path.basename(filename))
    now = time.time()
    return {
        "filename": filename,
        "filepath": os.path.dirname(filename),
        "filebase": stem,
        "fileext": suffix,
        "filetime": filetime,
        "filedate": filedate,
        "timenow": now,
        "datenow": timestr(now),
        "urlname": pyhtfile,
        "interpreter": sys.argv[0],
    }

def include(filename):

    global execglobals
    if not filename:
        die("\n\nMissing input filename.\n" + __doc__)
    if not re.match("^([@\w.,\/_-]+)$", filename):
	die("\n\nFile name '%s' has invalid characters (use letters, numbers, dot, dash and underscore" % filename)
    else:
        header("Content-type: text/html")
	pi = re.compile("(?ms)^(.*?)<\?python(\s([^\?]|\?[^>])*)\?>(.*)$")
        sys.path[0] = os.path.dirname(os.path.realpath(filename))
	file = open(filename, "r")
	buf = ""
	if file:
            execlocals = makelocals(filename)

            try:
                line = 1
                while 1:
                    chunk = file.read(10000)
                    if not chunk: break
                    buf = buf + chunk
                    while 1:
                        match = pi.match(buf)
                        if not match:
                            #print "No more p.i.s, finishing"
                            break

                        html = match.group(1)
                        code = unindent(match.group(2))
                        buf = match.group(4)

                        if len(html):
                            global header_done
                            if not header_done:

                                sys.stdout.write("X-Generator: pyht %s\n" % version)
                                sys.stdout.write("\n")
                                html = re.sub("^[ \t\r\n]*", "", html)
                                header_done = 1

                            sys.stdout.write( html )
                            # if cachefile: cachefile.write(html)

                        line = line + string.count(html, "\n")
                        execlocals["__name__"] = "embedded-code-at-line-%d" % line

                        if string.find(code, "\n") > -1:
                            exec code in execglobals, execlocals
                        else:
                            res = eval(code, execglobals, execlocals)
                            if not type(res) == type(""):
                                res = repr(res)
                            sys.stdout.write(res)
                            # if cachefile: cachefile.write(res)

                        line = line + string.count(code, "\n")


                #print "Final buf print:"
                sys.stdout.write( buf )
                # if cachefile: cachefile.write( buf )
            except IOError, e:
                if e.errno == 32:           # Broken pipe can occur with Apache's worker MSM
                    pass
                else:
                    raise

def tryinclude(filename):
    """Process *filename* with include() if it is an existing regular file.

    A missing file is silently skipped.  Always returns "" so the call can be
    used as a single-line embedded expression without adding output.
    """
    if not os.path.isfile(filename):
        return ""
    include(filename)
    return ""

def virtualinc(filename):
    """Include a file addressed relative to the web server's document root.

    The root is taken from the DOCUMENT_ROOT environment variable ("./" when
    unset).  A *filename* starting with "/" is resolved directly below the
    root; any other name is resolved below the root joined with the current
    working directory (minus its first character).  The file is processed via
    tryinclude(), so a missing file is ignored.  Always returns "".
    """
    root = os.environ.get("DOCUMENT_ROOT", "./")
    if filename[0:1] == "/":
        target = os.path.join(root, filename[1:])
    else:
        target = os.path.join(root, os.getcwd()[1:], filename)
    tryinclude(target)
    return ""

def sendmail(sender, recipient, subject, text):
    """Send a plain-text mail through the SMTP server on localhost.

    *text* becomes the message body; *subject*, *sender* and *recipient* are
    set as the Subject, From and To headers, and *sender* / *recipient* are
    also passed to the SMTP server as the envelope addresses.
    """
    import smtplib
    from email.MIMEText import MIMEText

    mail = MIMEText(text)
    for field, content in (('Subject', subject),
                           ('From', sender),
                           ('To', recipient)):
        mail[field] = content
    payload = mail.as_string()

    connection = smtplib.SMTP("localhost")
    connection.sendmail(sender, recipient, payload)
    connection.quit()

def getupload(args, form, fieldname):
    """Store an uploaded (or remotely fetched) file in a temp directory.

    Parameters:
      args      -- container checked for *fieldname*; nothing happens when
                   the field is absent.
      form      -- mapping from field name to an object with ``filename``,
                   ``value`` and ``file`` attributes (the cgi.FieldStorage
                   instance in this script).
      fieldname -- name of the upload field.

    The source name is the field's ``filename`` or, failing that, its
    ``value``.  If it starts with "http://" or "ftp://" the content is fetched
    with urllib; otherwise the field's ``file`` object is read.  The content
    is written to ./tmp/<basename> when ./tmp exists, else /tmp/<basename>,
    and the file is made readable and writable by everyone (mode 0666).

    Returns the destination path, or None when there is nothing to store or
    the base name contains characters other than letters, digits, "_", "."
    and "-" (or is "." / "..").

    Changes from the original: the output file is closed even when writing
    fails, a stream opened for a remote URL is closed afterwards, and the
    names "." and ".." are rejected instead of failing in open().

    SECURITY NOTE(review): the URL comes straight from the request, so this
    lets any client make the server fetch arbitrary http/ftp resources; the
    destination name is predictable and the result is world-writable.
    Confirm this is acceptable for the deployment.
    """
    uploadsrc = ""
    uploadfile = None
    fetched = False          # true when uploadfile was opened here

    if fieldname in args:
        field = form[fieldname]
        if field.filename:
            uploadsrc = field.filename
        else:
            uploadsrc = field.value

        if uploadsrc[:7] == "http://" or uploadsrc[:6] == "ftp://":
            import urllib
            uploadfile = urllib.urlopen(uploadsrc)
            fetched = True
        elif field.file:
            uploadfile = field.file

    try:
        if not (uploadsrc and uploadfile):
            return None

        # Browsers may send a full client-side path; keep only the last
        # component, treating backslashes as path separators too.
        uploaddst = os.path.basename(re.sub("\\\\", "/", uploadsrc))
        if not re.match("^[a-zA-Z0-9_.-]+$", uploaddst):
            return None
        if uploaddst in (".", ".."):
            return None

        if os.path.isdir("tmp"):
            uploaddst = os.path.join("tmp", uploaddst)
        else:
            uploaddst = os.path.join("/tmp", uploaddst)

        outputfile = open(uploaddst, "w")
        try:
            while 1:
                line = uploadfile.readline()
                if not line:
                    break
                outputfile.write(line)
        finally:
            outputfile.close()

        # Equivalent to octal 0666: read/write for user, group and others.
        os.chmod(uploaddst,
                 stat.S_IRUSR | stat.S_IWUSR |
                 stat.S_IRGRP | stat.S_IWGRP |
                 stat.S_IROTH | stat.S_IWOTH)
        return uploaddst
    finally:
        # The form's own file object is left open: the caller may still
        # read the field afterwards.
        if fetched:
            uploadfile.close()


# ------------------------------------------------------------------------------

# Work out which .pyht document was requested.  Under CGI the URL path after
# the script name arrives in PATH_INFO; anything following the ".pyht"
# extension is cut off (a PATH_INFO without ".pyht" raises ValueError here).
# On the command line the first argument names the file.
pyhtfile = ""
if "PATH_INFO" in os.environ:
    pyhtfile = os.environ["PATH_INFO"]
    extpos = pyhtfile.index(".pyht")
    pyhtfile = pyhtfile[:extpos + len(".pyht")]
elif sys.argv[1:]:
    pyhtfile = sys.argv[1]

# Global namespace for the python code embedded in .pyht pages.  The
# request-specific entries "args" and "form" are added by the main script.
execglobals = {
    "__builtins__": __builtins__,

    # Engine and request information.
    "pyht_version": version,
    "env": os.environ,
    "servername": os.environ.get("SERVER_NAME", "localhost"),
    "urlpath": "http://%s%s" % (os.environ.get("SERVER_NAME", "localhost"),
                                os.path.dirname(pyhtfile)),

    # Output helpers.
    "out": out,
    "header": header,
    "escape": lambda x: cgi.escape(repr(x)),

    # Including other pages.
    "include": include,
    "tryinclude": tryinclude,
    "virtualinc": virtualinc,

    # File and directory helpers.
    "filetime": filetime,
    "timestr": timestr,
    "listdir": listdir,
    "filetext": filetext,
    "getupload": getupload,

    # Mail.
    "sendmail": sendmail,

    # Shortcuts for common os.path functions.
    "join": os.path.join,
    "isfile": os.path.isfile,
    "dirname": os.path.dirname,
    "basename": os.path.basename,
    "splitext": os.path.splitext,
}

if __name__ == "__main__":
    global path, base, ext
    from getopt import getopt

    try:
        form = cgi.FieldStorage()
        args = {}
        if form:
            for key in form.keys():
                if type(form[key]) is type([]):
                    lst = []
                    for item in form[key]: lst.append(untaint(item.value))
                    args[key] = lst
                else:
                    args[key] = untaint(form[key].value)
        if not args:
            opts, names = getopt(sys.argv[1:], "", ["version", "info="])

            for o, v in opts:
                args[o] = v

            for arg in names:
                try:
                    o, v = arg.split("=",1)
                    args[o] = v
                except:
                    args[arg] = None

        execglobals["args"] = args
        execglobals["form"] = form

        script = os.environ.get("SCRIPT_NAME", "pyht")
        if script == "pyht":
            # we're not running as a cgi-bin script
            header_done = 1

        if "--version" in args:
            print "%s\t%s" % (script, version)
            sys.exit()

        if "--info" in args:
            info = args["--info"]
            if (info=="usage"):
                print __doc__,
            if (info=="version"):
                print "%s\t%s" % (script, version)
            if (info=="log"):
                print relnotes
            sys.exit()

        filename = os.environ.get("PATH_TRANSLATED", pyhtfile)
        extpos = filename.index(".pyht")
        filename = filename[:extpos+len(".pyht")]
        # cachename = filename + ".cache"

        if args.get("showcode", 0):
            header("Content-type: text/plain")
            print ""
            file = open(filename, "r")
            print file.read()
            file.close()
        else:
            os.chdir(os.path.dirname(os.path.realpath(filename)))
    #        if not os.path.isfile(cachename) or filetime(cachename) < filetime(filename) or filetime(cachename) < dirtime(".") or len(args) > 0:
    #            if len(args) > 0:
    #                cachefile = None
    #            else:
    #                try:
    #                    cachefile = open(cachename, "w")
    #                except:
    #                    cachefile = None
            include(filename)
    #            print "<!-- Cache out of date (cache: %s - dir: %s)  -->" % (timestr(filetime(cachename)), timestr(dirtime(".")))
    #            if not cachefile:
    #                print "<!-- Couldn't open cachefile for writing -->"
    #        else:
    #            file = open(cachename, "r")
    #            print ""
    #            print file.read()
    #            file.close()
    #            print "<!-- Using cached page, dated %s UTC. -->"  % timestr(filetime(cachename))
    #
    #        print "<!-- Generated by pyht.py v.%s on %s UTC -->" % (version, timestr(time.time()))
    except SystemExit:
        pass
    except:
        import cgitb
        domain = os.environ.get("SERVER_NAME", "")
        if domain:
            sendmail('Pyht script engine <pyht@%s>' % domain,
                    'webmaster@' + domain,
                    'Pyht script error in http://%s%s' % (domain, pyhtfile),
                    cgitb.text(sys.exc_info()))
        else:
            raise
        print """
        <h4 align="center"><i>An error occurred at this point in the web-page generation.<br />
        An error report has been sent to the webmaster.  If this error isn't
        fixed within 48 hours, please contact the web page author directly.
        </i></h4>
        <!-- %s -->
        """ % cgitb.text(sys.exc_info())


Latest update: 2007-03-28 08:43 PDT - webmaster@tools.ietf.org