D2854: hgweb: str/bytes followups after indygreg's refactoring

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
7 messages Options
Reply | Threaded
Open this post in threaded view
|

D2854: hgweb: str/bytes followups after indygreg's refactoring

indygreg (Gregory Szorc)
durin42 created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  As of this change, we're mostly principled about using native strings
  (so bytes on 2, unicode on 3) for accessing HTTP headers. This
  restores test-getbundle.t (probably among others) to passing on Python
  3.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2854

AFFECTED FILES
  mercurial/hgweb/common.py
  mercurial/hgweb/hgweb_mod.py
  mercurial/hgweb/hgwebdir_mod.py
  mercurial/hgweb/request.py
  mercurial/hgweb/server.py
  mercurial/wireprotoserver.py

CHANGE DETAILS

diff --git a/mercurial/wireprotoserver.py b/mercurial/wireprotoserver.py
--- a/mercurial/wireprotoserver.py
+++ b/mercurial/wireprotoserver.py
@@ -29,9 +29,9 @@
 
 HTTP_OK = 200
 
-HGTYPE = 'application/mercurial-0.1'
-HGTYPE2 = 'application/mercurial-0.2'
-HGERRTYPE = 'application/hg-error'
+HGTYPE = r'application/mercurial-0.1'
+HGTYPE2 = r'application/mercurial-0.2'
+HGERRTYPE = r'application/hg-error'
 
 SSHV1 = wireprototypes.SSHV1
 SSHV2 = wireprototypes.SSHV2
@@ -44,7 +44,7 @@
     chunks = []
     i = 1
     while True:
-        v = req.headers.get(b'%s-%d' % (headerprefix, i))
+        v = req.headers.get(r'%s-%d' % (headerprefix, i))
         if v is None:
             break
         chunks.append(pycompat.bytesurl(v))
@@ -79,23 +79,23 @@
 
     def _args(self):
         args = self._req.qsparams.asdictoflists()
-        postlen = int(self._req.headers.get(b'X-HgArgs-Post', 0))
+        postlen = int(self._req.headers.get(r'X-HgArgs-Post', 0))
         if postlen:
             args.update(urlreq.parseqs(
                 self._req.bodyfh.read(postlen), keep_blank_values=True))
             return args
 
-        argvalue = decodevaluefromheaders(self._req, b'X-HgArg')
+        argvalue = decodevaluefromheaders(self._req, r'X-HgArg')
         args.update(urlreq.parseqs(argvalue, keep_blank_values=True))
         return args
 
     def forwardpayload(self, fp):
         # Existing clients *always* send Content-Length.
-        length = int(self._req.headers[b'Content-Length'])
+        length = int(self._req.headers[r'Content-Length'])
 
         # If httppostargs is used, we need to read Content-Length
         # minus the amount that was consumed by args.
-        length -= int(self._req.headers.get(b'X-HgArgs-Post', 0))
+        length -= int(self._req.headers.get(r'X-HgArgs-Post', 0))
         for s in util.filechunkiter(self._req.bodyfh, limit=length):
             fp.write(s)
 
@@ -189,7 +189,7 @@
     # in this case. We send an HTTP 404 for backwards compatibility reasons.
     if req.dispatchpath:
         res.status = hgwebcommon.statusmessage(404)
-        res.headers['Content-Type'] = HGTYPE
+        res.headers[r'Content-Type'] = HGTYPE
         # TODO This is not a good response to issue for this request. This
         # is mostly for BC for now.
         res.setbodybytes('0\n%s\n' % b'Not Found')
@@ -207,7 +207,7 @@
     except hgwebcommon.ErrorResponse as e:
         for k, v in e.headers:
             res.headers[k] = v
-        res.status = hgwebcommon.statusmessage(e.code, pycompat.bytestr(e))
+        res.status = hgwebcommon.statusmessage(e.code, e)
         # TODO This response body assumes the failed command was
         # "unbundle." That assumption is not always valid.
         res.setbodybytes('0\n%s\n' % pycompat.bytestr(e))
@@ -271,11 +271,11 @@
 
     def setresponse(code, contenttype, bodybytes=None, bodygen=None):
         if code == HTTP_OK:
-            res.status = '200 Script output follows'
+            res.status = r'200 Script output follows'
         else:
             res.status = hgwebcommon.statusmessage(code)
 
-        res.headers['Content-Type'] = contenttype
+        res.headers[r'Content-Type'] = pycompat.strurl(contenttype)
 
         if bodybytes is not None:
             res.setbodybytes(bodybytes)
diff --git a/mercurial/hgweb/server.py b/mercurial/hgweb/server.py
--- a/mercurial/hgweb/server.py
+++ b/mercurial/hgweb/server.py
@@ -101,7 +101,7 @@
         try:
             self.do_write()
         except Exception:
-            self._start_response("500 Internal Server Error", [])
+            self._start_response(r"500 Internal Server Error", [])
             self._write("Internal Server Error")
             self._done()
             tb = r"".join(traceback.format_exception(*sys.exc_info()))
@@ -136,26 +136,26 @@
                 env[r'CONTENT_TYPE'] = self.headers.get_default_type()
             else:
                 env[r'CONTENT_TYPE'] = self.headers.get_content_type()
-            length = self.headers.get('content-length')
+            length = self.headers.get(r'content-length')
         else:
             if self.headers.typeheader is None:
                 env[r'CONTENT_TYPE'] = self.headers.type
             else:
                 env[r'CONTENT_TYPE'] = self.headers.typeheader
-            length = self.headers.getheader('content-length')
+            length = self.headers.getheader(r'content-length')
         if length:
             env[r'CONTENT_LENGTH'] = length
         for header in [h for h in self.headers.keys()
-                       if h not in ('content-type', 'content-length')]:
+                       if h not in (r'content-type', r'content-length')]:
             hkey = r'HTTP_' + header.replace(r'-', r'_').upper()
             hval = self.headers.get(header)
             hval = hval.replace(r'\n', r'').strip()
             if hval:
                 env[hkey] = hval
         env[r'SERVER_PROTOCOL'] = self.request_version
         env[r'wsgi.version'] = (1, 0)
         env[r'wsgi.url_scheme'] = pycompat.sysstr(self.url_scheme)
-        if env.get(r'HTTP_EXPECT', '').lower() == '100-continue':
+        if env.get(r'HTTP_EXPECT', '').lower() == r'100-continue':
             self.rfile = common.continuereader(self.rfile, self.wfile.write)
 
         env[r'wsgi.input'] = self.rfile
diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -316,18 +316,20 @@
         if k.startswith('HTTP_'):
             headers.append((k[len('HTTP_'):].replace('_', '-'), v))
 
-    headers = wsgiheaders.Headers(headers)
+
+    headers = wsgiheaders.Headers([(pycompat.strurl(k), pycompat.strurl(v))
+                                   for k, v in headers])
 
     # This is kind of a lie because the HTTP header wasn't explicitly
     # sent. But for all intents and purposes it should be OK to lie about
     # this, since a consumer will either either value to determine how many
     # bytes are available to read.
     if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
-        headers['Content-Length'] = env['CONTENT_LENGTH']
+        headers[r'Content-Length'] = pycompat.strurl(env['CONTENT_LENGTH'])
 
     bodyfh = env['wsgi.input']
-    if 'Content-Length' in headers:
-        bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
+    if r'Content-Length' in headers:
+        bodyfh = util.cappedreader(bodyfh, int(headers[r'Content-Length']))
 
     return parsedrequest(method=env['REQUEST_METHOD'],
                          url=fullurl, baseurl=baseurl,
@@ -422,7 +424,7 @@
         """
         self._verifybody()
         self._bodybytes = b
-        self.headers['Content-Length'] = '%d' % len(b)
+        self.headers[r'Content-Length'] = r'%d' % len(b)
 
     def setbodygen(self, gen):
         """Define the response body as a generator of bytes."""
@@ -473,19 +475,19 @@
         # states that no response body can be issued. Content-Length can
         # be sent. But if it is present, it should be the size of the response
         # that wasn't transferred.
-        if self.status.startswith('304 '):
+        if self.status.startswith(r'304 '):
             # setbodybytes('') will set C-L to 0. This doesn't conform with the
             # spec. So remove it.
-            if self.headers.get('Content-Length') == '0':
-                del self.headers['Content-Length']
+            if self.headers.get(r'Content-Length') == r'0':
+                del self.headers[r'Content-Length']
 
             # Strictly speaking, this is too strict. But until it causes
             # problems, let's be strict.
             badheaders = {k for k in self.headers.keys()
-                          if k.lower() not in ('date', 'etag', 'expires',
-                                               'cache-control',
-                                               'content-location',
-                                               'vary')}
+                          if k.lower() not in (r'date', r'etag', r'expires',
+                                               r'cache-control',
+                                               r'content-location',
+                                               r'vary')}
             if badheaders:
                 raise error.ProgrammingError(
                     'illegal header on 304 response: %s' %
@@ -508,7 +510,7 @@
         # If the client sent Expect: 100-continue, we assume it is smart enough
         # to deal with the server sending a response before reading the request.
         # (httplib doesn't do this.)
-        if self._req.headers.get('Expect', '').lower() == '100-continue':
+        if self._req.headers.get(r'Expect', r'').lower() == r'100-continue':
             pass
         # Only tend to request methods that have bodies. Strictly speaking,
         # we should sniff for a body. But this is fine for our existing
diff --git a/mercurial/hgweb/hgwebdir_mod.py b/mercurial/hgweb/hgwebdir_mod.py
--- a/mercurial/hgweb/hgwebdir_mod.py
+++ b/mercurial/hgweb/hgwebdir_mod.py
@@ -371,11 +371,11 @@
             virtual = req.dispatchpath.strip('/')
             tmpl = self.templater(req, nonce)
             ctype = tmpl('mimetype', encoding=encoding.encoding)
-            ctype = templateutil.stringify(ctype)
+            ctype = pycompat.strurl(templateutil.stringify(ctype))
 
             # Global defaults. These can be overridden by any handler.
-            res.status = '200 Script output follows'
-            res.headers['Content-Type'] = ctype
+            res.status = r'200 Script output follows'
+            res.headers[r'Content-Type'] = ctype
 
             # a static file
             if virtual.startswith('static/') or 'static' in req.qsparams:
@@ -423,8 +423,10 @@
                 if real:
                     # Re-parse the WSGI environment to take into account our
                     # repository path component.
+                    strenv = {pycompat.strurl(k): pycompat.strurl(v) for k, v in
+                              req.rawenv.iteritems()}
                     req = requestmod.parserequestfromenv(
-                        req.rawenv, reponame=virtualrepo,
+                        strenv, reponame=virtualrepo,
                         altbaseurl=self.ui.config('web', 'baseurl'))
                     try:
                         # ensure caller gets private copy of ui
@@ -442,12 +444,12 @@
                 return self.makeindex(req, res, tmpl, subdir)
 
             # prefixes not found
-            res.status = '404 Not Found'
+            res.status = r'404 Not Found'
             res.setbodygen(tmpl('notfound', repo=virtual))
             return res.sendresponse()
 
         except ErrorResponse as e:
-            res.status = statusmessage(e.code, pycompat.bytestr(e))
+            res.status = statusmessage(e.code, e)
             res.setbodygen(tmpl('error', error=e.message or ''))
             return res.sendresponse()
         finally:
diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py
--- a/mercurial/hgweb/hgweb_mod.py
+++ b/mercurial/hgweb/hgweb_mod.py
@@ -360,7 +360,7 @@
                     for a in args:
                         req.qsparams.add('file', a)
 
-            ua = req.headers.get('User-Agent', '')
+            ua = pycompat.bytesurl(req.headers.get(r'User-Agent', r''))
             if cmd == 'rev' and 'mercurial' in ua:
                 req.qsparams['style'] = 'raw'
 
@@ -379,7 +379,7 @@
         try:
             rctx.tmpl = rctx.templater(req)
             ctype = rctx.tmpl('mimetype', encoding=encoding.encoding)
-            ctype = templateutil.stringify(ctype)
+            ctype = pycompat.strurl(templateutil.stringify(ctype))
 
             # check read permissions non-static content
             if cmd != 'static':
@@ -392,41 +392,41 @@
             # Don't enable caching if using a CSP nonce because then it wouldn't
             # be a nonce.
             if rctx.configbool('web', 'cache') and not rctx.nonce:
-                tag = 'W/"%d"' % self.mtime
-                if req.headers.get('If-None-Match') == tag:
-                    res.status = '304 Not Modified'
+                tag = r'W/"%d"' % self.mtime
+                if req.headers.get(r'If-None-Match') == tag:
+                    res.status = r'304 Not Modified'
                     # Response body not allowed on 304.
                     res.setbodybytes('')
                     return res.sendresponse()
 
-                res.headers['ETag'] = tag
+                res.headers[r'ETag'] = tag
 
             if cmd not in webcommands.__all__:
                 msg = 'no such method: %s' % cmd
                 raise ErrorResponse(HTTP_BAD_REQUEST, msg)
             else:
                 # Set some globals appropriate for web handlers. Commands can
                 # override easily enough.
-                res.status = '200 Script output follows'
-                res.headers['Content-Type'] = ctype
+                res.status = r'200 Script output follows'
+                res.headers[r'Content-Type'] = ctype
                 return getattr(webcommands, cmd)(rctx)
 
         except (error.LookupError, error.RepoLookupError) as err:
             msg = pycompat.bytestr(err)
             if (util.safehasattr(err, 'name') and
                 not isinstance(err,  error.ManifestLookupError)):
                 msg = 'revision not found: %s' % err.name
 
-            res.status = '404 Not Found'
-            res.headers['Content-Type'] = ctype
+            res.status = r'404 Not Found'
+            res.headers[r'Content-Type'] = ctype
             return rctx.sendtemplate('error', error=msg)
         except (error.RepoError, error.RevlogError) as e:
-            res.status = '500 Internal Server Error'
-            res.headers['Content-Type'] = ctype
+            res.status = r'500 Internal Server Error'
+            res.headers[r'Content-Type'] = ctype
             return rctx.sendtemplate('error', error=pycompat.bytestr(e))
         except ErrorResponse as e:
-            res.status = statusmessage(e.code, pycompat.bytestr(e))
-            res.headers['Content-Type'] = ctype
+            res.status = statusmessage(e.code, e)
+            res.headers[r'Content-Type'] = ctype
             return rctx.sendtemplate('error', error=pycompat.bytestr(e))
 
     def check_perm(self, rctx, req, op):
diff --git a/mercurial/hgweb/common.py b/mercurial/hgweb/common.py
--- a/mercurial/hgweb/common.py
+++ b/mercurial/hgweb/common.py
@@ -128,7 +128,7 @@
     return responses.get(code, ('Error', 'Unknown error'))[0]
 
 def statusmessage(code, message=None):
-    return '%d %s' % (code, message or _statusmessage(code))
+    return r'%d %s' % (code, message or _statusmessage(code))
 
 def get_stat(spath, fn):
     """stat fn if it exists, spath otherwise"""
@@ -181,7 +181,7 @@
         res.setbodybytes(data)
         return res
     except TypeError:
-        raise ErrorResponse(HTTP_SERVER_ERROR, 'illegal filename')
+        raise ErrorResponse(HTTP_SERVER_ERROR, r'illegal filename')
     except OSError as err:
         if err.errno == errno.ENOENT:
             raise ErrorResponse(HTTP_NOT_FOUND)



To: durin42, #hg-reviewers
Cc: mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D2854: hgweb: str/bytes followups after indygreg's refactoring

indygreg (Gregory Szorc)
indygreg requested changes to this revision.
indygreg added a comment.
This revision now requires changes to proceed.


  parsedrequest.headers is supposed to be bytes. One of the goals of this refactor was to make the WSGI code all bytes all the time. So if system strings are making their way to the new request or response objects (i.e. outside request.py), then something is wrong and we should fix that: we shouldn’t need to introduce raw literals outside of the very low level WSGI code.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2854

To: durin42, #hg-reviewers, indygreg
Cc: indygreg, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D2854: hgweb: str/bytes followups after indygreg's refactoring

indygreg (Gregory Szorc)
In reply to this post by indygreg (Gregory Szorc)
indygreg added a comment.


  In https://phab.mercurial-scm.org/D2854#45911, @indygreg wrote:
 
  > parsedrequest.headers is supposed to be bytes. One of the goals of this refactor was to make the WSGI code all bytes all the time. So if system strings are making their way to the new request or response objects (i.e. outside request.py), then something is wrong and we should fix that: we shouldn’t need to introduce raw literals outside of the very low level WSGI code.
 
 
  We're using `wsgirequest.headers.Headers`. Python 3's implementation insists that headers be specified as the native `str` type: https://github.com/python/cpython/blob/master/Lib/wsgiref/headers.py. Because of course it does. Ugh.
 
  It looks like we'll have to implement a case insensitive + `-`/`_` normalizing keys dict for header storage. Shouldn't be a big deal.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2854

To: durin42, #hg-reviewers, indygreg
Cc: indygreg, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D2854: hgweb: use our forked wsgiheaders module instead of stdlib one

indygreg (Gregory Szorc)
In reply to this post by indygreg (Gregory Szorc)
durin42 updated this revision to Diff 8009.
durin42 edited the summary of this revision.
durin42 retitled this revision from "hgweb: str/bytes followups after indygreg's refactoring" to "hgweb: use our forked wsgiheaders module instead of stdlib one".

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2854?vs=7020&id=8009

REVISION DETAIL
  https://phab.mercurial-scm.org/D2854

AFFECTED FILES
  mercurial/hgweb/request.py

CHANGE DETAILS

diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -8,7 +8,6 @@
 
 from __future__ import absolute_import
 
-import wsgiref.headers as wsgiheaders
 #import wsgiref.validate
 
 from ..thirdparty import (
@@ -19,6 +18,9 @@
     pycompat,
     util,
 )
+from . import (
+    wsgiheaders,
+)
 
 class multidict(object):
     """A dict like object that can store multiple values for a key.



To: durin42, #hg-reviewers, indygreg
Cc: indygreg, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D2854: hgweb: use our forked wsgiheaders module instead of stdlib one

indygreg (Gregory Szorc)
In reply to this post by indygreg (Gregory Szorc)
indygreg requested changes to this revision.
indygreg added a comment.
This revision now requires changes to proceed.


  This one caused an import cycle: `Import cycle: mercurial.hgweb.__init__ -> mercurial.hgweb.hgweb_mod -> mercurial.hgweb.request -> mercurial.hgweb.__init__`

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2854

To: durin42, #hg-reviewers, indygreg
Cc: indygreg, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D2854: hgweb: use our forked wsgiheaders module instead of stdlib one

indygreg (Gregory Szorc)
In reply to this post by indygreg (Gregory Szorc)
durin42 updated this revision to Diff 8074.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2854?vs=8009&id=8074

REVISION DETAIL
  https://phab.mercurial-scm.org/D2854

AFFECTED FILES
  mercurial/hgweb/request.py

CHANGE DETAILS

diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -8,7 +8,6 @@
 
 from __future__ import absolute_import
 
-import wsgiref.headers as wsgiheaders
 #import wsgiref.validate
 
 from ..thirdparty import (
@@ -289,6 +288,7 @@
         if k.startswith('HTTP_'):
             headers.append((k[len('HTTP_'):].replace('_', '-'), v))
 
+    from . import wsgiheaders # avoid cycle
     headers = wsgiheaders.Headers(headers)
 
     # This is kind of a lie because the HTTP header wasn't explicitly
@@ -378,6 +378,7 @@
         self._startresponse = startresponse
 
         self.status = None
+        from . import wsgiheaders # avoid cycle
         self.headers = wsgiheaders.Headers([])
 
         self._bodybytes = None



To: durin42, #hg-reviewers, indygreg
Cc: indygreg, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D2854: hgweb: use our forked wsgiheaders module instead of stdlib one

indygreg (Gregory Szorc)
In reply to this post by indygreg (Gregory Szorc)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHGda84e26d85ed: hgweb: use our forked wsgiheaders module instead of stdlib one (authored by durin42, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2854?vs=8074&id=8076

REVISION DETAIL
  https://phab.mercurial-scm.org/D2854

AFFECTED FILES
  mercurial/hgweb/request.py

CHANGE DETAILS

diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -8,7 +8,6 @@
 
 from __future__ import absolute_import
 
-import wsgiref.headers as wsgiheaders
 #import wsgiref.validate
 
 from ..thirdparty import (
@@ -289,6 +288,7 @@
         if k.startswith('HTTP_'):
             headers.append((k[len('HTTP_'):].replace('_', '-'), v))
 
+    from . import wsgiheaders # avoid cycle
     headers = wsgiheaders.Headers(headers)
 
     # This is kind of a lie because the HTTP header wasn't explicitly
@@ -378,6 +378,7 @@
         self._startresponse = startresponse
 
         self.status = None
+        from . import wsgiheaders # avoid cycle
         self.headers = wsgiheaders.Headers([])
 
         self._bodybytes = None



To: durin42, #hg-reviewers, indygreg
Cc: indygreg, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel