ActiveState · icanhasmath · May 1, 2024 · Dec 13, 2024 · Dec 13, 2024 · Dec 14, 2024
diff --git a/.gitignore b/.gitignore
@@ -87,3 +87,4 @@ coverage/
 externals/
 htmlcov/
 gmon.out
+.aider*
diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst
@@ -21,13 +21,18 @@ There are several useful utilities provided in the :mod:`email.utils` module:
    begins with angle brackets, they are stripped off.
 
 
-.. function:: parseaddr(address)
+.. function:: parseaddr(address, strict=True)
 
    Parse address -- which should be the value of some address-containing field such
    as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
    *email address* parts.  Returns a tuple of that information, unless the parse
    fails, in which case a 2-tuple of ``('', '')`` is returned.
 
+   If *strict* is true, use a strict parser which rejects malformed inputs.
+
+   .. versionchanged:: 2.7.18.12
+      Add *strict* optional parameter and reject malformed inputs by default.
+
 
 .. function:: formataddr(pair)
 
@@ -37,7 +42,7 @@ There are several useful utilities provided in the :mod:`email.utils` module:
    second element is returned unmodified.
 
 
-.. function:: getaddresses(fieldvalues)
+.. function:: getaddresses(fieldvalues, strict=True)
 
    This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
    *fieldvalues* is a sequence of header field values as might be returned by
@@ -52,6 +57,9 @@ There are several useful utilities provided in the :mod:`email.utils` module:
       resent_ccs = msg.get_all('resent-cc', [])
       all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
 
+   .. versionchanged:: 2.7.18.12
+      Add *strict* optional parameter and reject malformed inputs by default.
+
 
 .. function:: parsedate(date)
 

diff --git a/Doc/whatsnew/2.7.rst b/Doc/whatsnew/2.7.rst
@@ -2793,3 +2793,20 @@ The author would like to thank the following people for offering
 suggestions, corrections and assistance with various drafts of this
 article: Nick Coghlan, Philip Jenvey, Ryan Lovett, R. David Murray,
 Hugh Secker-Walker.
+
+
+Notable changes in 2.7.18.12
+============================
+
+email
+-----
+
+* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return
+  ``('', '')`` 2-tuples in more situations where invalid email addresses are
+  encountered, instead of potentially inaccurate values.
+  An optional *strict* parameter was added to these two functions:
+  use ``strict=False`` to get the old behavior, accepting malformed inputs.
+  ``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to
+  check if the *strict* parameter is available.
+  (Contributed by Thomas Dwyer and Victor Stinner for :gh:`102988` to improve
+  the CVE-2023-27043 fix.)
diff --git a/Include/patchlevel.h b/Include/patchlevel.h
@@ -27,7 +27,7 @@
 #define PY_RELEASE_SERIAL	0
 
 /* Version as a string */
-#define PY_VERSION      	"2.7.18.10"
+#define PY_VERSION      	"2.7.18.14"
 /*--end constants--*/
 
 /* Subversion Revision number of this file (not of the repository). Empty

diff --git a/Lib/Cookie.py b/Lib/Cookie.py
@@ -92,13 +92,14 @@
    'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger'
 
 The load() method is darn-tootin smart about identifying cookies
-within a string.  Escaped quotation marks, nested semicolons, and other
-such trickeries do not confuse it.
+within a string.  Escaped quotation marks and nested semicolons do not
+confuse it.  (Note that cookies whose values contain control characters
+are now rejected to prevent Set-Cookie header injection; CVE-2026-0672.)
 
    >>> C = Cookie.SmartCookie()
-   >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";')
+   >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=delicious;";')
    >>> print C
-   Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;"
+   Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=delicious;"
 
 Each element of the Cookie also supports all of the RFC 2109
 Cookie attributes.  Here's an example which sets the Path
@@ -242,6 +243,15 @@ class CookieError(Exception):
 #       _Translator       hash-table for fast quoting
 #
 _LegalChars       = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~"
+_control_character_re = re.compile(r'[\x00-\x1f\x7f]')
+
+def _has_control_character(*values):
+    """Return True if any of the given string values holds a control char."""
+    for v in values:
+        if isinstance(v, basestring) and _control_character_re.search(v):
+            return True
+    return False
+
 _Translator       = {
     '\000' : '\\000',  '\001' : '\\001',  '\002' : '\\002',
     '\003' : '\\003',  '\004' : '\\004',  '\005' : '\\005',
@@ -424,6 +434,8 @@ def __setitem__(self, K, V):
         K = K.lower()
         if not K in self._reserved:
             raise CookieError("Invalid Attribute %s" % K)
+        if _has_control_character(K, V):
+            raise CookieError("Control characters are not allowed in cookies: %r %r" % (K, V))
         dict.__setitem__(self, K, V)
     # end __setitem__
 
@@ -440,6 +452,9 @@ def set(self, key, val, coded_val,
             raise CookieError("Attempt to set a reserved key: %s" % key)
         if "" != translate(key, idmap, LegalChars):
             raise CookieError("Illegal key value: %s" % key)
+        if _has_control_character(key, val, coded_val):
+            raise CookieError("Control characters are not allowed in cookies: %r %r %r"
+                              % (key, val, coded_val))
 
         # It's a good key, so save it.
         self.key                 = key

diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
@@ -20,6 +20,7 @@
 charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
 
 starttagopen = re.compile('<[a-zA-Z]')
+endtagopen = re.compile('</[a-zA-Z]')
 piclose = re.compile('>')
 commentclose = re.compile(r'--\s*>')
 
@@ -167,22 +168,46 @@ def goahead(self, end):
                     k = self.parse_pi(i)
                 elif startswith("<!", i):
                     k = self.parse_html_declaration(i)
-                elif (i + 1) < n:
+                elif (i + 1) < n or end:
                     self.handle_data("<")
                     k = i + 1
                 else:
                     break
                 if k < 0:
                     if not end:
                         break
-                    k = rawdata.find('>', i + 1)
-                    if k < 0:
-                        k = rawdata.find('<', i + 1)
-                        if k < 0:
-                            k = i + 1
+                    # End of input with an unterminated construct.  Close it
+                    # per HTML5 instead of rescanning, which made repeated
+                    # incomplete constructs quadratic (CVE-2025-6069).
+                    if starttagopen.match(rawdata, i):  # < + letter
+                        pass
+                    elif startswith("</", i):
+                        if i + 2 == n:
+                            self.handle_data("</")
+                        elif endtagopen.match(rawdata, i):  # </ + letter
+                            pass
+                        else:
+                            # bogus comment
+                            self.handle_comment(rawdata[i+2:])
+                    elif startswith("<!--", i):
+                        j = n
+                        for suffix in ("--!", "--", "-"):
+                            if rawdata.endswith(suffix, i+4):
+                                j -= len(suffix)
+                                break
+                        self.handle_comment(rawdata[i+4:j])
+                    elif startswith("<![CDATA[", i):
+                        self.unknown_decl(rawdata[i+3:])
+                    elif rawdata[i:i+9].lower() == '<!doctype':
+                        self.handle_decl(rawdata[i+2:])
+                    elif startswith("<!", i):
+                        # bogus comment
+                        self.handle_comment(rawdata[i+2:])
+                    elif startswith("<?", i):
+                        self.handle_pi(rawdata[i+2:])
                     else:
-                        k += 1
-                    self.handle_data(rawdata[i:k])
+                        raise AssertionError("we should not get here!")
+                    k = n
                 i = self.updatepos(i, k)
             elif startswith("&#", i):
                 match = charref.match(rawdata, i)

diff --git a/Lib/base64.py b/Lib/base64.py
@@ -57,18 +57,37 @@ def b64encode(s, altchars=None):
     return encoded
 
 
-def b64decode(s, altchars=None):
+def b64decode(s, altchars=None, validate=False):
     """Decode a Base64 encoded string.
 
     s is the string to decode.  Optional altchars must be a string of at least
     length 2 (additional characters are ignored) which specifies the
     alternative alphabet used instead of the '+' and '/' characters.
 
     The decoded string is returned.  A TypeError is raised if s is
-    incorrectly padded.  Characters that are neither in the normal base-64
-    alphabet nor the alternative alphabet are discarded prior to the padding
-    check.
+    incorrectly padded.
+
+    If validate is False (the default), characters that are neither in the
+    normal base-64 alphabet nor the alternative alphabet are discarded prior
+    to the padding check.  If validate is True, these non-alphabet characters
+    in the input result in a binascii.Error.
+
+    Unlike upstream (which only deprecates the lenient behaviour), validation
+    here checks the input against the *requested* alphabet, so the standard
+    '+'/'/' characters are rejected when an alternative alphabet is given
+    (CVE-2025-12781), and any data after the padding is rejected rather than
+    silently ignored (CVE-2026-3446).
     """
+    if validate:
+        if altchars is not None:
+            extra = altchars[:2]
+        else:
+            extra = b'+/'
+        valid = frozenset(string.ascii_letters + string.digits + extra)
+        stripped = s.rstrip(b'=')
+        npad = len(s) - len(stripped)
+        if npad > 2 or not all(c in valid for c in stripped):
+            raise binascii.Error('Non-base64 digit found')
     if altchars is not None:
         s = s.translate(string.maketrans(altchars[:2], '+/'))
     try:

diff --git a/Lib/email/errors.py b/Lib/email/errors.py
@@ -30,6 +30,10 @@ class CharsetError(MessageError):
     """An illegal charset was given."""
 
 
+class HeaderWriteError(MessageError):
+    """Error while writing headers."""
+
+
 
 # These are parsing defects which the parser was able to work around.
 class MessageDefect:

diff --git a/Lib/email/generator.py b/Lib/email/generator.py
@@ -13,6 +13,12 @@
 
 from cStringIO import StringIO
 from email.header import Header
+from email.errors import HeaderWriteError
+
+# Matches a CR/LF that is NOT part of a valid header folding (i.e. not
+# immediately followed by folding whitespace).  Used to detect injected
+# newlines in generated headers (CVE-2024-6923).
+NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]')
 
 UNDERSCORE = '_'
 NL = '\n'
@@ -139,29 +145,35 @@ def _dispatch(self, msg):
 
     def _write_headers(self, msg):
         for h, v in msg.items():
-            print >> self._fp, '%s:' % h,
             if self._maxheaderlen == 0:
                 # Explicit no-wrapping
-                print >> self._fp, v
+                value = v
             elif isinstance(v, Header):
                 # Header instances know what to do
-                print >> self._fp, v.encode()
+                value = v.encode()
             elif _is8bitstring(v):
                 # If we have raw 8bit data in a byte string, we have no idea
                 # what the encoding is.  There is no safe way to split this
                 # string.  If it's ascii-subset, then we could do a normal
                 # ascii split, but if it's multibyte then we could break the
                 # string.  There's no way to know so the least harm seems to
                 # be to not split the string and risk it being too long.
-                print >> self._fp, v
+                value = v
             else:
                 # Header's got lots of smarts, so use it.  Note that this is
                 # fundamentally broken though because we lose idempotency when
                 # the header string is continued with tabs.  It will now be
                 # continued with spaces.  This was reversedly broken before we
                 # fixed bug 1974.  Either way, we lose.
-                print >> self._fp, Header(
+                value = Header(
                     v, maxlinelen=self._maxheaderlen, header_name=h).encode()
+            # Reject headers that contain an injected newline, i.e. a CR/LF
+            # that is not part of valid header folding (CVE-2024-6923).
+            folded = '%s: %s' % (h, value)
+            if NEWLINE_WITHOUT_FWSP.search(folded):
+                raise HeaderWriteError(
+                    "header value contains an unexpected newline: %r" % (folded,))
+            print >> self._fp, folded
         # A blank line always separates headers from body
         print >> self._fp
 

diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
@@ -77,6 +77,26 @@ def _msgobj(self, filename):
 
 # Test various aspects of the Message class's API
 class TestMessageAPI(TestEmailBase):
+    def test_string_rejects_header_injection(self):
+        # CVE-2024-6923: generating a message must reject header values that
+        # contain an injected newline (one not part of valid folding).  The
+        # no-wrap path writes the value verbatim, which deterministically
+        # exercises the check.
+        import email.errors
+        from email.generator import Generator
+        from cStringIO import StringIO
+        for bad in ('value\r\nInjected: header',
+                    'value\nInjected: header',
+                    'value\rstuff'):
+            msg = Message()
+            msg['Subject'] = bad
+            g = Generator(StringIO(), maxheaderlen=0)
+            self.assertRaises(email.errors.HeaderWriteError, g.flatten, msg)
+        # A normal header is still emitted fine.
+        msg = Message()
+        msg['Subject'] = 'a normal subject that is reasonably short'
+        self.assertIn('Subject: a normal subject', msg.as_string())
+
     def test_get_all(self):
         eq = self.assertEqual
         msg = self._msgobj('msg_20.txt')
@@ -2320,6 +2340,22 @@ def test_parseaddr_multiple_domains(self):
             ('', '')
         )
 
+    def test_parseaddr_unicode(self):
+        """Test parseaddr with unicode strings"""
+
+        test_cases = [
+            u'user@example.com',
+            u'Test User <user@example.com>',
+            u'"Test User" <user@example.com>',
+        ]
+
+        for addr in test_cases:
+            result = Utils.parseaddr(addr, strict=True)
+            self.assertNotEqual(result, ('', ''))
+
+            result_non_strict = Utils.parseaddr(addr, strict=False)
+            self.assertEqual(result, result_non_strict)
+
     def test_noquote_dump(self):
         self.assertEqual(
             Utils.formataddr(('A Silly Person', 'person@dom.ain')),
@@ -2417,9 +2453,11 @@ def test_getaddresses(self):
     def test_getaddresses_nasty(self):
         eq = self.assertEqual
         eq(Utils.getaddresses(['foo: ;']), [('', '')])
-        eq(Utils.getaddresses(
-           ['[]*-- =~$']),
-           [('', ''), ('', ''), ('', '*--')])
+        addresses = ['[]*-- =~$']
+        eq(Utils.getaddresses(addresses),
+            [('', '')])
+        eq(Utils.getaddresses(addresses, strict=False),
+            [('', ''), ('', ''), ('', '*--')])
         eq(Utils.getaddresses(
            ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
            [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
@@ -2430,6 +2468,20 @@ def test_getaddresses_embedded_comment(self):
         addrs = Utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
         eq(addrs[0][1], 'foo@bar.com')
 
+    def test_getaddresses_unicode(self):
+        """Test getaddresses with unicode strings in Python 2"""
+
+        test_cases = [
+            ([u'user@example.com'], [('', 'user@example.com')]),
+            ([u'Test User <user@example.com>'], [('Test User', 'user@example.com')]),
+            ([u'"Test User" <user@example.com>'], [('Test User', 'user@example.com')]),
+            ([u'user1@example.com', u'user2@example.com'], [('', 'user1@example.com'), ('', 'user2@example.com')]),
+        ]
+
+        for addrs, expected in test_cases:
+            result = Utils.getaddresses(addrs)
+            self.assertEqual(result, expected)
+
     def test_make_msgid_collisions(self):
         # Test make_msgid uniqueness, even with multiple threads
         class MsgidsThread(Thread):