diff --git a/.gitignore b/.gitignore
index 8f3a2d133b5e35..eac0ca753f8357 100644
--- a/.gitignore
+++ b/.gitignore
@@ -87,3 +87,4 @@ coverage/
externals/
htmlcov/
gmon.out
+.aider*
diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst
index 9b583c05af35db..b05351db1a4150 100644
--- a/Doc/library/email.utils.rst
+++ b/Doc/library/email.utils.rst
@@ -21,13 +21,18 @@ There are several useful utilities provided in the :mod:`email.utils` module:
begins with angle brackets, they are stripped off.
-.. function:: parseaddr(address)
+.. function:: parseaddr(address, strict=True)
Parse address -- which should be the value of some address-containing field such
as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
*email address* parts. Returns a tuple of that information, unless the parse
fails, in which case a 2-tuple of ``('', '')`` is returned.
+ If *strict* is true, use a strict parser which rejects malformed inputs.
+
+ .. versionchanged:: 2.7.18.12
+ Add *strict* optional parameter and reject malformed inputs by default.
+
.. function:: formataddr(pair)
@@ -37,7 +42,7 @@ There are several useful utilities provided in the :mod:`email.utils` module:
second element is returned unmodified.
-.. function:: getaddresses(fieldvalues)
+.. function:: getaddresses(fieldvalues, strict=True)
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
*fieldvalues* is a sequence of header field values as might be returned by
@@ -52,6 +57,9 @@ There are several useful utilities provided in the :mod:`email.utils` module:
resent_ccs = msg.get_all('resent-cc', [])
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
+ .. versionchanged:: 2.7.18.12
+ Add *strict* optional parameter and reject malformed inputs by default.
+
.. function:: parsedate(date)
diff --git a/Doc/whatsnew/2.7.rst b/Doc/whatsnew/2.7.rst
index 71d410bcd1fbf0..cbadece56787e4 100644
--- a/Doc/whatsnew/2.7.rst
+++ b/Doc/whatsnew/2.7.rst
@@ -2793,3 +2793,20 @@ The author would like to thank the following people for offering
suggestions, corrections and assistance with various drafts of this
article: Nick Coghlan, Philip Jenvey, Ryan Lovett, R. David Murray,
Hugh Secker-Walker.
+
+
+Notable changes in 2.7.18.12
+============================
+
+email
+-----
+
+* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return
+ ``('', '')`` 2-tuples in more situations where invalid email addresses are
+ encountered, instead of potentially inaccurate values.
+ An optional *strict* parameter was added to these two functions:
+ use ``strict=False`` to get the old behavior, accepting malformed inputs.
+ ``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to
+ check if the *strict* parameter is available.
+ (Contributed by Thomas Dwyer and Victor Stinner for :gh:`102988` to improve
+ the CVE-2023-27043 fix.)
diff --git a/Include/patchlevel.h b/Include/patchlevel.h
index 9a0c45e13d9bd0..bd7ff936aa8a45 100644
--- a/Include/patchlevel.h
+++ b/Include/patchlevel.h
@@ -27,7 +27,7 @@
#define PY_RELEASE_SERIAL 0
/* Version as a string */
-#define PY_VERSION "2.7.18.10"
+#define PY_VERSION "2.7.18.14"
/*--end constants--*/
/* Subversion Revision number of this file (not of the repository). Empty
diff --git a/Lib/Cookie.py b/Lib/Cookie.py
index a6ba4a92ed7227..c5df671f811fd1 100644
--- a/Lib/Cookie.py
+++ b/Lib/Cookie.py
@@ -92,13 +92,14 @@
'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger'
The load() method is darn-tootin smart about identifying cookies
-within a string. Escaped quotation marks, nested semicolons, and other
-such trickeries do not confuse it.
+within a string. Escaped quotation marks and nested semicolons do not
+confuse it. (Note that cookies whose values contain control characters
+are now rejected to prevent Set-Cookie header injection; CVE-2026-0672.)
>>> C = Cookie.SmartCookie()
- >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";')
+ >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=delicious;";')
>>> print C
- Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;"
+ Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=delicious;"
Each element of the Cookie also supports all of the RFC 2109
Cookie attributes. Here's an example which sets the Path
@@ -242,6 +243,15 @@ class CookieError(Exception):
# _Translator hash-table for fast quoting
#
_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~"
+_control_character_re = re.compile(r'[\x00-\x1f\x7f]')
+
+def _has_control_character(*values):
+    """Tell whether any string among *values* contains a control character.
+
+    Arguments that are not strings are skipped; a control character is
+    whatever ``_control_character_re`` matches.
+    """
+    return any(isinstance(item, basestring)
+               and _control_character_re.search(item) is not None
+               for item in values)
+
_Translator = {
'\000' : '\\000', '\001' : '\\001', '\002' : '\\002',
'\003' : '\\003', '\004' : '\\004', '\005' : '\\005',
@@ -424,6 +434,8 @@ def __setitem__(self, K, V):
K = K.lower()
if not K in self._reserved:
raise CookieError("Invalid Attribute %s" % K)
+ if _has_control_character(K, V):
+ raise CookieError("Control characters are not allowed in cookies: %r %r" % (K, V))
dict.__setitem__(self, K, V)
# end __setitem__
@@ -440,6 +452,9 @@ def set(self, key, val, coded_val,
raise CookieError("Attempt to set a reserved key: %s" % key)
if "" != translate(key, idmap, LegalChars):
raise CookieError("Illegal key value: %s" % key)
+ if _has_control_character(key, val, coded_val):
+ raise CookieError("Control characters are not allowed in cookies: %r %r %r"
+ % (key, val, coded_val))
# It's a good key, so save it.
self.key = key
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
index fb9380e128bfc7..85b980750e32ba 100644
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -20,6 +20,7 @@
charref = re.compile('(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
starttagopen = re.compile('<[a-zA-Z]')
+endtagopen = re.compile('[a-zA-Z]')
piclose = re.compile('>')
commentclose = re.compile(r'--\s*>')
@@ -167,7 +168,7 @@ def goahead(self, end):
k = self.parse_pi(i)
elif startswith("', i + 1)
- if k < 0:
- k = rawdata.find('<', i + 1)
- if k < 0:
- k = i + 1
+ # End of input with an unterminated construct. Close it
+ # per HTML5 instead of rescanning, which made repeated
+ # incomplete constructs quadratic (CVE-2025-6069).
+ if starttagopen.match(rawdata, i): # < + letter
+ pass
+ elif startswith("", i):
+ if i + 2 == n:
+ self.handle_data("")
+ elif endtagopen.match(rawdata, i): # + letter
+ pass
+ else:
+ # bogus comment
+ self.handle_comment(rawdata[i+2:])
+ elif startswith("', [('comment', 'foo')]),
+ ]
+ for html, expected in data:
+ self._run_check(html, expected)
+
+ def test_eof_in_declarations(self):
+ # CVE-2025-6069: unterminated declarations at EOF are closed.
+ data = [
+ ('Comment \xe7a va ? Tr\xe8s bien ?')
+ self.assertRaises(ExpatError, parseString,
+ 'not well-formed (invalid token)')
doc.unlink()
diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py
index b9a4c906defc26..11e9624de60057 100644
--- a/Lib/test/test_ntpath.py
+++ b/Lib/test/test_ntpath.py
@@ -182,6 +182,17 @@ def test_normpath(self):
tester("ntpath.normpath('\\\\.\\NUL')", r'\\.\NUL')
tester("ntpath.normpath('\\\\?\\D:/XY\\Z')", r'\\?\D:/XY\Z')
+    def test_expandvars_many(self):
+        # CVE-2025-6075: inputs holding many substitutions (or one
+        # substitution after a very long literal) must still expand to the
+        # right value.  The inputs are large so that a quadratic
+        # implementation would be noticeably slow; no timing is asserted.
+        cases = (
+            ("%foo%" * 1000, "bar" * 1000),
+            ("$foo " * 1000, "bar " * 1000),
+            ("a" * 100000 + "%foo%", "a" * 100000 + "bar"),
+        )
+        with test_support.EnvironmentVarGuard() as env:
+            env.clear()
+            env["foo"] = "bar"
+            for template, expected in cases:
+                self.assertEqual(ntpath.expandvars(template), expected)
+
def test_expandvars(self):
with test_support.EnvironmentVarGuard() as env:
env.clear()
diff --git a/Lib/test/test_poplib.py b/Lib/test/test_poplib.py
index d2143759ba6652..bbde7a146e6dab 100644
--- a/Lib/test/test_poplib.py
+++ b/Lib/test/test_poplib.py
@@ -156,6 +156,14 @@ def handle_error(self):
class TestPOP3Class(TestCase):
+    def test_putline_rejects_control_characters(self):
+        # CVE-2025-15367: a command line carrying control characters
+        # (CR/LF, NUL, ...) must raise error_proto instead of being sent,
+        # otherwise extra POP3 commands could be injected.
+        for line in ('USER guido\r\nDELE 1', 'PASS secret\x00'):
+            self.assertRaises(poplib.error_proto, self.client._putline, line)
+
def assertOK(self, resp):
self.assertTrue(resp.startswith("+OK"))
diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py
index 18ea2e42eadeac..4a3707f91ffc84 100644
--- a/Lib/test/test_posixpath.py
+++ b/Lib/test/test_posixpath.py
@@ -496,6 +496,22 @@ def test_relpath(self):
finally:
os.getcwd = real_getcwd
+    def test_expandvars_many(self):
+        # CVE-2025-6075: many substitutions must expand correctly (and in
+        # linear time -- the result is built once, not rebuilt per match).
+        # Only ASCII names and values are used, so this test is deliberately
+        # not gated on test_support.FS_NONASCII: it must run on every host.
+        with test_support.EnvironmentVarGuard() as env:
+            env.clear()
+            env['FOO'] = 'bar'
+            self.assertEqual(posixpath.expandvars('$FOO' * 1000), 'bar' * 1000)
+            # Unknown variables are left untouched.
+            self.assertEqual(posixpath.expandvars('$NOPE' * 1000),
+                             '$NOPE' * 1000)
+            self.assertEqual(posixpath.expandvars('${FOO}x' * 100),
+                             'barx' * 100)
+            # A long literal prefix followed by a '$' must not be quadratic.
+            big = 'a' * 100000 + '$FOO'
+            self.assertEqual(posixpath.expandvars(big), 'a' * 100000 + 'bar')
+
@unittest.skipUnless(test_support.FS_NONASCII, 'need test_support.FS_NONASCII')
def test_expandvars_nonascii_word(self):
encoding = sys.getfilesystemencoding()
diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py
index 2c1b48c698ae99..56a17472c880ad 100644
--- a/Lib/test/test_socket.py
+++ b/Lib/test/test_socket.py
@@ -55,6 +55,17 @@ def _is_fd_in_blocking_mode(sock):
fcntl.fcntl(sock, fcntl.F_GETFL, os.O_NONBLOCK) & os.O_NONBLOCK)
+def _have_socket_can():
+    """Return True if a raw CAN socket can be created on this host."""
+    try:
+        probe = socket.socket(socket.PF_CAN, socket.SOCK_RAW, socket.CAN_RAW)
+    except (AttributeError, socket.error):
+        # Either the CAN constants are missing from the socket module or
+        # creating the socket failed.
+        return False
+    probe.close()
+    return True
+
+
HAVE_SOCKET_CAN = _have_socket_can()
class SocketTCPTest(unittest.TestCase):
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index b7ff47f783e72e..fe66218eaa826f 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -1,5 +1,6 @@
import sys
import os
+import io
import shutil
import StringIO
from binascii import unhexlify
@@ -12,6 +13,7 @@
from test import test_support
from test import test_support as support
+from test import symlink_support
# Check for our compression modules.
try:
@@ -27,11 +29,14 @@
def md5sum(data):
return md5(data).hexdigest()
-TEMPDIR = os.path.abspath(test_support.TESTFN)
-tarname = test_support.findfile("testtar.tar")
+TEMPDIR = os.path.abspath(support.TESTFN) + "-tardir"
+tarextdir = TEMPDIR + '-extract-test'
+tarname = support.findfile("testtar.tar")
gzipname = os.path.join(TEMPDIR, "testtar.tar.gz")
bz2name = os.path.join(TEMPDIR, "testtar.tar.bz2")
+xzname = os.path.join(TEMPDIR, "testtar.tar.xz")
tmpname = os.path.join(TEMPDIR, "tmp.tar")
+dotlessname = os.path.join(TEMPDIR, "testtar")
md5_regtype = "65f477c818ad9e15f7feab0c6d37742f"
md5_sparse = "a54fbc4ca4f4399a90e1b27164012fc6"
@@ -135,6 +140,18 @@ def test_fileobj_seek(self):
"read() after readline() failed")
fobj.close()
+ def test_fileobj_text(self):
+ # Read "ustar/regtype" through extractfile(), compare its MD5 with
+ # md5_regtype, then check that seek() still works on the file object.
+ with self.tar.extractfile("ustar/regtype") as fobj:
+ # NOTE(review): the TextIOWrapper wrapping is disabled, so despite
+ # the test name the raw extractfile() object is exercised --
+ # confirm this is intended.
+ # fobj = io.TextIOWrapper(fobj)
+ data = fobj.read().encode("iso8859-1")
+ self.assertEqual(md5sum(data), md5_regtype)
+ try:
+ fobj.seek(100)
+ except AttributeError:
+ # Issue #13815: seek() complained about a missing
+ # flush() method.
+ self.fail("seeking failed in text mode")
+
# Test if symbolic and hard links are resolved by extractfile(). The
# test link members each point to a regular member whose data is
# supposed to be exported.
@@ -220,6 +237,17 @@ def test_list_verbose(self):
self.assertIn('pax' + ('/123' * 125) + '/longlink link to pax' +
('/123' * 125) + '/longname', out)
+ def test_list_members(self):
+ # list() restricted through its "members" argument must print only
+ # the selected members.
+ tio = io.BufferedRandom(io.BytesIO())
+ def members(tar):
+ # Yield only the members whose name contains 'reg'.
+ for tarinfo in tar.getmembers():
+ if 'reg' in tarinfo.name:
+ yield tarinfo
+ # Capture what list() prints by temporarily replacing sys.stdout.
+ with support.swap_attr(sys, 'stdout', tio):
+ self.tar.list(verbose=False, members=members(self.tar))
+ out = tio.detach().getvalue()
+ self.assertIn(b'ustar/regtype', out)
+ self.assertNotIn(b'ustar/conttype', out)
class GzipListTest(ListTest):
tarname = gzipname
@@ -252,6 +280,12 @@ def test_empty_tarfile(self):
finally:
tar.close()
+    def test_non_existent_tarfile(self):
+        # Test for issue11513: prevent non-existent gzipped tarfiles raising
+        # multiple exceptions.
+        # Python 2 spelling: there is no FileNotFoundError (a missing file
+        # is reported as IOError) and the helper is assertRaisesRegexp.
+        with self.assertRaisesRegexp(IOError, "xxx"):
+            tarfile.open("xxx", self.mode)
+
def test_null_tarfile(self):
# Test for issue6123: Allow opening empty archives.
# This test guarantees that tarfile.open() does not treat an empty
@@ -440,6 +474,9 @@ def test_find_members(self):
self.assertTrue(self.tar.getmembers()[-1].name == "misc/eof",
"could not find all members")
+ @unittest.skipUnless(hasattr(os, "link"),
+ "Missing hardlink implementation")
+ @symlink_support.skip_unless_symlink
def test_extract_hardlink(self):
# Test hardlink extraction (e.g. bug #857297).
with tarfile.open(tarname, errorlevel=1, encoding="iso8859-1") as tar:
@@ -474,6 +511,43 @@ def test_extractall(self):
finally:
tar.close()
+ def test_extract_directory(self):
+ # Extract the single directory member "ustar/dirtype" into a scratch
+ # directory and check that its mtime (and, except on win32, its 0755
+ # permission bits) match the archive.
+ dirtype = "ustar/dirtype"
+ DIR = os.path.join(TEMPDIR, "extractdir")
+ os.mkdir(DIR)
+ try:
+ with tarfile.open(tarname, encoding="iso8859-1") as tar:
+ tarinfo = tar.getmember(dirtype)
+ tar.extract(tarinfo, path=DIR)
+ extracted = os.path.join(DIR, dirtype)
+ self.assertEqual(os.path.getmtime(extracted), tarinfo.mtime)
+ if sys.platform != "win32":
+ self.assertEqual(os.stat(extracted).st_mode & 0o777, 0o755)
+ finally:
+ # Always remove the scratch directory, even when an assertion fails.
+ support.rmtree(DIR)
+
+ #INFO: Pathlib doesn't exist on Python2
+ # def test_extractall_pathlike_name(self):
+ # DIR = pathlib.Path(TEMPDIR) / "extractall"
+ # with support.temp_dir(DIR), \
+ # tarfile.open(tarname, encoding="iso8859-1") as tar:
+ # directories = [t for t in tar if t.isdir()]
+ # tar.extractall(DIR, directories)
+ # for tarinfo in directories:
+ # path = DIR / tarinfo.name
+ # self.assertEqual(os.path.getmtime(path), tarinfo.mtime)
+
+ #INFO: Pathlib doesn't exist on Python2
+ # def test_extract_pathlike_name(self):
+ # dirtype = "ustar/dirtype"
+ # DIR = pathlib.Path(TEMPDIR) / "extractall"
+ # with support.temp_dir(DIR), \
+ # tarfile.open(tarname, encoding="iso8859-1") as tar:
+ # tarinfo = tar.getmember(dirtype)
+ # tar.extract(tarinfo, path=DIR)
+ # extracted = DIR / dirtype
+ # self.assertEqual(os.path.getmtime(extracted), tarinfo.mtime)
+
def test_init_close_fobj(self):
# Issue #7341: Close the internal file object in the TarFile
# constructor in case of an error. For the test we rely on
@@ -544,8 +618,18 @@ def test_compare_members(self):
tar1.close()
-class DetectReadTest(unittest.TestCase):
+class TarTest(object):
+    # Mixin holding the per-format settings shared by the test classes.
+    # It derives from object because on Python 2 property() is only
+    # specified for new-style classes.
+    tarname = tarname
+    suffix = ''
+    open = io.FileIO
+    taropen = tarfile.TarFile.taropen
+
+    @property
+    def mode(self):
+        # Mode string built from the "prefix" attribute (supplied by the
+        # concrete test class) followed by this class's "suffix".
+        return self.prefix + self.suffix
+
+class DetectReadTest(TarTest, unittest.TestCase):
def _testfunc_file(self, name, mode):
try:
tar = tarfile.open(name, mode)
@@ -673,6 +757,10 @@ def test_find_sparse(self):
tarinfo = self.tar.getmember("ustar/sparse")
self._test_member(tarinfo, size=86016, chksum=md5_sparse)
+    def test_find_gnusparse(self):
+        # The "gnu/sparse" member must have the same size and checksum as
+        # the other sparse test members.
+        member = self.tar.getmember("gnu/sparse")
+        self._test_member(member, size=86016, chksum=md5_sparse)
+
def test_find_umlauts(self):
tarinfo = self.tar.getmember("ustar/umlauts-\xc4\xd6\xdc\xe4\xf6\xfc\xdf")
self._test_member(tarinfo, size=7011, chksum=md5_regtype)
@@ -785,6 +873,67 @@ def test_pax_number_fields(self):
finally:
tar.close()
+ def test_pax_header_bad_formats(self):
+ # Each entry below is a malformed pax record that is spliced in place
+ # of the valid "11 foo=bar\n" record of a freshly written archive;
+ # opening the modified archive must fail with ReadError.
+ pax_header_replacements = (
+ b" foo=bar\n",
+ b"0 \n",
+ b"1 \n",
+ b"2 \n",
+ b"3 =\n",
+ b"4 =a\n",
+ b"1000000 foo=bar\n",
+ b"0 foo=bar\n",
+ b"-12 foo=bar\n",
+ b"000000000000000000000000036 foo=bar\n",
+ )
+ pax_headers = {"foo": "bar"}
+ for replacement in pax_header_replacements:
+ # Write a one-member pax archive containing the valid record.
+ tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT,
+ encoding="iso8859-1")
+ try:
+ t = tarfile.TarInfo()
+ t.name = "pax" # ASCII name; the pax record comes from pax_headers
+ t.uid = 1
+ t.pax_headers = pax_headers
+ tar.addfile(t)
+ finally:
+ tar.close()
+ # Patch the raw bytes of the archive on disk.
+ with open(tmpname, "rb") as f:
+ data = f.read()
+ self.assertIn(b"11 foo=bar\n", data)
+ data = data.replace(b"11 foo=bar\n", replacement)
+ with open(tmpname, "wb") as f:
+ f.truncate()
+ f.write(data)
+ with self.assertRaisesRegexp(tarfile.ReadError, r"file could not be opened successfully"):
+ tarfile.open(tmpname, encoding="iso8859-1")
+
+    def test_pax_header_negative_size(self):
+        # A pax header with a negative "size" must be rejected rather than
+        # producing a negative member offset (CVE-2025-8194, gh-130577).
+        tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT,
+                           encoding="iso8859-1")
+        try:
+            t = tarfile.TarInfo()
+            t.name = "pax"
+            t.uid = 1
+            t.pax_headers = {"foo": "bar"}
+            tar.addfile(t)
+        finally:
+            tar.close()
+        with open(tmpname, "rb") as f:
+            data = f.read()
+        # "13 size=-512\n" -- record length (13) includes itself and the
+        # newline.  It is two bytes longer than "11 foo=bar\n", so two of
+        # the NUL padding bytes that follow the record are consumed as well.
+        # Keeping the length unchanged leaves the next header on its
+        # 512-byte boundary; otherwise the archive would be rejected merely
+        # because that header got shifted, whether or not the negative size
+        # is checked.
+        valid = b"11 foo=bar\n\0\0"
+        bogus = b"13 size=-512\n"
+        self.assertEqual(len(valid), len(bogus))
+        self.assertIn(valid, data)
+        data = data.replace(valid, bogus)
+        with open(tmpname, "wb") as f:
+            f.truncate()
+            f.write(data)
+        with self.assertRaisesRegexp(tarfile.ReadError, r"file could not be opened successfully"):
+            tarfile.open(tmpname, encoding="iso8859-1")
+
class WriteTestBase(unittest.TestCase):
# Put all write tests in here that are supposed to be tested
@@ -803,6 +952,16 @@ def test_fileobj_no_close(self):
self.assertFalse(fobj.closed)
self.assertEqual(data, fobj.getvalue())
+ def test_eof_marker(self):
+ # Make sure an end of archive marker is written (two zero blocks).
+ # tarfile insists on aligning archives to a 20 * 512 byte recordsize.
+ # So, we create an archive that has exactly 10240 bytes without the
+ # marker, and has 20480 bytes once the marker is written.
+ # NOTE(review): the resulting archive size is never asserted here, so
+ # this test only proves that writing does not raise -- confirm whether
+ # a size check (per compression mode) should be added.
+ with tarfile.open(tmpname, self.mode) as tar:
+ t = tarfile.TarInfo("foo")
+ t.size = tarfile.RECORDSIZE - tarfile.BLOCKSIZE
+ tar.addfile(t, io.BytesIO(b"a" * t.size))
+
class WriteTest(WriteTestBase):
@@ -872,6 +1031,20 @@ def test_directory_size(self):
finally:
os.rmdir(path)
+
+ #INFO: We don't have pathlib on Python2, not sure if we can really test this
+ # def test_gettarinfo_pathlike_name(self):
+ # with tarfile.open(tmpname, self.mode) as tar:
+ # path = pathlib.Path(TEMPDIR) / "file"
+ # with open(path, "wb") as fobj:
+ # fobj.write(b"aaa")
+ # tarinfo = tar.gettarinfo(path)
+ # tarinfo2 = tar.gettarinfo(os.fspath(path))
+ # self.assertIsInstance(tarinfo.name, str)
+ # self.assertEqual(tarinfo.name, tarinfo2.name)
+ # self.assertEqual(tarinfo.size, 3)
+
+ @unittest.skipUnless(hasattr(os, "link"),"Missing hardlink implementation")
def test_link_size(self):
if hasattr(os, "link"):
link = os.path.join(TEMPDIR, "link")
@@ -892,6 +1065,7 @@ def test_link_size(self):
os.remove(target)
os.remove(link)
+ @symlink_support.skip_unless_symlink
def test_symlink_size(self):
if hasattr(os, "symlink"):
path = os.path.join(TEMPDIR, "symlink")
@@ -971,6 +1145,11 @@ def filter(tarinfo):
finally:
tar.close()
+ #FIX: Not sure how to test this on Python2 ATM
+ # # Verify that filter is a keyword-only argument
+ # with self.assertRaises(TypeError):
+ # tar.add(tempdir, "empty_dir", True, None, filter)
+
tar = tarfile.open(tmpname, "r")
try:
for tarinfo in tar:
@@ -1014,6 +1193,36 @@ def _test_pathname(self, path, cmp_path=None, dir=False):
self.assertEqual(t.name, cmp_path or path.replace(os.sep, "/"))
+
+ @symlink_support.skip_unless_symlink
+ def test_extractall_symlinks(self):
+ # Test if extractall works properly when tarfile contains symlinks
+ tempdir = os.path.join(TEMPDIR, "testsymlinks")
+ temparchive = os.path.join(TEMPDIR, "testsymlinks.tar")
+ os.mkdir(tempdir)
+ try:
+ # Build an archive holding a regular file and a symlink to it.
+ source_file = os.path.join(tempdir,'source')
+ target_file = os.path.join(tempdir,'symlink')
+ with open(source_file,'w') as f:
+ f.write('something\n')
+ os.symlink(source_file, target_file)
+ tar = tarfile.open(temparchive,'w')
+ tar.add(source_file)
+ tar.add(target_file)
+ tar.close()
+ # Let's extract it to the location which contains the symlink
+ tar = tarfile.open(temparchive,'r')
+ # this should not raise OSError: [Errno 17] File exists
+ try:
+ tar.extractall(path=tempdir)
+ except OSError:
+ self.fail("extractall failed with symlinked files")
+ finally:
+ tar.close()
+ finally:
+ # Remove both the archive and the scratch directory.
+ support.unlink(temparchive)
+ support.rmtree(tempdir)
+
def test_pathnames(self):
self._test_pathname("foo")
self._test_pathname(os.path.join("foo", ".", "bar"))
@@ -1294,6 +1503,105 @@ def test_longnamelink_1025(self):
("longlnk/" * 127) + "longlink_")
+class CreateTest(TarTest, WriteTestBase, unittest.TestCase):
+    # Tests for the exclusive-creation ("x") archive modes.
+    #
+    # TarTest is mixed in because these tests read self.mode (built from
+    # "prefix") and self.taropen, both of which TarTest provides.
+    # NOTE(review): confirm that the backported tarfile accepts "x" modes
+    # on Python 2.
+
+    prefix = "x:"
+
+    file_path = os.path.join(TEMPDIR, "spameggs42")
+
+    def setUp(self):
+        # Exclusive creation requires that the archive does not exist yet.
+        support.unlink(tmpname)
+
+    @classmethod
+    def setUpClass(cls):
+        # Small payload file that every test adds to its archive.
+        with open(cls.file_path, "wb") as fobj:
+            fobj.write(b"aaa")
+
+    @classmethod
+    def tearDownClass(cls):
+        support.unlink(cls.file_path)
+
+    def _check_single_member(self, tobj):
+        # The open archive must hold exactly the one payload file.
+        names = tobj.getnames()
+        self.assertEqual(len(names), 1)
+        self.assertIn('spameggs42', names[0])
+
+    def _check_archive(self):
+        # Reopen tmpname for reading and verify its content.
+        with self.taropen(tmpname) as tobj:
+            self._check_single_member(tobj)
+
+    def test_create(self):
+        with tarfile.open(tmpname, self.mode) as tobj:
+            tobj.add(self.file_path)
+
+        self._check_archive()
+
+    def test_create_existing(self):
+        with tarfile.open(tmpname, self.mode) as tobj:
+            tobj.add(self.file_path)
+
+        # Python 2 has no FileExistsError; failures of this kind belong to
+        # the EnvironmentError family (IOError/OSError).
+        # NOTE(review): confirm which of the two the "x" mode raises.
+        with self.assertRaises(EnvironmentError):
+            tobj = tarfile.open(tmpname, self.mode)
+
+        # The failed attempt must have left the first archive intact.
+        self._check_archive()
+
+    def test_create_taropen(self):
+        with self.taropen(tmpname, "x") as tobj:
+            tobj.add(self.file_path)
+
+        self._check_archive()
+
+    def test_create_existing_taropen(self):
+        with self.taropen(tmpname, "x") as tobj:
+            tobj.add(self.file_path)
+
+        # See test_create_existing for the choice of exception class.
+        with self.assertRaises(EnvironmentError):
+            with self.taropen(tmpname, "x"):
+                pass
+
+        self._check_archive()
+
+    @unittest.skip("pathlib is not available on Python 2")
+    def test_create_pathlike_name(self):
+        with tarfile.open(pathlib.Path(tmpname), self.mode) as tobj:
+            self.assertIsInstance(tobj.name, str)
+            self.assertEqual(tobj.name, os.path.abspath(tmpname))
+            tobj.add(pathlib.Path(self.file_path))
+            self._check_single_member(tobj)
+
+        self._check_archive()
+
+    @unittest.skip("pathlib is not available on Python 2")
+    def test_create_taropen_pathlike_name(self):
+        with self.taropen(pathlib.Path(tmpname), "x") as tobj:
+            self.assertIsInstance(tobj.name, str)
+            self.assertEqual(tobj.name, os.path.abspath(tmpname))
+            tobj.add(pathlib.Path(self.file_path))
+            self._check_single_member(tobj)
+
+        self._check_archive()
+
+
+class CreateWithXModeTest(CreateTest):
+ # Runs the CreateTest tests with the bare "x" prefix instead of "x:".
+
+ prefix = "x"
+
+ # Rebinding the inherited names to None leaves nothing callable under
+ # them, so these two taropen tests are not run again for this class.
+ test_create_taropen = None
+ test_create_existing_taropen = None
+
+
+@unittest.skipUnless(hasattr(os, "link"), "Missing hardlink implementation")
class HardlinkTest(unittest.TestCase):
# Test the creation of LNKTYPE (hardlink) members in an archive.
@@ -1536,6 +1844,30 @@ def test_error_handler_utf8(self):
errors="utf-8")
self.assertEqual(tar.getnames()[0], "\xe4\xf6\xfc/" + u"\u20ac".encode("utf8"))
+ # FIX: Came in on Python3.6
+ # Test the same as above for the 100 bytes link field.
+ def test_unicode_link1(self):
+ # A link name of exactly 100 bytes must be accepted; 101 bytes must
+ # raise ValueError.
+ self._test_ustar_link("0123456789" * 10)
+ self._test_ustar_link("0123456789" * 10 + "0", ValueError)
+ # Use a two byte UTF-8 character
+ self._test_ustar_link("0123456789" * 9 + "01234567\303\251")
+ self._test_ustar_link("0123456789" * 9 + "012345678\303\251", ValueError)
+
+ def _test_ustar_link(self, name, exc=None):
+ # Helper: write a member whose linkname is *name* with format=0.
+ # When *exc* is given, addfile() must raise it; otherwise the archive
+ # is read back and the first member's linkname must equal *name*.
+ with tarfile.open(tmpname, "w", format=0, encoding="utf-8") as tar:
+ t = tarfile.TarInfo("foo")
+ t.linkname = name
+ if exc is None:
+ tar.addfile(t)
+ else:
+ self.assertRaises(exc, tar.addfile, t)
+
+ if exc is None:
+ with tarfile.open(tmpname, "r", encoding="utf-8") as tar:
+ # Only the first member is checked.
+ for t in tar:
+ self.assertEqual(name, t.linkname)
+ break
+
class AppendTest(unittest.TestCase):
# Test append mode (cp. patch #1652681).
@@ -1686,7 +2018,48 @@ def test_pax_limits(self):
class MiscTest(unittest.TestCase):
+ # Came in on Python3.6
+    def test_char_fields(self):
+        # stn() pads with NULs or truncates to the requested field width;
+        # nts() returns the text up to the first NUL.
+        for text, width, field in (("foo", 8, b"foo\0\0\0\0\0"),
+                                   ("foobar", 3, b"foo")):
+            self.assertEqual(tarfile.stn(text, width), field)
+        for field in (b"foo\0\0\0\0\0", b"foo\0bar\0"):
+            self.assertEqual(tarfile.nts(field), "foo")
+
+ def test_block_negative_count(self):
+ # gh-130577: _block() must reject negative byte counts instead of
+ # returning a negative (rounded) value that yields a backward offset.
+ tarinfo = tarfile.TarInfo("foo")
+ # Valid counts: 834 is expected to give 1024, and 0 to give 0.
+ self.assertEqual(tarinfo._block(834), 1024)
+ self.assertEqual(tarinfo._block(0), 0)
+ # Negative counts, small and very large, must all be rejected.
+ for bad in (-1, -512, -(2 ** 71)):
+ self.assertRaises(tarfile.InvalidHeaderError, tarinfo._block, bad)
+
+ def test_aregtype_dircheck(self):
+ # CVE-2025-13462: an AREGTYPE header whose name ends in a slash is
+ # normalized to DIRTYPE for a primary header, but NOT for a follow-up
+ # header (e.g. a GNU long name/link or pax header).
+ t = tarfile.TarInfo("foo/")
+ t.type = tarfile.AREGTYPE
+ # The same serialized header is parsed twice, once per dircheck value.
+ buf = t.tobuf()
+ self.assertEqual(tarfile.TarInfo._frombuf(buf, dircheck=True).type,
+ tarfile.DIRTYPE)
+ self.assertEqual(tarfile.TarInfo._frombuf(buf, dircheck=False).type,
+ tarfile.AREGTYPE)
+
def test_read_number_fields(self):
+ # Issue 13158: Test if GNU tar specific base-256 number fields
+ # are decoded correctly.
+ self.assertEqual(tarfile.nti(b"0000001\x00"), 1)
+ self.assertEqual(tarfile.nti(b"7777777\x00"), 0o7777777)
+ self.assertEqual(tarfile.nti(b"\x80\x00\x00\x00\x00\x20\x00\x00"),
+ 0o10000000)
+ self.assertEqual(tarfile.nti(b"\x80\x00\x00\x00\xff\xff\xff\xff"),
+ 0xffffffff)
+
# Issue 24514: Test if empty number fields are converted to zero.
self.assertEqual(tarfile.nti("\0"), 0)
self.assertEqual(tarfile.nti(" \0"), 0)
@@ -1760,15 +2133,24 @@ def _test_link_extraction(self, name):
data = open(os.path.join(TEMPDIR, name), "rb").read()
self.assertEqual(md5sum(data), md5_regtype)
+ # See issues #1578269, #8879, and #17689 for some history on these skips
+ @unittest.skipIf(hasattr(os.path, "islink"),
+ "Skip emulation - has os.path.islink but not os.link")
def test_hardlink_extraction1(self):
self._test_link_extraction("ustar/lnktype")
+ @unittest.skipIf(hasattr(os.path, "islink"),
+ "Skip emulation - has os.path.islink but not os.link")
def test_hardlink_extraction2(self):
self._test_link_extraction("./ustar/linktest2/lnktype")
+ @unittest.skipIf(hasattr(os, "symlink"),
+ "Skip emulation if symlink exists")
def test_symlink_extraction1(self):
self._test_link_extraction("ustar/symtype")
+ @unittest.skipIf(hasattr(os, "symlink"),
+ "Skip emulation if symlink exists")
def test_symlink_extraction2(self):
self._test_link_extraction("./ustar/linktest2/symtype")
@@ -1835,6 +2217,10 @@ def test_partial_input_bz2(self):
def test_main():
+ #NOTE:
+ # The tests are assuming a default system locale with ISO-8859-1, but that's not normal anymore
+ tarfile.ENCODING = "ISO-8859-1"
+
support.unlink(TEMPDIR)
os.makedirs(TEMPDIR)
@@ -1908,5 +2294,10 @@ def test_main():
if os.path.exists(TEMPDIR):
shutil.rmtree(TEMPDIR)
+#NOTE: Reset tarfile default encoding again after tests are done
+tarfile.ENCODING = sys.getfilesystemencoding()
+if tarfile.ENCODING is None:
+ tarfile.ENCODING = sys.getdefaultencoding()
+
if __name__ == "__main__":
test_main()
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index f924289041a7a1..9f17975f06e694 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -679,6 +679,26 @@ def test_urlsplit_strip_url(self):
self.assertEqual(p.scheme, "https")
self.assertEqual(p.geturl(), "https://www.python.org/")
+ def test_invalid_bracketed_host(self):
+ # CVE-2025-0938: square brackets must only enclose a valid
+ # IPv6/IPvFuture host, not appear elsewhere in the host/netloc.
+ invalid = [
+ "http://ex[ample].com/",
+ "http://[example.com]/",
+ "http://[1.2.3.4]/",
+ "http://[v1.x]extra/",
+ "http://[fe80::g]/",
+ "http://user[x]@example.com/",
+ ]
+ # Both parsing entry points must reject every invalid URL.
+ for url in invalid:
+ for parse in (urlparse.urlsplit, urlparse.urlparse):
+ self.assertRaises(ValueError, parse, url)
+ # Valid bracketed IPv6/IPvFuture hosts are still accepted.
+ # (Only the absence of an exception is checked here.)
+ for url in ("http://[::1]/", "http://[::1]:8080/path",
+ "http://[2001:db8::1]/", "http://[v1.fe80::1]/"):
+ urlparse.urlsplit(url)
+ urlparse.urlparse(url)
+
def test_attributes_bad_port_a(self):
"""Check handling of invalid ports."""
for bytes in (False, True):
diff --git a/Lib/test/test_webbrowser.py b/Lib/test/test_webbrowser.py
new file mode 100644
index 00000000000000..9c4fbd8a6668f9
--- /dev/null
+++ b/Lib/test/test_webbrowser.py
@@ -0,0 +1,51 @@
+import unittest
+import webbrowser
+
+from test import test_support
+
+
+class FakeUnixBrowser(webbrowser.UnixBrowser):
+ # Concrete UnixBrowser with string actions so open() can run far enough
+ # to perform URL validation without launching anything.
+ # remote_args is the argument template: '%action' stands for one of the
+ # remote_action* strings below and '%s' for the URL.
+ remote_args = ['%action', '%s']
+ remote_action = ""
+ remote_action_newwin = "-new-window"
+ remote_action_newtab = "-new-tab"
+
+
+class CheckURLTest(unittest.TestCase):
+ # webbrowser.open() must not let an attacker-controlled URL be
+ # turned into a command-line option (CVE-2026-4519 / CVE-2026-4786).
+
+ def test_check_url_rejects_leading_dash(self):
+ # A dash that is first, or first after leading whitespace, is refused.
+ for bad in ("-remote", "--incognito", " -leadingspace", "\t-tab"):
+ self.assertRaises(ValueError,
+ webbrowser.BaseBrowser._check_url, bad)
+
+ def test_check_url_allows_normal(self):
+ # Ordinary URLs, a dash inside the URL and the empty string pass.
+ for ok in ("http://example.com", "https://x/-dash-inside", ""):
+ # Must not raise.
+ webbrowser.BaseBrowser._check_url(ok)
+
+ def test_generic_browser_rejects_dash_url(self):
+ # The check must also be applied by GenericBrowser.open().
+ browser = webbrowser.GenericBrowser(["true", "%s"])
+ self.assertRaises(ValueError, browser.open, "-dangerous")
+
+ def test_unix_browser_rejects_dash_url(self):
+ # The check must also be applied by UnixBrowser.open().
+ browser = FakeUnixBrowser("fakebrowser")
+ self.assertRaises(ValueError, browser.open, "-dangerous")
+
+ def test_unix_browser_rejects_action_bypass(self):
+ # The %action substitution must not be usable to smuggle a leading
+ # dash past the check (CVE-2026-4786). With new=1 the action expands
+ # to "-new-window", so a "%action" URL would become a bare flag.
+ browser = FakeUnixBrowser("fakebrowser")
+ self.assertRaises(ValueError, browser.open, "%action", 1)
+
+
+def test_main():
+ # Entry point used when this test module is run: runs CheckURLTest.
+ test_support.run_unittest(CheckURLTest)
+
+
+if __name__ == "__main__":
+ test_main()
diff --git a/Lib/test/test_wsgiref.py b/Lib/test/test_wsgiref.py
index 20129e7edc7ea1..817b505afc95aa 100644
--- a/Lib/test/test_wsgiref.py
+++ b/Lib/test/test_wsgiref.py
@@ -300,6 +300,19 @@ def testHopByHop(self):
class HeaderTests(TestCase):
+ def testControlCharactersRejected(self):
+ # CVE-2026-0865: control characters in header names/values must be
+ # rejected to prevent HTTP response splitting / header injection.
+ h = Headers([])
+ # Every way of adding a header is covered: item assignment (value and
+ # name), add_header() (value and parameter) and the constructor.
+ self.assertRaises(ValueError, h.__setitem__, 'Foo', 'bar\r\nInjected: 1')
+ self.assertRaises(ValueError, h.__setitem__, 'Ba\nd', 'value')
+ self.assertRaises(ValueError, h.add_header, 'Foo', 'a\nb')
+ self.assertRaises(ValueError, h.add_header, 'Foo', 'ok', baz='x\ry')
+ self.assertRaises(ValueError, Headers, [('Foo', 'a\nb')])
+ # Benign headers still work.
+ h['Foo'] = 'bar'
+ h.add_header('Content-Disposition', 'attachment', filename='ok.txt')
+
def testMappingInterface(self):
test = [('x','y')]
self.assertEqual(len(Headers([])),0)
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index c75d55f05c17cd..fed93f1d907f0b 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1482,11 +1482,13 @@ def test_issue6233(self):
b"\n"
b'
tãg')
- def test_issue3151(self):
- e = ET.XML('')
- self.assertEqual(e.tag, '{${stuff}}localname')
- t = ET.ElementTree(e)
- self.assertEqual(ET.tostring(e), b'')
+ # This IRI being used (xmlns:prefix) isn't a valid IRI, and this test will never work,
+ # because Expat is now stricter
+ # def test_issue3151(self):
+ # e = ET.XML('')
+ # self.assertEqual(e.tag, '{${stuff}}localname')
+ # t = ET.ElementTree(e)
+ # self.assertEqual(ET.tostring(e), b'')
def test_issue6565(self):
elem = ET.XML("")
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 4e545f140a835a..248842f59a39e6 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -940,6 +940,35 @@ class OtherTests(unittest.TestCase):
b'\x01\x003\x00\x00\x003\x00\x00\x00\x00\x00'),
}
+ def test_overlapping_entries_rejected(self):
+ # CVE-2024-0450: an entry whose compressed data overruns the start of
+ # the following entry (a "quoted overlap" zip bomb) must be rejected.
+ buf = StringIO()
+ with zipfile.ZipFile(buf, "w", zipfile.ZIP_STORED) as zf:
+ zf.writestr("a", "a" * 1000)
+ zf = zipfile.ZipFile(buf, "r")
+ try:
+ info = zf.getinfo("a")
+ # Sanity: a normal archive reads fine and has a known end offset.
+ self.assertEqual(zf.read("a"), "a" * 1000)
+ self.assertIsNotNone(info._end_offset)
+ # Simulate an overlap by shrinking the member's end boundary.
+ info._end_offset = info.header_offset + 1
+ self.assertRaises(zipfile.BadZipfile, zf.open, "a")
+ finally:
+ zf.close()
+
+ def test_zip64_locator_bad_offset_rejected(self):
+ # CVE-2025-8291: a ZIP64 end-of-central-directory locator whose
+ # relative offset points past the expected record must be rejected.
+ loc = struct.pack(zipfile.structEndArchive64Locator,
+ zipfile.stringEndArchive64Locator, 0, 10 ** 9, 1)
+ buf = ('\0' * zipfile.sizeEndCentDir64 + loc +
+ '\0' * zipfile.sizeEndCentDir)
+ fpin = StringIO(buf)
+ self.assertRaises(zipfile.BadZipfile, zipfile._EndRecData64,
+ fpin, -zipfile.sizeEndCentDir, [0] * 10)
+
def test_unicode_filenames(self):
with zipfile.ZipFile(TESTFN, "w") as zf:
zf.writestr(u"foo.txt", "Test for unicode filename")
diff --git a/Lib/urlparse.py b/Lib/urlparse.py
index 0f12940c3dc0ed..4e7e5e5d025791 100644
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -198,6 +198,44 @@ def _checknetloc(netloc):
"under NFKC normalization"
% netloc)
+def _check_bracketed_host(hostname):
+ # Validate the content of a bracketed (IPv6 / IPvFuture) host. ipaddress
+ # is unavailable in Python 2, so IPv6 is validated via socket.inet_pton
+ # when present, with a conservative character fallback otherwise.
+ if hostname.startswith('v'):
+ if not re.match(r"\Av[a-fA-F0-9]+\..+\Z", hostname):
+ raise ValueError("IPvFuture address is invalid")
+ elif ':' not in hostname:
+ # A bare domain name or IPv4 address is not allowed in brackets.
+ raise ValueError("An IPv4 address cannot be in brackets")
+ else:
+ try:
+ import socket
+ socket.inet_pton(socket.AF_INET6, hostname)
+ except AttributeError:
+ # inet_pton may be missing (e.g. Windows under Python 2).
+ if not re.match(r"\A[0-9A-Fa-f:.]+\Z", hostname):
+ raise ValueError("Invalid IPv6 address")
+ except (ValueError, socket.error):
+ raise ValueError("Invalid IPv6 address")
+
+def _check_bracketed_netloc(netloc):
+ # Reject '[' / ']' that do not delimit a valid IPv6/IPvFuture host
+ # (CVE-2025-0938). This mirrors the splitting done in _hostinfo().
+ hostname_and_port = netloc.rpartition('@')[2]
+ before_bracket, have_open_br, bracketed = hostname_and_port.partition('[')
+ if have_open_br:
+ # No data is allowed before a bracket.
+ if before_bracket:
+ raise ValueError("Invalid IPv6 URL")
+ hostname, _, port = bracketed.partition(']')
+ # No data is allowed after the bracket but before the port delimiter.
+ if port and not port.startswith(":"):
+ raise ValueError("Invalid IPv6 URL")
+ else:
+ hostname, _, port = hostname_and_port.partition(':')
+ _check_bracketed_host(hostname)
+
def urlsplit(url, scheme='', allow_fragments=True):
"""Parse a URL into 5 components:
<scheme>://<netloc>/<path>?<query>#<fragment>
@@ -231,6 +269,8 @@ def urlsplit(url, scheme='', allow_fragments=True):
if (('[' in netloc and ']' not in netloc) or
(']' in netloc and '[' not in netloc)):
raise ValueError("Invalid IPv6 URL")
+ if '[' in netloc and ']' in netloc:
+ _check_bracketed_netloc(netloc)
if allow_fragments and '#' in url:
url, fragment = url.split('#', 1)
if '?' in url:
@@ -258,6 +298,8 @@ def urlsplit(url, scheme='', allow_fragments=True):
if (('[' in netloc and ']' not in netloc) or
(']' in netloc and '[' not in netloc)):
raise ValueError("Invalid IPv6 URL")
+ if '[' in netloc and ']' in netloc:
+ _check_bracketed_netloc(netloc)
if allow_fragments and '#' in url:
url, fragment = url.split('#', 1)
if '?' in url:
diff --git a/Lib/webbrowser.py b/Lib/webbrowser.py
index 15eeb660e25831..fb8b848178a68e 100755
--- a/Lib/webbrowser.py
+++ b/Lib/webbrowser.py
@@ -144,6 +144,12 @@ def __init__(self, name=""):
self.name = name
self.basename = name
+ @staticmethod
+ def _check_url(url):
+ """Ensures that the URL is safe to pass to subprocesses as a parameter"""
+ if url and url.lstrip().startswith("-"):
+ raise ValueError("Invalid URL (leading dash disallowed): %r" % (url,))
+
def open(self, url, new=0, autoraise=True):
raise NotImplementedError
@@ -169,6 +175,7 @@ def __init__(self, name):
self.basename = os.path.basename(self.name)
def open(self, url, new=0, autoraise=True):
+ self._check_url(url)
cmdline = [self.name] + [arg.replace("%s", url)
for arg in self.args]
try:
@@ -186,6 +193,7 @@ class BackgroundBrowser(GenericBrowser):
background."""
def open(self, url, new=0, autoraise=True):
+ self._check_url(url)
cmdline = [self.name] + [arg.replace("%s", url)
for arg in self.args]
try:
@@ -270,8 +278,11 @@ def open(self, url, new=0, autoraise=True):
raise Error("Bad 'new' parameter to open(); " +
"expected 0, 1, or 2, got %s" % new)
- args = [arg.replace("%s", url).replace("%action", action)
+ self._check_url(url.replace("%action", action))
+
+ args = [arg.replace("%action", action).replace("%s", url)
for arg in self.remote_args]
+ args = [arg for arg in args if arg]
success = self._invoke(args, True, autoraise)
if not success:
# remote invocation failed, try straight way
diff --git a/Lib/wsgiref/headers.py b/Lib/wsgiref/headers.py
index 5a95e84c3420ec..dec810b7a993f0 100644
--- a/Lib/wsgiref/headers.py
+++ b/Lib/wsgiref/headers.py
@@ -12,6 +12,16 @@
import re
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
+# Match C0 control characters and DEL, which must never appear in a header
+# name or value (they would allow HTTP response splitting / header injection).
+_control_chars_re = re.compile(r'[\x00-\x1f\x7f]')
+
+def _check_string(value):
+    """Reject str/unicode header names/values containing control characters."""
+    if isinstance(value, basestring) and _control_chars_re.search(value):
+        raise ValueError("Control characters not allowed in headers")
+    return value
+
def _formatparam(param, value=None, quote=1):
"""Convenience function to format and return a key=value pair.
@@ -34,6 +44,9 @@ class Headers:
def __init__(self,headers):
if type(headers) is not ListType:
raise TypeError("Headers must be a list of name/value tuples")
+ for name, val in headers:
+ _check_string(name)
+ _check_string(val)
self._headers = headers
def __len__(self):
@@ -42,6 +55,8 @@ def __len__(self):
def __setitem__(self, name, val):
"""Set the value of a header."""
+ _check_string(name)
+ _check_string(val)
del self[name]
self._headers.append((name, val))
@@ -158,12 +173,15 @@ def add_header(self, _name, _value, **_params):
*not* handle '(charset, language, value)' tuples: all values must be
strings or None.
"""
+ _check_string(_name)
parts = []
if _value is not None:
+ _check_string(_value)
parts.append(_value)
for k, v in _params.items():
if v is None:
parts.append(k.replace('_', '-'))
else:
+ _check_string(v)
parts.append(_formatparam(k.replace('_', '-'), v))
self._headers.append((_name, "; ".join(parts)))
diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py
index 05649d620fac53..1eec599e9da564 100644
--- a/Lib/xml/dom/minidom.py
+++ b/Lib/xml/dom/minidom.py
@@ -1467,7 +1467,11 @@ def _clear_id_cache(node):
if node.nodeType == Node.DOCUMENT_NODE:
node._id_cache.clear()
node._id_search_stack = None
- elif _in_document(node):
+ elif node.ownerDocument:
+ # Avoid the O(depth) _in_document() walk on every mutation; clearing
+ # the cache when the node has an owning document is sufficient and
+ # removes the quadratic cost of building deeply nested trees
+ # (CVE-2025-12084).
node.ownerDocument._id_cache.clear()
node.ownerDocument._id_search_stack= None
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 991a0add205d17..2dba47e62cb91c 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -182,6 +182,8 @@ def _EndRecData64(fpin, offset, endrec):
"""
Read the ZIP64 end-of-archive records and use that to update endrec
"""
+ fpin.seek(0, 2)
+ filesize = fpin.tell()
try:
fpin.seek(offset - sizeEndCentDir64Locator, 2)
except IOError:
@@ -199,6 +201,14 @@ def _EndRecData64(fpin, offset, endrec):
if diskno != 0 or disks != 1:
raise BadZipfile("zipfiles that span multiple disks are not supported")
+ # The ZIP64 end of central directory record is expected to lie immediately
+ # before the locator. Reject archives whose locator's relative offset
+ # points past that position, instead of trusting the assumed adjacency
+ # (CVE-2025-8291).
+ expected_reloff = filesize + offset - sizeEndCentDir64Locator - sizeEndCentDir64
+ if reloff > expected_reloff:
+ raise BadZipfile("Corrupt zip64 end of central directory locator")
+
# Assume no 'zip64 extensible data'
fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
data = fpin.read(sizeEndCentDir64)
@@ -305,6 +315,7 @@ class ZipInfo (object):
'compress_size',
'file_size',
'_raw_time',
+ '_end_offset',
)
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
@@ -343,6 +354,9 @@ def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
self.volume = 0 # Volume number of file header
self.internal_attr = 0 # Internal attributes
self.external_attr = 0 # External file attributes
+ self._end_offset = None # Start of the next local header (or
+ # the central directory); used to
+ # detect overlapping entries
# Other attributes are set by class ZipFile:
# header_offset Byte offset to the file header
# CRC CRC-32 of the uncompressed file
@@ -891,6 +905,17 @@ def _RealGetContents(self):
if self.debug > 2:
print "total", total
+ # Compute the end of each member's data as the start of the next
+ # member's local header (or the start of the central directory for the
+ # last member). This lets open() reject overlapping entries, i.e. a
+ # "quoted overlap" zip bomb (CVE-2024-0450).
+ end_offset = self.start_dir
+ for zinfo in sorted(self.filelist,
+ key=lambda zinfo: zinfo.header_offset,
+ reverse=True):
+ zinfo._end_offset = end_offset
+ end_offset = zinfo.header_offset
+
def namelist(self):
"""Return a list of file names in the archive."""
@@ -1002,6 +1027,14 @@ def open(self, name, mode="r", pwd=None):
'File name in directory "%s" and header "%s" differ.' % (
zinfo.orig_filename, fname)
+ # Reject entries whose compressed data would extend past the start
+ # of the next entry: overlapping members are a zip bomb vector
+ # (CVE-2024-0450).
+ if (zinfo._end_offset is not None and
+ zef_file.tell() + zinfo.compress_size > zinfo._end_offset):
+ raise BadZipfile("Overlapped entries: %r (possible zip bomb)"
+ % (zinfo.orig_filename,))
+
# check for encrypted flag & handle password
is_encrypted = zinfo.flag_bits & 0x1
zd = None
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 2a14f3323bc3f4..5aa5e88cc4ebc3 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -873,6 +873,16 @@ testall: @DEF_MAKE_RULE@ platform
-$(TESTPYTHON) $(TESTPROG) -uall $(TESTOPTS)
$(TESTPYTHON) $(TESTPROG) -uall $(TESTOPTS)
+
+TESTOPTSA= $(TESTOPTS) -vvv
+TESTPROGA= $(srcdir)/Lib/test/test_shutil.py
+TESTPYTHONA= $(RUNSHARED) ./$(BUILDPYTHON) -Wd -3 -E -tt $(TESTPYTHONOPTS)
+testA:
+ -find $(srcdir)/Lib -name '*.py[co]' -print | xargs rm -f
+ -$(TESTPYTHONA) $(TESTPROGA) $(TESTOPTSA)
+ $(TESTPYTHONA) $(TESTPROGA) $(TESTOPTSA)
+
+
# Run the unitests for both architectures in a Universal build on OSX
# Must be run on an Intel box.
testuniversal: @DEF_MAKE_RULE@ platform
diff --git a/Misc/NEWS.d/2.7.18.11.rst b/Misc/NEWS.d/2.7.18.11.rst
new file mode 100644
index 00000000000000..c07383920dfa29
--- /dev/null
+++ b/Misc/NEWS.d/2.7.18.11.rst
@@ -0,0 +1,42 @@
+.. bpo: ?
+.. date: 2025-01-20
+.. nonce:
+.. release date: 2025-01-22
+.. section: Core and Builtins
+
+CVE-2007-4559
+
+Implement parts of PEP 706 Filter for tarfile.extractall
+
+ExFileObject now acts as a context manager.
+The list method of TarFile now has the "members" parameter
+
+Various tests were added to check for proper behaviour with SymLinks
+
+Python2 doesn't have pathlib, so those tests are disabled
+
+.. bpo: ?
+.. date: 2025-01-20
+.. nonce:
+.. release date: 2025-01-22
+.. section: Core and Builtins
+
+CVE-2024-6232
+
+Remove backtracking when parsing tarfile headers
+
+Python2 doesn't support PAX headers so, for the most part this doesn't affect Python2
+
+Various tests were added from the CVE fix to improve rigour
+
+[3.12] gh-121285: Remove backtracking when parsing tarfile headers (GH-121286) (GH-123543)
+
+.. bpo: ?
+.. date: 2025-01-20
+.. nonce:
+.. release date: 2025-01-22
+.. section: xml_etree
+
+BE-4504 Use newer Expat > 2.6.3 (in particular AS Platform Expat 2.6.4)
+
+Expat is now stricter, and invalid IRIs are now rejected with a syntax error.
diff --git a/Misc/NEWS.d/2.7.18.12.rst b/Misc/NEWS.d/2.7.18.12.rst
new file mode 100644
index 00000000000000..aeec7e8cf388a6
--- /dev/null
+++ b/Misc/NEWS.d/2.7.18.12.rst
@@ -0,0 +1,9 @@
+.. bpo: ?
+.. date: 2025-01-22
+.. nonce:
+.. release date: 2025-01-22
+.. section: Core and Builtins
+
+CVE-2023-27043
+
+The legacy email.utils.parseaddr function in Python through 3.11.4 allows attackers to trigger "RecursionError: maximum recursion depth exceeded while calling a Python object" via a crafted argument. This argument is plausibly an untrusted value from an application's input data that was supposed to contain a name and an e-mail address. NOTE: email.utils.parseaddr is categorized as a Legacy API in the documentation of the Python email package. Applications should instead use the email.parser.BytesParser or email.parser.Parser class. NOTE: the vendor's perspective is that this is neither a vulnerability nor a bug. The email package is intended to have size limits and to throw an exception when limits are exceeded; they were exceeded by the example demonstration code.
diff --git a/Misc/NEWS.d/2.7.18.13.rst b/Misc/NEWS.d/2.7.18.13.rst
new file mode 100644
index 00000000000000..df28689cd5ff1a
--- /dev/null
+++ b/Misc/NEWS.d/2.7.18.13.rst
@@ -0,0 +1,7 @@
+.. bpo: ?
+.. date: 2026-03-06
+.. nonce:
+.. release date: 2026-03-06
+.. section: Core and Builtins
+
+Refactor CVE-2023-27043 patch to support Unicode characters
diff --git a/Misc/NEWS.d/2.7.18.14.rst b/Misc/NEWS.d/2.7.18.14.rst
new file mode 100644
index 00000000000000..e0a887ea1ecddb
--- /dev/null
+++ b/Misc/NEWS.d/2.7.18.14.rst
@@ -0,0 +1,261 @@
+.. bpo: 0
+.. date: 2026-05-27
+.. nonce: as$i9+
+.. release date: 2026-05-27
+.. original section: Library
+.. section: Security
+
+Address CVE-2025-8194 in tarfile
+
+A tar archive carrying a negative member offset (reachable through a PAX
+extended header with a negative ``size`` value) caused
+:meth:`TarInfo._block` to return a negative block count, which moved the
+archive offset backwards and could trigger an infinite loop (on seekable
+files) or a ``StreamError`` (on streams). ``_block`` now rejects negative
+counts with :exc:`tarfile.InvalidHeaderError`.
+
+.. bpo: 0
+.. date: 2026-05-27
+.. nonce: as$i9+
+.. release date: 2026-05-27
+.. original section: Library
+.. section: Security
+
+Address CVE-2026-4519 and CVE-2026-4786 in webbrowser
+
+:func:`webbrowser.open` passed an attacker-controlled URL to the browser
+command line without validation, allowing a URL beginning with ``-`` to be
+interpreted as a command-line option (argument injection). The new
+``BaseBrowser._check_url`` rejects such URLs, and ``UnixBrowser.open`` now
+validates the URL after the ``%action`` substitution so that ``%action``
+cannot be used to smuggle a leading dash past the check.
+
+.. bpo: 0
+.. date: 2026-05-27
+.. nonce: as$i9+
+.. release date: 2026-05-27
+.. original section: Library
+.. section: Security
+
+Address CVE-2026-0865 in wsgiref
+
+``wsgiref.headers.Headers`` now rejects control characters in header names
+and values (in ``__init__``, ``__setitem__`` and ``add_header``), preventing
+HTTP response splitting / header injection.
+
+.. bpo: 0
+.. date: 2026-05-27
+.. nonce: as$i9+
+.. release date: 2026-05-27
+.. original section: Library
+.. section: Security
+
+Address CVE-2026-0672 in Cookie
+
+``Cookie.Morsel`` now rejects control characters in cookie keys, values,
+coded values and attribute values, preventing ``Set-Cookie`` header
+injection. Validation is performed where values are stored, so the
+``Morsel.update`` / ``|=`` / unpickling bypasses tracked as CVE-2026-3644
+(which do not exist in this module) cannot reintroduce the issue.
+
+.. bpo: 0
+.. date: 2026-05-27
+.. nonce: as$i9+
+.. release date: 2026-05-27
+.. original section: Library
+.. section: Security
+
+Address CVE-2025-15366 in imaplib
+
+``imaplib.IMAP4._command`` now rejects control characters in command
+arguments, preventing IMAP command injection via embedded CR/LF.
+
+.. bpo: 0
+.. date: 2026-05-27
+.. nonce: as$i9+
+.. release date: 2026-05-27
+.. original section: Library
+.. section: Security
+
+Address CVE-2025-15367 in poplib
+
+``poplib.POP3._putline`` (and the SSL override) now rejects control
+characters in the command line, preventing POP3 command injection via
+embedded CR/LF.
+
+.. bpo: 0
+.. date: 2026-05-27
+.. nonce: as$i9+
+.. release date: 2026-05-27
+.. original section: Library
+.. section: Security
+
+Address CVE-2026-1502 in httplib
+
+``httplib.HTTPConnection.set_tunnel`` now validates the CONNECT tunnel host
+for control characters, preventing CR/LF injection into the CONNECT request.
+
+.. bpo: 0
+.. date: 2026-05-27
+.. nonce: as$i9+
+.. release date: 2026-05-27
+.. original section: Library
+.. section: Security
+
+Address CVE-2024-6923 in email
+
+``email.generator.Generator`` now rejects a header whose serialized form
+contains a newline that is not part of valid folding, raising the new
+``email.errors.HeaderWriteError`` instead of emitting an injectable header.
+
+.. bpo: 0
+.. date: 2026-05-27
+.. nonce: as$i9+
+.. release date: 2026-05-27
+.. original section: Library
+.. section: Security
+
+Address CVE-2024-0450 in zipfile
+
+``zipfile`` now records each member's end offset and rejects archives with
+overlapping entries (a "quoted overlap" zip bomb) when an entry's compressed
+data would extend past the start of the next entry.
+
+.. bpo: 0
+.. date: 2026-05-27
+.. nonce: as$i9+
+.. release date: 2026-05-27
+.. original section: Library
+.. section: Security
+
+Address CVE-2025-8291 in zipfile
+
+``zipfile`` now validates the ZIP64 end-of-central-directory locator's
+relative offset instead of assuming the ZIP64 record is adjacent, rejecting
+archives whose locator points past the expected position.
+
+.. bpo: 0
+.. date: 2026-05-27
+.. nonce: as$i9+
+.. release date: 2026-05-27
+.. original section: Library
+.. section: Security
+
+Address CVE-2025-0938 and CVE-2024-11168 in urlparse
+
+``urlparse.urlsplit`` now allows square brackets in a URL host only when
+they enclose a valid IPv6/IPvFuture address, rejecting hosts such as
+``ex[ample].com`` or ``[not-an-ipv6]`` that previously parsed differently
+from RFC 3986 tools. This covers both the original bracketed-host
+validation (CVE-2024-11168) and the later tightening (CVE-2025-0938).
+
+.. bpo: 0
+.. date: 2026-05-27
+.. nonce: as$i9+
+.. release date: 2026-05-27
+.. original section: Library
+.. section: Security
+
+Address CVE-2025-6069 in HTMLParser
+
+``HTMLParser`` no longer has quadratic-time behaviour when input ends with
+unterminated constructs; at EOF such constructs are now closed per HTML5
+rather than being repeatedly rescanned.
+
+.. bpo: 0
+.. date: 2026-05-27
+.. nonce: as$i9+
+.. release date: 2026-05-27
+.. original section: Library
+.. section: Security
+
+Address CVE-2025-6075 in os.path
+
+``posixpath.expandvars`` and ``ntpath.expandvars`` now build their result in
+a single linear pass instead of rebuilding the whole string per
+substitution, removing quadratic-time behaviour on large inputs.
+
+.. bpo: 0
+.. date: 2026-05-27
+.. nonce: as$i9+
+.. release date: 2026-05-27
+.. original section: Library
+.. section: Security
+
+Address CVE-2025-12084 in xml.dom.minidom
+
+``xml.dom.minidom`` no longer walks the parent chain on every node mutation
+to clear the id cache, removing the quadratic cost of building deeply nested
+documents.
+
+.. bpo: 0
+.. date: 2026-05-27
+.. nonce: as$i9+
+.. release date: 2026-05-27
+.. original section: Library
+.. section: Security
+
+Address CVE-2025-13462 in tarfile
+
+``tarfile`` no longer normalizes an ``AREGTYPE`` header with a trailing-slash
+name to ``DIRTYPE`` when the header is a follow-up to a GNU long name/link or
+a pax header, so a crafted archive can no longer be made to interpret such a
+member differently from other tools.
+
+.. bpo: 0
+.. date: 2026-05-27
+.. nonce: as$i9+
+.. release date: 2026-05-27
+.. original section: Library
+.. section: Security
+
+Address CVE-2025-12781 and CVE-2026-3446 in base64
+
+``base64.b64decode`` gains a ``validate`` keyword argument (default
+``False``). When true, the input is validated against the requested
+alphabet, so the standard ``'+'``/``'/'`` characters are rejected when an
+alternative alphabet is given (CVE-2025-12781) and data after the padding is
+rejected rather than silently ignored (CVE-2026-3446).
+
+.. bpo: 0
+.. date: 2026-05-27
+.. nonce: as$i9+
+.. release date: 2026-05-27
+.. original section: Library
+.. section: Security
+
+Not affected: CVE-2025-13836, CVE-2025-15282, CVE-2025-11468, CVE-2025-1795,
+CVE-2026-3644, CVE-2024-5642 and CVE-2026-6100
+
+Several CVEs do not apply to Python 2.7 in this release. The vulnerable
+code paths are absent or already mitigated by the existing implementation:
+
+- CVE-2025-13836 (``http.client`` Content-Length-based memory exhaustion):
+ Python 2.7's ``httplib._safe_read`` reads in bounded 1 MB (``MAXAMOUNT``)
+ chunks and never pre-allocates based on ``Content-Length``.
+
+- CVE-2025-15282 (``urllib.request.DataHandler``, the ``data:`` URL handler)
+ was added in Python 3 and does not exist in 2.7.
+
+- CVE-2025-11468 and CVE-2025-1795 affect the modern
+ ``email._header_value_parser`` machinery -- its comment folding and its
+ address-list folding (which mis-encoded a separating comma). Both were
+ added in Python 3.
+
+- CVE-2026-3644 targets ``Morsel.update`` / ``|=`` / ``__setstate__``,
+ entry points that do not exist on 2.7's ``Cookie.Morsel``.
+
+- CVE-2024-5642 exploits NPN, the feature behind
+ ``ssl.set_npn_protocols``; NPN is removed in OpenSSL 1.1.1w and later,
+ against which this Python builds.
+
+- CVE-2026-6100 is a use-after-free in
+ ``bz2.BZ2Decompressor`` / ``lzma.LZMADecompressor`` /
+ ``zlib._ZlibDecompressor`` when a ``MemoryError`` leaves ``next_in``
+ dangling and the same decompressor is reused. ``lzma`` and the
+ ``_ZlibDecompressor`` object (Python 3.12+) do not exist in 2.7; 2.7's
+ legacy ``bz2.BZ2Decompressor`` and zlib ``compobject`` re-set
+ ``next_in`` fresh from the argument buffer on every call and persist
+ leftover input as owned Python string objects
+ (``unused_data`` / ``unconsumed_tail``), so no dangling raw pointer is
+ carried across calls.
diff --git a/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
new file mode 100644
index 00000000000000..3d0e9e4078c934
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
@@ -0,0 +1,8 @@
+:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now
+return ``('', '')`` 2-tuples in more situations where invalid email
+addresses are encountered instead of potentially inaccurate values. Add
+optional *strict* parameter to these two functions: use ``strict=False`` to
+get the old behavior, accept malformed inputs.
+``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to check
+if the *strict* parameter is available. Patch by Thomas Dwyer and Victor
+Stinner to improve the CVE-2023-27043 fix.
diff --git a/Modules/expat/COPYING b/Modules/expat/COPYING
index 8d288f0f28fddd..ce9e5939291e45 100644
--- a/Modules/expat/COPYING
+++ b/Modules/expat/COPYING
@@ -1,5 +1,5 @@
Copyright (c) 1998-2000 Thai Open Source Software Center Ltd and Clark Cooper
-Copyright (c) 2001-2017 Expat maintainers
+Copyright (c) 2001-2022 Expat maintainers
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
diff --git a/Modules/expat/ascii.h b/Modules/expat/ascii.h
index c3587e57332bff..1f594d2e54b4d2 100644
--- a/Modules/expat/ascii.h
+++ b/Modules/expat/ascii.h
@@ -6,8 +6,11 @@
\___/_/\_\ .__/ \__,_|\__|
|_| XML parser
- Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 1999-2000 Thai Open Source Software Center Ltd
+ Copyright (c) 2000 Clark Cooper
+ Copyright (c) 2002 Fred L. Drake, Jr.
+ Copyright (c) 2007 Karl Waclawek
+ Copyright (c) 2017 Sebastian Pipping
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/Modules/expat/asciitab.h b/Modules/expat/asciitab.h
index 63b1d1b4482efa..af766fb24785ea 100644
--- a/Modules/expat/asciitab.h
+++ b/Modules/expat/asciitab.h
@@ -7,7 +7,9 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper
+ Copyright (c) 2002 Fred L. Drake, Jr.
+ Copyright (c) 2017 Sebastian Pipping
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h
index 6c8eb1fda4a30c..523b37d8d5787d 100644
--- a/Modules/expat/expat.h
+++ b/Modules/expat/expat.h
@@ -7,7 +7,18 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper
+ Copyright (c) 2000-2005 Fred L. Drake, Jr.
+ Copyright (c) 2001-2002 Greg Stein
+ Copyright (c) 2002-2016 Karl Waclawek
+ Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016 Cristian Rodríguez
+ Copyright (c) 2016 Thomas Beutlich
+ Copyright (c) 2017 Rhodri James
+ Copyright (c) 2022 Thijs Schreijer
+ Copyright (c) 2023 Hanno Böck
+ Copyright (c) 2023 Sony Corporation / Snild Dolkow
+ Copyright (c) 2024 Taichi Haradaguchi <20001722@ymail.ne.jp>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -115,7 +126,13 @@ enum XML_Error {
XML_ERROR_RESERVED_PREFIX_XMLNS,
XML_ERROR_RESERVED_NAMESPACE_URI,
/* Added in 2.2.1. */
- XML_ERROR_INVALID_ARGUMENT
+ XML_ERROR_INVALID_ARGUMENT,
+ /* Added in 2.3.0. */
+ XML_ERROR_NO_BUFFER,
+ /* Added in 2.4.0. */
+ XML_ERROR_AMPLIFICATION_LIMIT_BREACH,
+ /* Added in 2.6.4. */
+ XML_ERROR_NOT_STARTED,
};
enum XML_Content_Type {
@@ -163,8 +180,10 @@ struct XML_cp {
};
/* This is called for an element declaration. See above for
- description of the model argument. It's the caller's responsibility
- to free model when finished with it.
+ description of the model argument. It's the user code's responsibility
+ to free model when finished with it. See XML_FreeContentModel.
+ There is no need to free the model from the handler, it can be kept
+ around and freed at a later stage.
*/
typedef void(XMLCALL *XML_ElementDeclHandler)(void *userData,
const XML_Char *name,
@@ -226,6 +245,17 @@ XML_ParserCreate(const XML_Char *encoding);
and the local part will be concatenated without any separator.
It is a programming error to use the separator '\0' with namespace
triplets (see XML_SetReturnNSTriplet).
+ If a namespace separator is chosen that can be part of a URI or
+ part of an XML name, splitting an expanded name back into its
+ 1, 2 or 3 original parts on application level in the element handler
+ may end up vulnerable, so these are advised against; sane choices for
+ a namespace separator are e.g. '\n' (line feed) and '|' (pipe).
+
+ Note that Expat does not validate namespace URIs (beyond encoding)
+ against RFC 3986 today (and is not required to do so with regard to
+ the XML 1.0 namespaces specification) but it may start doing that
+ in future releases. Before that, an application using Expat must
+ be ready to receive namespace URIs containing non-URI characters.
*/
XMLPARSEAPI(XML_Parser)
XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
@@ -244,7 +274,7 @@ XML_ParserCreate_MM(const XML_Char *encoding,
const XML_Memory_Handling_Suite *memsuite,
const XML_Char *namespaceSeparator);
-/* Prepare a parser object to be re-used. This is particularly
+/* Prepare a parser object to be reused. This is particularly
valuable when memory allocation overhead is disproportionately high,
such as when a large number of small documnents need to be parsed.
All handlers are cleared from the parser, except for the
@@ -306,7 +336,7 @@ typedef void(XMLCALL *XML_StartDoctypeDeclHandler)(void *userData,
const XML_Char *pubid,
int has_internal_subset);
-/* This is called for the start of the DOCTYPE declaration when the
+/* This is called for the end of the DOCTYPE declaration when the
closing > is encountered, but after processing any external
subset.
*/
@@ -318,7 +348,7 @@ typedef void(XMLCALL *XML_EndDoctypeDeclHandler)(void *userData);
For internal entities (<!ENTITY foo "bar">), value will
be non-NULL and systemId, publicID, and notationName will be NULL.
- The value string is NOT nul-terminated; the length is provided in
+ The value string is NOT null-terminated; the length is provided in
the value_length argument. Since it is legal to have zero-length
values, do not use this argument to test for internal entities.
@@ -513,7 +543,7 @@ typedef struct {
Otherwise it must return XML_STATUS_ERROR.
If info does not describe a suitable encoding, then the parser will
- return an XML_UNKNOWN_ENCODING error.
+ return an XML_ERROR_UNKNOWN_ENCODING error.
*/
typedef int(XMLCALL *XML_UnknownEncodingHandler)(void *encodingHandlerData,
const XML_Char *name,
@@ -707,7 +737,7 @@ XML_GetBase(XML_Parser parser);
/* Returns the number of the attribute/value pairs passed in last call
to the XML_StartElementHandler that were specified in the start-tag
rather than defaulted. Each attribute/value pair counts as 2; thus
- this correspondds to an index into the atts array passed to the
+ this corresponds to an index into the atts array passed to the
XML_StartElementHandler. Returns -1 if parser == NULL.
*/
XMLPARSEAPI(int)
@@ -716,7 +746,7 @@ XML_GetSpecifiedAttributeCount(XML_Parser parser);
/* Returns the index of the ID attribute passed in the last call to
XML_StartElementHandler, or -1 if there is no ID attribute or
parser == NULL. Each attribute/value pair counts as 2; thus this
- correspondds to an index into the atts array passed to the
+ corresponds to an index into the atts array passed to the
XML_StartElementHandler.
*/
XMLPARSEAPI(int)
@@ -926,7 +956,7 @@ XMLPARSEAPI(XML_Index) XML_GetCurrentByteIndex(XML_Parser parser);
XMLPARSEAPI(int)
XML_GetCurrentByteCount(XML_Parser parser);
-/* If XML_CONTEXT_BYTES is defined, returns the input buffer, sets
+/* If XML_CONTEXT_BYTES is >=1, returns the input buffer, sets
the integer pointed to by offset to the offset within this buffer
of the current parse position, and sets the integer pointed to by size
to the size of this buffer (the number of input bytes). Otherwise
@@ -997,7 +1027,12 @@ enum XML_FeatureEnum {
XML_FEATURE_SIZEOF_XML_LCHAR,
XML_FEATURE_NS,
XML_FEATURE_LARGE_SIZE,
- XML_FEATURE_ATTR_INFO
+ XML_FEATURE_ATTR_INFO,
+ /* Added in Expat 2.4.0. */
+ XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
+ XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
+ /* Added in Expat 2.6.0. */
+ XML_FEATURE_GE
/* Additional features must be added to the end of this enum. */
};
@@ -1010,12 +1045,30 @@ typedef struct {
XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);
+#if defined(XML_DTD) || (defined(XML_GE) && XML_GE == 1)
+/* Added in Expat 2.4.0 for XML_DTD defined and
+ * added in Expat 2.6.0 for XML_GE == 1. */
+XMLPARSEAPI(XML_Bool)
+XML_SetBillionLaughsAttackProtectionMaximumAmplification(
+ XML_Parser parser, float maximumAmplificationFactor);
+
+/* Added in Expat 2.4.0 for XML_DTD defined and
+ * added in Expat 2.6.0 for XML_GE == 1. */
+XMLPARSEAPI(XML_Bool)
+XML_SetBillionLaughsAttackProtectionActivationThreshold(
+ XML_Parser parser, unsigned long long activationThresholdBytes);
+#endif
+
+/* Added in Expat 2.6.0. */
+XMLPARSEAPI(XML_Bool)
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
+
/* Expat follows the semantic versioning convention.
- See http://semver.org.
+ See https://semver.org
*/
#define XML_MAJOR_VERSION 2
-#define XML_MINOR_VERSION 2
-#define XML_MICRO_VERSION 8
+#define XML_MINOR_VERSION 6
+#define XML_MICRO_VERSION 4
#ifdef __cplusplus
}
diff --git a/Modules/expat/expat_config.h b/Modules/expat/expat_config.h
index b8c1639b9769ab..c3967f03cec52e 100644
--- a/Modules/expat/expat_config.h
+++ b/Modules/expat/expat_config.h
@@ -14,6 +14,7 @@
#define XML_NS 1
#define XML_DTD 1
+#define XML_GE 1
#define XML_CONTEXT_BYTES 1024
#endif /* EXPAT_CONFIG_H */
diff --git a/Modules/expat/expat_external.h b/Modules/expat/expat_external.h
index f2b75dda8e2798..8829f77091047a 100644
--- a/Modules/expat/expat_external.h
+++ b/Modules/expat/expat_external.h
@@ -7,7 +7,14 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper
+ Copyright (c) 2000-2004 Fred L. Drake, Jr.
+ Copyright (c) 2001-2002 Greg Stein
+ Copyright (c) 2002-2006 Karl Waclawek
+ Copyright (c) 2016 Cristian Rodríguez
+ Copyright (c) 2016-2019 Sebastian Pipping
+ Copyright (c) 2017 Rhodri James
+ Copyright (c) 2018 Yury Gribov
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -57,11 +64,6 @@
compiled with the cdecl calling convention as the default since
system headers may assume the cdecl convention.
*/
-
-/* Namespace external symbols to allow multiple libexpat version to
- co-exist. */
-#include "pyexpatns.h"
-
#ifndef XMLCALL
# if defined(_MSC_VER)
# define XMLCALL __cdecl
diff --git a/Modules/expat/iasciitab.h b/Modules/expat/iasciitab.h
index ea97cfcf678e06..5d8646f2a318b8 100644
--- a/Modules/expat/iasciitab.h
+++ b/Modules/expat/iasciitab.h
@@ -7,7 +7,9 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper
+ Copyright (c) 2002 Fred L. Drake, Jr.
+ Copyright (c) 2017 Sebastian Pipping
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/Modules/expat/internal.h b/Modules/expat/internal.h
index 60913dab762f8f..167ec36804a43b 100644
--- a/Modules/expat/internal.h
+++ b/Modules/expat/internal.h
@@ -25,8 +25,14 @@
\___/_/\_\ .__/ \__,_|\__|
|_| XML parser
- Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2002-2003 Fred L. Drake, Jr.
+ Copyright (c) 2002-2006 Karl Waclawek
+ Copyright (c) 2003 Greg Stein
+ Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2018 Yury Gribov
+ Copyright (c) 2019 David Loffredo
+ Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow
+ Copyright (c) 2024 Taichi Haradaguchi <20001722@ymail.ne.jp>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -101,22 +107,69 @@
# endif
#endif
+#include <limits.h> // ULONG_MAX
+
+#if defined(_WIN32) \
+ && (! defined(__USE_MINGW_ANSI_STDIO) \
+ || (1 - __USE_MINGW_ANSI_STDIO - 1 == 0))
+# define EXPAT_FMT_ULL(midpart) "%" midpart "I64u"
+# if defined(_WIN64) // Note: modifiers "td" and "zu" do not work for MinGW
+# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "I64d"
+# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "I64u"
+# else
+# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d"
+# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u"
+# endif
+#else
+# define EXPAT_FMT_ULL(midpart) "%" midpart "llu"
+# if ! defined(ULONG_MAX)
+# error Compiler did not define ULONG_MAX for us
+# elif ULONG_MAX == 18446744073709551615u // 2^64-1
+# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld"
+# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "lu"
+# else
+# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d"
+# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u"
+# endif
+#endif
+
#ifndef UNUSED_P
# define UNUSED_P(p) (void)p
#endif
+/* NOTE BEGIN If you ever patch these defaults to greater values
+ for non-attack XML payload in your environment,
+ please file a bug report with libexpat. Thank you!
+*/
+#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT \
+ 100.0f
+#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT \
+ 8388608 // 8 MiB, 2^23
+/* NOTE END */
+
+#include "expat.h" // so we can use type XML_Parser below
+
#ifdef __cplusplus
extern "C" {
#endif
-#ifdef XML_ENABLE_VISIBILITY
-# if XML_ENABLE_VISIBILITY
-__attribute__((visibility("default")))
-# endif
+void _INTERNAL_trim_to_complete_utf8_characters(const char *from,
+ const char **fromLimRef);
+
+#if defined(XML_GE) && XML_GE == 1
+unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser);
+unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser);
+const char *unsignedCharToPrintable(unsigned char c);
+#endif
+
+extern
+#if ! defined(XML_TESTING)
+ const
+#endif
+ XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c
+#if defined(XML_TESTING)
+extern unsigned int g_bytesScanned; // used for testing only
#endif
-void
-_INTERNAL_trim_to_complete_utf8_characters(const char *from,
- const char **fromLimRef);
#ifdef __cplusplus
}
diff --git a/Modules/expat/latin1tab.h b/Modules/expat/latin1tab.h
index 6f916041355a86..b681d278af6569 100644
--- a/Modules/expat/latin1tab.h
+++ b/Modules/expat/latin1tab.h
@@ -7,7 +7,9 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper
+ Copyright (c) 2002 Fred L. Drake, Jr.
+ Copyright (c) 2017 Sebastian Pipping
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/Modules/expat/nametab.h b/Modules/expat/nametab.h
index 3681df348eebd6..63485446b96727 100644
--- a/Modules/expat/nametab.h
+++ b/Modules/expat/nametab.h
@@ -6,8 +6,8 @@
\___/_/\_\ .__/ \__,_|\__|
|_| XML parser
- Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper
+ Copyright (c) 2017 Sebastian Pipping
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/Modules/expat/siphash.h b/Modules/expat/siphash.h
index bfee65a332f1bf..04f6f74585b5a2 100644
--- a/Modules/expat/siphash.h
+++ b/Modules/expat/siphash.h
@@ -11,6 +11,9 @@
* --------------------------------------------------------------------------
* HISTORY:
*
+ * 2020-10-03 (Sebastian Pipping)
+ * - Drop support for Visual Studio 9.0/2008 and earlier
+ *
* 2019-08-03 (Sebastian Pipping)
* - Mark part of sip24_valid as to be excluded from clang-format
* - Re-format code using clang-format 9
@@ -96,22 +99,14 @@
#define SIPHASH_H
#include <stddef.h> /* size_t */
-
-#if defined(_WIN32) && defined(_MSC_VER) && (_MSC_VER < 1600)
-/* For vs2003/7.1 up to vs2008/9.0; _MSC_VER 1600 is vs2010/10.0 */
-typedef unsigned __int8 uint8_t;
-typedef unsigned __int32 uint32_t;
-typedef unsigned __int64 uint64_t;
-#else
-# include <stdint.h> /* uint64_t uint32_t uint8_t */
-#endif
+#include <stdint.h> /* uint64_t uint32_t uint8_t */
/*
* Workaround to not require a C++11 compiler for using ULL suffix
* if this code is included and compiled as C++; related GCC warning is:
* warning: use of C++11 long long integer constant [-Wlong-long]
*/
-#define _SIP_ULL(high, low) (((uint64_t)high << 32) | low)
+#define SIP_ULL(high, low) ((((uint64_t)high) << 32) | (low))
#define SIP_ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
@@ -131,8 +126,7 @@ typedef unsigned __int64 uint64_t;
| ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) \
| ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
-#define SIPHASH_INITIALIZER \
- { 0, 0, 0, 0, {0}, 0, 0 }
+#define SIPHASH_INITIALIZER {0, 0, 0, 0, {0}, 0, 0}
struct siphash {
uint64_t v0, v1, v2, v3;
@@ -195,10 +189,10 @@ sip_round(struct siphash *H, const int rounds) {
static struct siphash *
sip24_init(struct siphash *H, const struct sipkey *key) {
- H->v0 = _SIP_ULL(0x736f6d65U, 0x70736575U) ^ key->k[0];
- H->v1 = _SIP_ULL(0x646f7261U, 0x6e646f6dU) ^ key->k[1];
- H->v2 = _SIP_ULL(0x6c796765U, 0x6e657261U) ^ key->k[0];
- H->v3 = _SIP_ULL(0x74656462U, 0x79746573U) ^ key->k[1];
+ H->v0 = SIP_ULL(0x736f6d65U, 0x70736575U) ^ key->k[0];
+ H->v1 = SIP_ULL(0x646f7261U, 0x6e646f6dU) ^ key->k[1];
+ H->v2 = SIP_ULL(0x6c796765U, 0x6e657261U) ^ key->k[0];
+ H->v3 = SIP_ULL(0x74656462U, 0x79746573U) ^ key->k[1];
H->p = H->buf;
H->c = 0;
diff --git a/Modules/expat/utf8tab.h b/Modules/expat/utf8tab.h
index a22986acbb9526..88efcf91cc16a6 100644
--- a/Modules/expat/utf8tab.h
+++ b/Modules/expat/utf8tab.h
@@ -7,7 +7,9 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper
+ Copyright (c) 2002 Fred L. Drake, Jr.
+ Copyright (c) 2017 Sebastian Pipping
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/Modules/expat/winconfig.h b/Modules/expat/winconfig.h
index 562a4a82dc1d63..05805514ec7fa2 100644
--- a/Modules/expat/winconfig.h
+++ b/Modules/expat/winconfig.h
@@ -6,8 +6,11 @@
\___/_/\_\ .__/ \__,_|\__|
|_| XML parser
- Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper
+ Copyright (c) 2002 Greg Stein
+ Copyright (c) 2005 Karl Waclawek
+ Copyright (c) 2017-2023 Sebastian Pipping
+ Copyright (c) 2023 Orgad Shaneh
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -33,24 +36,13 @@
#ifndef WINCONFIG_H
#define WINCONFIG_H
-#define WIN32_LEAN_AND_MEAN
+#ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+#endif
#include <windows.h>
#undef WIN32_LEAN_AND_MEAN
#include <memory.h>
#include <string.h>
-#if defined(HAVE_EXPAT_CONFIG_H) /* e.g. MinGW */
-# include <expat_config.h>
-#else /* !defined(HAVE_EXPAT_CONFIG_H) */
-
-# define XML_NS 1
-# define XML_DTD 1
-# define XML_CONTEXT_BYTES 1024
-
-/* we will assume all Windows platforms are little endian */
-# define BYTEORDER 1234
-
-#endif /* !defined(HAVE_EXPAT_CONFIG_H) */
-
#endif /* ndef WINCONFIG_H */
diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c
index 09ccacb5aae596..836db6c0b97f14 100644
--- a/Modules/expat/xmlparse.c
+++ b/Modules/expat/xmlparse.c
@@ -1,4 +1,4 @@
-/* f2d0ab6d1d4422a08cf1cf3bbdfba96b49dea42fb5ff4615e03a2a25c306e769 (2.2.8+)
+/* c5625880f4bf417c1463deee4eb92d86ff413f802048621c57e25fe483eb59e4 (2.6.4+)
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
@@ -7,7 +7,40 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper
+ Copyright (c) 2000-2006 Fred L. Drake, Jr.
+ Copyright (c) 2001-2002 Greg Stein
+ Copyright (c) 2002-2016 Karl Waclawek
+ Copyright (c) 2005-2009 Steven Solie
+ Copyright (c) 2016 Eric Rahm
+ Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016 Gaurav
+ Copyright (c) 2016 Thomas Beutlich
+ Copyright (c) 2016 Gustavo Grieco
+ Copyright (c) 2016 Pascal Cuoq
+ Copyright (c) 2016 Ed Schouten
+ Copyright (c) 2017-2022 Rhodri James
+ Copyright (c) 2017 Václav Slavík
+ Copyright (c) 2017 Viktor Szakats
+ Copyright (c) 2017 Chanho Park
+ Copyright (c) 2017 Rolf Eike Beer
+ Copyright (c) 2017 Hans Wennborg
+ Copyright (c) 2018 Anton Maklakov
+ Copyright (c) 2018 Benjamin Peterson
+ Copyright (c) 2018 Marco Maggi
+ Copyright (c) 2018 Mariusz Zaborski
+ Copyright (c) 2019 David Loffredo
+ Copyright (c) 2019-2020 Ben Wagner
+ Copyright (c) 2019 Vadim Zeitlin
+ Copyright (c) 2021 Donghee Na
+ Copyright (c) 2022 Samanta Navarro
+ Copyright (c) 2022 Jeffrey Walton
+ Copyright (c) 2022 Jann Horn
+ Copyright (c) 2022 Sean McBride
+ Copyright (c) 2023 Owain Davies
+ Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow
+ Copyright (c) 2024 Berkay Eren Ürün
+ Copyright (c) 2024 Hanno Böck
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -30,27 +63,45 @@
USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#if ! defined(_GNU_SOURCE)
-# define _GNU_SOURCE 1 /* syscall prototype */
+#define XML_BUILDING_EXPAT 1
+
+#include "expat_config.h"
+
+#if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
+# error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
#endif
-#ifdef _WIN32
-/* force stdlib to define rand_s() */
-# define _CRT_RAND_S
+#if defined(XML_DTD) && XML_GE == 0
+# error Either undefine XML_DTD or define XML_GE to 1.
+#endif
+
+#if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \
+ || (XML_CONTEXT_BYTES + 0 < 0)
+# error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
+#endif
+
+#if defined(HAVE_SYSCALL_GETRANDOM)
+# if ! defined(_GNU_SOURCE)
+# define _GNU_SOURCE 1 /* syscall prototype */
+# endif
#endif
#ifdef _WIN32
-# include "winconfig.h"
-#elif defined(HAVE_EXPAT_CONFIG_H)
-# include <expat_config.h>
-#endif /* ndef _WIN32 */
+/* force stdlib to define rand_s() */
+# if ! defined(_CRT_RAND_S)
+# define _CRT_RAND_S
+# endif
+#endif
+#include <stdbool.h>
#include <stddef.h>
#include <string.h> /* memset(), memcpy() */
#include <assert.h>
#include <limits.h> /* UINT_MAX */
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* getenv, rand_s */
+#include <stdint.h> /* uintptr_t */
+#include <math.h> /* isnan */
#ifdef _WIN32
# define getpid GetCurrentProcessId
@@ -62,7 +113,9 @@
# include <errno.h>
#endif
-#define XML_BUILDING_EXPAT 1
+#ifdef _WIN32
+# include "winconfig.h"
+#endif
#include "ascii.h"
#include "expat.h"
@@ -97,14 +150,14 @@
enabled. For end user security, that is probably not what you want. \
\
Your options include: \
- * Linux + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
- * Linux + glibc <2.25 (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
- * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
- * BSD / macOS <10.7 (arc4random): HAVE_ARC4RANDOM, \
+ * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
+ * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
+ * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
+ * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
* libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
* libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
- * Linux / BSD / macOS (/dev/urandom): XML_DEV_URANDOM \
- * Windows (rand_s): _WIN32. \
+ * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
+ * Windows >=Vista (rand_s): _WIN32. \
\
If insist on not using any of these, bypass this error by defining \
XML_POOR_ENTROPY; you have been warned. \
@@ -119,9 +172,7 @@
# define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
# define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
# define XmlEncode XmlUtf16Encode
-/* Using pointer subtraction to convert to integer type. */
-# define MUST_CONVERT(enc, s) \
- (! (enc)->isUtf16 || (((char *)(s) - (char *)NULL) & 1))
+# define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
typedef unsigned short ICHAR;
#else
# define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
@@ -161,11 +212,13 @@ typedef char ICHAR;
#endif
/* Round up n to be a multiple of sz, where sz is a power of 2. */
-#define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1))
+#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
/* Do safe (NULL-aware) pointer arithmetic */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
+#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
+
#include "internal.h"
#include "xmltok.h"
#include "xmlrole.h"
@@ -197,7 +250,7 @@ static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
it odd, since odd numbers are always relative prime to a power of 2.
*/
#define SECOND_HASH(hash, mask, power) \
- ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2))
+ ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power) \
((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
@@ -228,7 +281,7 @@ typedef struct binding {
typedef struct prefix {
const XML_Char *name;
BINDING *binding;
-} PREFIX;
+} PPREFIX;
typedef struct {
const XML_Char *str;
@@ -243,13 +296,13 @@ typedef struct {
The name of the element is stored in both the document and API
encodings. The memory buffer 'buf' is a separately-allocated
memory area which stores the name. During the XML_Parse()/
- XMLParseBuffer() when the element is open, the memory for the 'raw'
+ XML_ParseBuffer() when the element is open, the memory for the 'raw'
version of the name (in the document encoding) is shared with the
document buffer. If the element is open across calls to
XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
contain the 'raw' name as well.
- A parser re-uses these structures, maintaining a list of allocated
+ A parser reuses these structures, maintaining a list of allocated
TAG objects in a free list.
*/
typedef struct tag {
@@ -307,7 +360,7 @@ typedef struct {
an attribute has been specified. */
typedef struct attribute_id {
XML_Char *name;
- PREFIX *prefix;
+ PPREFIX *prefix;
XML_Bool maybeTokenized;
XML_Bool xmlns;
} ATTRIBUTE_ID;
@@ -326,7 +379,7 @@ typedef struct {
typedef struct {
const XML_Char *name;
- PREFIX *prefix;
+ PPREFIX *prefix;
const ATTRIBUTE_ID *idAtt;
int nDefaultAtts;
int allocDefaultAtts;
@@ -351,7 +404,7 @@ typedef struct {
XML_Bool paramEntityRead;
HASH_TABLE paramEntities;
#endif /* XML_DTD */
- PREFIX defaultPrefix;
+ PPREFIX defaultPrefix;
/* === scaffolding for building content model === */
XML_Bool in_eldecl;
CONTENT_SCAFFOLD *scaffold;
@@ -371,6 +424,31 @@ typedef struct open_internal_entity {
XML_Bool betweenDecl; /* WFC: PE Between Declarations */
} OPEN_INTERNAL_ENTITY;
+enum XML_Account { /* accounting category; passed as the `account` argument of doProlog, doContent, doCdataSection, storeAtts and the attribute/entity value helpers */
+ XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */
+ XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
+ expansion */
+ XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */
+};
+
+#if XML_GE == 1
+typedef unsigned long long XmlBigCount; /* counter type used for the byte totals below */
+typedef struct accounting { /* per-parser byte accounting state; zeroed with memset in parserInit */
+ XmlBigCount countBytesDirect; /* presumably bytes accounted as XML_ACCOUNT_DIRECT -- confirm in accountingDiffTolerated */
+ XmlBigCount countBytesIndirect; /* presumably bytes accounted as XML_ACCOUNT_ENTITY_EXPANSION -- confirm */
+ unsigned long debugLevel; /* parserInit reads this from getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u) */
+ float maximumAmplificationFactor; // >=1.0; parserInit sets EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT
+ unsigned long long activationThresholdBytes; /* NOTE(review): presumably the byte count below which the amplification limit is not enforced -- confirm */
+} ACCOUNTING;
+
+typedef struct entity_stats { /* per-parser entity statistics; NOTE(review): presumably maintained by entityTrackingOnOpen/entityTrackingOnClose -- confirm */
+ unsigned int countEverOpened; /* presumably total number of entities opened so far -- confirm */
+ unsigned int currentDepth; /* presumably current nesting depth of open entities -- confirm */
+ unsigned int maximumDepthSeen; /* presumably high-water mark of currentDepth -- confirm */
+ unsigned long debugLevel; /* debug verbosity; the initialising code is outside this view */
+} ENTITY_STATS;
+#endif /* XML_GE == 1 */
+
typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
const char *end, const char **endPtr);
@@ -401,43 +479,55 @@ static enum XML_Error initializeEncoding(XML_Parser parser);
static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
const char *s, const char *end, int tok,
const char *next, const char **nextPtr,
- XML_Bool haveMore, XML_Bool allowClosingDoctype);
+ XML_Bool haveMore, XML_Bool allowClosingDoctype,
+ enum XML_Account account);
static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
XML_Bool betweenDecl);
static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
const ENCODING *enc, const char *start,
const char *end, const char **endPtr,
- XML_Bool haveMore);
-static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *,
+ XML_Bool haveMore, enum XML_Account account);
+static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
const char **startPtr, const char *end,
- const char **nextPtr, XML_Bool haveMore);
+ const char **nextPtr, XML_Bool haveMore,
+ enum XML_Account account);
#ifdef XML_DTD
-static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *,
+static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
const char **startPtr, const char *end,
const char **nextPtr, XML_Bool haveMore);
#endif /* XML_DTD */
static void freeBindings(XML_Parser parser, BINDING *bindings);
-static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *,
- const char *s, TAG_NAME *tagNamePtr,
- BINDING **bindingsPtr);
-static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
+static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
+ const char *attStr, TAG_NAME *tagNamePtr,
+ BINDING **bindingsPtr,
+ enum XML_Account account);
+static enum XML_Error addBinding(XML_Parser parser, PPREFIX *prefix,
const ATTRIBUTE_ID *attId, const XML_Char *uri,
BINDING **bindingsPtr);
-static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
- XML_Bool isId, const XML_Char *dfltValue,
- XML_Parser parser);
-static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *,
- XML_Bool isCdata, const char *,
- const char *, STRING_POOL *);
-static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *,
- XML_Bool isCdata, const char *,
- const char *, STRING_POOL *);
+static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
+ XML_Bool isCdata, XML_Bool isId,
+ const XML_Char *value, XML_Parser parser);
+static enum XML_Error storeAttributeValue(XML_Parser parser,
+ const ENCODING *enc, XML_Bool isCdata,
+ const char *ptr, const char *end,
+ STRING_POOL *pool,
+ enum XML_Account account);
+static enum XML_Error appendAttributeValue(XML_Parser parser,
+ const ENCODING *enc,
+ XML_Bool isCdata, const char *ptr,
+ const char *end, STRING_POOL *pool,
+ enum XML_Account account);
static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end);
-static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
+static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
+#if XML_GE == 1
static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
- const char *start, const char *end);
+ const char *start, const char *end,
+ enum XML_Account account);
+#else
+static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
+#endif
static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end);
static int reportComment(XML_Parser parser, const ENCODING *enc,
@@ -457,21 +547,22 @@ static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
const XML_Memory_Handling_Suite *ms);
static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
const XML_Memory_Handling_Suite *ms);
-static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *, STRING_POOL *,
- const HASH_TABLE *);
+static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
+ STRING_POOL *newPool, const HASH_TABLE *oldTable);
static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
size_t createSize);
-static void FASTCALL hashTableInit(HASH_TABLE *,
+static void FASTCALL hashTableInit(HASH_TABLE *table,
const XML_Memory_Handling_Suite *ms);
-static void FASTCALL hashTableClear(HASH_TABLE *);
-static void FASTCALL hashTableDestroy(HASH_TABLE *);
-static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
-static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
+static void FASTCALL hashTableClear(HASH_TABLE *table);
+static void FASTCALL hashTableDestroy(HASH_TABLE *table);
+static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
+ const HASH_TABLE *table);
+static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
-static void FASTCALL poolInit(STRING_POOL *,
+static void FASTCALL poolInit(STRING_POOL *pool,
const XML_Memory_Handling_Suite *ms);
-static void FASTCALL poolClear(STRING_POOL *);
-static void FASTCALL poolDestroy(STRING_POOL *);
+static void FASTCALL poolClear(STRING_POOL *pool);
+static void FASTCALL poolDestroy(STRING_POOL *pool);
static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
const char *ptr, const char *end);
static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
@@ -501,8 +592,35 @@ static XML_Parser parserCreate(const XML_Char *encodingName,
static void parserInit(XML_Parser parser, const XML_Char *encodingName);
+#if XML_GE == 1
+static float accountingGetCurrentAmplification(XML_Parser rootParser);
+static void accountingReportStats(XML_Parser originParser, const char *epilog);
+static void accountingOnAbort(XML_Parser originParser);
+static void accountingReportDiff(XML_Parser rootParser,
+ unsigned int levelsAwayFromRootParser,
+ const char *before, const char *after,
+ ptrdiff_t bytesMore, int source_line,
+ enum XML_Account account);
+static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
+ const char *before, const char *after,
+ int source_line,
+ enum XML_Account account);
+
+static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
+ const char *action, int sourceLine);
+static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
+ int sourceLine);
+static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
+ int sourceLine);
+
+static XML_Parser getRootParserOf(XML_Parser parser,
+ unsigned int *outLevelDiff);
+#endif /* XML_GE == 1 */
+
+static unsigned long getDebugLevel(const char *variableName,
+ unsigned long defaultDebugLevel);
+
#define poolStart(pool) ((pool)->start)
-#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--(pool->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
@@ -513,21 +631,41 @@ static void parserInit(XML_Parser parser, const XML_Char *encodingName);
? 0 \
: ((*((pool)->ptr)++ = c), 1))
+#if ! defined(XML_TESTING)
+const
+#endif
+ XML_Bool g_reparseDeferralEnabledDefault
+ = XML_TRUE; // write ONLY in runtests.c
+#if defined(XML_TESTING)
+unsigned int g_bytesScanned = 0; // used for testing only
+#endif
+
struct XML_ParserStruct {
/* The first member must be m_userData so that the XML_GetUserData
macro works. */
void *m_userData;
void *m_handlerArg;
- char *m_buffer;
+
+ // How the four parse buffer pointers below relate in time and space:
+ //
+ // m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim
+ // | | | |
+ // <--parsed-->| | |
+ // <---parsing--->| |
+ // <--unoccupied-->|
+ // <---------total-malloced/realloced-------->|
+
+ char *m_buffer; // malloc/realloc base pointer of parse buffer
const XML_Memory_Handling_Suite m_mem;
- /* first character to be parsed */
- const char *m_bufferPtr;
- /* past last character to be parsed */
- char *m_bufferEnd;
- /* allocated end of m_buffer */
- const char *m_bufferLim;
+ const char *m_bufferPtr; // first character to be parsed
+ char *m_bufferEnd; // past last character to be parsed
+ const char *m_bufferLim; // allocated end of m_buffer
+
XML_Index m_parseEndByteIndex;
const char *m_parseEndPtr;
+ size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
+ XML_Bool m_reparseDeferralEnabled;
+ int m_lastBufferRequestSize;
XML_Char *m_dataBuf;
XML_Char *m_dataBufEnd;
XML_StartElementHandler m_startElementHandler;
@@ -614,6 +752,10 @@ struct XML_ParserStruct {
enum XML_ParamEntityParsing m_paramEntityParsing;
#endif
unsigned long m_hash_secret_salt;
+#if XML_GE == 1
+ ACCOUNTING m_accounting;
+ ENTITY_STATS m_entity_stats;
+#endif
};
#define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s)))
@@ -627,11 +769,11 @@ XML_ParserCreate(const XML_Char *encodingName) {
XML_Parser XMLCALL
XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
- XML_Char tmp[2];
- *tmp = nsSep;
+ XML_Char tmp[2] = {nsSep, 0};
return XML_ParserCreate_MM(encodingName, NULL, tmp);
}
+// "xml=http://www.w3.org/XML/1998/namespace"
static const XML_Char implicitContext[]
= {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
@@ -734,6 +876,15 @@ writeRandomBytes_arc4random(void *target, size_t count) {
#ifdef _WIN32
+/* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
+ as it didn't declare it in its header prior to version 5.3.0 of its
+ runtime package (mingwrt, containing stdlib.h). The upstream fix
+ was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
+# if defined(__MINGW32__) && defined(__MINGW32_VERSION) \
+ && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
+__declspec(dllimport) int rand_s(unsigned int *);
+# endif
+
/* Obtain entropy on Windows using the rand_s() function which
* generates cryptographically secure random numbers. Internally it
* uses RtlGenRandom API which is present in Windows XP and later.
@@ -789,9 +940,8 @@ gather_time_entropy(void) {
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
- const char *const EXPAT_ENTROPY_DEBUG = getenv("EXPAT_ENTROPY_DEBUG");
- if (EXPAT_ENTROPY_DEBUG && ! strcmp(EXPAT_ENTROPY_DEBUG, "1")) {
- fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
+ if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) {
+ fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
(int)sizeof(entropy) * 2, entropy, (unsigned long)sizeof(entropy));
}
return entropy;
@@ -847,6 +997,49 @@ get_hash_secret_salt(XML_Parser parser) {
return parser->m_hash_secret_salt;
}
+static enum XML_Error /* Gate in front of parser->m_processor: either forwards [start, end) to it, or defers the attempt by returning XML_ERROR_NONE with *endPtr == start */
+callProcessor(XML_Parser parser, const char *start, const char *end,
+ const char **endPtr) {
+ const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start); /* bytes offered to this attempt; 0 if either pointer is NULL */
+
+ if (parser->m_reparseDeferralEnabled
+ && ! parser->m_parsingStatus.finalBuffer) { /* deferral is never applied to the final buffer */
+ // Heuristic: don't try to parse a partial token again until the amount of
+ // available data has increased significantly.
+ const size_t had_before = parser->m_partialTokenBytesBefore; /* have_now of the last error-free attempt that consumed nothing; 0 after one that consumed input */
+ // ...but *do* try anyway if we're close to causing a reallocation.
+ size_t available_buffer
+ = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); /* already-parsed bytes at the front of the buffer */
+#if XML_CONTEXT_BYTES > 0
+ available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES); /* do not count up to XML_CONTEXT_BYTES of them; EXPAT_MIN keeps the unsigned subtraction from wrapping */
+#endif
+ available_buffer
+ += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd); /* plus the unoccupied tail of the allocation */
+ // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
+ const bool enough
+ = (have_now >= 2 * had_before) /* input has at least doubled since the last fruitless attempt */
+ || ((size_t)parser->m_lastBufferRequestSize > available_buffer); /* or the last buffer request size exceeds the space counted above */
+
+ if (! enough) {
+ *endPtr = start; // callers may expect this to be set
+ return XML_ERROR_NONE; /* deferred: the processor is not invoked and m_partialTokenBytesBefore is left unchanged */
+ }
+ }
+#if defined(XML_TESTING)
+ g_bytesScanned += (unsigned)have_now; /* testing-only counter of bytes handed to processors */
+#endif
+ const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
+ if (ret == XML_ERROR_NONE) { /* on error the remembered size is left as it was */
+ // if we consumed nothing, remember what we had on this parse attempt.
+ if (*endPtr == start) {
+ parser->m_partialTokenBytesBefore = have_now;
+ } else {
+ parser->m_partialTokenBytesBefore = 0; /* progress was made, so the next attempt is not deferred by the doubling rule */
+ }
+ }
+ return ret;
+}
+
static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser) {
/* hash functions must be initialized before setContext() is called */
@@ -876,7 +1069,7 @@ parserCreate(const XML_Char *encodingName,
if (memsuite) {
XML_Memory_Handling_Suite *mtemp;
- parser = (XML_Parser)memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
+ parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
if (parser != NULL) {
mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
mtemp->malloc_fcn = memsuite->malloc_fcn;
@@ -968,6 +1161,14 @@ parserCreate(const XML_Char *encodingName,
parserInit(parser, encodingName);
if (encodingName && ! parser->m_protocolEncodingName) {
+ if (dtd) {
+ // We need to stop the upcoming call to XML_ParserFree from happily
+ // destroying parser->m_dtd: the DTD is shared with the parent parser,
+ // and the only guard that keeps XML_ParserFree from destroying
+ // parser->m_dtd is parser->m_isParamEntity, which will be set to
+ // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
+ parser->m_dtd = NULL;
+ }
XML_ParserFree(parser);
return NULL;
}
@@ -1020,6 +1221,9 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) {
parser->m_bufferEnd = parser->m_buffer;
parser->m_parseEndByteIndex = 0;
parser->m_parseEndPtr = NULL;
+ parser->m_partialTokenBytesBefore = 0;
+ parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
+ parser->m_lastBufferRequestSize = 0;
parser->m_declElementType = NULL;
parser->m_declAttributeId = NULL;
parser->m_declEntity = NULL;
@@ -1053,6 +1257,18 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) {
parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
#endif
parser->m_hash_secret_salt = 0;
+
+#if XML_GE == 1
+ memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
+ parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
+ parser->m_accounting.maximumAmplificationFactor
+ = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
+ parser->m_accounting.activationThresholdBytes
+ = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
+
+ memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
+ parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
+#endif
}
/* moves list of bindings to m_freeBindingList */
@@ -1177,6 +1393,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
to worry which hash secrets each table has.
*/
unsigned long oldhash_secret_salt;
+ XML_Bool oldReparseDeferralEnabled;
/* Validate the oldParser parameter before we pull everything out of it */
if (oldParser == NULL)
@@ -1221,6 +1438,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
to worry which hash secrets each table has.
*/
oldhash_secret_salt = parser->m_hash_secret_salt;
+ oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
#ifdef XML_DTD
if (! context)
@@ -1233,8 +1451,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
would be otherwise.
*/
if (parser->m_ns) {
- XML_Char tmp[2];
- *tmp = parser->m_namespaceSeparator;
+ XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
} else {
parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
@@ -1274,6 +1491,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
parser->m_ns_triplets = oldns_triplets;
parser->m_hash_secret_salt = oldhash_secret_salt;
+ parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
parser->m_parentParser = oldParser;
#ifdef XML_DTD
parser->m_paramEntityParsing = oldParamEntityParsing;
@@ -1399,6 +1617,7 @@ XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
parser->m_useForeignDTD = useDTD;
return XML_ERROR_NONE;
#else
+ UNUSED_P(useDTD);
return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
#endif
}
@@ -1727,71 +1946,27 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
parser->m_parsingStatus.parsing = XML_PARSING;
}
- if (len == 0) {
- parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
- if (! isFinal)
- return XML_STATUS_OK;
- parser->m_positionPtr = parser->m_bufferPtr;
- parser->m_parseEndPtr = parser->m_bufferEnd;
-
- /* If data are left over from last buffer, and we now know that these
- data are the final chunk of input, then we have to check them again
- to detect errors based on that fact.
- */
- parser->m_errorCode
- = parser->m_processor(parser, parser->m_bufferPtr,
- parser->m_parseEndPtr, &parser->m_bufferPtr);
-
- if (parser->m_errorCode == XML_ERROR_NONE) {
- switch (parser->m_parsingStatus.parsing) {
- case XML_SUSPENDED:
- /* It is hard to be certain, but it seems that this case
- * cannot occur. This code is cleaning up a previous parse
- * with no new data (since len == 0). Changing the parsing
- * state requires getting to execute a handler function, and
- * there doesn't seem to be an opportunity for that while in
- * this circumstance.
- *
- * Given the uncertainty, we retain the code but exclude it
- * from coverage tests.
- *
- * LCOV_EXCL_START
- */
- XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
- parser->m_bufferPtr, &parser->m_position);
- parser->m_positionPtr = parser->m_bufferPtr;
- return XML_STATUS_SUSPENDED;
- /* LCOV_EXCL_STOP */
- case XML_INITIALIZED:
- case XML_PARSING:
- parser->m_parsingStatus.parsing = XML_FINISHED;
- /* fall through */
- default:
- return XML_STATUS_OK;
- }
- }
- parser->m_eventEndPtr = parser->m_eventPtr;
- parser->m_processor = errorProcessor;
- return XML_STATUS_ERROR;
- }
-#ifndef XML_CONTEXT_BYTES
- else if (parser->m_bufferPtr == parser->m_bufferEnd) {
+#if XML_CONTEXT_BYTES == 0
+ if (parser->m_bufferPtr == parser->m_bufferEnd) {
const char *end;
int nLeftOver;
enum XML_Status result;
/* Detect overflow (a+b > MAX <==> b > MAX-a) */
- if (len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
+ if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
parser->m_errorCode = XML_ERROR_NO_MEMORY;
parser->m_eventPtr = parser->m_eventEndPtr = NULL;
parser->m_processor = errorProcessor;
return XML_STATUS_ERROR;
}
+ // though this isn't a buffer request, we assume that `len` is the app's
+ // preferred buffer fill size, and therefore save it here.
+ parser->m_lastBufferRequestSize = len;
parser->m_parseEndByteIndex += len;
parser->m_positionPtr = s;
parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
parser->m_errorCode
- = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
+ = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
if (parser->m_errorCode != XML_ERROR_NONE) {
parser->m_eventEndPtr = parser->m_eventPtr;
@@ -1818,23 +1993,25 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
&parser->m_position);
nLeftOver = s + len - end;
if (nLeftOver) {
- if (parser->m_buffer == NULL
- || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
- /* avoid _signed_ integer overflow */
- char *temp = NULL;
- const int bytesToAllocate = (int)((unsigned)len * 2U);
- if (bytesToAllocate > 0) {
- temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
- }
- if (temp == NULL) {
- parser->m_errorCode = XML_ERROR_NO_MEMORY;
- parser->m_eventPtr = parser->m_eventEndPtr = NULL;
- parser->m_processor = errorProcessor;
- return XML_STATUS_ERROR;
- }
- parser->m_buffer = temp;
- parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
+ // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
+ // (and XML_ERROR_FINISHED) from XML_GetBuffer.
+ const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
+ parser->m_parsingStatus.parsing = XML_PARSING;
+ void *const temp = XML_GetBuffer(parser, nLeftOver);
+ parser->m_parsingStatus.parsing = originalStatus;
+ // GetBuffer may have overwritten this, but we want to remember what the
+ // app requested, not how many bytes were left over after parsing.
+ parser->m_lastBufferRequestSize = len;
+ if (temp == NULL) {
+ // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
+ parser->m_eventPtr = parser->m_eventEndPtr = NULL;
+ parser->m_processor = errorProcessor;
+ return XML_STATUS_ERROR;
}
+ // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
+ // don't have any data to preserve, and can copy straight into the start
+ // of the buffer rather than the GetBuffer return pointer (which may be
+ // pointing further into the allocated buffer).
memcpy(parser->m_buffer, end, nLeftOver);
}
parser->m_bufferPtr = parser->m_buffer;
@@ -1845,16 +2022,15 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
parser->m_eventEndPtr = parser->m_bufferPtr;
return result;
}
-#endif /* not defined XML_CONTEXT_BYTES */
- else {
- void *buff = XML_GetBuffer(parser, len);
- if (buff == NULL)
- return XML_STATUS_ERROR;
- else {
- memcpy(buff, s, len);
- return XML_ParseBuffer(parser, len, isFinal);
- }
+#endif /* XML_CONTEXT_BYTES == 0 */
+ void *buff = XML_GetBuffer(parser, len);
+ if (buff == NULL)
+ return XML_STATUS_ERROR;
+ if (len > 0) {
+ assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
+ memcpy(buff, s, len);
}
+ return XML_ParseBuffer(parser, len, isFinal);
}
enum XML_Status XMLCALL
@@ -1864,6 +2040,12 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
if (parser == NULL)
return XML_STATUS_ERROR;
+
+ if (len < 0) {
+ parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
+ return XML_STATUS_ERROR;
+ }
+
switch (parser->m_parsingStatus.parsing) {
case XML_SUSPENDED:
parser->m_errorCode = XML_ERROR_SUSPENDED;
@@ -1872,6 +2054,12 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
parser->m_errorCode = XML_ERROR_FINISHED;
return XML_STATUS_ERROR;
case XML_INITIALIZED:
+ /* Has someone called XML_GetBuffer successfully before? */
+ if (! parser->m_bufferPtr) {
+ parser->m_errorCode = XML_ERROR_NO_BUFFER;
+ return XML_STATUS_ERROR;
+ }
+
if (parser->m_parentParser == NULL && ! startParsing(parser)) {
parser->m_errorCode = XML_ERROR_NO_MEMORY;
return XML_STATUS_ERROR;
@@ -1888,8 +2076,8 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
parser->m_parseEndByteIndex += len;
parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
- parser->m_errorCode = parser->m_processor(
- parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
+ parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
+ &parser->m_bufferPtr);
if (parser->m_errorCode != XML_ERROR_NONE) {
parser->m_eventEndPtr = parser->m_eventPtr;
@@ -1934,10 +2122,14 @@ XML_GetBuffer(XML_Parser parser, int len) {
default:;
}
- if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) {
-#ifdef XML_CONTEXT_BYTES
+ // whether or not the request succeeds, `len` seems to be the app's preferred
+ // buffer fill size; remember it.
+ parser->m_lastBufferRequestSize = len;
+ if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
+ || parser->m_buffer == NULL) {
+#if XML_CONTEXT_BYTES > 0
int keep;
-#endif /* defined XML_CONTEXT_BYTES */
+#endif /* XML_CONTEXT_BYTES > 0 */
/* Do not invoke signed arithmetic overflow: */
int neededSize = (int)((unsigned)len
+ (unsigned)EXPAT_SAFE_PTR_DIFF(
@@ -1946,15 +2138,21 @@ XML_GetBuffer(XML_Parser parser, int len) {
parser->m_errorCode = XML_ERROR_NO_MEMORY;
return NULL;
}
-#ifdef XML_CONTEXT_BYTES
+#if XML_CONTEXT_BYTES > 0
keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
if (keep > XML_CONTEXT_BYTES)
keep = XML_CONTEXT_BYTES;
+ /* Detect and prevent integer overflow */
+ if (keep > INT_MAX - neededSize) {
+ parser->m_errorCode = XML_ERROR_NO_MEMORY;
+ return NULL;
+ }
neededSize += keep;
-#endif /* defined XML_CONTEXT_BYTES */
- if (neededSize
- <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
-#ifdef XML_CONTEXT_BYTES
+#endif /* XML_CONTEXT_BYTES > 0 */
+ if (parser->m_buffer && parser->m_bufferPtr
+ && neededSize
+ <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
+#if XML_CONTEXT_BYTES > 0
if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
int offset
= (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
@@ -1967,19 +2165,17 @@ XML_GetBuffer(XML_Parser parser, int len) {
parser->m_bufferPtr -= offset;
}
#else
- if (parser->m_buffer && parser->m_bufferPtr) {
- memmove(parser->m_buffer, parser->m_bufferPtr,
- EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
- parser->m_bufferEnd
- = parser->m_buffer
- + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
- parser->m_bufferPtr = parser->m_buffer;
- }
-#endif /* not defined XML_CONTEXT_BYTES */
+ memmove(parser->m_buffer, parser->m_bufferPtr,
+ EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
+ parser->m_bufferEnd
+ = parser->m_buffer
+ + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
+ parser->m_bufferPtr = parser->m_buffer;
+#endif /* XML_CONTEXT_BYTES > 0 */
} else {
char *newBuf;
int bufferSize
- = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr);
+ = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
if (bufferSize == 0)
bufferSize = INIT_BUFFER_SIZE;
do {
@@ -1996,7 +2192,7 @@ XML_GetBuffer(XML_Parser parser, int len) {
return NULL;
}
parser->m_bufferLim = newBuf + bufferSize;
-#ifdef XML_CONTEXT_BYTES
+#if XML_CONTEXT_BYTES > 0
if (parser->m_bufferPtr) {
memcpy(newBuf, &parser->m_bufferPtr[-keep],
EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
@@ -2026,7 +2222,7 @@ XML_GetBuffer(XML_Parser parser, int len) {
parser->m_bufferEnd = newBuf;
}
parser->m_bufferPtr = parser->m_buffer = newBuf;
-#endif /* not defined XML_CONTEXT_BYTES */
+#endif /* XML_CONTEXT_BYTES > 0 */
}
parser->m_eventPtr = parser->m_eventEndPtr = NULL;
parser->m_positionPtr = NULL;
@@ -2039,6 +2235,9 @@ XML_StopParser(XML_Parser parser, XML_Bool resumable) {
if (parser == NULL)
return XML_STATUS_ERROR;
switch (parser->m_parsingStatus.parsing) {
+ case XML_INITIALIZED:
+ parser->m_errorCode = XML_ERROR_NOT_STARTED;
+ return XML_STATUS_ERROR;
case XML_SUSPENDED:
if (resumable) {
parser->m_errorCode = XML_ERROR_SUSPENDED;
@@ -2049,7 +2248,7 @@ XML_StopParser(XML_Parser parser, XML_Bool resumable) {
case XML_FINISHED:
parser->m_errorCode = XML_ERROR_FINISHED;
return XML_STATUS_ERROR;
- default:
+ case XML_PARSING:
if (resumable) {
#ifdef XML_DTD
if (parser->m_isParamEntity) {
@@ -2060,6 +2259,9 @@ XML_StopParser(XML_Parser parser, XML_Bool resumable) {
parser->m_parsingStatus.parsing = XML_SUSPENDED;
} else
parser->m_parsingStatus.parsing = XML_FINISHED;
+ break;
+ default:
+ assert(0);
}
return XML_STATUS_OK;
}
@@ -2076,7 +2278,7 @@ XML_ResumeParser(XML_Parser parser) {
}
parser->m_parsingStatus.parsing = XML_PARSING;
- parser->m_errorCode = parser->m_processor(
+ parser->m_errorCode = callProcessor(
parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
if (parser->m_errorCode != XML_ERROR_NONE) {
@@ -2140,7 +2342,7 @@ XML_GetCurrentByteCount(XML_Parser parser) {
const char *XMLCALL
XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
-#ifdef XML_CONTEXT_BYTES
+#if XML_CONTEXT_BYTES > 0
if (parser == NULL)
return NULL;
if (parser->m_eventPtr && parser->m_buffer) {
@@ -2154,8 +2356,8 @@ XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
(void)parser;
(void)offset;
(void)size;
-#endif /* defined XML_CONTEXT_BYTES */
- return (char *)0;
+#endif /* XML_CONTEXT_BYTES > 0 */
+ return (const char *)0;
}
XML_Size XMLCALL
@@ -2316,6 +2518,17 @@ XML_ErrorString(enum XML_Error code) {
/* Added in 2.2.5. */
case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
return XML_L("invalid argument");
+ /* Added in 2.3.0. */
+ case XML_ERROR_NO_BUFFER:
+ return XML_L(
+ "a successful prior call to function XML_GetBuffer is required");
+ /* Added in 2.4.0. */
+ case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
+ return XML_L(
+ "limit on input amplification factor (from DTD and entities) breached");
+ /* Added in 2.6.4. */
+ case XML_ERROR_NOT_STARTED:
+ return XML_L("parser not started");
}
return NULL;
}
@@ -2352,41 +2565,87 @@ XML_ExpatVersionInfo(void) {
const XML_Feature *XMLCALL
XML_GetFeatureList(void) {
- static const XML_Feature features[]
- = {{XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
- sizeof(XML_Char)},
- {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
- sizeof(XML_LChar)},
+ static const XML_Feature features[] = {
+ {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
+ sizeof(XML_Char)},
+ {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
+ sizeof(XML_LChar)},
#ifdef XML_UNICODE
- {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
+ {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
#endif
#ifdef XML_UNICODE_WCHAR_T
- {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
+ {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
#endif
#ifdef XML_DTD
- {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
+ {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
#endif
-#ifdef XML_CONTEXT_BYTES
- {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
- XML_CONTEXT_BYTES},
+#if XML_CONTEXT_BYTES > 0
+ {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
+ XML_CONTEXT_BYTES},
#endif
#ifdef XML_MIN_SIZE
- {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
+ {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
#endif
#ifdef XML_NS
- {XML_FEATURE_NS, XML_L("XML_NS"), 0},
+ {XML_FEATURE_NS, XML_L("XML_NS"), 0},
#endif
#ifdef XML_LARGE_SIZE
- {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
+ {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
#endif
#ifdef XML_ATTR_INFO
- {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
+ {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
#endif
- {XML_FEATURE_END, NULL, 0}};
+#if XML_GE == 1
+ /* Added in Expat 2.4.0 for XML_DTD defined and
+ * added in Expat 2.6.0 for XML_GE == 1. */
+ {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
+ XML_L("XML_BLAP_MAX_AMP"),
+ (long int)
+ EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
+ {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
+ XML_L("XML_BLAP_ACT_THRES"),
+ EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
+ /* Added in Expat 2.6.0. */
+ {XML_FEATURE_GE, XML_L("XML_GE"), 0},
+#endif
+ {XML_FEATURE_END, NULL, 0}};
return features;
}
+#if XML_GE == 1
+XML_Bool XMLCALL
+XML_SetBillionLaughsAttackProtectionMaximumAmplification(
+    XML_Parser parser, float maximumAmplificationFactor) {
+  /* Accept only a non-NULL parser without a parent and a factor >= 1.0. */
+  if (parser == NULL || parser->m_parentParser != NULL)
+    return XML_FALSE;
+  if (isnan(maximumAmplificationFactor) || maximumAmplificationFactor < 1.0f)
+    return XML_FALSE;
+  parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
+  return XML_TRUE;
+}
+
+XML_Bool XMLCALL
+XML_SetBillionLaughsAttackProtectionActivationThreshold(
+    XML_Parser parser, unsigned long long activationThresholdBytes) {
+  /* Store the threshold; fails for a NULL parser or one with a parent. */
+  if (parser == NULL || parser->m_parentParser != NULL)
+    return XML_FALSE;
+  parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
+  return XML_TRUE;
+}
+#endif /* XML_GE == 1 */
+
+XML_Bool XMLCALL
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
+  /* Reject a NULL parser and any value other than XML_TRUE or XML_FALSE. */
+  if (parser == NULL || (enabled != XML_TRUE && enabled != XML_FALSE))
+    return XML_FALSE;
+  parser->m_reparseDeferralEnabled = enabled;
+  return XML_TRUE;
+}
+
/* Initially tag->rawName always points into the parse buffer;
for those TAG instances opened while the current parse buffer was
processed, and not yet closed, we need to store tag->rawName in a more
@@ -2398,6 +2657,7 @@ storeRawNames(XML_Parser parser) {
while (tag) {
int bufSize;
int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
+ size_t rawNameLen;
char *rawNameBuf = tag->buf + nameLen;
/* Stop if already stored. Since m_tagStack is a stack, we can stop
at the first entry that has already been copied; everything
@@ -2406,10 +2666,14 @@ storeRawNames(XML_Parser parser) {
*/
if (tag->rawName == rawNameBuf)
break;
- /* For re-use purposes we need to ensure that the
+ /* For reuse purposes we need to ensure that the
size of tag->buf is a multiple of sizeof(XML_Char).
*/
- bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
+ rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
+ /* Detect and prevent integer overflow. */
+ if (rawNameLen > (size_t)INT_MAX - nameLen)
+ return XML_FALSE;
+ bufSize = nameLen + (int)rawNameLen;
if (bufSize > tag->bufEnd - tag->buf) {
char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
if (temp == NULL)
@@ -2439,9 +2703,9 @@ storeRawNames(XML_Parser parser) {
static enum XML_Error PTRCALL
contentProcessor(XML_Parser parser, const char *start, const char *end,
const char **endPtr) {
- enum XML_Error result
- = doContent(parser, 0, parser->m_encoding, start, end, endPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer);
+ enum XML_Error result = doContent(
+ parser, 0, parser->m_encoding, start, end, endPtr,
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
if (result == XML_ERROR_NONE) {
if (! storeRawNames(parser))
return XML_ERROR_NO_MEMORY;
@@ -2466,6 +2730,14 @@ externalEntityInitProcessor2(XML_Parser parser, const char *start,
int tok = XmlContentTok(parser->m_encoding, start, end, &next);
switch (tok) {
case XML_TOK_BOM:
+#if XML_GE == 1
+ if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#endif /* XML_GE == 1 */
+
/* If we are at the end of the buffer, this would cause the next stage,
i.e. externalEntityInitProcessor3, to pass control directly to
doContent (by detecting XML_TOK_NONE) without processing any xml text
@@ -2503,6 +2775,10 @@ externalEntityInitProcessor3(XML_Parser parser, const char *start,
const char *next = start; /* XmlContentTok doesn't always set the last arg */
parser->m_eventPtr = start;
tok = XmlContentTok(parser->m_encoding, start, end, &next);
+ /* Note: These bytes are accounted later in:
+ - processXmlDecl
+ - externalEntityContentProcessor
+ */
parser->m_eventEndPtr = next;
switch (tok) {
@@ -2544,7 +2820,8 @@ externalEntityContentProcessor(XML_Parser parser, const char *start,
const char *end, const char **endPtr) {
enum XML_Error result
= doContent(parser, 1, parser->m_encoding, start, end, endPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer);
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer,
+ XML_ACCOUNT_ENTITY_EXPANSION);
if (result == XML_ERROR_NONE) {
if (! storeRawNames(parser))
return XML_ERROR_NO_MEMORY;
@@ -2555,7 +2832,7 @@ externalEntityContentProcessor(XML_Parser parser, const char *start,
static enum XML_Error
doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
const char *s, const char *end, const char **nextPtr,
- XML_Bool haveMore) {
+ XML_Bool haveMore, enum XML_Account account) {
/* save one level of indirection */
DTD *const dtd = parser->m_dtd;
@@ -2573,6 +2850,17 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
for (;;) {
const char *next = s; /* XmlContentTok doesn't always set the last arg */
int tok = XmlContentTok(enc, s, end, &next);
+#if XML_GE == 1
+ const char *accountAfter
+ = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
+ ? (haveMore ? s /* i.e. 0 bytes */ : end)
+ : next;
+ if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
+ account)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#endif
*eventEndPP = next;
switch (tok) {
case XML_TOK_TRAILING_CR:
@@ -2628,6 +2916,14 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
XML_Char ch = (XML_Char)XmlPredefinedEntityName(
enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
if (ch) {
+#if XML_GE == 1
+ /* NOTE: We are replacing 4-6 characters of original input with 1
+ * character, so there is no amplification; hence we record it
+ * without protection. */
+ accountingDiffTolerated(parser, tok, (char *)&ch,
+ ((char *)&ch) + sizeof(XML_Char), __LINE__,
+ XML_ACCOUNT_ENTITY_EXPANSION);
+#endif /* XML_GE == 1 */
if (parser->m_characterDataHandler)
parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
else if (parser->m_defaultHandler)
@@ -2746,7 +3042,8 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
}
tag->name.str = (XML_Char *)tag->buf;
*toPtr = XML_T('\0');
- result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
+ result
+ = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
if (result)
return result;
if (parser->m_startElementHandler)
@@ -2770,7 +3067,8 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
if (! name.str)
return XML_ERROR_NO_MEMORY;
poolFinish(&parser->m_tempPool);
- result = storeAtts(parser, enc, s, &name, &bindings);
+ result = storeAtts(parser, enc, s, &name, &bindings,
+ XML_ACCOUNT_NONE /* token spans whole start tag */);
if (result != XML_ERROR_NONE) {
freeBindings(parser, bindings);
return result;
@@ -2807,9 +3105,6 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
int len;
const char *rawName;
TAG *tag = parser->m_tagStack;
- parser->m_tagStack = tag->parent;
- tag->parent = parser->m_freeTagList;
- parser->m_freeTagList = tag;
rawName = s + enc->minBytesPerChar * 2;
len = XmlNameLength(enc, rawName);
if (len != tag->rawNameLength
@@ -2817,6 +3112,9 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
*eventPP = rawName;
return XML_ERROR_TAG_MISMATCH;
}
+ parser->m_tagStack = tag->parent;
+ tag->parent = parser->m_freeTagList;
+ parser->m_freeTagList = tag;
--parser->m_tagLevel;
if (parser->m_endElementHandler) {
const XML_Char *localPart;
@@ -2826,13 +3124,13 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
if (parser->m_ns && localPart) {
/* localPart and prefix may have been overwritten in
tag->name.str, since this points to the binding->uri
- buffer which gets re-used; so we have to add them again
+ buffer which gets reused; so we have to add them again
*/
uri = (XML_Char *)tag->name.str + tag->name.uriLen;
/* don't need to check for space - already done in storeAtts() */
while (*localPart)
*uri++ = *localPart++;
- prefix = (XML_Char *)tag->name.prefix;
+ prefix = tag->name.prefix;
if (parser->m_ns_triplets && prefix) {
*uri++ = parser->m_namespaceSeparator;
while (*prefix)
@@ -2899,13 +3197,14 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
However, now we have a start/endCdataSectionHandler, so it seems
easier to let the user deal with this.
*/
- else if (0 && parser->m_characterDataHandler)
+ else if ((0) && parser->m_characterDataHandler)
parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
0);
/* END disabled code */
else if (parser->m_defaultHandler)
reportDefault(parser, enc, s, next);
- result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore);
+ result
+ = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
if (result != XML_ERROR_NONE)
return result;
else if (! next) {
@@ -2927,8 +3226,8 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
(int)(dataPtr - (ICHAR *)parser->m_dataBuf));
} else
parser->m_characterDataHandler(
- parser->m_handlerArg, (XML_Char *)s,
- (int)((XML_Char *)end - (XML_Char *)s));
+ parser->m_handlerArg, (const XML_Char *)s,
+ (int)((const XML_Char *)end - (const XML_Char *)s));
} else if (parser->m_defaultHandler)
reportDefault(parser, enc, s, end);
/* We are at the end of the final buffer, should we check for
@@ -2961,8 +3260,8 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
*eventPP = s;
}
} else
- charDataHandler(parser->m_handlerArg, (XML_Char *)s,
- (int)((XML_Char *)next - (XML_Char *)s));
+ charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
+ (int)((const XML_Char *)next - (const XML_Char *)s));
} else if (parser->m_defaultHandler)
reportDefault(parser, enc, s, next);
} break;
@@ -3034,7 +3333,8 @@ freeBindings(XML_Parser parser, BINDING *bindings) {
*/
static enum XML_Error
storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
- TAG_NAME *tagNamePtr, BINDING **bindingsPtr) {
+ TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
+ enum XML_Account account) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
ELEMENT_TYPE *elementType;
int nDefaultAtts;
@@ -3066,13 +3366,38 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
/* get the attributes from the tokenizer */
n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
+
+ /* Detect and prevent integer overflow */
+ if (n > INT_MAX - nDefaultAtts) {
+ return XML_ERROR_NO_MEMORY;
+ }
+
if (n + nDefaultAtts > parser->m_attsSize) {
int oldAttsSize = parser->m_attsSize;
ATTRIBUTE *temp;
#ifdef XML_ATTR_INFO
XML_AttrInfo *temp2;
#endif
+
+ /* Detect and prevent integer overflow */
+ if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
+ || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
+ return XML_ERROR_NO_MEMORY;
+ }
+
parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
+
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
+ parser->m_attsSize = oldAttsSize;
+ return XML_ERROR_NO_MEMORY;
+ }
+#endif
+
temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
parser->m_attsSize * sizeof(ATTRIBUTE));
if (temp == NULL) {
@@ -3081,6 +3406,17 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
}
parser->m_atts = temp;
#ifdef XML_ATTR_INFO
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+# if UINT_MAX >= SIZE_MAX
+ if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
+ parser->m_attsSize = oldAttsSize;
+ return XML_ERROR_NO_MEMORY;
+ }
+# endif
+
temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
parser->m_attsSize * sizeof(XML_AttrInfo));
if (temp2 == NULL) {
@@ -3144,7 +3480,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
/* normalize the attribute value */
result = storeAttributeValue(
parser, enc, isCdata, parser->m_atts[i].valuePtr,
- parser->m_atts[i].valueEnd, &parser->m_tempPool);
+ parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
if (result)
return result;
appAtts[attIndex] = poolStart(&parser->m_tempPool);
@@ -3219,7 +3555,13 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
if (nPrefixes) {
int j; /* hash table index */
unsigned long version = parser->m_nsAttsVersion;
- int nsAttsSize = (int)1 << parser->m_nsAttsPower;
+
+ /* Detect and prevent invalid shift */
+ if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
+ return XML_ERROR_NO_MEMORY;
+ }
+
+ unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
unsigned char oldNsAttsPower = parser->m_nsAttsPower;
/* size of hash table must be at least 2 * (# of prefixed attributes) */
if ((nPrefixes << 1)
@@ -3230,7 +3572,28 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
;
if (parser->m_nsAttsPower < 3)
parser->m_nsAttsPower = 3;
- nsAttsSize = (int)1 << parser->m_nsAttsPower;
+
+ /* Detect and prevent invalid shift */
+ if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
+ /* Restore actual size of memory in m_nsAtts */
+ parser->m_nsAttsPower = oldNsAttsPower;
+ return XML_ERROR_NO_MEMORY;
+ }
+
+ nsAttsSize = 1u << parser->m_nsAttsPower;
+
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
+ /* Restore actual size of memory in m_nsAtts */
+ parser->m_nsAttsPower = oldNsAttsPower;
+ return XML_ERROR_NO_MEMORY;
+ }
+#endif
+
temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
nsAttsSize * sizeof(NS_ATT));
if (! temp) {
@@ -3388,9 +3751,31 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
tagNamePtr->prefixLen = prefixLen;
for (i = 0; localPart[i++];)
; /* i includes null terminator */
+
+ /* Detect and prevent integer overflow */
+ if (binding->uriLen > INT_MAX - prefixLen
+ || i > INT_MAX - (binding->uriLen + prefixLen)) {
+ return XML_ERROR_NO_MEMORY;
+ }
+
n = i + binding->uriLen + prefixLen;
if (n > binding->uriAlloc) {
TAG *p;
+
+ /* Detect and prevent integer overflow */
+ if (n > INT_MAX - EXPAND_SPARE) {
+ return XML_ERROR_NO_MEMORY;
+ }
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
+ return XML_ERROR_NO_MEMORY;
+ }
+#endif
+
uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
if (! uri)
return XML_ERROR_NO_MEMORY;
@@ -3415,12 +3800,124 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
return XML_ERROR_NONE;
}
+static XML_Bool
+is_rfc3986_uri_char(XML_Char candidate) {
+ // For the RFC 3986 ABNF grammar see
+ // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
+
+ switch (candidate) {
+ // From rule "ALPHA" (uppercase half)
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ case 'G':
+ case 'H':
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case 'Q':
+ case 'R':
+ case 'S':
+ case 'T':
+ case 'U':
+ case 'V':
+ case 'W':
+ case 'X':
+ case 'Y':
+ case 'Z':
+
+ // From rule "ALPHA" (lowercase half)
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'f':
+ case 'g':
+ case 'h':
+ case 'i':
+ case 'j':
+ case 'k':
+ case 'l':
+ case 'm':
+ case 'n':
+ case 'o':
+ case 'p':
+ case 'q':
+ case 'r':
+ case 's':
+ case 't':
+ case 'u':
+ case 'v':
+ case 'w':
+ case 'x':
+ case 'y':
+ case 'z':
+
+ // From rule "DIGIT"
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+
+ // From rule "pct-encoded"
+ case '%':
+
+ // From rule "unreserved"
+ case '-':
+ case '.':
+ case '_':
+ case '~':
+
+ // From rule "gen-delims"
+ case ':':
+ case '/':
+ case '?':
+ case '#':
+ case '[':
+ case ']':
+ case '@':
+
+ // From rule "sub-delims"
+ case '!':
+ case '$':
+ case '&':
+ case '\'':
+ case '(':
+ case ')':
+ case '*':
+ case '+':
+ case ',':
+ case ';':
+ case '=':
+ return XML_TRUE;
+
+ default:
+ return XML_FALSE;
+ }
+}
+
/* addBinding() overwrites the value of prefix->binding without checking.
Therefore one must keep track of the old value outside of addBinding().
*/
static enum XML_Error
-addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
+addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
const XML_Char *uri, BINDING **bindingsPtr) {
+ // "http://www.w3.org/XML/1998/namespace"
static const XML_Char xmlNamespace[]
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
@@ -3431,6 +3928,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
ASCII_e, '\0'};
static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
+ // "http://www.w3.org/2000/xmlns/"
static const XML_Char xmlnsNamespace[]
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
@@ -3470,6 +3968,29 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
if (! mustBeXML && isXMLNS
&& (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
isXMLNS = XML_FALSE;
+
+ // NOTE: While Expat does not validate namespace URIs against RFC 3986
+ // today (and is not REQUIRED to do so with regard to the XML 1.0
+ // namespaces specification) we have to at least make sure, that
+ // the application on top of Expat (that is likely splitting expanded
+ // element names ("qualified names") of form
+ // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
+ // in its element handler code) cannot be confused by an attacker
+ // putting additional namespace separator characters into namespace
+ // declarations. That would be ambiguous and not to be expected.
+ //
+ // While the HTML API docs of function XML_ParserCreateNS have been
+ // advising against use of a namespace separator character that can
+ // appear in a URI for >20 years now, some widespread applications
+ // are using URI characters (':' (colon) in particular) for a
+ // namespace separator, in practice. To keep these applications
+ // functional, we only reject namespace URIs containing the
+ // application-chosen namespace separator if the chosen separator
+ // is a non-URI character with regard to RFC 3986.
+ if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
+ && ! is_rfc3986_uri_char(uri[len])) {
+ return XML_ERROR_SYNTAX;
+ }
}
isXML = isXML && len == xmlLen;
isXMLNS = isXMLNS && len == xmlnsLen;
@@ -3486,6 +4007,21 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
if (parser->m_freeBindingList) {
b = parser->m_freeBindingList;
if (len > b->uriAlloc) {
+ /* Detect and prevent integer overflow */
+ if (len > INT_MAX - EXPAND_SPARE) {
+ return XML_ERROR_NO_MEMORY;
+ }
+
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
+ return XML_ERROR_NO_MEMORY;
+ }
+#endif
+
XML_Char *temp = (XML_Char *)REALLOC(
parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
if (temp == NULL)
@@ -3498,6 +4034,21 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
b = (BINDING *)MALLOC(parser, sizeof(BINDING));
if (! b)
return XML_ERROR_NO_MEMORY;
+
+ /* Detect and prevent integer overflow */
+ if (len > INT_MAX - EXPAND_SPARE) {
+ return XML_ERROR_NO_MEMORY;
+ }
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
+ return XML_ERROR_NO_MEMORY;
+ }
+#endif
+
b->uri
= (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
if (! b->uri) {
@@ -3533,9 +4084,9 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
static enum XML_Error PTRCALL
cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
const char **endPtr) {
- enum XML_Error result
- = doCdataSection(parser, parser->m_encoding, &start, end, endPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer);
+ enum XML_Error result = doCdataSection(
+ parser, parser->m_encoding, &start, end, endPtr,
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
if (result != XML_ERROR_NONE)
return result;
if (start) {
@@ -3555,7 +4106,8 @@ cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
*/
static enum XML_Error
doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
- const char *end, const char **nextPtr, XML_Bool haveMore) {
+ const char *end, const char **nextPtr, XML_Bool haveMore,
+ enum XML_Account account) {
const char *s = *startPtr;
const char **eventPP;
const char **eventEndPP;
@@ -3571,8 +4123,16 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
*startPtr = NULL;
for (;;) {
- const char *next;
+ const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
int tok = XmlCdataSectionTok(enc, s, end, &next);
+#if XML_GE == 1
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#else
+ UNUSED_P(account);
+#endif
*eventEndPP = next;
switch (tok) {
case XML_TOK_CDATA_SECT_CLOSE:
@@ -3580,7 +4140,7 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
parser->m_endCdataSectionHandler(parser->m_handlerArg);
/* BEGIN disabled code */
/* see comment under XML_TOK_CDATA_SECT_OPEN */
- else if (0 && parser->m_characterDataHandler)
+ else if ((0) && parser->m_characterDataHandler)
parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
0);
/* END disabled code */
@@ -3616,8 +4176,8 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
*eventPP = s;
}
} else
- charDataHandler(parser->m_handlerArg, (XML_Char *)s,
- (int)((XML_Char *)next - (XML_Char *)s));
+ charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
+ (int)((const XML_Char *)next - (const XML_Char *)s));
} else if (parser->m_defaultHandler)
reportDefault(parser, enc, s, next);
} break;
@@ -3689,7 +4249,7 @@ ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
static enum XML_Error
doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
const char *end, const char **nextPtr, XML_Bool haveMore) {
- const char *next;
+ const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
int tok;
const char *s = *startPtr;
const char **eventPP;
@@ -3717,6 +4277,13 @@ doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
*eventPP = s;
*startPtr = NULL;
tok = XmlIgnoreSectionTok(enc, s, end, &next);
+# if XML_GE == 1
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+# endif
*eventEndPP = next;
switch (tok) {
case XML_TOK_IGNORE_SECT:
@@ -3766,7 +4333,7 @@ initializeEncoding(XML_Parser parser) {
const char *s;
#ifdef XML_UNICODE
char encodingBuf[128];
- /* See comments abount `protoclEncodingName` in parserInit() */
+ /* See comments about `protocolEncodingName` in parserInit() */
if (! parser->m_protocolEncodingName)
s = NULL;
else {
@@ -3798,9 +4365,18 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
const XML_Char *storedEncName = NULL;
const ENCODING *newEncoding = NULL;
const char *version = NULL;
- const char *versionend;
+ const char *versionend = NULL;
const XML_Char *storedversion = NULL;
int standalone = -1;
+
+#if XML_GE == 1
+ if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#endif
+
if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
&version, &versionend, &encodingName, &newEncoding, &standalone)) {
@@ -3950,6 +4526,10 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
for (;;) {
tok = XmlPrologTok(parser->m_encoding, start, end, &next);
+ /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
+ - storeEntityValue
+ - processXmlDecl
+ */
parser->m_eventEndPtr = next;
if (tok <= 0) {
if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
@@ -3968,7 +4548,8 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
break;
}
/* found end of entity value - can store it now */
- return storeEntityValue(parser, parser->m_encoding, s, end);
+ return storeEntityValue(parser, parser->m_encoding, s, end,
+ XML_ACCOUNT_DIRECT);
} else if (tok == XML_TOK_XML_DECL) {
enum XML_Error result;
result = processXmlDecl(parser, 0, start, next);
@@ -3986,17 +4567,25 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
parser->m_processor = entityValueProcessor;
return entityValueProcessor(parser, next, end, nextPtr);
}
- /* If we are at the end of the buffer, this would cause XmlPrologTok to
- return XML_TOK_NONE on the next call, which would then cause the
- function to exit with *nextPtr set to s - that is what we want for other
- tokens, but not for the BOM - we would rather like to skip it;
- then, when this routine is entered the next time, XmlPrologTok will
- return XML_TOK_INVALID, since the BOM is still in the buffer
+ /* XmlPrologTok has now set the encoding based on the BOM it found, and we
+ must move s and nextPtr forward to consume the BOM.
+
+ If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
+ would leave the BOM in the buffer and return. On the next call to this
+ function, our XmlPrologTok call would return XML_TOK_INVALID, since it
+ is not valid to have multiple BOMs.
*/
- else if (tok == XML_TOK_BOM && next == end
- && ! parser->m_parsingStatus.finalBuffer) {
+ else if (tok == XML_TOK_BOM) {
+# if XML_GE == 1
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+# endif
+
*nextPtr = next;
- return XML_ERROR_NONE;
+ s = next;
}
/* If we get this token, we have the start of what might be a
normal tag, but not a declaration (i.e. it doesn't begin with
@@ -4037,16 +4626,24 @@ externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
}
/* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
However, when parsing an external subset, doProlog will not accept a BOM
- as valid, and report a syntax error, so we have to skip the BOM
+ as valid, and report a syntax error, so we have to skip the BOM, and
+ account for the BOM bytes.
*/
else if (tok == XML_TOK_BOM) {
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+
s = next;
tok = XmlPrologTok(parser->m_encoding, s, end, &next);
}
parser->m_processor = prologProcessor;
return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE);
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
+ XML_ACCOUNT_DIRECT);
}
static enum XML_Error PTRCALL
@@ -4059,6 +4656,9 @@ entityValueProcessor(XML_Parser parser, const char *s, const char *end,
for (;;) {
tok = XmlPrologTok(enc, start, end, &next);
+ /* Note: These bytes are accounted later in:
+ - storeEntityValue
+ */
if (tok <= 0) {
if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
*nextPtr = s;
@@ -4076,7 +4676,7 @@ entityValueProcessor(XML_Parser parser, const char *s, const char *end,
break;
}
/* found end of entity value - can store it now */
- return storeEntityValue(parser, enc, s, end);
+ return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
}
start = next;
}
@@ -4090,13 +4690,14 @@ prologProcessor(XML_Parser parser, const char *s, const char *end,
const char *next = s;
int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE);
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
+ XML_ACCOUNT_DIRECT);
}
static enum XML_Error
doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
- XML_Bool allowClosingDoctype) {
+ XML_Bool allowClosingDoctype, enum XML_Account account) {
#ifdef XML_DTD
static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
#endif /* XML_DTD */
@@ -4123,6 +4724,10 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
+#ifndef XML_DTD
+ UNUSED_P(account);
+#endif
+
/* save one level of indirection */
DTD *const dtd = parser->m_dtd;
@@ -4187,6 +4792,21 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
}
}
role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
+#if XML_GE == 1
+ switch (role) {
+ case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
+ case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
+# ifdef XML_DTD
+ case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
+# endif
+ break;
+ default:
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+ }
+#endif
switch (role) {
case XML_ROLE_XML_DECL: {
enum XML_Error result = processXmlDecl(parser, 0, s, next);
@@ -4451,10 +5071,10 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
parser->m_handlerArg, parser->m_declElementType->name,
parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
- poolClear(&parser->m_tempPool);
handleDefault = XML_FALSE;
}
}
+ poolClear(&parser->m_tempPool);
break;
case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
@@ -4462,7 +5082,8 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
const XML_Char *attVal;
enum XML_Error result = storeAttributeValue(
parser, enc, parser->m_declAttributeIsCdata,
- s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool);
+ s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
+ XML_ACCOUNT_NONE);
if (result)
return result;
attVal = poolStart(&dtd->pool);
@@ -4495,8 +5116,12 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
break;
case XML_ROLE_ENTITY_VALUE:
if (dtd->keepProcessing) {
- enum XML_Error result = storeEntityValue(
- parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
+#if XML_GE == 1
+ // This will store the given replacement text in
+ // parser->m_declEntity->textPtr.
+ enum XML_Error result
+ = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
+ next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
if (parser->m_declEntity) {
parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
parser->m_declEntity->textLen
@@ -4514,6 +5139,25 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
poolDiscard(&dtd->entityValuePool);
if (result != XML_ERROR_NONE)
return result;
+#else
+ // This will store "&amp;entity123;" in parser->m_declEntity->textPtr
+ // to end up as "&entity123;" in the handler.
+ if (parser->m_declEntity != NULL) {
+ const enum XML_Error result
+ = storeSelfEntityValue(parser, parser->m_declEntity);
+ if (result != XML_ERROR_NONE)
+ return result;
+
+ if (parser->m_entityDeclHandler) {
+ *eventEndPP = s;
+ parser->m_entityDeclHandler(
+ parser->m_handlerArg, parser->m_declEntity->name,
+ parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
+ parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
+ handleDefault = XML_FALSE;
+ }
+ }
+#endif
}
break;
case XML_ROLE_DOCTYPE_SYSTEM_ID:
@@ -4572,6 +5216,16 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
}
break;
case XML_ROLE_ENTITY_COMPLETE:
+#if XML_GE == 0
+ // This will store "&amp;entity123;" in entity->textPtr
+ // to end up as "&entity123;" in the handler.
+ if (parser->m_declEntity != NULL) {
+ const enum XML_Error result
+ = storeSelfEntityValue(parser, parser->m_declEntity);
+ if (result != XML_ERROR_NONE)
+ return result;
+ }
+#endif
if (dtd->keepProcessing && parser->m_declEntity
&& parser->m_entityDeclHandler) {
*eventEndPP = s;
@@ -4755,6 +5409,11 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
if (parser->m_prologState.level >= parser->m_groupSize) {
if (parser->m_groupSize) {
{
+ /* Detect and prevent integer overflow */
+ if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
+ return XML_ERROR_NO_MEMORY;
+ }
+
char *const new_connector = (char *)REALLOC(
parser, parser->m_groupConnector, parser->m_groupSize *= 2);
if (new_connector == NULL) {
@@ -4765,6 +5424,16 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
}
if (dtd->scaffIndex) {
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
+ return XML_ERROR_NO_MEMORY;
+ }
+#endif
+
int *const new_scaff_index = (int *)REALLOC(
parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
if (new_scaff_index == NULL)
@@ -4845,7 +5514,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
*
* If 'standalone' is false, the DTD must have no
* parameter entities or we wouldn't have passed the outer
- * 'if' statement. That measn the only entity in the hash
+ * 'if' statement. That means the only entity in the hash
* table is the external subset name "#" which cannot be
* given as a parameter entity name in XML syntax, so the
* lookup must have returned NULL and we don't even reach
@@ -4886,12 +5555,15 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
if (parser->m_externalEntityRefHandler) {
dtd->paramEntityRead = XML_FALSE;
entity->open = XML_TRUE;
+ entityTrackingOnOpen(parser, entity, __LINE__);
if (! parser->m_externalEntityRefHandler(
parser->m_externalEntityRefHandlerArg, 0, entity->base,
entity->systemId, entity->publicId)) {
+ entityTrackingOnClose(parser, entity, __LINE__);
entity->open = XML_FALSE;
return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
}
+ entityTrackingOnClose(parser, entity, __LINE__);
entity->open = XML_FALSE;
handleDefault = XML_FALSE;
if (! dtd->paramEntityRead) {
@@ -4970,7 +5642,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
if (dtd->in_eldecl) {
ELEMENT_TYPE *el;
const XML_Char *name;
- int nameLen;
+ size_t nameLen;
const char *nxt
= (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
int myindex = nextScaffoldPart(parser);
@@ -4986,7 +5658,13 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
nameLen = 0;
for (; name[nameLen++];)
;
- dtd->contentStringLen += nameLen;
+
+ /* Detect and prevent integer overflow */
+ if (nameLen > UINT_MAX - dtd->contentStringLen) {
+ return XML_ERROR_NO_MEMORY;
+ }
+
+ dtd->contentStringLen += (unsigned)nameLen;
if (parser->m_elementDeclHandler)
handleDefault = XML_FALSE;
}
@@ -5089,6 +5767,13 @@ epilogProcessor(XML_Parser parser, const char *s, const char *end,
for (;;) {
const char *next = NULL;
int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
+#if XML_GE == 1
+ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
+ XML_ACCOUNT_DIRECT)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#endif
parser->m_eventEndPtr = next;
switch (tok) {
/* report partial linebreak - it might be the last token */
@@ -5162,6 +5847,9 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
return XML_ERROR_NO_MEMORY;
}
entity->open = XML_TRUE;
+#if XML_GE == 1
+ entityTrackingOnOpen(parser, entity, __LINE__);
+#endif
entity->processed = 0;
openEntity->next = parser->m_openInternalEntities;
parser->m_openInternalEntities = openEntity;
@@ -5170,27 +5858,31 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
openEntity->betweenDecl = betweenDecl;
openEntity->internalEventPtr = NULL;
openEntity->internalEventEndPtr = NULL;
- textStart = (char *)entity->textPtr;
- textEnd = (char *)(entity->textPtr + entity->textLen);
+ textStart = (const char *)entity->textPtr;
+ textEnd = (const char *)(entity->textPtr + entity->textLen);
/* Set a safe default value in case 'next' does not get set */
next = textStart;
-#ifdef XML_DTD
if (entity->is_param) {
int tok
= XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
- tok, next, &next, XML_FALSE, XML_FALSE);
- } else
-#endif /* XML_DTD */
+ tok, next, &next, XML_FALSE, XML_FALSE,
+ XML_ACCOUNT_ENTITY_EXPANSION);
+ } else {
result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
- textStart, textEnd, &next, XML_FALSE);
+ textStart, textEnd, &next, XML_FALSE,
+ XML_ACCOUNT_ENTITY_EXPANSION);
+ }
if (result == XML_ERROR_NONE) {
if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
entity->processed = (int)(next - textStart);
parser->m_processor = internalEntityProcessor;
- } else {
+ } else if (parser->m_openInternalEntities->entity == entity) {
+#if XML_GE == 1
+ entityTrackingOnClose(parser, entity, __LINE__);
+#endif /* XML_GE == 1 */
entity->open = XML_FALSE;
parser->m_openInternalEntities = openEntity->next;
/* put openEntity back in list of free instances */
@@ -5213,52 +5905,67 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
return XML_ERROR_UNEXPECTED_STATE;
entity = openEntity->entity;
- textStart = ((char *)entity->textPtr) + entity->processed;
- textEnd = (char *)(entity->textPtr + entity->textLen);
+ textStart = ((const char *)entity->textPtr) + entity->processed;
+ textEnd = (const char *)(entity->textPtr + entity->textLen);
/* Set a safe default value in case 'next' does not get set */
next = textStart;
-#ifdef XML_DTD
if (entity->is_param) {
int tok
= XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
- tok, next, &next, XML_FALSE, XML_TRUE);
- } else
-#endif /* XML_DTD */
+ tok, next, &next, XML_FALSE, XML_TRUE,
+ XML_ACCOUNT_ENTITY_EXPANSION);
+ } else {
result = doContent(parser, openEntity->startTagLevel,
parser->m_internalEncoding, textStart, textEnd, &next,
- XML_FALSE);
+ XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
+ }
if (result != XML_ERROR_NONE)
return result;
- else if (textEnd != next
- && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
- entity->processed = (int)(next - (char *)entity->textPtr);
+
+ if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
+ entity->processed = (int)(next - (const char *)entity->textPtr);
return result;
- } else {
- entity->open = XML_FALSE;
- parser->m_openInternalEntities = openEntity->next;
- /* put openEntity back in list of free instances */
- openEntity->next = parser->m_freeInternalEntities;
- parser->m_freeInternalEntities = openEntity;
}
-#ifdef XML_DTD
+#if XML_GE == 1
+ entityTrackingOnClose(parser, entity, __LINE__);
+#endif
+ entity->open = XML_FALSE;
+ parser->m_openInternalEntities = openEntity->next;
+ /* put openEntity back in list of free instances */
+ openEntity->next = parser->m_freeInternalEntities;
+ parser->m_freeInternalEntities = openEntity;
+
+ // If there are more open entities we want to stop right here and have the
+ // upcoming call to XML_ResumeParser continue with entity content, or it would
+ // be ignored altogether.
+ if (parser->m_openInternalEntities != NULL
+ && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
+ return XML_ERROR_NONE;
+ }
+
if (entity->is_param) {
int tok;
parser->m_processor = prologProcessor;
tok = XmlPrologTok(parser->m_encoding, s, end, &next);
return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE);
- } else
-#endif /* XML_DTD */
- {
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
+ XML_ACCOUNT_DIRECT);
+ } else {
parser->m_processor = contentProcessor;
/* see externalEntityContentProcessor vs contentProcessor */
- return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding,
- s, end, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer);
+ result = doContent(parser, parser->m_parentParser ? 1 : 0,
+ parser->m_encoding, s, end, nextPtr,
+ (XML_Bool)! parser->m_parsingStatus.finalBuffer,
+ XML_ACCOUNT_DIRECT);
+ if (result == XML_ERROR_NONE) {
+ if (! storeRawNames(parser))
+ return XML_ERROR_NO_MEMORY;
+ }
+ return result;
}
}
@@ -5273,9 +5980,10 @@ errorProcessor(XML_Parser parser, const char *s, const char *end,
static enum XML_Error
storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
- const char *ptr, const char *end, STRING_POOL *pool) {
+ const char *ptr, const char *end, STRING_POOL *pool,
+ enum XML_Account account) {
enum XML_Error result
- = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
+ = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
if (result)
return result;
if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
@@ -5287,11 +5995,23 @@ storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
static enum XML_Error
appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
- const char *ptr, const char *end, STRING_POOL *pool) {
+ const char *ptr, const char *end, STRING_POOL *pool,
+ enum XML_Account account) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
+#ifndef XML_DTD
+ UNUSED_P(account);
+#endif
+
for (;;) {
- const char *next;
+ const char *next
+ = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
int tok = XmlAttributeValueTok(enc, ptr, end, &next);
+#if XML_GE == 1
+ if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
+ accountingOnAbort(parser);
+ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ }
+#endif
switch (tok) {
case XML_TOK_NONE:
return XML_ERROR_NONE;
@@ -5351,6 +6071,14 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
XML_Char ch = (XML_Char)XmlPredefinedEntityName(
enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
if (ch) {
+#if XML_GE == 1
+ /* NOTE: We are replacing 4-6 characters original input for 1 character
+ * so there is no amplification and hence recording without
+ * protection. */
+ accountingDiffTolerated(parser, tok, (char *)&ch,
+ ((char *)&ch) + sizeof(XML_Char), __LINE__,
+ XML_ACCOUNT_ENTITY_EXPANSION);
+#endif /* XML_GE == 1 */
if (! poolAppendChar(pool, ch))
return XML_ERROR_NO_MEMORY;
break;
@@ -5428,9 +6156,16 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
enum XML_Error result;
const XML_Char *textEnd = entity->textPtr + entity->textLen;
entity->open = XML_TRUE;
+#if XML_GE == 1
+ entityTrackingOnOpen(parser, entity, __LINE__);
+#endif
result = appendAttributeValue(parser, parser->m_internalEncoding,
- isCdata, (char *)entity->textPtr,
- (char *)textEnd, pool);
+ isCdata, (const char *)entity->textPtr,
+ (const char *)textEnd, pool,
+ XML_ACCOUNT_ENTITY_EXPANSION);
+#if XML_GE == 1
+ entityTrackingOnClose(parser, entity, __LINE__);
+#endif
entity->open = XML_FALSE;
if (result)
return result;
@@ -5458,16 +6193,20 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
/* not reached */
}
+#if XML_GE == 1
static enum XML_Error
storeEntityValue(XML_Parser parser, const ENCODING *enc,
- const char *entityTextPtr, const char *entityTextEnd) {
+ const char *entityTextPtr, const char *entityTextEnd,
+ enum XML_Account account) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
STRING_POOL *pool = &(dtd->entityValuePool);
enum XML_Error result = XML_ERROR_NONE;
-#ifdef XML_DTD
+# ifdef XML_DTD
int oldInEntityValue = parser->m_prologState.inEntityValue;
parser->m_prologState.inEntityValue = 1;
-#endif /* XML_DTD */
+# else
+ UNUSED_P(account);
+# endif /* XML_DTD */
/* never return Null for the value argument in EntityDeclHandler,
since this would indicate an external entity; therefore we
have to make sure that entityValuePool.start is not null */
@@ -5477,11 +6216,20 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
}
for (;;) {
- const char *next;
+ const char *next
+ = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
+
+ if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
+ account)) {
+ accountingOnAbort(parser);
+ result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
+ goto endEntityValue;
+ }
+
switch (tok) {
case XML_TOK_PARAM_ENTITY_REF:
-#ifdef XML_DTD
+# ifdef XML_DTD
if (parser->m_isParamEntity || enc != parser->m_encoding) {
const XML_Char *name;
ENTITY *entity;
@@ -5504,7 +6252,7 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
dtd->keepProcessing = dtd->standalone;
goto endEntityValue;
}
- if (entity->open) {
+ if (entity->open || (entity == parser->m_declEntity)) {
if (enc == parser->m_encoding)
parser->m_eventPtr = entityTextPtr;
result = XML_ERROR_RECURSIVE_ENTITY_REF;
@@ -5514,13 +6262,16 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
if (parser->m_externalEntityRefHandler) {
dtd->paramEntityRead = XML_FALSE;
entity->open = XML_TRUE;
+ entityTrackingOnOpen(parser, entity, __LINE__);
if (! parser->m_externalEntityRefHandler(
parser->m_externalEntityRefHandlerArg, 0, entity->base,
entity->systemId, entity->publicId)) {
+ entityTrackingOnClose(parser, entity, __LINE__);
entity->open = XML_FALSE;
result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
goto endEntityValue;
}
+ entityTrackingOnClose(parser, entity, __LINE__);
entity->open = XML_FALSE;
if (! dtd->paramEntityRead)
dtd->keepProcessing = dtd->standalone;
@@ -5528,16 +6279,19 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
dtd->keepProcessing = dtd->standalone;
} else {
entity->open = XML_TRUE;
+ entityTrackingOnOpen(parser, entity, __LINE__);
result = storeEntityValue(
- parser, parser->m_internalEncoding, (char *)entity->textPtr,
- (char *)(entity->textPtr + entity->textLen));
+ parser, parser->m_internalEncoding, (const char *)entity->textPtr,
+ (const char *)(entity->textPtr + entity->textLen),
+ XML_ACCOUNT_ENTITY_EXPANSION);
+ entityTrackingOnClose(parser, entity, __LINE__);
entity->open = XML_FALSE;
if (result)
goto endEntityValue;
}
break;
}
-#endif /* XML_DTD */
+# endif /* XML_DTD */
/* In the internal subset, PE references are not legal
within markup declarations, e.g entity values in this case. */
parser->m_eventPtr = entityTextPtr;
@@ -5618,12 +6372,38 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
entityTextPtr = next;
}
endEntityValue:
-#ifdef XML_DTD
+# ifdef XML_DTD
parser->m_prologState.inEntityValue = oldInEntityValue;
-#endif /* XML_DTD */
+# endif /* XML_DTD */
return result;
}
+#else /* XML_GE == 0 */
+
+static enum XML_Error
+storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
+  // This will store "&amp;entity123;" in entity->textPtr
+  // to end up as "&entity123;" in the handler.
+  const char *const entity_start = "&amp;";
+  const char *const entity_end = ";";
+
+  STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
+  if (! poolAppendString(pool, entity_start)
+      || ! poolAppendString(pool, entity->name)
+      || ! poolAppendString(pool, entity_end)) {
+    poolDiscard(pool);
+    return XML_ERROR_NO_MEMORY;
+  }
+
+  entity->textPtr = poolStart(pool);
+  entity->textLen = (int)(poolLength(pool));
+  poolFinish(pool);
+
+  return XML_ERROR_NONE;
+}
+
+#endif /* XML_GE == 0 */
+
static void FASTCALL
normalizeLines(XML_Char *s) {
XML_Char *p;
@@ -5734,8 +6514,9 @@ reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
} while ((convert_res != XML_CONVERT_COMPLETED)
&& (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
} else
- parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s,
- (int)((XML_Char *)end - (XML_Char *)s));
+ parser->m_defaultHandler(
+ parser->m_handlerArg, (const XML_Char *)s,
+ (int)((const XML_Char *)end - (const XML_Char *)s));
}
static int
@@ -5763,7 +6544,24 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
}
} else {
DEFAULT_ATTRIBUTE *temp;
+
+ /* Detect and prevent integer overflow */
+ if (type->allocDefaultAtts > INT_MAX / 2) {
+ return 0;
+ }
+
int count = type->allocDefaultAtts * 2;
+
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
+ return 0;
+ }
+#endif
+
temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
(count * sizeof(DEFAULT_ATTRIBUTE)));
if (temp == NULL)
@@ -5788,7 +6586,7 @@ setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
const XML_Char *name;
for (name = elementType->name; *name; name++) {
if (*name == XML_T(ASCII_COLON)) {
- PREFIX *prefix;
+ PPREFIX *prefix;
const XML_Char *s;
for (s = elementType->name; s != name; s++) {
if (! poolAppendChar(&dtd->pool, *s))
@@ -5796,8 +6594,8 @@ setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
}
if (! poolAppendChar(&dtd->pool, XML_T('\0')))
return 0;
- prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
- sizeof(PREFIX));
+ prefix = (PPREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
+ sizeof(PPREFIX));
if (! prefix)
return 0;
if (prefix->name == poolStart(&dtd->pool))
@@ -5822,7 +6620,7 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
name = poolStoreString(&dtd->pool, enc, start, end);
if (! name)
return NULL;
- /* skip quotation mark - its storage will be re-used (like in name[-1]) */
+ /* skip quotation mark - its storage will be reused (like in name[-1]) */
++name;
id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
sizeof(ATTRIBUTE_ID));
@@ -5841,8 +6639,8 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
if (name[5] == XML_T('\0'))
id->prefix = &dtd->defaultPrefix;
else
- id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
- sizeof(PREFIX));
+ id->prefix = (PPREFIX *)lookup(parser, &dtd->prefixes, name + 6,
+ sizeof(PPREFIX));
id->xmlns = XML_TRUE;
} else {
int i;
@@ -5856,8 +6654,8 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
}
if (! poolAppendChar(&dtd->pool, XML_T('\0')))
return NULL;
- id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
- poolStart(&dtd->pool), sizeof(PREFIX));
+ id->prefix = (PPREFIX *)lookup(parser, &dtd->prefixes,
+ poolStart(&dtd->pool), sizeof(PPREFIX));
if (! id->prefix)
return NULL;
if (id->prefix->name == poolStart(&dtd->pool))
@@ -5921,7 +6719,7 @@ getContext(XML_Parser parser) {
int i;
int len;
const XML_Char *s;
- PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
+ PPREFIX *prefix = (PPREFIX *)hashTableIterNext(&iter);
if (! prefix)
break;
if (! prefix->binding) {
@@ -5972,6 +6770,10 @@ getContext(XML_Parser parser) {
static XML_Bool
setContext(XML_Parser parser, const XML_Char *context) {
+ if (context == NULL) {
+ return XML_FALSE;
+ }
+
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
const XML_Char *s = context;
@@ -5989,15 +6791,15 @@ setContext(XML_Parser parser, const XML_Char *context) {
context = s;
poolDiscard(&parser->m_tempPool);
} else if (*s == XML_T(ASCII_EQUALS)) {
- PREFIX *prefix;
+ PPREFIX *prefix;
if (poolLength(&parser->m_tempPool) == 0)
prefix = &dtd->defaultPrefix;
else {
if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
return XML_FALSE;
prefix
- = (PREFIX *)lookup(parser, &dtd->prefixes,
- poolStart(&parser->m_tempPool), sizeof(PREFIX));
+ = (PPREFIX *)lookup(parser, &dtd->prefixes,
+ poolStart(&parser->m_tempPool), sizeof(PPREFIX));
if (! prefix)
return XML_FALSE;
if (prefix->name == poolStart(&parser->m_tempPool)) {
@@ -6053,7 +6855,7 @@ normalizePublicId(XML_Char *publicId) {
static DTD *
dtdCreate(const XML_Memory_Handling_Suite *ms) {
- DTD *p = (DTD *)ms->malloc_fcn(sizeof(DTD));
+ DTD *p = ms->malloc_fcn(sizeof(DTD));
if (p == NULL)
return p;
poolInit(&(p->pool), ms);
@@ -6164,13 +6966,13 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
hashTableIterInit(&iter, &(oldDtd->prefixes));
for (;;) {
const XML_Char *name;
- const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
+ const PPREFIX *oldP = (PPREFIX *)hashTableIterNext(&iter);
if (! oldP)
break;
name = poolCopyString(&(newDtd->pool), oldP->name);
if (! name)
return 0;
- if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
+ if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PPREFIX)))
return 0;
}
@@ -6202,7 +7004,7 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
if (oldA->prefix == &oldDtd->defaultPrefix)
newA->prefix = &newDtd->defaultPrefix;
else
- newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
+ newA->prefix = (PPREFIX *)lookup(oldParser, &(newDtd->prefixes),
oldA->prefix->name, 0);
}
}
@@ -6226,8 +7028,18 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
if (! newE)
return 0;
if (oldE->nDefaultAtts) {
- newE->defaultAtts = (DEFAULT_ATTRIBUTE *)ms->malloc_fcn(
- oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if ((size_t)oldE->nDefaultAtts
+ > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) {
+ return 0;
+ }
+#endif
+ newE->defaultAtts
+ = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
if (! newE->defaultAtts) {
return 0;
}
@@ -6237,7 +7049,7 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
oldE->idAtt->name, 0);
newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
if (oldE->prefix)
- newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
+ newE->prefix = (PPREFIX *)lookup(oldParser, &(newDtd->prefixes),
oldE->prefix->name, 0);
for (i = 0; i < newE->nDefaultAtts; i++) {
newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
@@ -6389,7 +7201,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
/* table->size is a power of 2 */
table->size = (size_t)1 << INIT_POWER;
tsize = table->size * sizeof(NAMED *);
- table->v = (NAMED **)table->mem->malloc_fcn(tsize);
+ table->v = table->mem->malloc_fcn(tsize);
if (! table->v) {
table->size = 0;
return NULL;
@@ -6414,10 +7226,22 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
/* check for overflow (table is half full) */
if (table->used >> (table->power - 1)) {
unsigned char newPower = table->power + 1;
+
+ /* Detect and prevent invalid shift */
+ if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
+ return NULL;
+ }
+
size_t newSize = (size_t)1 << newPower;
unsigned long newMask = (unsigned long)newSize - 1;
+
+ /* Detect and prevent integer overflow */
+ if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
+ return NULL;
+ }
+
size_t tsize = newSize * sizeof(NAMED *);
- NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize);
+ NAMED **newV = table->mem->malloc_fcn(tsize);
if (! newV)
return NULL;
memset(newV, 0, tsize);
@@ -6446,7 +7270,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
}
}
}
- table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize);
+ table->v[i] = table->mem->malloc_fcn(createSize);
if (! table->v[i])
return NULL;
memset(table->v[i], 0, createSize);
@@ -6485,7 +7309,7 @@ hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
static void FASTCALL
hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
iter->p = table->v;
- iter->end = iter->p + table->size;
+ iter->end = iter->p ? iter->p + table->size : NULL;
}
static NAMED *FASTCALL
@@ -6550,7 +7374,7 @@ poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
return NULL;
for (;;) {
const enum XML_Convert_Result convert_res = XmlConvert(
- enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
+ enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
if ((convert_res == XML_CONVERT_COMPLETED)
|| (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
break;
@@ -6734,7 +7558,7 @@ poolGrow(STRING_POOL *pool) {
if (bytesToAllocate == 0)
return XML_FALSE;
- tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate);
+ tem = pool->mem->malloc_fcn(bytesToAllocate);
if (! tem)
return XML_FALSE;
tem->size = blockSize;
@@ -6756,6 +7580,15 @@ nextScaffoldPart(XML_Parser parser) {
int next;
if (! dtd->scaffIndex) {
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) {
+ return -1;
+ }
+#endif
dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
if (! dtd->scaffIndex)
return -1;
@@ -6765,6 +7598,20 @@ nextScaffoldPart(XML_Parser parser) {
if (dtd->scaffCount >= dtd->scaffSize) {
CONTENT_SCAFFOLD *temp;
if (dtd->scaffold) {
+ /* Detect and prevent integer overflow */
+ if (dtd->scaffSize > UINT_MAX / 2u) {
+ return -1;
+ }
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
+ return -1;
+ }
+#endif
+
temp = (CONTENT_SCAFFOLD *)REALLOC(
parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
if (temp == NULL)
@@ -6796,55 +7643,130 @@ nextScaffoldPart(XML_Parser parser) {
return next;
}
-static void
-build_node(XML_Parser parser, int src_node, XML_Content *dest,
- XML_Content **contpos, XML_Char **strpos) {
- DTD *const dtd = parser->m_dtd; /* save one level of indirection */
- dest->type = dtd->scaffold[src_node].type;
- dest->quant = dtd->scaffold[src_node].quant;
- if (dest->type == XML_CTYPE_NAME) {
- const XML_Char *src;
- dest->name = *strpos;
- src = dtd->scaffold[src_node].name;
- for (;;) {
- *(*strpos)++ = *src;
- if (! *src)
- break;
- src++;
- }
- dest->numchildren = 0;
- dest->children = NULL;
- } else {
- unsigned int i;
- int cn;
- dest->numchildren = dtd->scaffold[src_node].childcnt;
- dest->children = *contpos;
- *contpos += dest->numchildren;
- for (i = 0, cn = dtd->scaffold[src_node].firstchild; i < dest->numchildren;
- i++, cn = dtd->scaffold[cn].nextsib) {
- build_node(parser, cn, &(dest->children[i]), contpos, strpos);
- }
- dest->name = NULL;
- }
-}
-
static XML_Content *
build_model(XML_Parser parser) {
+ /* Function build_model transforms the existing parser->m_dtd->scaffold
+ * array of CONTENT_SCAFFOLD tree nodes into a new array of
+ * XML_Content tree nodes followed by a gapless list of zero-terminated
+ * strings. */
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
XML_Content *ret;
- XML_Content *cpos;
- XML_Char *str;
- int allocsize = (dtd->scaffCount * sizeof(XML_Content)
- + (dtd->contentStringLen * sizeof(XML_Char)));
+ XML_Char *str; /* the current string writing location */
+
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
+ return NULL;
+ }
+ if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
+ return NULL;
+ }
+#endif
+ if (dtd->scaffCount * sizeof(XML_Content)
+ > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
+ return NULL;
+ }
+
+ const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
+ + (dtd->contentStringLen * sizeof(XML_Char)));
ret = (XML_Content *)MALLOC(parser, allocsize);
if (! ret)
return NULL;
- str = (XML_Char *)(&ret[dtd->scaffCount]);
- cpos = &ret[1];
+ /* What follows is an iterative implementation (of what was previously done
+ * recursively in a dedicated function called "build_node". The old recursive
+ * build_node could be forced into stack exhaustion from input as small as a
+ * few megabyte, and so that was a security issue. Hence, a function call
+ * stack is avoided now by resolving recursion.)
+ *
+ * The iterative approach works as follows:
+ *
+ * - We have two writing pointers, both walking up the result array; one does
+ * the work, the other creates "jobs" for its colleague to do, and leads
+ * the way:
+ *
+ * - The faster one, pointer jobDest, always leads and writes "what job
+ * to do" by the other, once they reach that place in the
+ * array: leader "jobDest" stores the source node array index (relative
+ * to array dtd->scaffold) in field "numchildren".
+ *
+ * - The slower one, pointer dest, looks at the value stored in the
+ * "numchildren" field (which actually holds a source node array index
+ * at that time) and puts the real data from dtd->scaffold in.
+ *
+ * - Before the loop starts, jobDest writes source array index 0
+ * (where the root node is located) so that dest will have something to do
+ * when it starts operation.
+ *
+ * - Whenever nodes with children are encountered, jobDest appends
+ * them as new jobs, in order. As a result, tree node siblings are
+ * adjacent in the resulting array, for example:
+ *
+ * [0] root, has two children
+ * [1] first child of 0, has three children
+ * [3] first child of 1, does not have children
+ * [4] second child of 1, does not have children
+ * [5] third child of 1, does not have children
+ * [2] second child of 0, does not have children
+ *
+ * Or (the same data) presented in flat array view:
+ *
+ * [0] root, has two children
+ *
+ * [1] first child of 0, has three children
+ * [2] second child of 0, does not have children
+ *
+ * [3] first child of 1, does not have children
+ * [4] second child of 1, does not have children
+ * [5] third child of 1, does not have children
+ *
+ * - The algorithm repeats until all target array indices have been processed.
+ */
+ XML_Content *dest = ret; /* tree node writing location, moves upwards */
+ XML_Content *const destLimit = &ret[dtd->scaffCount];
+ XML_Content *jobDest = ret; /* next free writing location in target array */
+ str = (XML_Char *)&ret[dtd->scaffCount];
+
+ /* Add the starting job, the root node (index 0) of the source tree */
+ (jobDest++)->numchildren = 0;
+
+ for (; dest < destLimit; dest++) {
+ /* Retrieve source tree array index from job storage */
+ const int src_node = (int)dest->numchildren;
+
+ /* Convert item */
+ dest->type = dtd->scaffold[src_node].type;
+ dest->quant = dtd->scaffold[src_node].quant;
+ if (dest->type == XML_CTYPE_NAME) {
+ const XML_Char *src;
+ dest->name = str;
+ src = dtd->scaffold[src_node].name;
+ for (;;) {
+ *str++ = *src;
+ if (! *src)
+ break;
+ src++;
+ }
+ dest->numchildren = 0;
+ dest->children = NULL;
+ } else {
+ unsigned int i;
+ int cn;
+ dest->name = NULL;
+ dest->numchildren = dtd->scaffold[src_node].childcnt;
+ dest->children = jobDest;
+
+ /* Append scaffold indices of children to array */
+ for (i = 0, cn = dtd->scaffold[src_node].firstchild;
+ i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
+ (jobDest++)->numchildren = (unsigned int)cn;
+ }
+ }
- build_node(parser, 0, ret, &cpos, &str);
return ret;
}
@@ -6873,7 +7795,7 @@ getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
static XML_Char *
copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
- int charsRequired = 0;
+ size_t charsRequired = 0;
XML_Char *result;
/* First determine how long the string is */
@@ -6891,3 +7813,759 @@ copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
memcpy(result, s, charsRequired * sizeof(XML_Char));
return result;
}
+
+#if XML_GE == 1
+
+static float
+accountingGetCurrentAmplification(XML_Parser rootParser) {
+  //                                          1.........1.........12 => 22
+  const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
+  const XmlBigCount countBytesOutput
+      = rootParser->m_accounting.countBytesDirect
+        + rootParser->m_accounting.countBytesIndirect;
+  const float amplificationFactor
+      = rootParser->m_accounting.countBytesDirect
+            ? (countBytesOutput
+               / (float)(rootParser->m_accounting.countBytesDirect))
+            : ((lenOfShortestInclude
+                + rootParser->m_accounting.countBytesIndirect)
+               / (float)lenOfShortestInclude);
+  assert(! rootParser->m_parentParser); /* must be called on the root */
+  return amplificationFactor;
+}
+
+static void
+accountingReportStats(XML_Parser originParser, const char *epilog) {
+ const XML_Parser rootParser = getRootParserOf(originParser, NULL);
+ assert(! rootParser->m_parentParser);
+
+ if (rootParser->m_accounting.debugLevel == 0u) {
+ return;
+ }
+
+ const float amplificationFactor
+ = accountingGetCurrentAmplification(rootParser);
+ fprintf(stderr,
+ "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
+ "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
+ (void *)rootParser, rootParser->m_accounting.countBytesDirect,
+ rootParser->m_accounting.countBytesIndirect,
+ (double)amplificationFactor, epilog);
+}
+
+static void
+accountingOnAbort(XML_Parser originParser) {
+ accountingReportStats(originParser, " ABORTING\n");
+}
+
+static void
+accountingReportDiff(XML_Parser rootParser,
+ unsigned int levelsAwayFromRootParser, const char *before,
+ const char *after, ptrdiff_t bytesMore, int source_line,
+ enum XML_Account account) {
+ assert(! rootParser->m_parentParser);
+
+ fprintf(stderr,
+ " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%u, xmlparse.c:%d) %*s\"",
+ bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
+ levelsAwayFromRootParser, source_line, 10, "");
+
+ const char ellipis[] = "[..]";
+ const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
+ const unsigned int contextLength = 10;
+
+ /* Note: Performance is of no concern here */
+ const char *walker = before;
+ if ((rootParser->m_accounting.debugLevel >= 3u)
+ || (after - before)
+ <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
+ for (; walker < after; walker++) {
+ fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
+ }
+ } else {
+ for (; walker < before + contextLength; walker++) {
+ fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
+ }
+ fprintf(stderr, ellipis);
+ walker = after - contextLength;
+ for (; walker < after; walker++) {
+ fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
+ }
+ }
+ fprintf(stderr, "\"\n");
+}
+
+static XML_Bool
+accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
+ const char *after, int source_line,
+ enum XML_Account account) {
+  /* Note: We need to check the token type *first* to be sure that
+   *       we can even access variable <after>, safely.
+   *       E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
+ switch (tok) {
+ case XML_TOK_INVALID:
+ case XML_TOK_PARTIAL:
+ case XML_TOK_PARTIAL_CHAR:
+ case XML_TOK_NONE:
+ return XML_TRUE;
+ }
+
+ if (account == XML_ACCOUNT_NONE)
+ return XML_TRUE; /* because these bytes have been accounted for, already */
+
+ unsigned int levelsAwayFromRootParser;
+ const XML_Parser rootParser
+ = getRootParserOf(originParser, &levelsAwayFromRootParser);
+ assert(! rootParser->m_parentParser);
+
+ const int isDirect
+ = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
+ const ptrdiff_t bytesMore = after - before;
+
+ XmlBigCount *const additionTarget
+ = isDirect ? &rootParser->m_accounting.countBytesDirect
+ : &rootParser->m_accounting.countBytesIndirect;
+
+ /* Detect and avoid integer overflow */
+ if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
+ return XML_FALSE;
+ *additionTarget += bytesMore;
+
+ const XmlBigCount countBytesOutput
+ = rootParser->m_accounting.countBytesDirect
+ + rootParser->m_accounting.countBytesIndirect;
+ const float amplificationFactor
+ = accountingGetCurrentAmplification(rootParser);
+ const XML_Bool tolerated
+ = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
+ || (amplificationFactor
+ <= rootParser->m_accounting.maximumAmplificationFactor);
+
+ if (rootParser->m_accounting.debugLevel >= 2u) {
+ accountingReportStats(rootParser, "");
+ accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
+ bytesMore, source_line, account);
+ }
+
+ return tolerated;
+}
+
+unsigned long long
+testingAccountingGetCountBytesDirect(XML_Parser parser) {
+ if (! parser)
+ return 0;
+ return parser->m_accounting.countBytesDirect;
+}
+
+unsigned long long
+testingAccountingGetCountBytesIndirect(XML_Parser parser) {
+ if (! parser)
+ return 0;
+ return parser->m_accounting.countBytesIndirect;
+}
+
+static void
+entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
+ const char *action, int sourceLine) {
+ assert(! rootParser->m_parentParser);
+ if (rootParser->m_entity_stats.debugLevel == 0u)
+ return;
+
+# if defined(XML_UNICODE)
+ const char *const entityName = "[..]";
+# else
+ const char *const entityName = entity->name;
+# endif
+
+ fprintf(
+ stderr,
+ "expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s; %s length %d (xmlparse.c:%d)\n",
+ (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
+ rootParser->m_entity_stats.currentDepth,
+ rootParser->m_entity_stats.maximumDepthSeen,
+ (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
+ entity->is_param ? "%" : "&", entityName, action, entity->textLen,
+ sourceLine);
+}
+
+static void
+entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
+ const XML_Parser rootParser = getRootParserOf(originParser, NULL);
+ assert(! rootParser->m_parentParser);
+
+ rootParser->m_entity_stats.countEverOpened++;
+ rootParser->m_entity_stats.currentDepth++;
+ if (rootParser->m_entity_stats.currentDepth
+ > rootParser->m_entity_stats.maximumDepthSeen) {
+ rootParser->m_entity_stats.maximumDepthSeen++;
+ }
+
+ entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
+}
+
+static void
+entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
+ const XML_Parser rootParser = getRootParserOf(originParser, NULL);
+ assert(! rootParser->m_parentParser);
+
+ entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
+ rootParser->m_entity_stats.currentDepth--;
+}
+
+static XML_Parser
+getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
+ XML_Parser rootParser = parser;
+ unsigned int stepsTakenUpwards = 0;
+ while (rootParser->m_parentParser) {
+ rootParser = rootParser->m_parentParser;
+ stepsTakenUpwards++;
+ }
+ assert(! rootParser->m_parentParser);
+ if (outLevelDiff != NULL) {
+ *outLevelDiff = stepsTakenUpwards;
+ }
+ return rootParser;
+}
+
+const char *
+unsignedCharToPrintable(unsigned char c) {
+ switch (c) {
+ case 0:
+ return "\\0";
+ case 1:
+ return "\\x1";
+ case 2:
+ return "\\x2";
+ case 3:
+ return "\\x3";
+ case 4:
+ return "\\x4";
+ case 5:
+ return "\\x5";
+ case 6:
+ return "\\x6";
+ case 7:
+ return "\\x7";
+ case 8:
+ return "\\x8";
+ case 9:
+ return "\\t";
+ case 10:
+ return "\\n";
+ case 11:
+ return "\\xB";
+ case 12:
+ return "\\xC";
+ case 13:
+ return "\\r";
+ case 14:
+ return "\\xE";
+ case 15:
+ return "\\xF";
+ case 16:
+ return "\\x10";
+ case 17:
+ return "\\x11";
+ case 18:
+ return "\\x12";
+ case 19:
+ return "\\x13";
+ case 20:
+ return "\\x14";
+ case 21:
+ return "\\x15";
+ case 22:
+ return "\\x16";
+ case 23:
+ return "\\x17";
+ case 24:
+ return "\\x18";
+ case 25:
+ return "\\x19";
+ case 26:
+ return "\\x1A";
+ case 27:
+ return "\\x1B";
+ case 28:
+ return "\\x1C";
+ case 29:
+ return "\\x1D";
+ case 30:
+ return "\\x1E";
+ case 31:
+ return "\\x1F";
+ case 32:
+ return " ";
+ case 33:
+ return "!";
+ case 34:
+ return "\\\"";
+ case 35:
+ return "#";
+ case 36:
+ return "$";
+ case 37:
+ return "%";
+ case 38:
+ return "&";
+ case 39:
+ return "'";
+ case 40:
+ return "(";
+ case 41:
+ return ")";
+ case 42:
+ return "*";
+ case 43:
+ return "+";
+ case 44:
+ return ",";
+ case 45:
+ return "-";
+ case 46:
+ return ".";
+ case 47:
+ return "/";
+ case 48:
+ return "0";
+ case 49:
+ return "1";
+ case 50:
+ return "2";
+ case 51:
+ return "3";
+ case 52:
+ return "4";
+ case 53:
+ return "5";
+ case 54:
+ return "6";
+ case 55:
+ return "7";
+ case 56:
+ return "8";
+ case 57:
+ return "9";
+ case 58:
+ return ":";
+ case 59:
+ return ";";
+ case 60:
+ return "<";
+ case 61:
+ return "=";
+ case 62:
+ return ">";
+ case 63:
+ return "?";
+ case 64:
+ return "@";
+ case 65:
+ return "A";
+ case 66:
+ return "B";
+ case 67:
+ return "C";
+ case 68:
+ return "D";
+ case 69:
+ return "E";
+ case 70:
+ return "F";
+ case 71:
+ return "G";
+ case 72:
+ return "H";
+ case 73:
+ return "I";
+ case 74:
+ return "J";
+ case 75:
+ return "K";
+ case 76:
+ return "L";
+ case 77:
+ return "M";
+ case 78:
+ return "N";
+ case 79:
+ return "O";
+ case 80:
+ return "P";
+ case 81:
+ return "Q";
+ case 82:
+ return "R";
+ case 83:
+ return "S";
+ case 84:
+ return "T";
+ case 85:
+ return "U";
+ case 86:
+ return "V";
+ case 87:
+ return "W";
+ case 88:
+ return "X";
+ case 89:
+ return "Y";
+ case 90:
+ return "Z";
+ case 91:
+ return "[";
+ case 92:
+ return "\\\\";
+ case 93:
+ return "]";
+ case 94:
+ return "^";
+ case 95:
+ return "_";
+ case 96:
+ return "`";
+ case 97:
+ return "a";
+ case 98:
+ return "b";
+ case 99:
+ return "c";
+ case 100:
+ return "d";
+ case 101:
+ return "e";
+ case 102:
+ return "f";
+ case 103:
+ return "g";
+ case 104:
+ return "h";
+ case 105:
+ return "i";
+ case 106:
+ return "j";
+ case 107:
+ return "k";
+ case 108:
+ return "l";
+ case 109:
+ return "m";
+ case 110:
+ return "n";
+ case 111:
+ return "o";
+ case 112:
+ return "p";
+ case 113:
+ return "q";
+ case 114:
+ return "r";
+ case 115:
+ return "s";
+ case 116:
+ return "t";
+ case 117:
+ return "u";
+ case 118:
+ return "v";
+ case 119:
+ return "w";
+ case 120:
+ return "x";
+ case 121:
+ return "y";
+ case 122:
+ return "z";
+ case 123:
+ return "{";
+ case 124:
+ return "|";
+ case 125:
+ return "}";
+ case 126:
+ return "~";
+ case 127:
+ return "\\x7F";
+ case 128:
+ return "\\x80";
+ case 129:
+ return "\\x81";
+ case 130:
+ return "\\x82";
+ case 131:
+ return "\\x83";
+ case 132:
+ return "\\x84";
+ case 133:
+ return "\\x85";
+ case 134:
+ return "\\x86";
+ case 135:
+ return "\\x87";
+ case 136:
+ return "\\x88";
+ case 137:
+ return "\\x89";
+ case 138:
+ return "\\x8A";
+ case 139:
+ return "\\x8B";
+ case 140:
+ return "\\x8C";
+ case 141:
+ return "\\x8D";
+ case 142:
+ return "\\x8E";
+ case 143:
+ return "\\x8F";
+ case 144:
+ return "\\x90";
+ case 145:
+ return "\\x91";
+ case 146:
+ return "\\x92";
+ case 147:
+ return "\\x93";
+ case 148:
+ return "\\x94";
+ case 149:
+ return "\\x95";
+ case 150:
+ return "\\x96";
+ case 151:
+ return "\\x97";
+ case 152:
+ return "\\x98";
+ case 153:
+ return "\\x99";
+ case 154:
+ return "\\x9A";
+ case 155:
+ return "\\x9B";
+ case 156:
+ return "\\x9C";
+ case 157:
+ return "\\x9D";
+ case 158:
+ return "\\x9E";
+ case 159:
+ return "\\x9F";
+ case 160:
+ return "\\xA0";
+ case 161:
+ return "\\xA1";
+ case 162:
+ return "\\xA2";
+ case 163:
+ return "\\xA3";
+ case 164:
+ return "\\xA4";
+ case 165:
+ return "\\xA5";
+ case 166:
+ return "\\xA6";
+ case 167:
+ return "\\xA7";
+ case 168:
+ return "\\xA8";
+ case 169:
+ return "\\xA9";
+ case 170:
+ return "\\xAA";
+ case 171:
+ return "\\xAB";
+ case 172:
+ return "\\xAC";
+ case 173:
+ return "\\xAD";
+ case 174:
+ return "\\xAE";
+ case 175:
+ return "\\xAF";
+ case 176:
+ return "\\xB0";
+ case 177:
+ return "\\xB1";
+ case 178:
+ return "\\xB2";
+ case 179:
+ return "\\xB3";
+ case 180:
+ return "\\xB4";
+ case 181:
+ return "\\xB5";
+ case 182:
+ return "\\xB6";
+ case 183:
+ return "\\xB7";
+ case 184:
+ return "\\xB8";
+ case 185:
+ return "\\xB9";
+ case 186:
+ return "\\xBA";
+ case 187:
+ return "\\xBB";
+ case 188:
+ return "\\xBC";
+ case 189:
+ return "\\xBD";
+ case 190:
+ return "\\xBE";
+ case 191:
+ return "\\xBF";
+ case 192:
+ return "\\xC0";
+ case 193:
+ return "\\xC1";
+ case 194:
+ return "\\xC2";
+ case 195:
+ return "\\xC3";
+ case 196:
+ return "\\xC4";
+ case 197:
+ return "\\xC5";
+ case 198:
+ return "\\xC6";
+ case 199:
+ return "\\xC7";
+ case 200:
+ return "\\xC8";
+ case 201:
+ return "\\xC9";
+ case 202:
+ return "\\xCA";
+ case 203:
+ return "\\xCB";
+ case 204:
+ return "\\xCC";
+ case 205:
+ return "\\xCD";
+ case 206:
+ return "\\xCE";
+ case 207:
+ return "\\xCF";
+ case 208:
+ return "\\xD0";
+ case 209:
+ return "\\xD1";
+ case 210:
+ return "\\xD2";
+ case 211:
+ return "\\xD3";
+ case 212:
+ return "\\xD4";
+ case 213:
+ return "\\xD5";
+ case 214:
+ return "\\xD6";
+ case 215:
+ return "\\xD7";
+ case 216:
+ return "\\xD8";
+ case 217:
+ return "\\xD9";
+ case 218:
+ return "\\xDA";
+ case 219:
+ return "\\xDB";
+ case 220:
+ return "\\xDC";
+ case 221:
+ return "\\xDD";
+ case 222:
+ return "\\xDE";
+ case 223:
+ return "\\xDF";
+ case 224:
+ return "\\xE0";
+ case 225:
+ return "\\xE1";
+ case 226:
+ return "\\xE2";
+ case 227:
+ return "\\xE3";
+ case 228:
+ return "\\xE4";
+ case 229:
+ return "\\xE5";
+ case 230:
+ return "\\xE6";
+ case 231:
+ return "\\xE7";
+ case 232:
+ return "\\xE8";
+ case 233:
+ return "\\xE9";
+ case 234:
+ return "\\xEA";
+ case 235:
+ return "\\xEB";
+ case 236:
+ return "\\xEC";
+ case 237:
+ return "\\xED";
+ case 238:
+ return "\\xEE";
+ case 239:
+ return "\\xEF";
+ case 240:
+ return "\\xF0";
+ case 241:
+ return "\\xF1";
+ case 242:
+ return "\\xF2";
+ case 243:
+ return "\\xF3";
+ case 244:
+ return "\\xF4";
+ case 245:
+ return "\\xF5";
+ case 246:
+ return "\\xF6";
+ case 247:
+ return "\\xF7";
+ case 248:
+ return "\\xF8";
+ case 249:
+ return "\\xF9";
+ case 250:
+ return "\\xFA";
+ case 251:
+ return "\\xFB";
+ case 252:
+ return "\\xFC";
+ case 253:
+ return "\\xFD";
+ case 254:
+ return "\\xFE";
+ case 255:
+ return "\\xFF";
+ default:
+ assert(0); /* never gets here */
+ return "dead code";
+ }
+ assert(0); /* never gets here */
+}
+
+#endif /* XML_GE == 1 */
+
+static unsigned long
+getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) { /* Return environment variable variableName parsed as a base-10 unsigned long, or defaultDebugLevel when it is unset or does not parse completely. */
+ const char *const valueOrNull = getenv(variableName);
+ if (valueOrNull == NULL) { /* variable is not set */
+ return defaultDebugLevel;
+ }
+ const char *const value = valueOrNull;
+
+ errno = 0; /* cleared so that a non-zero errno after strtoul was set by strtoul */
+ char *afterValue = NULL;
+ unsigned long debugLevel = strtoul(value, &afterValue, 10);
+ if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) { /* conversion error, nothing converted, or characters left after the number */
+ errno = 0; /* reset errno before falling back to the default */
+ return defaultDebugLevel;
+ }
+
+ return debugLevel;
+}
diff --git a/Modules/expat/xmlrole.c b/Modules/expat/xmlrole.c
index 4d3e3e86e9e864..2c48bf40867953 100644
--- a/Modules/expat/xmlrole.c
+++ b/Modules/expat/xmlrole.c
@@ -7,7 +7,15 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper
+ Copyright (c) 2002 Greg Stein
+ Copyright (c) 2002-2006 Karl Waclawek
+ Copyright (c) 2002-2003 Fred L. Drake, Jr.
+ Copyright (c) 2005-2009 Steven Solie
+ Copyright (c) 2016-2023 Sebastian Pipping
+ Copyright (c) 2017 Rhodri James
+ Copyright (c) 2019 David Loffredo
+ Copyright (c) 2021 Donghee Na
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -30,15 +38,13 @@
USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+#include "expat_config.h"
+
#include <stddef.h>
#ifdef _WIN32
# include "winconfig.h"
-#else
-# ifdef HAVE_EXPAT_CONFIG_H
-# include <expat_config.h>
-# endif
-#endif /* ndef _WIN32 */
+#endif
#include "expat_external.h"
#include "internal.h"
@@ -1220,6 +1226,8 @@ common(PROLOG_STATE *state, int tok) {
#ifdef XML_DTD
if (! state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
return XML_ROLE_INNER_PARAM_ENTITY_REF;
+#else
+ UNUSED_P(tok);
#endif
state->handler = error;
return XML_ROLE_ERROR;
diff --git a/Modules/expat/xmlrole.h b/Modules/expat/xmlrole.h
index 036aba64fd29c6..a7904274c91d4e 100644
--- a/Modules/expat/xmlrole.h
+++ b/Modules/expat/xmlrole.h
@@ -7,7 +7,10 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper
+ Copyright (c) 2002 Karl Waclawek
+ Copyright (c) 2002 Fred L. Drake, Jr.
+ Copyright (c) 2017-2024 Sebastian Pipping
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -124,9 +127,9 @@ typedef struct prolog_state {
#endif /* XML_DTD */
} PROLOG_STATE;
-void XmlPrologStateInit(PROLOG_STATE *);
+void XmlPrologStateInit(PROLOG_STATE *state);
#ifdef XML_DTD
-void XmlPrologStateInitExternalEntity(PROLOG_STATE *);
+void XmlPrologStateInitExternalEntity(PROLOG_STATE *state);
#endif /* XML_DTD */
#define XmlTokenRole(state, tok, ptr, end, enc) \
diff --git a/Modules/expat/xmltok.c b/Modules/expat/xmltok.c
index 54cfedb85c28c4..29a66d72ceea5e 100644
--- a/Modules/expat/xmltok.c
+++ b/Modules/expat/xmltok.c
@@ -7,7 +7,23 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper
+ Copyright (c) 2001-2003 Fred L. Drake, Jr.
+ Copyright (c) 2002 Greg Stein
+ Copyright (c) 2002-2016 Karl Waclawek
+ Copyright (c) 2005-2009 Steven Solie
+ Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016 Pascal Cuoq
+ Copyright (c) 2016 Don Lewis
+ Copyright (c) 2017 Rhodri James
+ Copyright (c) 2017 Alexander Bluhm
+ Copyright (c) 2017 Benbuck Nason
+ Copyright (c) 2017 José Gutiérrez de la Concha
+ Copyright (c) 2019 David Loffredo
+ Copyright (c) 2021 Donghee Na
+ Copyright (c) 2022 Martin Ettl
+ Copyright (c) 2022 Sean McBride
+ Copyright (c) 2023 Hanno Böck
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -30,24 +46,14 @@
USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#ifdef _WIN32
-# include "winconfig.h"
-#else
-# ifdef HAVE_EXPAT_CONFIG_H
-# include <expat_config.h>
-# endif
-#endif /* ndef _WIN32 */
+#include "expat_config.h"
#include <stddef.h>
#include <string.h> /* memcpy */
+#include <stdbool.h>
-#if defined(_MSC_VER) && (_MSC_VER <= 1700)
-/* for vs2012/11.0/1700 and earlier Visual Studio compilers */
-# define bool int
-# define false 0
-# define true 1
-#else
-# include <stdbool.h>
+#ifdef _WIN32
+# include "winconfig.h"
#endif
#include "expat_external.h"
@@ -72,7 +78,7 @@
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
#define UCS2_GET_NAMING(pages, hi, lo) \
- (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo)&0x1F)))
+ (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
/* A 2 byte UTF-8 representation splits the characters 11 bits between
the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
@@ -95,13 +101,8 @@
+ ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \
& (1u << (((byte)[2]) & 0x1F)))
-#define UTF8_GET_NAMING(pages, p, n) \
- ((n) == 2 \
- ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
- : ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0))
-
/* Detection of invalid UTF-8 sequences is based on Table 3.1B
- of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
+ of Unicode 3.2: https://www.unicode.org/unicode/reports/tr28/
with the additional restriction of not allowing the Unicode
code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
Implementation details:
@@ -226,7 +227,7 @@ struct normal_encoding {
/* isNmstrt2 */ NULL, /* isNmstrt3 */ NULL, /* isNmstrt4 */ NULL, \
/* isInvalid2 */ NULL, /* isInvalid3 */ NULL, /* isInvalid4 */ NULL
-static int FASTCALL checkCharRefNumber(int);
+static int FASTCALL checkCharRefNumber(int result);
#include "xmltok_impl.h"
#include "ascii.h"
@@ -244,7 +245,7 @@ static int FASTCALL checkCharRefNumber(int);
#endif
#define SB_BYTE_TYPE(enc, p) \
- (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
+ (((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
#ifdef XML_MIN_SIZE
static int PTRFASTCALL
@@ -269,8 +270,14 @@ sb_byteToAscii(const ENCODING *enc, const char *p) {
#define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p))
-#define IS_INVALID_CHAR(enc, p, n) \
- (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
+#ifdef XML_MIN_SIZE
+# define IS_INVALID_CHAR(enc, p, n) \
+ (AS_NORMAL_ENCODING(enc)->isInvalid##n \
+ && AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
+#else
+# define IS_INVALID_CHAR(enc, p, n) \
+ (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
+#endif
#ifdef XML_MIN_SIZE
# define IS_NAME_CHAR_MINBPC(enc, p) \
@@ -292,7 +299,7 @@ sb_charMatches(const ENCODING *enc, const char *p, int c) {
}
#else
/* c is an ASCII character */
-# define CHAR_MATCHES(enc, p, c) (*(p) == c)
+# define CHAR_MATCHES(enc, p, c) (*(p) == (c))
#endif
#define PREFIX(ident) normal_##ident
@@ -402,7 +409,7 @@ utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
unsigned short *to = *toP;
const char *from = *fromP;
while (from < fromLim && to < toLim) {
- switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
+ switch (SB_BYTE_TYPE(enc, from)) {
case BT_LEAD2:
if (fromLim - from < 2) {
res = XML_CONVERT_INPUT_INCOMPLETE;
@@ -589,13 +596,13 @@ static const struct normal_encoding ascii_encoding
static int PTRFASTCALL
unicode_byte_type(char hi, char lo) {
switch ((unsigned char)hi) {
- /* 0xD800–0xDBFF first 16-bit code unit or high surrogate (W1) */
+ /* 0xD800-0xDBFF first 16-bit code unit or high surrogate (W1) */
case 0xD8:
case 0xD9:
case 0xDA:
case 0xDB:
return BT_LEAD4;
- /* 0xDC00–0xDFFF second 16-bit code unit or low surrogate (W2) */
+ /* 0xDC00-0xDFFF second 16-bit code unit or low surrogate (W2) */
case 0xDC:
case 0xDD:
case 0xDE:
@@ -710,33 +717,28 @@ unicode_byte_type(char hi, char lo) {
return res; \
}
-#define SET2(ptr, ch) (((ptr)[0] = ((ch)&0xff)), ((ptr)[1] = ((ch) >> 8)))
#define GET_LO(ptr) ((unsigned char)(ptr)[0])
#define GET_HI(ptr) ((unsigned char)(ptr)[1])
DEFINE_UTF16_TO_UTF8(little2_)
DEFINE_UTF16_TO_UTF16(little2_)
-#undef SET2
#undef GET_LO
#undef GET_HI
-#define SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch)&0xFF)))
#define GET_LO(ptr) ((unsigned char)(ptr)[1])
#define GET_HI(ptr) ((unsigned char)(ptr)[0])
DEFINE_UTF16_TO_UTF8(big2_)
DEFINE_UTF16_TO_UTF16(big2_)
-#undef SET2
#undef GET_LO
#undef GET_HI
#define LITTLE2_BYTE_TYPE(enc, p) \
- ((p)[1] == 0 ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
- : unicode_byte_type((p)[1], (p)[0]))
+ ((p)[1] == 0 ? SB_BYTE_TYPE(enc, p) : unicode_byte_type((p)[1], (p)[0]))
#define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1)
-#define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == c)
+#define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == (c))
#define LITTLE2_IS_NAME_CHAR_MINBPC(p) \
UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0])
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(p) \
@@ -867,11 +869,9 @@ static const struct normal_encoding internal_little2_encoding
#endif
#define BIG2_BYTE_TYPE(enc, p) \
- ((p)[0] == 0 \
- ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
- : unicode_byte_type((p)[0], (p)[1]))
+ ((p)[0] == 0 ? SB_BYTE_TYPE(enc, p + 1) : unicode_byte_type((p)[0], (p)[1]))
#define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1)
-#define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == c)
+#define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == (c))
#define BIG2_IS_NAME_CHAR_MINBPC(p) \
UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1])
#define BIG2_IS_NMSTRT_CHAR_MINBPC(p) \
diff --git a/Modules/expat/xmltok.h b/Modules/expat/xmltok.h
index 2adbf5307befae..c51fce1ec1518b 100644
--- a/Modules/expat/xmltok.h
+++ b/Modules/expat/xmltok.h
@@ -7,7 +7,11 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper
+ Copyright (c) 2002 Fred L. Drake, Jr.
+ Copyright (c) 2002-2005 Karl Waclawek
+ Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2017 Rhodri James
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -285,7 +289,8 @@ int XmlParseXmlDecl(int isGeneralTextEntity, const ENCODING *enc,
const char **encodingNamePtr,
const ENCODING **namedEncodingPtr, int *standalonePtr);
-int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name);
+int XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr,
+ const char *name);
const ENCODING *XmlGetUtf8InternalEncoding(void);
const ENCODING *XmlGetUtf16InternalEncoding(void);
int FASTCALL XmlUtf8Encode(int charNumber, char *buf);
@@ -303,7 +308,8 @@ int XmlParseXmlDeclNS(int isGeneralTextEntity, const ENCODING *enc,
const char **encodingNamePtr,
const ENCODING **namedEncodingPtr, int *standalonePtr);
-int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name);
+int XmlInitEncodingNS(INIT_ENCODING *p, const ENCODING **encPtr,
+ const char *name);
const ENCODING *XmlGetUtf8InternalEncodingNS(void);
const ENCODING *XmlGetUtf16InternalEncodingNS(void);
ENCODING *XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert,
diff --git a/Modules/expat/xmltok_impl.c b/Modules/expat/xmltok_impl.c
index c209221cd79d13..239a2d06c4512c 100644
--- a/Modules/expat/xmltok_impl.c
+++ b/Modules/expat/xmltok_impl.c
@@ -1,4 +1,4 @@
-/* This file is included!
+/* This file is included (from xmltok.c, 1-3 times depending on XML_MIN_SIZE)!
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
@@ -7,7 +7,16 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper
+ Copyright (c) 2002 Fred L. Drake, Jr.
+ Copyright (c) 2002-2016 Karl Waclawek
+ Copyright (c) 2016-2022 Sebastian Pipping
+ Copyright (c) 2017 Rhodri James
+ Copyright (c) 2018 Benjamin Peterson
+ Copyright (c) 2018 Anton Maklakov
+ Copyright (c) 2019 David Loffredo
+ Copyright (c) 2020 Boris Kolpackov
+ Copyright (c) 2022 Martin Ettl
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -32,7 +41,7 @@
#ifdef XML_TOK_IMPL_C
-# ifndef IS_INVALID_CHAR
+# ifndef IS_INVALID_CHAR // i.e. for UTF-16 and XML_MIN_SIZE not defined
# define IS_INVALID_CHAR(enc, ptr, n) (0)
# endif
@@ -61,7 +70,7 @@
case BT_LEAD##n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
- if (! IS_NAME_CHAR(enc, ptr, n)) { \
+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
@@ -88,9 +97,9 @@
# define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
case BT_LEAD##n: \
- if (end - ptr < n) \
+ if ((end) - (ptr) < (n)) \
return XML_TOK_PARTIAL_CHAR; \
- if (! IS_NMSTRT_CHAR(enc, ptr, n)) { \
+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
@@ -116,7 +125,8 @@
# define PREFIX(ident) ident
# endif
-# define HAS_CHARS(enc, ptr, end, count) (end - ptr >= count * MINBPC(enc))
+# define HAS_CHARS(enc, ptr, end, count) \
+ ((end) - (ptr) >= ((count) * MINBPC(enc)))
# define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1)
@@ -1134,6 +1144,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
case BT_LEAD##n: \
if (end - ptr < n) \
return XML_TOK_PARTIAL_CHAR; \
+ if (IS_INVALID_CHAR(enc, ptr, n)) { \
+ *nextTokPtr = ptr; \
+ return XML_TOK_INVALID; \
+ } \
if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
ptr += n; \
tok = XML_TOK_NAME; \
@@ -1262,7 +1276,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
switch (BYTE_TYPE(enc, ptr)) {
# define LEAD_CASE(n) \
case BT_LEAD##n: \
- ptr += n; \
+ ptr += n; /* NOTE: The encoding has already been validated. */ \
break;
LEAD_CASE(2)
LEAD_CASE(3)
@@ -1331,7 +1345,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
switch (BYTE_TYPE(enc, ptr)) {
# define LEAD_CASE(n) \
case BT_LEAD##n: \
- ptr += n; \
+ ptr += n; /* NOTE: The encoding has already been validated. */ \
break;
LEAD_CASE(2)
LEAD_CASE(3)
@@ -1510,7 +1524,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
state = inName; \
}
# define LEAD_CASE(n) \
- case BT_LEAD##n: \
+ case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \
START_NAME ptr += (n - MINBPC(enc)); \
break;
LEAD_CASE(2)
@@ -1722,7 +1736,7 @@ PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
switch (BYTE_TYPE(enc, ptr)) {
# define LEAD_CASE(n) \
case BT_LEAD##n: \
- ptr += n; \
+ ptr += n; /* NOTE: The encoding has already been validated. */ \
break;
LEAD_CASE(2)
LEAD_CASE(3)
@@ -1767,14 +1781,15 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
switch (BYTE_TYPE(enc, ptr)) {
# define LEAD_CASE(n) \
case BT_LEAD##n: \
- ptr += n; \
+ ptr += n; /* NOTE: The encoding has already been validated. */ \
+ pos->columnNumber++; \
break;
LEAD_CASE(2)
LEAD_CASE(3)
LEAD_CASE(4)
# undef LEAD_CASE
case BT_LF:
- pos->columnNumber = (XML_Size)-1;
+ pos->columnNumber = 0;
pos->lineNumber++;
ptr += MINBPC(enc);
break;
@@ -1783,13 +1798,13 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
ptr += MINBPC(enc);
if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC(enc);
- pos->columnNumber = (XML_Size)-1;
+ pos->columnNumber = 0;
break;
default:
ptr += MINBPC(enc);
+ pos->columnNumber++;
break;
}
- pos->columnNumber++;
}
}
diff --git a/Modules/expat/xmltok_impl.h b/Modules/expat/xmltok_impl.h
index e925dbc7e2c833..3469c4ae138c95 100644
--- a/Modules/expat/xmltok_impl.h
+++ b/Modules/expat/xmltok_impl.h
@@ -7,7 +7,8 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper
+ Copyright (c) 2017-2019 Sebastian Pipping
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -44,7 +45,7 @@ enum {
BT_LF, /* line feed = "\n" */
BT_GT, /* greater than = ">" */
BT_QUOT, /* quotation character = "\"" */
- BT_APOS, /* aposthrophe = "'" */
+ BT_APOS, /* apostrophe = "'" */
BT_EQUALS, /* equal sign = "=" */
BT_QUEST, /* question mark = "?" */
BT_EXCL, /* exclamation mark = "!" */
diff --git a/Modules/expat/xmltok_ns.c b/Modules/expat/xmltok_ns.c
index 919c74e9f97fe8..fbdd3e3c7b7999 100644
--- a/Modules/expat/xmltok_ns.c
+++ b/Modules/expat/xmltok_ns.c
@@ -7,7 +7,11 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
- Copyright (c) 2000-2017 Expat development team
+ Copyright (c) 2000 Clark Cooper
+ Copyright (c) 2002 Greg Stein
+ Copyright (c) 2002 Fred L. Drake, Jr.
+ Copyright (c) 2002-2006 Karl Waclawek
+ Copyright (c) 2017-2021 Sebastian Pipping
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -89,7 +93,7 @@ NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr,
static const ENCODING *
NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) {
# define ENCODING_MAX 128
- char buf[ENCODING_MAX];
+ char buf[ENCODING_MAX] = "";
char *p = buf;
int i;
XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1);
diff --git a/PCbuild/pyexpat.vcxproj b/PCbuild/pyexpat.vcxproj
index 61a2697e116630..31483d90029e7b 100644
--- a/PCbuild/pyexpat.vcxproj
+++ b/PCbuild/pyexpat.vcxproj
@@ -61,16 +61,21 @@
$(PySourcePath)Modules\expat;%(AdditionalIncludeDirectories)
_CRT_SECURE_NO_WARNINGS;PYEXPAT_EXPORTS;XML_STATIC;%(PreprocessorDefinitions)
+
+ $(expatLibDir)\libexpat.lib
+
+ 0x1e000000
+
-
-
-
-
+
+
+
+
-
-
-
+
+
+
diff --git a/PCbuild/python.props b/PCbuild/python.props
index 1c9f3f7d228a8a..97bcf91de30b85 100644
--- a/PCbuild/python.props
+++ b/PCbuild/python.props
@@ -64,7 +64,8 @@
$(zlibDir)\
$(zlibDir)include
$(zlibDir)lib
-
+
+ $(AS_DEPENDENCIES_DIR)/lib
_d