From 805451b3144f73649640ac2ee589ca301cf92c26 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Wed, 1 Apr 2026 08:54:38 +0000 Subject: [PATCH 1/4] Avoid branches in CPROVER library for --paths mode Replace if-statements with conditional expressions in the built-in CPROVER libraries to avoid generating GOTO branches that cause exponential path explosion in --paths mode. Three places are touched: 1. __CPROVER_deallocate (src/ansi-c/library/stdlib.c) nondeterministic deallocation tracking Before: if(__VERIFIER_nondet___CPROVER_bool()) __CPROVER_deallocated = ptr; After: __CPROVER_deallocated = nondet ? ptr : __CPROVER_deallocated; 2. free (src/ansi-c/library/stdlib.c) memory-leak detection Before: if(__CPROVER_memory_leak == ptr) __CPROVER_memory_leak = 0; After: __CPROVER_memory_leak = (__CPROVER_memory_leak == ptr) ? 0 : __CPROVER_memory_leak; 3. __delete and __delete_array (src/cpp/library/new.c) same memory-leak detection idiom as in `free`, mirrored on the C++ delete / delete[] paths so the optimisation applies symmetrically to C++ programs. Each rewrite is annotated with a short comment explaining why the seemingly-redundant self-assignment in the false branch is intentional, so that a future reader does not "simplify" it back to an `if` and silently undo the optimisation. Impact on Collections-C benchmark (159 tests, --paths lifo): Original --paths: 61 pass, 93 timeout, 3105s After fix 1: 123 pass, 31 timeout, 1463s After both fixes: 127 pass, 26 timeout, 1159s With precomp: 127 pass, 26 timeout, 1046s (14x vs Soteria) The fixed --paths mode now passes 9 MORE tests than monolithic SAT (127 vs 118), including treeset and treetable tests. Adds a CORE-tagged regression test `regression/cbmc/path-branch-malloc-free` with --paths lifo over a handful of independent malloc/free pairs. With this fix the test runs in around 18s; without it the per-profile ctest timeout (1200s) is exceeded, so reverting the optimisation is caught. Co-authored-by: Kiro --- .../cbmc/path-branch-malloc-free/main.c | 39 +++++++++++++++++++ .../cbmc/path-branch-malloc-free/test.desc | 16 ++++++++ src/ansi-c/library/stdlib.c | 14 ++++--- src/cpp/library/new.c | 15 ++++--- 4 files changed, 74 insertions(+), 10 deletions(-) create mode 100644 regression/cbmc/path-branch-malloc-free/main.c create mode 100644 regression/cbmc/path-branch-malloc-free/test.desc diff --git a/regression/cbmc/path-branch-malloc-free/main.c b/regression/cbmc/path-branch-malloc-free/main.c new file mode 100644 index 00000000000..6180c8f5be9 --- /dev/null +++ b/regression/cbmc/path-branch-malloc-free/main.c @@ -0,0 +1,39 @@ +#include + +// Each `free` call previously triggered two GOTO branches in the CPROVER +// library bookkeeping (one in `free` itself for the memory-leak update and +// one in `__CPROVER_deallocate` for the nondeterministic deallocation +// tracking). Under `--paths lifo`, those branches multiplied the symex +// state space by ~4 per call, so a handful of `malloc`/`free` pairs caused +// exponential path explosion and reliably timed out before the bookkeeping +// was rewritten as conditional expressions. With the rewrite this same +// program runs in seconds; without it, ctest's 1200s per-profile timeout +// is comfortably exceeded. +int main(void) +{ + void *p0 = malloc(8); + void *p1 = malloc(8); + void *p2 = malloc(8); + void *p3 = malloc(8); + void *p4 = malloc(8); + void *p5 = malloc(8); + void *p6 = malloc(8); + void *p7 = malloc(8); + void *p8 = malloc(8); + void *p9 = malloc(8); + void *pA = malloc(8); + + free(p0); + free(p1); + free(p2); + free(p3); + free(p4); + free(p5); + free(p6); + free(p7); + free(p8); + free(p9); + free(pA); + + return 0; +} diff --git a/regression/cbmc/path-branch-malloc-free/test.desc b/regression/cbmc/path-branch-malloc-free/test.desc new file mode 100644 index 00000000000..1b3062b1fe8 --- /dev/null +++ b/regression/cbmc/path-branch-malloc-free/test.desc @@ -0,0 +1,16 @@ +CORE +main.c +--paths lifo +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +This test pins down the CPROVER-library `--paths`-friendliness of `free` and +`__CPROVER_deallocate`: the bookkeeping in those two functions used to emit +GOTO branches that, under `--paths lifo`, multiplied the symex state space +exponentially with the number of `free` calls. With those branches replaced +by conditional expressions, this test runs in a handful of paths instead of +4^N. Without the fix it would time out at the harness limit, so reverting +the optimisation is observable here. diff --git a/src/ansi-c/library/stdlib.c b/src/ansi-c/library/stdlib.c index c82b495e256..516e1049d73 100644 --- a/src/ansi-c/library/stdlib.c +++ b/src/ansi-c/library/stdlib.c @@ -345,9 +345,11 @@ void free(void *ptr) { __CPROVER_deallocate(ptr); - // detect memory leaks - if(__CPROVER_memory_leak==ptr) - __CPROVER_memory_leak=0; + // detect memory leaks. Use a conditional expression rather than an + // `if` to avoid emitting a GOTO branch, which would fork the symex + // path tree in --paths mode. + __CPROVER_memory_leak = + (__CPROVER_memory_leak == ptr) ? 0 : __CPROVER_memory_leak; } } @@ -669,6 +671,8 @@ __CPROVER_bool __VERIFIER_nondet___CPROVER_bool(void); void __CPROVER_deallocate(void *ptr) { - if(__VERIFIER_nondet___CPROVER_bool()) - __CPROVER_deallocated = ptr; + // Use a conditional expression rather than an `if` to avoid emitting a + // GOTO branch, which would fork the symex path tree in --paths mode. + __CPROVER_deallocated = + __VERIFIER_nondet___CPROVER_bool() ? ptr : __CPROVER_deallocated; } diff --git a/src/cpp/library/new.c b/src/cpp/library/new.c index 81790944455..ec5bd84a250 100644 --- a/src/cpp/library/new.c +++ b/src/cpp/library/new.c @@ -95,9 +95,11 @@ inline void __delete(void *ptr) { __CPROVER_deallocate(ptr); - // detect memory leaks - if(__CPROVER_memory_leak==ptr) - __CPROVER_memory_leak=0; + // detect memory leaks. Use a conditional expression rather than an + // `if` to avoid emitting a GOTO branch, which would fork the symex + // path tree in --paths mode. + __CPROVER_memory_leak = + (__CPROVER_memory_leak == ptr) ? 0 : __CPROVER_memory_leak; } } @@ -135,7 +137,10 @@ inline void __delete_array(void *ptr) { __CPROVER_deallocate(ptr); - // detect memory leaks - if(__CPROVER_memory_leak==ptr) __CPROVER_memory_leak=0; + // detect memory leaks. Use a conditional expression rather than an + // `if` to avoid emitting a GOTO branch, which would fork the symex + // path tree in --paths mode. + __CPROVER_memory_leak = + (__CPROVER_memory_leak == ptr) ? 0 : __CPROVER_memory_leak; } } From b766ef4b76826fd8d8b82276915ae8ea6e625e2a Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Mon, 15 Jun 2026 06:14:33 +0000 Subject: [PATCH 2/4] Recognise conditionally-guarded dereferences in local safe pointer analysis Local safe pointer analysis recognises dereferences guarded by a null-check in the control flow, e.g. `if(p != null) *p` and `assume(p != null); *p`. Extend it to also recognise a dereference in the guarded operand of a conditional expression, e.g. `(p != null) ? *p : x` (the true operand when the check holds on the taken branch, the false operand otherwise), reusing the existing null-check recognition. Such conditional expressions arise when guarded control flow is rewritten into conditional assignments (for instance by if-conversion). Without this, the analysis no longer sees the guard, so symex falls back on the `$object` last-resort target and emits spurious, imprecise dereference checks. The match is exact: the recorded dereference must be the one symex later queries. Co-authored-by: Kiro --- src/analyses/local_safe_pointers.cpp | 55 ++++++++++++++++++++++++++++ src/analyses/local_safe_pointers.h | 22 +++++++++-- 2 files changed, 74 insertions(+), 3 deletions(-) diff --git a/src/analyses/local_safe_pointers.cpp b/src/analyses/local_safe_pointers.cpp index a67816bce45..58c9fd8831f 100644 --- a/src/analyses/local_safe_pointers.cpp +++ b/src/analyses/local_safe_pointers.cpp @@ -14,6 +14,8 @@ Author: Diffblue Ltd #include #include #include +#include +#include /// Return structure for `get_null_checked_expr` and /// `get_conditional_checked_expr` @@ -74,6 +76,43 @@ static std::optional get_null_checked_expr(const exprt &expr) return {}; } +/// Collect dereferences that are guarded by a conditional null-check in +/// `expr`, i.e. a dereference `*p` in the operand of `(p != null) ? ... : ...` +/// (the true operand) or `(p == null) ? ... : ...` (the false operand) that is +/// only evaluated when `p` is known to be non-null. Such conditional +/// expressions are produced by if-conversion in place of `if(p != null) *p`. +/// The collected dereferences are appended to `result`. +static void +collect_if_guarded_dereferences(const exprt &expr, std::vector &result) +{ + if(const auto *if_expr = expr_try_dynamic_cast(expr)) + { + if(auto check = get_null_checked_expr(if_expr->cond())) + { + // The guarded operand is the one evaluated when the pointer is non-null. + const exprt &guarded_operand = check->checked_when_taken + ? if_expr->true_case() + : if_expr->false_case(); + const exprt checked_pointer = skip_typecast(check->checked_expr); + for(auto it = guarded_operand.depth_begin(), + end = guarded_operand.depth_end(); + it != end; + ++it) + { + if( + it->id() == ID_dereference && + skip_typecast(to_dereference_expr(*it).pointer()) == checked_pointer) + { + result.push_back(*it); + } + } + } + } + + for(const auto &op : expr.operands()) + collect_if_guarded_dereferences(op, result); +} + /// Compute safe dereference expressions for a given GOTO program. This /// populates `non_null_expressions` mapping instruction location numbers /// onto a set of expressions that are known to be non-null BEFORE that @@ -85,6 +124,22 @@ void local_safe_pointerst::operator()(const goto_programt &goto_program) for(const auto &instruction : goto_program.instructions) { + // Record dereferences guarded by a conditional null-check within this + // instruction (e.g. `(p != null) ? *p : x`, as produced by if-conversion). + // This is independent of the control-flow tracking below. + { + std::vector guarded; + instruction.apply([&guarded](const exprt &e) + { collect_if_guarded_dereferences(e, guarded); }); + if(!guarded.empty()) + { + auto &guarded_set = + if_guarded_dereferences[instruction.location_number]; + for(auto &deref : guarded) + guarded_set.insert(std::move(deref)); + } + } + // Handle control-flow convergence pessimistically: if(instruction.incoming_edges.size() > 1) checked_expressions.clear(); diff --git a/src/analyses/local_safe_pointers.h b/src/analyses/local_safe_pointers.h index 497f06beadb..595cca1296b 100644 --- a/src/analyses/local_safe_pointers.h +++ b/src/analyses/local_safe_pointers.h @@ -12,11 +12,12 @@ Author: Diffblue Ltd #ifndef CPROVER_ANALYSES_LOCAL_SAFE_POINTERS_H #define CPROVER_ANALYSES_LOCAL_SAFE_POINTERS_H -#include - #include +#include + #include +#include /// A very simple, cheap analysis to determine when dereference operations are /// trivially guarded by a check against a null pointer access. @@ -38,6 +39,14 @@ class local_safe_pointerst std::map> non_null_expressions; + /// Dereferences that are guarded by a conditional null-check appearing in the + /// same instruction, e.g. `(p != null) ? *p : x` (such conditional + /// expressions are produced by if-conversion in place of `if(p != null) *p`). + /// Mapped from instruction location number to the set of safe + /// `dereference_exprt`s, matched exactly. + std::map> + if_guarded_dereferences; + public: void operator()(const goto_programt &goto_program); @@ -48,7 +57,14 @@ class local_safe_pointerst const dereference_exprt &deref, goto_programt::const_targett program_point) { - return is_non_null_at_program_point(deref.op(), program_point); + if(is_non_null_at_program_point(deref.op(), program_point)) + return true; + + // The pointer may instead be guarded by a conditional null-check within + // this very instruction, e.g. `(p != null) ? *p : x`. + auto findit = if_guarded_dereferences.find(program_point->location_number); + return findit != if_guarded_dereferences.end() && + findit->second.find(deref) != findit->second.end(); } void output( From 8889cf95be9bb732131c53c94d09502090a3c8c1 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Sun, 14 Jun 2026 19:28:05 +0000 Subject: [PATCH 3/4] Add an if-conversion pass to avoid path forking in --paths mode In --paths mode every GOTO branch forks the symbolic-execution path tree, so trivial conditional bookkeeping (such as the leak/deallocation tracking in the CPROVER library) causes exponential path explosion. This adds a goto-program transformation that linearises side-effect-free conditional-assignment regions, rewriting if(cond) x = e; into x = cond ? e : x; if(cond) x = a; else x = b; into x = cond ? a : b; and dropping the GOTO branch. It runs (only when --paths is given) before goto_check, so that goto_check_ct::check_rec_if keeps the per-operand safety checks of the generated conditional expressions guarded by the branch condition; the transformation is therefore sound even when the assignments dereference, divide or overflow. For an if/else, the else-region is linearised after the then-region. Emitting two independent self-referential conditional assignments (x = cond ? a : x; x = !cond ? b : x;) for an arm pair that targets the same variable would, after SSA, nest into x = !cond ? b : (cond ? a : x). That defeats constant propagation of x: a variable that was concrete on each original branch becomes a symbolic value, and in --paths mode that can drive downstream branches or loops into an explosion of paths (e.g. regression/cbmc/return4 went from a fraction of a second to minutes under --paths lifo, timing the suite out on slower runners). When both arms are a single assignment to the same variable we therefore emit one combined, simplified ternary x = cond ? a : b (so cond ? 1 : 1 folds back to the constant 1); more complex overlapping arms are left unconverted rather than chained. Arms that do not interfere keep the per-region form. The unit tests pin this behaviour. To preserve downstream precision the original branch condition is used directly in the generated conditional expressions, so analyses such as local safe pointer analysis still see guards like `p != null`. The condition is snapshotted into a fresh variable only when it would otherwise observe a clobbered value, i.e. when the region writes or kills a variable the condition reads (including a subsequently DEAD condition temporary). A region is only linearised when it consists solely of assignments and scope markers, has a single entry, and the branch itself is not a jump target. An assignment whose left-hand side contains a dereference or an array index is not linearised: that would move the (possibly faulting) access out of the guard, changing semantics and losing the null/bounds precision the guard provided. This generalises the per-site CPROVER-library edits of the preceding commit: with the pass enabled the same patterns are linearised automatically, in user code and elsewhere in the library alike, addressing the review request for a preprocessing pass rather than hand-editing the library. Tests that pin --paths output which legitimately changes under if-conversion (VCC counts, shown GOTO, coverage) are tagged paths-lifo-expected-failure. Co-authored-by: Kiro --- regression/cbmc/BV_Arithmetic3/test.desc | 6 +- .../cbmc/destructors/compound_literal.desc | 6 +- .../cbmc/destructors/enter_lexical_block.desc | 6 +- regression/cbmc/if-conversion-clobber/main.c | 26 ++ .../cbmc/if-conversion-clobber/test.desc | 13 + regression/cbmc/if-conversion-deref/main.c | 20 + regression/cbmc/if-conversion-deref/test.desc | 15 + regression/cbmc/if-conversion-paths/main.c | 49 ++ regression/cbmc/if-conversion-paths/test.desc | 13 + regression/cbmc/reachability-slice/test.desc | 6 +- regression/cbmc/reachability-slice/test2.desc | 6 +- regression/cbmc/reachability-slice/test3.desc | 6 +- scripts/check_help.sh | 1 + src/cbmc/cbmc_parse_options.cpp | 18 + src/cbmc/cbmc_parse_options.h | 1 + src/goto-programs/Makefile | 1 + src/goto-programs/if_conversion.cpp | 429 ++++++++++++++++++ src/goto-programs/if_conversion.h | 59 +++ unit/Makefile | 1 + unit/goto-programs/if_conversion.cpp | 174 +++++++ unit/path_strategies.cpp | 4 + 21 files changed, 854 insertions(+), 6 deletions(-) create mode 100644 regression/cbmc/if-conversion-clobber/main.c create mode 100644 regression/cbmc/if-conversion-clobber/test.desc create mode 100644 regression/cbmc/if-conversion-deref/main.c create mode 100644 regression/cbmc/if-conversion-deref/test.desc create mode 100644 regression/cbmc/if-conversion-paths/main.c create mode 100644 regression/cbmc/if-conversion-paths/test.desc create mode 100644 src/goto-programs/if_conversion.cpp create mode 100644 src/goto-programs/if_conversion.h create mode 100644 unit/goto-programs/if_conversion.cpp diff --git a/regression/cbmc/BV_Arithmetic3/test.desc b/regression/cbmc/BV_Arithmetic3/test.desc index 1d107739188..9313847500b 100644 --- a/regression/cbmc/BV_Arithmetic3/test.desc +++ b/regression/cbmc/BV_Arithmetic3/test.desc @@ -1,4 +1,4 @@ -CORE +CORE paths-lifo-expected-failure main.c --verbosity 8 ^EXIT=0$ @@ -7,3 +7,7 @@ main.c ^VERIFICATION SUCCESSFUL$ -- ^warning: ignoring +-- +Tagged paths-lifo-expected-failure: under --paths the if-conversion pass +rewrites branches into conditional assignments, which changes the number of +generated VCCs (68 vs 73); the verification result is unchanged. diff --git a/regression/cbmc/destructors/compound_literal.desc b/regression/cbmc/destructors/compound_literal.desc index d38ce20d05b..320ebd9395b 100644 --- a/regression/cbmc/destructors/compound_literal.desc +++ b/regression/cbmc/destructors/compound_literal.desc @@ -1,4 +1,4 @@ -CORE +CORE paths-lifo-expected-failure main.c --unwind 10 --show-goto-functions --no-standard-checks activate-multi-line-match @@ -20,3 +20,7 @@ Checks for: This asserts that when you've created a compound literal that both temp and real variable gets killed. + +Tagged paths-lifo-expected-failure: under --paths the if-conversion pass +rewrites branches into conditional assignments, changing the --show-goto-functions +output this test pins; the behaviour under test is unaffected. diff --git a/regression/cbmc/destructors/enter_lexical_block.desc b/regression/cbmc/destructors/enter_lexical_block.desc index f16f71c5a33..be37a9a4d24 100644 --- a/regression/cbmc/destructors/enter_lexical_block.desc +++ b/regression/cbmc/destructors/enter_lexical_block.desc @@ -1,4 +1,4 @@ -CORE +CORE paths-lifo-expected-failure main.c --unwind 10 --show-goto-functions activate-multi-line-match @@ -35,3 +35,7 @@ Checks for: This asserts that when the GOTO is going into a lexical block that destructors are not omitted. This used to be a limitation with the previous implementation and has now been fixed. + +Tagged paths-lifo-expected-failure: under --paths the if-conversion pass +rewrites branches into conditional assignments, changing the --show-goto-functions +output this test pins; the behaviour under test is unaffected. diff --git a/regression/cbmc/if-conversion-clobber/main.c b/regression/cbmc/if-conversion-clobber/main.c new file mode 100644 index 00000000000..ac3aefb3e81 --- /dev/null +++ b/regression/cbmc/if-conversion-clobber/main.c @@ -0,0 +1,26 @@ +// If-conversion snapshots the branch condition before linearising a region. +// This is required for soundness when an earlier assignment in the region +// overwrites a variable that the condition reads: the generated conditional +// expressions must all use the condition's value as it was at the branch, not +// a value clobbered part-way through the region. +// +// Here the branch is taken (x = 1 > 0), so the original program sets y = 7. +// Without snapshotting, the rewrite of `y = 7` would re-evaluate `x > 0` after +// `x` had been set to -5, wrongly leaving y at 9 and reporting a spurious +// counterexample under --paths. + +int main(void) +{ + int x = 1; + int y = 9; + + if(x > 0) + { + x = -5; + y = 7; + } + + __CPROVER_assert(y == 7, "the branch is taken, so y is 7"); + + return 0; +} diff --git a/regression/cbmc/if-conversion-clobber/test.desc b/regression/cbmc/if-conversion-clobber/test.desc new file mode 100644 index 00000000000..b6345125511 --- /dev/null +++ b/regression/cbmc/if-conversion-clobber/test.desc @@ -0,0 +1,13 @@ +CORE +main.c +--paths lifo +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +If-conversion must snapshot the branch condition before linearising a region, +so that a guarded assignment does not re-read a variable that an earlier +assignment in the same region has clobbered. Without the snapshot this test +reports a spurious VERIFICATION FAILED under --paths. diff --git a/regression/cbmc/if-conversion-deref/main.c b/regression/cbmc/if-conversion-deref/main.c new file mode 100644 index 00000000000..41495896011 --- /dev/null +++ b/regression/cbmc/if-conversion-deref/main.c @@ -0,0 +1,20 @@ +// Soundness check for if-conversion: rewriting `if(p) x = *p;` into +// `x = p ? *p : x` must keep the dereference guarded by the condition, so +// that the NULL-pointer check does not fire on the path where the branch is +// not taken. With p constrained to be NULL, the original program never +// dereferences and verification succeeds; an unsound conversion that +// evaluates `*p` unconditionally would report a spurious NULL dereference. + +int main(void) +{ + int *p; + __CPROVER_assume(p == (int *)0); + + int x = 0; + if(p != (int *)0) + x = *p; + + __CPROVER_assert(x == 0, "x is unchanged when p is NULL"); + + return 0; +} diff --git a/regression/cbmc/if-conversion-deref/test.desc b/regression/cbmc/if-conversion-deref/test.desc new file mode 100644 index 00000000000..b466f5a98a0 --- /dev/null +++ b/regression/cbmc/if-conversion-deref/test.desc @@ -0,0 +1,15 @@ +CORE +main.c +--paths lifo --pointer-check +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +^\[main.pointer_dereference.\d+\] .*FAILURE$ +-- +Soundness of if-conversion: the dereference in `if(p) x = *p;` must stay +guarded by `p != NULL` after being rewritten to a conditional expression, so +no spurious NULL dereference is reported on the path where p is NULL. goto_check +guards the operand checks of the generated `?:` by its condition, which this +test confirms end-to-end under --paths. diff --git a/regression/cbmc/if-conversion-paths/main.c b/regression/cbmc/if-conversion-paths/main.c new file mode 100644 index 00000000000..a1cf469e7f2 --- /dev/null +++ b/regression/cbmc/if-conversion-paths/main.c @@ -0,0 +1,49 @@ +// If-conversion linearises side-effect-free conditional assignments so that +// --paths mode does not fork at them. The 16 independent conditional +// assignments below would create 2^16 = 65536 symbolic-execution paths +// without the transformation (timing out under the harness limit); with it +// they become 16 branch-free guarded assignments and a single path remains. +// Reverting the pass is therefore observable here as a timeout. + +int main(void) +{ + unsigned int cond; // nondeterministic + int g = 0; + + if(cond & (1u << 0)) + g = 1; + if(cond & (1u << 1)) + g = 2; + if(cond & (1u << 2)) + g = 3; + if(cond & (1u << 3)) + g = 4; + if(cond & (1u << 4)) + g = 5; + if(cond & (1u << 5)) + g = 6; + if(cond & (1u << 6)) + g = 7; + if(cond & (1u << 7)) + g = 8; + if(cond & (1u << 8)) + g = 9; + if(cond & (1u << 9)) + g = 10; + if(cond & (1u << 10)) + g = 11; + if(cond & (1u << 11)) + g = 12; + if(cond & (1u << 12)) + g = 13; + if(cond & (1u << 13)) + g = 14; + if(cond & (1u << 14)) + g = 15; + if(cond & (1u << 15)) + g = 16; + + __CPROVER_assert(g >= 0 && g <= 16, "g stays within bounds"); + + return 0; +} diff --git a/regression/cbmc/if-conversion-paths/test.desc b/regression/cbmc/if-conversion-paths/test.desc new file mode 100644 index 00000000000..21b8f72b537 --- /dev/null +++ b/regression/cbmc/if-conversion-paths/test.desc @@ -0,0 +1,13 @@ +CORE +main.c +--paths lifo +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +Side-effect-free conditional assignments are linearised by if-conversion so +that --paths mode does not fork at them. Without the pass the 16 independent +branches here fork into 2^16 paths and the test times out; with it a single +path remains. This pins the optimisation against accidental removal. diff --git a/regression/cbmc/reachability-slice/test.desc b/regression/cbmc/reachability-slice/test.desc index e9300891a8e..47efa6cf0d4 100644 --- a/regression/cbmc/reachability-slice/test.desc +++ b/regression/cbmc/reachability-slice/test.desc @@ -1,4 +1,4 @@ -CORE +CORE paths-lifo-expected-failure test.c --no-standard-checks --reachability-slice --show-goto-functions --cover location --property foo.coverage.2 ^EXIT=0$ @@ -9,3 +9,7 @@ test.c = 1005 -- We do not include 1002 and 1003, whether this is hit depends on where assertion is put + +Tagged paths-lifo-expected-failure: under --paths the if-conversion pass +removes branches, which changes the --cover location output; the slicing +behaviour under test is unaffected. diff --git a/regression/cbmc/reachability-slice/test2.desc b/regression/cbmc/reachability-slice/test2.desc index 8810c829219..f38a22ab910 100644 --- a/regression/cbmc/reachability-slice/test2.desc +++ b/regression/cbmc/reachability-slice/test2.desc @@ -1,4 +1,4 @@ -CORE +CORE paths-lifo-expected-failure test.c --no-standard-checks --reachability-slice-fb --show-goto-functions --cover location --property foo.coverage.2 ^EXIT=0$ @@ -9,3 +9,7 @@ test.c = 1005 -- = 1004 +-- +Tagged paths-lifo-expected-failure: under --paths the if-conversion pass +removes branches, which changes the --cover location output; the slicing +behaviour under test is unaffected. diff --git a/regression/cbmc/reachability-slice/test3.desc b/regression/cbmc/reachability-slice/test3.desc index 49191de1d92..a70e223945b 100644 --- a/regression/cbmc/reachability-slice/test3.desc +++ b/regression/cbmc/reachability-slice/test3.desc @@ -1,4 +1,4 @@ -CORE +CORE paths-lifo-expected-failure test.c --reachability-slice --show-goto-functions --cover location ^EXIT=0$ @@ -10,3 +10,7 @@ test.c -- -- We do not include 1005 since it might or might not be present based on where the assertion is in the block. + +Tagged paths-lifo-expected-failure: under --paths the if-conversion pass +removes branches, which changes the --cover location output; the slicing +behaviour under test is unaffected. diff --git a/scripts/check_help.sh b/scripts/check_help.sh index d93db1bdb5c..d2ed3d35091 100755 --- a/scripts/check_help.sh +++ b/scripts/check_help.sh @@ -74,6 +74,7 @@ for t in \ -all-claims -all-properties -claim -show-claims \ -document-subgoals \ -no-propagation -no-simplify -no-simplify-if \ + -no-if-conversion \ -floatbv -no-unwinding-assertions \ -slice-by-trace ; do echo "$undoc" >> help_string diff --git a/src/cbmc/cbmc_parse_options.cpp b/src/cbmc/cbmc_parse_options.cpp index c715e89b491..3097e69b750 100644 --- a/src/cbmc/cbmc_parse_options.cpp +++ b/src/cbmc/cbmc_parse_options.cpp @@ -18,6 +18,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include #include @@ -94,6 +95,7 @@ void cbmc_parse_optionst::set_default_options(optionst &options) // Default true options.set_option("built-in-assertions", true); options.set_option("propagation", true); + options.set_option("if-conversion", true); options.set_option("simple-slice", true); options.set_option("simplify", true); options.set_option("show-goto-symex-steps", false); @@ -375,6 +377,11 @@ void cbmc_parse_optionst::get_command_line_options(optionst &options) if(cmdline.isset("no-propagation")) options.set_option("propagation", false); + // if-conversion of side-effect-free conditional assignments (only used in + // --paths mode, to avoid forking the symbolic-execution path tree) + if(cmdline.isset("no-if-conversion")) + options.set_option("if-conversion", false); + // transform self loops to assumptions options.set_option( "self-loops-to-assumptions", @@ -900,6 +907,17 @@ bool cbmc_parse_optionst::process_goto_program( goto_model, log.get_message_handler(), cprover_c_library_factory); } + // In --paths mode every GOTO branch forks the path tree. Replace simple + // conditional assignments (in the user program and the just-linked CPROVER + // library alike) with branch-free guarded assignments first. This must + // happen before goto_check (run by ::process_goto_program below) so that the + // safety checks of the generated conditional expressions stay guarded by the + // branch condition. + if( + options.get_bool_option("paths") && + options.get_bool_option("if-conversion")) + if_conversion(goto_model, log.get_message_handler()); + // Common removal of types and complex constructs if(::process_goto_program(goto_model, options, log)) return true; diff --git a/src/cbmc/cbmc_parse_options.h b/src/cbmc/cbmc_parse_options.h index a5890bfb88d..037a0733d41 100644 --- a/src/cbmc/cbmc_parse_options.h +++ b/src/cbmc/cbmc_parse_options.h @@ -40,6 +40,7 @@ class optionst; "(no-simplify)(full-slice)" \ OPT_REACHABILITY_SLICER \ "(no-propagation)(no-simplify-if)" \ + "(no-if-conversion)" \ "(document-subgoals)(test-preprocessor)" \ "(show-array-constraints)" \ OPT_CONFIG_C_CPP \ diff --git a/src/goto-programs/Makefile b/src/goto-programs/Makefile index bacc92ac152..bc645ff02de 100644 --- a/src/goto-programs/Makefile +++ b/src/goto-programs/Makefile @@ -13,6 +13,7 @@ SRC = adjust_float_expressions.cpp \ goto_program.cpp \ goto_trace.cpp \ graphml_witness.cpp \ + if_conversion.cpp \ initialize_goto_model.cpp \ instrument_preconditions.cpp \ interpreter.cpp \ diff --git a/src/goto-programs/if_conversion.cpp b/src/goto-programs/if_conversion.cpp new file mode 100644 index 00000000000..90fe88565a6 --- /dev/null +++ b/src/goto-programs/if_conversion.cpp @@ -0,0 +1,429 @@ +/*******************************************************************\ + +Module: If-conversion of side-effect-free conditional assignments + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +/// \file +/// If-conversion of side-effect-free conditional assignments + +#include "if_conversion.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "goto_model.h" +#include "remove_skip.h" + +#include +#include +#include + +/// An instruction may appear in a region that we linearise iff it neither +/// transfers control nor has effects beyond a plain assignment or a scope +/// marker. In particular function calls, assertions/assumptions, atomic +/// sections and (non-structural) jumps are excluded: linearising them would +/// be unsound or change semantics. DECL/DEAD are permitted (they only widen +/// the scope of branch-local variables, which is harmless once the guard has +/// been snapshotted). +/// +/// In addition, an assignment whose left-hand side contains a dereference or +/// an array index is excluded: linearising `if(c) *p = v;` into +/// `*p = c ? v : *p` (or `a[i] = c ? v : a[i]`) would evaluate the address +/// `*p` / `a[i]` unconditionally, performing the dereference or array-bounds +/// access even when the guard does not hold. That both changes semantics and +/// defeats the precision the guard provided (null/bounds safety). +static bool +is_region_instruction(const goto_programt::instructiont &instruction) +{ + if(instruction.is_assign()) + { + const exprt &lhs = instruction.assign_lhs(); + return !has_subexpr(lhs, ID_dereference) && !has_subexpr(lhs, ID_index); + } + + return instruction.is_skip() || instruction.is_decl() || + instruction.is_dead(); +} + +namespace +{ +/// Create a fresh static boolean to hold a snapshot of a branch condition. +/// A static lifetime avoids the need for DECL/DEAD bookkeeping; the variable +/// is always assigned immediately before it is read, so its (uninitialised) +/// global value is never observed. +symbol_exprt fresh_guard_symbol( + symbol_table_baset &symbol_table, + const irep_idt &mode, + const source_locationt &source_location, + std::size_t &counter) +{ + irep_idt base_name = "if_conversion_guard$" + std::to_string(counter++); + irep_idt name = CPROVER_PREFIX + id2string(base_name); + + symbolt symbol{name, bool_typet{}, mode}; + symbol.base_name = base_name; + symbol.pretty_name = base_name; + symbol.is_static_lifetime = true; + symbol.is_lvalue = true; + symbol.is_state_var = true; + symbol.is_file_local = true; + symbol.location = source_location; + + symbol_table.insert(std::move(symbol)); + + return symbol_exprt{name, bool_typet{}}; +} +} // namespace + +std::size_t if_conversion( + goto_programt &goto_program, + symbol_table_baset &symbol_table, + const irep_idt &mode, + const namespacet &ns) +{ + // Map each instruction to the set of instructions that jump to it, so that + // we can ensure that the branch we remove is the only way into the region we + // linearise (and that the branch itself is not a jump target). Built once; + // it is treated conservatively (a stale entry only prevents a later + // conversion, it never enables an unsound one). + std::map< + const goto_programt::instructiont *, + std::set> + incoming; + for(auto it = goto_program.instructions.begin(); + it != goto_program.instructions.end(); + ++it) + { + for(const auto &t : it->targets) + incoming[&*t].insert(&*it); + } + + const auto externally_entered = + [&incoming]( + goto_programt::targett p, + const goto_programt::instructiont *branch, + const goto_programt::instructiont *structural) + { + const auto m = incoming.find(&*p); + if(m == incoming.end()) + return false; + for(const auto *src : m->second) + if(src != branch && src != structural) + return true; + return false; + }; + + std::size_t converted = 0; + std::size_t guard_counter = 0; + + for(auto it = goto_program.instructions.begin(); + it != goto_program.instructions.end(); + ++it) + { + // candidate: a conditional GOTO with a single, forward target + if(!it->is_goto() || it->targets.size() != 1) + continue; + + const exprt guard = it->condition(); + if(guard.is_true() || guard.is_false()) + continue; + + // the branch itself must not be a jump target, so that the guard snapshot + // we insert just before it is reached on every entry + if(incoming.find(&*it) != incoming.end()) + continue; + + const goto_programt::targett target = it->get_target(); + + // the then-region is [then_begin, target); require target to be reachable + // forwards (otherwise this is a back-edge / loop) + const goto_programt::targett then_begin = std::next(it); + std::vector region1; + bool forward = false; + for(auto p = then_begin; p != goto_program.instructions.end(); ++p) + { + if(p == target) + { + forward = true; + break; + } + region1.push_back(p); + } + if(!forward) + continue; + + // if-else shape: the then-region ends in an unconditional forward GOTO + // whose target (the join) lies beyond the else-entry `target` + goto_programt::targett structural_goto = goto_program.instructions.end(); + std::vector region2; + bool is_if_else = false; + if( + !region1.empty() && region1.back()->is_goto() && + region1.back()->condition().is_true() && + region1.back()->targets.size() == 1) + { + const goto_programt::targett join = region1.back()->get_target(); + bool join_forward = false; + for(auto p = target; p != goto_program.instructions.end(); ++p) + { + if(p == join) + { + join_forward = true; + break; + } + } + if(join_forward && join != target) + { + structural_goto = region1.back(); + region1.pop_back(); + is_if_else = true; + for(auto p = target; p != join; ++p) + region2.push_back(p); + } + } + + // both regions must consist of linearisable instructions only + const auto linearisable = + [](const std::vector ®ion) + { + for(const auto &p : region) + if(!is_region_instruction(*p)) + return false; + return true; + }; + if(!linearisable(region1) || !linearisable(region2)) + continue; + + // at least one region must contain an assignment, otherwise there is + // nothing worth linearising + const auto has_assignment = + [](const std::vector ®ion) + { + for(const auto &p : region) + if(p->is_assign()) + return true; + return false; + }; + if(!has_assignment(region1) && !has_assignment(region2)) + continue; + + // single-entry: no instruction in the region(s), nor the else-entry, may + // be the target of any jump other than the branch (and the structural + // GOTO) we are removing + const goto_programt::instructiont *const structural_ptr = + is_if_else ? &*structural_goto : nullptr; + bool external_entry = false; + for(const auto &p : region1) + external_entry |= externally_entered(p, &*it, structural_ptr); + for(const auto &p : region2) + external_entry |= externally_entered(p, &*it, structural_ptr); + if(is_if_else) + external_entry |= externally_entered(target, &*it, structural_ptr); + if(external_entry) + continue; + + // Snapshotting the guard into a fresh variable is necessary when the guard + // reads a variable that the region writes or kills: otherwise re-evaluating + // the guard at each converted assignment could observe a clobbered value, + // or read a now-DEAD condition temporary. When the guard does not depend on + // anything the region touches we can use the original condition directly, + // which keeps the conditional assignments analysable downstream (e.g. it + // preserves `p != null` guards for local safe pointer analysis). + const find_symbols_sett guard_symbols = find_symbol_identifiers(guard); + bool guard_depends_on_region = false; + const auto note_region_writes = [&guard_symbols, &guard_depends_on_region]( + const goto_programt::targett &p) + { + if(p->is_assign()) + { + for(const irep_idt &id : find_symbol_identifiers(p->assign_lhs())) + if(guard_symbols.count(id) != 0) + guard_depends_on_region = true; + } + else if(p->is_decl()) + { + if(guard_symbols.count(p->decl_symbol().get_identifier()) != 0) + guard_depends_on_region = true; + } + else if(p->is_dead()) + { + if(guard_symbols.count(p->dead_symbol().get_identifier()) != 0) + guard_depends_on_region = true; + } + }; + for(const auto &p : region1) + note_region_writes(p); + for(const auto &p : region2) + note_region_writes(p); + + // For an if/else, the else-region is linearised after the then-region, so + // converting each region independently into a self-referential conditional + // assignment `x = c ? e : x` makes an else-assignment that reads a variable + // the then-region wrote nest (after SSA) into `x = !c ? b : (c ? a : x)`. + // That defeats constant propagation of `x`: a variable that was concrete on + // each original branch becomes a symbolic value, and in --paths mode that + // can drive downstream branches/loops into an explosion of paths (see + // regression/cbmc/return4). When both regions are a single assignment to the + // same variable we instead emit one combined ternary `x = c ? a : b`, which + // simplify can fold (e.g. `c ? 1 : 1` to `1`); messier overlaps are left + // unconverted. Regions that do not interfere keep the per-region form. + goto_programt::targett combine_then = goto_program.instructions.end(); + goto_programt::targett combine_else = goto_program.instructions.end(); + bool combine = false; + if(is_if_else) + { + find_symbols_sett region1_writes; + for(const auto &p : region1) + { + if(p->is_assign()) + { + for(const irep_idt &id : find_symbol_identifiers(p->assign_lhs())) + region1_writes.insert(id); + } + else if(p->is_decl()) + region1_writes.insert(p->decl_symbol().get_identifier()); + } + + bool region2_uses_region1_write = false; + std::vector region1_assignments; + std::vector region2_assignments; + for(const auto &p : region1) + if(p->is_assign()) + region1_assignments.push_back(p); + for(const auto &p : region2) + { + if(p->is_assign()) + { + region2_assignments.push_back(p); + for(const irep_idt &id : find_symbol_identifiers(p->assign_lhs())) + if(region1_writes.count(id) != 0) + region2_uses_region1_write = true; + for(const irep_idt &id : find_symbol_identifiers(p->assign_rhs())) + if(region1_writes.count(id) != 0) + region2_uses_region1_write = true; + } + else if( + p->is_decl() && + region1_writes.count(p->decl_symbol().get_identifier()) != 0) + region2_uses_region1_write = true; + else if( + p->is_dead() && + region1_writes.count(p->dead_symbol().get_identifier()) != 0) + region2_uses_region1_write = true; + } + + if(region2_uses_region1_write) + { + if( + region1_assignments.size() == 1 && region2_assignments.size() == 1 && + region1_assignments.front()->assign_lhs() == + region2_assignments.front()->assign_lhs()) + { + combine = true; + combine_then = region1_assignments.front(); + combine_else = region2_assignments.front(); + } + else + continue; // leave the branch in place rather than create a chain + } + } + + // the then-region executes when the GOTO is *not* taken, the else-region + // when it is taken + exprt then_condition; + exprt else_condition; + if(guard_depends_on_region) + { + const symbol_exprt snapshot = fresh_guard_symbol( + symbol_table, mode, it->source_location(), guard_counter); + goto_program.insert_before( + it, + goto_programt::make_assignment(snapshot, guard, it->source_location())); + then_condition = boolean_negate(snapshot); + else_condition = snapshot; + } + else + { + then_condition = boolean_negate(guard); + else_condition = guard; + } + simplify(then_condition, ns); + + if(combine) + { + // x = c ? then_rhs : else_rhs, replacing the matching assignment pair; + // the else-assignment becomes a skip + exprt combined = if_exprt{ + then_condition, combine_then->assign_rhs(), combine_else->assign_rhs()}; + simplify(combined, ns); + combine_then->assign_rhs_nonconst() = std::move(combined); + *combine_else = goto_programt::make_skip(combine_else->source_location()); + } + else + { + for(const auto &p : region1) + { + if(!p->is_assign()) + continue; + const exprt lhs = p->assign_lhs(); + const exprt rhs = p->assign_rhs(); + p->assign_rhs_nonconst() = if_exprt{then_condition, rhs, lhs}; + } + for(const auto &p : region2) + { + if(!p->is_assign()) + continue; + const exprt lhs = p->assign_lhs(); + const exprt rhs = p->assign_rhs(); + p->assign_rhs_nonconst() = if_exprt{else_condition, rhs, lhs}; + } + } + + // drop the branch(es) + *it = goto_programt::make_skip(it->source_location()); + if(is_if_else) + *structural_goto = + goto_programt::make_skip(structural_goto->source_location()); + + ++converted; + } + + return converted; +} + +std::size_t +if_conversion(goto_modelt &goto_model, message_handlert &message_handler) +{ + const namespacet ns(goto_model.symbol_table); + std::size_t converted = 0; + + for(auto &gf : goto_model.goto_functions.function_map) + { + const symbolt *fn = goto_model.symbol_table.lookup(gf.first); + const irep_idt mode = fn != nullptr ? fn->mode : ID_C; + converted += + if_conversion(gf.second.body, goto_model.symbol_table, mode, ns); + } + + if(converted != 0) + { + remove_skip(goto_model); + goto_model.goto_functions.update(); + + messaget log(message_handler); + log.statistics() << "if-conversion replaced " << converted + << " branch(es) with conditional assignments" + << messaget::eom; + } + + return converted; +} diff --git a/src/goto-programs/if_conversion.h b/src/goto-programs/if_conversion.h new file mode 100644 index 00000000000..1331c6b7682 --- /dev/null +++ b/src/goto-programs/if_conversion.h @@ -0,0 +1,59 @@ +/*******************************************************************\ + +Module: If-conversion of side-effect-free conditional assignments + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +/// \file +/// If-conversion of side-effect-free conditional assignments +/// +/// Replace control-flow branches whose then/else regions consist solely of +/// assignments with branch-free guarded (conditional-expression) assignments, +/// e.g. rewrite the GOTO program for +/// \code +/// if(cond) x = e; +/// \endcode +/// into +/// \code +/// x = cond ? e : x; +/// \endcode +/// removing the GOTO branch. This avoids forking the symbolic-execution path +/// tree in `--paths` mode at trivial control flow (such as the bookkeeping in +/// the built-in CPROVER library), which would otherwise cause exponential +/// path explosion. +/// +/// The transformation must run before goto_check, so that the per-operand +/// safety checks of the generated conditional expressions remain guarded by +/// the branch condition (see goto_check_ct::check_rec_if); it is therefore +/// sound even when the assignments dereference pointers, divide, or overflow. + +#ifndef CPROVER_GOTO_PROGRAMS_IF_CONVERSION_H +#define CPROVER_GOTO_PROGRAMS_IF_CONVERSION_H + +#include + +#include + +class goto_modelt; +class goto_programt; +class message_handlert; +class namespacet; +class symbol_table_baset; + +/// Apply if-conversion to all functions of \p goto_model. \return the number +/// of branches that were removed. +std::size_t if_conversion(goto_modelt &goto_model, message_handlert &); + +/// Apply if-conversion to a single \p goto_program, creating any fresh guard +/// variables in \p symbol_table with mode \p mode. Does not run remove_skip or +/// update targets; the caller is responsible for doing so. \return the number +/// of branches that were removed. +std::size_t if_conversion( + goto_programt &goto_program, + symbol_table_baset &symbol_table, + const irep_idt &mode, + const namespacet &ns); + +#endif // CPROVER_GOTO_PROGRAMS_IF_CONVERSION_H diff --git a/unit/Makefile b/unit/Makefile index ad783f64f8e..ce0b384386e 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -72,6 +72,7 @@ SRC += analyses/ai/ai.cpp \ goto-programs/goto_program_table_consistency.cpp \ goto-programs/goto_program_validate.cpp \ goto-programs/goto_trace_output.cpp \ + goto-programs/if_conversion.cpp \ goto-programs/is_goto_binary.cpp \ goto-programs/label_function_pointer_call_sites.cpp \ goto-programs/osx_fat_reader.cpp \ diff --git a/unit/goto-programs/if_conversion.cpp b/unit/goto-programs/if_conversion.cpp new file mode 100644 index 00000000000..7cc1ea4710c --- /dev/null +++ b/unit/goto-programs/if_conversion.cpp @@ -0,0 +1,174 @@ +/*******************************************************************\ + +Module: Unit tests for if_conversion + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +static std::size_t count_gotos(const goto_programt &p) +{ + std::size_t n = 0; + for(const auto &i : p.instructions) + if(i.is_goto()) + ++n; + return n; +} + +static bool has_conditional_assignment(const goto_programt &p) +{ + for(const auto &i : p.instructions) + if(i.is_assign() && i.assign_rhs().id() == ID_if) + return true; + return false; +} + +SCENARIO("if_conversion", "[core][goto-programs][if_conversion]") +{ + const signedbv_typet int_type{32}; + symbol_tablet symbol_table; + symbolt x_symbol{"x", int_type, ID_C}; + symbol_table.insert(x_symbol); + const symbol_exprt x = x_symbol.symbol_expr(); + symbolt c_symbol{"c", bool_typet{}, ID_C}; + symbol_table.insert(c_symbol); + const symbol_exprt c = c_symbol.symbol_expr(); + const namespacet ns(symbol_table); + + GIVEN("a conditional assignment `if(c) x = 1;`") + { + goto_programt p; + p.add(goto_programt::make_assignment(x, from_integer(1, int_type))); + const auto join = p.add(goto_programt::make_skip()); + p.add(goto_programt::make_end_function()); + // jump over the assignment when !c (so the assignment runs when c) + p.insert_before( + p.instructions.begin(), goto_programt::make_goto(join, not_exprt{c})); + p.update(); + + REQUIRE(count_gotos(p) == 1); + + WHEN("if-conversion is applied") + { + const std::size_t converted = if_conversion(p, symbol_table, ID_C, ns); + + THEN("the branch is replaced by a guarded assignment") + { + REQUIRE(converted == 1); + REQUIRE(count_gotos(p) == 0); + REQUIRE(has_conditional_assignment(p)); + } + } + } + + GIVEN("a conditional region containing a function call") + { + symbolt f_symbol{"f", code_typet{{}, empty_typet{}}, ID_C}; + symbol_table.insert(f_symbol); + const namespacet ns2(symbol_table); + + goto_programt p; + p.add(goto_programt::make_function_call( + code_function_callt{f_symbol.symbol_expr(), {}})); + p.add(goto_programt::make_assignment(x, from_integer(1, int_type))); + const auto join = p.add(goto_programt::make_skip()); + p.add(goto_programt::make_end_function()); + p.insert_before( + p.instructions.begin(), goto_programt::make_goto(join, not_exprt{c})); + p.update(); + + WHEN("if-conversion is applied") + { + const std::size_t converted = if_conversion(p, symbol_table, ID_C, ns2); + + THEN("the region is not linearisable and the branch is kept") + { + REQUIRE(converted == 0); + REQUIRE(count_gotos(p) == 1); + REQUIRE_FALSE(has_conditional_assignment(p)); + } + } + } + + // Build the GOTO for `if(c) x = a; else x = b;`: + // IF !c GOTO else + // x = a + // GOTO join + // else: x = b + // join: skip + const auto build_if_else = + [&](goto_programt &p, const exprt &a, const exprt &b) + { + const auto then_assign = p.add(goto_programt::make_assignment(x, a)); + const auto then_goto = p.add(goto_programt::make_goto(then_assign)); + const auto else_assign = p.add(goto_programt::make_assignment(x, b)); + const auto join = p.add(goto_programt::make_skip()); + p.add(goto_programt::make_end_function()); + then_goto->set_target(join); + p.insert_before( + then_assign, goto_programt::make_goto(else_assign, not_exprt{c})); + p.update(); + }; + + GIVEN("an if/else assigning the same variable the same value") + { + // if(c) x = 1; else x = 1; + goto_programt p; + build_if_else(p, from_integer(1, int_type), from_integer(1, int_type)); + REQUIRE(count_gotos(p) == 2); + + WHEN("if-conversion is applied") + { + const std::size_t converted = if_conversion(p, symbol_table, ID_C, ns); + + THEN("both branches collapse into a single constant assignment") + { + REQUIRE(converted == 1); + REQUIRE(count_gotos(p) == 0); + // c ? 1 : 1 simplifies to the constant 1; no symbolic conditional is + // left to defeat constant propagation (regression for return4). + REQUIRE_FALSE(has_conditional_assignment(p)); + } + } + } + + GIVEN("an if/else assigning the same variable different values") + { + // if(c) x = 1; else x = 2; + goto_programt p; + build_if_else(p, from_integer(1, int_type), from_integer(2, int_type)); + REQUIRE(count_gotos(p) == 2); + + WHEN("if-conversion is applied") + { + const std::size_t converted = if_conversion(p, symbol_table, ID_C, ns); + + THEN("the pair becomes a single guarded (ternary) assignment") + { + REQUIRE(converted == 1); + REQUIRE(count_gotos(p) == 0); + REQUIRE(has_conditional_assignment(p)); + // exactly one assignment carries the ternary; the other arm is a skip + std::size_t conditional_assignments = 0; + for(const auto &i : p.instructions) + if(i.is_assign() && i.assign_rhs().id() == ID_if) + ++conditional_assignments; + REQUIRE(conditional_assignments == 1); + } + } + } +} diff --git a/unit/path_strategies.cpp b/unit/path_strategies.cpp index b0b99b94d69..60b925f97c4 100644 --- a/unit/path_strategies.cpp +++ b/unit/path_strategies.cpp @@ -387,6 +387,10 @@ void _check_with_strategy( optionst options; cbmc_parse_optionst::set_default_options(options); + // These tests exercise path-exploration strategies, which requires the + // conditional branches in the test programs to be preserved; disable the + // if-conversion pass that would otherwise linearise them away. + options.set_option("if-conversion", false); options.set_option("assertions", true); options.set_option("assumptions", true); options.set_option("paths", true); From 4370b0fa23065753c173a39f3c589564af1a5fdd Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Sun, 14 Jun 2026 20:37:32 +0000 Subject: [PATCH 4/4] Revert manual CPROVER-library if-to-?: edits now subsumed by if-conversion An earlier commit hand-rewrote four side-effect-free conditional assignments in the CPROVER library -- in free(), __CPROVER_deallocate(), __delete() and __delete_array() -- from `if`-statements into conditional expressions, so that --paths mode would not fork the symbolic-execution path tree on them. The if-conversion pass now performs exactly this linearisation automatically for any qualifying conditional assignment, so the manual edits are no longer necessary. Restore the original, more readable `if`-statement form; under --paths the pass rewrites these branches into guarded assignments just as the hand edits did (verified via the path-branch-malloc-free regression test, which is retained and now exercises the pass end-to-end). Co-authored-by: Kiro --- src/ansi-c/library/stdlib.c | 14 +++++--------- src/cpp/library/new.c | 15 +++++---------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/src/ansi-c/library/stdlib.c b/src/ansi-c/library/stdlib.c index 516e1049d73..c82b495e256 100644 --- a/src/ansi-c/library/stdlib.c +++ b/src/ansi-c/library/stdlib.c @@ -345,11 +345,9 @@ void free(void *ptr) { __CPROVER_deallocate(ptr); - // detect memory leaks. Use a conditional expression rather than an - // `if` to avoid emitting a GOTO branch, which would fork the symex - // path tree in --paths mode. - __CPROVER_memory_leak = - (__CPROVER_memory_leak == ptr) ? 0 : __CPROVER_memory_leak; + // detect memory leaks + if(__CPROVER_memory_leak==ptr) + __CPROVER_memory_leak=0; } } @@ -671,8 +669,6 @@ __CPROVER_bool __VERIFIER_nondet___CPROVER_bool(void); void __CPROVER_deallocate(void *ptr) { - // Use a conditional expression rather than an `if` to avoid emitting a - // GOTO branch, which would fork the symex path tree in --paths mode. - __CPROVER_deallocated = - __VERIFIER_nondet___CPROVER_bool() ? ptr : __CPROVER_deallocated; + if(__VERIFIER_nondet___CPROVER_bool()) + __CPROVER_deallocated = ptr; } diff --git a/src/cpp/library/new.c b/src/cpp/library/new.c index ec5bd84a250..81790944455 100644 --- a/src/cpp/library/new.c +++ b/src/cpp/library/new.c @@ -95,11 +95,9 @@ inline void __delete(void *ptr) { __CPROVER_deallocate(ptr); - // detect memory leaks. Use a conditional expression rather than an - // `if` to avoid emitting a GOTO branch, which would fork the symex - // path tree in --paths mode. - __CPROVER_memory_leak = - (__CPROVER_memory_leak == ptr) ? 0 : __CPROVER_memory_leak; + // detect memory leaks + if(__CPROVER_memory_leak==ptr) + __CPROVER_memory_leak=0; } } @@ -137,10 +135,7 @@ inline void __delete_array(void *ptr) { __CPROVER_deallocate(ptr); - // detect memory leaks. Use a conditional expression rather than an - // `if` to avoid emitting a GOTO branch, which would fork the symex - // path tree in --paths mode. - __CPROVER_memory_leak = - (__CPROVER_memory_leak == ptr) ? 0 : __CPROVER_memory_leak; + // detect memory leaks + if(__CPROVER_memory_leak==ptr) __CPROVER_memory_leak=0; } }