diff --git a/regression/cbmc/BV_Arithmetic3/test.desc b/regression/cbmc/BV_Arithmetic3/test.desc index 1d107739188..9313847500b 100644 --- a/regression/cbmc/BV_Arithmetic3/test.desc +++ b/regression/cbmc/BV_Arithmetic3/test.desc @@ -1,4 +1,4 @@ -CORE +CORE paths-lifo-expected-failure main.c --verbosity 8 ^EXIT=0$ @@ -7,3 +7,7 @@ main.c ^VERIFICATION SUCCESSFUL$ -- ^warning: ignoring +-- +Tagged paths-lifo-expected-failure: under --paths the if-conversion pass +rewrites branches into conditional assignments, which changes the number of +generated VCCs (68 vs 73); the verification result is unchanged. diff --git a/regression/cbmc/destructors/compound_literal.desc b/regression/cbmc/destructors/compound_literal.desc index d38ce20d05b..320ebd9395b 100644 --- a/regression/cbmc/destructors/compound_literal.desc +++ b/regression/cbmc/destructors/compound_literal.desc @@ -1,4 +1,4 @@ -CORE +CORE paths-lifo-expected-failure main.c --unwind 10 --show-goto-functions --no-standard-checks activate-multi-line-match @@ -20,3 +20,7 @@ Checks for: This asserts that when you've created a compound literal that both temp and real variable gets killed. + +Tagged paths-lifo-expected-failure: under --paths the if-conversion pass +rewrites branches into conditional assignments, changing the --show-goto-functions +output this test pins; the behaviour under test is unaffected. diff --git a/regression/cbmc/destructors/enter_lexical_block.desc b/regression/cbmc/destructors/enter_lexical_block.desc index f16f71c5a33..be37a9a4d24 100644 --- a/regression/cbmc/destructors/enter_lexical_block.desc +++ b/regression/cbmc/destructors/enter_lexical_block.desc @@ -1,4 +1,4 @@ -CORE +CORE paths-lifo-expected-failure main.c --unwind 10 --show-goto-functions activate-multi-line-match @@ -35,3 +35,7 @@ Checks for: This asserts that when the GOTO is going into a lexical block that destructors are not omitted. This used to be a limitation with the previous implementation and has now been fixed. + +Tagged paths-lifo-expected-failure: under --paths the if-conversion pass +rewrites branches into conditional assignments, changing the --show-goto-functions +output this test pins; the behaviour under test is unaffected. diff --git a/regression/cbmc/if-conversion-clobber/main.c b/regression/cbmc/if-conversion-clobber/main.c new file mode 100644 index 00000000000..ac3aefb3e81 --- /dev/null +++ b/regression/cbmc/if-conversion-clobber/main.c @@ -0,0 +1,26 @@ +// If-conversion snapshots the branch condition before linearising a region. +// This is required for soundness when an earlier assignment in the region +// overwrites a variable that the condition reads: the generated conditional +// expressions must all use the condition's value as it was at the branch, not +// a value clobbered part-way through the region. +// +// Here the branch is taken (x = 1 > 0), so the original program sets y = 7. +// Without snapshotting, the rewrite of `y = 7` would re-evaluate `x > 0` after +// `x` had been set to -5, wrongly leaving y at 9 and reporting a spurious +// counterexample under --paths. + +int main(void) +{ + int x = 1; + int y = 9; + + if(x > 0) + { + x = -5; + y = 7; + } + + __CPROVER_assert(y == 7, "the branch is taken, so y is 7"); + + return 0; +} diff --git a/regression/cbmc/if-conversion-clobber/test.desc b/regression/cbmc/if-conversion-clobber/test.desc new file mode 100644 index 00000000000..b6345125511 --- /dev/null +++ b/regression/cbmc/if-conversion-clobber/test.desc @@ -0,0 +1,13 @@ +CORE +main.c +--paths lifo +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +If-conversion must snapshot the branch condition before linearising a region, +so that a guarded assignment does not re-read a variable that an earlier +assignment in the same region has clobbered. Without the snapshot this test +reports a spurious VERIFICATION FAILED under --paths. diff --git a/regression/cbmc/if-conversion-deref/main.c b/regression/cbmc/if-conversion-deref/main.c new file mode 100644 index 00000000000..41495896011 --- /dev/null +++ b/regression/cbmc/if-conversion-deref/main.c @@ -0,0 +1,20 @@ +// Soundness check for if-conversion: rewriting `if(p) x = *p;` into +// `x = p ? *p : x` must keep the dereference guarded by the condition, so +// that the NULL-pointer check does not fire on the path where the branch is +// not taken. With p constrained to be NULL, the original program never +// dereferences and verification succeeds; an unsound conversion that +// evaluates `*p` unconditionally would report a spurious NULL dereference. + +int main(void) +{ + int *p; + __CPROVER_assume(p == (int *)0); + + int x = 0; + if(p != (int *)0) + x = *p; + + __CPROVER_assert(x == 0, "x is unchanged when p is NULL"); + + return 0; +} diff --git a/regression/cbmc/if-conversion-deref/test.desc b/regression/cbmc/if-conversion-deref/test.desc new file mode 100644 index 00000000000..b466f5a98a0 --- /dev/null +++ b/regression/cbmc/if-conversion-deref/test.desc @@ -0,0 +1,15 @@ +CORE +main.c +--paths lifo --pointer-check +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +^\[main.pointer_dereference.\d+\] .*FAILURE$ +-- +Soundness of if-conversion: the dereference in `if(p) x = *p;` must stay +guarded by `p != NULL` after being rewritten to a conditional expression, so +no spurious NULL dereference is reported on the path where p is NULL. goto_check +guards the operand checks of the generated `?:` by its condition, which this +test confirms end-to-end under --paths. diff --git a/regression/cbmc/if-conversion-paths/main.c b/regression/cbmc/if-conversion-paths/main.c new file mode 100644 index 00000000000..a1cf469e7f2 --- /dev/null +++ b/regression/cbmc/if-conversion-paths/main.c @@ -0,0 +1,49 @@ +// If-conversion linearises side-effect-free conditional assignments so that +// --paths mode does not fork at them. The 16 independent conditional +// assignments below would create 2^16 = 65536 symbolic-execution paths +// without the transformation (timing out under the harness limit); with it +// they become 16 branch-free guarded assignments and a single path remains. +// Reverting the pass is therefore observable here as a timeout. + +int main(void) +{ + unsigned int cond; // nondeterministic + int g = 0; + + if(cond & (1u << 0)) + g = 1; + if(cond & (1u << 1)) + g = 2; + if(cond & (1u << 2)) + g = 3; + if(cond & (1u << 3)) + g = 4; + if(cond & (1u << 4)) + g = 5; + if(cond & (1u << 5)) + g = 6; + if(cond & (1u << 6)) + g = 7; + if(cond & (1u << 7)) + g = 8; + if(cond & (1u << 8)) + g = 9; + if(cond & (1u << 9)) + g = 10; + if(cond & (1u << 10)) + g = 11; + if(cond & (1u << 11)) + g = 12; + if(cond & (1u << 12)) + g = 13; + if(cond & (1u << 13)) + g = 14; + if(cond & (1u << 14)) + g = 15; + if(cond & (1u << 15)) + g = 16; + + __CPROVER_assert(g >= 0 && g <= 16, "g stays within bounds"); + + return 0; +} diff --git a/regression/cbmc/if-conversion-paths/test.desc b/regression/cbmc/if-conversion-paths/test.desc new file mode 100644 index 00000000000..21b8f72b537 --- /dev/null +++ b/regression/cbmc/if-conversion-paths/test.desc @@ -0,0 +1,13 @@ +CORE +main.c +--paths lifo +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +Side-effect-free conditional assignments are linearised by if-conversion so +that --paths mode does not fork at them. Without the pass the 16 independent +branches here fork into 2^16 paths and the test times out; with it a single +path remains. This pins the optimisation against accidental removal. diff --git a/regression/cbmc/path-branch-malloc-free/main.c b/regression/cbmc/path-branch-malloc-free/main.c new file mode 100644 index 00000000000..6180c8f5be9 --- /dev/null +++ b/regression/cbmc/path-branch-malloc-free/main.c @@ -0,0 +1,39 @@ +#include + +// Each `free` call previously triggered two GOTO branches in the CPROVER +// library bookkeeping (one in `free` itself for the memory-leak update and +// one in `__CPROVER_deallocate` for the nondeterministic deallocation +// tracking). Under `--paths lifo`, those branches multiplied the symex +// state space by ~4 per call, so a handful of `malloc`/`free` pairs caused +// exponential path explosion and reliably timed out before the bookkeeping +// was rewritten as conditional expressions. With the rewrite this same +// program runs in seconds; without it, ctest's 1200s per-profile timeout +// is comfortably exceeded. +int main(void) +{ + void *p0 = malloc(8); + void *p1 = malloc(8); + void *p2 = malloc(8); + void *p3 = malloc(8); + void *p4 = malloc(8); + void *p5 = malloc(8); + void *p6 = malloc(8); + void *p7 = malloc(8); + void *p8 = malloc(8); + void *p9 = malloc(8); + void *pA = malloc(8); + + free(p0); + free(p1); + free(p2); + free(p3); + free(p4); + free(p5); + free(p6); + free(p7); + free(p8); + free(p9); + free(pA); + + return 0; +} diff --git a/regression/cbmc/path-branch-malloc-free/test.desc b/regression/cbmc/path-branch-malloc-free/test.desc new file mode 100644 index 00000000000..1b3062b1fe8 --- /dev/null +++ b/regression/cbmc/path-branch-malloc-free/test.desc @@ -0,0 +1,16 @@ +CORE +main.c +--paths lifo +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +This test pins down the CPROVER-library `--paths`-friendliness of `free` and +`__CPROVER_deallocate`: the bookkeeping in those two functions used to emit +GOTO branches that, under `--paths lifo`, multiplied the symex state space +exponentially with the number of `free` calls. With those branches replaced +by conditional expressions, this test runs in a handful of paths instead of +4^N. Without the fix it would time out at the harness limit, so reverting +the optimisation is observable here. diff --git a/regression/cbmc/reachability-slice/test.desc b/regression/cbmc/reachability-slice/test.desc index e9300891a8e..47efa6cf0d4 100644 --- a/regression/cbmc/reachability-slice/test.desc +++ b/regression/cbmc/reachability-slice/test.desc @@ -1,4 +1,4 @@ -CORE +CORE paths-lifo-expected-failure test.c --no-standard-checks --reachability-slice --show-goto-functions --cover location --property foo.coverage.2 ^EXIT=0$ @@ -9,3 +9,7 @@ test.c = 1005 -- We do not include 1002 and 1003, whether this is hit depends on where assertion is put + +Tagged paths-lifo-expected-failure: under --paths the if-conversion pass +removes branches, which changes the --cover location output; the slicing +behaviour under test is unaffected. diff --git a/regression/cbmc/reachability-slice/test2.desc b/regression/cbmc/reachability-slice/test2.desc index 8810c829219..f38a22ab910 100644 --- a/regression/cbmc/reachability-slice/test2.desc +++ b/regression/cbmc/reachability-slice/test2.desc @@ -1,4 +1,4 @@ -CORE +CORE paths-lifo-expected-failure test.c --no-standard-checks --reachability-slice-fb --show-goto-functions --cover location --property foo.coverage.2 ^EXIT=0$ @@ -9,3 +9,7 @@ test.c = 1005 -- = 1004 +-- +Tagged paths-lifo-expected-failure: under --paths the if-conversion pass +removes branches, which changes the --cover location output; the slicing +behaviour under test is unaffected. diff --git a/regression/cbmc/reachability-slice/test3.desc b/regression/cbmc/reachability-slice/test3.desc index 49191de1d92..a70e223945b 100644 --- a/regression/cbmc/reachability-slice/test3.desc +++ b/regression/cbmc/reachability-slice/test3.desc @@ -1,4 +1,4 @@ -CORE +CORE paths-lifo-expected-failure test.c --reachability-slice --show-goto-functions --cover location ^EXIT=0$ @@ -10,3 +10,7 @@ test.c -- -- We do not include 1005 since it might or might not be present based on where the assertion is in the block. + +Tagged paths-lifo-expected-failure: under --paths the if-conversion pass +removes branches, which changes the --cover location output; the slicing +behaviour under test is unaffected. diff --git a/scripts/check_help.sh b/scripts/check_help.sh index d93db1bdb5c..d2ed3d35091 100755 --- a/scripts/check_help.sh +++ b/scripts/check_help.sh @@ -74,6 +74,7 @@ for t in \ -all-claims -all-properties -claim -show-claims \ -document-subgoals \ -no-propagation -no-simplify -no-simplify-if \ + -no-if-conversion \ -floatbv -no-unwinding-assertions \ -slice-by-trace ; do echo "$undoc" >> help_string diff --git a/src/analyses/local_safe_pointers.cpp b/src/analyses/local_safe_pointers.cpp index a67816bce45..58c9fd8831f 100644 --- a/src/analyses/local_safe_pointers.cpp +++ b/src/analyses/local_safe_pointers.cpp @@ -14,6 +14,8 @@ Author: Diffblue Ltd #include #include #include +#include +#include /// Return structure for `get_null_checked_expr` and /// `get_conditional_checked_expr` @@ -74,6 +76,43 @@ static std::optional get_null_checked_expr(const exprt &expr) return {}; } +/// Collect dereferences that are guarded by a conditional null-check in +/// `expr`, i.e. a dereference `*p` in the operand of `(p != null) ? ... : ...` +/// (the true operand) or `(p == null) ? ... : ...` (the false operand) that is +/// only evaluated when `p` is known to be non-null. Such conditional +/// expressions are produced by if-conversion in place of `if(p != null) *p`. +/// The collected dereferences are appended to `result`. +static void +collect_if_guarded_dereferences(const exprt &expr, std::vector &result) +{ + if(const auto *if_expr = expr_try_dynamic_cast(expr)) + { + if(auto check = get_null_checked_expr(if_expr->cond())) + { + // The guarded operand is the one evaluated when the pointer is non-null. + const exprt &guarded_operand = check->checked_when_taken + ? if_expr->true_case() + : if_expr->false_case(); + const exprt checked_pointer = skip_typecast(check->checked_expr); + for(auto it = guarded_operand.depth_begin(), + end = guarded_operand.depth_end(); + it != end; + ++it) + { + if( + it->id() == ID_dereference && + skip_typecast(to_dereference_expr(*it).pointer()) == checked_pointer) + { + result.push_back(*it); + } + } + } + } + + for(const auto &op : expr.operands()) + collect_if_guarded_dereferences(op, result); +} + /// Compute safe dereference expressions for a given GOTO program. This /// populates `non_null_expressions` mapping instruction location numbers /// onto a set of expressions that are known to be non-null BEFORE that @@ -85,6 +124,22 @@ void local_safe_pointerst::operator()(const goto_programt &goto_program) for(const auto &instruction : goto_program.instructions) { + // Record dereferences guarded by a conditional null-check within this + // instruction (e.g. `(p != null) ? *p : x`, as produced by if-conversion). + // This is independent of the control-flow tracking below. + { + std::vector guarded; + instruction.apply([&guarded](const exprt &e) + { collect_if_guarded_dereferences(e, guarded); }); + if(!guarded.empty()) + { + auto &guarded_set = + if_guarded_dereferences[instruction.location_number]; + for(auto &deref : guarded) + guarded_set.insert(std::move(deref)); + } + } + // Handle control-flow convergence pessimistically: if(instruction.incoming_edges.size() > 1) checked_expressions.clear(); diff --git a/src/analyses/local_safe_pointers.h b/src/analyses/local_safe_pointers.h index 497f06beadb..595cca1296b 100644 --- a/src/analyses/local_safe_pointers.h +++ b/src/analyses/local_safe_pointers.h @@ -12,11 +12,12 @@ Author: Diffblue Ltd #ifndef CPROVER_ANALYSES_LOCAL_SAFE_POINTERS_H #define CPROVER_ANALYSES_LOCAL_SAFE_POINTERS_H -#include - #include +#include + #include +#include /// A very simple, cheap analysis to determine when dereference operations are /// trivially guarded by a check against a null pointer access. @@ -38,6 +39,14 @@ class local_safe_pointerst std::map> non_null_expressions; + /// Dereferences that are guarded by a conditional null-check appearing in the + /// same instruction, e.g. `(p != null) ? *p : x` (such conditional + /// expressions are produced by if-conversion in place of `if(p != null) *p`). + /// Mapped from instruction location number to the set of safe + /// `dereference_exprt`s, matched exactly. + std::map> + if_guarded_dereferences; + public: void operator()(const goto_programt &goto_program); @@ -48,7 +57,14 @@ class local_safe_pointerst const dereference_exprt &deref, goto_programt::const_targett program_point) { - return is_non_null_at_program_point(deref.op(), program_point); + if(is_non_null_at_program_point(deref.op(), program_point)) + return true; + + // The pointer may instead be guarded by a conditional null-check within + // this very instruction, e.g. `(p != null) ? *p : x`. + auto findit = if_guarded_dereferences.find(program_point->location_number); + return findit != if_guarded_dereferences.end() && + findit->second.find(deref) != findit->second.end(); } void output( diff --git a/src/cbmc/cbmc_parse_options.cpp b/src/cbmc/cbmc_parse_options.cpp index c715e89b491..3097e69b750 100644 --- a/src/cbmc/cbmc_parse_options.cpp +++ b/src/cbmc/cbmc_parse_options.cpp @@ -18,6 +18,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include #include @@ -94,6 +95,7 @@ void cbmc_parse_optionst::set_default_options(optionst &options) // Default true options.set_option("built-in-assertions", true); options.set_option("propagation", true); + options.set_option("if-conversion", true); options.set_option("simple-slice", true); options.set_option("simplify", true); options.set_option("show-goto-symex-steps", false); @@ -375,6 +377,11 @@ void cbmc_parse_optionst::get_command_line_options(optionst &options) if(cmdline.isset("no-propagation")) options.set_option("propagation", false); + // if-conversion of side-effect-free conditional assignments (only used in + // --paths mode, to avoid forking the symbolic-execution path tree) + if(cmdline.isset("no-if-conversion")) + options.set_option("if-conversion", false); + // transform self loops to assumptions options.set_option( "self-loops-to-assumptions", @@ -900,6 +907,17 @@ bool cbmc_parse_optionst::process_goto_program( goto_model, log.get_message_handler(), cprover_c_library_factory); } + // In --paths mode every GOTO branch forks the path tree. Replace simple + // conditional assignments (in the user program and the just-linked CPROVER + // library alike) with branch-free guarded assignments first. This must + // happen before goto_check (run by ::process_goto_program below) so that the + // safety checks of the generated conditional expressions stay guarded by the + // branch condition. + if( + options.get_bool_option("paths") && + options.get_bool_option("if-conversion")) + if_conversion(goto_model, log.get_message_handler()); + // Common removal of types and complex constructs if(::process_goto_program(goto_model, options, log)) return true; diff --git a/src/cbmc/cbmc_parse_options.h b/src/cbmc/cbmc_parse_options.h index a5890bfb88d..037a0733d41 100644 --- a/src/cbmc/cbmc_parse_options.h +++ b/src/cbmc/cbmc_parse_options.h @@ -40,6 +40,7 @@ class optionst; "(no-simplify)(full-slice)" \ OPT_REACHABILITY_SLICER \ "(no-propagation)(no-simplify-if)" \ + "(no-if-conversion)" \ "(document-subgoals)(test-preprocessor)" \ "(show-array-constraints)" \ OPT_CONFIG_C_CPP \ diff --git a/src/goto-programs/Makefile b/src/goto-programs/Makefile index bacc92ac152..bc645ff02de 100644 --- a/src/goto-programs/Makefile +++ b/src/goto-programs/Makefile @@ -13,6 +13,7 @@ SRC = adjust_float_expressions.cpp \ goto_program.cpp \ goto_trace.cpp \ graphml_witness.cpp \ + if_conversion.cpp \ initialize_goto_model.cpp \ instrument_preconditions.cpp \ interpreter.cpp \ diff --git a/src/goto-programs/if_conversion.cpp b/src/goto-programs/if_conversion.cpp new file mode 100644 index 00000000000..c097476adbe --- /dev/null +++ b/src/goto-programs/if_conversion.cpp @@ -0,0 +1,429 @@ +/*******************************************************************\ + +Module: If-conversion of side-effect-free conditional assignments + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +/// \file +/// If-conversion of side-effect-free conditional assignments + +#include "if_conversion.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "goto_model.h" +#include "remove_skip.h" + +#include +#include +#include + +/// An instruction may appear in a region that we linearise iff it neither +/// transfers control nor has effects beyond a plain assignment or a scope +/// marker. In particular function calls, assertions/assumptions, atomic +/// sections and (non-structural) jumps are excluded: linearising them would +/// be unsound or change semantics. DECL/DEAD are permitted (they only widen +/// the scope of branch-local variables, which is harmless once the guard has +/// been snapshotted). +/// +/// In addition, an assignment whose left-hand side contains a dereference or +/// an array index is excluded: linearising `if(c) *p = v;` into +/// `*p = c ? v : *p` (or `a[i] = c ? v : a[i]`) would evaluate the address +/// `*p` / `a[i]` unconditionally, performing the dereference or array-bounds +/// access even when the guard does not hold. That both changes semantics and +/// defeats the precision the guard provided (null/bounds safety). +static bool +is_region_instruction(const goto_programt::instructiont &instruction) +{ + if(instruction.is_assign()) + { + const exprt &lhs = instruction.assign_lhs(); + return !has_subexpr(lhs, ID_dereference) && !has_subexpr(lhs, ID_index); + } + + return instruction.is_skip() || instruction.is_decl() || + instruction.is_dead(); +} + +namespace +{ +/// Create a fresh static boolean to hold a snapshot of a branch condition. +/// A static lifetime avoids the need for DECL/DEAD bookkeeping; the variable +/// is always assigned immediately before it is read, so its (uninitialised) +/// global value is never observed. +symbol_exprt fresh_guard_symbol( + symbol_table_baset &symbol_table, + const irep_idt &mode, + const source_locationt &source_location, + std::size_t &counter) +{ + irep_idt base_name = "if_conversion_guard$" + std::to_string(counter++); + irep_idt name = CPROVER_PREFIX + id2string(base_name); + + symbolt symbol{name, bool_typet{}, mode}; + symbol.base_name = base_name; + symbol.pretty_name = base_name; + symbol.is_static_lifetime = true; + symbol.is_lvalue = true; + symbol.is_state_var = true; + symbol.is_file_local = true; + symbol.location = source_location; + + symbol_table.insert(std::move(symbol)); + + return symbol_exprt{name, bool_typet{}}; +} +} // namespace + +std::size_t if_conversion( + goto_programt &goto_program, + symbol_table_baset &symbol_table, + const irep_idt &mode, + const namespacet &ns) +{ + // Map each instruction to the set of instructions that jump to it, so that + // we can ensure that the branch we remove is the only way into the region we + // linearise (and that the branch itself is not a jump target). Built once; + // it is treated conservatively (a stale entry only prevents a later + // conversion, it never enables an unsound one). + std::map< + const goto_programt::instructiont *, + std::set> + incoming; + for(auto it = goto_program.instructions.begin(); + it != goto_program.instructions.end(); + ++it) + { + for(const auto &t : it->targets) + incoming[&*t].insert(&*it); + } + + const auto externally_entered = + [&incoming]( + goto_programt::targett p, + const goto_programt::instructiont *branch, + const goto_programt::instructiont *structural) + { + const auto m = incoming.find(&*p); + if(m == incoming.end()) + return false; + for(const auto *src : m->second) + if(src != branch && src != structural) + return true; + return false; + }; + + std::size_t converted = 0; + std::size_t guard_counter = 0; + + for(auto it = goto_program.instructions.begin(); + it != goto_program.instructions.end(); + ++it) + { + // candidate: a conditional GOTO with a single, forward target + if(!it->is_goto() || it->targets.size() != 1) + continue; + + const exprt guard = it->condition(); + if(guard.is_true() || guard.is_false()) + continue; + + // the branch itself must not be a jump target, so that the guard snapshot + // we insert just before it is reached on every entry + if(incoming.find(&*it) != incoming.end()) + continue; + + const goto_programt::targett target = it->get_target(); + + // the then-region is [then_begin, target); require target to be reachable + // forwards (otherwise this is a back-edge / loop) + const goto_programt::targett then_begin = std::next(it); + std::vector region1; + bool forward = false; + for(auto p = then_begin; p != goto_program.instructions.end(); ++p) + { + if(p == target) + { + forward = true; + break; + } + region1.push_back(p); + } + if(!forward) + continue; + + // if-else shape: the then-region ends in an unconditional forward GOTO + // whose target (the join) lies beyond the else-entry `target` + goto_programt::targett structural_goto = goto_program.instructions.end(); + std::vector region2; + bool is_if_else = false; + if( + !region1.empty() && region1.back()->is_goto() && + region1.back()->condition().is_true() && + region1.back()->targets.size() == 1) + { + const goto_programt::targett join = region1.back()->get_target(); + bool join_forward = false; + for(auto p = target; p != goto_program.instructions.end(); ++p) + { + if(p == join) + { + join_forward = true; + break; + } + } + if(join_forward && join != target) + { + structural_goto = region1.back(); + region1.pop_back(); + is_if_else = true; + for(auto p = target; p != join; ++p) + region2.push_back(p); + } + } + + // both regions must consist of linearisable instructions only + const auto linearisable = + [](const std::vector ®ion) + { + for(const auto &p : region) + if(!is_region_instruction(*p)) + return false; + return true; + }; + if(!linearisable(region1) || !linearisable(region2)) + continue; + + // at least one region must contain an assignment, otherwise there is + // nothing worth linearising + const auto has_assignment = + [](const std::vector ®ion) + { + for(const auto &p : region) + if(p->is_assign()) + return true; + return false; + }; + if(!has_assignment(region1) && !has_assignment(region2)) + continue; + + // single-entry: no instruction in the region(s), nor the else-entry, may + // be the target of any jump other than the branch (and the structural + // GOTO) we are removing + const goto_programt::instructiont *const structural_ptr = + is_if_else ? &*structural_goto : nullptr; + bool external_entry = false; + for(const auto &p : region1) + external_entry |= externally_entered(p, &*it, structural_ptr); + for(const auto &p : region2) + external_entry |= externally_entered(p, &*it, structural_ptr); + if(is_if_else) + external_entry |= externally_entered(target, &*it, structural_ptr); + if(external_entry) + continue; + + // Snapshotting the guard into a fresh variable is necessary when the guard + // reads a variable that the region writes or kills: otherwise re-evaluating + // the guard at each converted assignment could observe a clobbered value, + // or read a now-DEAD condition temporary. When the guard does not depend on + // anything the region touches we can use the original condition directly, + // which keeps the conditional assignments analysable downstream (e.g. it + // preserves `p != null` guards for local safe pointer analysis). + const find_symbols_sett guard_symbols = find_symbol_identifiers(guard); + bool guard_depends_on_region = false; + const auto note_region_writes = [&guard_symbols, &guard_depends_on_region]( + const goto_programt::targett &p) + { + if(p->is_assign()) + { + for(const irep_idt &id : find_symbol_identifiers(p->assign_lhs())) + if(guard_symbols.count(id) != 0) + guard_depends_on_region = true; + } + else if(p->is_decl()) + { + if(guard_symbols.count(p->decl_symbol().get_identifier()) != 0) + guard_depends_on_region = true; + } + else if(p->is_dead()) + { + if(guard_symbols.count(p->dead_symbol().get_identifier()) != 0) + guard_depends_on_region = true; + } + }; + for(const auto &p : region1) + note_region_writes(p); + for(const auto &p : region2) + note_region_writes(p); + + // For an if/else, the else-region is linearised after the then-region, so + // converting each region independently into a self-referential conditional + // assignment `x = c ? e : x` makes an else-assignment that reads a variable + // the then-region wrote nest (after SSA) into `x = !c ? b : (c ? a : x)`. + // That defeats constant propagation of `x`: a variable that was concrete on + // each original branch becomes a symbolic value, and in --paths mode that + // can drive downstream branches/loops into an explosion of paths (see + // regression/cbmc/return4). When both regions are a single assignment to + // the same variable we instead emit one combined ternary `x = c ? a : b`, + // which simplify can fold (e.g. `c ? 1 : 1` to `1`); messier overlaps are + // left unconverted. Regions that do not interfere keep the per-region form. + goto_programt::targett combine_then = goto_program.instructions.end(); + goto_programt::targett combine_else = goto_program.instructions.end(); + bool combine = false; + if(is_if_else) + { + find_symbols_sett region1_writes; + for(const auto &p : region1) + { + if(p->is_assign()) + { + for(const irep_idt &id : find_symbol_identifiers(p->assign_lhs())) + region1_writes.insert(id); + } + else if(p->is_decl()) + region1_writes.insert(p->decl_symbol().get_identifier()); + } + + bool region2_uses_region1_write = false; + std::vector region1_assignments; + std::vector region2_assignments; + for(const auto &p : region1) + if(p->is_assign()) + region1_assignments.push_back(p); + for(const auto &p : region2) + { + if(p->is_assign()) + { + region2_assignments.push_back(p); + for(const irep_idt &id : find_symbol_identifiers(p->assign_lhs())) + if(region1_writes.count(id) != 0) + region2_uses_region1_write = true; + for(const irep_idt &id : find_symbol_identifiers(p->assign_rhs())) + if(region1_writes.count(id) != 0) + region2_uses_region1_write = true; + } + else if( + p->is_decl() && + region1_writes.count(p->decl_symbol().get_identifier()) != 0) + region2_uses_region1_write = true; + else if( + p->is_dead() && + region1_writes.count(p->dead_symbol().get_identifier()) != 0) + region2_uses_region1_write = true; + } + + if(region2_uses_region1_write) + { + if( + region1_assignments.size() == 1 && region2_assignments.size() == 1 && + region1_assignments.front()->assign_lhs() == + region2_assignments.front()->assign_lhs()) + { + combine = true; + combine_then = region1_assignments.front(); + combine_else = region2_assignments.front(); + } + else + continue; // leave the branch in place rather than create a chain + } + } + + // the then-region executes when the GOTO is *not* taken, the else-region + // when it is taken + exprt then_condition; + exprt else_condition; + if(guard_depends_on_region) + { + const symbol_exprt snapshot = fresh_guard_symbol( + symbol_table, mode, it->source_location(), guard_counter); + goto_program.insert_before( + it, + goto_programt::make_assignment(snapshot, guard, it->source_location())); + then_condition = boolean_negate(snapshot); + else_condition = snapshot; + } + else + { + then_condition = boolean_negate(guard); + else_condition = guard; + } + simplify(then_condition, ns); + + if(combine) + { + // x = c ? then_rhs : else_rhs, replacing the matching assignment pair; + // the else-assignment becomes a skip + exprt combined = if_exprt{ + then_condition, combine_then->assign_rhs(), combine_else->assign_rhs()}; + simplify(combined, ns); + combine_then->assign_rhs_nonconst() = std::move(combined); + *combine_else = goto_programt::make_skip(combine_else->source_location()); + } + else + { + for(const auto &p : region1) + { + if(!p->is_assign()) + continue; + const exprt lhs = p->assign_lhs(); + const exprt rhs = p->assign_rhs(); + p->assign_rhs_nonconst() = if_exprt{then_condition, rhs, lhs}; + } + for(const auto &p : region2) + { + if(!p->is_assign()) + continue; + const exprt lhs = p->assign_lhs(); + const exprt rhs = p->assign_rhs(); + p->assign_rhs_nonconst() = if_exprt{else_condition, rhs, lhs}; + } + } + + // drop the branch(es) + *it = goto_programt::make_skip(it->source_location()); + if(is_if_else) + *structural_goto = + goto_programt::make_skip(structural_goto->source_location()); + + ++converted; + } + + return converted; +} + +std::size_t +if_conversion(goto_modelt &goto_model, message_handlert &message_handler) +{ + const namespacet ns(goto_model.symbol_table); + std::size_t converted = 0; + + for(auto &gf : goto_model.goto_functions.function_map) + { + const symbolt *fn = goto_model.symbol_table.lookup(gf.first); + const irep_idt mode = fn != nullptr ? fn->mode : ID_C; + converted += + if_conversion(gf.second.body, goto_model.symbol_table, mode, ns); + } + + if(converted != 0) + { + remove_skip(goto_model); + goto_model.goto_functions.update(); + + messaget log(message_handler); + log.statistics() << "if-conversion replaced " << converted + << " branch(es) with conditional assignments" + << messaget::eom; + } + + return converted; +} diff --git a/src/goto-programs/if_conversion.h b/src/goto-programs/if_conversion.h new file mode 100644 index 00000000000..1331c6b7682 --- /dev/null +++ b/src/goto-programs/if_conversion.h @@ -0,0 +1,59 @@ +/*******************************************************************\ + +Module: If-conversion of side-effect-free conditional assignments + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +/// \file +/// If-conversion of side-effect-free conditional assignments +/// +/// Replace control-flow branches whose then/else regions consist solely of +/// assignments with branch-free guarded (conditional-expression) assignments, +/// e.g. rewrite the GOTO program for +/// \code +/// if(cond) x = e; +/// \endcode +/// into +/// \code +/// x = cond ? e : x; +/// \endcode +/// removing the GOTO branch. This avoids forking the symbolic-execution path +/// tree in `--paths` mode at trivial control flow (such as the bookkeeping in +/// the built-in CPROVER library), which would otherwise cause exponential +/// path explosion. +/// +/// The transformation must run before goto_check, so that the per-operand +/// safety checks of the generated conditional expressions remain guarded by +/// the branch condition (see goto_check_ct::check_rec_if); it is therefore +/// sound even when the assignments dereference pointers, divide, or overflow. + +#ifndef CPROVER_GOTO_PROGRAMS_IF_CONVERSION_H +#define CPROVER_GOTO_PROGRAMS_IF_CONVERSION_H + +#include + +#include + +class goto_modelt; +class goto_programt; +class message_handlert; +class namespacet; +class symbol_table_baset; + +/// Apply if-conversion to all functions of \p goto_model. \return the number +/// of branches that were removed. +std::size_t if_conversion(goto_modelt &goto_model, message_handlert &); + +/// Apply if-conversion to a single \p goto_program, creating any fresh guard +/// variables in \p symbol_table with mode \p mode. Does not run remove_skip or +/// update targets; the caller is responsible for doing so. \return the number +/// of branches that were removed. +std::size_t if_conversion( + goto_programt &goto_program, + symbol_table_baset &symbol_table, + const irep_idt &mode, + const namespacet &ns); + +#endif // CPROVER_GOTO_PROGRAMS_IF_CONVERSION_H diff --git a/unit/Makefile b/unit/Makefile index ad783f64f8e..ce0b384386e 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -72,6 +72,7 @@ SRC += analyses/ai/ai.cpp \ goto-programs/goto_program_table_consistency.cpp \ goto-programs/goto_program_validate.cpp \ goto-programs/goto_trace_output.cpp \ + goto-programs/if_conversion.cpp \ goto-programs/is_goto_binary.cpp \ goto-programs/label_function_pointer_call_sites.cpp \ goto-programs/osx_fat_reader.cpp \ diff --git a/unit/goto-programs/if_conversion.cpp b/unit/goto-programs/if_conversion.cpp new file mode 100644 index 00000000000..7cc1ea4710c --- /dev/null +++ b/unit/goto-programs/if_conversion.cpp @@ -0,0 +1,174 @@ +/*******************************************************************\ + +Module: Unit tests for if_conversion + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +static std::size_t count_gotos(const goto_programt &p) +{ + std::size_t n = 0; + for(const auto &i : p.instructions) + if(i.is_goto()) + ++n; + return n; +} + +static bool has_conditional_assignment(const goto_programt &p) +{ + for(const auto &i : p.instructions) + if(i.is_assign() && i.assign_rhs().id() == ID_if) + return true; + return false; +} + +SCENARIO("if_conversion", "[core][goto-programs][if_conversion]") +{ + const signedbv_typet int_type{32}; + symbol_tablet symbol_table; + symbolt x_symbol{"x", int_type, ID_C}; + symbol_table.insert(x_symbol); + const symbol_exprt x = x_symbol.symbol_expr(); + symbolt c_symbol{"c", bool_typet{}, ID_C}; + symbol_table.insert(c_symbol); + const symbol_exprt c = c_symbol.symbol_expr(); + const namespacet ns(symbol_table); + + GIVEN("a conditional assignment `if(c) x = 1;`") + { + goto_programt p; + p.add(goto_programt::make_assignment(x, from_integer(1, int_type))); + const auto join = p.add(goto_programt::make_skip()); + p.add(goto_programt::make_end_function()); + // jump over the assignment when !c (so the assignment runs when c) + p.insert_before( + p.instructions.begin(), goto_programt::make_goto(join, not_exprt{c})); + p.update(); + + REQUIRE(count_gotos(p) == 1); + + WHEN("if-conversion is applied") + { + const std::size_t converted = if_conversion(p, symbol_table, ID_C, ns); + + THEN("the branch is replaced by a guarded assignment") + { + REQUIRE(converted == 1); + REQUIRE(count_gotos(p) == 0); + REQUIRE(has_conditional_assignment(p)); + } + } + } + + GIVEN("a conditional region containing a function call") + { + symbolt f_symbol{"f", code_typet{{}, empty_typet{}}, ID_C}; + symbol_table.insert(f_symbol); + const namespacet ns2(symbol_table); + + goto_programt p; + p.add(goto_programt::make_function_call( + code_function_callt{f_symbol.symbol_expr(), {}})); + p.add(goto_programt::make_assignment(x, from_integer(1, int_type))); + const auto join = p.add(goto_programt::make_skip()); + p.add(goto_programt::make_end_function()); + p.insert_before( + p.instructions.begin(), goto_programt::make_goto(join, not_exprt{c})); + p.update(); + + WHEN("if-conversion is applied") + { + const std::size_t converted = if_conversion(p, symbol_table, ID_C, ns2); + + THEN("the region is not linearisable and the branch is kept") + { + REQUIRE(converted == 0); + REQUIRE(count_gotos(p) == 1); + REQUIRE_FALSE(has_conditional_assignment(p)); + } + } + } + + // Build the GOTO for `if(c) x = a; else x = b;`: + // IF !c GOTO else + // x = a + // GOTO join + // else: x = b + // join: skip + const auto build_if_else = + [&](goto_programt &p, const exprt &a, const exprt &b) + { + const auto then_assign = p.add(goto_programt::make_assignment(x, a)); + const auto then_goto = p.add(goto_programt::make_goto(then_assign)); + const auto else_assign = p.add(goto_programt::make_assignment(x, b)); + const auto join = p.add(goto_programt::make_skip()); + p.add(goto_programt::make_end_function()); + then_goto->set_target(join); + p.insert_before( + then_assign, goto_programt::make_goto(else_assign, not_exprt{c})); + p.update(); + }; + + GIVEN("an if/else assigning the same variable the same value") + { + // if(c) x = 1; else x = 1; + goto_programt p; + build_if_else(p, from_integer(1, int_type), from_integer(1, int_type)); + REQUIRE(count_gotos(p) == 2); + + WHEN("if-conversion is applied") + { + const std::size_t converted = if_conversion(p, symbol_table, ID_C, ns); + + THEN("both branches collapse into a single constant assignment") + { + REQUIRE(converted == 1); + REQUIRE(count_gotos(p) == 0); + // c ? 1 : 1 simplifies to the constant 1; no symbolic conditional is + // left to defeat constant propagation (regression for return4). + REQUIRE_FALSE(has_conditional_assignment(p)); + } + } + } + + GIVEN("an if/else assigning the same variable different values") + { + // if(c) x = 1; else x = 2; + goto_programt p; + build_if_else(p, from_integer(1, int_type), from_integer(2, int_type)); + REQUIRE(count_gotos(p) == 2); + + WHEN("if-conversion is applied") + { + const std::size_t converted = if_conversion(p, symbol_table, ID_C, ns); + + THEN("the pair becomes a single guarded (ternary) assignment") + { + REQUIRE(converted == 1); + REQUIRE(count_gotos(p) == 0); + REQUIRE(has_conditional_assignment(p)); + // exactly one assignment carries the ternary; the other arm is a skip + std::size_t conditional_assignments = 0; + for(const auto &i : p.instructions) + if(i.is_assign() && i.assign_rhs().id() == ID_if) + ++conditional_assignments; + REQUIRE(conditional_assignments == 1); + } + } + } +} diff --git a/unit/path_strategies.cpp b/unit/path_strategies.cpp index b0b99b94d69..60b925f97c4 100644 --- a/unit/path_strategies.cpp +++ b/unit/path_strategies.cpp @@ -387,6 +387,10 @@ void _check_with_strategy( optionst options; cbmc_parse_optionst::set_default_options(options); + // These tests exercise path-exploration strategies, which requires the + // conditional branches in the test programs to be preserved; disable the + // if-conversion pass that would otherwise linearise them away. + options.set_option("if-conversion", false); options.set_option("assertions", true); options.set_option("assumptions", true); options.set_option("paths", true);