cprover
Loading...
Searching...
No Matches
ci_lazy_methods.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Java Bytecode
4
5Author: Diffblue Ltd.
6
7\*******************************************************************/
8
9#include "ci_lazy_methods.h"
10
11#include <util/expr_iterator.h>
12#include <util/namespace.h>
13#include <util/suffix.h>
14#include <util/symbol_table.h>
15
17
19#include "java_class_loader.h"
20#include "java_entry_point.h"
21#include "remove_exceptions.h"
22
39 const symbol_table_baset &symbol_table,
40 const irep_idt &main_class,
41 const std::vector<irep_idt> &main_jar_classes,
42 const std::vector<load_extra_methodst> &lazy_methods_extra_entry_points,
43 java_class_loadert &java_class_loader,
44 const std::vector<irep_idt> &extra_instantiated_classes,
45 const select_pointer_typet &pointer_type_selector,
46 const synthetic_methods_mapt &synthetic_methods)
47 : main_class(main_class),
48 main_jar_classes(main_jar_classes),
49 lazy_methods_extra_entry_points(lazy_methods_extra_entry_points),
50 java_class_loader(java_class_loader),
51 extra_instantiated_classes(extra_instantiated_classes),
52 pointer_type_selector(pointer_type_selector),
53 synthetic_methods(synthetic_methods)
54{
55 // build the class hierarchy
56 class_hierarchy(symbol_table);
57}
58
65static bool references_class_model(const exprt &expr)
66{
67 static const struct_tag_typet class_type("java::java.lang.Class");
68
69 for(auto it = expr.depth_begin(); it != expr.depth_end(); ++it)
70 {
72 it->type() == class_type &&
76 {
77 return true;
78 }
79 }
80
81 return false;
82}
83
101 symbol_table_baset &symbol_table,
102 method_bytecodet &method_bytecode,
103 const method_convertert &method_converter,
104 message_handlert &message_handler)
105{
106 std::unordered_set<irep_idt> methods_to_convert_later =
107 entry_point_methods(symbol_table, message_handler);
108
109 // Add any extra entry points specified; we should elaborate these in the
110 // same way as the main function.
111 for(const auto &extra_function_generator : lazy_methods_extra_entry_points)
112 {
113 std::vector<irep_idt> extra_methods =
114 extra_function_generator(symbol_table);
115 methods_to_convert_later.insert(extra_methods.begin(), extra_methods.end());
116 }
117
118 std::unordered_set<irep_idt> instantiated_classes;
119
120 {
121 std::unordered_set<irep_idt> initial_callable_methods;
122 ci_lazy_methods_neededt initial_lazy_methods(
123 initial_callable_methods,
124 instantiated_classes,
125 symbol_table,
128 methods_to_convert_later, namespacet(symbol_table), initial_lazy_methods);
129 methods_to_convert_later.insert(
130 initial_callable_methods.begin(), initial_callable_methods.end());
131 }
132
133 std::unordered_set<irep_idt> methods_already_populated;
134 std::unordered_set<class_method_descriptor_exprt, irep_hash>
135 called_virtual_functions;
136 bool class_initializer_seen = false;
137
138 messaget log{message_handler};
139
140 bool any_new_classes = true;
141 while(any_new_classes)
142 {
143 bool any_new_methods = true;
144 while(any_new_methods)
145 {
146 any_new_methods = false;
147 while(!methods_to_convert_later.empty())
148 {
149 std::unordered_set<irep_idt> methods_to_convert;
150 std::swap(methods_to_convert, methods_to_convert_later);
151 for(const auto &mname : methods_to_convert)
152 {
153 const auto conversion_result = convert_and_analyze_method(
154 method_converter,
155 methods_already_populated,
156 class_initializer_seen,
157 mname,
158 symbol_table,
159 methods_to_convert_later,
160 instantiated_classes,
161 called_virtual_functions,
162 message_handler);
163 any_new_methods |= conversion_result.new_method_seen;
164 class_initializer_seen |= conversion_result.class_initializer_seen;
165 }
166 }
167
168 // Given the object types we now know may be created, populate more
169 // possible virtual function call targets:
170
171 log.debug() << "CI lazy methods: add virtual method targets ("
172 << called_virtual_functions.size() << " callsites)"
173 << messaget::eom;
174
175 for(const class_method_descriptor_exprt &called_virtual_function :
176 called_virtual_functions)
177 {
179 called_virtual_function,
180 instantiated_classes,
181 methods_to_convert_later,
182 symbol_table);
183 }
184 }
185
187 methods_to_convert_later,
188 instantiated_classes,
189 called_virtual_functions,
190 symbol_table);
191 }
192
193 // Remove symbols for methods that were declared but never used:
194 symbol_tablet keep_symbols;
195 // Manually keep @inflight_exception, as it is unused at this stage
196 // but will become used when the `remove_exceptions` pass is run:
197 keep_symbols.add(symbol_table.lookup_ref(INFLIGHT_EXCEPTION_VARIABLE_NAME));
198
199 for(const auto &sym : symbol_table.symbols)
200 {
201 // Don't keep global variables (unless they're gathered below from a
202 // function that references them)
203 if(sym.second.is_static_lifetime)
204 continue;
205 if(sym.second.type.id()==ID_code)
206 {
207 // Don't keep functions that belong to this language that we haven't
208 // converted above
209 if(
210 (method_bytecode.contains_method(sym.first) ||
211 synthetic_methods.count(sym.first)) &&
212 !methods_already_populated.count(sym.first))
213 {
214 continue;
215 }
216 // If this is a function then add all the things used in it
217 gather_needed_globals(sym.second.value, symbol_table, keep_symbols);
218 }
219 keep_symbols.add(sym.second);
220 }
221
222 log.debug() << "CI lazy methods: removed "
223 << symbol_table.symbols.size() - keep_symbols.symbols.size()
224 << " unreachable methods and globals" << messaget::eom;
225
226 auto sorted_to_keep = keep_symbols.sorted_symbol_names();
227 auto all_sorted = symbol_table.sorted_symbol_names();
228 auto it = sorted_to_keep.cbegin();
229 for(const auto &id : all_sorted)
230 {
231 if(it == sorted_to_keep.cend() || id != *it)
232 symbol_table.remove(id);
233 else
234 ++it;
235 }
236
237 return false;
238}
239
248 std::unordered_set<irep_idt> &methods_to_convert_later,
249 std::unordered_set<irep_idt> &instantiated_classes,
250 const std::unordered_set<class_method_descriptor_exprt, irep_hash>
251 &virtual_functions,
252 symbol_table_baset &symbol_table)
253{
254 ci_lazy_methods_neededt lazy_methods_loader(
255 methods_to_convert_later,
256 instantiated_classes,
257 symbol_table,
259
260 bool any_new_classes = false;
261 for(const class_method_descriptor_exprt &virtual_function : virtual_functions)
262 {
263 std::unordered_set<irep_idt> candidate_target_methods;
265 virtual_function,
266 instantiated_classes,
267 candidate_target_methods,
268 symbol_table);
269
270 if(!candidate_target_methods.empty())
271 continue;
272
273 const java_method_typet &java_method_type =
274 to_java_method_type(virtual_function.type());
275
276 // Add the call class to instantiated_classes and assert that it
277 // didn't already exist. It can't be instantiated already, otherwise it
278 // would give a concrete definition of the called method, and
279 // candidate_target_methods would be non-empty.
280 const irep_idt &call_class = virtual_function.class_id();
281 bool was_missing = instantiated_classes.count(call_class) == 0;
282 CHECK_RETURN(was_missing);
283 any_new_classes = true;
284
285 const typet &this_type = java_method_type.get_this()->type();
286 if(
287 const pointer_typet *this_pointer_type =
289 {
290 lazy_methods_loader.add_all_needed_classes(*this_pointer_type);
291 }
292
293 // That should in particular have added call_class to the possibly
294 // instantiated types.
295 bool still_missing = instantiated_classes.count(call_class) == 0;
296 CHECK_RETURN(!still_missing);
297
298 // Make sure we add our return type as required, as we may not have
299 // seen any concrete instances of it being created.
300 const typet &return_type = java_method_type.return_type();
301 if(
302 const pointer_typet *return_pointer_type =
304 {
305 lazy_methods_loader.add_all_needed_classes(*return_pointer_type);
306 }
307
308 // Check that `get_virtual_method_target` returns a method now
309 const irep_idt &method_name = virtual_function.mangled_method_name();
310 const irep_idt method_id = get_virtual_method_target(
311 instantiated_classes, method_name, call_class, symbol_table);
312 CHECK_RETURN(!method_id.empty());
313
314 // Add what it returns to methods_to_convert_later
315 methods_to_convert_later.insert(method_id);
316 }
317 return any_new_classes;
318}
319
331 const method_convertert &method_converter,
332 std::unordered_set<irep_idt> &methods_already_populated,
333 const bool class_initializer_already_seen,
334 const irep_idt &method_name,
335 symbol_table_baset &symbol_table,
336 std::unordered_set<irep_idt> &methods_to_convert_later,
337 std::unordered_set<irep_idt> &instantiated_classes,
338 std::unordered_set<class_method_descriptor_exprt, irep_hash>
339 &called_virtual_functions,
340 message_handlert &message_handler)
341{
343 if(!methods_already_populated.insert(method_name).second)
344 return result;
345
346 messaget log{message_handler};
347 log.debug() << "CI lazy methods: elaborate " << method_name << messaget::eom;
348
349 // Note this wraps *references* to methods_to_convert_later &
350 // instantiated_classes
351 ci_lazy_methods_neededt needed_methods(
352 methods_to_convert_later,
353 instantiated_classes,
354 symbol_table,
356
357 if(method_converter(method_name, needed_methods))
358 return result;
359
360 const exprt &method_body = symbol_table.lookup_ref(method_name).value;
361 gather_virtual_callsites(method_body, called_virtual_functions);
362
363 if(!class_initializer_already_seen && references_class_model(method_body))
364 {
365 result.class_initializer_seen = true;
366 const irep_idt initializer_signature =
368 if(symbol_table.has_symbol(initializer_signature))
369 methods_to_convert_later.insert(initializer_signature);
370 }
371 result.new_method_seen = true;
372 return result;
373}
374
380std::unordered_set<irep_idt> ci_lazy_methodst::entry_point_methods(
381 const symbol_table_baset &symbol_table,
382 message_handlert &message_handler)
383{
384 std::unordered_set<irep_idt> methods_to_convert_later;
385
386 const main_function_resultt main_function =
387 get_main_symbol(symbol_table, this->main_class, message_handler);
388 if(!main_function.is_success())
389 {
390 // Failed, mark all functions in the given main class(es)
391 // reachable.
392 std::vector<irep_idt> reachable_classes;
393 if(!this->main_class.empty())
394 reachable_classes.push_back(this->main_class);
395 else
396 reachable_classes = this->main_jar_classes;
397 for(const irep_idt &class_name : reachable_classes)
398 {
399 const auto &methods =
400 this->java_class_loader.get_original_class(class_name)
402 for(const auto &method : methods)
403 {
404 const irep_idt methodid = "java::" + id2string(class_name) + "." +
405 id2string(method.name) + ":" +
406 id2string(method.descriptor);
407 methods_to_convert_later.insert(methodid);
408 }
409 }
410 }
411 else
412 methods_to_convert_later.insert(main_function.main_function.name);
413 return methods_to_convert_later;
414}
415
425 const std::unordered_set<irep_idt> &entry_points,
426 const namespacet &ns,
427 ci_lazy_methods_neededt &needed_lazy_methods)
428{
429 for(const auto &mname : entry_points)
430 {
431 const auto &symbol=ns.lookup(mname);
432 const auto &mtype = to_java_method_type(symbol.type);
433 for(const auto &param : mtype.parameters())
434 {
435 if(param.type().id()==ID_pointer)
436 {
437 const pointer_typet &original_pointer = to_pointer_type(param.type());
438 needed_lazy_methods.add_all_needed_classes(original_pointer);
439 }
440 }
441 }
442
443 // Also add classes whose instances are magically
444 // created by the JVM and so won't be spotted by
445 // looking for constructors and calls as usual:
446 needed_lazy_methods.add_needed_class("java::java.lang.String");
447 needed_lazy_methods.add_needed_class("java::java.lang.Class");
448 needed_lazy_methods.add_needed_class("java::java.lang.Object");
449
450 // As in class_loader, ensure these classes stay available
451 for(const auto &id : extra_instantiated_classes)
452 needed_lazy_methods.add_needed_class("java::" + id2string(id));
453}
454
460 const exprt &e,
461 std::unordered_set<class_method_descriptor_exprt, irep_hash> &result)
462{
463 if(e.id()!=ID_code)
464 return;
465 const codet &c=to_code(e);
466 if(
467 c.get_statement() == ID_function_call &&
469 to_code_function_call(c).function()))
470 {
471 result.insert(
473 }
474 else
475 {
476 for(const exprt &op : e.operands())
477 gather_virtual_callsites(op, result);
478 }
479}
480
492 const class_method_descriptor_exprt &called_function,
493 const std::unordered_set<irep_idt> &instantiated_classes,
494 std::unordered_set<irep_idt> &callable_methods,
495 symbol_table_baset &symbol_table)
496{
497 const auto &call_class = called_function.class_id();
498 const auto &method_name = called_function.mangled_method_name();
499
500 class_hierarchyt::idst self_and_child_classes =
502 self_and_child_classes.push_back(call_class);
503
504 for(const irep_idt &class_name : self_and_child_classes)
505 {
506 const irep_idt method_id = get_virtual_method_target(
507 instantiated_classes, method_name, class_name, symbol_table);
508 if(!method_id.empty())
509 callable_methods.insert(method_id);
510 }
511}
512
519 const exprt &e,
520 const symbol_table_baset &symbol_table,
521 symbol_table_baset &needed)
522{
523 if(e.id()==ID_symbol)
524 {
525 // If the symbol isn't in the symbol table at all, then it is defined
526 // on an opaque type (i.e. we don't have the class definition at this point)
527 // and will be created during the typecheck phase.
528 // We don't mark it as 'needed' as it doesn't exist yet to keep.
529 const auto findit=
530 symbol_table.symbols.find(to_symbol_expr(e).get_identifier());
531 if(findit!=symbol_table.symbols.end() &&
532 findit->second.is_static_lifetime)
533 {
534 needed.add(findit->second);
535 // Gather any globals referenced in the initialiser:
536 gather_needed_globals(findit->second.value, symbol_table, needed);
537 }
538 }
539 else
540 {
541 for(const auto &op : e.operands())
542 gather_needed_globals(op, symbol_table, needed);
543 }
544}
545
559 const std::unordered_set<irep_idt> &instantiated_classes,
560 const irep_idt &call_basename,
561 const irep_idt &classname,
562 const symbol_table_baset &symbol_table)
563{
564 // Program-wide, is this class ever instantiated?
565 if(!instantiated_classes.count(classname))
566 return irep_idt();
567
568 auto resolved_call =
569 get_inherited_method_implementation(call_basename, classname, symbol_table);
570
571 if(resolved_call)
572 return resolved_call->get_full_component_identifier();
573 else
574 return irep_idt();
575}
static bool references_class_model(const exprt &expr)
Checks if an expression refers to any class literals (e.g.
Collect methods needed to be loaded using the lazy method.
std::function< bool(const irep_idt &function_id, ci_lazy_methods_neededt)> method_convertert
void add_all_needed_classes(const pointer_typet &pointer_type)
Add to the needed classes all classes specified, the replacement type if it will be replaced,...
bool add_needed_class(const irep_idt &)
Notes class class_symbol_name will be instantiated, or a static field belonging to it will be accesse...
convert_method_resultt convert_and_analyze_method(const method_convertert &method_converter, std::unordered_set< irep_idt > &methods_already_populated, const bool class_initializer_already_seen, const irep_idt &method_name, symbol_table_baset &symbol_table, std::unordered_set< irep_idt > &methods_to_convert_later, std::unordered_set< irep_idt > &instantiated_classes, std::unordered_set< class_method_descriptor_exprt, irep_hash > &called_virtual_functions, message_handlert &message_handler)
Convert a method, add it to the populated set, add needed methods to methods_to_convert_later and add...
const std::vector< irep_idt > & extra_instantiated_classes
irep_idt get_virtual_method_target(const std::unordered_set< irep_idt > &instantiated_classes, const irep_idt &call_basename, const irep_idt &classname, const symbol_table_baset &symbol_table)
Find a virtual callee, if one is defined and the callee type is known to exist.
void get_virtual_method_targets(const class_method_descriptor_exprt &called_function, const std::unordered_set< irep_idt > &instantiated_classes, std::unordered_set< irep_idt > &callable_methods, symbol_table_baset &symbol_table)
Find possible callees, excluding types that are not known to be instantiated.
std::vector< irep_idt > main_jar_classes
const select_pointer_typet & pointer_type_selector
const std::vector< load_extra_methodst > & lazy_methods_extra_entry_points
class_hierarchyt class_hierarchy
void gather_virtual_callsites(const exprt &e, std::unordered_set< class_method_descriptor_exprt, irep_hash > &result)
Get places where virtual functions are called.
java_class_loadert & java_class_loader
bool operator()(symbol_table_baset &symbol_table, method_bytecodet &method_bytecode, const method_convertert &method_converter, message_handlert &message_handler)
Uses a simple context-insensitive ('ci') analysis to determine which methods may be reachable from th...
const synthetic_methods_mapt & synthetic_methods
ci_lazy_methodst(const symbol_table_baset &symbol_table, const irep_idt &main_class, const std::vector< irep_idt > &main_jar_classes, const std::vector< load_extra_methodst > &lazy_methods_extra_entry_points, java_class_loadert &java_class_loader, const std::vector< irep_idt > &extra_instantiated_classes, const select_pointer_typet &pointer_type_selector, const synthetic_methods_mapt &synthetic_methods)
Constructor for lazy-method loading.
void gather_needed_globals(const exprt &e, const symbol_table_baset &symbol_table, symbol_table_baset &needed)
See output.
void initialize_instantiated_classes(const std::unordered_set< irep_idt > &entry_points, const namespacet &ns, ci_lazy_methods_neededt &needed_lazy_methods)
Build up a list of methods whose type may be passed around reachable from the entry point.
bool handle_virtual_methods_with_no_callees(std::unordered_set< irep_idt > &methods_to_convert_later, std::unordered_set< irep_idt > &instantiated_classes, const std::unordered_set< class_method_descriptor_exprt, irep_hash > &virtual_functions, symbol_table_baset &symbol_table)
Look for virtual callsites with no candidate targets.
std::unordered_set< irep_idt > entry_point_methods(const symbol_table_baset &symbol_table, message_handlert &message_handler)
Entry point methods are either:
idst get_children_trans(const irep_idt &id) const
std::vector< irep_idt > idst
An expression describing a method on a class.
Definition std_expr.h:3448
const irep_idt & mangled_method_name() const
The method name after mangling it by combining it with the descriptor.
Definition std_expr.h:3485
const irep_idt & class_id() const
Unique identifier in the symbol table, of the compile time type of the class which this expression is...
Definition std_expr.h:3493
const typet & return_type() const
Definition std_types.h:645
const parametert * get_this() const
Definition std_types.h:621
Data structure for representing an arbitrary statement in a program.
const irep_idt & get_statement() const
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:39
bool empty() const
Definition dstring.h:90
Base class for all expressions.
Definition expr.h:56
depth_iteratort depth_end()
Definition expr.cpp:249
depth_iteratort depth_begin()
Definition expr.cpp:247
typet & type()
Return the type of the expression.
Definition expr.h:84
operandst & operands()
Definition expr.h:94
const irep_idt & id() const
Definition irep.h:396
Class responsible to load .class files.
const java_bytecode_parse_treet & get_original_class(const irep_idt &class_name)
Class that provides messages with a built-in verbosity 'level'.
Definition message.h:155
static eomt eom
Definition message.h:297
bool contains_method(const irep_idt &method_id) const
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition namespace.h:91
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
The pointer type These are both 'bitvector_typet' (they have a width) and 'type_with_subtypet' (they ...
A struct tag type, i.e., struct_typet with an identifier.
Definition std_types.h:449
const irep_idt & get_identifier() const
Definition std_expr.h:142
The symbol table base class interface.
std::vector< irep_idt > sorted_symbol_names() const
Build and return a lexicographically sorted vector of symbol names from all symbols stored in this sy...
bool remove(const irep_idt &name)
Remove a symbol from the symbol table.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
The symbol table.
irep_idt name
The unique identifier.
Definition symbol.h:40
exprt value
Initial value of symbol.
Definition symbol.h:34
The type of an expression, extends irept.
Definition type.h:29
auto type_try_dynamic_cast(TType &base) -> typename detail::expr_try_dynamic_cast_return_typet< T, TType >::type
Try to cast a reference to a generic typet to a specific derived class.
Definition expr_cast.h:135
Forward depth-first search iterators These iterators' copy operations are expensive,...
const code_function_callt & to_code_function_call(const goto_instruction_codet &code)
const std::string & id2string(const irep_idt &d)
Definition irep.h:47
#define JAVA_CLASS_MODEL_SUFFIX
main_function_resultt get_main_symbol(const symbol_table_baset &symbol_table, const irep_idt &main_class, message_handlert &message_handler)
Figures out the entry point of the code to verify.
irep_idt get_java_class_literal_initializer_signature()
Get the symbol name of java.lang.Class' initializer method.
const java_method_typet & to_java_method_type(const typet &type)
Definition java_types.h:184
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Remove function exceptional returns.
#define INFLIGHT_EXCEPTION_VARIABLE_NAME
optionalt< resolve_inherited_componentt::inherited_componentt > get_inherited_method_implementation(const irep_idt &call_basename, const irep_idt &classname, const symbol_table_baset &symbol_table)
Given a class and a component, identify the concrete method it is resolved to.
Given a class and a component (either field or method), find the closest parent that defines that com...
#define CHECK_RETURN(CONDITION)
Definition invariant.h:495
const codet & to_code(const exprt &expr)
const class_method_descriptor_exprt & to_class_method_descriptor_expr(const exprt &expr)
Cast an exprt to a class_method_descriptor_exprt.
Definition std_expr.h:3538
bool can_cast_expr< class_method_descriptor_exprt >(const exprt &base)
Definition std_expr.h:3548
bool can_cast_expr< symbol_exprt >(const exprt &base)
Definition std_expr.h:206
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition std_expr.h:222
bool has_suffix(const std::string &s, const std::string &suffix)
Definition suffix.h:17
Author: Diffblue Ltd.
std::unordered_map< irep_idt, synthetic_method_typet > synthetic_methods_mapt
Maps method names on to a synthetic method kind.
dstringt irep_idt