From 34e0882b83aedc6fb9c7532c89afaf8e0043b7ce Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Tue, 29 Aug 2017 15:55:33 +0930 Subject: [PATCH 1/1] [GOLD] PowerPC tls_get_addr_optimize This implements the special __tls_get_addr_opt call stub for powerpc gold that returns __thread variable addresses without actually making a call to __tls_get_addr in most cases. Shared libraries that are loaded at program load time (i.e. dlopen is not used) have a known layout for their __thread variables, and thus DTPMOD64/DTPREL64 pairs describing those variables can be set up by ld.so for the __tls_get_addr_opt call stub fast exit. Ref https://sourceware.org/ml/libc-alpha/2015-03/msg00626.html I really, really wish I'd used a differently versioned __tls_get_addr symbol than the base symbol to indicate glibc support for the optimized call, rather than having glibc export __tls_get_addr_opt. A lot of the messing around here, flipping symbols from __tls_get_addr to __tls_get_addr_opt, is caused by that decision. About the only benefit is that a user can see at a glance that their disassembled code is calling __tls_get_addr via the fancy call stub. Anyway, we need references to __tls_get_addr to seem like they were to __tls_get_addr_opt, and in cases like the tsan interceptor, a definition of __tls_get_addr to seem like one of __tls_get_addr_opt as well. That's the reason for Symbol::clear_in_reg and Symbol_table::clone, and why symbols are substituted in Scan::global and other places dealing with dynamic linking. elfcpp/ * elfcpp.h (DT_PPC_OPT): Define. * powerpc.h (PPC_OPT_TLS): Define. gold/ * options.h (tls_get_addr_optimize): New option. * symtab.h (Symbol::clear_in_reg, clone): New functions. (Sized_symbol::clone): New function. (Symbol_table::clone): New function. * resolve.cc (Symbol::clone, Sized_symbol::clone): New functions. * powerpc.cc (Target_powerpc::has_tls_get_addr_opt_, tls_get_addr_, tls_get_addr_opt_): New vars. 
(Target_powerpc::tls_get_addr_opt, tls_get_addr, is_tls_get_addr_opt, replace_tls_get_addr, set_has_tls_get_addr_opt, stk_linker): New functions. (Target_powerpc::Track_tls::maybe_skip_tls_get_addr_call): Add target param. Update callers. Compare symbols rather than names. (Target_powerpc::do_define_standard_symbols): Init tls_get_addr_ and tls_get_addr_opt_. (Target_powerpc::Branch_info::mark_pltcall): Translate tls_get_addr sym to tls_get_addr_opt. (Target_powerpc::Branch_info::make_stub): Likewise. (Stub_table::define_stub_syms): Likewise. (Target_powerpc::Scan::global): Likewise. (Target_powerpc::Relocate::relocate): Likewise. (add_3_12_2, add_3_12_13, bctrl, beqlr, cmpdi_11_0, cmpwi_11_0, ld_11_1, ld_11_3, ld_12_3, lwz_11_3, lwz_12_3, mr_0_3, mr_3_0, mtlr_11, std_11_1): New constants. (Stub_table::eh_frame_added_): Delete. (Stub_table::tls_get_addr_opt_bctrl_, plt_fde_len_, plt_fde_): New vars. (Stub_table::init_plt_fde): New function. (Stub_table::add_eh_frame, remove_eh_frame): Move definition out of line. Init and use plt_fde_. (Stub_table::plt_call_size): Return size for tls_get_addr stub. Extract alignment code to.. (Stub_table::plt_call_align): ..this new function. Adjust all callers. (Stub_table::add_plt_call_entry): Set has_tls_get_addr_opt and tls_get_addr_opt_bctrl, and align after that. (Stub_table::do_write): Write out tls_get_addr stub. (Target_powerpc::do_finalize_sections): Emit DT_PPC_OPT PPC_OPT_TLS/PPC64_OPT_TLS bit. (Target_powerpc::Relocate::relocate): Don't check for or modify nop following bl for tls_get_addr stub. 
--- elfcpp/ChangeLog | 5 + elfcpp/elfcpp.h | 3 + elfcpp/powerpc.h | 6 + gold/ChangeLog | 41 ++++++ gold/options.h | 3 + gold/powerpc.cc | 363 +++++++++++++++++++++++++++++++++++++++-------- gold/resolve.cc | 62 ++++++++ gold/symtab.h | 28 ++++ 8 files changed, 454 insertions(+), 57 deletions(-) diff --git a/elfcpp/ChangeLog b/elfcpp/ChangeLog index f669248626..200d0d4ee4 100644 --- a/elfcpp/ChangeLog +++ b/elfcpp/ChangeLog @@ -1,3 +1,8 @@ +2017-08-29 Alan Modra + + * elfcpp.h (DT_PPC_OPT): Define. + * powerpc.h (PPC_OPT_TLS): Define. + 2017-07-28 H.J. Lu PR gold/21857 diff --git a/elfcpp/elfcpp.h b/elfcpp/elfcpp.h index cccec4c256..1f629ced05 100644 --- a/elfcpp/elfcpp.h +++ b/elfcpp/elfcpp.h @@ -768,6 +768,9 @@ enum DT // Specify the value of _GLOBAL_OFFSET_TABLE_. DT_PPC_GOT = 0x70000000, + // Specify whether various optimisations are possible. + DT_PPC_OPT = 0x70000001, + // Specify the start of the .glink section. DT_PPC64_GLINK = 0x70000000, diff --git a/elfcpp/powerpc.h b/elfcpp/powerpc.h index 3dc0828677..71cac5e410 100644 --- a/elfcpp/powerpc.h +++ b/elfcpp/powerpc.h @@ -228,6 +228,12 @@ enum EF_PPC64_ABI = 3 }; +// DT_PPC_OPT bits +enum +{ + PPC_OPT_TLS = 1 +}; + // DT_PPC64_OPT bits enum { diff --git a/gold/ChangeLog b/gold/ChangeLog index 04b68f846a..9359493cd2 100644 --- a/gold/ChangeLog +++ b/gold/ChangeLog @@ -1,3 +1,44 @@ +2017-08-29 Alan Modra + + * options.h (tls_get_addr_optimize): New option. + * symtab.h (Symbol::clear_in_reg, clone): New functions. + (Sized_symbol::clone): New function. + (Symbol_table::clone): New function. + * resolve.cc (Symbol::clone, Sized_symbol::clone): New functions. + * powerpc.cc (Target_powerpc::has_tls_get_addr_opt_, + tls_get_addr_, tls_get_addr_opt_): New vars. + (Target_powerpc::tls_get_addr_opt, tls_get_addr, + is_tls_get_addr_opt, replace_tls_get_addr, + set_has_tls_get_addr_opt, stk_linker): New functions. + (Target_powerpc::Track_tls::maybe_skip_tls_get_addr_call): Add + target param. Update callers. 
Compare symbols rather than names. + (Target_powerpc::do_define_standard_symbols): Init tls_get_addr_ + and tls_get_addr_opt_. + (Target_powerpc::Branch_info::mark_pltcall): Translate tls_get_addr + sym to tls_get_addr_opt. + (Target_powerpc::Branch_info::make_stub): Likewise. + (Stub_table::define_stub_syms): Likewise. + (Target_powerpc::Scan::global): Likewise. + (Target_powerpc::Relocate::relocate): Likewise. + (add_3_12_2, add_3_12_13, bctrl, beqlr, cmpdi_11_0, cmpwi_11_0, + ld_11_1, ld_11_3, ld_12_3, lwz_11_3, lwz_12_3, mr_0_3, mr_3_0, + mtlr_11, std_11_1): New constants. + (Stub_table::eh_frame_added_): Delete. + (Stub_table::tls_get_addr_opt_bctrl_, plt_fde_len_, plt_fde_): New vars. + (Stub_table::init_plt_fde): New function. + (Stub_table::add_eh_frame, remove_eh_frame): Move definition out + of line. Init and use plt_fde_. + (Stub_table::plt_call_size): Return size for tls_get_addr stub. + Extract alignment code to.. + (Stub_table::plt_call_align): ..this new function. Adjust all callers. + (Stub_table::add_plt_call_entry): Set has_tls_get_addr_opt and + tls_get_addr_opt_bctrl, and align after that. + (Stub_table::do_write): Write out tls_get_addr stub. + (Target_powerpc::do_finalize_sections): Emit DT_PPC_OPT + PPC_OPT_TLS/PPC64_OPT_TLS bit. + (Target_powerpc::Relocate::relocate): Don't check for or modify + nop following bl for tls_get_addr stub. + 2017-08-29 Alan Modra * symtab.h (Symbol): Split u_ into u1_ and u2_. 
Adjust accessors diff --git a/gold/options.h b/gold/options.h index 970e76f4d5..6d10b416b0 100644 --- a/gold/options.h +++ b/gold/options.h @@ -1294,6 +1294,9 @@ class General_options DEFINE_bool(tls_optimize, options::TWO_DASHES, '\0', true, N_("(PowerPC/64 only) Optimize GD/LD/IE code to IE/LE"), N_("(PowerPC/64 only) Don'\''t try to optimize TLS accesses")); + DEFINE_bool(tls_get_addr_optimize, options::TWO_DASHES, '\0', true, + N_("(PowerPC/64 only) Use a special __tls_get_addr call"), + N_("(PowerPC/64 only) Don't use a special __tls_get_addr call")); DEFINE_bool(toc_optimize, options::TWO_DASHES, '\0', true, N_("(PowerPC64 only) Optimize TOC code sequences"), diff --git a/gold/powerpc.cc b/gold/powerpc.cc index c29850b617..7f3f025d83 100644 --- a/gold/powerpc.cc +++ b/gold/powerpc.cc @@ -613,8 +613,10 @@ class Target_powerpc : public Sized_target stub_tables_(), branch_lookup_table_(), branch_info_(), tocsave_loc_(), plt_thread_safe_(false), plt_localentry0_(false), plt_localentry0_init_(false), has_localentry0_(false), + has_tls_get_addr_opt_(false), relax_failed_(false), relax_fail_count_(0), - stub_group_size_(0), savres_section_(0) + stub_group_size_(0), savres_section_(0), + tls_get_addr_(NULL), tls_get_addr_opt_(NULL) { } @@ -1081,11 +1083,43 @@ class Target_powerpc : public Sized_target this->set_processor_specific_flags(flags); } + Symbol* + tls_get_addr_opt() const + { return this->tls_get_addr_opt_; } + + Symbol* + tls_get_addr() const + { return this->tls_get_addr_; } + + // If optimizing __tls_get_addr calls, whether this is the + // "__tls_get_addr" symbol. 
+ bool + is_tls_get_addr_opt(const Symbol* gsym) const + { + return this->tls_get_addr_opt_ && (gsym == this->tls_get_addr_ + || gsym == this->tls_get_addr_opt_); + } + + bool + replace_tls_get_addr(const Symbol* gsym) const + { return this->tls_get_addr_opt_ && gsym == this->tls_get_addr_; } + + void + set_has_tls_get_addr_opt() + { this->has_tls_get_addr_opt_ = true; } + // Offset to toc save stack slot int stk_toc() const { return this->abiversion() < 2 ? 40 : 24; } + // Offset to linker save stack slot. ELFv2 doesn't have a linker word, + // so use the CR save slot. Used only by __tls_get_addr call stub, + // relying on __tls_get_addr not saving CR itself. + int + stk_linker() const + { return this->abiversion() < 2 ? 32 : 8; } + private: class Track_tls @@ -1139,12 +1173,14 @@ class Target_powerpc : public Sized_target {this->tls_get_addr_state_ = SKIP; } Tls_get_addr - maybe_skip_tls_get_addr_call(unsigned int r_type, const Symbol* gsym) + maybe_skip_tls_get_addr_call(Target_powerpc* target, + unsigned int r_type, const Symbol* gsym) { bool is_tls_call = ((r_type == elfcpp::R_POWERPC_REL24 || r_type == elfcpp::R_PPC_PLTREL24) && gsym != NULL - && strcmp(gsym->name(), "__tls_get_addr") == 0); + && (gsym == target->tls_get_addr() + || gsym == target->tls_get_addr_opt())); Tls_get_addr last_tls = this->tls_get_addr_state_; this->tls_get_addr_state_ = NOT_EXPECTED; if (is_tls_call && last_tls != EXPECTED) @@ -1549,12 +1585,18 @@ class Target_powerpc : public Sized_target bool plt_localentry0_; bool plt_localentry0_init_; bool has_localentry0_; + bool has_tls_get_addr_opt_; bool relax_failed_; int relax_fail_count_; int32_t stub_group_size_; Output_data_save_res *savres_section_; + + // The "__tls_get_addr" symbol, if present + Symbol* tls_get_addr_; + // If optimizing __tls_get_addr calls, the "__tls_get_addr_opt" symbol. 
+ Symbol* tls_get_addr_opt_; }; template<> @@ -2459,6 +2501,36 @@ Target_powerpc::do_define_standard_symbols( false, false); } } + + this->tls_get_addr_ = symtab->lookup("__tls_get_addr"); + if (parameters->options().tls_get_addr_optimize() + && this->tls_get_addr_ != NULL + && this->tls_get_addr_->in_reg()) + this->tls_get_addr_opt_ = symtab->lookup("__tls_get_addr_opt"); + if (this->tls_get_addr_opt_ != NULL) + { + if (this->tls_get_addr_->is_undefined() + || this->tls_get_addr_->is_from_dynobj()) + { + // Make it seem as if references to __tls_get_addr are + // really to __tls_get_addr_opt, so the latter symbol is + // made dynamic, not the former. + this->tls_get_addr_->clear_in_reg(); + this->tls_get_addr_opt_->set_in_reg(); + } + // We have a non-dynamic definition for __tls_get_addr. + // Make __tls_get_addr_opt the same, if it does not already have + // a non-dynamic definition. + else if (this->tls_get_addr_opt_->is_undefined() + || this->tls_get_addr_opt_->is_from_dynobj()) + { + Sized_symbol* from + = static_cast*>(this->tls_get_addr_); + Sized_symbol* to + = static_cast*>(this->tls_get_addr_opt_); + symtab->clone(to, from); + } + } } // Set up PowerPC target specific relobj. 
@@ -3030,6 +3102,8 @@ Target_powerpc::Branch_info::mark_pltcall( return false; Symbol* sym = this->object_->global_symbol(this->r_sym_); + if (target->replace_tls_get_addr(sym)) + sym = target->tls_get_addr_opt(); if (sym != NULL && sym->is_forwarder()) sym = symtab->resolve_forwards(sym); const Sized_symbol* gsym = static_cast*>(sym); @@ -3055,12 +3129,14 @@ Target_powerpc::Branch_info::make_stub( Symbol_table* symtab) const { Symbol* sym = this->object_->global_symbol(this->r_sym_); - if (sym != NULL && sym->is_forwarder()) - sym = symtab->resolve_forwards(sym); - const Sized_symbol* gsym = static_cast*>(sym); Target_powerpc* target = static_cast*>( parameters->sized_target()); + if (target->replace_tls_get_addr(sym)) + sym = target->tls_get_addr_opt(); + if (sym != NULL && sym->is_forwarder()) + sym = symtab->resolve_forwards(sym); + const Sized_symbol* gsym = static_cast*>(sym); bool ok = true; if (gsym != NULL @@ -3675,6 +3751,8 @@ static const uint32_t add_2_2_11 = 0x7c425a14; static const uint32_t add_2_2_12 = 0x7c426214; static const uint32_t add_3_3_2 = 0x7c631214; static const uint32_t add_3_3_13 = 0x7c636a14; +static const uint32_t add_3_12_2 = 0x7c6c1214; +static const uint32_t add_3_12_13 = 0x7c6c6a14; static const uint32_t add_11_0_11 = 0x7d605a14; static const uint32_t add_11_2_11 = 0x7d625a14; static const uint32_t add_11_11_2 = 0x7d6b1214; @@ -3696,10 +3774,14 @@ static const uint32_t addis_12_12 = 0x3d8c0000; static const uint32_t b = 0x48000000; static const uint32_t bcl_20_31 = 0x429f0005; static const uint32_t bctr = 0x4e800420; +static const uint32_t bctrl = 0x4e800421; +static const uint32_t beqlr = 0x4d820020; static const uint32_t blr = 0x4e800020; static const uint32_t bnectr_p4 = 0x4ce20420; static const uint32_t cmpld_7_12_0 = 0x7fac0040; static const uint32_t cmpldi_2_0 = 0x28220000; +static const uint32_t cmpdi_11_0 = 0x2c2b0000; +static const uint32_t cmpwi_11_0 = 0x2c0b0000; static const uint32_t cror_15_15_15 = 0x4def7b82; static 
const uint32_t cror_31_31_31 = 0x4ffffb82; static const uint32_t ld_0_1 = 0xe8010000; @@ -3708,9 +3790,12 @@ static const uint32_t ld_2_1 = 0xe8410000; static const uint32_t ld_2_2 = 0xe8420000; static const uint32_t ld_2_11 = 0xe84b0000; static const uint32_t ld_2_12 = 0xe84c0000; +static const uint32_t ld_11_1 = 0xe9610000; static const uint32_t ld_11_2 = 0xe9620000; +static const uint32_t ld_11_3 = 0xe9630000; static const uint32_t ld_11_11 = 0xe96b0000; static const uint32_t ld_12_2 = 0xe9820000; +static const uint32_t ld_12_3 = 0xe9830000; static const uint32_t ld_12_11 = 0xe98b0000; static const uint32_t ld_12_12 = 0xe98c0000; static const uint32_t lfd_0_1 = 0xc8010000; @@ -3722,17 +3807,22 @@ static const uint32_t lis_11 = 0x3d600000; static const uint32_t lis_12 = 0x3d800000; static const uint32_t lvx_0_12_0 = 0x7c0c00ce; static const uint32_t lwz_0_12 = 0x800c0000; +static const uint32_t lwz_11_3 = 0x81630000; static const uint32_t lwz_11_11 = 0x816b0000; static const uint32_t lwz_11_30 = 0x817e0000; +static const uint32_t lwz_12_3 = 0x81830000; static const uint32_t lwz_12_12 = 0x818c0000; static const uint32_t lwzu_0_12 = 0x840c0000; static const uint32_t mflr_0 = 0x7c0802a6; static const uint32_t mflr_11 = 0x7d6802a6; static const uint32_t mflr_12 = 0x7d8802a6; +static const uint32_t mr_0_3 = 0x7c601b78; +static const uint32_t mr_3_0 = 0x7c030378; static const uint32_t mtctr_0 = 0x7c0903a6; static const uint32_t mtctr_11 = 0x7d6903a6; static const uint32_t mtctr_12 = 0x7d8903a6; static const uint32_t mtlr_0 = 0x7c0803a6; +static const uint32_t mtlr_11 = 0x7d6803a6; static const uint32_t mtlr_12 = 0x7d8803a6; static const uint32_t nop = 0x60000000; static const uint32_t ori_0_0_0 = 0x60000000; @@ -3740,6 +3830,7 @@ static const uint32_t srdi_0_0_2 = 0x7800f082; static const uint32_t std_0_1 = 0xf8010000; static const uint32_t std_0_12 = 0xf80c0000; static const uint32_t std_2_1 = 0xf8410000; +static const uint32_t std_11_1 = 0xf9610000; static const 
uint32_t stfd_0_1 = 0xd8010000; static const uint32_t stvx_0_12_0 = 0x7c0c01ce; static const uint32_t sub_11_11_12 = 0x7d6c5850; @@ -4102,7 +4193,8 @@ class Stub_table : public Output_relaxed_input_section orig_data_size_(owner->current_data_size()), plt_size_(0), last_plt_size_(0), branch_size_(0), last_branch_size_(0), min_size_threshold_(0), - eh_frame_added_(false), need_save_res_(false), uniq_(id) + need_save_res_(false), uniq_(id), tls_get_addr_opt_bctrl_(-1u), + plt_fde_len_(0) { this->set_output_section(output_section); @@ -4263,48 +4355,17 @@ class Stub_table : public Output_relaxed_input_section return false; } - // Add .eh_frame info for this stub section. Unlike other linker - // generated .eh_frame this is added late in the link, because we - // only want the .eh_frame info if this particular stub section is - // non-empty. + // Generate a suitable FDE to describe code in this stub group. void - add_eh_frame(Layout* layout) - { - if (!parameters->options().ld_generated_unwind_info()) - return; + init_plt_fde(); - // Since we add stub .eh_frame info late, it must be placed - // after all other linker generated .eh_frame info so that - // merge mapping need not be updated for input sections. - // There is no provision to use a different CIE to that used - // by .glink. - if (!this->targ_->has_glink()) - return; - - if (this->plt_size_ + this->branch_size_ + this->need_save_res_ == 0) - return; - - layout->add_eh_frame_for_plt(this, - Eh_cie::eh_frame_cie, - sizeof (Eh_cie::eh_frame_cie), - default_fde, - sizeof (default_fde)); - this->eh_frame_added_ = true; - } + // Add .eh_frame info for this stub section. + void + add_eh_frame(Layout* layout); + // Remove .eh_frame info for this stub section. 
void - remove_eh_frame(Layout* layout) - { - if (this->eh_frame_added_) - { - layout->remove_eh_frame_for_plt(this, - Eh_cie::eh_frame_cie, - sizeof (Eh_cie::eh_frame_cie), - default_fde, - sizeof (default_fde)); - this->eh_frame_added_ = false; - } - } + remove_eh_frame(Layout* layout); Target_powerpc* targ() const @@ -4356,7 +4417,12 @@ class Stub_table : public Output_relaxed_input_section plt_call_size(typename Plt_stub_entries::const_iterator p) const { if (size == 32) - return 16; + { + const Symbol* gsym = p->first.sym_; + if (this->targ_->is_tls_get_addr_opt(gsym)) + return 12 * 4; + return 4 * 4; + } bool is_iplt; Address plt_addr = this->plt_off(p, &is_iplt); @@ -4370,6 +4436,9 @@ class Stub_table : public Output_relaxed_input_section got_addr += ppcobj->toc_base_offset(); Address off = plt_addr - got_addr; unsigned int bytes = 4 * 4 + 4 * (ha(off) != 0); + const Symbol* gsym = p->first.sym_; + if (this->targ_->is_tls_get_addr_opt(gsym)) + bytes += 13 * 4; if (this->targ_->abiversion() < 2) { bool static_chain = parameters->options().plt_static_chain(); @@ -4379,6 +4448,12 @@ class Stub_table : public Output_relaxed_input_section + 8 * thread_safe + 4 * (ha(off + 8 + 8 * static_chain) != ha(off))); } + return bytes; + } + + unsigned int + plt_call_align(unsigned int bytes) const + { unsigned int align = 1 << parameters->options().plt_align(); if (align > 1) bytes = (bytes + align - 1) & -align; @@ -4518,13 +4593,16 @@ class Stub_table : public Output_relaxed_input_section // a stub table, it is zero for the first few iterations, then // increases monotonically. Address min_size_threshold_; - // Whether .eh_frame info has been created for this stub section. - bool eh_frame_added_; // Set if this stub group needs a copy of out-of-line register // save/restore functions. bool need_save_res_; // Per stub table unique identifier. uint32_t uniq_; + // The bctrl in the __tls_get_addr_opt stub, if present. 
+ unsigned int tls_get_addr_opt_bctrl_; + // FDE unwind info for this stub group. + unsigned int plt_fde_len_; + unsigned char plt_fde_[20]; }; // Add a plt call stub, if we do not already have one for this @@ -4553,6 +4631,12 @@ Stub_table::add_plt_call_entry( p.first->second.localentry0_ = 1; this->targ_->set_has_localentry0(); } + if (this->targ_->is_tls_get_addr_opt(gsym)) + { + this->targ_->set_has_tls_get_addr_opt(); + this->tls_get_addr_opt_bctrl_ = this->plt_size_ - 5 * 4; + } + this->plt_size_ = this->plt_call_align(this->plt_size_); } if (size == 64 && !tocsave @@ -4578,6 +4662,7 @@ Stub_table::add_plt_call_entry( if (p.second) { this->plt_size_ = ent.off_ + this->plt_call_size(p.first); + this->plt_size_ = this->plt_call_align(this->plt_size_); if (size == 64 && this->targ_->is_elfv2_localentry0(object, locsym_index)) { @@ -4697,6 +4782,94 @@ Stub_table::find_long_branch_entry( return p->second; } +// Generate a suitable FDE to describe code in this stub group. +// The __tls_get_addr_opt call stub needs to describe where it saves +// LR, to support exceptions that might be thrown from __tls_get_addr. + +template +void +Stub_table::init_plt_fde() +{ + unsigned char* p = this->plt_fde_; + // offset pcrel sdata4, size udata4, and augmentation size byte. 
+ memset (p, 0, 9); + p += 9; + if (this->tls_get_addr_opt_bctrl_ != -1u) + { + unsigned int to_bctrl = this->tls_get_addr_opt_bctrl_ / 4; + if (to_bctrl < 64) + *p++ = elfcpp::DW_CFA_advance_loc + to_bctrl; + else if (to_bctrl < 256) + { + *p++ = elfcpp::DW_CFA_advance_loc1; + *p++ = to_bctrl; + } + else if (to_bctrl < 65536) + { + *p++ = elfcpp::DW_CFA_advance_loc2; + elfcpp::Swap<16, big_endian>::writeval(p, to_bctrl); + p += 2; + } + else + { + *p++ = elfcpp::DW_CFA_advance_loc4; + elfcpp::Swap<32, big_endian>::writeval(p, to_bctrl); + p += 4; + } + *p++ = elfcpp::DW_CFA_offset_extended_sf; + *p++ = 65; + *p++ = -(this->targ_->stk_linker() / 8) & 0x7f; + *p++ = elfcpp::DW_CFA_advance_loc + 4; + *p++ = elfcpp::DW_CFA_restore_extended; + *p++ = 65; + } + this->plt_fde_len_ = p - this->plt_fde_; +} + +// Add .eh_frame info for this stub section. Unlike other linker +// generated .eh_frame this is added late in the link, because we +// only want the .eh_frame info if this particular stub section is +// non-empty. + +template +void +Stub_table::add_eh_frame(Layout* layout) +{ + if (!parameters->options().ld_generated_unwind_info()) + return; + + // Since we add stub .eh_frame info late, it must be placed + // after all other linker generated .eh_frame info so that + // merge mapping need not be updated for input sections. + // There is no provision to use a different CIE to that used + // by .glink. 
+ if (!this->targ_->has_glink()) + return; + + if (this->plt_size_ + this->branch_size_ + this->need_save_res_ == 0) + return; + + this->init_plt_fde(); + layout->add_eh_frame_for_plt(this, + Eh_cie::eh_frame_cie, + sizeof (Eh_cie::eh_frame_cie), + this->plt_fde_, this->plt_fde_len_); +} + +template +void +Stub_table::remove_eh_frame(Layout* layout) +{ + if (this->plt_fde_len_ != 0) + { + layout->remove_eh_frame_for_plt(this, + Eh_cie::eh_frame_cie, + sizeof (Eh_cie::eh_frame_cie), + this->plt_fde_, this->plt_fde_len_); + this->plt_fde_len_ = 0; + } +} + // A class to handle .glink. template @@ -4896,13 +5069,15 @@ Stub_table::define_stub_syms(Symbol_table* symtab) sprintf(localname, "%x", cs->first.locsym_); symname = localname; } + else if (this->targ_->is_tls_get_addr_opt(cs->first.sym_)) + symname = this->targ_->tls_get_addr_opt()->name(); else symname = cs->first.sym_->name(); char* name = new char[8 + 10 + strlen(obj) + strlen(symname) + strlen(add) + 1]; sprintf(name, "%08x.plt_call.%s%s%s", this->uniq_, obj, symname, add); Address value = this->stub_address() - this->address() + cs->second.off_; - unsigned int stub_size = this->plt_call_size(cs); + unsigned int stub_size = this->plt_call_align(this->plt_call_size(cs)); this->targ_->define_local(symtab, name, this, value, stub_size); } } @@ -5011,6 +5186,33 @@ Stub_table::do_write(Output_file* of) } p = oview + cs->second.off_; + const Symbol* gsym = cs->first.sym_; + if (this->targ_->is_tls_get_addr_opt(gsym)) + { + write_insn(p, ld_11_3 + 0); + p += 4; + write_insn(p, ld_12_3 + 8); + p += 4; + write_insn(p, mr_0_3); + p += 4; + write_insn(p, cmpdi_11_0); + p += 4; + write_insn(p, add_3_12_13); + p += 4; + write_insn(p, beqlr); + p += 4; + write_insn(p, mr_3_0); + p += 4; + if (!cs->second.localentry0_) + { + write_insn(p, mflr_11); + p += 4; + write_insn(p, (std_11_1 + + this->targ_->stk_linker())); + p += 4; + } + use_fake_dep = thread_safe; + } if (ha(off) != 0) { if (cs->second.r2save_) @@ -5097,7 
+5299,20 @@ Stub_table::do_write(Output_file* of) p += 4; } } - if (thread_safe && !use_fake_dep) + if (!cs->second.localentry0_ + && this->targ_->is_tls_get_addr_opt(gsym)) + { + write_insn(p, bctrl); + p += 4; + write_insn(p, ld_2_1 + this->targ_->stk_toc()); + p += 4; + write_insn(p, ld_11_1 + this->targ_->stk_linker()); + p += 4; + write_insn(p, mtlr_11); + p += 4; + write_insn(p, blr); + } + else if (thread_safe && !use_fake_dep) { write_insn(p, cmpldi_2_0); p += 4; @@ -5173,6 +5388,26 @@ Stub_table::do_write(Output_file* of) plt_addr += plt_base; p = oview + cs->second.off_; + const Symbol* gsym = cs->first.sym_; + if (this->targ_->is_tls_get_addr_opt(gsym)) + { + write_insn(p, lwz_11_3 + 0); + p += 4; + write_insn(p, lwz_12_3 + 4); + p += 4; + write_insn(p, mr_0_3); + p += 4; + write_insn(p, cmpwi_11_0); + p += 4; + write_insn(p, add_3_12_2); + p += 4; + write_insn(p, beqlr); + p += 4; + write_insn(p, mr_3_0); + p += 4; + write_insn(p, nop); + p += 4; + } if (parameters->options().output_is_position_independent()) { Address got_addr; @@ -6225,7 +6460,7 @@ Target_powerpc::Scan::local( const elfcpp::Sym& lsym, bool is_discarded) { - this->maybe_skip_tls_get_addr_call(r_type, NULL); + this->maybe_skip_tls_get_addr_call(target, r_type, NULL); if ((size == 64 && r_type == elfcpp::R_PPC64_TLSGD) || (size == 32 && r_type == elfcpp::R_PPC_TLSGD)) @@ -6772,9 +7007,15 @@ Target_powerpc::Scan::global( unsigned int r_type, Symbol* gsym) { - if (this->maybe_skip_tls_get_addr_call(r_type, gsym) == Track_tls::SKIP) + if (this->maybe_skip_tls_get_addr_call(target, r_type, gsym) + == Track_tls::SKIP) return; + if (target->replace_tls_get_addr(gsym)) + // Change a __tls_get_addr reference to __tls_get_addr_opt + // so dynamic relocs are emitted against the latter symbol. 
+ gsym = target->tls_get_addr_opt(); + if ((size == 64 && r_type == elfcpp::R_PPC64_TLSGD) || (size == 32 && r_type == elfcpp::R_PPC_TLSGD)) { @@ -7904,6 +8145,8 @@ Target_powerpc::do_finalize_sections( odyn->add_section_plus_offset(elfcpp::DT_PPC_GOT, this->got_, this->got_->g_o_t()); } + if (this->has_tls_get_addr_opt_) + odyn->add_constant(elfcpp::DT_PPC_OPT, elfcpp::PPC_OPT_TLS); } else { @@ -7915,9 +8158,12 @@ Target_powerpc::do_finalize_sections( (this->glink_->pltresolve_size - 32)); } - if (this->has_localentry0_) + if (this->has_localentry0_ || this->has_tls_get_addr_opt_) odyn->add_constant(elfcpp::DT_PPC64_OPT, - elfcpp::PPC64_OPT_LOCALENTRY); + ((this->has_localentry0_ + ? elfcpp::PPC64_OPT_LOCALENTRY : 0) + | (this->has_tls_get_addr_opt_ + ? elfcpp::PPC64_OPT_TLS : 0))); } } @@ -8035,9 +8281,12 @@ Target_powerpc::Relocate::relocate( if (view == NULL) return true; + if (target->replace_tls_get_addr(gsym)) + gsym = static_cast*>(target->tls_get_addr_opt()); + const elfcpp::Rela rela(preloc); unsigned int r_type = elfcpp::elf_r_type(rela.get_r_info()); - switch (this->maybe_skip_tls_get_addr_call(r_type, gsym)) + switch (this->maybe_skip_tls_get_addr_call(target, r_type, gsym)) { case Track_tls::NOT_EXPECTED: gold_error_at_location(relinfo, relnum, rela.get_r_offset(), @@ -8162,8 +8411,8 @@ Target_powerpc::Relocate::relocate( { typedef typename elfcpp::Swap<32, big_endian>::Valtype Valtype; Valtype* wv = reinterpret_cast(view); - bool can_plt_call = localentry0; - if (!localentry0 && rela.get_r_offset() + 8 <= view_size) + bool can_plt_call = localentry0 || target->is_tls_get_addr_opt(gsym); + if (!can_plt_call && rela.get_r_offset() + 8 <= view_size) { Valtype insn = elfcpp::Swap<32, big_endian>::readval(wv); Valtype insn2 = elfcpp::Swap<32, big_endian>::readval(wv + 1); diff --git a/gold/resolve.cc b/gold/resolve.cc index 042d03254d..b85bbc810d 100644 --- a/gold/resolve.cc +++ b/gold/resolve.cc @@ -917,6 +917,61 @@ 
Symbol_table::report_resolve_problem(bool is_error, const char* msg, gold_info("%s: %s: previous definition here", program_name, objname); } +// Completely override existing symbol. Everything bar name_, +// version_, and is_forced_local_ flag are copied. version_ is +// cleared if from->version_ is clear. Returns true if this symbol +// should be forced local. +bool +Symbol::clone(const Symbol* from) +{ + // Don't allow cloning after dynamic linking info is attached to symbols. + // We aren't prepared to merge such. + gold_assert(!this->has_symtab_index() && !from->has_symtab_index()); + gold_assert(!this->has_dynsym_index() && !from->has_dynsym_index()); + gold_assert(this->got_offset_list()->get_list() == NULL + && from->got_offset_list()->get_list() == NULL); + gold_assert(!this->has_plt_offset() && !from->has_plt_offset()); + + if (!from->version_) + this->version_ = from->version_; + this->u1_ = from->u1_; + this->u2_ = from->u2_; + this->type_ = from->type_; + this->binding_ = from->binding_; + this->visibility_ = from->visibility_; + this->nonvis_ = from->nonvis_; + this->source_ = from->source_; + this->is_def_ = from->is_def_; + this->is_forwarder_ = from->is_forwarder_; + this->has_alias_ = from->has_alias_; + this->needs_dynsym_entry_ = from->needs_dynsym_entry_; + this->in_reg_ = from->in_reg_; + this->in_dyn_ = from->in_dyn_; + this->needs_dynsym_value_ = from->needs_dynsym_value_; + this->has_warning_ = from->has_warning_; + this->is_copied_from_dynobj_ = from->is_copied_from_dynobj_; + this->is_ordinary_shndx_ = from->is_ordinary_shndx_; + this->in_real_elf_ = from->in_real_elf_; + this->is_defined_in_discarded_section_ + = from->is_defined_in_discarded_section_; + this->undef_binding_set_ = from->undef_binding_set_; + this->undef_binding_weak_ = from->undef_binding_weak_; + this->is_predefined_ = from->is_predefined_; + this->is_protected_ = from->is_protected_; + this->non_zero_localentry_ = from->non_zero_localentry_; + + return 
!this->is_forced_local_ && from->is_forced_local_; +} + +template +bool +Sized_symbol::clone(const Sized_symbol* from) +{ + this->value_ = from->value_; + this->symsize_ = from->symsize_; + return Symbol::clone(from); +} + // A special case of should_override which is only called for a strong // defined symbol from a regular object file. This is used when // defining special symbols. @@ -1116,4 +1171,11 @@ Symbol_table::override_with_special<64>(Sized_symbol<64>*, const Sized_symbol<64>*); #endif +template +bool +Sized_symbol<32>::clone(const Sized_symbol<32>*); + +template +bool +Sized_symbol<64>::clone(const Sized_symbol<64>*); } // End namespace gold. diff --git a/gold/symtab.h b/gold/symtab.h index 88d6c2782b..a67d5eb90d 100644 --- a/gold/symtab.h +++ b/gold/symtab.h @@ -329,6 +329,11 @@ class Symbol set_in_reg() { this->in_reg_ = true; } + // Forget this symbol was seen in a regular object. + void + clear_in_reg() + { this->in_reg_ = false; } + // Return whether this symbol has been seen in a dynamic object. bool in_dyn() const @@ -893,6 +898,13 @@ class Symbol set_non_zero_localentry() { this->non_zero_localentry_ = true; } + // Completely override existing symbol. Everything bar name_, + // version_, and is_forced_local_ flag are copied. version_ is + // cleared if from->version_ is clear. Returns true if this symbol + // should be forced local. + bool + clone(const Symbol* from); + protected: // Instances of this class should always be created at a specific // size. @@ -1182,6 +1194,13 @@ class Sized_symbol : public Symbol void allocate_common(Output_data*, Value_type value); + // Completely override existing symbol. Everything bar name_, + // version_, and is_forced_local_ flag are copied. version_ is + // cleared if from->version_ is clear. Returns true if this symbol + // should be forced local. 
+ bool + clone(const Sized_symbol* from); + private: Sized_symbol(const Sized_symbol&); Sized_symbol& operator=(const Sized_symbol&); @@ -1687,6 +1706,15 @@ class Symbol_table version_script() const { return version_script_; } + // Completely override existing symbol. + template + void + clone(Sized_symbol* to, const Sized_symbol* from) + { + if (to->clone(from)) + this->force_local(to); + } + private: Symbol_table(const Symbol_table&); Symbol_table& operator=(const Symbol_table&); -- 2.34.1