+ /* When this section grew past its original size (size != rawsize)
+    and feeds a .init or .fini output section, emit an unconditional
+    branch at the old end of the section so execution skips the bytes
+    appended after it.  NOTE(review): B is presumably the opcode
+    constant for an unconditional relative branch, defined elsewhere
+    in this file -- confirm.  */
+ if (input_section->sec_info_type == SEC_INFO_TYPE_TARGET
+ && input_section->size != input_section->rawsize
+ && (strcmp (input_section->output_section->name, ".init") == 0
+ || strcmp (input_section->output_section->name, ".fini") == 0))
+ {
+ /* Branch around the trampolines. */
+ unsigned int insn = B + input_section->size - input_section->rawsize;
+ bfd_put_32 (input_bfd, insn, contents + input_section->rawsize);
+ }
+
+ /* PPC476 icache-prefetch workaround: rewrite the last instruction
+    word on each memory page covered by this section, redirecting it
+    into a patch area reserved at the end of the section
+    (relax_info->workaround_size bytes).  For relocatable links this
+    is only possible when the output section alignment is at least
+    the page size, since page boundaries are otherwise unknown.  */
+ if (htab->params->ppc476_workaround
+ && input_section->sec_info_type == SEC_INFO_TYPE_TARGET
+ && (!bfd_link_relocatable (info)
+ || (input_section->output_section->alignment_power
+ >= htab->params->pagesize_p2)))
+ {
+ bfd_vma start_addr, end_addr, addr;
+ bfd_vma pagesize = (bfd_vma) 1 << htab->params->pagesize_p2;
+
+ /* Pre-fill the reserved patch area with copies of the BA
+    instruction word so unused slots are not arbitrary data.
+    NOTE(review): BA is presumably the opcode constant for an
+    absolute unconditional branch, defined elsewhere in this
+    file -- confirm.  */
+ if (relax_info->workaround_size != 0)
+ {
+ bfd_byte *p;
+ unsigned int n;
+ bfd_byte fill[4];
+
+ bfd_put_32 (input_bfd, BA, fill);
+ p = contents + input_section->size - relax_info->workaround_size;
+ n = relax_info->workaround_size >> 2;
+ while (n--)
+ {
+ memcpy (p, fill, 4);
+ p += 4;
+ }
+ }
+
+ /* The idea is: Replace the last instruction on a page with a
+ branch to a patch area. Put the insn there followed by a
+ branch back to the next page. Complicated a little by
+ needing to handle moved conditional branches, and by not
+ wanting to touch data-in-text. */
+
+ start_addr = (input_section->output_section->vma
+ + input_section->output_offset);
+ end_addr = (start_addr + input_section->size
+ - relax_info->workaround_size);
+ /* Visit the last word of every page this section covers,
+    stopping before the patch area itself.  */
+ for (addr = ((start_addr & -pagesize) + pagesize - 4);
+ addr < end_addr;
+ addr += pagesize)
+ {
+ bfd_vma offset = addr - start_addr;
+ Elf_Internal_Rela *lo, *hi;
+ bfd_boolean is_data;
+ bfd_vma patch_off, patch_addr;
+ unsigned int insn;
+
+ /* Do we have a data reloc at this offset? If so, leave
+ the word alone.  Binary search over the sorted reloc
+ array; REL is left pointing at any reloc that overlaps
+ the word.  */
+ is_data = FALSE;
+ lo = relocs;
+ hi = relend;
+ rel = NULL;
+ while (lo < hi)
+ {
+ rel = lo + (hi - lo) / 2;
+ if (rel->r_offset < offset)
+ lo = rel + 1;
+ else if (rel->r_offset > offset + 3)
+ hi = rel;
+ else
+ {
+ switch (ELF32_R_TYPE (rel->r_info))
+ {
+ case R_PPC_ADDR32:
+ case R_PPC_UADDR32:
+ case R_PPC_REL32:
+ case R_PPC_ADDR30:
+ is_data = TRUE;
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+ }
+ if (is_data)
+ continue;
+
+ /* Some instructions can be left alone too. Unconditional
+ branches, except for bcctr with BO=0x14 (bctr, bctrl),
+ avoid the icache failure.
+
+ The problem occurs due to prefetch across a page boundary
+ where stale instructions can be fetched from the next
+ page, and the mechanism for flushing these bad
+ instructions fails under certain circumstances. The
+ unconditional branches:
+ 1) Branch: b, bl, ba, bla,
+ 2) Branch Conditional: bc, bca, bcl, bcla,
+ 3) Branch Conditional to Link Register: bclr, bclrl,
+ where (2) and (3) have BO=0x14 making them unconditional,
+ prevent the bad prefetch because the prefetch itself is
+ affected by these instructions. This happens even if the
+ instruction is not executed.
+
+ A bctr example:
+ .
+ . lis 9,new_page@ha
+ . addi 9,9,new_page@l
+ . mtctr 9
+ . bctr
+ . nop
+ . nop
+ . new_page:
+ .
+ The bctr is not predicted taken due to ctr not being
+ ready, so prefetch continues on past the bctr into the
+ new page which might have stale instructions. If they
+ fail to be flushed, then they will be executed after the
+ bctr executes. Either of the following modifications
+ prevent the bad prefetch from happening in the first
+ place:
+ .
+ . lis 9,new_page@ha lis 9,new_page@ha
+ . addi 9,9,new_page@l addi 9,9,new_page@l
+ . mtctr 9 mtctr 9
+ . bctr bctr
+ . nop b somewhere_else
+ . b somewhere_else nop
+ . new_page: new_page:
+ . */
+ insn = bfd_get_32 (input_bfd, contents + offset);
+ if ((insn & (0x3f << 26)) == (18u << 26) /* b,bl,ba,bla */
+ || ((insn & (0x3f << 26)) == (16u << 26) /* bc,bcl,bca,bcla*/
+ && (insn & (0x14 << 21)) == (0x14 << 21)) /* with BO=0x14 */
+ || ((insn & (0x3f << 26)) == (19u << 26)
+ && (insn & (0x3ff << 1)) == (16u << 1) /* bclr,bclrl */
+ && (insn & (0x14 << 21)) == (0x14 << 21)))/* with BO=0x14 */
+ continue;
+
+ /* Redirect the page-final insn to the patch area, which is
+    kept 16-byte aligned within the reserved space.  */
+ patch_addr = (start_addr + input_section->size
+ - relax_info->workaround_size);
+ patch_addr = (patch_addr + 15) & -16;
+ patch_off = patch_addr - start_addr;
+ bfd_put_32 (input_bfd, B + patch_off - offset, contents + offset);
+
+ if (rel != NULL
+ && rel->r_offset >= offset
+ && rel->r_offset < offset + 4)
+ {
+ asection *sreloc;
+
+ /* If the insn we are patching had a reloc, adjust the
+ reloc r_offset so that the reloc applies to the moved
+ location. This matters for -r and --emit-relocs. */
+ if (rel + 1 != relend)
+ {
+ Elf_Internal_Rela tmp = *rel;
+
+ /* Keep the relocs sorted by r_offset.  Rotate REL to
+ the end of the array; it now lives at relend[-1].  */
+ memmove (rel, rel + 1, (relend - (rel + 1)) * sizeof (*rel));
+ relend[-1] = tmp;
+ }
+ relend[-1].r_offset += patch_off - offset;
+
+ /* Adjust REL16 addends too. */
+ switch (ELF32_R_TYPE (relend[-1].r_info))
+ {
+ case R_PPC_REL16:
+ case R_PPC_REL16_LO:
+ case R_PPC_REL16_HI:
+ case R_PPC_REL16_HA:
+ relend[-1].r_addend += patch_off - offset;
+ break;
+ default:
+ break;
+ }
+
+ /* If we are building a PIE or shared library with
+ non-PIC objects, perhaps we had a dynamic reloc too?
+ If so, the dynamic reloc must move with the insn.
+ Same binary-search-then-rotate scheme as above, but on
+ the external (swapped) reloc records.  */
+ sreloc = elf_section_data (input_section)->sreloc;
+ if (sreloc != NULL)
+ {
+ Elf32_External_Rela *slo, *shi, *srelend;
+ bfd_vma soffset;
+
+ slo = (Elf32_External_Rela *) sreloc->contents;
+ shi = srelend = slo + sreloc->reloc_count;
+ soffset = (offset + input_section->output_section->vma
+ + input_section->output_offset);
+ while (slo < shi)
+ {
+ Elf32_External_Rela *srel = slo + (shi - slo) / 2;
+ bfd_elf32_swap_reloca_in (output_bfd, (bfd_byte *) srel,
+ &outrel);
+ if (outrel.r_offset < soffset)
+ slo = srel + 1;
+ else if (outrel.r_offset > soffset + 3)
+ shi = srel;
+ else
+ {
+ if (srel + 1 != srelend)
+ {
+ memmove (srel, srel + 1,
+ (srelend - (srel + 1)) * sizeof (*srel));
+ srel = srelend - 1;
+ }
+ outrel.r_offset += patch_off - offset;
+ bfd_elf32_swap_reloca_out (output_bfd, &outrel,
+ (bfd_byte *) srel);
+ break;
+ }
+ }
+ }
+ }
+ else
+ rel = NULL;
+
+ /* A relative conditional branch: its displacement must be
+    re-targeted now that the insn lives in the patch area.  */
+ if ((insn & (0x3f << 26)) == (16u << 26) /* bc */
+ && (insn & 2) == 0 /* relative */)
+ {
+ /* Sign-extend the 16-bit displacement field, then bias it
+    by the distance the insn is moving.  */
+ bfd_vma delta = ((insn & 0xfffc) ^ 0x8000) - 0x8000;
+
+ delta += offset - patch_off;
+ /* For -r the accompanying reloc (rotated above) now applies
+    at the patched location and will supply the target, so
+    write a zero displacement.  */
+ if (bfd_link_relocatable (info) && rel != NULL)
+ delta = 0;
+ if (!bfd_link_relocatable (info) && rel != NULL)
+ {
+ enum elf_ppc_reloc_type r_type;
+
+ r_type = ELF32_R_TYPE (relend[-1].r_info);
+ if (r_type == R_PPC_REL14_BRTAKEN)
+ insn |= BRANCH_PREDICT_BIT;
+ else if (r_type == R_PPC_REL14_BRNTAKEN)
+ insn &= ~BRANCH_PREDICT_BIT;
+ else
+ BFD_ASSERT (r_type == R_PPC_REL14);
+
+ /* NOTE(review): for short backward branches the static
+    prediction hint's meaning inverts with the sign of
+    the displacement, hence the bit flip -- confirm
+    against the BO-field "y" bit rules in the ISA.  */
+ if ((r_type == R_PPC_REL14_BRTAKEN
+ || r_type == R_PPC_REL14_BRNTAKEN)
+ && delta + 0x8000 < 0x10000
+ && (bfd_signed_vma) delta < 0)
+ insn ^= BRANCH_PREDICT_BIT;
+ }
+ /* Short case: the adjusted displacement still fits in the
+    bc insn.  Emit the bc, then a branch back to the word
+    after the patched page-final slot.  */
+ if (delta + 0x8000 < 0x10000)
+ {
+ bfd_put_32 (input_bfd,
+ (insn & ~0xfffc) | (delta & 0xfffc),
+ contents + patch_off);
+ patch_off += 4;
+ bfd_put_32 (input_bfd,
+ B | ((offset + 4 - patch_off) & 0x3fffffc),
+ contents + patch_off);
+ patch_off += 4;
+ }
+ else
+ {
+ /* Out of bc range: emit "bc +8" hopping over a branch
+    back to the fall-through address, then an
+    unconditional branch to the original target.  Any
+    reloc moves onto that final branch as REL24.  */
+ if (rel != NULL)
+ {
+ unsigned int r_sym = ELF32_R_SYM (relend[-1].r_info);
+
+ relend[-1].r_offset += 8;
+ relend[-1].r_info = ELF32_R_INFO (r_sym, R_PPC_REL24);
+ }
+ bfd_put_32 (input_bfd,
+ (insn & ~0xfffc) | 8,
+ contents + patch_off);
+ patch_off += 4;
+ bfd_put_32 (input_bfd,
+ B | ((offset + 4 - patch_off) & 0x3fffffc),
+ contents + patch_off);
+ patch_off += 4;
+ bfd_put_32 (input_bfd,
+ B | ((delta - 8) & 0x3fffffc),
+ contents + patch_off);
+ patch_off += 4;
+ }
+ }
+ else
+ {
+ /* Anything else: copy the insn into the patch area
+    verbatim, followed by a branch back to the
+    fall-through address.  */
+ bfd_put_32 (input_bfd, insn, contents + patch_off);
+ patch_off += 4;
+ bfd_put_32 (input_bfd,
+ B | ((offset + 4 - patch_off) & 0x3fffffc),
+ contents + patch_off);
+ patch_off += 4;
+ }
+ /* Record the patch space still free for later pages.  */
+ BFD_ASSERT (patch_off <= input_section->size);
+ relax_info->workaround_size = input_section->size - patch_off;
+ }
+ }
+