OpenASIP 2.2
Loading...
Searching...
No Matches
LoopPrologAndEpilogBuilder.cc
Go to the documentation of this file.
1/*
2 Copyright (c) 2002-2020 Tampere University.
3
4 This file is part of TTA-Based Codesign Environment (TCE).
5
6 Permission is hereby granted, free of charge, to any person obtaining a
7 copy of this software and associated documentation files (the "Software"),
8 to deal in the Software without restriction, including without limitation
9 the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 and/or sell copies of the Software, and to permit persons to whom the
11 Software is furnished to do so, subject to the following conditions:
12
13 The above copyright notice and this permission notice shall be included in
14 all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 DEALINGS IN THE SOFTWARE.
23 */
24/**
25 * @file LoopPrologAndEpilogBuilder.cc
26 *
27 * Definition of LoopPrologAndEpilogBuilder interface.
28 *
29 * @author Mikael Lepist� 2008 (mikael.lepisto-no.spam-tut.fi)
30 * @author HeikkI Kultala 2015-2020 (heikki.kultala-no.spam-tuni.fi)
31 * @note rating: red
32 */
33
35
38#include "BasicBlock.hh"
39#include "Instruction.hh"
40#include "ControlFlowGraph.hh"
44#include "Immediate.hh"
45#include "Move.hh"
47#include "CodeGenerator.hh"
49#include "BasicBlockPass.hh"
50#include "ImmediateUnit.hh"
51#include "BF2Scheduler.hh"
52#include "ControlFlowEdge.hh"
53
54#include <set>
55
58
61
62void
66 ControlFlowEdge& jumpEdge) {
67 TTAProgram::BasicBlock& tailBB = tail.basicBlock();
68
69 auto predicate = jumpEdge.edgePredicate();
70 std::pair<int, TTAProgram::Move*> jumpData =
71 CopyingDelaySlotFiller::findJump(tailBB, &predicate);
72
73 std::pair<TTAProgram::Move*, std::shared_ptr<TTAProgram::Immediate> >
74 jumpAddressData;
75
76 // should be the same
77 int jumpIndex = jumpData.first;
78 TTAProgram::Move* jumpMove = jumpData.second;
79 if (jumpMove == NULL) {
80 abortWithError("incoming jump not found");
81 }
82
86
87 if (!jumpMove->source().isInstructionAddress()) {
88 // address comes from LIMM, search the write to limm reg.
89 jumpAddressData =
91 jumpIndex, *jumpMove, irm);
92 if (jumpAddressData.first == NULL &&
93 jumpAddressData.second == NULL) {
94 //Imm source not found, aborting
95 abortWithError("Unknown immediate contains jump address");
96 return;
97 }
98 if (jumpAddressData.first != NULL) {
99 jumpAddressData.first->setSource(
101 } else {
102 jumpAddressData.second->setValue(
104 }
105 } else {
106 jumpMove->source().setInstructionReference(ir);
107 }
108}
109
110/**
111 * Adds a prolog into control flow graph.
112 *
113 * Creates the node and fixed edges and instruction references.
114 *
115 * @param cfg cfg where to add the prolog
116 * @param prologBB bb which contains the prolog
117 * @param loopBBN node of cfg which contains the loop
118 */
119void
121 ControlFlowGraph& cfg, BasicBlockNode& prologBBN,
122 BasicBlockNode& loopBBN) {
123
124 if (Application::verboseLevel() > 1) {
125 std::cerr << "adding prolog into cfg" << std::endl;
126 }
127 cfg.addNode(prologBBN);
128
129 // update incoming cfg edges to point into prolog
130 for (int i = 0; i < cfg.inDegree(loopBBN); i++) {
131 ControlFlowEdge&e = cfg.inEdge(loopBBN,i);
132 BasicBlockNode& tail = cfg.tailNode(e);
133 if (&tail != &loopBBN) {
134 cfg.moveInEdge(loopBBN, prologBBN,e);
135 i--;
136 if (e.isJumpEdge()) {
137 if (Application::verboseLevel() > 1) {
138 std::cerr << "\tmoving jump into loop into prolog"
139 << std::endl;
140 }
142 tail, prologBBN, e);
143 }
144 }
145 }
146
147 // connect prolog and loop
148 cfg.connectNodes(
149 prologBBN, loopBBN, *(new ControlFlowEdge(
152
153}
154
155/**
156 * Adds an epilog into control flow graph.
157 *
158 * Creates the node and fixed edges.
159 *
160 * @param cfg cfg where to add the epilog
161 * @param prologBB bb which contains the epilog
162 * @param loopBBN node of cfg which contains the loop
163 */
164
165void
167 ControlFlowGraph& cfg, BasicBlockNode& epilogBBN,
168 BasicBlockNode& loopBBN) {
169
170 cfg.addNode(epilogBBN);
171 assert(cfg.outDegree(loopBBN) == 2);
172
173 for (int i = 0; i < cfg.outDegree(loopBBN); i++) {
174 ControlFlowEdge& loopExitEdge = cfg.outEdge(loopBBN,i);
175 if (!loopExitEdge.isFallThroughEdge()) {
176 continue;
177 }
178 BasicBlockNode& succNode = cfg.headNode(loopExitEdge);
179 cfg.moveInEdge(succNode, epilogBBN, loopExitEdge);
180 cfg.connectNodes(
181 epilogBBN, succNode, *(new ControlFlowEdge(
184 )));
185
186 return;
187 }
188 assert(false&&"Loop exit fall-thru not in ddg!");
189}
190
191int
194 ControlFlowGraph& cfg, BasicBlockNode& loopBBN, int endCycle,
195 bool createEpilog) {
196
197 if (Application::verboseLevel() > 1) {
198 Application::logStream() << "Building epilog and prolog for the loop."
199 << std::endl;
200 std::cerr << "rm.smallestCycle: " << rm.smallestCycle() << std::endl;
201 std::cerr << "end cycle: " << endCycle << std::endl;
202 }
203
204 int ii = rm.initiationInterval();
205 if (Application::verboseLevel() > 1) {
206 Application::logStream() << "Investigate nodes. ii: " << ii
207 << std::endl;
208 }
209
210 // resolve how many times loop overlaps
211 int overlap_count;
212 if (endCycle != -1) {
213 overlap_count = ((endCycle - rm.smallestCycle()) / ii);
214 } else {
215 overlap_count = ddg.largestCycle() / ii;
216 }
217
218 TTAProgram::Move* jumpMove = NULL;
219 if (overlap_count > 0) {
220 if (Application::verboseLevel() > 1) {
222 << "Building prolog and epilog..... "
223 << " Overlapcount: " << overlap_count << std::endl;
224 }
225
226 DataDependenceGraph* rootDDG = static_cast<DataDependenceGraph*>(
227 ddg.rootGraph());
228
229 // make room for moves of prolog and epilog
230 TTAProgram::BasicBlock *prolog =
232 BasicBlockNode *prologNode = new BasicBlockNode(*prolog, true, true,
233 loopBBN.originalStartAddress(), false);
234
235 TTAProgram::BasicBlock *epilog = NULL;
236 BasicBlockNode* epilogNode = NULL;
237 if (createEpilog) {
238 epilog =
240 epilogNode = new BasicBlockNode(
241 *epilog, true, true, loopBBN.originalStartAddress(), false);
242 }
243 for (int i = 0; i < overlap_count * ii; i++) {
244 prolog->add(new TTAProgram::Instruction());
245 if (createEpilog) {
246 epilog->add(new TTAProgram::Instruction());
247 }
248 }
249
250 // scan for long immediates. these can be copied to both
251 // prolog and epilog.
252 for (int j = 0; j < overlap_count; j++) {
253 for (int i = 0; i < ii; i++) {
255 for (int k = 0; k < ins->immediateCount(); k++) {
256 auto newImm = ins->immediate(k).copy();
257 auto newImm2 = ins->immediate(k).copy();
258 prolog->instructionAtIndex(i+j*ii).addImmediate(newImm);
259 if (createEpilog) {
260 epilog->instructionAtIndex(i+j*ii).addImmediate(newImm2);
261 }
262 }
263 }
264 }
265 int newZero = endCycle + 1 - (ii * (1 + overlap_count));
266 for (int i = 0 ; i < overlap_count; i++) {
267 for (int j = 0; j < ddg.nodeCount(); j++) {
268 MoveNode& n = ddg.node(j);
269
270 // ignore jump
271 if (n.move().isControlFlowMove()) {
272 jumpMove = &n.move();
273 continue;
274 }
275
276 int cycle;
277 if (endCycle != -1) {
278 cycle = n.cycle() - newZero;
279 } else {
280 cycle = n.cycle();
281 }
282 int round = cycle / ii;
283 int place = cycle % ii;
284
285 MoveNode* nodeCopy;
286 if (createEpilog || round == 0) {
287 nodeCopy = n.copy();
288 nodeCopy->move().setGuard(NULL);
289
290 if (endCycle != -1) {
291 nodeCopy->setCycle(place + newZero);
292 } else {
293 nodeCopy->setCycle(place);
294 }
295
296 if (Application::verboseLevel() > 1) {
298 << "Node cycle: " << cycle
299 << " round: " << round
300 << " place: " << place << " "
301 << n.toString()<< "\t" ;
302 }
303 } else {
304 continue;
305 }
306/*
307 if (createEpilog) {
308 if (n.move().isControlFlowMove()) {
309 assert(!n.move().isUnconditional());
310 if (Application::verboseLevel() > 1) {
311 std::cerr << "jumpmove reversing guard at address: "
312 << &nodeCopy->move()<< std::endl;
313 }
314 nodeCopy->move().setGuard(
315 TTAProgram::CodeGenerator::createInverseGuard(
316 n.move().guard(), &n.move().bus()));
317 // TODO: set jump target to first of epilog
318 nodeCopy->move().setSource(
319 new TTAProgram::TerminalInstructionReference(
320 cfg.instructionReferenceManager().createReference(
321 epilog->instructionAtIndex(0))));
322 }
323 }
324*/
325 // select if node should go to prolog or epilog
326 if (round <= i) {
327 prolog->instructionAtIndex(
328 (i*ii)+place).addMove(nodeCopy->movePtr());
329 if (Application::verboseLevel() > 1) {
330 Application::logStream() << " to prolog" << std::endl;
331 }
332 rootDDG->addNode(*nodeCopy, *prologNode);
333 rootDDG->copyExternalInEdges(*nodeCopy, n);
334 rootDDG->copyExternalOutEdges(*nodeCopy, n);
335
336 } else {
337 if (createEpilog) {
338 epilog->instructionAtIndex(
339 (i*ii)+place).addMove(nodeCopy->movePtr());
340 if (Application::verboseLevel() > 1) {
341 Application::logStream() << " to epilog" << std::endl;
342 }
343 rootDDG->addNode(*nodeCopy, *epilogNode);
344 rootDDG->copyExternalInEdges(*nodeCopy, n);
345 rootDDG->copyExternalOutEdges(*nodeCopy, n);
346 }
347 }
348 }
349 }
350 assert(jumpMove != NULL);
351
352 if (Application::verboseLevel() > 1) {
354 << "Disassembly of prolog" << std::endl <<
355 prolog->disassembly() << std::endl;
356 if (createEpilog) {
358 << "Disassembly of epilog" << std::endl <<
359 epilog->disassembly() << std::endl;
360 }
361 }
362
363 if (optimizeProlog(*prolog)) {
364 if (Application::verboseLevel() > 1) {
366 << "Disassembly of optimized prolog" << std::endl <<
367 prolog->disassembly() << std::endl;
368 }
369 addPrologIntoCfg(cfg, *prologNode, loopBBN);
370 } else {
371 delete prologNode;
372 }
373 if (Application::verboseLevel() > 1) {
375 << "Prolog optimized."
376 << std::endl;
377 }
378 if (createEpilog) {
379 if (optimizeEpilog(*epilog)) {
380 if (Application::verboseLevel() > 1) {
382 << "Disassembly of optimized epilog" << std::endl <<
383 epilog->disassembly() << std::endl;
384 }
385 addEpilogIntoCfg(cfg, *epilogNode, loopBBN);
386 } else {
387 delete epilogNode; // TODO: what about the BB?
388 }
389 if (Application::verboseLevel() > 1) {
391 << "Epilog optimized."
392 << std::endl;
393 }
394 }
395 } else {
396 if (Application::verboseLevel() > 1) {
398 << "No overlapping instructions.No need for prolog or epilog."
399 << "Should have used normal scheduler."
400 << std::endl;
401 }
402 }
403
404 return overlap_count;
405}
406
407/** Stupid immediates still have to be copied from the main loop :( */
409 SimpleResourceManager& prologRM,
410 SimpleResourceManager& loopRM,
411 ControlFlowGraph& cfg, BasicBlockNode& loopBBN) {
412
413 int startCycle = prologRM.smallestCycle();
414
416 BasicBlockNode* prologNode = new BasicBlockNode(*prologBB);
417
418 if (Application::verboseLevel() > 1) {
420 << "Copying prolog rm to bb" << prologNode->toString()
421 << " Start cycle: " << startCycle << std::endl;
422 }
423 BasicBlockPass::copyRMToBB(prologRM, *prologBB, prologRM.machine(),
425 loopRM.initiationInterval()-1
427
428 if (optimizeProlog(*prologBB)) {
429 addPrologIntoCfg(cfg, *prologNode, loopBBN);
430 prologNode->setScheduled();
431
432 if (Application::verboseLevel() > 1) {
434 << "Prolog added to cfg" << prologNode->toString() << std::endl
435 << "Disassembly of prolog" << std::endl <<
436 prologBB->disassembly() << std::endl;
437 }
438 } else {
439 if (Application::verboseLevel() > 1) {
441 << "Empty prolog, not added to cfg" << std::endl;
442 }
443 delete prologNode;
444 }
445 return prologNode;
446}
447
449 SimpleResourceManager& prologEpilogRM,
450 int ii, ControlFlowGraph& cfg, BasicBlockNode& loopBBN) {
451
453 BasicBlockNode* epilogNode = new BasicBlockNode(*epilogBB);
454
455 if (Application::verboseLevel() > 1) {
457 << "Copying epilog rm to bb" << epilogNode->toString()
458 << std::endl;
459 }
460
461 for (int i = BF2Scheduler::PROLOG_CYCLE_BIAS + ii;
462 i <= prologEpilogRM.largestCycle(); i++) {
463 TTAProgram::Instruction* newInstruction =
464 prologEpilogRM.instruction(i);
465 prologEpilogRM.loseInstructionOwnership(i);
466 epilogBB->add(newInstruction);
467 }
468 if (optimizeEpilog(*epilogBB)) {
469 addEpilogIntoCfg(cfg, *epilogNode, loopBBN);
470 epilogNode->setScheduled();
471 if (Application::verboseLevel() > 1) {
473 << "epilog added to cfg" << epilogNode->toString() << std::endl;
474
476 << "Disassembly of epilog" << std::endl <<
477 epilogBB->disassembly() << std::endl;
478 }
479 } else {
480 if (Application::verboseLevel() > 1) {
482 << "epilog empty, not added to cfg." << std::endl;
483 }
484 delete epilogNode;
485 }
486
487 return epilogNode;
488
489}
490
491bool
493
494 if (prolog.instructionCount() == 0) {
495 return false;
496 }
497 // optimize away immediate values not used
498 for (int i = 0; i < prolog.instructionCount() ;i++) {
500 for (int j = 0; j < ins.immediateCount(); j++) {
501 bool used = false;
502 bool overwritten = false;
503 TTAProgram::Immediate& imm = ins.immediate(j);
504 const TTAProgram::Terminal& t = imm.destination();
505 const TTAProgram::Terminal& dst = imm.destination();
506 const TTAMachine::ImmediateUnit& immu = dst.immediateUnit();
507 for (int k = i + immu.latency() ;
508 k < prolog.instructionCount() &&
509 !used && !overwritten; k++) {
510
512 // if immu has latency 0, check immediate before moves.
513 if (immu.latency() == 0 && k > i) {
514 for (int l = 0; l < ins2.immediateCount(); l++) {
515 const TTAProgram::Immediate& imm2 = ins2.immediate(l);
516 const TTAProgram::Terminal& t2 = imm2.destination();
517 if (&t2.immediateUnit() == & t.immediateUnit() &&
518 t2.index() == t.index()) {
519 overwritten = true;
520 break;
521 }
522 }
523 }
524
525 for (int l = 0; l < ins2.moveCount(); l++) {
526 const TTAProgram::Move& m = ins2.move(l);
527 const TTAProgram::Terminal& t2 = m.source();
528 if (t2.isImmediateRegister() &&
529 &t2.immediateUnit() == &dst.immediateUnit() &&
530 t2.index() == dst.index()) {
531 used = true;
532 break;
533 }
534 }
535 // if immu has latency 1, check moves before imms
536 if (immu.latency() == 1) {
537 for (int l = 0; l < ins2.immediateCount(); l++) {
538 const TTAProgram::Immediate& imm2 = ins2.immediate(l);
539 const TTAProgram::Terminal& t2 = imm2.destination();
540 if (&t2.immediateUnit() == & t.immediateUnit() &&
541 t2.index() == t.index()) {
542 overwritten = true;
543 break;
544 }
545 }
546 }
547 }
548 if (!used && overwritten) {
549 ins.removeImmediate(imm);
550 }
551 }
552 }
553 for (int i = 0; i <= prolog.instructionCount() -1 ;i++) {
555 // check for moves is enough. no imms after moves.
556 if (ins.moveCount() > 0 || ins.immediateCount() > 0) {
557 break;
558 } else {
559 // empty? leave one ins behind just for
560 if (i == prolog.instructionCount() - 1) {
561 return false;
562 }
563 prolog.remove(ins);
564 delete &ins;
565 // We removed current instruction, there shell be new current
566 // instruction once we increse i at beginning of the loop.
567 i--;
568 }
569 }
570 return true;
571}
572
573
574bool
576
577 if (epilog.instructionCount() == 0) {
578 return false;
579 }
580
581 for (int i = epilog.instructionCount() -1 ; i >= 0 ; i--) {
583 // check for moves is enough. no imms after moves.
584 if (ins.moveCount() > 0) {
585 break;
586 } else {
587 // empty? leave one ins behind just for
588 if (i == 0) {
589 return false;
590 }
591 assert(i!= 0);
592 epilog.remove(ins);
593 delete &ins;
594 }
595 }
596 return true;
597}
#define abortWithError(message)
#define assert(condition)
static int verboseLevel()
static std::ostream & logStream()
static const int PROLOG_CYCLE_BIAS
void setScheduled(bool state=true)
TTAProgram::BasicBlock & basicBlock()
InstructionAddress originalStartAddress() const
std::string toString() const
static void copyRMToBB(SimpleResourceManager &rm, TTAProgram::BasicBlock &bb, const TTAMachine::Machine &targetMachine, TTAProgram::InstructionReferenceManager &irm, int lastCycle=-1)
BoostGraph * rootGraph()
virtual Node & headNode(const Edge &edge) const
int nodeCount() const
virtual Edge & outEdge(const Node &node, const int index) const
virtual Edge & inEdge(const Node &node, const int index) const
virtual void addNode(Node &node)
Node & node(const int index) const
virtual int inDegree(const Node &node) const
virtual void moveInEdge(const Node &source, const Node &destination, Edge &edge, const Node *tail=NULL, bool childs=false)
virtual int outDegree(const Node &node) const
virtual Node & tailNode(const Edge &edge) const
virtual void connectNodes(const Node &nTail, const Node &nHead, Edge &e)
bool isFallThroughEdge() const
bool isJumpEdge() const
CFGEdgePredicate edgePredicate() const
TTAProgram::InstructionReferenceManager & instructionReferenceManager()
static std::pair< TTAProgram::Move *, std::shared_ptr< TTAProgram::Immediate > > findJumpImmediate(int jumpIndex, TTAProgram::Move &jumpMove, TTAProgram::InstructionReferenceManager &irm)
static std::pair< int, TTAProgram::Move * > findJump(TTAProgram::BasicBlock &bb, ControlFlowEdge::CFGEdgePredicate *pred=nullptr)
void copyExternalInEdges(MoveNode &nodeCopy, const MoveNode &source)
void copyExternalOutEdges(MoveNode &nodeCopy, const MoveNode &source)
void addNode(MoveNode &moveNode)
bool optimizeEpilog(TTAProgram::BasicBlock &epilog)
void addEpilogIntoCfg(ControlFlowGraph &cfg, BasicBlockNode &epilogBBN, BasicBlockNode &loopBBN)
virtual int build(DataDependenceGraph &ddg, SimpleResourceManager &rm, ControlFlowGraph &cfg, BasicBlockNode &loopBBN, int endCycle=-1, bool createEpilog=true)
void addPrologIntoCfg(ControlFlowGraph &cfg, BasicBlockNode &prologBBN, BasicBlockNode &loopBBN)
BasicBlockNode * addEpilogFromRM(SimpleResourceManager &prologEpilogRM, int ii, ControlFlowGraph &cfg, BasicBlockNode &loopBBN)
void moveJumpDestination(TTAProgram::InstructionReferenceManager &irm, BasicBlockNode &tail, BasicBlockNode &dst, ControlFlowEdge &jumpEdge)
bool optimizeProlog(TTAProgram::BasicBlock &prolog)
BasicBlockNode * addPrologFromRM(SimpleResourceManager &prologRM, SimpleResourceManager &loopRM, ControlFlowGraph &cfg, BasicBlockNode &loopBBN)
int cycle() const
Definition MoveNode.cc:421
std::shared_ptr< TTAProgram::Move > movePtr()
std::string toString() const
Definition MoveNode.cc:576
TTAProgram::Move & move()
void setCycle(const int newcycle)
Definition MoveNode.cc:503
MoveNode * copy()
Definition MoveNode.cc:135
const TTAMachine::Machine & machine() const
virtual int smallestCycle() const override
virtual unsigned initiationInterval() const
virtual TTAProgram::Instruction * instruction(int cycle) const override
virtual void loseInstructionOwnership(int cycle)
virtual int largestCycle() const override
virtual int latency() const
int skippedFirstInstructions() const
Definition BasicBlock.cc:88
virtual void add(Instruction *ins)
virtual int instructionCount() const
virtual std::string disassembly() const
virtual void remove(Instruction &ins)
virtual Instruction & instructionAtIndex(int index) const
const Terminal & destination() const
Definition Immediate.cc:92
std::shared_ptr< Immediate > copy() const
Definition Immediate.cc:131
InstructionReference createReference(Instruction &ins)
void removeImmediate(Immediate &imm)
void addImmediate(std::shared_ptr< Immediate > imm)
Move & move(int i) const
Immediate & immediate(int i) const
void addMove(std::shared_ptr< Move > move)
bool isControlFlowMove() const
Definition Move.cc:233
Terminal & source() const
Definition Move.cc:302
void setGuard(MoveGuard *guard)
Definition Move.cc:360
virtual int index() const
Definition Terminal.cc:274
virtual void setInstructionReference(InstructionReference ref)
Definition Terminal.cc:404
virtual bool isInstructionAddress() const
Definition Terminal.cc:87
virtual bool isImmediateRegister() const
Definition Terminal.cc:97
virtual const TTAMachine::ImmediateUnit & immediateUnit() const
Definition Terminal.cc:240