• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tueda / form / 15241916852

25 May 2025 08:59PM UTC coverage: 47.908% (-2.8%) from 50.743%
15241916852

push

github

tueda
ci: build arm64-windows binaries

39009 of 81425 relevant lines covered (47.91%)

1079780.1 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

66.28
/sources/optimize.cc
1
/** @file optimize.cc
2
 *
3
 *        experimental routines for the optimization of FORTRAN or C output.
4
 */
5

6
/* #[ License : */
7
/*
8
 *   Copyright (C) 1984-2023 J.A.M. Vermaseren
9
 *   When using this file you are requested to refer to the publication
10
 *   J.A.M.Vermaseren "New features of FORM" math-ph/0010025
11
 *   This is considered a matter of courtesy as the development was paid
12
 *   for by FOM the Dutch physics granting agency and we would like to
13
 *   be able to track its scientific use to convince FOM of its value
14
 *   for the community.
15
 *
16
 *   This file is part of FORM.
17
 *
18
 *   FORM is free software: you can redistribute it and/or modify it under the
19
 *   terms of the GNU General Public License as published by the Free Software
20
 *   Foundation, either version 3 of the License, or (at your option) any later
21
 *   version.
22
 *
23
 *   FORM is distributed in the hope that it will be useful, but WITHOUT ANY
24
 *   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
25
 *   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
26
 *   details.
27
 *
28
 *   You should have received a copy of the GNU General Public License along
29
 *   with FORM.  If not, see <http://www.gnu.org/licenses/>.
30
 */
31
/*
32
          #] License : 
33
          #[ includes :
34
*/
35

36
//#define DEBUG
37
//#define DEBUG_MORE
38
//#define DEBUG_MCTS
39
//#define DEBUG_GREEDY
40

41
#ifdef HAVE_CONFIG_H
42
#ifndef CONFIG_H_INCLUDED
43
#define CONFIG_H_INCLUDED
44
#include <config.h>
45
#endif
46
#endif
47

48
#include <vector>
49
#include <stack>
50
#include <algorithm>
51
#include <set>
52
#include <map>
53
#include <climits>
54
#include <cmath>
55
#include <string>
56
#include <algorithm>
57
#include <iostream>
58

59
#ifdef APPLE64
60
#ifndef HAVE_UNORDERED_MAP
61
#define HAVE_UNORDERED_MAP
62
#endif
63
#ifndef HAVE_UNORDERED_SET
64
#define HAVE_UNORDERED_SET
65
#endif
66
#endif
67

68
#ifdef HAVE_UNORDERED_MAP
69
        #include <unordered_map>
70
        using std::unordered_map;
71
#elif !defined(HAVE_TR1_UNORDERED_MAP) && defined(HAVE_BOOST_UNORDERED_MAP_HPP)
72
        #include <boost/unordered_map.hpp>
73
        using boost::unordered_map;
74
#else
75
        #include <tr1/unordered_map>
76
        using std::tr1::unordered_map;
77
#endif
78
#ifdef HAVE_UNORDERED_SET
79
        #include <unordered_set>
80
        using std::unordered_set;
81
#elif !defined(HAVE_TR1_UNORDERED_SET) && defined(HAVE_BOOST_UNORDERED_SET_HPP)
82
        #include <boost/unordered_set.hpp>
83
        using boost::unordered_set;
84
#else
85
        #include <tr1/unordered_set>
86
        using std::tr1::unordered_set;
87
#endif
88

89
#if defined(HAVE_BUILTIN_POPCOUNT)
90
        static inline int popcount(unsigned int x) {
42✔
91
                return __builtin_popcount(x);
42✔
92
        }
93
#elif defined(HAVE_POPCNT)
94
        #include <intrin.h>
95
        static inline int popcount(unsigned int x) {
96
                return __popcnt(x);
97
        }
98
#else
99
        static inline int popcount(unsigned int x) {
100
                int count = 0;
101
                while (x > 0) {
102
                        if ((x & 1) == 1) count++;
103
                        x >>= 1;
104
                }
105
                return count;
106
        }
107
#endif
108

109
extern "C" {
110
#include "form3.h"
111
}
112

113
//#ifdef DEBUG
114
#include "mytime.h"
115
//#endif
116

117
using namespace std;
118

119
// operators
120
const WORD OPER_ADD = -1;
121
const WORD OPER_MUL = -2;
122
const WORD OPER_COMMA = -3;
123

124
// class for a node of the MCTS tree
125
class tree_node {
88✔
126
public:
127
        vector<tree_node> childs;
128
        double sum_results;
129
        int num_visits;
130
        WORD var;
131
        bool finished;
132
        PADPOINTER(1,1,1,1);
133

134
        tree_node (int _var=0):
94✔
135
                sum_results(0), num_visits(0), var(_var), finished(false) {}
18✔
136

137
        tree_node(const tree_node& other):
64✔
138
                childs(other.childs),
64✔
139
                sum_results(other.sum_results),
64✔
140
                num_visits(other.num_visits),
64✔
141
                var(other.var),
64✔
142
                finished(other.finished) {}
64✔
143

144
        tree_node& operator=(const tree_node& other) {
88✔
145
                if (this != &other) {
88✔
146
                        childs = other.childs;
85✔
147
                        sum_results = other.sum_results;
85✔
148
                        num_visits = other.num_visits;
85✔
149
                        var = other.var;
85✔
150
                        finished = other.finished;
85✔
151
                }
152
                return *this;
88✔
153
        }
154
};
155

156
// global variables for multithreading
157
WORD *optimize_expr;
158
vector<vector<WORD> > optimize_best_Horner_schemes;
159
int optimize_num_vars;
160
int optimize_best_num_oper;
161
vector<WORD> optimize_best_instr;
162
vector<WORD> optimize_best_vars;
163

164
// global variables for MCTS
165
bool mcts_factorized, mcts_separated;
166
vector<WORD> mcts_vars;
167
tree_node mcts_root;
168
int mcts_expr_score;
169
set<pair<int,vector<WORD> > > mcts_best_schemes;
170

171
#ifdef WITHPTHREADS
172
pthread_mutex_t optimize_lock;
173
#endif
174

175
/*
176
          #] includes : 
177
          #[ print_instr :
178
*/
179

180
void print_instr (const vector<WORD> &instr, WORD num)
×
181
{
182
        const WORD *tbegin = &*instr.begin();
×
183
        const WORD *tend = tbegin+instr.size();
×
184
        for (const WORD *t=tbegin; t!=tend; t+=*(t+2)) {
×
185
                MesPrint("<%d> %a",num,t[2],t);
×
186
        }
187
}
×
188

189
/*
190
          #] print_instr : 
191
          #[ my_random_shuffle :
192
*/
193

194
/**  Random shuffle
195
 *
196
 *         Description
197
 *         ===========
198
 *         Randomly permutes elements in the range [fr,to). Functionality is
199
 *         the same as C++'s "random_shuffle", but it uses Form's "wranf".
200
 */
201
template <class RandomAccessIterator>
202
void my_random_shuffle (PHEAD RandomAccessIterator fr, RandomAccessIterator to) {
130✔
203
  for (int i=to-fr-1; i>0; --i)
149✔
204
                swap (fr[i],fr[wranf(BHEAD0) % (i+1)]);
19✔
205
}
130✔
206

207
/*
208
          #] my_random_shuffle : 
209
          #[ get_expression :
210
*/
211

212
static WORD comlist[3] = {TYPETOPOLYNOMIAL,3,DOALL};
213

214
/**  Get expression
215
 *
216
 *         Description
217
 *         ===========
218
 *         Reads an expression from the input file into a buffer (called
219
 *         optimize_expr). This buffer is used during the optimization
220
 *         process. Non-symbols are removed by ConvertToPoly and are put in
221
 *         temporary symbols.
222
 *
223
 *         The return value is the length of the expression in WORDs, or a
224
 *         negative number if it failed.
225
 */
226
LONG get_expression (int exprnr) {
36✔
227

228
        GETIDENTITY;
36✔
229

230
  AR.NoCompress = 1;
36✔
231

232
  NewSort(BHEAD0);
36✔
233
  EXPRESSIONS e = Expressions+exprnr;
36✔
234
  SetScratch(AR.infile,&(e->onfile));
36✔
235

236
  // get header term
237
  WORD *term = AT.WorkPointer;
36✔
238
        GetTerm(BHEAD term);
36✔
239

240
  NewSort(BHEAD0);
36✔
241

242
  // get terms
243
  while (GetTerm(BHEAD term) > 0) {
22,155✔
244
        AT.WorkPointer = term + *term;
22,083✔
245
        WORD *t1 = term;
22,083✔
246
        WORD *t2 = term + *term;
22,083✔
247
        if (ConvertToPoly(BHEAD t1,t2,comlist,1) < 0) return -1;
22,083✔
248
        int n = *t2;
22,083✔
249
        NCOPY(t1,t2,n);
306,627✔
250
        AT.WorkPointer = term + *term;
22,083✔
251
        if (StoreTerm(BHEAD term)) return -1;
22,083✔
252
  }
253

254
  // sort and store in buffer
255
  LONG len = EndSort(BHEAD (WORD *)((VOID *)(&optimize_expr)),2);
36✔
256
  LowerSortLevel();
36✔
257
  AT.WorkPointer = term;
36✔
258

259
        return len;
36✔
260
}
261

262
/*
263
          #] get_expression : 
264
          #[ PF_get_expression :
265
*/
266
#ifdef WITHMPI
267

268
// get_expression for ParFORM
269
LONG PF_get_expression (int exprnr) {
270
        LONG len;
271
        if (PF.me == MASTER) {
272
                len = get_expression(exprnr);
273
        }
274
        if (PF.numtasks > 1) {
275
                PF_BroadcastBuffer(&optimize_expr, &len);
276
        }
277
        return len;
278
}
279

280
// replace get_expression called in Optimize
281
#define get_expression PF_get_expression
282

283
#endif
284
/*
285
          #] PF_get_expression : 
286
          #[ get_brackets :
287
*/
288

289
/**  Get brackets
290
 *
291
 *         Description
292
 *         ===========
293
 *         Checks whether the input expression (stored in optimize_expr)
294
 *         contains brackets. If so, this method replaces terms outside
295
 *         brackets by powers of SEPERATESYMBOL (equal brackets have equal
296
 *         powers) and the brackets are returned. If not, the result is
297
 *         empty.
298
 *
299
 *         Brackets are used for simultaneous optimization. The symbol
300
 *         SEPARATESYMBOL is always the first one used in a Horner scheme.
301
 */
302

303
vector<vector<WORD> > get_brackets () {
36✔
304

305
  // check for brackets in expression
306
  bool has_brackets = false;
36✔
307
  for (WORD *t=optimize_expr; *t!=0; t+=*t) {
22,119✔
308
        WORD *tend=t+*t; tend-=ABS(*(tend-1));
22,083✔
309
        for (WORD *t1=t+1; t1<tend; t1+=*(t1+1))
44,172✔
310
          if (*t1 == HAAKJE)
22,089✔
311
                has_brackets=true;
12✔
312
  }
313

314
  // replace brackets by SEPARATESYMBOL
315
  vector<vector<WORD> > brackets;
36✔
316

317
  if (has_brackets) {
36✔
318
                int exprlen=10;        // we need potential space for an empty bracket
319
                for (WORD *t=optimize_expr; *t!=0; t+=*t)
21✔
320
                        exprlen += *t;
12✔
321
        WORD *newexpr = (WORD *)Malloc1(exprlen*sizeof(WORD), "optimize newexpr");
9✔
322

323
        int i=0;
9✔
324
        int sep_power = 0;
9✔
325

326
        for (WORD *t=optimize_expr; *t!=0; t+=*t) {
21✔
327
          WORD *t1 = t+1;
12✔
328

329
                        // scan for bracket
330
          vector<WORD> bracket;
12✔
331
          for (; *t1!=HAAKJE; t1+=*(t1+1))
21✔
332
                bracket.insert(bracket.end(), t1, t1+*(t1+1));
9✔
333

334
          if (brackets.size()==0 || bracket!=brackets.back()) {
12✔
335
                sep_power++;
9✔
336
                brackets.push_back(bracket);
9✔
337
          }
338
          t1+=*(t1+1);
12✔
339

340
          WORD left = t + *t - t1;
12✔
341
          bool more_symbols = (left != ABS(*(t+*t-1)));
12✔
342

343
                        // add power of SEPARATESYMBOL
344
          newexpr[i++] = 1 + left + (more_symbols ? 2 : 4);
12✔
345
          newexpr[i++] = SYMBOL;
12✔
346
          newexpr[i++] = (more_symbols ? *(t1+1) + 2 : 4);
12✔
347
          newexpr[i++] = SEPARATESYMBOL;
12✔
348
          newexpr[i++] = sep_power;
12✔
349

350
                        // add remaining terms
351
          if (more_symbols) {
12✔
352
                t1+=2;
3✔
353
                left-=2;
3✔
354
          }
355
          while (left-->0)
54✔
356
                newexpr[i++] = *(t1++);
42✔
357
        }
12✔
358
/*
359
        We insert here an empty bracket that is zero.
360
        It is used for the case that there is only a single bracket which is
361
        outside the notation for trees at a later stage.
362

363
        There may be a problem now in that in the case of sep_power==1
364
        newexpr is bigger than optimize_expr. We have to check that.
365
*/
366
        if ( sep_power == 1 )
9✔
367
        {
368
          WORD *t;
9✔
369
          vector<WORD> bracket;
9✔
370
          bracket.push_back(0);
9✔
371
          bracket.push_back(0);
9✔
372
          bracket.push_back(0);
9✔
373
          bracket.push_back(0);
9✔
374
          sep_power++;
9✔
375
          brackets.push_back(bracket);
9✔
376
          newexpr[i++] = 8;
9✔
377
          newexpr[i++] = SYMBOL;
9✔
378
          newexpr[i++] = 4;
9✔
379
          newexpr[i++] = SEPARATESYMBOL;
9✔
380
          newexpr[i++] = sep_power;
9✔
381
          newexpr[i++] = 1;
9✔
382
          newexpr[i++] = 1;
9✔
383
          newexpr[i++] = 3;
9✔
384
          newexpr[i++] = 0;
9✔
385
          for (t=optimize_expr; *t!=0; t+=*t) {}
21✔
386
          if ( t-optimize_expr+1 < i ) {  // We have to redo this
9✔
387
                M_free(optimize_expr,"$-sort space");
6✔
388
                optimize_expr = (WORD *)Malloc1(i*sizeof(WORD),"$-sort space");
6✔
389
          }
390
        }
9✔
391
        else {
392
                newexpr[i++] = 0;
×
393
        }
394
        memcpy(optimize_expr, newexpr, i*sizeof(WORD));
9✔
395
        M_free(newexpr,"optimize newexpr");
9✔
396

397
        // if factorized, replace SEP by FAC and remove brackets
398
        if (brackets[0].size()>0 && brackets[0][2]==FACTORSYMBOL) {
9✔
399
                for (WORD *t=optimize_expr; *t!=0; t+=*t) {
×
400
                        if (*t == ABS(*(t+*t-1))+1) continue;
×
401
                        if (t[1]==SYMBOL)
×
402
                                for (int i=3; i<t[2]; i+=2)
×
403
                                        if (t[i]==SEPARATESYMBOL) t[i]=FACTORSYMBOL;
×
404
                }
405
                return vector<vector<WORD> >();
×
406
        }
407
  }
408

409
  return brackets;
36✔
410
}
36✔
411

412
/*
413
          #] get_brackets : 
414
          #[ count_operators :
415
*/
416

417
/**  Count operators
418
 *
419
 *         Description
420
 *         ===========
421
 *         Counts the number of operators in a Form-style expression.
422
 */
423
int count_operators (const WORD *expr, bool print=false) {
12✔
424

425
        int n=0;
12✔
426
        while (*(expr+n)!=0) n+=*(expr+n);
36✔
427

428
        int cntpow=0, cntmul=0, cntadd=0, sumpow=0;
12✔
429
        WORD maxpowfac=1, maxpowsep=1;
12✔
430

431
        for (const WORD *t=expr; *t!=0; t+=*t) {
36✔
432
                if (t!=expr) cntadd++;                                // new term
24✔
433
                if (*t==ABS(*(t+*t-1))+1) continue; // only coefficient
24✔
434

435
                int cntsym=0;
24✔
436

437
                if (t[1]==SYMBOL)
24✔
438
                        for (int i=3; i<t[2]; i+=2) {
54✔
439
                                if (t[i]==FACTORSYMBOL) {
30✔
440
                                        maxpowfac = max(maxpowfac, t[i+1]);
×
441
                                        continue;
×
442
                                }
443
                                if (t[i]==SEPARATESYMBOL) {
30✔
444
                                        maxpowsep = max(maxpowsep, t[i+1]);
21✔
445
                                        continue;
21✔
446
                                }
447
                                if (t[i+1]>2) {                  // (extra)symbol power>2
9✔
448
                                        cntpow++;
×
449
                                        sumpow += (int)floor(log(t[i+1])/log(2.0)) + popcount(t[i+1]) - 1;
×
450
                                }
451
                                if (t[i+1]==2) cntmul++; // (extra)symbol squared
9✔
452
                                cntsym++;
9✔
453
                        }
454

455
                if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1) cntsym++; // non +/-1 coefficient
24✔
456

457
                if (cntsym > 0)        cntmul+=cntsym-1;
24✔
458
        }
459

460
        cntadd -= maxpowfac-1;
12✔
461
        cntmul += maxpowfac-1;
12✔
462

463
        cntadd -= maxpowsep-1;
12✔
464

465
        if (print)
12✔
466
                MesPrint ("*** STATS: original        %lP %lM %lA : %l", cntpow,cntmul,cntadd,sumpow+cntmul+cntadd);
×
467

468
        return sumpow+cntmul+cntadd;
12✔
469
}
470

471
/**  Count operators
472
 *
473
 *         Description
474
 *         ===========
475
 *         Counts the number of operators in a vector of instructions
476
 */
477
int count_operators (const vector<WORD> &instr, bool print=false) {
81✔
478

479
        int cntpow=0, cntmul=0, cntadd=0, sumpow=0;
81✔
480

481
        const WORD *ebegin = &*instr.begin();
81✔
482
        const WORD *eend = ebegin+instr.size();
81✔
483

484
        for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
3,696✔
485
                for (const WORD *t=e+3; *t!=0; t+=*t) {
11,049✔
486
                        if (t!=e+3) {
7,434✔
487
                                if (*(e+1)==OPER_ADD) cntadd++;         // new term
3,819✔
488
                                if (*(e+1)==OPER_MUL) cntmul++;         // new term
3,819✔
489
                        }
490
                        if (*t == ABS(*(t+*t-1))+1) continue;        // only coefficient
7,434✔
491
                        if (*(t+1)==SYMBOL || *(t+1)==EXTRASYMBOL) {
7,386✔
492
                                if (*(t+4)==2) cntmul++;                        // (extra)symbol squared
7,386✔
493
                                if (*(t+4)>2) {                                         // (extra)symbol power>2
7,386✔
494
                                        cntpow++;
42✔
495
                                        sumpow += (int)floor(log(*(t+4))/log(2.0)) + popcount(*(t+4)) - 1;
42✔
496
                                }
497
                        }
498
                        if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1) cntmul++; // non +/-1 coefficient
7,386✔
499
                }
500
        }
501

502
        if (print)
81✔
503
                MesPrint ("*** STATS: optimized %lP %lM %lA : %l", cntpow,cntmul,cntadd,sumpow+cntmul+cntadd);
×
504

505
        return sumpow+cntmul+cntadd;
81✔
506
}
507

508
/*
509
          #] count_operators : 
510
          #[ occurrence_order :
511
*/
512

513
/**  Occurrence order
514
 *
515
 *         Description
516
 *         ===========
517
 *         Extracts all variables from an expression and sorts them with
518
 *         most occurring first (or last, with rev=true)
519
 */
520
vector<WORD> occurrence_order (const WORD *expr, bool rev) {
30✔
521

522
        // count the number of occurrences of variables
523
        map<WORD,int> cnt;
30✔
524
        for (const WORD *t=expr; *t!=0; t+=*t) {
44,148✔
525
                if (*t == ABS(*(t+*t-1))+1) continue; // skip constant term
44,118✔
526
                if (t[1] == SYMBOL)
44,118✔
527
                        for (int i=3; i<t[2]; i+=2)
196,098✔
528
                                cnt[t[i]]++;
151,980✔
529
        }
530

531
        bool is_fac=false, is_sep=false;
30✔
532
        if (cnt.count(FACTORSYMBOL)) {
30✔
533
                is_fac=true;
×
534
                cnt.erase(FACTORSYMBOL);
×
535
        }
536
        if (cnt.count(SEPARATESYMBOL)) {
30✔
537
                is_sep=true;
×
538
                cnt.erase(SEPARATESYMBOL);
×
539
        }
540

541
        // determine the order of the variables
542
        vector<pair<int,WORD> > cnt_order;
30✔
543
        for (map<WORD,int>::iterator i=cnt.begin(); i!=cnt.end(); i++)
252✔
544
                cnt_order.push_back(make_pair(i->second, i->first));
222✔
545
        sort(cnt_order.rbegin(), cnt_order.rend());
30✔
546

547
        // create resulting order
548
        vector<WORD> order;
30✔
549
        for (int i=0; i<(int)cnt_order.size(); i++)
252✔
550
                order.push_back(cnt_order[i].second);
222✔
551

552
        if (rev) reverse(order.begin(),order.end());
30✔
553

554
        // add FACTORSYMBOL/SEPARATESYMBOL
555
        if (is_fac) order.insert(order.begin(), FACTORSYMBOL);
30✔
556
        if (is_sep) order.insert(order.begin(), SEPARATESYMBOL);
30✔
557

558
        return order;
60✔
559
}
30✔
560

561
/*
562
          #] occurrence_order : 
563
          #[ Horner_tree :
564
*/
565

566
/**  Horner tree building
567
 *
568
 *         Description
569
 *         ===========
570
 *         Given a Form-style expression (in a buffer in memory), this
571
 *         builds an expression tree. The tree is determined by a
572
 *         multivariate Horner scheme, i.e., something of the form:
573
 *
574
 *                1+y+x*(2+y*(1+y)+x*(3-y*(...)))
575
 *
576
 *         The order of the variables is given to the method "Horner_tree",
577
 *         which renumbers ad reorders the terms of the expression. Next,
578
 *         the recursive method "build_Horner_tree" does the actual tree
579
 *         construction.
580
 *
581
 *         The tree is represented in postfix notation. Tokens are of the
582
 *         following forms:
583
 *
584
 *         - SNUMBER tokenlength num den coefflength
585
 *         - SYMBOL tokenlength variable power
586
 *         - OPER_ADD or OPER_MUL
587
 *
588
 *         Note
589
 *         ====
590
 *         Sets AN.poly_num_vars and allocates AN.poly_vars. The latter
591
 *         should be freed later.
592
 */
593

594
/**  Get power of variable (helper function for build_Horner_tree)
595
 *
596
 *         Description
597
 *         ===========
598
 *         Returns the power of the variable "var", which is at position
599
 *         "pos" in this term, if it is present.
600
 */
601
WORD getpower (const WORD *term, int var, int pos) {
923,439✔
602
        if (*term == ABS(*(term+*term-1))+1) return 0; // constant term
923,439✔
603
        if (2*pos+2 >= term[2]) return 0;                           // too few symbols
923,439✔
604
        if (term[2*pos+3] != var) return 0;                    // incorrect symbol
765,053✔
605
        return term[2*pos+4];                                                   // return power
168,365✔
606
}
607

608
/**  Call GcdLong/DivLong with leading zeroes
609
 *
610
 *         Description
611
 *         ===========
612
 *         These method remove leading zeroes, so that GcdLong and DivLong
613
 *         can safely be called.
614
 */
615
void fixarg (UWORD *t, WORD &n) {
2,149,828✔
616
        int an=ABS(n), sn=SGN(n);
2,149,828✔
617
        while (*(t+an-1)==0) an--;
2,149,828✔
618
        n=an*sn;
2,149,828✔
619
}
2,149,828✔
620

621
void GcdLong_fix_args (PHEAD UWORD *a, WORD na, UWORD *b, WORD nb, UWORD *c, WORD *nc) {
493,350✔
622
        fixarg(a,na);
493,350✔
623
        fixarg(b,nb);
493,350✔
624
        GcdLong(BHEAD a,na,b,nb,c,nc);
493,350✔
625
}
493,350✔
626

627
void DivLong_fix_args(UWORD *a, WORD na, UWORD *b, WORD nb, UWORD *c,        WORD *nc, UWORD *d, WORD *nd) {
581,564✔
628
        fixarg(a,na);
581,564✔
629
        fixarg(b,nb);
581,564✔
630
        DivLong(a,na,b,nb,c,nc,d,nd);
581,564✔
631
}
581,564✔
632

633
/**  Build the Horner tree
634
 *
635
 *         Description
636
 *         ===========
637
 *         Constructs the Horner tree. The method processes one variable and
638
 *         continues recursively until the Horner scheme is completed.
639
 *
640
 *         "terms" is a pointer to the starts of the terms. "numterms" is
641
 *         the number of terms to be processed. "var" is the next variable
642
 *         to be processed (index between 0 and #maxvar) and "maxvar" is the
643
 *         last variable to be processed, so that partial Horner trees can
644
 *         also be constructed. "pos" is the position that the power of
645
 *         "var" should be in (one level further in the recursion, "pos" has
646
 *         increased by 0 or 1 depending on whether the previous power was 0
647
 *         or not). The result is written at the pointer "res".
648
 *
649
 *         This method also factors out gcds of the coefficients. The result
650
 *         should end with "gcd OPER_MUL" at all times, so that one level
651
 *         higher gcds can be extracted again.
652
 */
653
void build_Horner_tree (const WORD **terms, int numterms, int var, int maxvar, int pos, vector<WORD> *res) {
246,885✔
654

655
        GETIDENTITY;
246,885✔
656

657
        if (var == maxvar) {
246,885✔
658
                // Horner tree is finished, so add remaining terms unfactorized
659
                // (note: since only complete Horner schemes seem to be useful, numterms=1 here
660

661
                for (int fr=0; fr<numterms; fr++) {
88,548✔
662

663
                        bool empty = true;
44,274✔
664
                        const WORD *t = terms[fr];
44,274✔
665

666
                        // add symbols
667
                        if (*t != ABS(*(t+*t-1))+1)
44,274✔
668
                                for (int i=3+2*pos; i<t[2]; i+=2) {
44,274✔
669
                                        res->push_back(SYMBOL);
×
670
                                        res->push_back(4);
×
671
                                        res->push_back(t[i]);
×
672
                                        res->push_back(t[i+1]);
×
673
                                        if (!empty) res->push_back(OPER_MUL);
×
674
                                        empty = false;
×
675
                                }
676

677
                        // if empty, add a 1, since the result should look like "... coeff *"
678
                        if (empty) {
44,274✔
679
                                res->push_back(SNUMBER);
44,274✔
680
                                res->push_back(5);
44,274✔
681
                                res->push_back(1);
44,274✔
682
                                res->push_back(1);
44,274✔
683
                                res->push_back(3);
44,274✔
684
                        }
685

686
                        // add coefficient
687
                        res->push_back(SNUMBER);
44,274✔
688
                        WORD n = ABS(*(t+*t-1));
44,274✔
689
                        res->push_back(n+2);
44,274✔
690
                        for (int i=0; i<n; i++)
177,096✔
691
                                res->push_back(*(t+*t-n+i));
132,822✔
692
                        res->push_back(OPER_MUL);
44,274✔
693

694
                        if (fr>0) res->push_back(OPER_ADD);
44,274✔
695
                }
696

697
                // result should end with gcd of the terms; right now this never
698
                // triggers, but if one would allow for incomplete Horner schemes,
699
                // one should extract the gcd here
700
                if (numterms > 1) {
44,274✔
701
                        res->push_back(SNUMBER);
×
702
                        res->push_back(5);
×
703
                        res->push_back(1);
×
704
                        res->push_back(1);
×
705
                        res->push_back(3);
×
706
                        res->push_back(OPER_MUL);
×
707
                }
708
        }
709
        else {
710
                // extract variable "var" and the gcd and proceed recursively
711

712
                WORD nnum = 0, nden = 0, ntmp = 0, ndum = 0;
202,611✔
713
                UWORD *num = NumberMalloc("build_Horner_tree");
202,611✔
714
                UWORD *den = NumberMalloc("build_Horner_tree");
202,611✔
715
                UWORD *tmp = NumberMalloc("build_Horner_tree");
202,611✔
716
                UWORD *dum = NumberMalloc("build_Horner_tree");
202,611✔
717

718
                // previous coefficient for gcd extraction or coefficient multiplication
719
                int prev_coeff_idx = -1;
202,611✔
720

721
                for (int fr=0; fr<numterms;) {
449,372✔
722

723
                        // find power of current term
724
                        WORD pow = getpower(terms[fr], var, pos);
246,761✔
725

726
                        // find all terms with that power
727
                        int to=fr+1;
246,761✔
728
                        while (to<numterms && getpower(terms[to], var, pos) == pow) to++;
835,182✔
729

730
                        // recursively build Horner tree of all terms proportional to var^pow
731
                        build_Horner_tree (terms+fr, to-fr, var+1, maxvar, pow==0?pos:pos+1, res);
246,761✔
732

733
                        if (AN.poly_vars[var] != FACTORSYMBOL && AN.poly_vars[var] != SEPARATESYMBOL) {
246,761✔
734
                                // if normal symbol, find gcd(numerators) and gcd(denominators)
735
                                WORD  n1 = res->at(res->size()-2) / 2;
246,675✔
736
                                WORD *t1 = &res->at(res->size()-2-2*ABS(n1));
246,675✔
737

738
                                WORD *t2 = fr==0 ? t1 : &res->at(prev_coeff_idx);
246,675✔
739
                                WORD  n2 = fr==0 ? n1 : *(t2+*(t2+1)-1) / 2;
290,782✔
740
                                if (fr>0) t2+=2;
44,107✔
741

742
                                GcdLong_fix_args(BHEAD (UWORD *)t1,n1,(UWORD *)t2,n2,num,&nnum);
246,675✔
743
                                GcdLong_fix_args(BHEAD (UWORD *)t1+ABS(n1),ABS(n1),(UWORD *)t2+ABS(n2),ABS(n2),den,&nden);
246,675✔
744

745
                                // divide out gcds; note: leading zeroes can be added here
746
                                for (int i=0; i<2; i++) {
537,457✔
747
                                        if (i==1 && fr==0) break;
493,350✔
748

749
                                        WORD *t = i==0 ? t1 : t2;
290,782✔
750
                                        WORD n = i==0 ? n1 : n2;
44,107✔
751

752
                                        DivLong_fix_args((UWORD *)t, n, num, nnum, tmp, &ntmp, dum, &ndum);
290,782✔
753
                                        for (int j=0; j<ABS(ntmp); j++) *t++ = tmp[j];
581,564✔
754
                                        for (int j=0; j<ABS(n)-ABS(ntmp); j++) *t++ = 0;
290,782✔
755

756
                                        if (SGN(ntmp) != SGN(n)) n=-n;
290,782✔
757

758
                                        DivLong_fix_args((UWORD *)t, n, den, nden, tmp, &ntmp, dum, &ndum);
290,782✔
759
                                        for (int j=0; j<ABS(ntmp); j++) *t++ = tmp[j];
581,564✔
760
                                        for (int j=0; j<ABS(n)-ABS(ntmp); j++) *t++ = 0;
290,782✔
761

762
                                        *t++ = SGN(n) * (2*ABS(n)+1);
290,782✔
763
                                }
764

765
                                // add the addition operator
766
                                if (fr>0) res->push_back(OPER_ADD);
246,675✔
767

768
                                // add the power of "var"
769
                                WORD nextpow = (to==numterms ? 0 : getpower(terms[to], var, pos));
246,675✔
770

771
                                if (pow-nextpow > 0) {
246,675✔
772
                                        res->push_back(SYMBOL);
52,297✔
773
                                        res->push_back(4);
52,297✔
774
                                        res->push_back(var);
52,297✔
775
                                        res->push_back(pow-nextpow);
52,297✔
776
                                        res->push_back(OPER_MUL);
52,297✔
777
                                }
778

779
                                // add the extracted gcd
780
                                res->push_back(SNUMBER);
246,675✔
781
                                WORD n = MaX(ABS(nnum),nden);
246,675✔
782
                                res->push_back(n*2+3);
246,675✔
783
                                for (int i=0; i<ABS(nnum); i++) res->push_back(num[i]);
493,350✔
784
                                for (int i=0; i<n-ABS(nnum); i++) res->push_back(0);
246,675✔
785
                                for (int i=0; i<nden; i++) res->push_back(den[i]);
493,350✔
786
                                for (int i=0; i<n-ABS(nden); i++) res->push_back(0);
246,675✔
787
                                res->push_back(SGN(nnum)*(2*n+1));
246,675✔
788
                                res->push_back(OPER_MUL);
246,675✔
789

790
                                prev_coeff_idx = res->size() - ABS(res->at(res->size()-2)) - 3;
246,675✔
791
                        }
792
                        else if (AN.poly_vars[var]==FACTORSYMBOL) {
86✔
793

794
                                // if factorsymbol, multiply overall integer contents
795

796
                                if (fr>0) {
×
797
                                        WORD  n1 = res->at(res->size()-2) / 2;
×
798
                                        WORD *t1 = &res->at(res->size()-2-2*ABS(n1));
×
799
                                        WORD *t2 = &res->at(prev_coeff_idx);
×
800
                                        WORD  n2 = *(t2+*(t2+1)-1) / 2;
×
801
                                        t2+=2;
×
802

803
                                        MulRat(BHEAD (UWORD *)t1,n1,(UWORD *)t2,n2,tmp,&ntmp);
×
804

805
                                        // replace previous coefficient with 1
806
                                        n2=ABS(n2);
×
807
                                        for (int i=0; i<ABS(n2); i++)
×
808
                                                t2[i] = t2[n2+i] = i==0 ? 1 : 0;
×
809
                                        t2[2*n2] = 2*n2+1;
×
810

811
                                        // remove this coefficient
812
                                        for (int i=0; i<2*ABS(n1)+2; i++)
×
813
                                                res->pop_back();
×
814

815
                                        // add product
816
                                        res->back() = 2*ABS(ntmp)+3;                                   // adjust size of term
×
817
                                        res->insert(res->end(), tmp, tmp+2*ABS(ntmp)); // num/den coefficient
×
818
                                        res->push_back(SGN(ntmp)*(2*ABS(ntmp)+1));           // size of coefficient
×
819
                                        res->push_back(OPER_MUL);                                           // operator
×
820
                                }
821

822
                                prev_coeff_idx = res->size() - ABS(res->at(res->size()-2)) - 3;
×
823

824
                                // multiply previous factors with this factor
825
                                if (fr>0)
×
826
                                        res->push_back(OPER_MUL);
×
827
                        }
828
                        else { // AN.poly_vars[var]==SEPARATESYMBOL
829
                                if (fr>0)
86✔
830
                                        res->push_back(OPER_COMMA);
43✔
831
                                prev_coeff_idx = -1;
832
                        }
833

834
                        fr=to;
835
                }
836

837
                // cleanup
838
                NumberFree(dum,"build_Horner_tree");
202,611✔
839
                NumberFree(tmp,"build_Horner_tree");
202,611✔
840
                NumberFree(den,"build_Horner_tree");
202,611✔
841
                NumberFree(num,"build_Horner_tree");
202,611✔
842
        }
843
}
246,885✔
844

845
/**  Term compare (helper function for Horner_tree)
846
 *
847
 *         Description
848
 *         ===========
849
 *         Compares two terms of the form "L SYM 4 x n coeff" or "L
850
 *         coeff". Lower powers of lower-indexed symbols come first. This is
851
 *         used to sort the terms in correct order.
852
 */
853
bool term_compare (const WORD *a, const WORD *b) {
580,739✔
854
        if (*a == ABS(*(a+*a-1))+1) return true; // coefficient comes first
580,739✔
855
        if (*b == ABS(*(b+*b-1))+1) return false;
580,739✔
856
        if (a[1]!=SYMBOL) return true;
580,739✔
857
        if (b[1]!=SYMBOL) return false;
580,739✔
858
        for (int i=3; i<a[2] && i<b[2]; i+=2) {
1,211,028✔
859
                if (a[i]  !=b[i]  ) return a[i]  >b[i];
1,211,009✔
860
                if (a[i+1]!=b[i+1]) return a[i+1]<b[i+1];
715,991✔
861
        }
862
        return a[2]<b[2];
19✔
863
}
864

865
/**  Prepare Horner tree building
866
 *
867
 *         Description
868
 *         ===========
869
 *         This method renumbers the variables to 0...#vars-1 according to
870
 *         the specified order. Next, it stored pointer to individual terms
871
 *         and sorts the terms with higher power first. Then the sorted
872
 *         lists of power is used for the construction of the Horner tree.
873
 */
874
vector<WORD> Horner_tree (const WORD *expr, const vector<WORD> &order) {
124✔
875
#ifdef DEBUG
876
        MesPrint ("*** [%s, w=%w] CALL: Horner_tree(%a)", thetime_str().c_str(), order.size(), &order[0]);
877
#endif
878

879
        GETIDENTITY;
124✔
880

881
        // find the renumbering scheme (new numbers are 0,1,...,#vars-1)
882
        map<WORD,WORD> renum;
124✔
883
        AN.poly_num_vars = order.size();
124✔
884
        AN.poly_vars = (WORD *)Malloc1(AN.poly_num_vars*sizeof(WORD), "AN.poly_vars");
124✔
885
        for (int i=0; i<AN.poly_num_vars; i++) {
510✔
886
                AN.poly_vars[i] = order[i];
386✔
887
                renum[order[i]] = i;
386✔
888
        }
889

890
        // sort variables in individual terms using bubble sort
891
        WORD* sorted;
892
        WORD* dynamicAlloc = 0;
44,398✔
893
        LONG sumsize = 0;
894

895
        for (const WORD *t=expr; *t!=0; t+=*t) {
44,398✔
896
                sumsize += *t;
44,274✔
897
        }
898

899
        // We actually need sumsize + 1 WORDS available, due to the "*sorted = 0;"
900
        // at the end of the sort loop.
901
        if ( AT.WorkPointer + sumsize + 1 > AT.WorkTop ) {
124✔
902
                dynamicAlloc = (WORD*)Malloc1(sizeof(*dynamicAlloc)*(sumsize+1), "Horner_tree alloc");
6✔
903
                sorted = dynamicAlloc;
904
        }
905
        else {
906
                sorted = AT.WorkPointer;
907
        }
908

909
        for (const WORD *t=expr; *t!=0; t+=*t) {
44,398✔
910
                memcpy(sorted, t, *t*sizeof(WORD));
44,274✔
911

912
                if (*t != ABS(*(t+*t-1))+1) {
44,274✔
913
                        // non-constant term
914

915
                        // renumber variables, adding new variables if necessary
916
                        for (int i=3; i<sorted[2]; i+=2) {
196,480✔
917
                                if (!renum.count(sorted[i])) {
152,206✔
918
                                        renum[sorted[i]] = AN.poly_num_vars;
×
919

920
                                        WORD *new_poly_vars = (WORD *)Malloc1((AN.poly_num_vars+1)*sizeof(WORD), "AN.poly_vars");
×
921
                                        memcpy(new_poly_vars, AN.poly_vars, AN.poly_num_vars*sizeof(WORD));
×
922
                                        M_free(AN.poly_vars,"poly_vars");
×
923
                                        AN.poly_vars = new_poly_vars;
×
924
                                        AN.poly_vars[AN.poly_num_vars] = sorted[i];
×
925
                                        AN.poly_num_vars++;
×
926
                                }
927
                                sorted[i] = renum[sorted[i]];
152,206✔
928
                        }
929
                        // order variables
930
                        for (int i=0; i<sorted[2]/2; i++)
240,754✔
931
                                for (int j=3; j+2<sorted[2]; j+=2)
694,720✔
932
                                        if (sorted[j] > sorted[j+2]) {
498,240✔
933
                                                swap(sorted[j]        , sorted[j+2]);
97,569✔
934
                                                swap(sorted[j+1], sorted[j+3]);
97,569✔
935
                                        }
936
                }
937

938
                sorted += *sorted;
44,274✔
939
        }
940

941
        *sorted = 0;
124✔
942
        if ( dynamicAlloc ) {
124✔
943
                sorted = dynamicAlloc;
944
        }
945
        else {
946
                sorted = AT.WorkPointer;
118✔
947
        }
948

949
        // find pointers to all terms and sort them efficiently
950
        vector<const WORD *> terms;
124✔
951
        for (const WORD *t=sorted; *t!=0; t+=*t)
44,398✔
952
                terms.push_back(t);
44,274✔
953
        sort(terms.rbegin(),terms.rend(),term_compare);
124✔
954

955
        // construct the Horner tree
956
        vector<WORD> res;
124✔
957
        build_Horner_tree(&terms[0], terms.size(), 0, AN.poly_num_vars, 0, &res);
124✔
958

959
        // remove leading zeroes in coefficients
960
        int j=0;
961
        for (int i=0; i<(int)res.size();) {
775,040✔
962
                if (res[i]==OPER_ADD || res[i]==OPER_MUL || res[i]==OPER_COMMA)
774,916✔
963
                        res[j++] = res[i++];
387,396✔
964
                else if (res[i]==SYMBOL) {
387,520✔
965
                        memmove(&res[j], &res[i], res[i+1]*sizeof(WORD));
52,297✔
966
                        i+=res[j+1];
52,297✔
967
                        j+=res[j+1];
52,297✔
968
                }
969
                else if (res[i]==SNUMBER) {
335,223✔
970
                        int n = (res[i+1]-2)/2;
335,223✔
971
                        int dn = 0;
335,223✔
972
                        while (res[i+1+n-dn]==0 && res[i+1+2*n-dn]==0) dn++;
335,223✔
973
                        res[j++] = SNUMBER;
335,223✔
974
                        res[j++] = 2*(n-dn) + 3;
335,223✔
975
                        memmove(&res[j], &res[i+2], (n-dn)*sizeof(WORD));
335,223✔
976
                        j+=n-dn;
335,223✔
977
                        memmove(&res[j], &res[i+n+2], (n-dn)*sizeof(WORD));
335,223✔
978
                        j+=n-dn;
335,223✔
979
                        res[j++] = SGN(res[i+2*n+2])*(2*(n-dn)+1);
335,223✔
980
                        i+=2*n+3;
335,223✔
981
                }
982
        }
983
        res.resize(j);
124✔
984

985
        if ( dynamicAlloc ) {
124✔
986
                M_free(dynamicAlloc, "Horner_tree alloc");
6✔
987
        }
988

989
#ifdef DEBUG
990
        MesPrint ("*** [%s, w=%w] DONE: Horner_tree(%a)", thetime_str().c_str(), order.size(), &order[0]);
991
#endif
992

993
        return res;
248✔
994
}
124✔
995

996
/*
997
          #] Horner_tree : 
998
          #[ print_tree :
999
*/
1000

1001
// print Horner tree (for debugging)
1002
void print_tree (const vector<WORD> &tree) {
×
1003

1004
        GETIDENTITY;
×
1005

1006
        for (int i=0; i<(int)tree.size();) {
×
1007
                if (tree[i]==OPER_ADD) {
×
1008
                        MesPrint("+%");
×
1009
                        i++;
×
1010
                }
1011
                else if (tree[i]==OPER_MUL) {
1012
                        MesPrint("*%");
×
1013
                        i++;
×
1014
                }
1015
                else if (tree[i]==OPER_COMMA) {
1016
                        MesPrint(",%");
×
1017
                        i++;
×
1018
                }
1019
                else if (tree[i]==SNUMBER) {
1020
                        UBYTE buf[100];
×
1021
                        int n = tree[i+tree[i+1]-1]/2;
×
1022
                        PrtLong((UWORD *)&tree[i+2], n, buf);
×
1023
                        int l = strlen((char *)buf);
×
1024
                        buf[l]='/';
×
1025
                        n=ABS(n);
×
1026
                        PrtLong((UWORD *)&tree[i+2+n], n, buf+l+1);
×
1027
                        MesPrint("%s%",buf);
×
1028
                        i+=tree[i+1];
×
1029
                }
1030
                else if (tree[i]==SYMBOL) {
1031
                        if (AN.poly_vars[tree[i+2]] < 10000)
×
1032
                                MesPrint("%s^%d%", VARNAME(symbols,AN.poly_vars[tree[i+2]]), tree[i+3]);
×
1033
                        else
1034
                                MesPrint("Z%d^%d%", MAXVARIABLES-AN.poly_vars[tree[i+2]], tree[i+3]);
×
1035
                        i+=tree[i+1];
×
1036
                }
1037
                else {
1038
                        MesPrint("error");
×
1039
                        exit(1);
×
1040
                }
1041

1042
                MesPrint(" %");
×
1043
        }
1044

1045
        MesPrint("");
×
1046
}
×
1047

1048
/*
1049
          #] print_tree : 
1050
          #[ generate_instructions :
1051
*/
1052

1053

1054
struct CSEHash {
1055
        size_t operator()(const vector<WORD>& n) const {
459,219✔
1056
                return n[0];
459,219✔
1057
        }
1058
};
1059

1060
struct CSEEq {
1061
        bool operator()(const vector<WORD>& lhs, const vector<WORD>& rhs) const {
453,615✔
1062
                        if (lhs.size() != rhs.size()) return false;
453,615✔
1063
                        for (unsigned int i = 1; i < lhs.size(); i++) {
3,289,617✔
1064
                                if (lhs[i] != rhs[i]) return false;
2,836,002✔
1065
                        }
1066
                        return true;
1067
        }
1068
};
1069

1070

1071
template<typename T> size_t hash_range(T* array, int size) {
774,916✔
1072
        size_t hash = 0;
774,916✔
1073

1074
        for (int i = 0; i < size; i++) {
3,822,590✔
1075
                hash ^= array[i] + 0x9e3779b9 + (hash << 6) + (hash >> 2);
3,047,674✔
1076
        }
1077

1078
        return hash;
774,916✔
1079
}
1080

1081
/**  Generate instructions
1082
 *
1083
 *         Description
1084
 *         ===========
1085
 *         Converts the expression tree to a list of instructions that
1086
 *         directly translate to code. Instructions are of the form:
1087
 *
1088
 *                expr.nr operator length [operands]+ trailing.zero
1089
 *
1090
 *         The operands are of the form:
1091
 *
1092
 *                length [(EXTRA)SYMBOL length variable power] coeff
1093
 *
1094
 *         This method only generates binary operators. Merging is done
1095
 *         later. The method also checks for common subexpressions and
1096
 *         eliminates them and the flag "do_CSE" is set.
1097
 *
1098
 *         Implementation details
1099
 *         ======================
1100
 *         The map "ID" keeps track of which subexpressions already
1101
 *         exist. The key is formatted as one of the following:
1102
 *
1103
 *                SYMBOL x n
1104
 *                SNUMBER coeff
1105
 *                OPERATOR LHS RHS
1106
 *
1107
 *         with LHS/RHS formatted as one of the following:
1108
 *
1109
 *                SNUMBER idx 0
1110
 *                (EXTRA)SYMBOL x n
1111
 *
1112
 *         ID[symbol] or ID[operator] equals a subexpression
1113
 *         number. ID[coeff] equals the position of the number in the input.
1114
 *
1115
 *         The stack s is used the process the postfix expression
1116
 *         tree. Three-word tokens of the form:
1117
 *
1118
 *                SNUMBER idx.of.coeff 0
1119
 *                SYMBOL x n
1120
 *                EXTRASYMBOL x 1
1121
 *
1122
 *         are pushed onto it. Operators pop two operands and push the
1123
 *         resulting expression.
1124
 *
1125
 *         (Extra)symbols are 1-indexed, because -X is also needed to
1126
 *         represent -1 times this term.
1127
 *
1128
 *         There is currently a bug. The notation cannot tell if there is a single
1129
 *         bracket and then ignores the bracket.
1130
 *
1131
 *         TODO: check if this method performs properly if do_CSE=false
1132
 */
1133
vector<WORD> generate_instructions (const vector<WORD> &tree, bool do_CSE) {
45✔
1134
#ifdef DEBUG
1135
        MesPrint ("*** [%s, w=%w] CALL: generate_instructions(cse=%d)",
1136
                                                thetime_str().c_str(), do_CSE?1:0);
1137
#endif
1138

1139
        typedef unordered_map<vector<WORD>, int, CSEHash, CSEEq> csemap;
45✔
1140
        csemap ID;
45✔
1141

1142
        // reserve lots of space, to prevent later rehashes
1143
        // TODO: what if this is too large? make a parameter?
1144
        if (do_CSE) {
45✔
1145
                        ID.rehash(mcts_expr_score * 2);
6✔
1146
        }
1147

1148
        // s is a stack of operands to process when you encounter operators
1149
        // in the postfix expression tree. Operands consist of three WORDs,
1150
        // formatted as the LHS/RHS of the keys in ID.
1151
        stack<int> s;
45✔
1152
        vector<WORD> instr;
45✔
1153
        WORD numinstr = 0;
45✔
1154
        vector<WORD> x;
45✔
1155

1156
        // process the expression tree
1157
        for (int i=0; i<(int)tree.size();) {
774,036✔
1158
                x.clear();
773,991✔
1159

1160
                if (tree[i]==SNUMBER) {
773,991✔
1161
                        WORD hash = hash_range(&tree[i], tree[i + 1]);
334,827✔
1162
                        x.push_back(hash);
334,827✔
1163
                        x.push_back(SNUMBER);
334,827✔
1164
                        x.insert(x.end(),&tree[i],&tree[i]+tree[i+1]);
334,827✔
1165
                        int sign = SGN(x.back());
334,827✔
1166
                        x.back() *= sign;
334,827✔
1167

1168
                        std::pair<csemap::iterator, bool> suc = ID.insert(csemap::value_type(x, i + 1));
334,827✔
1169
                        s.push(0);
334,827✔
1170
                        s.push(sign * suc.first->second);
334,827✔
1171
                        s.push(SNUMBER);
334,827✔
1172
                        s.push(hash);
334,827✔
1173
                        i+=tree[i+1];
334,827✔
1174
                }
1175
                else if (tree[i]==SYMBOL) {
439,164✔
1176
                        WORD hash = hash_range(&tree[i], tree[i + 1]);
52,191✔
1177
                        if (tree[i+3]>1) {
52,191✔
1178
                                x.push_back(hash);
1,188✔
1179
                                x.push_back(SYMBOL);
1,188✔
1180
                                x.push_back(tree[i+2]+1); // variable (1-indexed)
1,188✔
1181
                                x.push_back(tree[i+3]);   // power
1,188✔
1182

1183
                                csemap::iterator it = ID.find(x);
1,188✔
1184
                                if (do_CSE && it != ID.end()) {
1,188✔
1185
                                        // already-seen power of a symbol
1186
                                        s.push(1);
1,122✔
1187
                                        s.push(it->second);
1,122✔
1188
                                        s.push(EXTRASYMBOL);
2,244✔
1189
                                } else {
1190
                                        //MesPrint("strange");
1191
                                        if (numinstr == MAXPOSITIVE) {
66✔
1192
                                                MesPrint((char *)"ERROR: too many temporary variables needed in optimization");
×
1193
                                                Terminate(-1);
×
1194
                                        }
1195

1196
                                        // new power greater than 1 of a symbol
1197
                                        instr.push_back(numinstr);         // expr.nr
66✔
1198
                                        instr.push_back(OPER_MUL);         // operator
66✔
1199
                                        instr.push_back(9+OPTHEAD);  // length total
66✔
1200
                                        instr.push_back(8);                  // length operand
66✔
1201
                                        instr.push_back(SYMBOL);         // SYMBOL
66✔
1202
                                        instr.push_back(4);                  // length symbol
66✔
1203
                                        instr.push_back(tree[i+2]);  // variable
66✔
1204
                                        instr.push_back(tree[i+3]);  // power
66✔
1205
                                        instr.push_back(1);                  // numerator
66✔
1206
                                        instr.push_back(1);                  // denominator
66✔
1207
                                        instr.push_back(3);                  // length coeff
66✔
1208
                                        instr.push_back(0);                  // trailing 0
66✔
1209

1210
                                        ID[x] = ++numinstr;
66✔
1211
                                        s.push(1);
66✔
1212
                                        s.push(numinstr);
66✔
1213
                                        s.push(EXTRASYMBOL);
132✔
1214
                                }
1215
                        }
1216
                        else {
1217
                                // power of 1
1218
                                s.push(tree[i+3]);         // power
51,003✔
1219
                                s.push(tree[i+2]+1); // variable (1-indexed)
51,003✔
1220
                                s.push(SYMBOL);
102,006✔
1221
                        }
1222

1223
                        s.push(hash); // push hash
52,191✔
1224
                        i+=tree[i+1];
52,191✔
1225
                }
1226
                else { // tree[i]==OPERATOR
1227
                        int oper = tree[i];
386,973✔
1228
                        i++;
386,973✔
1229

1230
                        x.push_back(0); // placeholder for hash
386,973✔
1231
                        x.push_back(oper);
386,973✔
1232
                        vector<WORD> hash;
386,973✔
1233
                        hash.push_back(oper);
386,973✔
1234

1235
                        // get two operands from the stack
1236
                        for (int operand=0; operand<2; operand++) {
1,160,919✔
1237
                                hash.push_back(s.top()); s.pop();
773,946✔
1238
                                x.push_back(s.top()); s.pop();
773,946✔
1239
                                x.push_back(s.top()); s.pop();
773,946✔
1240
                                x.push_back(s.top()); s.pop();
773,946✔
1241
                        }
1242

1243
                        x[0] = hash_range(&hash[0], 3);
386,973✔
1244

1245
                        // get rid of multiplications by +/-1
1246
                        if (oper==OPER_MUL) {
386,973✔
1247
                                bool do_continue = false;
419,247✔
1248

1249
                                for (int operand=0; operand<2; operand++) {
419,247✔
1250
                                        int idx_oper1 = operand==0 ? 2 : 5;
403,125✔
1251
                                        int idx_oper2 = operand==0 ? 5 : 2;
60,252✔
1252

1253
                                        // check whether operand 1 equals +/-1
1254
                                        if (x[idx_oper1]==SNUMBER) {
403,125✔
1255

1256
                                                int idx = ABS(x[idx_oper1+1])-1;
334,812✔
1257
                                                if (tree[idx+2]==1 && tree[idx+3]==1 && ABS(tree[idx+4])==3) {
334,812✔
1258
                                                        // push +/- other operand and continue
1259
                                                        s.push(x[idx_oper2+2]);
326,751✔
1260
                                                        s.push(x[idx_oper2+1]*SGN(x[idx_oper1+1]));
326,751✔
1261
                                                        s.push(x[idx_oper2]);
326,751✔
1262
                                                        s.push(hash[1 + (operand + 1) % 2]);
326,751✔
1263
                                                        do_continue = true;
1264
                                                        break;
1265
                                                }
1266
                                        }
1267
                                }
1268

1269
                                if (do_continue) continue;
326,751✔
1270
                        }
1271

1272
                        // check whether this subexpression has been seen before
1273
                        // if not, generate instruction to define it
1274
                        csemap::iterator it = ID.find(x);
60,222✔
1275
                        if (!do_CSE || it == ID.end()) {
60,222✔
1276
                                if (numinstr == MAXPOSITIVE) {
2,694✔
1277
                                        MesPrint((char *)"ERROR: too many temporary variables needed in optimization");
×
1278
                                        Terminate(-1);
×
1279
                                }
1280

1281
                                instr.push_back(numinstr); // expr.nr.
2,694✔
1282
                                instr.push_back(x[1]);           // operator
2,694✔
1283
                                instr.push_back(3);            // length
2,694✔
1284

1285
                                ID[x] = ++numinstr;
2,694✔
1286

1287
                                int lenidx = instr.size()-1;
2,694✔
1288

1289
                                for (int j=0; j<2; j++)
8,082✔
1290
                                        if (x[3*j+2]==SYMBOL || x[3*j+2]==EXTRASYMBOL) {
5,388✔
1291
                                                instr.push_back(8);                                            // length total
4,656✔
1292
                                                instr.push_back(x[3*j+2]);                                   // (extra)symbol
4,656✔
1293
                                                instr.push_back(4);                                            // length (extra)symbol
4,656✔
1294
                                                instr.push_back(ABS(x[3*j+3])-1);                   // variable (0-indexed)
4,656✔
1295
                                                instr.push_back(x[3*j+4]);                                   // power
4,656✔
1296
                                                instr.push_back(1);                                            // numerator
4,656✔
1297
                                                instr.push_back(1);                                            // denominator
4,656✔
1298
                                                instr.push_back(3*SGN(x[3*j+3]));                   // length coeff
4,656✔
1299
                                                instr[lenidx] += 8;
4,656✔
1300
                                        }
1301
                                        else { // x[3*j+1]==SNUMBER
1302
                                                int t = ABS(x[3*j+3])-1;
732✔
1303
                                                instr.push_back(tree[t+1]-1);                                                           // length number
732✔
1304
                                                instr.insert(instr.end(), &tree[t+2], &tree[t]+tree[t+1]); // digits
732✔
1305
                                                instr.back() *= SGN(instr.back()) * SGN(x[3*j+3]);
732✔
1306
                                                instr[lenidx] += tree[t+1]-1;
732✔
1307
                                        }
1308

1309
                                instr.push_back(0); // trailing 0
2,694✔
1310
                                instr[lenidx]++;
2,694✔
1311
                        }
1312

1313
                        // push new expression on the stack
1314
                        s.push(1);
60,222✔
1315
                        s.push(ID[x]);
60,222✔
1316
                        s.push(EXTRASYMBOL);
60,222✔
1317
                        s.push(x[0]); // push hash
60,222✔
1318
                }
386,973✔
1319
        }
1320

1321
#ifdef DEBUG
1322
        MesPrint ("*** [%s, w=%w] DONE: generate_instructions(cse=%d) : numoper=%d",
1323
                                                thetime_str().c_str(), do_CSE?1:0, count_operators(instr));
1324
#endif
1325

1326
        return instr;
90✔
1327
}
45✔
1328

1329
/*
1330
          #] generate_instructions : 
1331
          #[ count_operators_cse :
1332
*/
1333

1334

1335
/**
1336
* Count number of operators in a binary tree, while removing CSEs on the fly.
1337
* The instruction set is not created, which makes this method slightly faster.
1338
*
1339
* A hash is created on the fly and is passed through the stack.
1340
* TODO: find better hash functions
1341
*/
1342
int count_operators_cse (const vector<WORD> &tree) {
×
1343
        //MesPrint ("*** [%s] Starting CSEE", thetime_str().c_str());
1344

1345
        typedef unordered_map<vector<WORD>, int, CSEHash, CSEEq> csemap;
×
1346
        csemap ID;
×
1347

1348
        // reserve lots of space, to prevent later rehashes
1349
        // TODO: what if this is too large? make a parameter?
1350
        ID.rehash(mcts_expr_score * 2);
×
1351

1352
        // s is a stack of operands to process when you encounter operators
1353
        // in the postfix expression tree. Operands consist of three WORDs,
1354
        // formatted as the LHS/RHS of the keys in ID.
1355
        stack<int> s;
×
1356
        int numinstr = 0, numcommas = 0;
×
1357
        vector<WORD> x;
×
1358

1359
        // process the expression tree
1360
        for (int i=0; i<(int)tree.size();) {
×
1361
                x.clear();
×
1362

1363
                if (tree[i]==SNUMBER) {
×
1364
                        WORD hash = hash_range(&tree[i], tree[i + 1]);
×
1365
                        x.push_back(hash);
×
1366
                        x.push_back(SNUMBER);
×
1367
                        x.insert(x.end(),&tree[i],&tree[i]+tree[i+1]);
×
1368
                        int sign = SGN(x.back());
×
1369
                        x.back() *= sign;
×
1370

1371
                        std::pair<csemap::iterator, bool> suc = ID.insert(csemap::value_type(x, i + 1));
×
1372
                        s.push(0);
×
1373
                        s.push(sign * suc.first->second);
×
1374
                        s.push(SNUMBER);
×
1375
                        s.push(hash);
×
1376
                        i+=tree[i+1];
×
1377
                }
1378
                else if (tree[i]==SYMBOL) {
×
1379
                        WORD hash = hash_range(&tree[i], tree[i + 1]);
×
1380
                        if (tree[i+3]>1) {
×
1381
                                x.push_back(hash);
×
1382
                                x.push_back(SYMBOL);
×
1383
                                x.push_back(tree[i+2]+1); // variable (1-indexed)
×
1384
                                x.push_back(tree[i+3]);   // power
×
1385

1386
                                csemap::iterator it = ID.find(x);
×
1387
                                if (it != ID.end()) {
×
1388
                                        // already-seen power of a symbol
1389
                                        s.push(1);
×
1390
                                        s.push(it->second);
×
1391
                                        s.push(EXTRASYMBOL);
×
1392
                                } else {
1393
                                        if (tree[i + 3] == 2)
×
1394
                                                numinstr++;
×
1395
                                        else
1396
                                                numinstr += (int)floor(log(tree[i+3])/log(2.0)) + popcount(tree[i+3]) - 1;
×
1397

1398
                                        ID[x] = numinstr;
×
1399
                                        s.push(1);
×
1400
                                        s.push(numinstr);
×
1401
                                        s.push(EXTRASYMBOL);
×
1402
                                }
1403
                        }
1404
                        else {
1405
                                // power of 1
1406
                                s.push(tree[i+3]);         // power
×
1407
                                s.push(tree[i+2]+1); // variable (1-indexed)
×
1408
                                s.push(SYMBOL);
×
1409
                        }
1410

1411
                        s.push(hash); // push hash
×
1412

1413
                        i+=tree[i+1];
×
1414
                }
1415
                else { // tree[i]==OPERATOR
1416
                        int oper = tree[i];
×
1417
                        i++;
×
1418

1419
                        x.push_back(0); // placeholder for hash
×
1420
                        x.push_back(oper);
×
1421
                        vector<WORD> hash;
×
1422
                        hash.push_back(oper);
×
1423

1424
                        // get two operands from the stack
1425
                        for (int operand=0; operand<2; operand++) {
×
1426
                                hash.push_back(s.top()); s.pop();
×
1427
                                x.push_back(s.top()); s.pop();
×
1428
                                x.push_back(s.top()); s.pop();
×
1429
                                x.push_back(s.top()); s.pop();
×
1430
                        }
1431

1432

1433
                        x[0] = hash_range(&hash[0], 3);
×
1434

1435
                        // get rid of multiplications by +/-1
1436
                        if (oper==OPER_MUL) {
×
1437
                                bool do_continue = false;
×
1438

1439
                                for (int operand=0; operand<2; operand++) {
×
1440
                                        int idx_oper1 = operand==0 ? 2 : 5;
×
1441
                                        int idx_oper2 = operand==0 ? 5 : 2;
×
1442

1443
                                        // check whether operand 1 equals +/-1
1444
                                        if (x[idx_oper1]==SNUMBER) {
×
1445

1446
                                                int idx = ABS(x[idx_oper1+1])-1;
×
1447
                                                if (tree[idx+2]==1 && tree[idx+3]==1 && ABS(tree[idx+4])==3) {
×
1448
                                                        // push +/- other operand and continue
1449
                                                        s.push(x[idx_oper2+2]);
×
1450
                                                        s.push(x[idx_oper2+1]*SGN(x[idx_oper1+1]));
×
1451
                                                        s.push(x[idx_oper2]);
×
1452
                                                        s.push(hash[1 + (operand + 1) % 2]);
×
1453
                                                        do_continue = true;
1454
                                                        break;
1455
                                                }
1456
                                        }
1457
                                }
1458

1459
                                if (do_continue) continue;
×
1460
                        }
1461

1462
                        // check whether this subexpression has been seen before
1463
                        // if not, generate instruction to define it
1464
                        csemap::iterator it = ID.find(x);
×
1465
                        if (it == ID.end()) {
×
1466
                                if (numinstr == MAXPOSITIVE) {
×
1467
                                        MesPrint((char *)"ERROR: too many temporary variables needed in optimization");
×
1468
                                        Terminate(-1);
×
1469
                                }
1470

1471
                                if (oper == OPER_COMMA) numcommas++;
×
1472
                                ID[x] = ++numinstr;
×
1473
                                s.push(1);
×
1474
                                s.push(numinstr);
×
1475
                                s.push(EXTRASYMBOL);
×
1476
                        } else {
1477
                                // push new expression on the stack
1478
                                s.push(1);
×
1479
                                s.push(it->second);
×
1480
                                s.push(EXTRASYMBOL);
×
1481
                        }
1482

1483
                        s.push(x[0]); // push hash
×
1484
                }
×
1485
        }
1486

1487
        //MesPrint ("*** [%s] Stopping CSEE", thetime_str().c_str());
1488
        return numinstr - numcommas;
×
1489
}
×
1490

1491
/*
1492
          #] count_operators_cse : 
1493
          #[ count_operators_cse_topdown :
1494
*/
1495

1496
typedef struct node {
1497
        const WORD* data;
1498
        struct node* l;
1499
        struct node* r; // TODO: add l,r to data?
1500
        WORD sign; // TODO: use data for this?
1501
        UWORD hash;
1502

1503
        node() : l(NULL), r(NULL), sign(1), hash(0) {};
1504
        node(const WORD* data) : data(data), l(NULL), r(NULL), sign(1), hash(0) {};
925✔
1505

1506
        // a minus sign in the tree should only count as a different entry if
1507
        // it is a compound expression: a = -a, but T+-V != T+V
1508
        int cmp(const struct node* rhs) const {
×
1509
                if (this == rhs) return 0;
×
1510

1511
                if (data[0] != rhs->data[0]) return data[0] < rhs->data[0] ? -1 : 1;
×
1512
                int mod = data[0] == SNUMBER ? -1 : 0; // don't check sign, for numbers
×
1513
                if (data[0] == SYMBOL || data[0] == SNUMBER) {
×
1514
                        for (int i = 0; i < data[1] + mod; i++) {
×
1515
                                if (data[i] != rhs->data[i]) return data[i] < rhs->data[i] ? -1 : 1;
×
1516
                                }
1517
                } else {
1518
                        int lv = l->cmp(rhs->l);
×
1519
                        if (lv != 0) return lv;
×
1520
                        int rv = r->cmp(rhs->r);
×
1521
                        if (rv != 0) return rv;
×
1522

1523
                        // TODO: only for ADD operation
1524
                        if (l->sign != rhs->l->sign) return l->sign < rhs->l->sign ? -1 : 1;
×
1525
                        if (r->sign != rhs->r->sign) return r->sign < rhs->r->sign ? -1 : 1;
×
1526
                }
1527

1528
                return 0;
1529
        }
1530

1531
                // less than operator
1532
        bool operator() (const struct node* lhs, const struct node* rhs) const
1533
        {
1534
                return lhs->cmp(rhs) < 0;
1535
        }
1536

1537
        void calcHash() {
925✔
1538
                int mod = data[0] == SNUMBER ? -1 : 0; // don't check sign, for numbers
925✔
1539
                if (data[0] == SYMBOL || data[0] == SNUMBER) {
925✔
1540
                        hash = hash_range(data, data[1] + mod);
769✔
1541
                } else {
1542
                        if (l->hash == 0) l->calcHash();
156✔
1543
                        if (r->hash == 0) r->calcHash();
156✔
1544

1545
                        // signs only matter for compound expressions
1546
                        size_t newr[] = {(size_t)data[0], l->hash, (size_t)l->sign, r->hash, (size_t)r->sign};
156✔
1547
                        hash = hash_range(newr, 5);
156✔
1548
                }
1549
        }
925✔
1550
} NODE;
1551

1552
struct NodeHash {
1553
        size_t operator()(const NODE* n) const {
95✔
1554
                return n->hash; // already computed
95✔
1555
        }
1556
};
1557

1558
struct NodeEq {
1559
        bool operator()(const NODE* lhs, const NODE* rhs) const {
×
1560
                return lhs->cmp(rhs) == 0;
×
1561
        }
1562
};
1563

1564
NODE* buildTree(vector<WORD> &tree) {
79✔
1565
        //MesPrint ("*** [%s] Starting CSEE topdown", thetime_str().c_str());
1566

1567
        // allocate spaces for the tree, cannot be more nodes than tree size
1568
        NODE* ar = (NODE*)Malloc1(tree.size() * sizeof(NODE), "CSE tree");
79✔
1569
        NODE* c = 0;
79✔
1570
        unsigned int curIndex = 0;
79✔
1571

1572
        stack<NODE*> st;
79✔
1573
        for (int i=0; i<(int)tree.size();) {
1,004✔
1574
                c = ar + curIndex;
925✔
1575
                new (c) NODE(&tree[i]); // placement new
925✔
1576
                curIndex++;
925✔
1577

1578
                if (tree[i]==SYMBOL || tree[i] == SNUMBER) {
925✔
1579
                        // extract the sign to a new class member
1580
                        if (tree[i] == SNUMBER) {
502✔
1581
                                c->sign = SGN(tree[i + tree[i + 1] -1]);
396✔
1582
                        }
1583

1584
                        c->calcHash();
502✔
1585
                        st.push(c);
502✔
1586
                        i+=tree[i+1];
502✔
1587
                } else {
1588
                        c->r = st.top(); st.pop();
423✔
1589
                        c->l = st.top(); st.pop();
423✔
1590

1591
                        // filter *1 and *-1
1592
                        // TODO: also multiply if there are two numbers?
1593
                        if (c->data[0] == OPER_MUL) {
423✔
1594
                                NODE* ch[] = {c->r, c->l};
373✔
1595
                                for (int j = 0; j < 2; j++)
540✔
1596
                                        if (ch[j]->data[0] == SNUMBER && ch[j]->data[1] == 5 && ch[j]->data[2]==1 && ch[j]->data[3]==1) {
495✔
1597
                                                        ch[(j+1)%2]->sign *= ch[j]->sign; // transfer sign
328✔
1598
                                                        c = ch[(j+1)%2];
328✔
1599
                                                        break;
328✔
1600
                                        }
1601
                        }
1602

1603
                        c->calcHash();
423✔
1604
                        st.push(c);
423✔
1605
                        i++;
423✔
1606
                }
1607
        }
1608

1609
        // TODO: reallocate to smaller size? Could save memory
1610
        //MesPrint("Memory difference: %d vs %d", curIndex, tree.size());
1611

1612
        // we want to make the root of the tree the first element
1613
        // so that we can easily free the array.
1614
        // we swap the first element with the root
1615
        // we need to change the pointer in the operator node that has this element as a child
1616
        // TODO: check performance
1617
        for (unsigned int i = 0; i < curIndex; i++) {
1,004✔
1618
                if (ar[i].l == ar) ar[i].l = st.top();
925✔
1619
                if (ar[i].r == ar) ar[i].r = st.top();
925✔
1620
        }
1621

1622
        swap(ar[0], *st.top());        
79✔
1623
        return ar;
79✔
1624
}
79✔
1625

1626
int count_operators_cse_topdown (vector<WORD> &tree) {
79✔
1627
        typedef unordered_set<NODE*, NodeHash, NodeEq> nodeset;
79✔
1628
        nodeset ID;
79✔
1629

1630
        // reserve lots of space, to prevent later rehashes
1631
        // TODO: what if this is too large? make a parameter?
1632
        ID.rehash(mcts_expr_score * 2);
79✔
1633

1634
        int numinstr = 0;
79✔
1635

1636
        NODE* root = buildTree(tree);
79✔
1637

1638
        stack<NODE*> stack;
79✔
1639
        stack.push(root);
79✔
1640
        while (!stack.empty())
348✔
1641
        {
1642
                NODE* c = stack.top();
269✔
1643
                stack.pop();
269✔
1644

1645
                if (c->data[0] == SYMBOL) {
269✔
1646
                        if (c->data[3] > 1) {
106✔
1647
                                std::pair<nodeset::iterator, bool> suc = ID.insert(c);
×
1648
                                if (suc.second) { // new
×
1649
                                        if (c->data[3] == 2)
×
1650
                                                numinstr++;
×
1651
                                        else
1652
                                                numinstr += (int)floor(log(c->data[3])/log(2.0)) + popcount(c->data[3]) - 1;
×
1653
                                }
1654
                        }
1655
                } else {
1656
                        if (c->data[0] != SNUMBER) {
163✔
1657
                                // operator
1658
                                std::pair<nodeset::iterator, bool> suc = ID.insert(c);
95✔
1659
                                if (suc.second) {
95✔
1660
                                        stack.push(c->r);
95✔
1661
                                        stack.push(c->l);
95✔
1662

1663
                                        // ignore OPER_COMMA
1664
                                        if (c->data[0] == OPER_MUL || c->data[0] == OPER_ADD)
95✔
1665
                                                numinstr++;
61✔
1666
                                }
1667
                        }
1668
                }
1669
        }
1670

1671
        //MesPrint ("*** [%s] Stopping CSEE", thetime_str().c_str());
1672
        M_free(root, "CSE tree");
79✔
1673

1674
        return numinstr;
79✔
1675
}
79✔
1676

1677
/*
1678
          #] count_operators_cse_topdown : 
1679
          #[ simulated_annealing :
1680
*/
1681
vector<WORD> simulated_annealing() {
×
1682
        float minT = AO.Optimize.saMinT.fval;
×
1683
        float maxT = AO.Optimize.saMaxT.fval;
×
1684
        float T = maxT;
×
1685
        float coolrate = pow(minT / maxT, 1 / (float)AO.Optimize.saIter);
×
1686

1687
        GETIDENTITY;
×
1688

1689
        // create a valid state where FACTORSYMBOL/SEPARATESYMBOL remains first
1690
        vector<WORD> state = occurrence_order(optimize_expr, false);
×
1691
        int startindex = 0;
×
1692
        if ((state.size() > 0 && state[0] == SEPARATESYMBOL) || (state.size() > 1 && state[1] == FACTORSYMBOL)) startindex++;
×
1693
        if (state.size() > 1 && state[1] == FACTORSYMBOL) startindex++;
×
1694

1695
        my_random_shuffle(BHEAD state.begin() + startindex, state.end()); // start from random scheme
×
1696

1697
        vector<WORD> tree = Horner_tree(optimize_expr, state);
×
1698
        int curscore = count_operators_cse_topdown(tree);
×
1699

1700
        // clean poly_vars, that are allocated by Horner_tree
1701
        AN.poly_num_vars = 0;
×
1702
        M_free(AN.poly_vars,"poly_vars");
×
1703

1704
        std::vector<WORD> best = state; // best state
×
1705
        int bestscore = curscore;
×
1706

1707
        if (startindex == (int)state.size() || (int)state.size() - startindex < 2) {
×
1708
                return best;
1709
        }
1710
        
1711
        for (int o = 0; o < AO.Optimize.saIter; o++) {
×
1712
                int inda = iranf(BHEAD state.size() - startindex) + startindex;
×
1713
                int indb = iranf(BHEAD state.size() - startindex) + startindex;
×
1714

1715
                if (inda == indb) {
×
1716
                        continue;
×
1717
                }
1718

1719
                swap(state[inda], state[indb]); // swap works best for Horner
×
1720

1721
                vector<WORD> tree = Horner_tree(optimize_expr, state);
×
1722
                int newscore = count_operators_cse_topdown(tree);
×
1723

1724
                // clean poly_vars, that are allocated by Horner_tree
1725
                AN.poly_num_vars = 0;
×
1726
                M_free(AN.poly_vars,"poly_vars");
×
1727

1728
                if (newscore <= curscore || 2.0 * wranf(BHEAD0) / (float)(UWORD)(-1) < exp((curscore - newscore) / T)) {
×
1729
                        curscore = newscore;
×
1730

1731
                        if (curscore < bestscore) {
×
1732
                                bestscore = curscore;
×
1733
                                best = state;
×
1734
                        }
1735
                } else {
1736
                        swap(state[inda], state[indb]);
×
1737
                }
1738

1739
#ifdef DEBUG_SA
1740
        MesPrint("Score at step %d: %d", o, curscore);
1741
#endif
1742
                T *= coolrate;
×
1743
        }
×
1744

1745
#ifdef DEBUG_SA
1746
        MesPrint("Simulated annealing score: %d", bestscore);
1747
#endif
1748

1749
        return best;
1750
}
×
1751

1752
/*
1753
          #] simulated_annealing : 
1754
          #[ printpstree :
1755
*/
1756

1757
/*
1758
// print MCTS tree with LaTeX/pstricks (for analysis)
1759
void printpstree_rec (tree_node x, string pre="") {
1760

1761
        if (x.num_visits==1) {
1762
                MesPrint("%s\\TR{%d}",pre.c_str(),x.var);
1763
        }
1764
        else {
1765
                MesPrint("%s\\pstree%s{\\TR{%d}}{",pre.c_str(),
1766
                                                 pre=="  "?"[nodesep=0, levelsep=40]":"",
1767
                                                 x.var);
1768
                for (int i=0; i<(int)x.childs.size(); i++)
1769
                        if (x.childs[i].num_visits>0)
1770
                                printpstree_rec(x.childs[i], pre+"        ");
1771
                MesPrint("%s}",pre.c_str());
1772
        }
1773
}
1774

1775
void printpstree () {
1776
        // draw tree with pstricks
1777
        MesPrint ("\\documentclass{article}");
1778
        MesPrint ("\\usepackage{pstricks,pst-node,pst-tree,graphicx}");
1779
        MesPrint ("\\begin{document}");
1780
        MesPrint ("\\scalebox{0.02}{");
1781
        printpstree_rec(mcts_root,"  ");
1782
        MesPrint ("}");
1783
        MesPrint ("\\end{document}");
1784
}
1785
*/
1786

1787
/*
1788
          #] printpstree : 
1789
          #[ find_Horner_MCTS_expand_tree :
1790
*/
1791

1792
/**  Expand MCTS tree
1793
 *
1794
 *         Description
1795
 *         ===========
1796
 *         This method does one MCTS step: it selects the most-promising
1797
 *         node, expands it, randomly completes the Horner scheme and
1798
 *         backpropagates the results.
1799
 *
1800
 *         Selection is done according to the UCT formula:
1801
 *
1802
 *                UCT(i) = <x(i)> + C * sqrt(2*log(N)/n(i)),
1803
 *
1804
 *         where <x(i)> is the average result of child i, n(i) is the number
1805
 *         of time child i is visited, N=SUM(n(i)) and C is a constant to be
1806
 *         determined experimentally (can be set via mctsconstant).
1807
 *
1808
 *         A "virtual loss" is added once a node is selected. This is
1809
 *         relevant to avoid duplicate work in the parallel version.
1810
 *
1811
 *         Notes
1812
 *         =====
1813
 *         - The method is called from "find_Horner_MCTS" in Form and from
1814
 *           "RunThread" via "find_Horner_MCTS_expand_tree_threaded" in
1815
 *           TForm.
1816
 *         - The code is divided into three functions: "next_MCTS_scheme",
1817
 *           "try_MCTS_scheme" and "update_MCTS_scheme". In this way, the
1818
 *           source code is shared with ParForm; "try_MCTS_scheme" is
1819
 *           assumed to run on workers, while the others are assumed to run
1820
 *           on the master.
1821
 */
1822

1823
/*
1824
                 #[ next_MCTS_scheme :
1825
*/
1826

1827
// find a Horner scheme to be used for the next simulation
1828
inline static void next_MCTS_scheme (PHEAD vector<WORD> *porder, vector<WORD> *pscheme, vector<tree_node *> *ppath) {
79✔
1829

1830
        vector<WORD> &order = *porder;
79✔
1831
        vector<WORD> &schemev = *pscheme;
79✔
1832
        vector<tree_node *> &path = *ppath;
79✔
1833
        int depth = 0, nchild0;
79✔
1834
        float slide_down_factor = 1.0;
79✔
1835

1836
        order.clear();
79✔
1837
        path.clear();
79✔
1838

1839
        // MCTS step I: select
1840
        tree_node *select = &mcts_root;
79✔
1841
        path.push_back(select);
79✔
1842
        nchild0 = select->childs.size();
79✔
1843
        while (select->childs.size() > 0) {
138✔
1844
                // add virtual loss
1845
                select->num_visits++;
86✔
1846
                select->sum_results+=mcts_expr_score;
86✔
1847

1848
//-------------------------------------------------------------------
1849
                                switch ( AO.Optimize.mctsdecaymode ) {
86✔
1850
                                        case 1:  // Based on http://arxiv.org/abs/arXiv:1312.0841
86✔
1851
                                                slide_down_factor = 1.0-(1.0*AT.optimtimes)/(1.0*AO.Optimize.mctsnumexpand);
86✔
1852
                                                break;
86✔
1853
                                        case 2:  // This gives a bit more cleanup time at the end.
×
1854
                                                if ( 2*AT.optimtimes < AO.Optimize.mctsnumexpand ) {
×
1855
                                                        slide_down_factor = 1.0*(AO.Optimize.mctsnumexpand-2*AT.optimtimes);
×
1856
                                                        slide_down_factor /= 1.0*AO.Optimize.mctsnumexpand;
×
1857
                                                }
1858
                                                else {
1859
                                                        slide_down_factor = 0.0001;
1860
                                                }
1861
                                                break;
1862
                                        case 3:  // depth dependent factor combined with case 1
×
1863
                                                float dd = 1.0-(1.0*depth)/(1.0*nchild0);
×
1864
                                                slide_down_factor = 1.0-(1.0*AT.optimtimes)/(1.0*AO.Optimize.mctsnumexpand);
×
1865
                                                if ( dd <= 0.000001 ) slide_down_factor = 1.0;
×
1866
                                                else slide_down_factor /= dd;
×
1867
                                                if ( slide_down_factor > 1.0 ) slide_down_factor = 1.0;
×
1868
                                                break;
1869
                                }
1870
//-------------------------------------------------------------------
1871

1872
#ifdef DEBUG_MCTS
1873
                MesPrint("select %d",select->var);
1874
#endif
1875

1876
                // find most-promising node
1877
                double best=0;
86✔
1878
                tree_node *next=NULL;
86✔
1879
                for (vector<tree_node>::iterator p=select->childs.begin(); p<select->childs.end(); p++) {
323✔
1880
                        double score;
237✔
1881
                        if (p->num_visits >= 1) {
237✔
1882

1883
                                // there are results calculated, so select with the UCT formula
1884
                                score = mcts_expr_score / (p->sum_results/p->num_visits) +
179✔
1885
//-------------------------------------------------------------------------
1886
                                        slide_down_factor *
179✔
1887
//-------------------------------------------------------------------------
1888
                                        2 * AO.Optimize.mctsconstant.fval * sqrt(2*log(select->num_visits) / p->num_visits);
179✔
1889

1890
#ifdef DEBUG_MCTS
1891
                                printf("%d: %.2lf [x=%.2lf n=%d fin=%i]\n",p->var,score,mcts_expr_score / (p->sum_results/p->num_visits),
1892
                                                         p->num_visits,p->finished?1:0);
1893
                                fflush(stdout);
1894
#endif
1895
                        }
1896
                        else {
1897
                // no results yet, so select this node by setting score=infinite
1898
                                score = 1e100;
1899

1900
#ifdef DEBUG_MCTS
1901
                                printf("%d: inf\n",p->var); fflush(stdout);
1902
#endif
1903
                        }
1904

1905
                        // update best candidate
1906
                        if (!p->finished && score>best) {
237✔
1907
                                best=score;
79✔
1908
                                next=&*p;
79✔
1909
                        }
1910
                }
1911

1912
                // if no node is found, this node is finished
1913
                if (next==NULL) {
86✔
1914
                        select->finished=true;
27✔
1915
                        break;
27✔
1916
                }
1917

1918
                // traverse down the tree
1919
                select = next;
59✔
1920
                path.push_back(select);
59✔
1921
                order.push_back(select->var);
59✔
1922
                depth++;
59✔
1923
        }
1924

1925
        // MCTS step II: expand
1926

1927
#ifdef DEBUG_MCTS
1928
        MesPrint("expand %d",select->var);
1929
#endif
1930

1931
        // variables used so far
1932
        set<WORD> var_used;
79✔
1933

1934
        for (int i=0; i<(int)order.size(); i++)
138✔
1935
                var_used.insert(ABS(order[i])-1);
59✔
1936

1937
        // if this a new node, create node and add children
1938
        if (!select->finished && select->childs.size()==0) {
79✔
1939
                tree_node new_node(select->var);
39✔
1940
                int sign = (order.size() == 0) ? 1 : SGN(order.back());
39✔
1941
                for (int i=0; i<(int)mcts_vars.size(); i++)
98✔
1942
                        if (!var_used.count(mcts_vars[i])) {
59✔
1943
                                new_node.childs.push_back(tree_node(sign*(mcts_vars[i]+1)));
13✔
1944
                                if (AO.Optimize.hornerdirection==O_FORWARDANDBACKWARD)
13✔
1945
                                        new_node.childs.push_back(tree_node(-sign*(mcts_vars[i]+1)));
×
1946
                        }
1947
                my_random_shuffle(BHEAD new_node.childs.begin(), new_node.childs.end());
39✔
1948

1949
                // here locking is necessary, since operator=(tree_node) is a
1950
                // non-atomic operation (using pointers makes this lock obsolete)
1951
                LOCK(optimize_lock);
39✔
1952
                *select = new_node;
39✔
1953
                UNLOCK(optimize_lock);
39✔
1954
        }
39✔
1955
        // set finished if necessary
1956
        if (select->childs.size()==0)
79✔
1957
                select->finished = true;
39✔
1958

1959
        // add virtual loss of number of operators in original expression
1960
        select->num_visits++;
79✔
1961
        select->sum_results+=mcts_expr_score;
79✔
1962

1963
        // MCTS step III: simulation
1964

1965
        // create complete Horner scheme
1966
        deque<WORD> scheme;
158✔
1967

1968
        for (int i=0; i<(int)mcts_vars.size(); i++)
185✔
1969
                if (!var_used.count(mcts_vars[i]))
106✔
1970
                        scheme.push_back(mcts_vars[i]);
47✔
1971
        my_random_shuffle(BHEAD scheme.begin(), scheme.end());
79✔
1972

1973
        for (int i=(int)order.size()-1; i>=0; i--) {
138✔
1974
                if (order[i] > 0)
59✔
1975
                        scheme.push_front(order[i]-1);
30✔
1976
                else
1977
                        scheme.push_back(-order[i]-1);
29✔
1978
        }
1979

1980
        // add FACTORSYMBOL/SEPARATESYMBOL is necessary
1981
        if (mcts_factorized)
79✔
1982
                scheme.push_front(FACTORSYMBOL);
×
1983
        if (mcts_separated)
79✔
1984
                scheme.push_front(SEPARATESYMBOL);
34✔
1985

1986
        // Horner scheme as a vector
1987
        schemev = vector<WORD>(scheme.begin(), scheme.end());
158✔
1988

1989
}
79✔
1990

1991
/*
1992
                 #] next_MCTS_scheme : 
1993
                 #[ try_MCTS_scheme :
1994
*/
1995

1996
// count the number of operators in the given Horner scheme
1997
inline static void try_MCTS_scheme (PHEAD const vector<WORD> &scheme, int *pnum_oper) {
79✔
1998

1999
        // do Horner, CSE and count the number of operators
2000
        vector<WORD> tree = Horner_tree(optimize_expr, scheme);
79✔
2001
        //vector<WORD> instr = generate_instructions(tree, true);
2002
        //int num_oper = count_operators(instr);
2003
        //int num_oper2 = count_operators_cse(tree);
2004
        //int num_oper2 = count_operators_cse_topdown(tree);
2005
        //MesPrint("%d %d", num_oper, num_oper2);
2006

2007
        int num_oper = count_operators_cse_topdown(tree);
79✔
2008

2009
        // clean poly_vars, that is allocated by Horner_tree
2010
        AN.poly_num_vars = 0;
79✔
2011
        M_free(AN.poly_vars,"poly_vars");
79✔
2012

2013
        *pnum_oper = num_oper;
79✔
2014

2015
}
79✔
2016

2017
/*
2018
                 #] try_MCTS_scheme : 
2019
                 #[ update_MCTS_scheme :
2020
*/
2021

2022
// update the best score and the search tree
2023
inline static void update_MCTS_scheme (int num_oper, const vector<WORD> &scheme, vector<tree_node *> *ppath) {
79✔
2024

2025
        vector<tree_node *> &path = *ppath;
79✔
2026

2027
        // update the (global) list of best Horner scheme
2028
        if ((int)mcts_best_schemes.size() < AO.Optimize.mctsnumkeep ||
79✔
2029
                        (--mcts_best_schemes.end())->first > num_oper) {
×
2030
                // here locking is necessary, for otherwise best_schemes may
2031
                // become corrupted; lock can be prevented if each thread keeps
2032
                // track of it's own list and those lists are merged in the end,
2033
                // but this seems not useful to implement
2034
                LOCK(optimize_lock);
79✔
2035
                mcts_best_schemes.insert(make_pair(num_oper,scheme));
79✔
2036
                if ((int)mcts_best_schemes.size() > AO.Optimize.mctsnumkeep)
79✔
2037
                        mcts_best_schemes.erase(--mcts_best_schemes.end());
×
2038
                UNLOCK(optimize_lock);
61✔
2039
        }
2040

2041
        // MCTS step IV: backpropagate
2042

2043
        // add number of operator and subtract mcts_expr_score, which
2044
        // behaves as a "virtual loss"
2045
        for (vector<tree_node *>::iterator p=path.begin(); p<path.end(); p++)
217✔
2046
                (*p)->sum_results += num_oper - mcts_expr_score;
138✔
2047

2048
}
79✔
2049

2050
/*
2051
                 #] update_MCTS_scheme : 
2052
*/
2053

2054
void find_Horner_MCTS_expand_tree () {
79✔
2055

2056
#ifdef DEBUG
2057
        MesPrint ("*** [%s, w=%w] CALL: find_Horner_MCTS_expand_tree", thetime_str().c_str());
2058
#endif
2059

2060
        GETIDENTITY;
79✔
2061

2062
        // the order for the Horner scheme up to the selected node, with signs
2063
        // indicating forward or backward.
2064
        vector<WORD> order;
79✔
2065

2066
        // complete Horner scheme
2067
        vector<WORD> scheme;
79✔
2068

2069
        // path to the selected node
2070
        vector<tree_node *> path;
79✔
2071

2072
        // the number of operations obtained by the simulation
2073
        int num_oper;
79✔
2074

2075
        next_MCTS_scheme(BHEAD &order, &scheme, &path);
79✔
2076
        try_MCTS_scheme(BHEAD scheme, &num_oper);
79✔
2077
#ifdef DEBUG_MCTS
2078
        // Actually "order" is needed only for this debug output.
2079
        MesPrint ("{%a} -> {%a} -> %d", order.size(), &order[0], scheme.size(), &scheme[0], num_oper);
2080
#endif
2081
        update_MCTS_scheme(num_oper, scheme, &path);
79✔
2082

2083
#ifdef DEBUG
2084
        MesPrint ("*** [%s, w=%w] DONE: find_Horner_MCTS_expand_tree(%a-> %d)",
2085
                                                thetime_str().c_str(), scheme.size(), &scheme[0], num_oper);
2086
#endif
2087

2088
}
79✔
2089

2090
/*
2091
          #] find_Horner_MCTS_expand_tree : 
2092
          #[ PF_find_Horner_MCTS_expand_tree :
2093
*/
2094
#ifdef WITHMPI
2095

2096
// To remember which task is assigned to each slave. A task is represented by
2097
// a pair of a Horner scheme and the selected path for the scheme.
2098
// The index range is from 1 to PF.numtasks-1.
2099
vector<pair<vector<WORD>, vector<tree_node *> > > PF_opt_MCTS_tasks;
2100

2101
// Initialization.
2102
void PF_find_Horner_MCTS_expand_tree_master_init () {
2103

2104
        PF_opt_MCTS_tasks.resize(PF.numtasks);
2105
        for (int i = 1; i < PF.numtasks; i++) {
2106
                pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[i];
2107
                p.first.clear();
2108
                p.second.clear();
2109
        }
2110

2111
}
2112

2113
// Wait for an idle slave and return the process number.
2114
int PF_find_Horner_MCTS_expand_tree_master_next () {
2115

2116
        // Find an idle slave.
2117
        int next;
2118
        PF_Receive(PF_ANY_SOURCE, PF_OPT_MCTS_MSGTAG, &next, NULL);
2119

2120
        // Check if the slave had a task.
2121
        pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[next];
2122
        if (!p.first.empty()) {
2123
                // If so, update the result.
2124
                int num_oper;
2125
                PF_Unpack(&num_oper, 1, PF_INT);
2126
                update_MCTS_scheme(num_oper, p.first, &p.second);
2127

2128
                // Clear the task.
2129
                p.first.clear();
2130
                p.second.clear();
2131
        }
2132

2133
        return next;
2134

2135
}
2136

2137
// The main function on the master.
2138
void PF_find_Horner_MCTS_expand_tree_master () {
2139

2140
#ifdef DEBUG
2141
        MesPrint ("*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_master", thetime_str().c_str());
2142
#endif
2143

2144
        vector<WORD> order;
2145
        vector<WORD> scheme;
2146
        vector<tree_node *> path;
2147

2148
        next_MCTS_scheme(BHEAD &order, &scheme, &path);
2149

2150
        // Find an idle slave.
2151
        int next = PF_find_Horner_MCTS_expand_tree_master_next();
2152

2153
        // Send a new task to the slave.
2154
        PF_PrepareLongSinglePack();
2155
        int len = scheme.size();
2156
        PF_LongSinglePack(&len, 1, PF_INT);
2157
        PF_LongSinglePack(&scheme[0], len, PF_WORD);
2158
        PF_LongSingleSend(next, PF_OPT_MCTS_MSGTAG);
2159

2160
        // Remember the task.
2161
        pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[next];
2162
        p.first = scheme;
2163
        p.second = path;
2164

2165
#ifdef DEBUG
2166
        MesPrint ("*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_master", thetime_str().c_str());
2167
#endif
2168

2169
}
2170

2171
// Wait for all the slaves to finish their tasks.
2172
void PF_find_Horner_MCTS_expand_tree_master_wait () {
2173

2174
#ifdef DEBUG
2175
        MesPrint ("*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_master_wait", thetime_str().c_str());
2176
#endif
2177

2178
        // Wait for all the slaves.
2179
        for (int i = 1; i < PF.numtasks; i++) {
2180
                int next = PF_find_Horner_MCTS_expand_tree_master_next();
2181
                // Send a null task.
2182
                PF_PrepareLongSinglePack();
2183
                int len = 0;
2184
                PF_LongSinglePack(&len, 1, PF_INT);
2185
                PF_LongSingleSend(next, PF_OPT_MCTS_MSGTAG);
2186
        }
2187

2188
#ifdef DEBUG
2189
        MesPrint ("*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_master_wait", thetime_str().c_str());
2190
#endif
2191

2192
}
2193

2194
// The main function on the slaves.
2195
void PF_find_Horner_MCTS_expand_tree_slave () {
2196

2197
#ifdef DEBUG
2198
        MesPrint ("*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_slave", thetime_str().c_str());
2199
#endif
2200

2201
        vector<WORD> scheme;
2202

2203
        {
2204
                // Send the first message to the master, which indicates I am idle.
2205
                PF_PreparePack();
2206
                int dummy = 0;
2207
                PF_Pack(&dummy, 1, PF_INT);
2208
                PF_Send(MASTER, PF_OPT_MCTS_MSGTAG);
2209
        }
2210

2211
        for (;;) {
2212
                // Get a task from the master.
2213
                PF_LongSingleReceive(MASTER, PF_OPT_MCTS_MSGTAG, NULL, NULL);
2214

2215
                // Length of the task.
2216
                int len;
2217
                PF_LongSingleUnpack(&len, 1, PF_INT);
2218

2219
                // No task remains.
2220
                if (len == 0) break;
2221

2222
                // Perform the given task.
2223
                scheme.resize(len);
2224
                PF_LongSingleUnpack(&scheme[0], len, PF_WORD);
2225
                int num_oper;
2226
                try_MCTS_scheme(scheme, &num_oper);
2227

2228
                // Send the result to the master.
2229
                PF_PreparePack();
2230
                PF_Pack(&num_oper, 1, PF_INT);
2231
                PF_Send(MASTER, PF_OPT_MCTS_MSGTAG);
2232
        }
2233

2234
#ifdef DEBUG
2235
        MesPrint ("*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_slave", thetime_str().c_str());
2236
#endif
2237

2238
}
2239

2240
#endif
2241
/*
2242
          #] PF_find_Horner_MCTS_expand_tree : 
2243
          #[ find_Horner_MCTS :
2244
*/
2245

2246
/**  Find best Horner schemes using MCTS
2247
 *
2248
 *         Description
2249
 *         ===========
2250
 *         The method governs the MCTS for the best Horner schemes. It does
2251
 *         some pre-processing, calls "find_Horner_MCTS_expand_tree" a
2252
 *         number of times and does some post-processing.
2253
 */
2254
//vector<vector<WORD> > find_Horner_MCTS () {
2255
void find_Horner_MCTS () {
12✔
2256

2257
#ifdef WITHMPI
2258
        if (PF.me != MASTER) {
2259
                if (PF.numtasks <= 1) return;
2260
                PF_find_Horner_MCTS_expand_tree_slave();
2261
                return;
2262
        }
2263
#endif
2264

2265
#ifdef DEBUG
2266
        MesPrint ("*** [%s, w=%w] CALL: find_Horner_MCTS", thetime_str().c_str());
2267
#endif
2268

2269
        GETIDENTITY;
12✔
2270

2271
        LONG start_time = TimeWallClock(1);
12✔
2272

2273
        // initialize the used global variables
2274
        mcts_expr_score = count_operators(optimize_expr);
12✔
2275
        mcts_root = tree_node();
12✔
2276

2277
        // extract all symbols from the expression
2278
        set<WORD> var_set;
12✔
2279
        for (WORD *t=optimize_expr; *t!=0; t+=*t)
36✔
2280
                if (t[1] == SYMBOL)
24✔
2281
                        for (int i=3; i<t[2]; i+=2)
54✔
2282
                                var_set.insert(t[i]);
30✔
2283

2284
        // check for factorized/separated expression and make sure that
2285
        // FACTORSYMBOL/SEPARATESYMBOL isn't included in the MCTS
2286
        mcts_factorized = var_set.count(FACTORSYMBOL);
12✔
2287
        if (mcts_factorized) var_set.erase(FACTORSYMBOL);
12✔
2288
        mcts_separated = var_set.count(SEPARATESYMBOL);
12✔
2289
        if (mcts_separated) var_set.erase(SEPARATESYMBOL);
12✔
2290

2291
        mcts_vars = vector<WORD>(var_set.begin(), var_set.end());
24✔
2292
        optimize_num_vars = (int)mcts_vars.size();
12✔
2293
        // initialize MCTS tree root
2294
        for (int i=0; i<(int)mcts_vars.size(); i++) {
21✔
2295
                if (AO.Optimize.hornerdirection != O_BACKWARD)
9✔
2296
                        mcts_root.childs.push_back(tree_node(+(mcts_vars[i]+1)));
9✔
2297
                if (AO.Optimize.hornerdirection != O_FORWARD)
9✔
2298
                        mcts_root.childs.push_back(tree_node(-(mcts_vars[i]+1)));
9✔
2299
        }
2300
        my_random_shuffle(BHEAD mcts_root.childs.begin(), mcts_root.childs.end());
12✔
2301

2302
#if defined(WITHMPI)
2303
        PF_find_Horner_MCTS_expand_tree_master_init();
2304
#endif
2305

2306
        // initialize a potential variable mctsconstant scheme.
2307
        AT.optimtimes = 0;
12✔
2308

2309
        // call expand_tree until it is called "mctsnumexpand" times, the
2310
        // time limit is reached or the tree is fully finished
2311
        for (int times=0; times<AO.Optimize.mctsnumexpand && !mcts_root.finished &&
91✔
2312
                                 (AO.Optimize.mctstimelimit==0 ||        (TimeWallClock(1)-start_time)/100 < AO.Optimize.mctstimelimit);
79✔
2313
                         times++) {
2314
                AT.optimtimes = times;
79✔
2315
        // call expand_tree routine depending on threading mode
2316
#if defined(WITHPTHREADS)
2317
                if (AM.totalnumberofthreads > 1)
61✔
2318
                        find_Horner_MCTS_expand_tree_threaded();
61✔
2319
                else
2320
#elif defined(WITHMPI)
2321
                if (PF.numtasks > 1)
2322
                        PF_find_Horner_MCTS_expand_tree_master();
2323
                else
2324
#endif
2325
                        find_Horner_MCTS_expand_tree();
18✔
2326
        }
2327

2328
        // if TForm or ParForm, wait for everyone to finish
2329
#ifdef WITHPTHREADS
2330
        MasterWaitAll();
8✔
2331
#endif
2332
#ifdef WITHMPI
2333
        PF_find_Horner_MCTS_expand_tree_master_wait();
2334
#endif
2335
#ifdef DEBUG
2336
        MesPrint ("*** [%s, w=%w] DONE: find_Horner_MCTS", thetime_str().c_str());
2337
#endif
2338

2339
}
12✔
2340

2341
/*
2342
          #] find_Horner_MCTS : 
2343
          #[ merge_operators :
2344
*/
2345

2346
/**  Merge operators
2347
 *
2348
 *         Description
2349
 *         ===========
2350
 *         The input instructions form a binary DAG. This method merges
2351
 *         expressions like
2352
 *
2353
 *                Z1 = a+b;
2354
 *                Z2 = Z1+c;
2355
 *
2356
 *         into
2357
 *
2358
 *                Z2 = a+b+c;
2359
 *
2360
 *         An instruction is merged iff it only has one parent and the
2361
 *         operator equals its parent's operator.
2362
 *
2363
 *         This still leaves some freedom: where should the coefficients end
2364
 *         up in cases as:
2365
 *
2366
 *                Z1 = Z2 + x    <=>         Z1 = 2*Z2 + x
2367
 *                Z2 = 2*x*y                         Z2 = x*y
2368
 *
2369
 *         Both are relevant, e.g. for CSE of the form "2*x" and "2*Z2". The
2370
 *         flag "move_coeff" moves coefficients from LHS-like expressions to
2371
 *         RHS-like expressions.
2372
 *
2373
 *         Furthermore, this method removes empty equation (Z1=0), that are
2374
 *         introduced by some "optimize_greedy" substitutions.
2375
 *
2376
 *         Implementation details
2377
 *         ======================
2378
 *         Expressions are mostly traversed via a stack, so that parents are
2379
 *         evaluated before their children.
2380
 *
2381
 *         With "move_coeff" set coefficients are moved, but this leads to
2382
 *         some tricky cases, e.g.
2383
 *
2384
 *           Z1 = Z2 + x
2385
 *           Z2 = 2*y
2386
 *
2387
 *         Here Z2 reduces to the trivial equation Z2=y, which should be
2388
 *         eliminated. Here the array skip[i] comes in.
2389
 *
2390
 *         Furthermore in the case
2391
 *
2392
 *           Z1 = Z2 + x
2393
 *           Z2 = 2*Z3
2394
 *           Z3 = x*Z4
2395
 *           Z4 = y*z
2396
 *
2397
 *         after substituting Z1 = 2*Z3 + x, the parent expression for Z4
2398
 *         becomes Z3 instead of Z2. This is where renum_par[i] comes in.
2399
 *
2400
 *         Finally, once a coefficient has been moved, skip_coeff[i] is set
2401
 *         and this coefficient is copied into the new expression anymore.
2402
 */
2403
vector<WORD> merge_operators (const vector<WORD> &all_instr, bool move_coeff) {
123✔
2404

2405
#ifdef DEBUG_MORE
2406
        MesPrint ("*** [%s, w=%w] CALL: merge_operators", thetime_str().c_str());
2407
#endif
2408

2409
        // get starting positions of instructions
2410
        vector<const WORD *> instr;
123✔
2411
        const WORD *tbegin = &*all_instr.begin();
123✔
2412
        const WORD *tend = tbegin+all_instr.size();
123✔
2413
        // copy all instructions to temp space. There will be n of them in instr.
2414
        for (const WORD *t=tbegin; t!=tend; t+=*(t+2)) {
3,621✔
2415
                instr.push_back(t);
3,498✔
2416
        }
2417
        int n = instr.size();
123✔
2418

2419
        // find parents and number of parents of instructions
2420
        vector<int> par(n), numpar(n,0);
123✔
2421
        for (int i=0; i<n; i++) par[i]=i;
3,621✔
2422

2423
        for (int i=0; i<n; i++) {
3,621✔
2424
                for (const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t) {
10,296✔
2425
                        if ( *(t+1)==EXTRASYMBOL && *t!=1+ABS(*(t+*t-1)) ) {
6,798✔
2426
                                // extra symbol t[3] is referred to in instr i.
2427
                                par[*(t+3)]=i;
3,903✔
2428
                                numpar[*(t+3)]++;
3,903✔
2429

2430
                                // if coefficient isn't +/-1 or power > 1, increase numpar,
2431
                                // so that this is not merged
2432
                                if (*(t+*t-3)!=1 || *(t+*t-2)!=1 || ABS(*(t+*t-1))!=3) numpar[*(t+3)]++;
3,903✔
2433
                                if (*(t+4)>1) numpar[*(t+3)]++;
3,903✔
2434
                        }
2435
                }
2436
        }
2437
        // determine which instructions to merge
2438
        stack<int> s;
123✔
2439
        s.push(n-1);
123✔
2440
        vector<bool> vis(n,false);
123✔
2441

2442
        while (!s.empty()) {
4,149✔
2443

2444
                int i=s.top(); s.pop();
4,026✔
2445
                if (vis[i]) continue;
4,026✔
2446
                vis[i]=true;
3,408✔
2447

2448
                for (const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t)
10,206✔
2449
                        if ( *(t+1)==EXTRASYMBOL && *t!=1+ABS(*(t+*t-1)) )
6,798✔
2450
                                s.push(*(t+3));
6,798✔
2451

2452
                // condition: one parent and equal operator as parent
2453
                if (numpar[i]==1 && *(instr[i]+1)==*(instr[par[i]]+1))
3,408✔
2454
                        par[i] = par[par[i]]; // The expr into which we subst par[i] to get i
174✔
2455
                else
2456
                        par[i] = i;
3,234✔
2457
        }
2458

2459
        // merge instructions into new instructions
2460
        vector<WORD> newinstr;
123✔
2461

2462
        // stack of new expressions, all 0-indexed
2463
        stack<WORD> new_expr;
123✔
2464
        new_expr.push(n-1);
123✔
2465
        vis = vector<bool>(n,false);
246✔
2466

2467
        // skip empty equations (might be introduced by greedy optimizations)
2468
        vector<bool> skip(n,false), skipcoeff(n,false);
123✔
2469
        for (int i=0; i<n; i++)
3,621✔
2470
                if (*(instr[i]+OPTHEAD) == 0) skip[i]=skipcoeff[i]=true;
3,498✔
2471

2472
        // for renumbering merged parents
2473
        vector<int> renum_par(n);
123✔
2474
        for (int i=0; i<n; i++)
3,621✔
2475
                renum_par[i]=i;
3,498✔
2476

2477
        while (!new_expr.empty()) {
3,975✔
2478
                int x = new_expr.top(); new_expr.pop();
3,852✔
2479
                if (vis[x]) continue;
3,852✔
2480
                vis[x] = true;
3,234✔
2481

2482
                // find all instructions with parent=x and copy their arguments
2483
                // into a new expression
2484
                bool first_copy=true;
3,234✔
2485
                int lenidx = newinstr.size()+2;
3,234✔
2486

2487
                // 1-indexed, since signs may occur
2488
                stack<WORD> this_expr;
3,234✔
2489
                this_expr.push(x+1);
3,234✔
2490

2491
                while (!this_expr.empty()) {
6,642✔
2492
                        // pop from stack, determine expr.nr and sign
2493
                        int i = this_expr.top(); this_expr.pop();
3,408✔
2494
                        int sign = SGN(i);
3,408✔
2495
                        i = ABS(i)-1;
3,408✔
2496
                        for (const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t) { // terms in i
10,206✔
2497
                                // don't copy a term if:
2498
                                // (1) skip=true, since then it's merged into the parent
2499
                                // (2) extrasymbol with parent=x, because its children should be copied
2500
                                // (3) coefficient with skipcoeff=true, since it's already copied
2501
                                bool copy = !skip[i];
6,798✔
2502
                                if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL) {
6,798✔
2503
                                        if (par[*(t+3)] == x) {
3,903✔
2504
                                                // parent of term refers to x. we push it with its sign if no skip is true
2505
                                                // and the sign of the expr.
2506
                                                this_expr.push(sign * (skip[i]||skipcoeff[i] ? 1 : SGN(*(t+*t-1))) * (1+*(t+3)));
174✔
2507
                                                if (*(instr[i]+1) == OPER_MUL) sign=1;
174✔
2508
                                                copy=false;
2509
                                        }
2510
                                        else {
2511
                                                new_expr.push(*(t+3));
3,729✔
2512
                                        }
2513
                                }
2514

2515
                                if (*t == 1+ABS(*(t+*t-1)) && skipcoeff[i]) {
6,798✔
2516
                                        copy=false;
×
2517
                                }
2518

2519
                                if (copy) {
6,798✔
2520
                                        // first term, so add header
2521
                                        if (first_copy) {
5,196✔
2522
                                                newinstr.push_back(renum_par[x]);  // expr.nr.
2,520✔
2523
                                                newinstr.push_back(*(instr[x]+1)); // operator
2,520✔
2524
                                                newinstr.push_back(3);                           // length   OPTHEAD?
2,520✔
2525
                                                first_copy=false;
2,520✔
2526
                                        }
2527

2528
                                        // copy term and adjust sign
2529
                                        int thislenidx = newinstr.size();
5,196✔
2530
                                        newinstr.insert(newinstr.end(), t, t+*t); // Put the whole term in newinstr
5,196✔
2531
                                        newinstr.back() *= sign;
5,196✔
2532
                                        if (*(instr[i]+1) == OPER_MUL) sign=1;
5,196✔
2533
                                        newinstr[lenidx] += *t;
5,196✔
2534

2535
                                        // check for moving coefficients up
2536
                                        // necessary condition: MUL-expression with 1 parent
2537
                                        if (move_coeff && *t!=1+ABS(*(t+*t-1)) && *(instr[i]+1)!=OPER_COMMA &&
4,440✔
2538
                                                        *(t+1)==EXTRASYMBOL && numpar[*(t+3)]==1 && *(instr[*(t+3)]+1)==OPER_MUL) {
9,594✔
2539

2540
                                                // coefficient is always the first term (that's how Horner+generate works)
2541
                                                const WORD *t1 = instr[*(t+3)]+OPTHEAD;
1,566✔
2542
                                                const WORD *t2 = t1+*t1;
1,566✔
2543

2544
                                                if (*t1 == 1+ABS(*(t1+*t1-1))) {
1,566✔
2545
                                                        // t1 pointer to a coefficient, so move it
2546

2547
                                                        // remove old coefficient of 1
2548
                                                        WORD *t3 = &*newinstr.end();                                         //
714✔
2549
                                                        int sign2 = SGN(t3[-1]);                                                  //
714✔
2550
                                                        newinstr.erase(newinstr.end()-3, newinstr.end());
714✔
2551
                                                        // count number of arguments; iff it is 2 move the (extra)symbol too
2552
                                                        int numargs=0;
714✔
2553
                                                        for (const WORD *tt=t1; *tt!=0; tt+=*tt) {
2,142✔
2554
                                                                numargs++;
1,428✔
2555
                                                        }
2556
                                                        if (numargs==2 && *(t2+4)==1) {
714✔
2557
                                                                // replace (extra)symbol
2558
                                                                newinstr[newinstr.size()-4] = *(t2+1);
714✔
2559
                                                                newinstr[newinstr.size()-3] = *(t2+2);
714✔
2560
                                                                newinstr[newinstr.size()-2] = *(t2+3);
714✔
2561
                                                                newinstr[newinstr.size()-1] = *(t2+4);
714✔
2562
                                                                sign2 *= SGN(*(t2+*t2-1));                        // was t2[7]
714✔
2563

2564
                                                                // ignore this expression from now on
2565
                                                                skip[*(t+3)]=true;
714✔
2566
                                                                if (*(t2+1)==EXTRASYMBOL)
714✔
2567
                                                                        renum_par[*(t+3)] = *(t2+3);
660✔
2568
                                                        }
2569
                                                        else {
2570
                                                                // otherwise, ignore coefficient from now on
2571
                                                                // we need to collect the signs of the terms
2572
                                                                // first and set them to one. This was forgotten
2573
                                                                // before. Gave occasional errors.
2574
                                                                if ( numargs > 2 || ( numargs == 2 && t2[4] > 1 ) ) {
×
2575
                                                                        for (WORD *tt=(WORD *)t2; *tt!=0; tt+=*tt) {
×
2576
                                                                                if ( tt[*tt-1] < 0 ) {
×
2577
                                                                                        tt[*tt-1] = -tt[*tt-1];
×
2578
                                                                                        sign2 = -sign2;
×
2579
                                                                                }
2580
                                                                        }
2581
                                                                }
2582
                                                                skipcoeff[*(t+3)]=true;
×
2583
                                                        }
2584

2585
                                                        // add new coefficient
2586
                                                        newinstr.insert(newinstr.end(), t1+1, t1+*t1);
714✔
2587
                                                        newinstr.back() *= sign2;
714✔
2588
                                                        newinstr[thislenidx] += ABS(newinstr.back()) - 3;
714✔
2589
                                                        newinstr[lenidx] += ABS(newinstr.back()) - 3;
714✔
2590
                                                }
2591
                                        }
2592
                                }
2593
                        }
2594
                }
2595

2596
                // if something has been copied, add trailing zero
2597
                if (!first_copy) {
3,234✔
2598
                        newinstr.push_back(0);
2,520✔
2599
                        newinstr[lenidx]++;
2,520✔
2600
                }
2601
        }
3,234✔
2602

2603
        // renumber the expressions to 0,1,2,..,; only keep expressions with
2604
        // skip=false which are their own parent after a renumbering in case
2605
        // of moved coefficients
2606

2607
        // find renumber scheme
2608
        vector<int> renum(n,-1);
123✔
2609
        int next=0;
123✔
2610
        for (int i=0; i<n; i++)
3,621✔
2611
                if (!skip[i] && renum_par[par[i]]==i) renum[renum_par[i]]=next++;
3,498✔
2612

2613
        // find new instruction index
2614
        tbegin = &*newinstr.begin();
123✔
2615
        tend = tbegin+newinstr.size();
123✔
2616
        for (const WORD *t=tbegin; t!=tend; t+=*(t+2))
2,643✔
2617
                instr[renum[*t]] = t;
2,520✔
2618

2619
        // renumbering expressions and sort them lexicographically (in
2620
        // new_instr they are in the preorder of the expression tree)
2621
        vector<WORD> sortinstr;
123✔
2622
        for (int i=0; i<next; i++) {
2,643✔
2623
                int idx = sortinstr.size();
2,520✔
2624
                sortinstr.insert(sortinstr.end(), instr[i], instr[i]+*(instr[i]+2));
2,520✔
2625

2626
                sortinstr[idx] = renum[sortinstr[idx]];
2,520✔
2627

2628
                // renumber content of an expression
2629
                for (WORD *t2=&sortinstr[idx]+3; *t2!=0; t2+=*t2)
7,716✔
2630
                        if (*t2!=1+ABS(*(t2+*t2-1)) && *(t2+1)==EXTRASYMBOL)
5,196✔
2631
                                *(t2+3) = renum[*(t2+3)];
3,015✔
2632
        }
2633

2634
#ifdef DEBUG_MORE
2635
        MesPrint ("*** [%s, w=%w] DONE: merge_operators", thetime_str().c_str());
2636
#endif
2637

2638
        return sortinstr;
123✔
2639
}
492✔
2640

2641
/*
2642
          #] merge_operators : 
2643
          #[ class Optimization :
2644
*/
2645

2646
/**  class Optimization
2647
 *
2648
 *         Description
2649
 *         ===========
2650
 *         This object represents an optimization. Its type is a number in
2651
 *         the range 0 to 5. Depending on this type, the variables arg1, arg2
2652
 *         and coeff indicate:
2653
 *
2654
 *         type==0 : optimization of the form x[arg1] ^ arg2 (coeff=empty)
2655
 *         type==1 : optimization of the form x[arg1] * x[arg2] (coeff=empty)
2656
 *         type==2 : optimization of the form x[arg1] * coeff (arg2=0)
2657
 *         type==3 : optimization of the form x[arg1] + coeff (arg2=0)
2658
 *         type==4 : optimization of the form x[arg1] + x[arg2] (coeff=empty)
2659
 *         type==5 : optimization of the form x[arg1] - x[arg2] (coeff=empty)
2660
 *
2661
 *         Here, "x[arg]" represents a symbol (if positive) or an
2662
 *         extrasymbol (if negative). The represented symbol's id is
2663
 *         ABS(x[arg])-1.
2664
 *
2665
 *         "eqns" is a list of equation, where this optimization can be
2666
 *         performed.
2667
 *
2668
 *         "improve" is the total improvement of this optimization.
2669
 */
2670
class optimization {
474✔
2671
public:
2672
        int type, arg1, arg2, improve;
2673
        vector<WORD> coeff;
2674
        vector<int> eqnidxs;
2675

2676
        bool operator< (const optimization &a) const {
4,992✔
2677
                if (arg1 != a.arg1) return arg1 < a.arg1;
4,992✔
2678
                if (arg2 != a.arg2) return arg2 < a.arg2;
1,050✔
2679
                if (type != a.type) return type < a.type;
132✔
2680
                return coeff < a.coeff;
132✔
2681
        }
2682
};
2683

2684
/*
2685
          #] class Optimization : 
2686
          #[ find_optimizations :
2687
*/
2688

2689
/**  Find optimizations
2690
 *
2691
 *         Description
2692
 *         ===========
2693
 *         This method find all optimization of the form described in "class
2694
 *         Optimization". It process every equation, looking for possible
2695
 *         optimizations and stores them in a fast-access data structure to
2696
 *         count the total improvement of an optimization.
2697
 */
2698
vector<optimization> find_optimizations (const vector<WORD> &instr) {
90✔
2699

2700
#ifdef DEBUG_GREEDY
2701
        MesPrint ("*** [%s, w=%w] CALL: find_optimizations", thetime_str().c_str());
2702
#endif
2703

2704
//        #[ Startup :
2705
        // the resulting vector of optimizations
2706
        vector<optimization> res;
90✔
2707

2708
        // a map to count the improvement of an optimization; the
2709
        // improvement is stored as a vector<int> with equation numbers
2710
        map<optimization, vector<int> > cnt;
90✔
2711

2712
        // a map to identify coefficients
2713
        map<vector<int>,int> idx_coeff;
90✔
2714

2715
        const WORD *ebegin = &*instr.begin();
90✔
2716
        const WORD *eend = ebegin+instr.size();
90✔
2717

2718
        for (int optim_type=0; optim_type<=4; optim_type++) {
540✔
2719

2720
                cnt.clear();
450✔
2721
                idx_coeff.clear();
450✔
2722

2723
          optimization optim;
450✔
2724
                optim.type = optim_type;
450✔
2725
//        #] Startup : 
2726

2727
//        #[ type 0 :          find optimizations of the form z=x^n (optim.type==0)
2728
                if (optim_type == 0) {
450✔
2729
                        for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
990✔
2730
                                if (*(e+1) != OPER_MUL) continue;
900✔
2731
                                for (const WORD *t=e+3; *t!=0; t+=*t) {
1,356✔
2732
                                        if (*t == ABS(*(t+*t-1))+1) continue;
864✔
2733
                                        if (*(t+4) > 1) {
720✔
2734
                                                optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
108✔
2735
                                                optim.arg2 = *(t+4);
108✔
2736
                                                cnt[optim].push_back(e-ebegin);
108✔
2737
                                        }
2738
                                }
2739
                        }
2740
                }
2741
//        #] type 0 : 
2742
//        #[ type 1 :         find optimizations of the form z=x*y (optim.type==1)
2743
                if (optim_type == 1) {
450✔
2744
                        for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
990✔
2745
                                if (*(e+1) != OPER_MUL) continue;
900✔
2746

2747
                                for (const WORD *t1=e+3; *t1!=0; t1+=*t1) {
1,356✔
2748
                                        if (*t1 == ABS(*(t1+*t1-1))+1) continue;
864✔
2749
                                        int x1 = (*(t1+1)==SYMBOL ? 1 : -1) * (*(t1+3) + 1);
720✔
2750

2751
                                        for (const WORD *t2=t1+*t1; *t2!=0; t2+=*t2) {
960✔
2752
                                                if (*t2 == ABS(*(t2+*t2-1))+1) continue;
240✔
2753
                                                int x2 = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3) + 1);
240✔
2754

2755
                                                if (*(t1+4) == *(t2+4)) {
240✔
2756
                                                        optim.arg1 = x1;
240✔
2757
                                                        optim.arg2 = x2;
240✔
2758
                                                        if (optim.arg1 > optim.arg2)
240✔
2759
                                                                swap(optim.arg1, optim.arg2);
228✔
2760

2761
                                                        if (*(t1+4) == 1)
240✔
2762
                                                                cnt[optim].push_back(e-ebegin);
240✔
2763
                                                        else {
2764
                                                                // E=x^n*y^n -> z=x*y; E=z^n is double improvement
2765
                                                                cnt[optim].push_back(e-ebegin);
×
2766
                                                                cnt[optim].push_back(e-ebegin);
×
2767
                                                        }
2768
                                                }
2769
                                        }
2770
                                }
2771
                        }
2772
                }
2773
//        #] type 1 : 
2774
//        #[ type 2 :         find optimizations of the form z=c*x (optim.type==2)
2775
                if (optim_type == 2) {
450✔
2776
                        for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
990✔
2777

2778
                                if (*(e+1) == OPER_ADD) {
900✔
2779
                                        // in ADD-equation
2780

2781
                                        for (const WORD *t=e+3; *t!=0; t+=*t) {
1,314✔
2782
                                                if (*t == ABS(*(t+*t-1))+1) continue;
924✔
2783

2784
                                                if (*(t+4)==1) {
918✔
2785
                                                        if (ABS(*(t+*t-1))==3 && *(t+*t-2)==1 && *(t+*t-3)==1) continue;
918✔
2786

2787
                                                        optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
168✔
2788
                                                        optim.coeff.back() = ABS(optim.coeff.back());
84✔
2789

2790
                                                        optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
84✔
2791
                                                        optim.arg2 = 0;
84✔
2792

2793
                                                        cnt[optim].push_back(e-ebegin);
84✔
2794
                                                }
2795
                                        }
2796
                                }
2797
                                else if (*(e+1) == OPER_MUL) {
510✔
2798
                                        // in MUL-equation
2799
                                        optim.coeff.clear();
492✔
2800

2801
                                        for (const WORD *t=e+3; *t!=0; t+=*t)
1,356✔
2802
                                                if (*t == ABS(*(t+*t-1))+1) {
864✔
2803
                                                        if (ABS(*(t+*t-1))==3 && *(t+*t-2)==1 && *(t+*t-3)==1) continue;
144✔
2804
                                                        optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
288✔
2805
                                                        optim.coeff.back() = ABS(optim.coeff.back());
144✔
2806
                                                }
2807

2808
                                        if (!optim.coeff.empty())
492✔
2809
                                                for (const WORD *t=e+3; *t!=0; t+=*t) {
432✔
2810
                                                        if (*t == ABS(*(t+*t-1))+1) continue;
288✔
2811
                                                        if (*(t+4) != 1) continue;
144✔
2812
                                                        optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
132✔
2813
                                                        optim.arg2 = 0;
132✔
2814
                                                        cnt[optim].push_back(e-ebegin);
132✔
2815
                                                }
2816
                                }
2817
                        }
2818
                }
2819
//        #] type 2 : 
2820
//        #[ type 3 :         find optimizations of the form z=x+c (optim.type==3)
2821
                if (optim_type == 3) {
450✔
2822
                        for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
990✔
2823

2824
                                if (*(e+1) != OPER_ADD) continue;
900✔
2825

2826
                                optim.coeff.clear();
390✔
2827

2828
                                for (const WORD *t=e+3; *t!=0; t+=*t)
1,314✔
2829
                                        if (*t == ABS(*(t+*t-1))+1) {
924✔
2830
                                                optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
12✔
2831
                                        }
2832

2833
                                if (!optim.coeff.empty()) {
390✔
2834
                                        for (const WORD *t=e+3; *t!=0; t+=*t) {
18✔
2835
                                                if (*t == ABS(*(t+*t-1))+1) continue;
12✔
2836
                                                if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1) continue;
6✔
2837
                                                if (*(t+*t-1)==-3) optim.coeff.back() *= -1;
6✔
2838

2839
                                                optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
6✔
2840
                                                optim.arg2 = 0;
6✔
2841
                                                cnt[optim].push_back(e-ebegin);
6✔
2842
                                                if (*(t+*t-1)==-3) optim.coeff.back() *= -1;
6✔
2843
                                        }
2844
                                }
2845
                        }
2846
                }
2847
//        #] type 3 : 
2848
//        #[ type 4,5 : find optimizations of the form z=x+y or z=x-y (optim.type==4 or 5)
2849
                if (optim_type == 4) {
450✔
2850
                        for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
990✔
2851
                                if (*(e+1) != OPER_ADD) continue;
900✔
2852

2853
                                for (const WORD *t1=e+3; *t1!=0; t1+=*t1) {
1,314✔
2854
                                        if (*t1 == ABS(*(t1+*t1-1))+1) continue;
924✔
2855
                                        int x1 = (*(t1+1)==SYMBOL ? 1 : -1) * (*(t1+3) + 1);
918✔
2856

2857
                                        for (const WORD *t2=t1+*t1; *t2!=0; t2+=*t2) {
1,698✔
2858
                                                if (*t2 == ABS(*(t2+*t2-1))+1) continue;
780✔
2859
                                                int x2 = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3) + 1);
780✔
2860

2861
                                                int sign1 = SGN(*(t1+*t1-1));
780✔
2862
                                                int sign2 = SGN(*(t2+*t2-1));
780✔
2863

2864
                                                if (BigLong((UWORD *)t1+5, ABS(*(t1+*t1-1))-1, (UWORD *)t2+5, ABS(*(t2+*t2-1))-1) == 0) {
780✔
2865

2866
                                                        optim.type = (sign1 * sign2 == 1 ? 4 : 5); // optimization type
696✔
2867
                                                        optim.arg1 = x1;
696✔
2868
                                                        optim.arg2 = x2;
696✔
2869
                                                        if (optim.arg1 > optim.arg2) {
696✔
2870
                                                                swap(optim.arg1, optim.arg2);
366✔
2871
                                                        }
2872

2873
                                                        if (ABS(*(t1+*t1-1))==3 && *(t1+*t1-2)==1 && *(t1+*t1-3)==1)
696✔
2874
                                                                cnt[optim].push_back(e-ebegin);
696✔
2875
                                                        else {
2876
                                                                // E=2x+2y -> z=x+y; E=2z is improvement bby itself
2877
                                                                cnt[optim].push_back(e-ebegin);
×
2878
                                                                cnt[optim].push_back(e-ebegin);
×
2879
                                                        }
2880
                                                }
2881
                                        }
2882
                                }
2883
                        }
2884
                }
2885
//        #] type 4,5 : 
2886
//        #[ add :
2887

2888
                // add optimizations with positive improvement to the result
2889
                for (map<optimization, vector<int> >::iterator i=cnt.begin(); i!=cnt.end(); i++) {
1,704✔
2890
                        int improve = i->second.size() - 1;
1,254✔
2891
                        if (improve > 0) {
1,254✔
2892
                                res.push_back(i->first);
12✔
2893
                                res.back().improve = improve;
12✔
2894
                                res.back().eqnidxs = i->second;
12✔
2895

2896
                                // remove duplicates, that were add to get the correct improvement
2897
                                res.back().eqnidxs.erase(unique(res.back().eqnidxs.begin(), res.back().eqnidxs.end()), res.back().eqnidxs.end());
12✔
2898
                        }
2899
                }
2900
        }
450✔
2901
//        #] add : 
2902

2903
#ifdef DEBUG_GREEDY
2904
        MesPrint ("*** [%s, w=%w] DONE: find_optimizations",thetime_str().c_str());
2905
#endif
2906

2907
        return res;
90✔
2908
}
90✔
2909

2910
/*
2911
          #] find_optimizations : 
2912
          #[ do_optimization :
2913
*/
2914

2915
/**  Do optimization
2916
 *
2917
 *         Description
2918
 *         ===========
2919
 *         This method performs an optimization. It scans through the
2920
 *         equations of "optim.eqnidxs" and looks in which this optimization
2921
 *         can still be performed (due to other performed optimizations this
2922
 *         isn't always the case). If possible, it substitutes the common
2923
 *         subexpression by a new extra symbol numbered "newid". Finally,
2924
 *         the new extrasymbol is defined accordingly.
2925
 *
2926
 *         Substitutions may lead to trivial equations of the form "Zi=Zj",
2927
 *         but these are removed in the end of the method. The method returns
2928
 *         whether the substitution has been done once or more (or not).
2929
 */
2930

2931
bool do_optimization (const optimization optim, vector<WORD> &instr, int newid) {
12✔
2932

2933
//        #[ Debug code :
2934
#ifdef DEBUG_GREEDY
2935
        if (optim.type==0)
2936
                MesPrint ("*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d^%d)",
2937
                  thetime_str().c_str(), optim.improve,
2938
                optim.arg1>0?'x':'Z', ABS(optim.arg1)-1, optim.arg2);
2939
        else if (optim.type==1 || optim.type>=4)
2940
                MesPrint ("*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d%c%c%d)",
2941
                thetime_str().c_str(), optim.improve,
2942
                optim.arg1>0?'x':'Z', ABS(optim.arg1)-1,
2943
                optim.type==1 ? '*' : optim.type==4 ? '+' : '-',
2944
                optim.arg2>0?'x':'Z', ABS(optim.arg2)-1);
2945
        else {
2946
        WORD n = optim.coeff.back()/2;
2947
        UBYTE num[BITSINWORD*ABS(n)], den[BITSINWORD*ABS(n)];
2948
        PrtLong((UWORD *)&optim.coeff[0], n, num);
2949
        PrtLong((UWORD *)&optim.coeff[ABS(n)], ABS(n), den);
2950
                MesPrint ("*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d%c%s/%s)",
2951
                thetime_str().c_str(), optim.improve,
2952
                optim.arg1>0?'x':'Z', ABS(optim.arg1)-1,
2953
                optim.type==2 ? '*' : '+', num,den);
2954
  }
2955
#endif
2956
//        #] Debug code : 
2957

2958
        bool substituted = false;
12✔
2959
        WORD *ebegin = &*instr.begin();
12✔
2960

2961
//        #[ type 0 : substitution of the form z=x^n (optim.type==0)
2962
        if (optim.type == 0) {
12✔
2963

2964
                int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
12✔
2965
                int varnumx  = ABS(optim.arg1) - 1;
12✔
2966
                int n = optim.arg2;
12✔
2967

2968
                for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
36✔
2969
                        WORD *e = ebegin + optim.eqnidxs[i];
24✔
2970
                        if (*(e+1) != OPER_MUL) continue;
24✔
2971

2972
                        // scan through equation
2973
                        for (WORD *t=e+3; *t!=0; t+=*t) {
60✔
2974
                                if (*t == ABS(*(t+*t-1))+1) continue;
36✔
2975
                                if (*(t+1)==vartypex &&
24✔
2976
                                                *(t+3)==varnumx &&
24✔
2977
                                                *(t+4) % n == 0) {
24✔
2978

2979
                                        // substitute
2980
                                        *(t+1) = EXTRASYMBOL;
24✔
2981
                                        *(t+3) = newid;
24✔
2982
                                        *(t+4) /= n;
24✔
2983

2984
                                        substituted = true;
24✔
2985
                                }
2986
                        }
2987
                }
2988

2989
                if (!substituted) {
12✔
2990
#ifdef DEBUG_GREEDY
2991
                        MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
2992
#endif
2993
                        return false;
×
2994
                }
2995

2996
                // add extra equation (Tnew = x^n)
2997
                instr.push_back(newid);    // eqn.nr
12✔
2998
                instr.push_back(OPER_MUL); // operator
12✔
2999
                instr.push_back(12);           // total length
12✔
3000
                instr.push_back(8);            // term length
12✔
3001
                instr.push_back(vartypex); // (extra)symbol
12✔
3002
                instr.push_back(4);            // symbol length
12✔
3003
                instr.push_back(varnumx);  // symbol id
12✔
3004
                instr.push_back(n);            // power
12✔
3005
                instr.push_back(1);
12✔
3006
                instr.push_back(1);            // coefficient 1
12✔
3007
                instr.push_back(3);
12✔
3008
                instr.push_back(0);            // trailing 0
12✔
3009
        }
3010
//        #] type 0 : 
3011
//        #[ type 1 : substitution of the form z=x*y (optim.type==1)
3012
        if (optim.type == 1) {
12✔
3013

3014
                int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
×
3015
                int varnumx  = ABS(optim.arg1) - 1;
×
3016
                int vartypey = optim.arg2>0 ? SYMBOL : EXTRASYMBOL;
×
3017
                int varnumy  = ABS(optim.arg2) - 1;
×
3018

3019
                for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
×
3020
                        WORD *e = ebegin + optim.eqnidxs[i];
×
3021
                        if (*(e+1) != OPER_MUL) continue;
×
3022

3023
                        // scan through equation
3024
                        int powx=0, powy=0;
×
3025
                        for (WORD *t=e+3; *t!=0; t+=*t) {
×
3026
                                if (*t == ABS(*(t+*t-1))+1) continue;
×
3027
                                if (*(t+1)==vartypex && *(t+3)==varnumx) powx = *(t+4);
×
3028
                                if (*(t+1)==vartypey && *(t+3)==varnumy) powy = *(t+4);
×
3029
                        }
3030

3031
                        // substitute if found
3032
                        if (powx>0 && powy>0 && powx==powy) {
×
3033

3034
                                WORD sign = 1;
3035
                                WORD *newt = e+3;
3036

3037
                                for (WORD *t=e+3; *t!=0;) {
×
3038
                                        int dt=*t;
×
3039

3040
                                        if (*t == ABS(*(t+*t-1))+1 ||
×
3041
                                                        (!(*(t+1)==vartypex && *(t+3)==varnumx) &&
×
3042
                                                         !(*(t+1)==vartypey && *(t+3)==varnumy))) {
×
3043
                                                memmove(newt, t, *t*sizeof(WORD));
×
3044
                                                newt += dt;
×
3045
                                        }
3046
                                        else {
3047
                                                sign *= SGN(*(t+*t-1));
×
3048
                                        }
3049

3050
                                        t+=dt;
×
3051
                                }
3052

3053
                                *newt++ = 8;                   // term length
×
3054
                                *newt++ = EXTRASYMBOL; // extrasymbol
×
3055
                                *newt++ = 4;                   // symbol length
×
3056
                                *newt++ = newid;           // symbol id
×
3057
                                *newt++ = powx;            // power
×
3058
                                *newt++ = 1;
×
3059
                                *newt++ = 1;                   // coefficient +/-1
×
3060
                                *newt++ = 3*sign;
×
3061
                                *newt++ = 0;                   // trailing 0
×
3062

3063
                                substituted = true;
×
3064
                        }
3065
                }
3066

3067
                if (!substituted) {
×
3068
#ifdef DEBUG_GREEDY
3069
                        MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3070
#endif
3071
                        return false;
×
3072
                }
3073

3074
                // add extra equation (Tnew = x*y)
3075
                instr.push_back(newid);    // eqn.nr
×
3076
                instr.push_back(OPER_MUL); // operator
×
3077
                instr.push_back(20);           // total length
×
3078
                instr.push_back(8);            // LHS length
×
3079
                instr.push_back(vartypex); // (extra)symbol
×
3080
                instr.push_back(4);            // symbol length
×
3081
                instr.push_back(varnumx);  // symbol id
×
3082
                instr.push_back(1);            // power 1
×
3083
                instr.push_back(1);
×
3084
                instr.push_back(1);            // coefficient 1
×
3085
                instr.push_back(3);
×
3086
                instr.push_back(8);            // RHS length
×
3087
                instr.push_back(vartypey); // (extra)symbol
×
3088
                instr.push_back(4);            // symbol length
×
3089
                instr.push_back(varnumy);  // symbol id
×
3090
                instr.push_back(1);            // power 1
×
3091
                instr.push_back(1);
×
3092
                instr.push_back(1);            // coefficient 1
×
3093
                instr.push_back(3);
×
3094
                instr.push_back(0);            // trailing 0
×
3095
        }
3096
//        #] type 1 : 
3097
//        #[ type 2 : substitution of the form z=c*x (optim.type==2)
3098

3099
        if (optim.type == 2) {
12✔
3100
                int vartype = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
×
3101
                int varnum        = ABS(optim.arg1) - 1;
×
3102

3103
                WORD ncoeff = optim.coeff.back();
×
3104

3105
                // scan through equations
3106
                for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
×
3107
                        WORD *e = ebegin + optim.eqnidxs[i];
×
3108

3109
                        if (*(e+1) == OPER_ADD) {
×
3110
                                // scan through ADD-equation
3111
                                for (WORD *t=e+3; *t!=0; t+=*t) {
×
3112

3113
                                        if (*t == ABS(*(t+*t-1))+1) continue;
×
3114

3115
                                        if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1 &&
×
3116
                                                        BigLong((UWORD *)&optim.coeff[0],ncoeff-1,
×
3117
                                                                                        (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0) {
×
3118
                                                // substitute
3119

3120
                                                int sign = SGN(*(t+*t-1));
×
3121

3122
                                                WORD *tend = t;
×
3123
                                                while (*tend!=0) tend+=*tend;
×
3124
                                                WORD nmove = tend - t - *t;
×
3125
                                                memmove(t, t+*t, nmove*sizeof(WORD));
×
3126
                                                t += nmove;
×
3127

3128
                                                *t++ = 8;                        // term length
×
3129
                                                *t++ = EXTRASYMBOL; // (extra)symbol
×
3130
                                                *t++ = 4;                        // symbol length
×
3131
                                                *t++ = newid;                // symbol id
×
3132
                                                *t++ = 1;                        // power of 1
×
3133
                                                *t++ = 1;
×
3134
                                                *t++ = 1;                        // coefficient of +/-1
×
3135
                                                *t++ = 3 * sign;
×
3136
                                                *t++ = 0;                        // trailing 0
×
3137

3138
                                                substituted = true;
×
3139
                                                break;
×
3140
                                        }
3141
                                }
3142
                        }
3143
                        else if (*(e+1) == OPER_MUL) {
×
3144

3145
                                bool coeff_match=false, var_match=false;
×
3146
                                int sign = 1;
×
3147

3148
                                // scan through MUL-equation
3149
                                for (WORD *t=e+3; *t!=0; t+=*t) {
×
3150
                                        if (*t == ABS(*(t+*t-1))+1 && BigLong((UWORD *)&optim.coeff[0],ncoeff-1,
×
3151
                                                                                                                                                                                                (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0) {
×
3152
                                                coeff_match = true;
×
3153
                                                sign *= SGN(*(t+*t-1));
×
3154
                                        }
3155
                                        else if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1) {
×
3156
                                                var_match = true;
×
3157
                                                sign *= SGN(*(t+*t-1));
×
3158
                                        }
3159
                                }
3160

3161
                                // substitute if found
3162
                                if (coeff_match && var_match) {
×
3163

3164
                                        WORD *newt = e+3;
3165

3166
                                        for (WORD *t=e+3; *t!=0;) {
×
3167

3168
                                                int dt=*t;
×
3169

3170
                                                if (*t!=ABS(*(t+*t-1))+1 && !(*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)) {
×
3171
                                                        memmove(newt, t, dt*sizeof(WORD));
×
3172
                                                        newt += dt;
×
3173
                                                }
3174

3175
                                                t+=dt;
×
3176
                                        }
3177

3178
                                        *newt++ = 8;                   // term length
×
3179
                                        *newt++ = EXTRASYMBOL; // extrasymbol
×
3180
                                        *newt++ = 4;                   // symbol length
×
3181
                                        *newt++ = newid;           // symbol id
×
3182
                                        *newt++ = 1;                   // power of 1
×
3183
                                        *newt++ = 1;
×
3184
                                        *newt++ = 1;                   // coefficient of +/-1
×
3185
                                        *newt++ = 3 * sign;
×
3186
                                        *newt++ = 0;                   // trailing 0
×
3187

3188
                                        substituted = true;
×
3189
                                }
3190
                        }
3191
                }
3192

3193
                if (!substituted) {
×
3194
#ifdef DEBUG_GREEDY
3195
                        MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3196
#endif
3197
                        return false;
×
3198
                }
3199

3200
                // add extra equation (Tnew = c*y)
3201
                instr.push_back(newid);                    // eqn.nr
×
3202
                instr.push_back(OPER_ADD);                   // operator
×
3203
                instr.push_back(9+ABS(ncoeff));    // total length
×
3204
                instr.push_back(5+ABS(ncoeff));    // term length
×
3205
                instr.push_back(vartype);                   // (extra)symbol
×
3206
                instr.push_back(4);                            // symbol length
×
3207
                instr.push_back(varnum);                   // symbol id
×
3208
                instr.push_back(1);                            // power of 1
×
3209
                for (int i=0; i<ABS(ncoeff); i++)  // coefficient
×
3210
                        instr.push_back(optim.coeff[i]);
×
3211
                instr.push_back(0);                            // trailing 0
×
3212
        }
3213
//        #] type 2 : 
3214
//        #[ type 3 : substitution of the form z=x+c (optim.type==3)
3215
        if (optim.type == 3) {
12✔
3216
                int vartype = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
×
3217
                int varnum        = ABS(optim.arg1) - 1;
×
3218

3219
                WORD ncoeff = optim.coeff.back();
×
3220

3221
                // scan through equation
3222
                for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
×
3223
                        WORD *e = ebegin + optim.eqnidxs[i];
×
3224

3225
                        if (*(e+1) != OPER_ADD) continue;
×
3226

3227
                        int coeff_match=0, var_match=0;
×
3228

3229
                        for (WORD *t=e+3; *t!=0; t+=*t) {
×
3230
                                if (*t == ABS(*(t+*t-1))+1 && BigLong((UWORD *)&optim.coeff[0],ABS(ncoeff)-1,
×
3231
                                                                                                                                                                                        (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0)
×
3232
                                        coeff_match = SGN(ncoeff) * SGN(*(t+*t-1));
×
3233
                                else if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)
×
3234
                                        var_match = SGN(*(t+7));
×
3235
                        }
3236

3237
                        // substitute if found (x+c and -x-c and matches)
3238
                        if (coeff_match * var_match == 1) {
×
3239

3240
                                WORD *newt = e+3;
3241

3242
                                for (WORD *t=e+3; *t!=0;) {
×
3243
                                        int dt=*t;
×
3244
                                        if (*t!=ABS(*(t+*t-1))+1 && !(*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)) {
×
3245
                                                memmove(newt, t, dt*sizeof(WORD));
×
3246
                                                newt += dt;
×
3247
                                        }
3248
                                        t+=dt;
×
3249
                                }
3250

3251
                                *newt++ = 8;                        // term length
×
3252
                                *newt++ = EXTRASYMBOL;        // extrasymbol
×
3253
                                *newt++ = 4;                        // symbol length
×
3254
                                *newt++ = newid;                // symbol id
×
3255
                                *newt++ = 1;                        // power of 1
×
3256
                                *newt++ = 1;
×
3257
                                *newt++ = 1;                        // coefficient of +/-1
×
3258
                                *newt++ = 3*coeff_match;
×
3259
                                *newt++ = 0;                        // trailing zero
×
3260

3261
                                substituted = true;
×
3262
                        }
3263
                }
3264

3265
                if (!substituted) {
×
3266
#ifdef DEBUG_GREEDY
3267
                        MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3268
#endif
3269
                        return false;
×
3270
                }
3271

3272
                // add extra equation (Tnew = x+c)
3273
                instr.push_back(newid);                   // eqn.nr
×
3274
                instr.push_back(OPER_ADD);                  // operator
×
3275
                instr.push_back(13+ABS(ncoeff));  // total length
×
3276
                instr.push_back(8);                           // x-term length
×
3277
                instr.push_back(vartype);                  // (extra)symbol
×
3278
                instr.push_back(4);                           // symbol length
×
3279
                instr.push_back(varnum);                  // symbol id
×
3280
                instr.push_back(1);                           // power of 1
×
3281
                instr.push_back(1);
×
3282
                instr.push_back(1);                           // coefficient of 1
×
3283
                instr.push_back(3);
×
3284
                instr.push_back(ABS(ncoeff)+1);   // c-term length
×
3285
                for (int i=0; i<ABS(ncoeff); i++) // coefficient
×
3286
                        instr.push_back(optim.coeff[i]);
×
3287
                instr.push_back(0);                           // trailing zero
×
3288
        }
3289
//        #] type 3 : 
3290
//        #[ type 4,5 : substitution of the form z=x+y or z=x-y (optim.type=4 or 5)
3291
        if (optim.type >= 4) {
12✔
3292

3293
                int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
×
3294
                int varnumx  = ABS(optim.arg1) - 1;
×
3295
                int vartypey = optim.arg2>0 ? SYMBOL : EXTRASYMBOL;
×
3296
                int varnumy  = ABS(optim.arg2) - 1;
×
3297

3298
                // scan through equations
3299
                for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
×
3300
                        WORD *e = ebegin + optim.eqnidxs[i];
×
3301

3302
                        if (*(e+1) != OPER_ADD) continue;
×
3303

3304
                        const WORD *coeffx=NULL, *coeffy=NULL;
×
3305
                        WORD ncoeffx=0,ncoeffy=0;
×
3306

3307
                        // looks for terms
3308
                        for (WORD *t=e+3; *t!=0; t+=*t) {
×
3309
                                if (*t == ABS(*(t+*t-1))+1) continue; // constant
×
3310
                                if (*(t+1)==vartypex && *(t+3)==varnumx && *(t+4)==1) {
×
3311
                                        coeffx = t+5;
×
3312
                                        ncoeffx = *(t+*t-1);
×
3313
                                }
3314
                                if (*(t+1)==vartypey && *(t+3)==varnumy && *(t+4)==1) {
×
3315
                                        coeffy = t+5;
×
3316
                                        ncoeffy = *(t+*t-1);
×
3317
                                }
3318
                        }
3319

3320
                        // check signs (type=4: x+y and -x-y, type=5: x-y and -x+y) ??????
3321
                        // check signs (type=4: x+y, type=5: x-y) !!!!!!!!!!
3322
                        if (SGN(ncoeffx) * SGN(ncoeffy) * (optim.type==4 ? 1 : -1) == 1) {
×
3323
                                // check absolute value of coefficients
3324
                                if (BigLong((UWORD *)coeffx, ABS(ncoeffx)-1, (UWORD *)coeffy, ABS(ncoeffy)-1) == 0) {
×
3325
                                        // substitute
3326
                                        vector<WORD> coeff(coeffx, coeffx+ABS(ncoeffx));
×
3327

3328
                                        WORD *newt = e+3;
×
3329
/*
3330
if ( optim.type == 5 ) {
3331
        while ( *newt ) newt+=*newt;
3332
        int i = (newt - e) - 3;
3333
        MesPrint("        < %a",i,e+3);
3334
        newt = e+3;
3335
}
3336
*/
3337
                                        for (WORD *t=e+3; *t!=0;) {
×
3338
                                                int dt=*t;
×
3339
                                                if (*t == ABS(*(t+*t-1))+1 ||
×
3340
                                                                (!(*(t+1)==vartypex && *(t+3)==varnumx) &&
×
3341
                                                                 !(*(t+1)==vartypey && *(t+3)==varnumy))) {
×
3342
                                                        memmove(newt, t, dt*sizeof(WORD));
×
3343
                                                        newt += dt;
×
3344
                                                }
3345
                                                t+=dt;
×
3346
                                        }
3347

3348
                                        *newt++ = 5 + ABS(ncoeffx);           // term length
×
3349
                                        *newt++ = EXTRASYMBOL;                          // extrasymbol
×
3350
                                        *newt++ = 4;                                          // symbol length
×
3351
                                        *newt++ = newid;                                  // symbol id
×
3352
                                        *newt++ = 1;                                          // power of 1
×
3353
                                        for (int j=0; j<ABS(ncoeffx); j++)// coefficient
×
3354
                                                *newt++ = coeff[j];
×
3355
                                        *newt++ = 0;                                          // trailing 0
×
3356
                                        substituted = true;
×
3357
/*
3358
if ( optim.type == 5 ) {
3359
        int i = (newt - e) - 4;
3360
        MesPrint("        > %a",i,e+3);
3361
}
3362
*/
3363
                                }
×
3364
                        }
3365
                }
3366

3367
                if (!substituted) {
×
3368
#ifdef DEBUG_GREEDY
3369
                        MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3370
#endif
3371
                        return false;
×
3372
                }
3373
/*
3374
if ( optim.type == 5 )
3375
MesPrint ("improve=%d, %c%d%c%c%d)", optim.improve,
3376
        optim.arg1>0?'x':'Z', ABS(optim.arg1)-1,
3377
        optim.type==1 ? '*' : optim.type==4 ? '+' : '-',
3378
        optim.arg2>0?'x':'Z', ABS(optim.arg2)-1);
3379
*/
3380
                // add extra equation (Tnew = x+/-y)
3381
                instr.push_back(newid);    // eqn.nr
×
3382
                instr.push_back(OPER_ADD); // operator
×
3383
                instr.push_back(20);           // total length
×
3384
                instr.push_back(8);            // term length
×
3385
                instr.push_back(vartypex); // (extra)symbol
×
3386
                instr.push_back(4);            // symbol length
×
3387
                instr.push_back(varnumx);  // symbol id
×
3388
                instr.push_back(1);            // power of 1
×
3389
                instr.push_back(1);
×
3390
                instr.push_back(1);            // coefficient of 1
×
3391
                instr.push_back(3);
×
3392
                instr.push_back(8);            // term length
×
3393
                instr.push_back(vartypey); // (extra)symbol
×
3394
                instr.push_back(4);            // symbol length
×
3395
                instr.push_back(varnumy);  // symbol id
×
3396
                instr.push_back(1);            // power of 1
×
3397
                instr.push_back(1);
×
3398
                instr.push_back(1);            // coefficient of +/-1
×
3399
                instr.push_back(3*(optim.type==4?1:-1));
×
3400
                instr.push_back(0);            // trailing 0
×
3401
        }
3402
//        #] type 4,5 : 
3403
//        #[ trivial :  remove trivial equations of the form Zi = +/-Zj
3404
        vector<int> renum(newid+1, 0);
12✔
3405
        bool do_renum=false;
12✔
3406

3407
        // vector may be moved when it is extended
3408
        ebegin = &*instr.begin();
12✔
3409

3410
        for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
36✔
3411
                WORD *e = ebegin + optim.eqnidxs[i];
24✔
3412
                WORD *t = e+3;
24✔
3413
                if (*t==0) continue;          // empty (removed) equation
24✔
3414
                if (*(t+*t)!=0) continue; // more than 1 term
24✔
3415
                if (*t!=8) continue;          // term not of correct form
12✔
3416
                if (*(t+4)!=1) continue;  // power != 1
12✔
3417
                if (*(t+5)!=1 || *(t+6)!=1) continue; // coeff != +/-1
12✔
3418

3419
                // trivial term, so renumber this one
3420
                renum[*e] = SGN(*(t+7)) * (*(t+3) + 1);
12✔
3421
                do_renum = true;
12✔
3422

3423
                // remove equation
3424
                *t=0;
12✔
3425
        }
3426
//        #] trivial : 
3427
//        #[ renumbering :
3428

3429
        // there are renumberings to be done, so loop through all equations
3430
        if (do_renum) {
12✔
3431
                WORD *eend = ebegin+instr.size();
12✔
3432

3433
                for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
264✔
3434
                        for (WORD *t=e+3; *t!=0; t+=*t) {
732✔
3435
                                if (*t == ABS(*(t+*t-1))+1) continue;
480✔
3436
                                if (*(t+1)==EXTRASYMBOL && renum[*(t+3)]!=0) {
420✔
3437
                                        *(t+*t-1) *= SGN(renum[*(t+3)]);
12✔
3438
                                        *(t+3) = ABS(renum[*(t+3)]) - 1;
12✔
3439
                                }
3440
                        }
3441
                }
3442
        }
3443
//        #] renumbering : 
3444

3445
#ifdef DEBUG_GREEDY
3446
        MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=true", thetime_str().c_str(), optim.improve);
3447
#endif
3448

3449
        return true;
12✔
3450
}
12✔
3451

3452
/*
3453
          #] do_optimization : 
3454
          #[ partial_factorize :
3455
*/
3456

3457
/**  Partial factorization of instructions
3458
 *
3459
 *         Description
3460
 *         ===========
3461
 *         This method performs partial factorization of instructions. In
3462
 *         particular the following instructions
3463
 *
3464
 *           Z1 = x*a*b
3465
 *           Z2 = x*c*d*e
3466
 *           Z3 = 2*x + Z1 + Z2 + more
3467
 *
3468
 *         are replaced by
3469
 *
3470
 *           Z1 = a*b
3471
 *           Z2 = c*d*e
3472
 *           Z3 = Zj + more
3473
 *           Zi = 2 + Z1 + Z2
3474
 *           Zj = x*Zi
3475
 *
3476
 *         Here it is necessary that no other equations refer to Z1 and
3477
 *         Z2. The generation of trivial instructions (Zi=Zj or Zi=x) is
3478
 *         prevented.
3479
 */
3480
int partial_factorize (vector<WORD> &instr, int n, int improve) {
90✔
3481

3482
#ifdef DEBUG_GREEDY
3483
        MesPrint ("*** [%s, w=%w] CALL: partial_factorize (n=%d)", thetime_str().c_str(), n);
3484
#endif
3485

3486
        GETIDENTITY;
90✔
3487

3488
        // get starting positions of instructions
3489
        vector<int> instr_idx(n);
90✔
3490
        WORD *ebegin = &*instr.begin();
90✔
3491
        WORD *eend = ebegin+instr.size();
90✔
3492
        for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
1,002✔
3493
                instr_idx[*e] = e - ebegin;
912✔
3494
        }
3495

3496
        // get reference counts
3497
/*
3498
 *        The next construction replaces the vector construction which is
3499
 *        rather costly for valgrind (and maybe also in normal running)
3500
 */
3501
        int nmax = 2*n;
90✔
3502
        WORD *numpar = (WORD *)Malloc1(nmax*sizeof(WORD),"numpar");
90✔
3503
        for ( int i = 0; i < nmax; i++ ) numpar[i] = 0;
1,914✔
3504
//        vector<WORD> numpar(n);
3505
        for (WORD *e=ebegin; e!=eend; e+=*(e+2))
1,002✔
3506
                for (WORD *t=e+3; *t!=0; t+=*t) {
2,736✔
3507
                        if (*t == ABS(*(t+*t-1))+1) continue;
1,824✔
3508
                        if (*(t+1) == EXTRASYMBOL) numpar[*(t+3)]++;
1,644✔
3509
                }
3510

3511
        // find factorizable expressions
3512
        for (int i=0; i<n; i++) {
1,002✔
3513
                WORD *e = &*instr.begin() + instr_idx[i];
912✔
3514
                if (*(e+1) != OPER_ADD) continue;
912✔
3515

3516
                // count symbol occurrences
3517
                map<WORD,WORD> cnt; // 1-indexed, <0:EXTRASYMBOL, >0:SYMBOL
390✔
3518

3519
                for (WORD *t=e+3; *t!=0; t+=*t) {
1,314✔
3520
                        if (*t==ABS(*(t+*t-1))+1) continue;
924✔
3521

3522
                        // count symbols in t
3523
                        if (*(t+4)==1)
918✔
3524
                                cnt[(*(t+1)==SYMBOL ? 1 : -1) * (*(t+3)+1)]++;
1,434✔
3525

3526
                        // count symbols in extrasymbols of t
3527
                        if (*(t+1)==EXTRASYMBOL && *(t+4)==1 && numpar[*(t+3)]==1) {
918✔
3528
                                WORD *t2 = &*instr.begin() + instr_idx[*(t+3)];
468✔
3529
                                if (*(t2+1) != OPER_MUL) continue;
468✔
3530
                                for (t2+=3; *t2!=0; t2+=*t2) {
1,236✔
3531
                                        if (*t2 == ABS(*(t2+*t2-1))+1) continue;
804✔
3532
                                        if (*(t2+4)==1)
660✔
3533
                                                cnt[(*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3)+1)]++;
912✔
3534
                                }
3535
                        }
3536
                }
3537

3538
                // find most-occurring symbol
3539
                WORD x=0, best=0;
390✔
3540
                for (map<WORD,WORD>::iterator it=cnt.begin(); it!=cnt.end(); it++)
1,908✔
3541
                        if (it->second > best) { x=it->first; best=it->second; }
1,518✔
3542

3543
                // occurrence>=2 and occurrence>improve, so factorize
3544

3545
                if (best>=2 && best>improve) {
390✔
3546
                        // initialize new equation (Zi from example above)
3547
                        vector<WORD> new_eqn;
×
3548
                        new_eqn.push_back(n);
×
3549
                        new_eqn.push_back(OPER_ADD);
×
3550
                        new_eqn.push_back(0); // length
×
3551

3552
                        WORD dt;
×
3553
                        WORD *newt=e+3;
×
3554
                        for (WORD *t=e+3; *t!=0; t+=dt) {
×
3555
                                dt = *t;
×
3556
                                bool keep=true;
×
3557

3558
                                if (*t!=ABS(*(t+*t-1))+1) {
×
3559

3560
                                        // factorized symbol is in t itself
3561
                                        if (*(t+4)==1) {
×
3562
                                                WORD y = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3)+1);
×
3563
                                                if (y==x) {
×
3564
                                                        new_eqn.push_back(*t-4);
×
3565
                                                        new_eqn.insert(new_eqn.end(), t+5, t+dt);
×
3566
                                                        keep=false;
×
3567
                                                }
3568
                                        }
3569

3570
                                        // look in extrasymbol of t with ref.count=1
3571
                                        if (*(t+1)==EXTRASYMBOL && *(t+4)==1 && numpar[*(t+3)]==1) {
×
3572
                                                WORD *t2 = &*instr.begin() + instr_idx[*(t+3)];
×
3573
                                                if (*(t2+1) == OPER_MUL) {
×
3574
                                                        bool has_x=false;
×
3575
                                                        for (t2+=3; *t2!=0; t2+=*t2) {
×
3576
                                                                if (*t2 == ABS(*(t2+*t2-1))+1) continue;
×
3577
                                                                WORD y = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3)+1);
×
3578
                                                                // extrasymbol has factorized symbol
3579
                                                                if (y==x && *(t2+4)==1) {
×
3580
                                                                        has_x=true;
×
3581
                                                                        // copy remaining part
3582
                                                                        WORD *tend=t2+*t2;
×
3583
                                                                        WORD sign = SGN(*(tend-1));
×
3584
                                                                        while (*tend!=0) tend+=*tend;
×
3585
                                                                        int dt2 = tend - (t2+*t2);
×
3586
                                                                        memmove(t2, t2+*t2, (dt2+1)*sizeof(WORD));
×
3587
                                                                        t2 += dt2;
×
3588
                                                                        *(t2-1) *= sign;
×
3589
                                                                        break;
×
3590
                                                                }
3591
                                                        }
3592
                                                        if (has_x) {
×
3593
                                                                // extrasymbol has x, so add it to new equation
3594
                                                                keep=false;
×
3595
                                                                int thisidx=new_eqn.size();
×
3596
                                                                new_eqn.insert(new_eqn.end(), t, t+dt);
×
3597
                                                                t2 = &*instr.begin() + instr_idx[*(t+3)] + 3;
×
3598
                                                                // if becomes trivial, substitute the term
3599
                                                                if (*(t2+*t2)==0) {
×
3600
                                                                        // it's a number
3601
                                                                        if (*t2 == ABS(*(t2+*t2-1))+1) {
×
3602
                                                                                if (ABS(new_eqn[new_eqn.size()-1])==3 && new_eqn[new_eqn.size()-2]==1 && new_eqn[new_eqn.size()-3]==1) {
×
3603
                                                                                        // original equation has coefficient of +/-1, so replace it
3604
                                                                                        WORD sign = SGN(new_eqn.back());
×
3605
                                                                                        new_eqn.erase(new_eqn.begin()+thisidx, new_eqn.end());
×
3606
                                                                                        new_eqn.insert(new_eqn.end(), t2, t2+*t2);
×
3607
                                                                                        new_eqn.back() *= sign;
×
3608
                                                                                        *t2 = 0;
×
3609
                                                                                }
3610
                                                                                else {
3611
                                                                                        // two non-trivial coefficients, so multiply them
3612
                                                                                        // note: untested code (found no way to trigger it)
3613
                                                                                        UWORD *tmp = NumberMalloc("partial_factorize");
×
3614
                                                                                        WORD ntmp=0;
×
3615
                                                                                        MulRat(BHEAD (UWORD *)t2+*t2-ABS(*(t2+*t2-1)), *(t2+*t2-1),
×
3616
                                                                                                                 (UWORD *)&*(new_eqn.end()-ABS(new_eqn.back())), new_eqn.back(),
×
3617
                                                                                                                 tmp, &ntmp);
3618
                                                                                        new_eqn.erase(new_eqn.begin()+thisidx, new_eqn.end());
×
3619
                                                                                        new_eqn.push_back(ABS(ntmp)+1);
×
3620
                                                                                        new_eqn.insert(new_eqn.end(), tmp, tmp+ABS(ntmp));
×
3621
                                                                                        NumberFree(tmp,"partial_factorize");
×
3622
                                                                                        *t2 = 0;
×
3623
                                                                                }
3624
                                                                        }
3625
                                                                        else if (*(t2+4)==1) {
×
3626
                                                                                // it's a variable
3627
                                                                                new_eqn.back() *= SGN(*(t2+*t2-1));
×
3628
                                                                                new_eqn[thisidx+1] = *(t2+1);
×
3629
                                                                                new_eqn[thisidx+2] = *(t2+2);
×
3630
                                                                                new_eqn[thisidx+3] = *(t2+3);
×
3631
                                                                                new_eqn[thisidx+4] = *(t2+4);
×
3632
                                                                                *t2 = 0;
×
3633
                                                                        }
3634
                                                                }
3635
                                                        }
3636
                                                }
3637
                                        }
3638
                                }
3639

3640
                                // no x, so copy it
3641
                                if (keep) {
×
3642
                                        memmove(newt, t, dt*sizeof(WORD));
×
3643
                                        newt += dt;
×
3644
                                }
3645
                        }
3646

3647
                        // finalize new equation
3648
                        new_eqn.push_back(0);
×
3649
                        new_eqn[2] = new_eqn.size();
×
3650

3651
                        bool empty = newt == e+3;
×
3652
                        if ( n+1 >= nmax ) {
×
3653
                                int i, newnmax = nmax*2;
×
3654
                                WORD *newnumpar = (WORD *)Malloc1(newnmax*sizeof(WORD),"newnumpar");
×
3655
                                for ( i = 0; i < n; i++ ) newnumpar[i] = numpar[i];
×
3656
                                for ( ; i < newnmax; i++ ) newnumpar[i] = 0;
×
3657
                                M_free(numpar,"numpar");
×
3658
                                numpar = newnumpar;
3659
                                nmax = newnmax;
3660
                        }
3661
//                        numpar.push_back(0);
3662
                        n++;
×
3663

3664
                        // if original is not empty, add new equation (Zj) to it
3665
                        // otherwise replace it later
3666
                        if (!empty) {
×
3667
                                *newt++ = 8;
×
3668
                                *newt++ = EXTRASYMBOL;
×
3669
                                *newt++ = 4;
×
3670
                                *newt++ = n;
×
3671
                                *newt++ = 1;
×
3672
                                *newt++ = 1;
×
3673
                                *newt++ = 1;
×
3674
                                *newt++ = 3;
×
3675
                                *newt++ = 0;
×
3676
                        }
3677

3678
                        // add new equation to instructions
3679
                        instr_idx.push_back(instr.size());
×
3680
                        instr.insert(instr.end(), new_eqn.begin(), new_eqn.end());
×
3681

3682
                        // generate another new equation (Zj=x*Zi)
3683
                        new_eqn.clear();
×
3684
                        new_eqn.push_back(n);
×
3685
                        new_eqn.push_back(OPER_MUL);
×
3686
                        new_eqn.push_back(20);
×
3687
                        new_eqn.push_back(8);
×
3688

3689
                        // add factorized symbol
3690
                        if (x>0) {
×
3691
                                new_eqn.push_back(SYMBOL);
×
3692
                                new_eqn.push_back(4);
×
3693
                                new_eqn.push_back(x-1);
×
3694
                                new_eqn.push_back(1);
×
3695
                        }
3696
                        else {
3697
                                new_eqn.push_back(EXTRASYMBOL);
×
3698
                                new_eqn.push_back(4);
×
3699
                                new_eqn.push_back(-x-1);
×
3700
                                new_eqn.push_back(1);
×
3701
                        }
3702
                        new_eqn.push_back(1);
×
3703
                        new_eqn.push_back(1);
×
3704
                        new_eqn.push_back(3);
×
3705
                        new_eqn.push_back(8);
×
3706
                        // add new equation (Zi)
3707
                        new_eqn.push_back(EXTRASYMBOL);
×
3708
                        new_eqn.push_back(4);
×
3709
                        new_eqn.push_back(n-1);
×
3710
                        new_eqn.push_back(1);
×
3711
                        new_eqn.push_back(1);
×
3712
                        new_eqn.push_back(1);
×
3713
                        new_eqn.push_back(3);
×
3714
                        new_eqn.push_back(0);
×
3715

3716
                        if (!empty) {
×
3717
                                // add new equation (Zj) to instructions
3718
                                instr_idx.push_back(instr.size());
×
3719
                                instr.insert(instr.end(), new_eqn.begin(), new_eqn.end());
×
3720
                                if ( n+1 >= nmax ) {
×
3721
                                        int i, newnmax = nmax*2;
×
3722
                                        WORD *newnumpar = (WORD *)Malloc1(newnmax*sizeof(WORD),"newnumpar");
×
3723
                                        for ( i = 0; i < n; i++ ) newnumpar[i] = numpar[i];
×
3724
                                        for ( ; i < newnmax; i++ ) newnumpar[i] = 0;
×
3725
                                        M_free(numpar,"numpar");
×
3726
                                        numpar = newnumpar;
3727
                                        nmax = newnmax;
3728
                                }
3729
//                                numpar.push_back(0);
3730
                                n++;
×
3731
                        }
3732
                        else {
3733
                                // replace e with Zj
3734
                                e = &*instr.begin() + instr_idx[i];
×
3735
                                e[1] = OPER_MUL;
×
3736
                                memcpy(e+3, &new_eqn[3], (new_eqn.size()-3)*sizeof(WORD));
×
3737
                        }
3738

3739
                        // decrease i, so this expression is factorized again if possible
3740
                        i--;
×
3741
                }
×
3742
        }
390✔
3743

3744
#ifdef DEBUG_GREEDY
3745
        MesPrint ("*** [%s, w=%w] DONE: partial_factorize (n=%d)", thetime_str().c_str(), n);
3746
#endif
3747
        M_free(numpar,"numpar");
90✔
3748
        return n;
90✔
3749
}
90✔
3750

3751
/*
3752
          #] partial_factorize : 
3753
          #[ optimize_greedy :
3754
*/
3755

3756
/**  Optimize instructions greedily
3757
 *
3758
 *         Description
3759
 *         ===========
3760
 *         This method optimizes an expression greedily. It calls
3761
 *         "find_optimizations" to obtain candidates and performs the best
3762
 *         one(s) by calling "do_optimization".
3763
 *
3764
 *         How many different optimization are done, before
3765
 *         "find_optimization" is called again, is determined by the
3766
 *         settings "greedyminnum" and "greedymaxperc".
3767
 *
3768
 *         During the optimization process, sequences of zeroes are
3769
 *         introduced in the instructions, since moving all instructions
3770
 *         when one gets optimized, is very costly. Therefore, in the end,
3771
 *         the instructions are "compressed" again to remove these extra
3772
 *         zeroes.
3773
 */
3774
vector<WORD> optimize_greedy (vector<WORD> instr, LONG time_limit) {
78✔
3775

3776
#ifdef DEBUG
3777
        int old_num_oper = count_operators(instr);
3778
        MesPrint ("*** [%s, w=%w] CALL: optimize_greedy(numoper=%d)",
3779
                                                thetime_str().c_str(), old_num_oper);
3780
#endif
3781

3782
        LONG start_time = TimeWallClock(1);
78✔
3783

3784
        WORD *ebegin = &*instr.begin();
78✔
3785
        WORD *eend = ebegin+instr.size();
78✔
3786

3787
        // store final equation, since it must be the last equation later
3788
        int final_eqn_idx = 0;
78✔
3789
        int next_eqn = 0;
78✔
3790

3791
        for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
726✔
3792
                next_eqn = *e + 1;
648✔
3793
                final_eqn_idx = e-ebegin;
648✔
3794
        }
3795
        // optimize instructions
3796
        while (TimeWallClock(1)-start_time < time_limit) {
90✔
3797
                int old_next_eqn = next_eqn;
90✔
3798

3799
                // find optimizations
3800
                vector<optimization> optim = find_optimizations(instr);
90✔
3801

3802
                // add_eqnidxs contains modified equations, which might have to be updated later again
3803
                vector<int> add_eqnidxs;
90✔
3804

3805
                // number of optimizations to do
3806
                int num_do_optims = max(AO.Optimize.greedyminnum, (int)optim.size()*AO.Optimize.greedymaxperc/100);
90✔
3807

3808
                // if best improvement is one, do all optimizations
3809
                int best_improve=0;
90✔
3810
                for (int i=0; i<(int)optim.size(); i++)
102✔
3811
                        best_improve = max(best_improve, optim[i].improve);
24✔
3812
                if (best_improve <= 1)
90✔
3813
                        num_do_optims = MAXPOSITIVE;
90✔
3814
                // do a number of optimizations
3815
                while (optim.size() > 0 && num_do_optims-- > 0) {
102✔
3816

3817
                        // find best optimization
3818
                        int best=0;
12✔
3819
                        best_improve=0;
12✔
3820
                        for (int i=0; i<(int)optim.size(); i++)
24✔
3821
                                if (optim[i].improve > best_improve) {
12✔
3822
                                        best=i;
12✔
3823
                                        best_improve=optim[i].improve;
12✔
3824
                                }
3825

3826
                        // add extra equations
3827
                        for (int i=0; i<(int)add_eqnidxs.size(); i++)
12✔
3828
                                optim[best].eqnidxs.push_back(add_eqnidxs[i]);
×
3829

3830
                        // do optimization, update next_eqn if successful
3831
                        int next_idx = instr.size();
12✔
3832
                        if (do_optimization(optim[best], instr, next_eqn)) {
24✔
3833
                                next_eqn++;
12✔
3834
                                add_eqnidxs.push_back(next_idx);
12✔
3835
                        }
3836

3837
                        optim.erase(optim.begin()+best);
12✔
3838
                }
3839

3840
                // partially factorize with improve >= best_improve
3841
                next_eqn = partial_factorize(instr, next_eqn, best_improve);
90✔
3842

3843
                // check whether nothing has changed
3844
                if (next_eqn == old_next_eqn) break;
90✔
3845
        }
90✔
3846

3847
        // add final equation to the back (must be by definition)
3848
        instr.push_back(next_eqn);
78✔
3849
        instr.insert(instr.end(), instr.begin()+final_eqn_idx+1, instr.begin()+final_eqn_idx+instr[final_eqn_idx+2]);
78✔
3850

3851
        // removed original final equation
3852
        instr[final_eqn_idx+3] = 0;
78✔
3853

3854
        // remove extra zeroes
3855
        WORD *t = &instr[0];
78✔
3856

3857
        ebegin = &*instr.begin();
78✔
3858
        eend = ebegin+instr.size();
78✔
3859
        int de=0;
78✔
3860

3861
        for (WORD *e=ebegin; e!=eend; e+=de) {
816✔
3862
                de = *(e+2);
738✔
3863
                int n=3;
738✔
3864
                while (*(e+n) != 0) n+=*(e+n);
2,082✔
3865
                n++;
738✔
3866
                memmove (t, e, n*sizeof(WORD));
738✔
3867
                *(t+2) = n;
738✔
3868
                t += n;
738✔
3869
        }
3870

3871
        instr.resize(t - &instr[0]);
78✔
3872

3873
#ifdef DEBUG
3874
        MesPrint ("*** [%s, w=%w] DONE: optimize_greedy(numoper=%d) : numoper=%d",
3875
                                                thetime_str().c_str(), old_num_oper, count_operators(instr));
3876
#endif
3877

3878
        return instr;
78✔
3879
}
3880

3881
/*
3882
          #] optimize_greedy : 
3883
          #[ recycle_variables :
3884
*/
3885

3886
/**  Recycle variables
3887
 *
3888
 *         Description
3889
 *         ===========
3890
 *         The current input uses many temporary variables. Many of them
3891
 *         become obsolete at some point during the evaluation of the code,
3892
 *         so can be recycled. This method renumbers the temporary
3893
 *         variables, so that they are recycled. Furthermore, the input is
3894
 *         order in depth-first order, so that the instructions can be
3895
 *         performed consecutively.
3896
 *
3897
 *         Implementation details
3898
 *         ======================
3899
 *         First, for each subDAG, an estimate for the number of variables
3900
 *         needed is made. This is done by the following recursive formula:
3901
 *
3902
 *                #vars(x) = max(#vars(ch_i(x)) + i),
3903
 *
3904
 *         with ch_i(x) the i-th child of x, where the childs are ordered
3905
 *         w.r.t. #vars(ch_i). This formula is exact if the input forms a
3906
 *         tree, and otherwise gives a reasonable estimate.
3907
 *
3908
 *         Then, the instructions are reordered in a depth-first order with
3909
 *         childs ordered w.r.t. #vars. Next, the times that variables
3910
 *         become obsolete are found. Each LHS of an instruction is
3911
 *         renumbered to the lowest-numbered temporary variable that is
3912
 *         available at that time.
3913
 */
3914
vector<WORD> recycle_variables (const vector<WORD> &all_instr) {
45✔
3915

3916
#ifdef DEBUG_MORE
3917
        MesPrint ("*** [%s, w=%w] CALL: recycle_variables", thetime_str().c_str());
3918
#endif
3919

3920
        // get starting positions of instructions
3921
        vector<const WORD *> instr;
45✔
3922
        const WORD *tbegin = &*all_instr.begin();
45✔
3923
        const WORD *tend = tbegin+all_instr.size();
45✔
3924
        for (const WORD *t=tbegin; t!=tend; t+=*(t+2))
1,917✔
3925
                instr.push_back(t);
1,872✔
3926
        int n = instr.size();
45✔
3927

3928
        // determine with expressions are connected, how many intermediate
3929
        // are needed (assuming it's a expression tree instead of a DAG) and
3930
        // sort the leaves such that you need a minimal number of variables
3931
        vector<int> vars_needed(n);
45✔
3932
        vector<bool> vis(n,false);
45✔
3933
        vector<vector<int> > conn(n);
45✔
3934

3935
        stack<int> s;
45✔
3936
        s.push(n);
45✔
3937

3938
        while (!s.empty()) {
4,395✔
3939
                int i=s.top(); s.pop();
4,350✔
3940
                if (i>0) {
4,350✔
3941
                        i--;
2,478✔
3942
                        if (vis[i]) continue;
2,478✔
3943
                        vis[i]=true;
1,872✔
3944
                        s.push(-(i+1));
1,872✔
3945

3946
                        // find all connections
3947
                        for (const WORD *t=instr[i]+3; *t!=0; t+=*t)
5,724✔
3948
                                if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL) {
3,852✔
3949
                                        int k = *(t+3);
2,433✔
3950
                                        conn[i].push_back(k);
2,433✔
3951
                                        s.push(k+1);
4,866✔
3952
                                }
3953
                }
3954
                else {
3955
                        i=-i-1;
1,872✔
3956

3957
                        // sort the childs w.r.t. needed variables
3958
                        vector<pair<int,int> > need;
1,872✔
3959
                        for (int j=0; j<(int)conn[i].size(); j++)
4,305✔
3960
                                need.push_back(make_pair(vars_needed[conn[i][j]], conn[i][j]));
2,433✔
3961

3962
                        // keep the comma expression in proper order
3963
                        if (*(instr[i]+1) != OPER_COMMA)
1,872✔
3964
                                sort(need.rbegin(), need.rend());
1,863✔
3965

3966
                        vars_needed[i] = 1;
1,872✔
3967
                        for (int j=0; j<(int)need.size(); j++) {
4,305✔
3968
                                vars_needed[i] = max(vars_needed[i], need[j].first+j);
2,433✔
3969
                                conn[i][j] = need[j].second;
2,433✔
3970
                        }
3971
                }
1,872✔
3972
        }
3973

3974
        // order the instructions in depth-first order and determine the first
3975
        // and last occurrences of variables
3976
        vector<int> order, first(n,0), last(n,0);
45✔
3977
        vis = vector<bool>(n,false);
90✔
3978
        s.push(n);
45✔
3979

3980
        while (!s.empty()) {
4,395✔
3981

3982
                int i=s.top(); s.pop();
4,350✔
3983

3984
                if (i>0) {
4,350✔
3985
                        i--;
2,478✔
3986
                        if (vis[i]) continue;
2,478✔
3987
                        vis[i]=true;
1,872✔
3988
                        s.push(-(i+1));
1,872✔
3989
                        for (int j=(int)conn[i].size()-1; j>=0; j--)
4,305✔
3990
                                s.push(conn[i][j]+1);
4,866✔
3991
                }
3992
                else {
3993
                        i=-i-1;
1,872✔
3994
                        first[i] = last[i] = order.size();
1,872✔
3995
                        order.push_back(i);
1,872✔
3996
                        for (int j=0; j<(int)conn[i].size(); j++) {
4,305✔
3997
                                int k = conn[i][j];
2,433✔
3998
                                last[k] = max(last[k], first[i]);
4,866✔
3999
                        }
4000
                }
4001
        }
4002

4003
        // find the renumbering to recycled variables, where at any time the
4004
        // lowest-indexed variable that can be used is chosen
4005
        int numvar=0;
45✔
4006
        set<int> var;
45✔
4007
        vector<int> renum(n);
45✔
4008

4009
        for (int i=0; i<(int)order.size(); i++) {
1,917✔
4010
                for (int j=0; j<(int)conn[order[i]].size(); j++) {
4,305✔
4011
                        int k = conn[order[i]][j];
2,433✔
4012
                        if (last[k] == i) var.insert(renum[k]);
2,433✔
4013
                }
4014

4015
                if (var.empty()) var.insert(numvar++);
1,872✔
4016
                renum[order[i]] = *var.begin(); var.erase(var.begin());
1,872✔
4017
        }
4018

4019
        // put the number of variables used in a preprocessor variable
4020

4021
        // generate new instructions with the renumbering
4022
        vector<WORD> newinstr;
45✔
4023

4024
        for (int i=0; i<(int)order.size(); i++) {
1,917✔
4025
                int x = order[i];
1,872✔
4026
                int j = newinstr.size();
1,872✔
4027
                newinstr.insert(newinstr.end(), instr[x], instr[x]+*(instr[x]+2));
1,872✔
4028

4029
                newinstr[j] = renum[newinstr[j]];
1,872✔
4030

4031
                for (WORD *t=&newinstr[j+3]; *t!=0; t+=*t)
5,724✔
4032
                        if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL)
3,852✔
4033
                                *(t+3) = renum[*(t+3)];
2,433✔
4034
        }
4035

4036
#ifdef DEBUG_MORE
4037
        MesPrint ("*** [%s, w=%w] DONE: recycle_variables", thetime_str().c_str());
4038
#endif
4039

4040
        return newinstr;
45✔
4041
}
90✔
4042

4043
/*
4044
          #] recycle_variables : 
4045
          #[ optimize_expression_given_Horner :
4046
*/
4047

4048
/**  Optimize expression given a Horner scheme
4049
 *
4050
 *         Description
4051
 *         ===========
4052
 *         This method picks one Horner scheme from the list of best Horner
4053
 *         schemes, applies this scheme to the expression and then,
4054
 *         depending on optimize.settings, does a common subexpression
4055
 *         elimination (CSE) or performs greedy optimizations.
4056
 *
4057
 *         CSE is fast, while greedy might be slow. CSE followed by greedy
4058
 *         is faster than greedy alone, but typically results in slightly
4059
 *         worse code (not proven; just observed).
4060
 */
4061
void optimize_expression_given_Horner () {
45✔
4062

4063
#ifdef DEBUG
4064
        MesPrint ("*** [%s, w=%w] CALL: optimize_expression_given_Horner", thetime_str().c_str());
4065
#endif
4066

4067
        GETIDENTITY;
45✔
4068

4069
        // initialize timer
4070
        LONG start_time = TimeWallClock(1);
45✔
4071
        LONG time_limit = 100 * AO.Optimize.greedytimelimit / (AO.Optimize.horner == O_MCTS ? AO.Optimize.mctsnumkeep : 1);
45✔
4072
        if (time_limit == 0) time_limit=MAXPOSITIVE;
45✔
4073

4074
        // pick a Horner scheme from the list
4075
        LOCK(optimize_lock);
45✔
4076
        vector<WORD> Horner_scheme = optimize_best_Horner_schemes.back();
45✔
4077
        optimize_best_Horner_schemes.pop_back();
45✔
4078
        UNLOCK(optimize_lock);
45✔
4079

4080
//        if ( ( AO.Optimize.debugflags&2 ) == 2 ) {
4081
//                MesPrint ("Scheme: %a",Horner_scheme.size(),&(Horner_scheme[0]));
4082
//        }
4083

4084
        // apply Horner scheme
4085
        vector<WORD> tree = Horner_tree(optimize_expr, Horner_scheme);
45✔
4086

4087
        // generate instructions, eventually with CSE
4088
        vector<WORD> instr;
45✔
4089

4090
        if (AO.Optimize.method == O_CSE || AO.Optimize.method == O_CSEGREEDY)
45✔
4091
                instr = generate_instructions(tree, true);
12✔
4092
        else
4093
                instr = generate_instructions(tree, false);
78✔
4094
        /// eventually do greedy optimizations
4095
        if (AO.Optimize.method == O_CSEGREEDY || AO.Optimize.method == O_GREEDY) {
45✔
4096
                instr = merge_operators(instr, false);
78✔
4097
                instr = optimize_greedy(instr, time_limit-(TimeWallClock(1)-start_time));
78✔
4098
                instr = merge_operators(instr, true);
78✔
4099
                instr = optimize_greedy(instr, time_limit-(TimeWallClock(1)-start_time));
78✔
4100
        }
4101
        instr = merge_operators(instr, true);
90✔
4102

4103
        // recycle the temporary variables
4104
        instr = recycle_variables(instr);
90✔
4105

4106
        // determine the quality of the code and possibly update the best code
4107
        int num_oper = count_operators(instr);
45✔
4108

4109
        LOCK(optimize_lock);
45✔
4110
        if (num_oper < optimize_best_num_oper) {
45✔
4111
                optimize_num_vars = Horner_scheme.size();
27✔
4112
                optimize_best_num_oper = num_oper;
27✔
4113
                optimize_best_instr = instr;
27✔
4114
                optimize_best_vars = vector<WORD>(AN.poly_vars, AN.poly_vars+AN.poly_num_vars);
54✔
4115
        }
4116
        UNLOCK(optimize_lock);
45✔
4117

4118
  // clean poly_vars, that are allocated by Horner_tree
4119
        AN.poly_num_vars = 0;
45✔
4120
        M_free(AN.poly_vars,"poly_vars");
45✔
4121

4122
#ifdef DEBUG
4123
        MesPrint ("*** [%s, w=%w] DONE: optimize_expression_given_Horner", thetime_str().c_str());
4124
#endif
4125
}
45✔
4126

4127
/*
4128
          #] optimize_expression_given_Horner : 
4129
          #[ PF_optimize_expression_given_Horner :
4130
*/
4131
#ifdef WITHMPI
4132

4133
// Initialization.
4134
void PF_optimize_expression_given_Horner_master_init () {
4135
        // Nothing to do for now.
4136
}
4137

4138
// Wait for an idle slave and return the process number.
4139
int PF_optimize_expression_given_Horner_master_next() {
4140

4141
        // Find an idle slave.
4142
        int next;
4143
        PF_LongSingleReceive(PF_ANY_SOURCE, PF_OPT_HORNER_MSGTAG, &next, NULL);
4144
        return next;
4145

4146
}
4147

4148
// The main function on the master.
4149
void PF_optimize_expression_given_Horner_master () {
4150

4151
#ifdef DEBUG
4152
        MesPrint ("*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_master", thetime_str().c_str());
4153
#endif
4154

4155
        // pick a Horner scheme from the list
4156
        vector<WORD> Horner_scheme = optimize_best_Horner_schemes.back();
4157
        optimize_best_Horner_schemes.pop_back();
4158

4159
        // Find an idle slave.
4160
        int next = PF_optimize_expression_given_Horner_master_next();
4161

4162
        // Send a new task to the slave.
4163
        PF_PrepareLongSinglePack();
4164
        int len = Horner_scheme.size();
4165
        PF_LongSinglePack(&len, 1, PF_INT);
4166
        PF_LongSinglePack(&Horner_scheme[0], len, PF_WORD);
4167
        PF_LongSingleSend(next, PF_OPT_HORNER_MSGTAG);
4168

4169
#ifdef DEBUG
4170
        MesPrint ("*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_master", thetime_str().c_str());
4171
#endif
4172

4173
}
4174

4175
// Wait for all the slaves to finish their tasks.
4176
void PF_optimize_expression_given_Horner_master_wait () {
4177

4178
#ifdef DEBUG
4179
        MesPrint ("*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_master_wait", thetime_str().c_str());
4180
#endif
4181

4182
        // Wait for all the slaves.
4183
        for (int i = 1; i < PF.numtasks; i++) {
4184
                int next = PF_optimize_expression_given_Horner_master_next();
4185
                // Send a null task.
4186
                PF_PrepareLongSinglePack();
4187
                int len = 0;
4188
                PF_LongSinglePack(&len, 1, PF_INT);
4189
                PF_LongSingleSend(next, PF_OPT_HORNER_MSGTAG);
4190
        }
4191

4192
        // Combine the result from all the slaves.
4193
        optimize_best_num_oper = INT_MAX;
4194
        for (int i = 1; i < PF.numtasks; i++) {
4195
                PF_LongSingleReceive(PF_ANY_SOURCE, PF_OPT_COLLECT_MSGTAG, NULL, NULL);
4196

4197
                int len;
4198

4199
                // The first integer is the number of operations.
4200
                PF_LongSingleUnpack(&len, 1, PF_INT);
4201

4202
                if (len >= optimize_best_num_oper) continue;
4203

4204
                // Update the best result.
4205
                optimize_best_num_oper = len;
4206
                PF_LongSingleUnpack(&len, 1, PF_INT);
4207
                optimize_best_instr.resize(len);
4208
                PF_LongSingleUnpack(&optimize_best_instr[0], len, PF_WORD);
4209
                PF_LongSingleUnpack(&len, 1, PF_INT);
4210
                optimize_best_vars.resize(len);
4211
                PF_LongSingleUnpack(&optimize_best_vars[0], len, PF_WORD);
4212

4213
                optimize_num_vars = optimize_best_vars.size();        // TODO
4214
        }
4215

4216
#ifdef DEBUG
4217
        MesPrint ("*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_master_wait", thetime_str().c_str());
4218
#endif
4219

4220
}
4221

4222
// The main function on the slaves.
4223
void PF_optimize_expression_given_Horner_slave () {
4224

4225
#ifdef DEBUG
4226
        MesPrint ("*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_slave", thetime_str().c_str());
4227
#endif
4228

4229
        optimize_best_Horner_schemes.clear();
4230
        optimize_best_num_oper = INT_MAX;
4231

4232
        int dummy = 0;
4233
        int len;
4234

4235
        for (;;) {
4236
                // Ask the master for the next task.
4237
                PF_PrepareLongSinglePack();
4238
                PF_LongSinglePack(&dummy, 1, PF_INT);
4239
                PF_LongSingleSend(MASTER, PF_OPT_HORNER_MSGTAG);
4240

4241
                // Get a task from the master.
4242
                PF_LongSingleReceive(MASTER, PF_OPT_HORNER_MSGTAG, NULL, NULL);
4243

4244
                // Length of the task.
4245
                PF_LongSingleUnpack(&len, 1, PF_INT);
4246

4247
                // No task remains.
4248
                if (len == 0) break;
4249

4250
                // Perform the given task.
4251
                optimize_best_Horner_schemes.push_back(vector<WORD>());
4252
                vector<WORD> &Horner_scheme = optimize_best_Horner_schemes.back();
4253
                Horner_scheme.resize(len);
4254
                PF_LongSingleUnpack(&Horner_scheme[0], len, PF_WORD);
4255
                optimize_expression_given_Horner ();
4256
        }
4257

4258
        // Send the result to the master.
4259
        PF_PrepareLongSinglePack();
4260
        PF_LongSinglePack(&optimize_best_num_oper, 1, PF_INT);
4261
        if (optimize_best_num_oper != INT_MAX) {
4262
                len = optimize_best_instr.size();
4263
                PF_LongSinglePack(&len, 1, PF_INT);
4264
                PF_LongSinglePack(&optimize_best_instr[0], len, PF_WORD);
4265
                len = optimize_best_vars.size();
4266
                PF_LongSinglePack(&len, 1, PF_INT);
4267
                PF_LongSinglePack(&optimize_best_vars[0], len, PF_WORD);
4268
        }
4269
        PF_LongSingleSend(MASTER, PF_OPT_COLLECT_MSGTAG);
4270

4271
#ifdef DEBUG
4272
        MesPrint ("*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_slave", thetime_str().c_str());
4273
#endif
4274

4275
}
4276

4277
#endif
4278
/*
4279
          #] PF_optimize_expression_given_Horner : 
4280
          #[ generate_output :
4281
*/
4282

4283
/**  Generate output
4284
 *
4285
 *         Description
4286
 *         ===========
4287
 *         This method prepares the instructions for printing. They are
4288
 *         stored in Form format, so that they can be printed by
4289
 *         "PrintExtraSymbol". The results are stored in the buffer
4290
 *         AO.OptimizeResult.
4291
 */
4292
VOID generate_output (const vector<WORD> &instr, int exprnr, int extraoffset, const vector<vector<WORD> > &brackets) {
27✔
4293

4294
#ifdef DEBUG
4295
        MesPrint ("*** [%s, w=%w] CALL: generate_output", thetime_str().c_str());
4296
#endif
4297

4298
        GETIDENTITY;
27✔
4299
        vector<WORD> output;
27✔
4300

4301
        // one-indexed instead of zero-indexed
4302
        extraoffset++;
27✔
4303
        int num = 0;
27✔
4304
        int maxsize = (int)instr.size();
27✔
4305
        for (int i=0; i<maxsize; i+=instr[i+2]) num++;
969✔
4306
        int *tstart = (int *)Malloc1(num*sizeof(int),"nplaces");
27✔
4307
        num = 0;
4308
        for (int i=0; i<maxsize; i+=instr[i+2]) tstart[num++] = i;
969✔
4309
        for (int j=0; j<num; j++) {
969✔
4310
                int i = tstart[j];
942✔
4311

4312
                // copy arguments
4313
                WCOPY(AT.WorkPointer, &instr[i+3], (instr[i+2]-3));
17,316✔
4314

4315
                // update maximal variable number
4316
                AO.OptimizeResult.maxvar = MaX(AO.OptimizeResult.maxvar, instr[i]+extraoffset);
942✔
4317

4318
                // renumber symbols and extrasymbols to correct values
4319
                for (WORD *t=AT.WorkPointer; *t!=0; t+=*t) {
2,880✔
4320
                        if (*t == ABS(*(t+*t-1))+1) continue;
1,938✔
4321
                        if (*(t+1)==SYMBOL) *(t+3) = optimize_best_vars[*(t+3)];
1,920✔
4322
                        if (*(t+1)==EXTRASYMBOL) {
1,920✔
4323
                                *(t+1) = SYMBOL;
1,218✔
4324
                                *(t+3) = MAXVARIABLES-*(t+3)-extraoffset;
1,218✔
4325
                        }
4326
                }
4327

4328
                // reformat multiplication instructions, since their current
4329
                // format is "expr.nr OPER_MUL length arguments", which differs
4330
                // from Form's format for a product of symbols
4331
                if (instr[i+1]==OPER_MUL) {
942✔
4332

4333
                        WORD *now=AT.WorkPointer+1;
387✔
4334
                        int dt;
387✔
4335
                        bool coeff=false;
387✔
4336
                        int coeff_sign=1;
387✔
4337

4338
                        for (WORD *t=AT.WorkPointer; *t!=0; t+=dt) {
1,134✔
4339
                                dt = *t;
747✔
4340
                                if (*t == ABS(*(t+*t-1))+1) {
747✔
4341
                                        // copy coefficient
4342
                                        memmove(AT.WorkPointer+instr[i+2], t, *t*sizeof(WORD));
×
4343
                                        coeff = true;
×
4344
                                }
4345
                                else {
4346
                                        // move symbol
4347
                                        int n = *(t+2);
747✔
4348
                                        memmove(now, t+1, n*sizeof(WORD));
747✔
4349
                                        now += n;
747✔
4350
                                        coeff_sign *= SGN(*(t+dt-1));
747✔
4351
                                }
4352
                        }
4353

4354
                        if (coeff) {
387✔
4355
                                // add existing coefficient
4356
                                int n = *(AT.WorkPointer + instr[i+2]) - 1;
×
4357
                                memmove(now, AT.WorkPointer+instr[i+2]+1, n*sizeof(WORD));
×
4358
                                now += n;
×
4359
                        }
4360
                        else {
4361
                                // add coefficient of one
4362
                                *now++=1;
387✔
4363
                                *now++=1;
387✔
4364
                                *now++=3;
387✔
4365
                        }
4366

4367
                        *(now-1) *= coeff_sign;
387✔
4368

4369
                        *AT.WorkPointer = now - AT.WorkPointer;
387✔
4370
                        *now++ = 0;
387✔
4371
                }
4372

4373
                // in the case of simultaneous optimization of expressions, add the
4374
                // brackets to the final expression
4375
                if (instr[i+1]==OPER_COMMA) {
942✔
4376
                        WORD *start = AT.WorkPointer + instr[i+2];
9✔
4377
                        WORD *now = start;
9✔
4378
                        int b=0;
9✔
4379
                        for (const WORD *t=AT.WorkPointer; *t!=0; t+=*t) {
18✔
4380
                                if ( ( brackets[b].size() != 0 ) && ( brackets[b][0] == 0 ) ) break;
18✔
4381
                                *now++ = *t + brackets[b].size();
9✔
4382
                                if (brackets[b].size() != 0) {
9✔
4383
                                        memcpy(now, &brackets[b][0], brackets[b].size()*sizeof(WORD));
6✔
4384
                                        now += brackets[b].size();
6✔
4385
                                }
4386
                                memcpy(now, t+1, (*t-1)*sizeof(WORD));
9✔
4387
                                now += *t-1;
9✔
4388
                                b++;
9✔
4389
                        }
4390
                        *now++ = 0;
9✔
4391
                        memmove(AT.WorkPointer, start, (now-start)*sizeof(WORD));
9✔
4392
                }
4393

4394
                // add the number of the extra symbol; if it is the last one,
4395
                // replace the extra symbol number with the expression number
4396
                if (i+instr[i+2]<(int)instr.size())
942✔
4397
                        output.push_back(instr[i]+extraoffset);
915✔
4398
                else {
4399
                        output.push_back(-(exprnr+1));
27✔
4400
                }
4401

4402
                // add code for this symbol
4403
                int n=0;
942✔
4404
                while (*(AT.WorkPointer+n)!=0)
2,511✔
4405
                        n += *(AT.WorkPointer+n);
1,569✔
4406
                n++;
942✔
4407
                output.insert(output.end(), AT.WorkPointer, AT.WorkPointer+n);
942✔
4408
        }
4409

4410
        // trailing zero
4411
        output.push_back(0);
27✔
4412

4413
        // clear buffer
4414
        if (AO.OptimizeResult.code != NULL)
27✔
4415
                M_free(AO.OptimizeResult.code, "optimize output");
3✔
4416

4417
        M_free(tstart,"nplaces");
27✔
4418

4419
        // copy buffer
4420
        AO.OptimizeResult.codesize = output.size();
27✔
4421
        AO.OptimizeResult.code = (WORD *)Malloc1(output.size()*sizeof(WORD), "optimize output");
27✔
4422
        memcpy(AO.OptimizeResult.code, &output[0], output.size()*sizeof(WORD));
27✔
4423

4424
#ifdef DEBUG
4425
        MesPrint ("*** [%s, w=%w] DONE: generate_output", thetime_str().c_str());
4426
#endif
4427
}
27✔
4428

4429
/*
4430
          #] generate_output : 
4431
          #[ generate_expression :
4432
*/
4433

4434
/**  Generate expression
4435
 *
4436
 *         Description
4437
 *         ===========
4438
 *         This method modifies the original optimized expression by an
4439
 *         expression with extra symbols. This is used for "#Optimize".
4440
 */
4441
WORD generate_expression (WORD exprnr) {
18✔
4442

4443
#ifdef DEBUG
4444
        MesPrint ("*** [%s, w=%w] CALL: generate_expression", thetime_str().c_str());
4445
#endif
4446

4447
        GETIDENTITY;
18✔
4448

4449
        WORD *oldWorkPointer = AT.WorkPointer;
18✔
4450

4451
        CBUF *C = cbuf+AC.cbufnum;
18✔
4452
        WORD *term = AT.WorkPointer, oldcurexpr = AR.CurExpr;
18✔
4453
        POSITION position;
18✔
4454
        EXPRESSIONS e = Expressions+exprnr;
18✔
4455
        SetScratch(AR.infile,&(e->onfile));
18✔
4456

4457
        if ( GetTerm(BHEAD term) <= 0 ) {
18✔
4458
                MesPrint("Expression %d has problems in scratchfile",exprnr);
×
4459
                Terminate(-1);
×
4460
        }
4461
        SeekScratch(AR.outfile,&position);
18✔
4462
        e->onfile = position;
18✔
4463
        if ( PutOut(BHEAD term,&position,AR.outfile,0) < 0 ) {
18✔
4464
                MesPrint("Expression %d has problems in output scratchfile",exprnr);
×
4465
                Terminate(-1);
×
4466
        }
4467

4468
        AR.CurExpr = exprnr;
18✔
4469
        NewSort(BHEAD0);
18✔
4470

4471
        // scan for the original expression (marked by *t<0) and give the
4472
        // terms to Generator
4473
        WORD *t = AO.OptimizeResult.code;
18✔
4474
        {
18✔
4475
                WORD old = AR.Cnumlhs; AR.Cnumlhs = 0;
18✔
4476
                // We can use the remaining part of the WorkSpace for every term that
4477
                // goes through Generator. We have WorkSpace problems here if we use
4478
                // the (modified) AT.WorkPointer every time in the loop.
4479
                WORD* currentWorkPointer = AT.WorkPointer;
18✔
4480
                while (*t!=0) {
891✔
4481
                        bool is_expr = *t < 0;
873✔
4482
                        t++;
873✔
4483
                        while (*t!=0) {
2,322✔
4484
                                if (is_expr) {
1,449✔
4485
                                        memcpy(currentWorkPointer, t, *t*sizeof(WORD));
93✔
4486
                                        Generator(BHEAD currentWorkPointer, C->numlhs);
93✔
4487
                                }
4488
                                t+=*t;
1,449✔
4489
                        }
4490
                        t++;
873✔
4491
                }
4492
                AR.Cnumlhs = old;
18✔
4493
        }
4494

4495
        // final sorting
4496
        if (EndSort(BHEAD NULL,0) < 0) {
18✔
4497
                LowerSortLevel();
×
4498
                Terminate(-1);
×
4499
        }
4500

4501
        AT.WorkPointer = oldWorkPointer;
18✔
4502
        AR.CurExpr = oldcurexpr;
18✔
4503

4504
#ifdef DEBUG
4505
        MesPrint ("*** [%s, w=%w] DONE: generate_expression", thetime_str().c_str());
4506
#endif
4507

4508
        return 0;
18✔
4509
}
4510

4511
/*
4512
          #] generate_expression : 
4513
          #[ optimize_print_code :
4514
*/
4515

4516
/**  Print optimized code
4517
 *
4518
 *         Description
4519
 *         ===========
4520
 *         This method prints the optimized code via
4521
 *         "PrintExtraSymbol". Depending on the flag, the original
4522
 *         expression is printed (for "Print") or not (for "#Optimize /
4523
 *         #write "%O").
4524
 */
4525
VOID optimize_print_code (int print_expr) {
24✔
4526

4527
#ifdef DEBUG
4528
        MesPrint ("*** [%s, w=%w] CALL: optimize_print_code", thetime_str().c_str());
4529
#endif
4530
        if ( ( AO.Optimize.debugflags & 1 ) != 0 ) {
24✔
4531
/*
4532
 *                The next code is for debugging purposes. We may want the statements
4533
 *                in reverse order to substitute them all back.
4534
 *                Jan used a Mathematica program to do this. Here we make that
4535
 *                        Format Ox,debugflag=1;
4536
 *                Creates reverse order during printing.
4537
 *                All we have to do is put id in front of the statements. This is done
4538
 *                in PrintExtraSymbol.
4539
 */
4540
                WORD *t = AO.OptimizeResult.code;
×
4541
                WORD num = 0;        // First we count the number of objects.
×
4542
                while (*t!=0) {
×
4543
                        num++;
×
4544
                        t++; while (*t!=0) t+=*t; t++;
×
4545
                }
4546
                WORD **tstart = (WORD **)Malloc1(num*sizeof(WORD *),"print objects");
×
4547
                t = AO.OptimizeResult.code; num = 0; // Now we get the addresses
×
4548
                while (*t!=0) {
×
4549
                        tstart[num++] = t;
×
4550
                        t++; while (*t!=0) t+=*t; t++;
×
4551
                }
4552
                // Flip the addresses
4553
                int halfnum = num/2;
×
4554
                for (int i=0; i<halfnum; i++) { swap(tstart[i],tstart[num-1-i]); }
×
4555
                for ( int i = 0; i < num; i++ ) {
×
4556
                        t = tstart[i];
×
4557
                        if (*t > 0)
×
4558
                                PrintExtraSymbol(*t, t+1, EXTRASYMBOL);
×
4559
                        else if (print_expr)
×
4560
                                PrintExtraSymbol(-*t-1, t+1, EXPRESSIONNUMBER);
×
4561
                }
4562
                CBUF *C = cbuf + AM.sbufnum;
×
4563
                if (C->numrhs >= AO.OptimizeResult.minvar)
×
4564
                        PrintSubtermList(AO.OptimizeResult.minvar, C->numrhs);
×
4565
        }
4566
        else {
4567
                // print extra symbols from ConvertToPoly in optimization
4568
                CBUF *C = cbuf + AM.sbufnum;
24✔
4569
                if (C->numrhs >= AO.OptimizeResult.minvar)
24✔
4570
                        PrintSubtermList(AO.OptimizeResult.minvar, C->numrhs);
12✔
4571

4572
                WORD *t = AO.OptimizeResult.code;
24✔
4573
                while (*t!=0) {
168✔
4574
                        if (*t > 0) {
144✔
4575
                                PrintExtraSymbol(*t, t+1, EXTRASYMBOL);
120✔
4576
                        }
4577
                        else if (print_expr)
24✔
4578
                                PrintExtraSymbol(-*t-1, t+1, EXPRESSIONNUMBER);
18✔
4579
                        t++;
144✔
4580
                        while (*t!=0) t+=*t;
390✔
4581
                        t++;
144✔
4582
                }
4583
        }
4584

4585
#ifdef DEBUG
4586
        MesPrint ("*** [%s, w=%w] DONE: optimize_print_code", thetime_str().c_str());
4587
#endif
4588
}
24✔
4589

4590
/*
4591
          #] optimize_print_code : 
4592
          #[ Optimize :
4593
*/
4594

4595
/**  Optimization of expression
4596
 *
4597
 *         Description
4598
 *         ===========
4599
 *         This method takes an input expression and generates optimized
4600
 *         code to calculate its value. The following methods are called to
4601
 *         do so:
4602
 *
4603
 *         (1) get_expression : to read to expression
4604
 *
4605
 *         (2) get_brackets : find brackets for simultaneous optimization
4606
 *
4607
 *         (3) occurrence_order or find_Horner_MCTS : to determine (the)
4608
 *         Horner scheme(s) to use; this depends on AO.optimize.horner
4609
 *
4610
 *         (4) optimize_expression_given_Horner : to do the optimizations
4611
 *         for each Horner scheme; this method does either CSE or greedy
4612
 *         optimizations dependings on AO.optimize.method
4613
 *
4614
 *         (5) generate_output : to format the output in Form notation and
4615
 *         store it in a buffer
4616
 *
4617
 *         (6a) optimize_print_code : to print the expression (for "Print")
4618
 *         or
4619
 *         (6b) generate_expression : to modify the expression (for
4620
 *         "#Optimize")
4621
 *
4622
 *         On ParFORM, all the processes must call this function at the same
4623
 *         time. Then
4624
 *
4625
 *         (1) Because only the master can access to the expression to be
4626
 *         optimized, the master broadcast the expression to all the slaves
4627
 *         after reading the expression (PF_get_expression).
4628
 *
4629
 *         (2) get_brackets reads optimize_expr as the input and it works
4630
 *         also on the slaves. We leave it although the bracket information
4631
 *         is not needed on the slaves (used in (5) on the master).
4632
 *
4633
 *         (3) and (4) find_Horner_MCTS and optimize_expression_given_Horner
4634
 *         are parallelized.
4635
 *
4636
 *         (5), (6a) and (6b) are needed only on the master.
4637
 */
4638
int Optimize (WORD exprnr, int do_print) {
36✔
4639

4640
#if defined(WITHMPI) && (defined(DEBUG) || defined(DEBUG_MORE) || defined(DEBUG_MCTS) || defined(DEBUG_GREEDY))
4641
        // set AS.printflag negative temporary.
4642
        struct save_printflag {
4643
                save_printflag() {
4644
                        oldprintflag = AS.printflag;
4645
                        AS.printflag = -1;
4646
                }
4647
                ~save_printflag() {
4648
                        AS.printflag = oldprintflag;
4649
                }
4650
                int oldprintflag;
4651
        } save_printflag_;
4652
#endif
4653

4654
#ifdef DEBUG
4655
        MesPrint ("*** [%s, w=%w] CALL: Optimize", thetime_str().c_str());
4656
        MesPrint ("*** %"); PrintRunningTime();
4657
#endif
4658

4659
#ifdef WITHPTHREADS
4660
        optimize_lock = dummylock;
24✔
4661
#endif
4662

4663
        AO.OptimizeResult.minvar = (cbuf + AM.sbufnum)->numrhs + 1;
36✔
4664

4665
        if (get_expression(exprnr) < 0) return -1;
36✔
4666
        vector<vector<WORD> > brackets = get_brackets();
36✔
4667

4668
#ifdef DEBUG
4669
#ifdef WITHMPI
4670
                if (PF.me == MASTER)
4671
#endif
4672
                MesPrint ("*** runtime after preparing the expression: %"); PrintRunningTime();
4673
#endif
4674

4675
        if (optimize_expr[0]==0 ||
36✔
4676
                        (optimize_expr[optimize_expr[0]]==0 && optimize_expr[0]==ABS(optimize_expr[optimize_expr[0]-1])+1) ||
36✔
4677
                        (optimize_expr[optimize_expr[0]]==0 && optimize_expr[0]==8 &&
6✔
4678
                         optimize_expr[5]==1 && optimize_expr[6]==1 && ABS(optimize_expr[7])==3)) {
3✔
4679
                if (AO.OptimizeResult.code != NULL)
9✔
4680
                        M_free(AO.OptimizeResult.code, "optimize output");
9✔
4681

4682
                // zero terms or one trivial term (number or +/-variable), so no
4683
                // optimization, so copy expression; special case because without
4684
                // operators the optimization crashes
4685
                AO.OptimizeResult.code = (WORD *)Malloc1((optimize_expr[0]+3)*sizeof(WORD), "optimize output");
9✔
4686
                AO.OptimizeResult.code[0] = -(exprnr+1);
9✔
4687
                memcpy(AO.OptimizeResult.code+1, optimize_expr, (optimize_expr[0]+1)*sizeof(WORD));
9✔
4688
                AO.OptimizeResult.code[optimize_expr[0]+2] = 0;
9✔
4689
        }
4690
        else {
4691
                // find Horner scheme(s)
4692
                optimize_best_Horner_schemes.clear();
27✔
4693
                if (AO.Optimize.horner == O_OCCURRENCE) {
27✔
4694
                        if (AO.Optimize.hornerdirection != O_BACKWARD)
15✔
4695
                                optimize_best_Horner_schemes.push_back(occurrence_order(optimize_expr, false));
30✔
4696
                        if (AO.Optimize.hornerdirection != O_FORWARD)
15✔
4697
                                optimize_best_Horner_schemes.push_back(occurrence_order(optimize_expr, true));
30✔
4698
                }
4699
                else {
4700
                        if (AO.Optimize.horner == O_SIMULATED_ANNEALING) {
12✔
4701
                                optimize_best_Horner_schemes.push_back(simulated_annealing());
×
4702
                        }
4703
                        else {
4704
                                mcts_best_schemes.clear();
12✔
4705
                                if ( AO.inscheme ) {
12✔
4706
                                        optimize_best_Horner_schemes.push_back(vector<WORD>(AO.schemenum));
×
4707
                                        for ( int i=0; i < AO.schemenum; i++ )
×
4708
                                                optimize_best_Horner_schemes[0][i] = AO.inscheme[i];
×
4709
                                }
4710
                                else {
4711
                                        for ( int i = 0; i < AO.Optimize.mctsnumrepeat; i++ )
24✔
4712
                                                find_Horner_MCTS();
12✔
4713
                                        // generate results
4714
                                        for (set<pair<int,vector<WORD> > >::iterator i=mcts_best_schemes.begin(); i!=mcts_best_schemes.end(); i++) {
27✔
4715
                                                optimize_best_Horner_schemes.push_back(i->second);
15✔
4716
#ifdef DEBUG_MCTS
4717
                                                MesPrint ("{%a} -> %d",i->second.size(), &i->second[0], i->first);
4718
#endif
4719
                                        }
4720
                                }
4721
                                // clear the tree by making a new empty one.
4722
                                mcts_root = tree_node();
24✔
4723
                        }
4724
                }
4725
#ifdef DEBUG
4726
#ifdef WITHMPI
4727
                if (PF.me == MASTER)
4728
#endif
4729
                MesPrint ("*** runtime after Horner: %"); PrintRunningTime();
4730
#endif
4731

4732
#ifdef WITHMPI
4733
                if (PF.me == MASTER ) {
4734
                        PF_optimize_expression_given_Horner_master_init();
4735
#endif
4736

4737
                // find best Horner scheme and results
4738
                optimize_best_num_oper = INT_MAX;
27✔
4739

4740
                int imax = (int)optimize_best_Horner_schemes.size();
27✔
4741

4742
                for (int i=0; i<imax; i++) {
72✔
4743
#if defined(WITHPTHREADS)
4744
                        if (AM.totalnumberofthreads > 1)
30✔
4745
                                optimize_expression_given_Horner_threaded();
30✔
4746
                        else
4747
#elif defined(WITHMPI)
4748
                        if (PF.numtasks > 1)
4749
                                PF_optimize_expression_given_Horner_master();
4750
                        else
4751
#endif
4752
                                optimize_expression_given_Horner();
15✔
4753
                }
4754

4755
#ifdef WITHMPI
4756
                        PF_optimize_expression_given_Horner_master_wait();
4757
                }
4758
                else {
4759
                        if (PF.numtasks > 1)
4760
                                PF_optimize_expression_given_Horner_slave();
4761
                }
4762
#endif
4763

4764
#ifdef WITHPTHREADS
4765
                MasterWaitAll();
18✔
4766
#endif
4767
                // format results, then print it (for "Print") or modify
4768
                // expression (for "#Optimize")
4769
#ifdef WITHMPI
4770
                if (PF.me == MASTER)
4771
#endif
4772
                generate_output(optimize_best_instr, exprnr, cbuf[AM.sbufnum].numrhs, brackets);
27✔
4773
#ifdef WITHMPI
4774
                else {
4775
                        // non-null dummy code for slaves
4776
                        if (AO.OptimizeResult.code == NULL) {
4777
                                AO.OptimizeResult.code = (WORD *)Malloc1(sizeof(WORD), "optimize output");
4778
                        }
4779
                }
4780
#endif
4781
        }
4782

4783
#ifdef WITHMPI
4784
        if (PF.me == MASTER) {
4785
                PF_PreparePack();
4786
                PF_Pack(&AO.OptimizeResult.minvar, 1, PF_WORD);
4787
                PF_Pack(&AO.OptimizeResult.maxvar, 1, PF_WORD);
4788
        }
4789
        PF_Broadcast();
4790
        if (PF.me != MASTER) {
4791
                PF_Unpack(&AO.OptimizeResult.minvar, 1, PF_WORD);
4792
                PF_Unpack(&AO.OptimizeResult.maxvar, 1, PF_WORD);
4793
        }
4794
#endif
4795

4796
        // set preprocessor variables
4797
        char str[100];
36✔
4798
        snprintf (str,100,"%d",AO.OptimizeResult.minvar);
36✔
4799
        PutPreVar((UBYTE *)"optimminvar_",(UBYTE *)str,0,1);
36✔
4800
        snprintf (str,100,"%d",AO.OptimizeResult.maxvar);
36✔
4801
        PutPreVar((UBYTE *)"optimmaxvar_",(UBYTE *)str,0,1);
36✔
4802

4803
        if (do_print) {
36✔
4804
#ifdef WITHMPI
4805
                if (PF.me == MASTER)
4806
#endif
4807
                optimize_print_code(1);
18✔
4808
                ClearOptimize();
18✔
4809
        }
4810
        else {
4811
#ifdef WITHMPI
4812
                if (PF.me == MASTER)
4813
#endif
4814
                generate_expression(exprnr);
18✔
4815
        }
4816

4817
#ifdef WITHMPI
4818
        if (PF.me == MASTER) {
4819
#endif
4820

4821
        if ( AO.Optimize.printstats > 0 ) {
36✔
4822
                char str[20];
×
4823
                MesPrint("");
×
4824
                count_operators(optimize_expr,true);
×
4825
                int numop = count_operators(optimize_best_instr,true);
×
4826
                snprintf(str,20,"%d",numop);
×
4827
                PutPreVar((UBYTE *)"optimvalue_",(UBYTE *)str,0,1);
×
4828
        }
4829
        else {
4830
                char str[20];
36✔
4831
                int numop = count_operators(optimize_best_instr,false);
36✔
4832
                snprintf(str,20,"%d",numop);
36✔
4833
                PutPreVar((UBYTE *)"optimvalue_",(UBYTE *)str,0,1);
36✔
4834
        }
4835

4836
        if ( ( AO.Optimize.schemeflags&1 ) == 1 ) {
36✔
4837
                GETIDENTITY
4838
                UBYTE *OutScr, *Out, *old1 = AO.OutputLine, *old2 = AO.OutFill;
×
4839
                int i, sym;
×
4840
                AO.OutputLine = AO.OutFill = (UBYTE *)AT.WorkPointer;
×
4841
                FiniLine();
×
4842
                OutScr = (UBYTE *)AT.WorkPointer + ( TOLONG(AT.WorkTop) - TOLONG(AT.WorkPointer) ) /2;
×
4843
                TokenToLine((UBYTE *)" Scheme selected: ");
×
4844
                for ( i = 0; i < optimize_num_vars; i++ ) {
×
4845
                        Out = OutScr;
×
4846
                        sym = optimize_best_vars[i];
×
4847
                        if ( i > 0 ) TokenToLine((UBYTE *)",");
×
4848
                        if ( sym < NumSymbols ) {
×
4849
                                StrCopy(FindSymbol(sym),OutScr);
×
4850
/*                                StrCopy(VARNAME(symbols,sym),OutScr); */
4851
                        }
4852
                        else {
4853
                                Out = StrCopy((UBYTE *)AC.extrasym,Out);
×
4854
                                if ( AC.extrasymbols == 0 ) {
×
4855
                                        Out = NumCopy((MAXVARIABLES-sym),Out);
×
4856
                                        Out = StrCopy((UBYTE *)"_",Out);
×
4857
                                }
4858
                                else if ( AC.extrasymbols == 1 ) {
×
4859
                                        Out = AddArrayIndex((MAXVARIABLES-sym),Out);
×
4860
                                }
4861
                        }
4862
                        TokenToLine(OutScr);
×
4863
                }
4864
                TokenToLine((UBYTE *)";");
×
4865
                FiniLine();
×
4866
                AO.OutFill = old2;
×
4867
                AO.OutputLine = old1;
×
4868
        }
4869

4870
        {
36✔
4871
                GETIDENTITY
24✔
4872
                UBYTE *OutScr, *Out, *outstring = 0;
36✔
4873
                int i, sym;
36✔
4874
                AO.OutputLine = AO.OutFill = (UBYTE *)AT.WorkPointer;
36✔
4875
                OutScr = (UBYTE *)AT.WorkPointer + ( TOLONG(AT.WorkTop) - TOLONG(AT.WorkPointer) ) /2;
36✔
4876
                for ( i = 0; i < optimize_num_vars; i++ ) {
228✔
4877
                        Out = OutScr;
192✔
4878
                        sym = optimize_best_vars[i];
192✔
4879
                        if ( sym < NumSymbols ) {
192✔
4880
                                StrCopy(FindSymbol(sym),OutScr);
171✔
4881
/*                                StrCopy(VARNAME(symbols,sym),OutScr); */
4882
                        }
4883
                        else {
4884
                                Out = StrCopy((UBYTE *)AC.extrasym,Out);
21✔
4885
                                if ( AC.extrasymbols == 0 ) {
21✔
4886
                                        Out = NumCopy((MAXVARIABLES-sym),Out);
12✔
4887
                                        Out = StrCopy((UBYTE *)"_",Out);
12✔
4888
                                }
4889
                                else if ( AC.extrasymbols == 1 ) {
9✔
4890
                                        Out = AddArrayIndex((MAXVARIABLES-sym),Out);
9✔
4891
                                }
4892
                        }
4893
                        outstring = AddToString(outstring,OutScr,1);
192✔
4894
                }
4895
                if ( outstring == 0 ) {
36✔
4896
                        PutPreVar((UBYTE *)"optimscheme_",(UBYTE *)"",0,1);
×
4897
                }
4898
                else {
4899
                        PutPreVar((UBYTE *)"optimscheme_",(UBYTE *)outstring,0,1);
36✔
4900
                        M_free(outstring,"AddToString");
36✔
4901
                }
4902
        }
4903

4904
#ifdef WITHMPI
4905
        }
4906

4907
        // synchronize optimvalue_ and optimscheme_
4908
        if ( PF.me == MASTER ) {
4909
                UBYTE *value;
4910
                int bytes;
4911

4912
                PF_PrepareLongMultiPack();
4913

4914
                value = GetPreVar((UBYTE *)"optimvalue_", WITHERROR);
4915
                bytes = strlen((char *)value);
4916
                PF_LongMultiPack(&bytes, 1, PF_INT);
4917
                PF_LongMultiPack(value, bytes, PF_BYTE);
4918

4919
                value = GetPreVar((UBYTE *)"optimscheme_", WITHERROR);
4920
                bytes = strlen((char *)value);
4921
                PF_LongMultiPack(&bytes, 1, PF_INT);
4922
                PF_LongMultiPack(value, bytes, PF_BYTE);
4923
        }
4924
        PF_LongMultiBroadcast();
4925
        if ( PF.me != MASTER ) {
4926
                static vector<UBYTE> prevarbuf;
4927
                UBYTE *value;
4928
                int bytes;
4929

4930
                PF_LongMultiUnpack(&bytes, 1, PF_INT);
4931
                prevarbuf.reserve(bytes + 1);
4932
                value = &prevarbuf[0];
4933
                PF_LongMultiUnpack(value, bytes, PF_BYTE);
4934
                value[bytes] = '\0';  // null terminator
4935
                PutPreVar((UBYTE *)"optimvalue_", value, NULL, 1);
4936

4937
                PF_LongMultiUnpack(&bytes, 1, PF_INT);
4938
                prevarbuf.reserve(bytes + 1);
4939
                value = &prevarbuf[0];
4940
                PF_LongMultiUnpack(value, bytes, PF_BYTE);
4941
                value[bytes] = '\0';  // null terminator
4942
                PutPreVar((UBYTE *)"optimscheme_", value, NULL, 1);
4943
        }
4944
#endif
4945

4946
        // cleanup
4947
        M_free(optimize_expr,"LoadOptim");
36✔
4948

4949
#ifdef DEBUG
4950
        MesPrint ("*** [%s, w=%w] DONE: Optimize", thetime_str().c_str());
4951
#endif
4952

4953
        return 0;
36✔
4954
}
36✔
4955

4956
/*
4957
          #] Optimize : 
4958
          #[ ClearOptimize :
4959
*/
4960

4961
/**  Optimization of expression
4962
 *
4963
 *         Description
4964
 *         ===========
4965
 *         Clears the buffers that were used for optimization output.
4966
 *         Clears the expression from the buffers (marks it to be dropped).
4967
 *         Note: we need to use the expression by its name, because numbers
4968
 *         may change if we drop other expressions between when we do the
4969
 *         optimizations and clear the results (in execute.c). Also this is
4970
 *         not 100% safe, because we could overwrite the optimized
4971
 *         expression. But that can be done only in a Local or Global
4972
 *         statement and hence we only have to test there that we might have
4973
 *         to call ClearOptimize first. (in file comexpr.c)
4974
 */
4975
int ClearOptimize()
54✔
4976
{
4977
        char str[20];
54✔
4978
        WORD numexpr, *w;
54✔
4979
        int error = 0;
54✔
4980
        if ( AO.OptimizeResult.code != NULL ) {
54✔
4981
                M_free(AO.OptimizeResult.code, "optimize output");
21✔
4982
                AO.OptimizeResult.code = NULL;
21✔
4983
                AO.OptimizeResult.codesize = 0;
21✔
4984
                PutPreVar((UBYTE *)"optimminvar_",(UBYTE *)("0"),0,1);
21✔
4985
                PutPreVar((UBYTE *)"optimmaxvar_",(UBYTE *)("0"),0,1);
21✔
4986
                PruneExtraSymbols(AO.OptimizeResult.minvar-1);
21✔
4987
                cbuf[AM.sbufnum].numrhs = AO.OptimizeResult.minvar-1;
21✔
4988
                AO.OptimizeResult.minvar = AO.OptimizeResult.maxvar = 0;
21✔
4989
        }
4990
        if ( AO.OptimizeResult.nameofexpr != NULL ) {
54✔
4991
/*
4992
                We have to pick up the expression by its name. Numbers may change.
4993
                Note that this requires that when we overwrite an expression, we
4994
                check that it is not an optimized expression. See execute.c and
4995
                comexpr.c
4996
*/
4997
                if ( GetName(AC.exprnames,AO.OptimizeResult.nameofexpr,&numexpr,NOAUTO) != CEXPRESSION ) {
15✔
4998
                        MesPrint("@Internal error while clearing optimized expression %s ",AO.OptimizeResult.nameofexpr);
×
4999
                        Terminate(-1);
×
5000
                }
5001
                M_free(AO.OptimizeResult.nameofexpr, "optimize expression name");
15✔
5002
                AO.OptimizeResult.nameofexpr = NULL;
15✔
5003
                w = &(Expressions[numexpr].status);
15✔
5004
                *w = SetExprCases(DROP,1,*w);
15✔
5005
                if ( *w < 0 ) error = 1;
15✔
5006
        }
5007
        snprintf (str,20,"%d",cbuf[AM.sbufnum].numrhs);
54✔
5008
        PutPreVar(AM.oldnumextrasymbols,(UBYTE *)str,0,1);
54✔
5009
        PutPreVar((UBYTE *)"optimvalue_",(UBYTE *)("0"),0,1);
54✔
5010
        PutPreVar((UBYTE *)"optimscheme_",(UBYTE *)("0"),0,1);
54✔
5011
        return(error);
54✔
5012
}
5013

5014
/*
5015
          #] ClearOptimize : 
5016
*/
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc