#include "pch.h" #define XOR_RAND(state, result_v...
تم الإنشاء في: ٢٩ أكتوبر ٢٠٢٥
تمت الإجابة باستخدام GPT-5 Thinking بواسطة Chat01
تم الإنشاء في: ٢٩ أكتوبر ٢٠٢٥
تمت الإجابة باستخدام GPT-5 Thinking بواسطة Chat01
#include "pch.h"
#define XOR_RAND(state, result_var)
do {
int s = (state);
s ^= s << 13;
s ^= s >> 17;
s ^= s << 5;
state = s;
result_var = state * 0x1.0p-32f;
} while (0)
#define XOR_RAND_GRSH(state, result_var)
do {
int s = (state);
s ^= s << 13;
s ^= s >> 17;
s ^= s << 5;
state = s;
result_var = fmaf(state, 0x1.0p-31f, -1.0f);
} while (0)
#define FABE13_COS(x, result_var)
do {
const float ax = fabsf(x);
float r = fmodf(ax, 6.28318530718f);
if (r > 3.14159265359f) {
r = 6.28318530718f - r;
}
if (r < 1.57079632679f) {
const float t2 = r * r;
const float t4 = t2 * t2;
result_var = fmaf(t4, fmaf(t2, -0.0013888889f, 0.0416666667f), fmaf(t2, -0.5f, 1.0f));
} else {
r = 3.14159265359f - r;
const float t2 = r * r;
const float t4 = t2 * t2;
result_var = -fmaf(t4, fmaf(t2, -0.0013888889f, 0.0416666667f), fmaf(t2, -0.5f, 1.0f));
}
} while (0)
#define FABE13_SIN(x, result_var)
do {
const float x = (x);
const float ax = fabsf(x);
float r = fmodf(ax, 6.28318530718f);
bool sfl = r > 3.14159265359f;
if (sfl) r = 6.28318530718f - r;
bool cfl = r > 1.57079632679f;
if (cfl) r = 3.14159265359f - r;
const float t2 = r * r;
float _s = fmaf(t2, fmaf(t2, fmaf(t2, -0.0001984127f, 0.0083333333f), -0.16666666f), 1.0f) * r;
result_var = ((x < 0.0f) ^ sfl) ? -_s : _s;
} while (0)
#define FABE13_SINCOS(in, sin_out, cos_out, n)
do {
int i = 0;
const int limit = (n) & ~7;
if ((n) >= 8) {
static __declspec(align(32)) const __m256 VEC_TWOPI = _mm256_set1_ps(6.28318530718f);
static __declspec(align(32)) const __m256 VEC_PI = _mm256_set1_ps(3.14159265359f);
static __declspec(align(32)) const __m256 VEC_PI_2 = _mm256_set1_ps(1.57079632679f);
static __declspec(align(32)) const __m256 INV_TWOPI = _mm256_set1_ps(0.15915494309189535f);
static __declspec(align(32)) const __m256 BIAS = _mm256_set1_ps(12582912.0f);
static __declspec(align(32)) const __m256 VEC_COS_P5 = _mm256_set1_ps(-0.0013888889f);
static __declspec(align(32)) const __m256 VEC_COS_P3 = _mm256_set1_ps(0.0416666667f);
static __declspec(align(32)) const __m256 VEC_COS_P1 = _mm256_set1_ps(-0.5f);
static __declspec(align(32)) const __m256 VEC_COS_P0 = _mm256_set1_ps(1.0f);
static __declspec(align(32)) const __m256 VEC_SIN_P5 = _mm256_set1_ps(-0.0001984127f);
static __declspec(align(32)) const __m256 VEC_SIN_P3 = _mm256_set1_ps(0.0083333333f);
static __declspec(align(32)) const __m256 VEC_SIN_P1 = _mm256_set1_ps(-0.16666666f);
static __declspec(align(32)) const __m256 VEC_SIN_P0 = _mm256_set1_ps(1.0f);
static __declspec(align(32)) const __m256 VEC_ZERO = _mm256_setzero_ps();
while (i < limit) {
const __m256 vx = _mm256_load_ps(&(in)[i]);
const __m256 vax = _mm256_andnot_ps(_mm256_set1_ps(-0.0f), vx);
__m256 q = _mm256_fmadd_ps(vax, INV_TWOPI, BIAS);
q = _mm256_sub_ps(q, BIAS);
const __m256 r = _mm256_fnmadd_ps(VEC_TWOPI, q, vax);
const __m256 r1 = _mm256_min_ps(r, _mm256_sub_ps(VEC_TWOPI, r));
const __m256 r2 = _mm256_min_ps(r1, _mm256_sub_ps(VEC_PI, r1));
const __m256 t2 = _mm256_mul_ps(r2, r2);
const __m256 cosv = _mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, VEC_COS_P5, VEC_COS_P3), VEC_COS_P1), VEC_COS_P0);
const __m256 sinv = _mm256_mul_ps(_mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, VEC_SIN_P5, VEC_SIN_P3), VEC_SIN_P1), VEC_SIN_P0), r2);
const __m256 cflip = _mm256_cmp_ps(r1, VEC_PI_2, _CMP_GT_OQ);
const __m256 sflip = _mm256_xor_ps(_mm256_cmp_ps(vx, VEC_ZERO, _CMP_LT_OQ), _mm256_cmp_ps(r, VEC_PI, _CMP_GT_OQ));
_mm256_store_ps(&(cos_out)[i], _mm256_blendv_ps(cosv, _mm256_sub_ps(VEC_ZERO, cosv), cflip));
_mm256_store_ps(&(sin_out)[i], _mm256_blendv_ps(sinv, _mm256_sub_ps(VEC_ZERO, sinv), sflip));
i += 8;
}
}
while (i < (n)) {
const float x = (in)[i];
const float ax = fabsf(x);
float q = fmaf(ax, 0.15915494309189535f, 12582912.0f);
q -= 12582912.0f;
float r = fmaf(-6.28318530718f, q, ax);
const bool sflip = r > 3.14159265359f;
if (sflip) r = 6.28318530718f - r;
const bool cflip = r > 1.57079632679f;
if (cflip) r = 3.14159265359f - r;
const float t2 = r * r;
const float c = fmaf(t2, fmaf(t2, fmaf(t2, -0.0013888889f, 0.0416666667f), -0.5f), 1.0f);
const float s = fmaf(t2, fmaf(t2, fmaf(t2, -0.0001984127f, 0.0083333333f), -0.16666666f), 1.0f) * r;
(cos_out)[i] = cflip ? -c : c;
(sin_out)[i] = ((x < 0.0f) ^ sflip) ? -s : s;
++i;
}
} while (0)
enum List : uint8_t {
Top = 0b00u,
Down = 0b01u,
Left = 0b10u,
Right = 0b11u
};
__declspec(align(4)) struct Step {
const uint8_t next;
const uint8_t dx;
const uint8_t dy;
};
__declspec(align(4)) struct InvStep {
const uint8_t q;
const uint8_t next;
};
__declspec(align(64)) static const Step g_step_tbl[4][4] = {
{ {Right, 0u, 0u}, {Top, 0u, 1u}, {Top, 1u, 1u}, {Left, 1u, 0u} },
{ {Left, 1u, 1u}, {Down, 1u, 0u}, {Down, 0u, 0u}, {Right, 0u, 1u} },
{ {Down, 1u, 1u}, {Left, 0u, 1u}, {Left, 0u, 0u}, {Top, 1u, 0u} },
{ {Top, 0u, 0u}, {Right, 1u, 0u}, {Right, 1u, 1u}, {Down, 0u, 1u} }
};
__declspec(align(64)) static const InvStep g_inv_tbl[4][4] = {
{ {0u, Right}, {1u, Top}, {3u, Left}, {2u, Top} },
{ {2u, Down}, {3u, Right}, {1u, Down}, {0u, Left} },
{ {2u, Left}, {1u, Left}, {3u, Top}, {0u, Down} },
{ {0u, Top}, {3u, Down}, {1u, Right}, {2u, Right} }
};
static const boost::mpi::environment* g_env;
static const boost::mpi::communicator* g_world;
__declspec(align(16)) struct CrossMsg final {
float s_x1, s_x2;
float e_x1, e_x2;
float Rtop;
texttemplate<typename Archive> __declspec(noalias) __forceinline void serialize(Archive& ar, const unsigned int) noexcept { ar& s_x1& s_x2& e_x1& e_x2& Rtop; }
};
__declspec(align(16)) struct CtrlMsg final {
bool kind;
CrossMsg xchg;
texttemplate<typename Archive> __declspec(noalias) __forceinline void serialize(Archive& ar, const unsigned int) noexcept { ar& kind& xchg; }
};
__declspec(align(16)) struct Slab {
char* const base;
char* current;
char* const end;
text__forceinline Slab(void* const memory, const size_t usable) : base((char*)memory), current(base), end(base + (usable & ~(size_t)63u)) { }
};
static tbb::enumerable_thread_specific<Slab*> tls( noexcept {
void* memory = _aligned_malloc(16777216u, 16u);
Slab* slab = (Slab*)_aligned_malloc(32u, 16u);
new (slab) Slab(memory, 16777216u);
char* p = slab->base;
while (p < slab->end) {
*p = 0;
p += 4096u;
}
return slab;
});
__declspec(align(16)) struct Peano2DMap {
const int levels;
const float a, b, c, d;
const float lenx, leny;
const float inv_lenx;
const uint32_t scale;
const uint8_t start;
text__forceinline Peano2DMap(int L, float _a, float _b, float _c, float _d, uint8_t st) : levels(L), a(_a), b(_b), c(_c), d(_d), lenx(_b - _a), leny(_d - _c), inv_lenx(1.0f / (_b - _a)), scale((uint32_t)1u << (L << 1)), start(st) { }
};
static Peano2DMap gActiveMap(0, 0, 0, 0, 0, 0);
__forceinline bool ComparePtr(const struct Interval* a, const struct Interval* b) noexcept;
__declspec(align(16)) struct Interval {
const float x1;
const float x2;
const float y1;
const float y2;
const float delta_y;
const float ordinate_factor;
const float N_factor;
const float quadratic_term;
const float M;
float R;
text__forceinline void* operator new(size_t) noexcept { Slab* s = tls.local(); char* r = s->current; s->current += 64u; return r; } __forceinline Interval(float _x1, float _x2, float _y1, float _y2, float _N) noexcept : x1(_x1), x2(_x2), y1(_y1), y2(_y2), delta_y(_y2 - _y1), ordinate_factor(-(y1 + y2) * 2.0f), N_factor(_N == 1.0f ? _x2 - _x1 : _N == 2.0f ? sqrtf(_x2 - _x1) : powf(_x2 - _x1, 1.0f / _N)), quadratic_term((1.0f / N_factor)* delta_y* delta_y), M((1.0f / N_factor)* fabsf(delta_y)) { } __forceinline void ChangeCharacteristic(float _m) noexcept { R = fmaf(1.0f / _m, quadratic_term, fmaf(_m, N_factor, ordinate_factor)); }
};
__forceinline bool ComparePtr(const Interval* a, const Interval* b) noexcept {
return a->R < b->R;
}
__forceinline void RecomputeR_ConstM_AVX2(Interval* const* arr, size_t n, float m) {
const __m256 vm = _mm256_set1_ps(m);
__m256 vinvm = _mm256_rcp_ps(vm);
vinvm = _mm256_mul_ps(vinvm, _mm256_fnmadd_ps(vm, vinvm, _mm256_set1_ps(2.0f)));
textsize_t i = 0; const int limit = (int)(n & ~7u); if (n >= 8u) { while (i < (size_t)limit) { __declspec(align(32)) float q[8], nf[8], ord[8]; int k = 0; while (k < 8) { const Interval* p = arr[i + k]; q[k] = p->quadratic_term; nf[k] = p->N_factor; ord[k] = p->ordinate_factor; ++k; } const __m256 vq = _mm256_load_ps(q); const __m256 vnf = _mm256_load_ps(nf); const __m256 vod = _mm256_load_ps(ord); const __m256 t = _mm256_fmadd_ps(vm, vnf, vod); const __m256 res = _mm256_fmadd_ps(vq, vinvm, t); __declspec(align(32)) float out[8]; _mm256_store_ps(out, res); k = 0; while (k < 8) { arr[i + k]->R = out[k]; ++k; } i += 8u; } } while (i < n) { arr[i]->ChangeCharacteristic(m); ++i; }
}
__forceinline void RecomputeR_AffineM_AVX2(Interval* const* arr, size_t n, float GF, float alpha) {
const __m256 vGF = _mm256_set1_ps(GF);
const __m256 va = _mm256_set1_ps(alpha);
textsize_t i = 0; const int limit = (int)(n & ~7u); if (n >= 8u) { while (i < (size_t)limit) { __declspec(align(32)) float len[8], Mv[8], q[8], nf[8], ord[8]; int k = 0; while (k < 8) { const Interval* p = arr[i + k]; len[k] = p->x2 - p->x1; Mv[k] = p->M; q[k] = p->quadratic_term; nf[k] = p->N_factor; ord[k] = p->ordinate_factor; ++k; } const __m256 vlen = _mm256_load_ps(len); const __m256 vM = _mm256_load_ps(Mv); const __m256 vq = _mm256_load_ps(q); const __m256 vnf = _mm256_load_ps(nf); const __m256 vod = _mm256_load_ps(ord); const __m256 vm = _mm256_fmadd_ps(vGF, vlen, _mm256_mul_ps(va, vM)); __m256 vinvm = _mm256_rcp_ps(vm); vinvm = _mm256_mul_ps(vinvm, _mm256_fnmadd_ps(vm, vinvm, _mm256_set1_ps(2.0f))); const __m256 t = _mm256_fmadd_ps(vm, vnf, vod); const __m256 res = _mm256_fmadd_ps(vq, vinvm, t); __declspec(align(32)) float out[8]; _mm256_store_ps(out, res); k = 0; while (k < 8) { arr[i + k]->R = out[k]; ++k; } i += 8u; } } while (i < n) { const Interval* p = arr[i]; const float mi = fmaf(GF, (p->x2 - p->x1), p->M * alpha); arr[i]->R = fmaf(1.0f / mi, p->quadratic_term, fmaf(mi, p->N_factor, p->ordinate_factor)); ++i; }
}
__forceinline float Shag(float _m, float x1, float x2, float y1, float y2, float _N, float _r) {
const float diff = y2 - y1;
const float sign_mult = _mm_cvtss_f32(_mm_castsi128_ps(_mm_set1_epi32(0x3F800000u | (((((uint32_t)&diff)) & 0x80000000u) ^ 0x80000000u))));
textreturn _N == 1.0f ? fmaf(-(1.0f / _m), diff, x1 + x2) * 0.5f : _N == 2.0f ? fmaf(sign_mult / (_m * _m), diff * diff * _r, x1 + x2) * 0.5f : fmaf(sign_mult / powf(_m, _N), powf(diff, _N) * _r, x1 + x2) * 0.5f;
}
struct MortonND {
int dim;
int levels;
uint64_t scale;
std::vector<float> low, high;
std::vector<int> perm;
std::vector<uint64_t> invMask;
textMortonND(int D, int L, const float* lows, const float* highs, uint32_t seed) : dim(D), levels(L), scale(1ull), low(lows, lows + D), high(highs, highs + D), perm(D), invMask(D, 0) { int i = 0; while (i < D) { perm[i] = i; ++i; } uint32_t st = seed ? seed : 0x9E3779B9u; for (int k = D - 1; k > 0; --k) { float r; XOR_RAND(st, r); int j = (int)floorf(r * (k + 1)); int t = perm[k]; perm[k] = perm[j]; perm[j] = t; } for (int k = 0; k < D; ++k) { float r; XOR_RAND(st, r); invMask[k] = (r > 0.5f) ? (((uint64_t)1 << levels) - 1ull) : 0ull; } int maxbits = 63; int need = levels * D; if (need > maxbits) { levels = maxbits / D; if (levels < 1) levels = 1; } scale = 1ull << ((uint64_t)levels * (uint64_t)D); } __forceinline void map01ToPoint(float t, float* out) const noexcept { if (t < 0.0f) t = 0.0f; else if (t >= 1.0f) t = nextafterf(1.0f, 0.0f); uint64_t idx = (uint64_t)(t * (double)scale); if (idx >= scale) idx = scale - 1; std::vector<uint64_t> coord(dim, 0ull); for (int bit = 0; bit < levels; ++bit) { for (int d = 0; d < dim; ++d) { int pd = perm[d]; uint64_t b = (idx >> ((bit * dim) + d)) & 1ull; coord[pd] |= (b << bit); } } for (int d = 0; d < dim; ++d) { if (invMask[d]) coord[d] ^= invMask[d]; } const float invC = 1.0f / (float)((uint64_t)1 << levels); for (int d = 0; d < dim; ++d) { float c0 = low[d] + (high[d] - low[d]) * ((float)coord[d] * invC); float cs = (high[d] - low[d]) * invC; out[d] = fmaf(0.5f, cs, c0); } } __forceinline float pointTo01(const float* in) const noexcept { std::vector<uint64_t> coord(dim, 0ull); const float invC = 1.0f / (float)((uint64_t)1 << levels); for (int d = 0; d < dim; ++d) { float t = (in[d] - low[d]) / (high[d] - low[d]); if (t < 0.0f) t = 0.0f; else if (t > 0.99999994f) t = 0.99999994f; uint64_t ci = (uint64_t)(t * (float)((uint64_t)1 << levels)); if (ci >= ((uint64_t)1 << levels)) ci = ((uint64_t)1 << levels) - 1; coord[d] = ci; } for (int d = 0; d < dim; ++d) { if (invMask[d]) coord[d] ^= invMask[d]; } uint64_t idx = 0ull; for (int bit = 0; bit < levels; ++bit) { for (int d = 0; d < dim; ++d) { int pd = perm[d]; uint64_t b = (coord[pd] >> bit) & 1ull; idx |= (b << ((bit * dim) + d)); } } double tt = (double)idx / (double)scale; if (tt >= 1.0) tt = nextafter(1.0, 0.0); return (float)tt; }
};
struct ManipCost {
int n;
bool variableLen;
float targetX, targetY;
float minTheta;
text__forceinline float operator()(const float* q) const noexcept { const float* th = q; const float* L = variableLen ? (q + n) : nullptr; float x = 0.0f, y = 0.0f, phi = 0.0f; float penC = 0.0f; float Lc = 1.0f; for (int i = 0; i < n; ++i) { const float t = th[i]; float Li = variableLen ? L[i] : Lc; phi += t; float c, s; FABE13_COS(phi, c); FABE13_SIN(phi, s); x = fmaf(Li, c, x); y = fmaf(Li, s, y); } for (int i = 0; i < n - 1; ++i) { float dth = th[i + 1] - th[i]; if (fabsf(dth) < minTheta) { penC += minTheta - fabsf(dth); } } float dx = x - targetX; float dy = y - targetY; float dist = sqrtf(fmaf(dx, dx, dy * dy)); return dist + penC; }
};
__forceinline void HitTest2D_analytic(float x_param, float& out_x1, float& out_x2) {
const float a = gActiveMap.a;
const float inv_lenx = gActiveMap.inv_lenx;
const uint32_t scale = gActiveMap.scale;
const uint32_t scale_minus_1 = scale - 1u;
const float lenx = gActiveMap.lenx;
const float leny = gActiveMap.leny;
const float c = gActiveMap.c;
const uint8_t start = gActiveMap.start;
const int levels = gActiveMap.levels;
textfloat norm = (x_param - a) * inv_lenx; norm = fminf(fmaxf(norm, 0.0f), 0x1.fffffep-1f); uint32_t idx = (uint32_t)(norm * (float)scale); idx = idx > scale_minus_1 ? scale_minus_1 : idx; float sx = lenx, sy = leny; float x1 = a, x2 = c; uint8_t type = start; int l = levels - 1; while (l >= 0) { const uint32_t q = (idx >> (l * 2)) & 3u; const Step s = g_step_tbl[type][q]; type = s.next; sx *= 0.5f; sy *= 0.5f; x1 += s.dx ? sx : 0.0f; x2 += s.dy ? sy : 0.0f; --l; } out_x1 = x1 + sx * 0.5f; out_x2 = x2 + sy * 0.5f;
}
__forceinline float FindX2D_analytic(float px, float py) {
const float a = gActiveMap.a;
const float b = gActiveMap.b;
const float c = gActiveMap.c;
const float d = gActiveMap.d;
const float lenx = gActiveMap.lenx;
const float leny = gActiveMap.leny;
const uint32_t scale = gActiveMap.scale;
const uint8_t start = gActiveMap.start;
const int levels = gActiveMap.levels;
textconst float clamped_px = fminf(fmaxf(px, a), b); const float clamped_py = fminf(fmaxf(py, c), d); float sx = lenx, sy = leny; float x0 = a, y0 = c; uint32_t idx = 0u; uint8_t type = start; int l = 0; while (l < levels) { sx *= 0.5f; sy *= 0.5f; const float mx = x0 + sx; const float my = y0 + sy; const uint32_t tr = (uint32_t)((clamped_px > mx) & (clamped_py > my)); const uint32_t tl = (uint32_t)((clamped_px < mx) & (clamped_py > my)); const uint32_t dl = (uint32_t)((clamped_px < mx) & (clamped_py < my)); const uint32_t none = (uint32_t)(1u ^ (tr | tl | dl)); const uint32_t dd = (tr << 1) | tr | tl | (none << 1); const InvStep inv = g_inv_tbl[type][dd]; type = inv.next; idx = (idx << 2) | inv.q; const uint32_t dx = dd >> 1; const uint32_t dy = dd & 1u; x0 += dx ? sx : 0.0f; y0 += dy ? sy : 0.0f; ++l; } const float scale_recip = 1.0f / (float)scale; return fmaf((float)idx * scale_recip, lenx, a);
}
static __forceinline void agp_run_branch_mpi(const MortonND& map, const ManipCost& cost, int maxIter, float r, bool adaptive, float eps, unsigned seed, std::vector<Interval*>& H, std::vector<float>& bestQ, float& bestF, float& bestX, float& bestY) {
const int dim = cost.n + (cost.variableLen ? cost.n : 0);
textauto evalAt = [&](float t) -> float { std::vector<float> q(dim); map.map01ToPoint(t, q.data()); float f = cost(q.data()); float x = 0, y = 0, phi = 0; const float* th = q.data(); const float* L = cost.variableLen ? (q.data() + cost.n) : nullptr; float Lc = 1.0f; for (int i = 0; i < cost.n; ++i) { phi += th[i]; float Li = cost.variableLen ? L[i] : Lc; float c, s; FABE13_COS(phi, c); FABE13_SIN(phi, s); x = fmaf(Li, c, x); y = fmaf(Li, s, y); } if (f < bestF) { bestF = f; bestQ = q; bestX = x; bestY = y; } return f; }; float a = 0.0f, b = 1.0f; float f_a = evalAt(a); float f_b = evalAt(b); Interval* root = new Interval(a, b, f_a, f_b, (float)dim); float Mmax = root->M; float m = r * Mmax; root->ChangeCharacteristic(m); H.clear(); H.reserve((size_t)maxIter); H.push_back(root); std::make_heap(H.begin(), H.end(), ComparePtr); float dmax = b - a; const float initial_len = dmax; const float thr03 = 0.3f * initial_len; const float inv_thr03 = 1.0f / thr03; int it = 0; const int rank = g_world->rank(); const int world = g_world->size(); while (it < maxIter) { while (g_world->iprobe(boost::mpi::any_source, 0)) { CtrlMsg in; g_world->recv(boost::mpi::any_source, 0, in); if (in.kind) { if (!rank) { break; } else { return; } } else { float sx = in.xchg.s_x1, ex = in.xchg.e_x1; if (sx < 0.0f) sx = 0.0f; if (ex > 1.0f) ex = 1.0f; if (ex <= sx) continue; std::vector<float> tmp(dim); map.map01ToPoint(sx, tmp.data()); float y1 = cost(tmp.data()); map.map01ToPoint(ex, tmp.data()); float y2 = cost(tmp.data()); Interval* inj = new Interval(sx, ex, y1, y2, (float)dim); inj->ChangeCharacteristic(m); if (!H.empty()) { Interval* top = H.front(); if (inj->R > 1.15f * top->R) { inj->R = in.xchg.Rtop; H.emplace_back(inj); std::push_heap(H.begin(), H.end(), ComparePtr); } } else { H.emplace_back(inj); std::push_heap(H.begin(), H.end(), ComparePtr); } } } std::pop_heap(H.begin(), H.end(), ComparePtr); Interval* cur = H.back(); H.pop_back(); const float x1 = cur->x1, x2 = cur->x2, y1 = cur->y1, y2 = cur->y2; float tNew = Shag(m, x1, x2, y1, y2, (float)dim, r); if (tNew < a) tNew = a; else if (tNew > b) tNew = b; float fNew = evalAt(tNew); Interval* left = new Interval(x1, tNew, y1, fNew, (float)dim); Interval* right = new Interval(tNew, x2, fNew, y2, (float)dim); float Mloc = left->M > right->M ? left->M : right->M; if (adaptive) { float len1 = tNew - x1, len2 = x2 - tNew; if (len1 + len2 == dmax) { dmax = len1 > len2 ? len1 : len2; for (auto p : H) { float L = p->x2 - p->x1; if (L > dmax) dmax = L; } } if ((thr03 > dmax && !(it % 3)) || (10.0f * dmax < initial_len)) { if (Mloc > Mmax) { Mmax = Mloc; m = r * Mmax; } float progress = 1.0f - (dmax * inv_thr03); float alpha = progress * progress; float beta = 2.0f - alpha; float MULT = (Mmax / dmax); float global_coeff = fmaf(MULT, r, -MULT); float GF = beta * global_coeff; left->ChangeCharacteristic(fmaf(GF, len1, left->M * alpha)); right->ChangeCharacteristic(fmaf(GF, len2, right->M * alpha)); size_t sz = H.size(); if (sz < 64u) { size_t i = 0; while (i < sz) { Interval* p = H[i]; float len = p->x2 - p->x1; p->ChangeCharacteristic(fmaf(GF, len, p->M * alpha)); ++i; } } else { RecomputeR_AffineM_AVX2(H.data(), sz, GF, alpha); } std::make_heap(H.begin(), H.end(), ComparePtr); } else { if (Mloc > Mmax) { if (Mloc < 1.15f * Mmax) { Mmax = Mloc; m = r * Mmax; left->ChangeCharacteristic(m); right->ChangeCharacteristic(m); } else { Mmax = Mloc; m = r * Mmax; left->ChangeCharacteristic(m); right->ChangeCharacteristic(m); size_t sz = H.size(); if (sz < 64u) { size_t i = 0; while (i < sz) { H[i]->ChangeCharacteristic(m); ++i; } } else { RecomputeR_ConstM_AVX2(H.data(), sz, m); } std::make_heap(H.begin(), H.end(), ComparePtr); } } else { left->ChangeCharacteristic(m); right->ChangeCharacteristic(m); } } } else { if (Mloc > Mmax) { if (Mloc < 1.15f * Mmax) { Mmax = Mloc; m = r * Mmax; left->ChangeCharacteristic(m); right->ChangeCharacteristic(m); } else { Mmax = Mloc; m = r * Mmax; left->ChangeCharacteristic(m); right->ChangeCharacteristic(m); size_t sz = H.size(); if (sz < 64u) { size_t i = 0; while (i < sz) { H[i]->ChangeCharacteristic(m); ++i; } } else { RecomputeR_ConstM_AVX2(H.data(), sz, m); } std::make_heap(H.begin(), H.end(), ComparePtr); } } else { left->ChangeCharacteristic(m); right->ChangeCharacteristic(m); } } H.push_back(left); std::push_heap(H.begin(), H.end(), ComparePtr); H.push_back(right); std::push_heap(H.begin(), H.end(), ComparePtr); Interval* top = H.front(); float interval_len = top->x2 - top->x1; const float p = fmaf(-1.0f / initial_len, dmax, 1.0f); const int T = (int)fmaf(-expm1f(p), 264.0f, 277.0f); bool want_term = (interval_len < eps) || (it == maxIter - 1); if (!(it % T) || want_term) { CtrlMsg out; out.kind = want_term; out.xchg.s_x1 = top->x1; out.xchg.e_x1 = top->x2; out.xchg.s_x2 = 0.0f; out.xchg.e_x2 = 0.0f; out.xchg.Rtop = top->R; for (int i = 0; i < world; ++i) { if (i != rank) g_world->isend(i, 0, out); } if (want_term) { break; } } ++it; }
}
struct BestPacket {
float bestF;
int dim;
float bestX;
float bestY;
texttemplate<typename Archive> void serialize(Archive& ar, const unsigned int) { ar& bestF& dim& bestX& bestY; }
};
extern "C" __declspec(dllexport) __declspec(noalias)
void AGP_Manip2D(int nSegments, bool variableLengths, float minTheta,
float targetX, float targetY, int peanoLevels, int maxIterPerBranch,
float r, bool adaptiveMode, float epsilon, unsigned int seed,
float** out_bestQ, size_t* out_bestQLen,
float* out_bestX, float* out_bestY, float* out_bestF) {
const int dim = nSegments + (variableLengths ? nSegments : 0);
textconst float thetaMin = -3.14159265359f; const float thetaMax = 3.14159265359f; const float lenMin = 0.1f; const float lenMax = 2.0f; std::vector<float> low(dim), high(dim); for (int i = 0; i < nSegments; ++i) { low[i] = thetaMin; high[i] = thetaMax; } if (variableLengths) { for (int i = 0; i < nSegments; ++i) { low[nSegments + i] = lenMin; high[nSegments + i] = lenMax; } } ManipCost cost{ nSegments, variableLengths, targetX, targetY, minTheta }; const int rank = g_world->rank(); const int world = g_world->size(); MortonND map(dim, peanoLevels, low.data(), high.data(), seed + 0x9E3779B9u * rank); std::vector<Interval*> H; std::vector<float> bestQ; float bestF = FLT_MAX, bx = 0.0f, by = 0.0f; agp_run_branch_mpi(map, cost, maxIterPerBranch, r, adaptiveMode, epsilon, seed, H, bestQ, bestF, bx, by); BestPacket me{ bestF, dim, bx, by }; if (!rank) { std::vector<float> winnerQ = bestQ; float winF = bestF, wx = bx, wy = by; for (int i = 1; i < world; ++i) { BestPacket bp; g_world->recv(i, 2, bp); std::vector<float> qin; g_world->recv(i, 3, qin); if (bp.bestF < winF) { winF = bp.bestF; wx = bp.bestX; wy = bp.bestY; winnerQ = qin; } } *out_bestQLen = (size_t)winnerQ.size(); *out_bestQ = (float*)CoTaskMemAlloc(sizeof(float) * (*out_bestQLen)); std::memcpy(*out_bestQ, winnerQ.data(), sizeof(float) * (*out_bestQLen)); *out_bestX = wx; *out_bestY = wy; *out_bestF = winF; } else { g_world->send(0, 2, me); g_world->send(0, 3, bestQ); }
}
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline int AgpInit(int peanoLevel, float a, float b, float c, float d) {
g_env = new boost::mpi::environment();
g_world = new boost::mpi::communicator();
textconst int rank = g_world->rank(); const int world_size = g_world->size(); if (world_size == 4) { switch (rank) { case 0: new(&gActiveMap) Peano2DMap(peanoLevel, a, b, c, d, static_cast<uint8_t>(Top)); break; case 1: new(&gActiveMap) Peano2DMap(peanoLevel, a, b, c, d, static_cast<uint8_t>(Down)); break; case 2: new(&gActiveMap) Peano2DMap(peanoLevel, a, b, c, d, static_cast<uint8_t>(Left)); break; default: new(&gActiveMap) Peano2DMap(peanoLevel, a, b, c, d, static_cast<uint8_t>(Right)); } } else { } return rank;
}
__forceinline float ShekelFunc(float x, float seed) {
int i = 0;
float st = seed, r1, r2, res = 0.0f;
textwhile (i < 10) { XOR_RAND(st, r1); float xp = fmaf(-r1, 10.0f, x); XOR_RAND(st, r1); XOR_RAND(st, r2); float d = fmaf(fmaf(r1, 20.0f, 5.0f), xp * xp, fmaf(r2, 0.2f, 1.0f)); d = copysignf(fmaxf(fabsf(d), FLT_MIN), d); res -= 1.0f / d; ++i; } return res;
}
__forceinline float RastriginFunc(float x1, float x2) {
const float t = fmaf(x1, x1, x2 * x2);
float c1, c2;
FABE13_COS(6.28318530717958647692f * x1, c1);
FABE13_COS(6.28318530717958647692f * x2, c2);
return (t - fmaf(c1 + c2, 10.0f, -14.6f)) * fmaf(-t, 0.25f, 18.42f);
}
__forceinline float HillFunc(float x, float seed) {
int j = 0;
__declspec(align(32)) float ang[14u];
float st = 6.28318530717958647692f * x;
textwhile (j < 14) { ang[j] = st * (float)(j + 1); ++j; } __declspec(align(32)) float sv[14u], cv[14u]; FABE13_SINCOS(ang, sv, cv, 14u); float state = seed, r1, r2; XOR_RAND(state, r1); float res = fmaf(r1, 2.0f, -1.1f); --j; while (j >= 0) { XOR_RAND(state, r1); XOR_RAND(state, r2); res += fmaf(fmaf(r1, 2.0f, -1.1f), sv[j], fmaf(r2, 2.0f, -1.1f) * cv[j]); --j; } return res;
}
__forceinline float GrishaginFunc(float x1, float x2, float seed) {
int j = 0;
__declspec(align(32)) float aj[8u], ak[8u];
textwhile (j < 8) { float pj = 3.14159265358979323846f * (float)(j + 1); aj[j] = pj * x1; ak[j] = pj * x2; ++j; } __declspec(align(32)) float sj[8u], cj[8u], sk[8u], ck[8u]; FABE13_SINCOS(aj, sj, cj, 8u); FABE13_SINCOS(ak, sk, ck, 8u); --j; float p1 = 0.0f, p2 = 0.0f; float st = seed, r1, r2; while (j >= 0) { size_t k = 0u; while (k < 8u) { float s = sj[j] * sj[j]; float c = ck[k] * ck[k]; XOR_RAND_GRSH(st, r1); XOR_RAND_GRSH(st, r2); p1 = fmaf(r1, s, fmaf(r2, c, p1)); XOR_RAND_GRSH(st, r1); XOR_RAND_GRSH(st, r2); p2 = fmaf(-r1, c, fmaf(r2, s, p2)); ++k; } --j; } return -sqrtf(fmaf(p1, p1, p2 * p2));
}
extern "C" __declspec(dllexport) __declspec(noalias)
void AGP_1D(float global_iterations, float a, float b, float r, bool mode, float epsilon, float seed, float** out_data, size_t* out_len) {
Slab* slab = tls.local();
slab->current = slab->base;
textint schetchick = 0; const float initial_length = b - a; float dmax = initial_length; const float threshold_03 = 0.3f * initial_length; const float inv_threshold_03 = 1.0f / threshold_03; const float start_val = ShekelFunc(a, seed); float best_f = ShekelFunc(b, seed); float x_Rmax_1 = a; float x_Rmax_2 = b; float y_Rmax_1 = start_val; float y_Rmax_2 = best_f; std::vector<float, boost::alignment::aligned_allocator<float, 16u>> Extr; std::vector<Interval*, boost::alignment::aligned_allocator<Interval*, 64u>> R; Extr.reserve((size_t)global_iterations << 2u); R.reserve((size_t)global_iterations << 1u); R.emplace_back(new Interval(a, b, start_val, best_f, 1.0f)); float Mmax = R.front()->M; float m = r * Mmax; while (true) { const float new_point = Shag(m, x_Rmax_1, x_Rmax_2, y_Rmax_1, y_Rmax_2, 1.0f, r); const float new_value = ShekelFunc(new_point, seed); if (new_value < best_f) { best_f = new_value; Extr.emplace_back(best_f); Extr.emplace_back(new_point); } std::pop_heap(R.begin(), R.end(), ComparePtr); const Interval* pro = R.back(); const float new_x1 = pro->x1; const float new_x2 = pro->x2; const float len2 = new_x2 - new_point; const float len1 = new_point - new_x1; const float interval_len = len1 < len2 ? len1 : len2; if (++schetchick == (int)global_iterations || interval_len < epsilon) { Extr.emplace_back((float)schetchick); Extr.emplace_back(interval_len); *out_len = Extr.size(); *out_data = (float*)CoTaskMemAlloc(sizeof(float) * (*out_len)); memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len)); return; } Interval* curr = new Interval(new_x1, new_point, pro->y1, new_value, 1.0f); Interval* curr1 = new Interval(new_point, new_x2, new_value, pro->y2, 1.0f); const float currM = curr->M > curr1->M ? curr->M : curr1->M; const size_t r_size = R.size(); if (mode) { if (len2 + len1 == dmax) { dmax = len2 > len1 ? len2 : len1; size_t i = 0u; while (i < r_size) { const float li = R[i]->x2 - R[i]->x1; if (li > dmax) dmax = li; ++i; } } if (threshold_03 > dmax && !(schetchick % 3) || 10.0f * dmax < initial_length) { if (currM > Mmax) { Mmax = currM; m = r * Mmax; } const float progress = fmaf(-inv_threshold_03, dmax, 1.0f); const float alpha = fmaf(progress, progress, 1.0f); const float betta = 2.0f - alpha; const float MULT = (1.0f / dmax) * Mmax; const float global_coeff = fmaf(MULT, r, -MULT); const float GF = betta * global_coeff; curr->ChangeCharacteristic(fmaf(GF, len1, curr->M * alpha)); curr1->ChangeCharacteristic(fmaf(GF, len2, curr1->M * alpha)); if (r_size < 64u) { size_t i = 0u; while (i < r_size) { R[i]->ChangeCharacteristic(fmaf(GF, R[i]->x2 - R[i]->x1, R[i]->M * alpha)); ++i; } } else { RecomputeR_AffineM_AVX2(R.data(), r_size, GF, alpha); } std::make_heap(R.begin(), R.end(), ComparePtr); } else { if (currM > Mmax) { if (currM < 1.15f * Mmax) { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); } else { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); if (r_size < 64u) { size_t i = 0u; while (i < r_size) { R[i]->ChangeCharacteristic(m); ++i; } } else { RecomputeR_ConstM_AVX2(R.data(), r_size, m); } std::make_heap(R.begin(), R.end(), ComparePtr); } } else { curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); } } } else { if (currM > Mmax) { if (currM < 1.15f * Mmax) { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); } else { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); if (r_size < 64u) { size_t i = 0u; while (i < r_size) { R[i]->ChangeCharacteristic(m); ++i; } } else { RecomputeR_ConstM_AVX2(R.data(), r_size, m); } std::make_heap(R.begin(), R.end(), ComparePtr); } } else { curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); } } R.back() = curr; std::push_heap(R.begin(), R.end(), ComparePtr); R.emplace_back(curr1); std::push_heap(R.begin(), R.end(), ComparePtr); const Interval* top = R.front(); x_Rmax_1 = top->x1; x_Rmax_2 = top->x2; y_Rmax_1 = top->y1; y_Rmax_2 = top->y2; }
}
extern "C" __declspec(dllexport) __declspec(noalias)
void AGP_2D(
const float N,
const float global_iterations,
const float a,
const float b,
const float c,
const float d,
const float r,
const bool mode,
const float epsilon,
const float seed,
float** const __restrict out_data,
size_t* const __restrict out_len) noexcept
{
Slab* const __restrict slab = tls.local();
slab->current = slab->base;
textint schetchick = 0; int no_improve = 0; const int rank = g_world->rank(); const int world_size = g_world->size(); while (g_world->iprobe(boost::mpi::any_source, 0)) { CtrlMsg dummy; g_world->recv(boost::mpi::any_source, 0, dummy); } const float inv_divider = ldexpf(1.0f, -((gActiveMap.levels << 1) + 1)); const float x_addition = (b - a) * inv_divider; const float y_addition = (d - c) * inv_divider; const float true_start = a + x_addition; const float true_end = b - x_addition; float x_Rmax_1 = true_start; float x_Rmax_2 = true_end; const float initial_length = x_Rmax_2 - x_Rmax_1; float dmax = initial_length; const float threshold_03 = 0.3f * initial_length; const float inv_threshold_03 = 1.0f / threshold_03; const float start_val = rank % 3 ? RastriginFunc(true_end, d - y_addition) : RastriginFunc(true_start, c + y_addition); float best_f = rank % 2 ? RastriginFunc(true_start, d - y_addition) : RastriginFunc(true_end, c + y_addition); float y_Rmax_1 = start_val; float y_Rmax_2 = best_f; std::vector<float, boost::alignment::aligned_allocator<float, 16u>> Extr; std::vector<Interval* __restrict, boost::alignment::aligned_allocator<Interval* __restrict, 64u>> R; Extr.clear(); Extr.reserve(static_cast<size_t>(global_iterations) << 2u); R.clear(); R.reserve(static_cast<size_t>(global_iterations) << 1u); R.emplace_back(new Interval(true_start, true_end, start_val, best_f, 2.0f)); const Interval* __restrict top_ptr; float Mmax = R.front()->M; float m = r * Mmax; while (true) { const float interval_len = x_Rmax_2 - x_Rmax_1; const bool stagnation = no_improve > 100 && schetchick > 270; const float p = fmaf(-1.0f / initial_length, dmax, 1.0f); while (g_world->iprobe(boost::mpi::any_source, 0)) { CtrlMsg in; g_world->recv(boost::mpi::any_source, 0, in); if (in.kind) { if (!rank) { Extr.emplace_back(static_cast<float>(schetchick)); Extr.emplace_back(interval_len); *out_len = Extr.size(); *out_data = reinterpret_cast<float* __restrict>(CoTaskMemAlloc(sizeof(float) * (*out_len))); memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len)); } return; } const float sx = FindX2D_analytic(in.xchg.s_x1, in.xchg.s_x2); const float ex = FindX2D_analytic(in.xchg.e_x1, in.xchg.e_x2); Interval* const __restrict injected = new Interval(sx, ex, RastriginFunc(in.xchg.s_x1, in.xchg.s_x2), RastriginFunc(in.xchg.e_x1, in.xchg.e_x2), 2.0f); injected->ChangeCharacteristic(m); if (injected->R > 1.15f * top_ptr->R) { const float k = stagnation ? fmaf((1.0f / expm1f(1.0f)) * 1.0f, expm1f(p), 0.3f) : fmaf((1.0f / expm1f(1.0f)) * 0.6f, expm1f(p), 0.6f); injected->R = in.xchg.Rtop * k; R.emplace_back(injected); std::push_heap(R.begin(), R.end(), ComparePtr); } } const int T = static_cast<int>(fmaf(-expm1f(p), 264.0f, 277.0f)); const bool want_term = interval_len < epsilon || schetchick == static_cast<int>(global_iterations); if (!(++schetchick % T) || stagnation || want_term) { CtrlMsg out; out.kind = want_term; if (!out.kind) { float s_x1, s_x2, e_x1, e_x2; HitTest2D_analytic(top_ptr->x1, s_x1, s_x2); HitTest2D_analytic(top_ptr->x2, e_x1, e_x2); out.xchg = CrossMsg{ s_x1, s_x2, e_x1, e_x2, top_ptr->R }; } int i = 0; while (i < world_size) { if (i != rank) g_world->isend(i, 0, out); ++i; } if (out.kind) { if (!rank) { Extr.emplace_back(static_cast<float>(schetchick)); Extr.emplace_back(interval_len); *out_len = Extr.size(); *out_data = reinterpret_cast<float* __restrict>(CoTaskMemAlloc(sizeof(float) * (*out_len))); memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len)); } return; } } const float new_point = Shag(m, x_Rmax_1, x_Rmax_2, y_Rmax_1, y_Rmax_2, 2.0f, r); float new_x1_val, new_x2_val; HitTest2D_analytic(new_point, new_x1_val, new_x2_val); const float new_value = RastriginFunc(new_x1_val, new_x2_val); if (new_value < best_f) { best_f = new_value; Extr.emplace_back(best_f); Extr.emplace_back(new_x1_val); Extr.emplace_back(new_x2_val); no_improve = 0; } else { ++no_improve; } std::pop_heap(R.begin(), R.end(), ComparePtr); Interval* const __restrict promejutochny_otrezok = R.back(); const float segment_x1 = promejutochny_otrezok->x1; const float segment_x2 = promejutochny_otrezok->x2; const float len2 = segment_x2 - new_point; const float len1 = new_point - segment_x1; Interval* const __restrict curr = new Interval(segment_x1, new_point, promejutochny_otrezok->y1, new_value, 2.0f); Interval* const __restrict curr1 = new Interval(new_point, segment_x2, new_value, promejutochny_otrezok->y2, 2.0f); const float currM = curr->M > curr1->M ? curr->M : curr1->M; const size_t r_size = R.size(); if (mode) { if (len2 + len1 == dmax) { dmax = len2 > len1 ? len2 : len1; size_t i = 0u;
#pragma loop(ivdep)
while (i < r_size) {
const float len_item = R[i]->x2 - R[i]->x1;
if (len_item > dmax) dmax = len_item;
++i;
}
}
textif (threshold_03 > dmax && !(schetchick % 3) || 10.0f * dmax < initial_length) { if (currM > Mmax) { Mmax = currM; m = r * Mmax; } const float progress = fmaf(-inv_threshold_03, dmax, 1.0f); const float alpha = fmaf(progress, progress, 1.0f); const float betta = 2.0f - alpha; const float MULTIPLIER = (1.0f / dmax) * Mmax; const float global_coeff = fmaf(MULTIPLIER, r, -MULTIPLIER); const float GLOBAL_FACTOR = betta * global_coeff; curr->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len1, curr->M * alpha)); curr1->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len2, curr1->M * alpha)); if (r_size < 64u) { size_t i = 0u;
#pragma loop(ivdep)
while (i < r_size) {
R[i]->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, R[i]->x2 - R[i]->x1, R[i]->M * alpha));
++i;
}
}
else {
RecomputeR_AffineM_AVX2(R.data(), r_size, GLOBAL_FACTOR, alpha);
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
else {
if (currM > Mmax) {
if (currM < 1.15f * Mmax) {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
else {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
textif (r_size < 64u) { size_t i = 0u;
#pragma loop(ivdep)
while (i < r_size) {
R[i]->ChangeCharacteristic(m);
++i;
}
}
else {
RecomputeR_ConstM_AVX2(R.data(), r_size, m);
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
}
else {
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
}
}
else {
if (currM > Mmax) {
if (currM < 1.15f * Mmax) {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
else {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
textif (r_size < 64u) { size_t i = 0u;
#pragma loop(ivdep)
while (i < r_size) {
R[i]->ChangeCharacteristic(m);
++i;
}
}
else {
RecomputeR_ConstM_AVX2(R.data(), r_size, m);
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
}
else {
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
}
textR.back() = curr; std::push_heap(R.begin(), R.end(), ComparePtr); R.emplace_back(curr1); std::push_heap(R.begin(), R.end(), ComparePtr); top_ptr = R.front(); x_Rmax_1 = top_ptr->x1; x_Rmax_2 = top_ptr->x2; y_Rmax_1 = top_ptr->y1; y_Rmax_2 = top_ptr->y2; }
}
struct RunParams {
int nSegments;
uint8_t varLen;
float minTheta;
float tx, ty;
int levels, maxIter;
float r;
uint8_t adaptive;
float eps;
unsigned int seed;
texttemplate<typename Archive> void serialize(Archive& ar, const unsigned int) { ar& nSegments& varLen& minTheta& tx& ty& levels& maxIter& r& adaptive& eps& seed; }
};
extern "C" __declspec(dllexport) __declspec(noalias)
void AgpStartManipND(
int nSegments,
bool variableLengths,
float minTheta,
float targetX,
float targetY,
int peanoLevels,
int maxIterPerBranch,
float r,
bool adaptiveMode,
float epsilon,
unsigned int seed)
{
RunParams p;
p.nSegments = nSegments;
p.varLen = (uint8_t)variableLengths;
p.minTheta = minTheta;
p.tx = targetX;
p.ty = targetY;
p.levels = peanoLevels;
p.maxIter = maxIterPerBranch;
p.r = r;
p.adaptive = (uint8_t)adaptiveMode;
p.eps = epsilon;
p.seed = seed;
textconst int rank = g_world->rank(); const int world = g_world->size(); for (int i = 1; i < world; ++i) { if (i != rank) g_world->isend(i, 1, p); }
}
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AgpWaitStartAndRun() {
while (true) {
if (g_world->iprobe(0, 1)) {
RunParams p;
g_world->recv(0, 1, p);
float* q = nullptr;
size_t qlen = 0;
float bx = 0, by = 0, bf = 0;
textAGP_Manip2D( p.nSegments, (bool)p.varLen, p.minTheta, p.tx, p.ty, p.levels, p.maxIter, p.r, (bool)p.adaptive, p.eps, p.seed, &q, &qlen, &bx, &by, &bf ); if (q) CoTaskMemFree(q); } Sleep(0); }
}
extern "C" __declspec(dllexport) __declspec(noalias)
void AgpStartWorkers() {
int i = 1;
const int world = g_world->size();
textwhile (i < world) { g_world->isend(i, 1, 0); ++i; }
}
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AgpStartWorkersLegacy() {
AgpStartWorkers();
}
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AGP_Free(float* p) {
CoTaskMemFree(p);
} это мой код в длл библиотеке сейчас, раньше у меня был одномерный и двумерный алгоритм, сейчас добавлен многомерный - он не использует стандартные двумерные кривые пеано гильберта а использует пространственные развёртки гиперкубы - алгоритм работает правильно но мне нужен анализ и понимание как он работает - работать базово он должен также как и одномерные и двумерные версии - сама идея алгоритма не меняется - но что тут изменилось так это целевая функция - так как теперь целевая функция это функция минимизации положения захвата робота манипулятора - также заметь там есть метод agpinit - он вызывается один раз при загрузке основного приложения и в предыдущих случаях он был нужен так как экономил время - развёртки пеано строились один раз при инициализации а не на каждое нажатие кнопки, может в новом случае и не требуется явной инициализации гиперкубов во время старта приложения - но с точки зрения скорости возможно стоит что-то добавить в этот метод для нового случая (новый случай требует много ориентаций - поэтому я запускаю его на большем числе процессов чем 4 для лучшего параллелизма) - для экономии памяти и времени - возможно туда стоит вынести какую-то инициализации или резервирование памяти - дай более чёткий ответ что стоит с этим сделать, также обрати внимание на код в форме #include "MyForm.h"
#include <float.h>
using namespace System;
using namespace System::Windows::Forms;
typedef int(__cdecl* PInit)(int, float, float, float, float);
typedef void(__cdecl* PStartWorkers)();
[STAThread]
int main() {
HMODULE h = LoadLibraryW(L"TEST_FUNC.dll");
auto AgpInit = (PInit)GetProcAddress(h, "AgpInit");
auto AgpWaitStartAndRun = (PStartWorkers)GetProcAddress(h, "AgpWaitStartAndRun");
const int rank = AgpInit(12, -2.2f, 1.8f, -2.2f, 1.8f);
if (!rank) {
Application::EnableVisualStyles();
Application::SetCompatibleTextRenderingDefault(false);
Application::Run(gcnew TESTAGP::MyForm(h));
}
else {
AgpWaitStartAndRun();
}
return 0;
} и #pragma once
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#include <stdint.h>
using namespace System;
using namespace System::Drawing;
using namespace System::Windows::Forms;
using namespace System::Collections::Generic;
using namespace System::Drawing::Drawing2D;
typedef void(__cdecl* P_MANIP)(
int nSegments,
bool variableLengths,
float deltaThetaMax,
float targetX, float targetY,
int peanoLevels, int maxIterPerBranch,
float r, bool adaptiveMode, float epsilon, unsigned int seed,
float** out_bestQ, size_t* out_bestQLen,
float* out_bestX, float* out_bestY, float* out_bestF
);
typedef void(__cdecl* P_FREE)(float*);
typedef int(__cdecl* P_INIT)(int, float, float, float, float);
typedef void(__cdecl* P_WAIT)();
typedef void(__cdecl* P_START)(
int nSegments,
bool variableLengths,
float deltaThetaMax,
float targetX, float targetY,
int peanoLevels, int maxIterPerBranch,
float r, bool adaptiveMode, float epsilon, unsigned int seed
);
namespace TESTAGP {
textpublic ref class MyForm : public Form { public: MyForm(HMODULE hLib) : hLib(hLib) { this->SetStyle(ControlStyles::AllPaintingInWmPaint | ControlStyles::UserPaint | ControlStyles::OptimizedDoubleBuffer, true); this->Text = L"AGP Manipulator 2D - Оптимизация положения захвата"; this->ClientSize = System::Drawing::Size(1000, 700); this->Resize += gcnew EventHandler(this, &MyForm::OnResize); fManip = (P_MANIP)GetProcAddress(hLib, "AGP_Manip2D"); pFree = (P_FREE)GetProcAddress(hLib, "AGP_Free"); pStart = (P_START)GetProcAddress(hLib, "AgpStartManipND"); angles = gcnew List<float>(0.0f); lengths = gcnew List<float>(0.0f); InitUI(); ResetRandomConfig(); } protected: ~MyForm() {} private: HMODULE hLib; P_MANIP fManip; P_FREE pFree; P_START pStart; int nSegments; bool variableLengths; List<float>^ angles; List<float>^ lengths; CheckBox^ cbVarLen; NumericUpDown^ nudMinTheta; NumericUpDown^ nudBaseLength; NumericUpDown^ nudStretchFactor; NumericUpDown^ nudTargetX; NumericUpDown^ nudTargetY; NumericUpDown^ nudLevels; NumericUpDown^ nudMaxIter; CheckBox^ cbAdaptive; NumericUpDown^ nudR; NumericUpDown^ nudEps; Button^ btnAdd; Button^ btnRem; Button^ btnOptimize; Label^ lblInfo; System::UInt32 rngState = 0xA5C39E0Du; void InitUI() { int y = 10, w = 180, h = 24, pad = 8; int currentX = 10; Label^ L; L = gcnew Label(); L->Text = L"Мин. угол между звеньями (рад)"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudMinTheta = gcnew NumericUpDown(); nudMinTheta->Left = currentX; nudMinTheta->Top = y + h + 2; nudMinTheta->Width = w; nudMinTheta->DecimalPlaces = 3; nudMinTheta->Minimum = (Decimal)0.01; nudMinTheta->Maximum = (Decimal)3.14159; nudMinTheta->Value = (Decimal)0.5; this->Controls->Add(nudMinTheta); currentX += w + 20; L = gcnew Label(); L->Text = L"Базовая длина звена"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudBaseLength = gcnew NumericUpDown(); nudBaseLength->Left = currentX; nudBaseLength->Top = y + h + 2; nudBaseLength->Width = w; nudBaseLength->DecimalPlaces = 2; nudBaseLength->Minimum = (Decimal)0.1; nudBaseLength->Maximum = (Decimal)3.0; nudBaseLength->Value = (Decimal)1.0; this->Controls->Add(nudBaseLength); currentX += w + 20; L = gcnew Label(); L->Text = L"Макс. коэф. растяжения/сжатия"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudStretchFactor = gcnew NumericUpDown(); nudStretchFactor->Left = currentX; nudStretchFactor->Top = y + h + 2; nudStretchFactor->Width = w; nudStretchFactor->DecimalPlaces = 2; nudStretchFactor->Minimum = (Decimal)1.0; nudStretchFactor->Maximum = (Decimal)3.0; nudStretchFactor->Value = (Decimal)1.5; this->Controls->Add(nudStretchFactor); currentX += w + 20; cbVarLen = gcnew CheckBox(); cbVarLen->Text = L"Переменные длины звеньев"; cbVarLen->Left = currentX; cbVarLen->Top = y + h + 2; cbVarLen->Width = w; cbVarLen->Checked = false; this->Controls->Add(cbVarLen); currentX += w + 20; y += h * 2 + pad + 10; currentX = 10; L = gcnew Label(); L->Text = L"Целевая позиция X"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudTargetX = gcnew NumericUpDown(); nudTargetX->Left = currentX; nudTargetX->Top = y + h + 2; nudTargetX->Width = w; nudTargetX->DecimalPlaces = 2; nudTargetX->Minimum = (Decimal)-10.0; nudTargetX->Maximum = (Decimal)10.0; nudTargetX->Value = (Decimal)3.5; this->Controls->Add(nudTargetX); currentX += w + 20; L = gcnew Label(); L->Text = L"Целевая позиция Y"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudTargetY = gcnew NumericUpDown(); nudTargetY->Left = currentX; nudTargetY->Top = y + h + 2; nudTargetY->Width = w; nudTargetY->DecimalPlaces = 2; nudTargetY->Minimum = (Decimal)-10.0; nudTargetY->Maximum = (Decimal)10.0; nudTargetY->Value = (Decimal)1.0; this->Controls->Add(nudTargetY); currentX += w + 20; L = gcnew Label(); L->Text = L"Глубина развертки"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudLevels = gcnew NumericUpDown(); nudLevels->Left = currentX; nudLevels->Top = y + h + 2; nudLevels->Width = w; nudLevels->Minimum = 4; nudLevels->Maximum = 20; nudLevels->Value = 12; this->Controls->Add(nudLevels); currentX += w + 20; L = gcnew Label(); L->Text = L"Параметр надежности (r)"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudR = gcnew NumericUpDown(); nudR->Left = currentX; nudR->Top = y + h + 2; nudR->Width = w; nudR->DecimalPlaces = 2; nudR->Minimum = (Decimal)1.0; nudR->Maximum = (Decimal)10.0; nudR->Value = (Decimal)2.5; this->Controls->Add(nudR); currentX += w + 20; cbAdaptive = gcnew CheckBox(); cbAdaptive->Text = L"Адаптивная схема"; cbAdaptive->Left = currentX; cbAdaptive->Top = y + h + 2; cbAdaptive->Width = w; cbAdaptive->Checked = true; this->Controls->Add(cbAdaptive); y += h * 2 + pad + 10; currentX = 10; L = gcnew Label(); L->Text = L"Точность (epsilon)"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudEps = gcnew NumericUpDown(); nudEps->Left = currentX; nudEps->Top = y + h + 2; nudEps->Width = w; nudEps->DecimalPlaces = 6; nudEps->Minimum = (Decimal)0.000001; nudEps->Maximum = (Decimal)0.1; nudEps->Value = (Decimal)0.0001; this->Controls->Add(nudEps); currentX += w + 20; L = gcnew Label(); L->Text = L"Макс. число итераций"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudMaxIter = gcnew NumericUpDown(); nudMaxIter->Left = currentX; nudMaxIter->Top = y + h + 2; nudMaxIter->Width = w; nudMaxIter->Minimum = 10; nudMaxIter->Maximum = 50000; nudMaxIter->Value = 1000; this->Controls->Add(nudMaxIter); currentX += 200; btnAdd = gcnew Button(); btnAdd->Text = L"+ Звено"; btnAdd->Left = currentX; btnAdd->Top = y + h + 2; btnAdd->Width = 80; btnAdd->Click += gcnew EventHandler(this, &MyForm::OnAddClick); this->Controls->Add(btnAdd); currentX += 85; btnRem = gcnew Button(); btnRem->Text = L"- Звено"; btnRem->Left = currentX; btnRem->Top = y + h + 2; btnRem->Width = 80; btnRem->Click += gcnew EventHandler(this, &MyForm::OnRemClick); this->Controls->Add(btnRem); currentX += 125; btnOptimize = gcnew Button(); btnOptimize->Text = L"Оптимизировать"; btnOptimize->Left = currentX; btnOptimize->Top = y + h + 2; btnOptimize->Width = 120; btnOptimize->Click += gcnew EventHandler(this, &MyForm::OnOptimizeClick); this->Controls->Add(btnOptimize); currentX += 125; lblInfo = gcnew Label(); lblInfo->Left = currentX; lblInfo->Top = y; lblInfo->Width = 250; lblInfo->Height = 60; lblInfo->BorderStyle = BorderStyle::FixedSingle; lblInfo->Text = L"Готов к работе"; this->Controls->Add(lblInfo); } void ResetRandomConfig() { nSegments = 1; angles->Clear(); lengths->Clear(); angles->Add(RandAngle()); lengths->Add((float)nudBaseLength->Value); variableLengths = false; this->Invalidate(); } float Rand01() { rngState ^= rngState << 13; rngState ^= rngState >> 17; rngState ^= rngState << 5; return (float)((double)(unsigned)rngState / 4294967296.0); } float RandAngle() { return (Rand01() * 6.28318530717958647692f) - 3.14159265358979323846f; } System::Void OnResize(System::Object^ sender, System::EventArgs^ e) { this->Invalidate(); } System::Void OnAddClick(System::Object^ sender, System::EventArgs^ e) { ++nSegments; angles->Add(RandAngle()); lengths->Add((float)nudBaseLength->Value); this->Invalidate(); } System::Void OnRemClick(System::Object^ sender, System::EventArgs^ e) { if (nSegments > 1) { --nSegments; angles->RemoveAt(angles->Count - 1); lengths->RemoveAt(lengths->Count - 1); this->Invalidate(); } } System::Void OnOptimizeClick(System::Object^ sender, System::EventArgs^ e) { if (!fManip) return; variableLengths = cbVarLen->Checked; float minTheta = (float)nudMinTheta->Value; float tx = (float)nudTargetX->Value; float ty = (float)nudTargetY->Value; int levels = (int)nudLevels->Value; int maxIter = (int)nudMaxIter->Value; bool adaptive = cbAdaptive->Checked; float r_param = (float)nudR->Value; float eps = (float)nudEps->Value; unsigned int seed = (unsigned)GetTickCount(); if (pStart) { pStart( nSegments, variableLengths, minTheta, tx, ty, levels, maxIter, r_param, adaptive, eps, seed ); } LARGE_INTEGER t0, t1, fq; QueryPerformanceCounter(&t0); float* bestQ = nullptr; size_t bestQLen = 0; float bestX = 0, bestY = 0, bestF = 0; fManip( nSegments, variableLengths, minTheta, tx, ty, levels, maxIter, r_param, adaptive, eps, seed, &bestQ, &bestQLen, &bestX, &bestY, &bestF ); QueryPerformanceCounter(&t1); QueryPerformanceFrequency(&fq); double micros = 1e6 * (double)(t1.QuadPart - t0.QuadPart) / (double)fq.QuadPart; if (bestQ && bestQLen >= (size_t)nSegments) { angles->Clear(); for (int i = 0; i < nSegments; ++i) angles->Add(bestQ[i]); if (variableLengths) { lengths->Clear(); for (int i = 0; i < nSegments; ++i) lengths->Add(bestQ[nSegments + i]); } else { lengths->Clear(); for (int i = 0; i < nSegments; ++i) lengths->Add((float)nudBaseLength->Value); } if (pFree) pFree(bestQ); } lblInfo->Text = String::Format(L"Результат оптимизации:\nЦелевая функция: {0:F5}\nКонечная точка: ({1:F3}, {2:F3})\nВремя: {3:F0} мкс", bestF, bestX, bestY, micros); this->Invalidate(); } protected: virtual void OnPaint(PaintEventArgs^ e) override { Form::OnPaint(e); Graphics^ g = e->Graphics; g->SmoothingMode = System::Drawing::Drawing2D::SmoothingMode::HighQuality; g->Clear(this->BackColor); int topOffset = 150; System::Drawing::Rectangle drawArea = System::Drawing::Rectangle(0, topOffset, this->ClientSize.Width, this->ClientSize.Height - topOffset); g->FillRectangle(Brushes::White, drawArea); int leftWallX = drawArea.Left + this->ClientSize.Width * 25 / 100; Pen^ wallPen = gcnew Pen(Color::Black, 2); g->DrawLine(wallPen, leftWallX, drawArea.Top, leftWallX, drawArea.Bottom); HatchBrush^ hatchBrush = gcnew HatchBrush(HatchStyle::BackwardDiagonal, Color::LightGray, Color::White); int leftHatchWidth = 100; g->FillRectangle(hatchBrush, leftWallX - leftHatchWidth, drawArea.Top, leftHatchWidth, drawArea.Height); float targetX = (float)nudTargetX->Value; float targetY = (float)nudTargetY->Value; float scale = 160.0f; int baseX = leftWallX; int baseY = drawArea.Top + drawArea.Height / 2; float pixelTargetX = baseX + targetX * scale; float pixelTargetY = baseY - targetY * scale; int rightWallX = (int)(pixelTargetX + 8); rightWallX = Math::Min(rightWallX, drawArea.Right - 10); Pen^ dashedPen = gcnew Pen(Color::Black, 2); dashedPen->DashStyle = DashStyle::Dash; g->DrawLine(dashedPen, rightWallX, drawArea.Top, rightWallX, drawArea.Bottom); int rightHatchWidth = leftHatchWidth; g->FillRectangle(hatchBrush, rightWallX, drawArea.Top, rightHatchWidth, drawArea.Height); Pen^ targetPen = gcnew Pen(Color::Green, 1.5f); targetPen->DashStyle = DashStyle::Dot; g->DrawEllipse(targetPen, pixelTargetX - 8.0f, pixelTargetY - 8.0f, 16.0f, 16.0f); cli::array<PointF>^ pts = gcnew cli::array<PointF>(nSegments + 1); pts[0] = PointF((float)baseX, (float)baseY); float x = 0.0f, y = 0.0f, phi = 0.0f; for (int i = 0; i < nSegments; ++i) { array<float>^ anglesArray = angles->ToArray(); array<float>^ lengthsArray = lengths->ToArray(); float theta = anglesArray[i]; float L = lengthsArray[i]; phi += theta; x += L * (float)Math::Cos((double)phi); y += L * (float)Math::Sin((double)phi); pts[i + 1] = PointF(baseX + x * scale, baseY - y * scale); } Pen^ penRod = gcnew Pen(Color::Red, 6.0f); for (int i = 0; i < nSegments; ++i) g->DrawLine(penRod, pts[i], pts[i + 1]); SolidBrush^ brJoint = gcnew SolidBrush(Color::Blue); const float R = 6.0f; for (int i = 0; i <= nSegments; ++i) g->FillEllipse(brJoint, pts[i].X - R, pts[i].Y - R, 2 * R, 2 * R); delete wallPen; delete dashedPen; delete targetPen; delete penRod; delete brJoint; delete hatchBrush; } };
} - здесь происходит взаимодействие с интерфейсом - с ним взаимодействует только нулевой ранк как и должно быть - но обрати внимание каким был код когда мне нужно было тестировать чисто двумерный алгоритм: #pragma once
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#include <algorithm>
#include <vector>
#include <utility>
namespace TESTAGP {
using namespace System;
using namespace System::IO;
textref class MyForm : public System::Windows::Forms::Form { public: MyForm(HMODULE hLib) : hLib(hLib) { // Передаем дескриптор из main InitializeComponent(); // Загружаем функции из DLL f = (agp_c)GetProcAddress(hLib, "AGP_2D"); pStart = (start_workers)GetProcAddress(hLib, "AgpStartWorkers"); pFree = (free_agp)GetProcAddress(hLib, "AGP_Free"); pInit = (PInit)GetProcAddress(hLib, "AgpInit"); } protected: ~MyForm() { delete components; } private: HINSTANCE hLib = nullptr; typedef void(__cdecl* agp_c)(float, float, float, float, float, float, float, bool, float, float, float**, size_t*); typedef void(__cdecl* start_workers)(); typedef void(__cdecl* free_agp)(float*); typedef int(__cdecl* PInit)(int, float, float, float, float); agp_c f = nullptr; start_workers pStart = nullptr; free_agp pFree = nullptr; PInit pInit = nullptr; System::Windows::Forms::Button^ button1; System::Windows::Forms::TextBox^ textBox2; System::Windows::Forms::DataVisualization::Charting::Chart^ chart2; System::Windows::Forms::Label^ label2; System::Windows::Forms::Label^ label3; System::Windows::Forms::TextBox^ textBox1; System::Windows::Forms::TextBox^ textBox3; System::Windows::Forms::TextBox^ textBox4; System::Windows::Forms::TextBox^ textBox5; System::Windows::Forms::TextBox^ textBox6; System::Windows::Forms::Label^ label6; System::Windows::Forms::Label^ label7; System::Windows::Forms::Label^ label8; System::Windows::Forms::Label^ label9; System::Windows::Forms::Label^ label10; System::Windows::Forms::Label^ label1; System::Windows::Forms::TextBox^ textBox7; System::Windows::Forms::TextBox^ textBox8; System::ComponentModel::Container^ components; System::Void button1_Click(System::Object^ sender, System::EventArgs^ e) { chart2->Series[0]->Points->Clear(); chart2->Series[1]->Points->Clear(); chart2->Series[2]->Points->Clear(); chart2->Series[3]->Points->Clear(); static LARGE_INTEGER start, end; QueryPerformanceCounter(&start); float* buf = nullptr; size_t len = 0u; pStart(); // разбудили rank != 0 f(2.0f, 10000.0f, -2.2f, 1.8f, -2.2f, 1.8f, 2.5f, false, 0.00001f, GetTickCount(), &buf, &len); if (buf != nullptr) { std::vector<float> Extr_2D(buf, buf + len); pFree(buf); // Для 2D ветки: извлекаем schetchick и interval_len textBox7->Text = Convert::ToString(Extr_2D.back()); // schetchick Extr_2D.pop_back(); textBox6->Text = Convert::ToString(Extr_2D.back()); // interval_len Extr_2D.pop_back(); // Извлекаем последнюю тройку (x2, x1, f) float x2 = Extr_2D.back(); Extr_2D.pop_back(); float x1 = Extr_2D.back(); Extr_2D.pop_back(); float f_val = Extr_2D.back(); Extr_2D.pop_back(); // Выводим координаты и значение функции textBox4->Text = Convert::ToString(x1); textBox3->Text = Convert::ToString(x2); textBox2->Text = Convert::ToString(f_val); // Новое поле для значения функции // Отображаем точку на графике chart2->Series[2]->Points->AddXY(x1, x2); // Обрабатываем остальные точки (в порядке: f, x1, x2) while (!Extr_2D.empty()) { float x2_point = Extr_2D.back(); Extr_2D.pop_back(); float x1_point = Extr_2D.back(); Extr_2D.pop_back(); float f_point = Extr_2D.back(); Extr_2D.pop_back(); // Добавляем точку на график (только координаты) chart2->Series[0]->Points->AddXY(x1_point, x2_point); } } QueryPerformanceCounter(&end); LARGE_INTEGER freq; QueryPerformanceFrequency(&freq); const int multiplier = ((1ULL << 32) / freq.QuadPart) * 1'000'000ULL; textBox5->Text = Convert::ToString( ((end.QuadPart - start.QuadPart) * multiplier) >> 32 ) + " microseconds"; //textBox2->Text = Convert::ToString(Extr_2D.back()); //textBox1->Text = Convert::ToString(Extr_2D_LNA.back()); }
#pragma region Windows Form Designer generated code
void InitializeComponent(void) {
System::Windows::Forms::DataVisualization::Charting::ChartArea^ chartArea1 = (gcnew System::Windows::Forms::DataVisualization::Charting::ChartArea());
System::Windows::Forms::DataVisualization::Charting::Legend^ legend1 = (gcnew System::Windows::Forms::DataVisualization::Charting::Legend());
System::Windows::Forms::DataVisualization::Charting::Series^ series1 = (gcnew System::Windows::Forms::DataVisualization::Charting::Series());
System::Windows::Forms::DataVisualization::Charting::Series^ series2 = (gcnew System::Windows::Forms::DataVisualization::Charting::Series());
System::Windows::Forms::DataVisualization::Charting::Series^ series3 = (gcnew System::Windows::Forms::DataVisualization::Charting::Series());
System::Windows::Forms::DataVisualization::Charting::Series^ series4 = (gcnew System::Windows::Forms::DataVisualization::Charting::Series());
this->button1 = (gcnew System::Windows::Forms::Button());
this->textBox2 = (gcnew System::Windows::Forms::TextBox());
this->chart2 = (gcnew System::Windows::Forms::DataVisualization::Charting::Chart());
this->label2 = (gcnew System::Windows::Forms::Label());
this->textBox1 = (gcnew System::Windows::Forms::TextBox());
this->textBox3 = (gcnew System::Windows::Forms::TextBox());
this->textBox4 = (gcnew System::Windows::Forms::TextBox());
this->textBox5 = (gcnew System::Windows::Forms::TextBox());
this->textBox6 = (gcnew System::Windows::Forms::TextBox());
this->label6 = (gcnew System::Windows::Forms::Label());
this->label7 = (gcnew System::Windows::Forms::Label());
this->label8 = (gcnew System::Windows::Forms::Label());
this->label9 = (gcnew System::Windows::Forms::Label());
this->label10 = (gcnew System::Windows::Forms::Label());
this->label1 = (gcnew System::Windows::Forms::Label());
this->textBox7 = (gcnew System::Windows::Forms::TextBox());
this->label3 = (gcnew System::Windows::Forms::Label());
this->textBox8 = (gcnew System::Windows::Forms::TextBox());
(cli::safe_castSystem::ComponentModel::ISupportInitialize^(this->chart2))->BeginInit();
this->SuspendLayout();
//
// button1
//
this->button1->BackColor = System::Drawing::SystemColors::Info;
this->button1->Cursor = System::Windows::Forms::Cursors::Hand;
this->button1->FlatAppearance->BorderColor = System::Drawing::Color::FromArgb(static_castSystem::Int32(static_castSystem::Byte(64)),
static_castSystem::Int32(static_castSystem::Byte(64)), static_castSystem::Int32(static_castSystem::Byte(64)));
this->button1->FlatAppearance->BorderSize = 3;
this->button1->FlatAppearance->MouseDownBackColor = System::Drawing::Color::FromArgb(static_castSystem::Int32(static_castSystem::Byte(128)),
static_castSystem::Int32(static_castSystem::Byte(128)), static_castSystem::Int32(static_castSystem::Byte(255)));
this->button1->FlatAppearance->MouseOverBackColor = System::Drawing::Color::FromArgb(static_castSystem::Int32(static_castSystem::Byte(192)),
static_castSystem::Int32(static_castSystem::Byte(192)), static_castSystem::Int32(static_castSystem::Byte(255)));
this->button1->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
this->button1->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 14.25F, System::Drawing::FontStyle::Bold));
this->button1->ForeColor = System::Drawing::SystemColors::ControlDarkDark;
this->button1->Location = System::Drawing::Point(897, 724);
this->button1->Name = L"button1";
this->button1->Size = System::Drawing::Size(275, 75);
this->button1->TabIndex = 2;
this->button1->Text = L"SOL";
this->button1->UseVisualStyleBackColor = false;
this->button1->Click += gcnew System::EventHandler(this, &MyForm::button1_Click);
//
// textBox2
//
this->textBox2->BackColor = System::Drawing::SystemColors::ControlLightLight;
this->textBox2->Cursor = System::Windows::Forms::Cursors::Hand;
this->textBox2->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, System::Drawing::FontStyle::Bold));
this->textBox2->ForeColor = System::Drawing::Color::FromArgb(static_castSystem::Int32(static_castSystem::Byte(64)), static_castSystem::Int32(static_castSystem::Byte(0)),
static_castSystem::Int32(static_castSystem::Byte(64)));
this->textBox2->Location = System::Drawing::Point(992, 619);
this->textBox2->Name = L"textBox2";
this->textBox2->Size = System::Drawing::Size(180, 29);
this->textBox2->TabIndex = 4;
//
// chart2
//
this->chart2->BackColor = System::Drawing::SystemColors::ControlLight;
chartArea1->AxisX->Interval = 0.1;
chartArea1->AxisX->IsLabelAutoFit = false;
chartArea1->AxisX->LabelStyle->Font = (gcnew System::Drawing::Font(L"Yu Gothic", 6.75F));
chartArea1->AxisX->Maximum = 1.8;
chartArea1->AxisX->Minimum = -2.2;
chartArea1->AxisX->Title = L"x1";
chartArea1->AxisX->TitleFont = (gcnew System::Drawing::Font(L"Yu Gothic UI", 11.25F, System::Drawing::FontStyle::Bold));
chartArea1->AxisY->Interval = 0.1;
chartArea1->AxisY->IsLabelAutoFit = false;
chartArea1->AxisY->LabelStyle->Font = (gcnew System::Drawing::Font(L"Yu Gothic", 7.92F));
chartArea1->AxisY->Maximum = 1.8;
chartArea1->AxisY->Minimum = -2.2;
chartArea1->AxisY->Title = L"x2";
chartArea1->AxisY->TitleFont = (gcnew System::Drawing::Font(L"Yu Gothic UI", 11.25F, System::Drawing::FontStyle::Bold));
chartArea1->BackColor = System::Drawing::Color::FloralWhite;
chartArea1->BackGradientStyle = System::Windows::Forms::DataVisualization::Charting::GradientStyle::Center;
chartArea1->BackSecondaryColor = System::Drawing::Color::AliceBlue;
chartArea1->Name = L"ChartArea1";
this->chart2->ChartAreas->Add(chartArea1);
legend1->BackColor = System::Drawing::Color::Transparent;
legend1->BackGradientStyle = System::Windows::Forms::DataVisualization::Charting::GradientStyle::Center;
legend1->BackSecondaryColor = System::Drawing::SystemColors::ActiveCaption;
legend1->BorderColor = System::Drawing::Color::Transparent;
legend1->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 11.25F, System::Drawing::FontStyle::Bold));
legend1->ForeColor = System::Drawing::SystemColors::ActiveCaptionText;
legend1->HeaderSeparator = System::Windows::Forms::DataVisualization::Charting::LegendSeparatorStyle::ThickGradientLine;
legend1->HeaderSeparatorColor = System::Drawing::Color::IndianRed;
legend1->IsTextAutoFit = false;
legend1->ItemColumnSeparator = System::Windows::Forms::DataVisualization::Charting::LegendSeparatorStyle::ThickGradientLine;
legend1->ItemColumnSeparatorColor = System::Drawing::Color::IndianRed;
legend1->Name = L"Legend1";
this->chart2->Legends->Add(legend1);
this->chart2->Location = System::Drawing::Point(12, 14);
this->chart2->Name = L"chart2";
series1->ChartArea = L"ChartArea1";
series1->ChartType = System::Windows::Forms::DataVisualization::Charting::SeriesChartType::FastPoint;
series1->Legend = L"Legend1";
series1->Name = L"Точки данных";
series2->ChartArea = L"ChartArea1";
series2->ChartType = System::Windows::Forms::DataVisualization::Charting::SeriesChartType::FastPoint;
series2->Legend = L"Legend1";
series2->Name = L"Точки данных LNA";
series3->ChartArea = L"ChartArea1";
series3->ChartType = System::Windows::Forms::DataVisualization::Charting::SeriesChartType::FastPoint;
series3->Legend = L"Legend1";
series3->Name = L"Точки данных(финал)";
series4->ChartArea = L"ChartArea1";
series4->ChartType = System::Windows::Forms::DataVisualization::Charting::SeriesChartType::FastPoint;
series4->Legend = L"Legend1";
series4->Name = L"Точки данных LNA(финал)";
this->chart2->Series->Add(series1);
this->chart2->Series->Add(series2);
this->chart2->Series->Add(series3);
this->chart2->Series->Add(series4);
this->chart2->Size = System::Drawing::Size(1160, 785);
this->chart2->TabIndex = 5;
//
// label2
//
this->label2->AutoSize = true;
this->label2->BackColor = System::Drawing::SystemColors::InactiveCaption;
this->label2->BorderStyle = System::Windows::Forms::BorderStyle::Fixed3D;
this->label2->Cursor = System::Windows::Forms::Cursors::Hand;
this->label2->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
this->label2->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, static_castSystem::Drawing::FontStyle(((System::Drawing::FontStyle::Bold | System::Drawing::FontStyle::Italic)
| System::Drawing::FontStyle::Underline))));
this->label2->ForeColor = System::Drawing::Color::DarkBlue;
this->label2->Location = System::Drawing::Point(946, 622);
this->label2->Name = L"label2";
this->label2->Size = System::Drawing::Size(40, 23);
this->label2->TabIndex = 8;
this->label2->Text = L"Extr";
//
// textBox1
//
this->textBox1->BackColor = System::Drawing::SystemColors::ControlLightLight;
this->textBox1->Cursor = System::Windows::Forms::Cursors::Hand;
this->textBox1->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, System::Drawing::FontStyle::Bold));
this->textBox1->ForeColor = System::Drawing::Color::FromArgb(static_castSystem::Int32(static_castSystem::Byte(64)), static_castSystem::Int32(static_castSystem::Byte(0)),
static_castSystem::Int32(static_castSystem::Byte(64)));
this->textBox1->Location = System::Drawing::Point(992, 654);
this->textBox1->Name = L"textBox1";
this->textBox1->Size = System::Drawing::Size(180, 29);
this->textBox1->TabIndex = 13;
//
// textBox3
//
this->textBox3->BackColor = System::Drawing::SystemColors::ControlLightLight;
this->textBox3->Cursor = System::Windows::Forms::Cursors::Hand;
this->textBox3->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, System::Drawing::FontStyle::Bold));
this->textBox3->ForeColor = System::Drawing::Color::FromArgb(static_castSystem::Int32(static_castSystem::Byte(64)), static_castSystem::Int32(static_castSystem::Byte(0)),
static_castSystem::Int32(static_castSystem::Byte(64)));
this->textBox3->Location = System::Drawing::Point(992, 584);
this->textBox3->Name = L"textBox3";
this->textBox3->Size = System::Drawing::Size(180, 29);
this->textBox3->TabIndex = 14;
//
// textBox4
//
this->textBox4->BackColor = System::Drawing::SystemColors::ControlLightLight;
this->textBox4->Cursor = System::Windows::Forms::Cursors::Hand;
this->textBox4->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, System::Drawing::FontStyle::Bold));
this->textBox4->ForeColor = System::Drawing::Color::FromArgb(static_castSystem::Int32(static_castSystem::Byte(64)), static_castSystem::Int32(static_castSystem::Byte(0)),
static_castSystem::Int32(static_castSystem::Byte(64)));
this->textBox4->Location = System::Drawing::Point(992, 549);
this->textBox4->Name = L"textBox4";
this->textBox4->Size = System::Drawing::Size(180, 29);
this->textBox4->TabIndex = 15;
//
// textBox5
//
this->textBox5->BackColor = System::Drawing::SystemColors::ControlLightLight;
this->textBox5->Cursor = System::Windows::Forms::Cursors::Hand;
this->textBox5->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, System::Drawing::FontStyle::Bold));
this->textBox5->ForeColor = System::Drawing::Color::FromArgb(static_castSystem::Int32(static_castSystem::Byte(64)), static_castSystem::Int32(static_castSystem::Byte(0)),
static_castSystem::Int32(static_castSystem::Byte(64)));
this->textBox5->Location = System::Drawing::Point(992, 514);
this->textBox5->Name = L"textBox5";
this->textBox5->Size = System::Drawing::Size(180, 29);
this->textBox5->TabIndex = 16;
//
// textBox6
//
this->textBox6->BackColor = System::Drawing::SystemColors::ControlLightLight;
this->textBox6->Cursor = System::Windows::Forms::Cursors::Hand;
this->textBox6->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, System::Drawing::FontStyle::Bold));
this->textBox6->ForeColor = System::Drawing::Color::FromArgb(static_castSystem::Int32(static_castSystem::Byte(64)), static_castSystem::Int32(static_castSystem::Byte(0)),
static_castSystem::Int32(static_castSystem::Byte(64)));
this->textBox6->Location = System::Drawing::Point(992, 443);
this->textBox6->Name = L"textBox6";
this->textBox6->Size = System::Drawing::Size(180, 29);
this->textBox6->TabIndex = 17;
//
// label6
//
this->label6->AutoSize = true;
this->label6->BackColor = System::Drawing::SystemColors::InactiveCaption;
this->label6->BorderStyle = System::Windows::Forms::BorderStyle::Fixed3D;
this->label6->Cursor = System::Windows::Forms::Cursors::Hand;
this->label6->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
this->label6->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, static_castSystem::Drawing::FontStyle(((System::Drawing::FontStyle::Bold | System::Drawing::FontStyle::Italic)
| System::Drawing::FontStyle::Underline))));
this->label6->ForeColor = System::Drawing::Color::DarkBlue;
this->label6->Location = System::Drawing::Point(911, 657);
this->label6->Name = L"label6";
this->label6->Size = System::Drawing::Size(75, 23);
this->label6->TabIndex = 18;
this->label6->Text = L"Extr LNA";
//
// label7
//
this->label7->AutoSize = true;
this->label7->BackColor = System::Drawing::SystemColors::InactiveCaption;
this->label7->BorderStyle = System::Windows::Forms::BorderStyle::Fixed3D;
this->label7->Cursor = System::Windows::Forms::Cursors::Hand;
this->label7->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
this->label7->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, static_castSystem::Drawing::FontStyle(((System::Drawing::FontStyle::Bold | System::Drawing::FontStyle::Italic)
| System::Drawing::FontStyle::Underline))));
this->label7->ForeColor = System::Drawing::Color::DarkBlue;
this->label7->Location = System::Drawing::Point(957, 587);
this->label7->Name = L"label7";
this->label7->Size = System::Drawing::Size(29, 23);
this->label7->TabIndex = 19;
this->label7->Text = L"x2";
//
// label8
//
this->label8->AutoSize = true;
this->label8->BackColor = System::Drawing::SystemColors::InactiveCaption;
this->label8->BorderStyle = System::Windows::Forms::BorderStyle::Fixed3D;
this->label8->Cursor = System::Windows::Forms::Cursors::Hand;
this->label8->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
this->label8->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, static_castSystem::Drawing::FontStyle(((System::Drawing::FontStyle::Bold | System::Drawing::FontStyle::Italic)
| System::Drawing::FontStyle::Underline))));
this->label8->ForeColor = System::Drawing::Color::DarkBlue;
this->label8->Location = System::Drawing::Point(960, 552);
this->label8->Name = L"label8";
this->label8->Size = System::Drawing::Size(26, 23);
this->label8->TabIndex = 20;
this->label8->Text = L"x1";
//
// label9
//
this->label9->AutoSize = true;
this->label9->BackColor = System::Drawing::SystemColors::InactiveCaption;
this->label9->BorderStyle = System::Windows::Forms::BorderStyle::Fixed3D;
this->label9->Cursor = System::Windows::Forms::Cursors::Hand;
this->label9->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
this->label9->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, static_castSystem::Drawing::FontStyle(((System::Drawing::FontStyle::Bold | System::Drawing::FontStyle::Italic)
| System::Drawing::FontStyle::Underline))));
this->label9->ForeColor = System::Drawing::Color::DarkBlue;
this->label9->Location = System::Drawing::Point(903, 517);
this->label9->Name = L"label9";
this->label9->Size = System::Drawing::Size(83, 23);
this->label9->TabIndex = 21;
this->label9->Text = L"total time";
//
// label10
//
this->label10->AutoSize = true;
this->label10->BackColor = System::Drawing::SystemColors::InactiveCaption;
this->label10->BorderStyle = System::Windows::Forms::BorderStyle::Fixed3D;
this->label10->Cursor = System::Windows::Forms::Cursors::Hand;
this->label10->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
this->label10->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, static_castSystem::Drawing::FontStyle(((System::Drawing::FontStyle::Bold | System::Drawing::FontStyle::Italic)
| System::Drawing::FontStyle::Underline))));
this->label10->ForeColor = System::Drawing::Color::DarkBlue;
this->label10->Location = System::Drawing::Point(903, 446);
this->label10->Name = L"label10";
this->label10->Size = System::Drawing::Size(83, 23);
this->label10->TabIndex = 22;
this->label10->Text = L"iter count";
//
// label1
//
this->label1->AutoSize = true;
this->label1->BackColor = System::Drawing::SystemColors::InactiveCaption;
this->label1->BorderStyle = System::Windows::Forms::BorderStyle::Fixed3D;
this->label1->Cursor = System::Windows::Forms::Cursors::Hand;
this->label1->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
this->label1->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, static_castSystem::Drawing::FontStyle(((System::Drawing::FontStyle::Bold | System::Drawing::FontStyle::Italic)
| System::Drawing::FontStyle::Underline))));
this->label1->ForeColor = System::Drawing::Color::DarkBlue;
this->label1->Location = System::Drawing::Point(911, 692);
this->label1->Name = L"label1";
this->label1->Size = System::Drawing::Size(75, 23);
this->label1->TabIndex = 24;
this->label1->Text = L"accuracy";
//
// textBox7
//
this->textBox7->BackColor = System::Drawing::SystemColors::ControlLightLight;
this->textBox7->Cursor = System::Windows::Forms::Cursors::Hand;
this->textBox7->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, System::Drawing::FontStyle::Bold));
this->textBox7->ForeColor = System::Drawing::Color::FromArgb(static_castSystem::Int32(static_castSystem::Byte(64)), static_castSystem::Int32(static_castSystem::Byte(0)),
static_castSystem::Int32(static_castSystem::Byte(64)));
this->textBox7->Location = System::Drawing::Point(992, 689);
this->textBox7->Name = L"textBox7";
this->textBox7->Size = System::Drawing::Size(180, 29);
this->textBox7->TabIndex = 23;
//
// label3
//
this->label3->AutoSize = true;
this->label3->BackColor = System::Drawing::SystemColors::InactiveCaption;
this->label3->BorderStyle = System::Windows::Forms::BorderStyle::Fixed3D;
this->label3->Cursor = System::Windows::Forms::Cursors::Hand;
this->label3->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
this->label3->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, static_castSystem::Drawing::FontStyle(((System::Drawing::FontStyle::Bold | System::Drawing::FontStyle::Italic)
| System::Drawing::FontStyle::Underline))));
this->label3->ForeColor = System::Drawing::Color::DarkBlue;
this->label3->Location = System::Drawing::Point(868, 481);
this->label3->Name = L"label3";
this->label3->Size = System::Drawing::Size(118, 23);
this->label3->TabIndex = 26;
this->label3->Text = L"iter count LNA";
//
// textBox8
//
this->textBox8->BackColor = System::Drawing::SystemColors::ControlLightLight;
this->textBox8->Cursor = System::Windows::Forms::Cursors::Hand;
this->textBox8->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, System::Drawing::FontStyle::Bold));
this->textBox8->ForeColor = System::Drawing::Color::FromArgb(static_castSystem::Int32(static_castSystem::Byte(64)), static_castSystem::Int32(static_castSystem::Byte(0)),
static_castSystem::Int32(static_castSystem::Byte(64)));
this->textBox8->Location = System::Drawing::Point(992, 478);
this->textBox8->Name = L"textBox8";
this->textBox8->Size = System::Drawing::Size(180, 29);
this->textBox8->TabIndex = 25;
//
// MyForm
//
this->AutoScaleDimensions = System::Drawing::SizeF(6, 13);
this->AutoScaleMode = System::Windows::Forms::AutoScaleMode::Font;
this->ClientSize = System::Drawing::Size(1184, 811);
this->Controls->Add(this->label3);
this->Controls->Add(this->textBox8);
this->Controls->Add(this->label1);
this->Controls->Add(this->textBox7);
this->Controls->Add(this->label10);
this->Controls->Add(this->label9);
this->Controls->Add(this->label8);
this->Controls->Add(this->label7);
this->Controls->Add(this->label6);
this->Controls->Add(this->textBox6);
this->Controls->Add(this->textBox5);
this->Controls->Add(this->textBox4);
this->Controls->Add(this->textBox3);
this->Controls->Add(this->textBox1);
this->Controls->Add(this->label2);
this->Controls->Add(this->textBox2);
this->Controls->Add(this->button1);
this->Controls->Add(this->chart2);
this->Name = L"MyForm";
this->Text = L"MyForm";
(cli::safe_castSystem::ComponentModel::ISupportInitialize^(this->chart2))->EndInit();
this->ResumeLayout(false);
this->PerformLayout();
}
#pragma endregion
};
} // namespace TESTAGP
#include "MyForm.h"
#include <float.h>
using namespace System;
using namespace System::Windows::Forms;
typedef int(__cdecl* PInit)(int, float, float, float, float);
typedef void(__cdecl* PStartWorkers)();
[STAThread]
int main() {
HMODULE h = LoadLibraryW(L"TEST_FUNC.dll");
auto AgpInit = (PInit)GetProcAddress(h, "AgpInit");
auto AgpWaitStartAndRun = (PStartWorkers)GetProcAddress(h, "AgpWaitStartAndRun");
textconst int rank = AgpInit(12, -2.2f, 1.8f, -2.2f, 1.8f); if (!rank) { Application::EnableVisualStyles(); Application::SetCompatibleTextRenderingDefault(false); Application::Run(gcnew TESTAGP::MyForm(h)); } else { AgpWaitStartAndRun(); } return 0;
}
#include "pch.h"
#define XOR_RAND(state, result_var)
do {
int s = state;
s ^= s << 13;
s ^= s >> 17;
s ^= s << 5;
state = s;
result_var = state * 0x1.0p-32f;
} while(0)
#define XOR_RAND_GRSH(state, result_var)
do {
int s = state;
s ^= s << 13;
s ^= s >> 17;
s ^= s << 5;
state = s;
result_var = fmaf(state, 0x1.0p-31f, -1.0f);
} while(0)
#define FABE13_COS(x, result_var)
do {
const float abs_val = fabsf(x);
float reduced = fmodf(abs_val, 6.28318530718f);
if(reduced > 3.14159265359f) {
reduced = 6.28318530718f - reduced;
}
if(reduced < 1.57079632679f) {
const float val2 = reduced * reduced;
const float val4 = val2 * val2;
result_var = fmaf(val4, fmaf(val2, -0.0013888889f, 0.0416666667f), fmaf(val2, -0.5f, 1.0f));
} else {
reduced = 3.14159265359f - reduced;
const float val2 = reduced * reduced;
const float val4 = val2 * val2;
result_var = -fmaf(val4, fmaf(val2, -0.0013888889f, 0.0416666667f), fmaf(val2, -0.5f, 1.0f));
}
} while(0)
#define FABE13_SINCOS(in, sin_out, cos_out, n)
do {
int i = 0;
const int limit = n & ~7;
if(n >= 8) {
static __declspec(align(32)) const __m256 VEC_TWOPI = _mm256_set1_ps(6.28318530718f);
static __declspec(align(32)) const __m256 VEC_PI = _mm256_set1_ps(3.14159265359f);
static __declspec(align(32)) const __m256 VEC_PI_2 = _mm256_set1_ps(1.57079632679f);
static __declspec(align(32)) const __m256 INV_TWOPI = _mm256_set1_ps(0.15915494309189535f);
static __declspec(align(32)) const __m256 BIAS = _mm256_set1_ps(12582912.0f);
static __declspec(align(32)) const __m256 VEC_COS_P5 = _mm256_set1_ps(-0.0013888889f);
static __declspec(align(32)) const __m256 VEC_COS_P3 = _mm256_set1_ps(0.0416666667f);
static __declspec(align(32)) const __m256 VEC_COS_P1 = _mm256_set1_ps(-0.5f);
static __declspec(align(32)) const __m256 VEC_COS_P0 = _mm256_set1_ps(1.0f);
static __declspec(align(32)) const __m256 VEC_SIN_P5 = _mm256_set1_ps(-0.0001984127f);
static __declspec(align(32)) const __m256 VEC_SIN_P3 = _mm256_set1_ps(0.0083333333f);
static __declspec(align(32)) const __m256 VEC_SIN_P1 = _mm256_set1_ps(-0.16666666f);
static __declspec(align(32)) const __m256 VEC_SIN_P0 = _mm256_set1_ps(1.0f);
static __declspec(align(32)) const __m256 VEC_ZERO = _mm256_setzero_ps();
while(i < limit) {
const __m256 vx = _mm256_load_ps(&in[i]);
const __m256 vax = _mm256_andnot_ps(_mm256_set1_ps(-0.0f), vx);
__m256 q = _mm256_fmadd_ps(vax, INV_TWOPI, BIAS);
q = _mm256_sub_ps(q, BIAS);
const __m256 r = _mm256_fnmadd_ps(VEC_TWOPI, q, vax);
const __m256 r1 = _mm256_min_ps(r, _mm256_sub_ps(VEC_TWOPI, r));
const __m256 r2 = _mm256_min_ps(r1, _mm256_sub_ps(VEC_PI, r1));
const __m256 t2 = _mm256_mul_ps(r2, r2);
const __m256 cosv = _mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, VEC_COS_P5, VEC_COS_P3), VEC_COS_P1), VEC_COS_P0);
const __m256 sinv = _mm256_mul_ps(_mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, VEC_SIN_P5, VEC_SIN_P3), VEC_SIN_P1), VEC_SIN_P0), r2);
const __m256 cflip = _mm256_cmp_ps(r1, VEC_PI_2, _CMP_GT_OQ);
const __m256 sflip = _mm256_xor_ps(_mm256_cmp_ps(vx, VEC_ZERO, _CMP_LT_OQ), _mm256_cmp_ps(r, VEC_PI, _CMP_GT_OQ));
_mm256_store_ps(&cos_out[i], _mm256_blendv_ps(cosv, _mm256_sub_ps(VEC_ZERO, cosv), cflip));
_mm256_store_ps(&sin_out[i], _mm256_blendv_ps(sinv, _mm256_sub_ps(VEC_ZERO, sinv), sflip));
i += 8;
}
}
while(i < n) {
const float x = in[i];
const float ax = fabsf(x);
float q = fmaf(ax, 0.15915494309189535f, 12582912.0f);
q -= 12582912.0f;
float r = fmaf(-6.28318530718f, q, ax);
const bool sflip = r > 3.14159265359f;
if(sflip) r = 6.28318530718f - r;
const bool cflip = r > 1.57079632679f;
if(cflip) r = 3.14159265359f - r;
const float t2 = r * r;
const float c = fmaf(t2, fmaf(t2, fmaf(t2, -0.0013888889f, 0.0416666667f), -0.5f), 1.0f);
const float s = fmaf(t2, fmaf(t2, fmaf(t2, -0.0001984127f, 0.0083333333f), -0.16666666f), 1.0f) * r;
cos_out[i] = cflip ? -c : c;
sin_out[i] = ((x < 0.0f) ^ sflip) ? -s : s;
++i;
}
} while(0)
enum List : uint8_t { Top = 0b00u, Down = 0b01u, Left = 0b10u, Right = 0b11u };
__declspec(align(4)) struct Step final {
const uint8_t next;
const uint8_t dx;
const uint8_t dy;
};
__declspec(align(4)) struct InvStep final {
const uint8_t q;
const uint8_t next;
};
__declspec(align(64)) static const Step g_step_tbl[4][4] = {
{ {Right,0u,0u}, {Top,0u,1u}, {Top,1u,1u}, {Left,1u,0u} },
{ {Left,1u,1u}, {Down,1u,0u}, {Down,0u,0u}, {Right,0u,1u} },
{ {Down,1u,1u}, {Left,0u,1u}, {Left,0u,0u}, {Top,1u,0u} },
{ {Top,0u,0u}, {Right,1u,0u}, {Right,1u,1u}, {Down,0u,1u} }
};
__declspec(align(64)) static const InvStep g_inv_tbl[4][4] = {
{ {0u,Right}, {1u,Top}, {3u,Left}, {2u,Top} },
{ {2u,Down}, {3u,Right}, {1u,Down}, {0u,Left} },
{ {2u,Left}, {1u,Left}, {3u,Top}, {0u,Down} },
{ {0u,Top}, {3u,Down}, {1u,Right}, {2u,Right} }
};
static const boost::mpi::environment* __restrict g_env;
static const boost::mpi::communicator* __restrict g_world;
__declspec(align(16)) struct CrossMsg final {
float s_x1, s_x2;
float e_x1, e_x2;
float Rtop;
template<typename Archive>
__declspec(noalias) __forceinline void serialize(Archive& ar, const unsigned int) noexcept { ar& s_x1& s_x2& e_x1& e_x2& Rtop; }
};
__declspec(align(16)) struct CtrlMsg final {
bool kind;
CrossMsg xchg;
template<typename Archive>
__declspec(noalias) __forceinline void serialize(Archive& ar, const unsigned int) noexcept {
ar& kind& xchg;
}
};
__declspec(align(16)) struct Slab final {
char* const __restrict base;
char* __restrict current;
char* const __restrict end;
text__declspec(noalias) __forceinline Slab(void* const __restrict memory, const size_t usable_size) noexcept : base(static_cast<char* __restrict>(memory)) , current(base) , end(base + (usable_size & ~static_cast<size_t>(63u))) { }
};
static tbb::enumerable_thread_specific<Slab*> tls( noexcept {
void* const __restrict memory = _aligned_malloc(16777216u, 16u);
Slab* const __restrict slab = static_cast<Slab*>(_aligned_malloc(32u, 16u));
new (slab) Slab(memory, 16777216u);
char* __restrict p = slab->base;
#pragma loop(ivdep)
while (p < slab->end) {
*p = 0u;
p += 4096u;
}
return slab;
}());
__declspec(align(64)) struct Interval final {
const float x1;
const float x2;
const float y1;
const float y2;
const float delta_y;
const float ordinate_factor;
const float N_factor;
const float quadratic_term;
const float M;
float R;
text__declspec(noalias) __forceinline void* operator new(const size_t) noexcept { Slab* const __restrict s = tls.local(); char* const __restrict result = s->current; s->current += 64u; return result; } __declspec(noalias) __forceinline Interval(const float _x1, const float _x2, const float _y1, const float _y2, const float _N) noexcept : x1(_x1), x2(_x2), y1(_y1), y2(_y2) , delta_y(_y2 - _y1) , ordinate_factor(-(y1 + y2) * 2.0f) , N_factor(_N == 1.0f ? _x2 - _x1 : _N == 2.0f ? sqrtf(_x2 - _x1) : powf(_x2 - _x1, 1.0f / _N)) , quadratic_term((1.0f / N_factor)* delta_y* delta_y) , M((1.0f / N_factor)* fabsf(delta_y)) { } __declspec(noalias) __forceinline void ChangeCharacteristic(const float _m) noexcept { R = fmaf(1.0f / _m, quadratic_term, fmaf(_m, N_factor, ordinate_factor)); }
};
__declspec(align(16)) struct Peano2DMap final {
const int levels;
const float a, b, c, d;
const float lenx, leny;
const float inv_lenx;
const uint32_t scale;
const uint8_t start;
text__declspec(noalias) __forceinline Peano2DMap( const int L, const float _a, const float _b, const float _c, const float _d, const uint8_t startType ) noexcept : levels(L) , a(_a), b(_b), c(_c), d(_d) , lenx(_b - _a) , leny(_d - _c) , inv_lenx(1.0f / (_b - _a)) , scale(static_cast<uint32_t>(1u) << (L << 1)) , start(startType) { }
};
static Peano2DMap gActiveMap(0, 0.0f, 0.0f, 0.0f, 0.0f, 0b00u);
static __declspec(noalias) __forceinline bool ComparePtr(const Interval* const __restrict a, const Interval* const __restrict b) noexcept
{
return a->R < b->R;
}
static __declspec(noalias) __forceinline float ShekelFunc(const float x, const float seed) noexcept
{
int i = 0;
float current_state = seed, current_res, current_res2, res = 0.0f;
while (i < 10) {
XOR_RAND(current_state, current_res);
const float x_part = fmaf(-current_res, 10.0f, x);
XOR_RAND(current_state, current_res);
XOR_RAND(current_state, current_res2);
float delimiter = fmaf(fmaf(current_res, 20.0f, 5.0f), x_part * x_part, fmaf(current_res2, 0.2f, 1.0f));
delimiter = copysignf(fmaxf(fabsf(delimiter), FLT_MIN), delimiter);
res -= 1.0f / delimiter;
++i;
}
return res;
}
static __declspec(noalias) __forceinline float RastriginFunc(const float x1, const float x2) noexcept
{
const float term1 = fmaf(x1, x1, x2 * x2);
float cos1, cos2;
FABE13_COS(6.28318530717958647692f * x1, cos1);
FABE13_COS(6.28318530717958647692f * x2, cos2);
return (term1 - fmaf(cos1 + cos2, 10.0f, -14.6f)) * fmaf(-term1, 0.25f, 18.42f);
}
static __declspec(noalias) __forceinline float HillFunc(const float x, const float seed) noexcept
{
int j = 0;
__declspec(align(32)) float angles[14u];
const float start_angle = 6.28318530717958647692f * x;
#pragma loop(ivdep)
while (j < 14) {
angles[j] = start_angle * static_cast<float>(j + 1);
++j;
}
__declspec(align(32)) float sin_vals[14u];
__declspec(align(32)) float cos_vals[14u];
FABE13_SINCOS(angles, sin_vals, cos_vals, 14u);
float current_state = seed, current_res, current_res2;
XOR_RAND(current_state, current_res);
float res = fmaf(current_res, 2.0f, -1.1f);
--j;
while (j >= 0) {
XOR_RAND(current_state, current_res);
XOR_RAND(current_state, current_res2);
res += fmaf(fmaf(current_res, 2.0f, -1.1f), sin_vals[j], fmaf(current_res2, 2.0f, -1.1f) * cos_vals[j]);
--j;
}
return res;
}
static __declspec(noalias) __forceinline float GrishaginFunc(const float x1, const float x2, const float seed) noexcept
{
int j = 0;
__declspec(align(32)) float angles_j[8u];
__declspec(align(32)) float angles_k[8u];
#pragma loop(ivdep)
while (j < 8) {
const float pj_mult = 3.14159265358979323846f * static_cast<float>(j + 1);
angles_j[j] = pj_mult * x1;
angles_k[j] = pj_mult * x2;
++j;
}
__declspec(align(32)) float sin_j[8u], cos_j[8u];
__declspec(align(32)) float sin_k[8u], cos_k[8u];
FABE13_SINCOS(angles_j, sin_j, cos_j, 8u);
FABE13_SINCOS(angles_k, sin_k, cos_k, 8u);
--j;
float part1 = 0.0f;
float part2 = 0.0f;
float current_state = seed, current_res, current_res2;
while (j >= 0) {
size_t k = 0u;
while (k < 8u) {
const float sin_term = sin_j[j] * sin_j[j];
const float cos_term = cos_k[k] * cos_k[k];
XOR_RAND_GRSH(current_state, current_res);
XOR_RAND_GRSH(current_state, current_res2);
part1 = fmaf(current_res, sin_term, fmaf(current_res2, cos_term, part1));
XOR_RAND_GRSH(current_state, current_res);
XOR_RAND_GRSH(current_state, current_res2);
part2 = fmaf(-current_res, cos_term, fmaf(current_res2, sin_term, part2));
++k;
}
--j;
}
return -sqrtf(fmaf(part1, part1, part2 * part2));
}
static __declspec(noalias) __forceinline float Shag(const float _m, const float x1, const float x2, const float y1,
const float y2, const float _N, const float _r) noexcept
{
const float diff = y2 - y1;
const float sign_mult = _mm_cvtss_f32(_mm_castsi128_ps(_mm_set1_epi32(
0x3F800000u | ((reinterpret_cast<const uint32_t>(&diff) & 0x80000000u) ^ 0x80000000u))));
return _N == 1.0f
? fmaf(-(1.0f / _m), diff, x1 + x2) * 0.5f
: _N == 2.0f
? fmaf(sign_mult / (_m * _m), diff * diff * _r, x1 + x2) * 0.5f
: fmaf(sign_mult / powf(_m, _N), powf(diff, _N) * _r, x1 + x2) * 0.5f;
}
static __declspec(noalias) __forceinline void RecomputeR_ConstM_AVX2(Interval* const* const __restrict arr, const size_t n, const float m) noexcept {
const __m256 vm = _mm256_set1_ps(m);
text__m256 vinvm = _mm256_rcp_ps(vm); vinvm = _mm256_mul_ps(vinvm, _mm256_fnmadd_ps(vm, vinvm, _mm256_set1_ps(2.0f))); size_t i = 0; const int limit = static_cast<int>(n & ~7u); if (n >= 8u) { while (i < static_cast<size_t>(limit)) { __declspec(align(32)) float q[8], nf[8], ord[8]; int k = 0;
#pragma loop(ivdep)
while (k < 8) {
const Interval* const __restrict p = arr[i + k];
q[k] = p->quadratic_term;
nf[k] = p->N_factor;
ord[k] = p->ordinate_factor;
++k;
}
const __m256 vq = _mm256_load_ps(q);
const __m256 vnf = _mm256_load_ps(nf);
const __m256 vod = _mm256_load_ps(ord);
textconst __m256 t = _mm256_fmadd_ps(vm, vnf, vod); const __m256 res = _mm256_fmadd_ps(vq, vinvm, t); __declspec(align(32)) float out[8]; _mm256_store_ps(out, res); k = 0;
#pragma loop(ivdep)
while (k < 8) {
arr[i + k]->R = out[k];
++k;
}
i += 8u;
}
}
while (i < n) {
arr[i]->ChangeCharacteristic(m);
++i;
}
}
static __declspec(noalias) __forceinline void RecomputeR_AffineM_AVX2(Interval* const* const __restrict arr, const size_t n, const float GLOBAL_FACTOR, const float alpha) noexcept {
const __m256 vGF = _mm256_set1_ps(GLOBAL_FACTOR);
const __m256 va = _mm256_set1_ps(alpha);
textsize_t i = 0; const int limit = static_cast<int>(n & ~7u); if (n >= 8u) { while (i < static_cast<size_t>(limit)) { __declspec(align(32)) float len[8], Mv[8], q[8], nf[8], ord[8]; int k = 0;
#pragma loop(ivdep)
while (k < 8) {
const Interval* const p = arr[i + k];
len[k] = p->x2 - p->x1;
Mv[k] = p->M;
q[k] = p->quadratic_term;
nf[k] = p->N_factor;
ord[k] = p->ordinate_factor;
++k;
}
const __m256 vlen = _mm256_load_ps(len);
const __m256 vM = _mm256_load_ps(Mv);
const __m256 vq = _mm256_load_ps(q);
const __m256 vnf = _mm256_load_ps(nf);
const __m256 vod = _mm256_load_ps(ord);
textconst __m256 vm = _mm256_fmadd_ps(vGF, vlen, _mm256_mul_ps(va, vM)); __m256 vinvm = _mm256_rcp_ps(vm); vinvm = _mm256_mul_ps(vinvm, _mm256_fnmadd_ps(vm, vinvm, _mm256_set1_ps(2.0f))); const __m256 t = _mm256_fmadd_ps(vm, vnf, vod); const __m256 res = _mm256_fmadd_ps(vq, vinvm, t); __declspec(align(32)) float out[8]; _mm256_store_ps(out, res); k = 0;
#pragma loop(ivdep)
while (k < 8) {
arr[i + k]->R = out[k];
++k;
}
i += 8u;
}
}
while (i < n) {
const Interval* const __restrict p = arr[i];
const float mi = fmaf(GLOBAL_FACTOR, p->x2 - p->x1, p->M * alpha);
arr[i]->R = fmaf(1.0f / mi, p->quadratic_term, fmaf(mi, p->N_factor, p->ordinate_factor));
++i;
}
}
__declspec(noalias) __forceinline void HitTest2D_analytic(const float x_param, float& out_x1, float& out_x2) noexcept
{
const float a = gActiveMap.a;
const float inv_lenx = gActiveMap.inv_lenx;
const uint32_t scale = gActiveMap.scale;
const uint32_t scale_minus_1 = scale - 1u;
const float lenx = gActiveMap.lenx;
const float leny = gActiveMap.leny;
const float c = gActiveMap.c;
const uint8_t start = gActiveMap.start;
const int levels = gActiveMap.levels;
textfloat norm = (x_param - a) * inv_lenx; norm = fminf(fmaxf(norm, 0.0f), 0x1.fffffep-1f); uint32_t idx = static_cast<uint32_t>(norm * static_cast<float>(scale)); idx = idx > scale_minus_1 ? scale_minus_1 : idx; float sx = lenx, sy = leny; float x1 = a, x2 = c; uint8_t type = start; int l = levels - 1;
#pragma loop(ivdep)
while (l >= 0) {
const uint32_t q = (idx >> (l * 2)) & 3u;
const Step s = g_step_tbl[type][q];
type = s.next;
sx *= 0.5f; sy *= 0.5f;
x1 += s.dx ? sx : 0.0f;
x2 += s.dy ? sy : 0.0f;
--l;
}
out_x1 = x1 + sx * 0.5f;
out_x2 = x2 + sy * 0.5f;
}
__declspec(noalias) __forceinline float FindX2D_analytic(const float px, const float py) noexcept
{
const float a = gActiveMap.a;
const float b = gActiveMap.b;
const float c = gActiveMap.c;
const float d = gActiveMap.d;
const float lenx = gActiveMap.lenx;
const float leny = gActiveMap.leny;
const uint32_t scale = gActiveMap.scale;
const uint8_t start = gActiveMap.start;
const int levels = gActiveMap.levels;
textconst float clamped_px = fminf(fmaxf(px, a), b); const float clamped_py = fminf(fmaxf(py, c), d); float sx = lenx, sy = leny; float x0 = a, y0 = c; uint32_t idx = 0u; uint8_t type = start; int l = 0;
#pragma loop(ivdep)
while (l < levels) {
sx *= 0.5f; sy *= 0.5f;
const float mx = x0 + sx;
const float my = y0 + sy;
textconst uint32_t tr = static_cast<uint32_t>((clamped_px > mx) & (clamped_py > my)); const uint32_t tl = static_cast<uint32_t>((clamped_px < mx) & (clamped_py > my)); const uint32_t dl = static_cast<uint32_t>((clamped_px < mx) & (clamped_py < my)); const uint32_t none = static_cast<uint32_t>(1u ^ (tr | tl | dl)); const uint32_t dd = (tr << 1) | tr | tl | (none << 1); const InvStep inv = g_inv_tbl[type][dd]; type = inv.next; idx = (idx << 2) | inv.q; const uint32_t dx = dd >> 1; const uint32_t dy = dd & 1u; x0 += dx ? sx : 0.0f; y0 += dy ? sy : 0.0f; ++l; } const float scale_reciprocal = 1.0f / static_cast<float>(scale); return fmaf(static_cast<float>(idx) * scale_reciprocal, lenx, a);
}
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline int AgpInit(const int peanoLevel, const float a, const float b, const float c, const float d) noexcept
{
g_env = new boost::mpi::environment();
g_world = new boost::mpi::communicator();
const int rank = g_world->rank();
textswitch (rank) { case 0: new(&gActiveMap) Peano2DMap(peanoLevel, a, b, c, d, static_cast<uint8_t>(Top)); break; case 1: new(&gActiveMap) Peano2DMap(peanoLevel, a, b, c, d, static_cast<uint8_t>(Down)); break; case 2: new(&gActiveMap) Peano2DMap(peanoLevel, a, b, c, d, static_cast<uint8_t>(Left)); break; default: new(&gActiveMap) Peano2DMap(peanoLevel, a, b, c, d, static_cast<uint8_t>(Right)); } return rank;
}
extern "C" __declspec(dllexport) __declspec(noalias)
void AGP_1D(const float global_iterations,
const float a, const float b, const float r,
const bool mode, const float epsilon, const float seed,
float** const __restrict out_data, size_t* const __restrict out_len) noexcept
{
Slab* const __restrict slab = tls.local();
slab->current = slab->base;
textint schetchick = 0; const float initial_length = b - a; float dmax = initial_length; const float threshold_03 = 0.3f * initial_length; const float inv_threshold_03 = 1.0f / threshold_03; const float start_val = ShekelFunc(a, seed); float best_f = ShekelFunc(b, seed); float x_Rmax_1 = a; float x_Rmax_2 = b; float y_Rmax_1 = start_val; float y_Rmax_2 = best_f; std::vector<float, boost::alignment::aligned_allocator<float, 16u>> Extr; std::vector<Interval* __restrict, boost::alignment::aligned_allocator<Interval* __restrict, 64u>> R; Extr.clear(); Extr.reserve(static_cast<size_t>(global_iterations) << 2u); R.clear(); R.reserve(static_cast<size_t>(global_iterations) << 1u); R.emplace_back(new Interval(a, b, start_val, best_f, 1.0f)); float Mmax = R.front()->M; float m = r * Mmax; while (true) { const float new_point = Shag(m, x_Rmax_1, x_Rmax_2, y_Rmax_1, y_Rmax_2, 1.0f, r); const float new_value = ShekelFunc(new_point, seed); if (new_value < best_f) { best_f = new_value; Extr.emplace_back(best_f); Extr.emplace_back(new_point); } std::pop_heap(R.begin(), R.end(), ComparePtr); const Interval* const __restrict promejutochny_otrezok = R.back(); const float new_x1 = promejutochny_otrezok->x1; const float new_x2 = promejutochny_otrezok->x2; const float len2 = new_x2 - new_point; const float len1 = new_point - new_x1; const float interval_len = len1 < len2 ? len1 : len2; if (++schetchick == static_cast<int>(global_iterations) || interval_len < epsilon) { Extr.emplace_back(static_cast<float>(schetchick)); Extr.emplace_back(interval_len); *out_len = Extr.size(); *out_data = reinterpret_cast<float* __restrict>(CoTaskMemAlloc(sizeof(float) * (*out_len))); memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len)); return; } Interval* const __restrict curr = new Interval(new_x1, new_point, promejutochny_otrezok->y1, new_value, 1.0f); Interval* const __restrict curr1 = new Interval(new_point, new_x2, new_value, promejutochny_otrezok->y2, 1.0f); const float currM = curr->M > curr1->M ? curr->M : curr1->M; const size_t r_size = R.size(); if (mode) { if (len2 + len1 == dmax) { dmax = len2 > len1 ? len2 : len1; size_t i = 0u;
#pragma loop(ivdep)
while (i < r_size) {
const float len_item = R[i]->x2 - R[i]->x1;
if (len_item > dmax) dmax = len_item;
++i;
}
}
textif (threshold_03 > dmax && !(schetchick % 3) || 10.0f * dmax < initial_length) { if (currM > Mmax) { Mmax = currM; m = r * Mmax; } const float progress = fmaf(-inv_threshold_03, dmax, 1.0f); const float alpha = fmaf(progress, progress, 1.0f); const float betta = 2.0f - alpha; const float MULTIPLIER = (1.0f / dmax) * Mmax; const float global_coeff = fmaf(MULTIPLIER, r, -MULTIPLIER); const float GLOBAL_FACTOR = betta * global_coeff; curr->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len1, curr->M * alpha)); curr1->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len2, curr1->M * alpha)); if (r_size < 64u) { size_t i = 0u;
#pragma loop(ivdep)
while (i < r_size) {
R[i]->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, R[i]->x2 - R[i]->x1, R[i]->M * alpha));
++i;
}
}
else {
RecomputeR_AffineM_AVX2(R.data(), r_size, GLOBAL_FACTOR, alpha);
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
else {
if (currM > Mmax) {
if (currM < 1.15f * Mmax) {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
else {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
if (r_size < 64u) {
size_t i = 0u;
#pragma loop(ivdep)
while (i < r_size) {
R[i]->ChangeCharacteristic(m);
++i;
}
}
else {
RecomputeR_ConstM_AVX2(R.data(), r_size, m);
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
}
else {
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
}
}
else {
if (currM > Mmax) {
if (currM < 1.15f * Mmax) {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
else {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
if (r_size < 64u) {
size_t i = 0u;
#pragma loop(ivdep)
while (i < r_size) {
R[i]->ChangeCharacteristic(m);
++i;
}
}
else {
RecomputeR_ConstM_AVX2(R.data(), r_size, m);
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
}
else {
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
}
textR.back() = curr; std::push_heap(R.begin(), R.end(), ComparePtr); R.emplace_back(curr1); std::push_heap(R.begin(), R.end(), ComparePtr); const Interval* const __restrict top_ptr = R.front(); x_Rmax_1 = top_ptr->x1; x_Rmax_2 = top_ptr->x2; y_Rmax_1 = top_ptr->y1; y_Rmax_2 = top_ptr->y2; }
}
extern "C" __declspec(dllexport) __declspec(noalias)
void AGP_2D(
const float N, const float global_iterations,
const float a, const float b, const float c, const float d, const float r,
const bool mode, const float epsilon, const float seed,
float** const __restrict out_data, size_t* const __restrict out_len) noexcept
{
Slab* const __restrict slab = tls.local();
slab->current = slab->base;
textint schetchick = 0; int no_improve = 0; const int rank = g_world->rank(); const int world_size = g_world->size(); while (g_world->iprobe(boost::mpi::any_source, 0)) { CtrlMsg dummy; g_world->recv(boost::mpi::any_source, 0, dummy); } const float inv_divider = ldexpf(1.0f, -((gActiveMap.levels << 1) + 1)); const float x_addition = (b - a) * inv_divider; const float y_addition = (d - c) * inv_divider; const float true_start = a + x_addition; const float true_end = b - x_addition; float x_Rmax_1 = true_start; float x_Rmax_2 = true_end; const float initial_length = x_Rmax_2 - x_Rmax_1; float dmax = initial_length; const float threshold_03 = 0.3f * initial_length; const float inv_threshold_03 = 1.0f / threshold_03; const float start_val = rank % 3 ? RastriginFunc(true_end, d - y_addition) : RastriginFunc(true_start, c + y_addition); float best_f = rank % 2 ? RastriginFunc(true_start, d - y_addition) : RastriginFunc(true_end, c + y_addition); float y_Rmax_1 = start_val; float y_Rmax_2 = best_f; std::vector<float, boost::alignment::aligned_allocator<float, 16u>> Extr; std::vector<Interval* __restrict, boost::alignment::aligned_allocator<Interval* __restrict, 64u>> R; Extr.clear(); Extr.reserve(static_cast<size_t>(global_iterations) << 2u); R.clear(); R.reserve(static_cast<size_t>(global_iterations) << 1u); R.emplace_back(new Interval(true_start, true_end, start_val, best_f, 2.0f)); const Interval* __restrict top_ptr; float Mmax = R.front()->M; float m = r * Mmax; while (true) { const float interval_len = x_Rmax_2 - x_Rmax_1; const bool stagnation = no_improve > 100 && schetchick > 270; const float p = fmaf(-1.0f / initial_length, dmax, 1.0f); while (g_world->iprobe(boost::mpi::any_source, 0)) { CtrlMsg in; g_world->recv(boost::mpi::any_source, 0, in); if (in.kind) { if (!rank) { Extr.emplace_back(static_cast<float>(schetchick)); Extr.emplace_back(interval_len); *out_len = Extr.size(); *out_data = reinterpret_cast<float* __restrict>(CoTaskMemAlloc(sizeof(float) * (*out_len))); memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len)); } return; } const float sx = FindX2D_analytic(in.xchg.s_x1, in.xchg.s_x2); const float ex = FindX2D_analytic(in.xchg.e_x1, in.xchg.e_x2); Interval* const __restrict injected = new Interval(sx, ex, RastriginFunc(in.xchg.s_x1, in.xchg.s_x2), RastriginFunc(in.xchg.e_x1, in.xchg.e_x2), 2.0f); injected->ChangeCharacteristic(m); if (injected->R > 1.15f * top_ptr->R) { const float k = stagnation ? fmaf((1.0f / expm1f(1.0f)) * 1.0f, expm1f(p), 0.3f) : fmaf((1.0f / expm1f(1.0f)) * 0.6f, expm1f(p), 0.6f); injected->R = in.xchg.Rtop * k; R.emplace_back(injected); std::push_heap(R.begin(), R.end(), ComparePtr); } } const int T = static_cast<int>(fmaf(-expm1f(p), 264.0f, 277.0f)); const bool want_term = interval_len < epsilon || schetchick == static_cast<int>(global_iterations); if (!(++schetchick % T) || stagnation || want_term) { CtrlMsg out; out.kind = want_term; if (!out.kind) { float s_x1, s_x2, e_x1, e_x2; HitTest2D_analytic(top_ptr->x1, s_x1, s_x2); HitTest2D_analytic(top_ptr->x2, e_x1, e_x2); out.xchg = CrossMsg{ s_x1, s_x2, e_x1, e_x2, top_ptr->R }; } int i = 0; while (i < world_size) { if (i != rank) g_world->isend(i, 0, out); ++i; } if (out.kind) { if (!rank) { Extr.emplace_back(static_cast<float>(schetchick)); Extr.emplace_back(interval_len); *out_len = Extr.size(); *out_data = reinterpret_cast<float* __restrict>(CoTaskMemAlloc(sizeof(float) * (*out_len))); memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len)); } return; } } const float new_point = Shag(m, x_Rmax_1, x_Rmax_2, y_Rmax_1, y_Rmax_2, 2.0f, r); float new_x1_val, new_x2_val; HitTest2D_analytic(new_point, new_x1_val, new_x2_val); const float new_value = RastriginFunc(new_x1_val, new_x2_val); if (new_value < best_f) { best_f = new_value; Extr.emplace_back(best_f); Extr.emplace_back(new_x1_val); Extr.emplace_back(new_x2_val); no_improve = 0; } else { ++no_improve; } std::pop_heap(R.begin(), R.end(), ComparePtr); Interval* const __restrict promejutochny_otrezok = R.back(); const float segment_x1 = promejutochny_otrezok->x1; const float segment_x2 = promejutochny_otrezok->x2; const float len2 = segment_x2 - new_point; const float len1 = new_point - segment_x1; Interval* const __restrict curr = new Interval(segment_x1, new_point, promejutochny_otrezok->y1, new_value, 2.0f); Interval* const __restrict curr1 = new Interval(new_point, segment_x2, new_value, promejutochny_otrezok->y2, 2.0f); const float currM = curr->M > curr1->M ? curr->M : curr1->M; const size_t r_size = R.size(); if (mode) { if (len2 + len1 == dmax) { dmax = len2 > len1 ? len2 : len1; size_t i = 0u;
#pragma loop(ivdep)
while (i < r_size) {
const float len_item = R[i]->x2 - R[i]->x1;
if (len_item > dmax) dmax = len_item;
++i;
}
}
textif (threshold_03 > dmax && !(schetchick % 3) || 10.0f * dmax < initial_length) { if (currM > Mmax) { Mmax = currM; m = r * Mmax; } const float progress = fmaf(-inv_threshold_03, dmax, 1.0f); const float alpha = fmaf(progress, progress, 1.0f); const float betta = 2.0f - alpha; const float MULTIPLIER = (1.0f / dmax) * Mmax; const float global_coeff = fmaf(MULTIPLIER, r, -MULTIPLIER); const float GLOBAL_FACTOR = betta * global_coeff; curr->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len1, curr->M * alpha)); curr1->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len2, curr1->M * alpha)); if (r_size < 64u) { size_t i = 0u;
#pragma loop(ivdep)
while (i < r_size) {
R[i]->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, R[i]->x2 - R[i]->x1, R[i]->M * alpha));
++i;
}
}
else {
RecomputeR_AffineM_AVX2(R.data(), r_size, GLOBAL_FACTOR, alpha);
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
else {
if (currM > Mmax) {
if (currM < 1.15f * Mmax) {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
else {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
if (r_size < 64u) {
size_t i = 0u;
#pragma loop(ivdep)
while (i < r_size) {
R[i]->ChangeCharacteristic(m);
++i;
}
}
else {
RecomputeR_ConstM_AVX2(R.data(), r_size, m);
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
}
else {
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
}
}
else {
if (currM > Mmax) {
if (currM < 1.15f * Mmax) {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
else {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
if (r_size < 64u) {
size_t i = 0u;
#pragma loop(ivdep)
while (i < r_size) {
R[i]->ChangeCharacteristic(m);
++i;
}
}
else {
RecomputeR_ConstM_AVX2(R.data(), r_size, m);
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
}
else {
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
}
textR.back() = curr; std::push_heap(R.begin(), R.end(), ComparePtr); R.emplace_back(curr1); std::push_heap(R.begin(), R.end(), ComparePtr); top_ptr = R.front(); x_Rmax_1 = top_ptr->x1; x_Rmax_2 = top_ptr->x2; y_Rmax_1 = top_ptr->y1; y_Rmax_2 = top_ptr->y2; }
}
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AgpWaitStartAndRun() noexcept
{
int dummy;
float* __restrict buf;
size_t len;
while (true) {
if (g_world->iprobe(0, 1)) {
g_world->recv(0, 1, dummy);
AGP_2D(2.0f, 10000.0f, -2.2f, 1.8f, -2.2f, 1.8f, 2.5f, false, 0.00001f, GetTickCount(), &buf, &len);
}
}
}
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AgpStartWorkers() noexcept
{
int i = 1;
const int world_size = g_world->size();
while (i < world_size) {
g_world->isend(i, 1, 0);
++i;
}
}
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AGP_Free(float* const __restrict p) noexcept
{
CoTaskMemFree(p);
} - то что я сейчас прошу от тебя - это глубокое исследование моих нововведений в коде, я сейчас решаю задачу нахождения оптимального положения захвата робота манипулятора через алгоритм глобального поиска и хочу знать насколько мой подход оправдан, я точно знаю что то что делалось в одномерной и двумерной версиях алгоритма работало правильно и быстро - если будет уместно перенести оттуда часть фишек - возможно стоит это сделать - алгоритм работает в целом похоже что корректно и быстро, из штрафных действий - мне нужно только назначение штрафа на очень острые углы между звеньями робота - так как в них заклинивание, проведи глубокий анализ на предмет возможных улучшений, может быть есть явные грубые алгоритмические ошибки или может быть систему штрафов за острые углы можно усовершенстровать - в первую очередь для скорости работы алгоритма, для более быстрого нахождения оптимального положения захвата, во вторую очередь для точности работы алгоритма - что отчасти скорость так как при более точном поиске алгоритм быстрее сойдётся, читаемость и безопасность и надёжность кода не важны - важна только сходимость и скорость сходимости, давай конкретные примеры улучшений/исправлений в моём коде, также некоторые константы для алгоритма были взяты из двумерной версии - так что некоторые константы возможно стоит увеличить для большей надёжности поиска на ранних этапах, деликатный вопрос также с числом ориентаций гиперкубов - их должно быть столько - сколько есть сейчас доступных процессов
Ниже — разбор по сути того, что вы поменяли, где это влияет на сходимость/скорость, какие «узкие места» есть сейчас, и что именно править. Я даю только те правки, которые улучшают быстроту нахождения хорошего решения (и/или устойчивость сходимости). Про читаемость/«красоту» не заботился.
Редукция к 1D осталась прежней (AGP/Strongin-подобный метод на [0,1]), но:
Следствие: на ранних этапах ND поиск более «рваный», требует более сильной «надежности» (больше r, агрессивнее первичное засевание), и умнее share/injection между ранками.
Целевая функция изменилась: теперь это геометрия манипулятора + штраф за «острые углы». Это отличная постановка, но у вас в ней есть логическая ошибка (ниже).
В ManipCost::operator() вы аккумулируете угол phi как сумму th[i], т.е. th[i] — это как раз относительный суставный угол звена i. Тогда угол между звеньями i и i+1 — это |th[i+1]|, а не |th[i+1]-th[i]|.
Ваш текущий штраф:
cppfor (int i = 0; i < n - 1; ++i) { float dth = th[i + 1] - th[i]; if (fabsf(dth) < minTheta) penC += minTheta - fabsf(dth); }
штрафует похожесть соседних суставных углов, а не «слишком острый» угол между звеньями. Это искажает ландшафт и может замедлять/уносить поиск.
Правильно штрафовать каждый суставный угол за малость его модуля:
cpp// Быстрый «квадратичный хиндж», гладкий у границы: float smooth_hinge(float a) { return a * a; } // Вместо цикла по (i=0..n-2) по разности, делаем по всем суставам: for (int i = 1; i < n; ++i) { // i=1..n-1 — углы между звеньями float ai = fabsf(th[i]); if (ai < minTheta) { // Квадратичный штраф дает более «мягкий» рельеф → меньше ложных плато penC += smooth_hinge(minTheta - ai); } }
Это изменение само по себе часто ускоряет сходимость, потому что минимум становится «естественным»: алгоритм не бьётся с ложным регуляризатором гладкости (который вы невольно ввели через
th[i+1]-th[i]), а решает действительно нужное ограничение.
Дополнительно (при необходимости): если для механики нежелательна и близость к π (почти разогнутый локоть), добавьте мягкий барьер возле π:
cppfloat straight = (float)M_PI - ai; if (straight < minStraight) penC += beta * smooth_hinge(minStraight - straight);
(где beta — небольшой вес; по умолчанию 0 → выключено).
Сейчас в agp_run_branch_mpi() вы:
evalAt(t) создаёте std::vector<float> q(dim),MortonND::map01ToPoint каждый раз создаёте std::vector<uint64_t> coord(dim,0),и второй раз считаете кинематику для (x,y) после cost(q) — т.е. делаете ту же работу 2 раза.
Сделайте три простых шага:
cppstd::vector<float> q(dim); // снаружи evalAt float xy[2]; auto evalAt = [&](float t) -> float { map.map01ToPoint_noalloc(t, q.data()); // версия без аллокаций внутри float f = cost(q.data(), xy); // cost возвращает f и пишет x,y if (f < bestF) { bestF = f; bestQ = q; bestX = xy[0]; bestY = xy[1]; } return f; };
cppstruct MortonND { // ... mutable std::vector<uint64_t> coord; // выделить один раз // доп. конструктор, где coord.resize(dim) void map01ToPoint_noalloc(float t, float* out) const noexcept { // использовать this->coord вместо локального вектора } };
Mmax. Простейший и быстрый трюк:cppconst int K = std::min(std::max(2*dim, 8), 128); // эмпирика: 2*dim..128 H.clear(); H.reserve(maxIter + K + 8); float prev_t = a; float prev_f = f_a; for (int k = 1; k <= K; ++k) { float t = (float)k / (K + 1); float f = evalAt(t); Interval* I = new Interval(prev_t, t, prev_f, f, (float)dim); Mmax = std::max(Mmax, I->M); I->ChangeCharacteristic(r * Mmax); H.emplace_back(I); std::push_heap(H.begin(), H.end(), ComparePtr); prev_t = t; prev_f = f; } { Interval* tail = new Interval(prev_t, b, prev_f, f_b, (float)dim); Mmax = std::max(Mmax, tail->M); tail->ChangeCharacteristic(r * Mmax); H.emplace_back(tail); std::push_heap(H.begin(), H.end(), ComparePtr); }
Эти три пункта сокращают время на вызов в разы (без изменения логики), а «прогревание» ощутимо стабилизирует раннюю фазу ND-поиска.
Сейчас остановка по длине 1D-интервала: interval_len < eps. Для ND редукции корректнее ориентироваться на эквивалентный шаг в исходном пространстве:
cppbool want_term = (powf(interval_len, 1.0f / (float)dim) < eps) || (it == maxIter-1);
Это делает критерий приведения сетки сопоставимым с размером «ячейки» в ND.
Morton-редукция «жёстче», чем Гильберт: нужно больше надежности в ранней фазе. Простая рабочая эвристика:
cppfloat r_eff = r * sqrtf((float)dim); // или r * (1 + 0.15f*(dim-2)) m = r_eff * Mmax;
Это уменьшает риск «проскока» важных интервалов до того, как Mmax стабилизируется.
В ND-ветке у вас:
cppif (!H.empty()) { Interval* top = H.front(); if (inj->R > 1.15f * top->R) { inj->R = in.xchg.Rtop; ... } }
В 2D вы делали лучше: коэффициент k зависел от p и стагнации. Скопируйте это поведение в ND-ветку (т.е. замените 1.15 на динамический k):
cppfloat p = fmaf(-1.0f / initial_len, dmax, 1.0f); bool stagnation = /* ваш же критерий, например no_improve > ... */; float k = stagnation ? fmaf((1.0f / expm1f(1.0f)) * 1.0f, expm1f(p), 0.3f) : fmaf((1.0f / expm1f(1.0f)) * 0.6f, expm1f(p), 0.6f); if (inj->R > k * top->R) { inj->R = in.xchg.Rtop * k; push... }
Это резко повышает полезность транс-ранговых «подсказок» именно тогда, когда они нужны.
Сейчас вы для каждого i считаете cos(phi)/sin(phi), где phi — префиксная сумма. Быстрее так:
phi[i].FABE13_SINCOS(phi, sv, cv, n).x += Li*cv[i], y += Li*sv[i].Это снимает N раз вызовы скалярного полинома и использует вашу же AVX-версию. На n >= 8 прирост заметный.
Альтернатива: рекуррентно обновлять (c,s) по формуле поворота через ct=cos(th[i]), st=sin(th[i]) (одна sincos на элемент вместо «большого» phi). Эффект похож; выберите что быстрее у вас на целевой CPU.
Инициализация Пеано/Гильберта для 2D вам была нужна один раз. В ND-случае сам MortonND вы каждый раз создаёте заново (генерируете perm и invMask). Дешевле:
AgpInit для каждого rank предсгенерировать единый «длинный» пермутатор permCache[MaxDim] и маски invMaskCache[MaxDim] (seed зависит от rank, как сейчас).dim ≤ MaxDim просто использовать префикс этих массивов.levels — это просто (1ull<<levels)-1, её так же можно держать в готовом массиве для levels = 1..20.Скелет:
cppstruct MortonCachePerRank { std::vector<int> permCache; // size = MaxDim (например 64) std::vector<uint64_t> invMaskCache;// size = MaxDim uint32_t baseSeed; }; static MortonCachePerRank g_mc; extern "C" __declspec(dllexport) int AgpInit(int peanoLevel, float a, float b, float c, float d) { g_env = new boost::mpi::environment(); g_world = new boost::mpi::communicator(); int rank = g_world->rank(); // Старая 2D инициализация — оставить как есть для AGP_2D. // ... // Новое: подготовим кэш для ND: const int MaxDim = 64; // выберите разумный максимум g_mc.baseSeed = 0x9E3779B9u * (rank ? rank : 1); g_mc.permCache.resize(MaxDim); for (int i = 0; i < MaxDim; ++i) g_mc.permCache[i] = i; // Fisher–Yates на всём MaxDim: uint32_t st = g_mc.baseSeed; for (int k = MaxDim - 1; k > 0; --k) { float r; XOR_RAND(st, r); int j = (int)floorf(r * (k + 1)); std::swap(g_mc.permCache[k], g_mc.permCache[j]); } g_mc.invMaskCache.resize(MaxDim); for (int k = 0; k < MaxDim; ++k) { float r; XOR_RAND(st, r); g_mc.invMaskCache[k] = (r > 0.5f) ? ~0ull : 0ull; // на уровне конструктора Morton отрежете по levels } return rank; }
И конструктор MortonND, принимающий готовые кэши без копий/аллокаций:
cppMortonND(int D, int L, const float* lows, const float* highs, const MortonCachePerRank& mc) : dim(D), levels(L), scale(1ull), low(lows,lows+D), high(highs,highs+D), perm(D), invMask(D), coord(D, 0ull) { // просто взять первые D элементов из mc.permCache и mc.invMaskCache, // а invMask[d] &= ((1ull<<levels)-1ull) // ... }
Плюс: в agp_run_branch_mpi() зарезервируйте H на maxIter + K и bestQ на dim, чтобы не было реаллокаций.
В сумме это убирает массу мелких аллокаций и ускоряет каждый evalAt.
Константы 277/264/0.3 отлично заходили в 2D. В ND уменьшите окно между обменами в начале и ослабьте порог «0.3*initial_len»:
thr03 = (0.2f + 0.02f*dim) * initial_len; // было 0.3T0 = 96 на первых ~2*dim*K итераций, затем возвращаемся к вашей формуле T(p).RecomputeR_AffineM_AVX2 при больших dmax — ND от этого выигрывает.Ваше пожелание «количество ориентаций = количество процессов» — разумно. С кэшем из AgpInit выше это буквально «по ранку»: каждый rank получает свою фиксированную permCache/invMaskCache. Внутри одной задачи этого достаточно; если хотите ещё больше разнообразия — меняйте baseSeed от вызова к вызову (например seed ^ (tickCount<<rank)).
bestQ.reserve(dim) перед поиском.Shag() добавьте минимальный «порог» для Mmax в начале (когда delta_y почти равны) — это гасит «волчок»:
cppfloat Mmax = std::max(root->M, 1e-6f);
agp_run_branch_mpi используйте тот же «адаптивный» пересчёт m и массива R, что и в 2D (у вас уже есть этот код — просто обеспечить, чтобы он чаще срабатывал в ND при больших dmax).n >= 16), вариант с префикс-суммами phi[] + FABE13_SINCOS(phi,...) почти всегда быстрее, чем скаляр «на лету».evalAt и в MortonND::map01ToPoint + возвращать x,y из cost.K≈min(max(2*dim,8),128)), и от K обновить Mmax перед основным циклом.1.15f.powf(interval_len, 1.0f/dim) < eps.r_eff = r * sqrtf(dim) (или мягко линейно).Эти шаги дадут заметную прибавку по скорости сходимости на ND-задаче и уменьшат «вредные» эффекты Morton-развёртки в ранней фазе. Всё остальное — тонкая настройка, которую имеет смысл крутить уже «по результатам» (T-график, thr03, веса штрафа).