#pragma once #include "pch.h" #pragma strict_gs_...

Creato il: 23 agosto 2025

Risposto usando GPT-5 Thinking da Chat01

Domanda

#pragma once

#include "pch.h"

// MSVC code-generation settings for everything that follows in this header.
// NOTE(review): the two push directives are never popped in the visible part
// of this header, so the settings stay in effect for the including file after
// this point - confirm that is intended.
#pragma strict_gs_check(push, off)
// The optimize pragma takes an option string first; the empty string with
// `on` restores the optimizations selected on the command line (/O...).
// The bare `optimize(on)` form is not valid syntax for this pragma.
#pragma optimize("", on)
#pragma inline_recursion(on)
#pragma inline_depth(255u)
#pragma runtime_checks("", off)
#pragma check_stack(off)
#pragma fenv_access(off)
#pragma auto_inline(on)
#pragma managed(push, off)

/// Advances a xorshift32 generator (shift triple 13/17/5) and returns the new
/// state scaled by 2^-31 and shifted by -1, i.e. `state * 2^-31 - 1`.
/// @param state  Generator state, updated in place. A zero state stays zero.
/// @return       The scaled value as a float.
static __declspec(noalias) __forceinline float xor_rand_grsh(uint32_t& __restrict state) noexcept
{
    uint32_t bits = state;
    bits = bits ^ (bits << 13u);
    bits = bits ^ (bits >> 17u);
    bits = bits ^ (bits << 5u);
    state = bits;

    const float as_float = static_cast<float>(bits);
    return fmaf(as_float, 0x1.0p-31f, -1.0f);
}

/// Advances a xorshift32 generator (shift triple 13/17/5) and returns the new
/// state multiplied by 2^-32.
/// @param state  Generator state, updated in place. A zero state stays zero.
/// @return       The scaled value as a float.
static __declspec(noalias) __forceinline float xor_rand(uint32_t& __restrict state) noexcept
{
    uint32_t bits = state;
    bits = bits ^ (bits << 13u);
    bits = bits ^ (bits >> 17u);
    bits = bits ^ (bits << 5u);
    state = bits;

    const float as_float = static_cast<float>(bits);
    return as_float * 0x1.0p-32f;
}

/// Evaluates a 14-harmonic trigonometric sum at `x` whose coefficients are
/// drawn from `xor_rand` starting at `seed`.
/// @param x     Evaluation point; harmonic h uses the angle 2*pi*h*x.
/// @param seed  Generator seed (taken by value, so the caller's copy is untouched).
/// @return      Constant term plus the sum of the 14 sine/cosine terms.
static __declspec(noalias) __forceinline float HillFunc(const float x, uint32_t seed) noexcept
{
    __declspec(align(64u)) float angles[14u];
    const float start_angle = 6.28318530717958647692f * x;

#pragma loop(ivdep)
    for (int32_t i = 0; i < 14; ++i) {
        angles[i] = start_angle * (i + 1);
    }

    __declspec(align(64u)) float sin_vals[14u];
    __declspec(align(64u)) float cos_vals[14u];
    fabe13_sincos_scalar_simple(angles, sin_vals, cos_vals, 14u);

    float res = fmaf(xor_rand(seed), 2.0f, -1.1f);

    // Harmonics are visited from the highest index down to 0.
    for (int32_t i = 13; i >= 0; --i) {
        // The two draws are named so that their order is fixed (sine
        // coefficient first). Written as sibling arguments of one call, the
        // order would be unspecified and could differ between compilers.
        const float sin_coeff = fmaf(xor_rand(seed), 2.0f, -1.1f);
        const float cos_coeff = fmaf(xor_rand(seed), 2.0f, -1.1f);
        res += fmaf(sin_coeff, sin_vals[i], cos_coeff * cos_vals[i]);
    }
    return res;
}

static __declspec(noalias) __forceinline float ShekelFunc(const float x, uint32_t seed) noexcept
{
uint32_t i = 0u;
float res = 0.0f;

text
while (i < 10u) { const float r = xor_rand(seed); const float x_part = fmaf(-r, 10.0f, x); res -= 1.0f / fmaf(fmaf(xor_rand(seed), 20.0f, 0.5f), x_part * x_part, fmaf(xor_rand(seed), 0.4f, 1.0f)); ++i; } return res;

}

/// Evaluates a two-dimensional trigonometric test function built from an
/// 8 x 8 grid of terms whose coefficients are drawn from `xor_rand_grsh`.
/// @param x1    First coordinate; harmonic h uses the angle pi*h*x1.
/// @param x2    Second coordinate; harmonic h uses the angle pi*h*x2.
/// @param seed  Generator seed (taken by value, so the caller's copy is untouched).
/// @return      -sqrt(part1^2 + part2^2) of the two accumulated sums.
static __declspec(noalias) __forceinline float GrishaginFunc(const float x1, const float x2, uint32_t seed) noexcept
{
    __declspec(align(64u)) float angles_i[8u];
    __declspec(align(64u)) float angles_j[8u];

#pragma loop(ivdep)
    for (int32_t i = 0; i < 8; ++i) {
        const float pi_mult = 3.14159265358979323846f * (i + 1);
        angles_i[i] = pi_mult * x1;
        angles_j[i] = pi_mult * x2;
    }

    __declspec(align(64u)) float sin_i[8u], cos_i[8u];
    __declspec(align(64u)) float sin_j[8u], cos_j[8u];
    fabe13_sincos_scalar_simple(angles_i, sin_i, cos_i, 8u);
    fabe13_sincos_scalar_simple(angles_j, sin_j, cos_j, 8u);

    // NOTE(review): cos_i and sin_j are filled above but never read, and the
    // terms below are squares (sin_i^2, cos_j^2) rather than cross products
    // such as sin_i * sin_j. Confirm this is the intended formula.
    float part1 = 0.0f;
    float part2 = 0.0f;
    for (int32_t i = 7; i >= 0; --i) {
        const float sin_term = sin_i[i] * sin_i[i]; // does not depend on j
        for (uint32_t j = 0u; j < 8u; ++j) {
            const float cos_term = cos_j[j] * cos_j[j];

            // Each draw is named so that the order is fixed (left to right as
            // the terms are written). As sibling arguments of one call the
            // order would be unspecified.
            const float a_sin = xor_rand_grsh(seed);
            const float a_cos = xor_rand_grsh(seed);
            part1 = fmaf(a_sin, sin_term, fmaf(a_cos, cos_term, part1));

            const float b_cos = xor_rand_grsh(seed);
            const float b_sin = xor_rand_grsh(seed);
            part2 = fmaf(-b_cos, cos_term, fmaf(b_sin, sin_term, part2));
        }
    }
    return -sqrtf(fmaf(part1, part1, part2 * part2));
}

/// Evaluates the two-dimensional objective used by the 2D search branch:
/// (x1^2 + x2^2 - (10 * (cos(2*pi*x1) + cos(2*pi*x2)) - 14.6))
///   * (18.42 - 0.25 * (x1^2 + x2^2)).
/// @param x1  First coordinate.
/// @param x2  Second coordinate.
/// @return    The function value at (x1, x2).
static __declspec(noalias) __forceinline float RastriginFunc(const float x1, const float x2) noexcept
{
    const float radius_sq = fmaf(x1, x1, x2 * x2);
    const float cos_sum =
        fabe13_cos(6.28318530717958647692f * x1) +
        fabe13_cos(6.28318530717958647692f * x2);
    const float oscillation = fmaf(cos_sum, 10.0f, -14.6f);
    const float envelope = fmaf(-radius_sq, 0.25f, 18.42f);
    return (radius_sq - oscillation) * envelope;
}

/// Computes the next trial point inside the interval [x1, x2].
///   _N == 1 : (x1 + x2) / 2 - (y2 - y1) / (2 * _m)
///   else    : (x1 + x2) / 2 -/+ _r * (y2 - y1)^2 / (2 * _m^2),
///             subtracting when y2 > y1 and adding otherwise.
/// @param _m  Slope estimate used as the divisor.
/// @param x1  Left end of the interval.
/// @param x2  Right end of the interval.
/// @param y1  Function value at x1.
/// @param y2  Function value at x2.
/// @param _N  Problem dimension selector (1 selects the linear rule).
/// @param _r  Multiplier applied to the squared difference when _N != 1.
/// @return    The new trial point.
static __declspec(noalias) __forceinline float Shag(const float _m, const float x1, const float x2, const float y1,
    const float y2, const uint32_t _N, const float _r) noexcept
{
    const float diff = y2 - y1;
    if (_N == 1u) {
        return fmaf(-(1.0f / _m), diff, x1 + x2) * 0.5f;
    }

    // The multiplication signs in this expression had been lost, which left
    // the function uncompilable.
    const float scaled_sq = diff * diff * _r;
    const float inv_m_sq = 1.0f / (_m * _m);
    return diff > 0.0f
        ? fmaf(-inv_m_sq, scaled_sq, x1 + x2) * 0.5f
        : fmaf(inv_m_sq, scaled_sq, x1 + x2) * 0.5f;
}

/// One search interval: its two end points as (parameter, value) pairs plus
/// the slope estimate M and the characteristic R used to order the heap.
/// (The leading `const` that used to precede the struct declaration had no
/// effect on the type and is dropped.)
__declspec(align(32u)) struct Interval final
{
public:
    std::pair<float, float> start; ///< (x, f(x)) at the left end
    std::pair<float, float> end;   ///< (x, f(x)) at the right end
    float M;                       ///< |dy| / dx^(1/N), set by the constructor
    float R;                       ///< characteristic, set by ChangeCharacteristic

    /// Builds the interval and computes M = |dy| / dx^(1/_N).
    /// @param _start  Left end point (x, value).
    /// @param _end    Right end point (x, value).
    /// @param _N      Dimension selector: 1 -> dx, 2 -> sqrt(dx), else dx^(1/_N).
    __declspec(noalias) __forceinline Interval(const std::pair<float, float>& __restrict _start,
        const std::pair<float, float>& __restrict _end, const uint32_t _N) noexcept
        : start(_start)
        , end(_end)
        , M(_N == 1u
            ? fabsf(_end.second - _start.second) / (_end.first - _start.first)
            : _N == 2u
                ? fabsf(_end.second - _start.second) / sqrtf(_end.first - _start.first)
                : fabsf(_end.second - _start.second) / powf(_end.first - _start.first, 1.0f / float(_N)))
        , R(0.0f) // defined value: intervals are copied/moved before R is first computed
    {
    }

    /// Recomputes R = _m * s + dy^2 / (_m * s) - 2 * (y_end + y_start),
    /// where s = dx^(1/_N) (dx for 1, sqrt(dx) for 2, powf otherwise).
    /// @param _m  Slope estimate to use for this interval.
    /// @param _N  Dimension selector, same meaning as in the constructor.
    __declspec(noalias) __forceinline void ChangeCharacteristic(const float _m, const uint32_t _N) noexcept
    {
        const float delta_x = end.first - start.first;
        const float delta_y = end.second - start.second;

        float scaled_dx;
        switch (_N) {
        case 1u:
            scaled_dx = delta_x;
            break;
        case 2u:
            scaled_dx = sqrtf(delta_x);
            break;
        default:
            scaled_dx = powf(delta_x, 1.0f / _N);
        }

        R = fmaf(
            -(end.second + start.second), 2.0f,
            fmaf(_m, scaled_dx, (delta_y * delta_y) / (_m * scaled_dx)));
    }
};

/// Heap ordering predicate: `a` sorts before `b` when its characteristic R is
/// smaller. Used with std::make_heap / push_heap / pop_heap in this file, so
/// the interval with the largest R ends up at the front of the heap.
static __declspec(noalias) __forceinline bool Compare(const Interval& __restrict a, const Interval& __restrict b) noexcept
{
    const float lhs = a.R;
    const float rhs = b.R;
    return lhs < rhs;
}

/// Walks the quadrant tree from this node down `razvertka` levels, choosing at
/// each level the child whose position along the curve contains parameter `x`.
/// @param x  Curve parameter. NOTE(review): the code assumes a <= x < b; values
///           outside that range make the float -> unsigned conversion below
///           ill-defined or select no child - confirm callers guarantee it.
/// @return   The node reached after the last level.
const __declspec(noalias) __forceinline PeanoCurve_2D* PeanoCurve_2D::HitTest_2D(float x) const noexcept
{
    const uint32_t levels = this->razvertka;
    const float this_a = this->a;
    const float b_minus_a = this->b - this_a;
    const float inv_b_minus_a = 1.0f / b_minus_a;
    const PeanoCurve_2D* Curr = this;
    x -= this_a;

    for (uint32_t level = 1u; level <= levels; ++level) {
        // The original `1u << ++i + i` modified and read `i` without
        // sequencing; the intended shift is twice the (already incremented)
        // level, which is spelled out here.
        const int32_t two_level = static_cast<int32_t>(level << 1u);
        const uint32_t cells = 1u << two_level; // 4^level sub-intervals

        // Index (0..3) of the quarter of the current sub-interval holding x.
        const uint32_t num = static_cast<uint32_t>(cells * x * inv_b_minus_a);
        // Remove the offset of that quarter so x is relative to it again.
        x = fmaf(-ldexp(1.0f, -two_level) * num, b_minus_a, x);

        const List currType = Curr->Type;
        switch (num) {
        case 0u:
            Curr = (currType == Top || currType == Right) ? Curr->DownLeft.get() : Curr->TopRight.get();
            break;
        case 1u:
            Curr = (currType == Top || currType == Left) ? Curr->TopLeft.get() : Curr->DownRight.get();
            break;
        case 2u:
            Curr = (currType == Top || currType == Right) ? Curr->TopRight.get() : Curr->DownLeft.get();
            break;
        case 3u:
            Curr = (currType == Top || currType == Left) ? Curr->DownRight.get() : Curr->TopLeft.get();
        }
    }
    return Curr;
}

const __declspec(noalias) __forceinline float PeanoCurve_2D::FindX_2D(float target_x1, float target_x2) const noexcept
{
uint32_t _razvertka = this->razvertka;
uint32_t _razvertka1 = _razvertka;
float x1, x2, x = this->a;
const float b_minus_a = this->b - x;
const PeanoCurve_2D* Curr = this;

text
while (_razvertka != 0u) { const uint32_t exponent = _razvertka1 - _razvertka-- << 1u; x1 = Curr->x1; x2 = Curr->x2; const List currType = Curr->Type; if (target_x1 > x1 && target_x2 > x2) { Curr = Curr->TopRight.get(); if (currType == Top || currType == Right) { x = fmaf(ldexp(1.0f, -exponent) * 0.5f, b_minus_a, x); } } else if (target_x1 < x1 && target_x2 > x2) { Curr = Curr->TopLeft.get(); if (currType == Top || currType == Left) { x = fmaf(ldexp(1.0f, -exponent) * 0.25f, b_minus_a, x); } else { x = fmaf(ldexp(1.0f, -exponent) * 0.75f, b_minus_a, x); } } else if (target_x1 < x1 && target_x2 < x2) { Curr = Curr->DownLeft.get(); if (currType == Down || currType == Left) { x = fmaf(ldexp(1.0f, -exponent) * 0.5f, b_minus_a, x); } } else { Curr = Curr->DownRight.get(); if (currType == Top || currType == Left) { x = fmaf(ldexp(1.0f, -exponent) * 0.75f, b_minus_a, x); } else { x = fmaf(ldexp(1.0f, -exponent) * 0.25f, b_minus_a, x); } } } return x;

}

// MPI communicator shared by the library; its definition is in the DLL entry
// source quoted further down in this transcript.
extern const boost::mpi::communicator world;

// Roots of the two quadrant trees used by the 2D branch of
// Base_LNA_1_2_Mer_AGP; both are defined and filled in next to DllMain.
extern std::unique_ptr<PeanoCurve_2D> Curve;
extern std::unique_ptr<PeanoCurve_2D> Curve_Inverted;

__declspec(dllexport) __declspec(noalias) std::vector<float> Base_LNA_1_2_Mer_AGP(const uint32_t N,
const uint32_t global_iterations, const float a, const float b, const float c, const float d, const uint32_t r,
const bool mode, const float epsilon, const uint32_t seed) noexcept
{
if (N == 1u) {
const float initial_length = b - a;
float dmax = initial_length;

text
std::vector<float> Extr; Extr.reserve((global_iterations << 1u) + 2u); std::vector<Interval> R; R.reserve(global_iterations + 1u); float best_f = ShekelFunc(a, seed); const float end_val = ShekelFunc(b, seed); uint32_t schetchick = 0u; std::pair<float, float> x_Rmax = std::make_pair(a, b); std::pair<float, float> y_Rmax = std::make_pair(best_f, end_val); R.emplace_back(std::make_pair(a, best_f), std::make_pair(b, end_val), N); float Mmax = R.front().M; float m = r * Mmax; while (true) { if (++schetchick == global_iterations) { const Interval& front = R.front(); Extr.emplace_back(front.end.first - front.start.first); Extr.emplace_back(global_iterations); return Extr; } const float x1 = x_Rmax.first; const float x2 = x_Rmax.second; const float y1 = y_Rmax.first; const float y2 = y_Rmax.second; const float new_point = Shag(m, x1, x2, y1, y2, N, r); const float new_value = ShekelFunc(new_point, seed); const std::pair<float, float> promejutochnaya_tochka = std::make_pair(new_point, new_value); if (new_value < best_f) { best_f = new_value; Extr.emplace_back(best_f); Extr.emplace_back(new_point); } std::pop_heap(R.begin(), R.end(), Compare); const Interval promejutochny_otrezok = std::move(R.back()); R.pop_back(); Interval curr(promejutochny_otrezok.start, promejutochnaya_tochka, N); Interval curr1(promejutochnaya_tochka, promejutochny_otrezok.end, N); const float currM = curr.M > curr1.M ? curr.M : curr1.M; const float len2 = promejutochny_otrezok.end.first - new_point; const float len1 = new_point - promejutochny_otrezok.start.first; const size_t r_size = R.size(); if (len2 + len1 == dmax) { dmax = len2 > len1 ? len2 : len1;

#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
const float len = R[i].end.first - R[i].start.first;
if (len > dmax) {
dmax = len;
}
}
}

text
if (mode) { const float threshold_03 = 0.3f * initial_length; if (threshold_03 > dmax && schetchick % 3u == 0u || 10.0f * dmax < initial_length) { if (currM > Mmax) { Mmax = currM; m = r * Mmax; } const float inv_threshold_03 = 1.0f / threshold_03; const float progress = fmaf(-inv_threshold_03, dmax, 1.0f); const float alpha = fmaf(progress, progress, 1.0f); const float betta = 2.0f - alpha; const float inv_dmax = 1.0f / dmax; const float MULTIPLIER = inv_dmax * Mmax; const float global_coeff = fmaf(MULTIPLIER, r, -MULTIPLIER); const float GLOBAL_FACTOR = betta * global_coeff; curr.ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len1, curr.M * alpha), N); curr1.ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len2, curr1.M * alpha), N);

#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
const float len_item = R[i].end.first - R[i].start.first;
R[i].ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len_item, R[i].M * alpha), N);
}
std::make_heap(R.begin(), R.end(), Compare);
} else {
if (currM > Mmax) {
Mmax = currM;
m = r * Mmax;
curr.ChangeCharacteristic(m, N);
curr1.ChangeCharacteristic(m, N);
#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
R[i].ChangeCharacteristic(m, N);
}
std::make_heap(R.begin(), R.end(), Compare);
} else {
curr.ChangeCharacteristic(m, N);
curr1.ChangeCharacteristic(m, N);
}
}
} else {
if (currM > Mmax) {
Mmax = currM;
m = r * Mmax;
curr.ChangeCharacteristic(m, N);
curr1.ChangeCharacteristic(m, N);
#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
R[i].ChangeCharacteristic(m, N);
}
std::make_heap(R.begin(), R.end(), Compare);
} else {
curr.ChangeCharacteristic(m, N);
curr1.ChangeCharacteristic(m, N);
}
}

text
R.emplace_back(std::move(curr)); std::push_heap(R.begin(), R.end(), Compare); R.emplace_back(std::move(curr1)); std::push_heap(R.begin(), R.end(), Compare); const Interval& top = R.front(); const float interval_length = top.end.first - top.start.first; if (interval_length < epsilon) { Extr.emplace_back(interval_length); Extr.emplace_back(schetchick); return Extr; } x_Rmax = std::make_pair(top.start.first, top.end.first); y_Rmax = std::make_pair(top.start.second, top.end.second); } } else { std::vector<float> Extr; Extr.reserve(3u * global_iterations + 2u); std::vector<Interval> R; R.reserve(global_iterations + 1u); std::vector<Interval> R_Inverted; R_Inverted.reserve(global_iterations + 1u); const float divider = 1u << (2u * Curve_Inverted->razvertka + 1u); const float x_addition = (b - a) / divider; const float y_addition = (d - c) / divider; const float true_start = a + x_addition; const float true_end = b - x_addition; const float initial_length = true_end - true_start; float dmax = initial_length; float dmax_Inverted = initial_length; uint32_t schetchick = 0u; const std::pair<float, float> start = std::make_pair(true_start, RastriginFunc(true_start, c + y_addition)); const std::pair<float, float> end = std::make_pair(true_end, RastriginFunc(true_end, c + y_addition)); const std::pair<float, float> start_Inverted = std::make_pair(true_start, RastriginFunc(true_end, d - y_addition)); const std::pair<float, float> end_Inverted = std::make_pair(true_end, RastriginFunc(true_start, d - y_addition)); std::pair<float, float> x_Rmax = std::make_pair(start.first, end.first); std::pair<float, float> y_Rmax = std::make_pair(start.second, end.second); std::pair<float, float> x_Rmax_Inverted = std::make_pair(start_Inverted.first, end_Inverted.first); std::pair<float, float> y_Rmax_Inverted = std::make_pair(start_Inverted.second, end_Inverted.second); float best_f = end_Inverted.second; Interval initial_interval(start, end, N); Interval 
initial_interval_Inverted(start_Inverted, end_Inverted, N); float Mmax = initial_interval.M; float Mmax_Inverted = initial_interval_Inverted.M; float m = r * Mmax; float m_Inverted = r * Mmax_Inverted; R.emplace_back(std::move(initial_interval)); R_Inverted.emplace_back(std::move(initial_interval_Inverted)); while (true) { if (++schetchick == global_iterations) { const Interval& topA = R.front(); const Interval& topB = R_Inverted.front(); const float lenA = topA.end.first - topA.start.first; const float lenB = topB.end.first - topB.start.first; const float interval_length = lenA > lenB ? lenA : lenB; Extr.emplace_back(interval_length); Extr.emplace_back(global_iterations); return Extr; } const float x1A = x_Rmax.first; const float x2A = x_Rmax.second; const float y1A = y_Rmax.first; const float y2A = y_Rmax.second; const float x1B = x_Rmax_Inverted.first; const float x2B = x_Rmax_Inverted.second; const float y1B = y_Rmax_Inverted.first; const float y2B = y_Rmax_Inverted.second; const float new_param_A = Shag(m, x1A, x2A, y1A, y2A, N, r); const float new_param_B = Shag(m_Inverted, x1B, x2B, y1B, y2B, N, r); const float cooling = expf(-schetchick / 200.0f); const uint32_t T = fmaf(20.0f, cooling, 10.0f); const float k = fmaf(0.2f, cooling, 0.7f); const PeanoCurve_2D* pA = Curve->HitTest_2D(new_param_A); const PeanoCurve_2D* pB = Curve_Inverted->HitTest_2D(new_param_B); if (schetchick % T == 0u && schetchick > T && !R.empty() && !R_Inverted.empty()) { const Interval& topA = R.front(); const Interval& topB = R_Inverted.front(); const PeanoCurve_2D* p1A = Curve->HitTest_2D(topA.start.first); const PeanoCurve_2D* p2A = Curve->HitTest_2D(topA.end.first); const PeanoCurve_2D* p1B = Curve_Inverted->HitTest_2D(topB.start.first); const PeanoCurve_2D* p2B = Curve_Inverted->HitTest_2D(topB.end.first); const float newParam1_B = Curve->FindX_2D(p1B->x1, p1B->x2); const float newParam2_B = Curve->FindX_2D(p2B->x1, p2B->x2); const float newParam1_A = 
Curve_Inverted->FindX_2D(p1A->x1, p1A->x2); const float newParam2_A = Curve_Inverted->FindX_2D(p2A->x1, p2A->x2); const std::pair<float, float> newStart_A(newParam1_A, topA.start.second); const std::pair<float, float> newEnd_A(newParam2_A, topA.end.second); const std::pair<float, float> newStart_B(newParam1_B, topB.start.second); const std::pair<float, float> newEnd_B(newParam2_B, topB.end.second); Interval injected_A(newStart_A, newEnd_A, N); Interval injected_B(newStart_B, newEnd_B, N); injected_A.R = topA.R * k; injected_B.R = topB.R * k; R.emplace_back(std::move(injected_B)); std::push_heap(R.begin(), R.end(), Compare); R_Inverted.emplace_back(std::move(injected_A)); std::push_heap(R_Inverted.begin(), R_Inverted.end(), Compare); } const float new_x1_A = pA->x1; const float new_x2_A = pA->x2; const float new_x1_B = pB->x1; const float new_x2_B = pB->x2; const float new_value_A = RastriginFunc(new_x1_A, new_x2_A); const float new_value_B = RastriginFunc(new_x1_B, new_x2_B); const std::pair<float, float> promejutochnaya_tochka_A = std::make_pair(new_param_A, new_value_A); const std::pair<float, float> promejutochnaya_tochka_B = std::make_pair(new_param_B, new_value_B); if (new_value_A < best_f) { best_f = new_value_A; if (new_value_B < best_f) { best_f = new_value_B; Extr.emplace_back(best_f); Extr.emplace_back(new_x1_B); Extr.emplace_back(new_x2_B); } else { Extr.emplace_back(best_f); Extr.emplace_back(new_x1_A); Extr.emplace_back(new_x2_A); } } else if (new_value_B < best_f) { best_f = new_value_B; Extr.emplace_back(best_f); Extr.emplace_back(new_x1_B); Extr.emplace_back(new_x2_B); } std::pop_heap(R.begin(), R.end(), Compare); const Interval promejutochny_otrezok_A = std::move(R.back()); R.pop_back(); std::pop_heap(R_Inverted.begin(), R_Inverted.end(), Compare); const Interval promejutochny_otrezok_B = std::move(R_Inverted.back()); R_Inverted.pop_back(); Interval curr_A(promejutochny_otrezok_A.start, promejutochnaya_tochka_A, N); Interval 
curr1_A(promejutochnaya_tochka_A, promejutochny_otrezok_A.end, N); Interval curr_B(promejutochny_otrezok_B.start, promejutochnaya_tochka_B, N); Interval curr1_B(promejutochnaya_tochka_B, promejutochny_otrezok_B.end, N); const float currM_A = curr1_A.M > curr_A.M ? curr1_A.M : curr_A.M; const float currM_B = curr1_B.M > curr_B.M ? curr1_B.M : curr_B.M; const float len2_A = promejutochny_otrezok_A.end.first - new_param_A; const float len1_A = new_param_A - promejutochny_otrezok_A.start.first; const float len2_B = promejutochny_otrezok_B.end.first - new_param_B; const float len1_B = new_param_B - promejutochny_otrezok_B.start.first; const size_t r_size = R.size(); if (len2_A + len1_A == dmax) { dmax = len2_A > len1_A ? len2_A : len1_A;

#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
const float l = R[i].end.first - R[i].start.first;
if (l > dmax) {
dmax = l;
}
}
}
if (len2_B + len1_B == dmax_Inverted) {
dmax_Inverted = len2_B > len1_B ? len2_B : len1_B;
#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
const float l = R_Inverted[i].end.first - R_Inverted[i].start.first;
if (l > dmax_Inverted) {
dmax_Inverted = l;
}
}
}

text
if (mode) { const float threshold_03 = 0.3f * initial_length; if (threshold_03 > dmax && schetchick % 3u == 0u || 10.0f * dmax < initial_length) { if (currM_A > Mmax) { Mmax = currM_A; m = r * Mmax; } const float inv_threshold_03 = 1.0f / threshold_03; const float progress = fmaf(-inv_threshold_03, dmax, 1.0f); const float alpha = fmaf(progress, progress, 1.0f); const float betta = 2.0f - alpha; const float inv_dmax = 1.0f / dmax; const float MULTIPLIER = inv_dmax * Mmax; const float global_coeff = fmaf(MULTIPLIER, r, -MULTIPLIER); const float GLOBAL_FACTOR = betta * global_coeff; curr_A.ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len1_A, curr_A.M * alpha), N); curr1_A.ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len2_A, curr1_A.M * alpha), N);

#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
const float li = R[i].end.first - R[i].start.first;
R[i].ChangeCharacteristic(fmaf(GLOBAL_FACTOR, li, R[i].M * alpha), N);
}
std::make_heap(R.begin(), R.end(), Compare);
} else {
if (currM_A > Mmax) {
Mmax = currM_A;
m = r * Mmax;
curr_A.ChangeCharacteristic(m, N);
curr1_A.ChangeCharacteristic(m, N);
#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
R[i].ChangeCharacteristic(m, N);
}
std::make_heap(R.begin(), R.end(), Compare);
} else {
curr_A.ChangeCharacteristic(m, N);
curr1_A.ChangeCharacteristic(m, N);
}
}

text
if (threshold_03 > dmax_Inverted && schetchick % 3u == 0u || 10.0f * dmax_Inverted < initial_length) { if (currM_B > Mmax_Inverted) { Mmax_Inverted = currM_B; m_Inverted = r * Mmax_Inverted; } const float inv_threshold_03 = 1.0f / threshold_03; const float progress = fmaf(-inv_threshold_03, dmax_Inverted, 1.0f); const float alpha = fmaf(progress, progress, 1.0f); const float betta = 2.0f - alpha; const float inv_dmax = 1.0f / dmax_Inverted; const float MULTIPLIER = inv_dmax * Mmax_Inverted; const float global_coeff = fmaf(MULTIPLIER, r, -MULTIPLIER); const float GLOBAL_FACTOR = betta * global_coeff; curr_B.ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len1_B, curr_B.M * alpha), N); curr1_B.ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len2_B, curr1_B.M * alpha), N);

#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
const float li = R_Inverted[i].end.first - R_Inverted[i].start.first;
R_Inverted[i].ChangeCharacteristic(fmaf(GLOBAL_FACTOR, li, R_Inverted[i].M * alpha), N);
}
std::make_heap(R_Inverted.begin(), R_Inverted.end(), Compare);
} else {
if (currM_B > Mmax_Inverted) {
Mmax_Inverted = currM_B;
m_Inverted = r * Mmax_Inverted;
curr_B.ChangeCharacteristic(m_Inverted, N);
curr1_B.ChangeCharacteristic(m_Inverted, N);
#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
R_Inverted[i].ChangeCharacteristic(m_Inverted, N);
}
std::make_heap(R_Inverted.begin(), R_Inverted.end(), Compare);
} else {
curr_B.ChangeCharacteristic(m_Inverted, N);
curr1_B.ChangeCharacteristic(m_Inverted, N);
}
}
} else {
if (currM_A > Mmax) {
Mmax = currM_A;
m = r * Mmax;
curr_A.ChangeCharacteristic(m, N);
curr1_A.ChangeCharacteristic(m, N);
#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
R[i].ChangeCharacteristic(m, N);
}
std::make_heap(R.begin(), R.end(), Compare);
} else {
curr_A.ChangeCharacteristic(m, N);
curr1_A.ChangeCharacteristic(m, N);
}

text
if (currM_B > Mmax_Inverted) { Mmax_Inverted = currM_B; m_Inverted = r * Mmax_Inverted; curr_B.ChangeCharacteristic(m_Inverted, N); curr1_B.ChangeCharacteristic(m_Inverted, N);

#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
R_Inverted[i].ChangeCharacteristic(m_Inverted, N);
}
std::make_heap(R_Inverted.begin(), R_Inverted.end(), Compare);
} else {
curr_B.ChangeCharacteristic(m_Inverted, N);
curr1_B.ChangeCharacteristic(m_Inverted, N);
}
}

text
R.emplace_back(std::move(curr_A)); std::push_heap(R.begin(), R.end(), Compare); R.emplace_back(std::move(curr1_A)); std::push_heap(R.begin(), R.end(), Compare); R_Inverted.emplace_back(std::move(curr_B)); std::push_heap(R_Inverted.begin(), R_Inverted.end(), Compare); R_Inverted.emplace_back(std::move(curr1_B)); std::push_heap(R_Inverted.begin(), R_Inverted.end(), Compare); const Interval& topA = R.front(); const Interval& topB = R_Inverted.front(); const float interval_length_A = topA.end.first - topA.start.first; const float interval_length_B = topB.end.first - topB.start.first; const float interval_length = interval_length_A > interval_length_B ? interval_length_A : interval_length_B; if (interval_length < epsilon) { Extr.emplace_back(interval_length); Extr.emplace_back(schetchick); return Extr; } x_Rmax = std::make_pair(topA.start.first, topA.end.first); y_Rmax = std::make_pair(topA.start.second, topA.end.second); x_Rmax_Inverted = std::make_pair(topB.start.first, topB.end.first); y_Rmax_Inverted = std::make_pair(topB.start.second, topB.end.second); } }

} рассмотри здСсь Π²Ρ‚ΠΎΡ€ΡƒΡŽ Π²Π΅Ρ‚Π²ΡŒ Π°Π»Π³ΠΎΡ€ΠΈΡ‚ΠΌΠ° - Π΅Ρ‘ Π½ΡƒΠΆΠ½ΠΎ Ρ€Π°ΡΠΏΠ°Ρ€Π°Π»Π»Π΅Π»ΠΈΡ‚ΡŒ Ρ‡Π΅Ρ€Π΅Π· boost::mpi ΡΠ»Π΅Π΄ΡƒΡŽΡ‰ΠΈΠΌ ΠΎΠ±Ρ€Π°Π·ΠΎΠΌ, всё Ρ‡Ρ‚ΠΎ происходит Π½Π° ΠΎΠ±Ρ‹Ρ‡Π½ΠΎΠΉ Ρ€Π°Π·Π²Ρ‘Ρ€Ρ‚ΠΊΠ΅ - выполняСтся Π½Π° процСссС с Ρ€Π°Π½Π³ΠΎΠΌ 0, всё Ρ‡Ρ‚ΠΎ Π½Π° ΠΏΠ΅Ρ€Π΅Π²Ρ‘Ρ€Π½ΡƒΡ‚ΠΎΠΉ Ρ€Π°Π·Π²Ρ‘Ρ€Ρ‚ΠΊΠ΅ Π½Π° процСссС с Ρ€Π°Π½Π³ΠΎΠΌ 1, вычислСния, ΠΎΠ±Ρ€Π°Π±ΠΎΡ‚ΠΊΠΈ ΠΈΠ½Ρ‚Π΅Ρ€Π²Π°Π»ΠΎΠ², Ρ†ΠΈΠΊΠ»Ρ‹, измСнСния характСристик ΠΈ Ρ‚ Π΄ - всС эти вычислСния Π΄ΠΎΠ»ΠΆΠ½Ρ‹ ΠΏΡ€ΠΎΠΈΡΡ…ΠΎΠ΄ΠΈΡ‚ΡŒ ΠΏΠΎΠ»Π½ΠΎΡΡ‚ΡŒΡŽ ΠΈΠ·ΠΎΠ»ΠΈΡ€ΠΎΠ²Π°Π½Π½ΠΎ нСзависимо, Ρ€Π°Π·Π²Ρ‘Ρ€Ρ‚ΠΊΠΈ Π΄ΠΎΠ»ΠΆΠ½Ρ‹ ΡΠΎΠΎΠ±Ρ‰Π°Ρ‚ΡŒΡΡ ΠΌΠ΅ΠΆΠ΄Ρƒ собой Π² ΠΌΠΎΠΌΠ΅Π½Ρ‚ кросс ΠΈΠ½ΡŠΠ΅ΠΊΡ†ΠΈΠΉ Π² соотвСтствии с ΠΏΡ€Π°Π²ΠΈΠ»ΠΎΠΌ ΠΊΠΎΠ³Π΄Π° эти кросс ΠΈΠ½ΡŠΠ΅ΠΊΡ†ΠΈΠΈ Π΄ΠΎΠ»ΠΆΠ½Ρ‹ Π½Π°ΡΡ‚ΡƒΠΏΠ°Ρ‚ΡŒ, Π²ΠΎΠ·ΠΌΠΎΠΆΠ½ΠΎ Π΅Ρ‰Ρ‘ Ρ€Π°Π·Π²Ρ‘Ρ€Ρ‚ΠΊΠΈ Π΄ΠΎΠ»ΠΆΠ½Ρ‹ ΡΠΎΠΎΠ±Ρ‰Π°Ρ‚ΡŒΡΡ ΠΌΠ΅ΠΆΠ΄Ρƒ собой Π² ΠΌΠΎΠΌΠ΅Π½Ρ‚ обновлСния Π»ΡƒΡ‡ΡˆΠ΅ΠΉ Ρ‚ΠΎΡ‡ΠΊΠΈ - хотя это ΠΏΠΎΠ΄ вопросом, Ρ‚Π°ΠΊ ΠΊΠ°ΠΊ это ΠΌΠΎΠΆΠ΅Ρ‚ Π±Ρ‹Ρ‚ΡŒ слишком Π΄ΠΎΡ€ΠΎΠ³ΠΎ, ΠΏΡ€Π°Π²ΠΈΠ»ΠΎ останова ΠΌΠΎΠΆΠ½ΠΎ ΡΠ΄Π΅Π»Π°Ρ‚ΡŒ Ρ‚Π°ΠΊΠΈΠΌ - счёт останавливаСтся ΠΊΠΎΠ³Π΄Π° остановила счёт ΠΎΠ΄Π½Π° ΠΈΠ· Ρ€Π°Π·Π²Ρ‘Ρ€Ρ‚ΠΎΠΊ Π½Π° ΠΎΠ΄Π½ΠΎΠΌ ΠΈΠ· процСссов, сам MPI контСкст Ρƒ мСня задаётся Π²ΠΎ врСмя Π·Π°Π³Ρ€ΡƒΠ·ΠΊΠΈ этой DLL Π±ΠΈΠ±Π»ΠΈΠΎΡ‚Π΅ΠΊΠΈ Ρ‚Π°ΠΊ: #include "pch.h"

// Process-wide MPI objects, constructed during static initialisation of the DLL.
const boost::mpi::environment mpi_env;
const boost::mpi::communicator world;

// Roots of the two quadrant trees; filled in by DllMain depending on the rank.
std::unique_ptr<PeanoCurve_2D> Curve;
std::unique_ptr<PeanoCurve_2D> Curve_Inverted;

// DLL entry point. On process attach, rank 0 builds the regular tree (root
// type Top) and rank 1 builds the inverted tree (root type Down), both with
// 12 levels over [-2.2, 1.8] x [-2.2, 1.8]. Other ranks build nothing.
// NOTE(review): the PeanoCurve_2D constructor quoted below in this transcript
// calls tbb::parallel_invoke for the upper levels, and all of this runs inside
// DllMain, i.e. while the loader lock is held; Microsoft's DllMain guidance
// warns against starting or waiting on threads there - confirm this is safe
// in the deployed configuration.
// NOTE(review): each rank constructs only one of the two trees, whereas the
// sequential 2D branch of Base_LNA_1_2_Mer_AGP dereferences both Curve and
// Curve_Inverted in the same process.
BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
{
if (ul_reason_for_call == DLL_PROCESS_ATTACH) {
const int32_t rank = world.rank();
if (rank == 0) {
Curve = std::make_unique<PeanoCurve_2D>(List::Top, 12, -2.2f, 1.8f, -2.2f, 1.8f);
}
else if (rank == 1) {
Curve_Inverted = std::make_unique<PeanoCurve_2D>(List::Down, 12, -2.2f, 1.8f, -2.2f, 1.8f);
}
}
// Always report success to the loader.
return TRUE;
}, само построение деревьев оставь в моменте загрузки библиотеки оно там и должно происходить один раз, само построение деревьев выполняется параллельно через TBB: #pragma once

#include <cstdint>
#include <memory>

#include <oneapi/tbb/parallel_invoke.h>

// Orientation tag of a PeanoCurve_2D node. HitTest_2D and FindX_2D use it to
// decide in which order the four child quadrants are visited along the curve.
// NOTE(review): the leading `const` has no declarator to apply to.
const enum List : uint8_t {
Top = 0b00,
Down = 0b01,
Left = 0b10,
Right = 0b11
};

const __declspec(align(64u)) struct PeanoCurve_2D final {
public:
const List Type;
const uint32_t razvertka;
const float a;
const float b;
const float c;
const float d;
const float x1;
const float x2;
std::unique_ptr<PeanoCurve_2D> DownLeft;
std::unique_ptr<PeanoCurve_2D> TopLeft;
std::unique_ptr<PeanoCurve_2D> TopRight;
std::unique_ptr<PeanoCurve_2D> DownRight;

text
__declspec(noalias) __forceinline PeanoCurve_2D( const List _Type, uint32_t _razvertka, const float _a, const float _b, const float _c, const float _d) noexcept : Type(_Type), razvertka(_razvertka), a(_a), b(_b), c(_c), d(_d), x1(fmaf(0.5f, _a, 0.5f * _b)), x2(fmaf(0.5f, _c, 0.5f * _d)) { if (_razvertka-- != 0u) { if (_razvertka > 8u) { switch (_Type) { case Top: tbb::parallel_invoke( [&]() { DownLeft = std::make_unique<PeanoCurve_2D>(Right, _razvertka, _a, x1, _c, x2); }, [&]() { TopLeft = std::make_unique<PeanoCurve_2D>(Top, _razvertka, _a, x1, x2, _d); }, [&]() { TopRight = std::make_unique<PeanoCurve_2D>(Top, _razvertka, x1, _b, x2, _d); }, [&]() { DownRight = std::make_unique<PeanoCurve_2D>(Left, _razvertka, x1, _b, _c, x2); } ); break; case Down: tbb::parallel_invoke( [&]() { TopRight = std::make_unique<PeanoCurve_2D>(Left, _razvertka, x1, _b, x2, _d); }, [&]() { DownRight = std::make_unique<PeanoCurve_2D>(Down, _razvertka, x1, _b, _c, x2); }, [&]() { DownLeft = std::make_unique<PeanoCurve_2D>(Down, _razvertka, _a, x1, _c, x2); }, [&]() { TopLeft = std::make_unique<PeanoCurve_2D>(Right, _razvertka, _a, x1, x2, _d); } ); break; case Right: tbb::parallel_invoke( [&]() { DownLeft = std::make_unique<PeanoCurve_2D>(Top, _razvertka, _a, x1, _c, x2); }, [&]() { DownRight = std::make_unique<PeanoCurve_2D>(Right, _razvertka, x1, _b, _c, x2); }, [&]() { TopRight = std::make_unique<PeanoCurve_2D>(Right, _razvertka, x1, _b, x2, _d); }, [&]() { TopLeft = std::make_unique<PeanoCurve_2D>(Down, _razvertka, _a, x1, x2, _d); } ); break; case Left: tbb::parallel_invoke( [&]() { TopRight = std::make_unique<PeanoCurve_2D>(Down, _razvertka, x1, _b, x2, _d); }, [&]() { TopLeft = std::make_unique<PeanoCurve_2D>(Left, _razvertka, _a, x1, x2, _d); }, [&]() { DownLeft = std::make_unique<PeanoCurve_2D>(Left, _razvertka, _a, x1, _c, x2); }, [&]() { DownRight = std::make_unique<PeanoCurve_2D>(Top, _razvertka, x1, _b, _c, x2); } ); } } else { switch (_Type) { case Top: DownLeft = 
std::make_unique<PeanoCurve_2D>(Right, _razvertka, _a, x1, _c, x2); TopLeft = std::make_unique<PeanoCurve_2D>(Top, _razvertka, _a, x1, x2, _d); TopRight = std::make_unique<PeanoCurve_2D>(Top, _razvertka, x1, _b, x2, _d); DownRight = std::make_unique<PeanoCurve_2D>(Left, _razvertka, x1, _b, _c, x2); break; case Down: TopRight = std::make_unique<PeanoCurve_2D>(Left, _razvertka, x1, _b, x2, _d); DownRight = std::make_unique<PeanoCurve_2D>(Down, _razvertka, x1, _b, _c, x2); DownLeft = std::make_unique<PeanoCurve_2D>(Down, _razvertka, _a, x1, _c, x2); TopLeft = std::make_unique<PeanoCurve_2D>(Right, _razvertka, _a, x1, x2, _d); break; case Right: DownLeft = std::make_unique<PeanoCurve_2D>(Top, _razvertka, _a, x1, _c, x2); DownRight = std::make_unique<PeanoCurve_2D>(Right, _razvertka, x1, _b, _c, x2); TopRight = std::make_unique<PeanoCurve_2D>(Right, _razvertka, x1, _b, x2, _d); TopLeft = std::make_unique<PeanoCurve_2D>(Down, _razvertka, _a, x1, x2, _d); break; case Left: TopRight = std::make_unique<PeanoCurve_2D>(Down, _razvertka, x1, _b, x2, _d); TopLeft = std::make_unique<PeanoCurve_2D>(Left, _razvertka, _a, x1, x2, _d); DownLeft = std::make_unique<PeanoCurve_2D>(Left, _razvertka, _a, x1, _c, x2); DownRight = std::make_unique<PeanoCurve_2D>(Top, _razvertka, x1, _b, _c, x2); } } } } const __declspec(noalias) __forceinline PeanoCurve_2D* HitTest_2D(float x) const noexcept; const __declspec(noalias) __forceinline float FindX_2D(float target_x1, float target_x2) const noexcept;

}; мне верни изменённый код для двумерной ветки через boost::mpi распараллеленный - также напиши какие заголовки дополнительно понадобятся, также обрати внимание на локальность данных - все переменные расположены не просто так а из соображений лучшей локальности данных - так что учитывай это при распараллеливании и сохраняй хорошую локальность данных, следи чтобы твой код не слишком сильно отходил от моих канонов, то что будет происходить на каждом процессе алгоритмически не должно отличаться слишком сильно от того что происходит в последовательной версии одномерной ветки, в конце дай дополнительные советы по оптимизации но не вноси их в код

Pensiero

Considering function implementation in MPI

I’m thinking about how to handle the function in an MPI setup. For rank 0, I’m mapping B’s endpoints to new parameters as before. I also need to decide on the stop rule—either by epsilon or max iterations—using a non-blocking broadcast. I’m unsure if both ranks should return results or just rank 0.

Planning MPI communication

I’m considering whether rank 1 should return its own Extr, but the caller is a single process since the DLL is loaded into each process. In MPI, each process runs the same code, so I can implement independent communication at injections using non-blocking sends. I’ll serialize the endpoints with minimal headers.

Implementing MPI communication and styling

I need to make sure the code follows style guidelines, like using __declspec(noalias) inline and specific float operations. I’ll focus on the second branch for when N != 1, defining which curve to use based on the rank. For curve initialization, I’ll handle RastriginFunc and heap Compare, keeping the cross-injection exchange logic.

Performing communication and computations

I’ll build the local top from the front of R and compute p1Local, p2Local using HitTest on the local curve. I’ll exchange endpoints with the partner using send/recv. Upon receiving remote endpoints, I’ll map them into local parameters for the curve and create the injected interval accordingly. I’ll match the original logic for injections.

Refining injection and communication logic

I need to process the injection logic at each process. Rank 0 computes mapping of B endpoints onto curve A and vice versa for rank 1. I’ll exchange the topR value to ensure both processes can scale intervals properly. To match the original logic, I’ll recompute function values for injected intervals and include necessary factors for synchronization. I’ll ensure efficient serialization and implement all_reduce for stopping criteria. Cross-injections must align with barriers and ensure synchronous exchanges.

Thought for 1m 3s

Risposta

ниже — готовая замена только для двумерной ветки (else у if (N == 1u)), распараллеленная на два процесса Boost.MPI по вашему правилу:

  • ранг 0 считает «обычную» развёртку (Curve),
  • ранг 1 считает перевёрнутую развёртку (Curve_Inverted),
  • каждая ветка полностью изолирована (свои интервальные кучи, свои характеристики, свой «лучший»),
  • синхронное общение только в момент кросс-инъекций; обмен минимальный (по два граничных узла в виде {x1,x2} и R-характеристика вершины),
  • правило останова: цикл прерывается на обоих рангах, когда остановилась любая развёртка (или исчерпаны итерации) — через all_reduce(max) над флагом останова.

Π’ одностороннСй ΠΈΠ½ΡŠΠ΅ΠΊΡ†ΠΈΠΈ ΠΊΠ°ΠΆΠ΄Ρ‹ΠΉ процСсс ΠΊΠΎΠ½Π²Π΅Ρ€Ρ‚ΠΈΡ€ΡƒΠ΅Ρ‚ ΠΏΠΎΠ»ΡƒΡ‡Π΅Π½Π½Ρ‹Π΅ ΠΎΡ‚ Π½Π°ΠΏΠ°Ρ€Π½ΠΈΠΊΠ° Π³Ρ€Π°Π½ΠΈΡ‡Π½Ρ‹Π΅ Ρ‚ΠΎΡ‡ΠΊΠΈ (x1,x2)(x_1,x_2) Π² свой ΠΏΠ°Ρ€Π°ΠΌΠ΅Ρ‚Ρ€ xx с ΠΏΠΎΠΌΠΎΡ‰ΡŒΡŽ своСй ΠΊΡ€ΠΈΠ²ΠΎΠΉ ПСано, Ρ„ΠΎΡ€ΠΌΠΈΡ€ΡƒΠ΅Ρ‚ «впрыснутый» ΠΈΠ½Ρ‚Π΅Ρ€Π²Π°Π» ΠΈ ΠΏΠΎΠΌΠ΅Ρ‰Π°Π΅Ρ‚ Π΅Π³ΠΎ Π² свою ΠΊΡƒΡ‡Ρƒ; Π·Π½Π°Ρ‡Π΅Π½ΠΈΠ΅ R для «впрыснутого» ΠΈΠ½Ρ‚Π΅Ρ€Π²Π°Π»Π° бСрётся ΠΊΠ°ΠΊ k * R_top_ΠΏΠ°Ρ€Ρ‚Π½Ρ‘Ρ€Π° (Π΅Π³ΠΎ ΠΌΡ‹ ΠΏΠ΅Ρ€Π΅Π΄Π°Ρ‘ΠΌ Π²ΠΎ Π²Π»ΠΎΠΆΠ΅Π½ΠΈΠΈ ΠΊ ΠΊΠΎΠΎΡ€Π΄ΠΈΠ½Π°Ρ‚Π°ΠΌ). ЗначСния y для ΠΊΠΎΠ½Ρ†ΠΎΠ² «впрыснутого» ΠΈΠ½Ρ‚Π΅Ρ€Π²Π°Π»Π° ΠΌΡ‹ пСрСсчитываСм локально Ρ‡Π΅Ρ€Π΅Π· RastriginFunc ΠΏΠΎ присланным (x1,x2)(x_1,x_2) β€” это эквивалСнтно ΠΎΡ€ΠΈΠ³ΠΈΠ½Π°Π»Ρƒ (функция Ρ‚Π° ΠΆΠ΅, Ρ‚ΠΎΡ‡ΠΊΠΈ Ρ‚Π΅ ΠΆΠ΅), ΠΏΡ€ΠΈ этом Π½Π°ΠΌ Π½Π΅ Π½ΡƒΠΆΠ½ΠΎ ΠΏΠ΅Ρ€Π΅Π΄Π°Π²Π°Ρ‚ΡŒ y.


Патч (фрагмент функции Base_LNA_1_2_Mer_AGP — двумерная ветка)

cpp
// ... Π½Π°Ρ‡Π°Π»ΠΎ Ρ„ΡƒΠ½ΠΊΡ†ΠΈΠΈ Base_LNA_1_2_Mer_AGP(...) остаётся Π±Π΅Π· ΠΈΠ·ΠΌΠ΅Π½Π΅Π½ΠΈΠΉ ... } else { // ====== N != 1u : 2D Π²Π΅Ρ‚ΠΊΠ°, распараллСлСно ΠΏΠΎ Boost.MPI ====== struct CrossMsg final { float s_x1, s_x2; // start: (x1,x2) float e_x1, e_x2; // end : (x1,x2) float Rtop; // R-характСристика Π²Π΅Ρ€ΡˆΠΈΠ½Ρ‹ отправитСля // Boost.Serialization template <typename Archive> void serialize(Archive& ar, const unsigned int) { ar & s_x1 & s_x2 & e_x1 & e_x2 & Rtop; } }; const int32_t rank = world.rank(); const int32_t partner = (rank ^ 1); // Π’Ρ‹Π±ΠΎΡ€ локальной Ρ€Π°Π·Π²Ρ‘Ρ€Ρ‚ΠΊΠΈ ΠΈ Π½Π°Ρ‡Π°Π»ΡŒΠ½Ρ‹Ρ… Π³Ρ€Π°Π½ΠΈΡ‡Π½Ρ‹Ρ… условий β€” строго ΠΊΠ°ΠΊ Ρƒ вас, Π½ΠΎ разнСсСно ΠΏΠΎ Ρ€Π°Π½Π³Π°ΠΌ. const PeanoCurve_2D* __restrict LocCurve = (rank == 0) ? Curve.get() : Curve_Inverted.get(); std::vector<float> Extr; Extr.reserve(3u * global_iterations + 2u); std::vector<Interval> R; R.reserve(global_iterations + 1u); // ГСомСтрия Β«Π·Π°Ρ‰ΠΈΡ‚Π½ΠΎΠΉ Ρ€Π°ΠΌΠΊΠΈΒ» ΠΊΠ°ΠΊ Π² исходникС: одинаковая для ΠΎΠ±ΠΎΠΈΡ… Ρ€Π°Π½Π³ΠΎΠ². const float divider = 1u << (2u * (LocCurve ? LocCurve->razvertka : 0u) + 1u); const float x_addition = (b - a) / divider; const float y_addition = (d - c) / divider; const float true_start = a + x_addition; const float true_end = b - x_addition; const float initial_length = true_end - true_start; float dmax = initial_length; uint32_t schetchick = 0u; // Π‘Ρ‚Π°Ρ€Ρ‚ΠΎΠ²Ρ‹Π΅ Ρ‚ΠΎΡ‡ΠΊΠΈ ΠΈ значСния β€” согласно вашСй Β«Π΄Π²ΠΎΠΉΠ½ΠΎΠΉΒ» Π»ΠΎΠ³ΠΈΠΊΠ΅, // Π½ΠΎ каТдая Π²Π΅Ρ‚ΠΊΠ° ΠΈΠ½ΠΈΡ†ΠΈΠ°Π»ΠΈΠ·ΠΈΡ€ΡƒΠ΅Ρ‚ свою сторону. 
std::pair<float, float> start, end; if (rank == 0) { // ΠžΠ±Ρ‹Ρ‡Π½Π°Ρ Ρ€Π°Π·Π²Ρ‘Ρ€Ρ‚ΠΊΠ° (Curve) start = std::make_pair(true_start, RastriginFunc(true_start, c + y_addition)); end = std::make_pair(true_end, RastriginFunc(true_end, c + y_addition)); } else { // ΠŸΠ΅Ρ€Π΅Π²Ρ‘Ρ€Π½ΡƒΡ‚Π°Ρ Ρ€Π°Π·Π²Ρ‘Ρ€Ρ‚ΠΊΠ° (Curve_Inverted) start = std::make_pair(true_start, RastriginFunc(true_end, d - y_addition)); end = std::make_pair(true_end, RastriginFunc(true_start, d - y_addition)); } std::pair<float, float> x_Rmax = std::make_pair(start.first, end.first); std::pair<float, float> y_Rmax = std::make_pair(start.second, end.second); Interval initial_interval(start, end, N); float Mmax = initial_interval.M; float m = r * Mmax; R.emplace_back(std::move(initial_interval)); // Π›ΠΎΠΊΠ°Π»ΡŒΠ½Ρ‹ΠΉ Π»ΡƒΡ‡ΡˆΠΈΠΉ β€” ΠΊΠ°ΠΆΠ΄Ρ‹ΠΉ процСсс Π²Π΅Π΄Ρ‘Ρ‚ свой best_f; Π³Π»ΠΎΠ±Π°Π»ΡŒΠ½Ρ‹ΠΉ ΠΎΠ±ΠΌΠ΅Π½ Π»ΡƒΡ‡ΡˆΠΈΠΌ ΠΌΡ‹ ΡΠΎΠ·Π½Π°Ρ‚Π΅Π»ΡŒΠ½ΠΎ Π½Π΅ Π΄Π΅Π»Π°Π΅ΠΌ (Π΄ΠΎΡ€ΠΎΠ³ΠΎ). float best_f = end.second; // Основной Ρ†ΠΈΠΊΠ» while (true) { // 1) Π“Π»ΠΎΠ±Π°Π»ΡŒΠ½Π°Ρ ΠΏΡ€ΠΎΠ²Π΅Ρ€ΠΊΠ° останова ΠΏΠΎ итСрациям if (++schetchick == global_iterations) { const Interval& top = R.front(); const float len = top.end.first - top.start.first; Extr.emplace_back(len); Extr.emplace_back(global_iterations); // Π‘ΠΈΠ³Π½Π°Π» «стоп» всСм int my_stop = 1; int any_stop = 0; boost::mpi::all_reduce(world, my_stop, any_stop, boost::mpi::maximum<int>()); break; } // Π’Π΅ΠΊΡƒΡ‰ΠΈΠΉ Β«Π»ΡƒΡ‡ΡˆΠΈΠΉΒ» ΠΈΠ½Ρ‚Π΅Ρ€Π²Π°Π» Π² ΠΊΡƒΡ‡Π΅ const float x1L = x_Rmax.first; const float x2L = x_Rmax.second; const float y1L = y_Rmax.first; const float y2L = y_Rmax.second; // 2) Π¨Π°Π³ поиска ΠΏΠΎ характСристикам const float new_param = Shag(m, x1L, x2L, y1L, y2L, N, r); const PeanoCurve_2D* __restrict p = LocCurve->HitTest_2D(new_param); const float new_x1 = p->x1; const float new_x2 = p->x2; const float new_value = RastriginFunc(new_x1, new_x2); const std::pair<float, float> promejutochnaya_tochka(new_param, new_value); if (new_value < best_f) { 
best_f = new_value; Extr.emplace_back(best_f); Extr.emplace_back(new_x1); Extr.emplace_back(new_x2); } // 3) Π Π°Π·Π΄Π΅Π»Π΅Π½ΠΈΠ΅ ΠΈΠ½Ρ‚Π΅Ρ€Π²Π°Π»Π°, ΠΊΠ°ΠΊ Ρƒ вас std::pop_heap(R.begin(), R.end(), Compare); const Interval promejutochny_otrezok = std::move(R.back()); R.pop_back(); Interval curr (promejutochny_otrezok.start, promejutochnaya_tochka, N); Interval curr1(promejutochnaya_tochka, promejutochny_otrezok.end, N); const float currM = curr1.M > curr.M ? curr1.M : curr.M; const float len2 = promejutochny_otrezok.end.first - new_param; const float len1 = new_param - promejutochny_otrezok.start.first; const size_t r_size = R.size(); if (len2 + len1 == dmax) { dmax = len2 > len1 ? len2 : len1; #pragma loop(ivdep) for (size_t i = 0u; i < r_size; ++i) { const float l = R[i].end.first - R[i].start.first; if (l > dmax) { dmax = l; } } } // 4) ОбновлСниС характСристик (Π°Π΄Π°ΠΏΡ‚ΠΈΠ²Π½Ρ‹ΠΉ/Π½Π΅Π°Π΄Π°ΠΏΡ‚ΠΈΠ²Π½Ρ‹ΠΉ Ρ€Π΅ΠΆΠΈΠΌ) β€” строго ΠΏΠΎ Π²Π°ΡˆΠ΅ΠΌΡƒ ΡˆΠ°Π±Π»ΠΎΠ½Ρƒ if (mode) { const float threshold_03 = 0.3f * initial_length; if (threshold_03 > dmax && schetchick % 3u == 0u || 10.0f * dmax < initial_length) { if (currM > Mmax) { Mmax = currM; m = r * Mmax; } const float inv_threshold_03 = 1.0f / threshold_03; const float progress = fmaf(-inv_threshold_03, dmax, 1.0f); const float alpha = fmaf(progress, progress, 1.0f); const float betta = 2.0f - alpha; const float inv_dmax = 1.0f / dmax; const float MULTIPLIER = inv_dmax * Mmax; const float global_coeff = fmaf(MULTIPLIER, r, -MULTIPLIER); const float GLOBAL_FACTOR = betta * global_coeff; curr .ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len1, curr .M * alpha), N); curr1.ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len2, curr1.M * alpha), N); #pragma loop(ivdep) for (size_t i = 0u; i < r_size; ++i) { const float li = R[i].end.first - R[i].start.first; R[i].ChangeCharacteristic(fmaf(GLOBAL_FACTOR, li, R[i].M * alpha), N); } std::make_heap(R.begin(), R.end(), Compare); } else { if (currM > Mmax) { Mmax = currM; m = r * Mmax; 
curr .ChangeCharacteristic(m, N); curr1.ChangeCharacteristic(m, N); #pragma loop(ivdep) for (size_t i = 0u; i < r_size; ++i) { R[i].ChangeCharacteristic(m, N); } std::make_heap(R.begin(), R.end(), Compare); } else { curr .ChangeCharacteristic(m, N); curr1.ChangeCharacteristic(m, N); } } } else { if (currM > Mmax) { Mmax = currM; m = r * Mmax; curr .ChangeCharacteristic(m, N); curr1.ChangeCharacteristic(m, N); #pragma loop(ivdep) for (size_t i = 0u; i < r_size; ++i) { R[i].ChangeCharacteristic(m, N); } std::make_heap(R.begin(), R.end(), Compare); } else { curr .ChangeCharacteristic(m, N); curr1.ChangeCharacteristic(m, N); } } // 5) ΠŸΠΎΠΌΠ΅ΡΡ‚ΠΈΡ‚ΡŒ ΠΎΠ±Π° ΠΏΠΎΠ΄ΠΈΠ½Ρ‚Π΅Ρ€Π²Π°Π»Π° ΠΎΠ±Ρ€Π°Ρ‚Π½ΠΎ R.emplace_back(std::move(curr)); std::push_heap(R.begin(), R.end(), Compare); R.emplace_back(std::move(curr1)); std::push_heap(R.begin(), R.end(), Compare); // 6) ΠŸΠΎΡ‚Π΅Π½Ρ†ΠΈΠ°Π»ΡŒΠ½Π°Ρ КРОББ-ИНΠͺΠ•ΠšΠ¦Π˜Π― (синхронноС ΠΎΠ±Ρ‰Π΅Π½ΠΈΠ΅ Ρ‚ΠΎΠ»ΡŒΠΊΠΎ здСсь) { const float cooling = expf(-schetchick / 200.0f); const uint32_t T = fmaf(20.0f, cooling, 10.0f); const float k = fmaf(0.2f, cooling, 0.7f); // Π›ΠΎΠΊΠ°Π»ΡŒΠ½ΠΎΠ΅ Ρ€Π΅ΡˆΠ΅Π½ΠΈΠ΅ Β«Π³ΠΎΡ‚ΠΎΠ² ΠΈΠ½ΡŠΠ΅ΠΊΡ‚ΠΈΡ€ΠΎΠ²Π°Ρ‚ΡŒ?Β» int do_inject_local = (schetchick % T == 0u && schetchick > T && !R.empty()) ? 
1 : 0; int do_inject_all = 0; // Π˜Π½ΡŠΠ΅ΠΊΡ†ΠΈΡ Π²ΠΎΠ·ΠΌΠΎΠΆΠ½Π° Ρ‚ΠΎΠ»ΡŒΠΊΠΎ ΠΏΡ€ΠΈ ΠΆΠ΅Π»Π°Π½ΠΈΠΈ ΠžΠ‘ΠžΠ˜Π₯ boost::mpi::all_reduce(world, do_inject_local, do_inject_all, boost::mpi::minimum<int>()); if (do_inject_all) { // Π‘ΠΎΠ±ΠΈΡ€Π°Π΅ΠΌ Π΄Π°Π½Π½Ρ‹Π΅ своСй Π²Π΅Ρ€ΡˆΠΈΠ½Ρ‹ const Interval& topL = R.front(); const PeanoCurve_2D* __restrict p1L = LocCurve->HitTest_2D(topL.start.first); const PeanoCurve_2D* __restrict p2L = LocCurve->HitTest_2D(topL.end.first); CrossMsg outbound { p1L->x1, p1L->x2, p2L->x1, p2L->x2, topL.R }; CrossMsg inbound; // Π‘ΠΈΠΌΠΌΠ΅Ρ‚Ρ€ΠΈΡ‡Π½Ρ‹ΠΉ двусторонний ΠΎΠ±ΠΌΠ΅Π½ (Π±Π΅Π· Π±Π»ΠΎΠΊΠΈΡ€ΠΎΠ²ΠΎΠΊ очСрСдности) boost::mpi::request reqs[2]; reqs[0] = world.isend(partner, /*tag*/ 1, outbound); reqs[1] = world.irecv(partner, /*tag*/ 1, inbound); boost::mpi::wait_all(reqs, reqs + 2); // Маппинг ΠΊΠΎΠΎΡ€Π΄ΠΈΠ½Π°Ρ‚ ΠΏΠ°Ρ€Ρ‚Π½Ρ‘Ρ€Π° Π² Π»ΠΎΠΊΠ°Π»ΡŒΠ½Ρ‹ΠΉ ΠΏΠ°Ρ€Π°ΠΌΠ΅Ρ‚Ρ€ const float newParam1_remote = LocCurve->FindX_2D(inbound.s_x1, inbound.s_x2); const float newParam2_remote = LocCurve->FindX_2D(inbound.e_x1, inbound.e_x2); // y Π½Π° ΠΊΠΎΠ½Ρ†Π°Ρ… пСрСсчитываСм локально ΠΏΠΎ присланным (x1,x2) β€” это Ρ‚ΠΎΡ‚ ΠΆΠ΅ f() const std::pair<float, float> newStart_remote(newParam1_remote, RastriginFunc(inbound.s_x1, inbound.s_x2)); const std::pair<float, float> newEnd_remote(newParam2_remote, RastriginFunc(inbound.e_x1, inbound.e_x2)); Interval injected(newStart_remote, newEnd_remote, N); injected.R = inbound.Rtop * k; // ΠΊΠ°ΠΊ Π² исходникС: ΠΌΠ°ΡΡˆΡ‚Π°Π± ΠΎΡ‚ Π²Π΅Ρ€ΡˆΠΈΠ½Ρ‹ «донорской» стороны R.emplace_back(std::move(injected)); std::push_heap(R.begin(), R.end(), Compare); } } // 7) ΠžΡΡ‚Π°Π½ΠΎΠ²ΠΊΠ° ΠΏΠΎ точности const Interval& top = R.front(); const float interval_length = top.end.first - top.start.first; if (interval_length < epsilon) { Extr.emplace_back(interval_length); Extr.emplace_back(schetchick); int my_stop = 1; int any_stop = 0; boost::mpi::all_reduce(world, my_stop, any_stop, boost::mpi::maximum<int>()); break; } // ОбновлСниС 
«максимального» ΠΈΠ½Ρ‚Π΅Ρ€Π²Π°Π»Π° x_Rmax = std::make_pair(top.start.first, top.end.first); y_Rmax = std::make_pair(top.start.second, top.end.second); // 8) Π“Π»ΠΎΠ±Π°Π»ΡŒΠ½Ρ‹ΠΉ Ρ„Π»Π°ΠΆΠΎΠΊ останова β€” Ссли Π½Π°ΠΏΠ°Ρ€Π½ΠΈΠΊ ΡƒΠΆΠ΅ остановился (ΠΈΠ»ΠΈ ΠΌΡ‹), Π²Ρ‹Ρ…ΠΎΠ΄ΠΈΠΌ синхронно { int my_stop = 0; int any_stop = 0; boost::mpi::all_reduce(world, my_stop, any_stop, boost::mpi::maximum<int>()); if (any_stop) { break; } } } return Extr; } // ... ΠΊΠΎΠ½Π΅Ρ† Ρ„ΡƒΠ½ΠΊΡ†ΠΈΠΈ Base_LNA_1_2_Mer_AGP(...)

Что добавить в заголовки (помимо ваших текущих)

cpp
#include <vector> #include <utility> #include <algorithm> // heap ΠΎΠΏΠ΅Ρ€Π°Ρ†ΠΈΠΈ #include <boost/mpi.hpp> #include <boost/serialization/access.hpp> #include <boost/serialization/serialization.hpp>

(Больше ничего не требуется: мы используем собственную CrossMsg со встроенной Boost.Serialization, без экзотических контейнеров.)


Пояснения по локальности и симметрии

  • Переменные и массивы оставлены «плотными», мы не трогаем раскладку структур Interval, не меняем порядок вычислений в горячем цикле.
  • Кучи R у каждого процесса изолированы; изменения характеристик (ChangeCharacteristic) выполняются локально над своими элементами.
  • Кросс-инъекции «узкие»: передаём только то, что строго нужно для реконструкции интервала на другой стороне — две точки $(x_1, x_2)$ (старт/финиш) и R вершины-донора для масштабирования k.
  • Значения y для концов «впрыснутого» интервала не шлём, а пересчитываем: это та же точка в $(x_1, x_2)$, следовательно тот же f, кэш здесь не страдает (вызов крайне дешёвый).
  • Алгоритмика внутри каждого процесса максимально совпадает с вашей одномерной веткой: шаг Shag, разбиение, адаптация m, поддержка dmax, логика останова, — неизменны.

Дополнительные советы по оптимизации (в код не внесены)

  1. Снизить синхронность инъекций: заменить жёсткое «обоюдное» условие на асинхронную очередь сообщений (партнёр шлёт по готовности, получатель «подбирает» при удобном цикле), чтобы не подтормаживать быстрый ранг.
  2. Пакетировать инъекции: копить несколько пунктов CrossMsg и отправлять одним world.isend() батчем (массив), уменьшая количество обменов.
  3. Схема остановки: вместо периодического all_reduce — редкая проверка (раз в K итераций) и/или неблокирующий обмен флажками (polling iprobe).
  4. NUMA/аллокации: закрепить R и буферы под конкретный ранг/поток, использовать reserve с запасом, избегать реаллокаций; рассмотреть custom-allocator с 64-байтовым выравниванием.
  5. Inlined тригонометрия: если RastriginFunc вызывает fabe13_cos, убедиться, что она LTO/inlined; для sincos блоков — не вызывать их в 2D ветке в горячем цикле (сейчас и так не вызываются).
  6. Характеристики: профиль ChangeCharacteristic — возможно, стоит векторизовать «массовый» проход по куче (строгие alias-квалификаторы у вас уже стоят).
  7. Random-free 2D: в 2D ветке RNG не нужен — убедиться, что компилятор не тащит лишние зависимости (отключить LTO-связь с xor_rand_* для этого TU).

Если захотите — могу дать вариант с асинхронными «почтовыми ящиками» для кросс-инъекций (независимые темпы рангов, без all-reduce на каждом шаге).

Condividi questa Q&A