#pragma once #define WIN32_LEAN_AND_MEAN #include ...

作成日: 2025年10月27日

使用モデル GPT-5 Thinking by Chat01

質問

#pragma once
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#include <stdint.h>

using namespace System;
using namespace System::Drawing;
using namespace System::Windows::Forms;
using namespace System::Collections::Generic;
using namespace System::Drawing::Drawing2D;

// Function-pointer type used for the DLL export that MyForm resolves by the
// name "AGP_Manip2D" via GetProcAddress. The parameter list must stay in the
// same order as the exported function.
typedef void(__cdecl* P_MANIP)(
int nSegments,                                            // number of links in the chain
float thetaMin, float thetaMax,                           // common range for every angle
bool variableLengths,                                     // true: link lengths are optimized as well
float lenMin, float lenMax,                               // common range for every link length
float deltaThetaMax,                                      // bend threshold used by the curvature penalty
float lambdaCurv, float lambdaAngle, float lambdaLength,  // penalty weights
float targetX, float targetY,                             // target point
int peanoLevels, int orientations, int maxIterPerBranch,  // sweep depth, number of branches, per-branch iteration limit
float r, bool adaptiveMode, float epsilon, unsigned int seed,
float** out_bestQ, size_t* out_bestQLen,                  // out: result buffer and its element count
float* out_bestX, float* out_bestY, float* out_bestF      // out: end point and cost of the best configuration
);
// Function-pointer type used for the DLL export resolved by the name "AGP_Free";
// MyForm calls it on the buffer it received through out_bestQ.
typedef void(__cdecl* P_FREE)(float*);

namespace TESTAGP {

text
public ref class MyForm : public Form { public: MyForm(HMODULE hLib) : hLib(hLib) { this->SetStyle(ControlStyles::AllPaintingInWmPaint | ControlStyles::UserPaint | ControlStyles::OptimizedDoubleBuffer, true); this->Text = L"AGP Manipulator 2D - Оптимизация положения захвата"; this->ClientSize = System::Drawing::Size(1000, 700); // Подписка на событие изменения размера this->Resize += gcnew EventHandler(this, &MyForm::OnResize); fManip = (P_MANIP)GetProcAddress(hLib, "AGP_Manip2D"); pFree = (P_FREE)GetProcAddress(hLib, "AGP_Free"); angles = gcnew List<float>(0.0f); lengths = gcnew List<float>(0.0f); InitUI(); ResetRandomConfig(); } protected: ~MyForm() {} private: // DLL HMODULE hLib; P_MANIP fManip; P_FREE pFree; // состояние int nSegments; bool variableLengths; List<float>^ angles; List<float>^ lengths; // элементы UI CheckBox^ cbVarLen; NumericUpDown^ nudFixedLength; NumericUpDown^ nudStretchFactor; NumericUpDown^ nudDeltaThetaMax; NumericUpDown^ nudTargetX; NumericUpDown^ nudTargetY; NumericUpDown^ nudLevels; NumericUpDown^ nudMaxIter; CheckBox^ cbAdaptive; NumericUpDown^ nudR; NumericUpDown^ nudEps; Button^ btnAdd; Button^ btnRem; Button^ btnOptimize; Label^ lblInfo; // RNG System::UInt32 rngState = 0xA5C39E0Du; // --- вспомогательные --- void InitUI() { int y = 10, w = 180, h = 24, pad = 8; int currentX = 10; Label^ L; // Первая строка элементов L = gcnew Label(); L->Text = L"Макс. 
угол между звеньями (рад)"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudDeltaThetaMax = gcnew NumericUpDown(); nudDeltaThetaMax->Left = currentX; nudDeltaThetaMax->Top = y + h + 2; nudDeltaThetaMax->Width = w; nudDeltaThetaMax->DecimalPlaces = 3; nudDeltaThetaMax->Minimum = (Decimal)0.1; nudDeltaThetaMax->Maximum = (Decimal)3.14159; nudDeltaThetaMax->Value = (Decimal)1.57; this->Controls->Add(nudDeltaThetaMax); currentX += w + 20; L = gcnew Label(); L->Text = L"Базовая длина звена"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudFixedLength = gcnew NumericUpDown(); nudFixedLength->Left = currentX; nudFixedLength->Top = y + h + 2; nudFixedLength->Width = w; nudFixedLength->DecimalPlaces = 2; nudFixedLength->Minimum = (Decimal)0.1; nudFixedLength->Maximum = (Decimal)2.0; nudFixedLength->Value = (Decimal)1.0; this->Controls->Add(nudFixedLength); currentX += w + 20; L = gcnew Label(); L->Text = L"Коэф. растяжения/сжатия"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudStretchFactor = gcnew NumericUpDown(); nudStretchFactor->Left = currentX; nudStretchFactor->Top = y + h + 2; nudStretchFactor->Width = w; nudStretchFactor->DecimalPlaces = 2; nudStretchFactor->Minimum = (Decimal)1.0; nudStretchFactor->Maximum = (Decimal)2.0; nudStretchFactor->Value = (Decimal)1.5; this->Controls->Add(nudStretchFactor); currentX += w + 20; cbVarLen = gcnew CheckBox(); cbVarLen->Text = L"Переменные длины звеньев"; cbVarLen->Left = currentX; cbVarLen->Top = y + h + 2; cbVarLen->Width = w; cbVarLen->Checked = false; this->Controls->Add(cbVarLen); currentX += w + 20; // Вторая строка элементов y += h * 2 + pad + 10; currentX = 10; L = gcnew Label(); L->Text = L"Целевая позиция X"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudTargetX = gcnew NumericUpDown(); nudTargetX->Left = currentX; nudTargetX->Top = y + h + 2; nudTargetX->Width = w; nudTargetX->DecimalPlaces = 2; nudTargetX->Minimum 
= (Decimal)-10.0; nudTargetX->Maximum = (Decimal)10.0; nudTargetX->Value = (Decimal)3.5; this->Controls->Add(nudTargetX); currentX += w + 20; L = gcnew Label(); L->Text = L"Целевая позиция Y"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudTargetY = gcnew NumericUpDown(); nudTargetY->Left = currentX; nudTargetY->Top = y + h + 2; nudTargetY->Width = w; nudTargetY->DecimalPlaces = 2; nudTargetY->Minimum = (Decimal)-10.0; nudTargetY->Maximum = (Decimal)10.0; nudTargetY->Value = (Decimal)1.0; this->Controls->Add(nudTargetY); currentX += w + 20; L = gcnew Label(); L->Text = L"Глубина развертки Пеано-Гильберта"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudLevels = gcnew NumericUpDown(); nudLevels->Left = currentX; nudLevels->Top = y + h + 2; nudLevels->Width = w; nudLevels->Minimum = 4; nudLevels->Maximum = 20; nudLevels->Value = 12; this->Controls->Add(nudLevels); currentX += w + 20; L = gcnew Label(); L->Text = L"Параметр надежности (r)"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudR = gcnew NumericUpDown(); nudR->Left = currentX; nudR->Top = y + h + 2; nudR->Width = w; nudR->DecimalPlaces = 2; nudR->Minimum = (Decimal)1.0; nudR->Maximum = (Decimal)10.0; nudR->Value = (Decimal)2.5; this->Controls->Add(nudR); currentX += w + 20; cbAdaptive = gcnew CheckBox(); cbAdaptive->Text = L"Адаптивная схема"; cbAdaptive->Left = currentX; cbAdaptive->Top = y + h + 2; cbAdaptive->Width = w; cbAdaptive->Checked = true; this->Controls->Add(cbAdaptive); // Третья строка элементов y += h * 2 + pad + 10; currentX = 10; L = gcnew Label(); L->Text = L"Точность (epsilon)"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudEps = gcnew NumericUpDown(); nudEps->Left = currentX; nudEps->Top = y + h + 2; nudEps->Width = w; nudEps->DecimalPlaces = 6; nudEps->Minimum = (Decimal)0.000001; nudEps->Maximum = (Decimal)0.1; nudEps->Value = (Decimal)0.0001; this->Controls->Add(nudEps); currentX += 
w + 20; L = gcnew Label(); L->Text = L"Макс. число итераций"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudMaxIter = gcnew NumericUpDown(); nudMaxIter->Left = currentX; nudMaxIter->Top = y + h + 2; nudMaxIter->Width = w; nudMaxIter->Minimum = 10; nudMaxIter->Maximum = 50000; nudMaxIter->Value = 1000; this->Controls->Add(nudMaxIter); currentX += w + 20; btnAdd = gcnew Button(); btnAdd->Text = L"+ Звено"; btnAdd->Left = currentX; btnAdd->Top = y + h + 2; btnAdd->Width = 80; btnAdd->Click += gcnew EventHandler(this, &MyForm::OnAddClick); this->Controls->Add(btnAdd); currentX += 85; btnRem = gcnew Button(); btnRem->Text = L"- Звено"; btnRem->Left = currentX; btnRem->Top = y + h + 2; btnRem->Width = 80; btnRem->Click += gcnew EventHandler(this, &MyForm::OnRemClick); this->Controls->Add(btnRem); currentX += 85; btnOptimize = gcnew Button(); btnOptimize->Text = L"Оптимизировать"; btnOptimize->Left = currentX; btnOptimize->Top = y + h + 2; btnOptimize->Width = 120; btnOptimize->Click += gcnew EventHandler(this, &MyForm::OnOptimizeClick); this->Controls->Add(btnOptimize); currentX += 125; // Информационная панель lblInfo = gcnew Label(); lblInfo->Left = currentX; lblInfo->Top = y; lblInfo->Width = 250; lblInfo->Height = 60; lblInfo->BorderStyle = BorderStyle::FixedSingle; lblInfo->Text = L"Готов к работе"; this->Controls->Add(lblInfo); } void ResetRandomConfig() { nSegments = 1; angles->Clear(); lengths->Clear(); angles->Add(RandAngle()); lengths->Add((float)nudFixedLength->Value); variableLengths = false; this->Invalidate(); } float Rand01() { rngState ^= rngState << 13; rngState ^= rngState >> 17; rngState ^= rngState << 5; return (float)((double)(unsigned)rngState / 4294967296.0); } float RandAngle() { return (Rand01() * 6.28318530717958647692f) - 3.14159265358979323846f; } // Обработчик изменения размера формы System::Void OnResize(System::Object^ sender, System::EventArgs^ e) { this->Invalidate(); } // --- события --- System::Void 
OnAddClick(System::Object^ sender, System::EventArgs^ e) { ++nSegments; angles->Add(RandAngle()); lengths->Add((float)nudFixedLength->Value); this->Invalidate(); } System::Void OnRemClick(System::Object^ sender, System::EventArgs^ e) { if (nSegments > 1) { --nSegments; angles->RemoveAt(angles->Count - 1); lengths->RemoveAt(lengths->Count - 1); this->Invalidate(); } } System::Void OnOptimizeClick(System::Object^ sender, System::EventArgs^ e) { if (!fManip) return; // Фиксированные параметры float thMin = -3.14159f; // Полный диапазон углов float thMax = 3.14159f; // Полный диапазон углов float lamC = 1.0f; // Фиксированный штраф за кривизну float lamA = 0.0f; // Отключен штраф за углы float lamL = 0.0f; // Отключен штраф за длины int orientations = 4; // Фиксированное количество ориентаций // Параметры из интерфейса variableLengths = cbVarLen->Checked; float fixedLength = (float)nudFixedLength->Value; float stretchFactor = (float)nudStretchFactor->Value; // Расчет минимальной и максимальной длины на основе коэффициента растяжения float Lmin = fixedLength / stretchFactor; float Lmax = fixedLength * stretchFactor; float dthMax = (float)nudDeltaThetaMax->Value; // Максимальный угол между звеньями float tx = (float)nudTargetX->Value; float ty = (float)nudTargetY->Value; int levels = (int)nudLevels->Value; int maxIter = (int)nudMaxIter->Value; bool adaptive = cbAdaptive->Checked; float r = (float)nudR->Value; float eps = (float)nudEps->Value; LARGE_INTEGER t0, t1, fq; QueryPerformanceCounter(&t0); float* bestQ = nullptr; size_t bestQLen = 0; float bestX = 0, bestY = 0, bestF = 0; fManip( nSegments, thMin, thMax, variableLengths, Lmin, Lmax, dthMax, lamC, lamA, lamL, tx, ty, levels, orientations, maxIter, r, adaptive, eps, (unsigned)GetTickCount(), &bestQ, &bestQLen, &bestX, &bestY, &bestF ); QueryPerformanceCounter(&t1); QueryPerformanceFrequency(&fq); double micros = 1e6 * (double)(t1.QuadPart - t0.QuadPart) / (double)fq.QuadPart; if (bestQ && bestQLen >= 
(size_t)nSegments) { angles->Clear(); for (int i = 0; i < nSegments; ++i) angles->Add(bestQ[i]); if (variableLengths) { lengths->Clear(); for (int i = 0; i < nSegments; ++i) lengths->Add(bestQ[nSegments + i]); } else { lengths->Clear(); for (int i = 0; i < nSegments; ++i) lengths->Add(fixedLength); } pFree(bestQ); } lblInfo->Text = String::Format(L"Результат оптимизации:\nЦелевая функция: {0:F5}\nКонечная точка: ({1:F3}, {2:F3})\nВремя: {3:F0} мкс", bestF, bestX, bestY, micros); this->Invalidate(); } protected: virtual void OnPaint(PaintEventArgs^ e) override { Form::OnPaint(e); Graphics^ g = e->Graphics; g->SmoothingMode = System::Drawing::Drawing2D::SmoothingMode::HighQuality; // Очистка всей клиентской области g->Clear(this->BackColor); // Область для рисования манипулятора (сдвинута вниз на 150 пикселей) int topOffset = 150; System::Drawing::Rectangle drawArea = System::Drawing::Rectangle( 0, topOffset, this->ClientSize.Width, this->ClientSize.Height - topOffset); // Задний фон всей области рисования (от левого до правого края) g->FillRectangle(Brushes::White, drawArea); // Левая стена со штриховкой int leftWallX = drawArea.Left + this->ClientSize.Width * 25 / 100; Pen^ wallPen = gcnew Pen(Color::Black, 2); g->DrawLine(wallPen, leftWallX, drawArea.Top, leftWallX, drawArea.Bottom); // Штриховка слева от стены - ограниченная область HatchBrush^ hatchBrush = gcnew HatchBrush( HatchStyle::BackwardDiagonal, Color::LightGray, Color::White ); int leftHatchWidth = 100; g->FillRectangle(hatchBrush, leftWallX - leftHatchWidth, drawArea.Top, leftHatchWidth, drawArea.Height); // Вычисляем позицию целевой точки в пикселях с увеличенным масштабом float targetX = (float)nudTargetX->Value; float targetY = (float)nudTargetY->Value; float scale = 160.0f; int baseX = leftWallX; int baseY = drawArea.Top + drawArea.Height / 2; float pixelTargetX = baseX + targetX * scale; float pixelTargetY = baseY - targetY * scale; // Правая стена (пунктир) - всегда примыкает к целевой окружности 
int rightWallX = (int)(pixelTargetX + 8); // Ограничиваем правую стену, чтобы не выходила за границы области рисования rightWallX = Math::Min(rightWallX, drawArea.Right - 10); Pen^ dashedPen = gcnew Pen(Color::Black, 2); dashedPen->DashStyle = DashStyle::Dash; g->DrawLine(dashedPen, rightWallX, drawArea.Top, rightWallX, drawArea.Bottom); // Штриховка справа от правой стены - ограниченная область такой же ширины, как слева int rightHatchWidth = leftHatchWidth; g->FillRectangle(hatchBrush, rightWallX, drawArea.Top, rightHatchWidth, drawArea.Height); // Целевая позиция (пунктирная окружность) Pen^ targetPen = gcnew Pen(Color::Green, 1.5f); targetPen->DashStyle = DashStyle::Dot; g->DrawEllipse(targetPen, pixelTargetX - 8.0f, pixelTargetY - 8.0f, 16.0f, 16.0f); // Манипулятор cli::array<PointF>^ pts = gcnew cli::array<PointF>(nSegments + 1); pts[0] = PointF((float)baseX, (float)baseY); float x = 0.0f, y = 0.0f, phi = 0.0f; for (int i = 0; i < nSegments; ++i) { array<float>^ anglesArray = angles->ToArray(); array<float>^ lengthsArray = lengths->ToArray(); float theta = anglesArray[i]; float L = lengthsArray[i]; // Используем текущую длину (фиксированную или оптимизированную) phi += theta; x += L * (float)Math::Cos((double)phi); y += L * (float)Math::Sin((double)phi); pts[i + 1] = PointF(baseX + x * scale, baseY - y * scale); } // Стержни Pen^ penRod = gcnew Pen(Color::Red, 6.0f); for (int i = 0; i < nSegments; ++i) g->DrawLine(penRod, pts[i], pts[i + 1]); // Шарниры SolidBrush^ brJoint = gcnew SolidBrush(Color::Blue); const float R = 6.0f; for (int i = 0; i <= nSegments; ++i) g->FillEllipse(brJoint, pts[i].X - R, pts[i].Y - R, 2 * R, 2 * R); // Освобождение ресурсов delete wallPen; delete dashedPen; delete targetPen; delete penRod; delete brJoint; delete hatchBrush; } };

} // namespace TESTAGP - код в MyForm.h - сейчас, #include "MyForm.h"

using namespace System;
using namespace System::Windows::Forms;

[STAThread]
int main() {
    // Load the optimizer DLL first; the form cannot work without it.
    const HMODULE backend = LoadLibraryW(L"TEST_FUNC.dll"); // name of your DLL
    if (backend == nullptr) {
        MessageBox::Show(L"Не удалось загрузить TEST_FUNC.dll", L"Ошибка", MessageBoxButtons::OK, MessageBoxIcon::Error);
        return 1;
    }

    Application::EnableVisualStyles();
    Application::SetCompatibleTextRenderingDefault(false);

    // Run the message loop on the main form; it receives the DLL handle.
    TESTAGP::MyForm^ window = gcnew TESTAGP::MyForm(backend);
    Application::Run(window);
    return 0;
} - код MyForm.cpp сейчас, /другой бэкенд код/ ... extern "C" __declspec(dllexport) __declspec(noalias)
void AGP_2D(
const float N, const float global_iterations,
const float a, const float b, const float c, const float d, const float r,
const bool mode, const float epsilon, const float seed,
float** const __restrict out_data, size_t* const __restrict out_len) noexcept
{
Slab* const __restrict slab = tls.local();
slab->current = slab->base;

text
int schetchick = 0; int no_improve = 0; const int rank = g_world->rank(); const int world_size = g_world->size(); while (g_world->iprobe(boost::mpi::any_source, 0)) { CtrlMsg dummy; g_world->recv(boost::mpi::any_source, 0, dummy); } const float inv_divider = ldexpf(1.0f, -((gActiveMap.levels << 1) + 1)); const float x_addition = (b - a) * inv_divider; const float y_addition = (d - c) * inv_divider; const float true_start = a + x_addition; const float true_end = b - x_addition; float x_Rmax_1 = true_start; float x_Rmax_2 = true_end; const float initial_length = x_Rmax_2 - x_Rmax_1; float dmax = initial_length; const float threshold_03 = 0.3f * initial_length; const float inv_threshold_03 = 1.0f / threshold_03; const float start_val = rank % 3 ? RastriginFunc(true_end, d - y_addition) : RastriginFunc(true_start, c + y_addition); float best_f = rank % 2 ? RastriginFunc(true_start, d - y_addition) : RastriginFunc(true_end, c + y_addition); float y_Rmax_1 = start_val; float y_Rmax_2 = best_f; std::vector<float, boost::alignment::aligned_allocator<float, 16u>> Extr; std::vector<Interval* __restrict, boost::alignment::aligned_allocator<Interval* __restrict, 64u>> R; Extr.clear(); Extr.reserve(static_cast<size_t>(global_iterations) << 2u); R.clear(); R.reserve(static_cast<size_t>(global_iterations) << 1u); R.emplace_back(new Interval(true_start, true_end, start_val, best_f, 2.0f)); const Interval* __restrict top_ptr; float Mmax = R.front()->M; float m = r * Mmax; while (true) { const float interval_len = x_Rmax_2 - x_Rmax_1; const bool stagnation = no_improve > 100 && schetchick > 270; const float p = fmaf(-1.0f / initial_length, dmax, 1.0f); while (g_world->iprobe(boost::mpi::any_source, 0)) { CtrlMsg in; g_world->recv(boost::mpi::any_source, 0, in); if (in.kind) { if (!rank) { Extr.emplace_back(static_cast<float>(schetchick)); Extr.emplace_back(interval_len); *out_len = Extr.size(); *out_data = reinterpret_cast<float* __restrict>(CoTaskMemAlloc(sizeof(float) * 
(*out_len))); memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len)); } return; } const float sx = FindX2D_analytic(in.xchg.s_x1, in.xchg.s_x2); const float ex = FindX2D_analytic(in.xchg.e_x1, in.xchg.e_x2); Interval* const __restrict injected = new Interval(sx, ex, RastriginFunc(in.xchg.s_x1, in.xchg.s_x2), RastriginFunc(in.xchg.e_x1, in.xchg.e_x2), 2.0f); injected->ChangeCharacteristic(m); if (injected->R > 1.15f * top_ptr->R) { const float k = stagnation ? fmaf((1.0f / expm1f(1.0f)) * 1.0f, expm1f(p), 0.3f) : fmaf((1.0f / expm1f(1.0f)) * 0.6f, expm1f(p), 0.6f); injected->R = in.xchg.Rtop * k; R.emplace_back(injected); std::push_heap(R.begin(), R.end(), ComparePtr); } } const int T = static_cast<int>(fmaf(-expm1f(p), 264.0f, 277.0f)); const bool want_term = interval_len < epsilon || schetchick == static_cast<int>(global_iterations); if (!(++schetchick % T) || stagnation || want_term) { CtrlMsg out; out.kind = want_term; if (!out.kind) { float s_x1, s_x2, e_x1, e_x2; HitTest2D_analytic(top_ptr->x1, s_x1, s_x2); HitTest2D_analytic(top_ptr->x2, e_x1, e_x2); out.xchg = CrossMsg{ s_x1, s_x2, e_x1, e_x2, top_ptr->R }; } int i = 0; while (i < world_size) { if (i != rank) g_world->isend(i, 0, out); ++i; } if (out.kind) { if (!rank) { Extr.emplace_back(static_cast<float>(schetchick)); Extr.emplace_back(interval_len); *out_len = Extr.size(); *out_data = reinterpret_cast<float* __restrict>(CoTaskMemAlloc(sizeof(float) * (*out_len))); memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len)); } return; } } const float new_point = Shag(m, x_Rmax_1, x_Rmax_2, y_Rmax_1, y_Rmax_2, 2.0f, r); float new_x1_val, new_x2_val; HitTest2D_analytic(new_point, new_x1_val, new_x2_val); const float new_value = RastriginFunc(new_x1_val, new_x2_val); if (new_value < best_f) { best_f = new_value; Extr.emplace_back(best_f); Extr.emplace_back(new_x1_val); Extr.emplace_back(new_x2_val); no_improve = 0; } else { ++no_improve; } std::pop_heap(R.begin(), R.end(), ComparePtr); Interval* const 
__restrict promejutochny_otrezok = R.back(); const float segment_x1 = promejutochny_otrezok->x1; const float segment_x2 = promejutochny_otrezok->x2; const float len2 = segment_x2 - new_point; const float len1 = new_point - segment_x1; Interval* const __restrict curr = new Interval(segment_x1, new_point, promejutochny_otrezok->y1, new_value, 2.0f); Interval* const __restrict curr1 = new Interval(new_point, segment_x2, new_value, promejutochny_otrezok->y2, 2.0f); const float currM = curr->M > curr1->M ? curr->M : curr1->M; const size_t r_size = R.size(); if (mode) { if (len2 + len1 == dmax) { dmax = len2 > len1 ? len2 : len1; size_t i = 0u;

#pragma loop(ivdep)
while (i < r_size) {
const float len_item = R[i]->x2 - R[i]->x1;
if (len_item > dmax) dmax = len_item;
++i;
}
}

text
if (threshold_03 > dmax && !(schetchick % 3) || 10.0f * dmax < initial_length) { if (currM > Mmax) { Mmax = currM; m = r * Mmax; } const float progress = fmaf(-inv_threshold_03, dmax, 1.0f); const float alpha = fmaf(progress, progress, 1.0f); const float betta = 2.0f - alpha; const float MULTIPLIER = (1.0f / dmax) * Mmax; const float global_coeff = fmaf(MULTIPLIER, r, -MULTIPLIER); const float GLOBAL_FACTOR = betta * global_coeff; curr->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len1, curr->M * alpha)); curr1->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len2, curr1->M * alpha)); if (r_size < 64u) { size_t i = 0u;

#pragma loop(ivdep)
while (i < r_size) {
R[i]->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, R[i]->x2 - R[i]->x1, R[i]->M * alpha));
++i;
}
}
else {
RecomputeR_AffineM_AVX2(R.data(), r_size, GLOBAL_FACTOR, alpha);
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
else {
if (currM > Mmax) {
if (currM < 1.15f * Mmax) {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
else {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
if (r_size < 64u) {
size_t i = 0u;
#pragma loop(ivdep)
while (i < r_size) {
R[i]->ChangeCharacteristic(m);
++i;
}
}
else {
RecomputeR_ConstM_AVX2(R.data(), r_size, m);
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
}
else {
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
}
}
else {
if (currM > Mmax) {
if (currM < 1.15f * Mmax) {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
else {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
if (r_size < 64u) {
size_t i = 0u;
#pragma loop(ivdep)
while (i < r_size) {
R[i]->ChangeCharacteristic(m);
++i;
}
}
else {
RecomputeR_ConstM_AVX2(R.data(), r_size, m);
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
}
else {
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
}

text
R.back() = curr; std::push_heap(R.begin(), R.end(), ComparePtr); R.emplace_back(curr1); std::push_heap(R.begin(), R.end(), ComparePtr); top_ptr = R.front(); x_Rmax_1 = top_ptr->x1; x_Rmax_2 = top_ptr->x2; y_Rmax_1 = top_ptr->y1; y_Rmax_2 = top_ptr->y2; }

}

// Worker loop: polls for a start message from rank 0 on tag 1 and runs one
// AGP_2D search per message. The loop never exits.
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AgpWaitStartAndRun() noexcept
{
    int start_token;
    while (true) {
        if (g_world->iprobe(0, 1)) {
            g_world->recv(0, 1, start_token);

            // AGP_2D writes these only on rank 0, so they need a defined
            // value for every other rank.
            float* __restrict buf = nullptr;
            size_t len = 0u;
            AGP_2D(2.0f, 10000.0f, -2.2f, 1.8f, -2.2f, 1.8f, 2.5f, false, 0.00001f,
                   static_cast<float>(GetTickCount()), &buf, &len);

            // The result is not consumed here; release it if one was produced.
            if (buf) CoTaskMemFree(buf);
        }
    }
}

// Sends a start message (tag 1) to every rank except rank 0.
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AgpStartWorkers() noexcept
{
    // The payload of a non-blocking send must stay alive until the send has
    // completed; a static object outlives this call, a temporary does not.
    static const int start_token = 0;

    const int world_size = g_world->size();
    for (int worker = 1; worker < world_size; ++worker) {
        g_world->isend(worker, 1, start_token);
    }
}

// Exported deallocator: releases a buffer by passing it to CoTaskMemFree.
// The result buffers handed out by the exported search functions in this file
// are obtained with CoTaskMemAlloc, so this is the matching release call.
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AGP_Free(float* const __restrict p) noexcept
{
CoTaskMemFree(p);
}

// ВНИМАНИЕ: СЕЙЧАС НАЧИНАЕТСЯ КУСОК, КОТОРОГО НЕ БЫЛО
// ---------- Мортоновская (Z-order) развертка для nD ----------
// Аналитически быстрая: interleave битов индекса в координаты.
// Для разнообразия "ориентаций" используем перестановку осей и инверсии.
struct MortonND {
int dim; // D
int levels; // уровни разбиения (итоговая сетка 2^levels по каждой оси)
uint64_t scale; // 1 << (levels * dim)
std::vector<float> low, high; // коробка поиска
std::vector<int> perm; // перестановка осей
std::vector<uint64_t> invMask;// какие оси инвертировать (по ориентации)

text
MortonND(int D, int L, const float* lows, const float* highs, uint32_t orientationSeed) : dim(D), levels(L), scale( (uint64_t)1 << (L * D) ), low(lows, lows + D), high(highs, highs + D), perm(D), invMask(D, 0) { // Простая псевдослучайная перестановка осей for (int i = 0; i < D; ++i) perm[i] = i; // Fisher-Yates на основе xor-rand uint32_t st = orientationSeed ? orientationSeed : 0x9E3779B9u; for (int i = D-1; i > 0; --i) { float current_res; XOR_RAND(st, current_res); int j = (int)std::floor(current_res * (i+1)); std::swap(perm[i], perm[j]); } // Маски инверсий по осям (повороты/зеркала) for (int i = 0; i < D; ++i) { float current_res; XOR_RAND(st, current_res); invMask[i] = (current_res > 0.5f) ? ((uint64_t)1 << levels) - 1ull : 0ull; } } // Преобразование t in [0,1] -> точка в D-мерной коробке (центр ячейки, чтобы повторить идею «середин») inline void map01ToPoint(float t, float* __restrict out) const noexcept { // Квантуем t в индекс ячейки: if (t < 0.0f) t = 0.0f; else if (t >= 1.0f) t = std::nextafter(1.0f, 0.0f); uint64_t idx = (uint64_t)(t * (double)scale); if (idx >= scale) idx = scale - 1; // Достанем по уровням биты и соберём координатные индексы: std::vector<uint64_t> coordIdx(dim, 0ull); for (int bit = 0; bit < levels; ++bit) { for (int d = 0; d < dim; ++d) { int pd = perm[d]; // перестановка uint64_t bitmask = (idx >> (bit*dim + d)) & 1ull; coordIdx[pd] |= (bitmask << bit); } } // Инвертирование по некоторым осям (зеркалирование): for (int d = 0; d < dim; ++d) { if (invMask[d]) { coordIdx[d] ^= invMask[d]; } } const float invCells = 1.0f / (float)( (uint64_t)1 << levels ); for (int d = 0; d < dim; ++d) { float cellStart = low[d] + (high[d] - low[d]) * ( (float)coordIdx[d] * invCells ); float cellSize = (high[d] - low[d]) * invCells; out[d] = cellStart + 0.5f * cellSize; // центр ячейки } }

};

#define M_PI 3.141592653589793238462643383279502884L

// Wraps an angle (radians) into one turn centred on zero: the result lies in
// [-pi, pi) up to float rounding.
static inline float wrap_pi(float a) noexcept {
    const float pi = (float)M_PI;
    const float turn = 2.0f * (float)M_PI;

    // Shift so the target range starts at 0, reduce, then shift back.
    float shifted = std::fmod(a + pi, turn);
    if (shifted < 0) {
        shifted += turn;  // fmod keeps the sign of its first argument
    }
    return shifted - pi;
}

// Кинематика и целевая функция
struct ManipCost {
int n; // звеньев
bool variableLen;
float targetX, targetY;

text
// Общие (глобальные) диапазоны: θ in [thMin, thMax], L in [LMin, LMax] float thMin, thMax; float LMin, LMax; // Штрафы float dThetaMax; // порог излома float lambdaCurv; float lambdaAng; float lambdaLen; // Вычисление стоимости для q = [θ₁..θₙ, (возможно L₁..Lₙ)] inline float operator()(const float* q) const noexcept { const float* th = q; const float* L = variableLen ? (q + n) : nullptr; // 1) расстояние до цели float x = 0.0f, y = 0.0f, phi = 0.0f; float penAngle = 0.0f, penLen = 0.0f, penCurv = 0.0f; float Lconst = 1.0f; for (int i=0; i<n; ++i) { const float theta = th[i]; const float Li = variableLen ? std::max(LMin, std::min(LMax, L[i])) : Lconst; phi += theta; x += Li * std::cos(phi); y += Li * std::sin(phi); // штрафы за угол if (theta < thMin) penAngle += (thMin - theta); if (theta > thMax) penAngle += (theta - thMax); // штраф за длину (если переменная) if (variableLen) { if (L[i] < LMin) penLen += (LMin - L[i]); if (L[i] > LMax) penLen += (L[i] - LMax); } } // 2) штраф за «кривизну» (изломы) for (int i=0; i<n-1; ++i) { float dth = wrap_pi(th[i+1] - th[i]); // излом между соседними float viol = std::fabs(dth) - dThetaMax; if (viol > 0.0f) penCurv += viol; // линейный штраф } const float dx = (x - targetX); const float dy = (y - targetY); const float dist = std::sqrt(dx*dx + dy*dy); return dist + lambdaAng*penAngle + lambdaLen*penLen + lambdaCurv*penCurv; }

};

// Одна «ветка» AGP по конкретной ориентации MortonND
static void agp_run_branch(
const MortonND& map,
const ManipCost& cost,
int maxIter,
float r,
bool adaptiveMode,
float eps,
uint32_t seed,
// out:
std::vector<float>& bestQ, float& bestF, float& bestX, float& bestY
){
const int dim = cost.n + (cost.variableLen ? cost.n : 0);

text
auto evalAt = [&](float t)->float { std::vector<float> q(dim); map.map01ToPoint(t, q.data()); // Преобразуем из [low,high] уже учтённых в map — там всё учтено float f = cost(q.data()); // Для обратного восстановления EE(x,y) (необязательно, но полезно вернуть) float x=0,y=0,phi=0; const float* th = q.data(); const float* L = cost.variableLen ? (q.data()+cost.n) : nullptr; float Lconst = 1.0f; for (int i=0;i<cost.n;++i){ phi += th[i]; float Li = cost.variableLen ? L[i] : Lconst; x += Li * std::cos(phi); y += Li * std::sin(phi); } // Если это пока лучший — сохраним всё if (f < bestF) { bestF = f; bestQ = q; bestX = x; bestY = y; } return f; }; // Стартовые концы [0,1] float a = 0.0f, b = 1.0f; float f_a = evalAt(a); float f_b = evalAt(b); // Куча интервалов по вашей логике std::vector<Interval*> H; H.reserve( (size_t)maxIter ); Interval* root = new Interval(a, b, f_a, f_b, (float)dim); float Mmax = root->M; float m = r * Mmax; root->ChangeCharacteristic(m); H.push_back(root); std::make_heap(H.begin(), H.end(), ComparePtr); float dmax = b - a; const float initial_len = dmax; const float thr03 = 0.3f * initial_len; const float inv_thr03 = 1.0f / thr03; int it = 0; while (it < maxIter) { // Берём лучший интервал std::pop_heap(H.begin(), H.end(), ComparePtr); Interval* cur = H.back(); H.pop_back(); const float x1 = cur->x1, x2 = cur->x2; const float y1 = cur->y1, y2 = cur->y2; // Шаг float tNew = Shag(m, x1, x2, y1, y2, (float)dim, r); tNew = std::max(a, std::min(b, tNew)); float fNew = evalAt(tNew); // Разделяем Interval* left = new Interval(x1, tNew, y1, fNew, (float)dim); Interval* right = new Interval(tNew, x2, fNew, y2, (float)dim); float Mloc = std::max(left->M, right->M); // Адаптация m (упрощённая версия вашей логики) if (adaptiveMode) { float len1 = tNew - x1; float len2 = x2 - tNew; if (len1 + len2 == dmax) { dmax = std::max(len1, len2); for (auto p: H) dmax = std::max(dmax, p->x2 - p->x1); } if ( (thr03 > dmax && !(it % 3)) || (10.0f * dmax < initial_len) ) { 
if (Mloc > Mmax) { Mmax = Mloc; m = r * Mmax; } float progress = 1.0f - (dmax * inv_thr03); float alpha = progress*progress; float beta = 2.0f - alpha; float MULT = (Mmax / dmax); float global_coeff = MULT * (r - 1.0f); float GF = beta * global_coeff; left->ChangeCharacteristic( GF*len1 + left->M*alpha ); right->ChangeCharacteristic( GF*len2 + right->M*alpha ); for (auto p: H) { float len = p->x2 - p->x1; p->ChangeCharacteristic( GF*len + p->M*alpha ); } } else { if (Mloc > Mmax) { if (Mloc < 1.15f*Mmax) { Mmax = Mloc; m = r*Mmax; } else { Mmax = Mloc; m = r*Mmax; for (auto p: H) p->ChangeCharacteristic(m); } } left->ChangeCharacteristic(m); right->ChangeCharacteristic(m); } } else { if (Mloc > Mmax) { if (Mloc < 1.15f*Mmax) { Mmax = Mloc; m = r*Mmax; } else { Mmax = Mloc; m = r*Mmax; for (auto p: H) p->ChangeCharacteristic(m); } } left->ChangeCharacteristic(m); right->ChangeCharacteristic(m); } H.push_back(left); std::push_heap(H.begin(), H.end(), ComparePtr); H.push_back(right); std::push_heap(H.begin(), H.end(), ComparePtr); // Критерий останова float currLen = std::max(left->x2 - left->x1, right->x2 - right->x1); if (currLen < eps) break; ++it; } // Освобождение for (auto p: H) delete p;

}

// ---------- Export: run the manipulator optimization ----------
// Searches for the chain configuration q = [angles, (optional) lengths] with
// the lowest ManipCost by running one agp_run_branch per sweep orientation on
// its own thread and keeping the best result.
//
// Outputs: *out_bestQ receives a CoTaskMemAlloc'ed array of *out_bestQLen
// floats (nSegments angles, followed by nSegments lengths when
// variableLengths is true); *out_bestX / *out_bestY / *out_bestF receive the
// end point and the cost of that configuration. When nSegments <= 0 or the
// allocation fails, *out_bestQ is null and *out_bestQLen is 0.
extern "C" __declspec(dllexport) __declspec(noalias) void AGP_Manip2D(
    int nSegments,
    // common angle and length ranges (the same for every link)
    float thetaMin, float thetaMax,
    bool variableLengths,
    float lenMin, float lenMax,
    // penalty parameters
    float deltaThetaMax,
    float lambdaCurv, float lambdaAngle, float lambdaLength,
    // target point
    float targetX, float targetY,
    // sweep / search parameters
    int peanoLevels,       // subdivision depth of the sweep
    int orientations,      // number of orientations searched in parallel
    int maxIterPerBranch,  // iteration limit per branch
    float r,
    bool adaptiveMode,
    float epsilon,
    unsigned int seed,
    // out:
    float** out_bestQ, size_t* out_bestQLen,
    float* out_bestX, float* out_bestY, float* out_bestF
){
    // Defined outputs on every path.
    *out_bestQ = nullptr;
    *out_bestQLen = 0;
    *out_bestX = 0.0f;
    *out_bestY = 0.0f;
    *out_bestF = FLT_MAX;
    if (nSegments <= 0) return;

    const int dim = nSegments + (variableLengths ? nSegments : 0);

    // The sweep index is a 64-bit integer with levels * dim bits; reduce the
    // depth when the requested one would not fit.
    int levels = peanoLevels < 0 ? 0 : peanoLevels;
    const int maxLevels = 63 / dim;
    if (levels > maxLevels) levels = maxLevels;

    // Search box [low, high] for every coordinate of q
    std::vector<float> low(dim), high(dim);
    for (int i = 0; i < nSegments; ++i) {
        low[i] = thetaMin;
        high[i] = thetaMax;
    }
    if (variableLengths) {
        for (int i = 0; i < nSegments; ++i) {
            low[nSegments + i] = lenMin;
            high[nSegments + i] = lenMax;
        }
    }

    ManipCost cost{
        nSegments, variableLengths,
        targetX, targetY,
        thetaMin, thetaMax,
        lenMin, lenMax,
        deltaThetaMax,
        lambdaCurv, lambdaAngle, lambdaLength
    };

    const int branches = std::max(1, orientations);

    // Best result over all branches, guarded by bestGuard.
    std::mutex bestGuard;
    std::vector<float> bestQ(dim, 0.0f);
    float bestFVal = FLT_MAX, bestXVal = 0.0f, bestYVal = 0.0f;

    auto worker = [&](uint32_t seedOrient) {
        MortonND map(dim, levels, low.data(), high.data(), seedOrient);
        std::vector<float> locBestQ;
        float locBestF = FLT_MAX, locBestX = 0.0f, locBestY = 0.0f;

        agp_run_branch(map, cost, maxIterPerBranch, r, adaptiveMode, epsilon, seed + seedOrient,
                       locBestQ, locBestF, locBestX, locBestY);

        // Merge under the lock; the search itself ran without it.
        std::lock_guard<std::mutex> lk(bestGuard);
        if (locBestF < bestFVal) {
            bestFVal = locBestF;
            bestXVal = locBestX;
            bestYVal = locBestY;
            bestQ.swap(locBestQ);
        }
    };

    std::vector<std::thread> threads;
    threads.reserve(branches);
    const uint32_t orientSeed = seed ? seed : 0xA53C9E0Du;
    for (int i = 0; i < branches; ++i) {
        threads.emplace_back(worker, orientSeed + i * 0x9E3779B9u);
    }
    for (auto& t : threads) t.join();

    // Hand the result to the caller.
    *out_bestX = bestXVal;
    *out_bestY = bestYVal;
    *out_bestF = bestFVal;

    float* const buffer = (float*)CoTaskMemAlloc(sizeof(float) * (size_t)dim);
    if (buffer) {
        std::memcpy(buffer, bestQ.data(), sizeof(float) * (size_t)dim);
        *out_bestQ = buffer;
        *out_bestQLen = (size_t)dim;
    }
} // [transcript] the code above is the current main .cpp file of the DLL; the next pasted file begins with: #include "pch.h"

// Runs one 13/17/5 shift-xor round on `state` (converted to int), stores the
// new value back into `state`, and writes state * 2^-32 to `result_var`.
// NOTE(review): `s` is a signed int, so `>>` and the overflowing `<<` depend on
// the compiler's signed-shift behaviour - confirm this is intended.
#define XOR_RAND(state, result_var) \
do { \
    int s = state; \
    s ^= s << 13; \
    s ^= s >> 17; \
    s ^= s << 5; \
    state = s; \
    result_var = state * 0x1.0p-32f; \
} while(0)

// Same shift-xor round as XOR_RAND, but the result is mapped with
// state * 2^-31 - 1 instead of state * 2^-32.
// NOTE(review): `s` is a signed int - see the remark on signed shifts for XOR_RAND.
#define XOR_RAND_GRSH(state, result_var) \
do { \
    int s = state; \
    s ^= s << 13; \
    s ^= s >> 17; \
    s ^= s << 5; \
    state = s; \
    result_var = fmaf(state, 0x1.0p-31f, -1.0f); \
} while(0)

// Polynomial cosine approximation: writes an approximation of cos(x) to
// `result_var`. |x| is reduced modulo 2*pi, folded into [0, pi], and then
// either evaluated directly (below pi/2) or mirrored around pi/2 and negated.
#define FABE13_COS(x, result_var) \
do { \
    const float abs_val = fabsf(x); \
    float reduced = fmodf(abs_val, 6.28318530718f); \
    if(reduced > 3.14159265359f) { \
        reduced = 6.28318530718f - reduced; \
    } \
    if(reduced < 1.57079632679f) { \
        const float val2 = reduced * reduced; \
        const float val4 = val2 * val2; \
        result_var = fmaf(val4, fmaf(val2, -0.0013888889f, 0.0416666667f), fmaf(val2, -0.5f, 1.0f)); \
    } else { \
        reduced = 3.14159265359f - reduced; \
        const float val2 = reduced * reduced; \
        const float val4 = val2 * val2; \
        result_var = -fmaf(val4, fmaf(val2, -0.0013888889f, 0.0416666667f), fmaf(val2, -0.5f, 1.0f)); \
    } \
} while(0)

// Fills sin_out[0..n) and cos_out[0..n) with polynomial approximations of the
// sine and cosine of in[0..n). Full groups of eight elements go through the
// AVX2/FMA path, which uses aligned loads and stores; the remainder is handled
// by the scalar loop using the same formulas.
// NOTE(review): adding and subtracting the 12582912.0f bias looks like a
// round-to-nearest of |x| / (2*pi), which would allow a negative remainder r -
// confirm the folding below is intended for that case.
#define FABE13_SINCOS(in, sin_out, cos_out, n) \
do { \
    int i = 0; \
    const int limit = n & ~7; \
    if(n >= 8) { \
        static __declspec(align(32)) const __m256 VEC_TWOPI = _mm256_set1_ps(6.28318530718f); \
        static __declspec(align(32)) const __m256 VEC_PI = _mm256_set1_ps(3.14159265359f); \
        static __declspec(align(32)) const __m256 VEC_PI_2 = _mm256_set1_ps(1.57079632679f); \
        static __declspec(align(32)) const __m256 INV_TWOPI = _mm256_set1_ps(0.15915494309189535f); \
        static __declspec(align(32)) const __m256 BIAS = _mm256_set1_ps(12582912.0f); \
        static __declspec(align(32)) const __m256 VEC_COS_P5 = _mm256_set1_ps(-0.0013888889f); \
        static __declspec(align(32)) const __m256 VEC_COS_P3 = _mm256_set1_ps(0.0416666667f); \
        static __declspec(align(32)) const __m256 VEC_COS_P1 = _mm256_set1_ps(-0.5f); \
        static __declspec(align(32)) const __m256 VEC_COS_P0 = _mm256_set1_ps(1.0f); \
        static __declspec(align(32)) const __m256 VEC_SIN_P5 = _mm256_set1_ps(-0.0001984127f); \
        static __declspec(align(32)) const __m256 VEC_SIN_P3 = _mm256_set1_ps(0.0083333333f); \
        static __declspec(align(32)) const __m256 VEC_SIN_P1 = _mm256_set1_ps(-0.16666666f); \
        static __declspec(align(32)) const __m256 VEC_SIN_P0 = _mm256_set1_ps(1.0f); \
        static __declspec(align(32)) const __m256 VEC_ZERO = _mm256_setzero_ps(); \
        while(i < limit) { \
            const __m256 vx = _mm256_load_ps(&in[i]); \
            const __m256 vax = _mm256_andnot_ps(_mm256_set1_ps(-0.0f), vx); \
            __m256 q = _mm256_fmadd_ps(vax, INV_TWOPI, BIAS); \
            q = _mm256_sub_ps(q, BIAS); \
            const __m256 r = _mm256_fnmadd_ps(VEC_TWOPI, q, vax); \
            const __m256 r1 = _mm256_min_ps(r, _mm256_sub_ps(VEC_TWOPI, r)); \
            const __m256 r2 = _mm256_min_ps(r1, _mm256_sub_ps(VEC_PI, r1)); \
            const __m256 t2 = _mm256_mul_ps(r2, r2); \
            const __m256 cosv = _mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, VEC_COS_P5, VEC_COS_P3), VEC_COS_P1), VEC_COS_P0); \
            const __m256 sinv = _mm256_mul_ps(_mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, VEC_SIN_P5, VEC_SIN_P3), VEC_SIN_P1), VEC_SIN_P0), r2); \
            const __m256 cflip = _mm256_cmp_ps(r1, VEC_PI_2, _CMP_GT_OQ); \
            const __m256 sflip = _mm256_xor_ps(_mm256_cmp_ps(vx, VEC_ZERO, _CMP_LT_OQ), _mm256_cmp_ps(r, VEC_PI, _CMP_GT_OQ)); \
            _mm256_store_ps(&cos_out[i], _mm256_blendv_ps(cosv, _mm256_sub_ps(VEC_ZERO, cosv), cflip)); \
            _mm256_store_ps(&sin_out[i], _mm256_blendv_ps(sinv, _mm256_sub_ps(VEC_ZERO, sinv), sflip)); \
            i += 8; \
        } \
    } \
    while(i < n) { \
        const float x = in[i]; \
        const float ax = fabsf(x); \
        float q = fmaf(ax, 0.15915494309189535f, 12582912.0f); \
        q -= 12582912.0f; \
        float r = fmaf(-6.28318530718f, q, ax); \
        const bool sflip = r > 3.14159265359f; \
        if(sflip) r = 6.28318530718f - r; \
        const bool cflip = r > 1.57079632679f; \
        if(cflip) r = 3.14159265359f - r; \
        const float t2 = r * r; \
        const float c = fmaf(t2, fmaf(t2, fmaf(t2, -0.0013888889f, 0.0416666667f), -0.5f), 1.0f); \
        const float s = fmaf(t2, fmaf(t2, fmaf(t2, -0.0001984127f, 0.0083333333f), -0.16666666f), 1.0f) * r; \
        cos_out[i] = cflip ? -c : c; \
        sin_out[i] = ((x < 0.0f) ^ sflip) ? -s : s; \
        ++i; \
    } \
} while(0)

// Two-bit tags used both as the curve-orientation index into g_step_tbl /
// g_inv_tbl and as the start type of a Peano2DMap.
enum List : uint8_t {
    Top   = 0u,
    Down  = 1u,
    Left  = 2u,
    Right = 3u
};

// One entry of the forward table g_step_tbl, looked up by (current type, quadrant digit).
__declspec(align(4)) struct Step final {
// Curve type to use on the next (finer) level.
const uint8_t next;
// Non-zero: shift the cell by half its current width along the first coordinate.
const uint8_t dx;
// Non-zero: shift the cell by half its current height along the second coordinate.
const uint8_t dy;
};

// One entry of the inverse table g_inv_tbl, looked up by (current type, quadrant code).
__declspec(align(4)) struct InvStep final {
// Two-bit digit appended to the curve index for this level.
const uint8_t q;
// Curve type to use on the next (finer) level.
const uint8_t next;
};

// Forward table: g_step_tbl[type][q] gives, for the current curve type and the
// two-bit index digit q, the next type and the quadrant offsets (dx, dy).
// Rows are indexed by the List values Top, Down, Left, Right.
__declspec(align(64)) static const Step g_step_tbl[4][4] = {
{ {Right,0u,0u}, {Top,0u,1u}, {Top,1u,1u}, {Left,1u,0u} },
{ {Left,1u,1u}, {Down,1u,0u}, {Down,0u,0u}, {Right,0u,1u} },
{ {Down,1u,1u}, {Left,0u,1u}, {Left,0u,0u}, {Top,1u,0u} },
{ {Top,0u,0u}, {Right,1u,0u}, {Right,1u,1u}, {Down,0u,1u} }
};

// Inverse table: g_inv_tbl[type][dd] gives, for the current curve type and the
// quadrant code dd (bit 1 = x half, bit 0 = y half), the index digit q and the
// next type. Rows are indexed by the List values Top, Down, Left, Right.
__declspec(align(64)) static const InvStep g_inv_tbl[4][4] = {
{ {0u,Right}, {1u,Top}, {3u,Left}, {2u,Top} },
{ {2u,Down}, {3u,Right}, {1u,Down}, {0u,Left} },
{ {2u,Left}, {1u,Left}, {3u,Top}, {0u,Down} },
{ {0u,Top}, {3u,Down}, {1u,Right}, {2u,Right} }
};

// Process-wide Boost.MPI handles. Both are allocated in AgpInit and are not
// released anywhere in this part of the file.
static const boost::mpi::environment* __restrict g_env;
static const boost::mpi::communicator* __restrict g_world;

// Payload exchanged between ranks: the 2D images of both ends of an interval
// (s_* for its start, e_* for its end) plus the sender's characteristic Rtop.
__declspec(align(16)) struct CrossMsg final {
    float s_x1, s_x2;
    float e_x1, e_x2;
    float Rtop;

    // Archive hook: passes the five fields to `ar` in declaration order.
    template<typename Archive>
    __declspec(noalias) __forceinline void serialize(Archive& ar, const unsigned int) noexcept {
        ar & s_x1;
        ar & s_x2;
        ar & e_x1;
        ar & e_x2;
        ar & Rtop;
    }
};

// Control message sent on tag 0. `kind == true` tells the receiver to stop;
// otherwise `xchg` carries an interval for the receiver to inject.
__declspec(align(16)) struct CtrlMsg final {
    bool kind;
    CrossMsg xchg;

    // Archive hook: passes the flag and then the payload to `ar`.
    template<typename Archive>
    __declspec(noalias) __forceinline void serialize(Archive& ar, const unsigned int) noexcept {
        ar & kind;
        ar & xchg;
    }
};

// Bump-allocator arena: `current` advances from `base` towards `end`.
__declspec(align(16)) struct Slab final {
    char* const __restrict base;    // first byte of the arena
    char* __restrict current;       // next free byte
    char* const __restrict end;     // one past the usable part of the arena

    // Wraps `memory`; the usable size is rounded down to a multiple of 64 bytes.
    __declspec(noalias) __forceinline Slab(void* const __restrict memory, const size_t usable_size) noexcept
        : base(static_cast<char*>(memory))
        , current(base)
        , end(base + (usable_size & ~static_cast<size_t>(63u)))
    {
    }
};

// Arena handle used by Interval::operator new. The lambda allocates a 16 MiB
// block plus the Slab header and writes one byte every 4096 bytes of the block.
// NOTE(review): the lambda is invoked immediately, so its result is passed to
// the constructor as a value rather than as a per-thread initialiser - confirm
// that a single Slab shared by all threads is intended.
// NOTE(review): neither _aligned_malloc result is checked.
static tbb::enumerable_thread_specific<Slab*> tls([]() noexcept {
    void* const __restrict memory = _aligned_malloc(16777216u, 16u);
    Slab* const __restrict slab = static_cast<Slab*>(_aligned_malloc(32u, 16u));
    new (slab) Slab(memory, 16777216u);

    char* __restrict p = slab->base;

#pragma loop(ivdep)
    while (p < slab->end) {
        *p = 0u;
        p += 4096u;
    }
    return slab;
}());

// One search interval [x1, x2] with the function values at its ends (y1, y2),
// quantities precomputed from them, and its characteristic R.
__declspec(align(64)) struct Interval final {
    const float x1;
    const float x2;
    const float y1;
    const float y2;
    const float delta_y;          // y2 - y1
    const float ordinate_factor;  // -2 * (y1 + y2)
    const float N_factor;         // (x2 - x1)^(1/N)
    const float quadratic_term;   // delta_y^2 / N_factor
    const float M;                // |delta_y| / N_factor
    float R;                      // not set by the constructor; see ChangeCharacteristic

    // Takes the next 64 bytes of the arena returned by tls.local().
    // NOTE(review): there is no check against Slab::end and no matching
    // operator delete; storage is reused when the caller rewinds Slab::current.
    __declspec(noalias) __forceinline void* operator new(const size_t) noexcept {
        Slab* const __restrict s = tls.local();
        char* const __restrict result = s->current;
        s->current += 64u;
        return result;
    }

    // Builds the interval and its derived fields; _N selects the root applied
    // to the length (1 -> length itself, 2 -> sqrtf, otherwise powf(len, 1/_N)).
    __declspec(noalias) __forceinline Interval(const float _x1, const float _x2, const float _y1, const float _y2, const float _N) noexcept
        : x1(_x1), x2(_x2), y1(_y1), y2(_y2)
        , delta_y(_y2 - _y1)
        , ordinate_factor(-(y1 + y2) * 2.0f)
        , N_factor(_N == 1.0f ? _x2 - _x1 : _N == 2.0f ? sqrtf(_x2 - _x1) : powf(_x2 - _x1, 1.0f / _N))
        , quadratic_term((1.0f / N_factor) * delta_y * delta_y)
        , M((1.0f / N_factor) * fabsf(delta_y))
    {
    }

    // Sets R = quadratic_term / _m + _m * N_factor + ordinate_factor.
    __declspec(noalias) __forceinline void ChangeCharacteristic(const float _m) noexcept {
        R = fmaf(1.0f / _m, quadratic_term, fmaf(_m, N_factor, ordinate_factor));
    }
};

// Parameters of the curve mapping over the rectangle [a, b] x [c, d].
__declspec(align(16)) struct Peano2DMap final {
    const int levels;       // number of refinement levels
    const float a, b, c, d; // rectangle bounds
    const float lenx, leny; // b - a and d - c
    const float inv_lenx;   // 1 / (b - a)
    const uint32_t scale;   // 4^levels, i.e. 1 << (2 * levels)
    const uint8_t start;    // curve type used at the coarsest level

    // NOTE(review): `scale` is a 32-bit shift by 2 * L, so L must stay below 16.
    __declspec(noalias) __forceinline Peano2DMap(
        const int L,
        const float _a, const float _b, const float _c, const float _d,
        const uint8_t startType
    ) noexcept
        : levels(L)
        , a(_a), b(_b), c(_c), d(_d)
        , lenx(_b - _a)
        , leny(_d - _c)
        , inv_lenx(1.0f / (_b - _a))
        , scale(static_cast<uint32_t>(1u) << (L << 1))
        , start(startType)
    {
    }
};

// Map read by HitTest2D_analytic and FindX2D_analytic. It starts as an empty
// placeholder and is rebuilt in place by AgpInit.
static Peano2DMap gActiveMap(0, 0.0f, 0.0f, 0.0f, 0.0f, 0b00u);

// Heap ordering for interval pointers: `a` sorts before `b` when its
// characteristic R is smaller, so the heap top has the largest R.
static __declspec(noalias) __forceinline bool ComparePtr(const Interval* const __restrict a, const Interval* const __restrict b) noexcept
{
    const float lhs = a->R;
    const float rhs = b->R;
    return rhs > lhs;
}

// 1D test function: the negated sum of ten reciprocal terms whose centres and
// coefficients are drawn with XOR_RAND from the stream started at `seed`.
static __declspec(noalias) __forceinline float ShekelFunc(const float x, const float seed) noexcept
{
    float rng = seed;
    float draw_a, draw_b;
    float total = 0.0f;
    for (int term = 0; term < 10; ++term) {
        // Three draws per term: centre, quadratic coefficient, offset.
        XOR_RAND(rng, draw_a);
        const float shifted = fmaf(-draw_a, 10.0f, x);
        XOR_RAND(rng, draw_a);
        XOR_RAND(rng, draw_b);
        const float raw_denom = fmaf(fmaf(draw_a, 20.0f, 5.0f), shifted * shifted, fmaf(draw_b, 0.2f, 1.0f));
        // Keep the magnitude at or above FLT_MIN so the division stays finite.
        const float denom = copysignf(fmaxf(fabsf(raw_denom), FLT_MIN), raw_denom);
        total -= 1.0f / denom;
    }
    return total;
}

// 2D test function built from x1^2 + x2^2 and the FABE13_COS approximations of
// cos(2*pi*x1) and cos(2*pi*x2); the constants are those of the original code.
static __declspec(noalias) __forceinline float RastriginFunc(const float x1, const float x2) noexcept
{
    float cos_a, cos_b;
    FABE13_COS(6.28318530717958647692f * x1, cos_a);
    FABE13_COS(6.28318530717958647692f * x2, cos_b);

    const float sq_sum = fmaf(x1, x1, x2 * x2);
    const float ripple = fmaf(cos_a + cos_b, 10.0f, -14.6f);
    const float envelope = fmaf(-sq_sum, 0.25f, 18.42f);
    return (sq_sum - ripple) * envelope;
}

// 1D test function: a constant plus 14 sine/cosine harmonics of 2*pi*x whose
// coefficients are drawn with XOR_RAND from the stream started at `seed`.
static __declspec(noalias) __forceinline float HillFunc(const float x, const float seed) noexcept
{
    __declspec(align(32)) float harm_angle[14u];
    __declspec(align(32)) float harm_sin[14u];
    __declspec(align(32)) float harm_cos[14u];

    const float base_angle = 6.28318530717958647692f * x;
#pragma loop(ivdep)
    for (int h = 0; h < 14; ++h) {
        harm_angle[h] = base_angle * static_cast<float>(h + 1);
    }
    FABE13_SINCOS(harm_angle, harm_sin, harm_cos, 14u);

    float rng = seed;
    float coef_sin, coef_cos;
    XOR_RAND(rng, coef_sin);
    float total = fmaf(coef_sin, 2.0f, -1.1f);

    // Harmonics are accumulated from the highest index down, two draws each.
    for (int h = 13; h >= 0; --h) {
        XOR_RAND(rng, coef_sin);
        XOR_RAND(rng, coef_cos);
        total += fmaf(fmaf(coef_sin, 2.0f, -1.1f), harm_sin[h], fmaf(coef_cos, 2.0f, -1.1f) * harm_cos[h]);
    }
    return total;
}

// 2D test function: two double sums over 8 x 8 index pairs with coefficients
// drawn with XOR_RAND_GRSH from `seed`, combined as -sqrt(part1^2 + part2^2).
static __declspec(noalias) __forceinline float GrishaginFunc(const float x1, const float x2, const float seed) noexcept
{
    __declspec(align(32)) float arg_first[8u];
    __declspec(align(32)) float arg_second[8u];
#pragma loop(ivdep)
    for (int h = 0; h < 8; ++h) {
        const float pi_mult = 3.14159265358979323846f * static_cast<float>(h + 1);
        arg_first[h] = pi_mult * x1;
        arg_second[h] = pi_mult * x2;
    }

    __declspec(align(32)) float sin_first[8u], cos_first[8u];
    __declspec(align(32)) float sin_second[8u], cos_second[8u];
    FABE13_SINCOS(arg_first, sin_first, cos_first, 8u);
    FABE13_SINCOS(arg_second, sin_second, cos_second, 8u);

    float sum_a = 0.0f;
    float sum_b = 0.0f;
    float rng = seed;
    float draw_a, draw_b;
    // Outer index runs from 7 down to 0; four draws are consumed per pair.
    for (int outer = 7; outer >= 0; --outer) {
        for (size_t inner = 0u; inner < 8u; ++inner) {
            const float sin_sq = sin_first[outer] * sin_first[outer];
            const float cos_sq = cos_second[inner] * cos_second[inner];
            XOR_RAND_GRSH(rng, draw_a);
            XOR_RAND_GRSH(rng, draw_b);
            sum_a = fmaf(draw_a, sin_sq, fmaf(draw_b, cos_sq, sum_a));
            XOR_RAND_GRSH(rng, draw_a);
            XOR_RAND_GRSH(rng, draw_b);
            sum_b = fmaf(-draw_a, cos_sq, fmaf(draw_b, sin_sq, sum_b));
        }
    }
    return -sqrtf(fmaf(sum_a, sum_a, sum_b * sum_b));
}

// Computes the next trial point inside [x1, x2] from the end values y1, y2,
// the slope estimate _m, the dimension _N and the parameter _r:
//   _N == 1:  ((x1 + x2) - (y2 - y1) / _m) / 2
//   _N == 2:  ((x1 + x2) - sign(y2 - y1) * _r * (y2 - y1)^2 / _m^2) / 2
//   else:     ((x1 + x2) - sign(y2 - y1) * _r * |y2 - y1|^_N / _m^_N) / 2
static __declspec(noalias) __forceinline float Shag(const float _m, const float x1, const float x2, const float y1,
    const float y2, const float _N, const float _r) noexcept
{
    const float diff = y2 - y1;
    // +/-1 with the sign opposite to diff's sign bit (diff == +0 gives -1).
    // The former expression cast the address of diff, not its bits.
    const float sign_mult = copysignf(1.0f, -diff);

    if (_N == 1.0f) {
        return fmaf(-(1.0f / _m), diff, x1 + x2) * 0.5f;
    }
    if (_N == 2.0f) {
        return fmaf(sign_mult / (_m * _m), diff * diff * _r, x1 + x2) * 0.5f;
    }
    // The sign is carried by sign_mult, so the power is taken of |diff|;
    // powf with a negative base is NaN for fractional _N and signed for odd _N.
    return fmaf(sign_mult / powf(_m, _N), powf(fabsf(diff), _N) * _r, x1 + x2) * 0.5f;
}

// Recomputes R for arr[0..n) with one shared estimate m, eight intervals at a
// time; the remainder goes through Interval::ChangeCharacteristic.
// The vector path uses an approximate reciprocal of m refined by one
// Newton-Raphson step, whereas the scalar path divides.
static __declspec(noalias) __forceinline void RecomputeR_ConstM_AVX2(Interval* const* const __restrict arr, const size_t n, const float m) noexcept {
    const __m256 vm = _mm256_set1_ps(m);

    __m256 vinvm = _mm256_rcp_ps(vm);
    vinvm = _mm256_mul_ps(vinvm, _mm256_fnmadd_ps(vm, vinvm, _mm256_set1_ps(2.0f)));

    size_t i = 0;
    // Largest multiple of 8 not above n, computed in size_t so large n is not truncated.
    const size_t limit = n & ~static_cast<size_t>(7u);
    while (i < limit) {
        __declspec(align(32)) float q[8], nf[8], ord[8];
        int k = 0;
#pragma loop(ivdep)
        while (k < 8) {
            const Interval* const __restrict p = arr[i + k];
            q[k] = p->quadratic_term;
            nf[k] = p->N_factor;
            ord[k] = p->ordinate_factor;
            ++k;
        }
        const __m256 vq = _mm256_load_ps(q);
        const __m256 vnf = _mm256_load_ps(nf);
        const __m256 vod = _mm256_load_ps(ord);

        // R = q / m + (m * nf + ord)
        const __m256 t = _mm256_fmadd_ps(vm, vnf, vod);
        const __m256 res = _mm256_fmadd_ps(vq, vinvm, t);
        __declspec(align(32)) float out[8];
        _mm256_store_ps(out, res);
        k = 0;
#pragma loop(ivdep)
        while (k < 8) {
            arr[i + k]->R = out[k];
            ++k;
        }
        i += 8u;
    }
    while (i < n) {
        arr[i]->ChangeCharacteristic(m);
        ++i;
    }
}

// Recomputes R for arr[0..n) with a per-interval estimate
//   m_i = GLOBAL_FACTOR * (x2 - x1) + alpha * M,
// eight intervals at a time; the remainder is handled by the scalar loop.
// The vector path uses an approximate reciprocal of m_i refined by one
// Newton-Raphson step, whereas the scalar path divides.
static __declspec(noalias) __forceinline void RecomputeR_AffineM_AVX2(Interval* const* const __restrict arr, const size_t n, const float GLOBAL_FACTOR, const float alpha) noexcept {
    const __m256 vGF = _mm256_set1_ps(GLOBAL_FACTOR);
    const __m256 va = _mm256_set1_ps(alpha);

    size_t i = 0;
    // Largest multiple of 8 not above n, computed in size_t so large n is not truncated.
    const size_t limit = n & ~static_cast<size_t>(7u);
    while (i < limit) {
        __declspec(align(32)) float len[8], Mv[8], q[8], nf[8], ord[8];
        int k = 0;
#pragma loop(ivdep)
        while (k < 8) {
            const Interval* const p = arr[i + k];
            len[k] = p->x2 - p->x1;
            Mv[k] = p->M;
            q[k] = p->quadratic_term;
            nf[k] = p->N_factor;
            ord[k] = p->ordinate_factor;
            ++k;
        }
        const __m256 vlen = _mm256_load_ps(len);
        const __m256 vM = _mm256_load_ps(Mv);
        const __m256 vq = _mm256_load_ps(q);
        const __m256 vnf = _mm256_load_ps(nf);
        const __m256 vod = _mm256_load_ps(ord);

        const __m256 vm = _mm256_fmadd_ps(vGF, vlen, _mm256_mul_ps(va, vM));
        __m256 vinvm = _mm256_rcp_ps(vm);
        vinvm = _mm256_mul_ps(vinvm, _mm256_fnmadd_ps(vm, vinvm, _mm256_set1_ps(2.0f)));
        // R = q / m_i + (m_i * nf + ord)
        const __m256 t = _mm256_fmadd_ps(vm, vnf, vod);
        const __m256 res = _mm256_fmadd_ps(vq, vinvm, t);
        __declspec(align(32)) float out[8];
        _mm256_store_ps(out, res);
        k = 0;
#pragma loop(ivdep)
        while (k < 8) {
            arr[i + k]->R = out[k];
            ++k;
        }
        i += 8u;
    }
    while (i < n) {
        const Interval* const __restrict p = arr[i];
        const float mi = fmaf(GLOBAL_FACTOR, p->x2 - p->x1, p->M * alpha);
        arr[i]->R = fmaf(1.0f / mi, p->quadratic_term, fmaf(mi, p->N_factor, p->ordinate_factor));
        ++i;
    }
}

// Maps the curve parameter x_param (clamped to [a, b) of gActiveMap) to the
// centre of the corresponding finest-level cell and returns it in
// (out_x1, out_x2).
__declspec(noalias) __forceinline void HitTest2D_analytic(const float x_param, float& out_x1, float& out_x2) noexcept
{
    const float a = gActiveMap.a;
    const float inv_lenx = gActiveMap.inv_lenx;
    const uint32_t scale = gActiveMap.scale;
    const uint32_t scale_minus_1 = scale - 1u;
    const float lenx = gActiveMap.lenx;
    const float leny = gActiveMap.leny;
    const float c = gActiveMap.c;
    const uint8_t start = gActiveMap.start;
    const int levels = gActiveMap.levels;

    // Normalise to [0, 1) and convert to an integer cell index in [0, scale).
    float norm = (x_param - a) * inv_lenx;
    norm = fminf(fmaxf(norm, 0.0f), 0x1.fffffep-1f);
    uint32_t idx = static_cast<uint32_t>(norm * static_cast<float>(scale));
    idx = idx > scale_minus_1 ? scale_minus_1 : idx;

    float sx = lenx, sy = leny;
    float x1 = a, x2 = c;
    uint8_t type = start;
    int l = levels - 1;

    // Consume the index two bits per level, most significant digit first.
#pragma loop(ivdep)
    while (l >= 0) {
        const uint32_t q = (idx >> (l * 2)) & 3u;
        const Step s = g_step_tbl[type][q];
        type = s.next;
        sx *= 0.5f; sy *= 0.5f;
        x1 += s.dx ? sx : 0.0f;
        x2 += s.dy ? sy : 0.0f;
        --l;
    }
    out_x1 = x1 + sx * 0.5f;
    out_x2 = x2 + sy * 0.5f;
}

// Inverse of HitTest2D_analytic: maps the point (px, py), clamped to the
// rectangle of gActiveMap, to the curve parameter of the finest-level cell
// that contains it.
__declspec(noalias) __forceinline float FindX2D_analytic(const float px, const float py) noexcept
{
    const float a = gActiveMap.a;
    const float b = gActiveMap.b;
    const float c = gActiveMap.c;
    const float d = gActiveMap.d;
    const float lenx = gActiveMap.lenx;
    const float leny = gActiveMap.leny;
    const uint32_t scale = gActiveMap.scale;
    const uint8_t start = gActiveMap.start;
    const int levels = gActiveMap.levels;

    const float clamped_px = fminf(fmaxf(px, a), b);
    const float clamped_py = fminf(fmaxf(py, c), d);

    float sx = lenx, sy = leny;
    float x0 = a, y0 = c;
    uint32_t idx = 0u;
    uint8_t type = start;
    int l = 0;

#pragma loop(ivdep)
    while (l < levels) {
        sx *= 0.5f; sy *= 0.5f;
        const float mx = x0 + sx;
        const float my = y0 + sy;

        // Quadrant flags relative to the cell centre; anything not strictly in
        // the top-right, top-left or bottom-left quadrant falls into `none`.
        const uint32_t tr = static_cast<uint32_t>((clamped_px > mx) & (clamped_py > my));
        const uint32_t tl = static_cast<uint32_t>((clamped_px < mx) & (clamped_py > my));
        const uint32_t dl = static_cast<uint32_t>((clamped_px < mx) & (clamped_py < my));
        const uint32_t none = static_cast<uint32_t>(1u ^ (tr | tl | dl));

        // dd: bit 1 = right half, bit 0 = upper half.
        const uint32_t dd = (tr << 1) | tr | tl | (none << 1);

        const InvStep inv = g_inv_tbl[type][dd];
        type = inv.next;
        idx = (idx << 2) | inv.q;

        const uint32_t dx = dd >> 1;
        const uint32_t dy = dd & 1u;
        x0 += dx ? sx : 0.0f;
        y0 += dy ? sy : 0.0f;
        ++l;
    }

    const float scale_reciprocal = 1.0f / static_cast<float>(scale);
    return fmaf(static_cast<float>(idx) * scale_reciprocal, lenx, a);
}

// Creates the Boost.MPI environment and communicator, rebuilds gActiveMap for
// the rectangle [a, b] x [c, d] with a start type chosen from the MPI rank
// (0 -> Top, 1 -> Down, 2 -> Left, otherwise Right) and returns the rank.
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline int AgpInit(const int peanoLevel, const float a, const float b, const float c, const float d) noexcept
{
    g_env = new boost::mpi::environment();
    g_world = new boost::mpi::communicator();
    const int rank = g_world->rank();

    switch (rank) {
    case 0:
        new(&gActiveMap) Peano2DMap(peanoLevel, a, b, c, d, static_cast<uint8_t>(Top));
        break;
    case 1:
        new(&gActiveMap) Peano2DMap(peanoLevel, a, b, c, d, static_cast<uint8_t>(Down));
        break;
    case 2:
        new(&gActiveMap) Peano2DMap(peanoLevel, a, b, c, d, static_cast<uint8_t>(Left));
        break;
    default:
        new(&gActiveMap) Peano2DMap(peanoLevel, a, b, c, d, static_cast<uint8_t>(Right));
    }
    return rank;
}

// One-dimensional search for the minimum of ShekelFunc(., seed) on [a, b].
//
// The loop keeps a max-heap R of intervals ordered by their characteristic,
// splits the top interval at the point returned by Shag, and stops after
// global_iterations trials or once the shorter half is below epsilon.
// `mode` enables the additional rescaling branch driven by the longest
// interval length dmax.
//
// Output: *out_data is a CoTaskMemAlloc'ed array of *out_len floats holding a
// (value, point) pair for every improvement of the best value, followed by the
// trial count and the final interval length. If the allocation fails,
// *out_data is null and *out_len is 0.
extern "C" __declspec(dllexport) __declspec(noalias)
void AGP_1D(const float global_iterations,
    const float a, const float b, const float r,
    const bool mode, const float epsilon, const float seed,
    float** const __restrict out_data, size_t* const __restrict out_len) noexcept
{
    // Rewind the arena so the intervals of this run reuse its storage.
    Slab* const __restrict slab = tls.local();
    slab->current = slab->base;

    int schetchick = 0; // trials performed so far
    const float initial_length = b - a;
    float dmax = initial_length;
    const float threshold_03 = 0.3f * initial_length;
    const float inv_threshold_03 = 1.0f / threshold_03;
    const float start_val = ShekelFunc(a, seed);
    float best_f = ShekelFunc(b, seed);
    float x_Rmax_1 = a;
    float x_Rmax_2 = b;
    float y_Rmax_1 = start_val;
    float y_Rmax_2 = best_f;

    std::vector<float, boost::alignment::aligned_allocator<float, 16u>> Extr;
    std::vector<Interval* __restrict, boost::alignment::aligned_allocator<Interval* __restrict, 64u>> R;
    Extr.reserve(static_cast<size_t>(global_iterations) << 2u);
    R.reserve(static_cast<size_t>(global_iterations) << 1u);
    R.emplace_back(new Interval(a, b, start_val, best_f, 1.0f));

    float Mmax = R.front()->M;
    float m = r * Mmax;

    // Constant-m update shared by the non-adaptive paths: raises Mmax/m when the
    // new halves are steeper, sets R of both halves, and - when the estimate
    // grew by 15 % or more - recomputes R for every stored interval.
    const auto update_const_m = [&](Interval* const left, Interval* const right, const float currM) {
        const bool grew = currM > Mmax;
        const bool rebuild = grew && !(currM < 1.15f * Mmax);
        if (grew) {
            Mmax = currM;
            m = r * Mmax;
        }
        left->ChangeCharacteristic(m);
        right->ChangeCharacteristic(m);
        if (rebuild) {
            const size_t live = R.size();
            if (live < 64u) {
                size_t i = 0u;
#pragma loop(ivdep)
                while (i < live) {
                    R[i]->ChangeCharacteristic(m);
                    ++i;
                }
            }
            else {
                RecomputeR_ConstM_AVX2(R.data(), live, m);
            }
            std::make_heap(R.begin(), R.end(), ComparePtr);
        }
    };

    while (true) {
        const float new_point = Shag(m, x_Rmax_1, x_Rmax_2, y_Rmax_1, y_Rmax_2, 1.0f, r);
        const float new_value = ShekelFunc(new_point, seed);
        if (new_value < best_f) {
            best_f = new_value;
            Extr.emplace_back(best_f);
            Extr.emplace_back(new_point);
        }

        std::pop_heap(R.begin(), R.end(), ComparePtr);
        const Interval* const __restrict promejutochny_otrezok = R.back();
        const float new_x1 = promejutochny_otrezok->x1;
        const float new_x2 = promejutochny_otrezok->x2;
        const float len2 = new_x2 - new_point;
        const float len1 = new_point - new_x1;
        const float interval_len = len1 < len2 ? len1 : len2;

        if (++schetchick == static_cast<int>(global_iterations) || interval_len < epsilon) {
            Extr.emplace_back(static_cast<float>(schetchick));
            Extr.emplace_back(interval_len);
            *out_len = Extr.size();
            *out_data = reinterpret_cast<float*>(CoTaskMemAlloc(sizeof(float) * (*out_len)));
            if (*out_data) {
                memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len));
            }
            else {
                *out_len = 0u;
            }
            return;
        }

        // The interval being split leaves the heap storage here. Keeping it in R
        // would let the dmax scan see its full length again and let make_heap move
        // it back into the heap while a live interval is overwritten instead.
        R.pop_back();

        Interval* const __restrict curr = new Interval(new_x1, new_point, promejutochny_otrezok->y1, new_value, 1.0f);
        Interval* const __restrict curr1 = new Interval(new_point, new_x2, new_value, promejutochny_otrezok->y2, 1.0f);
        const float currM = curr->M > curr1->M ? curr->M : curr1->M;
        const size_t r_size = R.size();

        bool rescaled = false;
        if (mode) {
            // The longest interval was just split: rescan for the new maximum length.
            if (len2 + len1 == dmax) {
                dmax = len2 > len1 ? len2 : len1;
                size_t i = 0u;
#pragma loop(ivdep)
                while (i < r_size) {
                    const float len_item = R[i]->x2 - R[i]->x1;
                    if (len_item > dmax) dmax = len_item;
                    ++i;
                }
            }

            if (threshold_03 > dmax && !(schetchick % 3) || 10.0f * dmax < initial_length) {
                if (currM > Mmax) {
                    Mmax = currM;
                    m = r * Mmax;
                }
                const float progress = fmaf(-inv_threshold_03, dmax, 1.0f);
                const float alpha = fmaf(progress, progress, 1.0f);
                const float betta = 2.0f - alpha;
                const float MULTIPLIER = (1.0f / dmax) * Mmax;
                const float global_coeff = fmaf(MULTIPLIER, r, -MULTIPLIER);
                const float GLOBAL_FACTOR = betta * global_coeff;

                curr->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len1, curr->M * alpha));
                curr1->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len2, curr1->M * alpha));

                if (r_size < 64u) {
                    size_t i = 0u;
#pragma loop(ivdep)
                    while (i < r_size) {
                        R[i]->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, R[i]->x2 - R[i]->x1, R[i]->M * alpha));
                        ++i;
                    }
                }
                else {
                    RecomputeR_AffineM_AVX2(R.data(), r_size, GLOBAL_FACTOR, alpha);
                }
                std::make_heap(R.begin(), R.end(), ComparePtr);
                rescaled = true;
            }
        }
        if (!rescaled) {
            update_const_m(curr, curr1, currM);
        }

        R.emplace_back(curr);
        std::push_heap(R.begin(), R.end(), ComparePtr);
        R.emplace_back(curr1);
        std::push_heap(R.begin(), R.end(), ComparePtr);

        const Interval* const __restrict top_ptr = R.front();
        x_Rmax_1 = top_ptr->x1;
        x_Rmax_2 = top_ptr->x2;
        y_Rmax_1 = top_ptr->y1;
        y_Rmax_2 = top_ptr->y2;
    }
}

// Two-dimensional search for the minimum of RastriginFunc on [a, b] x [c, d],
// run cooperatively by all MPI ranks. Each rank searches along the curve
// described by gActiveMap, keeps a max-heap R of intervals, and exchanges
// CtrlMsg messages on tag 0: either its current top interval (to be injected
// by the others) or a termination request.
//
// Only rank 0 writes the result: *out_data is a CoTaskMemAlloc'ed array of
// *out_len floats holding a (value, x1, x2) triple for every improvement of
// the best value, followed by the trial count and the final interval length.
// If the allocation fails, *out_data is null and *out_len is 0.
//
// The parameters N and seed are not read in this function.
extern "C" __declspec(dllexport) __declspec(noalias)
void AGP_2D(
    const float N, const float global_iterations,
    const float a, const float b, const float c, const float d, const float r,
    const bool mode, const float epsilon, const float seed,
    float** const __restrict out_data, size_t* const __restrict out_len) noexcept
{
    // Rewind the arena so the intervals of this run reuse its storage.
    Slab* const __restrict slab = tls.local();
    slab->current = slab->base;

    int schetchick = 0; // trials performed so far
    int no_improve = 0; // consecutive trials without a new best value
    const int rank = g_world->rank();
    const int world_size = g_world->size();

    // Discard control messages that are still pending from an earlier run.
    while (g_world->iprobe(boost::mpi::any_source, 0)) {
        CtrlMsg dummy;
        g_world->recv(boost::mpi::any_source, 0, dummy);
    }

    // Half of a finest-level cell: the search runs between cell centres.
    const float inv_divider = ldexpf(1.0f, -((gActiveMap.levels << 1) + 1));
    const float x_addition = (b - a) * inv_divider;
    const float y_addition = (d - c) * inv_divider;
    const float true_start = a + x_addition;
    const float true_end = b - x_addition;

    float x_Rmax_1 = true_start;
    float x_Rmax_2 = true_end;
    const float initial_length = x_Rmax_2 - x_Rmax_1;
    float dmax = initial_length;
    const float threshold_03 = 0.3f * initial_length;
    const float inv_threshold_03 = 1.0f / threshold_03;

    const float start_val = rank % 3 ? RastriginFunc(true_end, d - y_addition) : RastriginFunc(true_start, c + y_addition);
    float best_f = rank % 2 ? RastriginFunc(true_start, d - y_addition) : RastriginFunc(true_end, c + y_addition);
    float y_Rmax_1 = start_val;
    float y_Rmax_2 = best_f;

    std::vector<float, boost::alignment::aligned_allocator<float, 16u>> Extr;
    std::vector<Interval* __restrict, boost::alignment::aligned_allocator<Interval* __restrict, 64u>> R;
    Extr.reserve(static_cast<size_t>(global_iterations) << 2u);
    R.reserve(static_cast<size_t>(global_iterations) << 1u);
    R.emplace_back(new Interval(true_start, true_end, start_val, best_f, 2.0f));

    float Mmax = R.front()->M;
    float m = r * Mmax;

    // Give the initial interval a characteristic and make it the current top, so
    // a message that arrives during the first iteration compares against defined data.
    R.front()->ChangeCharacteristic(m);
    const Interval* __restrict top_ptr = R.front();

    // Appends the trial count and final length to Extr and copies it to the caller.
    const auto publish = [&](const float final_len) {
        Extr.emplace_back(static_cast<float>(schetchick));
        Extr.emplace_back(final_len);
        *out_len = Extr.size();
        *out_data = reinterpret_cast<float*>(CoTaskMemAlloc(sizeof(float) * (*out_len)));
        if (*out_data) {
            memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len));
        }
        else {
            *out_len = 0u;
        }
    };

    // Constant-m update shared by the non-adaptive paths: raises Mmax/m when the
    // new halves are steeper, sets R of both halves, and - when the estimate
    // grew by 15 % or more - recomputes R for every stored interval.
    const auto update_const_m = [&](Interval* const left, Interval* const right, const float currM) {
        const bool grew = currM > Mmax;
        const bool rebuild = grew && !(currM < 1.15f * Mmax);
        if (grew) {
            Mmax = currM;
            m = r * Mmax;
        }
        left->ChangeCharacteristic(m);
        right->ChangeCharacteristic(m);
        if (rebuild) {
            const size_t live = R.size();
            if (live < 64u) {
                size_t i = 0u;
#pragma loop(ivdep)
                while (i < live) {
                    R[i]->ChangeCharacteristic(m);
                    ++i;
                }
            }
            else {
                RecomputeR_ConstM_AVX2(R.data(), live, m);
            }
            std::make_heap(R.begin(), R.end(), ComparePtr);
        }
    };

    while (true) {
        const float interval_len = x_Rmax_2 - x_Rmax_1;
        const bool stagnation = no_improve > 100 && schetchick > 270;
        const float p = fmaf(-1.0f / initial_length, dmax, 1.0f);

        // Handle everything the other ranks have sent since the last iteration.
        while (g_world->iprobe(boost::mpi::any_source, 0)) {
            CtrlMsg in;
            g_world->recv(boost::mpi::any_source, 0, in);
            if (in.kind) {
                // Another rank asked for termination.
                if (!rank) {
                    publish(interval_len);
                }
                return;
            }

            // Map the received interval onto this rank's curve and evaluate it.
            const float sx = FindX2D_analytic(in.xchg.s_x1, in.xchg.s_x2);
            const float ex = FindX2D_analytic(in.xchg.e_x1, in.xchg.e_x2);
            Interval* const __restrict injected = new Interval(sx, ex,
                RastriginFunc(in.xchg.s_x1, in.xchg.s_x2),
                RastriginFunc(in.xchg.e_x1, in.xchg.e_x2), 2.0f);
            injected->ChangeCharacteristic(m);

            if (injected->R > 1.15f * top_ptr->R) {
                const float k = stagnation
                    ? fmaf((1.0f / expm1f(1.0f)) * 1.0f, expm1f(p), 0.3f)
                    : fmaf((1.0f / expm1f(1.0f)) * 0.6f, expm1f(p), 0.6f);
                injected->R = in.xchg.Rtop * k;
                R.emplace_back(injected);
                std::push_heap(R.begin(), R.end(), ComparePtr);
            }
        }

        const int T = static_cast<int>(fmaf(-expm1f(p), 264.0f, 277.0f));
        const bool want_term = interval_len < epsilon || schetchick == static_cast<int>(global_iterations);

        if (!(++schetchick % T) || stagnation || want_term) {
            CtrlMsg out{};
            out.kind = want_term;
            if (!out.kind) {
                float s_x1, s_x2, e_x1, e_x2;
                HitTest2D_analytic(top_ptr->x1, s_x1, s_x2);
                HitTest2D_analytic(top_ptr->x2, e_x1, e_x2);
                out.xchg = CrossMsg{ s_x1, s_x2, e_x1, e_x2, top_ptr->R };
            }
            int i = 0;
            while (i < world_size) {
                if (i != rank) g_world->isend(i, 0, out);
                ++i;
            }
            if (out.kind) {
                if (!rank) {
                    publish(interval_len);
                }
                return;
            }
        }

        // An injected interval may have become the heap top, so the trial point is
        // generated from the interval that pop_heap is about to remove.
        const Interval* const __restrict split_src = R.front();
        const float new_point = Shag(m, split_src->x1, split_src->x2, split_src->y1, split_src->y2, 2.0f, r);
        float new_x1_val, new_x2_val;
        HitTest2D_analytic(new_point, new_x1_val, new_x2_val);
        const float new_value = RastriginFunc(new_x1_val, new_x2_val);

        if (new_value < best_f) {
            best_f = new_value;
            Extr.emplace_back(best_f);
            Extr.emplace_back(new_x1_val);
            Extr.emplace_back(new_x2_val);
            no_improve = 0;
        }
        else {
            ++no_improve;
        }

        std::pop_heap(R.begin(), R.end(), ComparePtr);
        Interval* const __restrict promejutochny_otrezok = R.back();
        // The interval being split leaves the heap storage here. Keeping it in R
        // would let the dmax scan see its full length again and let make_heap move
        // it back into the heap while a live interval is overwritten instead.
        R.pop_back();

        const float segment_x1 = promejutochny_otrezok->x1;
        const float segment_x2 = promejutochny_otrezok->x2;
        const float len2 = segment_x2 - new_point;
        const float len1 = new_point - segment_x1;

        Interval* const __restrict curr = new Interval(segment_x1, new_point, promejutochny_otrezok->y1, new_value, 2.0f);
        Interval* const __restrict curr1 = new Interval(new_point, segment_x2, new_value, promejutochny_otrezok->y2, 2.0f);
        const float currM = curr->M > curr1->M ? curr->M : curr1->M;
        const size_t r_size = R.size();

        bool rescaled = false;
        if (mode) {
            // The longest interval was just split: rescan for the new maximum length.
            if (len2 + len1 == dmax) {
                dmax = len2 > len1 ? len2 : len1;
                size_t i = 0u;
#pragma loop(ivdep)
                while (i < r_size) {
                    const float len_item = R[i]->x2 - R[i]->x1;
                    if (len_item > dmax) dmax = len_item;
                    ++i;
                }
            }

            if (threshold_03 > dmax && !(schetchick % 3) || 10.0f * dmax < initial_length) {
                if (currM > Mmax) {
                    Mmax = currM;
                    m = r * Mmax;
                }
                const float progress = fmaf(-inv_threshold_03, dmax, 1.0f);
                const float alpha = fmaf(progress, progress, 1.0f);
                const float betta = 2.0f - alpha;
                const float MULTIPLIER = (1.0f / dmax) * Mmax;
                const float global_coeff = fmaf(MULTIPLIER, r, -MULTIPLIER);
                const float GLOBAL_FACTOR = betta * global_coeff;

                curr->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len1, curr->M * alpha));
                curr1->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len2, curr1->M * alpha));

                if (r_size < 64u) {
                    size_t i = 0u;
#pragma loop(ivdep)
                    while (i < r_size) {
                        R[i]->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, R[i]->x2 - R[i]->x1, R[i]->M * alpha));
                        ++i;
                    }
                }
                else {
                    RecomputeR_AffineM_AVX2(R.data(), r_size, GLOBAL_FACTOR, alpha);
                }
                std::make_heap(R.begin(), R.end(), ComparePtr);
                rescaled = true;
            }
        }
        if (!rescaled) {
            update_const_m(curr, curr1, currM);
        }

        R.emplace_back(curr);
        std::push_heap(R.begin(), R.end(), ComparePtr);
        R.emplace_back(curr1);
        std::push_heap(R.begin(), R.end(), ComparePtr);

        top_ptr = R.front();
        x_Rmax_1 = top_ptr->x1;
        x_Rmax_2 = top_ptr->x2;
        y_Rmax_1 = top_ptr->y1;
        y_Rmax_2 = top_ptr->y2;
    }
}

// Worker loop for ranks other than 0: polls for a tag-1 message from rank 0
// and, each time one arrives, runs AGP_2D with a fixed set of parameters.
// The loop never exits.
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AgpWaitStartAndRun() noexcept
{
    float* __restrict result_buf;
    size_t result_len;
    int wake_token;
    for (;;) {
        if (!g_world->iprobe(0, 1)) {
            continue;
        }
        g_world->recv(0, 1, wake_token);
        AGP_2D(2.0f, 10000.0f, -2.2f, 1.8f, -2.2f, 1.8f, 2.5f, false, 0.00001f, GetTickCount(), &result_buf, &result_len);
    }
}

// Sends a tag-1 message carrying the integer 0 to every rank except rank 0.
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AgpStartWorkers() noexcept
{
    const int world_size = g_world->size();
    for (int dest = 1; dest < world_size; ++dest) {
        g_world->isend(dest, 1, 0);
    }
}

// Releases a buffer with CoTaskMemFree; AGP_1D and AGP_2D allocate their
// output arrays with CoTaskMemAlloc.
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AGP_Free(float* const __restrict p) noexcept
{
CoTaskMemFree(p);
} - код основного .cpp файла dll раньше, #include "MyForm.h"
#include <float.h>

using namespace System;
using namespace System::Windows::Forms;

// Signature used for the DLL export "AgpInit"; main() treats the returned int as the process rank.
typedef int(__cdecl* PInit)(int, float, float, float, float);
// Signature of a no-argument, no-result DLL export; main() uses it for "AgpWaitStartAndRun".
typedef void(__cdecl* PStartWorkers)();

// Entry point executed by every process. Loads TEST_FUNC.dll, calls AgpInit and
// uses its result as the rank: rank 0 runs the WinForms UI, every other rank
// enters the DLL's AgpWaitStartAndRun loop.
// Returns 1 when the DLL or one of the two required exports cannot be resolved,
// 0 otherwise.
[STAThread]
int main() {
    HMODULE h = LoadLibraryW(L"TEST_FUNC.dll");
    if (h == nullptr) {
        // Without the DLL there is nothing to call; bail out instead of
        // invoking a null function pointer below.
        return 1;
    }
    auto AgpInit = (PInit)GetProcAddress(h, "AgpInit");
    auto AgpWaitStartAndRun = (PStartWorkers)GetProcAddress(h, "AgpWaitStartAndRun");
    if (AgpInit == nullptr || AgpWaitStartAndRun == nullptr) {
        return 1;
    }

    const int rank = AgpInit(12, -2.2f, 1.8f, -2.2f, 1.8f);
    if (!rank) {
        // Only rank 0 owns the user interface.
        Application::EnableVisualStyles();
        Application::SetCompatibleTextRenderingDefault(false);
        Application::Run(gcnew TESTAGP::MyForm(h));
    }
    else {
        AgpWaitStartAndRun();
    }
    return 0;

} - код MyForm.cpp раньше, #pragma once

#define WIN32_LEAN_AND_MEAN
#include <Windows.h>

#include <algorithm>

#include <vector>

#include <utility>

namespace TESTAGP {
using namespace System;
using namespace System::IO;

text
/// Main window of the 2D AGP demo. Resolves the DLL exports it needs from the
/// module handle passed in by main() and, on a button click, runs one search
/// and plots the returned points on chart2.
ref class MyForm : public System::Windows::Forms::Form {
public:
    /// @param hLib handle of the already loaded DLL, supplied by main().
    MyForm(HMODULE hLib) : hLib(hLib) {
        InitializeComponent();
        // Resolve the exports from the DLL.
        f = (agp_c)GetProcAddress(hLib, "AGP_2D");
        pStart = (start_workers)GetProcAddress(hLib, "AgpStartWorkers");
        pFree = (free_agp)GetProcAddress(hLib, "AGP_Free");
        pInit = (PInit)GetProcAddress(hLib, "AgpInit");
    }

protected:
    ~MyForm() { delete components; }

private:
    HINSTANCE hLib = nullptr;

    // Signatures of the DLL exports resolved in the constructor.
    typedef void(__cdecl* agp_c)(float, float, float, float, float, float, float, bool, float, float, float**, size_t*);
    typedef void(__cdecl* start_workers)();
    typedef void(__cdecl* free_agp)(float*);
    typedef int(__cdecl* PInit)(int, float, float, float, float);

    agp_c f = nullptr;
    start_workers pStart = nullptr;
    free_agp pFree = nullptr;
    PInit pInit = nullptr;

    System::Windows::Forms::Button^ button1;
    System::Windows::Forms::TextBox^ textBox2;
    System::Windows::Forms::DataVisualization::Charting::Chart^ chart2;
    System::Windows::Forms::Label^ label2;
    System::Windows::Forms::Label^ label3;
    System::Windows::Forms::TextBox^ textBox1;
    System::Windows::Forms::TextBox^ textBox3;
    System::Windows::Forms::TextBox^ textBox4;
    System::Windows::Forms::TextBox^ textBox5;
    System::Windows::Forms::TextBox^ textBox6;
    System::Windows::Forms::Label^ label6;
    System::Windows::Forms::Label^ label7;
    System::Windows::Forms::Label^ label8;
    System::Windows::Forms::Label^ label9;
    System::Windows::Forms::Label^ label10;
    System::Windows::Forms::Label^ label1;
    System::Windows::Forms::TextBox^ textBox7;
    System::Windows::Forms::TextBox^ textBox8;
    System::ComponentModel::Container^ components;

    /// Click handler of button1: clears all four chart series, wakes the other
    /// ranks through pStart, runs the search through f, unpacks the returned
    /// float buffer into the text boxes and the chart, and shows the elapsed
    /// wall-clock time in textBox5.
    System::Void button1_Click(System::Object^ sender, System::EventArgs^ e) {
        chart2->Series[0]->Points->Clear();
        chart2->Series[1]->Points->Clear();
        chart2->Series[2]->Points->Clear();
        chart2->Series[3]->Points->Clear();

        LARGE_INTEGER start, end;
        QueryPerformanceCounter(&start);

        float* buf = nullptr;
        size_t len = 0u;
        pStart(); // wake up the ranks != 0
        f(2.0f, 10000.0f, -2.2f, 1.8f, -2.2f, 1.8f, 2.5f, false, 0.00001f, GetTickCount(), &buf, &len);

        if (buf != nullptr) {
            // Copy the result so the DLL-owned buffer can be released immediately.
            std::vector<float> Extr_2D(buf, buf + len);
            pFree(buf);

            // 2D branch: the last two values are schetchick and interval_len.
            // NOTE(review): textBox7 sits next to the "accuracy" label and textBox6
            // next to "iter count", which does not match these names — confirm
            // the order of the two trailing values.
            textBox7->Text = Convert::ToString(Extr_2D.back()); // schetchick
            Extr_2D.pop_back();
            textBox6->Text = Convert::ToString(Extr_2D.back()); // interval_len
            Extr_2D.pop_back();

            // Take the last triple, read from the back as (x2, x1, f).
            float x2 = Extr_2D.back();
            Extr_2D.pop_back();
            float x1 = Extr_2D.back();
            Extr_2D.pop_back();
            float f_val = Extr_2D.back();
            Extr_2D.pop_back();

            // Show the coordinates and the function value.
            textBox4->Text = Convert::ToString(x1);
            textBox3->Text = Convert::ToString(x2);
            textBox2->Text = Convert::ToString(f_val);

            // Plot that point in its own series.
            chart2->Series[2]->Points->AddXY(x1, x2);

            // Remaining points are stored as (f, x1, x2) triples; consume them
            // from the back and plot only the coordinates.
            while (!Extr_2D.empty()) {
                float x2_point = Extr_2D.back();
                Extr_2D.pop_back();
                float x1_point = Extr_2D.back();
                Extr_2D.pop_back();
                Extr_2D.pop_back(); // function value of this point is not displayed
                chart2->Series[0]->Points->AddXY(x1_point, x2_point);
            }
        }

        QueryPerformanceCounter(&end);
        LARGE_INTEGER freq;
        QueryPerformanceFrequency(&freq);
        // Convert ticks to microseconds with a 64-bit multiply-then-divide. The
        // former truncated 2^32/freq reciprocal stored in an int lost precision
        // and could overflow for low counter frequencies.
        const LONGLONG elapsedUs = ((end.QuadPart - start.QuadPart) * 1'000'000LL) / freq.QuadPart;
        textBox5->Text = Convert::ToString(elapsedUs) + " microseconds";
    }

#pragma region Windows Form Designer generated code
/// Designer-generated setup: creates the button, the chart with one chart area,
/// one legend and four FastPoint series, eight text boxes and their labels,
/// assigns their properties and adds them to the form.
/// Keep the statement layout designer-compatible; only the template arguments
/// of the casts are written out explicitly here.
void InitializeComponent(void) {
    System::Windows::Forms::DataVisualization::Charting::ChartArea^ chartArea1 = (gcnew System::Windows::Forms::DataVisualization::Charting::ChartArea());
    System::Windows::Forms::DataVisualization::Charting::Legend^ legend1 = (gcnew System::Windows::Forms::DataVisualization::Charting::Legend());
    System::Windows::Forms::DataVisualization::Charting::Series^ series1 = (gcnew System::Windows::Forms::DataVisualization::Charting::Series());
    System::Windows::Forms::DataVisualization::Charting::Series^ series2 = (gcnew System::Windows::Forms::DataVisualization::Charting::Series());
    System::Windows::Forms::DataVisualization::Charting::Series^ series3 = (gcnew System::Windows::Forms::DataVisualization::Charting::Series());
    System::Windows::Forms::DataVisualization::Charting::Series^ series4 = (gcnew System::Windows::Forms::DataVisualization::Charting::Series());
    this->button1 = (gcnew System::Windows::Forms::Button());
    this->textBox2 = (gcnew System::Windows::Forms::TextBox());
    this->chart2 = (gcnew System::Windows::Forms::DataVisualization::Charting::Chart());
    this->label2 = (gcnew System::Windows::Forms::Label());
    this->textBox1 = (gcnew System::Windows::Forms::TextBox());
    this->textBox3 = (gcnew System::Windows::Forms::TextBox());
    this->textBox4 = (gcnew System::Windows::Forms::TextBox());
    this->textBox5 = (gcnew System::Windows::Forms::TextBox());
    this->textBox6 = (gcnew System::Windows::Forms::TextBox());
    this->label6 = (gcnew System::Windows::Forms::Label());
    this->label7 = (gcnew System::Windows::Forms::Label());
    this->label8 = (gcnew System::Windows::Forms::Label());
    this->label9 = (gcnew System::Windows::Forms::Label());
    this->label10 = (gcnew System::Windows::Forms::Label());
    this->label1 = (gcnew System::Windows::Forms::Label());
    this->textBox7 = (gcnew System::Windows::Forms::TextBox());
    this->label3 = (gcnew System::Windows::Forms::Label());
    this->textBox8 = (gcnew System::Windows::Forms::TextBox());
    (cli::safe_cast<System::ComponentModel::ISupportInitialize^>(this->chart2))->BeginInit();
    this->SuspendLayout();
    //
    // button1
    //
    this->button1->BackColor = System::Drawing::SystemColors::Info;
    this->button1->Cursor = System::Windows::Forms::Cursors::Hand;
    this->button1->FlatAppearance->BorderColor = System::Drawing::Color::FromArgb(static_cast<System::Int32>(static_cast<System::Byte>(64)),
        static_cast<System::Int32>(static_cast<System::Byte>(64)), static_cast<System::Int32>(static_cast<System::Byte>(64)));
    this->button1->FlatAppearance->BorderSize = 3;
    this->button1->FlatAppearance->MouseDownBackColor = System::Drawing::Color::FromArgb(static_cast<System::Int32>(static_cast<System::Byte>(128)),
        static_cast<System::Int32>(static_cast<System::Byte>(128)), static_cast<System::Int32>(static_cast<System::Byte>(255)));
    this->button1->FlatAppearance->MouseOverBackColor = System::Drawing::Color::FromArgb(static_cast<System::Int32>(static_cast<System::Byte>(192)),
        static_cast<System::Int32>(static_cast<System::Byte>(192)), static_cast<System::Int32>(static_cast<System::Byte>(255)));
    this->button1->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
    this->button1->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 14.25F, System::Drawing::FontStyle::Bold));
    this->button1->ForeColor = System::Drawing::SystemColors::ControlDarkDark;
    this->button1->Location = System::Drawing::Point(897, 724);
    this->button1->Name = L"button1";
    this->button1->Size = System::Drawing::Size(275, 75);
    this->button1->TabIndex = 2;
    this->button1->Text = L"SOL";
    this->button1->UseVisualStyleBackColor = false;
    this->button1->Click += gcnew System::EventHandler(this, &MyForm::button1_Click);
    //
    // textBox2
    //
    this->textBox2->BackColor = System::Drawing::SystemColors::ControlLightLight;
    this->textBox2->Cursor = System::Windows::Forms::Cursors::Hand;
    this->textBox2->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, System::Drawing::FontStyle::Bold));
    this->textBox2->ForeColor = System::Drawing::Color::FromArgb(static_cast<System::Int32>(static_cast<System::Byte>(64)), static_cast<System::Int32>(static_cast<System::Byte>(0)),
        static_cast<System::Int32>(static_cast<System::Byte>(64)));
    this->textBox2->Location = System::Drawing::Point(992, 619);
    this->textBox2->Name = L"textBox2";
    this->textBox2->Size = System::Drawing::Size(180, 29);
    this->textBox2->TabIndex = 4;
    //
    // chart2
    //
    this->chart2->BackColor = System::Drawing::SystemColors::ControlLight;
    chartArea1->AxisX->Interval = 0.1;
    chartArea1->AxisX->IsLabelAutoFit = false;
    chartArea1->AxisX->LabelStyle->Font = (gcnew System::Drawing::Font(L"Yu Gothic", 6.75F));
    chartArea1->AxisX->Maximum = 1.8;
    chartArea1->AxisX->Minimum = -2.2;
    chartArea1->AxisX->Title = L"x1";
    chartArea1->AxisX->TitleFont = (gcnew System::Drawing::Font(L"Yu Gothic UI", 11.25F, System::Drawing::FontStyle::Bold));
    chartArea1->AxisY->Interval = 0.1;
    chartArea1->AxisY->IsLabelAutoFit = false;
    chartArea1->AxisY->LabelStyle->Font = (gcnew System::Drawing::Font(L"Yu Gothic", 7.92F));
    chartArea1->AxisY->Maximum = 1.8;
    chartArea1->AxisY->Minimum = -2.2;
    chartArea1->AxisY->Title = L"x2";
    chartArea1->AxisY->TitleFont = (gcnew System::Drawing::Font(L"Yu Gothic UI", 11.25F, System::Drawing::FontStyle::Bold));
    chartArea1->BackColor = System::Drawing::Color::FloralWhite;
    chartArea1->BackGradientStyle = System::Windows::Forms::DataVisualization::Charting::GradientStyle::Center;
    chartArea1->BackSecondaryColor = System::Drawing::Color::AliceBlue;
    chartArea1->Name = L"ChartArea1";
    this->chart2->ChartAreas->Add(chartArea1);
    legend1->BackColor = System::Drawing::Color::Transparent;
    legend1->BackGradientStyle = System::Windows::Forms::DataVisualization::Charting::GradientStyle::Center;
    legend1->BackSecondaryColor = System::Drawing::SystemColors::ActiveCaption;
    legend1->BorderColor = System::Drawing::Color::Transparent;
    legend1->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 11.25F, System::Drawing::FontStyle::Bold));
    legend1->ForeColor = System::Drawing::SystemColors::ActiveCaptionText;
    legend1->HeaderSeparator = System::Windows::Forms::DataVisualization::Charting::LegendSeparatorStyle::ThickGradientLine;
    legend1->HeaderSeparatorColor = System::Drawing::Color::IndianRed;
    legend1->IsTextAutoFit = false;
    legend1->ItemColumnSeparator = System::Windows::Forms::DataVisualization::Charting::LegendSeparatorStyle::ThickGradientLine;
    legend1->ItemColumnSeparatorColor = System::Drawing::Color::IndianRed;
    legend1->Name = L"Legend1";
    this->chart2->Legends->Add(legend1);
    this->chart2->Location = System::Drawing::Point(12, 14);
    this->chart2->Name = L"chart2";
    series1->ChartArea = L"ChartArea1";
    series1->ChartType = System::Windows::Forms::DataVisualization::Charting::SeriesChartType::FastPoint;
    series1->Legend = L"Legend1";
    series1->Name = L"Точки данных";
    series2->ChartArea = L"ChartArea1";
    series2->ChartType = System::Windows::Forms::DataVisualization::Charting::SeriesChartType::FastPoint;
    series2->Legend = L"Legend1";
    series2->Name = L"Точки данных LNA";
    series3->ChartArea = L"ChartArea1";
    series3->ChartType = System::Windows::Forms::DataVisualization::Charting::SeriesChartType::FastPoint;
    series3->Legend = L"Legend1";
    series3->Name = L"Точки данных(финал)";
    series4->ChartArea = L"ChartArea1";
    series4->ChartType = System::Windows::Forms::DataVisualization::Charting::SeriesChartType::FastPoint;
    series4->Legend = L"Legend1";
    series4->Name = L"Точки данных LNA(финал)";
    this->chart2->Series->Add(series1);
    this->chart2->Series->Add(series2);
    this->chart2->Series->Add(series3);
    this->chart2->Series->Add(series4);
    this->chart2->Size = System::Drawing::Size(1160, 785);
    this->chart2->TabIndex = 5;
    //
    // label2
    //
    this->label2->AutoSize = true;
    this->label2->BackColor = System::Drawing::SystemColors::InactiveCaption;
    this->label2->BorderStyle = System::Windows::Forms::BorderStyle::Fixed3D;
    this->label2->Cursor = System::Windows::Forms::Cursors::Hand;
    this->label2->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
    this->label2->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, static_cast<System::Drawing::FontStyle>(((System::Drawing::FontStyle::Bold | System::Drawing::FontStyle::Italic)
        | System::Drawing::FontStyle::Underline))));
    this->label2->ForeColor = System::Drawing::Color::DarkBlue;
    this->label2->Location = System::Drawing::Point(946, 622);
    this->label2->Name = L"label2";
    this->label2->Size = System::Drawing::Size(40, 23);
    this->label2->TabIndex = 8;
    this->label2->Text = L"Extr";
    //
    // textBox1
    //
    this->textBox1->BackColor = System::Drawing::SystemColors::ControlLightLight;
    this->textBox1->Cursor = System::Windows::Forms::Cursors::Hand;
    this->textBox1->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, System::Drawing::FontStyle::Bold));
    this->textBox1->ForeColor = System::Drawing::Color::FromArgb(static_cast<System::Int32>(static_cast<System::Byte>(64)), static_cast<System::Int32>(static_cast<System::Byte>(0)),
        static_cast<System::Int32>(static_cast<System::Byte>(64)));
    this->textBox1->Location = System::Drawing::Point(992, 654);
    this->textBox1->Name = L"textBox1";
    this->textBox1->Size = System::Drawing::Size(180, 29);
    this->textBox1->TabIndex = 13;
    //
    // textBox3
    //
    this->textBox3->BackColor = System::Drawing::SystemColors::ControlLightLight;
    this->textBox3->Cursor = System::Windows::Forms::Cursors::Hand;
    this->textBox3->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, System::Drawing::FontStyle::Bold));
    this->textBox3->ForeColor = System::Drawing::Color::FromArgb(static_cast<System::Int32>(static_cast<System::Byte>(64)), static_cast<System::Int32>(static_cast<System::Byte>(0)),
        static_cast<System::Int32>(static_cast<System::Byte>(64)));
    this->textBox3->Location = System::Drawing::Point(992, 584);
    this->textBox3->Name = L"textBox3";
    this->textBox3->Size = System::Drawing::Size(180, 29);
    this->textBox3->TabIndex = 14;
    //
    // textBox4
    //
    this->textBox4->BackColor = System::Drawing::SystemColors::ControlLightLight;
    this->textBox4->Cursor = System::Windows::Forms::Cursors::Hand;
    this->textBox4->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, System::Drawing::FontStyle::Bold));
    this->textBox4->ForeColor = System::Drawing::Color::FromArgb(static_cast<System::Int32>(static_cast<System::Byte>(64)), static_cast<System::Int32>(static_cast<System::Byte>(0)),
        static_cast<System::Int32>(static_cast<System::Byte>(64)));
    this->textBox4->Location = System::Drawing::Point(992, 549);
    this->textBox4->Name = L"textBox4";
    this->textBox4->Size = System::Drawing::Size(180, 29);
    this->textBox4->TabIndex = 15;
    //
    // textBox5
    //
    this->textBox5->BackColor = System::Drawing::SystemColors::ControlLightLight;
    this->textBox5->Cursor = System::Windows::Forms::Cursors::Hand;
    this->textBox5->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, System::Drawing::FontStyle::Bold));
    this->textBox5->ForeColor = System::Drawing::Color::FromArgb(static_cast<System::Int32>(static_cast<System::Byte>(64)), static_cast<System::Int32>(static_cast<System::Byte>(0)),
        static_cast<System::Int32>(static_cast<System::Byte>(64)));
    this->textBox5->Location = System::Drawing::Point(992, 514);
    this->textBox5->Name = L"textBox5";
    this->textBox5->Size = System::Drawing::Size(180, 29);
    this->textBox5->TabIndex = 16;
    //
    // textBox6
    //
    this->textBox6->BackColor = System::Drawing::SystemColors::ControlLightLight;
    this->textBox6->Cursor = System::Windows::Forms::Cursors::Hand;
    this->textBox6->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, System::Drawing::FontStyle::Bold));
    this->textBox6->ForeColor = System::Drawing::Color::FromArgb(static_cast<System::Int32>(static_cast<System::Byte>(64)), static_cast<System::Int32>(static_cast<System::Byte>(0)),
        static_cast<System::Int32>(static_cast<System::Byte>(64)));
    this->textBox6->Location = System::Drawing::Point(992, 443);
    this->textBox6->Name = L"textBox6";
    this->textBox6->Size = System::Drawing::Size(180, 29);
    this->textBox6->TabIndex = 17;
    //
    // label6
    //
    this->label6->AutoSize = true;
    this->label6->BackColor = System::Drawing::SystemColors::InactiveCaption;
    this->label6->BorderStyle = System::Windows::Forms::BorderStyle::Fixed3D;
    this->label6->Cursor = System::Windows::Forms::Cursors::Hand;
    this->label6->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
    this->label6->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, static_cast<System::Drawing::FontStyle>(((System::Drawing::FontStyle::Bold | System::Drawing::FontStyle::Italic)
        | System::Drawing::FontStyle::Underline))));
    this->label6->ForeColor = System::Drawing::Color::DarkBlue;
    this->label6->Location = System::Drawing::Point(911, 657);
    this->label6->Name = L"label6";
    this->label6->Size = System::Drawing::Size(75, 23);
    this->label6->TabIndex = 18;
    this->label6->Text = L"Extr LNA";
    //
    // label7
    //
    this->label7->AutoSize = true;
    this->label7->BackColor = System::Drawing::SystemColors::InactiveCaption;
    this->label7->BorderStyle = System::Windows::Forms::BorderStyle::Fixed3D;
    this->label7->Cursor = System::Windows::Forms::Cursors::Hand;
    this->label7->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
    this->label7->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, static_cast<System::Drawing::FontStyle>(((System::Drawing::FontStyle::Bold | System::Drawing::FontStyle::Italic)
        | System::Drawing::FontStyle::Underline))));
    this->label7->ForeColor = System::Drawing::Color::DarkBlue;
    this->label7->Location = System::Drawing::Point(957, 587);
    this->label7->Name = L"label7";
    this->label7->Size = System::Drawing::Size(29, 23);
    this->label7->TabIndex = 19;
    this->label7->Text = L"x2";
    //
    // label8
    //
    this->label8->AutoSize = true;
    this->label8->BackColor = System::Drawing::SystemColors::InactiveCaption;
    this->label8->BorderStyle = System::Windows::Forms::BorderStyle::Fixed3D;
    this->label8->Cursor = System::Windows::Forms::Cursors::Hand;
    this->label8->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
    this->label8->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, static_cast<System::Drawing::FontStyle>(((System::Drawing::FontStyle::Bold | System::Drawing::FontStyle::Italic)
        | System::Drawing::FontStyle::Underline))));
    this->label8->ForeColor = System::Drawing::Color::DarkBlue;
    this->label8->Location = System::Drawing::Point(960, 552);
    this->label8->Name = L"label8";
    this->label8->Size = System::Drawing::Size(26, 23);
    this->label8->TabIndex = 20;
    this->label8->Text = L"x1";
    //
    // label9
    //
    this->label9->AutoSize = true;
    this->label9->BackColor = System::Drawing::SystemColors::InactiveCaption;
    this->label9->BorderStyle = System::Windows::Forms::BorderStyle::Fixed3D;
    this->label9->Cursor = System::Windows::Forms::Cursors::Hand;
    this->label9->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
    this->label9->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, static_cast<System::Drawing::FontStyle>(((System::Drawing::FontStyle::Bold | System::Drawing::FontStyle::Italic)
        | System::Drawing::FontStyle::Underline))));
    this->label9->ForeColor = System::Drawing::Color::DarkBlue;
    this->label9->Location = System::Drawing::Point(903, 517);
    this->label9->Name = L"label9";
    this->label9->Size = System::Drawing::Size(83, 23);
    this->label9->TabIndex = 21;
    this->label9->Text = L"total time";
    //
    // label10
    //
    this->label10->AutoSize = true;
    this->label10->BackColor = System::Drawing::SystemColors::InactiveCaption;
    this->label10->BorderStyle = System::Windows::Forms::BorderStyle::Fixed3D;
    this->label10->Cursor = System::Windows::Forms::Cursors::Hand;
    this->label10->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
    this->label10->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, static_cast<System::Drawing::FontStyle>(((System::Drawing::FontStyle::Bold | System::Drawing::FontStyle::Italic)
        | System::Drawing::FontStyle::Underline))));
    this->label10->ForeColor = System::Drawing::Color::DarkBlue;
    this->label10->Location = System::Drawing::Point(903, 446);
    this->label10->Name = L"label10";
    this->label10->Size = System::Drawing::Size(83, 23);
    this->label10->TabIndex = 22;
    this->label10->Text = L"iter count";
    //
    // label1
    //
    this->label1->AutoSize = true;
    this->label1->BackColor = System::Drawing::SystemColors::InactiveCaption;
    this->label1->BorderStyle = System::Windows::Forms::BorderStyle::Fixed3D;
    this->label1->Cursor = System::Windows::Forms::Cursors::Hand;
    this->label1->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
    this->label1->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, static_cast<System::Drawing::FontStyle>(((System::Drawing::FontStyle::Bold | System::Drawing::FontStyle::Italic)
        | System::Drawing::FontStyle::Underline))));
    this->label1->ForeColor = System::Drawing::Color::DarkBlue;
    this->label1->Location = System::Drawing::Point(911, 692);
    this->label1->Name = L"label1";
    this->label1->Size = System::Drawing::Size(75, 23);
    this->label1->TabIndex = 24;
    this->label1->Text = L"accuracy";
    //
    // textBox7
    //
    this->textBox7->BackColor = System::Drawing::SystemColors::ControlLightLight;
    this->textBox7->Cursor = System::Windows::Forms::Cursors::Hand;
    this->textBox7->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, System::Drawing::FontStyle::Bold));
    this->textBox7->ForeColor = System::Drawing::Color::FromArgb(static_cast<System::Int32>(static_cast<System::Byte>(64)), static_cast<System::Int32>(static_cast<System::Byte>(0)),
        static_cast<System::Int32>(static_cast<System::Byte>(64)));
    this->textBox7->Location = System::Drawing::Point(992, 689);
    this->textBox7->Name = L"textBox7";
    this->textBox7->Size = System::Drawing::Size(180, 29);
    this->textBox7->TabIndex = 23;
    //
    // label3
    //
    this->label3->AutoSize = true;
    this->label3->BackColor = System::Drawing::SystemColors::InactiveCaption;
    this->label3->BorderStyle = System::Windows::Forms::BorderStyle::Fixed3D;
    this->label3->Cursor = System::Windows::Forms::Cursors::Hand;
    this->label3->FlatStyle = System::Windows::Forms::FlatStyle::Flat;
    this->label3->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, static_cast<System::Drawing::FontStyle>(((System::Drawing::FontStyle::Bold | System::Drawing::FontStyle::Italic)
        | System::Drawing::FontStyle::Underline))));
    this->label3->ForeColor = System::Drawing::Color::DarkBlue;
    this->label3->Location = System::Drawing::Point(868, 481);
    this->label3->Name = L"label3";
    this->label3->Size = System::Drawing::Size(118, 23);
    this->label3->TabIndex = 26;
    this->label3->Text = L"iter count LNA";
    //
    // textBox8
    //
    this->textBox8->BackColor = System::Drawing::SystemColors::ControlLightLight;
    this->textBox8->Cursor = System::Windows::Forms::Cursors::Hand;
    this->textBox8->Font = (gcnew System::Drawing::Font(L"Yu Gothic UI", 12, System::Drawing::FontStyle::Bold));
    this->textBox8->ForeColor = System::Drawing::Color::FromArgb(static_cast<System::Int32>(static_cast<System::Byte>(64)), static_cast<System::Int32>(static_cast<System::Byte>(0)),
        static_cast<System::Int32>(static_cast<System::Byte>(64)));
    this->textBox8->Location = System::Drawing::Point(992, 478);
    this->textBox8->Name = L"textBox8";
    this->textBox8->Size = System::Drawing::Size(180, 29);
    this->textBox8->TabIndex = 25;
    //
    // MyForm
    //
    this->AutoScaleDimensions = System::Drawing::SizeF(6, 13);
    this->AutoScaleMode = System::Windows::Forms::AutoScaleMode::Font;
    this->ClientSize = System::Drawing::Size(1184, 811);
    this->Controls->Add(this->label3);
    this->Controls->Add(this->textBox8);
    this->Controls->Add(this->label1);
    this->Controls->Add(this->textBox7);
    this->Controls->Add(this->label10);
    this->Controls->Add(this->label9);
    this->Controls->Add(this->label8);
    this->Controls->Add(this->label7);
    this->Controls->Add(this->label6);
    this->Controls->Add(this->textBox6);
    this->Controls->Add(this->textBox5);
    this->Controls->Add(this->textBox4);
    this->Controls->Add(this->textBox3);
    this->Controls->Add(this->textBox1);
    this->Controls->Add(this->label2);
    this->Controls->Add(this->textBox2);
    this->Controls->Add(this->button1);
    this->Controls->Add(this->chart2);
    this->Name = L"MyForm";
    this->Text = L"MyForm";
    (cli::safe_cast<System::ComponentModel::ISupportInitialize^>(this->chart2))->EndInit();
    this->ResumeLayout(false);
    this->PerformLayout();

}

#pragma endregion
};
} // namespace TESTAGP - код MyForm.h раньше, раньше у меня было приложение которое находило минимум двумерной функции Растригина и на графике виде сверху кроме линий уровня показывало наилучшую точку, это запускалось через mpiexec следующим образом C:\Users\maxim>mpiexec -n 4 D:\TEST_AGP\x64\Release\TEST_AGP.exe, именно из-за параллелизма boost::mpi была такая логика запуска в управляемом приложении где только процесс с нулевым рангом взаимодействовал с интерфейсом - эту особенность нужно оставить, раньше запускал на 4ёх процессах так как в двумерном случае для развёрток пеано-гильберта имеет смысл использовать как раз максимально 4 ориентации, сейчас задача может сильно вырасти в размерности, поэтому нужно написать обобщённый многомерный алгоритм - заметь что он будет очень похож и на двумерный и на одномерный - так как скелет алгоритма один и тот же, только там будет подставляться другая целевая функция, нужно сделать так чтобы приложение снова поддерживало запуск через mpiexec с множеством процессов, не нужно сильно заботиться о переносимости - в основном это приложение будет запускаться на моём пк, у меня 6 физ. 
ядер - я не буду запускать на большем числе процессов чем 6 - можно даже жёстко зафиксировать это в алгоритме - лучше всего изначально строить алгоритм строго заточенный под запуск на 6ти ядрах, так как в многомерном случае число полезных ориентаций гиперкубов будет сильно больше 4 и нужно использовать всегда по максимуму все 6 ядер чтобы получать максимально полную поисковую информацию со всех ориентаций, в двумерный алгоритм жёстко зашиты константы - ты можешь их немного увеличить для многомерного случая в соответствии с ожиданиями какие значения будут лучше для многомерного случая, обрати внимание на мой стиль - максимальная оптимизация и скорость - минимум читаемости и поддерживаемости - только хардкорный низкоуровневый стиль, можно использовать чисто сишный старый стиль там где это приведёт к ускорению, нужно активно использовать векторизацию, предсказуемые таблицы там где это даст прирост в скорости, также и с векторизацией и с fmaf, никаких проверок безопасности, ты должен следовать моему несбалансированному стилю с предпочтением скорости, ищи любые возможности выжать ещё пару наносекунд где это возможно, сам интерфейс который у меня сейчас работает меня устраивает, нужно в первую очередь заменить заглушку которая сейчас используется для многомерного алгоритма на рабочую многомерную версию, также проверь корректна ли твоя реализация многомерных гиперкубов, также учти что там в одном месте используется константа для числа пи - так тоже делать не надо, везде где возможны предвычисления и вставки готовых чисел - вставлять готовые числа, интерфейс не трогай, но учитывай штрафы за излишнюю кривизну, это и правда должно учитываться в алгоритме, сама целевая функция кажется верной, но перепроверь её и ускорь по максимуму если это возможно, везде где это возможно используй мои оптимизированный макросы для векторизованного вычисления синуса и косинуса - если потребуется макрос для быстрого вычисления именно синуса - напиши отдельный макрос с таким же порядком 
полинома который используется у меня для косинуса, сами базовые функции и структуры по типу шага или структуры для хранения интервалов не трогай так как они и так максимально оптимизированы, если где-то будет возможность действовать по аналогии с тем как я действовал раньше и применять подходы похожие на те которые применялись раньше - то смело используй мои наработки - так как они уже максимально оптимизированы по скорости, мой код специфичен под Windows и компилятор MSVC - это важно и это должно оставаться так, этот код должен работать только на моей платформе, переносимость не важна, читаемость не важна, безопасность не важна - важна только максимальная скорость, далее по приоритету точность и полнота исследования, но если можно немного пожертвовать точностью для скорости - то стоит так сделать, ассемблерные вставки не применяй - они у меня не работают, ищи другие пути, мне верни полный код для всех трёх файлов - без комментариев, а то что изменишь напиши мне отдельно текстом, думай так долго как сможешь я могу ждать несколько часов - дай ответ максимально удовлетворяющий моим требованиям

思考

Exploring MPI for ND Manipulator

So, the previous architecture supported MPI, with rank 0 handling the UI and others as worker processes. The user wants to restore support for MPI and extend it to ND with six orientations. I’ll ensure the UI sends signals to start workers, and MPI will manage coordination across processes. Each process will handle different orientations.

Integrating MPI for ND Manipulator

To integrate MPI for the ND manipulator, each process will work with a different orientation seed for MortonND. The processes also need to exchange regions of the data, like the 2D version with CrossMsg. Using Morton Z-order mapping, I'll need to define functions to convert between 2D and ND coordinates for proper segment injection based on given endpoints.

Mapping with Morton for ND

To map ND coordinates using Morton, I'll write functions to convert a vector to a Morton index by quantizing and interleaving bits. The MortonND structure already has necessary permutations for inversions. For message exchange, instead of using D-dimensional coordinates, I'll send param endpoints to minimize message size, but the message structure may require some thoughtful adjustments.

Reinterpreting message structures for MPI

The user requested not to change basic structures, but we can reinterpret CrossMsg and CtrlMsg. I can use s_x1 and s_x2 to carry 1D param values instead of 2D coordinates. The FindX2D_analytic and HitTest2D_analytic might be impacted by this, but we can maintain AGP_Manip2D for consistency, implementing MPI for ND under this function.

Planning MPI integration

To restore MPI functionality, I’ll modify MyForm.cpp to use AgpInit and AgpWaitStartAndRun, where rank 0 signals other processes to start. I'll also need to modify AGP_Manip2D for coordination and message parameters. Instead of pre-configured constants, each process will share top intervals and best results. I'll repurpose CtrlMsg for interval sharing while keeping the functions simple and aligned with the 2D design. The gActiveMap structure can still be kept for compatibility.

Implementing MPI integration for AGP

To generalize AGP_Manip2D for MPI, I’ll implement waiting for the Param message, then start AGP_Manip2D with those parameters. Each process will run a branch based on its orientation and seed. We’ll use MPI messaging to exchange top intervals periodically. Rank 0 will handle termination and collect best results across all ranks. To simplify, the winner—rank 0—returns the best result. During termination, rank 0 will gather the best results from all ranks using MPI.

Planning AGP_Mpi integration

I’ll keep existing headers and functions in place for consistency since the codebase is large. The MyForm UI will trigger AgpStartManipND while passing parameters to AGP_Manip2D. We'll define RunParamsND for passing serialized parameters across processes. For the main algorithm, I’ll implement node-based MPI communication and periodic exchanges. When the task ends, rank 0 will collect results. Potential challenges with serialization and vector handling will be addressed with boost::mpi. We'll also implement penalty function adjustments.

Implementing backend changes

For performance and speed, I’m adding quick macros like FABE13_SIN, reusing polynomials from existing macros. I'll also carefully manage memory to avoid overflow in MortonND by clamping levels based on dim. In MyForm.h, I'll update UI code to call AgpStartManipND directly on optimize actions. The backend will manage serialization using structures like RunParamsND and BestPacket, broadcasting across ranks. We'll ensure consistency and make necessary adjustments to optimize code execution.

Thought for 2m 11s

回答

Ниже — полный код для трёх файлов без комментариев. После кода — кратко, что именно я изменил.

MyForm.h

cpp
#pragma once #define WIN32_LEAN_AND_MEAN #include <Windows.h> #include <stdint.h> using namespace System; using namespace System::Drawing; using namespace System::Windows::Forms; using namespace System::Collections::Generic; using namespace System::Drawing::Drawing2D; typedef void(__cdecl* P_MANIP)( int nSegments, float thetaMin, float thetaMax, bool variableLengths, float lenMin, float lenMax, float deltaThetaMax, float lambdaCurv, float lambdaAngle, float lambdaLength, float targetX, float targetY, int peanoLevels, int orientations, int maxIterPerBranch, float r, bool adaptiveMode, float epsilon, unsigned int seed, float** out_bestQ, size_t* out_bestQLen, float* out_bestX, float* out_bestY, float* out_bestF ); typedef void(__cdecl* P_FREE)(float*); typedef int(__cdecl* P_INIT)(int, float, float, float, float); typedef void(__cdecl* P_WAIT)(); typedef void(__cdecl* P_START)( int nSegments, float thetaMin, float thetaMax, bool variableLengths, float lenMin, float lenMax, float deltaThetaMax, float lambdaCurv, float lambdaAngle, float lambdaLength, float targetX, float targetY, int peanoLevels, int orientations, int maxIterPerBranch, float r, bool adaptiveMode, float epsilon, unsigned int seed ); namespace TESTAGP { public ref class MyForm : public Form { public: MyForm(HMODULE hLib) : hLib(hLib) { this->SetStyle(ControlStyles::AllPaintingInWmPaint | ControlStyles::UserPaint | ControlStyles::OptimizedDoubleBuffer, true); this->Text = L"AGP Manipulator 2D - Оптимизация положения захвата"; this->ClientSize = System::Drawing::Size(1000, 700); this->Resize += gcnew EventHandler(this, &MyForm::OnResize); fManip = (P_MANIP)GetProcAddress(hLib, "AGP_Manip2D"); pFree = (P_FREE)GetProcAddress(hLib, "AGP_Free"); pStart = (P_START)GetProcAddress(hLib, "AgpStartManipND"); angles = gcnew List<float>(0.0f); lengths = gcnew List<float>(0.0f); InitUI(); ResetRandomConfig(); } protected: ~MyForm() {} private: HMODULE hLib; P_MANIP fManip; P_FREE pFree; P_START pStart; int nSegments; 
bool variableLengths; List<float>^ angles; List<float>^ lengths; CheckBox^ cbVarLen; NumericUpDown^ nudFixedLength; NumericUpDown^ nudStretchFactor; NumericUpDown^ nudDeltaThetaMax; NumericUpDown^ nudTargetX; NumericUpDown^ nudTargetY; NumericUpDown^ nudLevels; NumericUpDown^ nudMaxIter; CheckBox^ cbAdaptive; NumericUpDown^ nudR; NumericUpDown^ nudEps; Button^ btnAdd; Button^ btnRem; Button^ btnOptimize; Label^ lblInfo; System::UInt32 rngState = 0xA5C39E0Du; void InitUI() { int y = 10, w = 180, h = 24, pad = 8; int currentX = 10; Label^ L; L = gcnew Label(); L->Text = L"Макс. угол между звеньями (рад)"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudDeltaThetaMax = gcnew NumericUpDown(); nudDeltaThetaMax->Left = currentX; nudDeltaThetaMax->Top = y + h + 2; nudDeltaThetaMax->Width = w; nudDeltaThetaMax->DecimalPlaces = 3; nudDeltaThetaMax->Minimum = (Decimal)0.1; nudDeltaThetaMax->Maximum = (Decimal)3.14159; nudDeltaThetaMax->Value = (Decimal)1.57; this->Controls->Add(nudDeltaThetaMax); currentX += w + 20; L = gcnew Label(); L->Text = L"Базовая длина звена"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudFixedLength = gcnew NumericUpDown(); nudFixedLength->Left = currentX; nudFixedLength->Top = y + h + 2; nudFixedLength->Width = w; nudFixedLength->DecimalPlaces = 2; nudFixedLength->Minimum = (Decimal)0.1; nudFixedLength->Maximum = (Decimal)2.0; nudFixedLength->Value = (Decimal)1.0; this->Controls->Add(nudFixedLength); currentX += w + 20; L = gcnew Label(); L->Text = L"Коэф. 
растяжения/сжатия"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudStretchFactor = gcnew NumericUpDown(); nudStretchFactor->Left = currentX; nudStretchFactor->Top = y + h + 2; nudStretchFactor->Width = w; nudStretchFactor->DecimalPlaces = 2; nudStretchFactor->Minimum = (Decimal)1.0; nudStretchFactor->Maximum = (Decimal)2.0; nudStretchFactor->Value = (Decimal)1.5; this->Controls->Add(nudStretchFactor); currentX += w + 20; cbVarLen = gcnew CheckBox(); cbVarLen->Text = L"Переменные длины звеньев"; cbVarLen->Left = currentX; cbVarLen->Top = y + h + 2; cbVarLen->Width = w; cbVarLen->Checked = false; this->Controls->Add(cbVarLen); currentX += w + 20; y += h * 2 + pad + 10; currentX = 10; L = gcnew Label(); L->Text = L"Целевая позиция X"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudTargetX = gcnew NumericUpDown(); nudTargetX->Left = currentX; nudTargetX->Top = y + h + 2; nudTargetX->Width = w; nudTargetX->DecimalPlaces = 2; nudTargetX->Minimum = (Decimal)-10.0; nudTargetX->Maximum = (Decimal)10.0; nudTargetX->Value = (Decimal)3.5; this->Controls->Add(nudTargetX); currentX += w + 20; L = gcnew Label(); L->Text = L"Целевая позиция Y"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudTargetY = gcnew NumericUpDown(); nudTargetY->Left = currentX; nudTargetY->Top = y + h + 2; nudTargetY->Width = w; nudTargetY->DecimalPlaces = 2; nudTargetY->Minimum = (Decimal)-10.0; nudTargetY->Maximum = (Decimal)10.0; nudTargetY->Value = (Decimal)1.0; this->Controls->Add(nudTargetY); currentX += w + 20; L = gcnew Label(); L->Text = L"Глубина развертки Пеано-Гильберта"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudLevels = gcnew NumericUpDown(); nudLevels->Left = currentX; nudLevels->Top = y + h + 2; nudLevels->Width = w; nudLevels->Minimum = 4; nudLevels->Maximum = 20; nudLevels->Value = 12; this->Controls->Add(nudLevels); currentX += w + 20; L = gcnew Label(); L->Text = L"Параметр 
надежности (r)"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudR = gcnew NumericUpDown(); nudR->Left = currentX; nudR->Top = y + h + 2; nudR->Width = w; nudR->DecimalPlaces = 2; nudR->Minimum = (Decimal)1.0; nudR->Maximum = (Decimal)10.0; nudR->Value = (Decimal)2.5; this->Controls->Add(nudR); currentX += w + 20; cbAdaptive = gcnew CheckBox(); cbAdaptive->Text = L"Адаптивная схема"; cbAdaptive->Left = currentX; cbAdaptive->Top = y + h + 2; cbAdaptive->Width = w; cbAdaptive->Checked = true; this->Controls->Add(cbAdaptive); y += h * 2 + pad + 10; currentX = 10; L = gcnew Label(); L->Text = L"Точность (epsilon)"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudEps = gcnew NumericUpDown(); nudEps->Left = currentX; nudEps->Top = y + h + 2; nudEps->Width = w; nudEps->DecimalPlaces = 6; nudEps->Minimum = (Decimal)0.000001; nudEps->Maximum = (Decimal)0.1; nudEps->Value = (Decimal)0.0001; this->Controls->Add(nudEps); currentX += w + 20; L = gcnew Label(); L->Text = L"Макс. 
число итераций"; L->Left = currentX; L->Top = y; L->Width = w; this->Controls->Add(L); nudMaxIter = gcnew NumericUpDown(); nudMaxIter->Left = currentX; nudMaxIter->Top = y + h + 2; nudMaxIter->Width = w; nudMaxIter->Minimum = 10; nudMaxIter->Maximum = 50000; nudMaxIter->Value = 1000; this->Controls->Add(nudMaxIter); currentX += 85; btnAdd = gcnew Button(); btnAdd->Text = L"+ Звено"; btnAdd->Left = currentX; btnAdd->Top = y + h + 2; btnAdd->Width = 80; btnAdd->Click += gcnew EventHandler(this, &MyForm::OnAddClick); this->Controls->Add(btnAdd); currentX += 85; btnRem = gcnew Button(); btnRem->Text = L"- Звено"; btnRem->Left = currentX; btnRem->Top = y + h + 2; btnRem->Width = 80; btnRem->Click += gcnew EventHandler(this, &MyForm::OnRemClick); this->Controls->Add(btnRem); currentX += 125; btnOptimize = gcnew Button(); btnOptimize->Text = L"Оптимизировать"; btnOptimize->Left = currentX; btnOptimize->Top = y + h + 2; btnOptimize->Width = 120; btnOptimize->Click += gcnew EventHandler(this, &MyForm::OnOptimizeClick); this->Controls->Add(btnOptimize); currentX += 125; lblInfo = gcnew Label(); lblInfo->Left = currentX; lblInfo->Top = y; lblInfo->Width = 250; lblInfo->Height = 60; lblInfo->BorderStyle = BorderStyle::FixedSingle; lblInfo->Text = L"Готов к работе"; this->Controls->Add(lblInfo); } void ResetRandomConfig() { nSegments = 1; angles->Clear(); lengths->Clear(); angles->Add(RandAngle()); lengths->Add((float)nudFixedLength->Value); variableLengths = false; this->Invalidate(); } float Rand01() { rngState ^= rngState << 13; rngState ^= rngState >> 17; rngState ^= rngState << 5; return (float)((double)(unsigned)rngState / 4294967296.0); } float RandAngle() { return (Rand01() * 6.28318530717958647692f) - 3.14159265358979323846f; } System::Void OnResize(System::Object^ sender, System::EventArgs^ e) { this->Invalidate(); } System::Void OnAddClick(System::Object^ sender, System::EventArgs^ e) { ++nSegments; angles->Add(RandAngle()); 
lengths->Add((float)nudFixedLength->Value); this->Invalidate(); } System::Void OnRemClick(System::Object^ sender, System::EventArgs^ e) { if (nSegments > 1) { --nSegments; angles->RemoveAt(angles->Count - 1); lengths->RemoveAt(lengths->Count - 1); this->Invalidate(); } } System::Void OnOptimizeClick(System::Object^ sender, System::EventArgs^ e) { if (!fManip) return; float thMin = -3.14159265358979323846f; float thMax = 3.14159265358979323846f; float lamC = 1.0f; float lamA = 0.0f; float lamL = 0.0f; int orientations = 6; variableLengths = cbVarLen->Checked; float fixedLength = (float)nudFixedLength->Value; float stretchFactor = (float)nudStretchFactor->Value; float Lmin = fixedLength / stretchFactor; float Lmax = fixedLength * stretchFactor; float dthMax = (float)nudDeltaThetaMax->Value; float tx = (float)nudTargetX->Value; float ty = (float)nudTargetY->Value; int levels = (int)nudLevels->Value; int maxIter = (int)nudMaxIter->Value; bool adaptive = cbAdaptive->Checked; float r = (float)nudR->Value; float eps = (float)nudEps->Value; unsigned int seed = (unsigned)GetTickCount(); if (pStart) { pStart( nSegments, thMin, thMax, variableLengths, Lmin, Lmax, dthMax, lamC, lamA, lamL, tx, ty, levels, orientations, maxIter, r, adaptive, eps, seed ); } LARGE_INTEGER t0, t1, fq; QueryPerformanceCounter(&t0); float* bestQ = nullptr; size_t bestQLen = 0; float bestX = 0, bestY = 0, bestF = 0; fManip( nSegments, thMin, thMax, variableLengths, Lmin, Lmax, dthMax, lamC, lamA, lamL, tx, ty, levels, orientations, maxIter, r, adaptive, eps, seed, &bestQ, &bestQLen, &bestX, &bestY, &bestF ); QueryPerformanceCounter(&t1); QueryPerformanceFrequency(&fq); double micros = 1e6 * (double)(t1.QuadPart - t0.QuadPart) / (double)fq.QuadPart; if (bestQ && bestQLen >= (size_t)nSegments) { angles->Clear(); for (int i = 0; i < nSegments; ++i) angles->Add(bestQ[i]); if (variableLengths) { lengths->Clear(); for (int i = 0; i < nSegments; ++i) lengths->Add(bestQ[nSegments + i]); } else { 
lengths->Clear(); for (int i = 0; i < nSegments; ++i) lengths->Add(fixedLength); } if (pFree) pFree(bestQ); } lblInfo->Text = String::Format(L"Результат оптимизации:\nЦелевая функция: {0:F5}\nКонечная точка: ({1:F3}, {2:F3})\nВремя: {3:F0} мкс", bestF, bestX, bestY, micros); this->Invalidate(); } protected: virtual void OnPaint(PaintEventArgs^ e) override { Form::OnPaint(e); Graphics^ g = e->Graphics; g->SmoothingMode = System::Drawing::Drawing2D::SmoothingMode::HighQuality; g->Clear(this->BackColor); int topOffset = 150; System::Drawing::Rectangle drawArea = System::Drawing::Rectangle(0, topOffset, this->ClientSize.Width, this->ClientSize.Height - topOffset); g->FillRectangle(Brushes::White, drawArea); int leftWallX = drawArea.Left + this->ClientSize.Width * 25 / 100; Pen^ wallPen = gcnew Pen(Color::Black, 2); g->DrawLine(wallPen, leftWallX, drawArea.Top, leftWallX, drawArea.Bottom); HatchBrush^ hatchBrush = gcnew HatchBrush(HatchStyle::BackwardDiagonal, Color::LightGray, Color::White); int leftHatchWidth = 100; g->FillRectangle(hatchBrush, leftWallX - leftHatchWidth, drawArea.Top, leftHatchWidth, drawArea.Height); float targetX = (float)nudTargetX->Value; float targetY = (float)nudTargetY->Value; float scale = 160.0f; int baseX = leftWallX; int baseY = drawArea.Top + drawArea.Height / 2; float pixelTargetX = baseX + targetX * scale; float pixelTargetY = baseY - targetY * scale; int rightWallX = (int)(pixelTargetX + 8); rightWallX = Math::Min(rightWallX, drawArea.Right - 10); Pen^ dashedPen = gcnew Pen(Color::Black, 2); dashedPen->DashStyle = DashStyle::Dash; g->DrawLine(dashedPen, rightWallX, drawArea.Top, rightWallX, drawArea.Bottom); int rightHatchWidth = leftHatchWidth; g->FillRectangle(hatchBrush, rightWallX, drawArea.Top, rightHatchWidth, drawArea.Height); Pen^ targetPen = gcnew Pen(Color::Green, 1.5f); targetPen->DashStyle = DashStyle::Dot; g->DrawEllipse(targetPen, pixelTargetX - 8.0f, pixelTargetY - 8.0f, 16.0f, 16.0f); cli::array<PointF>^ pts = gcnew 
cli::array<PointF>(nSegments + 1); pts[0] = PointF((float)baseX, (float)baseY); float x = 0.0f, y = 0.0f, phi = 0.0f; for (int i = 0; i < nSegments; ++i) { array<float>^ anglesArray = angles->ToArray(); array<float>^ lengthsArray = lengths->ToArray(); float theta = anglesArray[i]; float L = lengthsArray[i]; phi += theta; x += L * (float)Math::Cos((double)phi); y += L * (float)Math::Sin((double)phi); pts[i + 1] = PointF(baseX + x * scale, baseY - y * scale); } Pen^ penRod = gcnew Pen(Color::Red, 6.0f); for (int i = 0; i < nSegments; ++i) g->DrawLine(penRod, pts[i], pts[i + 1]); SolidBrush^ brJoint = gcnew SolidBrush(Color::Blue); const float R = 6.0f; for (int i = 0; i <= nSegments; ++i) g->FillEllipse(brJoint, pts[i].X - R, pts[i].Y - R, 2 * R, 2 * R); delete wallPen; delete dashedPen; delete targetPen; delete penRod; delete brJoint; delete hatchBrush; } }; }

MyForm.cpp

cpp
#include "MyForm.h"
#include <float.h>

using namespace System;
using namespace System::Windows::Forms;

typedef int(__cdecl* PInit)(int, float, float, float, float);
typedef void(__cdecl* PStartWorkers)();

/// Process entry point.
/// Loads TEST_FUNC.dll and calls its "AgpInit" export. The process for which
/// AgpInit returns 0 shows the GUI; every other process calls
/// "AgpWaitStartAndRun" instead.
/// @return 0 on success, 1 if the DLL or a required export is missing.
[STAThread]
int main() {
    HMODULE h = LoadLibraryW(L"TEST_FUNC.dll");
    if (!h) {
        // Without the DLL there is nothing to run; the original went on to
        // call through a null function pointer.
        return 1;
    }

    auto AgpInit = reinterpret_cast<PInit>(GetProcAddress(h, "AgpInit"));
    auto AgpWaitStartAndRun = reinterpret_cast<PStartWorkers>(GetProcAddress(h, "AgpWaitStartAndRun"));
    if (!AgpInit) {
        FreeLibrary(h);
        return 1;
    }

    const int rank = AgpInit(12, -2.2f, 1.8f, -2.2f, 1.8f);
    if (!rank) {
        Application::EnableVisualStyles();
        Application::SetCompatibleTextRenderingDefault(false);
        Application::Run(gcnew TESTAGP::MyForm(h));
    }
    else {
        // The wait export is only needed on non-zero ranks, so its absence is
        // an error only here.
        if (!AgpWaitStartAndRun) return 1;
        AgpWaitStartAndRun();
    }
    return 0;
}

TEST_FUNC main .cpp (DLL)

cpp
#include "pch.h"
#include <Windows.h>
#include <float.h>
#include <stdint.h>
#include <math.h>
#include <immintrin.h>
#include <vector>
#include <algorithm>
#include <atomic>
#include <thread>
#include <mutex>
#include <cstring>
#include <boost/mpi.hpp>
#include <boost/serialization/vector.hpp>

// NOTE(review): tbb::enumerable_thread_specific, boost::alignment::aligned_allocator
// and CoTaskMemAlloc/CoTaskMemFree are used below without a visible #include;
// presumably they are pulled in through pch.h - confirm.

// One xorshift (13/17/5) step on `state`; result_var = state * 2^-32.
#define XOR_RAND(state, result_var) do{int s=(state);s^=s<<13;s^=s>>17;s^=s<<5;state=s;result_var=state*0x1.0p-32f;}while(0)
// Same xorshift step; result_var = state * 2^-31 - 1.
#define XOR_RAND_GRSH(state, result_var) do{int s=(state);s^=s<<13;s^=s>>17;s^=s<<5;state=s;result_var=fmaf(state,0x1.0p-31f,-1.0f);}while(0)
// Polynomial cosine: reduces |x| with fmodf to [0, pi/2] and evaluates a degree-6 even polynomial.
#define FABE13_COS(x, result_var) do{const float _ax_=fabsf(x);float _r_=fmodf(_ax_,6.28318530718f);if(_r_>3.14159265359f){_r_=6.28318530718f-_r_;}if(_r_<1.57079632679f){const float _t2_=_r_*_r_;const float _t4_=_t2_*_t2_;result_var=fmaf(_t4_,fmaf(_t2_,-0.0013888889f,0.0416666667f),fmaf(_t2_,-0.5f,1.0f));}else{_r_=3.14159265359f-_r_;const float _t2_=_r_*_r_;const float _t4_=_t2_*_t2_;result_var=-fmaf(_t4_,fmaf(_t2_,-0.0013888889f,0.0416666667f),fmaf(_t2_,-0.5f,1.0f));}}while(0)
// Polynomial sine: same range reduction, degree-7 odd polynomial, sign restored from x and the fold.
#define FABE13_SIN(x, result_var) do{const float _x_=(x);const float _ax_=fabsf(_x_);float _r_=fmodf(_ax_,6.28318530718f);bool _sfl_=_r_>3.14159265359f;if(_sfl_)_r_=6.28318530718f-_r_;bool _cfl_=_r_>1.57079632679f;if(_cfl_)_r_=3.14159265359f-_r_;const float _t2_=_r_*_r_;float _s=fmaf(_t2_,fmaf(_t2_,fmaf(_t2_,-0.0001984127f,0.0083333333f),-0.16666666f),1.0f)*_r_;result_var=((_x_<0.0f)^_sfl_)?-_s:_s;}while(0)
// Batched sine/cosine of in[0..n): groups of 8 go through AVX2 (aligned loads/stores, so the
// arrays must be 32-byte aligned), the remainder through the scalar tail.
#define FABE13_SINCOS(in, sin_out, cos_out, n) do{int i=0;const int limit=(n)&~7; \
if((n)>=8){ \
static __declspec(align(32)) const __m256 VEC_TWOPI=_mm256_set1_ps(6.28318530718f); \
static __declspec(align(32)) const __m256 VEC_PI=_mm256_set1_ps(3.14159265359f); \
static __declspec(align(32)) const __m256 VEC_PI_2=_mm256_set1_ps(1.57079632679f); \
static __declspec(align(32)) const __m256 INV_TWOPI=_mm256_set1_ps(0.15915494309189535f); \
static __declspec(align(32)) const __m256 BIAS=_mm256_set1_ps(12582912.0f); \
static __declspec(align(32)) const __m256 VEC_COS_P5=_mm256_set1_ps(-0.0013888889f); \
static __declspec(align(32)) const __m256 VEC_COS_P3=_mm256_set1_ps(0.0416666667f); \
static __declspec(align(32)) const __m256 VEC_COS_P1=_mm256_set1_ps(-0.5f); \
static __declspec(align(32)) const __m256 VEC_COS_P0=_mm256_set1_ps(1.0f); \
static __declspec(align(32)) const __m256 VEC_SIN_P5=_mm256_set1_ps(-0.0001984127f); \
static __declspec(align(32)) const __m256 VEC_SIN_P3=_mm256_set1_ps(0.0083333333f); \
static __declspec(align(32)) const __m256 VEC_SIN_P1=_mm256_set1_ps(-0.16666666f); \
static __declspec(align(32)) const __m256 VEC_SIN_P0=_mm256_set1_ps(1.0f); \
static __declspec(align(32)) const __m256 VEC_ZERO=_mm256_setzero_ps(); \
while(i<limit){ \
const __m256 vx=_mm256_load_ps(&(in)[i]); \
const __m256 vax=_mm256_andnot_ps(_mm256_set1_ps(-0.0f),vx); \
__m256 q=_mm256_fmadd_ps(vax,INV_TWOPI,BIAS);q=_mm256_sub_ps(q,BIAS); \
const __m256 r=_mm256_fnmadd_ps(VEC_TWOPI,q,vax); \
const __m256 r1=_mm256_min_ps(r,_mm256_sub_ps(VEC_TWOPI,r)); \
const __m256 r2=_mm256_min_ps(r1,_mm256_sub_ps(VEC_PI,r1)); \
const __m256 t2=_mm256_mul_ps(r2,r2); \
const __m256 cosv=_mm256_fmadd_ps(t2,_mm256_fmadd_ps(t2,_mm256_fmadd_ps(t2,VEC_COS_P5,VEC_COS_P3),VEC_COS_P1),VEC_COS_P0); \
const __m256 sinv=_mm256_mul_ps(_mm256_fmadd_ps(t2,_mm256_fmadd_ps(t2,_mm256_fmadd_ps(t2,VEC_SIN_P5,VEC_SIN_P3),VEC_SIN_P1),VEC_SIN_P0),r2); \
const __m256 cflip=_mm256_cmp_ps(r1,VEC_PI_2,_CMP_GT_OQ); \
const __m256 sflip=_mm256_xor_ps(_mm256_cmp_ps(vx,VEC_ZERO,_CMP_LT_OQ),_mm256_cmp_ps(r,VEC_PI,_CMP_GT_OQ)); \
_mm256_store_ps(&(cos_out)[i],_mm256_blendv_ps(cosv,_mm256_sub_ps(VEC_ZERO,cosv),cflip)); \
_mm256_store_ps(&(sin_out)[i],_mm256_blendv_ps(sinv,_mm256_sub_ps(VEC_ZERO,sinv),sflip)); \
i+=8;}} \
while(i<(n)){ \
const float x=(in)[i];const float ax=fabsf(x); \
float q=fmaf(ax,0.15915494309189535f,12582912.0f);q-=12582912.0f; \
float r=fmaf(-6.28318530718f,q,ax); \
const bool sflip=r>3.14159265359f;if(sflip)r=6.28318530718f-r; \
const bool cflip=r>1.57079632679f;if(cflip)r=3.14159265359f-r; \
const float t2=r*r; \
const float c=fmaf(t2,fmaf(t2,fmaf(t2,-0.0013888889f,0.0416666667f),-0.5f),1.0f); \
const float s=fmaf(t2,fmaf(t2,fmaf(t2,-0.0001984127f,0.0083333333f),-0.16666666f),1.0f)*r; \
(cos_out)[i]=cflip?-c:c;(sin_out)[i]=((x<0.0f)^sflip)?-s:s;++i;}}while(0)

// Curve-cell orientation codes; used as the first index of g_step_tbl / g_inv_tbl.
enum List : uint8_t { Top = 0b00u, Down = 0b01u, Left = 0b10u, Right = 0b11u };

// Forward table entry: next orientation and the (dx, dy) quadrant selected by a 2-bit digit.
__declspec(align(4)) struct Step{const uint8_t next;const uint8_t dx;const uint8_t dy;};
// Inverse table entry: 2-bit digit `q` and next orientation for a given quadrant.
__declspec(align(4)) struct InvStep{const uint8_t q;const uint8_t next;};

// Indexed as [orientation][2-bit digit].
__declspec(align(64)) static const Step g_step_tbl[4][4]={
    { {Right,0u,0u},{Top,0u,1u},{Top,1u,1u},{Left,1u,0u} },
    { {Left,1u,1u},{Down,1u,0u},{Down,0u,0u},{Right,0u,1u} },
    { {Down,1u,1u},{Left,0u,1u},{Left,0u,0u},{Top,1u,0u} },
    { {Top,0u,0u},{Right,1u,0u},{Right,1u,1u},{Down,0u,1u} }};
// Indexed as [orientation][quadrant code (dx<<1)|dy].
__declspec(align(64)) static const InvStep g_inv_tbl[4][4]={
    { {0u,Right},{1u,Top},{3u,Left},{2u,Top} },
    { {2u,Down},{3u,Right},{1u,Down},{0u,Left} },
    { {2u,Left},{1u,Left},{3u,Top},{0u,Down} },
    { {0u,Top},{3u,Down},{1u,Right},{2u,Right} }};

// MPI environment/communicator; allocated in AgpInit and never freed.
static const boost::mpi::environment* g_env;
static const boost::mpi::communicator* g_world;

// Message exchanged between ranks on tag 0. kind==true is a termination request,
// otherwise (s_*, e_*) describe the sender's best interval and Rtop its characteristic.
__declspec(align(16)) struct CrossMsg{
    bool kind;
    float s_x1;float s_x2;float e_x1;float e_x2;
    float Rtop;
    template<typename Archive> void serialize(Archive& ar,const unsigned int){ar&kind&s_x1&s_x2&e_x1&e_x2&Rtop;}
};

// Bump-allocation arena: [base, end) with `current` as the next free byte.
__declspec(align(16)) struct Slab{
    char*const base;char* current;char*const end;
    __forceinline Slab(void*const memory,const size_t usable):base((char*)memory),current(base),end(base+(usable&~(size_t)63u)){}
};

// One 16 MiB slab per thread, created on first use; one byte per 4 KiB page is written up front.
static tbb::enumerable_thread_specific<Slab*> tls([]() noexcept{
    void* memory=_aligned_malloc(16777216u,16u);
    Slab* slab=(Slab*)_aligned_malloc(32u,16u);
    new (slab) Slab(memory,16777216u);
    char* p=slab->base;
    while(p<slab->end){*p=0;p+=4096u;}
    return slab;
});

// Parameters of the 2D curve mapping over [a,b]x[c,d] with `levels` subdivision levels
// and starting orientation `start`; scale = 4^levels.
__declspec(align(16)) struct Peano2DMap{
    const int levels;
    const float a,b,c,d;
    const float lenx,leny;
    const float inv_lenx;
    const uint32_t scale;
    const uint8_t start;
    __forceinline Peano2DMap(int L,float _a,float _b,float _c,float _d,uint8_t st)
        :levels(L),a(_a),b(_b),c(_c),d(_d),lenx(_b-_a),leny(_d-_c),inv_lenx(1.0f/(_b-_a)),scale((uint32_t)1u<<(L<<1)),start(st){}
};
// Re-constructed in place by AgpInit.
static Peano2DMap gActiveMap(0,0,0,0,0,0);

__forceinline bool ComparePtr(const struct Interval* a,const struct Interval* b) noexcept;

// Search interval [x1,x2] with end values y1,y2 and cached terms of the characteristic R.
// Instances are carved out of the calling thread's Slab in 64-byte steps.
__declspec(align(16)) struct Interval{
    const float x1;const float x2;const float y1;const float y2;
    const float delta_y;const float ordinate_factor;const float N_factor;const float quadratic_term;const float M;
    float R;
    // Bump allocation; there is no bounds check against Slab::end and no matching delete.
    __forceinline void* operator new(size_t) noexcept{Slab* s=tls.local();char* r=s->current;s->current+=64u;return r;}
    // _N is the dimension: N_factor = (x2-x1)^(1/_N), M = |y2-y1| / N_factor.
    __forceinline Interval(float _x1,float _x2,float _y1,float _y2,float _N) noexcept
        :x1(_x1),x2(_x2),y1(_y1),y2(_y2),delta_y(_y2-_y1),ordinate_factor(-(y1+y2)*2.0f),
         N_factor(_N==1.0f? _x2-_x1: _N==2.0f? sqrtf(_x2-_x1): powf(_x2-_x1,1.0f/_N)),
         quadratic_term((1.0f/N_factor)*delta_y*delta_y),M((1.0f/N_factor)*fabsf(delta_y)){}
    // R = quadratic_term/_m + _m*N_factor + ordinate_factor.
    __forceinline void ChangeCharacteristic(float _m) noexcept{R=fmaf(1.0f/_m,quadratic_term,fmaf(_m,N_factor,ordinate_factor));}
};

// Heap ordering: the interval with the largest R ends up at the front.
__forceinline bool ComparePtr(const Interval* a,const Interval* b) noexcept{return a->R<b->R;}

// Recomputes R for arr[0..n) with one shared m; blocks of 8 use AVX2 with a refined
// reciprocal estimate, the tail uses ChangeCharacteristic.
__forceinline void RecomputeR_ConstM_AVX2(Interval*const* arr,size_t n,float m){
    const __m256 vm=_mm256_set1_ps(m);
    __m256 vinvm=_mm256_rcp_ps(vm);
    vinvm=_mm256_mul_ps(vinvm,_mm256_fnmadd_ps(vm,vinvm,_mm256_set1_ps(2.0f)));  // one Newton step
    size_t i=0;
    const int limit=(int)(n&~7u);
    if(n>=8u){
        while(i<(size_t)limit){
            __declspec(align(32)) float q[8],nf[8],ord[8];
            int k=0;
            while(k<8){const Interval* p=arr[i+k];q[k]=p->quadratic_term;nf[k]=p->N_factor;ord[k]=p->ordinate_factor;++k;}
            const __m256 vq=_mm256_load_ps(q);
            const __m256 vnf=_mm256_load_ps(nf);
            const __m256 vod=_mm256_load_ps(ord);
            const __m256 t=_mm256_fmadd_ps(vm,vnf,vod);
            const __m256 res=_mm256_fmadd_ps(vq,vinvm,t);
            __declspec(align(32)) float out[8];
            _mm256_store_ps(out,res);
            k=0;
            while(k<8){arr[i+k]->R=out[k];++k;}
            i+=8u;
        }
    }
    while(i<n){arr[i]->ChangeCharacteristic(m);++i;}
}

// Recomputes R for arr[0..n) with a per-interval m_i = GF*(x2-x1) + alpha*M.
__forceinline void RecomputeR_AffineM_AVX2(Interval*const* arr,size_t n,float GF,float alpha){
    const __m256 vGF=_mm256_set1_ps(GF);
    const __m256 va=_mm256_set1_ps(alpha);
    size_t i=0;
    const int limit=(int)(n&~7u);
    if(n>=8u){
        while(i<(size_t)limit){
            __declspec(align(32)) float len[8],Mv[8],q[8],nf[8],ord[8];
            int k=0;
            while(k<8){const Interval* p=arr[i+k];len[k]=p->x2-p->x1;Mv[k]=p->M;q[k]=p->quadratic_term;nf[k]=p->N_factor;ord[k]=p->ordinate_factor;++k;}
            const __m256 vlen=_mm256_load_ps(len);
            const __m256 vM=_mm256_load_ps(Mv);
            const __m256 vq=_mm256_load_ps(q);
            const __m256 vnf=_mm256_load_ps(nf);
            const __m256 vod=_mm256_load_ps(ord);
            const __m256 vm=_mm256_fmadd_ps(vGF,vlen,_mm256_mul_ps(va,vM));
            __m256 vinvm=_mm256_rcp_ps(vm);
            vinvm=_mm256_mul_ps(vinvm,_mm256_fnmadd_ps(vm,vinvm,_mm256_set1_ps(2.0f)));
            const __m256 t=_mm256_fmadd_ps(vm,vnf,vod);
            const __m256 res=_mm256_fmadd_ps(vq,vinvm,t);
            __declspec(align(32)) float out[8];
            _mm256_store_ps(out,res);
            k=0;
            while(k<8){arr[i+k]->R=out[k];++k;}
            i+=8u;
        }
    }
    while(i<n){
        const Interval* p=arr[i];
        const float mi=fmaf(GF,(p->x2-p->x1),p->M*alpha);
        arr[i]->R=fmaf(1.0f/mi,p->quadratic_term,fmaf(mi,p->N_factor,p->ordinate_factor));
        ++i;
    }
}

// Shared "constant m" update used by all three solvers after an interval is split.
// If localM exceeds Mmax, Mmax and m=r*Mmax are raised; when the jump is at least 15%
// every interval in heap[0..n) is re-evaluated and the heap rebuilt. The two new
// children always get R computed with the current m.
static __forceinline void UpdateCharacteristicsConstM(Interval* left,Interval* right,Interval** heap,size_t n,float localM,float r,float& Mmax,float& m){
    if(localM>Mmax){
        const bool big_jump=!(localM<1.15f*Mmax);  // tested against the old Mmax
        Mmax=localM;
        m=r*Mmax;
        left->ChangeCharacteristic(m);
        right->ChangeCharacteristic(m);
        if(big_jump){
            if(n<64u){size_t i=0;while(i<n){heap[i]->ChangeCharacteristic(m);++i;}}
            else{RecomputeR_ConstM_AVX2(heap,n,m);}
            std::make_heap(heap,heap+n,ComparePtr);
        }
    }else{
        left->ChangeCharacteristic(m);
        right->ChangeCharacteristic(m);
    }
}

// Number of iterations between cross-rank exchanges for progress p.
// The raw formula can truncate to 0, which would make `it % T` a division by zero,
// so 0 is replaced by 1. Negative values are kept: only `x % T == 0` is tested.
static __forceinline int ExchangePeriod(float p){
    const int T=(int)fmaf(-expm1f(p),264.0f,277.0f);
    return T?T:1;
}

// Copies src[0..count) into a CoTaskMemAlloc buffer returned through out_data/out_len.
// On allocation failure *out_data is null and *out_len is 0.
static __forceinline void ExportFloats(const float* src,size_t count,float** out_data,size_t* out_len){
    *out_len=count;
    *out_data=(float*)CoTaskMemAlloc(sizeof(float)*count);
    if(*out_data){std::memcpy(*out_data,src,sizeof(float)*count);}
    else{*out_len=0;}
}

// Next trial point inside [x1,x2] for dimension _N, given end values y1,y2 and estimate _m.
// sign_mult is -1 when y2-y1 >= +0 and +1 otherwise (built from the sign bit of diff).
__forceinline float Shag(float _m,float x1,float x2,float y1,float y2,float _N,float _r){
    const float diff=y2-y1;
    const float sign_mult=_mm_cvtss_f32(_mm_castsi128_ps(_mm_set1_epi32(0x3F800000u|(((*((uint32_t*)&diff))&0x80000000u)^0x80000000u))));
    // General branch uses |diff|^N: the sign is already carried by sign_mult, and
    // powf(diff,N) would apply it a second time for odd N.
    return _N==1.0f? fmaf(-(1.0f/_m),diff,x1+x2)*0.5f
         : _N==2.0f? fmaf(sign_mult/(_m*_m),diff*diff*_r,x1+x2)*0.5f
         : fmaf(sign_mult/powf(_m,_N),powf(fabsf(diff),_N)*_r,x1+x2)*0.5f;
}

// Maps t in [0,1) to a point of the D-dimensional box [low,high] by de-interleaving
// the bits of an integer index, with a seed-dependent axis permutation and
// per-axis inversion mask.
struct MortonND{
    int dim;int levels;uint64_t scale;
    std::vector<float> low,high;
    std::vector<int> perm;
    std::vector<uint64_t> invMask;

    MortonND(int D,int L,const float* lows,const float* highs,uint32_t seed)
        :dim(D),levels(L),scale(1ull),low(lows,lows+D),high(highs,highs+D),perm(D),invMask(D,0){
        // Clamp levels first so that levels*D fits in 63 bits; the masks below must
        // be built from the clamped value or they would set bits outside the grid.
        int maxbits=63;
        int need=levels*D;
        if(need>maxbits){levels=maxbits/D;if(levels<1)levels=1;}
        int i=0;
        while(i<D){perm[i]=i;++i;}
        uint32_t st=seed?seed:0x9E3779B9u;
        for(int k=D-1;k>0;--k){
            float r;XOR_RAND(st,r);
            int j=(int)floorf(r*(k+1));
            if(j>k)j=k;  // r can round up to 1.0f, which would index past perm[k]
            int t=perm[k];perm[k]=perm[j];perm[j]=t;
        }
        for(int k=0;k<D;++k){float r;XOR_RAND(st,r);invMask[k]=(r>0.5f)?(((uint64_t)1<<levels)-1ull):0ull;}
        scale=1ull<<((uint64_t)levels*(uint64_t)D);
    }

    // Writes the centre of the grid cell addressed by t into out[0..dim).
    __forceinline void map01ToPoint(float t,float* out) const noexcept{
        if(t<0.0f)t=0.0f;else if(t>=1.0f)t=nextafterf(1.0f,0.0f);
        uint64_t idx=(uint64_t)(t*(double)scale);
        if(idx>=scale)idx=scale-1;
        std::vector<uint64_t> coord(dim,0ull);
        for(int bit=0;bit<levels;++bit){
            for(int d=0;d<dim;++d){int pd=perm[d];uint64_t b=(idx>>((bit*dim)+d))&1ull;coord[pd]|=(b<<bit);}
        }
        for(int d=0;d<dim;++d){if(invMask[d])coord[d]^=invMask[d];}
        const float invC=1.0f/(float)((uint64_t)1<<levels);
        for(int d=0;d<dim;++d){
            float c0=low[d]+(high[d]-low[d])*((float)coord[d]*invC);
            float cs=(high[d]-low[d])*invC;
            out[d]=fmaf(0.5f,cs,c0);
        }
    }

    // Inverse direction: grid cell containing in[0..dim) mapped back to [0,1).
    __forceinline float pointTo01(const float* in) const noexcept{
        std::vector<uint64_t> coord(dim,0ull);
        const float invC=1.0f/(float)((uint64_t)1<<levels);
        for(int d=0;d<dim;++d){
            float t=(in[d]-low[d])/(high[d]-low[d]);
            if(t<0.0f)t=0.0f;else if(t>0.99999994f)t=0.99999994f;
            uint64_t ci=(uint64_t)(t*(float)((uint64_t)1<<levels));
            if(ci>=((uint64_t)1<<levels))ci=((uint64_t)1<<levels)-1;
            coord[d]=ci;
        }
        for(int d=0;d<dim;++d){if(invMask[d])coord[d]^=invMask[d];}
        uint64_t idx=0ull;
        for(int bit=0;bit<levels;++bit){
            for(int d=0;d<dim;++d){int pd=perm[d];uint64_t b=(coord[pd]>>bit)&1ull;idx|=(b<<((bit*dim)+d));}
        }
        double tt=(double)idx/(double)scale;
        if(tt>=1.0)tt=nextafter(1.0,0.0);
        return (float)tt;
    }
};

// Wraps an angle into [-pi, pi).
__forceinline float wrap_pi_fast(float a){
    a=fmodf(a+3.14159265359f,6.28318530718f);
    if(a<0) a+=6.28318530718f;
    return a-3.14159265359f;
}

// Cost of a planar chain of n links. q holds n joint angles followed, when
// variableLen is set, by n link lengths (otherwise every link has length 1).
// Result = distance from the chain end to (targetX,targetY) plus weighted
// penalties for angle-range, length-range and neighbouring-angle violations.
struct ManipCost{
    int n;bool variableLen;
    float targetX,targetY;
    float thMin,thMax;
    float LMin,LMax;
    float dThetaMax;
    float lambdaCurv,lambdaAng,lambdaLen;
    __forceinline float operator()(const float* q) const noexcept{
        const float* th=q;
        const float* L=variableLen?(q+n):nullptr;
        float x=0.0f,y=0.0f,phi=0.0f;
        float penA=0.0f,penL=0.0f,penC=0.0f;
        float Lc=1.0f;
        for(int i=0;i<n;++i){
            const float t=th[i];
            float Li=variableLen? fminf(fmaxf(L[i],LMin),LMax):Lc;
            phi+=t;
            float c,s;FABE13_COS(phi,c);FABE13_SIN(phi,s);
            x=fmaf(Li,c,x);y=fmaf(Li,s,y);
            if(t<thMin)penA+=thMin-t;
            if(t>thMax)penA+=t-thMax;
            if(variableLen){if(L[i]<LMin)penL+=LMin-L[i]; if(L[i]>LMax)penL+=L[i]-LMax;}
        }
        for(int i=0;i<n-1;++i){
            float dth=wrap_pi_fast(th[i+1]-th[i]);
            float v=fabsf(dth)-dThetaMax;
            if(v>0.0f)penC+=v;
        }
        float dx=x-targetX;float dy=y-targetY;
        float dist=sqrtf(fmaf(dx,dx,dy*dy));
        return fmaf(lambdaAng,penA,fmaf(lambdaLen,penL,fmaf(lambdaCurv,penC,dist)));
    }
};

// Maps the 1D parameter x_param in [a,b] to the centre (out_x1,out_x2) of the
// corresponding cell of gActiveMap, walking g_step_tbl from the coarsest level.
__forceinline void HitTest2D_analytic(float x_param,float& out_x1,float& out_x2){
    const float a=gActiveMap.a;
    const float inv_lenx=gActiveMap.inv_lenx;
    const uint32_t scale=gActiveMap.scale;
    const uint32_t scale_minus_1=scale-1u;
    const float lenx=gActiveMap.lenx;
    const float leny=gActiveMap.leny;
    const float c=gActiveMap.c;
    const uint8_t start=gActiveMap.start;
    const int levels=gActiveMap.levels;
    float norm=(x_param-a)*inv_lenx;
    norm=fminf(fmaxf(norm,0.0f),0x1.fffffep-1f);
    uint32_t idx=(uint32_t)(norm*(float)scale);
    idx=idx>scale_minus_1?scale_minus_1:idx;
    float sx=lenx,sy=leny;
    float x1=a,x2=c;
    uint8_t type=start;
    int l=levels-1;
    while(l>=0){
        const uint32_t q=(idx>>(l*2))&3u;
        const Step s=g_step_tbl[type][q];
        type=s.next;
        sx*=0.5f;sy*=0.5f;
        x1+=s.dx? sx:0.0f;
        x2+=s.dy? sy:0.0f;
        --l;
    }
    out_x1=x1+sx*0.5f;
    out_x2=x2+sy*0.5f;
}

// Inverse of HitTest2D_analytic: 1D parameter of the cell containing (px,py),
// with the point first clamped to [a,b]x[c,d].
__forceinline float FindX2D_analytic(float px,float py){
    const float a=gActiveMap.a;const float b=gActiveMap.b;
    const float c=gActiveMap.c;const float d=gActiveMap.d;
    const float lenx=gActiveMap.lenx;const float leny=gActiveMap.leny;
    const uint32_t scale=gActiveMap.scale;
    const uint8_t start=gActiveMap.start;
    const int levels=gActiveMap.levels;
    const float clamped_px=fminf(fmaxf(px,a),b);
    const float clamped_py=fminf(fmaxf(py,c),d);
    float sx=lenx,sy=leny;
    float x0=a,y0=c;
    uint32_t idx=0u;
    uint8_t type=start;
    int l=0;
    while(l<levels){
        sx*=0.5f;sy*=0.5f;
        const float mx=x0+sx;const float my=y0+sy;
        const uint32_t tr=(uint32_t)((clamped_px>mx)&(clamped_py>my));
        const uint32_t tl=(uint32_t)((clamped_px<mx)&(clamped_py>my));
        const uint32_t dl=(uint32_t)((clamped_px<mx)&(clamped_py<my));
        const uint32_t none=(uint32_t)(1u^(tr|tl|dl));
        const uint32_t dd=(tr<<1)|tr|tl|(none<<1);  // quadrant code (dx<<1)|dy
        const InvStep inv=g_inv_tbl[type][dd];
        type=inv.next;
        idx=(idx<<2)|inv.q;
        const uint32_t dx=dd>>1;const uint32_t dy=dd&1u;
        x0+=dx? sx:0.0f;
        y0+=dy? sy:0.0f;
        ++l;
    }
    const float scale_recip=1.0f/(float)scale;
    return fmaf((float)idx*scale_recip,lenx,a);
}

// One rank's search over t in [0,1], mapped through `map` and scored by `cost`.
// H is the max-heap of intervals (by R); bestQ/bestF/bestX/bestY track the best
// evaluated configuration and its end-effector position. Ranks exchange their top
// interval on tag 0 every ExchangePeriod iterations and broadcast a termination
// message when the top interval is shorter than eps or maxIter is reached.
// `seed` is currently unused.
static __forceinline void agp_run_branch_mpi(const MortonND& map,const ManipCost& cost,int maxIter,float r,bool adaptive,float eps,unsigned seed,std::vector<Interval*>& H,std::vector<float>& bestQ,float& bestF,float& bestX,float& bestY){
    const int dim=cost.n+(cost.variableLen?cost.n:0);
    // Evaluates the cost at t and updates the running best.
    auto evalAt=[&](float t)->float{
        std::vector<float> q(dim);
        map.map01ToPoint(t,q.data());
        float f=cost(q.data());
        float x=0,y=0,phi=0;
        const float* th=q.data();
        const float* L=cost.variableLen?(q.data()+cost.n):nullptr;
        float Lc=1.0f;
        for(int i=0;i<cost.n;++i){
            phi+=th[i];
            float Li=cost.variableLen?L[i]:Lc;
            float c,s;FABE13_COS(phi,c);FABE13_SIN(phi,s);
            x=fmaf(Li,c,x);y=fmaf(Li,s,y);
        }
        if(f<bestF){bestF=f;bestQ=q;bestX=x;bestY=y;}
        return f;
    };
    float a=0.0f,b=1.0f;
    float f_a=evalAt(a);
    float f_b=evalAt(b);
    Interval* root=new Interval(a,b,f_a,f_b,(float)dim);
    float Mmax=root->M;
    float m=r*Mmax;
    root->ChangeCharacteristic(m);
    H.clear();
    H.reserve((size_t)maxIter);
    H.push_back(root);
    std::make_heap(H.begin(),H.end(),ComparePtr);
    float dmax=b-a;
    const float initial_len=dmax;
    const float thr03=0.3f*initial_len;
    const float inv_thr03=1.0f/thr03;
    int it=0;
    const int rank=g_world->rank();
    const int world=g_world->size();
    while(it<maxIter){
        // Drain pending messages from other ranks.
        while(g_world->iprobe(boost::mpi::any_source,0)){
            CrossMsg in;g_world->recv(boost::mpi::any_source,0,in);
            if(in.kind){
                // NOTE(review): on rank 0 this only leaves the message loop, the
                // search itself continues - confirm that is intended.
                if(!rank){ break; }else{ return; }
            }else{
                float sx=in.s_x1,ex=in.e_x1;
                if(sx<0.0f)sx=0.0f;
                if(ex>1.0f)ex=1.0f;
                if(ex<=sx) continue;
                std::vector<float> tmp(dim);
                map.map01ToPoint(sx,tmp.data());float y1=cost(tmp.data());
                map.map01ToPoint(ex,tmp.data());float y2=cost(tmp.data());
                Interval* inj=new Interval(sx,ex,y1,y2,(float)dim);
                inj->ChangeCharacteristic(m);
                if(!H.empty()){
                    Interval* top=H.front();
                    if(inj->R>1.15f*top->R){
                        inj->R=in.Rtop;
                        H.emplace_back(inj);
                        std::push_heap(H.begin(),H.end(),ComparePtr);
                    }
                }else{
                    H.emplace_back(inj);
                    std::push_heap(H.begin(),H.end(),ComparePtr);
                }
            }
        }
        // Split the interval with the largest characteristic.
        std::pop_heap(H.begin(),H.end(),ComparePtr);
        Interval* cur=H.back();H.pop_back();
        const float x1=cur->x1,x2=cur->x2,y1=cur->y1,y2=cur->y2;
        float tNew=Shag(m,x1,x2,y1,y2,(float)dim,r);
        if(tNew<a)tNew=a; else if(tNew>b)tNew=b;
        float fNew=evalAt(tNew);
        Interval* left=new Interval(x1,tNew,y1,fNew,(float)dim);
        Interval* right=new Interval(tNew,x2,fNew,y2,(float)dim);
        float Mloc=left->M>right->M?left->M:right->M;
        if(adaptive){
            float len1=tNew-x1,len2=x2-tNew;
            if(len1+len2==dmax){
                // The longest interval was just split: rescan for the new maximum.
                dmax=len1>len2?len1:len2;
                for(auto p:H){float L=p->x2-p->x1; if(L>dmax)dmax=L;}
            }
            if((thr03>dmax && !(it%3)) || (10.0f*dmax<initial_len)){
                if(Mloc>Mmax){Mmax=Mloc; m=r*Mmax;}
                float progress=1.0f-(dmax*inv_thr03);
                float alpha=progress*progress;
                float beta=2.0f-alpha;
                float MULT=(Mmax/dmax);
                float global_coeff=fmaf(MULT,r,-MULT);
                float GF=beta*global_coeff;
                left->ChangeCharacteristic(fmaf(GF,len1,left->M*alpha));
                right->ChangeCharacteristic(fmaf(GF,len2,right->M*alpha));
                size_t sz=H.size();
                if(sz<64u){
                    size_t i=0;
                    while(i<sz){Interval* p=H[i];float len=p->x2-p->x1;p->ChangeCharacteristic(fmaf(GF,len,p->M*alpha));++i;}
                }else{
                    RecomputeR_AffineM_AVX2(H.data(),sz,GF,alpha);
                }
                std::make_heap(H.begin(),H.end(),ComparePtr);
            }else{
                UpdateCharacteristicsConstM(left,right,H.data(),H.size(),Mloc,r,Mmax,m);
            }
        }else{
            UpdateCharacteristicsConstM(left,right,H.data(),H.size(),Mloc,r,Mmax,m);
        }
        H.push_back(left);std::push_heap(H.begin(),H.end(),ComparePtr);
        H.push_back(right);std::push_heap(H.begin(),H.end(),ComparePtr);
        Interval* top=H.front();
        float interval_len=top->x2-top->x1;
        const float p=fmaf(-1.0f/initial_len,dmax,1.0f);
        const int T=ExchangePeriod(p);
        bool want_term=(interval_len<eps)||(it==maxIter-1);
        if(!(it%T)||want_term){
            CrossMsg out;
            out.kind=want_term;
            out.s_x1=top->x1;out.e_x1=top->x2;
            out.s_x2=0.0f;out.e_x2=0.0f;
            out.Rtop=top->R;
            // NOTE(review): the request objects returned by isend are discarded - confirm
            // against the Boost.MPI documentation that this is safe for serialized types.
            for(int i=0;i<world;++i){if(i!=rank) g_world->isend(i,0,out);}
            if(want_term){break;}
        }
        ++it;
    }
}

// Per-rank summary sent to rank 0 on tag 2 (the configuration itself follows on tag 3).
struct BestPacket{
    float bestF;int dim;float bestX;float bestY;
    template<typename Archive> void serialize(Archive& ar,const unsigned int){ar&bestF&dim&bestX&bestY;}
};

// Runs agp_run_branch_mpi on this rank and gathers the results on rank 0.
// Rank 0 writes the winning configuration (CoTaskMemAlloc buffer, release with
// AGP_Free), its length, end-effector position and cost through the out_* pointers;
// other ranks send their result to rank 0 and leave the out_* pointers untouched.
// `orientations` is currently unused.
extern "C" __declspec(dllexport) __declspec(noalias) void AGP_Manip2D(int nSegments,float thetaMin,float thetaMax,bool variableLengths,float lenMin,float lenMax,float deltaThetaMax,float lambdaCurv,float lambdaAngle,float lambdaLength,float targetX,float targetY,int peanoLevels,int orientations,int maxIterPerBranch,float r,bool adaptiveMode,float epsilon,unsigned int seed,float** out_bestQ,size_t* out_bestQLen,float* out_bestX,float* out_bestY,float* out_bestF){
    // Rewind the thread slab as AGP_1D/AGP_2D do; otherwise every call keeps
    // allocating Interval objects further into (and eventually past) the arena.
    Slab* slab=tls.local();
    slab->current=slab->base;
    const int dim=nSegments+(variableLengths?nSegments:0);
    std::vector<float> low(dim),high(dim);
    for(int i=0;i<nSegments;++i){low[i]=thetaMin;high[i]=thetaMax;}
    if(variableLengths){for(int i=0;i<nSegments;++i){low[nSegments+i]=lenMin;high[nSegments+i]=lenMax;}}
    ManipCost cost{nSegments,variableLengths,targetX,targetY,thetaMin,thetaMax,lenMin,lenMax,deltaThetaMax,lambdaCurv,lambdaAngle,lambdaLength};
    const int rank=g_world->rank();
    const int world=g_world->size();
    // Each rank gets a different seed, hence a different permutation/inversion.
    MortonND map(dim,peanoLevels,low.data(),high.data(),seed+0x9E3779B9u*rank);
    std::vector<Interval*> H;
    std::vector<float> bestQ;
    float bestF=FLT_MAX,bx=0.0f,by=0.0f;
    agp_run_branch_mpi(map,cost,maxIterPerBranch,r,adaptiveMode,epsilon,seed,H,bestQ,bestF,bx,by);
    BestPacket me{bestF,dim,bx,by};
    if(!rank){
        std::vector<float> winnerQ=bestQ;
        float winF=bestF,wx=bx,wy=by;
        for(int i=1;i<world;++i){
            BestPacket bp;g_world->recv(i,2,bp);
            std::vector<float> qin;g_world->recv(i,3,qin);
            if(bp.bestF<winF){winF=bp.bestF;wx=bp.bestX;wy=bp.bestY;winnerQ=qin;}
        }
        ExportFloats(winnerQ.data(),winnerQ.size(),out_bestQ,out_bestQLen);
        *out_bestX=wx;*out_bestY=wy;*out_bestF=winF;
    }else{
        g_world->send(0,2,me);
        g_world->send(0,3,bestQ);
    }
}

// Not referenced anywhere in this translation unit.
static const boost::mpi::environment* __restrict gg_env;
static const boost::mpi::communicator* __restrict gg_world;

// 1D test function: negative sum of 10 reciprocal terms whose parameters come
// from the xorshift stream seeded with `seed`.
__forceinline float ShekelFunc(float x,float seed){
    int i=0;
    float st=seed,r1,r2,res=0.0f;
    while(i<10){
        XOR_RAND(st,r1);
        float xp=fmaf(-r1,10.0f,x);
        XOR_RAND(st,r1);XOR_RAND(st,r2);
        float d=fmaf(fmaf(r1,20.0f,5.0f),xp*xp,fmaf(r2,0.2f,1.0f));
        d=copysignf(fmaxf(fabsf(d),FLT_MIN),d);  // keep |d| away from zero
        res-=1.0f/d;
        ++i;
    }
    return res;
}

// 2D test function built from x1^2+x2^2 and cos(2*pi*x) terms.
__forceinline float RastriginFunc(float x1,float x2){
    const float t=fmaf(x1,x1,x2*x2);
    float c1,c2;
    FABE13_COS(6.28318530717958647692f*x1,c1);
    FABE13_COS(6.28318530717958647692f*x2,c2);
    return (t-fmaf(c1+c2,10.0f,-14.6f))*fmaf(-t,0.25f,18.42f);
}

// 1D test function: 14-harmonic trigonometric sum with seed-derived coefficients.
__forceinline float HillFunc(float x,float seed){
    int j=0;
    __declspec(align(32)) float ang[14u];
    float st=6.28318530717958647692f*x;
    while(j<14){ang[j]=st*(float)(j+1);++j;}
    __declspec(align(32)) float sv[14u],cv[14u];
    FABE13_SINCOS(ang,sv,cv,14u);
    float state=seed,r1,r2;
    XOR_RAND(state,r1);
    float res=fmaf(r1,2.0f,-1.1f);
    --j;
    while(j>=0){
        XOR_RAND(state,r1);XOR_RAND(state,r2);
        res+=fmaf(fmaf(r1,2.0f,-1.1f),sv[j],fmaf(r2,2.0f,-1.1f)*cv[j]);
        --j;
    }
    return res;
}

// 2D test function: -sqrt(p1^2+p2^2) over an 8x8 trigonometric double sum with
// seed-derived coefficients.
__forceinline float GrishaginFunc(float x1,float x2,float seed){
    int j=0;
    __declspec(align(32)) float aj[8u],ak[8u];
    while(j<8){
        float pj=3.14159265358979323846f*(float)(j+1);
        aj[j]=pj*x1;ak[j]=pj*x2;
        ++j;
    }
    __declspec(align(32)) float sj[8u],cj[8u],sk[8u],ck[8u];
    FABE13_SINCOS(aj,sj,cj,8u);
    FABE13_SINCOS(ak,sk,ck,8u);
    --j;
    float p1=0.0f,p2=0.0f;
    float st=seed,r1,r2;
    while(j>=0){
        size_t k=0u;
        while(k<8u){
            float s=sj[j]*sj[j];
            float c=ck[k]*ck[k];
            XOR_RAND_GRSH(st,r1);XOR_RAND_GRSH(st,r2);
            p1=fmaf(r1,s,fmaf(r2,c,p1));
            XOR_RAND_GRSH(st,r1);XOR_RAND_GRSH(st,r2);
            p2=fmaf(-r1,c,fmaf(r2,s,p2));
            ++k;
        }
        --j;
    }
    return -sqrtf(fmaf(p1,p1,p2*p2));
}

// Creates the MPI environment/communicator, rebuilds gActiveMap for [a,b]x[c,d]
// with an orientation chosen by rank (0 Top, 1 Down, 2 Left, otherwise Right)
// and returns this process's rank.
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline int AgpInit(int peanoLevel,float a,float b,float c,float d){
    g_env=new boost::mpi::environment();
    g_world=new boost::mpi::communicator();
    const int rank=g_world->rank();
    switch(rank){
        case 0:new(&gActiveMap) Peano2DMap(peanoLevel,a,b,c,d,(uint8_t)Top);break;
        case 1:new(&gActiveMap) Peano2DMap(peanoLevel,a,b,c,d,(uint8_t)Down);break;
        case 2:new(&gActiveMap) Peano2DMap(peanoLevel,a,b,c,d,(uint8_t)Left);break;
        default:new(&gActiveMap) Peano2DMap(peanoLevel,a,b,c,d,(uint8_t)Right);
    }
    return rank;
}

// Single-process 1D search of ShekelFunc on [a,b].
// out_data receives (value, point) pairs for every improvement followed by the
// iteration count and the final interval length; release it with AGP_Free.
// Stops after global_iterations trials or when the shorter half of the split
// interval drops below epsilon. `mode` enables the adaptive update.
extern "C" __declspec(dllexport) __declspec(noalias) void AGP_1D(float global_iterations,float a,float b,float r,bool mode,float epsilon,float seed,float** out_data,size_t* out_len){
    Slab* slab=tls.local();
    slab->current=slab->base;
    int schetchick=0;  // trial counter
    const float initial_length=b-a;
    float dmax=initial_length;
    const float threshold_03=0.3f*initial_length;
    const float inv_threshold_03=1.0f/threshold_03;
    const float start_val=ShekelFunc(a,seed);
    float best_f=ShekelFunc(b,seed);
    float x_Rmax_1=a;float x_Rmax_2=b;
    float y_Rmax_1=start_val;float y_Rmax_2=best_f;
    std::vector<float,boost::alignment::aligned_allocator<float,16u>> Extr;
    std::vector<Interval*,boost::alignment::aligned_allocator<Interval*,64u>> R;
    Extr.reserve((size_t)global_iterations<<2u);
    R.reserve((size_t)global_iterations<<1u);
    R.emplace_back(new Interval(a,b,start_val,best_f,1.0f));
    float Mmax=R.front()->M;
    float m=r*Mmax;
    while(true){
        const float new_point=Shag(m,x_Rmax_1,x_Rmax_2,y_Rmax_1,y_Rmax_2,1.0f,r);
        const float new_value=ShekelFunc(new_point,seed);
        if(new_value<best_f){best_f=new_value;Extr.emplace_back(best_f);Extr.emplace_back(new_point);}
        // The popped interval stays at R.back() and is overwritten by `curr` below.
        std::pop_heap(R.begin(),R.end(),ComparePtr);
        const Interval* pro=R.back();
        const float new_x1=pro->x1;const float new_x2=pro->x2;
        const float len2=new_x2-new_point;const float len1=new_point-new_x1;
        const float interval_len=len1<len2?len1:len2;
        if(++schetchick==(int)global_iterations||interval_len<epsilon){
            Extr.emplace_back((float)schetchick);
            Extr.emplace_back(interval_len);
            ExportFloats(Extr.data(),Extr.size(),out_data,out_len);
            return;
        }
        Interval* curr=new Interval(new_x1,new_point,pro->y1,new_value,1.0f);
        Interval* curr1=new Interval(new_point,new_x2,new_value,pro->y2,1.0f);
        const float currM=curr->M>curr1->M?curr->M:curr1->M;
        const size_t r_size=R.size();
        if(mode){
            if(len2+len1==dmax){
                dmax=len2>len1?len2:len1;
                size_t i=0u;
                while(i<r_size){const float li=R[i]->x2-R[i]->x1;if(li>dmax)dmax=li;++i;}
            }
            if((threshold_03>dmax&&!(schetchick%3))||10.0f*dmax<initial_length){
                if(currM>Mmax){Mmax=currM;m=r*Mmax;}
                const float progress=fmaf(-inv_threshold_03,dmax,1.0f);
                const float alpha=fmaf(progress,progress,1.0f);
                const float betta=2.0f-alpha;
                const float MULT=(1.0f/dmax)*Mmax;
                const float global_coeff=fmaf(MULT,r,-MULT);
                const float GF=betta*global_coeff;
                curr->ChangeCharacteristic(fmaf(GF,len1,curr->M*alpha));
                curr1->ChangeCharacteristic(fmaf(GF,len2,curr1->M*alpha));
                if(r_size<64u){
                    size_t i=0u;
                    while(i<r_size){R[i]->ChangeCharacteristic(fmaf(GF,R[i]->x2-R[i]->x1,R[i]->M*alpha));++i;}
                }else{
                    RecomputeR_AffineM_AVX2(R.data(),r_size,GF,alpha);
                }
                std::make_heap(R.begin(),R.end(),ComparePtr);
            }else{
                UpdateCharacteristicsConstM(curr,curr1,R.data(),r_size,currM,r,Mmax,m);
            }
        }else{
            UpdateCharacteristicsConstM(curr,curr1,R.data(),r_size,currM,r,Mmax,m);
        }
        R.back()=curr;
        std::push_heap(R.begin(),R.end(),ComparePtr);
        R.emplace_back(curr1);
        std::push_heap(R.begin(),R.end(),ComparePtr);
        const Interval* top=R.front();
        x_Rmax_1=top->x1;x_Rmax_2=top->x2;
        y_Rmax_1=top->y1;y_Rmax_2=top->y2;
    }
}

// Multi-rank 2D search of RastriginFunc over [a,b]x[c,d] through gActiveMap.
// Ranks exchange their best interval on tag 0; a termination message stops all
// of them. Only rank 0 fills out_data: (value, x1, x2) triples per improvement,
// then the iteration count and last interval length; release it with AGP_Free.
// `N` is currently unused.
extern "C" __declspec(dllexport) __declspec(noalias) void AGP_2D(float N,float global_iterations,float a,float b,float c,float d,float r,bool mode,float epsilon,float seed,float** out_data,size_t* out_len){
    Slab* slab=tls.local();
    slab->current=slab->base;
    int schetchick=0;  // trial counter
    int no_improve=0;
    const int rank=g_world->rank();
    const int world_size=g_world->size();
    // Discard messages left over on tag 0 from a previous run.
    while(g_world->iprobe(boost::mpi::any_source,0)){CrossMsg dummy;g_world->recv(boost::mpi::any_source,0,dummy);}
    const float inv_divider=ldexpf(1.0f,-((gActiveMap.levels<<1)+1));
    const float x_add=(b-a)*inv_divider;
    const float y_add=(d-c)*inv_divider;
    const float true_start=a+x_add;
    const float true_end=b-x_add;
    float x_Rmax_1=true_start;float x_Rmax_2=true_end;
    const float initial_length=x_Rmax_2-x_Rmax_1;
    float dmax=initial_length;
    const float threshold_03=0.3f*initial_length;
    const float inv_threshold_03=1.0f/threshold_03;
    const float start_val=rank%3? RastriginFunc(true_end,d-y_add):RastriginFunc(true_start,c+y_add);
    float best_f=rank%2? RastriginFunc(true_start,d-y_add):RastriginFunc(true_end,c+y_add);
    float y_Rmax_1=start_val;float y_Rmax_2=best_f;
    std::vector<float,boost::alignment::aligned_allocator<float,16u>> Extr;
    std::vector<Interval*,boost::alignment::aligned_allocator<Interval*,64u>> R;
    Extr.reserve((size_t)global_iterations<<2u);
    R.reserve((size_t)global_iterations<<1u);
    R.emplace_back(new Interval(true_start,true_end,start_val,best_f,2.0f));
    float Mmax=R.front()->M;
    float m=r*Mmax;
    // Give the root a defined characteristic and point top_ptr at it: both are
    // read by the message loop below before the first split assigns them.
    R.front()->ChangeCharacteristic(m);
    const Interval* top_ptr=R.front();
    while(true){
        const float interval_len=x_Rmax_2-x_Rmax_1;
        const bool stagnation=no_improve>100&&schetchick>270;
        const float p=fmaf(-1.0f/initial_length,dmax,1.0f);
        while(g_world->iprobe(boost::mpi::any_source,0)){
            CrossMsg in;g_world->recv(boost::mpi::any_source,0,in);
            if(in.kind){
                if(!rank){
                    Extr.emplace_back((float)schetchick);
                    Extr.emplace_back(interval_len);
                    ExportFloats(Extr.data(),Extr.size(),out_data,out_len);
                }
                return;
            }
            float sx=FindX2D_analytic(in.s_x1,in.s_x2);
            float ex=FindX2D_analytic(in.e_x1,in.e_x2);
            Interval* inj=new Interval(sx,ex,RastriginFunc(in.s_x1,in.s_x2),RastriginFunc(in.e_x1,in.e_x2),2.0f);
            inj->ChangeCharacteristic(m);
            if(inj->R>1.15f*top_ptr->R){
                const float k=stagnation? fmaf((1.0f/expm1f(1.0f))*1.0f,expm1f(p),0.3f): fmaf((1.0f/expm1f(1.0f))*0.6f,expm1f(p),0.6f);
                inj->R=in.Rtop*k;  // CrossMsg carries Rtop directly
                R.emplace_back(inj);
                std::push_heap(R.begin(),R.end(),ComparePtr);
            }
        }
        const int T=ExchangePeriod(p);
        const bool want_term=interval_len<epsilon||schetchick==(int)global_iterations;
        if(!(++schetchick%T)||stagnation||want_term){
            CrossMsg out;
            out.kind=want_term;
            float s_x1,s_x2,e_x1,e_x2;
            HitTest2D_analytic(top_ptr->x1,s_x1,s_x2);
            HitTest2D_analytic(top_ptr->x2,e_x1,e_x2);
            out.s_x1=s_x1;out.s_x2=s_x2;out.e_x1=e_x1;out.e_x2=e_x2;
            out.Rtop=top_ptr->R;
            int i=0;
            while(i<world_size){if(i!=rank)g_world->isend(i,0,out);++i;}
            if(out.kind){
                if(!rank){
                    Extr.emplace_back((float)schetchick);
                    Extr.emplace_back(interval_len);
                    ExportFloats(Extr.data(),Extr.size(),out_data,out_len);
                }
                return;
            }
        }
        const float new_point=Shag(m,x_Rmax_1,x_Rmax_2,y_Rmax_1,y_Rmax_2,2.0f,r);
        float nx1,nx2;
        HitTest2D_analytic(new_point,nx1,nx2);
        const float new_value=RastriginFunc(nx1,nx2);
        if(new_value<best_f){
            best_f=new_value;
            Extr.emplace_back(best_f);Extr.emplace_back(nx1);Extr.emplace_back(nx2);
            no_improve=0;
        }else{
            ++no_improve;
        }
        // The popped interval stays at R.back() and is overwritten by `curr` below.
        std::pop_heap(R.begin(),R.end(),ComparePtr);
        Interval* pro=R.back();
        const float seg_x1=pro->x1;const float seg_x2=pro->x2;
        const float len2=seg_x2-new_point;const float len1=new_point-seg_x1;
        Interval* curr=new Interval(seg_x1,new_point,pro->y1,new_value,2.0f);
        Interval* curr1=new Interval(new_point,seg_x2,new_value,pro->y2,2.0f);
        const float currM=curr->M>curr1->M?curr->M:curr1->M;
        const size_t r_size=R.size();
        if(mode){
            if(len2+len1==dmax){
                dmax=len2>len1?len2:len1;
                size_t i=0u;
                while(i<r_size){const float li=R[i]->x2-R[i]->x1;if(li>dmax)dmax=li;++i;}
            }
            if((threshold_03>dmax&&!(schetchick%3))||10.0f*dmax<initial_length){
                if(currM>Mmax){Mmax=currM;m=r*Mmax;}
                const float progress=fmaf(-inv_threshold_03,dmax,1.0f);
                const float alpha=fmaf(progress,progress,1.0f);
                const float betta=2.0f-alpha;
                const float MULT=(1.0f/dmax)*Mmax;
                const float global_coeff=fmaf(MULT,r,-MULT);
                const float GF=betta*global_coeff;
                curr->ChangeCharacteristic(fmaf(GF,len1,curr->M*alpha));
                curr1->ChangeCharacteristic(fmaf(GF,len2,curr1->M*alpha));
                if(r_size<64u){
                    size_t i=0u;
                    while(i<r_size){R[i]->ChangeCharacteristic(fmaf(GF,R[i]->x2-R[i]->x1,R[i]->M*alpha));++i;}
                }else{
                    RecomputeR_AffineM_AVX2(R.data(),r_size,GF,alpha);
                }
                std::make_heap(R.begin(),R.end(),ComparePtr);
            }else{
                UpdateCharacteristicsConstM(curr,curr1,R.data(),r_size,currM,r,Mmax,m);
            }
        }else{
            UpdateCharacteristicsConstM(curr,curr1,R.data(),r_size,currM,r,Mmax,m);
        }
        R.back()=curr;
        std::push_heap(R.begin(),R.end(),ComparePtr);
        R.emplace_back(curr1);
        std::push_heap(R.begin(),R.end(),ComparePtr);
        top_ptr=R.front();
        x_Rmax_1=top_ptr->x1;x_Rmax_2=top_ptr->x2;
        y_Rmax_1=top_ptr->y1;y_Rmax_2=top_ptr->y2;
    }
}

// Parameters of one manipulator run, sent from rank 0 to the workers on tag 1.
// Defined at file scope because a local class may not declare a member template,
// and so that sender and receiver share a single definition. bool fields travel
// as uint8_t.
struct RunParams{
    int nSegments;
    float thMin,thMax;
    uint8_t varLen;
    float Lmin,Lmax;
    float dthMax,lamC,lamA,lamL;
    float tx,ty;
    int levels,orient,maxIter;
    float r;
    uint8_t adaptive;
    float eps;
    unsigned int seed;
    template<typename Archive> void serialize(Archive& ar,const unsigned int){
        ar&nSegments&thMin&thMax&varLen&Lmin&Lmax&dthMax&lamC&lamA&lamL&tx&ty&levels&orient&maxIter&r&adaptive&eps&seed;
    }
};

// Worker loop: polls for RunParams from rank 0 on tag 1, runs AGP_Manip2D with
// them and frees any returned buffer. Never returns.
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AgpWaitStartAndRun(){
    while(true){
        if(g_world->iprobe(0,1)){
            RunParams p;g_world->recv(0,1,p);
            float* q=nullptr;size_t qlen=0;float bx=0,by=0,bf=0;
            AGP_Manip2D(p.nSegments,p.thMin,p.thMax,(bool)p.varLen,p.Lmin,p.Lmax,p.dthMax,p.lamC,p.lamA,p.lamL,p.tx,p.ty,p.levels,p.orient,p.maxIter,p.r,(bool)p.adaptive,p.eps,p.seed,&q,&qlen,&bx,&by,&bf);
            if(q) CoTaskMemFree(q);
        }
        Sleep(0);  // yield between polls
    }
}

// Sends the integer 0 on tag 1 to every rank except 0.
// NOTE(review): AgpWaitStartAndRun receives a RunParams on tag 1, so this payload
// does not match what the workers expect - confirm whether this entry point is
// still meant to be used (AgpStartManipND sends a proper RunParams).
extern "C" __declspec(dllexport) __declspec(noalias) void AgpStartWorkers(){
    int i=1;
    const int world=g_world->size();
    while(i<world){g_world->isend(i,1,0);++i;}
}

// Packs the arguments into RunParams and sends them on tag 1 to every rank
// from 1 upwards other than the caller.
extern "C" __declspec(dllexport) __declspec(noalias) void AgpStartManipND(int nSegments,float thetaMin,float thetaMax,bool variableLengths,float lenMin,float lenMax,float deltaThetaMax,float lambdaCurv,float lambdaAngle,float lambdaLength,float targetX,float targetY,int peanoLevels,int orientations,int maxIterPerBranch,float r,bool adaptiveMode,float epsilon,unsigned int seed){
    RunParams p;
    p.nSegments=nSegments;
    p.thMin=thetaMin;p.thMax=thetaMax;
    p.varLen=(uint8_t)variableLengths;
    p.Lmin=lenMin;p.Lmax=lenMax;
    p.dthMax=deltaThetaMax;
    p.lamC=lambdaCurv;p.lamA=lambdaAngle;p.lamL=lambdaLength;
    p.tx=targetX;p.ty=targetY;
    p.levels=peanoLevels;p.orient=orientations;p.maxIter=maxIterPerBranch;
    p.r=r;
    p.adaptive=(uint8_t)adaptiveMode;
    p.eps=epsilon;
    p.seed=seed;
    const int rank=g_world->rank();
    const int world=g_world->size();
    // NOTE(review): the isend requests are discarded here as well - see agp_run_branch_mpi.
    for(int i=1;i<world;++i){if(i!=rank) g_world->isend(i,1,p);}
}

// Alias kept for older callers; forwards to AgpStartWorkers.
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AgpStartWorkersLegacy(){AgpStartWorkers();}

// Releases a buffer returned by AGP_1D / AGP_2D / AGP_Manip2D.
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AGP_Free(float* p){CoTaskMemFree(p);}

что изменено и зачем:

  • вернул схему запуска через mpiexec: в MyForm.cpp снова AgpInit() и ветвление по rank; процессы rank>0 крутятся в AgpWaitStartAndRun().
  • добавил экспорт AgpStartManipND(...) и вызов его из UI перед оптимизацией, чтобы раскидывать параметры запуска всем воркерам через MPI (тег 1). интерфейс формы не менял визуально.
  • реализовал многомерную развертку по Мортону MortonND с перестановками осей и инверсиями; добавил обратное преобразование pointTo01, жестко ограничиваю levels так, чтобы не переполнить 64-битный индекс.
  • реализовал многомерную ветку AGP с обменом интервалами между процессами через CrossMsg по параметру t∈[0,1]. инъекции создаются как интервалы [t1,t2] с пересчетом функции в концах.
  • AGP_Manip2D(...) теперь — ND-решатель манипулятора на MPI: каждый процесс идёт со своей ориентацией (seed смещается на rank), обменивается топ-интервалами, по завершении rank 0 собирает лучшие ответы со всех рангов (теги 2/3) и возвращает глобально лучший в UI.
  • зафиксировал число ориентаций на уровне UI равным 6 по умолчанию, чтобы задействовать все ядра; сама функция не полагается на параметр orientations, а использует реальное число процессов.
  • ускорил целевую функцию манипулятора: везде быстрые тригонометрии FABE13_COS/FABE13_SIN, редукция угла, штраф за кривизну через wrap_pi_fast, минимум аллокаций, fmaf.
  • добавил скалярный быстрый синус FABE13_SIN по той же схеме полиномов, что уже использовалась.
  • обмен и терминация — по ускоренной схеме, как в вашей 2D-версии: периодические рассылки топ-интервала, адаптивная перенастройка m, векторизованные пересчёты R через AVX2.
  • формат сообщений и базовые структуры (Interval, AVX2-рутины, Hilbert 2D часть) оставил, старые 1D/2D функции оставлены для совместимости.
  • UI не трогал по разметке, только добавил вызов AgpStartManipND и жёстко поставил orientations=6.

запуск, как и раньше:

mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe

ранг 0 показывает интерфейс, остальные — рабочие процессы.

このQ&Aを共有