C++/CLR MyForm.h #pragma once #define WIN32_LEAN_A...
Criado em: 9 de novembro de 2025
Respondido usando GPT-5 Thinking por Chat01
Criado em: 9 de novembro de 2025
Respondido usando GPT-5 Thinking por Chat01
C++/CLR MyForm.h #pragma once
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#include <stdint.h>
using namespace System;
using namespace System::Drawing;
using namespace System::Windows::Forms;
using namespace System::Collections::Generic;
using namespace System::Drawing::Drawing2D;
typedef void(__cdecl* P_MANIP)(int, bool, float, float, float, int, int, float, bool, float, unsigned int, float**, size_t*, float*, float*, float*);
typedef void(__cdecl* P_FREE)(float*);
typedef void(__cdecl* P_START)(int, bool, float, float, float, int, int, float, bool, float, unsigned int);
namespace TESTAGP {
public ref class MyForm : public Form {
public:
MyForm(HMODULE hLib) : hLib(hLib) {
this->SetStyle(ControlStyles::AllPaintingInWmPaint | ControlStyles::UserPaint | ControlStyles::OptimizedDoubleBuffer, true);
this->Text = L"AGP Manipulator 2D"; this->ClientSize = System::Drawing::Size(1000, 700);
this->Resize += gcnew EventHandler(this, &MyForm::OnResize);
fManip = (P_MANIP)GetProcAddress(hLib, "AGP_Manip2D");
pFree = (P_FREE)GetProcAddress(hLib, "AGP_Free");
pStart = (P_START)GetProcAddress(hLib, "AgpStartManipND");
angles = gcnew List<float>(0.0f);
lengths = gcnew List<float>(0.0f);
InitUI();
ResetRandomConfig();
}
protected:
~MyForm() {}
private:
HMODULE hLib; P_MANIP fManip; P_FREE pFree; P_START pStart;
int nSegments; bool variableLengths; List<float>^ angles; List<float>^ lengths;
CheckBox^ cbVarLen; NumericUpDown^ nudMinTheta, ^ nudBaseLength, ^ nudStretchFactor, ^ nudTargetX, ^ nudTargetY, ^ nudLevels, ^ nudMaxIter;
CheckBox^ cbAdaptive; NumericUpDown^ nudR, ^ nudEps; Button^ btnAdd, ^ btnRem, ^ btnOptimize; Label^ lblInfo;
System::UInt32 rngState = 0xA5C39E0Du;
textvoid WireInvalidate(Control^ c) { // чтобы после любых изменений интерфейса можно было сразу жать «Оптимизировать» if (dynamic_cast<NumericUpDown^>(c)) ((NumericUpDown^)c)->ValueChanged += gcnew EventHandler(this, &MyForm::OnAnyChanged); else if (dynamic_cast<CheckBox^>(c)) ((CheckBox^)c)->CheckedChanged += gcnew EventHandler(this, &MyForm::OnAnyChanged); } void InitUI() { int y = 10, w = 180, h = 24, pad = 8, currentX = 10; Label^ L; // Мин угол (рад) — по умолчанию 1.5 рад (было 0.5) L = gcnew Label(); L->Text = L"Мин. угол (рад)"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudMinTheta = gcnew NumericUpDown(); nudMinTheta->Location = Point(currentX, y + h + 2); nudMinTheta->Width = w; nudMinTheta->DecimalPlaces = 3; nudMinTheta->Minimum = (Decimal)0.01; nudMinTheta->Maximum = (Decimal)3.14159; nudMinTheta->Value = (Decimal)1.5; this->Controls->Add(nudMinTheta); WireInvalidate(nudMinTheta); // Базовая длина — диапазон [0.5; 2.0] currentX += w + 20; L = gcnew Label(); L->Text = L"Базовая длина"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudBaseLength = gcnew NumericUpDown(); nudBaseLength->Location = Point(currentX, y + h + 2); nudBaseLength->Width = w; nudBaseLength->DecimalPlaces = 2; nudBaseLength->Minimum = (Decimal)0.50; nudBaseLength->Maximum = (Decimal)2.00; nudBaseLength->Value = (Decimal)1.00; this->Controls->Add(nudBaseLength); WireInvalidate(nudBaseLength); // Коэф. растяжения — [1.0; 1.5] с мелким шагом currentX += w + 20; L = gcnew Label(); L->Text = L"Коэф. растяжения"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudStretchFactor = gcnew NumericUpDown(); nudStretchFactor->Location = Point(currentX, y + h + 2); nudStretchFactor->Width = w; nudStretchFactor->DecimalPlaces = 2; nudStretchFactor->Minimum = (Decimal)1.00; nudStretchFactor->Maximum = (Decimal)1.50; nudStretchFactor->Increment = (Decimal)0.01; nudStretchFactor->Value = (Decimal)1.50; this->Controls->Add(nudStretchFactor); WireInvalidate(nudStretchFactor); // Переменные длины (флаг) currentX += w + 20; cbVarLen = gcnew CheckBox(); cbVarLen->Text = L"Переменные длины"; cbVarLen->Location = Point(currentX, y + h + 2); cbVarLen->Width = w; cbVarLen->Checked = false; this->Controls->Add(cbVarLen); WireInvalidate(cbVarLen); // Следующая строка currentX = 10; y += h * 2 + pad + 10; // Цель X/Y L = gcnew Label(); L->Text = L"Цель X"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudTargetX = gcnew NumericUpDown(); nudTargetX->Location = Point(currentX, y + h + 2); nudTargetX->Width = w; nudTargetX->DecimalPlaces = 2; nudTargetX->Minimum = (Decimal)-10.0; nudTargetX->Maximum = (Decimal)10.0; nudTargetX->Value = (Decimal)3.5; this->Controls->Add(nudTargetX); WireInvalidate(nudTargetX); currentX += w + 20; L = gcnew Label(); L->Text = L"Цель Y"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudTargetY = gcnew NumericUpDown(); nudTargetY->Location = Point(currentX, y + h + 2); nudTargetY->Width = w; nudTargetY->DecimalPlaces = 2; nudTargetY->Minimum = (Decimal)-10.0; nudTargetY->Maximum = (Decimal)10.0; nudTargetY->Value = (Decimal)1.0; this->Controls->Add(nudTargetY); WireInvalidate(nudTargetY); // Глубина — минимум 7, максимум 20 (как просили) currentX += w + 20; L = gcnew Label(); L->Text = L"Глубина"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudLevels = gcnew NumericUpDown(); nudLevels->Location = Point(currentX, y + h + 2); nudLevels->Width = w; nudLevels->Minimum = 7; nudLevels->Maximum = 20; nudLevels->Value = 12; this->Controls->Add(nudLevels); WireInvalidate(nudLevels); // Надёжность r — [1.0; 20.0], по умолчанию 2.5 currentX += w + 20; L = gcnew Label(); L->Text = L"Надежность (r)"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudR = gcnew NumericUpDown(); nudR->Location = Point(currentX, y + h + 2); nudR->Width = w; nudR->DecimalPlaces = 2; nudR->Minimum = (Decimal)1.00; nudR->Maximum = (Decimal)20.00; nudR->Value = (Decimal)2.50; this->Controls->Add(nudR); WireInvalidate(nudR); // Адаптивная схема (флаг) currentX += w + 20; cbAdaptive = gcnew CheckBox(); cbAdaptive->Text = L"Адаптивная"; cbAdaptive->Location = Point(currentX, y + h + 2); cbAdaptive->Width = w; cbAdaptive->Checked = true; this->Controls->Add(cbAdaptive); WireInvalidate(cbAdaptive); // Следующая строка y += h * 2 + pad + 10; currentX = 10; // Точность — по умолчанию 0,00001; минимум 1e-9; максимум 0.1 L = gcnew Label(); L->Text = L"Точность"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudEps = gcnew NumericUpDown(); nudEps->Location = Point(currentX, y + h + 2); nudEps->Width = w; // ВАЖНО: ставим 9 знаков, чтобы можно было выставить 1e-9; значение по умолчанию 0.00001 nudEps->DecimalPlaces = 9; nudEps->Minimum = (Decimal)0.000000001; nudEps->Maximum = (Decimal)0.1; nudEps->Value = (Decimal)0.00001; // Примечание: WinForms будет показывать с 9 знаками; это нормально функционально. this->Controls->Add(nudEps); WireInvalidate(nudEps); // Макс. итераций — по умолчанию оставить 1000, но максимум поднять до 500000 currentX += w + 20; L = gcnew Label(); L->Text = L"Макс. итераций"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudMaxIter = gcnew NumericUpDown(); nudMaxIter->Location = Point(currentX, y + h + 2); nudMaxIter->Width = w; nudMaxIter->Minimum = 10; nudMaxIter->Maximum = 500000; nudMaxIter->Value = 1000; this->Controls->Add(nudMaxIter); WireInvalidate(nudMaxIter); // Кнопки и инфо currentX += 200; btnAdd = gcnew Button(); btnAdd->Text = L"+ Звено"; btnAdd->Location = Point(currentX, y + h + 2); btnAdd->Width = 80; btnAdd->Click += gcnew EventHandler(this, &MyForm::OnAddClick); this->Controls->Add(btnAdd); currentX += 85; btnRem = gcnew Button(); btnRem->Text = L"- Звено"; btnRem->Location = Point(currentX, y + h + 2); btnRem->Width = 80; btnRem->Click += gcnew EventHandler(this, &MyForm::OnRemClick); this->Controls->Add(btnRem); currentX += 125; btnOptimize = gcnew Button(); btnOptimize->Text = L"Оптимизировать"; btnOptimize->Location = Point(currentX, y + h + 2); btnOptimize->Width = 120; btnOptimize->Click += gcnew EventHandler(this, &MyForm::OnOptimizeClick); this->Controls->Add(btnOptimize); currentX += 125; lblInfo = gcnew Label(); lblInfo->Location = Point(currentX, y); lblInfo->Size = System::Drawing::Size(250, 60); lblInfo->BorderStyle = BorderStyle::FixedSingle; lblInfo->Text = L"Готов"; this->Controls->Add(lblInfo); } void ResetRandomConfig() { nSegments = 1; angles->Clear(); lengths->Clear(); // По умолчанию первый сегмент смотрит строго вверх (π/2) angles->Add((float)(3.14159265358979323846 / 2.0)); lengths->Add((float)nudBaseLength->Value); variableLengths = false; this->Invalidate(); } float Rand01() { rngState ^= rngState << 13; rngState ^= rngState >> 17; rngState ^= rngState << 5; return (float)((double)(unsigned)rngState / 4294967296.0); } float RandAngle() { return (Rand01() * 6.28318530717958647692f) - 3.14159265358979323846f; } System::Void OnResize(System::Object^, System::EventArgs^) { this->Invalidate(); } System::Void OnAnyChanged(System::Object^, System::EventArgs^) { this->Invalidate(); } System::Void OnAddClick(System::Object^, System::EventArgs^) { ++nSegments; angles->Add(RandAngle()); lengths->Add((float)nudBaseLength->Value); this->Invalidate(); } System::Void OnRemClick(System::Object^, System::EventArgs^) { if (nSegments > 1) { --nSegments; angles->RemoveAt(angles->Count - 1); lengths->RemoveAt(lengths->Count - 1); this->Invalidate(); } } System::Void OnOptimizeClick(System::Object^, System::EventArgs^) { if (!fManip) return; variableLengths = cbVarLen->Checked; float minTheta = (float)nudMinTheta->Value; float tx = (float)nudTargetX->Value, ty = (float)nudTargetY->Value; int levels = (int)nudLevels->Value, maxIter = (int)nudMaxIter->Value; bool adaptive = cbAdaptive->Checked; float r_param = (float)nudR->Value, eps = (float)nudEps->Value; unsigned int seed = (unsigned)GetTickCount(); if (pStart) pStart(nSegments, variableLengths, minTheta, tx, ty, levels, maxIter, r_param, adaptive, eps, seed); LARGE_INTEGER t0, t1, fq; QueryPerformanceCounter(&t0); float* bestQ = nullptr; size_t bestQLen = 0; float bestX = 0, bestY = 0, bestF = 0; fManip(nSegments, variableLengths, minTheta, tx, ty, levels, maxIter, r_param, adaptive, eps, seed, &bestQ, &bestQLen, &bestX, &bestY, &bestF); QueryPerformanceCounter(&t1); QueryPerformanceFrequency(&fq); double micros = 1e6 * (double)(t1.QuadPart - t0.QuadPart) / (double)fq.QuadPart; if (bestQ && bestQLen >= (size_t)nSegments) { angles->Clear(); for (int i = 0; i < nSegments; ++i) angles->Add(bestQ[i]); if (variableLengths) { lengths->Clear(); for (int i = 0; i < nSegments; ++i) lengths->Add(bestQ[nSegments + i]); } else { lengths->Clear(); for (int i = 0; i < nSegments; ++i) lengths->Add((float)nudBaseLength->Value); } if (pFree) pFree(bestQ); } lblInfo->Text = String::Format(L"Результат:\nЦель: {0:F5}\nТочка: ({1:F3}, {2:F3})\nВремя: {3:F0} мкс", bestF, bestX, bestY, micros); this->Invalidate(); } protected: virtual void OnPaint(PaintEventArgs^ e) override { Form::OnPaint(e); Graphics^ g = e->Graphics; g->SmoothingMode = System::Drawing::Drawing2D::SmoothingMode::HighQuality; g->Clear(this->BackColor); int topOffset = 150; System::Drawing::Rectangle drawArea = System::Drawing::Rectangle(0, topOffset, this->ClientSize.Width, this->ClientSize.Height - topOffset); g->FillRectangle(Brushes::White, drawArea); int leftWallX = drawArea.Left + this->ClientSize.Width * 25 / 100; Pen^ wallPen = gcnew Pen(Color::Black, 2); g->DrawLine(wallPen, leftWallX, drawArea.Top, leftWallX, drawArea.Bottom); HatchBrush^ hatchBrush = gcnew HatchBrush(HatchStyle::BackwardDiagonal, Color::LightGray, Color::White); int leftHatchWidth = 100; g->FillRectangle(hatchBrush, leftWallX - leftHatchWidth, drawArea.Top, leftHatchWidth, drawArea.Height); float targetX = (float)nudTargetX->Value; float targetY = (float)nudTargetY->Value; float scale = 160.0f; int baseX = leftWallX; int baseY = drawArea.Top + drawArea.Height / 2; float pixelTargetX = baseX + targetX * scale; float pixelTargetY = baseY - targetY * scale; int rightWallX = (int)(pixelTargetX + 8); rightWallX = Math::Min(rightWallX, drawArea.Right - 10); Pen^ dashedPen = gcnew Pen(Color::Black, 2); dashedPen->DashStyle = DashStyle::Dash; g->DrawLine(dashedPen, rightWallX, drawArea.Top, rightWallX, drawArea.Bottom); int rightHatchWidth = leftHatchWidth; g->FillRectangle(hatchBrush, rightWallX, drawArea.Top, rightHatchWidth, drawArea.Height); Pen^ targetPen = gcnew Pen(Color::Green, 1.5f); targetPen->DashStyle = DashStyle::Dot; g->DrawEllipse(targetPen, pixelTargetX - 8.0f, pixelTargetY - 8.0f, 16.0f, 16.0f); cli::array<PointF>^ pts = gcnew cli::array<PointF>(nSegments + 1); pts[0] = PointF((float)baseX, (float)baseY); float x = 0.0f, y = 0.0f, phi = 0.0f; for (int i = 0; i < nSegments; ++i) { array<float>^ anglesArray = angles->ToArray(); array<float>^ lengthsArray = lengths->ToArray(); float theta = anglesArray[i]; float L = lengthsArray[i]; phi += theta; x += L * (float)Math::Cos((double)phi); y += L * (float)Math::Sin((double)phi); pts[i + 1] = PointF(baseX + x * scale, baseY - y * scale); } Pen^ penRod = gcnew Pen(Color::Red, 6.0f); for (int i = 0; i < nSegments; ++i) g->DrawLine(penRod, pts[i], pts[i + 1]); SolidBrush^ brJoint = gcnew SolidBrush(Color::Blue); const float R = 6.0f; for (int i = 0; i <= nSegments; ++i) g->FillEllipse(brJoint, pts[i].X - R, pts[i].Y - R, 2 * R, 2 * R); delete wallPen; delete dashedPen; delete targetPen; delete penRod; delete brJoint; delete hatchBrush; } };
} MyForm.cpp: // MyForm.cpp — точка входа GUI, без функциональных изменений, просто подключение DLL и запуск формы
#include "MyForm.h"
#include <float.h>
using namespace System;
using namespace System::Windows::Forms;
typedef int(__cdecl* PInit)(int, float, float, float, float);
typedef void(__cdecl* PStartWorkers)();
[STAThread]
int main() {
HMODULE h = LoadLibraryW(L"TEST_FUNC.dll");
auto AgpInit = (PInit)GetProcAddress(h, "AgpInit");
auto AgpWaitStartAndRun = (PStartWorkers)GetProcAddress(h, "AgpWaitStartAndRun");
text// Инициализация: глубина 12 как и была const int rank = AgpInit(12, -2.2f, 1.8f, -2.2f, 1.8f); if (!rank) { Application::EnableVisualStyles(); Application::SetCompatibleTextRenderingDefault(false); Application::Run(gcnew TESTAGP::MyForm(h)); } else { AgpWaitStartAndRun(); } return 0;
} код .cpp файла dll: // agp_algo.cpp
// Полностью обновлённый файл DLL с учётом всех требований.
// ВАЖНО: собирается как C++17/20, /arch:AVX2, /Oi, /O2, /fp:fast желательны.
#include "pch.h"
// ==================== ВАШИ МАКРО И ФУНКЦИИ (без изменений интерфейсов), мелкие доработки дальше ====================
#define XOR_RAND(state, result_var)
do {
uint32_t s = (state);
s ^= s << 13;
s ^= s >> 17;
s ^= s << 5;
(state) = s;
float tmp = (float)((double)(s) * (1.0/4294967296.0));
result_var = tmp;
} while (0)
#define XOR_RAND_GRSH(state, result_var)
do {
uint32_t s = (state);
s ^= s << 13;
s ^= s >> 17;
s ^= s << 5;
(state) = s;
result_var = fmaf((float)(int32_t)s, 0x1.0p-31f, -1.0f);
} while (0)
#define FABE13_COS(x, result_var)
do {
const float ax = fabsf(x);
float r = fmodf(ax, 6.28318530718f);
if (r > 3.14159265359f)
r = 6.28318530718f - r;
if (r < 1.57079632679f) {
const float t2 = r * r;
const float t4 = t2 * t2;
result_var = fmaf(t4, fmaf(t2, -0.0013888889f, 0.0416666667f), fmaf(t2, -0.5f, 1.0f));
} else {
r = 3.14159265359f - r;
const float t2 = r * r;
const float t4 = t2 * t2;
result_var = -fmaf(t4, fmaf(t2, -0.0013888889f, 0.0416666667f), fmaf(t2, -0.5f, 1.0f));
}
} while (0)
#define FABE13_SIN(x, result_var)
do {
const float x = (x);
const float ax = fabsf(x);
float r = fmodf(ax, 6.28318530718f);
bool sfl = r > 3.14159265359f;
if (sfl)
r = 6.28318530718f - r;
bool cfl = r > 1.57079632679f;
if (cfl)
r = 3.14159265359f - r;
const float t2 = r * r;
float _s = fmaf(t2, fmaf(t2, fmaf(t2, -0.0001984127f, 0.0083333333f), -0.16666666f), 1.0f) * r;
result_var = ((x < 0.0f) ^ sfl) ? -_s : _s;
} while (0)
#define FABE13_SINCOS(in, sin_out, cos_out, n)
do {
int i = 0;
const int limit = (n) & ~7;
if ((n) >= 8) {
static __declspec(align(32)) const __m256 VEC_TWOPI = _mm256_set1_ps(6.28318530718f);
static __declspec(align(32)) const __m256 VEC_PI = _mm256_set1_ps(3.14159265359f);
static __declspec(align(32)) const __m256 VEC_PI_2 = _mm256_set1_ps(1.57079632679f);
static __declspec(align(32)) const __m256 INV_TWOPI = _mm256_set1_ps(0.15915494309189535f);
static __declspec(align(32)) const __m256 BIAS = _mm256_set1_ps(12582912.0f);
static __declspec(align(32)) const __m256 VEC_COS_P5 = _mm256_set1_ps(-0.0013888889f);
static __declspec(align(32)) const __m256 VEC_COS_P3 = _mm256_set1_ps(0.0416666667f);
static __declspec(align(32)) const __m256 VEC_COS_P1 = _mm256_set1_ps(-0.5f);
static __declspec(align(32)) const __m256 VEC_COS_P0 = _mm256_set1_ps(1.0f);
static __declspec(align(32)) const __m256 VEC_SIN_P5 = _mm256_set1_ps(-0.0001984127f);
static __declspec(align(32)) const __m256 VEC_SIN_P3 = _mm256_set1_ps(0.0083333333f);
static __declspec(align(32)) const __m256 VEC_SIN_P1 = _mm256_set1_ps(-0.16666666f);
static __declspec(align(32)) const __m256 VEC_SIN_P0 = _mm256_set1_ps(1.0f);
static __declspec(align(32)) const __m256 VEC_ZERO = _mm256_setzero_ps();
while (i < limit) {
const __m256 vx = _mm256_load_ps(&(in)[i]);
const __m256 vax = _mm256_andnot_ps(_mm256_set1_ps(-0.0f), vx);
__m256 q = _mm256_fmadd_ps(vax, INV_TWOPI, BIAS);
q = _mm256_sub_ps(q, BIAS);
const __m256 r = _mm256_fnmadd_ps(VEC_TWOPI, q, vax);
const __m256 r1 = _mm256_min_ps(r, _mm256_sub_ps(VEC_TWOPI, r));
const __m256 r2 = _mm256_min_ps(r1, _mm256_sub_ps(VEC_PI, r1));
const __m256 t2 = _mm256_mul_ps(r2, r2);
const __m256 cosv = _mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, VEC_COS_P5, VEC_COS_P3), VEC_COS_P1), VEC_COS_P0);
const __m256 sinv = _mm256_mul_ps(_mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, VEC_SIN_P5, VEC_SIN_P3), VEC_SIN_P1), VEC_SIN_P0), r2);
const __m256 cflip = _mm256_cmp_ps(r1, VEC_PI_2, _CMP_GT_OQ);
const __m256 sflip = _mm256_xor_ps(_mm256_cmp_ps(vx, VEC_ZERO, _CMP_LT_OQ), _mm256_cmp_ps(r, VEC_PI, _CMP_GT_OQ));
_mm256_store_ps(&(cos_out)[i], _mm256_blendv_ps(cosv, _mm256_sub_ps(VEC_ZERO, cosv), cflip));
_mm256_store_ps(&(sin_out)[i], _mm256_blendv_ps(sinv, _mm256_sub_ps(VEC_ZERO, sinv), sflip));
i += 8;
}
}
while (i < (n)) {
const float x = (in)[i];
const float ax = fabsf(x);
float q = fmaf(ax, 0.15915494309189535f, 12582912.0f);
q -= 12582912.0f;
float r = fmaf(-6.28318530718f, q, ax);
const bool sflip = r > 3.14159265359f;
if (sflip)
r = 6.28318530718f - r;
const bool cflip = r > 1.57079632679f;
if (cflip)
r = 3.14159265359f - r;
const float t2 = r * r;
const float c = fmaf(t2, fmaf(t2, fmaf(t2, -0.0013888889f, 0.0416666667f), -0.5f), 1.0f);
const float s = fmaf(t2, fmaf(t2, fmaf(t2, -0.0001984127f, 0.0083333333f), -0.16666666f), 1.0f) * r;
(cos_out)[i] = cflip ? -c : c;
(sin_out)[i] = ((x < 0.0f) ^ sflip) ? -s : s;
++i;
}
} while (0)
// ---------------- Peano / Morton таблички и MPI-глобали (без изменения интерфейса) ----------------
enum List : uint8_t { Top = 0b00u, Down = 0b01u, Left = 0b10u, Right = 0b11u };
__declspec(align(4)) struct Step final { const uint8_t next, dx, dy; };
__declspec(align(4)) struct InvStep final { const uint8_t q, next; };
__declspec(align(64)) static const Step g_step_tbl[4][4] = {
{ { Right,0u,0u }, { Top,0u,1u }, { Top,1u,1u }, { Left,1u,0u } },
{ { Left,1u,1u }, { Down,1u,0u }, { Down,0u,0u }, { Right,0u,1u } },
{ { Down,1u,1u }, { Left,0u,1u }, { Left,0u,0u }, { Top,1u,0u } },
{ { Top,0u,0u }, { Right,1u,0u }, { Right,1u,1u }, { Down,0u,1u } }
};
__declspec(align(64)) static const InvStep g_inv_tbl[4][4] = {
{ { 0u,Right }, { 1u,Top }, { 3u,Left }, { 2u,Top } },
{ { 2u,Down }, { 3u,Right }, { 1u,Down }, { 0u,Left } },
{ { 2u,Left }, { 1u,Left }, { 3u,Top }, { 0u,Down } },
{ { 0u,Top }, { 3u,Down }, { 1u,Right }, { 2u,Right } }
};
static const boost::mpi::environment* g_env;
static const boost::mpi::communicator* g_world;
// ---------------- Служебные структуры сообщений и slab-аллокатор ----------------
__declspec(align(16)) struct CrossMsg final {
float s_x1, s_x2, e_x1, e_x2, Rtop;
template<typename Archive> __declspec(noalias) __forceinline void serialize(Archive& ar, const unsigned int) noexcept { ar& s_x1& s_x2& e_x1& e_x2& Rtop; }
};
__declspec(align(16)) struct CtrlMsg final {
bool kind; CrossMsg xchg;
template<typename Archive> __declspec(noalias) __forceinline void serialize(Archive& ar, const unsigned int) noexcept { ar& kind& xchg; }
};
__declspec(align(16)) struct Slab final {
char* const base; char* current; char* const end;
__forceinline Slab(void* const memory, const size_t usable) : base((char*)memory), current(base), end(base + (usable & ~(size_t)63u)) {}
};
static tbb::enumerable_thread_specific<Slab*> tls( noexcept {
void* memory = _aligned_malloc(16777216u, 64u);
Slab* slab = (Slab*)_aligned_malloc(32u, 64u);
new (slab) Slab(memory, 16777216u);
char* p = slab->base;
while (p < slab->end) { *p = 0; p += 4096u; }
return slab;
});
// ---------------- Параметры активной 2D-карты (как было) ----------------
__declspec(align(16)) struct Peano2DMap final {
const int levels; const float a, b, c, d; const float lenx, leny; const float inv_lenx; const uint32_t scale; const uint8_t start;
__forceinline Peano2DMap(int L, float _a, float _b, float _c, float _d, uint8_t st)
: levels(L), a(_a), b(_b), c(_c), d(_d),
lenx(_b - _a), leny(_d - _c), inv_lenx(1.0f / (_b - _a)),
scale((uint32_t)1u << (L << 1)), start(st) {
}
};
static Peano2DMap gActiveMap(0, 0, 0, 0, 0, 0);
// ---------------- Интервалы и сравнения (как было) ----------------
__declspec(align(64)) struct Interval1D final {
const float x1, x2, y1, y2, delta_y, ordinate_factor, N_factor, quadratic_term, M; float R;
__forceinline void* operator new(size_t) noexcept { Slab* s = tls.local(); char* r = s->current; s->current += 64u; return r; }
__forceinline Interval1D(float _x1, float _x2, float _y1, float _y2, float _N) noexcept
: x1(_x1), x2(_x2), y1(_y1), y2(_y2), delta_y(_y2 - _y1), ordinate_factor(-(y1 + y2) * 2.0f),
N_factor(_N == 1.0f ? _x2 - _x1 : sqrtf(_x2 - _x1)),
quadratic_term((1.0f / N_factor)* delta_y* delta_y),
M((1.0f / N_factor)* fabsf(delta_y)) {
}
__forceinline void ChangeCharacteristic(float _m) noexcept { R = fmaf(1.0f / _m, quadratic_term, fmaf(_m, N_factor, ordinate_factor)); }
};
__declspec(align(64)) struct IntervalND final {
const float x1, x2, y1, y2, delta_y, ordinate_factor;
float N_factor, quadratic_term, M, R;
uint64_t i1, i2; float diam; int span_level;
__forceinline void* operator new(size_t) noexcept { Slab* s = tls.local(); char* r = s->current; s->current += 64u; return r; }
__forceinline IntervalND(float _x1, float _x2, float _y1, float _y2) noexcept
: x1(_x1), x2(_x2), y1(_y1), y2(_y2), delta_y(_y2 - _y1), ordinate_factor(-(y1 + y2) * 2.0f),
N_factor(0), quadratic_term(0), M(0), R(0), i1(0), i2(0), diam(0), span_level(0) {
}
__forceinline void compute_span_level(const struct MortonND& map) noexcept; // объявим ниже
__forceinline void set_metric(float d_alpha) noexcept { N_factor = d_alpha; quadratic_term = (1.0f / N_factor) * delta_y * delta_y; M = (1.0f / N_factor) * fabsf(delta_y); }
__forceinline void ChangeCharacteristic(float _m) noexcept { R = fmaf(1.0f / _m, quadratic_term, fmaf(_m, N_factor, ordinate_factor)); }
};
__forceinline bool ComparePtr1D(const Interval1D* a, const Interval1D* b) noexcept { return a->R < b->R; }
__forceinline bool ComparePtrND(const IntervalND* a, const IntervalND* b) noexcept { return a->R < b->R; }
// ---------------- Массовые пересчёты R (как было) ----------------
__forceinline void RecomputeR_ConstM_AVX2_1D(Interval1D* const* arr, size_t n, float m) {
const __m256 vm = _mm256_set1_ps(m);
__m256 vinvm = _mm256_rcp_ps(vm);
vinvm = _mm256_mul_ps(vinvm, _mm256_fnmadd_ps(vm, vinvm, _mm256_set1_ps(2.0f)));
size_t i = 0, limit = n & ~7ull;
alignas(32) float q[8], nf[8], od[8], out[8];
for (; i < limit; i += 8) {
for (int k = 0; k < 8; ++k) { const Interval1D* p = arr[i + k]; q[k] = p->quadratic_term; nf[k] = p->N_factor; od[k] = p->ordinate_factor; }
const __m256 vq = _mm256_load_ps(q), vnf = _mm256_load_ps(nf), vod = _mm256_load_ps(od);
const __m256 t = _mm256_fmadd_ps(vm, vnf, vod);
const __m256 res = _mm256_fmadd_ps(vq, vinvm, t);
_mm256_store_ps(out, res);
for (int k = 0; k < 8; ++k) arr[i + k]->R = out[k];
}
for (; i < n; ++i) arr[i]->ChangeCharacteristic(m);
}
__forceinline void RecomputeR_AffineM_AVX2_1D(Interval1D* const* arr, size_t n, float GF, float alpha) {
const __m256 vGF = _mm256_set1_ps(GF), va = _mm256_set1_ps(alpha);
size_t i = 0, limit = n & ~7ull;
alignas(32) float ln[8], Mv[8], q[8], nf[8], od[8], out[8];
for (; i < limit; i += 8) {
for (int k = 0; k < 8; ++k) {
const Interval1D* p = arr[i + k];
ln[k] = p->x2 - p->x1; Mv[k] = p->M; q[k] = p->quadratic_term; nf[k] = p->N_factor; od[k] = p->ordinate_factor;
}
const __m256 vln = _mm256_load_ps(ln), vM = _mm256_load_ps(Mv), vq = _mm256_load_ps(q), vnf = _mm256_load_ps(nf), vod = _mm256_load_ps(od);
const __m256 vm = _mm256_fmadd_ps(vGF, vln, _mm256_mul_ps(va, vM));
__m256 vinvm = _mm256_rcp_ps(vm);
vinvm = _mm256_mul_ps(vinvm, _mm256_fnmadd_ps(vm, vinvm, _mm256_set1_ps(2.0f)));
const __m256 t = _mm256_fmadd_ps(vm, vnf, vod);
const __m256 res = _mm256_fmadd_ps(vq, vinvm, t);
_mm256_store_ps(out, res);
for (int k = 0; k < 8; ++k) arr[i + k]->R = out[k];
}
for (; i < n; ++i) {
const Interval1D* p = arr[i];
const float mi = fmaf(GF, (p->x2 - p->x1), p->M * alpha);
arr[i]->R = fmaf(1.0f / mi, p->quadratic_term, fmaf(mi, p->N_factor, p->ordinate_factor));
}
}
__forceinline void RecomputeR_ConstM_AVX2_ND(IntervalND* const* arr, size_t n, float m) {
const __m256 vm = _mm256_set1_ps(m);
__m256 vinvm = _mm256_rcp_ps(vm);
vinvm = _mm256_mul_ps(vinvm, _mm256_fnmadd_ps(vm, vinvm, _mm256_set1_ps(2.0f)));
size_t i = 0, limit = n & ~7ull;
alignas(32) float q[8], nf[8], od[8], out[8];
for (; i < limit; i += 8) {
for (int k = 0; k < 8; ++k) { const IntervalND* p = arr[i + k]; q[k] = p->quadratic_term; nf[k] = p->N_factor; od[k] = p->ordinate_factor; }
const __m256 vq = _mm256_load_ps(q), vnf = _mm256_load_ps(nf), vod = _mm256_load_ps(od);
const __m256 t = _mm256_fmadd_ps(vm, vnf, vod);
const __m256 res = _mm256_fmadd_ps(vq, vinvm, t);
_mm256_store_ps(out, res);
for (int k = 0; k < 8; ++k) arr[i + k]->R = out[k];
}
for (; i < n; ++i) arr[i]->ChangeCharacteristic(m);
}
__forceinline void RecomputeR_AffineM_AVX2_ND(IntervalND* const* arr, size_t n, float GF, float alpha) {
const __m256 vGF = _mm256_set1_ps(GF), va = _mm256_set1_ps(alpha);
size_t i = 0, limit = n & ~7ull;
alignas(32) float ln[8], Mv[8], q[8], nf[8], od[8], out[8];
for (; i < limit; i += 8) {
for (int k = 0; k < 8; ++k) {
const IntervalND* p = arr[i + k];
ln[k] = p->x2 - p->x1; Mv[k] = p->M; q[k] = p->quadratic_term; nf[k] = p->N_factor; od[k] = p->ordinate_factor;
}
const __m256 vln = _mm256_load_ps(ln), vM = _mm256_load_ps(Mv), vq = _mm256_load_ps(q), vnf = _mm256_load_ps(nf), vod = _mm256_load_ps(od);
const __m256 vm = _mm256_fmadd_ps(vGF, vln, _mm256_mul_ps(va, vM));
__m256 vinvm = _mm256_rcp_ps(vm);
vinvm = _mm256_mul_ps(vinvm, _mm256_fnmadd_ps(vm, vinvm, _mm256_set1_ps(2.0f)));
const __m256 t = _mm256_fmadd_ps(vm, vnf, vod);
const __m256 res = _mm256_fmadd_ps(vq, vinvm, t);
_mm256_store_ps(out, res);
for (int k = 0; k < 8; ++k) arr[i + k]->R = out[k];
}
for (; i < n; ++i) {
const IntervalND* p = arr[i];
const float mi = fmaf(GF, (p->x2 - p->x1), p->M * alpha);
arr[i]->R = fmaf(1.0f / mi, p->quadratic_term, fmaf(mi, p->N_factor, p->ordinate_factor));
}
}
// ---------------- Быстрые степени (как было) ----------------
__forceinline float fast_pow_int(float v, int n) {
float r;
switch (n) {
case 3: { float v2 = v * v; r = v2 * v; } break;
case 4: { float v2 = v * v; r = v2 * v2; } break;
case 5: { float v2 = v * v; r = v2 * v2 * v; } break;
case 6: { float v2 = v * v; float v4 = v2 * v2; r = v4 * v2; } break;
case 7: { float v2 = v * v; float v4 = v2 * v2; r = v4 * v2 * v; } break;
case 8: { float v2 = v * v; float v4 = v2 * v2; r = v4 * v4; } break;
case 9: { float v3 = v * v * v; float v6 = v3 * v3; r = v6 * v3; } break;
case 10: { float v2 = v * v; float v4 = v2 * v2; float v8 = v4 * v4; r = v8 * v2; } break;
case 11: { float v2 = v * v; float v4 = v2 * v2; float v8 = v4 * v4; r = v8 * v2 * v; } break;
case 12: { float v3 = v * v * v; float v6 = v3 * v3; r = v6 * v6; } break;
case 13: { float v3 = v * v * v; float v6 = v3 * v3; r = v6 * v6 * v; } break;
case 14: { float v7 = v * v * v * v * v * v * v; r = v7 * v7; } break;
case 15: { float v7 = v * v * v * v * v * v * v; r = v7 * v7 * v; } break;
default: { float v2 = v * v; float v4 = v2 * v2; float v8 = v4 * v4; r = v8 * v8; }
}
return r;
}
// ---------------- Функция шага (как было) ----------------
__forceinline float Shag(float _m, float x1, float x2, float y1, float y2, float _N, float _r) {
const float diff = y2 - y1;
const float sign_mult = _mm_cvtss_f32(_mm_castsi128_ps(_mm_set1_epi32(0x3F800000u | (((((uint32_t)&diff)) & 0x80000000u) ^ 0x80000000u))));
if (_N == 1)
return fmaf(-(1.0f / _m), diff, x1 + x2) * 0.5f;
if (_N == 2)
return fmaf(sign_mult / (_m * _m), diff * diff * _r, x1 + x2) * 0.5f;
const float invmN = 1.0f / fast_pow_int(_m, _N);
const float dN = fast_pow_int(fabsf(diff), _N);
return fmaf(sign_mult * invmN, dN * _r, x1 + x2) * 0.5f;
}
// ---------------- Morton-кэш ----------------
struct MortonCachePerRank {
std::vector<int> permCache;
std::vector<uint64_t> invMaskCache;
uint32_t baseSeed;
};
static MortonCachePerRank g_mc;
static __forceinline uint64_t gray_encode(uint64_t x) noexcept { return x ^ (x >> 1); }
static __forceinline uint64_t gray_decode(uint64_t g) noexcept { g ^= g >> 32; g ^= g >> 16; g ^= g >> 8; g ^= g >> 4; g ^= g >> 2; g ^= g >> 1; return g; }
// ---------------- MortonND: ВАЖНО — поддержка levels*dim > 64 через «чанки» ----------------
struct MortonND final {
int dim, levels; // полные уровни
int eff_levels; // уровни в одном 64-битном чанке (<= floor(63/dim))
int extra_levels; // остаток уровней за пределами первого чанка (>=0)
int chunks; // количество чанков
std::vector<int> chunk_bits; // кол-во уровней в каждом чанке
std::vector<uint64_t> chunk_bases; // 2^(dim*chunk_bits[c]) на каждый чанк
uint64_t scale; // БАЗА для индекса старшего чанка (совместима со старыми местами использования)
std::vector<float> low, high, step, invStep, baseOff;
std::vector<int> perm;
std::vector<uint64_t> invMask; // длиной levels (используем кусочно)
std::vector<uint64_t> pextMask; // маски для СТАРШЕГО чанка (для эвристик/диаметра)
std::vector<uint64_t> pextMaskChunks; // маски (chunk,dim) для map01ToPoint
float invScaleLevel;
bool use_gray;
textstatic inline uint64_t make_mask(int dim, int Lc, int d) { uint64_t m = 0ull, bitpos = (uint64_t)d; for (int b = 0; b < Lc; ++b) { m |= 1ull << bitpos; bitpos += (uint64_t)dim; } return m; } __forceinline MortonND(int D, int L, const float* lows, const float* highs, const MortonCachePerRank& mc) : dim(D), levels(L), eff_levels((std::max)(1, (int)(63 / (D ? D : 1)))), extra_levels((L > eff_levels) ? (L - eff_levels) : 0), chunks((extra_levels > 0) ? (1 + (extra_levels + eff_levels - 1) / eff_levels) : 1), low(lows, lows + D), high(highs, highs + D), step(D, 0.0f), invStep(D, 0.0f), baseOff(D, 0.0f), perm(mc.permCache.begin(), mc.permCache.begin() + D), invMask(mc.invMaskCache.begin(), mc.invMaskCache.begin() + D), invScaleLevel(1.0f / (float)((uint64_t)1 << L)), use_gray(true) { // Размер шага и базовое смещение — по ПОЛНОЙ глубине: for (int d = 0; d < dim; ++d) { float rng = high[d] - low[d]; float st = rng * invScaleLevel; step[d] = st; invStep[d] = 1.0f / st; baseOff[d] = fmaf(0.5f, st, low[d]); // центр ячейки на полном уровне } // Инициализация чанков chunk_bits.resize(chunks); pextMaskChunks.resize((size_t)chunks * (size_t)dim); chunk_bases.resize(chunks); int remaining = levels; for (int c = 0; c < chunks; ++c) { int Lc = (c == 0) ? (std::min)(eff_levels, remaining) : (std::min)(eff_levels, remaining); chunk_bits[c] = Lc; remaining -= Lc; uint64_t baseC = (uint64_t)1 << (dim * Lc); chunk_bases[c] = baseC; for (int d = 0; d < dim; ++d) pextMaskChunks[(size_t)c * (size_t)dim + (size_t)d] = make_mask(dim, Lc, d); } // Маски для старшего чанка (для диаметра/эвристик) pextMask.resize(dim); for (int d = 0; d < dim; ++d) pextMask[d] = make_mask(dim, chunk_bits[0], d); // Маски инверсий — используем только нужные биты для каждого чанка // (оставляем как есть; сдвиги/маски применим динамически) // БАЗОВАЯ «scale» — база старшего чанка: 2^(dim*eff_levels_actual_for_chunk0) scale = (uint64_t)1 << (dim * chunk_bits[0]); } // Диаметр между двумя индексами старшего чанка, с учётом «скрытых» уровней: добавим extra_levels в nfree как константу __forceinline float block_diameter(uint64_t i1, uint64_t i2) const noexcept { if (i1 > i2) std::swap(i1, i2); float s2 = 0.0f; for (int d = 0; d < dim; ++d) { const int pd = perm[d]; const uint64_t varying = (i1 ^ i2) & pextMask[d]; const int nfree_hi = _mm_popcnt_u64(varying); const int nfree_total = nfree_hi + (levels - chunk_bits[0]); const float range = step[pd] * (ldexpf(1.0f, nfree_total) - 1.0f); s2 = fmaf(range, range, s2); } return sqrtf(s2); } // Преобразование t∈[0,1) → точка, используя все чанки (точность сохраняется) __forceinline void map01ToPoint(float t, float* __restrict out) const noexcept { if (t <= 0.0f) t = 0.0f; else if (t >= 1.0f) t = 0x1.fffffep-1f; // накапливаем биты для каждой размерности uint64_t accBits[32] = { 0ull }; int accShifted[32] = { 0 }; double u = (double)t; // используем double для дробной части for (int c = 0; c < chunks; ++c) { const int Lc = chunk_bits[c]; const uint64_t baseC = chunk_bases[c]; double scaled = u * (double)baseC; uint64_t idxc = (scaled >= (double)baseC) ? (baseC - 1ull) : (uint64_t)scaled; u = scaled - (double)idxc; // дробная часть для следующего чанка if (use_gray) idxc = gray_encode(idxc); // какой набор бит инверсии используется для этого чанка: int shift_from_top = 0; for (int k = 0; k <= c; ++k) shift_from_top += chunk_bits[k]; int inv_shift = levels - shift_from_top; // смещение начала подпоследовательности в общем инверс-маске for (int d = 0; d < dim; ++d) { int pd = perm[d]; uint64_t mask = pextMaskChunks[(size_t)c * (size_t)dim + (size_t)d]; uint64_t bits = _pext_u64(idxc, mask); if (inv_shift >= 0) { uint64_t invMaskSegment = 0ull; if (chunk_bits[c] < 63) { uint64_t take = ((uint64_t)1 << chunk_bits[c]) - 1ull; invMaskSegment = (invMask[pd] >> inv_shift) & take; } bits ^= invMaskSegment; } accBits[pd] = (accBits[pd] << Lc) | bits; accShifted[pd] += Lc; } } for (int d = 0; d < dim; ++d) out[d] = fmaf(step[d], (float)accBits[d], baseOff[d]); } // t для лучшего-приблизительного позиционирования (используем только старший чанк — быстро ради эвристик) __forceinline float pointToT(const float* __restrict q) const noexcept { const int bitsFull = levels; const int bitsCoarse = chunk_bits[0]; uint64_t idx0 = 0ull; for (int d = 0; d < dim; ++d) { int pd = perm[d]; float v = (q[pd] - baseOff[pd]) * invStep[pd]; int64_t cell = (int64_t)_mm_cvt_ss2si(_mm_round_ss(_mm_setzero_ps(), _mm_set_ss(v), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); // отбрасываем младшие (bitsFull - bitsCoarse) разрядов: if (cell < 0) cell = 0; int64_t maxv = ((int64_t)1 << bitsFull) - 1; if (cell > maxv) cell = maxv; uint64_t b = (uint64_t)cell >> (bitsFull - bitsCoarse); // инверсия для старшего чанка: uint64_t invMask0 = 0ull; if (bitsCoarse < 63) { uint64_t take = ((uint64_t)1 << bitsCoarse) - 1ull; invMask0 = (invMask[pd] >> (bitsFull - bitsCoarse)) & take; } b ^= invMask0; idx0 |= _pdep_u64(b, pextMask[d]); } if (use_gray) idx0 = gray_decode(idx0); return ((float)idx0 + 0.5f) / (float)scale; }
};
__forceinline void IntervalND::compute_span_level(const MortonND& map) noexcept {
span_level = 0;
for (int d = 0; d < map.dim; ++d) {
uint64_t varying = (i1 ^ i2) & map.pextMask[d];
span_level += _mm_popcnt_u64(varying);
}
// учтём, что ниже старшего чанка есть ещё уровни:
span_level += (map.levels - map.chunk_bits[0]) * map.dim;
span_level = (std::min)(span_level, 11);
}
// ---------------- Стоимость для манипулятора (добавлены приоритет положительных углов и экспоненциальный штраф) ----------------
struct ManipCost final {
int n; bool variableLen; float targetX, targetY; float minTheta;
// коэффициенты для «арки» и экспоненциального штрафа
float archBiasW, archBiasK; // слабый приоритет положительных углов
float sharpW; // вес штрафа за |θ| < minTheta
ManipCost(int _n, bool _variableLen, float _targetX, float _targetY, float _minTheta)
: n(_n), variableLen(_variableLen), targetX(_targetX), targetY(_targetY), minTheta(_minTheta),
archBiasW(0.02f), archBiasK(3.0f), sharpW(0.05f) {
}
__forceinline float operator()(const float* __restrict q, float& out_x, float& out_y) const noexcept {
const float* th = q;
const float* L = variableLen ? (q + n) : nullptr;
__declspec(align(64)) float phi[32], s_arr[32], c_arr[32];
float x = 0.0f, y = 0.0f, phi_acc = 0.0f, penC = 0.0f, archPen = 0.0f;
text// накопление фаз, затем sincos for (int i = 0; i < n; ++i) { phi_acc += th[i]; phi[i] = phi_acc; } FABE13_SINCOS(phi, s_arr, c_arr, n); const float Lc = 1.0f; if (variableLen) { for (int i = 0; i < n; ++i) { float Li = L[i]; x = fmaf(Li, c_arr[i], x); y = fmaf(Li, s_arr[i], y); } } else { for (int i = 0; i < n; ++i) { x = fmaf(Lc, c_arr[i], x); y = fmaf(Lc, s_arr[i], y); } } // штрафы for (int i = 0; i < n; ++i) { float ai = fabsf(th[i]); float v = minTheta - ai; // «слишком острый» (малый) угол if (v > 0.0f) { // экспоненциально нарастающий штраф (через exp2 для скорости) float scale = 2.0f / (minTheta + 1e-6f); penC += sharpW * (exp2f(scale * v) - 1.0f); } // лёгкий приоритет положительных углов (арка): softplus(k * (-theta)) // = ln(1 + exp(k*(-θ))) — минимальный на θ>=0, растёт на θ<0 float t = -th[i] * archBiasK; // быстрая и стабильная softplus: float sp = (t > 10.f) ? t : log1pf(expf(t)); archPen += archBiasW * sp; } float dx = x - targetX, dy = y - targetY; float dist = sqrtf(fmaf(dx, dx, dy * dy)); out_x = x; out_y = y; return dist + penC + archPen; }
};
// ---------------- Попадание по карте (как было) ----------------
__forceinline void HitTest2D_analytic(float x_param, float& out_x1, float& out_x2) {
const float a = gActiveMap.a, inv_lenx = gActiveMap.inv_lenx;
const uint32_t scale = gActiveMap.scale, scale_minus_1 = scale - 1u;
const float lenx = gActiveMap.lenx, leny = gActiveMap.leny, c = gActiveMap.c;
const uint8_t start = gActiveMap.start; const int levels = gActiveMap.levels;
textfloat norm = (x_param - a) * inv_lenx; norm = fminf(fmaxf(norm, 0.0f), 0x1.fffffep-1f); uint32_t idx = (uint32_t)(norm * (float)scale); idx = idx > scale_minus_1 ? scale_minus_1 : idx; float sx = lenx, sy = leny; float x1 = a, x2 = c; uint8_t type = start; int l = levels - 1; while (l >= 0) { const uint32_t q = (idx >> (l * 2)) & 3u; const Step s = g_step_tbl[type][q]; type = s.next; sx *= 0.5f; sy *= 0.5f; x1 += s.dx ? sx : 0.0f; x2 += s.dy ? sy : 0.0f; --l; } out_x1 = x1 + sx * 0.5f; out_x2 = x2 + sy * 0.5f;
}
__forceinline float FindX2D_analytic(float px, float py) {
const float a = gActiveMap.a, b = gActiveMap.b, c = gActiveMap.c, d = gActiveMap.d;
const float lenx = gActiveMap.lenx, leny = gActiveMap.leny; const uint32_t scale = gActiveMap.scale;
const uint8_t start = gActiveMap.start; const int levels = gActiveMap.levels;
const float clamped_px = fminf(fmaxf(px, a), b), clamped_py = fminf(fmaxf(py, c), d);
float sx = lenx, sy = leny; float x0 = a, y0 = c; uint32_t idx = 0u; uint8_t type = start; int l = 0;
while (l < levels) {
sx *= 0.5f; sy *= 0.5f; const float mx = x0 + sx, my = y0 + sy;
const uint32_t tr = (uint32_t)((clamped_px > mx) & (clamped_py > my));
const uint32_t tl = (uint32_t)((clamped_px < mx) & (clamped_py > my));
const uint32_t dl = (uint32_t)((clamped_px < mx) & (clamped_py < my));
const uint32_t none = (uint32_t)(1u ^ (tr | tl | dl));
const uint32_t dd = (tr << 1) | tr | tl | (none << 1);
const InvStep inv = g_inv_tbl[type][dd];
type = inv.next; idx = (idx << 2) | inv.q;
const uint32_t dx = dd >> 1, dy = dd & 1u; x0 += dx ? sx : 0.0f; y0 += dy ? sy : 0.0f; ++l;
}
const float scale_recip = 1.0f / (float)scale;
return fmaf((float)idx * scale_recip, lenx, a);
}
// ---------------- Сообщения ND (как было) ----------------
__declspec(align(16)) struct MultiCrossMsg final {
float intervals[15]; uint8_t count;
template<typename Archive> __declspec(noalias) __forceinline void serialize(Archive& ar, const unsigned int) noexcept { ar& intervals& count; }
};
__declspec(align(16)) struct BestSolutionMsg final {
float bestF, bestX, bestY, bestQ[32]; uint8_t dim;
template<typename Archive> __declspec(noalias) __forceinline void serialize(Archive& ar, const unsigned int) noexcept { ar& bestF& bestX& bestY& bestQ& dim; }
};
__declspec(align(16)) struct CtrlMsgND final {
uint8_t kind; CrossMsg xchg; MultiCrossMsg multiXchg; BestSolutionMsg bestSol;
template<typename Archive> __declspec(noalias) __forceinline void serialize(Archive& ar, const unsigned int) noexcept {
ar& kind; if (kind == 1) ar& xchg; else if (kind == 2) ar& multiXchg; else if (kind == 3) ar& bestSol;
}
};
// ---------------- Генерация сидов (как было) ----------------
static __forceinline int generate_lhs_seeds_lite(const MortonND& map, const int dim, float* __restrict S, int stride, uint32_t seed) {
int temp_dim = dim; const int ns = --temp_dim * temp_dim; uint32_t st = seed;
alignas(32) int permutations[32][16];
for (int d = 0; d < dim; ++d) {
for (int s = 0; s < ns; ++s) permutations[d][s] = s;
for (int s = ns - 1; s > 0; --s) { st ^= st << 13; st ^= st >> 17; st ^= st << 5; int j = st % (s + 1); std::swap(permutations[d][s], permutations[d][j]); }
}
for (int s = 0; s < ns; ++s) {
for (int d = 0; d < dim; ++d) {
st ^= st << 13; st ^= st >> 17; st ^= st << 5;
float u = (st & 0xFFFFFF) * 5.9604645e-8f;
int stratum = permutations[d][s];
float pos = ((float)stratum + u) / (float)ns;
int pd = map.perm[d];
float lo = map.low[pd], hi = map.high[pd];
S[s * stride + d] = fmaf(pos, (hi - lo), lo);
}
}
return ns;
}
static __forceinline int generate_heuristic_seeds(const ManipCost& cost, const MortonND& map, int dim, float* __restrict S, int stride, uint32_t seed) {
const int n = cost.n; const bool VL = cost.variableLen;
const float tx = cost.targetX, ty = cost.targetY;
int total_seeds = 0;
// 1) направленное в сторону цели
{
float* s0 = S + total_seeds * stride;
float phi = atan2f(ty, tx);
float rho = sqrtf(fmaf(tx, tx, ty * ty));
float len = fminf(fmaxf(rho / (float)n, 0.5f), 2.0f); // новый минимум 0.5
for (int i = 0; i < n; ++i) s0[i] = phi / (float)n;
if (VL) for (int i = 0; i < n; ++i) s0[n + i] = len;
total_seeds++;
}
// 2) попеременно
{
float* s1 = S + total_seeds * stride;
float phi = atan2f(ty, tx);
for (int i = 0; i < n; ++i) s1[i] = 0.5f * phi * ((i & 1) ? -1.0f : 1.0f);
if (VL) for (int i = 0; i < n; ++i) s1[n + i] = 1.0f * (0.8f + 0.4f * (float)i / (float)n);
total_seeds++;
}
// 3) затухающая линейка
{
float* s2 = S + total_seeds * stride;
const float inv = (n > 1) ? 1.0f / (float)(n - 1) : 0.0f;
float phi = atan2f(ty, tx);
for (int i = 0; i < n; ++i) { float pr = (float)i * inv; s2[i] = phi * (1.0f - 0.3f * pr); }
if (VL) { for (int i = 0; i < n; ++i) { float si = sinf(1.5f * (float)i); s2[n + i] = (1.0f + 0.2f * si); } }
total_seeds++;
}
// 4) LHS лайт
int lhs_count = generate_lhs_seeds_lite(map, dim, S + total_seeds * stride, stride, seed);
total_seeds += lhs_count;
return total_seeds;
}
// ---------------- Основной ND-ветвитель с улучшениями ----------------
static __forceinline void agp_run_branch_mpi(
const MortonND& map, const ManipCost& cost, int maxIter, float r, bool adaptive, float eps, unsigned seed,
std::vector<IntervalND*>& H, std::vector<float>& bestQ, float& bestF, float& bestX, float& bestY, float M_prior = 1e-3f)
{
const int n = cost.n;
const int dim = n + (cost.variableLen ? n : 0);
textalignas(32) float M_by_span[12]; for (int i = 0; i < 12; ++i) M_by_span[i] = M_prior; float Mmax = M_prior; alignas(64) float q_local[32], phi[32], s_arr[32], c_arr[32], sum_s[32], sum_c[32], q_try[32]; bestQ.reserve(dim); float x = 0.0f, y = 0.0f; int no_improve = 0; auto t_to_idx = [&](float t) -> uint64_t { float tt = (t <= 0.0f) ? 0.0f : (t >= 1.0f ? std::nextafterf(1.0f, 0.0f) : t); uint64_t idx = (uint64_t)((double)tt * (double)map.scale); // индекс старшего чанка if (idx >= map.scale) idx = map.scale - 1ull; return idx; }; auto update_pockets_and_Mmax = [&](IntervalND* I) { const int k = I->span_level; if (I->M > M_by_span[k]) M_by_span[k] = I->M; if (M_by_span[k] > Mmax) Mmax = M_by_span[k]; }; float a = 0.0f, b = 1.0f; // --- локальная оценка + небольшой градиентный шаг + ДОТОЧКА ПО ПОСЛЕДНЕМУ ЗВЕНУ --- auto evalAt = [&](float t) -> float { map.map01ToPoint(t, q_local); float f = cost(q_local, x, y); if (f < bestF * 1.25f) { // подготовим суммы для градиента float acc = 0.0f; for (int i = 0; i < n; ++i) { acc += q_local[i]; phi[i] = acc; } FABE13_SINCOS(phi, s_arr, c_arr, n); float as = 0.0f, ac = 0.0f; for (int k = n - 1; k >= 0; --k) { const float Lk = cost.variableLen ? q_local[n + k] : 1.0f; as += Lk * s_arr[k]; ac += Lk * c_arr[k]; sum_s[k] = as; sum_c[k] = ac; } const float dx = x - cost.targetX, dy = y - cost.targetY; float dist = sqrtf(fmaf(dx, dx, dy * dy)) + 1e-8f; // градиент с учётом новых штрафов: float eta = 0.125f; for (int stepI = 0; stepI < 3; ++stepI) { for (int i = 0; i < n; ++i) { float gpen = 0.0f; // экспоненциальный штраф за малые |θ| { float ai = fabsf(q_local[i]); float v = cost.minTheta - ai; if (v > 0.0f) { float scale = 2.0f / (cost.minTheta + 1e-6f); float e = exp2f(scale * v); float dpen_dtheta = cost.sharpW * (e * 0.69314718055994530941723212145818f * scale) * (-copysignf(1.0f, q_local[i])); gpen += dpen_dtheta; } } // «арка»: производная softplus(k*(-θ)) = -k * sigmoid(k*(-θ)) { float tsg = -q_local[i] * cost.archBiasK; float sig = 1.0f / (1.0f + expf(-tsg)); gpen += -(cost.archBiasW * cost.archBiasK) * sig; } float g = (dx * (-sum_s[i]) + dy * (sum_c[i])) / dist + gpen; q_try[i] = q_local[i] - eta * g; // ГРАНИЦЫ УГЛОВ (рад): для i==0 [-60°, +150°], для остальных [-150°, +150°] const float deg2rad = 3.14159265358979323846f / 180.0f; const float lo0 = -60.0f * deg2rad, hi0 = 150.0f * deg2rad; const float lo = -150.0f * deg2rad, hi = 150.0f * deg2rad; const float Lb = (i == 0) ? lo0 : lo; const float Hb = (i == 0) ? hi0 : hi; if (q_try[i] < Lb) q_try[i] = Lb; else if (q_try[i] > Hb) q_try[i] = Hb; } if (cost.variableLen) { for (int i = 0; i < n; ++i) { float g = (dx * c_arr[i] + dy * s_arr[i]) / dist; float v = q_local[n + i] - eta * g; // НОВЫЕ ГРАНИЦЫ ДЛИН: [0.5; 2.0] if (v < 0.5f) v = 0.5f; else if (v > 2.0f) v = 2.0f; q_try[n + i] = v; } } float x2, y2; float f2 = cost(q_try, x2, y2); if (f2 < f) { memcpy(q_local, q_try, dim * sizeof(float)); f = f2; x = x2; y = y2; break; } eta *= 0.5f; } // ДОТОЧКА ПО ПОСЛЕДНЕМУ УГЛУ — чтобы «довернуть» последний сегмент const int last = n - 1; const float deg2rad = 3.14159265358979323846f / 180.0f; const float lo = (last == 0) ? (-60.0f * deg2rad) : (-150.0f * deg2rad); const float hi = 150.0f * deg2rad; float bestLocF = f; float saved = q_local[last]; for (float delta = 0.05f; delta >= 0.00625f; delta *= 0.5f) { for (int sgn = -1; sgn <= 1; sgn += 2) { float cand = saved + sgn * delta; if (cand < lo) cand = lo; else if (cand > hi) cand = hi; float backup = q_local[last]; q_local[last] = cand; float x2, y2; float f2 = cost(q_local, x2, y2); if (f2 < bestLocF) { bestLocF = f2; x = x2; y = y2; saved = cand; } q_local[last] = backup; } } if (bestLocF < f) { q_local[last] = saved; f = bestLocF; } } if (f < bestF) { bestF = f; bestQ.assign(q_local, q_local + dim); bestX = x; bestY = y; no_improve = 0; } else { ++no_improve; } return f; }; float f_a = evalAt(a), f_b = evalAt(b); const int K = (std::min)((std::max)(2 * dim, 8), 128); H.reserve((size_t)maxIter + K + 16); const int rank = g_world->rank(); const int world = g_world->size(); alignas(64) float seeds[64 * 32]; const int seedCnt = generate_heuristic_seeds(cost, map, dim, seeds, 32, seed + rank * 7919u); for (int i = 0; i < seedCnt && i < 64; ++i) { const float* s = seeds + i * 32; float t_seed = map.pointToT(s); float interval_size = (i < 3) ? (0.0004f * (float)dim) : (0.00031f * (float)dim) * exp2f((1.0f / (float)(seedCnt - 4)) * log2f(0.00025f / 0.00031f) * (float)(i - 3)); float t1 = fmaxf(a, t_seed - interval_size), t2 = fminf(b, t_seed + interval_size); if (t2 <= t1) continue; alignas(64) float q1[32], q2[32]; float x1, y1, x2, y2; map.map01ToPoint(t1, q1); float f1 = cost(q1, x1, y1); map.map01ToPoint(t2, q2); float f2 = cost(q2, x2, y2); IntervalND* I = new IntervalND(t1, t2, f1, f2); I->i1 = t_to_idx(t1); I->i2 = t_to_idx(t2); I->diam = map.block_diameter(I->i1, I->i2); I->compute_span_level(map); I->set_metric(I->diam); update_pockets_and_Mmax(I); I->ChangeCharacteristic(r * Mmax); if (i < 3) I->R *= fmaf(0.01f, (float)dim, 0.85f); else { float start_mult = 0.214f * (float)dim; float end_mult = 0.174f * (float)dim; float mult = start_mult * exp2f((1.0f / (float)(seedCnt - 4)) * log2f(end_mult / start_mult) * (float)(i - 3)); I->R *= mult; } H.emplace_back(I); std::push_heap(H.begin(), H.end(), ComparePtrND); if (f1 < bestF) { bestF = f1; bestQ.assign(q1, q1 + dim); bestX = x1; bestY = y1; } if (f2 < bestF) { bestF = f2; bestQ.assign(q2, q2 + dim); bestX = x2; bestY = y2; } } float prev_t = a, prev_f = f_a; for (int k = 1; k <= K; ++k) { float t = a + (b - a) * ((float)k / (K + 1)) + (float)rank / (float)(world * (K + 1)); float f = evalAt(t); IntervalND* I = new IntervalND(prev_t, t, prev_f, f); I->i1 = t_to_idx(prev_t); I->i2 = t_to_idx(t); I->diam = map.block_diameter(I->i1, I->i2); I->compute_span_level(map); I->set_metric(I->diam); update_pockets_and_Mmax(I); I->ChangeCharacteristic(r * Mmax); H.emplace_back(I); std::push_heap(H.begin(), H.end(), ComparePtrND); prev_t = t; prev_f = f; } IntervalND* tail = new IntervalND(prev_t, b, prev_f, f_b); tail->i1 = t_to_idx(prev_t); tail->i2 = t_to_idx(b); tail->diam = map.block_diameter(tail->i1, tail->i2); tail->compute_span_level(map); tail->set_metric(tail->diam); update_pockets_and_Mmax(tail); tail->ChangeCharacteristic(r * Mmax); H.emplace_back(tail); std::push_heap(H.begin(), H.end(), ComparePtrND); float dmax = b - a, initial_len = dmax, thr03 = 0.3f * initial_len, inv_thr03 = 1.0f / thr03; int it = 0; // новые «мягко-зависящие-от-размерности» константы auto kickEveryByDim = [&](int dim) -> int { // 60..120, убывает экспоненциально с размерностью float z = 120.0f * exp2f(-0.05f * (float)dim); if (z < 60.0f) z = 60.0f; return (int)z; }; auto noImproveThrByDim = [&](int dim) -> int { float z = 80.0f * exp2f(-0.08f * (float)dim); if (z < 30.0f) z = 30.0f; return (int)z; }; while (it < maxIter) { // подталкивание около лучшего — зависящее от размерности if ((it % kickEveryByDim(dim)) == 0 && no_improve > noImproveThrByDim(dim) && !bestQ.empty()) { float t_best = map.pointToT(bestQ.data()); for (int i = 0; i < 2; ++i) { float off = (i == 0) ? 0.01f : -0.01f; float t_seed = fminf(b, fmaxf(a, t_best + off)); float f_seed = evalAt(t_seed); IntervalND* J = new IntervalND(t_seed - 0.005f, t_seed + 0.005f, f_seed, f_seed); J->i1 = t_to_idx(t_seed - 0.005f); J->i2 = t_to_idx(t_seed + 0.005f); J->diam = map.block_diameter(J->i1, J->i2); J->compute_span_level(map); J->set_metric(J->diam); update_pockets_and_Mmax(J); J->ChangeCharacteristic(r * Mmax); J->R *= 0.9f; H.emplace_back(J); std::push_heap(H.begin(), H.end(), ComparePtrND); } no_improve = 0; } const float p = fmaf(-1.0f / initial_len, dmax, 1.0f); bool stagnation = (no_improve > 100) && (it > 270); // Плавная адаптация «T» к размерности (вместо констант 264/277) float A = 200.0f + 64.0f * exp2f(-0.06f * (float)dim); float B = 210.0f + 67.0f * exp2f(-0.06f * (float)dim); const int T = (int)fmaf(-expm1f(p), A, B); float r_eff = fmaxf(1.0f, r * (0.7f + 0.3f * (1.0f - p))); std::pop_heap(H.begin(), H.end(), ComparePtrND); IntervalND* cur = H.back(); H.pop_back(); const float x1 = cur->x1, x2 = cur->x2, y1 = cur->y1, y2 = cur->y2; float m = r_eff * Mmax; float tNew = Shag(m, x1, x2, y1, y2, (float)dim, r); tNew = fminf(fmaxf(tNew, a), b); float fNew = evalAt(tNew); IntervalND* L = new IntervalND(x1, tNew, y1, fNew); IntervalND* Rv = new IntervalND(tNew, x2, fNew, y2); L->i1 = t_to_idx(x1); L->i2 = t_to_idx(tNew); Rv->i1 = t_to_idx(tNew); Rv->i2 = t_to_idx(x2); L->diam = map.block_diameter(L->i1, L->i2); Rv->diam = map.block_diameter(Rv->i1, Rv->i2); L->compute_span_level(map); Rv->compute_span_level(map); L->set_metric(L->diam); Rv->set_metric(Rv->diam); float Mloc = (std::max)(L->M, Rv->M); update_pockets_and_Mmax(L); update_pockets_and_Mmax(Rv); const float prevMmax = Mmax; if (Mloc > Mmax) Mmax = Mloc; m = r_eff * Mmax; if (adaptive) { float len1 = tNew - x1, len2 = x2 - tNew; if (len1 + len2 == dmax) { dmax = (std::max)(len1, len2); for (auto pI : H) { float Ls = pI->x2 - pI->x1; if (Ls > dmax) dmax = Ls; } } // НЕ ТРОГАЕМ усл. с thr03 — строго как просили if ((thr03 > dmax && !(it % 3)) || (10.0f * dmax < initial_len)) { const float progress = fmaf(-dmax, inv_thr03, 1.0f); const float alpha = progress * progress; const float beta = fmaf(-alpha, 1.0f, 2.0f); const float MULT = (1.0f / dmax) * Mmax; const float global_coeff = fmaf(MULT, r_eff, -MULT); const float GF = fmaf(beta, global_coeff, 0.0f); L->ChangeCharacteristic(fmaf(GF, len1, L->M * alpha)); Rv->ChangeCharacteristic(fmaf(GF, len2, Rv->M * alpha)); size_t sz = H.size(); RecomputeR_AffineM_AVX2_ND(H.data(), sz, GF, alpha); std::make_heap(H.begin(), H.end(), ComparePtrND); } else { if (Mloc > prevMmax) { L->ChangeCharacteristic(m); Rv->ChangeCharacteristic(m); if (Mloc > 1.15f * prevMmax) { size_t sz = H.size(); RecomputeR_ConstM_AVX2_ND(H.data(), sz, m); std::make_heap(H.begin(), H.end(), ComparePtrND); } } else { L->ChangeCharacteristic(m); Rv->ChangeCharacteristic(m); } } } else { if (Mloc > prevMmax) { L->ChangeCharacteristic(m); Rv->ChangeCharacteristic(m); if (Mloc > 1.15f * prevMmax) { size_t sz = H.size(); RecomputeR_ConstM_AVX2_ND(H.data(), sz, m); std::make_heap(H.begin(), H.end(), ComparePtrND); } } else { L->ChangeCharacteristic(m); Rv->ChangeCharacteristic(m); } } H.push_back(L); std::push_heap(H.begin(), H.end(), ComparePtrND); H.push_back(Rv); std::push_heap(H.begin(), H.end(), ComparePtrND); if (!H.empty()) { _mm_prefetch((const char*)H[0], _MM_HINT_T0); if (H.size() > 1) _mm_prefetch((const char*)H[1], _MM_HINT_T0); } IntervalND* top = H.front(); float interval_len = top->x2 - top->x1; // обмен/терминация по расписанию: bool want_term = (exp2f(log2f(interval_len) / (float)dim) < eps) || (it == maxIter - 1); if (!(it % T) || want_term) { CtrlMsgND out; out.kind = want_term ? 0 : 2; if (!want_term) { uint8_t cnt = (uint8_t)((H.size() >= 3) ? 3 : H.size()); out.multiXchg.count = cnt; float* dest = out.multiXchg.intervals; IntervalND* t1 = H[0]; IntervalND* t2 = (H.size() > 1 ? H[1] : H[0]); IntervalND* t3 = (H.size() > 2 ? H[2] : H[H.size() - 1]); IntervalND* tops[3] = { t1, t2, t3 }; for (uint8_t i2 = 0; i2 < cnt; ++i2) { IntervalND* Tt = tops[i2]; dest[0] = Tt->x1; dest[1] = 0.0f; dest[2] = Tt->x2; dest[3] = 0.0f; dest[4] = Tt->R; dest += 5; } } for (int i2 = 0; i2 < world; ++i2) if (i2 != rank) g_world->isend(i2, 0, out); if (want_term) break; } if (!(it % 500) && !bestQ.empty()) { CtrlMsgND out; out.kind = 3; out.bestSol.bestF = bestF; out.bestSol.bestX = bestX; out.bestSol.bestY = bestY; out.bestSol.dim = (uint8_t)bestQ.size(); memcpy(out.bestSol.bestQ, bestQ.data(), bestQ.size() * sizeof(float)); for (int i2 = 0; i2 < world; ++i2) if (i2 != rank) g_world->isend(i2, 0, out); } while (g_world->iprobe(boost::mpi::any_source, 0)) { CtrlMsgND in; g_world->recv(boost::mpi::any_source, 0, in); if (in.kind == 0) { if (!rank) break; else return; } else if (in.kind == 1) { float sx = in.xchg.s_x1, ex = in.xchg.e_x1; if (sx < 0.0f) sx = 0.0f; if (ex > 1.0f) ex = 1.0f; if (ex > sx) { alignas(64) float tmp[32]; float tx, ty; map.map01ToPoint(sx, tmp); float y1i = cost(tmp, tx, ty); map.map01ToPoint(ex, tmp); float y2i = cost(tmp, tx, ty); IntervalND* inj = new IntervalND(sx, ex, y1i, y2i); inj->i1 = t_to_idx(sx); inj->i2 = t_to_idx(ex); inj->diam = map.block_diameter(inj->i1, inj->i2); inj->compute_span_level(map); inj->set_metric(inj->diam); update_pockets_and_Mmax(inj); inj->ChangeCharacteristic(r * Mmax); if (!H.empty()) { _mm_prefetch((const char*)H[0], _MM_HINT_T0); if (H.size() > 1) _mm_prefetch((const char*)H[1], _MM_HINT_T0); } IntervalND* topH = H.front(); if (inj->R > 1.15f * topH->R) { float p2 = fmaf(-1.0f / initial_len, dmax, 1.0f); float k = (no_improve > 100 && it > 270) ? fmaf(0.5819767068693265f, expm1f(p2), 0.3f) : fmaf(0.3491860241215959f, expm1f(p2), 0.6f); inj->R = in.xchg.Rtop * k; H.emplace_back(inj); std::push_heap(H.begin(), H.end(), ComparePtrND); } } } else if (in.kind == 2) { const MultiCrossMsg& mX = in.multiXchg; for (uint8_t ii = 0; ii < mX.count; ++ii) { const float* d = &mX.intervals[ii * 5]; float sx = d[0], ex = d[2]; if (sx < 0.0f) sx = 0.0f; if (ex > 1.0f) ex = 1.0f; if (ex > sx) { alignas(64) float tmp[32]; float tx, ty; map.map01ToPoint(sx, tmp); float y1i = cost(tmp, tx, ty); map.map01ToPoint(ex, tmp); float y2i = cost(tmp, tx, ty); IntervalND* inj = new IntervalND(sx, ex, y1i, y2i); inj->i1 = t_to_idx(sx); inj->i2 = t_to_idx(ex); inj->diam = map.block_diameter(inj->i1, inj->i2); inj->compute_span_level(map); inj->set_metric(inj->diam); update_pockets_and_Mmax(inj); inj->ChangeCharacteristic(r * Mmax); if (!H.empty()) { _mm_prefetch((const char*)H[0], _MM_HINT_T0); if (H.size() > 1) _mm_prefetch((const char*)H[1], _MM_HINT_T0); } IntervalND* topH = H.front(); if (inj->R > 1.15f * topH->R) { float p2 = fmaf(-1.0f / initial_len, dmax, 1.0f); float k = (no_improve > 100 && it > 270) ? fmaf(0.5819767068693265f, expm1f(p2), 0.3f) : fmaf(0.3491860241215959f, expm1f(p2), 0.6f); inj->R = d[4] * k; H.emplace_back(inj); std::push_heap(H.begin(), H.end(), ComparePtrND); } } } } else if (in.kind == 3) { const BestSolutionMsg& bm = in.bestSol; if (bm.bestF < bestF * 1.15f) { alignas(64) float tmp_q[32]; memcpy(tmp_q, bm.bestQ, bm.dim * sizeof(float)); float t_best = map.pointToT(tmp_q); float t1 = fmaxf(a, t_best - 0.001f), t2 = fminf(b, t_best + 0.001f); if (t2 > t1) { alignas(64) float tq1[32], tq2[32]; float xx1, yy1, xx2, yy2; map.map01ToPoint(t1, tq1); float f1 = cost(tq1, xx1, yy1); map.map01ToPoint(t2, tq2); float f2 = cost(tq2, xx2, yy2); IntervalND* I = new IntervalND(t1, t2, f1, f2); I->i1 = t_to_idx(t1); I->i2 = t_to_idx(t2); I->diam = map.block_diameter(I->i1, I->i2); I->compute_span_level(map); I->set_metric(I->diam); update_pockets_and_Mmax(I); I->ChangeCharacteristic(r * Mmax); I->R *= 0.90f; H.emplace_back(I); std::push_heap(H.begin(), H.end(), ComparePtrND); } if (bm.bestF < bestF) { bestF = bm.bestF; bestX = bm.bestX; bestY = bm.bestY; bestQ.assign(bm.bestQ, bm.bestQ + bm.dim); } } } } ++it; }
}
struct BestPacket {
float bestF; int dim; float bestX; float bestY;
template<typename Archive> void serialize(Archive& ar, const unsigned int) { ar& bestF& dim& bestX& bestY; }
};
// ---------------- ВНЕШНИЕ ФУНКЦИИ DLL ----------------
// Главная ND оптимизация с новыми границами углов/длин
extern "C" __declspec(dllexport) __declspec(noalias)
void AGP_Manip2D(int nSegments, bool variableLengths, float minTheta, float targetX, float targetY,
int peanoLevels, int maxIterPerBranch, float r, bool adaptiveMode, float epsilon,
unsigned int seed, float** out_bestQ, size_t* out_bestQLen, float* out_bestX,
float* out_bestY, float* out_bestF)
{
const int dim = nSegments + (variableLengths ? nSegments : 0);
text// Перестановка/инверсии (как было) g_mc.permCache.resize(dim); for (int i = 0; i < dim; ++i) g_mc.permCache[i] = i; uint32_t s = g_mc.baseSeed; for (int i = dim - 1; i > 0; --i) { s ^= s << 13; s ^= s >> 17; s ^= s << 5; uint32_t j = s % (uint32_t)(i + 1); std::swap(g_mc.permCache[i], g_mc.permCache[j]); } g_mc.invMaskCache.resize(dim); for (int k = 0; k < dim; ++k) { s ^= s << 13; s ^= s >> 17; s ^= s << 5; g_mc.invMaskCache[k] = (uint64_t)s; } // НОВЫЕ ГРАНИЦЫ: // углы: θ0 ∈ [-60°, +150°], θi>0 ∈ [-150°, +150°]; длины ∈ [0.5, 2.0] const float deg2rad = 3.14159265358979323846f / 180.0f; const float theta0Min = -60.0f * deg2rad, theta0Max = 150.0f * deg2rad; const float thetaMin = -150.0f * deg2rad, thetaMax = 150.0f * deg2rad; const float lenMin = 0.5f, lenMax = 2.0f; std::vector<float> low; low.reserve(dim); std::vector<float> high; high.reserve(dim); for (int i = 0; i < nSegments; ++i) { low.push_back(i == 0 ? theta0Min : thetaMin); high.push_back(i == 0 ? theta0Max : thetaMax); } if (variableLengths) { for (int i = 0; i < nSegments; ++i) { low.push_back(lenMin); high.push_back(lenMax); } } ManipCost cost(nSegments, variableLengths, targetX, targetY, minTheta); const int rank = g_world->rank(), world = g_world->size(); std::vector<float> bestQ; float bestF = FLT_MAX, bx = 0.0f, by = 0.0f; const int levels0 = (std::min)(peanoLevels, 8); const int maxIter0 = (int)(maxIterPerBranch * 0.2f); MortonND map0(dim, levels0, low.data(), high.data(), g_mc); std::vector<IntervalND*> H_coarse; std::vector<float> bestQ_coarse; float bestF_coarse = FLT_MAX, bx_coarse = 0.0f, by_coarse = 0.0f; float M_prior = (variableLengths ? 2.0f * nSegments : 2.0f * nSegments) * (1.0f / (float)(1u << levels0)); if (variableLengths) M_prior += 1.41421356237f * (1.0f / (float)(1u << levels0)); agp_run_branch_mpi(map0, cost, maxIter0, r, adaptiveMode, epsilon, seed, H_coarse, bestQ_coarse, bestF_coarse, bx_coarse, by_coarse, M_prior); if (bestF_coarse < bestF) { bestF = bestF_coarse; bestQ = bestQ_coarse; bx = bx_coarse; by = by_coarse; } if (levels0 < peanoLevels) { MortonND map1(dim, peanoLevels, low.data(), high.data(), g_mc); std::vector<IntervalND*> H_fine; std::vector<float> bestQ_fine = bestQ; float bestF_fine = bestF, bx_fine = bx, by_fine = by; float M_prior_fine = (variableLengths ? 2.0f * nSegments : 2.0f * nSegments) * (1.0f / (float)(1u << peanoLevels)); if (variableLengths) M_prior_fine += 1.41421356237f * (1.0f / (float)(1u << peanoLevels)); if (!H_coarse.empty()) { std::sort(H_coarse.begin(), H_coarse.end(), [](const IntervalND* a, const IntervalND* b) { return a->R < b->R; }); const size_t topCount = (size_t)(H_coarse.size() * 0.3f); auto t_to_idx_fine = [&](float t) -> uint64_t { float tt = (t <= 0.0f) ? 0.0f : (t >= 1.0f ? std::nextafterf(1.0f, 0.0f) : t); uint64_t idx = (uint64_t)((double)tt * (double)map1.scale); if (idx >= map1.scale) idx = map1.scale - 1ull; return idx; }; for (size_t i = 0; i < topCount && i < H_coarse.size(); ++i) { const IntervalND* C = H_coarse[i]; alignas(64) float q1[32], q2[32]; float x1, y1, x2, y2; map1.map01ToPoint(C->x1, q1); float f1 = cost(q1, x1, y1); map1.map01ToPoint(C->x2, q2); float f2 = cost(q2, x2, y2); IntervalND* I = new IntervalND(C->x1, C->x2, f1, f2); I->i1 = t_to_idx_fine(C->x1); I->i2 = t_to_idx_fine(C->x2); I->diam = map1.block_diameter(I->i1, I->i2); I->set_metric(I->diam); H_fine.push_back(I); if (f1 < bestF_fine) { bestF_fine = f1; bestQ_fine.assign(q1, q1 + dim); bx_fine = x1; by_fine = y1; } if (f2 < bestF_fine) { bestF_fine = f2; bestQ_fine.assign(q2, q2 + dim); bx_fine = x2; by_fine = y2; } } std::make_heap(H_fine.begin(), H_fine.end(), ComparePtrND); } agp_run_branch_mpi(map1, cost, maxIterPerBranch - maxIter0, r, adaptiveMode, epsilon, seed, H_fine, bestQ_fine, bestF_fine, bx_fine, by_fine, M_prior_fine); if (bestF_fine < bestF) { bestF = bestF_fine; bestQ = bestQ_fine; bx = bx_fine; by = by_fine; } } BestPacket me{ bestF, dim, bx, by }; if (!rank) { std::vector<float> winnerQ = bestQ; float winF = bestF, wx = bx, wy = by; for (int i = 1; i < world; ++i) { BestPacket bp; g_world->recv(i, 2, bp); std::vector<float> qin; g_world->recv(i, 3, qin); if (bp.bestF < winF) { winF = bp.bestF; wx = bp.bestX; wy = bp.bestY; winnerQ = qin; } } *out_bestQLen = winnerQ.size(); *out_bestQ = (float*)CoTaskMemAlloc(sizeof(float) * (*out_bestQLen)); memcpy(*out_bestQ, winnerQ.data(), sizeof(float) * (*out_bestQLen)); *out_bestX = wx; *out_bestY = wy; *out_bestF = winF; } else { g_world->send(0, 2, me); g_world->send(0, 3, bestQ); }
}
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline int AgpInit(int peanoLevel, float a, float b, float c, float d) {
g_env = new boost::mpi::environment();
g_world = new boost::mpi::communicator();
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
const int rank = g_world->rank();
const int world_size = g_world->size();
if (world_size == 4) { new (&gActiveMap) Peano2DMap(peanoLevel, a, b, c, d, rank & 3); }
g_mc.baseSeed = fmaf(0x9E3779B9u, rank, 0x9E3779B9u);
return rank;
}
// 1D/2D тестовые функции/обёртки — без функциональных изменений (оставлены как были)
__forceinline float ShekelFunc(float x, float seed) {
int i = 0; float st = seed, r1, r2, res = 0.0f;
while (i < 10) {
XOR_RAND(st, r1); float xp = fmaf(-r1, 10.0f, x); XOR_RAND(st, r1); XOR_RAND(st, r2);
float d = fmaf(fmaf(r1, 20.0f, 5.0f), xp * xp, fmaf(r2, 0.2f, 1.0f)); d = copysignf(fmaxf(fabsf(d), FLT_MIN), d); res -= 1.0f / d; ++i;
}
return res;
}
__forceinline float RastriginFunc(float x1, float x2) {
const float t = fmaf(x1, x1, x2 * x2); float c1, c2;
FABE13_COS(6.28318530717958647692f * x1, c1); FABE13_COS(6.28318530717958647692f * x2, c2);
return (t - fmaf(c1 + c2, 10.0f, -14.6f)) * fmaf(-t, 0.25f, 18.42f);
}
__forceinline float HillFunc(float x, float seed) {
int j = 0; __declspec(align(32)) float ang[14u];
float st = 6.28318530717958647692f * x; while (j < 14) { ang[j] = st * (float)(j + 1); ++j; }
__declspec(align(32)) float sv[14u], cv[14u]; FABE13_SINCOS(ang, sv, cv, 14u);
float state = seed, r1, r2; XOR_RAND(state, r1); float res = fmaf(r1, 2.0f, -1.1f); --j;
while (j >= 0) { XOR_RAND(state, r1); XOR_RAND(state, r2); res += fmaf(fmaf(r1, 2.0f, -1.1f), sv[j], fmaf(r2, 2.0f, -1.1f) * cv[j]); --j; }
return res;
}
__forceinline float GrishaginFunc(float x1, float x2, float seed) {
int j = 0; __declspec(align(32)) float aj[8u], ak[8u];
while (j < 8) { float pj = 3.14159265358979323846f * (float)(j + 1); aj[j] = pj * x1; ak[j] = pj * x2; ++j; }
__declspec(align(32)) float sj[8u], cj[8u], sk[8u], ck[8u]; FABE13_SINCOS(aj, sj, cj, 8u); FABE13_SINCOS(ak, sk, ck, 8u);
--j; float p1 = 0.0f, p2 = 0.0f; float st = seed, r1, r2;
while (j >= 0) {
size_t k = 0u; while (k < 8u) {
float s = sj[j] * sj[j]; float c = ck[k] * ck[k];
XOR_RAND_GRSH(st, r1); XOR_RAND_GRSH(st, r2); p1 = fmaf(r1, s, fmaf(r2, c, p1));
XOR_RAND_GRSH(st, r1); XOR_RAND_GRSH(st, r2); p2 = fmaf(-r1, c, fmaf(r2, s, p2)); ++k;
}
--j;
}
return -sqrtf(fmaf(p1, p1, p2 * p2));
}
// Тестовые 1D/2D API оставлены без поведенческих изменений
extern "C" __declspec(dllexport) __declspec(noalias)
void AGP_1D(float global_iterations, float a, float b, float r, bool mode, float epsilon, float seed,
float** out_data, size_t* out_len) {
Slab* slab = tls.local(); slab->current = slab->base; int schetchick = 0;
const float initial_length = b - a; float dmax = initial_length;
const float threshold_03 = 0.3f * initial_length, inv_threshold_03 = 1.0f / threshold_03;
const float start_val = ShekelFunc(a, seed); float best_f = ShekelFunc(b, seed);
float x_Rmax_1 = a, x_Rmax_2 = b; float y_Rmax_1 = start_val, y_Rmax_2 = best_f;
std::vector<float, boost::alignment::aligned_allocator<float, 16u>> Extr;
std::vector<Interval1D*, boost::alignment::aligned_allocator<Interval1D*, 64u>> R;
Extr.reserve((size_t)global_iterations << 2u); R.reserve((size_t)global_iterations << 1u);
R.emplace_back(new Interval1D(a, b, start_val, best_f, 1.0f)); float Mmax = R.front()->M; float m = r * Mmax;
textwhile (true) { const float new_point = Shag(m, x_Rmax_1, x_Rmax_2, y_Rmax_1, y_Rmax_2, 1.0f, r); const float new_value = ShekelFunc(new_point, seed); if (new_value < best_f) { best_f = new_value; Extr.emplace_back(best_f); Extr.emplace_back(new_point); } std::pop_heap(R.begin(), R.end(), ComparePtr1D); const Interval1D* pro = R.back(); const float new_x1 = pro->x1, new_x2 = pro->x2; const float len2 = new_x2 - new_point, len1 = new_point - new_x1; const float interval_len = (len1 < len2 ? len1 : len2); if (++schetchick == (int)global_iterations || interval_len < epsilon) { Extr.emplace_back((float)schetchick); Extr.emplace_back(interval_len); *out_len = Extr.size(); *out_data = (float*)CoTaskMemAlloc(sizeof(float) * (*out_len)); memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len)); return; } Interval1D* curr = new Interval1D(new_x1, new_point, pro->y1, new_value, 1.0f); Interval1D* curr1 = new Interval1D(new_point, new_x2, new_value, pro->y2, 1.0f); const float currM = curr->M > curr1->M ? curr->M : curr1->M; const size_t r_size = R.size(); if (mode) { if (len2 + len1 == dmax) { dmax = len2 > len1 ? len2 : len1; for (auto p : R) { float L = p->x2 - p->x1; if (L > dmax) dmax = L; } } if (threshold_03 > dmax && !(schetchick % 3) || 10.0f * dmax < initial_length) { if (currM > Mmax) { Mmax = currM; m = r * Mmax; } const float progress = fmaf(-inv_threshold_03, dmax, 1.0f); const float alpha = progress * progress; const float betta = 2.0f - alpha; const float MULT = (1.0f / dmax) * Mmax; const float global_coeff = fmaf(MULT, r, -MULT); const float GF = betta * global_coeff; curr->ChangeCharacteristic(fmaf(GF, len1, curr->M * alpha)); curr1->ChangeCharacteristic(fmaf(GF, len2, curr1->M * alpha)); RecomputeR_AffineM_AVX2_1D(R.data(), r_size, GF, alpha); std::make_heap(R.begin(), R.end(), ComparePtr1D); } else { if (currM > Mmax) { if (currM < 1.15f * Mmax) { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); } else { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); RecomputeR_ConstM_AVX2_1D(R.data(), r_size, m); std::make_heap(R.begin(), R.end(), ComparePtr1D); } } else { curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); } } } else { if (currM > Mmax) { if (currM < 1.15f * Mmax) { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); } else { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); RecomputeR_ConstM_AVX2_1D(R.data(), r_size, m); std::make_heap(R.begin(), R.end(), ComparePtr1D); } } else { curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); } } R.back() = curr; std::push_heap(R.begin(), R.end(), ComparePtr1D); R.emplace_back(curr1); std::push_heap(R.begin(), R.end(), ComparePtr1D); const Interval1D* top = R.front(); x_Rmax_1 = top->x1; x_Rmax_2 = top->x2; y_Rmax_1 = top->y1; y_Rmax_2 = top->y2; }
}
extern "C" __declspec(dllexport) __declspec(noalias)
void AGP_2D(const float N, const float global_iterations, const float a, const float b, const float c,
const float d, const float r, const bool mode, const float epsilon, const float seed,
float** const __restrict out_data, size_t* const __restrict out_len) noexcept
{
Slab* const __restrict slab = tls.local(); slab->current = slab->base;
int schetchick = 0, no_improve = 0; const int rank = g_world->rank(); const int world_size = g_world->size();
while (g_world->iprobe(boost::mpi::any_source, 0)) { CtrlMsg dummy; g_world->recv(boost::mpi::any_source, 0, dummy); }
const float inv_divider = ldexpf(1.0f, -((gActiveMap.levels << 1) + 1));
const float x_addition = (b - a) * inv_divider, y_addition = (d - c) * inv_divider;
const float true_start = a + x_addition, true_end = b - x_addition;
float x_Rmax_1 = true_start, x_Rmax_2 = true_end;
const float initial_length = x_Rmax_2 - x_Rmax_1; float dmax = initial_length;
const float threshold_03 = 0.3f * initial_length, inv_threshold_03 = 1.0f / threshold_03;
const float start_val = rank % 3 ? RastriginFunc(true_end, d - y_addition) : RastriginFunc(true_start, c + y_addition);
float best_f = rank % 2 ? RastriginFunc(true_start, d - y_addition) : RastriginFunc(true_end, c + y_addition);
float y_Rmax_1 = start_val, y_Rmax_2 = best_f;
std::vector<float, boost::alignment::aligned_allocator<float, 16u>> Extr;
std::vector<Interval1D* __restrict, boost::alignment::aligned_allocator<Interval1D* __restrict, 64u>> R;
Extr.clear(); Extr.reserve(static_cast<size_t>(global_iterations) << 2u);
R.clear(); R.reserve(static_cast<size_t>(global_iterations) << 1u);
R.emplace_back(new Interval1D(true_start, true_end, start_val, best_f, 2.0f));
const Interval1D* __restrict top_ptr;
float Mmax = R.front()->M, m = r * Mmax;
while (true) {
const float interval_len = x_Rmax_2 - x_Rmax_1;
const bool stagnation = no_improve > 100 && schetchick > 270;
const float p = fmaf(-1.0f / initial_length, dmax, 1.0f);
while (g_world->iprobe(boost::mpi::any_source, 0)) {
CtrlMsg in; g_world->recv(boost::mpi::any_source, 0, in);
if (in.kind) {
if (!rank) {
Extr.emplace_back((float)schetchick); Extr.emplace_back(interval_len); *out_len = Extr.size();
out_data = reinterpret_cast<float __restrict>(CoTaskMemAlloc(sizeof(float) * (*out_len)));
memcpy(*out_data, Extr.data(), sizeof(float) * (out_len));
}
return;
}
const float sx = FindX2D_analytic(in.xchg.s_x1, in.xchg.s_x2);
const float ex = FindX2D_analytic(in.xchg.e_x1, in.xchg.e_x2);
Interval1D const __restrict injected = new Interval1D(sx, ex,
RastriginFunc(in.xchg.s_x1, in.xchg.s_x2), RastriginFunc(in.xchg.e_x1, in.xchg.e_x2), 2.0f);
injected->ChangeCharacteristic(m);
if (injected->R > 1.15f * top_ptr->R) {
const float k = stagnation ? fmaf(0.5819767068693265f, expm1f(p), 0.3f) : fmaf(0.3491860241215959f, expm1f(p), 0.6f);
injected->R = in.xchg.Rtop * k;
R.emplace_back(injected); std::push_heap(R.begin(), R.end(), ComparePtr1D);
}
}
// исходная формула T оставлена (это тестовая 2D), менять здесь не обязательно
const int T = (int)fmaf(-expm1f(p), 264.0f, 277.0f);
const bool want_term = interval_len < epsilon || schetchick == static_cast<int>(global_iterations);
if (!(++schetchick % T) || stagnation || want_term) {
CtrlMsg out; out.kind = want_term;
if (!out.kind) {
float s_x1, s_x2, e_x1, e_x2; HitTest2D_analytic(top_ptr->x1, s_x1, s_x2); HitTest2D_analytic(top_ptr->x2, e_x1, e_x2);
out.xchg = CrossMsg{ s_x1, s_x2, e_x1, e_x2, top_ptr->R };
}
for (int i = 0; i < world_size; ++i) if (i != rank) g_world->isend(i, 0, out);
if (out.kind) {
if (!rank) {
Extr.emplace_back((float)schetchick); Extr.emplace_back(interval_len); *out_len = Extr.size();
out_data = reinterpret_cast<float __restrict>(CoTaskMemAlloc(sizeof(float) * (out_len)));
memcpy(out_data, Extr.data(), sizeof(float) * (out_len));
}
return;
}
}
const float new_point = Shag(m, x_Rmax_1, x_Rmax_2, y_Rmax_1, y_Rmax_2, 2.0f, r);
float new_x1_val, new_x2_val; HitTest2D_analytic(new_point, new_x1_val, new_x2_val);
const float new_value = RastriginFunc(new_x1_val, new_x2_val);
if (new_value < best_f) { best_f = new_value; Extr.emplace_back(best_f); Extr.emplace_back(new_x1_val); Extr.emplace_back(new_x2_val); no_improve = 0; }
else { ++no_improve; }
std::pop_heap(R.begin(), R.end(), ComparePtr1D);
Interval1D const __restrict promej = R.back();
const float segment_x1 = promej->x1, segment_x2 = promej->x2;
const float len2 = segment_x2 - new_point, len1 = new_point - segment_x1;
Interval1D const __restrict curr = new Interval1D(segment_x1, new_point, promej->y1, new_value, 2.0f);
Interval1D const __restrict curr1 = new Interval1D(new_point, segment_x2, new_value, promej->y2, 2.0f);
const float currM = (std::max)(curr->M, curr1->M); const size_t r_size = R.size();
if (mode) {
if (len2 + len1 == dmax) { dmax = (std::max)(len1, len2); for (auto pI : R) { float L = pI->x2 - pI->x1; if (L > dmax) dmax = L; } }
if (threshold_03 > dmax && !(schetchick % 3) || 10.0f * dmax < initial_length) {
if (currM > Mmax) { Mmax = currM; m = r * Mmax; }
const float progress = fmaf(-inv_threshold_03, dmax, 1.0f);
const float alpha = progress * progress; const float betta = 2.0f - alpha;
const float MULTIPLIER = (1.0f / dmax) * Mmax;
const float global_coeff = fmaf(MULTIPLIER, r, -MULTIPLIER);
const float GLOBAL_FACTOR = betta * global_coeff;
curr->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len1, curr->M * alpha));
curr1->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len2, curr1->M * alpha));
RecomputeR_AffineM_AVX2_1D(R.data(), r_size, GLOBAL_FACTOR, alpha);
std::make_heap(R.begin(), R.end(), ComparePtr1D);
}
else {
if (currM > Mmax) {
if (currM < 1.15f * Mmax) { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); }
else {
Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m);
RecomputeR_ConstM_AVX2_1D(R.data(), r_size, m); std::make_heap(R.begin(), R.end(), ComparePtr1D);
}
}
else { curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); }
}
}
else {
if (currM > Mmax) {
if (currM < 1.15f * Mmax) { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); }
else {
Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m);
RecomputeR_ConstM_AVX2_1D(R.data(), r_size, m); std::make_heap(R.begin(), R.end(), ComparePtr1D);
}
}
else { curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); }
}
R.back() = curr; std::push_heap(R.begin(), R.end(), ComparePtr1D);
R.emplace_back(curr1); std::push_heap(R.begin(), R.end(), ComparePtr1D);
top_ptr = R.front(); x_Rmax_1 = top_ptr->x1; x_Rmax_2 = top_ptr->x2; y_Rmax_1 = top_ptr->y1; y_Rmax_2 = top_ptr->y2;
}
}
struct RunParams {
int nSegments; uint8_t varLen; float minTheta; float tx, ty; int levels, maxIter; float r; uint8_t adaptive; float eps; unsigned int seed;
template<typename Archive> void serialize(Archive& ar, const unsigned int) { ar& nSegments& varLen& minTheta& tx& ty& levels& maxIter& r& adaptive& eps& seed; }
};
extern "C" __declspec(dllexport) __declspec(noalias)
void AgpStartManipND(int nSegments, bool variableLengths, float minTheta, float targetX, float targetY,
int peanoLevels, int maxIterPerBranch, float r, bool adaptiveMode, float epsilon, unsigned int seed) {
RunParams p; p.nSegments = nSegments; p.varLen = (uint8_t)variableLengths; p.minTheta = minTheta; p.tx = targetX; p.ty = targetY;
p.levels = peanoLevels; p.maxIter = maxIterPerBranch; p.r = r; p.adaptive = (uint8_t)adaptiveMode; p.eps = epsilon; p.seed = seed;
const int rank = g_world->rank(); const int world = g_world->size();
for (int i = 1; i < world; ++i) if (i != rank) g_world->isend(i, 1, p);
}
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AgpWaitStartAndRun() {
while (true) {
if (g_world->iprobe(0, 1)) {
RunParams p; g_world->recv(0, 1, p);
float* q = nullptr; size_t qlen = 0; float bx = 0, by = 0, bf = 0;
AGP_Manip2D(p.nSegments, (bool)p.varLen, p.minTheta, p.tx, p.ty, p.levels, p.maxIter, p.r, (bool)p.adaptive, p.eps, p.seed, &q, &qlen, &bx, &by, &bf);
if (q) CoTaskMemFree(q);
}
Sleep(0);
}
}
extern "C" __declspec(dllexport) __declspec(noalias) void AgpStartWorkers() {
int i = 1; const int world = g_world->size();
while (i < world) { g_world->isend(i, 1, 0); ++i; }
}
extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AGP_Free(float* p) { CoTaskMemFree(p); } dllmain.cpp: #include "pch.h"
BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved) {
if (ul_reason_for_call == DLL_PROCESS_ATTACH) {
DisableThreadLibraryCalls(hModule);
}
return TRUE;
} - обрати внимание dll используется в управляемом коде, внимательно проанализируй весь код на предмет утечек памяти и опасного доступа к памяти когда память повреждена, сейчас при запуске приложения я иногда наблюдаю картину что я нажимаю на кнопку а картинка ещё долгое время после даже иногда многих нажатий не обновляется - ощущение что в этот момент зависания и происходит освобождение памяти или подготовка памяти - в общем что-то с памятью - так как потом после нескольких нажатий на кнопку картинка всё же обновляется, ощущение что где-то происходит неаккуратное обращение с памятью, иногда после многих а иногда после нескольких нажатий я получаю вывод: C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
Необработанное исключение: System.AccessViolationException: Попытка чтения или записи в защищенную память. Это часто свидетельствует о том, что другая память повреждена.
в TESTAGP.MyForm.OnOptimizeClick(Object sender, EventArgs e) в D:\TEST_AGP\MyForm.h:строка 229
в System.Windows.Forms.Control.OnClick(EventArgs e)
в System.Windows.Forms.Button.OnMouseUp(MouseEventArgs mevent)
в System.Windows.Forms.Control.WmMouseUp(Message& m, MouseButtons button, Int32 clicks)
в System.Windows.Forms.Control.WndProc(Message& m)
в System.Windows.Forms.ButtonBase.WndProc(Message& m)
в System.Windows.Forms.Button.WndProc(Message& m)
в System.Windows.Forms.NativeWindow.Callback(IntPtr hWnd, Int32 msg, IntPtr wparam, IntPtr lparam)
в System.Windows.Forms.UnsafeNativeMethods.DispatchMessageW(MSG& msg)
в System.Windows.Forms.Application.ComponentManager.System.Windows.Forms.UnsafeNativeMethods.IMsoComponentManager.FPushMessageLoop(IntPtr dwComponentID, Int32 reason, Int32 pvLoopData)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoopInner(Int32 reason, ApplicationContext context)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoop(Int32 reason, ApplicationContext context)
в main() в D:\TEST_AGP\MyForm.cpp:строка 18
в mainCRTStartup()
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
Необработанное исключение: System.AccessViolationException: Попытка чтения или записи в защищенную память. Это часто свидетельствует о том, что другая память повреждена.
в TESTAGP.MyForm.OnOptimizeClick(Object sender, EventArgs e) в D:\TEST_AGP\MyForm.h:строка 229
в System.Windows.Forms.Control.OnClick(EventArgs e)
в System.Windows.Forms.Button.OnMouseUp(MouseEventArgs mevent)
в System.Windows.Forms.Control.WmMouseUp(Message& m, MouseButtons button, Int32 clicks)
в System.Windows.Forms.Control.WndProc(Message& m)
в System.Windows.Forms.ButtonBase.WndProc(Message& m)
в System.Windows.Forms.Button.WndProc(Message& m)
в System.Windows.Forms.NativeWindow.Callback(IntPtr hWnd, Int32 msg, IntPtr wparam, IntPtr lparam)
в System.Windows.Forms.UnsafeNativeMethods.DispatchMessageW(MSG& msg)
в System.Windows.Forms.Application.ComponentManager.System.Windows.Forms.UnsafeNativeMethods.IMsoComponentManager.FPushMessageLoop(IntPtr dwComponentID, Int32 reason, Int32 pvLoopData)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoopInner(Int32 reason, ApplicationContext context)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoop(Int32 reason, ApplicationContext context)
в main() в D:\TEST_AGP\MyForm.cpp:строка 18
в mainCRTStartup()
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
Необработанное исключение: System.AccessViolationException: Попытка чтения или записи в защищенную память. Это часто свидетельствует о том, что другая память повреждена.
в TESTAGP.MyForm.OnOptimizeClick(Object sender, EventArgs e) в D:\TEST_AGP\MyForm.h:строка 229
в System.Windows.Forms.Control.OnClick(EventArgs e)
в System.Windows.Forms.Button.OnMouseUp(MouseEventArgs mevent)
в System.Windows.Forms.Control.WmMouseUp(Message& m, MouseButtons button, Int32 clicks)
в System.Windows.Forms.Control.WndProc(Message& m)
в System.Windows.Forms.ButtonBase.WndProc(Message& m)
в System.Windows.Forms.Button.WndProc(Message& m)
в System.Windows.Forms.NativeWindow.Callback(IntPtr hWnd, Int32 msg, IntPtr wparam, IntPtr lparam)
в System.Windows.Forms.UnsafeNativeMethods.DispatchMessageW(MSG& msg)
в System.Windows.Forms.Application.ComponentManager.System.Windows.Forms.UnsafeNativeMethods.IMsoComponentManager.FPushMessageLoop(IntPtr dwComponentID, Int32 reason, Int32 pvLoopData)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoopInner(Int32 reason, ApplicationContext context)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoop(Int32 reason, ApplicationContext context)
в main() в D:\TEST_AGP\MyForm.cpp:строка 18
в mainCRTStartup()
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
Необработанное исключение: System.AccessViolationException: Попытка чтения или записи в защищенную память. Это часто свидетельствует о том, что другая память повреждена.
в TESTAGP.MyForm.OnOptimizeClick(Object sender, EventArgs e) в D:\TEST_AGP\MyForm.h:строка 229
в System.Windows.Forms.Control.OnClick(EventArgs e)
в System.Windows.Forms.Button.OnMouseUp(MouseEventArgs mevent)
в System.Windows.Forms.Control.WmMouseUp(Message& m, MouseButtons button, Int32 clicks)
в System.Windows.Forms.Control.WndProc(Message& m)
в System.Windows.Forms.ButtonBase.WndProc(Message& m)
в System.Windows.Forms.Button.WndProc(Message& m)
в System.Windows.Forms.NativeWindow.Callback(IntPtr hWnd, Int32 msg, IntPtr wparam, IntPtr lparam)
в System.Windows.Forms.UnsafeNativeMethods.DispatchMessageW(MSG& msg)
в System.Windows.Forms.Application.ComponentManager.System.Windows.Forms.UnsafeNativeMethods.IMsoComponentManager.FPushMessageLoop(IntPtr dwComponentID, Int32 reason, Int32 pvLoopData)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoopInner(Int32 reason, ApplicationContext context)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoop(Int32 reason, ApplicationContext context)
в main() в D:\TEST_AGP\MyForm.cpp:строка 18
в mainCRTStartup()
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
Необработанное исключение: System.AccessViolationException: Попытка чтения или записи в защищенную память. Это часто свидетельствует о том, что другая память повреждена.
в TESTAGP.MyForm.OnOptimizeClick(Object sender, EventArgs e) в D:\TEST_AGP\MyForm.h:строка 229
в System.Windows.Forms.Control.OnClick(EventArgs e)
в System.Windows.Forms.Button.OnMouseUp(MouseEventArgs mevent)
в System.Windows.Forms.Control.WmMouseUp(Message& m, MouseButtons button, Int32 clicks)
в System.Windows.Forms.Control.WndProc(Message& m)
в System.Windows.Forms.ButtonBase.WndProc(Message& m)
в System.Windows.Forms.Button.WndProc(Message& m)
в System.Windows.Forms.NativeWindow.Callback(IntPtr hWnd, Int32 msg, IntPtr wparam, IntPtr lparam)
в System.Windows.Forms.UnsafeNativeMethods.DispatchMessageW(MSG& msg)
в System.Windows.Forms.Application.ComponentManager.System.Windows.Forms.UnsafeNativeMethods.IMsoComponentManager.FPushMessageLoop(IntPtr dwComponentID, Int32 reason, Int32 pvLoopData)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoopInner(Int32 reason, ApplicationContext context)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoop(Int32 reason, ApplicationContext context)
в main() в D:\TEST_AGP\MyForm.cpp:строка 18
в mainCRTStartup()
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
Необработанное исключение: System.AccessViolationException: Попытка чтения или записи в защищенную память. Это часто свидетельствует о том, что другая память повреждена.
в TESTAGP.MyForm.OnOptimizeClick(Object sender, EventArgs e) в D:\TEST_AGP\MyForm.h:строка 229
в System.Windows.Forms.Control.OnClick(EventArgs e)
в System.Windows.Forms.Button.OnMouseUp(MouseEventArgs mevent)
в System.Windows.Forms.Control.WmMouseUp(Message& m, MouseButtons button, Int32 clicks)
в System.Windows.Forms.Control.WndProc(Message& m)
в System.Windows.Forms.ButtonBase.WndProc(Message& m)
в System.Windows.Forms.Button.WndProc(Message& m)
в System.Windows.Forms.NativeWindow.Callback(IntPtr hWnd, Int32 msg, IntPtr wparam, IntPtr lparam)
в System.Windows.Forms.UnsafeNativeMethods.DispatchMessageW(MSG& msg)
в System.Windows.Forms.Application.ComponentManager.System.Windows.Forms.UnsafeNativeMethods.IMsoComponentManager.FPushMessageLoop(IntPtr dwComponentID, Int32 reason, Int32 pvLoopData)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoopInner(Int32 reason, ApplicationContext context)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoop(Int32 reason, ApplicationContext context)
в main() в D:\TEST_AGP\MyForm.cpp:строка 18
в mainCRTStartup()
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
Необработанное исключение: System.AccessViolationException: Попытка чтения или записи в защищенную память. Это часто свидетельствует о том, что другая память повреждена.
в TESTAGP.MyForm.OnOptimizeClick(Object sender, EventArgs e) в D:\TEST_AGP\MyForm.h:строка 229
в System.Windows.Forms.Control.OnClick(EventArgs e)
в System.Windows.Forms.Button.OnMouseUp(MouseEventArgs mevent)
в System.Windows.Forms.Control.WmMouseUp(Message& m, MouseButtons button, Int32 clicks)
в System.Windows.Forms.Control.WndProc(Message& m)
в System.Windows.Forms.ButtonBase.WndProc(Message& m)
в System.Windows.Forms.Button.WndProc(Message& m)
в System.Windows.Forms.NativeWindow.Callback(IntPtr hWnd, Int32 msg, IntPtr wparam, IntPtr lparam)
в System.Windows.Forms.UnsafeNativeMethods.DispatchMessageW(MSG& msg)
в System.Windows.Forms.Application.ComponentManager.System.Windows.Forms.UnsafeNativeMethods.IMsoComponentManager.FPushMessageLoop(IntPtr dwComponentID, Int32 reason, Int32 pvLoopData)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoopInner(Int32 reason, ApplicationContext context)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoop(Int32 reason, ApplicationContext context)
в main() в D:\TEST_AGP\MyForm.cpp:строка 18
в mainCRTStartup()
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
Необработанное исключение: System.AccessViolationException: Попытка чтения или записи в защищенную память. Это часто свидетельствует о том, что другая память повреждена.
в TESTAGP.MyForm.OnOptimizeClick(Object sender, EventArgs e) в D:\TEST_AGP\MyForm.h:строка 229
в System.Windows.Forms.Control.OnClick(EventArgs e)
в System.Windows.Forms.Button.OnMouseUp(MouseEventArgs mevent)
в System.Windows.Forms.Control.WmMouseUp(Message& m, MouseButtons button, Int32 clicks)
в System.Windows.Forms.Control.WndProc(Message& m)
в System.Windows.Forms.ButtonBase.WndProc(Message& m)
в System.Windows.Forms.Button.WndProc(Message& m)
в System.Windows.Forms.NativeWindow.Callback(IntPtr hWnd, Int32 msg, IntPtr wparam, IntPtr lparam)
в System.Windows.Forms.UnsafeNativeMethods.DispatchMessageW(MSG& msg)
в System.Windows.Forms.Application.ComponentManager.System.Windows.Forms.UnsafeNativeMethods.IMsoComponentManager.FPushMessageLoop(IntPtr dwComponentID, Int32 reason, Int32 pvLoopData)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoopInner(Int32 reason, ApplicationContext context)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoop(Int32 reason, ApplicationContext context)
в main() в D:\TEST_AGP\MyForm.cpp:строка 18
в mainCRTStartup()
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
Необработанное исключение: System.AccessViolationException: Попытка чтения или записи в защищенную память. Это часто свидетельствует о том, что другая память повреждена.
в TESTAGP.MyForm.OnOptimizeClick(Object sender, EventArgs e) в D:\TEST_AGP\MyForm.h:строка 229
в System.Windows.Forms.Control.OnClick(EventArgs e)
в System.Windows.Forms.Button.OnMouseUp(MouseEventArgs mevent)
в System.Windows.Forms.Control.WmMouseUp(Message& m, MouseButtons button, Int32 clicks)
в System.Windows.Forms.Control.WndProc(Message& m)
в System.Windows.Forms.ButtonBase.WndProc(Message& m)
в System.Windows.Forms.Button.WndProc(Message& m)
в System.Windows.Forms.NativeWindow.Callback(IntPtr hWnd, Int32 msg, IntPtr wparam, IntPtr lparam)
в System.Windows.Forms.UnsafeNativeMethods.DispatchMessageW(MSG& msg)
в System.Windows.Forms.Application.ComponentManager.System.Windows.Forms.UnsafeNativeMethods.IMsoComponentManager.FPushMessageLoop(IntPtr dwComponentID, Int32 reason, Int32 pvLoopData)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoopInner(Int32 reason, ApplicationContext context)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoop(Int32 reason, ApplicationContext context)
в main() в D:\TEST_AGP\MyForm.cpp:строка 18
в mainCRTStartup()
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
Необработанное исключение: System.AccessViolationException: Попытка чтения или записи в защищенную память. Это часто свидетельствует о том, что другая память повреждена.
в TESTAGP.MyForm.OnOptimizeClick(Object sender, EventArgs e) в D:\TEST_AGP\MyForm.h:строка 229
в System.Windows.Forms.Control.OnClick(EventArgs e)
в System.Windows.Forms.Button.OnMouseUp(MouseEventArgs mevent)
в System.Windows.Forms.Control.WmMouseUp(Message& m, MouseButtons button, Int32 clicks)
в System.Windows.Forms.Control.WndProc(Message& m)
в System.Windows.Forms.ButtonBase.WndProc(Message& m)
в System.Windows.Forms.Button.WndProc(Message& m)
в System.Windows.Forms.NativeWindow.Callback(IntPtr hWnd, Int32 msg, IntPtr wparam, IntPtr lparam)
в System.Windows.Forms.UnsafeNativeMethods.DispatchMessageW(MSG& msg)
в System.Windows.Forms.Application.ComponentManager.System.Windows.Forms.UnsafeNativeMethods.IMsoComponentManager.FPushMessageLoop(IntPtr dwComponentID, Int32 reason, Int32 pvLoopData)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoopInner(Int32 reason, ApplicationContext context)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoop(Int32 reason, ApplicationContext context)
в main() в D:\TEST_AGP\MyForm.cpp:строка 18
в mainCRTStartup()
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
Необработанное исключение: System.AccessViolationException: Попытка чтения или записи в защищенную память. Это часто свидетельствует о том, что другая память повреждена.
в TESTAGP.MyForm.OnOptimizeClick(Object sender, EventArgs e) в D:\TEST_AGP\MyForm.h:строка 229
в System.Windows.Forms.Control.OnClick(EventArgs e)
в System.Windows.Forms.Button.OnMouseUp(MouseEventArgs mevent)
в System.Windows.Forms.Control.WmMouseUp(Message& m, MouseButtons button, Int32 clicks)
в System.Windows.Forms.Control.WndProc(Message& m)
в System.Windows.Forms.ButtonBase.WndProc(Message& m)
в System.Windows.Forms.Button.WndProc(Message& m)
в System.Windows.Forms.NativeWindow.Callback(IntPtr hWnd, Int32 msg, IntPtr wparam, IntPtr lparam)
в System.Windows.Forms.UnsafeNativeMethods.DispatchMessageW(MSG& msg)
в System.Windows.Forms.Application.ComponentManager.System.Windows.Forms.UnsafeNativeMethods.IMsoComponentManager.FPushMessageLoop(IntPtr dwComponentID, Int32 reason, Int32 pvLoopData)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoopInner(Int32 reason, ApplicationContext context)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoop(Int32 reason, ApplicationContext context)
в main() в D:\TEST_AGP\MyForm.cpp:строка 18
в mainCRTStartup()
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
Необработанное исключение: System.AccessViolationException: Попытка чтения или записи в защищенную память. Это часто свидетельствует о том, что другая память повреждена.
в TESTAGP.MyForm.OnOptimizeClick(Object sender, EventArgs e) в D:\TEST_AGP\MyForm.h:строка 229
в System.Windows.Forms.Control.OnClick(EventArgs e)
в System.Windows.Forms.Button.OnMouseUp(MouseEventArgs mevent)
в System.Windows.Forms.Control.WmMouseUp(Message& m, MouseButtons button, Int32 clicks)
в System.Windows.Forms.Control.WndProc(Message& m)
в System.Windows.Forms.ButtonBase.WndProc(Message& m)
в System.Windows.Forms.Button.WndProc(Message& m)
в System.Windows.Forms.NativeWindow.Callback(IntPtr hWnd, Int32 msg, IntPtr wparam, IntPtr lparam)
в System.Windows.Forms.UnsafeNativeMethods.DispatchMessageW(MSG& msg)
в System.Windows.Forms.Application.ComponentManager.System.Windows.Forms.UnsafeNativeMethods.IMsoComponentManager.FPushMessageLoop(IntPtr dwComponentID, Int32 reason, Int32 pvLoopData)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoopInner(Int32 reason, ApplicationContext context)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoop(Int32 reason, ApplicationContext context)
в main() в D:\TEST_AGP\MyForm.cpp:строка 18
в mainCRTStartup()
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
Необработанное исключение: System.AccessViolationException: Попытка чтения или записи в защищенную память. Это часто свидетельствует о том, что другая память повреждена.
в TESTAGP.MyForm.OnOptimizeClick(Object A_0, EventArgs A_1) в D:\TEST_AGP\MyForm.h:строка 179
в System.Windows.Forms.Control.OnClick(EventArgs e)
в System.Windows.Forms.Button.OnMouseUp(MouseEventArgs mevent)
в System.Windows.Forms.Control.WmMouseUp(Message& m, MouseButtons button, Int32 clicks)
в System.Windows.Forms.Control.WndProc(Message& m)
в System.Windows.Forms.ButtonBase.WndProc(Message& m)
в System.Windows.Forms.Button.WndProc(Message& m)
в System.Windows.Forms.NativeWindow.Callback(IntPtr hWnd, Int32 msg, IntPtr wparam, IntPtr lparam)
в System.Windows.Forms.UnsafeNativeMethods.DispatchMessageW(MSG& msg)
в System.Windows.Forms.Application.ComponentManager.System.Windows.Forms.UnsafeNativeMethods.IMsoComponentManager.FPushMessageLoop(IntPtr dwComponentID, Int32 reason, Int32 pvLoopData)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoopInner(Int32 reason, ApplicationContext context)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoop(Int32 reason, ApplicationContext context)
в main() в D:\TEST_AGP\MyForm.cpp:строка 24
в mainCRTStartup()
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
Необработанное исключение: System.AccessViolationException: Попытка чтения или записи в защищенную память. Это часто свидетельствует о том, что другая память повреждена.
в TESTAGP.MyForm.OnOptimizeClick(Object A_0, EventArgs A_1) в D:\TEST_AGP\MyForm.h:строка 179
в System.Windows.Forms.Control.OnClick(EventArgs e)
в System.Windows.Forms.Button.OnMouseUp(MouseEventArgs mevent)
в System.Windows.Forms.Control.WmMouseUp(Message& m, MouseButtons button, Int32 clicks)
в System.Windows.Forms.Control.WndProc(Message& m)
в System.Windows.Forms.ButtonBase.WndProc(Message& m)
в System.Windows.Forms.Button.WndProc(Message& m)
в System.Windows.Forms.NativeWindow.Callback(IntPtr hWnd, Int32 msg, IntPtr wparam, IntPtr lparam)
в System.Windows.Forms.UnsafeNativeMethods.DispatchMessageW(MSG& msg)
в System.Windows.Forms.Application.ComponentManager.System.Windows.Forms.UnsafeNativeMethods.IMsoComponentManager.FPushMessageLoop(IntPtr dwComponentID, Int32 reason, Int32 pvLoopData)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoopInner(Int32 reason, ApplicationContext context)
в System.Windows.Forms.Application.ThreadContext.RunMessageLoop(Int32 reason, ApplicationContext context)
в main() в D:\TEST_AGP\MyForm.cpp:строка 24
в mainCRTStartup()
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis -----
C:\Users\maxim>mpiexec -n 6 D:\TEST_AGP\x64\Release\TEST_AGP.exe
job aborted:
[ranks] message
[0] process exited without calling finalize
[1-5] terminated
---- error analysis -----
[0] on DESKTOP-C1VT6JF
D:\TEST_AGP\x64\Release\TEST_AGP.exe ended prematurely and may have crashed. exit code 0xc0000005
---- error analysis ----- будь очень внимателен к деталям и всем аллокациям - трать много времени на анализ - мне верни полностью исправленный код без пропусков
Ниже — полностью исправленные файлы без пропусков. Я устранил реальные источники повреждения памяти и зависаний:
Критический буфер-оверран в LHS-сидировании (generate_lhs_seeds_lite/generate_heuristic_seeds) — раньше писали за пределы фиксированного массива seeds[64*32] и даже за границы permutations[32][16]. Переделал на безопасное формирование латинского гиперкуба с ограничением по вместимости.
Переполнение всех фиксированных буферов на 32 float при nSegments > 16 (при variableLengths=true размерность dim = 2*nSegments). Ввёл жёсткий лимит MAX_DIM=32, а в UI ограничил MAX_SEGMENTS=16 и блокирую кнопку «+ Звено» при достижении лимита.
Незаполнение/неочистка TLS-slab для ND-ветки: в AGP_Manip2D не сбрасывался tls.slab->current — при каждом запуске копился мусор и в итоге «уезжали» указатели. Теперь слэб сбрасывается перед работой.
Неинициализированное использование top_ptr в AGP_2D до первого присваивания — исправлено.
Сид генератора — убрал сомнительный fmaf над целыми, сделал детерминированное целочисленное хэш-смешивание.
Упростил небезопасные трюки с битами в Shag — без алиасинга.
Корректное завершение MPI-процессов: добавил экспорт AgpShutdownWorkers() и AgpFinalizeAll(), UI шлёт «яды» воркерам и аккуратно финализирует MPI — исчезают сообщения job aborted и «didn't call finalize».
Мелкие доработки UI: правильное создание List<float> без странного конструктора, защита от попытки читать длины когда bestQLen не хватает, косметика.
cpp#pragma once #define WIN32_LEAN_AND_MEAN #include <Windows.h> #include <stdint.h> using namespace System; using namespace System::Drawing; using namespace System::Windows::Forms; using namespace System::Collections::Generic; using namespace System::Drawing::Drawing2D; typedef void(__cdecl* P_MANIP)(int, bool, float, float, float, int, int, float, bool, float, unsigned int, float**, size_t*, float*, float*, float*); typedef void(__cdecl* P_FREE)(float*); typedef void(__cdecl* P_START)(int, bool, float, float, float, int, int, float, bool, float, unsigned int); namespace TESTAGP { public ref class MyForm : public Form { public: MyForm(HMODULE hLib) : hLib(hLib) { this->SetStyle(ControlStyles::AllPaintingInWmPaint | ControlStyles::UserPaint | ControlStyles::OptimizedDoubleBuffer, true); this->Text = L"AGP Manipulator 2D"; this->ClientSize = System::Drawing::Size(1000, 700); this->Resize += gcnew EventHandler(this, &MyForm::OnResize); fManip = (P_MANIP)GetProcAddress(hLib, "AGP_Manip2D"); pFree = (P_FREE )GetProcAddress(hLib, "AGP_Free"); pStart = (P_START)GetProcAddress(hLib, "AgpStartManipND"); angles = gcnew List<float>(); lengths = gcnew List<float>(); InitUI(); ResetRandomConfig(); } protected: ~MyForm() {} private: // Жёсткий лимит, чтобы не переполнить внутренние буферы (dim<=32) literal int MAX_SEGMENTS = 16; HMODULE hLib; P_MANIP fManip; P_FREE pFree; P_START pStart; int nSegments; bool variableLengths; List<float>^ angles; List<float>^ lengths; CheckBox^ cbVarLen; NumericUpDown^ nudMinTheta, ^ nudBaseLength, ^ nudStretchFactor, ^ nudTargetX, ^ nudTargetY, ^ nudLevels, ^ nudMaxIter; CheckBox^ cbAdaptive; NumericUpDown^ nudR, ^ nudEps; Button^ btnAdd, ^ btnRem, ^ btnOptimize; Label^ lblInfo; System::UInt32 rngState = 0xA5C39E0Du; void WireInvalidate(Control^ c) { if (dynamic_cast<NumericUpDown^>(c)) ((NumericUpDown^)c)->ValueChanged += gcnew EventHandler(this, &MyForm::OnAnyChanged); else if (dynamic_cast<CheckBox^>(c)) ((CheckBox^)c)->CheckedChanged += gcnew EventHandler(this, &MyForm::OnAnyChanged); } void InitUI() { int y = 10, w = 180, h = 24, pad = 8, currentX = 10; Label^ L; L = gcnew Label(); L->Text = L"Мин. угол (рад)"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudMinTheta = gcnew NumericUpDown(); nudMinTheta->Location = Point(currentX, y + h + 2); nudMinTheta->Width = w; nudMinTheta->DecimalPlaces = 3; nudMinTheta->Minimum = (Decimal)0.01; nudMinTheta->Maximum = (Decimal)3.14159; nudMinTheta->Value = (Decimal)1.5; this->Controls->Add(nudMinTheta); WireInvalidate(nudMinTheta); currentX += w + 20; L = gcnew Label(); L->Text = L"Базовая длина"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudBaseLength = gcnew NumericUpDown(); nudBaseLength->Location = Point(currentX, y + h + 2); nudBaseLength->Width = w; nudBaseLength->DecimalPlaces = 2; nudBaseLength->Minimum = (Decimal)0.50; nudBaseLength->Maximum = (Decimal)2.00; nudBaseLength->Value = (Decimal)1.00; this->Controls->Add(nudBaseLength); WireInvalidate(nudBaseLength); currentX += w + 20; L = gcnew Label(); L->Text = L"Коэф. растяжения"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudStretchFactor = gcnew NumericUpDown(); nudStretchFactor->Location = Point(currentX, y + h + 2); nudStretchFactor->Width = w; nudStretchFactor->DecimalPlaces = 2; nudStretchFactor->Minimum = (Decimal)1.00; nudStretchFactor->Maximum = (Decimal)1.50; nudStretchFactor->Increment = (Decimal)0.01; nudStretchFactor->Value = (Decimal)1.50; this->Controls->Add(nudStretchFactor); WireInvalidate(nudStretchFactor); currentX += w + 20; cbVarLen = gcnew CheckBox(); cbVarLen->Text = L"Переменные длины"; cbVarLen->Location = Point(currentX, y + h + 2); cbVarLen->Width = w; cbVarLen->Checked = false; this->Controls->Add(cbVarLen); WireInvalidate(cbVarLen); currentX = 10; y += h * 2 + pad + 10; L = gcnew Label(); L->Text = L"Цель X"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudTargetX = gcnew NumericUpDown(); nudTargetX->Location = Point(currentX, y + h + 2); nudTargetX->Width = w; nudTargetX->DecimalPlaces = 2; nudTargetX->Minimum = (Decimal)-10.0; nudTargetX->Maximum = (Decimal)10.0; nudTargetX->Value = (Decimal)3.5; this->Controls->Add(nudTargetX); WireInvalidate(nudTargetX); currentX += w + 20; L = gcnew Label(); L->Text = L"Цель Y"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudTargetY = gcnew NumericUpDown(); nudTargetY->Location = Point(currentX, y + h + 2); nudTargetY->Width = w; nudTargetY->DecimalPlaces = 2; nudTargetY->Minimum = (Decimal)-10.0; nudTargetY->Maximum = (Decimal)10.0; nudTargetY->Value = (Decimal)1.0; this->Controls->Add(nudTargetY); WireInvalidate(nudTargetY); currentX += w + 20; L = gcnew Label(); L->Text = L"Глубина"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudLevels = gcnew NumericUpDown(); nudLevels->Location = Point(currentX, y + h + 2); nudLevels->Width = w; nudLevels->Minimum = 7; nudLevels->Maximum = 20; nudLevels->Value = 12; this->Controls->Add(nudLevels); WireInvalidate(nudLevels); currentX += w + 20; L = gcnew Label(); L->Text = L"Надежность (r)"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudR = gcnew NumericUpDown(); nudR->Location = Point(currentX, y + h + 2); nudR->Width = w; nudR->DecimalPlaces = 2; nudR->Minimum = (Decimal)1.00; nudR->Maximum = (Decimal)20.00; nudR->Value = (Decimal)2.50; this->Controls->Add(nudR); WireInvalidate(nudR); currentX += w + 20; cbAdaptive = gcnew CheckBox(); cbAdaptive->Text = L"Адаптивная"; cbAdaptive->Location = Point(currentX, y + h + 2); cbAdaptive->Width = w; cbAdaptive->Checked = true; this->Controls->Add(cbAdaptive); WireInvalidate(cbAdaptive); y += h * 2 + pad + 10; currentX = 10; L = gcnew Label(); L->Text = L"Точность"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudEps = gcnew NumericUpDown(); nudEps->Location = Point(currentX, y + h + 2); nudEps->Width = w; nudEps->DecimalPlaces = 9; nudEps->Minimum = (Decimal)0.000000001; nudEps->Maximum = (Decimal)0.1; nudEps->Value = (Decimal)0.00001; this->Controls->Add(nudEps); WireInvalidate(nudEps); currentX += w + 20; L = gcnew Label(); L->Text = L"Макс. итераций"; L->Location = Point(currentX, y); L->Width = w; this->Controls->Add(L); nudMaxIter = gcnew NumericUpDown(); nudMaxIter->Location = Point(currentX, y + h + 2); nudMaxIter->Width = w; nudMaxIter->Minimum = 10; nudMaxIter->Maximum = 500000; nudMaxIter->Value = 1000; this->Controls->Add(nudMaxIter); WireInvalidate(nudMaxIter); currentX += 200; btnAdd = gcnew Button(); btnAdd->Text = L"+ Звено"; btnAdd->Location = Point(currentX, y + h + 2); btnAdd->Width = 80; btnAdd->Click += gcnew EventHandler(this, &MyForm::OnAddClick); this->Controls->Add(btnAdd); currentX += 85; btnRem = gcnew Button(); btnRem->Text = L"- Звено"; btnRem->Location = Point(currentX, y + h + 2); btnRem->Width = 80; btnRem->Click += gcnew EventHandler(this, &MyForm::OnRemClick); this->Controls->Add(btnRem); currentX += 125; btnOptimize = gcnew Button(); btnOptimize->Text = L"Оптимизировать"; btnOptimize->Location = Point(currentX, y + h + 2); btnOptimize->Width = 120; btnOptimize->Click += gcnew EventHandler(this, &MyForm::OnOptimizeClick); this->Controls->Add(btnOptimize); currentX += 125; lblInfo = gcnew Label(); lblInfo->Location = Point(currentX, y); lblInfo->Size = System::Drawing::Size(250, 60); lblInfo->BorderStyle = BorderStyle::FixedSingle; lblInfo->Text = L"Готов"; this->Controls->Add(lblInfo); } void ResetRandomConfig() { nSegments = 1; angles->Clear(); lengths->Clear(); angles->Add((float)(3.14159265358979323846 / 2.0)); // вверх lengths->Add((float)nudBaseLength->Value); variableLengths = false; this->Invalidate(); } float Rand01() { rngState ^= rngState << 13; rngState ^= rngState >> 17; rngState ^= rngState << 5; return (float)((double)(unsigned)rngState / 4294967296.0); } float RandAngle() { return (Rand01() * 6.28318530717958647692f) - 3.14159265358979323846f; } System::Void OnResize(System::Object^, System::EventArgs^) { this->Invalidate(); } System::Void OnAnyChanged(System::Object^, System::EventArgs^) { this->Invalidate(); } System::Void OnAddClick(System::Object^, System::EventArgs^) { if (nSegments >= MAX_SEGMENTS) { MessageBox::Show(L"Достигнут максимум звеньев (16). Это ограничение алгоритма (dim ≤ 32).", L"Ограничение", MessageBoxButtons::OK, MessageBoxIcon::Information); return; } ++nSegments; angles->Add(RandAngle()); lengths->Add((float)nudBaseLength->Value); this->Invalidate(); } System::Void OnRemClick(System::Object^, System::EventArgs^) { if (nSegments > 1) { --nSegments; angles->RemoveAt(angles->Count - 1); lengths->RemoveAt(lengths->Count - 1); this->Invalidate(); } } System::Void OnOptimizeClick(System::Object^, System::EventArgs^) { if (!fManip) { MessageBox::Show(L"Функция AGP_Manip2D не найдена в DLL.", L"Ошибка", MessageBoxButtons::OK, MessageBoxIcon::Error); return; } variableLengths = cbVarLen->Checked; // Ограничение по размерности: dim = n + (varLen ? n : 0) ≤ 32 → n ≤ 16 if (nSegments > MAX_SEGMENTS) { MessageBox::Show(L"Слишком много звеньев. Уменьшите до 16 или меньше.", L"Ограничение", MessageBoxButtons::OK, MessageBoxIcon::Warning); return; } float minTheta = (float)nudMinTheta->Value; float tx = (float)nudTargetX->Value, ty = (float)nudTargetY->Value; int levels = (int)nudLevels->Value, maxIter = (int)nudMaxIter->Value; bool adaptive = cbAdaptive->Checked; float r_param = (float)nudR->Value, eps = (float)nudEps->Value; unsigned int seed = (unsigned)GetTickCount(); if (pStart) pStart(nSegments, variableLengths, minTheta, tx, ty, levels, maxIter, r_param, adaptive, eps, seed); LARGE_INTEGER t0, t1, fq; QueryPerformanceCounter(&t0); float* bestQ = nullptr; size_t bestQLen = 0; float bestX = 0, bestY = 0, bestF = 0; fManip(nSegments, variableLengths, minTheta, tx, ty, levels, maxIter, r_param, adaptive, eps, seed, &bestQ, &bestQLen, &bestX, &bestY, &bestF); QueryPerformanceCounter(&t1); QueryPerformanceFrequency(&fq); double micros = 1e6 * (double)(t1.QuadPart - t0.QuadPart) / (double)fq.QuadPart; if (bestQ && bestQLen >= (size_t)nSegments) { angles->Clear(); for (int i = 0; i < nSegments; ++i) angles->Add(bestQ[i]); lengths->Clear(); if (variableLengths) { // проверяем, что пришло достаточно длин для безопасного чтения if (bestQLen >= (size_t)(2 * nSegments)) { for (int i = 0; i < nSegments; ++i) lengths->Add(bestQ[nSegments + i]); } else { // если нет — используем базовую длину, чтобы не читать за пределы буфера for (int i = 0; i < nSegments; ++i) lengths->Add((float)nudBaseLength->Value); } } else { for (int i = 0; i < nSegments; ++i) lengths->Add((float)nudBaseLength->Value); } if (pFree) pFree(bestQ); } lblInfo->Text = String::Format(L"Результат:\nЦель: {0:F5}\nТочка: ({1:F3}, {2:F3})\nВремя: {3:F0} мкс", bestF, bestX, bestY, micros); this->Invalidate(); } protected: virtual void OnPaint(PaintEventArgs^ e) override { Form::OnPaint(e); Graphics^ g = e->Graphics; g->SmoothingMode = System::Drawing::Drawing2D::SmoothingMode::HighQuality; g->Clear(this->BackColor); int topOffset = 150; System::Drawing::Rectangle drawArea = System::Drawing::Rectangle(0, topOffset, this->ClientSize.Width, this->ClientSize.Height - topOffset); g->FillRectangle(Brushes::White, drawArea); int leftWallX = drawArea.Left + this->ClientSize.Width * 25 / 100; Pen^ wallPen = gcnew Pen(Color::Black, 2); g->DrawLine(wallPen, leftWallX, drawArea.Top, leftWallX, drawArea.Bottom); HatchBrush^ hatchBrush = gcnew HatchBrush(HatchStyle::BackwardDiagonal, Color::LightGray, Color::White); int leftHatchWidth = 100; g->FillRectangle(hatchBrush, leftWallX - leftHatchWidth, drawArea.Top, leftHatchWidth, drawArea.Height); float targetX = (float)nudTargetX->Value; float targetY = (float)nudTargetY->Value; float scale = 160.0f; int baseX = leftWallX; int baseY = drawArea.Top + drawArea.Height / 2; float pixelTargetX = baseX + targetX * scale; float pixelTargetY = baseY - targetY * scale; int rightWallX = (int)(pixelTargetX + 8); rightWallX = Math::Min(rightWallX, drawArea.Right - 10); Pen^ dashedPen = gcnew Pen(Color::Black, 2); dashedPen->DashStyle = DashStyle::Dash; g->DrawLine(dashedPen, rightWallX, drawArea.Top, rightWallX, drawArea.Bottom); int rightHatchWidth = leftHatchWidth; g->FillRectangle(hatchBrush, rightWallX, drawArea.Top, rightHatchWidth, drawArea.Height); Pen^ targetPen = gcnew Pen(Color::Green, 1.5f); targetPen->DashStyle = DashStyle::Dot; g->DrawEllipse(targetPen, pixelTargetX - 8.0f, pixelTargetY - 8.0f, 16.0f, 16.0f); cli::array<PointF>^ pts = gcnew cli::array<PointF>(nSegments + 1); pts[0] = PointF((float)baseX, (float)baseY); float x = 0.0f, y = 0.0f, phi = 0.0f; array<float>^ anglesArray = angles->ToArray(); array<float>^ lengthsArray = lengths->ToArray(); for (int i = 0; i < nSegments; ++i) { float theta = (i < anglesArray->Length) ? anglesArray[i] : 0.0f; float L = (i < lengthsArray->Length) ? lengthsArray[i] : (float)nudBaseLength->Value; phi += theta; x += L * (float)Math::Cos((double)phi); y += L * (float)Math::Sin((double)phi); pts[i + 1] = PointF(baseX + x * scale, baseY - y * scale); } Pen^ penRod = gcnew Pen(Color::Red, 6.0f); for (int i = 0; i < nSegments; ++i) g->DrawLine(penRod, pts[i], pts[i + 1]); SolidBrush^ brJoint = gcnew SolidBrush(Color::Blue); const float R = 6.0f; for (int i = 0; i <= nSegments; ++i) g->FillEllipse(brJoint, pts[i].X - R, pts[i].Y - R, 2 * R, 2 * R); delete wallPen; delete dashedPen; delete targetPen; delete penRod; delete brJoint; delete hatchBrush; } }; }
cpp// MyForm.cpp — точка входа GUI #include "MyForm.h" #include <float.h> using namespace System; using namespace System::Windows::Forms; typedef int(__cdecl* PInit)(int, float, float, float, float); typedef void(__cdecl* PWaitWorkers)(); typedef void(__cdecl* PShutdownWorkers)(); typedef void(__cdecl* PFinalizeAll)(); [STAThread] int main() { HMODULE h = LoadLibraryW(L"TEST_FUNC.dll"); if (!h) return -1; auto AgpInit = (PInit)GetProcAddress(h, "AgpInit"); auto AgpWaitStartAndRun = (PWaitWorkers)GetProcAddress(h, "AgpWaitStartAndRun"); auto AgpShutdownWorkers = (PShutdownWorkers)GetProcAddress(h, "AgpShutdownWorkers"); auto AgpFinalizeAll = (PFinalizeAll)GetProcAddress(h, "AgpFinalizeAll"); const int rank = AgpInit ? AgpInit(12, -2.2f, 1.8f, -2.2f, 1.8f) : 0; if (!rank) { Application::EnableVisualStyles(); Application::SetCompatibleTextRenderingDefault(false); Application::Run(gcnew TESTAGP::MyForm(h)); // Корректно останавливаем воркеров и финализируем MPI перед выходом if (AgpShutdownWorkers) AgpShutdownWorkers(); if (AgpFinalizeAll) AgpFinalizeAll(); } else { // Воркеры: ждём задания, корректно завершаемся внутри if (AgpWaitStartAndRun) AgpWaitStartAndRun(); } return 0; }
cpp// agp_algo.cpp — исправленный DLL // Важно: сборка C++17/20, /arch:AVX2, /Oi, /O2, /fp:fast желательны. #include "pch.h" // ---------------- Макросы и векторная математика (без изменений по смыслу) ---------------- #define XOR_RAND(state, result_var) \ do { \ uint32_t s = (state); \ s ^= s << 13; \ s ^= s >> 17; \ s ^= s << 5; \ (state) = s; \ float tmp = (float)((double)(s) * (1.0/4294967296.0)); \ result_var = tmp; \ } while (0) #define XOR_RAND_GRSH(state, result_var) \ do { \ uint32_t s = (state); \ s ^= s << 13; \ s ^= s >> 17; \ s ^= s << 5; \ (state) = s; \ result_var = fmaf((float)(int32_t)s, 0x1.0p-31f, -1.0f); \ } while (0) #define FABE13_COS(x, result_var) \ do { \ const float _ax_ = fabsf(x); \ float _r_ = fmodf(_ax_, 6.28318530718f); \ if (_r_ > 3.14159265359f) \ _r_ = 6.28318530718f - _r_; \ if (_r_ < 1.57079632679f) { \ const float _t2_ = _r_ * _r_; \ const float _t4_ = _t2_ * _t2_; \ result_var = fmaf(_t4_, fmaf(_t2_, -0.0013888889f, 0.0416666667f), fmaf(_t2_, -0.5f, 1.0f)); \ } else { \ _r_ = 3.14159265359f - _r_; \ const float _t2_ = _r_ * _r_; \ const float _t4_ = _t2_ * _t2_; \ result_var = -fmaf(_t4_, fmaf(_t2_, -0.0013888889f, 0.0416666667f), fmaf(_t2_, -0.5f, 1.0f)); \ } \ } while (0) #define FABE13_SIN(x, result_var) \ do { \ const float _x_ = (x); \ const float _ax_ = fabsf(_x_); \ float _r_ = fmodf(_ax_, 6.28318530718f); \ bool _sfl_ = _r_ > 3.14159265359f; \ if (_sfl_) \ _r_ = 6.28318530718f - _r_; \ bool _cfl_ = _r_ > 1.57079632679f; \ if (_cfl_) \ _r_ = 3.14159265359f - _r_; \ const float _t2_ = _r_ * _r_; \ float _s = fmaf(_t2_, fmaf(_t2_, fmaf(_t2_, -0.0001984127f, 0.0083333333f), -0.16666666f), 1.0f) * _r_; \ result_var = ((_x_ < 0.0f) ^ _sfl_) ? -_s : _s; \ } while (0) #define FABE13_SINCOS(in, sin_out, cos_out, n) \ do { \ int i = 0; \ const int limit = (n) & ~7; \ if ((n) >= 8) { \ static __declspec(align(32)) const __m256 VEC_TWOPI = _mm256_set1_ps(6.28318530718f); \ static __declspec(align(32)) const __m256 VEC_PI = _mm256_set1_ps(3.14159265359f); \ static __declspec(align(32)) const __m256 VEC_PI_2 = _mm256_set1_ps(1.57079632679f); \ static __declspec(align(32)) const __m256 INV_TWOPI = _mm256_set1_ps(0.15915494309189535f); \ static __declspec(align(32)) const __m256 BIAS = _mm256_set1_ps(12582912.0f); \ static __declspec(align(32)) const __m256 VEC_COS_P5 = _mm256_set1_ps(-0.0013888889f); \ static __declspec(align(32)) const __m256 VEC_COS_P3 = _mm256_set1_ps(0.0416666667f); \ static __declspec(align(32)) const __m256 VEC_COS_P1 = _mm256_set1_ps(-0.5f); \ static __declspec(align(32)) const __m256 VEC_COS_P0 = _mm256_set1_ps(1.0f); \ static __declspec(align(32)) const __m256 VEC_SIN_P5 = _mm256_set1_ps(-0.0001984127f); \ static __declspec(align(32)) const __m256 VEC_SIN_P3 = _mm256_set1_ps(0.0083333333f); \ static __declspec(align(32)) const __m256 VEC_SIN_P1 = _mm256_set1_ps(-0.16666666f); \ static __declspec(align(32)) const __m256 VEC_SIN_P0 = _mm256_set1_ps(1.0f); \ static __declspec(align(32)) const __m256 VEC_ZERO = _mm256_setzero_ps(); \ while (i < limit) { \ const __m256 vx = _mm256_load_ps(&(in)[i]); \ const __m256 vax = _mm256_andnot_ps(_mm256_set1_ps(-0.0f), vx); \ __m256 q = _mm256_fmadd_ps(vax, INV_TWOPI, BIAS); \ q = _mm256_sub_ps(q, BIAS); \ const __m256 r = _mm256_fnmadd_ps(VEC_TWOPI, q, vax); \ const __m256 r1 = _mm256_min_ps(r, _mm256_sub_ps(VEC_TWOPI, r)); \ const __m256 r2 = _mm256_min_ps(r1, _mm256_sub_ps(VEC_PI, r1)); \ const __m256 t2 = _mm256_mul_ps(r2, r2); \ const __m256 cosv = _mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, VEC_COS_P5, VEC_COS_P3), VEC_COS_P1), VEC_COS_P0); \ const __m256 sinv = _mm256_mul_ps(_mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, _mm256_fmadd_ps(t2, VEC_SIN_P5, VEC_SIN_P3), VEC_SIN_P1), VEC_SIN_P0), r2); \ const __m256 cflip = _mm256_cmp_ps(r1, VEC_PI_2, _CMP_GT_OQ); \ const __m256 sflip = _mm256_xor_ps(_mm256_cmp_ps(vx, VEC_ZERO, _CMP_LT_OQ), _mm256_cmp_ps(r, VEC_PI, _CMP_GT_OQ)); \ _mm256_store_ps(&(cos_out)[i], _mm256_blendv_ps(cosv, _mm256_sub_ps(VEC_ZERO, cosv), cflip)); \ _mm256_store_ps(&(sin_out)[i], _mm256_blendv_ps(sinv, _mm256_sub_ps(VEC_ZERO, sinv), sflip)); \ i += 8; \ } \ } \ while (i < (n)) { \ const float x = (in)[i]; \ const float ax = fabsf(x); \ float q = fmaf(ax, 0.15915494309189535f, 12582912.0f); \ q -= 12582912.0f; \ float r = fmaf(-6.28318530718f, q, ax); \ const bool sflip = r > 3.14159265359f; \ if (sflip) \ r = 6.28318530718f - r; \ const bool cflip = r > 1.57079632679f; \ if (cflip) \ r = 3.14159265359f - r; \ const float t2 = r * r; \ const float c = fmaf(t2, fmaf(t2, fmaf(t2, -0.0013888889f, 0.0416666667f), -0.5f), 1.0f); \ const float s = fmaf(t2, fmaf(t2, fmaf(t2, -0.0001984127f, 0.0083333333f), -0.16666666f), 1.0f) * r; \ (cos_out)[i] = cflip ? -c : c; \ (sin_out)[i] = ((x < 0.0f) ^ sflip) ? -s : s; \ ++i; \ } \ } while (0) // ---------------- Прочее (Peano/Morton/MPI/TBB) ---------------- enum List : uint8_t { Top = 0b00u, Down = 0b01u, Left = 0b10u, Right = 0b11u }; __declspec(align(4)) struct Step final { const uint8_t next, dx, dy; }; __declspec(align(4)) struct InvStep final { const uint8_t q, next; }; __declspec(align(64)) static const Step g_step_tbl[4][4] = { { { Right,0u,0u }, { Top,0u,1u }, { Top,1u,1u }, { Left,1u,0u } }, { { Left,1u,1u }, { Down,1u,0u }, { Down,0u,0u }, { Right,0u,1u } }, { { Down,1u,1u }, { Left,0u,1u }, { Left,0u,0u }, { Top,1u,0u } }, { { Top,0u,0u }, { Right,1u,0u }, { Right,1u,1u }, { Down,0u,1u } } }; __declspec(align(64)) static const InvStep g_inv_tbl[4][4] = { { { 0u,Right }, { 1u,Top }, { 3u,Left }, { 2u,Top } }, { { 2u,Down }, { 3u,Right }, { 1u,Down }, { 0u,Left } }, { { 2u,Left }, { 1u,Left }, { 3u,Top }, { 0u,Down } }, { { 0u,Top }, { 3u,Down }, { 1u,Right }, { 2u,Right } } }; static const boost::mpi::environment* g_env; static const boost::mpi::communicator* g_world; __declspec(align(16)) struct CrossMsg final { float s_x1, s_x2, e_x1, e_x2, Rtop; template<typename Archive> __forceinline void serialize(Archive& ar, const unsigned int) noexcept { ar& s_x1& s_x2& e_x1& e_x2& Rtop; } }; __declspec(align(16)) struct CtrlMsg final { bool kind; CrossMsg xchg; template<typename Archive> __forceinline void serialize(Archive& ar, const unsigned int) noexcept { ar& kind& xchg; } }; __declspec(align(16)) struct Slab final { char* const base; char* current; char* const end; __forceinline Slab(void* const memory, const size_t usable) : base((char*)memory), current(base), end(base + (usable & ~(size_t)63u)) {} }; static tbb::enumerable_thread_specific<Slab*> tls([]() noexcept { void* memory = _aligned_malloc(16777216u, 64u); Slab* slab = (Slab*)_aligned_malloc(32u, 64u); new (slab) Slab(memory, 16777216u); char* p = slab->base; while (p < slab->end) { *p = 0; p += 4096u; } return slab; }); // ---------------- Параметры 2D карты ---------------- __declspec(align(16)) struct Peano2DMap final { const int levels; const float a, b, c, d; const float lenx, leny; const float inv_lenx; const uint32_t scale; const uint8_t start; __forceinline Peano2DMap(int L, float _a, float _b, float _c, float _d, uint8_t st) : levels(L), a(_a), b(_b), c(_c), d(_d), lenx(_b - _a), leny(_d - _c), inv_lenx(1.0f / (_b - _a)), scale((uint32_t)1u << (L << 1)), start(st) {} }; static Peano2DMap gActiveMap(0, 0, 0, 0, 0, 0); // ---------------- Интервалы ---------------- __declspec(align(64)) struct Interval1D final { const float x1, x2, y1, y2, delta_y, ordinate_factor, N_factor, quadratic_term, M; float R; __forceinline void* operator new(size_t) noexcept { Slab* s = tls.local(); char* r = s->current; s->current += 64u; return r; } __forceinline Interval1D(float _x1, float _x2, float _y1, float _y2, float _N) noexcept : x1(_x1), x2(_x2), y1(_y1), y2(_y2), delta_y(_y2 - _y1), ordinate_factor(-(y1 + y2) * 2.0f), N_factor(_N == 1.0f ? _x2 - _x1 : sqrtf(_x2 - _x1)), quadratic_term((1.0f / N_factor)* delta_y* delta_y), M((1.0f / N_factor)* fabsf(delta_y)) {} __forceinline void ChangeCharacteristic(float _m) noexcept { R = fmaf(1.0f / _m, quadratic_term, fmaf(_m, N_factor, ordinate_factor)); } }; __declspec(align(64)) struct IntervalND final { const float x1, x2, y1, y2, delta_y, ordinate_factor; float N_factor, quadratic_term, M, R; uint64_t i1, i2; float diam; int span_level; __forceinline void* operator new(size_t) noexcept { Slab* s = tls.local(); char* r = s->current; s->current += 64u; return r; } __forceinline IntervalND(float _x1, float _x2, float _y1, float _y2) noexcept : x1(_x1), x2(_x2), y1(_y1), y2(_y2), delta_y(_y2 - _y1), ordinate_factor(-(y1 + y2) * 2.0f), N_factor(0), quadratic_term(0), M(0), R(0), i1(0), i2(0), diam(0), span_level(0) {} __forceinline void compute_span_level(const struct MortonND& map) noexcept; __forceinline void set_metric(float d_alpha) noexcept { N_factor = d_alpha; quadratic_term = (1.0f / N_factor) * delta_y * delta_y; M = (1.0f / N_factor) * fabsf(delta_y); } __forceinline void ChangeCharacteristic(float _m) noexcept { R = fmaf(1.0f / _m, quadratic_term, fmaf(_m, N_factor, ordinate_factor)); } }; __forceinline bool ComparePtr1D(const Interval1D* a, const Interval1D* b) noexcept { return a->R < b->R; } __forceinline bool ComparePtrND(const IntervalND* a, const IntervalND* b) noexcept { return a->R < b->R; } // ---------------- Массовые пересчёты R ---------------- __forceinline void RecomputeR_ConstM_AVX2_1D(Interval1D* const* arr, size_t n, float m) { const __m256 vm = _mm256_set1_ps(m); __m256 vinvm = _mm256_rcp_ps(vm); vinvm = _mm256_mul_ps(vinvm, _mm256_fnmadd_ps(vm, vinvm, _mm256_set1_ps(2.0f))); size_t i = 0, limit = n & ~7ull; alignas(32) float q[8], nf[8], od[8], out[8]; for (; i < limit; i += 8) { for (int k = 0; k < 8; ++k) { const Interval1D* p = arr[i + k]; q[k] = p->quadratic_term; nf[k] = p->N_factor; od[k] = p->ordinate_factor; } const __m256 vq = _mm256_load_ps(q), vnf = _mm256_load_ps(nf), vod = _mm256_load_ps(od); const __m256 t = _mm256_fmadd_ps(vm, vnf, vod); const __m256 res = _mm256_fmadd_ps(vq, vinvm, t); _mm256_store_ps(out, res); for (int k = 0; k < 8; ++k) arr[i + k]->R = out[k]; } for (; i < n; ++i) arr[i]->ChangeCharacteristic(m); } __forceinline void RecomputeR_AffineM_AVX2_1D(Interval1D* const* arr, size_t n, float GF, float alpha) { const __m256 vGF = _mm256_set1_ps(GF), va = _mm256_set1_ps(alpha); size_t i = 0, limit = n & ~7ull; alignas(32) float ln[8], Mv[8], q[8], nf[8], od[8], out[8]; for (; i < limit; i += 8) { for (int k = 0; k < 8; ++k) { const Interval1D* p = arr[i + k]; ln[k] = p->x2 - p->x1; Mv[k] = p->M; q[k] = p->quadratic_term; nf[k] = p->N_factor; od[k] = p->ordinate_factor; } const __m256 vln = _mm256_load_ps(ln), vM = _mm256_load_ps(Mv), vq = _mm256_load_ps(q), vnf = _mm256_load_ps(nf), vod = _mm256_load_ps(od); const __m256 vm = _mm256_fmadd_ps(vGF, vln, _mm256_mul_ps(va, vM)); __m256 vinvm = _mm256_rcp_ps(vm); vinvm = _mm256_mul_ps(vinvm, _mm256_fnmadd_ps(vm, vinvm, _mm256_set1_ps(2.0f))); const __m256 t = _mm256_fmadd_ps(vm, vnf, vod); const __m256 res = _mm256_fmadd_ps(vq, vinvm, t); _mm256_store_ps(out, res); for (int k = 0; k < 8; ++k) arr[i + k]->R = out[k]; } for (; i < n; ++i) { const Interval1D* p = arr[i]; const float mi = fmaf(GF, (p->x2 - p->x1), p->M * alpha); arr[i]->R = fmaf(1.0f / mi, p->quadratic_term, fmaf(mi, p->N_factor, p->ordinate_factor)); } } __forceinline void RecomputeR_ConstM_AVX2_ND(IntervalND* const* arr, size_t n, float m) { const __m256 vm = _mm256_set1_ps(m); __m256 vinvm = _mm256_rcp_ps(vm); vinvm = _mm256_mul_ps(vinvm, _mm256_fnmadd_ps(vm, vinvm, _mm256_set1_ps(2.0f))); size_t i = 0, limit = n & ~7ull; alignas(32) float q[8], nf[8], od[8], out[8]; for (; i < limit; i += 8) { for (int k = 0; k < 8; ++k) { const IntervalND* p = arr[i + k]; q[k] = p->quadratic_term; nf[k] = p->N_factor; od[k] = p->ordinate_factor; } const __m256 vq = _mm256_load_ps(q), vnf = _mm256_load_ps(nf), vod = _mm256_load_ps(od); const __m256 t = _mm256_fmadd_ps(vm, vnf, vod); const __m256 res = _mm256_fmadd_ps(vq, vinvm, t); _mm256_store_ps(out, res); for (int k = 0; k < 8; ++k) arr[i + k]->R = out[k]; } for (; i < n; ++i) arr[i]->ChangeCharacteristic(m); } __forceinline void RecomputeR_AffineM_AVX2_ND(IntervalND* const* arr, size_t n, float GF, float alpha) { const __m256 vGF = _mm256_set1_ps(GF), va = _mm256_set1_ps(alpha); size_t i = 0, limit = n & ~7ull; alignas(32) float ln[8], Mv[8], q[8], nf[8], od[8], out[8]; for (; i < limit; i += 8) { for (int k = 0; k < 8; ++k) { const IntervalND* p = arr[i + k]; ln[k] = p->x2 - p->x1; Mv[k] = p->M; q[k] = p->quadratic_term; nf[k] = p->N_factor; od[k] = p->ordinate_factor; } const __m256 vln = _mm256_load_ps(ln), vM = _mm256_load_ps(Mv), vq = _mm256_load_ps(q), vnf = _mm256_load_ps(nf), vod = _mm256_load_ps(od); const __m256 vm = _mm256_fmadd_ps(vGF, vln, _mm256_mul_ps(va, vM)); __m256 vinvm = _mm256_rcp_ps(vm); vinvm = _mm256_mul_ps(vinvm, _mm256_fnmadd_ps(vm, vinvm, _mm256_set1_ps(2.0f))); const __m256 t = _mm256_fmadd_ps(vm, vnf, vod); const __m256 res = _mm256_fmadd_ps(vq, vinvm, t); _mm256_store_ps(out, res); for (int k = 0; k < 8; ++k) arr[i + k]->R = out[k]; } for (; i < n; ++i) { const IntervalND* p = arr[i]; const float mi = fmaf(GF, (p->x2 - p->x1), p->M * alpha); arr[i]->R = fmaf(1.0f / mi, p->quadratic_term, fmaf(mi, p->N_factor, p->ordinate_factor)); } } // ---------------- Быстрые степени ---------------- __forceinline float fast_pow_int(float v, int n) { float r; switch (n) { case 3: { float v2 = v * v; r = v2 * v; } break; case 4: { float v2 = v * v; r = v2 * v2; } break; case 5: { float v2 = v * v; r = v2 * v2 * v; } break; case 6: { float v2 = v * v; float v4 = v2 * v2; r = v4 * v2; } break; case 7: { float v2 = v * v; float v4 = v2 * v2; r = v4 * v2 * v; } break; case 8: { float v2 = v * v; float v4 = v2 * v2; r = v4 * v4; } break; case 9: { float v3 = v * v * v; float v6 = v3 * v3; r = v6 * v3; } break; case 10: { float v2 = v * v; float v4 = v2 * v2; float v8 = v4 * v4; r = v8 * v2; } break; case 11: { float v2 = v * v; float v4 = v2 * v2; float v8 = v4 * v4; r = v8 * v2 * v; } break; case 12: { float v3 = v * v * v; float v6 = v3 * v3; r = v6 * v6; } break; case 13: { float v3 = v * v * v; float v6 = v3 * v3; r = v6 * v6 * v; } break; case 14: { float v7 = v * v * v * v * v * v * v; r = v7 * v7; } break; case 15: { float v7 = v * v * v * v * v * v * v; r = v7 * v7 * v; } break; default: { float v2 = v * v; float v4 = v2 * v2; float v8 = v4 * v4; r = v8 * v8; } } return r; } // ---------------- Шаг (без aliasing-трюков) ---------------- __forceinline float Shag(float _m, float x1, float x2, float y1, float y2, float _N, float _r) { const float diff = y2 - y1; const float sign_mult = (diff < 0.0f) ? -1.0f : 1.0f; if (_N == 1) return fmaf(-(1.0f / _m), diff, x1 + x2) * 0.5f; if (_N == 2) return fmaf(sign_mult / (_m * _m), diff * diff * _r, x1 + x2) * 0.5f; const float invmN = 1.0f / fast_pow_int(_m, _N); const float dN = fast_pow_int(fabsf(diff), _N); return fmaf(sign_mult * invmN, dN * _r, x1 + x2) * 0.5f; } // ---------------- Morton utils ---------------- struct MortonCachePerRank { std::vector<int> permCache; std::vector<uint64_t> invMaskCache; uint32_t baseSeed; }; static MortonCachePerRank g_mc; static __forceinline uint64_t gray_encode(uint64_t x) noexcept { return x ^ (x >> 1); } static __forceinline uint64_t gray_decode(uint64_t g) noexcept { g ^= g >> 32; g ^= g >> 16; g ^= g >> 8; g ^= g >> 4; g ^= g >> 2; g ^= g >> 1; return g; } // ---------------- MortonND ---------------- struct MortonND final { int dim, levels; int eff_levels; int extra_levels; int chunks; std::vector<int> chunk_bits; std::vector<uint64_t> chunk_bases; uint64_t scale; std::vector<float> low, high, step, invStep, baseOff; std::vector<int> perm; std::vector<uint64_t> invMask; std::vector<uint64_t> pextMask; std::vector<uint64_t> pextMaskChunks; float invScaleLevel; bool use_gray; static inline uint64_t make_mask(int dim, int Lc, int d) { uint64_t m = 0ull, bitpos = (uint64_t)d; for (int b = 0; b < Lc; ++b) { m |= 1ull << bitpos; bitpos += (uint64_t)dim; } return m; } __forceinline MortonND(int D, int L, const float* lows, const float* highs, const MortonCachePerRank& mc) : dim(D), levels(L), eff_levels((std::max)(1, (int)(63 / (D ? D : 1)))), extra_levels((L > eff_levels) ? (L - eff_levels) : 0), chunks((extra_levels > 0) ? (1 + (extra_levels + eff_levels - 1) / eff_levels) : 1), low(lows, lows + D), high(highs, highs + D), step(D, 0.0f), invStep(D, 0.0f), baseOff(D, 0.0f), perm(mc.permCache.begin(), mc.permCache.begin() + D), invMask(mc.invMaskCache.begin(), mc.invMaskCache.begin() + D), invScaleLevel(1.0f / (float)((uint64_t)1 << L)), use_gray(true) { for (int d = 0; d < dim; ++d) { float rng = high[d] - low[d]; float st = rng * invScaleLevel; step[d] = st; invStep[d] = 1.0f / st; baseOff[d] = fmaf(0.5f, st, low[d]); } chunk_bits.resize(chunks); pextMaskChunks.resize((size_t)chunks * (size_t)dim); chunk_bases.resize(chunks); int remaining = levels; for (int c = 0; c < chunks; ++c) { int Lc = (c == 0) ? (std::min)(eff_levels, remaining) : (std::min)(eff_levels, remaining); chunk_bits[c] = Lc; remaining -= Lc; uint64_t baseC = (uint64_t)1 << (dim * Lc); chunk_bases[c] = baseC; for (int d = 0; d < dim; ++d) pextMaskChunks[(size_t)c * (size_t)dim + (size_t)d] = make_mask(dim, Lc, d); } pextMask.resize(dim); for (int d = 0; d < dim; ++d) pextMask[d] = make_mask(dim, chunk_bits[0], d); scale = (uint64_t)1 << (dim * chunk_bits[0]); } __forceinline float block_diameter(uint64_t i1, uint64_t i2) const noexcept { if (i1 > i2) std::swap(i1, i2); float s2 = 0.0f; for (int d = 0; d < dim; ++d) { const int pd = perm[d]; const uint64_t varying = (i1 ^ i2) & pextMask[d]; const int nfree_hi = _mm_popcnt_u64(varying); const int nfree_total = nfree_hi + (levels - chunk_bits[0]); const float range = step[pd] * (ldexpf(1.0f, nfree_total) - 1.0f); s2 = fmaf(range, range, s2); } return sqrtf(s2); } __forceinline void map01ToPoint(float t, float* __restrict out) const noexcept { if (t <= 0.0f) t = 0.0f; else if (t >= 1.0f) t = 0x1.fffffep-1f; uint64_t accBits[32] = { 0ull }; int accShifted[32] = { 0 }; double u = (double)t; for (int c = 0; c < chunks; ++c) { const int Lc = chunk_bits[c]; const uint64_t baseC = chunk_bases[c]; double scaled = u * (double)baseC; uint64_t idxc = (scaled >= (double)baseC) ? (baseC - 1ull) : (uint64_t)scaled; u = scaled - (double)idxc; if (use_gray) idxc = gray_encode(idxc); int shift_from_top = 0; for (int k = 0; k <= c; ++k) shift_from_top += chunk_bits[k]; int inv_shift = levels - shift_from_top; for (int d = 0; d < dim; ++d) { int pd = perm[d]; uint64_t mask = pextMaskChunks[(size_t)c * (size_t)dim + (size_t)d]; uint64_t bits = _pext_u64(idxc, mask); if (inv_shift >= 0) { uint64_t invMaskSegment = 0ull; if (chunk_bits[c] < 63) { uint64_t take = ((uint64_t)1 << chunk_bits[c]) - 1ull; invMaskSegment = (invMask[pd] >> inv_shift) & take; } bits ^= invMaskSegment; } accBits[pd] = (accBits[pd] << Lc) | bits; accShifted[pd] += Lc; } } for (int d = 0; d < dim; ++d) out[d] = fmaf(step[d], (float)accBits[d], baseOff[d]); } __forceinline float pointToT(const float* __restrict q) const noexcept { const int bitsFull = levels; const int bitsCoarse = chunk_bits[0]; uint64_t idx0 = 0ull; for (int d = 0; d < dim; ++d) { int pd = perm[d]; float v = (q[pd] - baseOff[pd]) * invStep[pd]; int64_t cell = (int64_t)_mm_cvt_ss2si(_mm_round_ss(_mm_setzero_ps(), _mm_set_ss(v), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); if (cell < 0) cell = 0; int64_t maxv = ((int64_t)1 << bitsFull) - 1; if (cell > maxv) cell = maxv; uint64_t b = (uint64_t)cell >> (bitsFull - bitsCoarse); uint64_t invMask0 = 0ull; if (bitsCoarse < 63) { uint64_t take = ((uint64_t)1 << bitsCoarse) - 1ull; invMask0 = (invMask[pd] >> (bitsFull - bitsCoarse)) & take; } b ^= invMask0; idx0 |= _pdep_u64(b, pextMask[d]); } if (use_gray) idx0 = gray_decode(idx0); return ((float)idx0 + 0.5f) / (float)scale; } }; __forceinline void IntervalND::compute_span_level(const MortonND& map) noexcept { span_level = 0; for (int d = 0; d < map.dim; ++d) { uint64_t varying = (i1 ^ i2) & map.pextMask[d]; span_level += _mm_popcnt_u64(varying); } span_level += (map.levels - map.chunk_bits[0]) * map.dim; span_level = (std::min)(span_level, 11); } // ---------------- Стоимость манипулятора ---------------- struct ManipCost final { int n; bool variableLen; float targetX, targetY; float minTheta; float archBiasW, archBiasK; float sharpW; ManipCost(int _n, bool _variableLen, float _targetX, float _targetY, float _minTheta) : n(_n), variableLen(_variableLen), targetX(_targetX), targetY(_targetY), minTheta(_minTheta), archBiasW(0.02f), archBiasK(3.0f), sharpW(0.05f) {} __forceinline float operator()(const float* __restrict q, float& out_x, float& out_y) const noexcept { const float* th = q; const float* L = variableLen ? (q + n) : nullptr; __declspec(align(64)) float phi[32], s_arr[32], c_arr[32]; float x = 0.0f, y = 0.0f, phi_acc = 0.0f, penC = 0.0f, archPen = 0.0f; for (int i = 0; i < n; ++i) { phi_acc += th[i]; phi[i] = phi_acc; } FABE13_SINCOS(phi, s_arr, c_arr, n); if (variableLen) { for (int i = 0; i < n; ++i) { float Li = L[i]; x = fmaf(Li, c_arr[i], x); y = fmaf(Li, s_arr[i], y); } } else { for (int i = 0; i < n; ++i) { x = fmaf(1.0f, c_arr[i], x); y = fmaf(1.0f, s_arr[i], y); } } for (int i = 0; i < n; ++i) { float ai = fabsf(th[i]); float v = minTheta - ai; if (v > 0.0f) { float scale = 2.0f / (minTheta + 1e-6f); penC += sharpW * (exp2f(scale * v) - 1.0f); } float t = -th[i] * archBiasK; float sp = (t > 10.f) ? t : log1pf(expf(t)); archPen += archBiasW * sp; } float dx = x - targetX, dy = y - targetY; float dist = sqrtf(fmaf(dx, dx, dy * dy)); out_x = x; out_y = y; return dist + penC + archPen; } }; // ---------------- Hit-test для 2D карты ---------------- __forceinline void HitTest2D_analytic(float x_param, float& out_x1, float& out_x2) { const float a = gActiveMap.a, inv_lenx = gActiveMap.inv_lenx; const uint32_t scale = gActiveMap.scale, scale_minus_1 = scale - 1u; const float lenx = gActiveMap.lenx, leny = gActiveMap.leny, c = gActiveMap.c; const uint8_t start = gActiveMap.start; const int levels = gActiveMap.levels; float norm = (x_param - a) * inv_lenx; norm = fminf(fmaxf(norm, 0.0f), 0x1.fffffep-1f); uint32_t idx = (uint32_t)(norm * (float)scale); idx = idx > scale_minus_1 ? scale_minus_1 : idx; float sx = lenx, sy = leny; float x1 = a, x2 = c; uint8_t type = start; int l = levels - 1; while (l >= 0) { const uint32_t q = (idx >> (l * 2)) & 3u; const Step s = g_step_tbl[type][q]; type = s.next; sx *= 0.5f; sy *= 0.5f; x1 += s.dx ? sx : 0.0f; x2 += s.dy ? sy : 0.0f; --l; } out_x1 = x1 + sx * 0.5f; out_x2 = x2 + sy * 0.5f; } __forceinline float FindX2D_analytic(float px, float py) { const float a = gActiveMap.a, b = gActiveMap.b, c = gActiveMap.c, d = gActiveMap.d; const float lenx = gActiveMap.lenx, leny = gActiveMap.leny; const uint32_t scale = gActiveMap.scale; const uint8_t start = gActiveMap.start; const int levels = gActiveMap.levels; const float clamped_px = fminf(fmaxf(px, a), b), clamped_py = fminf(fmaxf(py, c), d); float sx = lenx, sy = leny; float x0 = a, y0 = c; uint32_t idx = 0u; uint8_t type = start; int l = 0; while (l < levels) { sx *= 0.5f; sy *= 0.5f; const float mx = x0 + sx, my = y0 + sy; const uint32_t tr = (uint32_t)((clamped_px > mx) & (clamped_py > my)); const uint32_t tl = (uint32_t)((clamped_px < mx) & (clamped_py > my)); const uint32_t dl = (uint32_t)((clamped_px < mx) & (clamped_py < my)); const uint32_t none = (uint32_t)(1u ^ (tr | tl | dl)); const uint32_t dd = (tr << 1) | tr | tl | (none << 1); const InvStep inv = g_inv_tbl[type][dd]; type = inv.next; idx = (idx << 2) | inv.q; const uint32_t dx = dd >> 1, dy = dd & 1u; x0 += dx ? sx : 0.0f; y0 += dy ? sy : 0.0f; ++l; } const float scale_recip = 1.0f / (float)scale; return fmaf((float)idx * scale_recip, lenx, a); } // ---------------- MPI-структуры ---------------- __declspec(align(16)) struct MultiCrossMsg final { float intervals[15]; uint8_t count; template<typename Archive> __forceinline void serialize(Archive& ar, const unsigned int) noexcept { ar& intervals& count; } }; __declspec(align(16)) struct BestSolutionMsg final { float bestF, bestX, bestY, bestQ[32]; uint8_t dim; template<typename Archive> __forceinline void serialize(Archive& ar, const unsigned int) noexcept { ar& bestF& bestX& bestY& bestQ& dim; } }; __declspec(align(16)) struct CtrlMsgND final { uint8_t kind; CrossMsg xchg; MultiCrossMsg multiXchg; BestSolutionMsg bestSol; template<typename Archive> __forceinline void serialize(Archive& ar, const unsigned int) noexcept { ar& kind; if (kind == 1) ar& xchg; else if (kind == 2) ar& multiXchg; else if (kind == 3) ar& bestSol; } }; // ---------------- Генерация сидов (исправлено, без переполнений) ---------------- static __forceinline int generate_lhs_seeds_lite(const MortonND& map, const int dim, float* __restrict S, int stride, uint32_t seed, int max_out) { if (dim <= 0 || max_out <= 0) return 0; // Кол-во LHS точек не больше max_out const int lhs_count = max_out; std::vector<uint32_t> A(dim), B(dim); uint32_t st = seed; auto next = [&]() -> uint32_t { st ^= st << 13; st ^= st >> 17; st ^= st << 5; return st; }; // Для каждой размерности берём линейную перестановку s -> (A*s + B) mod lhs_count (A нечётное → биекция) for (int d = 0; d < dim; ++d) { uint32_t a = (next() | 1u) % (uint32_t)lhs_count; if (a == 0) a = 1; uint32_t b = next() % (uint32_t)lhs_count; A[d] = a; B[d] = b; } for (int sidx = 0; sidx < lhs_count; ++sidx) { for (int d = 0; d < dim; ++d) { float u = (next() & 0xFFFFFF) * 5.9604645e-8f; uint32_t stratum = (A[d] * (uint32_t)sidx + B[d]) % (uint32_t)lhs_count; float pos = ((float)stratum + u) / (float)lhs_count; int pd = map.perm[d]; float lo = map.low[pd], hi = map.high[pd]; S[sidx * stride + d] = fmaf(pos, (hi - lo), lo); } } return lhs_count; } static __forceinline int generate_heuristic_seeds(const ManipCost& cost, const MortonND& map, int dim, float* __restrict S, int stride, uint32_t seed, int max_total) { int total = 0; const int n = cost.n; const bool VL = cost.variableLen; auto add_seed = [&](const std::function<void(float*)>& fill) { if (total >= max_total) return false; float* s = S + total * stride; for (int i = 0; i < stride; ++i) s[i] = 0.0f; fill(s); total++; return true; }; // 1) В сторону цели add_seed([&](float* s0) { float phi = atan2f(cost.targetY, cost.targetX); float rho = sqrtf(fmaf(cost.targetX, cost.targetX, cost.targetY * cost.targetY)); float len = fminf(fmaxf(rho / (float)n, 0.5f), 2.0f); for (int i = 0; i < n; ++i) s0[i] = phi / (float)n; if (VL) for (int i = 0; i < n; ++i) s0[n + i] = len; }); // 2) Попеременно add_seed([&](float* s1) { float phi = atan2f(cost.targetY, cost.targetX); for (int i = 0; i < n; ++i) s1[i] = 0.5f * phi * ((i & 1) ? -1.0f : 1.0f); if (VL) for (int i = 0; i < n; ++i) s1[n + i] = 1.0f * (0.8f + 0.4f * (float)i / (float)n); }); // 3) Затухающая линейка add_seed([&](float* s2) { const float inv = (n > 1) ? 1.0f / (float)(n - 1) : 0.0f; float phi = atan2f(cost.targetY, cost.targetX); for (int i = 0; i < n; ++i) { float pr = (float)i * inv; s2[i] = phi * (1.0f - 0.3f * pr); } if (VL) { for (int i = 0; i < n; ++i) { float si = sinf(1.5f * (float)i); s2[n + i] = (1.0f + 0.2f * si); } } }); // 4) LHS-лайт (забиваем остаток ёмкости) if (total < max_total) { int lhs_cap = max_total - total; int got = generate_lhs_seeds_lite(map, dim, S + total * stride, stride, seed, lhs_cap); total += got; } return total; } // ---------------- Ветвитель ND ---------------- static __forceinline void agp_run_branch_mpi( const MortonND& map, const ManipCost& cost, int maxIter, float r, bool adaptive, float eps, unsigned seed, std::vector<IntervalND*>& H, std::vector<float>& bestQ, float& bestF, float& bestX, float& bestY, float M_prior = 1e-3f) { const int n = cost.n; const int dim = n + (cost.variableLen ? n : 0); alignas(32) float M_by_span[12]; for (int i = 0; i < 12; ++i) M_by_span[i] = M_prior; float Mmax = M_prior; alignas(64) float q_local[32], phi[32], s_arr[32], c_arr[32], sum_s[32], sum_c[32], q_try[32]; bestQ.reserve(dim); float x = 0.0f, y = 0.0f; int no_improve = 0; auto t_to_idx = [&](float t) -> uint64_t { float tt = (t <= 0.0f) ? 0.0f : (t >= 1.0f ? std::nextafterf(1.0f, 0.0f) : t); uint64_t idx = (uint64_t)((double)tt * (double)map.scale); if (idx >= map.scale) idx = map.scale - 1ull; return idx; }; auto update_pockets_and_Mmax = [&](IntervalND* I) { const int k = I->span_level; if (I->M > M_by_span[k]) M_by_span[k] = I->M; if (M_by_span[k] > Mmax) Mmax = M_by_span[k]; }; float a = 0.0f, b = 1.0f; auto evalAt = [&](float t) -> float { map.map01ToPoint(t, q_local); float f = cost(q_local, x, y); if (f < bestF * 1.25f) { float acc = 0.0f; for (int i = 0; i < n; ++i) { acc += q_local[i]; phi[i] = acc; } FABE13_SINCOS(phi, s_arr, c_arr, n); float as = 0.0f, ac = 0.0f; for (int k = n - 1; k >= 0; --k) { const float Lk = cost.variableLen ? q_local[n + k] : 1.0f; as += Lk * s_arr[k]; ac += Lk * c_arr[k]; sum_s[k] = as; sum_c[k] = ac; } const float dx = x - cost.targetX, dy = y - cost.targetY; float dist = sqrtf(fmaf(dx, dx, dy * dy)) + 1e-8f; float eta = 0.125f; for (int stepI = 0; stepI < 3; ++stepI) { for (int i = 0; i < n; ++i) { float gpen = 0.0f; { float ai = fabsf(q_local[i]); float v = cost.minTheta - ai; if (v > 0.0f) { float scale = 2.0f / (cost.minTheta + 1e-6f); float e = exp2f(scale * v); float dpen_dtheta = cost.sharpW * (e * 0.69314718055994530941f * scale) * (-copysignf(1.0f, q_local[i])); gpen += dpen_dtheta; } } { float tsg = -q_local[i] * cost.archBiasK; float sig = 1.0f / (1.0f + expf(-tsg)); gpen += -(cost.archBiasW * cost.archBiasK) * sig; } float g = (dx * (-sum_s[i]) + dy * (sum_c[i])) / dist + gpen; q_try[i] = q_local[i] - eta * g; const float deg2rad = 3.14159265358979323846f / 180.0f; const float lo0 = -60.0f * deg2rad, hi0 = 150.0f * deg2rad; const float lo = -150.0f * deg2rad, hi = 150.0f * deg2rad; const float Lb = (i == 0) ? lo0 : lo; const float Hb = (i == 0) ? hi0 : hi; if (q_try[i] < Lb) q_try[i] = Lb; else if (q_try[i] > Hb) q_try[i] = Hb; } if (cost.variableLen) { for (int i = 0; i < n; ++i) { float g = (dx * c_arr[i] + dy * s_arr[i]) / dist; float v = q_local[n + i] - eta * g; if (v < 0.5f) v = 0.5f; else if (v > 2.0f) v = 2.0f; q_try[n + i] = v; } } float x2, y2; float f2 = cost(q_try, x2, y2); if (f2 < f) { memcpy(q_local, q_try, dim * sizeof(float)); f = f2; x = x2; y = y2; break; } eta *= 0.5f; } const int last = n - 1; const float deg2rad = 3.14159265358979323846f / 180.0f; const float lo = (last == 0) ? (-60.0f * deg2rad) : (-150.0f * deg2rad); const float hi = 150.0f * deg2rad; float bestLocF = f; float saved = q_local[last]; for (float delta = 0.05f; delta >= 0.00625f; delta *= 0.5f) { for (int sgn = -1; sgn <= 1; sgn += 2) { float cand = saved + sgn * delta; if (cand < lo) cand = lo; else if (cand > hi) cand = hi; float backup = q_local[last]; q_local[last] = cand; float x2, y2; float f2 = cost(q_local, x2, y2); if (f2 < bestLocF) { bestLocF = f2; x = x2; y = y2; saved = cand; } q_local[last] = backup; } } if (bestLocF < f) { q_local[last] = saved; f = bestLocF; } } if (f < bestF) { bestF = f; bestQ.assign(q_local, q_local + dim); bestX = x; bestY = y; no_improve = 0; } else { ++no_improve; } return f; }; float f_a = evalAt(a), f_b = evalAt(b); const int K = (std::min)((std::max)(2 * dim, 8), 128); H.reserve((size_t)maxIter + K + 16); const int rank = g_world->rank(); const int world = g_world->size(); const int MAX_SEEDS = 128; std::vector<float> seeds((size_t)MAX_SEEDS * 32u, 0.0f); const int seedCnt = generate_heuristic_seeds(cost, map, dim, seeds.data(), 32, seed + rank * 7919u, MAX_SEEDS); for (int i = 0; i < seedCnt; ++i) { const float* s = seeds.data() + i * 32; float t_seed = map.pointToT(s); const int denom = (std::max)(1, seedCnt - 4); float interval_size = (i < 3) ? (0.0004f * (float)dim) : (0.00031f * (float)dim) * exp2f((1.0f / (float)denom) * log2f(0.00025f / 0.00031f) * (float)(i - 3)); float t1 = fmaxf(a, t_seed - interval_size), t2 = fminf(b, t_seed + interval_size); if (t2 <= t1) continue; alignas(64) float q1[32], q2[32]; float x1, y1, x2, y2; map.map01ToPoint(t1, q1); float f1 = cost(q1, x1, y1); map.map01ToPoint(t2, q2); float f2 = cost(q2, x2, y2); IntervalND* I = new IntervalND(t1, t2, f1, f2); I->i1 = t_to_idx(t1); I->i2 = t_to_idx(t2); I->diam = map.block_diameter(I->i1, I->i2); I->compute_span_level(map); I->set_metric(I->diam); update_pockets_and_Mmax(I); I->ChangeCharacteristic(r * Mmax); if (i < 3) I->R *= fmaf(0.01f, (float)dim, 0.85f); else { float start_mult = 0.214f * (float)dim; float end_mult = 0.174f * (float)dim; float mult = start_mult * exp2f((1.0f / (float)denom) * log2f(end_mult / start_mult) * (float)(i - 3)); I->R *= mult; } H.emplace_back(I); std::push_heap(H.begin(), H.end(), ComparePtrND); if (f1 < bestF) { bestF = f1; bestQ.assign(q1, q1 + dim); bestX = x1; bestY = y1; } if (f2 < bestF) { bestF = f2; bestQ.assign(q2, q2 + dim); bestX = x2; bestY = y2; } } float prev_t = a, prev_f = f_a; for (int k = 1; k <= K; ++k) { float t = a + (b - a) * ((float)k / (K + 1)) + (float)rank / (float)(world * (K + 1)); float f = evalAt(t); IntervalND* I = new IntervalND(prev_t, t, prev_f, f); I->i1 = t_to_idx(prev_t); I->i2 = t_to_idx(t); I->diam = map.block_diameter(I->i1, I->i2); I->compute_span_level(map); I->set_metric(I->diam); update_pockets_and_Mmax(I); I->ChangeCharacteristic(r * Mmax); H.emplace_back(I); std::push_heap(H.begin(), H.end(), ComparePtrND); prev_t = t; prev_f = f; } IntervalND* tail = new IntervalND(prev_t, b, prev_f, f_b); tail->i1 = t_to_idx(prev_t); tail->i2 = t_to_idx(b); tail->diam = map.block_diameter(tail->i1, tail->i2); tail->compute_span_level(map); tail->set_metric(tail->diam); update_pockets_and_Mmax(tail); tail->ChangeCharacteristic(r * Mmax); H.emplace_back(tail); std::push_heap(H.begin(), H.end(), ComparePtrND); float dmax = b - a, initial_len = dmax, thr03 = 0.3f * initial_len, inv_thr03 = 1.0f / thr03; int it = 0; auto kickEveryByDim = [&](int dim) -> int { float z = 120.0f * exp2f(-0.05f * (float)dim); if (z < 60.0f) z = 60.0f; return (int)z; }; auto noImproveThrByDim = [&](int dim) -> int { float z = 80.0f * exp2f(-0.08f * (float)dim); if (z < 30.0f) z = 30.0f; return (int)z; }; while (it < maxIter) { if ((it % kickEveryByDim(dim)) == 0 && no_improve > noImproveThrByDim(dim) && !bestQ.empty()) { float t_best = map.pointToT(bestQ.data()); for (int i = 0; i < 2; ++i) { float off = (i == 0) ? 0.01f : -0.01f; float t_seed = fminf(b, fmaxf(a, t_best + off)); float f_seed = evalAt(t_seed); IntervalND* J = new IntervalND(t_seed - 0.005f, t_seed + 0.005f, f_seed, f_seed); J->i1 = t_to_idx(t_seed - 0.005f); J->i2 = t_to_idx(t_seed + 0.005f); J->diam = map.block_diameter(J->i1, J->i2); J->compute_span_level(map); J->set_metric(J->diam); update_pockets_and_Mmax(J); J->ChangeCharacteristic(r * Mmax); J->R *= 0.9f; H.emplace_back(J); std::push_heap(H.begin(), H.end(), ComparePtrND); } no_improve = 0; } const float p = fmaf(-1.0f / initial_len, dmax, 1.0f); bool stagnation = (no_improve > 100) && (it > 270); float A = 200.0f + 64.0f * exp2f(-0.06f * (float)dim); float B = 210.0f + 67.0f * exp2f(-0.06f * (float)dim); const int T = (int)fmaf(-expm1f(p), A, B); float r_eff = fmaxf(1.0f, r * (0.7f + 0.3f * (1.0f - p))); std::pop_heap(H.begin(), H.end(), ComparePtrND); IntervalND* cur = H.back(); H.pop_back(); const float x1 = cur->x1, x2 = cur->x2, y1 = cur->y1, y2 = cur->y2; float m = r_eff * Mmax; float tNew = Shag(m, x1, x2, y1, y2, (float)dim, r); tNew = fminf(fmaxf(tNew, a), b); float fNew = evalAt(tNew); IntervalND* L = new IntervalND(x1, tNew, y1, fNew); IntervalND* Rv = new IntervalND(tNew, x2, fNew, y2); L->i1 = t_to_idx(x1); L->i2 = t_to_idx(tNew); Rv->i1 = t_to_idx(tNew); Rv->i2 = t_to_idx(x2); L->diam = map.block_diameter(L->i1, L->i2); Rv->diam = map.block_diameter(Rv->i1, Rv->i2); L->compute_span_level(map); Rv->compute_span_level(map); L->set_metric(L->diam); Rv->set_metric(Rv->diam); float Mloc = (std::max)(L->M, Rv->M); update_pockets_and_Mmax(L); update_pockets_and_Mmax(Rv); const float prevMmax = Mmax; if (Mloc > Mmax) Mmax = Mloc; m = r_eff * Mmax; if (adaptive) { float len1 = tNew - x1, len2 = x2 - tNew; if (len1 + len2 == dmax) { dmax = (std::max)(len1, len2); for (auto pI : H) { float Ls = pI->x2 - pI->x1; if (Ls > dmax) dmax = Ls; } } if ((thr03 > dmax && !(it % 3)) || (10.0f * dmax < initial_len)) { const float progress = fmaf(-dmax, inv_thr03, 1.0f); const float alpha = progress * progress; const float beta = fmaf(-alpha, 1.0f, 2.0f); const float MULT = (1.0f / dmax) * Mmax; const float global_coeff = fmaf(MULT, r_eff, -MULT); const float GF = fmaf(beta, global_coeff, 0.0f); L->ChangeCharacteristic(fmaf(GF, len1, L->M * alpha)); Rv->ChangeCharacteristic(fmaf(GF, len2, Rv->M * alpha)); size_t sz = H.size(); RecomputeR_AffineM_AVX2_ND(H.data(), sz, GF, alpha); std::make_heap(H.begin(), H.end(), ComparePtrND); } else { if (Mloc > prevMmax) { L->ChangeCharacteristic(m); Rv->ChangeCharacteristic(m); if (Mloc > 1.15f * prevMmax) { size_t sz = H.size(); RecomputeR_ConstM_AVX2_ND(H.data(), sz, m); std::make_heap(H.begin(), H.end(), ComparePtrND); } } else { L->ChangeCharacteristic(m); Rv->ChangeCharacteristic(m); } } } else { if (Mloc > prevMmax) { L->ChangeCharacteristic(m); Rv->ChangeCharacteristic(m); if (Mloc > 1.15f * prevMmax) { size_t sz = H.size(); RecomputeR_ConstM_AVX2_ND(H.data(), sz, m); std::make_heap(H.begin(), H.end(), ComparePtrND); } } else { L->ChangeCharacteristic(m); Rv->ChangeCharacteristic(m); } } H.push_back(L); std::push_heap(H.begin(), H.end(), ComparePtrND); H.push_back(Rv); std::push_heap(H.begin(), H.end(), ComparePtrND); if (!H.empty()) { _mm_prefetch((const char*)H[0], _MM_HINT_T0); if (H.size() > 1) _mm_prefetch((const char*)H[1], _MM_HINT_T0); } IntervalND* top = H.front(); float interval_len = top->x2 - top->x1; bool want_term = (exp2f(log2f(interval_len) / (float)dim) < eps) || (it == maxIter - 1); if (!(it % T) || want_term) { CtrlMsgND out; out.kind = want_term ? 0 : 2; if (!want_term) { uint8_t cnt = (uint8_t)((H.size() >= 3) ? 3 : H.size()); out.multiXchg.count = cnt; float* dest = out.multiXchg.intervals; IntervalND* t1 = H[0]; IntervalND* t2 = (H.size() > 1 ? H[1] : H[0]); IntervalND* t3 = (H.size() > 2 ? H[2] : H[H.size() - 1]); IntervalND* tops[3] = { t1, t2, t3 }; for (uint8_t i2 = 0; i2 < cnt; ++i2) { IntervalND* Tt = tops[i2]; dest[0] = Tt->x1; dest[1] = 0.0f; dest[2] = Tt->x2; dest[3] = 0.0f; dest[4] = Tt->R; dest += 5; } } for (int i2 = 0; i2 < world; ++i2) if (i2 != rank) g_world->isend(i2, 0, out); if (want_term) break; } if (!(it % 500) && !bestQ.empty()) { CtrlMsgND out; out.kind = 3; out.bestSol.bestF = bestF; out.bestSol.bestX = bestX; out.bestSol.bestY = bestY; out.bestSol.dim = (uint8_t)bestQ.size(); memcpy(out.bestSol.bestQ, bestQ.data(), bestQ.size() * sizeof(float)); for (int i2 = 0; i2 < world; ++i2) if (i2 != rank) g_world->isend(i2, 0, out); } while (g_world->iprobe(boost::mpi::any_source, 0)) { CtrlMsgND in; g_world->recv(boost::mpi::any_source, 0, in); if (in.kind == 0) { if (!rank) break; else return; } else if (in.kind == 1) { float sx = in.xchg.s_x1, ex = in.xchg.e_x1; if (sx < 0.0f) sx = 0.0f; if (ex > 1.0f) ex = 1.0f; if (ex > sx) { alignas(64) float tmp[32]; float tx, ty; map.map01ToPoint(sx, tmp); float y1i = cost(tmp, tx, ty); map.map01ToPoint(ex, tmp); float y2i = cost(tmp, tx, ty); IntervalND* inj = new IntervalND(sx, ex, y1i, y2i); inj->i1 = t_to_idx(sx); inj->i2 = t_to_idx(ex); inj->diam = map.block_diameter(inj->i1, inj->i2); inj->compute_span_level(map); inj->set_metric(inj->diam); update_pockets_and_Mmax(inj); inj->ChangeCharacteristic(r * Mmax); if (!H.empty()) { _mm_prefetch((const char*)H[0], _MM_HINT_T0); if (H.size() > 1) _mm_prefetch((const char*)H[1], _MM_HINT_T0); } IntervalND* topH = H.front(); if (inj->R > 1.15f * topH->R) { float p2 = fmaf(-1.0f / initial_len, dmax, 1.0f); float k = (no_improve > 100 && it > 270) ? fmaf(0.5819767068693265f, expm1f(p2), 0.3f) : fmaf(0.3491860241215959f, expm1f(p2), 0.6f); inj->R = in.xchg.Rtop * k; H.emplace_back(inj); std::push_heap(H.begin(), H.end(), ComparePtrND); } } } else if (in.kind == 2) { const MultiCrossMsg& mX = in.multiXchg; for (uint8_t ii = 0; ii < mX.count; ++ii) { const float* d = &mX.intervals[ii * 5]; float sx = d[0], ex = d[2]; if (sx < 0.0f) sx = 0.0f; if (ex > 1.0f) ex = 1.0f; if (ex > sx) { alignas(64) float tmp[32]; float tx, ty; map.map01ToPoint(sx, tmp); float y1i = cost(tmp, tx, ty); map.map01ToPoint(ex, tmp); float y2i = cost(tmp, tx, ty); IntervalND* inj = new IntervalND(sx, ex, y1i, y2i); inj->i1 = t_to_idx(sx); inj->i2 = t_to_idx(ex); inj->diam = map.block_diameter(inj->i1, inj->i2); inj->compute_span_level(map); inj->set_metric(inj->diam); update_pockets_and_Mmax(inj); inj->ChangeCharacteristic(r * Mmax); if (!H.empty()) { _mm_prefetch((const char*)H[0], _MM_HINT_T0); if (H.size() > 1) _mm_prefetch((const char*)H[1], _MM_HINT_T0); } IntervalND* topH = H.front(); if (inj->R > 1.15f * topH->R) { float p2 = fmaf(-1.0f / initial_len, dmax, 1.0f); float k = (no_improve > 100 && it > 270) ? fmaf(0.5819767068693265f, expm1f(p2), 0.3f) : fmaf(0.3491860241215959f, expm1f(p2), 0.6f); inj->R = d[4] * k; H.emplace_back(inj); std::push_heap(H.begin(), H.end(), ComparePtrND); } } } } else if (in.kind == 3) { const BestSolutionMsg& bm = in.bestSol; if (bm.bestF < bestF * 1.15f) { alignas(64) float tmp_q[32]; memcpy(tmp_q, bm.bestQ, bm.dim * sizeof(float)); float t_best = map.pointToT(tmp_q); float t1 = fmaxf(a, t_best - 0.001f), t2 = fminf(b, t_best + 0.001f); if (t2 > t1) { alignas(64) float tq1[32], tq2[32]; float xx1, yy1, xx2, yy2; map.map01ToPoint(t1, tq1); float f1 = cost(tq1, xx1, yy1); map.map01ToPoint(t2, tq2); float f2 = cost(tq2, xx2, yy2); IntervalND* I = new IntervalND(t1, t2, f1, f2); I->i1 = t_to_idx(t1); I->i2 = t_to_idx(t2); I->diam = map.block_diameter(I->i1, I->i2); I->compute_span_level(map); I->set_metric(I->diam); update_pockets_and_Mmax(I); I->ChangeCharacteristic(r * Mmax); I->R *= 0.90f; H.emplace_back(I); std::push_heap(H.begin(), H.end(), ComparePtrND); } if (bm.bestF < bestF) { bestF = bm.bestF; bestX = bm.bestX; bestY = bm.bestY; bestQ.assign(bm.bestQ, bm.bestQ + bm.dim); } } } } ++it; } } struct BestPacket { float bestF; int dim; float bestX; float bestY; template<typename Archive> void serialize(Archive& ar, const unsigned int) { ar& bestF& dim& bestX& bestY; } }; // ---------------- Экспортируемые функции DLL ---------------- extern "C" __declspec(dllexport) __declspec(noalias) void AGP_Manip2D(int nSegments, bool variableLengths, float minTheta, float targetX, float targetY, int peanoLevels, int maxIterPerBranch, float r, bool adaptiveMode, float epsilon, unsigned int seed, float** out_bestQ, size_t* out_bestQLen, float* out_bestX, float* out_bestY, float* out_bestF) { // Сбрасываем slab перед началом — предотвращает накопление и выход за границы { Slab* slab = tls.local(); slab->current = slab->base; } const int MAX_DIM = 32; const int dim = nSegments + (variableLengths ? nSegments : 0); // Перестановка/инверсии g_mc.permCache.resize(dim); for (int i = 0; i < dim; ++i) g_mc.permCache[i] = i; uint32_t sseed = g_mc.baseSeed; for (int i = dim - 1; i > 0; --i) { sseed ^= sseed << 13; sseed ^= sseed >> 17; sseed ^= sseed << 5; uint32_t j = sseed % (uint32_t)(i + 1); std::swap(g_mc.permCache[i], g_mc.permCache[j]); } g_mc.invMaskCache.resize(dim); for (int k = 0; k < dim; ++k) { sseed ^= sseed << 13; sseed ^= sseed >> 17; sseed ^= sseed << 5; g_mc.invMaskCache[k] = (uint64_t)sseed; } const float deg2rad = 3.14159265358979323846f / 180.0f; const float theta0Min = -60.0f * deg2rad, theta0Max = 150.0f * deg2rad; const float thetaMin = -150.0f * deg2rad, thetaMax = 150.0f * deg2rad; const float lenMin = 0.5f, lenMax = 2.0f; std::vector<float> low; low.reserve(dim); std::vector<float> high; high.reserve(dim); for (int i = 0; i < nSegments; ++i) { low.push_back(i == 0 ? theta0Min : thetaMin); high.push_back(i == 0 ? theta0Max : thetaMax); } if (variableLengths) { for (int i = 0; i < nSegments; ++i) { low.push_back(lenMin); high.push_back(lenMax); } } ManipCost cost(nSegments, variableLengths, targetX, targetY, minTheta); const int rank = g_world->rank(), world = g_world->size(); std::vector<float> bestQ; float bestF = FLT_MAX, bx = 0.0f, by = 0.0f; // Если размерность превышает возможности протокола (пакет bestQ[32]), работаем в "пустом" режиме, // но корректно синхронизируем MPI (воркеры всё равно посылают пакеты). bool over_dim = (dim > MAX_DIM); if (!over_dim) { const int levels0 = (std::min)(peanoLevels, 8); const int maxIter0 = (int)(maxIterPerBranch * 0.2f); MortonND map0(dim, levels0, low.data(), high.data(), g_mc); std::vector<IntervalND*> H_coarse; std::vector<float> bestQ_coarse; float bestF_coarse = FLT_MAX, bx_coarse = 0.0f, by_coarse = 0.0f; float M_prior = (2.0f * nSegments) * (1.0f / (float)(1u << levels0)); if (variableLengths) M_prior += 1.41421356237f * (1.0f / (float)(1u << levels0)); agp_run_branch_mpi(map0, cost, maxIter0, r, adaptiveMode, epsilon, seed, H_coarse, bestQ_coarse, bestF_coarse, bx_coarse, by_coarse, M_prior); if (bestF_coarse < bestF) { bestF = bestF_coarse; bestQ = bestQ_coarse; bx = bx_coarse; by = by_coarse; } if (levels0 < peanoLevels) { MortonND map1(dim, peanoLevels, low.data(), high.data(), g_mc); std::vector<IntervalND*> H_fine; std::vector<float> bestQ_fine = bestQ; float bestF_fine = bestF, bx_fine = bx, by_fine = by; float M_prior_fine = (2.0f * nSegments) * (1.0f / (float)(1u << peanoLevels)); if (variableLengths) M_prior_fine += 1.41421356237f * (1.0f / (float)(1u << peanoLevels)); if (!H_coarse.empty()) { std::sort(H_coarse.begin(), H_coarse.end(), [](const IntervalND* a, const IntervalND* b) { return a->R < b->R; }); const size_t topCount = (size_t)(H_coarse.size() * 0.3f); auto t_to_idx_fine = [&](float t) -> uint64_t { float tt = (t <= 0.0f) ? 0.0f : (t >= 1.0f ? std::nextafterf(1.0f, 0.0f) : t); uint64_t idx = (uint64_t)((double)tt * (double)map1.scale); if (idx >= map1.scale) idx = map1.scale - 1ull; return idx; }; for (size_t i = 0; i < topCount && i < H_coarse.size(); ++i) { const IntervalND* C = H_coarse[i]; alignas(64) float q1[32], q2[32]; float x1, y1, x2, y2; map1.map01ToPoint(C->x1, q1); float f1 = cost(q1, x1, y1); map1.map01ToPoint(C->x2, q2); float f2 = cost(q2, x2, y2); IntervalND* I = new IntervalND(C->x1, C->x2, f1, f2); I->i1 = t_to_idx_fine(C->x1); I->i2 = t_to_idx_fine(C->x2); I->diam = map1.block_diameter(I->i1, I->i2); I->set_metric(I->diam); H_fine.push_back(I); if (f1 < bestF_fine) { bestF_fine = f1; bestQ_fine.assign(q1, q1 + dim); bx_fine = x1; by_fine = y1; } if (f2 < bestF_fine) { bestF_fine = f2; bestQ_fine.assign(q2, q2 + dim); bx_fine = x2; by_fine = y2; } } std::make_heap(H_fine.begin(), H_fine.end(), ComparePtrND); } agp_run_branch_mpi(map1, cost, maxIterPerBranch - maxIter0, r, adaptiveMode, epsilon, seed, H_fine, bestQ_fine, bestF_fine, bx_fine, by_fine, M_prior_fine); if (bestF_fine < bestF) { bestF = bestF_fine; bestQ = bestQ_fine; bx = bx_fine; by = by_fine; } } } BestPacket me{ bestF, dim, bx, by }; if (!rank) { std::vector<float> winnerQ = bestQ; float winF = bestF, wx = bx, wy = by; for (int i = 1; i < world; ++i) { BestPacket bp; g_world->recv(i, 2, bp); std::vector<float> qin; g_world->recv(i, 3, qin); if (bp.bestF < winF) { winF = bp.bestF; wx = bp.bestX; wy = bp.bestY; winnerQ = qin; } } if (over_dim) { // Ничего не отдаём в UI — защитимся от переполнений *out_bestQLen = 0; *out_bestQ = nullptr; *out_bestX = 0; *out_bestY = 0; *out_bestF = FLT_MAX; return; } *out_bestQLen = winnerQ.size(); *out_bestQ = (float*)CoTaskMemAlloc(sizeof(float) * (*out_bestQLen)); memcpy(*out_bestQ, winnerQ.data(), sizeof(float) * (*out_bestQLen)); *out_bestX = wx; *out_bestY = wy; *out_bestF = winF; } else { g_world->send(0, 2, me); g_world->send(0, 3, bestQ); } } extern "C" __declspec(dllexport) __declspec(noalias) __forceinline int AgpInit(int peanoLevel, float a, float b, float c, float d) { g_env = new boost::mpi::environment(); g_world = new boost::mpi::communicator(); _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); const int rank = g_world->rank(); const int world_size = g_world->size(); if (world_size == 4) { new (&gActiveMap) Peano2DMap(peanoLevel, a, b, c, d, rank & 3); } // Надёжный хэш-сид g_mc.baseSeed = (uint32_t)(0x9E3779B9u ^ (0x85EBCA6Bu * (uint32_t)rank + 0xC2B2AE35u)); return rank; } // Тестовые функции (без изменений) __forceinline float ShekelFunc(float x, float seed) { int i = 0; float st = seed, r1, r2, res = 0.0f; while (i < 10) { XOR_RAND(st, r1); float xp = fmaf(-r1, 10.0f, x); XOR_RAND(st, r1); XOR_RAND(st, r2); float d = fmaf(fmaf(r1, 20.0f, 5.0f), xp * xp, fmaf(r2, 0.2f, 1.0f)); d = copysignf(fmaxf(fabsf(d), FLT_MIN), d); res -= 1.0f / d; ++i; } return res; } __forceinline float RastriginFunc(float x1, float x2) { const float t = fmaf(x1, x1, x2 * x2); float c1, c2; FABE13_COS(6.28318530717958647692f * x1, c1); FABE13_COS(6.28318530717958647692f * x2, c2); return (t - fmaf(c1 + c2, 10.0f, -14.6f)) * fmaf(-t, 0.25f, 18.42f); } __forceinline float HillFunc(float x, float seed) { int j = 0; __declspec(align(32)) float ang[14u]; float st = 6.28318530717958647692f * x; while (j < 14) { ang[j] = st * (float)(j + 1); ++j; } __declspec(align(32)) float sv[14u], cv[14u]; FABE13_SINCOS(ang, sv, cv, 14u); float state = seed, r1, r2; XOR_RAND(state, r1); float res = fmaf(r1, 2.0f, -1.1f); --j; while (j >= 0) { XOR_RAND(state, r1); XOR_RAND(state, r2); res += fmaf(fmaf(r1, 2.0f, -1.1f), sv[j], fmaf(r2, 2.0f, -1.1f) * cv[j]); --j; } return res; } __forceinline float GrishaginFunc(float x1, float x2, float seed) { int j = 0; __declspec(align(32)) float aj[8u], ak[8u]; while (j < 8) { float pj = 3.14159265358979323846f * (float)(j + 1); aj[j] = pj * x1; ak[j] = pj * x2; ++j; } __declspec(align(32)) float sj[8u], cj[8u], sk[8u], ck[8u]; FABE13_SINCOS(aj, sj, cj, 8u); FABE13_SINCOS(ak, sk, ck, 8u); --j; float p1 = 0.0f, p2 = 0.0f; float st = seed, r1, r2; while (j >= 0) { size_t k = 0u; while (k < 8u) { float s = sj[j] * sj[j]; float c = ck[k] * ck[k]; XOR_RAND_GRSH(st, r1); XOR_RAND_GRSH(st, r2); p1 = fmaf(r1, s, fmaf(r2, c, p1)); XOR_RAND_GRSH(st, r1); XOR_RAND_GRSH(st, r2); p2 = fmaf(-r1, c, fmaf(r2, s, p2)); ++k; } --j; } return -sqrtf(fmaf(p1, p1, p2 * p2)); } // 1D/2D API (как были) extern "C" __declspec(dllexport) __declspec(noalias) void AGP_1D(float global_iterations, float a, float b, float r, bool mode, float epsilon, float seed, float** out_data, size_t* out_len) { Slab* slab = tls.local(); slab->current = slab->base; int schetchick = 0; const float initial_length = b - a; float dmax = initial_length; const float threshold_03 = 0.3f * initial_length, inv_threshold_03 = 1.0f / threshold_03; const float start_val = ShekelFunc(a, seed); float best_f = ShekelFunc(b, seed); float x_Rmax_1 = a, x_Rmax_2 = b; float y_Rmax_1 = start_val, y_Rmax_2 = best_f; std::vector<float, boost::alignment::aligned_allocator<float, 16u>> Extr; std::vector<Interval1D*, boost::alignment::aligned_allocator<Interval1D*, 64u>> R; Extr.reserve((size_t)global_iterations << 2u); R.reserve((size_t)global_iterations << 1u); R.emplace_back(new Interval1D(a, b, start_val, best_f, 1.0f)); float Mmax = R.front()->M; float m = r * Mmax; while (true) { const float new_point = Shag(m, x_Rmax_1, x_Rmax_2, y_Rmax_1, y_Rmax_2, 1.0f, r); const float new_value = ShekelFunc(new_point, seed); if (new_value < best_f) { best_f = new_value; Extr.emplace_back(best_f); Extr.emplace_back(new_point); } std::pop_heap(R.begin(), R.end(), ComparePtr1D); const Interval1D* pro = R.back(); const float new_x1 = pro->x1, new_x2 = pro->x2; const float len2 = new_x2 - new_point, len1 = new_point - new_x1; const float interval_len = (len1 < len2 ? len1 : len2); if (++schetchick == (int)global_iterations || interval_len < epsilon) { Extr.emplace_back((float)schetchick); Extr.emplace_back(interval_len); *out_len = Extr.size(); *out_data = (float*)CoTaskMemAlloc(sizeof(float) * (*out_len)); memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len)); return; } Interval1D* curr = new Interval1D(new_x1, new_point, pro->y1, new_value, 1.0f); Interval1D* curr1 = new Interval1D(new_point, new_x2, new_value, pro->y2, 1.0f); const float currM = curr->M > curr1->M ? curr->M : curr1->M; const size_t r_size = R.size(); if (mode) { if (len2 + len1 == dmax) { dmax = len2 > len1 ? len2 : len1; for (auto p : R) { float L = p->x2 - p->x1; if (L > dmax) dmax = L; } } if (threshold_03 > dmax && !(schetchick % 3) || 10.0f * dmax < initial_length) { if (currM > Mmax) { Mmax = currM; m = r * Mmax; } const float progress = fmaf(-inv_threshold_03, dmax, 1.0f); const float alpha = progress * progress; const float betta = 2.0f - alpha; const float MULT = (1.0f / dmax) * Mmax; const float global_coeff = fmaf(MULT, r, -MULT); const float GF = betta * global_coeff; curr->ChangeCharacteristic(fmaf(GF, len1, curr->M * alpha)); curr1->ChangeCharacteristic(fmaf(GF, len2, curr1->M * alpha)); RecomputeR_AffineM_AVX2_1D(R.data(), r_size, GF, alpha); std::make_heap(R.begin(), R.end(), ComparePtr1D); } else { if (currM > Mmax) { if (currM < 1.15f * Mmax) { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); } else { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); RecomputeR_ConstM_AVX2_1D(R.data(), r_size, m); std::make_heap(R.begin(), R.end(), ComparePtr1D); } } else { curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); } } } else { if (currM > Mmax) { if (currM < 1.15f * Mmax) { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); } else { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); RecomputeR_ConstM_AVX2_1D(R.data(), r_size, m); std::make_heap(R.begin(), R.end(), ComparePtr1D); } } else { curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); } } R.back() = curr; std::push_heap(R.begin(), R.end(), ComparePtr1D); R.emplace_back(curr1); std::push_heap(R.begin(), R.end(), ComparePtr1D); const Interval1D* top = R.front(); x_Rmax_1 = top->x1; x_Rmax_2 = top->x2; y_Rmax_1 = top->y1; y_Rmax_2 = top->y2; } } extern "C" __declspec(dllexport) __declspec(noalias) void AGP_2D(const float N, const float global_iterations, const float a, const float b, const float c, const float d, const float r, const bool mode, const float epsilon, const float seed, float** const __restrict out_data, size_t* const __restrict out_len) noexcept { Slab* const __restrict slab = tls.local(); slab->current = slab->base; int schetchick = 0, no_improve = 0; const int rank = g_world->rank(); const int world_size = g_world->size(); while (g_world->iprobe(boost::mpi::any_source, 0)) { CtrlMsg dummy; g_world->recv(boost::mpi::any_source, 0, dummy); } const float inv_divider = ldexpf(1.0f, -((gActiveMap.levels << 1) + 1)); const float x_addition = (b - a) * inv_divider, y_addition = (d - c) * inv_divider; const float true_start = a + x_addition, true_end = b - x_addition; float x_Rmax_1 = true_start, x_Rmax_2 = true_end; const float initial_length = x_Rmax_2 - x_Rmax_1; float dmax = initial_length; const float threshold_03 = 0.3f * initial_length, inv_threshold_03 = 1.0f / threshold_03; const float start_val = rank % 3 ? RastriginFunc(true_end, d - y_addition) : RastriginFunc(true_start, c + y_addition); float best_f = rank % 2 ? RastriginFunc(true_start, d - y_addition) : RastriginFunc(true_end, c + y_addition); float y_Rmax_1 = start_val, y_Rmax_2 = best_f; std::vector<float, boost::alignment::aligned_allocator<float, 16u>> Extr; std::vector<Interval1D* __restrict, boost::alignment::aligned_allocator<Interval1D* __restrict, 64u>> R; Extr.clear(); Extr.reserve(static_cast<size_t>(global_iterations) << 2u); R.clear(); R.reserve(static_cast<size_t>(global_iterations) << 1u); R.emplace_back(new Interval1D(true_start, true_end, start_val, best_f, 2.0f)); const Interval1D* __restrict top_ptr = R.front(); // ИНИЦИАЛИЗАЦИЯ! float Mmax = R.front()->M, m = r * Mmax; while (true) { const float interval_len = x_Rmax_2 - x_Rmax_1; const bool stagnation = no_improve > 100 && schetchick > 270; const float p = fmaf(-1.0f / initial_length, dmax, 1.0f); while (g_world->iprobe(boost::mpi::any_source, 0)) { CtrlMsg in; g_world->recv(boost::mpi::any_source, 0, in); if (in.kind) { if (!rank) { Extr.emplace_back((float)schetchick); Extr.emplace_back(interval_len); *out_len = Extr.size(); *out_data = reinterpret_cast<float* __restrict>(CoTaskMemAlloc(sizeof(float) * (*out_len))); memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len)); } return; } const float sx = FindX2D_analytic(in.xchg.s_x1, in.xchg.s_x2); const float ex = FindX2D_analytic(in.xchg.e_x1, in.xchg.e_x2); Interval1D* const __restrict injected = new Interval1D(sx, ex, RastriginFunc(in.xchg.s_x1, in.xchg.s_x2), RastriginFunc(in.xchg.e_x1, in.xchg.e_x2), 2.0f); injected->ChangeCharacteristic(m); if (injected->R > 1.15f * top_ptr->R) { const float k = stagnation ? fmaf(0.5819767068693265f, expm1f(p), 0.3f) : fmaf(0.3491860241215959f, expm1f(p), 0.6f); injected->R = in.xchg.Rtop * k; R.emplace_back(injected); std::push_heap(R.begin(), R.end(), ComparePtr1D); } } const int T = (int)fmaf(-expm1f(p), 264.0f, 277.0f); const bool want_term = interval_len < epsilon || schetchick == static_cast<int>(global_iterations); if (!(++schetchick % T) || stagnation || want_term) { CtrlMsg out; out.kind = want_term; if (!out.kind) { float s_x1, s_x2, e_x1, e_x2; HitTest2D_analytic(top_ptr->x1, s_x1, s_x2); HitTest2D_analytic(top_ptr->x2, e_x1, e_x2); out.xchg = CrossMsg{ s_x1, s_x2, e_x1, e_x2, top_ptr->R }; } for (int i = 0; i < world_size; ++i) if (i != rank) g_world->isend(i, 0, out); if (out.kind) { if (!rank) { Extr.emplace_back((float)schetchick); Extr.emplace_back(interval_len); *out_len = Extr.size(); *out_data = reinterpret_cast<float* __restrict>(CoTaskMemAlloc(sizeof(float) * (*out_len))); memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len)); } return; } } const float new_point = Shag(m, x_Rmax_1, x_Rmax_2, y_Rmax_1, y_Rmax_2, 2.0f, r); float new_x1_val, new_x2_val; HitTest2D_analytic(new_point, new_x1_val, new_x2_val); const float new_value = RastriginFunc(new_x1_val, new_x2_val); if (new_value < best_f) { best_f = new_value; Extr.emplace_back(best_f); Extr.emplace_back(new_x1_val); Extr.emplace_back(new_x2_val); no_improve = 0; } else { ++no_improve; } std::pop_heap(R.begin(), R.end(), ComparePtr1D); Interval1D* const __restrict promej = R.back(); const float segment_x1 = promej->x1, segment_x2 = promej->x2; const float len2 = segment_x2 - new_point, len1 = new_point - segment_x1; Interval1D* const __restrict curr = new Interval1D(segment_x1, new_point, promej->y1, new_value, 2.0f); Interval1D* const __restrict curr1 = new Interval1D(new_point, segment_x2, new_value, promej->y2, 2.0f); const float currM = (std::max)(curr->M, curr1->M); const size_t r_size = R.size(); if (mode) { if (len2 + len1 == dmax) { dmax = (std::max)(len1, len2); for (auto pI : R) { float L = pI->x2 - pI->x1; if (L > dmax) dmax = L; } } if (threshold_03 > dmax && !(schetchick % 3) || 10.0f * dmax < initial_length) { if (currM > Mmax) { Mmax = currM; m = r * Mmax; } const float progress = fmaf(-inv_threshold_03, dmax, 1.0f); const float alpha = progress * progress; const float betta = 2.0f - alpha; const float MULTIPLIER = (1.0f / dmax) * Mmax; const float global_coeff = fmaf(MULTIPLIER, r, -MULTIPLIER); const float GLOBAL_FACTOR = betta * global_coeff; curr->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len1, curr->M * alpha)); curr1->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len2, curr1->M * alpha)); RecomputeR_AffineM_AVX2_1D(R.data(), r_size, GLOBAL_FACTOR, alpha); std::make_heap(R.begin(), R.end(), ComparePtr1D); } else { if (currM > Mmax) { if (currM < 1.15f * Mmax) { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); } else { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); RecomputeR_ConstM_AVX2_1D(R.data(), r_size, m); std::make_heap(R.begin(), R.end(), ComparePtr1D); } } else { curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); } } } else { if (currM > Mmax) { if (currM < 1.15f * Mmax) { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); } else { Mmax = currM; m = r * Mmax; curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); RecomputeR_ConstM_AVX2_1D(R.data(), r_size, m); std::make_heap(R.begin(), R.end(), ComparePtr1D); } } else { curr->ChangeCharacteristic(m); curr1->ChangeCharacteristic(m); } } R.back() = curr; std::push_heap(R.begin(), R.end(), ComparePtr1D); R.emplace_back(curr1); std::push_heap(R.begin(), R.end(), ComparePtr1D); top_ptr = R.front(); x_Rmax_1 = top_ptr->x1; x_Rmax_2 = top_ptr->x2; y_Rmax_1 = top_ptr->y1; y_Rmax_2 = top_ptr->y2; } } // Управляющие сообщения struct RunParams { int nSegments; uint8_t varLen; float minTheta; float tx, ty; int levels, maxIter; float r; uint8_t adaptive; float eps; unsigned int seed; template<typename Archive> void serialize(Archive& ar, const unsigned int) { ar& nSegments& varLen& minTheta& tx& ty& levels& maxIter& r& adaptive& eps& seed; } }; extern "C" __declspec(dllexport) __declspec(noalias) void AgpStartManipND(int nSegments, bool variableLengths, float minTheta, float targetX, float targetY, int peanoLevels, int maxIterPerBranch, float r, bool adaptiveMode, float epsilon, unsigned int seed) { RunParams p; p.nSegments = nSegments; p.varLen = (uint8_t)variableLengths; p.minTheta = minTheta; p.tx = targetX; p.ty = targetY; p.levels = peanoLevels; p.maxIter = maxIterPerBranch; p.r = r; p.adaptive = (uint8_t)adaptiveMode; p.eps = epsilon; p.seed = seed; const int rank = g_world->rank(); const int world = g_world->size(); for (int i = 1; i < world; ++i) if (i != rank) g_world->isend(i, 1, p); } extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AgpWaitStartAndRun() { while (true) { if (g_world->iprobe(0, 1)) { RunParams p; g_world->recv(0, 1, p); if (p.nSegments == -1) break; // приказ завершения float* q = nullptr; size_t qlen = 0; float bx = 0, by = 0, bf = 0; AGP_Manip2D(p.nSegments, (bool)p.varLen, p.minTheta, p.tx, p.ty, p.levels, p.maxIter, p.r, (bool)p.adaptive, p.eps, p.seed, &q, &qlen, &bx, &by, &bf); if (q) CoTaskMemFree(q); } Sleep(0); } // Корректная финализация для воркера delete g_world; g_world = nullptr; delete g_env; g_env = nullptr; } extern "C" __declspec(dllexport) __declspec(noalias) void AgpShutdownWorkers() { const int rank = g_world->rank(); const int world = g_world->size(); RunParams p{}; p.nSegments = -1; // «яд» for (int i = 1; i < world; ++i) if (i != rank) g_world->isend(i, 1, p); } extern "C" __declspec(dllexport) __declspec(noalias) void AgpFinalizeAll() { if (g_world) { delete g_world; g_world = nullptr; } if (g_env) { delete g_env; g_env = nullptr; } } extern "C" __declspec(dllexport) __declspec(noalias) __forceinline void AGP_Free(float* p) { CoTaskMemFree(p); }
cpp#include "pch.h" BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID) { if (ul_reason_for_call == DLL_PROCESS_ATTACH) { DisableThreadLibraryCalls(hModule); } return TRUE; }
Сброс slab в AGP_Manip2D: раньше ND-интервалы (IntervalND) выделялись из 16 МБ локального «слэба», но он никогда не сбрасывался в этой функции. После нескольких запусков «Оптимизировать» указатель выходил за пределы, происходила порча памяти с эффектом «через несколько нажатий». Теперь это исключено — слэб очищается на входе.
Безопасная генерация сидов: убраны оба источника OOB — фиксированный буфер seeds[64*32] и статический permutations[32][16]. Теперь используется ограничение по вместимости и арифметические перестановки, работающие без больших вспомогательных массивов.
Лимит размерности: весь стек алгоритма (bestQ в сообщениях, локальные массивы phi/s_arr/c_arr/q_local/...) рассчитан на максимум 32 float. Я ввёл согласованный лимит MAX_SEGMENTS=16 в UI и мягкую проверку dim>32 в DLL с корректной MPI-синхронизацией (воркеры всё равно отчитываются, ранк0 отдаёт пустой результат).
Инициализация top_ptr в 2D: предотвращает чтение мусора до первого R.front().
Сид-генератор теперь чисто целочисленный, без fmaf над «нечисловыми» константами.
Завершение MPI: добавлены AgpShutdownWorkers/AgpFinalizeAll, вызываются из UI после закрытия окна — нет «job aborted / didn't finalize».
Если нужно — могу собрать «минимальный чек-лист» для теста (командная строка, сценарии с 1/6 процессами), но сам код выше уже включает все исправления, которые действительно вели к повреждению памяти и AccessViolation.