очень ответственное задание: я приведу код метода,...

очень ответственное задание: я приведу код метода, тебе нужно сначала в двумерной ветке в блоке перед основным циклом где много полей инициализаций - в этом блоке обеспечить наилучшую локальность данных - то есть перетасовать этот блок строчек кода перед циклом так чтобы обеспечить наилучшее попадание в кэш в первую очередь, наибольшую скорость доступа к данным и локальность данных и минимизацию кэш промахов во вторую очередь, наилучшее распределение по кэш линиям и оптимальное размещение в памяти и постановка однотипных инструкций вместе, важное замечание - сами инструкции - строки кода менять нельзя - тут всё очень оптимизировано, как и нельзя вводить дополнительные переменные, как и нельзя удалять существующие - также нельзя выносить булевые выражения в отдельные константы, в целом ты не должен менять сам код, это твёрдое условие - но касается строго оно только двумерной ветки, код одномерной ветки ты должен будешь привести к тому же смыслу что и код двумерной - то есть там тоже оптимально перетасовать блок инструкций перед циклом - и в целом организовать код и внутри цикла одномерной ветки схоже с кодом двумерной - но учти что когда будешь немного менять одномерную ветку исходи из того - приведут ли эти изменения к увеличению скорости или потенциальному увеличению скорости - если да - то применяй эти изменения, код: extern "C" __declspec(dllexport) __declspec(noalias) void Base_LNA_1_2_Mer_AGP(
const float N, const float global_iterations, const float a, const float b, const float c, const float d, const float r,
const bool mode, const float epsilon, const float seed,
float** __restrict out_data, size_t* __restrict out_len) noexcept
{
if (N == 1.0f) {
const float initial_length = b - a;
float dmax = initial_length;
const float start_val = ShekelFunc(a, seed);
float best_f = ShekelFunc(b, seed);
float schetchick = 0.0f;

text
	float x_Rmax_1 = a;
	float x_Rmax_2 = b;
	float y_Rmax_1 = start_val;
	float y_Rmax_2 = best_f;

	//std::vector<float> Extr;
	Extr.reserve(global_iterations * 4u);
	//std::vector<std::unique_ptr<Interval>> R;
	R.reserve(global_iterations * 2u);

	// Создаем начальный интервал
	R.emplace_back(std::make_unique<Interval>(&a, &b, &start_val, &best_f, &N));
	std::push_heap(R.begin(), R.end(), ComparePtr);
	float Mmax = *R.front()->M;
	float m = r * Mmax;

	const float threshold_03 = 0.3f * initial_length;
	const float inv_threshold_03 = 1.0f / threshold_03;

	float x1, y1, x2, y2, new_point, new_value;

	while (true) {
		new_point = Shag(m, x_Rmax_1, x_Rmax_2, y_Rmax_1, y_Rmax_2, N, r);
		new_value = ShekelFunc(new_point, seed);

		if (new_value < best_f) {
			best_f = new_value;
			Extr.emplace_back(best_f);
			Extr.emplace_back(new_point);
		}

		// Извлекаем максимальный интервал
		std::pop_heap(R.begin(), R.end(), ComparePtr);
		const std::unique_ptr<Interval> promejutochny_otrezok = std::move(R.back());
		//const std::unique_ptr<Interval>& promejutochny_otrezok = R.back();
		x1 = *promejutochny_otrezok->x1;
		y1 = *promejutochny_otrezok->y1;
		x2 = *promejutochny_otrezok->x2;
		y2 = *promejutochny_otrezok->y2;
		R.pop_back();
		const size_t r_size = R.size();

		const float len2 = x2 - new_point;
		const float len1 = new_point - x1;
		const float dmin = len1 < len2 ? len1 : len2;

		// Условие завершения
		if (++schetchick == global_iterations || dmin < epsilon) {
			if (g_world->rank() == 0) {
				Extr.emplace_back(schetchick);
				Extr.emplace_back(dmin);
				*out_len = Extr.size();
				*out_data = reinterpret_cast<float*>(CoTaskMemAlloc(sizeof(float) * (*out_len)));
				memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len));
			}
			return;
		}

		// Создаем новые интервалы
		std::unique_ptr<Interval> curr = std::make_unique<Interval>(&x1, &new_point, &y1, &new_value, &N);
		std::unique_ptr<Interval> curr1 = std::make_unique<Interval>(&new_point, &x2, &new_value, &y2, &N);
		const float currM = *curr->M > *curr1->M ? *curr->M : *curr1->M;

		if (len2 + len1 == dmax) {
			dmax = len2 > len1 ? len2 : len1;

#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
const float len = *R[i]->x2 - *R[i]->x1;
if (len > dmax) {
dmax = len;
}
}
}

text
		if (mode) {
			if (threshold_03 > dmax && fmodf(schetchick, 3.0f) == 0.0f || 10.0f * dmax < initial_length) {
				if (currM > Mmax) {
					Mmax = currM;
					m = r * Mmax;
				}
				const float progress = fmaf(-inv_threshold_03, dmax, 1.0f);
				const float alpha = fmaf(progress, progress, 1.0f);
				const float betta = 2.0f - alpha;

				const float MULTIPLIER = (1.0f / dmax) * Mmax;
				const float global_coeff = fmaf(MULTIPLIER, r, -MULTIPLIER);
				const float GLOBAL_FACTOR = betta * global_coeff;

				curr->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len1, *curr->M * alpha));
				curr1->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len2, *curr1->M * alpha));

#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
const float len_item = *R[i]->x2 - *R[i]->x1;
R[i]->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len_item, *R[i]->M * alpha));
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
else {
if (currM > Mmax) {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
R[i]->ChangeCharacteristic(m);
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
else {
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
}
}
else {
if (currM > Mmax) {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
R[i]->ChangeCharacteristic(m);
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
else {
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
}

text
		// Добавляем новые интервалы
		R.emplace_back(std::move(curr));
		std::push_heap(R.begin(), R.end(), ComparePtr);
		R.emplace_back(std::move(curr1));
		std::push_heap(R.begin(), R.end(), ComparePtr);

		// Обновляем x_Rmax и y_Rmax
		const std::unique_ptr<Interval>& __restrict top = R.front();
		x_Rmax_1 = *top->x1;
		x_Rmax_2 = *top->x2;
		y_Rmax_1 = *top->y1;
		y_Rmax_2 = *top->y2;
	}
}
else {
	const int rank = g_world->rank();
	const int partner = rank ^ 1;
	const float divider = ldexpf(1.0f, (Curve->razvertka << 1) + 1);
	const float inv_divider = 1.0f / divider;
	const float x_addition = (b - a) * inv_divider;
	const float y_addition = (d - c) * inv_divider;
	const float true_start = a + x_addition;
	float x_Rmax_1 = true_start;
	const float true_end = b - x_addition;
	float x_Rmax_2 = true_end;
	const float initial_length = true_end - true_start;
	const float threshold_03 = 0.3f * initial_length;
	float dmax = initial_length;
	const float inv_threshold_03 = 1.0f / threshold_03;
	const float end_val = rank ? RastriginFunc(true_end, d - y_addition) : RastriginFunc(true_start, c + y_addition);
	float y_Rmax_1 = end_val;
	float best_f = rank ? RastriginFunc(true_start, d - y_addition) : RastriginFunc(true_end, c + y_addition);
	float y_Rmax_2 = best_f;

	int schetchick = 0;
	int mcQueenSpeed = 1;

	Extr.reserve(global_iterations * 4u);
	Extr.clear();
	R.reserve(global_iterations * 2u);
	R.clear();

	// Создаем начальный интервал
	R.emplace_back(std::make_unique<Interval>(&true_start, &true_end, &end_val, &best_f, &N));
	float Mmax = *R.front()->M;
	float m = r * Mmax;

	// ВЫНЕСЕНО: Все переменные из цикла
	float new_point, new_value, newParam1_remote1, newParam2_remote1, newParam1_remote2, newParam2_remote2;
	int dummy;
	std::unique_ptr<Interval> promejutochny_otrezok;
	boost::optional<boost::mpi::status> probe_status;
	CrossMsg inbound;
	const PeanoCurve_2D* __restrict p1L;
	const PeanoCurve_2D* __restrict p2L;
	CrossMsg outbound;
	std::unique_ptr<Interval> injected;
	const PeanoCurve_2D* __restrict pc;
	float new_x1, new_x2;
	std::unique_ptr<Interval> curr;
	std::unique_ptr<Interval> curr1;
	float currM;
	float len2, len1;
	size_t r_size;
	float progress, alpha, betta, MULTIPLIER, global_coeff, GLOBAL_FACTOR;
	const std::unique_ptr<Interval>* __restrict top_ptr;
	float interval_len;

	// ВЫНЕСЕНО: Переменные для условий
	float cooling, T, k;

	while (true) {
		// MPI проверки
		probe_status = g_world->iprobe(partner, boost::mpi::any_tag);
		if (probe_status->tag() == 0) {
			g_world->irecv(partner, 0, mcQueenSpeed);
		}
		else if (probe_status->tag() == 2) {
			g_world->irecv(partner, 2, dummy);
			if (partner) {
				top_ptr = &R.front();
				Extr.emplace_back(schetchick);
				Extr.emplace_back(*(*top_ptr)->x2 - *(*top_ptr)->x1);
				*out_len = Extr.size();
				*out_data = reinterpret_cast<float*>(CoTaskMemAlloc(sizeof(float) * (*out_len)));
				memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len));
			}
			return;
		}

		// Условие завершения по итерациям
		if (++schetchick == global_iterations) {
			g_world->isend(partner, 2, 0);
			if (partner) {
				top_ptr = &R.front();
				Extr.emplace_back(schetchick);
				Extr.emplace_back(*(*top_ptr)->x2 - *(*top_ptr)->x1);
				*out_len = Extr.size();
				*out_data = reinterpret_cast<float*>(CoTaskMemAlloc(sizeof(float) * (*out_len)));
				memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len));
			}
			return;
		}

		// «Охлаждение» и период T
		cooling = ldexpf(1.0f, -schetchick * (1.0f / 138.63f));
		T = floorf(fmaf(20.0f, cooling, 10.0f));
		k = fmaf(0.2f, cooling, 0.7f);

		if (!(fmodf(schetchick, T)) == mcQueenSpeed) {
			top_ptr = &R.front();
			p1L = Curve->HitTest_2D(*(*top_ptr)->x1);
			p2L = Curve->HitTest_2D(*(*top_ptr)->x2);

			outbound = CrossMsg{ p1L->x1, p1L->x2, p2L->x1, p2L->x2, *(*top_ptr)->R };

			if (mcQueenSpeed) {
				g_world->isend(partner, 0, 0);
			}
			else {
				mcQueenSpeed = 1;
			}

			g_world->isend(partner, 1, outbound);
			g_world->irecv(partner, 1, inbound);

			// Проекция удалённых точек
			newParam1_remote1 = Curve->FindX_2D(inbound.s_x1, inbound.s_x2);
			newParam2_remote1 = Curve->FindX_2D(inbound.e_x1, inbound.e_x2);
			newParam1_remote2 = RastriginFunc(inbound.s_x1, inbound.s_x2);
			newParam2_remote2 = RastriginFunc(inbound.e_x1, inbound.e_x2);

			// «Инъекция» с пониженной характеристикой
			injected = std::make_unique<Interval>(&newParam1_remote1, &newParam2_remote1, &newParam1_remote2, &newParam2_remote2, &N);
			*injected->R = inbound.Rtop * k;

			R.emplace_back(std::move(injected));
			std::push_heap(R.begin(), R.end(), ComparePtr);
		}

		// Локальный шаг LNA
		new_point = Shag(m, x_Rmax_1, x_Rmax_2, y_Rmax_1, y_Rmax_2, N, r);

		// Координаты по кривой Пеано
		pc = Curve->HitTest_2D(new_point);
		new_x1 = pc->x1;
		new_x2 = pc->x2;
		new_value = RastriginFunc(new_x1, new_x2);

		// Обновление экстремумов
		if (new_value < best_f) {
			best_f = new_value;
			Extr.emplace_back(best_f);
			Extr.emplace_back(new_x1);
			Extr.emplace_back(new_x2);
		}

		// Разбиение лучшего интервала
		std::pop_heap(R.begin(), R.end(), ComparePtr);
		promejutochny_otrezok = std::move(R.back());
		R.pop_back();

		// Создаём подынтервалы
		curr = std::make_unique<Interval>(promejutochny_otrezok->x1, &new_point, promejutochny_otrezok->y1, &new_value, &N);
		curr1 = std::make_unique<Interval>(&new_point, promejutochny_otrezok->x2, &new_value, promejutochny_otrezok->y2, &N);

		currM = *curr->M > *curr1->M ? *curr->M : *curr1->M;
		len2 = *promejutochny_otrezok->x2 - new_point;
		len1 = new_point - *promejutochny_otrezok->x1;
		r_size = R.size();

		// Обновление dmax
		if (len2 + len1 == dmax) {
			dmax = (len2 > len1 ? len2 : len1);

#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
const float len = *R[i]->x2 - *R[i]->x1;
if (len > dmax) dmax = len;
}
}

text
		if (mode) {
			if (threshold_03 > dmax && fmodf(schetchick, 3.0f) == 0.0f || 10.0f * dmax < initial_length) {
				if (currM > Mmax) {
					Mmax = currM;
					m = r * Mmax;
				}
				progress = fmaf(-inv_threshold_03, dmax, 1.0f);
				alpha = fmaf(progress, progress, 1.0f);
				betta = 2.0f - alpha;
				MULTIPLIER = (1.0f / dmax) * Mmax;
				global_coeff = fmaf(MULTIPLIER, r, -MULTIPLIER);
				GLOBAL_FACTOR = betta * global_coeff;

				curr->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len1, *curr->M * alpha));
				curr1->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, len2, *curr1->M * alpha));

#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
R[i]->ChangeCharacteristic(fmaf(GLOBAL_FACTOR, *R[i]->x2 - *R[i]->x1, *R[i]->M * alpha));
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
else {
if (currM > Mmax) {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
R[i]->ChangeCharacteristic(m);
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
else {
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
}
}
else {
if (currM > Mmax) {
Mmax = currM;
m = r * Mmax;
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
#pragma loop(ivdep)
for (size_t i = 0u; i < r_size; ++i) {
R[i]->ChangeCharacteristic(m);
}
std::make_heap(R.begin(), R.end(), ComparePtr);
}
else {
curr->ChangeCharacteristic(m);
curr1->ChangeCharacteristic(m);
}
}

text
		// Добавление подынтервалов
		R.emplace_back(std::move(curr));
		std::push_heap(R.begin(), R.end(), ComparePtr);
		R.emplace_back(std::move(curr1));
		std::push_heap(R.begin(), R.end(), ComparePtr);

		// Проверка критерия сходимости
		top_ptr = &R.front();
		interval_len = *(*top_ptr)->x2 - *(*top_ptr)->x1;
		if (interval_len < epsilon) {
			g_world->isend(partner, 2, 0);
			if (partner) {
				Extr.emplace_back(schetchick);
				Extr.emplace_back(interval_len);
				*out_len = Extr.size();
				*out_data = reinterpret_cast<float*>(CoTaskMemAlloc(sizeof(float) * (*out_len)));
				memcpy(*out_data, Extr.data(), sizeof(float) * (*out_len));
			}
			return;
		}

		// Обновляем x_Rmax и y_Rmax
		x_Rmax_1 = *(*top_ptr)->x1;
		x_Rmax_2 = *(*top_ptr)->x2;
		y_Rmax_1 = *(*top_ptr)->y1;
		y_Rmax_2 = *(*top_ptr)->y2;
	}
}

}

очень ответственное задание: я приведу код метода,...

سؤال

مشاركة هذا السؤال والجواب