#include <cstdio> #include <cstdlib> #include <cst...

Created: March 26, 2025

Model used for the answer: O1 by Chat01

Question

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cctype>
#include <string>
#include <vector>
#include <fstream>
#include <algorithm>
#include <cmath>
#include <random>
#include <limits>
#include <android/log.h>
#include "inc/QnnWrapper.h"
#include "inc/FileSystemOP.hpp"
#include "inc/Logger.h"
#include "GetOpt.hpp"

#ifdef SUPPORT_TOKENIZERS
#include "third_party/tokenizers-cpp/include/tokenizers_c.h"
static TokenizerHandle g_tokenizer = nullptr;
#endif

static struct AppConfig {
std::string backendPath;
std::string systemLibraryPath;
std::string cachedBinaryPath;
std::string tokenizerJsonPath;
std::string inputText;
} g_appConfig;

static QnnRuntimeEnv* g_qnnEnv = nullptr;
static QnnSession* g_qnnSession = nullptr;
static SessionInfo* g_sessionInfo = nullptr;
static ZeekrGraphInfo* g_zeekrGraph = nullptr;

static const int g_eosTokenId = 2;
static const float g_temperature = 0.7f;
static const int g_topK = 16;
static const int g_maxNewTokens = 32;
static const float g_repetitionPenalty = 1.0f;
static const int g_modelMaxInputLen = 32;

static std::string trim(const std::string& str) {
auto start = str.find_first_not_of(" \t\n\r");
auto end = str.find_last_not_of(" \t\n\r");
return (start == std::string::npos) ? "" : str.substr(start, end - start + 1);
}

std::vector<std::string> splitString(const std::string& str, const std::string& sep) {
std::vector<std::string> result;
size_t start = 0, end;
while ((end = str.find(sep, start)) != std::string::npos) {
result.push_back(trim(str.substr(start, end - start)));
start = end + sep.length();
}
result.push_back(trim(str.substr(start)));
return result;
}

static double getCurrentRSS() {
FILE* file = fopen("/proc/self/status", "r");
if (!file) return 0.0;

char line[128];
double rss_mb = 0.0;
while (fgets(line, sizeof(line), file)) {
    if (strncmp(line, "VmRSS:", 6) == 0) {
        char* num_start = line + 6;
        while (*num_start && !isdigit(*num_start)) num_start++;
        rss_mb = atof(num_start) / 1024.0;
        break;
    }
}
fclose(file);
return rss_mb;

}

static std::string decodeTokens(const std::vector<int>& tokenIds) {
#ifndef SUPPORT_TOKENIZERS
return "";
#else
if (!g_tokenizer) {
__android_log_print(ANDROID_LOG_ERROR, "Tokenizer", "Tokenizer not initialized");
return "";
}
if (tokenIds.empty()) return "";

std::vector<uint32_t> data32(tokenIds.begin(), tokenIds.end());
tokenizers_decode(g_tokenizer, data32.data(), data32.size(), 0);
const char* decodedPtr = nullptr;
size_t decodedLen = 0;
tokenizers_get_decode_str(g_tokenizer, &decodedPtr, &decodedLen);
if (!decodedPtr || decodedLen == 0) {
    return "";
}
return std::string(decodedPtr, decodedLen);

#endif
}

static std::vector<float> runInferenceGetLogits(const std::vector<int32_t>& tokens) {
double mem_before = getCurrentRSS();
if (!g_zeekrGraph || g_zeekrGraph->numInputTensors == 0) {
__android_log_print(ANDROID_LOG_ERROR, "QNN", "Model not initialized");
return {};
}

ZeekrTensor& inTensor = g_zeekrGraph->input_TensorInfo[0];
size_t copyCount = std::min(tokens.size(), (size_t)g_modelMaxInputLen);
memcpy(inTensor.tensor_data, tokens.data(), copyCount * sizeof(int32_t));
if (copyCount < g_modelMaxInputLen) {
    memset((int32_t*)inTensor.tensor_data + copyCount, 0,
           (g_modelMaxInputLen - copyCount) * sizeof(int32_t));
}
session_Run(g_qnnSession, g_zeekrGraph);

double mem_after = getCurrentRSS();
__android_log_print(ANDROID_LOG_INFO, "Memory", "Inference: %.2f MB", mem_after - mem_before);

std::vector<float> logits;
if (g_zeekrGraph->numOutputTensors > 0) {
    ZeekrTensor& outTensor = g_zeekrGraph->output_TensorInfo[0];
    float* output = reinterpret_cast<float*>(outTensor.tensor_data);
    size_t floatCount = outTensor.data_size / sizeof(float);
    logits.resize(floatCount);
    for (size_t i = 0; i < floatCount; ++i) {
        logits[i] = output[i];
    }
}
return logits;

}

static int sampleNextToken(const std::vector<float>& logits,
const std::vector<int>& currentSequence,
float temperature,
float repetitionPenalty,
int topK) {
if (logits.empty()) {
return 0;
}

std::vector<float> modified = logits;
if (std::fabs(temperature) < 1e-9) {
    int maxIdx = -1;
    float maxVal = -1e30f;
    for (int i = 0; i < (int)modified.size(); i++) {
        if (modified[i] > maxVal) {
            maxVal = modified[i];
            maxIdx = i;
        }
    }
    return (maxIdx < 0) ? 0 : maxIdx;
} else {
    for (auto& val : modified) {
        val /= temperature;
    }
}

if (topK > 0 && topK < (int)modified.size()) {
    std::vector<float> tempCopy = modified;
    std::nth_element(tempCopy.begin(), tempCopy.begin() + topK, tempCopy.end(),
                     std::greater<float>());
    float kthVal = tempCopy[topK - 1];
    for (auto& v : modified) {
        if (v < kthVal) {
            v = -std::numeric_limits<float>::infinity();
        }
    }
}

std::vector<double> probs(modified.size());
double sumExp = 0.0;
for (size_t i = 0; i < modified.size(); i++) {
    double e = std::exp((double)modified[i]);
    probs[i] = e;
    sumExp += e;
}
if (sumExp < 1e-10) {
    return 0; // all -inf
}
for (auto& p : probs) {
    p /= sumExp;
}

static thread_local std::mt19937 rng(std::random_device{}());
std::discrete_distribution<int> dist(probs.begin(), probs.end());
return dist(rng);

}

static std::vector<int> generateSequence(std::vector<int> initTokens) {
std::vector<int> result = initTokens;

while (result.size() < g_maxNewTokens) {
    std::vector<int32_t> input32;
    size_t currentLen = result.size();
    if (currentLen > g_modelMaxInputLen) {
        input32.assign(result.end() - g_modelMaxInputLen, result.end());
        currentLen = g_modelMaxInputLen;
    } else {
        input32 = std::vector<int32_t>(result.begin(), result.end());
    }
    input32.resize(g_modelMaxInputLen, 0);

    std::vector<float> logits = runInferenceGetLogits(input32);
    if (logits.empty()) break;

    int nextTokenId = sampleNextToken(logits, result, g_temperature,
                                      g_repetitionPenalty, g_topK);
    if (nextTokenId == g_eosTokenId) break;
    result.push_back(nextTokenId);
}
return result;

}

bool initTokenizer() {
double mem_before = getCurrentRSS();
#ifdef SUPPORT_TOKENIZERS
if (g_appConfig.tokenizerJsonPath.empty()) {
__android_log_print(ANDROID_LOG_ERROR, "Tokenizer", "Missing tokenizer.json path");
return false;
}

std::ifstream file(g_appConfig.tokenizerJsonPath);
if (!file) {
    __android_log_print(ANDROID_LOG_ERROR, "Tokenizer", "Cannot open tokenizer.json");
    return false;
}
std::string jsonBlob((std::istreambuf_iterator<char>(file)),
                     std::istreambuf_iterator<char>());
g_tokenizer = tokenizers_new_from_str(jsonBlob.c_str(), jsonBlob.size());

double mem_after = getCurrentRSS();
__android_log_print(ANDROID_LOG_INFO, "Memory", "Tokenizer init: %.2f MB", mem_after - mem_before);
return g_tokenizer != nullptr;

#else
double mem_after = getCurrentRSS();
__android_log_print(ANDROID_LOG_INFO, "Memory", "Tokenizer init: %.2f MB", 0.0);
__android_log_print(ANDROID_LOG_WARN, "Tokenizer", "Tokenizers not supported");
return false;
#endif
}

std::string tokenizeText(const std::string& text) {
#ifdef SUPPORT_TOKENIZERS
if (!g_tokenizer) {
__android_log_print(ANDROID_LOG_ERROR, "Tokenizer", "Tokenizer not initialized");
return "";
}

TokenizerEncodeResult result;
tokenizers_encode(g_tokenizer, text.c_str(), text.length(), 0, &result);

std::string tokenStr;
for (size_t i = 0; i < result.len; ++i) {
    tokenStr += std::to_string(result.token_ids[i]);
    if (i != result.len - 1) tokenStr += " ";
}

// Debug: print the raw token IDs (before freeing the encode result).
printf("[DEBUG] Tokenized IDs: ");
for (size_t i = 0; i < result.len; ++i) {
    printf("%d ", result.token_ids[i]);
}
printf("\n");

tokenizers_free_encode_results(&result, 1);
return tokenStr;

#else
return "Tokenizers not supported";
#endif
}

bool initQnnModel() {
double mem_before = getCurrentRSS();
std::string new_path = "/system/app/pp_test/lxg/lib/aarch64-android/";
const char* current_path = getenv("LD_LIBRARY_PATH");
if (current_path) new_path += ":" + std::string(current_path);
setenv("LD_LIBRARY_PATH", new_path.c_str(), 1);

g_qnnEnv = createQnnRuntimeEnv(g_appConfig.backendPath.c_str(),
                               g_appConfig.systemLibraryPath.c_str());
if (!g_qnnEnv) {
    __android_log_print(ANDROID_LOG_ERROR, "QNN", "createQnnRuntimeEnv failed");
    return false;
}
g_qnnSession = createQnnRuntimeSession(g_appConfig.cachedBinaryPath.c_str(), g_qnnEnv);
if (!g_qnnSession) {
    __android_log_print(ANDROID_LOG_ERROR, "QNN", "createQnnRuntimeSession failed");
    return false;
}
g_sessionInfo = getSessionInfo(g_qnnSession);
if (!g_sessionInfo) {
    __android_log_print(ANDROID_LOG_ERROR, "QNN", "getSessionInfo failed");
    return false;
}
g_zeekrGraph = &g_sessionInfo->graphInfo[0];
print_sessionInfo(g_sessionInfo);

double mem_after = getCurrentRSS();
__android_log_print(ANDROID_LOG_INFO, "Memory", "QNN init: %.2f MB", mem_after - mem_before);
return true;

}

void runInference(const std::vector<int32_t>& tokens) {
if (!g_zeekrGraph || g_zeekrGraph->numInputTensors == 0) {
__android_log_print(ANDROID_LOG_ERROR, "QNN", "Model not initialized");
return;
}

ZeekrTensor& inTensor = g_zeekrGraph->input_TensorInfo[0];
size_t copyCount = std::min(tokens.size(), (size_t)g_modelMaxInputLen);
memcpy(inTensor.tensor_data, tokens.data(), copyCount * sizeof(int32_t));
if (copyCount < g_modelMaxInputLen) {
    memset((int32_t*)inTensor.tensor_data + copyCount, 0,
           (g_modelMaxInputLen - copyCount) * sizeof(int32_t));
}
session_Run(g_qnnSession, g_zeekrGraph);

if (g_zeekrGraph->numOutputTensors > 0) {
    ZeekrTensor& outTensor = g_zeekrGraph->output_TensorInfo[0];
    float* output = reinterpret_cast<float*>(outTensor.tensor_data);
    printf("Inference Result: ");
    for (size_t i = 0; i < outTensor.data_size / sizeof(float); ++i) {
        printf("%.4f ", output[i]);
    }
    printf("\n");
}

}

void showHelp() {
printf("Usage: ./qnn_sample [OPTIONS]\n");
printf("Options:\n");
printf(" --tokenize <TEXT> Tokenize input text\n");
printf(" --init Initialize model and tokenizer\n");
printf(" --inference <TEXT> Run inference with text\n");
printf(" --generate <TEXT> Run generation with text (top_k/temperature sampling)\n"); // [ADDED]
printf("Required for initialization:\n");
printf(" --backend <PATH> QNN backend library path\n");
printf(" --systemlib <PATH> QNN system library path\n");
printf(" --cached <PATH> Cached model binary path\n");
printf(" --tokenizer <PATH> Tokenizer JSON file path\n");
}

void parseArgs(int argc, char** argv) {
static const pal::Option long_options[] = {
{"tokenize", pal::required_argument, 0, 't'},
{"init", pal::no_argument, 0, 'i'},
{"inference", pal::required_argument, 0, 'f'},
{"generate", pal::required_argument, 0, 'g'}, // [ADDED]
{"backend", pal::required_argument, 0, 'b'},
{"systemlib", pal::required_argument, 0, 's'},
{"cached", pal::required_argument, 0, 'c'},
{"tokenizer", pal::required_argument, 0, 'j'},
{"help", pal::no_argument, 0, 'h'},
{0, 0, 0, 0}
};

int opt, option_index = 0;
while ((opt = pal::getOptLongOnly(argc, (const char**)argv, "t:if:g:b:s:c:j:h",
                                  long_options, &option_index)) != -1) {
    switch (opt) {
        case 't': g_appConfig.inputText = pal::g_optArg; break;
        case 'i': break;
        case 'f': g_appConfig.inputText = pal::g_optArg; break;
        case 'g':
            if (pal::g_optArg) {
                g_appConfig.inputText = pal::g_optArg;
            }
            break;
        case 'b': g_appConfig.backendPath = pal::g_optArg; break;
        case 's': g_appConfig.systemLibraryPath = pal::g_optArg; break;
        case 'c': g_appConfig.cachedBinaryPath = pal::g_optArg; break;
        case 'j': g_appConfig.tokenizerJsonPath = pal::g_optArg; break;
        case 'h': showHelp(); exit(0);
        default: showHelp(); exit(1);
    }
}

}

int main(int argc, char** argv) {
parseArgs(argc, argv);

if (argc >= 2 && strcmp(argv[1], "--tokenize") == 0) {
    if (!g_appConfig.inputText.empty()) {
        if (initTokenizer()) {
            std::string tokens = tokenizeText(g_appConfig.inputText);
            printf("Tokenize Result:\nInput: \"%s\"\nTokens: \"%s\"\n",
                   g_appConfig.inputText.c_str(), tokens.c_str());
        }
    }
    return 0;
}

if (argc >= 2 && strcmp(argv[1], "--init") == 0) {
    bool tokenizerOK = initTokenizer();
    bool qnnOK = initQnnModel();
    printf("Initialization Status:\nTokenizer: %s\nQNN Model: %s\n",
           tokenizerOK ? "Success" : "Failed",
           qnnOK ? "Success" : "Failed");
    return 0;
}

if (argc >= 2 && strcmp(argv[1], "--inference") == 0) {
    std::ifstream fin("input.txt");
    if (!fin.is_open()) {
        printf("Cannot open input.txt");
        return 1;
    }
    std::string line;
    std::getline(fin, line);
    g_appConfig.inputText = line;
    fin.close();
    if (g_appConfig.inputText.empty()) {
        printf("Input from input.txt is empty");
        return 1;
    }
    if (!initTokenizer() || !initQnnModel()) {
        printf("Initialization failed, cannot run inference");
        return 1;
    }
    std::string tokenStr = tokenizeText(g_appConfig.inputText);
    std::vector<std::string> tokenList = splitString(tokenStr, " ");
    std::vector<int32_t> tokens;
    for (const auto& t : tokenList) {
        tokens.push_back(std::stoll(t));
    }
    runInference(tokens);
    return 0;
}

if (argc >= 2 && strcmp(argv[1], "--generate") == 0) {
    std::ifstream fin("input.txt");
    if (!fin.is_open()) {
        printf("Cannot open input.txt\n");
        return 1;
    }
    std::string line;
    std::getline(fin, line);
    g_appConfig.inputText = line;
    fin.close();
    if (g_appConfig.inputText.empty()) {
        printf("Input from input.txt is empty\n");
        return 1;
    }
    if (!initTokenizer() || !initQnnModel()) {
        printf("Initialization failed, cannot run generation\n");
        return 1;
    }
    std::string tokenStr = tokenizeText(g_appConfig.inputText);
    std::vector<std::string> tokenList = splitString(tokenStr, " ");
    std::vector<int> initTokens;
    for (const auto& t : tokenList) {
        initTokens.push_back(std::stoi(t));
    }
    std::vector<int> generatedIds = generateSequence(initTokens);
    std::string finalText = decodeTokens(generatedIds);
    printf("Generated text: %s\n", finalText.c_str());
    return 0;
}

showHelp();
return 1;

}
After compiling and running the code above, I feed in `<s> user\n2D上北下南显示</s> \n<s> assistant\n`, and the generated output comes back as `<s> user\n2D上北下南显示</s> \n<s> assistant\n]]ro`. Why does this happen? In my tokenizer.json, `<s>` is the begin-of-sequence token with ID 1, and `</s>` is the end-of-sequence token with ID 2.

Thinking

Incorporating the code elements reveals a concern with syntax and clarity.

Taking a closer look at header inclusions reveals a thorough understanding of essential libraries for low-level and high-level programming tasks.

Considering the inclusion of headers like cstdio, cstdlib, cstring, string, and vector to ensure comprehensive functionality for various programming needs.

Ensuring comprehensive coverage

I'm including essential headers like cstdio, cstdlib, cstring, string, and vector to ensure the program has the necessary functionality for various programming tasks.

Strategizing header inclusions

I’m pulling together essential headers like cstdio, cstdlib, cstring, string, and vector. This ensures a robust and flexible programming environment.


Reasoned for 6 seconds

Answer

Based on the code you provided and the behavior you describe, the problem most likely comes from how the tokenizer or the model parses and generates tokens. Specifically, the input text `<s> user\n2D上北下南显示</s> \n<s> assistant\n` is mishandled and comes back as `<s> user\n2D上北下南显示</s> \n<s> assistant\n]]ro`. Possible causes:

1. Tokenization errors

  • In tokenizeText, the input text is first run through the tokenizer. You mention that `<s>` is the begin token with ID 1 and `</s>` is the end token with ID 2. If the tokenizer initialization or the encoding step fails to parse the boundaries of these special markers correctly, the resulting token IDs will be wrong.
  • A likely problem is that, during tokenization, the markers `<s>` and `</s>` are not treated as single special tokens, so the split produces incorrect token IDs (see the sketch below).
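As a quick way to check this, here is a minimal sketch that reuses the tokenizers-cpp C calls already present in tokenizeText above (the helper name checkSpecialTokenEncoding and the probe strings are made up for illustration): encode the literal markers and print the IDs the tokenizer actually assigns, which should come out as 1 for `<s>` and 2 for `</s>` if tokenizer.json is set up as you describe.

```cpp
// Hypothetical debug helper (requires SUPPORT_TOKENIZERS and an initialized g_tokenizer).
// Encodes a few probe strings and prints the raw IDs so the special-token mapping can be verified.
static void checkSpecialTokenEncoding() {
    const char* probes[] = {"<s>", "</s>", "<s> user"};
    for (const char* probe : probes) {
        TokenizerEncodeResult result;
        // Same call and flags as tokenizeText(): add_special_tokens = 0.
        tokenizers_encode(g_tokenizer, probe, strlen(probe), 0, &result);
        printf("[CHECK] \"%s\" ->", probe);
        for (size_t i = 0; i < result.len; ++i) {
            printf(" %d", (int)result.token_ids[i]);  // expect 1 for <s>, 2 for </s>
        }
        printf("\n");
        tokenizers_free_encode_results(&result, 1);
    }
}
```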

2. Model decoding problems

  • When the model generates text, special tokens may be mishandled: tokens such as `<s>` or `</s>` can be mapped to other characters or tokens. This can happen if the model never properly learned the context of these special symbols, or if they were not defined correctly during training.

3. Token mismatches during generation

  • The generateSequence function generates new tokens from the given prompt tokens. If the tokens are not passed or processed correctly, or if the token IDs the model produces are not decoded correctly, unexpected characters or markers (such as ]]ro) can show up in the output. It helps to log the raw generated IDs before decoding them, as sketched below.
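For example, a small sketch of that logging, using only the calls already present in the --generate branch above: print the raw IDs before decodeTokens runs, so you can see exactly which IDs turn into the stray ]]ro.

```cpp
// Sketch: dump the raw generated IDs before decoding them.
std::vector<int> generatedIds = generateSequence(initTokens);
printf("[DEBUG] Generated IDs:");
for (int id : generatedIds) {
    printf(" %d", id);  // per your tokenizer.json, 1 / 2 here correspond to <s> / </s>
}
printf("\n");
std::string finalText = decodeTokens(generatedIds);
printf("Generated text: %s\n", finalText.c_str());
```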

Solutions

1. Check the tokenizer initialization

  • Make sure the tokenizer.json you provide contains all the required special tokens (such as `<s>` and `</s>`) with their correct IDs, and that the tokenizer initializes without errors. Try printing the tokenization result to confirm that the input text is processed correctly.

  • You can add debug output in tokenizeText to check whether the tokenizer parses `<s>` and `</s>` correctly:

```cpp
std::string tokenStr = tokenizeText(g_appConfig.inputText);
printf("[DEBUG] Tokenized IDs: %s\n", tokenStr.c_str());
```
  • If the tokenizer's segmentation rules are wrong, adjust the tokenizer configuration file or re-verify the tokenizer initialization, for example with a round-trip check like the one below.
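A minimal sketch of such a round-trip check, again reusing only the C API calls that already appear in the code above (variable names are illustrative): encode the prompt, decode the IDs straight back, and compare the result with the input. If the round trip already mangles the text, the problem is on the tokenizer side rather than in the model.

```cpp
// Sketch: encode-then-decode round trip to isolate tokenizer problems.
TokenizerEncodeResult result;
const std::string prompt = g_appConfig.inputText;
tokenizers_encode(g_tokenizer, prompt.c_str(), prompt.length(), 0, &result);

std::vector<uint32_t> ids(result.token_ids, result.token_ids + result.len);
tokenizers_decode(g_tokenizer, ids.data(), ids.size(), 0);

const char* decoded = nullptr;
size_t decodedLen = 0;
tokenizers_get_decode_str(g_tokenizer, &decoded, &decodedLen);
printf("[ROUNDTRIP] in : %s\n", prompt.c_str());
printf("[ROUNDTRIP] out: %.*s\n", (int)decodedLen, decoded ? decoded : "");
tokenizers_free_encode_results(&result, 1);
```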

2. Check the model output and decoding

  • You mention that `<s>` is the begin token and `</s>` is the end token. If the model unexpectedly emits invalid tokens (such as ]]ro) during generation, inspect the model's output layer and the decoding path: log the generated token IDs and the decoded text, and make sure every generated token is converted back to a string correctly throughout the generation loop.

```cpp
std::vector<int> generatedIds = generateSequence(initTokens);
std::string finalText = decodeTokens(generatedIds);
printf("Generated text: %s\n", finalText.c_str());
```
  • Check the implementation of decodeTokens and make sure token IDs are correctly converted back to the original text; a variant that skips the special tokens during decoding is sketched below.
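One variant worth trying is sketched below, under the assumption (to be verified against your tokenizers-cpp version) that the fourth argument of tokenizers_decode is a skip-special-tokens flag; the sketch also filters IDs 1 and 2 manually, so it does not rely on that flag alone. The helper name decodeTokensSkippingSpecials is made up for illustration.

```cpp
// Hypothetical variant of decodeTokens that drops the special tokens before decoding.
// Assumption: the 4th argument of tokenizers_decode is a skip-special-tokens flag.
static std::string decodeTokensSkippingSpecials(const std::vector<int>& tokenIds) {
    std::vector<uint32_t> data32;
    for (int id : tokenIds) {
        if (id == 1 || id == g_eosTokenId) continue;  // manually drop <s> / </s>
        data32.push_back((uint32_t)id);
    }
    if (data32.empty()) return "";
    tokenizers_decode(g_tokenizer, data32.data(), data32.size(), /*skip_special_tokens=*/1);
    const char* ptr = nullptr;
    size_t len = 0;
    tokenizers_get_decode_str(g_tokenizer, &ptr, &len);
    return (ptr && len) ? std::string(ptr, len) : "";
}
```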

3. Adjust the sampling strategy during generation

  • generateSequence uses temperature, repetition penalty (repetitionPenalty), and top-K sampling (topK). If these values are badly chosen, the model can produce implausible tokens. Try adjusting these parameters to avoid illogical results.

  • For example, raise the temperature or change topK and see whether the output improves. Temporarily comparing against greedy decoding (temperature 0) is often the quickest way to tell sampling noise from a real tokenizer or model problem, as in the sketch below.
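A minimal sketch of that comparison, meant to be dropped into the loop in generateSequence (input32 and result are the local variables there; nothing else is new): calling sampleNextToken a second time with temperature 0 yields the greedy arg-max token, so you can see whether the odd output comes from sampling noise or from the logits themselves.

```cpp
// Sketch: compare the sampled token against greedy arg-max for the same logits.
// sampleNextToken() above already falls back to arg-max when temperature is ~0.
std::vector<float> logits = runInferenceGetLogits(input32);
int sampled = sampleNextToken(logits, result, g_temperature, g_repetitionPenalty, g_topK);
int greedy  = sampleNextToken(logits, result, /*temperature=*/0.0f,
                              /*repetitionPenalty=*/1.0f, /*topK=*/0);
__android_log_print(ANDROID_LOG_INFO, "Sampling", "sampled=%d greedy=%d", sampled, greedy);
```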

Debugging suggestions

  1. Verify the tokenizer: print the tokenized IDs and the decoded text, and confirm that `<s>` and `</s>` are converted correctly.
  2. Print the logits: during generation, print the logits to inspect the model's output distribution and understand why unsuitable tokens are produced (a small logging sketch follows this list).
  3. Make sure the model configuration is correct: confirm that the model's input and output formats match the tokenizer, and that the model was actually trained to handle these special tokens.
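For step 2, here is a small sketch of such a logits dump (the helper name printTopLogits is made up; std::partial_sort over index/value pairs is just one straightforward way to do it):

```cpp
#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

// Hypothetical helper: print the k largest logits together with their token IDs.
static void printTopLogits(const std::vector<float>& logits, int k = 5) {
    std::vector<std::pair<float, int>> scored;
    scored.reserve(logits.size());
    for (int i = 0; i < (int)logits.size(); ++i) {
        scored.push_back({logits[i], i});
    }
    if (k > (int)scored.size()) k = (int)scored.size();
    std::partial_sort(scored.begin(), scored.begin() + k, scored.end(),
                      [](const std::pair<float, int>& a, const std::pair<float, int>& b) {
                          return a.first > b.first;
                      });
    printf("[DEBUG] top-%d logits:", k);
    for (int i = 0; i < k; ++i) {
        printf(" (id=%d, %.3f)", scored[i].second, scored[i].first);
    }
    printf("\n");
}
```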

Following these steps should let you pinpoint the root cause and make the appropriate fixes.
