Created on: March 26, 2025
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
#include <fstream>
#include <algorithm>
#include <cmath>
#include <cctype>
#include <random>
#include <limits>
#include <android/log.h>
#include "inc/QnnWrapper.h"
#include "inc/FileSystemOP.hpp"
#include "inc/Logger.h"
#include "GetOpt.hpp"
#ifdef SUPPORT_TOKENIZERS
#include "third_party/tokenizers-cpp/include/tokenizers_c.h"
static TokenizerHandle g_tokenizer = nullptr;
#endif
static struct AppConfig {
    std::string backendPath;
    std::string systemLibraryPath;
    std::string cachedBinaryPath;
    std::string tokenizerJsonPath;
    std::string inputText;
} g_appConfig;
static QnnRuntimeEnv* g_qnnEnv = nullptr;
static QnnSession* g_qnnSession = nullptr;
static SessionInfo* g_sessionInfo = nullptr;
static ZeekrGraphInfo* g_zeekrGraph = nullptr;
static const int g_eosTokenId = 2;
static const float g_temperature = 0.7f;
static const int g_topK = 16;
static const int g_maxNewTokens = 32;
static const float g_repetitionPenalty = 1.0f;
static const int g_modelMaxInputLen = 32;
static std::string trim(const std::string& str) {
    auto start = str.find_first_not_of(" \t\n\r");
    auto end = str.find_last_not_of(" \t\n\r");
    return (start == std::string::npos) ? "" : str.substr(start, end - start + 1);
}
std::vector<std::string> splitString(const std::string& str, const std::string& sep) {
    std::vector<std::string> result;
    size_t start = 0, end;
    while ((end = str.find(sep, start)) != std::string::npos) {
        result.push_back(trim(str.substr(start, end - start)));
        start = end + sep.length();
    }
    result.push_back(trim(str.substr(start)));
    return result;
}
static double getCurrentRSS() {
    FILE* file = fopen("/proc/self/status", "r");
    if (!file) return 0.0;
    char line[128];
    double rss_mb = 0.0;
    while (fgets(line, sizeof(line), file)) {
        if (strncmp(line, "VmRSS:", 6) == 0) {
            // VmRSS in /proc/self/status is reported in kB; convert to MB.
            char* num_start = line + 6;
            while (*num_start && !isdigit(*num_start)) num_start++;
            rss_mb = atof(num_start) / 1024.0;
            break;
        }
    }
    fclose(file);
    return rss_mb;
}
static std::string decodeTokens(const std::vector<int>& tokenIds) {
#ifndef SUPPORT_TOKENIZERS
    return "";
#else
    if (!g_tokenizer) {
        __android_log_print(ANDROID_LOG_ERROR, "Tokenizer", "Tokenizer not initialized");
        return "";
    }
    if (tokenIds.empty()) return "";
    std::vector<uint32_t> data32(tokenIds.begin(), tokenIds.end());
    // tokenizers_decode stores the result inside the tokenizer handle;
    // fetch it afterwards via tokenizers_get_decode_str.
    tokenizers_decode(g_tokenizer, data32.data(), data32.size(), 0);
    const char* decodedPtr = nullptr;
    size_t decodedLen = 0;
    tokenizers_get_decode_str(g_tokenizer, &decodedPtr, &decodedLen);
    if (!decodedPtr || decodedLen == 0) {
        return "";
    }
    return std::string(decodedPtr, decodedLen);
#endif
}
static std::vector<float> runInferenceGetLogits(const std::vector<int32_t>& tokens) {
    double mem_before = getCurrentRSS();
    if (!g_zeekrGraph || g_zeekrGraph->numInputTensors == 0) {
        __android_log_print(ANDROID_LOG_ERROR, "QNN", "Model not initialized");
        return {};
    }
    // Copy the token ids into the first input tensor, zero-padding on the
    // right up to the model's fixed input length.
    ZeekrTensor& inTensor = g_zeekrGraph->input_TensorInfo[0];
    size_t copyCount = std::min(tokens.size(), (size_t)g_modelMaxInputLen);
    memcpy(inTensor.tensor_data, tokens.data(), copyCount * sizeof(int32_t));
    if (copyCount < (size_t)g_modelMaxInputLen) {
        memset((int32_t*)inTensor.tensor_data + copyCount, 0,
               (g_modelMaxInputLen - copyCount) * sizeof(int32_t));
    }
    session_Run(g_qnnSession, g_zeekrGraph);
    double mem_after = getCurrentRSS();
    __android_log_print(ANDROID_LOG_INFO, "Memory", "Inference: %.2f MB", mem_after - mem_before);
    // Copy the raw float logits out of the first output tensor.
    std::vector<float> logits;
    if (g_zeekrGraph->numOutputTensors > 0) {
        ZeekrTensor& outTensor = g_zeekrGraph->output_TensorInfo[0];
        float* output = reinterpret_cast<float*>(outTensor.tensor_data);
        size_t floatCount = outTensor.data_size / sizeof(float);
        logits.assign(output, output + floatCount);
    }
    return logits;
}
static int sampleNextToken(const std::vector<float>& logits,
                           const std::vector<int>& currentSequence,
                           float temperature,
                           float repetitionPenalty,
                           int topK) {
    if (logits.empty()) {
        return 0;
    }
    // currentSequence and repetitionPenalty are accepted but never applied
    // below, so repetition is effectively unpenalized (g_repetitionPenalty
    // is 1.0 anyway).
    (void)currentSequence;
    (void)repetitionPenalty;
    std::vector<float> modified = logits;
    if (std::fabs(temperature) < 1e-9) {
        // temperature == 0: greedy decoding, pick the arg-max logit.
        int maxIdx = -1;
        float maxVal = -1e30f;
        for (int i = 0; i < (int)modified.size(); i++) {
            if (modified[i] > maxVal) { maxVal = modified[i]; maxIdx = i; }
        }
        return (maxIdx < 0) ? 0 : maxIdx;
    }
    for (auto& val : modified) { val /= temperature; }
    // Top-K filtering: mask everything below the K-th largest logit.
    if (topK > 0 && topK < (int)modified.size()) {
        std::vector<float> tempCopy = modified;
        // Place the K-th largest value at index topK-1.
        std::nth_element(tempCopy.begin(), tempCopy.begin() + topK - 1, tempCopy.end(),
                         std::greater<float>());
        float kthVal = tempCopy[topK - 1];
        for (auto& v : modified) {
            if (v < kthVal) { v = -std::numeric_limits<float>::infinity(); }
        }
    }
    // Softmax over the surviving logits (max subtracted for numerical
    // stability), then sample an index from the resulting distribution.
    float maxLogit = *std::max_element(modified.begin(), modified.end());
    std::vector<double> probs(modified.size());
    double sumExp = 0.0;
    for (size_t i = 0; i < modified.size(); i++) {
        double e = std::exp((double)(modified[i] - maxLogit));
        probs[i] = e;
        sumExp += e;
    }
    if (sumExp < 1e-10) {
        return 0; // everything was masked to -inf
    }
    for (auto& p : probs) { p /= sumExp; }
    static thread_local std::mt19937 rng(std::random_device{}());
    std::discrete_distribution<int> dist(probs.begin(), probs.end());
    return dist(rng);
}
static std::vector<int> generateSequence(std::vector<int> initTokens) {
    std::vector<int> result = initTokens;
    // NOTE: this loop caps the *total* sequence length (prompt included) at
    // g_maxNewTokens, which here also equals the model's fixed input length.
    while ((int)result.size() < g_maxNewTokens) {
        // Feed the model a fixed-length window: the last g_modelMaxInputLen
        // tokens, zero-padded on the right if the sequence is still shorter.
        std::vector<int32_t> input32;
        if (result.size() > (size_t)g_modelMaxInputLen) {
            input32.assign(result.end() - g_modelMaxInputLen, result.end());
        } else {
            input32.assign(result.begin(), result.end());
        }
        input32.resize(g_modelMaxInputLen, 0);
        std::vector<float> logits = runInferenceGetLogits(input32);
        if (logits.empty()) break;
        int nextTokenId = sampleNextToken(logits, result, g_temperature,
                                          g_repetitionPenalty, g_topK);
        if (nextTokenId == g_eosTokenId) break; // stop at </s>
        result.push_back(nextTokenId);
    }
    return result;
}
bool initTokenizer() {
    double mem_before = getCurrentRSS();
#ifdef SUPPORT_TOKENIZERS
    if (g_appConfig.tokenizerJsonPath.empty()) {
        __android_log_print(ANDROID_LOG_ERROR, "Tokenizer", "Missing tokenizer.json path");
        return false;
    }
    std::ifstream file(g_appConfig.tokenizerJsonPath);
    if (!file) {
        __android_log_print(ANDROID_LOG_ERROR, "Tokenizer", "Cannot open tokenizer.json");
        return false;
    }
    // Read the whole tokenizer.json blob and build the tokenizer from it.
    std::string jsonBlob((std::istreambuf_iterator<char>(file)),
                         std::istreambuf_iterator<char>());
    g_tokenizer = tokenizers_new_from_str(jsonBlob.c_str(), jsonBlob.size());
    double mem_after = getCurrentRSS();
    __android_log_print(ANDROID_LOG_INFO, "Memory", "Tokenizer init: %.2f MB", mem_after - mem_before);
    return g_tokenizer != nullptr;
#else
    (void)mem_before;
    __android_log_print(ANDROID_LOG_INFO, "Memory", "Tokenizer init: %.2f MB", 0.0);
    __android_log_print(ANDROID_LOG_WARN, "Tokenizer", "Tokenizers not supported");
    return false;
#endif
}
std::string tokenizeText(const std::string& text) {
#ifdef SUPPORT_TOKENIZERS
    if (!g_tokenizer) {
        __android_log_print(ANDROID_LOG_ERROR, "Tokenizer", "Tokenizer not initialized");
        return "";
    }
    TokenizerEncodeResult result;
    tokenizers_encode(g_tokenizer, text.c_str(), text.length(), 0, &result);
    std::string tokenStr;
    for (size_t i = 0; i < result.len; ++i) {
        tokenStr += std::to_string(result.token_ids[i]);
        if (i != result.len - 1) tokenStr += " ";
    }
    // Debug: print the raw token ids before the encode result is freed.
    printf("[DEBUG] Tokenized IDs: ");
    for (size_t i = 0; i < result.len; ++i) {
        printf("%d ", result.token_ids[i]);
    }
    printf("\n");
    tokenizers_free_encode_results(&result, 1);
    return tokenStr;
#else
    return "Tokenizers not supported";
#endif
}
bool initQnnModel() {
    double mem_before = getCurrentRSS();
    // Prepend the QNN library directory so the backend's dependencies resolve.
    std::string new_path = "/system/app/pp_test/lxg/lib/aarch64-android/";
    const char* current_path = getenv("LD_LIBRARY_PATH");
    if (current_path) new_path += ":" + std::string(current_path);
    setenv("LD_LIBRARY_PATH", new_path.c_str(), 1);
    g_qnnEnv = createQnnRuntimeEnv(g_appConfig.backendPath.c_str(),
                                   g_appConfig.systemLibraryPath.c_str());
    if (!g_qnnEnv) {
        __android_log_print(ANDROID_LOG_ERROR, "QNN", "createQnnRuntimeEnv failed");
        return false;
    }
    g_qnnSession = createQnnRuntimeSession(g_appConfig.cachedBinaryPath.c_str(), g_qnnEnv);
    if (!g_qnnSession) {
        __android_log_print(ANDROID_LOG_ERROR, "QNN", "createQnnRuntimeSession failed");
        return false;
    }
    g_sessionInfo = getSessionInfo(g_qnnSession);
    if (!g_sessionInfo) {
        __android_log_print(ANDROID_LOG_ERROR, "QNN", "getSessionInfo failed");
        return false;
    }
    g_zeekrGraph = &g_sessionInfo->graphInfo[0];
    print_sessionInfo(g_sessionInfo);
    double mem_after = getCurrentRSS();
    __android_log_print(ANDROID_LOG_INFO, "Memory", "QNN init: %.2f MB", mem_after - mem_before);
    return true;
}
void runInference(const std::vector<int32_t>& tokens) {
    if (!g_zeekrGraph || g_zeekrGraph->numInputTensors == 0) {
        __android_log_print(ANDROID_LOG_ERROR, "QNN", "Model not initialized");
        return;
    }
    ZeekrTensor& inTensor = g_zeekrGraph->input_TensorInfo[0];
    size_t copyCount = std::min(tokens.size(), (size_t)g_modelMaxInputLen);
    memcpy(inTensor.tensor_data, tokens.data(), copyCount * sizeof(int32_t));
    if (copyCount < (size_t)g_modelMaxInputLen) {
        memset((int32_t*)inTensor.tensor_data + copyCount, 0,
               (g_modelMaxInputLen - copyCount) * sizeof(int32_t));
    }
    session_Run(g_qnnSession, g_zeekrGraph);
    if (g_zeekrGraph->numOutputTensors > 0) {
        ZeekrTensor& outTensor = g_zeekrGraph->output_TensorInfo[0];
        float* output = reinterpret_cast<float*>(outTensor.tensor_data);
        printf("Inference Result: ");
        for (size_t i = 0; i < outTensor.data_size / sizeof(float); ++i) {
            printf("%.4f ", output[i]);
        }
        printf("\n");
    }
}
void showHelp() {
    printf("Usage: ./qnn_sample [OPTIONS]\n");
    printf("Options:\n");
    printf("  --tokenize <TEXT>   Tokenize input text\n");
    printf("  --init              Initialize model and tokenizer\n");
    printf("  --inference <TEXT>  Run inference with text\n");
    printf("  --generate <TEXT>   Run generation with text (top_k/temperature sampling)\n"); // [ADDED]
    printf("Required for initialization:\n");
    printf("  --backend <PATH>    QNN backend library path\n");
    printf("  --systemlib <PATH>  QNN system library path\n");
    printf("  --cached <PATH>     Cached model binary path\n");
    printf("  --tokenizer <PATH>  Tokenizer JSON file path\n");
}
void parseArgs(int argc, char** argv) {
    static const pal::Option long_options[] = {
        {"tokenize",  pal::required_argument, 0, 't'},
        {"init",      pal::no_argument,       0, 'i'},
        {"inference", pal::required_argument, 0, 'f'},
        {"generate",  pal::required_argument, 0, 'g'}, // [ADDED]
        {"backend",   pal::required_argument, 0, 'b'},
        {"systemlib", pal::required_argument, 0, 's'},
        {"cached",    pal::required_argument, 0, 'c'},
        {"tokenizer", pal::required_argument, 0, 'j'},
        {"help",      pal::no_argument,       0, 'h'},
        {0, 0, 0, 0}
    };
    int opt, option_index = 0;
    while ((opt = pal::getOptLongOnly(argc, (const char**)argv, "t:if:g:b:s:c:j:h",
                                      long_options, &option_index)) != -1) {
        switch (opt) {
            case 't': g_appConfig.inputText = pal::g_optArg; break;
            case 'i': break;
            case 'f': g_appConfig.inputText = pal::g_optArg; break;
            case 'g': if (pal::g_optArg) { g_appConfig.inputText = pal::g_optArg; } break;
            case 'b': g_appConfig.backendPath = pal::g_optArg; break;
            case 's': g_appConfig.systemLibraryPath = pal::g_optArg; break;
            case 'c': g_appConfig.cachedBinaryPath = pal::g_optArg; break;
            case 'j': g_appConfig.tokenizerJsonPath = pal::g_optArg; break;
            case 'h': showHelp(); exit(0);
            default:  showHelp(); exit(1);
        }
    }
}
int main(int argc, char** argv) {
    parseArgs(argc, argv);
    if (argc >= 2 && strcmp(argv[1], "--tokenize") == 0) {
        if (!g_appConfig.inputText.empty()) {
            if (initTokenizer()) {
                std::string tokens = tokenizeText(g_appConfig.inputText);
                printf("Tokenize Result:\nInput: \"%s\"\nTokens: \"%s\"\n",
                       g_appConfig.inputText.c_str(), tokens.c_str());
            }
        }
        return 0;
    }
    if (argc >= 2 && strcmp(argv[1], "--init") == 0) {
        bool tokenizerOK = initTokenizer();
        bool qnnOK = initQnnModel();
        printf("Initialization Status:\nTokenizer: %s\nQNN Model: %s\n",
               tokenizerOK ? "Success" : "Failed", qnnOK ? "Success" : "Failed");
        return 0;
    }
    if (argc >= 2 && strcmp(argv[1], "--inference") == 0) {
        std::ifstream fin("input.txt");
        if (!fin.is_open()) { printf("Cannot open input.txt\n"); return 1; }
        std::string line;
        std::getline(fin, line);
        g_appConfig.inputText = line;
        fin.close();
        if (g_appConfig.inputText.empty()) { printf("Input from input.txt is empty\n"); return 1; }
        if (!initTokenizer() || !initQnnModel()) {
            printf("Initialization failed, cannot run inference\n");
            return 1;
        }
        std::string tokenStr = tokenizeText(g_appConfig.inputText);
        std::vector<std::string> tokenList = splitString(tokenStr, " ");
        std::vector<int32_t> tokens;
        for (const auto& t : tokenList) { tokens.push_back(std::stoi(t)); }
        runInference(tokens);
        return 0;
    }
    if (argc >= 2 && strcmp(argv[1], "--generate") == 0) {
        std::ifstream fin("input.txt");
        if (!fin.is_open()) { printf("Cannot open input.txt\n"); return 1; }
        std::string line;
        std::getline(fin, line);
        g_appConfig.inputText = line;
        fin.close();
        if (g_appConfig.inputText.empty()) { printf("Input from input.txt is empty\n"); return 1; }
        if (!initTokenizer() || !initQnnModel()) {
            printf("Initialization failed, cannot run generation\n");
            return 1;
        }
        std::string tokenStr = tokenizeText(g_appConfig.inputText);
        std::vector<std::string> tokenList = splitString(tokenStr, " ");
        std::vector<int> initTokens;
        for (const auto& t : tokenList) { initTokens.push_back(std::stoi(t)); }
        std::vector<int> generatedIds = generateSequence(initTokens);
        std::string finalText = decodeTokens(generatedIds);
        printf("Generated text: %s\n", finalText.c_str());
        return 0;
    }
    showHelp();
    return 1;
}
After compiling and running the code above, the input `<s> user\n2D上北下南显示</s> \n<s> assistant\n` comes back as `<s> user\n2D上北下南显示</s> \n<s> assistant\n]]ro`. Why does this happen? In my tokenizer.json, `<s>` is the begin symbol with value 1 and `</s>` is the end symbol with value 2.
Judging from the code you provided and the behavior you describe, the problem most likely lies in how the tokenizer or the model parses the input or produces tokens. Specifically, the input text `<s> user\n2D上北下南显示</s> \n<s> assistant\n` is being turned into `<s> user\n2D上北下南显示</s> \n<s> assistant\n]]ro`. The likely causes:

1. The special tokens are not parsed as special tokens. In tokenizeText, the input first passes through the tokenizer. You say `<s>` is the begin symbol (token value 1) and `</s>` the end symbol (token value 2), but if the tokenizer's initialization or its encoding step does not recognize the boundaries of these special markers, `<s>` and `</s>` get split into ordinary sub-word pieces, and the resulting token IDs are wrong.

2. The model mishandles the special symbols. Even with correct IDs, the model may treat `<s>` and `</s>` as ordinary characters or tokens if it never properly learned their context, or if these symbols were not defined correctly during training.

3. The generation or decoding path mangles the IDs. generateSequence produces new tokens from the given prompt tokens; if the tokens are not passed through correctly, or the IDs the model outputs are not decoded back properly, unexpected fragments such as `]]ro` can appear in the output.

First, make sure your tokenizer.json file really contains all the necessary special symbols (`<s>` and `</s>`) with their expected IDs, and that the tokenizer initializes without errors. Then print the tokenization result and confirm the input text is being processed correctly.
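As a quick check, a minimal sketch along these lines reuses the tokenizers-cpp C calls your program already makes to encode a probe string containing only the markers and print the resulting IDs (treating the fourth argument of tokenizers_encode as an add-special-tokens flag is an assumption about that API; checkSpecialTokens itself is a hypothetical helper):

```cpp
// Hypothetical sanity check: encode the special markers and verify their IDs.
// Assumes g_tokenizer was already initialized via initTokenizer().
static void checkSpecialTokens() {
#ifdef SUPPORT_TOKENIZERS
    const char* probe = "<s></s>";
    TokenizerEncodeResult result;
    // Fourth argument assumed to be add_special_tokens, as elsewhere in this file.
    tokenizers_encode(g_tokenizer, probe, strlen(probe), 0, &result);
    printf("[CHECK] \"<s></s>\" ->");
    for (size_t i = 0; i < result.len; ++i) {
        printf(" %d", result.token_ids[i]); // expected output: 1 2
    }
    printf("\n");
    tokenizers_free_encode_results(&result, 1);
#endif
}
```

If more than two IDs are printed, the markers are being split into ordinary sub-word pieces rather than mapped to IDs 1 and 2.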
You can add debug output around tokenizeText to check whether the tokenizer parsed `<s>` and `</s>` correctly:

```cpp
std::string tokenStr = tokenizeText(g_appConfig.inputText);
printf("[DEBUG] Tokenized IDs: %s\n", tokenStr.c_str());
```

If the tokenization rules turn out to be wrong, adjust the tokenizer configuration file or re-verify the tokenizer initialization.
You mentioned that `<s>` is the begin symbol and `</s>` the end symbol. If the model nevertheless generates invalid tokens (such as `]]ro`), inspect the model's output layer and the decoding path: log the generated token IDs alongside the decoded text, and make sure the tokens are converted back to a string correctly throughout generation:

```cpp
std::vector<int> generatedIds = generateSequence(initTokens);
std::string finalText = decodeTokens(generatedIds);
printf("Generated text: %s\n", finalText.c_str());
```

Also review the implementation of decodeTokens and confirm that token IDs round-trip back to the original text.
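A small round-trip test built from the program's own helpers (tokenizeText, splitString, decodeTokens) can confirm this end to end. This is only a sketch and assumes the tokenizer has been initialized; roundTripCheck is a hypothetical helper:

```cpp
// Hypothetical round-trip check: text -> ids -> text should survive intact
// (up to how special tokens are rendered). Assumes initTokenizer() succeeded.
static void roundTripCheck(const std::string& text) {
    std::string idStr = tokenizeText(text);                  // e.g. "1 396 ..."
    std::vector<std::string> parts = splitString(idStr, " ");
    std::vector<int> ids;
    for (const auto& p : parts) {
        if (!p.empty()) ids.push_back(std::stoi(p));
    }
    std::string back = decodeTokens(ids);
    printf("[CHECK] in : %s\n[CHECK] out: %s\n", text.c_str(), back.c_str());
}
```

If the text already differs after this round trip, the corruption happens in encode/decode, not in the model.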
In generateSequence you use temperature (temperature), a repetition penalty (repetitionPenalty), and top-K sampling (topK). Badly chosen values can make the model generate implausible tokens, so try adjusting these parameters, for example raising or lowering temperature or changing topK, and see whether the output improves. (Note that as written, sampleNextToken receives repetitionPenalty but never applies it.)
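One quick way to rule the sampler in or out is to force greedy decoding: sampleNextToken already falls back to picking the arg-max logit when temperature is 0, so temporarily calling it with fixed debug values inside generateSequence makes the output deterministic (a debugging sketch, not recommended settings):

```cpp
// Debug experiment: temperature 0 takes the greedy arg-max path in
// sampleNextToken, and topK 0 disables top-K filtering entirely.
int nextTokenId = sampleNextToken(logits, result,
                                  /*temperature=*/0.0f,
                                  /*repetitionPenalty=*/1.0f,
                                  /*topK=*/0);
```

If `]]ro` still shows up under greedy decoding, the problem lies in the token IDs or the decode step rather than in the sampling parameters.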
Finally, make sure `<s>` and `</s>` are converted correctly during decoding. Working through the steps above should let you pinpoint the root cause and make the corresponding fix.