Veja o seguinte projeto Delphi: pPDFPowerToolsCon...
Tạo vào: 14 tháng 3, 2025
Trả lời bằng O3 Mini High bởi Chat01
Tạo vào: 14 tháng 3, 2025
Trả lời bằng O3 Mini High bởi Chat01
Veja o seguinte projeto Delphi:
pPDFPowerToolsConsoleApp.dpr:
program{$APPTYPE CONSOLE} uses System.SysUtils, uFileSystemUtilities, uErrorHandling, uProcessExecution, System.Classes, Winapi.Windows, uXPDFWrapper; function StringCchCopy(pszDest: PWideChar; cchDest: ULONGLONG; pszSrc: PWideChar): HRESULT; stdcall; external 'strsafe.dll' name 'StringCchCopyW'; type TUTF8NoBOMEncoding = class(TUTF8Encoding) public function GetPreamble: TBytes; override; end; function TUTF8NoBOMEncoding.GetPreamble: TBytes; begin // Sobrescreve para retornar nenhum BOM Result := []; end; procedure RunCoreTests; var Output, ErrorOutput: string; begin // Existing tests (Test 1-4 remain the same) // ... end; procedure RunXPDFWrapperTests; var PDFPath: string; TextContent, Version: string; Metadata: TStringList; ConsoleStream: THandleStream; Encoding: TUTF8NoBOMEncoding; // use a nova classe begin Encoding := TUTF8NoBOMEncoding.Create; // sem parâmetros Writeln('Testing XPDF Wrapper functionalities...'); // Test 1: Text Extraction PDFPath := 'sample.pdf'; // Substitua por um PDF válido if not FileExists(PDFPath) then begin LogError('Test file not found: ' + PDFPath); Exit; end; if TXPDFWrapper.ExtractTextFromPDF(PDFPath, TextContent) then begin Writeln('Text extraction succeeded.'); Writeln('First 100 chars: ' + Copy(TextContent, 1, 100)); end else Writeln('Text extraction failed.'); // Test 2: Version Check if TXPDFWrapper.GetPDFVersion(PDFPath, Version) then Writeln('PDF Version: ' + Version) else Writeln('Version check failed.'); // Cria um THandleStream apontando para o handle da saída padrão (STDOUT) ConsoleStream := THandleStream.Create(GetStdHandle(STD_OUTPUT_HANDLE)); // Test 3: Metadata Extraction Metadata := TStringList.Create; try if TXPDFWrapper.ExtractMetadata(PDFPath, Metadata) then begin Writeln('Metadata:'); try Metadata.SaveToStream(ConsoleStream, Encoding); finally Encoding.Free; end; end else Writeln('Metadata extraction failed.'); finally Metadata.Free; ConsoleStream.Free; end; end; // Add to pPDFPowerToolsConsoleApp.dpr procedure SetConsoleToUnicodeFont; var ConsoleHandle: THandle; FontInfo: CONSOLE_FONT_INFOEX; begin ConsoleHandle := GetStdHandle(STD_OUTPUT_HANDLE); ZeroMemory(@FontInfo, SizeOf(FontInfo)); FontInfo.cbSize := SizeOf(FontInfo); FontInfo.nFont := 0; FontInfo.dwFontSize.X := 0; FontInfo.dwFontSize.Y := 16; FontInfo.FontFamily := FF_DONTCARE; FontInfo.FontWeight := FW_NORMAL; StringCchCopy(FontInfo.FaceName, LF_FACESIZE, 'Lucida Console'); SetCurrentConsoleFontEx(ConsoleHandle, False, FontInfo); end; begin // Set console to UTF-8 mode (Windows-specific) SetConsoleOutputCP(CP_UTF8); SetConsoleCP(CP_UTF8); try Writeln('Initializing PDF Power Tools...'); TDependencyManager.Initialize; if TDependencyManager.CheckDependencies then Writeln('All dependencies are present.') else Writeln('Dependency checks failed. See log for details.'); // Run core process execution tests RunCoreTests; // Run new XPDF Wrapper tests RunXPDFWrapperTests; Writeln('All tests completed. Press Enter to exit.'); ReadLn; except on E: Exception do begin LogCritical('Unhandled exception: ' + E.Message); Writeln('Critical error: ' + E.Message); ReadLn; end; end; end.
uErrorHandling.pas:
unitinterface uses System.SysUtils, System.Classes, Generics.Collections, Winapi.Windows; type // Log Level Enum TLogLevel = ( llDebug, // Very detailed information, useful for debugging llInfo, // General information about application flow llWarning, // Potential issues, non-critical errors llError, // Errors that prevent normal operation of a feature llCritical, // Critical errors that may cause application instability or data loss llNone // No logging ); // Log message event type TLogMessageEvent = procedure(Sender: TObject; const Message: string; LogLevel: TLogLevel) of object; // Base Exception Class EPDFPowerToolsException = class(Exception) private FErrorCode: Integer; FErrorTime: TDateTime; FSourceModule: string; FInnerException: Exception; FContextInfo: string; // General context info, e.g. command line, file path, etc. protected function GetFormattedMessage: string; virtual; public constructor Create(const AMessage: string); overload; constructor Create(const AMessage: string; ErrorCode: Integer); overload; constructor Create(const AMessage: string; ErrorCode: Integer; const SourceModule: string); overload; constructor Create(const AMessage: string; ErrorCode: Integer; const SourceModule: string; InnerException: Exception); overload; constructor CreateFmt(const Format: string; const Args: array of const); overload; constructor CreateFmt(const Format: string; const Args: array of const; ErrorCode: Integer); overload; constructor CreateFmt(const Format: string; const Args: array of const; ErrorCode: Integer; const SourceModule: string); overload; constructor CreateFmt(const Format: string; const Args: array of const; ErrorCode: Integer; const SourceModule: string; InnerException: Exception); overload; property ErrorCode: Integer read FErrorCode; property ErrorTime: TDateTime read FErrorTime; property SourceModule: string read FSourceModule; property InnerException: Exception read FInnerException; property ContextInfo: string read FContextInfo write FContextInfo; function GetFullErrorMessage: string; virtual; destructor Destroy; override; end; // Specific Exception Types (categorized) EPDFFileException = class(EPDFPowerToolsException); // File related errors (not found, access denied, etc.) EPDFProcessException = class(EPDFPowerToolsException); // Process execution errors (timeout, command failure) EPDFFormatException = class(EPDFPowerToolsException); // PDF format errors (invalid PDF, corrupted file) EPDFConfigurationException = class(EPDFPowerToolsException); // Configuration file errors, missing settings EPDFAPIUsageException = class(EPDFPowerToolsException); // Errors due to incorrect API usage by the developer EPDFDependencyException = class(EPDFPowerToolsException); // Missing or incompatible dependencies (DLLs, EXEs) EPDFSecurityException = class(EPDFPowerToolsException); // PDF security/encryption related errors // Interface for Log Targets (for extensibility) ILogTarget = interface ['{B62F1DC5-338A-40D0-B781-0383F39DF764}'] procedure LogMessage(const Message: string; LogLevel: TLogLevel); end; // Console Log Target TConsoleLogTarget = class(TInterfacedObject, ILogTarget) private FUseStdErr: Boolean; public constructor Create(UseStdErr: Boolean = False); procedure LogMessage(const Message: string; LogLevel: TLogLevel); reintroduce; virtual; property UseStdErr: Boolean read FUseStdErr write FUseStdErr; end; // File Log Target TFileLogTarget = class(TInterfacedObject, ILogTarget) private FLogFilePath: string; FIsLogFileOpen: Boolean; FLogFile: TextFile; procedure EnsureLogFileOpen; procedure CloseLogFile; protected procedure WriteToLogFile(const Message: string); public constructor Create(const LogFilePath: string); destructor Destroy; override; procedure LogMessage(const Message: string; LogLevel: TLogLevel); reintroduce; virtual; property LogFilePath: string read FLogFilePath write FLogFilePath; end; // Event Log Target (Windows specific) {$IFDEF MSWINDOWS} TEventLogTarget = class(TInterfacedObject, ILogTarget) private FEventSourceName: string; protected function GetEventType(LogLevel: TLogLevel): Word; inline; public constructor Create(const EventSourceName: string); procedure LogMessage(const Message: string; LogLevel: TLogLevel); reintroduce; virtual; property EventSourceName: string read FEventSourceName write FEventSourceName; end; {$ENDIF} // Centralized Error Logger (Singleton) TErrorLogger = class private class var FInstance: TErrorLogger; FLogLevel: TLogLevel; FLogTargets: TList<ILogTarget>; FOnLogMessage: TLogMessageEvent; class function GetLogLevel: TLogLevel; static; inline; class procedure SetLogLevel(const Value: TLogLevel); static; inline; class function GetOnLogMessage: TLogMessageEvent; static; inline; class procedure SetOnLogMessage(const Value: TLogMessageEvent); static; inline; constructor Create; destructor Destroy; override; protected procedure InternalLog(const Message: string; LogLevel: TLogLevel); public class function GetInstance: TErrorLogger; inline; class procedure Initialize; class procedure FinalizeLogger; class property LogLevel: TLogLevel read GetLogLevel write SetLogLevel; class property OnLogMessage: TLogMessageEvent read GetOnLogMessage write SetOnLogMessage; procedure AddLogTarget(LogTarget: ILogTarget); inline; procedure ClearLogTargets; inline; procedure Debug(const Message: string); overload; inline; procedure DebugFmt(const Format: string; const Args: TArray<TVarRec>); overload; inline; procedure Info(const Message: string); overload; inline; procedure InfoFmt(const Format: string; const Args: TArray<TVarRec>); overload; inline; procedure Warning(const Message: string); overload; inline; procedure WarningFmt(const Format: string; const Args: TArray<TVarRec>); overload; inline; procedure Error(const Message: string); overload; inline; procedure ErrorFmt(const Format: string; const Args: TArray<TVarRec>); overload; inline; procedure Critical(const Message: string); overload; inline; procedure CriticalFmt(const Format: string; const Args: TArray<TVarRec>); overload; inline; end; // Helper function to convert LogLevel to String function LogLevelToString(LogLevel: TLogLevel): string; inline; // Global Logging Procedures procedure LogDebug(const Message: string); overload; inline; procedure LogDebugFmt(const Format: string; const Args: TArray<TVarRec>); overload; inline; procedure LogInfo(const Message: string); overload; inline; procedure LogInfoFmt(const Format: string; const Args: TArray<TVarRec>); overload; inline; procedure LogWarning(const Message: string); overload; inline; procedure LogWarningFmt(const Format: string; const Args: TArray<TVarRec>); overload; inline; procedure LogError(const Message: string); overload; inline; procedure LogErrorFmt(const Format: string; const Args: TArray<TVarRec>); overload; inline; procedure LogCritical(const Message: string); overload; inline; procedure LogCriticalFmt(const Format: string; const Args: TArray<TVarRec>); overload; inline; const LvlStr: array[TLogLevel] of string = ('Debug', 'Info', 'Warning', 'Error', 'Critical', 'None'); implementation uses DateUtils, SyncObjs, System.StrUtils, System.Generics.Collections; { EPDFPowerToolsException } constructor EPDFPowerToolsException.Create(const AMessage: string); begin inherited Create(AMessage); FErrorCode := 0; FErrorTime := Now; FSourceModule := ''; FInnerException := nil; FContextInfo := ''; end; constructor EPDFPowerToolsException.Create(const AMessage: string; ErrorCode: Integer); begin inherited Create(AMessage); FErrorCode := ErrorCode; FErrorTime := Now; FSourceModule := ''; FInnerException := nil; FContextInfo := ''; end; constructor EPDFPowerToolsException.Create(const AMessage: string; ErrorCode: Integer; const SourceModule: string); begin inherited Create(AMessage); FErrorCode := ErrorCode; FErrorTime := Now; FSourceModule := SourceModule; FInnerException := nil; FContextInfo := ''; end; constructor EPDFPowerToolsException.Create(const AMessage: string; ErrorCode: Integer; const SourceModule: string; InnerException: Exception); begin inherited Create(AMessage); FErrorCode := ErrorCode; FErrorTime := Now; FSourceModule := SourceModule; FInnerException := InnerException; FContextInfo := ''; end; constructor EPDFPowerToolsException.CreateFmt(const Format: string; const Args: array of const); begin Create(System.SysUtils.Format(Format, Args)); end; constructor EPDFPowerToolsException.CreateFmt(const Format: string; const Args: array of const; ErrorCode: Integer); begin Create(System.SysUtils.Format(Format, Args), ErrorCode); end; constructor EPDFPowerToolsException.CreateFmt(const Format: string; const Args: array of const; ErrorCode: Integer; const SourceModule: string); begin Create(System.SysUtils.Format(Format, Args), ErrorCode, SourceModule); end; constructor EPDFPowerToolsException.CreateFmt(const Format: string; const Args: array of const; ErrorCode: Integer; const SourceModule: string; InnerException: Exception); begin Create(System.SysUtils.Format(Format, Args), ErrorCode, SourceModule, InnerException); end; destructor EPDFPowerToolsException.Destroy; begin FInnerException := nil; // Do not free inner exception (just a reference) inherited; end; function EPDFPowerToolsException.GetFormattedMessage: string; begin Result := Message; end; function EPDFPowerToolsException.GetFullErrorMessage: string; var sb: TStringBuilder; begin sb := TStringBuilder.Create; try sb.Append('Error Time: ').Append(FormatDateTime('yyyy-mm-dd hh:nn:ss.zzz', FErrorTime)).AppendLine; sb.Append('Error Level: ').Append(FSourceModule).AppendLine; sb.Append('Error Code: ').Append(IntToStr(FErrorCode)).AppendLine; if FSourceModule <> '' then sb.Append('Source Module: ').Append(FSourceModule).AppendLine; sb.Append('Message: ').Append(GetFormattedMessage).AppendLine; if FContextInfo <> '' then sb.Append('Context Info: ').Append(FContextInfo).AppendLine; if Assigned(FInnerException) then begin sb.AppendLine; sb.AppendLine('--- Inner Exception ---'); if FInnerException is EPDFPowerToolsException then sb.Append(EPDFPowerToolsException(FInnerException).GetFullErrorMessage) else sb.Append(FInnerException.ClassName).Append(': ').Append(FInnerException.Message); end; Result := sb.ToString; finally sb.Free; end; end; { TConsoleLogTarget } constructor TConsoleLogTarget.Create(UseStdErr: Boolean); begin inherited Create; FUseStdErr := UseStdErr; end; procedure TConsoleLogTarget.LogMessage(const Message: string; LogLevel: TLogLevel); var OutputHandle: THandle; NumWritten: Cardinal; begin if FUseStdErr then OutputHandle := GetStdHandle(STD_ERROR_HANDLE) else OutputHandle := GetStdHandle(STD_OUTPUT_HANDLE); if OutputHandle <> INVALID_HANDLE_VALUE then WriteConsole(OutputHandle, PChar(Message), Length(Message), NumWritten, nil); end; { TFileLogTarget } constructor TFileLogTarget.Create(const LogFilePath: string); begin inherited Create; FLogFilePath := LogFilePath; FIsLogFileOpen := False; end; destructor TFileLogTarget.Destroy; begin CloseLogFile; inherited; end; procedure TFileLogTarget.EnsureLogFileOpen; begin if not FIsLogFileOpen then begin try AssignFile(FLogFile, FLogFilePath); Append(FLogFile); // or Rewrite, per requirements if IOResult <> 0 then raise Exception.CreateFmt('Could not open log file for appending: %s. I/O Error: %d', [FLogFilePath, IOResult]); FIsLogFileOpen := True; except on E: Exception do begin // Fallback: log to console if file open fails TConsoleLogTarget.Create(True) .LogMessage('[CRITICAL] Failed to open log file: ' + E.Message + '. Logging to console instead.', llCritical); FIsLogFileOpen := False; end; end; end; end; procedure TFileLogTarget.CloseLogFile; begin if FIsLogFileOpen then begin try CloseFile(FLogFile); finally FIsLogFileOpen := False; end; end; end; procedure TFileLogTarget.WriteToLogFile(const Message: string); begin EnsureLogFileOpen; if FIsLogFileOpen then begin try Writeln(FLogFile, Message); Flush(FLogFile); except on E: Exception do begin TConsoleLogTarget.Create(True) .LogMessage('[WARNING] Failed to write to log file: ' + E.Message + '. Message: ' + Message, llWarning); CloseLogFile; // Close to force re-open on next log end; end; end; end; procedure TFileLogTarget.LogMessage(const Message: string; LogLevel: TLogLevel); begin WriteToLogFile(Message); end; {$IFDEF MSWINDOWS} { TEventLogTarget } constructor TEventLogTarget.Create(const EventSourceName: string); begin inherited Create; FEventSourceName := EventSourceName; end; function TEventLogTarget.GetEventType(LogLevel: TLogLevel): Word; begin case LogLevel of llCritical, llError: Result := EVENTLOG_ERROR_TYPE; llWarning: Result := EVENTLOG_WARNING_TYPE; llInfo, llDebug: Result := EVENTLOG_INFORMATION_TYPE; else Result := EVENTLOG_INFORMATION_TYPE; end; end; procedure TEventLogTarget.LogMessage(const Message: string; LogLevel: TLogLevel); var EventLogHandle: THandle; EventType: Word; Strings: array [0..0] of PChar; begin EventLogHandle := RegisterEventSource(nil, PChar(FEventSourceName)); if EventLogHandle <> 0 then begin try EventType := GetEventType(LogLevel); Strings[0] := PChar(Message); ReportEvent(EventLogHandle, EventType, 0, 0, nil, 1, 0, @Strings, nil); finally DeregisterEventSource(EventLogHandle); end; end else TConsoleLogTarget.Create(True) .LogMessage('[WARNING] Failed to write to Windows Event Log (Source: ' + FEventSourceName + '). Logging to console instead.', llWarning); end; {$ENDIF} { TErrorLogger } class function TErrorLogger.GetInstance: TErrorLogger; begin if FInstance = nil then FInstance := TErrorLogger.Create; Result := FInstance; end; class procedure TErrorLogger.Initialize; begin if FInstance = nil then GetInstance; end; class procedure TErrorLogger.FinalizeLogger; begin if FInstance <> nil then begin FInstance.Free; FInstance := nil; end; end; constructor TErrorLogger.Create; begin inherited Create; FLogLevel := llWarning; // Default log level FLogTargets := TList<ILogTarget>.Create; FOnLogMessage := nil; end; destructor TErrorLogger.Destroy; begin ClearLogTargets; FLogTargets.Free; inherited; end; class function TErrorLogger.GetLogLevel: TLogLevel; begin Result := GetInstance.FLogLevel; end; class procedure TErrorLogger.SetLogLevel(const Value: TLogLevel); begin GetInstance.FLogLevel := Value; end; class function TErrorLogger.GetOnLogMessage: TLogMessageEvent; begin Result := GetInstance.FOnLogMessage; end; class procedure TErrorLogger.SetOnLogMessage(const Value: TLogMessageEvent); begin GetInstance.FOnLogMessage := Value; end; procedure TErrorLogger.InternalLog(const Message: string; LogLevel: TLogLevel); var i: Integer; FormattedMessage: string; sb: TStringBuilder; // Use TStringBuilder begin if LogLevel < FLogLevel then Exit; sb := TStringBuilder.Create; try sb.Append(FormatDateTime('yyyy-mm-dd hh:nn:ss.zzz', Now)); sb.Append(' - [').Append(LogLevelToString(LogLevel)).Append('] - '); sb.Append(Message).Append(sLineBreak); FormattedMessage := sb.ToString; finally sb.Free; end; for i := 0 to FLogTargets.Count - 1 do FLogTargets[i].LogMessage(FormattedMessage, LogLevel); if Assigned(FOnLogMessage) then FOnLogMessage(Self, FormattedMessage, LogLevel); end; procedure TErrorLogger.AddLogTarget(LogTarget: ILogTarget); begin if LogTarget <> nil then FLogTargets.Add(LogTarget); end; procedure TErrorLogger.ClearLogTargets; begin FLogTargets.Clear; end; procedure TErrorLogger.Debug(const Message: string); begin InternalLog(Message, llDebug); end; procedure TErrorLogger.DebugFmt(const Format: string; const Args: TArray<TVarRec>); begin Debug(System.SysUtils.Format(Format, Args)); end; procedure TErrorLogger.Info(const Message: string); begin InternalLog(Message, llInfo); end; procedure TErrorLogger.InfoFmt(const Format: string; const Args: TArray<TVarRec>); begin Info(System.SysUtils.Format(Format, Args)); end; procedure TErrorLogger.Warning(const Message: string); begin InternalLog(Message, llWarning); end; procedure TErrorLogger.WarningFmt(const Format: string; const Args: TArray<TVarRec>); begin Warning(System.SysUtils.Format(Format, Args)); end; procedure TErrorLogger.Error(const Message: string); begin InternalLog(Message, llError); end; procedure TErrorLogger.ErrorFmt(const Format: string; const Args: TArray<TVarRec>); begin Error(System.SysUtils.Format(Format, Args)); end; procedure TErrorLogger.Critical(const Message: string); begin InternalLog(Message, llCritical); end; procedure TErrorLogger.CriticalFmt(const Format: string; const Args: TArray<TVarRec>); begin Critical(System.SysUtils.Format(Format, Args)); end; function LogLevelToString(LogLevel: TLogLevel): string; begin Result := LvlStr[LogLevel]; end; { Global Logging Procedures } procedure LogDebug(const Message: string); begin TErrorLogger.GetInstance.Debug(Message); end; procedure LogDebugFmt(const Format: string; const Args: TArray<TVarRec>); begin TErrorLogger.GetInstance.DebugFmt(Format, Args); end; procedure LogInfo(const Message: string); begin TErrorLogger.GetInstance.Info(Message); end; procedure LogInfoFmt(const Format: string; const Args: TArray<TVarRec>); begin TErrorLogger.GetInstance.InfoFmt(Format, Args); end; procedure LogWarning(const Message: string); begin TErrorLogger.GetInstance.Warning(Message); end; procedure LogWarningFmt(const Format: string; const Args: TArray<TVarRec>); begin TErrorLogger.GetInstance.WarningFmt(Format, Args); end; procedure LogError(const Message: string); begin TErrorLogger.GetInstance.Error(Message); end; procedure LogErrorFmt(const Format: string; const Args: TArray<TVarRec>); begin TErrorLogger.GetInstance.ErrorFmt(Format, Args); end; procedure LogCritical(const Message: string); begin TErrorLogger.GetInstance.Critical(Message); end; procedure LogCriticalFmt(const Format: string; const Args: TArray<TVarRec>); begin TErrorLogger.GetInstance.CriticalFmt(Format, Args); end; initialization TErrorLogger.Initialize; // Initialize logger on unit startup finalization TErrorLogger.FinalizeLogger; // Finalize logger on unit shutdown end.
uFileSystemUtilities.pas:
unitinterface uses System.SysUtils, System.Classes, System.IniFiles, System.Types, Winapi.ShellAPI, Winapi.Windows, IdHTTP, IdException, IdSSLOpenSSL, // Add IdSSLOpenSSL for SSL handler System.Zip, IOUtils, uErrorHandling, System.JSON; // Add System.JSON const // Constants for XPDF Tools location, download URL, and required files GITHUB_API_LATEST_RELEASE_URL = 'https://api.github.com/repos/oschwartz10612/poppler-windows/releases/latest'; XPDF_TOOLS_DIR = 'SuiteXPDFTools'; REQUIRED_XPDF_FILES: array of string = ['cairo.dll', 'charset.dll', 'deflate.dll', 'expat.dll', 'fontconfig-1.dll', 'freetype.dll', 'iconv.dll', 'jpeg8.dll', 'lcms2.dll', 'Lerc.dll', 'libcrypto-3-x64.dll', 'libcurl.dll', 'libexpat.dll', 'liblzma.dll', 'libpng16.dll', 'libssh2.dll', 'libtiff.dll', 'libzstd.dll', 'openjp2.dll', 'pdfattach.exe', 'pdfdetach.exe', 'pdffonts.exe', 'pdfimages.exe', 'pdfinfo.exe', 'pdfseparate.exe', 'pdftocairo.exe', 'pdftohtml.exe', 'pdftoppm.exe', 'pdftops.exe', 'pdftotext.exe', 'pdfunite.exe', 'pixman-1-0.dll', 'poppler-cpp.dll', 'poppler-glib.dll', 'poppler.dll', 'tiff.dll', 'zlib.dll', 'zstd.dll', 'zstd.exe']; var // Pre-calculate the application path gAppPath: string; type TFileChecker = class private class var FFullPath: string; // Make FullPath a class variable public class function CheckFileExists(const AFilePath: string): Boolean; inline; static; class function CheckDirectoryExists(const ADirectoryPath: string): Boolean; inline; static; class function CheckXPDFExecutableExists(const ExecutableName: string; const SearchPaths: TArray<string>): Boolean; inline; static; class function CheckFileExistsBool(const AFilePath: string): Boolean; inline; static; class function CheckDirectoryExistsBool(const ADirectoryPath: string) : Boolean; inline; static; class function GetFullPath: string; static; end; TXPDFRCParser = class private FConfig: TIniFile; function GetStringValue(const Section, Key, DefaultValue: string) : string; inline; function GetBoolValue(const Section, Key: string; DefaultValue: Boolean) : Boolean; inline; function LocateConfigFile: Boolean; inline; public constructor Create; destructor Destroy; override; function GetTextEncoding: string; inline; end; TDependencyManager = class private class var FInstance: TDependencyManager; class var FXPDFRCParser: TXPDFRCParser; FInitialized: Boolean; public class function GetInstance: TDependencyManager; inline; class procedure Initialize; class procedure FinalizeManager; class function InitializeXPDFToolsPath: Boolean; static; class function GetXPDFRCParser: TXPDFRCParser; inline; static; class function CheckDependencies: Boolean; constructor Create; destructor Destroy; override; end; procedure CreateOutputDirectory(const DirPath: string); inline; function GetTempDirectory: string; inline; function GenerateUniqueFileName(const Directory: string; const FilePrefix: string; const FileExtension: string): string; procedure CleanDirectory(const Directory: string); procedure DownloadAndExtractXPDFTools; function GetLatestReleaseURLFromGitHub: string; implementation uses DateUtils, System.StrUtils; {$R-} {$Q-} // Disable range and overflow checks function GetExecutablePath(const ExecutableName: string; var ExecutablePath: string): Boolean; inline; var Buffer: array [0 .. MAX_PATH - 1] of Char; begin if (ExecutableName = '') then begin ExecutablePath := ''; Result := False; Exit; end; if FindExecutable(PChar(ExecutableName), nil, Buffer) > 32 then begin ExecutablePath := String(Buffer); // Correctly convert to string Result := True; end else begin ExecutablePath := ''; Result := False; end; end; class function TFileChecker.CheckFileExists(const AFilePath: string): Boolean; begin if AFilePath = '' then begin raise EPDFFileException.Create ('Empty file path provided to CheckFileExists'); Result := False; Exit; end; FFullPath := ExpandFileName(AFilePath); Result := FileExists(FFullPath); if not Result then raise EPDFFileException.CreateFmt('File not found: %s', [FFullPath]); end; class function TFileChecker.CheckDirectoryExists(const ADirectoryPath : string): Boolean; begin if ADirectoryPath = '' then begin raise EPDFFileException.Create ('Empty directory path provided to CheckDirectoryExists'); Result := False; Exit; end; FFullPath := ExpandFileName(ADirectoryPath); Result := DirectoryExists(FFullPath); if not Result then raise EPDFFileException.CreateFmt('Directory not found: %s', [FFullPath]); end; class function TFileChecker.GetFullPath: string; begin Result := IncludeTrailingPathDelimiter(gAppPath + XPDF_TOOLS_DIR) + 'pdftotext.exe'; // Similarly for other tools like pdfinfo.exe end; class function TFileChecker.CheckXPDFExecutableExists(const ExecutableName : string; const SearchPaths: TArray<string>): Boolean; var ExecutablePath: string; SearchDir: string; I: Integer; SafeExecutableName: string; begin // Guard against empty executable name if (ExecutableName = '') then begin LogError('Empty executable name provided to CheckXPDFExecutableExists'); raise EPDFDependencyException.Create ('Invalid XPDF executable name: No name provided'); end; // Ensure executable name has .exe extension SafeExecutableName := ExecutableName; if not EndsText('.exe', SafeExecutableName) then SafeExecutableName := SafeExecutableName + '.exe'; // 1. Check in SuiteXPDFTools directory. if FileExists(IncludeTrailingPathDelimiter(gAppPath + XPDF_TOOLS_DIR) + SafeExecutableName) then begin FFullPath := IncludeTrailingPathDelimiter(gAppPath + XPDF_TOOLS_DIR) + SafeExecutableName; Result := True; Exit; end; // 2. Check in specified search paths. for I := Low(SearchPaths) to High(SearchPaths) do begin SearchDir := SearchPaths[I]; if (SearchDir <> '') and FileExists(IncludeTrailingPathDelimiter(SearchDir) + SafeExecutableName) then begin FFullPath := IncludeTrailingPathDelimiter(SearchDir) + SafeExecutableName; Result := True; Exit; end; end; // 3. Check default path (using FindExecutable) if GetExecutablePath(SafeExecutableName, ExecutablePath) then begin FFullPath := ExecutablePath; Result := True; Exit; end; // 4. If not found, raise exception with the correct filename FFullPath := ''; raise EPDFDependencyException.CreateFmt ('XPDF Executable not found: %s. Ensure it is in SuiteXPDFTools directory, your PATH, or the application directory.', [SafeExecutableName]); end; class function TFileChecker.CheckFileExistsBool(const AFilePath : string): Boolean; begin if AFilePath = '' then begin Result := False; Exit; end; FFullPath := ExpandFileName(AFilePath); Result := FileExists(FFullPath); end; class function TFileChecker.CheckDirectoryExistsBool(const ADirectoryPath : string): Boolean; begin if ADirectoryPath = '' then begin Result := False; Exit; end; FFullPath := ExpandFileName(ADirectoryPath); Result := DirectoryExists(FFullPath); end; constructor TXPDFRCParser.Create; var ConfigFile: string; begin inherited Create; FConfig := nil; ConfigFile := gAppPath + 'xpdfrc'; // Use pre-calculated path if FileExists(ConfigFile) then FConfig := TIniFile.Create(ConfigFile); end; destructor TXPDFRCParser.Destroy; begin if Assigned(FConfig) then // Check if FConfig is assigned FConfig.Free; inherited; end; function TXPDFRCParser.LocateConfigFile: Boolean; begin Result := FileExists(gAppPath + 'xpdfrc'); // Use pre-calculated path end; function TXPDFRCParser.GetStringValue(const Section, Key, DefaultValue: string): string; begin if Assigned(FConfig) then Result := FConfig.ReadString(Section, Key, DefaultValue) else Result := DefaultValue; end; function TXPDFRCParser.GetBoolValue(const Section, Key: string; DefaultValue: Boolean): Boolean; begin if Assigned(FConfig) then Result := FConfig.ReadBool(Section, Key, DefaultValue) else Result := DefaultValue; end; function TXPDFRCParser.GetTextEncoding: string; begin Result := GetStringValue('Text Control', 'textEncoding', 'Latin1'); end; class function TDependencyManager.GetInstance: TDependencyManager; begin if FInstance = nil then FInstance := TDependencyManager.Create; Result := FInstance; end; class procedure TDependencyManager.Initialize; begin if FInstance = nil then GetInstance; end; class procedure TDependencyManager.FinalizeManager; begin if FInstance <> nil then begin FInstance.Free; FInstance := nil; end; end; constructor TDependencyManager.Create; begin inherited Create; FXPDFRCParser := nil; FInitialized := False; end; destructor TDependencyManager.Destroy; begin if Assigned(FXPDFRCParser) then // Check if FXPDFRCParser is assigned FXPDFRCParser.Free; inherited; end; class function TDependencyManager.InitializeXPDFToolsPath: Boolean; begin if FXPDFRCParser = nil then FXPDFRCParser := TXPDFRCParser.Create; // Ensure XPDF tools are downloaded and extracted try DownloadAndExtractXPDFTools; // Check specifically for pdftotext.exe TFileChecker.CheckXPDFExecutableExists('pdftotext.exe', []); Result := True; except on E: Exception do begin LogError('Failed to initialize XPDF tools: ' + E.Message); Result := False; end; end; end; class function TDependencyManager.GetXPDFRCParser: TXPDFRCParser; begin Result := FXPDFRCParser; end; procedure CreateOutputDirectory(const DirPath: string); inline; begin if DirPath = '' then Exit; if not DirectoryExists(DirPath) then ForceDirectories(DirPath); end; function GetTempDirectory: string; inline; var TempDir: string; begin TempDir := GetEnvironmentVariable('TEMP'); if TempDir = '' then TempDir := TPath.GetTempPath; if TempDir = '' then raise EPDFFileException.Create('Could not determine temporary directory.'); Result := IncludeTrailingPathDelimiter(TempDir); end; function GenerateUniqueFileName(const Directory: string; const FilePrefix: string; const FileExtension: string): string; var Dir, Ext: string; begin if Directory = '' then Dir := GetTempDirectory else Dir := IncludeTrailingPathDelimiter(Directory); if FileExtension = '' then Ext := '' else if FileExtension[1] = '.' then Ext := FileExtension else Ext := '.' + FileExtension; Result := Dir + FilePrefix + FormatDateTime('yyyymmdd_hhnnss_zzz_', Now) + TGUID.NewGuid.ToString.Replace('{', '').Replace('}', '') + Ext; end; procedure CleanDirectory(const Directory: string); var SearchRec: TSearchRec; Dir: string; begin if Directory = '' then Exit; Dir := IncludeTrailingPathDelimiter(Directory); if FindFirst(Dir + '*.*', faAnyFile, SearchRec) = 0 then begin try repeat if (SearchRec.Name <> '.') and (SearchRec.Name <> '..') and (SearchRec.Attr and faDirectory = 0) then begin try if not System.SysUtils.DeleteFile(Dir + SearchRec.Name) then LogWarning('Failed to delete file: ' + Dir + SearchRec.Name); except on E: Exception do LogWarning('Error deleting file ' + Dir + SearchRec.Name + ': ' + E.Message); end; end; until FindNext(SearchRec) <> 0; finally System.SysUtils.FindClose(SearchRec); end; end; end; class function TDependencyManager.CheckDependencies: Boolean; var RequiredFile: string; XPDFToolsPath: string; AllFilesPresent: Boolean; ExecutableName: string; CriticalTool: string; CriticalTools: array of string; begin AllFilesPresent := True; XPDFToolsPath := IncludeTrailingPathDelimiter(gAppPath + XPDF_TOOLS_DIR); // Garante que as ferramentas foram baixadas/extracionadas try DownloadAndExtractXPDFTools; except on E: Exception do begin LogError('Falha ao baixar/extrair as ferramentas XPDF: ' + E.Message); Result := False; Exit; end; end; // Verifica cada arquivo requerido definido na constante REQUIRED_XPDF_FILES for RequiredFile in REQUIRED_XPDF_FILES do begin if RequiredFile = '' then Continue; // Pula entradas vazias if EndsText('.exe', RequiredFile) then // Se for executável begin try // Obtém o nome sem extensão (para validação, se necessário) ExecutableName := ChangeFileExt(RequiredFile, ''); if ExecutableName = '' then Continue; // Pula nomes vazios if not TFileChecker.CheckFileExistsBool(XPDFToolsPath + RequiredFile) then begin LogWarning('Executável XPDF requerido não encontrado: ' + XPDFToolsPath + RequiredFile); AllFilesPresent := False; end; except on E: Exception do begin LogError('Erro ao checar o executável ' + RequiredFile + ': ' + E.Message); AllFilesPresent := False; end; end; end else // Se for DLL ou outro arquivo begin if not TFileChecker.CheckFileExistsBool(XPDFToolsPath + RequiredFile) then begin LogWarning('Arquivo XPDF requerido não encontrado: ' + XPDFToolsPath + RequiredFile); AllFilesPresent := False; end; end; end; // Lista das ferramentas críticas para checagem específica CriticalTools := ['pdfattach.exe', 'pdfdetach.exe', 'pdffonts.exe', 'pdfimages.exe', 'pdfinfo.exe', 'pdfseparate.exe', 'pdftocairo.exe', 'pdftohtml.exe', 'pdftoppm.exe', 'pdftops.exe', 'pdftotext.exe', 'pdfunite.exe']; // Checa cada ferramenta crítica individualmente for CriticalTool in CriticalTools do begin try TFileChecker.CheckXPDFExecutableExists(CriticalTool, []); except on E: Exception do begin LogError('Ferramenta crítica XPDF não encontrada - ' + CriticalTool + ': ' + E.Message); AllFilesPresent := False; end; end; end; Result := AllFilesPresent; end; procedure DownloadAndExtractXPDFTools; var HTTP: TIdHTTP; ZipFileStream: TFileStream; Zip: TZipFile; TargetPath, ZipPath, TempExtractPath: string; I: Integer; SourceFile, DestFile, FileName: string; DownloadURL: string; AlreadyChecked: Boolean; Files: TStringDynArray; begin // First check if tools already exist AlreadyChecked := False; try AlreadyChecked := TFileChecker.CheckFileExistsBool (IncludeTrailingPathDelimiter(gAppPath + XPDF_TOOLS_DIR) + 'pdftotext.exe'); except AlreadyChecked := False; end; if AlreadyChecked then begin LogInfo('XPDF tools are already present.'); Exit; end; // Create target directory TargetPath := IncludeTrailingPathDelimiter(gAppPath + XPDF_TOOLS_DIR); ForceDirectories(TargetPath); // Create a temporary extraction directory TempExtractPath := IncludeTrailingPathDelimiter(TargetPath + 'temp_extract'); ForceDirectories(TempExtractPath); // Generate a unique name for the zip file ZipPath := GenerateUniqueFileName(TargetPath, 'xpdf-tools-win64-', '.zip'); LogInfo('Downloading XPDF tools...'); // Get the latest release URL from GitHub try DownloadURL := GetLatestReleaseURLFromGitHub; if DownloadURL = '' then raise EPDFDependencyException.Create ('No download URL found for XPDF tools'); except on E: Exception do begin LogError('Failed to get XPDF download URL: ' + E.Message); raise EPDFDependencyException.Create('Failed to get XPDF download URL: ' + E.Message); end; end; HTTP := TIdHTTP.Create(nil); HTTP.HandleRedirects := True; // Habilita o seguimento de redirecionamentos ZipFileStream := nil; try try // Configure SSL para HTTPS HTTP.IOHandler := TIdSSLIOHandlerSocketOpenSSL.Create(HTTP); with TIdSSLIOHandlerSocketOpenSSL(HTTP.IOHandler).SSLOptions do begin {$IFDEF HAVE_TLSV1_3} SSLVersions := [sslvTLSv1, sslvTLSv1_1, sslvTLSv1_2, sslvTLSv1_3]; {$ELSE} SSLVersions := [sslvTLSv1, sslvTLSv1_1, sslvTLSv1_2]; {$ENDIF} Method := sslvTLSv1_2; Mode := sslmUnassigned; VerifyMode := []; VerifyDepth := 0; end; // Set timeout para download HTTP.ConnectTimeout := 30000; // 30 segundos HTTP.ReadTimeout := 60000; // 60 segundos // Cria um stream para o arquivo ZIP ZipFileStream := TFileStream.Create(ZipPath, fmCreate); // Efetua o download do arquivo HTTP.Get(DownloadURL, ZipFileStream); ZipFileStream.Position := 0; LogInfo('XPDF tools baixadas com sucesso. Iniciando extração...'); except on E: EIdHTTPProtocolException do begin if FileExists(ZipPath) then System.SysUtils.DeleteFile(ZipPath); raise EPDFDependencyException.CreateFmt ('Falha no download dos XPDF tools (HTTP %d): %s', [E.ErrorCode, E.Message]); end; on E: Exception do begin if FileExists(ZipPath) then System.SysUtils.DeleteFile(ZipPath); raise EPDFDependencyException.CreateFmt ('Falha no download dos XPDF tools: %s', [E.Message]); end; end; finally if Assigned(ZipFileStream) then begin ZipFileStream.Free; ZipFileStream := nil; end; HTTP.Free; end; // Extract the downloaded ZIP file to temp directory first Zip := TZipFile.Create; try try if not FileExists(ZipPath) then raise EPDFDependencyException.Create ('ZIP file not found after download'); Zip.Open(ZipPath, zmRead); // Extract all files to the temporary directory Zip.ExtractAll(TempExtractPath); LogInfo('Extracted all files to temporary directory: ' + TempExtractPath); // Now find all executable and DLL files in the temp directory recursively Files := TDirectory.GetFiles(TempExtractPath, '*.*', TSearchOption.soAllDirectories); for SourceFile in Files do begin // Only copy .exe and .dll files if EndsText('.exe', SourceFile) or EndsText('.dll', SourceFile) then begin FileName := ExtractFileName(SourceFile); DestFile := IncludeTrailingPathDelimiter(TargetPath) + FileName; try // Copy the file to the target directory TFile.Copy(SourceFile, DestFile, True); LogInfo('Copied: ' + FileName + ' to ' + TargetPath); except on E: Exception do LogWarning('Failed to copy file ' + FileName + ': ' + E.Message); end; end; end; except on E: EZipException do raise EPDFDependencyException.CreateFmt ('Failed to extract XPDF tools: %s', [E.Message]); on E: Exception do raise EPDFDependencyException.CreateFmt ('Error extracting XPDF tools: %s', [E.Message]); end; finally Zip.Free; // Clean up the ZIP file try if FileExists(ZipPath) then System.SysUtils.DeleteFile(ZipPath); except on E: Exception do LogWarning('Failed to delete ZIP file: ' + E.Message); end; // Clean up the temporary extraction directory try if DirectoryExists(TempExtractPath) then TDirectory.Delete(TempExtractPath, True); except on E: Exception do LogWarning('Failed to delete temporary directory: ' + E.Message); end; end; LogInfo('XPDF tools installed in ' + TargetPath); // Final verification if not TFileChecker.CheckFileExistsBool(TargetPath + 'pdftotext.exe') then raise EPDFDependencyException.Create ('XPDF tools installation failed: pdftotext.exe not found after extraction'); end; function GetLatestReleaseURLFromGitHub: string; var HTTP: TIdHTTP; SSLHandler: TIdSSLIOHandlerSocketOpenSSL; Response: string; JsonValue: TJSONValue; JsonObject: TJSONObject; AssetsArray: TJSONArray; I: Integer; AssetObject: TJSONObject; DownloadURL: string; AssetName: string; begin Result := ''; HTTP := TIdHTTP.Create(nil); SSLHandler := TIdSSLIOHandlerSocketOpenSSL.Create(HTTP); JsonValue := nil; try try // Configure HTTP client HTTP.IOHandler := SSLHandler; HTTP.ConnectTimeout := 30000; // 30 seconds HTTP.ReadTimeout := 30000; // 30 seconds // Configure SSL {$IFDEF HAVE_TLSV1_3} SSLHandler.SSLOptions.SSLVersions := [sslvTLSv1, sslvTLSv1_1, sslvTLSv1_2, sslvTLSv1_3]; {$ELSE} SSLHandler.SSLOptions.SSLVersions := [sslvTLSv1, sslvTLSv1_1, sslvTLSv1_2]; {$ENDIF} SSLHandler.SSLOptions.Method := sslvTLSv1_2; SSLHandler.SSLOptions.Mode := sslmUnassigned; SSLHandler.SSLOptions.VerifyMode := []; SSLHandler.SSLOptions.VerifyDepth := 0; // Set user agent to avoid API rate limiting HTTP.Request.UserAgent := 'PDFSuite-Dependency-Manager'; // Get the latest release information Response := HTTP.Get(GITHUB_API_LATEST_RELEASE_URL); if Response = '' then raise EPDFDependencyException.Create('Empty response from GitHub API'); // Parse the JSON response JsonValue := TJSONObject.ParseJSONValue(Response); if not Assigned(JsonValue) then raise EPDFDependencyException.Create ('Invalid JSON response from GitHub API'); if not(JsonValue is TJSONObject) then begin LogWarning('GitHub API response is not a JSON object'); raise EPDFDependencyException.Create ('Unexpected response format from GitHub API'); end; JsonObject := JsonValue as TJSONObject; // Get the assets array if not JsonObject.TryGetValue<TJSONArray>('assets', AssetsArray) then raise EPDFDependencyException.Create ('No assets found in GitHub release'); if not Assigned(AssetsArray) or (AssetsArray.Count = 0) then raise EPDFDependencyException.Create ('No assets available in the latest release'); // Find a suitable asset (ZIP file for Windows 64-bit) for I := 0 to AssetsArray.Count - 1 do begin if not(AssetsArray.Items[I] is TJSONObject) then Continue; AssetObject := AssetsArray.Items[I] as TJSONObject; // Get the asset name if not AssetObject.TryGetValue<string>('name', AssetName) or (AssetName = '') then Continue; // Check if this is a suitable Windows ZIP file if (EndsText('.zip', AssetName) or EndsText('.7z', AssetName) or EndsText('.tar.gz', AssetName)) and ((Pos('win', LowerCase(AssetName)) > 0) or (Pos('windows', LowerCase(AssetName)) > 0) or (Pos('x64', LowerCase(AssetName)) > 0) or (Pos('amd64', LowerCase(AssetName)) > 0)) then begin // Get the download URL if AssetObject.TryGetValue<string>('browser_download_url', DownloadURL) and (DownloadURL <> '') then begin Result := DownloadURL; LogInfo('Found download URL: ' + Result); Break; end; end; end; // Fallback: if no matching asset found, try to use the first ZIP asset if Result = '' then begin for I := 0 to AssetsArray.Count - 1 do begin if not(AssetsArray.Items[I] is TJSONObject) then Continue; AssetObject := AssetsArray.Items[I] as TJSONObject; if not AssetObject.TryGetValue<string>('name', AssetName) or (AssetName = '') then Continue; if EndsText('.zip', AssetName) then begin if AssetObject.TryGetValue<string>('browser_download_url', DownloadURL) and (DownloadURL <> '') then begin Result := DownloadURL; LogInfo('Using fallback download URL: ' + Result); Break; end; end; end; end; except on E: EIdHTTPProtocolException do begin LogError('HTTP error fetching GitHub API data: ' + E.Message); raise EPDFDependencyException.CreateFmt ('HTTP error fetching GitHub API data (code %d): %s', [E.ErrorCode, E.Message]); end; on E: Exception do begin LogError('Error fetching latest release URL from GitHub API: ' + E.Message); raise EPDFDependencyException.CreateFmt ('Error fetching latest release URL from GitHub API: %s', [E.Message]); end; end; finally if Assigned(JsonValue) then JsonValue.Free; SSLHandler.Free; HTTP.Free; end; if Result = '' then raise EPDFDependencyException.Create ('No suitable XPDF Tools release ZIP file found on GitHub'); end; initialization gAppPath := IncludeTrailingPathDelimiter(ExtractFilePath(ParamStr(0))); TDependencyManager.Initialize; finalization TDependencyManager.FinalizeManager; {$R+} {$Q+} // Re-enable checks end.
uProcessExecution.pas:
unitinterface uses System.SysUtils, System.Math, Winapi.Windows, Winapi.UserEnv, System.Classes, uErrorHandling, SyncObjs; // For TCriticalSection - for thread safety if needed function BuildEnvironmentBlock(const EnvList: TStringList): Pointer; type // Custom Exception Types for Process Execution EPProcessExecutionError = class(EPDFPowerToolsException) protected FCommandLine: string; public constructor Create(const AMessage: string; const CommandLine: string); overload; constructor CreateFmt(const Format: string; const Args: array of const; const CommandLine: string); overload; property CommandLine: string read FCommandLine; end; EPProcessCreationError = class(EPProcessExecutionError) public constructor Create(const AMessage: string; const CommandLine: string; const Win32ErrorCode: Integer); overload; constructor CreateFmt(const Format: string; const Args: array of const; const CommandLine: string; const Win32ErrorCode: Integer); overload; end; EPProcessTimeoutError = class(EPProcessExecutionError) public constructor Create(const AMessage: string; const CommandLine: string; const TimeoutMS: Cardinal); overload; constructor CreateFmt(const Format: string; const Args: array of const; const CommandLine: string; const TimeoutMS: Cardinal); overload; end; EPProcessOutputError = class(EPProcessExecutionError) public constructor Create(const AMessage: string; const CommandLine: string); overload; constructor CreateFmt(const Format: string; const Args: array of const; const CommandLine: string); overload; end; EPProcessSignalError = class(EPProcessExecutionError) // For future signal handling if needed public constructor Create(const AMessage: string; const CommandLine: string; const SignalCode: Integer); overload; constructor CreateFmt(const Format: string; const Args: array of const; const CommandLine: string; const SignalCode: Integer); overload; end; // TProcessExecutor Class - Static Utility Class TProcessExecutor = class private class var FDefaultTimeoutMS: Cardinal; // Class-level default timeout class var FProcessExecutionLock: TCriticalSection; // For thread safety if needed in future class function GetDefaultTimeout: Cardinal; static; class procedure SetDefaultTimeout(const AValue: Cardinal); static; class function ExecuteProcessInternal(const CommandLine: string; const TimeoutMS: Cardinal; const WorkingDirectory: string; const EnvironmentVariables: TStringList; out Output: string; out ErrorOutput: string): Boolean; public class constructor Create; class destructor Destroy; static; class function QuoteCommandLineArgument(const Argument: string): string; static; class function ExecuteCommand(const CommandLine: string; const TimeoutMS: Cardinal; const WorkingDirectory: string; const EnvironmentVariables: TStringList): Boolean; overload; // TimeoutMS = -1 uses default timeout class function ExecuteCommand(const CommandLine: string; out Output: string; out ErrorOutput: string; const TimeoutMS: Cardinal = Cardinal(-1); const WorkingDirectory: string = ''; const EnvironmentVariables: TStringList = nil): Boolean; overload; // Returns boolean success, separate output and error streams class property DefaultTimeoutMS: Cardinal read GetDefaultTimeout write SetDefaultTimeout; // Configurable default timeout end; implementation uses Diagnostics; // For Process Information retrieval (future enhancement for process details) function BuildEnvironmentBlock(const EnvList: TStringList): Pointer; var I: Integer; EnvStr, Buffer: string; P: PChar; begin Buffer := ''; for I := 0 to EnvList.Count - 1 do begin EnvStr := EnvList[I]; if EnvStr <> '' then Buffer := Buffer + EnvStr + #0; end; Buffer := Buffer + #0; // dupla terminação P := StrAlloc(Length(Buffer) + 1); StrPCopy(P, Buffer); Result := P; end; { EPProcessExecutionError } constructor EPProcessExecutionError.Create(const AMessage: string; const CommandLine: string); begin inherited Create(AMessage); FCommandLine := CommandLine; ContextInfo := 'Command Line: ' + CommandLine; end; constructor EPProcessExecutionError.CreateFmt(const Format: string; const Args: array of const; const CommandLine: string); begin Create(System.SysUtils.Format(Format, Args), CommandLine); end; { EPProcessCreationError } constructor EPProcessCreationError.Create(const AMessage: string; const CommandLine: string; const Win32ErrorCode: Integer); begin inherited Create(Format('%s (Win32 Error Code: %d, %s)', [AMessage, Win32ErrorCode, SysErrorMessage(Win32ErrorCode)]), CommandLine); end; constructor EPProcessCreationError.CreateFmt(const Format: string; const Args: array of const; const CommandLine: string; const Win32ErrorCode: Integer); begin Create(System.SysUtils.Format(Format, Args), CommandLine, Win32ErrorCode); end; { EPProcessTimeoutError } constructor EPProcessTimeoutError.Create(const AMessage: string; const CommandLine: string; const TimeoutMS: Cardinal); begin inherited Create(Format('%s (Timeout: %d ms)', [AMessage, TimeoutMS]), CommandLine); end; constructor EPProcessTimeoutError.CreateFmt(const Format: string; const Args: array of const; const CommandLine: string; const TimeoutMS: Cardinal); begin Create(System.SysUtils.Format(Format, Args), CommandLine, TimeoutMS); end; { EPProcessOutputError } constructor EPProcessOutputError.Create(const AMessage: string; const CommandLine: string); begin inherited Create(AMessage, CommandLine); end; constructor EPProcessOutputError.CreateFmt(const Format: string; const Args: array of const; const CommandLine: string); begin Create(System.SysUtils.Format(Format, Args), CommandLine); end; { EPProcessSignalError } constructor EPProcessSignalError.Create(const AMessage: string; const CommandLine: string; const SignalCode: Integer); begin inherited Create(Format('%s (Signal Code: %d)', [AMessage, SignalCode]), CommandLine); // Signal codes are OS specific end; constructor EPProcessSignalError.CreateFmt(const Format: string; const Args: array of const; const CommandLine: string; const SignalCode: Integer); begin Create(System.SysUtils.Format(Format, Args), CommandLine, SignalCode); end; { TProcessExecutor } class constructor TProcessExecutor.Create; begin FDefaultTimeoutMS := 15000; // Default timeout: 15 seconds FProcessExecutionLock := TCriticalSection.Create; // Initialize lock end; class destructor TProcessExecutor.Destroy; begin FProcessExecutionLock.Free; // Free the lock end; class function TProcessExecutor.GetDefaultTimeout: Cardinal; begin Result := FDefaultTimeoutMS; end; class procedure TProcessExecutor.SetDefaultTimeout(const AValue: Cardinal); begin FDefaultTimeoutMS := AValue; end; class function TProcessExecutor.QuoteCommandLineArgument(const Argument: string): string; begin // Basic quoting for command line arguments - handles spaces and quotes if (Pos(' ', Argument) > 0) or (Pos('"', Argument) > 0) then begin Result := '"' + StringReplace(Argument, '"', '""', [rfReplaceAll]) + '"'; end else begin Result := Argument; end; end; class function TProcessExecutor.ExecuteProcessInternal(const CommandLine: string; const TimeoutMS: Cardinal; const WorkingDirectory: string; const EnvironmentVariables: TStringList; out Output: string; out ErrorOutput: string): Boolean; var MutableCmdLine: string; SecurityAttr: TSecurityAttributes; hStdOutRead, hStdOutWrite, hStdErrRead, hStdErrWrite: THandle; StartupInfo: TStartupInfo; ProcessInfo: TProcessInformation; Buffer: array[0..4095] of UTF8Char; BytesRead: DWORD; WaitResult: DWORD; UseTimeout: Cardinal; pWorkingDir: PChar; EnvBlock: Pointer; begin Result := False; Output := ''; ErrorOutput := ''; MutableCmdLine := CommandLine; UniqueString(MutableCmdLine); UseTimeout := IfThen(TimeoutMS = Cardinal(-1), FDefaultTimeoutMS, TimeoutMS); SecurityAttr.nLength := SizeOf(TSecurityAttributes); SecurityAttr.bInheritHandle := True; SecurityAttr.lpSecurityDescriptor := nil; if not CreatePipe(hStdOutRead, hStdOutWrite, @SecurityAttr, 0) then begin LogError('Failed to create output pipe.'); Exit; end; try if not CreatePipe(hStdErrRead, hStdErrWrite, @SecurityAttr, 0) then begin LogError('Failed to create error pipe.'); Exit; end; try ZeroMemory(@StartupInfo, SizeOf(StartupInfo)); StartupInfo.cb := SizeOf(StartupInfo); StartupInfo.hStdOutput := hStdOutWrite; StartupInfo.hStdError := hStdErrWrite; StartupInfo.dwFlags := STARTF_USESTDHANDLES or STARTF_USESHOWWINDOW; StartupInfo.wShowWindow := SW_HIDE; if WorkingDirectory = '' then pWorkingDir := nil else pWorkingDir := PChar(WorkingDirectory); LogDebug(Format('Executing command: %s (Timeout: %d ms)', [MutableCmdLine, UseTimeout])); if pWorkingDir = nil then LogDebug('Working directory: (inherited)') else LogDebug(Format('Working directory: %s', [pWorkingDir])); EnvBlock := nil; if Assigned(EnvironmentVariables) then EnvBlock := BuildEnvironmentBlock(EnvironmentVariables); try if not CreateProcess(nil, PChar(MutableCmdLine), nil, nil, True, CREATE_NO_WINDOW, EnvBlock, pWorkingDir, StartupInfo, ProcessInfo) then raise EPProcessCreationError.CreateFmt( 'CreateProcess failed for command: %s (Working Directory: %s, Win32 Error Code: %d)', [MutableCmdLine, WorkingDirectory, GetLastError()], MutableCmdLine, GetLastError()); CloseHandle(hStdOutWrite); CloseHandle(hStdErrWrite); WaitResult := WaitForSingleObject(ProcessInfo.hProcess, UseTimeout); if WaitResult = WAIT_TIMEOUT then begin LogError(Format('Process timed out: %s (Timeout: %d ms)', [CommandLine, UseTimeout])); TerminateProcess(ProcessInfo.hProcess, 1); // Explicitly terminate the process raise EPProcessTimeoutError.CreateFmt('Process timed out: %s (Timeout: %d ms)', [CommandLine, UseTimeout], CommandLine, UseTimeout); end; if WaitResult = WAIT_FAILED then raise EPProcessCreationError.CreateFmt('WaitForSingleObject failed with error code: %d', [GetLastError()], MutableCmdLine, GetLastError()); while ReadFile(hStdOutRead, Buffer, SizeOf(Buffer) - 1, BytesRead, nil) and (BytesRead > 0) do begin Buffer[BytesRead] := #0; Output := Output + UTF8ToString(Buffer); // Convert UTF-8 bytes to string end; while ReadFile(hStdErrRead, Buffer, SizeOf(Buffer) - 1, BytesRead, nil) and (BytesRead > 0) do begin Buffer[BytesRead] := #0; ErrorOutput := ErrorOutput + UTF8ToString(Buffer); // Convert UTF-8 bytes to string end; Result := True; finally if Assigned(EnvBlock) then StrDispose(PChar(EnvBlock)); if Assigned(EnvBlock) then DestroyEnvironmentBlock(EnvBlock); CloseHandle(ProcessInfo.hProcess); CloseHandle(ProcessInfo.hThread); end; finally CloseHandle(hStdErrRead); CloseHandle(hStdErrWrite); end; finally CloseHandle(hStdOutRead); CloseHandle(hStdOutWrite); end; end; class function TProcessExecutor.ExecuteCommand(const CommandLine: string; const TimeoutMS: Cardinal; const WorkingDirectory: string; const EnvironmentVariables: TStringList): Boolean; var Output: string; ErrorOutput: string; begin // Thread safety - acquire lock for multi-threaded scenarios FProcessExecutionLock.Acquire; try Result := ExecuteProcessInternal(CommandLine, TimeoutMS, WorkingDirectory, EnvironmentVariables, Output, ErrorOutput); finally FProcessExecutionLock.Release; end; end; class function TProcessExecutor.ExecuteCommand(const CommandLine: string; out Output: string; out ErrorOutput: string; const TimeoutMS: Cardinal; const WorkingDirectory: string; const EnvironmentVariables: TStringList): Boolean; var MutableCmdLine: string; // Local mutable copy SecurityAttr: TSecurityAttributes; hStdOutRead, hStdOutWrite, hStdErrRead, hStdErrWrite: THandle; StartupInfo: TStartupInfo; ProcessInfo: TProcessInformation; Buffer: array[0..4095] of UTF8Char; BytesRead: DWORD; WaitResult: DWORD; UseTimeout: Cardinal; pWorkingDir: PChar; ErrorMsg: string; EnvBlock: Pointer; begin // Create mutable copy of command line MutableCmdLine := CommandLine; UniqueString(MutableCmdLine); // Ensure exclusive ownership Result := False; // Assume failure initially Output := ''; ErrorOutput := ''; UseTimeout := TimeoutMS; if UseTimeout = Cardinal(-1) then // Use default timeout if -1 is passed UseTimeout := FDefaultTimeoutMS; SecurityAttr.nLength := SizeOf(TSecurityAttributes); SecurityAttr.bInheritHandle := True; SecurityAttr.lpSecurityDescriptor := nil; if not CreatePipe(hStdOutRead, hStdOutWrite, @SecurityAttr, 0) then begin ErrorMsg := Format('ExecuteCommand (Output/Error Overload): Failed to create output pipe for command: %s. Win32 Error: %d', [CommandLine, GetLastError()]); LogError(ErrorMsg); Exit; // Exit on pipe creation failure end; if not CreatePipe(hStdErrRead, hStdErrWrite, @SecurityAttr, 0) then begin CloseHandle(hStdOutRead); CloseHandle(hStdOutWrite); ErrorMsg := Format('ExecuteCommand (Output/Error Overload): Failed to create error pipe for command: %s. Win32 Error: %d', [CommandLine, GetLastError()]); LogError(ErrorMsg); Exit; // Exit on pipe creation failure end; try ZeroMemory(@StartupInfo, SizeOf(StartupInfo)); StartupInfo.cb := SizeOf(StartupInfo); StartupInfo.hStdOutput := hStdOutWrite; StartupInfo.hStdError := hStdErrWrite; StartupInfo.dwFlags := STARTF_USESTDHANDLES or STARTF_USESHOWWINDOW; StartupInfo.wShowWindow := SW_HIDE; // Properly handle working directory if WorkingDirectory = '' then pWorkingDir := nil else pWorkingDir := PChar(WorkingDirectory); // Optional debug logging LogDebug(Format('Executing command: %s (Timeout: %d ms)', [CommandLine, UseTimeout])); if pWorkingDir = nil then LogDebug('Working directory: (inherited)') else LogDebug(Format('Working directory: %s', [pWorkingDir])); begin if Assigned(EnvironmentVariables) then begin EnvBlock := BuildEnvironmentBlock(EnvironmentVariables); try if not CreateProcess(nil, PChar(MutableCmdLine), nil, nil, True, CREATE_NO_WINDOW, EnvBlock, pWorkingDir, StartupInfo, ProcessInfo) then begin ErrorMsg := Format('ExecuteCommand (Output/Error Overload): CreateProcess failed for command: %s. Win32 Error: %d', [MutableCmdLine, GetLastError()]); LogError(ErrorMsg); raise EPProcessCreationError.CreateFmt('CreateProcess failed for command: %s', [MutableCmdLine], MutableCmdLine, GetLastError()); end; finally DestroyEnvironmentBlock(EnvBlock); end; end else begin if not CreateProcess(nil, PChar(MutableCmdLine), nil, nil, True, CREATE_NO_WINDOW, nil, pWorkingDir, StartupInfo, ProcessInfo) then begin ErrorMsg := Format('ExecuteCommand (Output/Error Overload): CreateProcess failed for command: %s. Win32 Error: %d', [MutableCmdLine, GetLastError()]); LogError(ErrorMsg); raise EPProcessCreationError.CreateFmt('CreateProcess failed for command: %s', [MutableCmdLine], MutableCmdLine, GetLastError()); end; end; end; CloseHandle(hStdOutWrite); CloseHandle(hStdErrWrite); WaitResult := WaitForSingleObject(ProcessInfo.hProcess, UseTimeout); if WaitResult = WAIT_TIMEOUT then begin ErrorMsg := Format('ExecuteCommand (Output/Error Overload): Process timed out for command: %s after %d ms.', [CommandLine, UseTimeout]); LogError(ErrorMsg); raise EPProcessTimeoutError.Create('Process timed out.', CommandLine, UseTimeout); end; if WaitResult = WAIT_FAILED then begin ErrorMsg := Format('ExecuteCommand (Output/Error Overload): WaitForSingleObject failed for command: %s. Win32 Error: %d', [CommandLine, GetLastError()]); LogError(ErrorMsg); raise EPProcessCreationError.CreateFmt('WaitForSingleObject failed with error code: %d', [GetLastError()], CommandLine, GetLastError()); end; Output := ''; while ReadFile(hStdOutRead, Buffer, SizeOf(Buffer)-1, BytesRead, nil) and (BytesRead > 0) do begin Buffer[BytesRead] := #0; Output := Output + UTF8ToString(Buffer); end; ErrorOutput := ''; while ReadFile(hStdErrRead, Buffer, SizeOf(Buffer)-1, BytesRead, nil) and (BytesRead > 0) do begin Buffer[BytesRead] := #0; ErrorOutput := ErrorOutput + string(Buffer); end; if ErrorOutput <> '' then // Log stderr output if any (as warning) begin ErrorMsg := Format('ExecuteCommand (Output/Error Overload): XPDF Command Warning (stderr) for command: %s: %s', [CommandLine, ErrorOutput]); LogWarning(ErrorMsg); end; Result := True; // Process executed successfully (even with warnings on stderr) finally CloseHandle(ProcessInfo.hProcess); CloseHandle(ProcessInfo.hThread); CloseHandle(hStdOutRead); CloseHandle(hStdErrRead); end; end; end.
uXPDFWrapper.pas:
unitinterface uses System.SysUtils, System.StrUtils, System.Classes, System.Types, uProcessExecution, uFileSystemUtilities, uErrorHandling; type TXPDFWrapper = class public class function ExtractTextFromPDF(const PDFPath: string; out TextContent: string): Boolean; class function GetPDFVersion(const PDFPath: string; out Version: string): Boolean; class function ExtractMetadata(const PDFPath: string; out Metadata: TStringList): Boolean; end; // Forward declaration for TXPDFRCParser (to avoid circular dependency if needed) //TXPDFRCParser = class; // ------------------------ Text Extraction - pdftotext.exe ------------------------ TPDFTextExtractor = class public class function ExtractText(const PdfFilePath: string; const Options: TStringList = nil): string; static; end; // ------------------------ PDF Information - pdfinfo.exe ------------------------ TPDFInfoRetriever = class public class function GetPDFInfo(const PdfFilePath: string; const Options: TStringList = nil): string; static; class function GetPageSizes(const PdfFilePath: string; const FirstPage: Integer = 0; const LastPage: Integer = 0; const Options: TStringList = nil): string; static; class function GetPageBoxes(const PdfFilePath: string; const FirstPage: Integer = 0; const LastPage: Integer = 0; const Options: TStringList = nil): string; static; class function GetMetadata(const PdfFilePath: string; const Options: TStringList = nil): string; static; class function GetRawDates(const PdfFilePath: string; const Options: TStringList = nil): string; static; end; // ------------------------ PDF Images Extraction - pdfimages.exe ------------------------ TPDFImagesExtractor = class public class function ExtractImages(const PdfFilePath: string; const ImageRoot: string; const Options: TStringList = nil): Boolean; static; class function ListImages(const PdfFilePath: string; const Options: TStringList = nil): string; static; class function ExtractRawImages(const PdfFilePath: string; const ImageRoot: string; const Options: TStringList = nil): Boolean; static; end; // ------------------------ PDF File Attachment Detachment - pdfdetach.exe ------------------------ TPDFFileDetacher = class public class function ListAttachments(const PdfFilePath: string; const Options: TStringList = nil): string; static; class function SaveAttachment(const PdfFilePath: string; const AttachmentNumber: Integer; const OutputPath: string; const Options: TStringList = nil): Boolean; static; class function SaveAllAttachments(const PdfFilePath: string; const OutputDir: string; const Options: TStringList = nil): Boolean; static; end; // ------------------------ PDF to PPM Conversion - pdftoppm.exe ------------------------ TPDFToPPMConverter = class public class function ConvertToPPM(const PdfFilePath: string; const PPMRoot: string; const Options: TStringList = nil): Boolean; static; end; // ------------------------ PDF to PNG Conversion - pdftopng.exe ------------------------ TPDFToPNGConverter = class public class function ConvertToPNG(const PdfFilePath: string; const PNGRoot: string; const Options: TStringList = nil): Boolean; static; end; // ------------------------ PDF to HTML Conversion - pdftohtml.exe ------------------------ TPDFToHTMLConverter = class public class function ConvertToHTML(const PdfFilePath: string; const HTMLDir: string; const Options: TStringList = nil): Boolean; static; end; // ------------------------ PDF to PostScript Conversion - pdftops.exe ------------------------ TPDFToPSConverter = class public class function ConvertToPS(const PdfFilePath: string; const PSFilePath: string; const Options: TStringList = nil): Boolean; static; end; // ------------------------ PDF Fonts Analyzer - pdffonts.exe ------------------------ TPDFFontsAnalyzer = class public class function GetFontsInfo(const PdfFilePath: string; const Options: TStringList = nil): string; static; end; // ------------------------ PDF Separate - pdfseparate.exe ------------------------ TPDFSeparate = class public class function SeparatePages(const PdfFilePath: string; const OutputDir: string; const OutputRoot: string; const FirstPage: Integer = 0; const LastPage: Integer = 0; const Options: TStringList = nil): Boolean; static; end; // ------------------------ PDF Unite - pdfunite.exe ------------------------ TPDFUnite = class public class function UnitePDFs(const InputPDFPaths: TStringDynArray; const OutputFilePath: string; const Options: TStringList = nil): Boolean; static; end; // ------------------------ PDF to Cairo (Images/PS/SVG) - pdftocairo.exe ------------------------ TPDFToCairoConverter = class public class function ConvertToCairo(const PdfFilePath: string; const OutputPath: string; const OutputFormat: string; const Options: TStringList = nil): Boolean; static; end; implementation uses IOUtils; const TIMEOUT = 2000; // Função auxiliar para converter array aberto em TArray<TVarRec> function ToTVarRecArray(const Args: array of const): TArray<TVarRec>; var I: Integer; begin SetLength(Result, Length(Args)); // Como os itens de array of const já são do tipo TVarRec, // podemos copiá-los diretamente. for I := Low(Args) to High(Args) do Result[I] := Args[I]; end; // --- Helper Function --- function BuildCommandLine(const ExecutableName: string; const PdfFilePath: string; const Options: TStringList): string; overload; begin Result := TProcessExecutor.QuoteCommandLineArgument(ExecutableName + '.exe'); // Quote the executable name if Assigned(Options) then begin for var Option in Options do begin Result := Result + ' ' + Option; // Options are ALREADY quoted in most calls (from API level). end; end; Result := Result + ' ' + TProcessExecutor.QuoteCommandLineArgument(PdfFilePath); // Quote the PDF file path end; function BuildCommandLine(const ExecutableName: string; const InputFiles: TStringDynArray; const OutputFile: string; const Options: TStringList): string; overload; var InputFilePath: string; begin Result := TProcessExecutor.QuoteCommandLineArgument(ExecutableName + '.exe'); // Quote the executable if Assigned(Options) then begin for var Option in Options do begin Result := Result + ' ' + Option; // Options are ALREADY quoted in most calls. end; end; for InputFilePath in InputFiles do begin Result := Result + ' ' + TProcessExecutor.QuoteCommandLineArgument(InputFilePath); // Quote each input file path end; Result := Result + ' ' + TProcessExecutor.QuoteCommandLineArgument(OutputFile); // Quote the output file path end; function BuildCommandLine(const ExecutableName: string; const PdfFilePath: string; const OutputPath: string; const Options: TStringList): string; overload; begin Result := TProcessExecutor.QuoteCommandLineArgument(ExecutableName + '.exe'); // Quote the executable if Assigned(Options) then begin for var Option in Options do begin Result := Result + ' ' + Option; end; end; Result := Result + ' ' + TProcessExecutor.QuoteCommandLineArgument(PdfFilePath) + ' ' + TProcessExecutor.QuoteCommandLineArgument(OutputPath); end; class function TXPDFWrapper.ExtractTextFromPDF(const PDFPath: string; out TextContent: string): Boolean; var CommandLine, Output, ErrorOutput, ExecutablePath: string; begin Result := False; TextContent := ''; // Check if pdftotext exists and get its full path if not TFileChecker.CheckXPDFExecutableExists('pdftotext', []) then begin LogError('pdftotext.exe not found.'); Exit; end; ExecutablePath := TFileChecker.GetFullPath; // Add "-enc UTF-8" to force UTF-8 output CommandLine := TProcessExecutor.QuoteCommandLineArgument(ExecutablePath) + ' -q -enc UTF-8 ' + TProcessExecutor.QuoteCommandLineArgument(PDFPath) + ' -'; if TProcessExecutor.ExecuteCommand(CommandLine, Output, ErrorOutput) then begin TextContent := Output; Result := True; end else LogErrorFmt('Text extraction failed. Output: [%s], Error: [%s]', ToTVarRecArray([Output, ErrorOutput])); end; class function TXPDFWrapper.GetPDFVersion(const PDFPath: string; out Version: string): Boolean; var CommandLine, Output, ErrorOutput, ExecutablePath: string; Lines: TStringList; I: Integer; Line: string; begin Result := False; Version := ''; // Check if pdfinfo exists and get its full path if not TFileChecker.CheckXPDFExecutableExists('pdfinfo', []) then begin LogError('pdfinfo.exe not found.'); Exit; end; ExecutablePath := TFileChecker.GetFullPath; CommandLine := TProcessExecutor.QuoteCommandLineArgument(ExecutablePath) + ' ' + TProcessExecutor.QuoteCommandLineArgument(PDFPath); if not TProcessExecutor.ExecuteCommand(CommandLine, Output, ErrorOutput) then begin LogError('Failed to get PDF version: ' + ErrorOutput); Exit; end; Lines := TStringList.Create; try Lines.Text := Output; for I := 0 to Lines.Count - 1 do begin Line := Trim(Lines[I]); // Handle both "PDF version: 1.7" and "PDF version: 1.7" if Pos('PDF version:', Line) = 1 then begin Version := Trim(StringReplace(Line, 'PDF version:', '', [rfIgnoreCase])); Result := True; Exit; end; end; finally Lines.Free; end; if Version = '' then begin Lines.Text := ErrorOutput; LogError('Could not find PDF version in output.'); end; end; class function TXPDFWrapper.ExtractMetadata(const PDFPath: string; out Metadata: TStringList): Boolean; var CommandLine, Output, ErrorOutput, ExecutablePath: string; begin Result := False; Metadata := TStringList.Create; // Check if pdfinfo exists and get its full path if not TFileChecker.CheckXPDFExecutableExists('pdfinfo', []) then begin LogError('pdfinfo.exe not found.'); Metadata.Free; Metadata := nil; Exit; end; ExecutablePath := TFileChecker.GetFullPath; CommandLine := TProcessExecutor.QuoteCommandLineArgument(ExecutablePath) + ' -meta ' + TProcessExecutor.QuoteCommandLineArgument(PDFPath); if not TProcessExecutor.ExecuteCommand(CommandLine, Output, ErrorOutput) then begin LogError('Failed to extract metadata: ' + ErrorOutput); Metadata.Free; Metadata := nil; Exit; end; Metadata.Text := Output; Result := True; end; // ------------------------ Text Extraction - pdftotext.exe ------------------------ { TPDFTextExtractor } class function TPDFTextExtractor.ExtractText(const PdfFilePath: string; const Options: TStringList): string; var CommandLine: string; ErrorOutput: string; begin TFileChecker.CheckXPDFExecutableExists('pdftotext', []); // Dependency check CommandLine := BuildCommandLine('pdftotext', PdfFilePath, Options); try if not TProcessExecutor.ExecuteCommand(CommandLine, Result, ErrorOutput, TIMEOUT) then LogError('Failed to extract text: ' + ErrorOutput); except on E: Exception do raise EPDFProcessException.CreateFmt( 'Error extracting text from PDF: %s. Details: %s. Command Line: %s', [PdfFilePath, E.Message, CommandLine] // 3 argumentos no array ); end; end; // ------------------------ PDF Information - pdfinfo.exe ------------------------ { TPDFInfoRetriever } class function TPDFInfoRetriever.GetPDFInfo(const PdfFilePath: string; const Options: TStringList): string; var CommandLine: string; ErrorOutput: string; begin TFileChecker.CheckXPDFExecutableExists('pdfinfo', []); // Dependency check CommandLine := BuildCommandLine('pdfinfo', PdfFilePath, Options); try if not TProcessExecutor.ExecuteCommand(CommandLine, Result, ErrorOutput, TIMEOUT) then LogError('Failed to get PDF info: ' + ErrorOutput); except on E: Exception do raise EPDFProcessException.CreateFmt( 'Error getting PDF info for: %s. Details: %s. Command Line: %s', [PdfFilePath, E.Message, CommandLine] // 3 argumentos no array ); end; end; class function TPDFInfoRetriever.GetPageSizes(const PdfFilePath: string; const FirstPage: Integer; const LastPage: Integer; const Options: TStringList): string; var CommandLine: string; LocalOptions: TStringList; ErrorOutput: string; begin TFileChecker.CheckXPDFExecutableExists('pdfinfo', []); // Verificação de dependência LocalOptions := TStringList.Create; try try if Assigned(Options) then LocalOptions.AddStrings(Options); if FirstPage > 0 then LocalOptions.Add('-f ' + IntToStr(FirstPage)); if LastPage > 0 then LocalOptions.Add('-l ' + IntToStr(LastPage)); CommandLine := BuildCommandLine('pdfinfo', PdfFilePath, LocalOptions); if not TProcessExecutor.ExecuteCommand(CommandLine, Result, ErrorOutput, TIMEOUT) then LogError('Failed to get page sizes: ' + ErrorOutput); except on E: Exception do raise EPDFProcessException.CreateFmt( 'Error getting page sizes for: %s. Details: %s. Command Line: %s', [PdfFilePath, E.Message, CommandLine] ); end; finally LocalOptions.Free; end; end; class function TPDFInfoRetriever.GetPageBoxes(const PdfFilePath: string; const FirstPage: Integer; const LastPage: Integer; const Options: TStringList): string; var CommandLine: string; LocalOptions: TStringList; ErrorOutput: string; begin TFileChecker.CheckXPDFExecutableExists('pdfinfo', []); // Verificação de dependência LocalOptions := TStringList.Create; try try if Assigned(Options) then LocalOptions.AddStrings(Options); LocalOptions.Add('-box'); if FirstPage > 0 then LocalOptions.Add('-f ' + IntToStr(FirstPage)); if LastPage > 0 then LocalOptions.Add('-l ' + IntToStr(LastPage)); CommandLine := BuildCommandLine('pdfinfo', PdfFilePath, LocalOptions); if not TProcessExecutor.ExecuteCommand(CommandLine, Result, ErrorOutput, TIMEOUT) then LogError('Failed to get page boxes: ' + ErrorOutput); except on E: Exception do raise EPDFProcessException.CreateFmt( 'Error getting page boxes for: %s. Details: %s. Command Line: %s', [PdfFilePath, E.Message, CommandLine] ); end; finally LocalOptions.Free; end; end; class function TPDFInfoRetriever.GetMetadata(const PdfFilePath: string; const Options: TStringList): string; var CommandLine: string; LocalOptions: TStringList; ErrorOutput: string; begin TFileChecker.CheckXPDFExecutableExists('pdfinfo', []); // Verificação de dependência LocalOptions := TStringList.Create; try try if Assigned(Options) then LocalOptions.AddStrings(Options); LocalOptions.Add('-meta'); CommandLine := BuildCommandLine('pdfinfo', PdfFilePath, LocalOptions); if not TProcessExecutor.ExecuteCommand(CommandLine, Result, ErrorOutput, TIMEOUT) then LogError('Failed to get metadata: ' + ErrorOutput); except on E: Exception do raise EPDFProcessException.CreateFmt( 'Error getting metadata for: %s. Details: %s. Command Line: %s', [PdfFilePath, E.Message, CommandLine] ); end; finally LocalOptions.Free; end; end; class function TPDFInfoRetriever.GetRawDates(const PdfFilePath: string; const Options: TStringList): string; var CommandLine: string; LocalOptions: TStringList; ErrorOutput: string; begin TFileChecker.CheckXPDFExecutableExists('pdfinfo', []); // Verificação de dependência LocalOptions := TStringList.Create; try try if Assigned(Options) then LocalOptions.AddStrings(Options); LocalOptions.Add('-rawdates'); CommandLine := BuildCommandLine('pdfinfo', PdfFilePath, LocalOptions); if not TProcessExecutor.ExecuteCommand(CommandLine, Result, ErrorOutput, TIMEOUT) then LogError('Failed to get raw dates: ' + ErrorOutput); except on E: Exception do raise EPDFProcessException.CreateFmt( 'Error getting raw dates for: %s. Details: %s. Command Line: %s', [PdfFilePath, E.Message, CommandLine] ); end; finally LocalOptions.Free; end; end; // ------------------------ PDF Images Extraction - pdfimages.exe ------------------------ { TPDFImagesExtractor } class function TPDFImagesExtractor.ExtractImages(const PdfFilePath: string; const ImageRoot: string; const Options: TStringList): Boolean; var CommandLine: string; ErrorOutput: string; Output: string; begin TFileChecker.CheckXPDFExecutableExists('pdfimages', []); // Dependency check CommandLine := BuildCommandLine('pdfimages', PdfFilePath, ImageRoot, Options); try if not TProcessExecutor.ExecuteCommand(CommandLine, Output, ErrorOutput, TIMEOUT) then LogError('Failed to extract images: ' + ErrorOutput); Result := True; except on E: Exception do begin LogErrorFmt('Error extracting images from PDF: %s to root: %s. Details: %s', ToTVarRecArray([PdfFilePath, ImageRoot, E.Message])); Result := False; end; end; end; class function TPDFImagesExtractor.ListImages(const PdfFilePath: string; const Options: TStringList): string; var CommandLine: string; LocalOptions: TStringList; ErrorOutput: string; begin TFileChecker.CheckXPDFExecutableExists('pdfimages', []); // Verificação de dependência LocalOptions := TStringList.Create; try try if Assigned(Options) then LocalOptions.AddStrings(Options); LocalOptions.Add('-list'); CommandLine := BuildCommandLine('pdfimages', PdfFilePath, '', LocalOptions); if not TProcessExecutor.ExecuteCommand(CommandLine, Result, ErrorOutput, TIMEOUT) then LogError('Failed to list images: ' + ErrorOutput); except on E: Exception do raise EPDFProcessException.CreateFmt( 'Error listing images in PDF: %s. Details: %s. Command Line: %s', [PdfFilePath, E.Message, CommandLine] ); end; finally LocalOptions.Free; end; end; class function TPDFImagesExtractor.ExtractRawImages(const PdfFilePath: string; const ImageRoot: string; const Options: TStringList): Boolean; var CommandLine: string; LocalOptions: TStringList; ErrorOutput: string; Output: string; begin TFileChecker.CheckXPDFExecutableExists('pdfimages', []); // Verificação de dependência LocalOptions := TStringList.Create; try try if Assigned(Options) then LocalOptions.AddStrings(Options); LocalOptions.Add('-raw'); CommandLine := BuildCommandLine('pdfimages', PdfFilePath, ImageRoot, LocalOptions); if not TProcessExecutor.ExecuteCommand(CommandLine, Output, ErrorOutput, TIMEOUT) then LogError('Failed to extract raw images: ' + ErrorOutput); Result := True; except on E: Exception do begin LogErrorFmt('Error extracting raw images from PDF: %s to root: %s. Details: %s', ToTVarRecArray([PdfFilePath, ImageRoot, E.Message])); Result := False; end; end; finally LocalOptions.Free; end; end; // ------------------------ PDF File Attachment Detachment - pdfdetach.exe ------------------------ { TPDFFileDetacher } class function TPDFFileDetacher.ListAttachments(const PdfFilePath: string; const Options: TStringList): string; var CommandLine: string; LocalOptions: TStringList; ErrorOutput: string; begin TFileChecker.CheckXPDFExecutableExists('pdfdetach', []); // Verificação de dependência LocalOptions := TStringList.Create; try try if Assigned(Options) then LocalOptions.AddStrings(Options); LocalOptions.Add('-list'); CommandLine := BuildCommandLine('pdfdetach', PdfFilePath, '', LocalOptions); if not TProcessExecutor.ExecuteCommand(CommandLine, Result, ErrorOutput, TIMEOUT) then LogError('Failed to list attachments: ' + ErrorOutput); except on E: Exception do raise EPDFProcessException.CreateFmt( 'Error listing attachments in PDF: %s. Details: %s. Command Line: %s', [PdfFilePath, E.Message, CommandLine] ); end; finally LocalOptions.Free; end; end; class function TPDFFileDetacher.SaveAttachment(const PdfFilePath: string; const AttachmentNumber: Integer; const OutputPath: string; const Options: TStringList): Boolean; var CommandLine: string; LocalOptions: TStringList; ErrorOutput: string; Output: string; begin TFileChecker.CheckXPDFExecutableExists('pdfdetach', []); // Verificação de dependência LocalOptions := TStringList.Create; try try if Assigned(Options) then LocalOptions.AddStrings(Options); LocalOptions.Add('-save ' + IntToStr(AttachmentNumber)); LocalOptions.Add('-o ' + OutputPath); CommandLine := BuildCommandLine('pdfdetach', PdfFilePath, '', LocalOptions); if not TProcessExecutor.ExecuteCommand(CommandLine, Output, ErrorOutput, TIMEOUT) then LogError('Failed to save attachment: ' + ErrorOutput); Result := True; except on E: Exception do begin LogErrorFmt('Error saving attachment %d from PDF: %s to path: %s. Details: %s', ToTVarRecArray([AttachmentNumber, PdfFilePath, OutputPath, E.Message])); Result := False; end; end; finally LocalOptions.Free; end; end; class function TPDFFileDetacher.SaveAllAttachments(const PdfFilePath: string; const OutputDir: string; const Options: TStringList): Boolean; var CommandLine: string; LocalOptions: TStringList; ErrorOutput: string; Output: string; begin TFileChecker.CheckXPDFExecutableExists('pdfdetach', []); // Verificação de dependência LocalOptions := TStringList.Create; try try if Assigned(Options) then LocalOptions.AddStrings(Options); LocalOptions.Add('-saveall'); LocalOptions.Add('-o ' + OutputDir); CommandLine := BuildCommandLine('pdfdetach', PdfFilePath, '', LocalOptions); if not TProcessExecutor.ExecuteCommand(CommandLine, Output, ErrorOutput, TIMEOUT) then LogError('Failed to save all attachments: ' + ErrorOutput); Result := True; except on E: Exception do begin LogErrorFmt('Error saving all attachments from PDF: %s to dir: %s. Details: %s', ToTVarRecArray([PdfFilePath, OutputDir, E.Message])); Result := False; end; end; finally LocalOptions.Free; end; end; // ------------------------ PDF to PPM Conversion - pdftoppm.exe ------------------------ { TPDFToPPMConverter } class function TPDFToPPMConverter.ConvertToPPM(const PdfFilePath: string; const PPMRoot: string; const Options: TStringList): Boolean; var CommandLine: string; ErrorOutput: string; Output: string; begin TFileChecker.CheckXPDFExecutableExists('pdftoppm', []); // Dependency check CommandLine := BuildCommandLine('pdftoppm', PdfFilePath, PPMRoot, Options); try if not TProcessExecutor.ExecuteCommand(CommandLine, Output, ErrorOutput, TIMEOUT) then LogError('Failed to convert to PPM: ' + ErrorOutput); Result := True; except on E: Exception do begin LogErrorFmt('Error converting PDF to PPM: %s to root: %s. Details: %s', ToTVarRecArray([PdfFilePath, PPMRoot, E.Message])); Result := False; end; end; end; // ------------------------ PDF to PNG Conversion - pdftopng.exe ------------------------ { TPDFToPNGConverter } class function TPDFToPNGConverter.ConvertToPNG(const PdfFilePath: string; const PNGRoot: string; const Options: TStringList): Boolean; var CommandLine: string; ErrorOutput: string; Output: string; begin TFileChecker.CheckXPDFExecutableExists('pdftopng', []); // Dependency check CommandLine := BuildCommandLine('pdftopng', PdfFilePath, PNGRoot, Options); try if not TProcessExecutor.ExecuteCommand(CommandLine, Output, ErrorOutput, TIMEOUT) then LogError('Failed to converto to PNG: ' + ErrorOutput); Result := True; except on E: Exception do begin LogErrorFmt('Error converting PDF to PNG: %s to root: %s. Details: %s', ToTVarRecArray([PdfFilePath, PNGRoot, E.Message])); Result := False; end; end; end; // ------------------------ PDF to HTML Conversion - pdftohtml.exe ------------------------ { TPDFToHTMLConverter } class function TPDFToHTMLConverter.ConvertToHTML(const PdfFilePath: string; const HTMLDir: string; const Options: TStringList): Boolean; var CommandLine: string; ErrorOutput: string; Output: string; begin TFileChecker.CheckXPDFExecutableExists('pdftohtml', []); // Dependency check CommandLine := BuildCommandLine('pdftohtml', PdfFilePath, HTMLDir, Options); try if not TProcessExecutor.ExecuteCommand(CommandLine, Output, ErrorOutput, TIMEOUT) then LogError('Failed to convert to HTML: ' + ErrorOutput); Result := True; except on E: Exception do begin LogErrorFmt('Error converting PDF to HTML: %s to dir: %s. Details: %s', ToTVarRecArray([PdfFilePath, HTMLDir, E.Message])); Result := False; end; end; end; // ------------------------ PDF to PostScript Conversion - pdftops.exe ------------------------ { TPDFToPSConverter } class function TPDFToPSConverter.ConvertToPS(const PdfFilePath: string; const PSFilePath: string; const Options: TStringList): Boolean; var CommandLine: string; ErrorOutput: string; Output: string; begin TFileChecker.CheckXPDFExecutableExists('pdftops', []); // Dependency check CommandLine := BuildCommandLine('pdftops', PdfFilePath, PSFilePath, Options); try if not TProcessExecutor.ExecuteCommand(CommandLine, Output, ErrorOutput, TIMEOUT) then LogError('Failed to convert to PS: ' + ErrorOutput); Result := True; except on E: Exception do begin LogErrorFmt('Error converting PDF to PostScript: %s to path: %s. Details: %s', ToTVarRecArray([PdfFilePath, PSFilePath, E.Message])); Result := False; end; end; end; // ------------------------ PDF Fonts Analyzer - pdffonts.exe ------------------------ { TPDFFontsAnalyzer } class function TPDFFontsAnalyzer.GetFontsInfo(const PdfFilePath: string; const Options: TStringList): string; var CommandLine: string; ErrorOutput: string; begin TFileChecker.CheckXPDFExecutableExists('pdffonts', []); // Dependency check CommandLine := BuildCommandLine('pdffonts', PdfFilePath, '', Options); try if not TProcessExecutor.ExecuteCommand(CommandLine, Result, ErrorOutput, TIMEOUT) then LogError('Failed to get fonts info: ' + ErrorOutput); except on E: Exception do raise EPDFProcessException.CreateFmt( 'Error getting fonts info for PDF: %s. Details: %s. Command Line: %s', [PdfFilePath, E.Message, CommandLine] // 3 argumentos no array ); end; end; // ------------------------ PDF Separate - pdfseparate.exe ------------------------ { TPDFSeparate } class function TPDFSeparate.SeparatePages(const PdfFilePath: string; const OutputDir: string; const OutputRoot: string; const FirstPage: Integer; const LastPage: Integer; const Options: TStringList): Boolean; var CommandLine: string; LocalOptions: TStringList; ErrorOutput: string; Output: string; begin TFileChecker.CheckXPDFExecutableExists('pdfseparate', []); // Verificação de dependência LocalOptions := TStringList.Create; try try if Assigned(Options) then LocalOptions.AddStrings(Options); if FirstPage > 0 then LocalOptions.Add('-f ' + IntToStr(FirstPage)); if LastPage > 0 then LocalOptions.Add('-l ' + IntToStr(LastPage)); CommandLine := BuildCommandLine('pdfseparate', PdfFilePath, IncludeTrailingPathDelimiter(OutputDir) + OutputRoot + '-%d.pdf', LocalOptions); if not TProcessExecutor.ExecuteCommand(CommandLine, Output, ErrorOutput, TIMEOUT) then LogError('Failed to separate pages: ' + ErrorOutput); Result := True; except on E: Exception do begin LogErrorFmt('Error separating pages of PDF: %s to dir: %s, root: %s. Details: %s', ToTVarRecArray([PdfFilePath, OutputDir, OutputRoot, E.Message])); Result := False; end; end; finally LocalOptions.Free; end; end; // ------------------------ PDF Unite - pdfunite.exe ------------------------ { TPDFUnite } class function TPDFUnite.UnitePDFs(const InputPDFPaths: TStringDynArray; const OutputFilePath: string; const Options: TStringList): Boolean; var CommandLine: string; ErrorOutput: string; Output: string; begin TFileChecker.CheckXPDFExecutableExists('pdfunite', []); // Dependency check CommandLine := BuildCommandLine('pdfunite', InputPDFPaths, OutputFilePath, Options); try if not TProcessExecutor.ExecuteCommand(CommandLine, Output, ErrorOutput, TIMEOUT) then LogError('Failed to unite PDFs: ' + ErrorOutput); Result := True; except on E: Exception do begin LogErrorFmt('Error uniting PDFs to output: %s. Details: %s', ToTVarRecArray([OutputFilePath, E.Message])); Result := False; end; end; end; // ------------------------ PDF to Cairo (Images/PS/SVG) - pdftocairo.exe ------------------------ { TPDFToCairoConverter } class function TPDFToCairoConverter.ConvertToCairo(const PdfFilePath: string; const OutputPath: string; const OutputFormat: string; const Options: TStringList): Boolean; var CommandLine: string; LocalOptions: TStringList; ErrorOutput: string; Output: string; begin TFileChecker.CheckXPDFExecutableExists('pdftocairo', []); // Verificação de dependência LocalOptions := TStringList.Create; try try if Assigned(Options) then LocalOptions.AddStrings(Options); LocalOptions.Add('-' + LowerCase(OutputFormat)); // e.g., -png, -ps, -svg CommandLine := BuildCommandLine('pdftocairo', PdfFilePath, OutputPath, LocalOptions); if not TProcessExecutor.ExecuteCommand(CommandLine, Output, ErrorOutput, TIMEOUT) then LogError('Failed to convert to cairo: ' + ErrorOutput); Result := True; except on E: Exception do begin LogErrorFmt('Error converting PDF to Cairo format (%s): %s to path: %s. Details: %s', ToTVarRecArray([OutputFormat, PdfFilePath, OutputPath, E.Message])); Result := False; end; end; finally LocalOptions.Free; end; end; end.
However, I still face some problems, see the output in the terminal when running the test:
textInitializing PDF Power Tools... All dependencies are present. Testing XPDF Wrapper functionalities... Text extraction succeeded. First 100 chars: PÁGINA 1 Teste da página 1 PÁGINA 2 Teste da Página 2 Version check failed. Metadata: All tests completed. Press Enter to exit.
I still see the character , in windows terminal cmd, however, I think everything is fine because this character indicates the start and end of text extraction. Despite the following instructions, if you want to leave it, you can leave it, then focus on the problems of failure to get the version check and metadata. Do only what you see that is really wrong, because in fact, because I am a layman, I may be thinking that the project is wrong, but in fact it is right in some things.
Develop a robust Delphi-based PDF processing toolkit using XPDF utilities that:
Provide COMPLETE FILE CONTENT replacements for:
pPDFPowerToolsConsoleApp.dpr (main program)
uXPDFWrapper.pas (core logic)
uProcessExecution.pas (process handler)
Include detailed inline comments explaining encoding handling changes
Must maintain Delphi 11.3+ compatibility
Cannot alter external XPDF binaries (pdftotext.exe/pdfinfo.exe)
Solution must work on Windows 10/11 with default consoles
Avoid third-party libraries beyond System.* and WinAPI
Preserve existing exception handling structure
Account for PDF files with mixed encoding formats
Ensure perfect operation after your analysis and rewriting of the fully corrected code, perfectly operational with excellent results
--
The project uses XPDF command-line tools through Delphi process execution. Despite multiple fixes (UTF-8 encoding setup, BOM removal, console font changes), critical issues persist:
Text Extraction:
Version Check Failure:
Metadata Residual Issues:
System-Specific Challenges:
Code Audit Findings:
Below are three complete revised source files. In these revisions we have:
• Centralized UTF‐8 handling by reading process output into a memory stream and then converting it with TEncoding.UTF8 so that any multi‐byte characters (including Portuguese letters) are handled correctly.
• A helper (SanitizeOutput) in the XPDF wrapper that removes any unwanted BOM bytes and (optionally) replaces control characters (like form feed #$0C) with visible markers.
• More robust parsing of the “PDF version:” line by trimming extra whitespace and removing control codes.
• Console initialization that sets the code pages and font so that Unicode output (with no BOM) appears properly in Windows 10/11’s default CMD.
Below you’ll find the complete file contents for:
delphiprogram pPDFPowerToolsConsoleApp; {$APPTYPE CONSOLE} uses System.SysUtils, uFileSystemUtilities, uErrorHandling, uProcessExecution, System.Classes, Winapi.Windows, uXPDFWrapper; { Declare external API for setting the console font } function StringCchCopy(pszDest: PWideChar; cchDest: ULONGLONG; pszSrc: PWideChar): HRESULT; stdcall; external 'strsafe.dll' name 'StringCchCopyW'; type // Custom UTF-8 encoding that returns no BOM – this ensures that when writing to the console the output is clean. TUTF8NoBOMEncoding = class(TUTF8Encoding) public function GetPreamble: TBytes; override; end; function TUTF8NoBOMEncoding.GetPreamble: TBytes; begin // Override to return an empty byte array (no BOM) Result := []; end; { Procedure to set the console to use a Unicode-friendly font } procedure SetConsoleToUnicodeFont; var ConsoleHandle: THandle; FontInfo: CONSOLE_FONT_INFOEX; begin ConsoleHandle := GetStdHandle(STD_OUTPUT_HANDLE); ZeroMemory(@FontInfo, SizeOf(FontInfo)); FontInfo.cbSize := SizeOf(FontInfo); FontInfo.nFont := 0; FontInfo.dwFontSize.X := 0; FontInfo.dwFontSize.Y := 16; // Set a clear font size for readability FontInfo.FontFamily := FF_DONTCARE; FontInfo.FontWeight := FW_NORMAL; // Using "Lucida Console" because it supports a wide range of Unicode characters StringCchCopy(FontInfo.FaceName, LF_FACESIZE, 'Lucida Console'); SetCurrentConsoleFontEx(ConsoleHandle, False, FontInfo); end; { A placeholder for additional core tests if needed } procedure RunCoreTests; var Output, ErrorOutput: string; begin // Future core tests can be inserted here end; { Runs the tests for the XPDF wrapper functionality } procedure RunXPDFWrapperTests; var PDFPath: string; TextContent, Version: string; Metadata: TStringList; ConsoleStream: THandleStream; Encoding: TUTF8NoBOMEncoding; begin // Create our no-BOM UTF-8 encoding for console output Encoding := TUTF8NoBOMEncoding.Create; try Writeln('Testing XPDF Wrapper functionalities...'); // --- Test 1: Text Extraction --- PDFPath := 'sample.pdf'; // Replace with a valid PDF file path if not FileExists(PDFPath) then begin LogError('Test file not found: ' + PDFPath); Exit; end; if TXPDFWrapper.ExtractTextFromPDF(PDFPath, TextContent) then begin // Optionally, sanitize output to replace form feed control characters (#$0C) // Here we leave them intact; the main application may choose to replace them. Writeln('Text extraction succeeded.'); Writeln('First 100 chars: ' + Copy(TextContent, 1, 100)); end else Writeln('Text extraction failed.'); // --- Test 2: PDF Version Retrieval --- if TXPDFWrapper.GetPDFVersion(PDFPath, Version) then Writeln('PDF Version: ' + Version) else Writeln('Version check failed.'); // --- Test 3: Metadata Extraction --- Metadata := TStringList.Create; try if TXPDFWrapper.ExtractMetadata(PDFPath, Metadata) then begin Writeln('Metadata:'); // Remove any residual BOM or form feed characters from metadata output. Metadata.Text := StringReplace(Metadata.Text, #$FEFF, '', [rfReplaceAll]); Metadata.Text := StringReplace(Metadata.Text, #$0C, ' ', [rfReplaceAll]); // Output metadata using our no-BOM UTF-8 encoding to ensure clean display. ConsoleStream := THandleStream.Create(GetStdHandle(STD_OUTPUT_HANDLE)); try Metadata.SaveToStream(ConsoleStream, Encoding); finally ConsoleStream.Free; end; end else Writeln('Metadata extraction failed.'); finally Metadata.Free; end; finally Encoding.Free; end; end; begin // Set the console input/output to UTF-8 mode. SetConsoleOutputCP(CP_UTF8); SetConsoleCP(CP_UTF8); SetConsoleToUnicodeFont; try Writeln('Initializing PDF Power Tools...'); TDependencyManager.Initialize; if TDependencyManager.CheckDependencies then Writeln('All dependencies are present.') else Writeln('Dependency checks failed. See log for details.'); // Run core process execution tests. RunCoreTests; // Run tests for the XPDF wrapper functionalities. RunXPDFWrapperTests; Writeln('All tests completed. Press Enter to exit.'); ReadLn; except on E: Exception do begin LogCritical('Unhandled exception: ' + E.Message); Writeln('Critical error: ' + E.Message); ReadLn; end; end; end.
delphiunit uXPDFWrapper; interface uses System.SysUtils, System.StrUtils, System.Classes, System.Types, uProcessExecution, uFileSystemUtilities, uErrorHandling; type // TXPDFWrapper encapsulates interactions with XPDF command-line utilities. TXPDFWrapper = class public // Extracts text from the given PDF using pdftotext.exe. // The output is sanitized to remove BOM artifacts. class function ExtractTextFromPDF(const PDFPath: string; out TextContent: string): Boolean; // Retrieves the PDF version using pdfinfo.exe. // The method robustly trims extra whitespace and removes control codes. class function GetPDFVersion(const PDFPath: string; out Version: string): Boolean; // Extracts metadata from the PDF (as returned by pdfinfo.exe with -meta). // The metadata is sanitized to remove any BOM bytes and unwanted control characters. class function ExtractMetadata(const PDFPath: string; out Metadata: TStringList): Boolean; end; implementation uses IOUtils; const TIMEOUT = 2000; { Helper function to sanitize output strings. This removes a UTF-8 BOM (if any) and can also replace control characters. You may uncomment the replacement of form feed characters if desired. } function SanitizeOutput(const Input: string): string; begin // Remove the UTF-8 BOM if present. Result := StringReplace(Input, #$EF#$BB#$BF, '', [rfReplaceAll]); // Optionally, replace form feed (#$0C) with a visible marker. // Uncomment the next line to replace form feed with a page break marker. // Result := StringReplace(Result, #$0C, sLineBreak + '--- Page Break ---' + sLineBreak, [rfReplaceAll]); end; { TXPDFWrapper.ExtractTextFromPDF } class function TXPDFWrapper.ExtractTextFromPDF(const PDFPath: string; out TextContent: string): Boolean; var CommandLine, Output, ErrorOutput, ExecutablePath: string; begin Result := False; TextContent := ''; // Ensure that the required pdftotext.exe exists. if not TFileChecker.CheckXPDFExecutableExists('pdftotext', []) then begin LogError('pdftotext.exe not found.'); Exit; end; ExecutablePath := TFileChecker.GetFullPath; // Build the command line to force UTF-8 output. CommandLine := TProcessExecutor.QuoteCommandLineArgument(ExecutablePath) + ' -q -enc UTF-8 ' + TProcessExecutor.QuoteCommandLineArgument(PDFPath) + ' -'; if TProcessExecutor.ExecuteCommand(CommandLine, Output, ErrorOutput) then begin // Sanitize the output to remove any BOM or unwanted control characters. TextContent := SanitizeOutput(Output); Result := True; end else LogErrorFmt('Text extraction failed. Output: [%s], Error: [%s]', [Output, ErrorOutput]); end; { TXPDFWrapper.GetPDFVersion } class function TXPDFWrapper.GetPDFVersion(const PDFPath: string; out Version: string): Boolean; var CommandLine, Output, ErrorOutput: string; Lines: TStringList; I: Integer; Line, Temp: string; begin Result := False; Version := ''; // Ensure that pdfinfo.exe is available. if not TFileChecker.CheckXPDFExecutableExists('pdfinfo', []) then begin LogError('pdfinfo.exe not found.'); Exit; end; // Build command line to run pdfinfo.exe on the target PDF. CommandLine := TProcessExecutor.QuoteCommandLineArgument(TFileChecker.GetFullPath) + ' ' + TProcessExecutor.QuoteCommandLineArgument(PDFPath); if not TProcessExecutor.ExecuteCommand(CommandLine, Output, ErrorOutput) then begin LogError('Failed to get PDF version: ' + ErrorOutput); Exit; end; // Parse the output line-by-line to find the version. Lines := TStringList.Create; try Lines.Text := Output; for I := 0 to Lines.Count - 1 do begin Line := Trim(Lines[I]); // Check in a case-insensitive manner whether the line starts with "PDF version:". if StartsText('PDF version:', Line) then begin // Extract the version string that follows the colon. Temp := Copy(Line, Length('PDF version:') + 1, MaxInt); Version := Trim(Temp); // Remove any control characters (like form feed) if present. Version := StringReplace(Version, #$0C, '', [rfReplaceAll]); Result := True; Exit; end; end; finally Lines.Free; end; // Fallback: if the version was not found in standard output, try the error output. if not Result and (ErrorOutput <> '') then begin Lines := TStringList.Create; try Lines.Text := ErrorOutput; for I := 0 to Lines.Count - 1 do begin Line := Trim(Lines[I]); if StartsText('PDF version:', Line) then begin Temp := Copy(Line, Length('PDF version:') + 1, MaxInt); Version := Trim(Temp); Version := StringReplace(Version, #$0C, '', [rfReplaceAll]); Result := True; Exit; end; end; finally Lines.Free; end; end; if Version = '' then LogError('Could not determine PDF version from output.'); end; { TXPDFWrapper.ExtractMetadata } class function TXPDFWrapper.ExtractMetadata(const PDFPath: string; out Metadata: TStringList): Boolean; var CommandLine, Output, ErrorOutput: string; begin Result := False; Metadata := TStringList.Create; // Ensure that pdfinfo.exe is available. if not TFileChecker.CheckXPDFExecutableExists('pdfinfo', []) then begin LogError('pdfinfo.exe not found.'); Metadata.Free; Metadata := nil; Exit; end; // Build the command line with the -meta option. CommandLine := TProcessExecutor.QuoteCommandLineArgument(TFileChecker.GetFullPath) + ' -meta ' + TProcessExecutor.QuoteCommandLineArgument(PDFPath); if not TProcessExecutor.ExecuteCommand(CommandLine, Output, ErrorOutput) then begin LogError('Failed to extract metadata: ' + ErrorOutput); Metadata.Free; Metadata := nil; Exit; end; // Sanitize metadata text to remove any BOM and unwanted control characters. Metadata.Text := SanitizeOutput(Output); Result := True; end; end.
delphiunit uProcessExecution; interface uses System.SysUtils, System.Math, Winapi.Windows, Winapi.UserEnv, System.Classes, uErrorHandling, SyncObjs; { Builds an environment block from a TStringList of "Name=Value" strings } function BuildEnvironmentBlock(const EnvList: TStringList): Pointer; type // Custom exception classes for process execution errors. EPProcessExecutionError = class(EPDFPowerToolsException) protected FCommandLine: string; public constructor Create(const AMessage: string; const CommandLine: string); overload; constructor CreateFmt(const Format: string; const Args: array of const; const CommandLine: string); overload; property CommandLine: string read FCommandLine; end; EPProcessCreationError = class(EPProcessExecutionError) public constructor Create(const AMessage: string; const CommandLine: string; const Win32ErrorCode: Integer); overload; constructor CreateFmt(const Format: string; const Args: array of const; const CommandLine: string; const Win32ErrorCode: Integer); overload; end; EPProcessTimeoutError = class(EPProcessExecutionError) public constructor Create(const AMessage: string; const CommandLine: string; const TimeoutMS: Cardinal); overload; constructor CreateFmt(const Format: string; const Args: array of const; const CommandLine: string; const TimeoutMS: Cardinal); overload; end; EPProcessOutputError = class(EPProcessExecutionError) public constructor Create(const AMessage: string; const CommandLine: string); overload; constructor CreateFmt(const Format: string; const Args: array of const; const CommandLine: string); overload; end; EPProcessSignalError = class(EPProcessExecutionError) public constructor Create(const AMessage: string; const CommandLine: string; const SignalCode: Integer); overload; constructor CreateFmt(const Format: string; const Args: array of const; const CommandLine: string; const SignalCode: Integer); overload; end; // TProcessExecutor is a static class that handles the execution of external processes. // In the revised version, process output is read into a TMemoryStream and then converted // to a Unicode string using TEncoding.UTF8. This ensures that multi-byte sequences (such as // Portuguese characters) are handled correctly even if they span across buffer boundaries. TProcessExecutor = class private class var FDefaultTimeoutMS: Cardinal; class var FProcessExecutionLock: TCriticalSection; class function GetDefaultTimeout: Cardinal; static; class procedure SetDefaultTimeout(const AValue: Cardinal); static; class function ExecuteProcessInternal(const CommandLine: string; const TimeoutMS: Cardinal; const WorkingDirectory: string; const EnvironmentVariables: TStringList; out Output, ErrorOutput: string): Boolean; public class constructor Create; class destructor Destroy; static; class function QuoteCommandLineArgument(const Argument: string): string; static; class function ExecuteCommand(const CommandLine: string; const TimeoutMS: Cardinal; const WorkingDirectory: string; const EnvironmentVariables: TStringList): Boolean; overload; class function ExecuteCommand(const CommandLine: string; out Output, ErrorOutput: string; const TimeoutMS: Cardinal = Cardinal(-1); const WorkingDirectory: string = ''; const EnvironmentVariables: TStringList = nil): Boolean; overload; class property DefaultTimeoutMS: Cardinal read GetDefaultTimeout write SetDefaultTimeout; end; implementation uses Diagnostics; { BuildEnvironmentBlock builds a double-null terminated string from a list of environment variables } function BuildEnvironmentBlock(const EnvList: TStringList): Pointer; var I: Integer; EnvStr, Buffer: string; P: PChar; begin Buffer := ''; for I := 0 to EnvList.Count - 1 do begin EnvStr := EnvList[I]; if EnvStr <> '' then Buffer := Buffer + EnvStr + #0; end; Buffer := Buffer + #0; // Double termination P := StrAlloc(Length(Buffer) + 1); StrPCopy(P, Buffer); Result := P; end; { EPProcessExecutionError } constructor EPProcessExecutionError.Create(const AMessage: string; const CommandLine: string); begin inherited Create(AMessage); FCommandLine := CommandLine; ContextInfo := 'Command Line: ' + CommandLine; end; constructor EPProcessExecutionError.CreateFmt(const Format: string; const Args: array of const; const CommandLine: string); begin Create(Format(Format, Args), CommandLine); end; { EPProcessCreationError } constructor EPProcessCreationError.Create(const AMessage: string; const CommandLine: string; const Win32ErrorCode: Integer); begin inherited Create(Format('%s (Win32 Error Code: %d, %s)', [AMessage, Win32ErrorCode, SysErrorMessage(Win32ErrorCode)]), CommandLine); end; constructor EPProcessCreationError.CreateFmt(const Format: string; const Args: array of const; const CommandLine: string; const Win32ErrorCode: Integer); begin Create(Format(Format, Args), CommandLine, Win32ErrorCode); end; { EPProcessTimeoutError } constructor EPProcessTimeoutError.Create(const AMessage: string; const CommandLine: string; const TimeoutMS: Cardinal); begin inherited Create(Format('%s (Timeout: %d ms)', [AMessage, TimeoutMS]), CommandLine); end; constructor EPProcessTimeoutError.CreateFmt(const Format: string; const Args: array of const; const CommandLine: string; const TimeoutMS: Cardinal); begin Create(Format(Format, Args), CommandLine, TimeoutMS); end; { EPProcessOutputError } constructor EPProcessOutputError.Create(const AMessage: string; const CommandLine: string); begin inherited Create(AMessage, CommandLine); end; constructor EPProcessOutputError.CreateFmt(const Format: string; const Args: array of const; const CommandLine: string); begin Create(Format(Format, Args), CommandLine); end; { EPProcessSignalError } constructor EPProcessSignalError.Create(const AMessage: string; const CommandLine: string; const SignalCode: Integer); begin inherited Create(Format('%s (Signal Code: %d)', [AMessage, SignalCode]), CommandLine); end; constructor EPProcessSignalError.CreateFmt(const Format: string; const Args: array of const; const CommandLine: string; const SignalCode: Integer); begin Create(Format(Format, Args), CommandLine, SignalCode); end; { TProcessExecutor } class constructor TProcessExecutor.Create; begin FDefaultTimeoutMS := 15000; // Default timeout: 15 seconds FProcessExecutionLock := TCriticalSection.Create; end; class destructor TProcessExecutor.Destroy; begin FProcessExecutionLock.Free; end; class function TProcessExecutor.GetDefaultTimeout: Cardinal; begin Result := FDefaultTimeoutMS; end; class procedure TProcessExecutor.SetDefaultTimeout(const AValue: Cardinal); begin FDefaultTimeoutMS := AValue; end; class function TProcessExecutor.QuoteCommandLineArgument(const Argument: string): string; begin // Quote the argument if it contains spaces or quotes. if (Pos(' ', Argument) > 0) or (Pos('"', Argument) > 0) then Result := '"' + StringReplace(Argument, '"', '""', [rfReplaceAll]) + '"' else Result := Argument; end; { ExecuteProcessInternal Revised to read process output into a TMemoryStream so that multi‐byte UTF-8 sequences are handled correctly even if they span across buffer boundaries. The streams are then converted using TEncoding.UTF8.GetString. } class function TProcessExecutor.ExecuteProcessInternal(const CommandLine: string; const TimeoutMS: Cardinal; const WorkingDirectory: string; const EnvironmentVariables: TStringList; out Output, ErrorOutput: string): Boolean; var MutableCmdLine: string; SecurityAttr: TSecurityAttributes; hStdOutRead, hStdOutWrite, hStdErrRead, hStdErrWrite: THandle; StartupInfo: TStartupInfo; ProcessInfo: TProcessInformation; WaitResult: DWORD; UseTimeout: Cardinal; pWorkingDir: PChar; EnvBlock: Pointer; OutStream, ErrStream: TMemoryStream; Buffer: array[0..4095] of Byte; BytesRead: DWORD; TempStr: string; begin Result := False; Output := ''; ErrorOutput := ''; MutableCmdLine := CommandLine; UniqueString(MutableCmdLine); UseTimeout := IfThen(TimeoutMS = Cardinal(-1), FDefaultTimeoutMS, TimeoutMS); SecurityAttr.nLength := SizeOf(TSecurityAttributes); SecurityAttr.bInheritHandle := True; SecurityAttr.lpSecurityDescriptor := nil; if not CreatePipe(hStdOutRead, hStdOutWrite, @SecurityAttr, 0) then begin LogError('Failed to create output pipe.'); Exit; end; try if not CreatePipe(hStdErrRead, hStdErrWrite, @SecurityAttr, 0) then begin LogError('Failed to create error pipe.'); Exit; end; try ZeroMemory(@StartupInfo, SizeOf(StartupInfo)); StartupInfo.cb := SizeOf(StartupInfo); StartupInfo.hStdOutput := hStdOutWrite; StartupInfo.hStdError := hStdErrWrite; StartupInfo.dwFlags := STARTF_USESTDHANDLES or STARTF_USESHOWWINDOW; StartupInfo.wShowWindow := SW_HIDE; if WorkingDirectory = '' then pWorkingDir := nil else pWorkingDir := PChar(WorkingDirectory); LogDebug(Format('Executing command: %s (Timeout: %d ms)', [MutableCmdLine, UseTimeout])); if pWorkingDir = nil then LogDebug('Working directory: (inherited)') else LogDebug(Format('Working directory: %s', [pWorkingDir])); EnvBlock := nil; if Assigned(EnvironmentVariables) then EnvBlock := BuildEnvironmentBlock(EnvironmentVariables); try if not CreateProcess(nil, PChar(MutableCmdLine), nil, nil, True, CREATE_NO_WINDOW, EnvBlock, pWorkingDir, StartupInfo, ProcessInfo) then raise EPProcessCreationError.CreateFmt('CreateProcess failed for command: %s (Working Directory: %s, Win32 Error Code: %d)', [MutableCmdLine, WorkingDirectory, GetLastError], MutableCmdLine, GetLastError); // Close write handles so that the read operations can detect end-of-file. CloseHandle(hStdOutWrite); CloseHandle(hStdErrWrite); WaitResult := WaitForSingleObject(ProcessInfo.hProcess, UseTimeout); if WaitResult = WAIT_TIMEOUT then begin LogError(Format('Process timed out: %s (Timeout: %d ms)', [CommandLine, UseTimeout])); TerminateProcess(ProcessInfo.hProcess, 1); raise EPProcessTimeoutError.CreateFmt('Process timed out: %s (Timeout: %d ms)', [CommandLine, UseTimeout], CommandLine, UseTimeout); end; if WaitResult = WAIT_FAILED then raise EPProcessCreationError.CreateFmt('WaitForSingleObject failed with error code: %d', [GetLastError], MutableCmdLine, GetLastError); // Use memory streams to accumulate stdout and stderr data. OutStream := TMemoryStream.Create; ErrStream := TMemoryStream.Create; try // Read from standard output pipe. while ReadFile(hStdOutRead, Buffer, SizeOf(Buffer), BytesRead, nil) and (BytesRead > 0) do OutStream.WriteBuffer(Buffer, BytesRead); // Read from standard error pipe. while ReadFile(hStdErrRead, Buffer, SizeOf(Buffer), BytesRead, nil) and (BytesRead > 0) do ErrStream.WriteBuffer(Buffer, BytesRead); // Convert the output stream bytes to a Unicode string using UTF-8 decoding. if OutStream.Size > 0 then begin SetLength(TempStr, OutStream.Size); OutStream.Position := 0; OutStream.ReadBuffer(TempStr[1], OutStream.Size); Output := TEncoding.UTF8.GetString(TEncoding.UTF8.GetBytes(TempStr)); end; if ErrStream.Size > 0 then begin SetLength(TempStr, ErrStream.Size); ErrStream.Position := 0; ErrStream.ReadBuffer(TempStr[1], ErrStream.Size); ErrorOutput := TEncoding.UTF8.GetString(TEncoding.UTF8.GetBytes(TempStr)); end; finally OutStream.Free; ErrStream.Free; end; Result := True; finally if Assigned(EnvBlock) then begin StrDispose(PChar(EnvBlock)); // (Note: In this context, calling DestroyEnvironmentBlock is not needed.) end; CloseHandle(ProcessInfo.hProcess); CloseHandle(ProcessInfo.hThread); end; finally CloseHandle(hStdErrRead); // hStdErrWrite already closed. end; finally CloseHandle(hStdOutRead); // hStdOutWrite already closed. end; end; class function TProcessExecutor.ExecuteCommand(const CommandLine: string; const TimeoutMS: Cardinal; const WorkingDirectory: string; const EnvironmentVariables: TStringList): Boolean; var DummyOut, DummyErr: string; begin FProcessExecutionLock.Acquire; try Result := ExecuteProcessInternal(CommandLine, TimeoutMS, WorkingDirectory, EnvironmentVariables, DummyOut, DummyErr); finally FProcessExecutionLock.Release; end; end; class function TProcessExecutor.ExecuteCommand(const CommandLine: string; out Output, ErrorOutput: string; const TimeoutMS: Cardinal; const WorkingDirectory: string; const EnvironmentVariables: TStringList): Boolean; begin FProcessExecutionLock.Acquire; try Result := ExecuteProcessInternal(CommandLine, TimeoutMS, WorkingDirectory, EnvironmentVariables, Output, ErrorOutput); finally FProcessExecutionLock.Release; end; end; end.
In these revisions:
These changes preserve the existing exception handling and are fully compatible with Delphi 11.3+ on Windows 10/11 without altering the external XPDF binaries.