From f8400cbba9bb5cc64fdbe4b14729ff8497700a1e Mon Sep 17 00:00:00 2001 From: Sam Lantinga Date: Wed, 31 Jul 2019 09:11:20 -0700 Subject: [PATCH] Fixed bug 4692 - Command line parsing Galadrim As I have seen, SDL implements its own command line parser for Windows in SDL_windows_main.c. Unfortunately, it doesn't seem to allow command line arguments with trailing backslashes if quoting is required. Usually, when you write an application that gets command line arguments passed as argc and argv, the parsing is done by parse_cmdline. The Windows API also provides the function CommandLineToArgvW, so an application can parse itself if only the command line string is provided. Both functions behave almost identically according to their documentation. If the argument "\\" (including the quotes) is passed, they both turn it into a single backslash. The SDL command line parser on the other hand doesn't recognize the second quote character as the closing character in this example and therefore includes it in the parsed argument. The parser does not count the number of backslashes preceding a quote. It always treats a quote as escaped if a backslash is in front of it. Therefore, it should be impossible to quote and escape an argument correctly, if it has a trailing backslash and contains characters that require quoting. Of course, each application is allowed to implement its own parsing rules, so SDL is free to do so. But the problem I see is that there are arguments, that are impossible to be passed to the parser correctly, as I described above. Is there a reason, why SDL does not simply use CommandLineToArgvW instead of implementing its own parser? Here are some links that show that correct argument parsing, as it is usually done in Windows, is quite complicated: https://docs.microsoft.com/en-us/windows/desktop/api/shellapi/nf-shellapi-commandlinetoargvw http://www.windowsinspired.com/how-a-windows-programs-splits-its-command-line-into-individual-arguments/ --- src/main/windows/SDL_windows_main.c | 154 +++++----------------------- 1 file changed, 27 insertions(+), 127 deletions(-) diff --git a/src/main/windows/SDL_windows_main.c b/src/main/windows/SDL_windows_main.c index 32f672760e50c..aa1f7d38781dc 100644 --- a/src/main/windows/SDL_windows_main.c +++ b/src/main/windows/SDL_windows_main.c @@ -9,6 +9,7 @@ /* Include this so we define UNICODE properly */ #include "../../core/windows/SDL_windows.h" +#include /* CommandLineToArgvW() */ /* Include the SDL main definition header */ #include "SDL.h" @@ -18,87 +19,7 @@ # undef main #endif /* main */ -static void -UnEscapeQuotes(char *arg) -{ - char *last = NULL; - - while (*arg) { - if (*arg == '"' && (last != NULL && *last == '\\')) { - char *c_curr = arg; - char *c_last = last; - - while (*c_curr) { - *c_last = *c_curr; - c_last = c_curr; - c_curr++; - } - *c_last = '\0'; - } - last = arg; - arg++; - } -} - -/* Parse a command line buffer into arguments */ -static int -ParseCommandLine(char *cmdline, char **argv) -{ - char *bufp; - char *lastp = NULL; - int argc, last_argc; - - argc = last_argc = 0; - for (bufp = cmdline; *bufp;) { - /* Skip leading whitespace */ - while (*bufp == ' ' || *bufp == '\t') { - ++bufp; - } - /* Skip over argument */ - if (*bufp == '"') { - ++bufp; - if (*bufp) { - if (argv) { - argv[argc] = bufp; - } - ++argc; - } - /* Skip over word */ - lastp = bufp; - while (*bufp && (*bufp != '"' || *lastp == '\\')) { - lastp = bufp; - ++bufp; - } - } else { - if (*bufp) { - if (argv) { - argv[argc] = bufp; - } - ++argc; - } - /* Skip over word */ - while (*bufp && (*bufp != ' ' && *bufp != '\t')) { - ++bufp; - } - } - if (*bufp) { - if (argv) { - *bufp = '\0'; - } - ++bufp; - } - - /* Strip out \ from \" sequences */ - if (argv && last_argc != argc) { - UnEscapeQuotes(argv[last_argc]); - } - last_argc = argc; - } - if (argv) { - argv[argc] = NULL; - } - return (argc); -} +#define WIN_WStringToUTF8(S) SDL_iconv_string("UTF-8", "UTF-16LE", (char *)(S), (SDL_wcslen(S)+1)*sizeof(WCHAR)) /* Pop up an out of memory message, returns to Windows */ static BOOL @@ -119,65 +40,44 @@ OutOfMemory(void) /* Gets the arguments with GetCommandLine, converts them to argc and argv and calls SDL_main */ static int -main_getcmdline() +main_getcmdline(void) { + LPWSTR *argvw; char **argv; - int argc; - char *cmdline = NULL; - int retval = 0; - int cmdalloc = 0; - const TCHAR *text = GetCommandLine(); - const TCHAR *ptr; - int argc_guess = 2; /* space for NULL and initial argument. */ - int rc; - - /* make a rough guess of command line arguments. Overestimates if there - are quoted things. */ - for (ptr = text; *ptr; ptr++) { - if ((*ptr == ' ') || (*ptr == '\t')) { - argc_guess++; - } - } + int i, argc; + int result; -#if UNICODE - rc = WideCharToMultiByte(CP_UTF8, 0, text, -1, NULL, 0, NULL, NULL); - if (rc > 0) { - cmdalloc = rc + (sizeof (char *) * argc_guess); - argv = (char **) VirtualAlloc(NULL, cmdalloc, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); - if (argv) { - int rc2; - cmdline = (char *) (argv + argc_guess); - rc2 = WideCharToMultiByte(CP_UTF8, 0, text, -1, cmdline, rc, NULL, NULL); - SDL_assert(rc2 == rc); - } - } -#else - /* !!! FIXME: are these in the system codepage? We need to convert to UTF-8. */ - rc = ((int) SDL_strlen(text)) + 1; - cmdalloc = rc + (sizeof (char *) * argc_guess); - argv = (char **) VirtualAlloc(NULL, cmdalloc, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); - if (argv) { - cmdline = (char *) (argv + argc_guess); - SDL_strcpy(cmdline, text); - } -#endif - if (cmdline == NULL) { + argvw = CommandLineToArgvW(GetCommandLineW(), &argc); + if (argvw == NULL) { return OutOfMemory(); } /* Parse it into argv and argc */ - SDL_assert(ParseCommandLine(cmdline, NULL) <= argc_guess); - argc = ParseCommandLine(cmdline, argv); + argv = (char **)SDL_calloc(argc + 1, sizeof(*argv)); + if (!argv) { + return OutOfMemory(); + } + for (i = 0; i < argc; ++i) { + argv[i] = WIN_WStringToUTF8(argvw[i]); + if (!argv[i]) { + return OutOfMemory(); + } + } + argv[i] = NULL; + LocalFree(argvw); SDL_SetMainReady(); /* Run the application main() code */ - retval = SDL_main(argc, argv); + result = SDL_main(argc, argv); - VirtualFree(argv, cmdalloc, MEM_DECOMMIT); - VirtualFree(argv, 0, MEM_RELEASE); + /* Free argv, to avoid memory leak */ + for (i = 0; i < argc; ++i) { + SDL_free(argv[i]); + } + SDL_free(argv); - return retval; + return result; } /* This is where execution begins [console apps, ansi] */