Fixed bug 4692 - Command line parsing
author Sam Lantinga <slouken@libsdl.org>
Wed, 31 Jul 2019 09:11:20 -0700
changeset 12969 a1917148d38a
parent 12968 0e3948762c96
child 12970 d482a01e159f
Fixed bug 4692 - Command line parsing

Galadrim

As I have seen, SDL implements its own command line parser for Windows in SDL_windows_main.c. Unfortunately, it doesn't seem to allow command line arguments with trailing backslashes if quoting is required.

Usually, when you write an application that gets command line arguments passed as argc and argv, the parsing is done by parse_cmdline. The Windows API also provides the function CommandLineToArgvW, so an application can parse itself if only the command line string is provided. Both functions behave almost identically according to their documentation. If the argument "\\" (including the quotes) is passed, they both turn it into a single backslash.

The SDL command line parser on the other hand doesn't recognize the second quote character as the closing character in this example and therefore includes it in the parsed argument. The parser does not count the number of backslashes preceding a quote. It always treats a quote as escaped if a backslash is in front of it. Therefore, it should be impossible to quote and escape an argument correctly if it has a trailing backslash and contains characters that require quoting.

Of course, each application is allowed to implement its own parsing rules, so SDL is free to do so. But the problem I see is that there are arguments that cannot be passed to the parser correctly, as I described above. Is there a reason why SDL does not simply use CommandLineToArgvW instead of implementing its own parser?

Here are some links that show that correct argument parsing, as it is usually done in Windows, is quite complicated:

https://docs.microsoft.com/en-us/windows/desktop/api/shellapi/nf-shellapi-commandlinetoargvw

http://www.windowsinspired.com/how-a-windows-programs-splits-its-command-line-into-individual-arguments/
src/main/windows/SDL_windows_main.c
     1.1 --- a/src/main/windows/SDL_windows_main.c	Wed Jul 31 05:11:40 2019 +0300
     1.2 +++ b/src/main/windows/SDL_windows_main.c	Wed Jul 31 09:11:20 2019 -0700
     1.3 @@ -9,6 +9,7 @@
     1.4  
     1.5  /* Include this so we define UNICODE properly */
     1.6  #include "../../core/windows/SDL_windows.h"
     1.7 +#include <shellapi.h> /* CommandLineToArgvW() */
     1.8  
     1.9  /* Include the SDL main definition header */
    1.10  #include "SDL.h"
    1.11 @@ -18,87 +19,7 @@
    1.12  #  undef main
    1.13  #endif /* main */
    1.14  
    1.15 -static void
    1.16 -UnEscapeQuotes(char *arg)
    1.17 -{
    1.18 -    char *last = NULL;
    1.19 -
    1.20 -    while (*arg) {
    1.21 -        if (*arg == '"' && (last != NULL && *last == '\\')) {
    1.22 -            char *c_curr = arg;
    1.23 -            char *c_last = last;
    1.24 -
    1.25 -            while (*c_curr) {
    1.26 -                *c_last = *c_curr;
    1.27 -                c_last = c_curr;
    1.28 -                c_curr++;
    1.29 -            }
    1.30 -            *c_last = '\0';
    1.31 -        }
    1.32 -        last = arg;
    1.33 -        arg++;
    1.34 -    }
    1.35 -}
    1.36 -
    1.37 -/* Parse a command line buffer into arguments */
    1.38 -static int
    1.39 -ParseCommandLine(char *cmdline, char **argv)
    1.40 -{
    1.41 -    char *bufp;
    1.42 -    char *lastp = NULL;
    1.43 -    int argc, last_argc;
    1.44 -
    1.45 -    argc = last_argc = 0;
    1.46 -    for (bufp = cmdline; *bufp;) {
    1.47 -        /* Skip leading whitespace */
    1.48 -        while (*bufp == ' ' || *bufp == '\t') {
    1.49 -            ++bufp;
    1.50 -        }
    1.51 -        /* Skip over argument */
    1.52 -        if (*bufp == '"') {
    1.53 -            ++bufp;
    1.54 -            if (*bufp) {
    1.55 -                if (argv) {
    1.56 -                    argv[argc] = bufp;
    1.57 -                }
    1.58 -                ++argc;
    1.59 -            }
    1.60 -            /* Skip over word */
    1.61 -            lastp = bufp;
    1.62 -            while (*bufp && (*bufp != '"' || *lastp == '\\')) {
    1.63 -                lastp = bufp;
    1.64 -                ++bufp;
    1.65 -            }
    1.66 -        } else {
    1.67 -            if (*bufp) {
    1.68 -                if (argv) {
    1.69 -                    argv[argc] = bufp;
    1.70 -                }
    1.71 -                ++argc;
    1.72 -            }
    1.73 -            /* Skip over word */
    1.74 -            while (*bufp && (*bufp != ' ' && *bufp != '\t')) {
    1.75 -                ++bufp;
    1.76 -            }
    1.77 -        }
    1.78 -        if (*bufp) {
    1.79 -            if (argv) {
    1.80 -                *bufp = '\0';
    1.81 -            }
    1.82 -            ++bufp;
    1.83 -        }
    1.84 -
    1.85 -        /* Strip out \ from \" sequences */
    1.86 -        if (argv && last_argc != argc) {
    1.87 -            UnEscapeQuotes(argv[last_argc]);
    1.88 -        }
    1.89 -        last_argc = argc;
    1.90 -    }
    1.91 -    if (argv) {
    1.92 -        argv[argc] = NULL;
    1.93 -    }
    1.94 -    return (argc);
    1.95 -}
    1.96 +#define WIN_WStringToUTF8(S) SDL_iconv_string("UTF-8", "UTF-16LE", (char *)(S), (SDL_wcslen(S)+1)*sizeof(WCHAR))
    1.97  
    1.98  /* Pop up an out of memory message, returns to Windows */
    1.99  static BOOL
   1.100 @@ -119,65 +40,44 @@
   1.101  /* Gets the arguments with GetCommandLine, converts them to argc and argv
   1.102     and calls SDL_main */
   1.103  static int
   1.104 -main_getcmdline()
   1.105 +main_getcmdline(void)
   1.106  {
   1.107 +    LPWSTR *argvw;
   1.108      char **argv;
   1.109 -    int argc;
   1.110 -    char *cmdline = NULL;
   1.111 -    int retval = 0;
   1.112 -    int cmdalloc = 0;
   1.113 -    const TCHAR *text = GetCommandLine();
   1.114 -    const TCHAR *ptr;
   1.115 -    int argc_guess = 2;  /* space for NULL and initial argument. */
   1.116 -    int rc;
   1.117 +    int i, argc;
   1.118 +	int result;
   1.119  
   1.120 -    /* make a rough guess of command line arguments. Overestimates if there
   1.121 -       are quoted things. */
   1.122 -    for (ptr = text; *ptr; ptr++) {
   1.123 -        if ((*ptr == ' ') || (*ptr == '\t')) {
   1.124 -            argc_guess++;
   1.125 -        }
   1.126 -    }
   1.127 -
   1.128 -#if UNICODE
   1.129 -    rc = WideCharToMultiByte(CP_UTF8, 0, text, -1, NULL, 0, NULL, NULL);
   1.130 -    if (rc > 0) {
   1.131 -        cmdalloc = rc + (sizeof (char *) * argc_guess);
   1.132 -        argv = (char **) VirtualAlloc(NULL, cmdalloc, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
   1.133 -        if (argv) {
   1.134 -            int rc2;
   1.135 -            cmdline = (char *) (argv + argc_guess);
   1.136 -            rc2 = WideCharToMultiByte(CP_UTF8, 0, text, -1, cmdline, rc, NULL, NULL);
   1.137 -            SDL_assert(rc2 == rc);
   1.138 -        }
   1.139 -    }
   1.140 -#else
   1.141 -    /* !!! FIXME: are these in the system codepage? We need to convert to UTF-8. */
   1.142 -    rc = ((int) SDL_strlen(text)) + 1;
   1.143 -    cmdalloc = rc + (sizeof (char *) * argc_guess);
   1.144 -    argv = (char **) VirtualAlloc(NULL, cmdalloc, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
   1.145 -    if (argv) {
   1.146 -        cmdline = (char *) (argv + argc_guess);
   1.147 -        SDL_strcpy(cmdline, text);
   1.148 -    }
   1.149 -#endif
   1.150 -    if (cmdline == NULL) {
   1.151 +    argvw = CommandLineToArgvW(GetCommandLineW(), &argc);
   1.152 +    if (argvw == NULL) {
   1.153          return OutOfMemory();
   1.154      }
   1.155  
   1.156      /* Parse it into argv and argc */
   1.157 -    SDL_assert(ParseCommandLine(cmdline, NULL) <= argc_guess);
   1.158 -    argc = ParseCommandLine(cmdline, argv);
   1.159 +    argv = (char **)SDL_calloc(argc + 1, sizeof(*argv));
   1.160 +    if (!argv) {
   1.161 +        return OutOfMemory();
   1.162 +    }
   1.163 +    for (i = 0; i < argc; ++i) {
   1.164 +        argv[i] = WIN_WStringToUTF8(argvw[i]);
   1.165 +        if (!argv[i]) {
   1.166 +            return OutOfMemory();
   1.167 +        }
   1.168 +    }
   1.169 +    argv[i] = NULL;
   1.170 +    LocalFree(argvw);
   1.171  
   1.172      SDL_SetMainReady();
   1.173  
   1.174      /* Run the application main() code */
   1.175 -    retval = SDL_main(argc, argv);
   1.176 +    result = SDL_main(argc, argv);
   1.177  
   1.178 -    VirtualFree(argv, cmdalloc, MEM_DECOMMIT);
   1.179 -    VirtualFree(argv, 0, MEM_RELEASE);
   1.180 +	/* Free argv, to avoid memory leak */
   1.181 +    for (i = 0; i < argc; ++i) {
   1.182 +        SDL_free(argv[i]);
   1.183 +    }
   1.184 +	SDL_free(argv);
   1.185  
   1.186 -    return retval;
   1.187 +	return result;
   1.188  }
   1.189  
   1.190  /* This is where execution begins [console apps, ansi] */