/* Pure system interface. Or at least its beginnings. */ /* NOTE: This module isn't loaded by the standard prelude right now, so you'll have to add the clause 'using system;' to your programs in which you want to use this interface. */ /* Copyright (c) 2008 by Albert Graef <Dr.Graef@t-online.de>. This file is part of the Pure programming language and system. Pure is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Pure is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* This module offers some useful system routines, straight from the C library, as well as some convenience functions for wrapping these up in Pure. Even the "purest" program needs to do some basic I/O every once in a while, and this module provides the necessary stuff to do just that. The interface is rather minimalistic and preliminary right now, but will probably grow over time. */ /* Provide some useful values as global variables. These also include the standard I/O streams and various constants related to the I/O, glob and regex functions. After loading this module, see list -v for a list of these. */ private pure_sys_vars; extern void pure_sys_vars(); pure_sys_vars; /* errno and friends. This value and the related routines are indispensable to give proper diagnostics when system calls fail for some reason. Note that, by its very nature, errno is a fairly volatile value, don't expect it to survive a return to the command line in interactive sessions. */ extern int pure_errno() = errno, void pure_set_errno(int) = set_errno; extern void perror(char*), char* strerror(int); /* POSIX locale handling. Details are platform-specific, but you can expect that at least the categories LC_ALL, LC_COLLATE, LC_CTYPE, LC_MONETARY, LC_NUMERIC and LC_TIME are defined, as well as the following values for the locale parameter: "C" or "POSIX" (the default POSIX locale), "" (the system default locale), and NULL, to just query the current locale. Other string values which can be passed as the locale argument depend on the implementation, please check your local setlocale(3) documentation for details. If locale is not NULL, the current locale is changed accordingly. The return value is the new locale, or the current locale when passing NULL for the locale parameter. In either case, the string returned by setlocale is such that it can be passed to setlocale to restore the same locale again. In case of an error, setlocale returns a null pointer. Please note that calling this function alters the Pure interpreter's idea of what the current locale is, which will affect the expected encoding of subsequently loaded scripts, among other things. When the interpreter starts up, it always sets the default system locale. Unless your scripts rely on a specific encoding, setting the locale to either "C" or "" should always be safe. */ private c_setlocale; extern void* setlocale(int category, void* locale) = c_setlocale; setlocale category::int locale = return (check (c_setlocale category buf)) with check res = cstring_dup res if not null res; = res otherwise; return res = free buf $$ res if not null buf; = res otherwise; end when buf = if stringp locale then byte_cstring locale else locale; end if stringp locale || pointerp locale && null locale; /* Signal handling. The action parameter of 'trap' can be one of the predefined integer values SIG_TRAP, SIG_IGN and SIG_DFL. SIG_TRAP causes the given signal to be handled by mapping it to a Pure exception of the form 'signal SIG'. SIG_IGN ignores the signal, SIG_DFL reverts to the system's default handling. See 'list -g SIG*' for a list of known signal values on your system. NOTE: Most standard termination signals (SIGINT, SIGTERM, etc.) are already set up at the start of the interpreter to report corresponding Pure exceptions; if this is not desired, you can use 'trap' to either ignore these or revert to the default handlers instead. */ extern void pure_trap(int action, int sig) = trap; /* Time functions. 'time' reports the current time in seconds since the "epoch" a.k.a. 00:00:00 UTC, Jan 1 1970. The result is always a bigint (in fact, the time value is already 64 bit on many OSes nowadays). */ extern long pure_time() = time; /* Functions to format a time value as a string. The ctime and gmtime functions convert a time value to a string in either local time or UTC. The strftime function also formats a time value as local time, using a format specification supplied by the user. See ctime(3), gmtime(3) and strftime(3) for details. */ extern char* pure_ctime(long) = ctime; extern char* pure_gmtime(long) = gmtime; extern char* pure_strftime(char* format, long t) = strftime; /* The gettimeofday function also returns wallclock time as seconds since the epoch, but theoretically offers resolutions in the microsec range (actual resolutions vary, but are usually in the msec range for contemporary systems). The result is returned as a double value (which also limits precision). This function may actually be implemented through different system calls, depending on what's available on the host OS. */ extern double pure_gettimeofday() = gettimeofday; /* The clock function returns the current CPU (not wallclock) time since an arbitrary point in the past, as a machine int. The number of "ticks" per second is given by the CLOCKS_PER_SEC constant. Note that this value will wrap around approximately every 72 minutes. */ extern int clock(); /* The sleep and nanosleep functions suspend execution for a given time interval in seconds. 'sleep' takes integer (int/bigint) arguments only and uses the sleep() system function. 'nanosleep' also accepts double arguments and theoretically supports resolutions down to 1 nanosecond (again, actual resolutions vary). This function may actually be implemented through different system calls, depending on what's available on the host OS. Both functions usually return zero, unless the sleep was interrupted by a signal, in which case the time remaining to be slept is returned. */ extern int sleep(int); extern double pure_nanosleep(double) = nanosleep; nanosleep t::int | nanosleep t::bigint = nanosleep (double t); /* Basic process operations: system executes a shell command, exit terminates the program with the given status code. */ extern int system(char* cmd), void exit(int status); /* Interface to malloc, free and friends. These let you allocate dynamic buffers (represented as Pure pointer values) for various nasty purposes. The usual caveats apply, so *only* use these directly if you know what you're doing! */ extern void* calloc(int nmembers, int size); extern void* malloc(int size), void* realloc(void* ptr, int size); extern void free(void* ptr); /* Basic I/O interface. Note that this module also defines the standard I/O streams stdin, stderr and stdout as variables on startup. These are ready to be used with the operations defined below. Also note that some of these routines are actually overridden with more convenient Pure wrappers below. */ private c_fopen c_popen c_fclose c_pclose; extern FILE* fopen(char* name, char* mode) = c_fopen; extern FILE* popen(char* cmd, char* mode) = c_popen; extern int fclose(FILE* fp) = c_fclose, int pclose(FILE* fp) = c_pclose; extern int fflush(FILE* fp); private c_fgets c_gets; extern char* fgets(void* buf, int size, FILE* fp) = c_fgets; extern char* gets(void* buf) = c_gets; extern int fputs(char* s, FILE* fp), int puts(char* s); extern int fread(void* ptr, int size, int nmemb, FILE* fp); extern int fwrite(void* ptr, int size, int nmemb, FILE* fp); extern void clearerr(FILE* fp); extern int feof(FILE* fp), int ferror(FILE* fp); /* Pure wrappers for fopen/popen and fclose/pclose which take care of closing a file object automagically when it's garbage-collected. */ fopen name::string mode::string = if null fp then fp else sentry c_fclose fp when fp = c_fopen name mode end; popen name::string mode::string = if null fp then fp else sentry c_pclose fp when fp = c_popen name mode end; fclose fp::pointer = clear_sentry fp $$ c_fclose fp; pclose fp::pointer = clear_sentry fp $$ c_pclose fp; /* Pure wrappers for fgets and gets which handle the necessary buffering automatically. */ fgets f::pointer = read_a_line f buf "" when buf = malloc 1024 end with read_a_line f buf t = check s when s = c_fgets buf 1024 f end with check s::string = return (t+s) if done s; = read_a_line f buf (t+s) otherwise; check s = return s if null t; = return t otherwise; done s::string = feof f || ferror f || not null s && last s == "\n"; return x = free buf $$ x; end; end; gets = if null s then s else if last s == "\n" then init s else s when s = fgets stdin end; /* A variation of fgets which slurps in an entire text file at once. */ fget f::pointer = read_a_file f buf "" when buf = malloc 0x10000 end with read_a_file f buf t = check s when s = c_fgets buf 0x10000 f end with check s::string = return (t+s) if feof f || ferror f; = read_a_file f buf (t+s) otherwise; check s = return s if null t; = return t otherwise; return x = free buf $$ x; end; end; /* printf, scanf and friends. Since Pure cannot call C varargs functions directly, the runtime provides us with some functions which only process a single argument at a time. Our wrapper functions take or return a tuple of values, and check these against the format specifiers. Only simple formats derived from %cdioux, %efg, %s and %p are supported right now. */ /* printf/fprintf: Normally, these return the result of the underlying C routines (number of characters written, or negative on error). However, in case of an abnormal condition in the wrapper function (error in format string, argument mismatch), they will throw an exception. */ private pure_fprintf pure_fprintf_int pure_fprintf_double pure_fprintf_string pure_fprintf_pointer; extern int pure_fprintf(FILE *fp, char *format); extern int pure_fprintf_int(FILE *fp, char *format, int x); extern int pure_fprintf_double(FILE *fp, char *format, double x); extern int pure_fprintf_string(FILE *fp, char *format, char *x); extern int pure_fprintf_pointer(FILE *fp, char *format, void *x); private printf_split_format printf_format_spec printf_format_str; printf format::string args = fprintf stdout format args; fprintf fp::pointer format::string args = count when args = if tuplep args then list args else [args]; count, _ = catch error_handler (foldl (do_fprintf fp) (0,args) $ printf_split_format format); end with error_handler (printf_error res::int) = res, []; error_handler x = throw x otherwise; do_fprintf fp (count,arg:args) (printf_format_spec t s) = count, args when res = case t, arg of "c", x::int | "c", x::bigint | "d", x::int | "d", x::bigint = pure_fprintf_int fp s x; "g", x::double = pure_fprintf_double fp s x; "s", x::string = pure_fprintf_string fp s x; "p", x::string | "p", x::pointer = pure_fprintf_pointer fp s x; _ = throw (printf_value_error s arg); end; count = if res>=0 then count+res else throw (printf_error res); end; do_fprintf fp (count,args) (printf_format_str s) = count, args when res = pure_fprintf fp s; count = if res>=0 then count+res else throw (printf_error res); end; do_fprintf fp (count,[]) (printf_format_spec t s) = throw (printf_value_error s ()); do_fprintf fp (count,_) _ = throw (this_cant_happen count); end; printf_split_format format = regexg analyze "(%[-#0 ]?[0-9]*([.][0-9]*)?[cdiouxXeEfgGsp])|(%)|([^%]|%%)+" REG_EXTENDED format 0 with analyze info = if p>=0 then // format specifier printf_format_spec (format_type (last u)) u else if q>=0 then // error throw (printf_format_error q) else printf_format_str u // literal when _, u = reg 0 info; // matched portion of the format string p, _ = reg 1 info; // p>=0 indicates valid format specifier q, _ = reg 3 info; // q>=0 indicates unrecognized format specifier end; format_type x = if index "diouxX" x >= 0 then "d" else if index "eEfgG" x >= 0 then "g" else x; end; /* sprintf: Unlike the C routines, these just return the string result, or a null pointer in case of an error; otherwise, the error handling is the same as with printf/fprintf. The implementation actually uses snprintf for safety, a suitable output buffer is provided automatically. */ private pure_snprintf pure_snprintf_int pure_snprintf_double pure_snprintf_string pure_snprintf_pointer; extern int pure_snprintf(void *buf, int, char *format); extern int pure_snprintf_int(void *buf, int, char *format, int x); extern int pure_snprintf_double(void *buf, int, char *format, double x); extern int pure_snprintf_string(void *buf, int, char *format, char *x); extern int pure_snprintf_pointer(void *buf, int, char *format, void *x); sprintf format::string args = s when args = if tuplep args then list args else [args]; s, _ = catch error_handler (foldl do_sprintf ("",args) $ printf_split_format format); end with error_handler (printf_error res::int) = pointer 0, []; error_handler x = throw x otherwise; do_sprintf (u,arg:args) (printf_format_spec t s) = u, args when size = case t, arg of "s", x::string = #s+#x+1000; // guestimate _ = 64; // should be enough for number formats in any case end; buf = check_buf (malloc size); res = case t, arg of "c", x::int | "c", x::bigint | "d", x::int | "d", x::bigint = pure_snprintf_int buf size s x; "g", x::double = pure_snprintf_double buf size s x; "s", x::string = pure_snprintf_string buf size s x; "p", x::string | "p", x::pointer = pure_snprintf_pointer buf size s x; _ = free buf $$ throw (printf_value_error s arg); end; u = if res>=0 then u + cstring buf else free buf $$ throw (printf_error res); end; do_sprintf (u,args) (printf_format_str s) = u, args when size = #s+1000; buf = check_buf (malloc size); res = pure_snprintf buf size s; u = if res>=0 then u + cstring buf else free buf $$ throw (printf_error res); end; do_sprintf (u,[]) (printf_format_spec t s) = throw (printf_value_error s ()); do_sprintf (u,_) _ = throw (this_cant_happen u); check_buf buf = throw malloc_error if null buf; = buf otherwise; end; /* scanf, fscanf: These normally return a tuple (or singleton) with the converted values. An exception of the form 'scanf_error ret', where ret is the tuple of successfully converted values (which may be less than the number of requested input items), is thrown if eof was met or another error occurred while still reading. The handling of other abnormal conditions (e.g., error in format string) is analogous to printf et al. Also note that our implementation here doesn't accept any of the length modifiers used by the C routines. Floating point values will *always* be read in double precision, so you just specify "e", "g" etc. for these. OTOH, the "assignment suppression" flag "*" is understood; the corresponding items will not be returned. */ private pure_fscanf pure_fscanf_int pure_fscanf_double pure_fscanf_string pure_fscanf_pointer; extern int pure_fscanf(FILE *fp, char *format); extern int pure_fscanf_int(FILE *fp, char *format, int *x); extern int pure_fscanf_double(FILE *fp, char *format, double *x); extern int pure_fscanf_string(FILE *fp, char *format, void *x); extern int pure_fscanf_pointer(FILE *fp, char *format, void **x); private scanf_split_format scanf_format_spec scanf_format_str; scanf format::string = fscanf stdin format; fscanf fp::pointer format::string = tuple $ reverse ret when _, ret = catch error_handler (foldl (do_fscanf fp) (0,[]) $ scanf_split_format format); end with error_handler (scanf_error ret) = throw (scanf_error (tuple $ reverse ret)); error_handler x = throw x otherwise; do_fscanf fp (nread,ret) (scanf_format_spec t s) = nread+res, ret when // 16 bytes should be more than enough to hold any elementary type; // for the string case, see guestimate below. size, s = if t=="s" then guestimate s else 16, s; // Make sure to zero the buffer here, since %c doesn't store a terminal // null byte. buf = check_buf (calloc size 1); res = case t of "n" = pure_fscanf_int fp s buf; "d" = pure_fscanf_int fp s buf; "g" = pure_fscanf_double fp s buf; "s" = pure_fscanf_string fp s buf; "p" = pure_fscanf_pointer fp s buf; _ = throw (this_cant_happen ret); end; // Note: In difference to C scanf, the return value is the number of read // characters here, with -1 denoting an error condition. res = if res>=0 then res else free buf $$ throw (scanf_error ret); val = case t of "n" = nread+get_int buf; "d" = get_int buf; "g" = get_double buf; "s" = cstring buf; "p" = get_pointer buf; _ = throw (this_cant_happen ret); end; _ = if t=="s" then () else free buf; ret = val:ret; end; do_fscanf fp (nread,ret) (scanf_format_str s) = nread+res, ret when res = pure_fscanf fp s; ret = if res>=0 then ret else throw (scanf_error ret); end; do_fscanf _ (_,ret) _ = throw (this_cant_happen ret); check_buf buf = throw malloc_error if null buf; = buf otherwise; // Compute a reasonable size for a string buffer; if necessary, modify the // field width of the format accordingly. guestimate "%c" = 2, "%c"; guestimate format = n, format when 1,0,_,1,s = regex "^%([0-9]*)" REG_EXTENDED format 0; // Leave one extra byte for the null terminator. n, format = if null s then 1025, "%1024"+tail format else eval s+1, format; end; end; scanf_split_format format = regexg analyze "(%[*]?[0-9]*([cdiouxXneEfgsp]|\\[\\^?\\]?[^]]+\\]))|(%)|([^%]|%%)+" REG_EXTENDED format 0 with analyze info = if p>=0 && u!1!="*" then // format specifier (scanf_format_spec t (kludge t u) when t = format_type (last u) end) else if q>=0 then // error throw (scanf_format_error q) else scanf_format_str u // literal when _, u = reg 0 info; // matched portion of the format string p, _ = reg 1 info; // p>=0 indicates valid format specifier q, _ = reg 3 info; // q>=0 indicates unrecognized format specifier end; format_type x = if x == "n" then "n" else if index "diouxX" x >= 0 then "d" else if index "eEfg" x >= 0 then "g" else if x=="]" || x=="c" then "s" else x; // fix up the format specifier for double values (needs 'l' modifier) kludge "g" u = init u + "l" + last u; kludge _ u = u otherwise; end; /* sscanf: This works exactly like fscanf, but input comes from a string (first argument) rather than a file. */ private pure_sscanf pure_sscanf_int pure_sscanf_double pure_sscanf_string pure_sscanf_pointer; extern int pure_sscanf(char *buf, char *format); extern int pure_sscanf_int(char *buf, char *format, int *x); extern int pure_sscanf_double(char *buf, char *format, double *x); extern int pure_sscanf_string(char *buf, char *format, void *x); extern int pure_sscanf_pointer(char *buf, char *format, void **x); sscanf s::string format::string = tuple $ reverse ret when _, _, ret = catch error_handler (foldl do_sscanf (s,0,[]) $ scanf_split_format format); end with error_handler (scanf_error ret) = throw (scanf_error (tuple $ reverse ret)); error_handler x = throw x otherwise; do_sscanf (u,nread,ret) (scanf_format_spec t s) = u, nread+res, ret when // 16 bytes should be more than enough to hold any elementary type; // for the string case, see guestimate below. size, s = if t=="s" then guestimate s else 16, s; // Make sure to zero the buffer here, since %c doesn't store a terminal // null byte. buf = check_buf (calloc size 1); res = case t of "n" = pure_sscanf_int u s buf; "d" = pure_sscanf_int u s buf; "g" = pure_sscanf_double u s buf; "s" = pure_sscanf_string u s buf; "p" = pure_sscanf_pointer u s buf; _ = throw (this_cant_happen ret); end; // Note: In difference to C scanf, the return value is the number of read // characters here, with -1 denoting an error condition. res = if res>=0 then res else free buf $$ throw (scanf_error ret); val = case t of "n" = nread+get_int buf; "d" = get_int buf; "g" = get_double buf; "s" = cstring buf; "p" = get_pointer buf; _ = throw (this_cant_happen ret); end; _ = if t=="s" then () else free buf; ret = val:ret; u = drop res u; end; do_sscanf (u,nread,ret) (scanf_format_str s) = u, nread+res, ret when res = pure_sscanf u s; ret = if res>=0 then ret else throw (scanf_error ret); u = drop res u; end; do_sscanf (_,_,ret) _ = throw (this_cant_happen ret); check_buf buf = throw malloc_error if null buf; = buf otherwise; // Compute a reasonable size for a string buffer; if necessary, modify the // field width of the format accordingly. guestimate "%c" = 2, "%c"; guestimate format = n, format when 1,0,_,1,s = regex "^%([0-9]*)" REG_EXTENDED format 0; // Leave one extra byte for the null terminator. n, format = if null s then 1025, "%1024"+tail format else eval s+1, format; end; end; /* readline: A simple wrapper around GNU readline which reads a line from stdin with prompt and command line editing. (Note that we cannot simply use the C readline function directly in Pure since it would leak memory on the return value.) We also provide readline's companion, the add_history function, which you need to add strings to readline's history. */ private c_readline; extern void* readline(char* prompt) = c_readline; extern void add_history(char* s); readline prompt::string = cstring $ c_readline prompt; /* Shell globbing (fnmatch and glob). For fnmatch we provide a wrapper which returns a simple truth value (1 = match, 0 = no match) instead of an error code. For glob we provide a wrapper which only takes the pattern and the flags as arguments and returns a Pure list with the matches (unless there is an error in which case the integer result code is returned). The available flag values and glob error codes are available as symbolic FNM_* and GLOB_* constants defined as variables in the global environment. See the fnmatch(3) and glob(3) manpages for the meaning of these. In extension to POSIX, Pure also provides the constant GLOB_SIZE which indicates the buffer size required for glob's globptr argument. */ private c_fnmatch c_glob globfree globlist; extern int fnmatch(char* pat, char* s, int flags) = c_fnmatch; extern int glob(char* pat, int flags, void* errfunc, void* globptr) = c_glob; extern void globfree(void* globptr); // runtime function to decode a globptr into a Pure string list extern expr* globlist(void* globptr); fnmatch pat::string s::string flags::int = c_fnmatch pat s flags == 0; glob pat::string flags::int = result when globptr = calloc 1 GLOB_SIZE; result = c_glob pat flags (pointer 0) globptr; result = if result==0 then globlist globptr else result; _ = globfree globptr $$ free globptr; end; /* POSIX regex matching (regcomp and regexec). The C functions have a somewhat difficult calling sequence, hence we provide a couple of high-level wrapper functions for use in Pure programs below. */ private regcomp regexec regerror regfree regmatches reglist; extern int regcomp(void* regptr, char* pat, int cflags); extern int regexec(void* regptr, char* s, int n, void* matches, int eflags); extern int regerror(int errcode, void* regptr, void* buf, int size); extern void regfree(void* regptr); // runtime: return the number of subpatterns and storage for the match result extern expr* regmatches(void* regptr, int cflags); // runtime: decode the regexec result into a Pure tuple of (pos,substr) pairs extern expr* reglist(void* regptr, void* s, void* matches); /* regex: A convenience function which compiles and matches a regex in one go, and returns the list of submatches (if any). The arguments are: - pat::string, the regular expression pattern; - cflags::int, the compilation flags (bitwise or of any of the flags accepted by regcomp(3)); - s::string, the subject string to be matched; - eflags::int, the matching execution flags (bitwise or of any of the flags accepted by regexec(3)). Symbolic REG_* constants are provided for the different flag values, see the regcomp(3) manpage for an explanation of these. In extension to POSIX, Pure also provides the REG_SIZE constant which indicates the size needed for the compiled regex buffer (regptr argument). Depending on the flags and the outcome of the operation, the result of this function can take one of the following forms: - regerr code msg: This indicates an error during compilation of the pattern (e.g., if there was a syntax error in the pattern). code is the nonzero integer code returned by regcomp, and msg is the corresponding error message string, as returned by regerror. You can redefine the regerr function as appropriate for your application (e.g., if you'd like to print an error message or throw an exception). - 0 or 1: Just a truth value indicates whether the pattern matched or not. This will be the form of the result if the REG_NOSUB flag was specified for compilation, indicating that no submatch information is to be computed. - 0 (indicating no match), or 1 (indicating a successful match), where the latter value is followed by a tuple of (pos,substr) pairs for each submatch. This will be the form of the result only if the REG_NOSUB flag was *not* specified for compilation, so that submatch information is available. Note that, according to POSIX semantics, a return value of 1 does *not* generally mean that the entire subject string was matched, unless you explicitly tie the pattern to the beginning (^) and end ($) of the string. If the result takes the latter form, each (pos,substr) pair indicates a portion of the subject string which was matched; pos is the position at which the match starts, and substr is the substring (starting at position pos) which was matched. The first (pos,substr) pair always indicates which portion of the string was matched by the entire pattern, the remaining pairs represent submatches for the parenthesized subpatterns of the pattern, as described on the regcomp(3) manual page. Note that some submatches may be empty (if they matched the empty string), in which case a pair (pos,"") indicates the (nonnegative) position pos where the subpattern matched the empty string. Other submatches may not participate in the match at all, in which case the pair (-1,"") is returned. */ regex pat::string cflags::int s::string eflags::int = result when result = regcomp regptr pat cflags; result = if result==0 then match else regerr result (decode result); _ = regfree regptr $$ free regptr; end with match = result when n, matches = regmatches regptr cflags; result = regexec regptr s n matches eflags; result = if result==0 then (1, reglist regptr s matches) else 0; end; decode n::int = cstring buf when size = regerror n regptr (pointer 0) 0; buf = malloc size; _ = regerror n regptr buf size; end; end when regptr = calloc 1 REG_SIZE; end; /* The following helper functions are provided to analyze the result returned by regex. */ /* reg_result returns the result of a regex call, i.e., a regerr term if compilation failed, and a flag indicating whether the match was successful otherwise. */ reg_result (regerr code msg) = regerr code msg; reg_result b::int = b; reg_result (b::int,_) = b; /* reg_info returns the submatch info if any, otherwise it returns (). */ reg_info (_::int,info) = info; reg_info _ = () otherwise; /* reg n info returns the nth submatch of the given submatch info, where info is the result of a reg_info call. */ reg 0 (p,s,_) = p,s; reg 0 (p,s) = p,s; reg n::int (_,_,infos) = reg (n-1) infos if n>0; /* regs info returns all valid submatches, i.e., the list of all triples (n,p,s) for which reg n == (p,s) with p>=0. */ regs info = myregs 0 info with myregs n (p,s,infos) = (n,p,s):myregs (n+1) infos if p>=0; = myregs (n+1) infos otherwise; myregs n (p,s) = [(n,p,s)] if p>=0; = [] otherwise; end; /* regexg: Perform a global regular expression search. This routine will scan the entire string for (non-overlapping) instances of the pattern, applies the given function f to the reg_info (see above) for each match, and collects all results in a list. NOTE: Never specify the REG_NOSUB flag with this function, it needs the submatch info. Also, this function will never match the empty string, to prevent looping on pathological patterns. */ regexg f pat::string cflags::int s::string eflags::int = result when result = regcomp regptr pat cflags; result = if result==0 then (match 0 s with match offs "" = []; match offs s = result when result = regexec regptr s n matches eflags; result = if result==0 then (if null u then match (offs+q) (drop p s) else f info : match (offs+q) (drop q s) when info = reglist regptr s matches; p, u = reg 0 info; q = p+#u+null u; info = xlat_pos offs info; end with xlat_pos offs (p, u, info) = offs+p, u, xlat_pos offs info if p>=0; = p, u, xlat_pos offs info otherwise; xlat_pos offs (p, u) = offs+p, u if p>=0; = p, u otherwise; end) else []; end; end when n, matches = regmatches regptr cflags end) else regerr result (decode result); _ = regfree regptr $$ free regptr; end with decode n::int = cstring buf when size = regerror n regptr (pointer 0) 0; buf = malloc size; _ = regerror n regptr buf size; end; end when regptr = calloc 1 REG_SIZE; end; /* regexgg works like regexg, but allows overlapping matches. */ regexgg f pat::string cflags::int s::string eflags::int = result when result = regcomp regptr pat cflags; result = if result==0 then (match s with match offs "" = []; match offs s = result when result = regexec regptr s n matches eflags; result = if result==0 then (if null u then match (offs+q) (drop q s) else f info : match (offs+q) (drop q s) when info = reglist regptr s matches; p, u = reg 0 info; q = p+1; info = xlat_pos offs info; end with xlat_pos offs (p, u, info) = offs+p, u, xlat_pos offs info if p>=0; = p, u, xlat_pos offs info otherwise; xlat_pos offs (p, u) = offs+p, u if p>=0; = p, u otherwise; end) else []; end; end when n, matches = regmatches regptr cflags end) else regerr result (decode result); _ = regfree regptr $$ free regptr; end with decode n::int = cstring buf when size = regerror n regptr (pointer 0) 0; buf = malloc size; _ = regerror n regptr buf size; end; end when regptr = calloc 1 REG_SIZE; end; /* regsub replaces all non-overlapping instances of a pattern with a computed substitution string. To these ends, the given function f is applied to the reg_info (see above) for each match; it should return a string value. The result string is then obtained by concatenating f info for all matches, with the unmatched portions of the string in between. */ regsub f pat::string cflags::int s::string eflags::int = result when result = regcomp regptr pat cflags; result = if result==0 then (match s with match "" = ""; match s = result when result = regexec regptr s n matches eflags; result = if result==0 then (if null u then take q s + match (drop q s) else take p s + f info + match (drop q s) when info = reglist regptr s matches; p, u = reg 0 info; q = p+#u+null u; end) else s; end; end when n, matches = regmatches regptr cflags end) else regerr result (decode result); _ = regfree regptr $$ free regptr; end with decode n::int = cstring buf when size = regerror n regptr (pointer 0) 0; buf = malloc size; _ = regerror n regptr buf size; end; end when regptr = calloc 1 REG_SIZE; end; /* regsplit splits a string into constituents delimited by substrings matching the given pattern. */ regsplit pat::string cflags::int s::string eflags::int = result when result = regcomp regptr pat cflags; result = if result==0 then (match s with match "" = [""]; match s = result when result = regexec regptr s n matches eflags; result = if result==0 then (take p s : match (drop q s) when info = reglist regptr s matches; p, u = reg 0 info; q = p+#u+null u; end) else [s]; end; end when n, matches = regmatches regptr cflags end) else regerr result (decode result); _ = regfree regptr $$ free regptr; end with decode n::int = cstring buf when size = regerror n regptr (pointer 0) 0; buf = malloc size; _ = regerror n regptr buf size; end; end when regptr = calloc 1 REG_SIZE; end;