/* Basic string functions. */

/* Copyright (c) 2008 by Albert Graef <Dr.Graef@t-online.de>.

   This file is part of the Pure programming language and system.

   Pure is free software: you can redistribute it and/or modify it under the
   terms of the GNU General Public License as published by the Free Software
   Foundation, either version 3 of the License, or (at your option) any later
   version.

   Pure is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
   details.

   You should have received a copy of the GNU General Public License along
   with this program.  If not, see <http://www.gnu.org/licenses/>. */

/* Convert between Pure expressions and strings. The str function gives you
   the print representation of an expression in Pure syntax, as a string. The
   eval function does the opposite, by parsing and returning the value of an
   expression specified as a string in Pure syntax. (In fact, eval goes well
   beyond this, as it can parse and execute arbitrary Pure code. In that case
   it will return the last computed expression, if any.) Errors are reported
   with the lasterr routine. This string value will be nonempty iff an error
   was encountered during the most recent invocation of eval(). In that case
   each reported error message is terminated with a newline character. */

// The C routine str is declared under the private alias pure_str, so that the
// name str itself can be given to the wrapper defined below.
private pure_str;
extern void* str(expr*) = pure_str;
extern expr* eval(char*); // IMPURE!
extern char* lasterr();

// pure_str hands back a raw pointer, which cstring turns into a Pure string.
str x = cstring p when p = pure_str x end;

/* Convert between Unicode character codes and single character strings. */

private string_chr string_ord;
extern expr* string_chr(int);
extern expr* string_ord(void*);

// chr only applies to positive character codes.
chr code::int = string_chr code if code>0;

// ord only applies to strings consisting of exactly one character.
ord ch::string = string_ord ch if #ch==1;

/* Low-level conversions. The following routines are provided by the runtime
   to turn a C char* pointer into a Pure string. (Normally you don't have to
   worry about this, because the C interface already takes care of the
   necessary conversions, but in some low-level code these operations are
   useful.) The routines with a _dup suffix take a copy of the original
   string, the ones without take ownership of the string pointer (assuming it
   to be malloc'ed). The _cstring routines also convert from the system
   encoding. */

private pure_string pure_cstring pure_string_dup pure_cstring_dup;
extern expr* pure_string(void* p);
extern expr* pure_string_dup(void* p);
extern expr* pure_cstring(void* p);
extern expr* pure_cstring_dup(void* p);

// Variants without the _dup suffix: the pointer argument is handed to the
// runtime routine as is.
string p::pointer  = pure_string p;
cstring p::pointer = pure_cstring p;

// Variants with the _dup suffix: these dispatch to the copying runtime
// routines.
string_dup p::pointer  = pure_string_dup p;
cstring_dup p::pointer = pure_cstring_dup p;

/* Construct a "byte string" from a string. The result is a raw pointer object
   pointing to the converted string. The original string is copied (and, in
   the case of pure_byte_cstring, converted to the system encoding). The
   resulting byte string is a malloc'ed pointer which can be used like a C
   char* (employing pointer arithmetic etc.; the usual caveats apply), and has
   to be freed explicitly by the caller when no longer needed. */

private pure_byte_string pure_byte_cstring;
extern expr* pure_byte_string(void *s);
extern expr* pure_byte_cstring(void *s);

// Both wrappers only accept proper Pure strings and pass them straight on to
// the corresponding runtime routine.
byte_string t::string  = pure_byte_string t;
byte_cstring t::string = pure_byte_cstring t;

/* Character arithmetic. This allows single-character strings to be used,
   e.g., in arithmetic sequences like "a".."z". */

// Shift a single character forward by k code points.
ch::string+k::int = chr (ord ch+k) if #ch==1;

// Shift a single character backward by k code points; only applies when k
// does not exceed the character's own code.
ch::string-k::int = chr (ord ch-k) if #ch==1 && ord ch>=k;

// Distance (in code points) between two single characters.
ch::string-other::string = ord ch-ord other if #ch==1 && #other==1;

/* Basic string operations: size, indexing, and concatenation. These properly
   deal with multibyte characters. Therefore indexing every character in a
   string takes quadratic time; as a remedy, we also offer a linear-time
   operation to determine the list of all characters of a string in one go. */

private string_null string_size string_concat string_char_at string_chars;
extern bool string_null(void*);
extern int string_size(void*);
extern expr* string_char_at(void*, int);
extern expr* string_concat(void*, void*);
extern expr* string_chars(void*);

// Emptiness test and size.
null str1::string = string_null str1;
#str1::string = string_size str1;

// Indexing is zero-based and only applies to positions inside the string.
str1::string!i::int = string_char_at str1 i if i>=0 && i<#str1;

// Concatenation of two strings.
str1::string+str2::string = string_concat str1 str2;

// The list of all characters of a string.
chars str1::string = string_chars str1;

/* Lexicographic string comparison. */

private strcmp;
extern int strcmp(void*, void*);

// Every relation below is decided by comparing the result of strcmp with zero.

// Equality and inequality.
s::string==t::string = strcmp s t == 0;
s::string!=t::string = strcmp s t != 0;

// Strict ordering.
s::string<t::string = strcmp s t < 0;
s::string>t::string = strcmp s t > 0;

// Non-strict ordering.
s::string<=t::string = strcmp s t <= 0;
s::string>=t::string = strcmp s t >= 0;

/* Compute and find substrings of a string. */

private string_substr string_index;
extern expr* string_substr(void*, int, int);
extern int string_index(void*, void*);

// Negative positions and sizes are replaced by zero before they are passed to
// the runtime.
substr s::string pos::int size::int
  = string_substr s (clamp pos) (clamp size)
    with clamp k = if k>=0 then k else 0 end;

// Look up the string u in the string s.
index s::string u::string = string_index s u;

/* Concatenate a list of strings. */

private string_concat_list;
extern expr* string_concat_list(expr*);

// Only applies when the argument is a list whose members are all strings.
strcat strs = string_concat_list strs if listp strs && all stringp strs;

/* Concatenate a list of strings, interpolating a given delimiter. */

// Nothing to join: the result is the empty string.
join delim::string [] = "";

// The first string is emitted as is; every following string gets the
// delimiter put in front of it.
join delim::string (first::string:rest)
  = first + strcat [delim+part | part = rest]
    if listp rest && all stringp rest;

/* Split a string into parts delimited by the given (nonempty) string. */

// The rule only applies for a nonempty delimiter. An empty input string
// yields the empty list.
split delim::string s::string
  = if null s then [] else chop delim s
with
  // Cut off the part in front of the next delimiter and carry on with the
  // text following it; if there is no further delimiter, the remaining text
  // is the final part.
  chop sep rest = case index rest sep of
                    pos = take pos rest : chop sep (drop (pos+len) rest)
                            if pos>=0;
                        = [rest] otherwise;
                  end;
end when len = #delim end if not null delim;

/* Conversions between strings and lists, streams and tuples. */

// From strings: each conversion goes through the list of characters.
list str1::string   = chars str1;
stream str1::string = stream (chars str1);
tuple str1::string  = tuple (chars str1);

// To strings: the empty list gives the empty string ...
string [] = "";
// ... and a list starting with a string is concatenated, provided that all
// of its members are strings.
string strs@(_::string:_) = strcat strs if all stringp strs;

/* Conversions between strings and matrices. */

// A string becomes the matrix built from its list of characters.
matrix str1::string = matrix (chars str1);

// A matrix whose elements are all strings is converted via its list form.
string m::matrix = string (list m) if all stringp m;

/* Define the customary list operations on strings, so that these can mostly
   be used as if they were lists. */

// Mixed concatenation of a string with a list. The string operand is replaced
// by its list of characters, so the result is a list in all four cases.

// String on the left, empty or nonempty list on the right.
s::string+[]            = chars s;
s::string+xs@(_:_)      = chars s+xs;

// List on the left, string on the right. The empty-list rule must have the
// plain form []+s: a left-hand side written as []+s+[] parses as ([]+s)+[],
// which does not cover the application of + to [] and a string.
[]+s::string            = chars s;
xs@(_:_)+s::string      = xs+chars s;

// Reversal works on the list of characters; strcat puts the reversed
// characters back together.
reverse str1::string = strcat (reverse (chars str1));

// catmap and cycle are applied to the list of characters.
catmap g str1::string = catmap g (chars str1);
cycle str1::string    = cycle (chars str1);

// cyclen does not apply to the empty string.
cyclen k::int str1::string = cyclen k (chars str1) if not null str1;

// Slicing. These rules go through substr and do not expand the string into
// its list of characters.
take k::int s::string = substr s 0 k;
drop k::int s::string = substr s k (#s-k);
init s::string        = substr s 0 (#s-1) if not null s;
tail s::string        = substr s 1 (#s-1) if not null s;

// First and last character. The guards make these rules inapplicable to the
// empty string.
head s::string = s!0 if not null s;
last s::string = s!(#s-1) if not null s;

// Selecting characters by a predicate. The surviving characters are put back
// together with strcat.
filter p s::string    = strcat (filter p (chars s));
takewhile p s::string = strcat (takewhile p (chars s));
dropwhile p s::string = strcat (dropwhile p (chars s));

// Quantifiers over the characters of a string.
all p s::string = all p (chars s);
any p s::string = any p (chars s);

// Mapping and iteration over the characters of a string.
map g s::string = map g (chars s);
do g s::string  = do g (chars s);

// Folds over the characters of a string.
foldl g z s::string = foldl g z (chars s);
foldl1 g s::string  = foldl1 g (chars s);
foldr g z s::string = foldr g z (chars s);
foldr1 g s::string  = foldr1 g (chars s);

// Scans over the characters of a string.
scanl g z s::string = scanl g z (chars s);
scanl1 g s::string  = scanl1 g (chars s);
scanr g z s::string = scanr g z (chars s);
scanr1 g s::string  = scanr1 g (chars s);
// Each rule below applies the list operation of the same name to the
// character lists of its string arguments.

// Two strings at a time.
zip s1::string s2::string       = zip (chars s1) (chars s2);
zipwith g s1::string s2::string = zipwith g (chars s1) (chars s2);
dowith g s1::string s2::string  = dowith g (chars s1) (chars s2);

// Three strings at a time.
zip3 s1::string s2::string s3::string
  = zip3 (chars s1) (chars s2) (chars s3);
zipwith3 g s1::string s2::string s3::string
  = zipwith3 g (chars s1) (chars s2) (chars s3);
dowith3 g s1::string s2::string s3::string
  = dowith3 g (chars s1) (chars s2) (chars s3);