(*
 * This file is part of Barista.
 * Copyright (C) 2007-2014 Xavier Clerc.
 *
 * Barista is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Barista is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

(** UTF8 strings.

    This definition is independent of the actual implementation,
    which can be based on either the Camomile library (available at
    http://camomile.sourceforge.net) or the OCaml-Java runtime
    library. *)


type t
(** The type of UTF8 strings. It is abstract: values are built and
    inspected only through the functions of this module. *)

(* Error cases reported by this module. Each constructor carries the
   value(s) involved in the failure: a UTF8 string, a byte sequence, a
   standard string, or one or more integers.
   NOTE(review): [BARISTA_ERROR] is not plain OCaml syntax; it is
   presumably expanded by a preprocessor into an error type together
   with the [Exception] exception mentioned in the documentation of the
   functions below -- confirm against the syntax extension. *)
BARISTA_ERROR =
  | Unable_to_convert_to_bytes of t
  | Unable_to_convert_from_bytes of Bytes.t
  | Unable_to_convert_to_utf8 of string
  | Unable_to_convert_from_utf8 of t
  | Invalid_index of int * int
  | Invalid_substring of int * int * int
  | Invalid_escaped_string of t

val make_of_list : UChar.t list -> t
(** [make_of_list l] constructs a UTF8 string from the list [l] of
    Unicode characters. *)

val to_string : t -> string
(** [to_string s] converts the UTF8 string [s] into a standard string.

    Raises [Exception] if conversion fails. *)

val to_string_noerr : t -> string
(** [to_string_noerr s] is equivalent to [to_string s], except that any
    exception is discarded, in which case ["???"] is returned. *)

val of_string : string -> t
(** [of_string s] converts the standard string [s] into a UTF8 string.

    Raises [Exception] if conversion fails. *)

val to_bytes : t -> Bytes.t
(** [to_bytes s] converts the UTF8 string [s] into bytes.

    The bytes use the {i modified} UTF8 format, as found in Java class
    files (cf. the documentation of the {i java.io.DataInput} class).

    Raises [Exception] if conversion fails. *)

val of_bytes : Bytes.t -> t
(** [of_bytes b] converts the bytes [b] into a UTF8 string.

    The bytes are expected in the {i modified} UTF8 format, as found in
    Java class files (cf. the documentation of the {i java.io.DataInput}
    class).

    Raises [Exception] if conversion fails. *)

val to_latin1 : t -> Bytes.t
(** [to_latin1 s] converts the UTF8 string [s] into bytes, using the
    {i latin1} encoding.

    Raises [Exception] if conversion fails. *)

val of_latin1 : Bytes.t -> t
(** [of_latin1 b] converts the bytes [b] into a UTF8 string, using the
    {i latin1} encoding.

    Raises [Exception] if conversion fails. *)

val of_char : char -> t
(** [of_char c] converts the standard character [c] into a UTF8 string.

    Raises [Exception] if conversion fails. *)

val of_uchar : UChar.t -> t
(** [of_uchar ch] converts the Unicode character [ch] into a UTF8
    string. *)

val length : t -> int
(** [length str] returns the length of [str], counted in characters
    (not in bytes). *)

val get : t -> int -> UChar.t
(** [get str idx] returns the character of [str] at index [idx].

    Raises [Exception] if [idx] is not a valid index. *)

val equal : t -> t -> bool
(** [equal s1 s2] tests whether the UTF8 strings [s1] and [s2] are
    equal. *)

val compare : t -> t -> int
(** [compare s1 s2] compares the UTF8 strings [s1] and [s2].

    NOTE(review): the sign convention of the result is presumably the
    one of the standard [compare] function -- confirm against the
    implementation. *)

val hash : t -> int
(** [hash str] returns a hash value for the UTF8 string [str]. *)

val index_from : t -> int -> UChar.t -> int
(** [index_from str idx ch] returns the lowest index greater than or
    equal to [idx] at which string [str] contains a character equal to
    [ch].

    Raises [Not_found] if no such index exists. *)

val rindex_from : t -> int -> UChar.t -> int
(** [rindex_from str idx ch] returns the highest index less than or
    equal to [idx] at which string [str] contains a character equal to
    [ch].

    Raises [Not_found] if no such index exists. *)

val substring : t -> int -> int -> t
(** [substring str first last] returns a string whose characters are
    those of [str] from index [first] to index [last] (both inclusive).

    Returns an empty string iff [last < first].

    Raises [Exception] if [first] or [last] is not a valid index. *)

val (++) : t -> t -> t
(** [s1 ++ s2] returns the concatenation of the UTF8 strings [s1] and
    [s2]. *)

val concat : t list -> t
(** [concat l] returns the concatenation of all the strings of list
    [l]. *)

val concat_sep : t -> t list -> t
(** [concat_sep sep l] returns the concatenation of all the strings of
    list [l], the separator [sep] being inserted between each pair of
    consecutive strings. *)

val concat_sep_map : t -> ('a -> t) -> 'a list -> t
(** [concat_sep_map sep f l] is equivalent to
    [concat_sep sep (List.map f l)]: every element of [l] is converted
    to a string through [f] before concatenation. *)

val concat_sep_map_last : t -> ('a -> t) -> ('a -> t) -> 'a list -> t
(** [concat_sep_map_last sep f1 f2 l] is similar to
    [concat_sep_map sep f1 l], except that [f2] is used instead of [f1]
    for the last element of [l]. *)

val replace : UChar.t -> UChar.t -> t -> t
(** [replace ch1 ch2 str] returns a copy of [str] in which every
    character equal to [ch1] has been replaced by [ch2]. *)

val contains : UChar.t -> t -> bool
(** [contains ch str] returns [true] iff string [str] contains at least
    one character equal to [ch]. *)

val split : UChar.t -> t -> t list
(** [split ch str] returns the list of strings obtained by splitting
    [str] using character [ch] as the delimiter. *)

val split_quotes : UChar.t -> t -> t list
(** [split_quotes ch str] is similar to [split ch str], with support
    for quoting.

    NOTE(review): the exact quoting rules (quote characters, escaping)
    are not specified in this interface -- see the implementation. *)

val trim : t -> t
(** [trim s] returns the passed string with neither leading nor trailing
    whitespace. *)

val starts_with : t -> t -> bool
(** [starts_with prefix str] returns [true] iff [str] starts with
    [prefix]. Note that the prefix is the {i first} argument. *)

val ends_with : t -> t -> bool
(** [ends_with suffix str] returns [true] iff [str] ends with [suffix].
    Note that the suffix is the {i first} argument. *)

val escape : t -> t
(** [escape s] returns the literal constant string corresponding to the
    passed string: a leading and a trailing double quote are added, and
    every quote inside the passed string is escaped by a backslash. *)

val unescape : t -> t
(** [unescape str] returns the string corresponding to the passed literal
    constant string: both leading and trailing quotes are removed, and
    escape sequences are converted.

    NOTE(review): presumably raises [Exception] when [str] is not a
    well-formed literal -- confirm against the implementation. *)

val escape_char : UChar.t -> t
(** [escape_char ch] returns the literal constant string corresponding to
    the passed character. A leading and a trailing single quote are
    added. *)

module Hashtbl : Hashtbl.S with type key = t
(** Hashtables with UTF8 strings as keys. Inside this interface, the
    name shadows the standard [Hashtbl] module from this point on. *)

module Map : Map.S with type key = t
(** Maps with UTF8 strings as keys. Inside this interface, the name
    shadows the standard [Map] module from this point on. *)

module Set : Set.S with type elt = t
(** Sets with UTF8 strings as elements. Inside this interface, the name
    shadows the standard [Set] module from this point on. *)
