(*
 * This file is part of Barista.
 * Copyright (C) 2007-2014 Xavier Clerc.
 *
 * Barista is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Barista is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

(** Lexer states for UTF8 strings.

    This definition is independent of the actual implementation,
    that can be based on either the Camomile library (available at
    http://camomile.sourceforge.net), or the OCaml-Java runtime
    library. *)


(** Errors that can be reported by a lexer state.

    Judging by the documentation of the methods of class [t], these values
    describe the failures signalled through [Exception]. The
    [BARISTA_ERROR] construct is not plain OCaml; it is presumably
    expanded by a Barista syntax extension (to be confirmed against the
    build setup).

    - [End_of_lexer]: presumably reported when a character is requested
      while the end of the string has been reached;
    - [Invalid_consume]: presumably reported when [consume_only] is passed
      a character different from the next one of the lexer; which
      component is the expected character and which is the actual one
      should be checked against the implementation. *)
BARISTA_ERROR =
  | End_of_lexer
  | Invalid_consume of UChar.t * UChar.t

class t : UTF8.t -> object

  method is_available : bool
      (** Returns [true] iff there is at least one character available on
          the lexer. *)

  method peek : UChar.t
      (** Returns the next character if one is available (presumably
          without consuming it, by contrast with [consume_char] - to be
          confirmed against the implementation).

          Raises [Exception] if end of string is encountered (presumably
          with error [End_of_lexer]). *)

  method look_ahead_string : UTF8.t -> bool
      (** Returns [true] iff the next character of the lexer is equal to
          one of the characters present in the passed string.

          Raises [Exception] if end of string is encountered (presumably
          with error [End_of_lexer]). *)

  method look_ahead_list : UChar.t list -> bool
      (** Returns [true] iff the next character of the lexer is equal to
          one of the characters of the passed list.

          Raises [Exception] if end of string is encountered (presumably
          with error [End_of_lexer]). *)

  method look_ahead : UChar.t -> bool
      (** Returns [true] iff the next character of the lexer is equal to
          the passed one.

          Raises [Exception] if end of string is encountered (presumably
          with error [End_of_lexer]). *)

  method consume_char : UChar.t
      (** Consumes the next character and returns it.

          Raises [Exception] if end of string is encountered (presumably
          with error [End_of_lexer]). *)

  method consume : unit
      (** Consumes (i.e. {i skips}) the next character.

          Raises [Exception] if end of string is encountered (presumably
          with error [End_of_lexer]). *)

  method consume_only : UChar.t -> unit
      (** Consumes (i.e. {i skips}) the next character only if it is equal
          to the passed one.

          Raises [Exception] if end of string is encountered (presumably
          with error [End_of_lexer]), or if the passed character is not
          equal to the next one of the lexer (presumably with error
          [Invalid_consume]). *)

  method consume_until_string : UTF8.t -> UTF8.t
      (** Consumes characters until a character equal to one present in
          the passed string is read from the lexer, and then returns the
          string of consumed characters (none of the passed characters is
          consumed).

          Raises [Exception] if end of string is encountered (presumably
          with error [End_of_lexer]). *)

  method consume_until_list : UChar.t list -> UTF8.t
      (** Consumes characters until a character equal to one of the
          passed list is read from the lexer, and then returns the string
          of consumed characters (none of the passed characters is
          consumed).

          Raises [Exception] if end of string is encountered (presumably
          with error [End_of_lexer]). *)

  method consume_until : UChar.t -> UTF8.t
      (** Consumes characters until a character equal to the passed one is
          read from the lexer, and then returns the string of consumed
          characters (the passed character is not consumed).

          Raises [Exception] if end of string is encountered (presumably
          with error [End_of_lexer]). *)

  method consume_all : UTF8.t
      (** Consumes all remaining characters, and returns them as a
          string. *)

  method consume_whitespace : unit
      (** Consumes all whitespace characters until a non-whitespace one
          is encountered. *)

end
(** This class encapsulates the state of a lexer over a UTF8 string that
    is passed at instance creation.

    Unless that string is empty, the next character to be read from a
    newly-created instance is its first character. *)
