This is a simple example of LL(1) parsing, by hand, in O'Caml and C++. The O'Caml version is the original, and has better comments.

LL(1) parsing is a top-down (analyse descendante en Franšais) parsing technique, also known as recursive descent (indeed in our example there is no explicit stack -- instead the language's call stack is used). Some interesting online references about parsing in general, and LL(1) in particular are Pierre-Yves Schobbens's slides (in French), and the online book, Parsing Techniques - A Practical Guide (in English).

Download the O'Caml and C++ code. Underneath, the parsing part is shown. The lexer is not shown, look at the code if you want to see it (it is a very simple module with 3 public functions: create_from_string, peek, and next).

The naive grammar for arithmetic expressions

The grammar

1. original (ambiguous) grammar, for elementary arithmetic expressions:
   E -> ( E ) { $2 }
   E -> E + E { $1 + $3 }
   E -> E * E { $1 * $3 }
   E -> int   { $1 }

   with the tokens: int = [0-9]+, (, ), +, *

2. the same without left recursion (we don't bother with actions at this point):
   E -> ( E )
   E -> int
   E -> int E'
   E' -> + E
   E' -> * E
   E' -> + E E'
   E' -> * E E'

3. now with factored right parts (we also re-added actions):
   E -> ( E )     { $2 }
   E -> int F     { let (op,v) = $2 in op $1 $2 }
   F ->  { ((+), 0) }
   F -> E'        { $1 }
   E' -> + E F    { let (op,v) = $3 in ((+), op $2 v) }
   E' -> * E F    { let (op,v) = $3 in ((+), op $2 v) }

   First sets:
   First(E)  = { (, int }
   First(F)  = { +, * }
   First(E') = { +, * }

   Follow sets (with a pseudo-token, EOF = end of file (EOF is also often marked as $)):
   Follow(E)  = { ), +, *, EOF }
   Follow(F)  = { ), +, *, EOF }
   Follow(E') = { ), +, *, EOF }

   Nullables = { F }

The parsing code

open Token
open Ll1_common

let rec parse_E stream = match Tokenizer.peek stream with
(*  E -> ( E ) *)
| LPar ->
    consume stream LPar;
    let e = parse_E stream in
    consume stream RPar; e
(*  E -> int F *)
| Int(i) -> consume stream (Int(i)); let (op,v) = parse_F stream in op i v
| _ as x -> raise (Parse_error (unexpected_msg x))

(* there is a LL(1) conflict here on Plus and Times, that we will resolve in favor of F -> E' *)
and parse_F stream = match Tokenizer.peek stream with
(*  F ->  *)
| RPar |  EOF -> (+), 0
(*  F -> E' *)
| Plus | Times -> parse_E' stream
| _ as x -> raise (Parse_error (unexpected_msg x))

and parse_E' stream = match Tokenizer.peek stream with
(*  E' -> + E F *)
| Plus ->
    consume stream Plus;
    let e = parse_E stream in
    let (op,v) = parse_F stream in
    (+), (op e v)
(*  E' -> * E F *)
| Times ->
    consume stream Times;
    let e = parse_E stream in
    let (op,v) = parse_F stream in
    ( * ), (op e v)
| _ as x -> raise (Parse_error (unexpected_msg x))

(* allows to test the parser on a string *)
let test_parse s =
  let t = Tokenizer.create_from_string s in
  parse_E t

The fixed grammar for arithmetic expressions

Working the grammar

Now we have a problem: the previous grammar will evaluate
   2 * 3 + 2 as 2 * ( 3 + 2 ) = 10 and
   2 + 3 * 2 as 2 + ( 3 * 2 ) = 8
   ie it always associates from the right, not taking precedences into account.

   To fix this, we can rework 1 as (there are many possibilities):
1'. 1 with precedences fixed:
   E -> Term                 { $1 + $3 }

   Term -> Factor            { $1 }
   Term -> Term + Term       { $1 + $3 }

   Factor -> int             { $1 }
   Factor -> ( E )           { $2 }
   Factor -> Factor * Factor { $1 * $3 }

   (note: this is still ambiguous wrt 2 + 2 + 3 =? (2+2) + 3 or 2 + (2+3))

2'. without left recursion (and without actions at this point):
   E -> Term

   Term -> Factor
   Term -> Factor Term'

   Term' -> + Term
   Term' -> + Term Term'

   Factor -> int
   Factor -> ( E )
   Factor -> int Factor'
   Factor -> ( E ) Factor'

   Factor' -> * Factor
   Factor' -> * Factor Factor'

3'. now with common right part prefixes factored out, and actions back in:
   E -> Term                        { $1 }

   Term -> Factor MaybeTerm'        { let (op,v)=$2 in op $1 v) }

   MaybeTerm' ->           { (+), 0 }
   MaybeTerm' -> Term'              { $1 }

   Term' -> + Term MaybeTerm'       { let (op,v)=$3 in (+), (op $2 v) }

   Factor -> int MaybeFactor'       { let (op,v)=$2 in op $1 v }
   Factor -> ( E ) MaybeFactor'     { let (op,v)=$4 in op $2 v }

   MaybeFactor' ->         { ( * ), 1 }
   MaybeFactor' -> Factor'          { $1 }

   Factor' -> * Factor MaybeFactor' { let (op,v)=$3 in ( * ),(op $2 v) }

   First sets:
   First(E)            = { int, ( }
   First(Term)         = { int, ( }
   First(Term')        = { + }
   First(MaybeTerm')   = { + }
   First(Factor)       = { int, ( }
   First(Factor')      = { * }
   First(MaybeFactor') = { * }

   Follow sets (with a pseudo-token, EOF = end of file):
   Follow(E)            = { ), EOF }
   Follow(Term)         = { +, ), EOF }
   Follow(Term')        = { +, ), EOF }
   Follow(MaybeTerm')   = { +, ), EOF }
   Follow(Factor)       = { +, ), EOF }
   Follow(Factor')      = { +, ), EOF }
   Follow(MaybeFactor') = { +, ), EOF }

   Nullables = { MaybeTerm', MaybeFactor' }

The parsing code


open Token
open Ll1_common

(* a little sugar for this implementation, to aid debugging *)

let unexp nt tok =
  raise (Parse_error (nt ^ ": " ^ (unexpected_msg tok)))

let parse name stream f =
  Printf.printf "parsing: %s\n" name;
  f (Tokenizer.peek stream)

let rec parse_E2 stream = parse "E2" stream (function
  | LPar | Int(_) -> parse_Term stream
  | _ as x -> unexp "E2" x)

and parse_Term stream = parse "Term" stream (function
  | LPar | Int(_) ->
      let f = parse_Factor stream in
      let (op,v) = parse_MaybeTerm' stream in
      op f v
  | _ as x -> unexp "Term" x)

(* here we have an ambiguity on +, which is part of both Follow(MaybeTerm') and First(Term')
   we choose to resolve it by using the "MaybeTerm' -> Term" rule

   this means that this grammar is not LL(1) -- however, by making an educated choice, we cope without modifying it
   (we could modify the grammar, but this is not worth the trouble)
 *)
and parse_MaybeTerm' stream = parse "MaybeTerm'" stream (function
  | RPar | EOF -> (+), 0
  | Plus -> parse_Term' stream
  | _ as x -> unexp "MaybeTerm'" x)

and parse_Term' stream = parse "Term'" stream (function
  | Plus ->
      consume stream Plus;
      let t = parse_Term stream in
      let (op,v) = parse_MaybeTerm' stream in
      (+), (op t v)
  | _ as x -> unexp "Term'" x)

and parse_Factor stream = parse "Factor" stream (function
  | Int(i) ->
      consume stream (Int(i));
      let (op,v) = parse_MaybeFactor' stream in
      op i v
  | LPar ->
      consume stream LPar;
      let e = parse_E2 stream in
      consume stream RPar;
      let (op,v) = parse_MaybeFactor' stream in
      op e v
  | _ as x -> unexp "Factor" x)

and parse_MaybeFactor' stream = parse "MaybeFactor'" stream (function
  | Plus | RPar | EOF -> ( * ), 1
  | Times -> parse_Factor' stream
  | _ as x -> unexp "MaybeFactor'" x)

and parse_Factor' stream = parse "Factor'" stream (function
  | Times ->
      consume stream Times;
      let f = parse_Factor stream in
      let (op,v) = parse_MaybeFactor' stream in
      ( * ), (op f v)
  | _ as x -> unexp "Factor'" x)

let test_parse2 s =
  let t = Tokenizer.create_from_string s in
  parse_E2 t