regexp.lang: Languages




Overview

Let A be a type representing the alphabet. Strings over A are captured by the type seq A of finite sequences over A and languages as predicates over seq A.
  • lang A: Languages over the alphabet set A. Languages support the following operations. They are predicates on seq A.
  • ε: the language consisting of only the empty sequence [::],
  • singleton a: For a : A the language consisting of the single letter string [::a],
  • satisfy pr: the language of single letter strings on which the predicate pr : A bool evaluates to true.
  • L1 L2: The language L1 and L2 denote the same languages. For the automata theoretic setting it is this extensional notion of equivalence that is the right notion and not the intensional =. This relation is declared as a generalized equivalence relation and supports generalized rewriting.

Language operations

  • L1 || L2: the union of languages .
  • L1 ++ L2: the concatenation of languages
  • L^n or pow L n: the -fold concatenation of , i.e
  • many L: the Kleene closure
  • many1 L: the language

Co-functor structure on lang

  • comap: For f : A B gives the contravariant lift comap f : lang B lang A. Comap preserve all the operations on languages and supports generalized rewriting.
    • It composes (contravariantly) comap f (comap g L) comap (g \o f) L.
    • It preserves all the natural language theoretic operations that we defined above. For example, comap f (L1 ++ L2) comap f L1 ++ comap f L2
All the language operations and the comap function are declared as Morphism for the language equivalence relation .

Definition lang A := seq A Prop.

Definition epsilon {A} : lang A
  := fun xsxs = [::].

Definition singleton {A} (a : A) : lang A
  := fun xsxs = [:: a].

Definition satisfy {A} (tf : A bool) : lang A
  := fun xs a : A, xs = [:: a] tf a.

Definition alt{A}(L1 L2 : lang A) : lang A
  := fun xsL1 xs L2 xs.

Definition concat{A}(L1 L2 : lang A) : lang A
  := fun xs xs1 xs2, (xs = xs1 ++ xs2 L1 xs1 L2 xs2).

Infix "||" := alt : language_scope.
Infix "++" := concat : language_scope.
Notation "'ε'" := epsilon.

Fixpoint pow {A}(L : lang A)(n : nat) : lang A :=
  if n is m.+1 then (L ++ pow L m)%lan
  else ε.

Infix "^" := pow : language_scope.

Definition many{A}(L : lang A) : lang A := fun xs n, (L ^ n)%lan xs.
Definition many1{A}(L : lang A) : lang A := fun xs n, (L ^ (n.+1))%lan xs.

Language equivalence.

The heart of most of our proofs involve proving two (potentially different) languages L1 and L2 are essentially the same. Since the languages might be defined differently (one coming from a regular expression the other from an automata) the most natural notion is the extensional equivalence of the underlying predicates.

Module eq.

  Section Cxt.
    Context {A : Type}.

    Definition rel (L1 L2 : lang A) := xs, L1 xs L2 xs.
    Lemma refl : reflexive (lang A) rel.
      done.
    Qed.

    Lemma sym : symmetric (lang A) rel.
      by moveL1 L2 h xs; rewrite h.
    Qed.

    Lemma trans : transitive (lang A) rel.
      by moveL1 L2 L3 h12 h23 xs; rewrite h12 h23.
    Qed.
  End Cxt.

End eq.

Add Parametric Relation A : (lang A) (@eq.rel A)
  reflexivity proved by eq.refl symmetry proved by eq.sym transitivity proved by eq.trans as lang_equivalence.

Add Parametric Morphism A : (@alt A) with signature
    (@eq.rel A) ==> (@eq.rel A) ==> (@eq.rel A) as lang_alt_morphism.
  by moveL1 L2 H12 L3 L4 H34 xs; rewrite /alt (H12 xs) (H34 xs).
Qed.

Infix "≡" := eq.rel (at level 70).

Lemma satisfy_ext [A][f g : A bool] : f =1 g (satisfy f satisfy g)%lan.
Proof.
  moveHfg xs; rewrite /satisfy; split; casea [hxs hyp]; a.
  - by rewrite -Hfg.
  - by rewrite Hfg.
Qed.

Add Parametric Morphism A : (@concat A) with signature
    (@eq.rel A) ==> (@eq.rel A) ==> (@eq.rel A) as lang_concat_morphism.
  moveL1 L2 H12 L3 L4 H34 xs; rewrite /concat; split; caseys [zs [hxs]].
  - by rewrite (H12 ys) (H34 zs); caseh1 h2; ys; zs.
  - by rewrite -(H12 ys) -(H34 zs); caseh1 h2; ys; zs.
Qed.

Lemma concat_epsilon {A} (L : lang A) : (L ++ ε)%lan L.
  rewrite /concat; movexs; split.
  - by caseys [zs [hxs [hys hzs]]]; rewrite hxs hzs cats0.
  - by moveh; xs; [::]; rewrite cats0 //= /epsilon.
Qed.

Lemma epsilon_concat {A} (L : lang A) : (ε ++ L)%lan L.
  rewrite /concat; movexs; rewrite /epsilon; split.
  - by caseys [zs [hxs [hys hzs]]]; rewrite hxs hys cat0s.
  - by moveh; [::]; xs; rewrite cat0s //= /epsilon.
Qed.

Add Parametric Morphism A : (@pow A) with signature
    (@eq.rel A) ==> eq ==> (@eq.rel A) as lang_pow_morphism.
  moveL1 L2 H n.
  elim: n ⇒ [|n IHn]; by do 2 rewrite //= ?H ?IHn.
Qed.

Add Parametric Morphism A : (@many A) with signature
    (@eq.rel A) ==> (@eq.rel A) as lang_many_morphism.
  moveL1 L2 H.
  have hpow : n, (L1 ^ n L2 ^ n)%lan by moven; rewrite H.
  rewrite /manyxs; split; casen.
  - by rewrite (hpow n xs); n.
  - by rewrite -(hpow n xs); n.
Qed.

Add Parametric Morphism A : (@many1 A) with signature
    (@eq.rel A) ==> (@eq.rel A) as lang_many1_morphism.
  moveL1 L2 H.
  have hpow : n, (L1 ^ n L2 ^ n)%lan by moven; rewrite H.
  rewrite /manyxs; split; casen.
  - by rewrite (hpow (n.+1) xs); n.
  - by rewrite -(hpow (n.+1) xs); n.
Qed.

From Stdlib Require List.

Lemma pow0 {A}{L : lang A} {xs} : (L ^ 0)%lan xs xs = [::].
  rewrite //.
Qed.

Lemma pow1 {A}(L : lang A) : (L ^ 1 L)%lan.
  by rewrite /pow concat_epsilon.
Qed.

Lemma many1_nil {A}(L : lang A) : many1 L [::] L [::].
  case ⇒ [n]; rewrite //=; caseys [zs [hnil [hys hzs]]].
  have hyp : ys ++ zs = [::] by rewrite -hnil.
  set hnils := List.app_eq_nil ys zs hyp.
  case: hnilsh1 h2; by rewrite -h1.
Qed.

Contravariant lift of alphabet transformations.

Function composition (give by the operator \o) allows us to lift functions f : A B on the underlying alphabets types A and B to the functions comap f: lang B lang A between their language family.

Definition comap {A B : Type}(f : A B) (L : lang B) : lang A :=
  L \o map f.


Section Comap.
  Context {A B}(f : A B).

  Lemma comap_epsilon : comap f ε ε.
    movexs; split.
    - rewrite /comap /comp.
      by move /List.map_eq_nil.
    - by moveHeps; rewrite Heps //=.
  Qed.

  Lemma comap_alt (L1 L2 : lang B) : (comap f (L1 || L2) comap f L1 || comap f L2)%lan.
    movexs; rewrite /comap //=.
  Qed.

  Lemma comap_satisfy {pr : B bool} : (comap f (satisfy pr) satisfy (pr \o f))%lan.
    movexs; rewrite /satisfy /comap /comp; split.
    - caseb [hxs hpr].
      by case (map_single hxs) ⇒ a [h1 hf]; a; rewrite h1 -hf.
    - casea [hxs hpr]; (f a); by rewrite hxs.
  Qed.

  Lemma comap_concat (L1 L2 : lang B) : (comap f (L1 ++ L2) comap f L1 ++ comap f L2)%lan.
    movexs; rewrite /comap //= /concat /comp; split.
    - caseys1 [ys2 [Hconcat [H1 H2]]].
      case (map_catE Hconcat) ⇒ xs1 [xs2 [Hxsconcat [Hm1 Hm2]]].
       xs1; xs2; by rewrite //= Hm1 Hm2.
    - casexs1 [xs2 [Hxs [HL1 HL2]]].
       [seq f x | x <- xs1]; [seq f x | x <- xs2]; by rewrite Hxs map_cat.
  Qed.

  Lemma comap_pow [L : lang B] : n, (comap f (L^n) (comap f L)^n)%lan .
    elim ⇒ [|n IH].
    - by rewrite comap_epsilon.
    - by rewrite comap_concat IH.
  Qed.

  Lemma comap_many1 (L : lang B) : (comap f (many1 L) many1 (comap f L))%lan.
    movexs; split.
    - set lhs := many1 _ _; rewrite /comap; casen Hyp.
      by n; rewrite -(comap_pow _ xs) /comap /comp.
    - casen; rewrite -(comap_pow _ xs) /comap. by n.
  Qed.
End Comap.

Lemma comap_comap {A B C}(f : B A)(g : C B) : L : lang A, comap (f \o g) L comap g (comap f L).
  moveL xs; by rewrite /comap ?/comp map_comp.
Qed.

Add Parametric Morphism A B : comap with signature
    (@eqfun A (fun _B)) ==> (@eq.rel B) ==> (@eq.rel A) as lang_comap_morphism.
  movef g hfg L1 L2 H xs; by rewrite /comap /comp (H _) (eq_map hfg) .
Qed.