regexp.lang: Languages
Overview
- lang A: Languages over the alphabet set A. Languages support the following operations. They are predicates on seq A.
- ε: the language consisting of only the empty sequence [::],
- singleton a: For a : A the language consisting of the single
letter string [::a],
- satisfy pr: the language of single letter strings on which the
predicate pr : A → bool evaluates to true.
- L1 ≡ L2: The language L1 and L2 denote the same languages. For the automata theoretic setting it is this extensional notion of equivalence that is the right notion and not the intensional =. This relation is declared as a generalized equivalence relation and supports generalized rewriting.
Language operations
- L1 || L2: the union of languages .
- L1 ++ L2: the concatenation of languages
- L^n or pow L n: the -fold concatenation of , i.e
- many L: the Kleene closure
- many1 L: the language
Co-functor structure on lang
- comap: For f : A → B gives the contravariant lift comap f :
lang B → lang A. Comap preserve all the operations on languages
and supports generalized rewriting.
- It composes (contravariantly) comap f (comap g L) ≡ comap (g \o
f) L.
- It preserves all the natural language theoretic operations that we defined above. For example, comap f (L1 ++ L2) ≡ comap f L1 ++ comap f L2
- It composes (contravariantly) comap f (comap g L) ≡ comap (g \o
f) L.
Definition lang A := seq A → Prop.
Definition epsilon {A} : lang A
:= fun xs ⇒ xs = [::].
Definition singleton {A} (a : A) : lang A
:= fun xs ⇒ xs = [:: a].
Definition satisfy {A} (tf : A → bool) : lang A
:= fun xs ⇒ ∃ a : A, xs = [:: a] ∧ tf a.
Definition alt{A}(L1 L2 : lang A) : lang A
:= fun xs ⇒ L1 xs ∨ L2 xs.
Definition concat{A}(L1 L2 : lang A) : lang A
:= fun xs ⇒ ∃ xs1 xs2, (xs = xs1 ++ xs2 ∧ L1 xs1 ∧ L2 xs2).
Infix "||" := alt : language_scope.
Infix "++" := concat : language_scope.
Notation "'ε'" := epsilon.
Fixpoint pow {A}(L : lang A)(n : nat) : lang A :=
if n is m.+1 then (L ++ pow L m)%lan
else ε.
Infix "^" := pow : language_scope.
Definition many{A}(L : lang A) : lang A := fun xs ⇒ ∃ n, (L ^ n)%lan xs.
Definition many1{A}(L : lang A) : lang A := fun xs ⇒ ∃ n, (L ^ (n.+1))%lan xs.
Language equivalence.
Module eq.
Section Cxt.
Context {A : Type}.
Definition rel (L1 L2 : lang A) := ∀ xs, L1 xs ↔ L2 xs.
Lemma refl : reflexive (lang A) rel.
done.
Qed.
Lemma sym : symmetric (lang A) rel.
by move ⇒ L1 L2 h xs; rewrite h.
Qed.
Lemma trans : transitive (lang A) rel.
by move ⇒ L1 L2 L3 h12 h23 xs; rewrite h12 h23.
Qed.
End Cxt.
End eq.
Add Parametric Relation A : (lang A) (@eq.rel A)
reflexivity proved by eq.refl symmetry proved by eq.sym transitivity proved by eq.trans as lang_equivalence.
Add Parametric Morphism A : (@alt A) with signature
(@eq.rel A) ==> (@eq.rel A) ==> (@eq.rel A) as lang_alt_morphism.
by move ⇒ L1 L2 H12 L3 L4 H34 xs; rewrite /alt (H12 xs) (H34 xs).
Qed.
Infix "≡" := eq.rel (at level 70).
Lemma satisfy_ext [A][f g : A → bool] : f =1 g → (satisfy f ≡ satisfy g)%lan.
Proof.
move ⇒ Hfg xs; rewrite /satisfy; split; case ⇒ a [hxs hyp]; ∃ a.
- by rewrite -Hfg.
- by rewrite Hfg.
Qed.
Add Parametric Morphism A : (@concat A) with signature
(@eq.rel A) ==> (@eq.rel A) ==> (@eq.rel A) as lang_concat_morphism.
move ⇒ L1 L2 H12 L3 L4 H34 xs; rewrite /concat; split; case ⇒ ys [zs [hxs]].
- by rewrite (H12 ys) (H34 zs); case ⇒ h1 h2; ∃ ys; ∃ zs.
- by rewrite -(H12 ys) -(H34 zs); case ⇒ h1 h2; ∃ ys; ∃ zs.
Qed.
Lemma concat_epsilon {A} (L : lang A) : (L ++ ε)%lan ≡ L.
rewrite /concat; move ⇒ xs; split.
- by case ⇒ ys [zs [hxs [hys hzs]]]; rewrite hxs hzs cats0.
- by move ⇒ h; ∃ xs; ∃ [::]; rewrite cats0 //= /epsilon.
Qed.
Lemma epsilon_concat {A} (L : lang A) : (ε ++ L)%lan ≡ L.
rewrite /concat; move ⇒ xs; rewrite /epsilon; split.
- by case ⇒ ys [zs [hxs [hys hzs]]]; rewrite hxs hys cat0s.
- by move ⇒ h; ∃ [::]; ∃ xs; rewrite cat0s //= /epsilon.
Qed.
Add Parametric Morphism A : (@pow A) with signature
(@eq.rel A) ==> eq ==> (@eq.rel A) as lang_pow_morphism.
move ⇒ L1 L2 H n.
elim: n ⇒ [|n IHn]; by do 2 rewrite //= ?H ?IHn.
Qed.
Add Parametric Morphism A : (@many A) with signature
(@eq.rel A) ==> (@eq.rel A) as lang_many_morphism.
move ⇒ L1 L2 H.
have hpow : ∀ n, (L1 ^ n ≡ L2 ^ n)%lan by move ⇒ n; rewrite H.
rewrite /many ⇒ xs; split; case ⇒ n.
- by rewrite (hpow n xs); ∃ n.
- by rewrite -(hpow n xs); ∃ n.
Qed.
Add Parametric Morphism A : (@many1 A) with signature
(@eq.rel A) ==> (@eq.rel A) as lang_many1_morphism.
move ⇒ L1 L2 H.
have hpow : ∀ n, (L1 ^ n ≡ L2 ^ n)%lan by move ⇒ n; rewrite H.
rewrite /many ⇒ xs; split; case ⇒ n.
- by rewrite (hpow (n.+1) xs); ∃ n.
- by rewrite -(hpow (n.+1) xs); ∃ n.
Qed.
From Stdlib Require List.
Lemma pow0 {A}{L : lang A} {xs} : (L ^ 0)%lan xs → xs = [::].
rewrite //.
Qed.
Lemma pow1 {A}(L : lang A) : (L ^ 1 ≡ L)%lan.
by rewrite /pow concat_epsilon.
Qed.
Lemma many1_nil {A}(L : lang A) : many1 L [::] → L [::].
case ⇒ [n]; rewrite //=; case ⇒ ys [zs [hnil [hys hzs]]].
have hyp : ys ++ zs = [::] by rewrite -hnil.
set hnils := List.app_eq_nil ys zs hyp.
case: hnils ⇒ h1 h2; by rewrite -h1.
Qed.
Contravariant lift of alphabet transformations.
Section Comap.
Context {A B}(f : A → B).
Lemma comap_epsilon : comap f ε ≡ ε.
move ⇒ xs; split.
- rewrite /comap /comp.
by move /List.map_eq_nil.
- by move ⇒ Heps; rewrite Heps //=.
Qed.
Lemma comap_alt (L1 L2 : lang B) : (comap f (L1 || L2) ≡ comap f L1 || comap f L2)%lan.
move ⇒ xs; rewrite /comap //=.
Qed.
Lemma comap_satisfy {pr : B → bool} : (comap f (satisfy pr) ≡ satisfy (pr \o f))%lan.
move ⇒ xs; rewrite /satisfy /comap /comp; split.
- case ⇒ b [hxs hpr].
by case (map_single hxs) ⇒ a [h1 hf]; ∃ a; rewrite h1 -hf.
- case ⇒ a [hxs hpr]; ∃ (f a); by rewrite hxs.
Qed.
Lemma comap_concat (L1 L2 : lang B) : (comap f (L1 ++ L2) ≡ comap f L1 ++ comap f L2)%lan.
move ⇒ xs; rewrite /comap //= /concat /comp; split.
- case ⇒ ys1 [ys2 [Hconcat [H1 H2]]].
case (map_catE Hconcat) ⇒ xs1 [xs2 [Hxsconcat [Hm1 Hm2]]].
∃ xs1; ∃ xs2; by rewrite //= Hm1 Hm2.
- case ⇒ xs1 [xs2 [Hxs [HL1 HL2]]].
∃ [seq f x | x <- xs1]; ∃ [seq f x | x <- xs2]; by rewrite Hxs map_cat.
Qed.
Lemma comap_pow [L : lang B] : ∀ n, (comap f (L^n) ≡ (comap f L)^n)%lan .
elim ⇒ [|n IH].
- by rewrite comap_epsilon.
- by rewrite comap_concat IH.
Qed.
Lemma comap_many1 (L : lang B) : (comap f (many1 L) ≡ many1 (comap f L))%lan.
move ⇒ xs; split.
- set lhs := many1 _ _; rewrite /comap; case ⇒ n Hyp.
by ∃ n; rewrite -(comap_pow _ xs) /comap /comp.
- case ⇒ n; rewrite -(comap_pow _ xs) /comap. by ∃ n.
Qed.
End Comap.
Lemma comap_comap {A B C}(f : B → A)(g : C → B) : ∀ L : lang A, comap (f \o g) L ≡ comap g (comap f L).
move ⇒ L xs; by rewrite /comap ?/comp map_comp.
Qed.
Add Parametric Morphism A B : comap with signature
(@eqfun A (fun _ ⇒ B)) ==> (@eq.rel B) ==> (@eq.rel A) as lang_comap_morphism.
move ⇒ f g hfg L1 L2 H xs; by rewrite /comap /comp (H _) (eq_map hfg) .
Qed.