NFA2DFA

May 28, 2014
c++

input: number of symbols symbol set(each space between two symbol) number of states(state number starts from 1 to n by default) start state number number of accept states accept states set state transfer table (epsilon transfer is at column one and if there are several epsilon transfers, seperate them with a comma without spaces) The following file is the input file.

	NFA2DFA.txt

	2
	a b
	4
	1
	1
	3
	3 2 1
	-1 1 -1
	-1 3 4
	-1 -1 3

we input a integer N by str.txt file. then we generate accepted strings by DFA and NFA whose length is no more than N. Then compare those strings generated by NFA and DFA are all the same. The output is in str_out.txt

The following program is the main program.


/*
 * =====================================================================================
 *       Filename : NFA2DFA.cpp
 *    Description : 
 *    Version     : 0.1
 *        Created : 05/13/14 20:43
 *         Author : Liu Xue Yang (LXY), liuxueyang457@163.com
 *         Motto  : How about today?
 * =====================================================================================
 */
#include <cstdio>
#include <map>
#include <cstdlib>
#include <iostream>
#include <cstring>
#include <set>
#include <vector>
#include <iomanip>
#include <cctype>
#include <string>
#include <stack>
#include <algorithm>

using namespace std;

int trans[1000][1000];
int n;                                          /* number of symbols */
char ch[1000];                                  /* symbol set */
int start_state;                                /* start state */
int n_state;                                    /* number of states */
int nac_state;                                  /* number of accept states */
int ac[1000];                                   /* accept states */
char input[1000];                               /* input string */
int start = 1;                                  /* start state */
bool flag = true;                               /* whether the string is legal */
bool mark_ac;                                   /* whether the string is accepted */
int N;
vector<int> dfa_ac;
vector<vector<int> > e_s;
void dfs_nfa(string, int);
void dfs_dfa(string, int);
bool ac_check(int);
bool ac_check_dfa(int);
bool all_check(int);
vector<vector<int> > Newtrans;
vector<string> dfa_generate, nfa_generate;

/* 
 * ===  FUNCTION  ======================================================================
 *         Name: main
 * =====================================================================================
 */

	int
main ( int argc, char *argv[] )
{

	freopen("NFA2DFA.txt", "r", stdin);

	dfa_ac.clear();
	scanf ( "%d", &n);
	getchar();
	for ( int i = 0; i < n; ++i ) {
		scanf ( "%c", &ch[i] );
		getchar();
	}
	scanf ( "%d%d%d", &n_state, &start_state, &nac_state );
	for ( int i = 0; i < nac_state; ++i ) {
		scanf ( "%d", &ac[i] );
	}
	string str;
	e_s.clear();
	char buffer[1111];
	for ( int i = 0; i < n_state; ++i ) {
		cin >> str;
		memset(buffer, 0, sizeof(buffer));
		strcpy(buffer, str.c_str());
		char *pch = strtok(buffer, ",");
		char suffer[1000];
		vector<int> t_a;
		t_a.clear();
		while ( NULL != pch ) {
			memset(suffer, 0, sizeof(suffer));
			strcpy(suffer, pch);
			int su_n = atoi(suffer);
			t_a.push_back(su_n);
			pch = strtok(NULL, ",");
		}
		e_s.push_back(t_a);
		for ( int j = 1; j < n + 1; ++j ) {
			scanf ( "%d", &trans[i][j] );
		}
	}
	vector<int>::iterator i_t;
	vector<int> v;

	map<vector<int>, int > mystate;
	mystate.clear();

	Newtrans.clear();

	int ii;
	ii = 0;
	set<int> s_in;
	s_in.clear();
	v.push_back(start_state);
	s_in.insert(start_state);
	// start_state enclosure
	for ( i_t = e_s[start_state-1].begin(); i_t != e_s[start_state-1].end(); ++i_t ) {
		if ( -1 != *i_t && s_in.count(*i_t) == 0) {
			v.push_back(*i_t);
			s_in.insert(*i_t);
		}
	}
	vector<int>::iterator it;
	bool in_ac = false;
	for ( it = v.begin(); it != v.end(); ++it ) {
		for ( int jk = 0; jk < nac_state; ++jk ) {
			if ( ac[jk] == (*it) ) {
				in_ac = true;
				dfa_ac.push_back(0);
				break;
			}
		}
		if ( in_ac ) {
			break;
		}
	}
	sort(v.begin(), v.end());

	vector<int> add;
	add.clear();

	mystate.insert(pair<vector<int>, int>(v, ii));
	++ii;
	Newtrans.push_back(add);                      /* push_back a empty vector */

	stack<vector<int> > stac;
	stac.push(v);
	while ( !stac.empty() ) {                     /*if stack if not empty we will search new state */
		vector<int> tmp;
		v = stac.top();
		stac.pop();
		int jj;
		jj = mystate[v];
		for ( int i = 1; i < n + 1; ++i ) {         /* for every symbol */
			tmp.clear();
			s_in.clear();
			// move(v, i) = tmp
			for ( it = v.begin(); it != v.end(); ++it ) { /* for every symbol in this set (this symbol as input) */
				if (-1 != trans[(*it)-1][i] && s_in.count(trans[(*it)-1][i]) == 0) { /* we can not push_back -1!!!f**k!!! */
					tmp.push_back(trans[(*it)-1][i]);     /* =_= T_T I wasted so much time to debug here!!!!!! T^T */
					s_in.insert(trans[(*it)-1][i]);
				}
			}
			// get enclosure tmp-enclosure
			vector<int> tmp_i;
			tmp_i = tmp;
			for ( it = tmp.begin(); it != tmp.end(); ++it ) { /* get stuck!!! command terminated */
				for ( i_t = e_s[(*it)-1].begin(); i_t != e_s[(*it)-1].end(); ++i_t ) {
					if ( -1 != *i_t && s_in.count(*i_t) == 0 ) {
						tmp_i.push_back(*i_t);              /* we cannot modify tmp */
						s_in.insert(*i_t);
					}
				}
			}
			tmp = tmp_i;
			sort(tmp.begin(), tmp.end());
			if ( (tmp.size() != 0) && !mystate.count(tmp)) {                  /* do not have it */
				mystate.insert(pair<vector<int>, int>(tmp, ii));
				in_ac = false;
				for ( it = tmp.begin(); it != tmp.end(); ++it ) {
					for ( int jk = 0; jk < nac_state; ++jk ) {
						if ( ac[jk] == (*it) ) {
							in_ac = true;
							dfa_ac.push_back(ii);
							break;
						}
					}
					if ( in_ac ) {
						break;
					}
				}
				add.clear();
				Newtrans.push_back(add);
				ii++;
				stac.push(tmp);
			}
			if (tmp.size() != 0) {
				Newtrans[jj].push_back(mystate[tmp]);
			}
			else {
				Newtrans[jj].push_back(-1);
			}
		}
	}
//	vector<vector<int> >::iterator j_t;
//	for ( j_t = Newtrans.begin(); j_t != Newtrans.end(); ++j_t ) {
//		for ( it = (*j_t).begin(); it != (*j_t).end(); ++it ) {
//			cout << *it << " ";
//		}
//		cout << endl;
//	}
	// succeed!
	// Out put DFA!
	for (int i = 0; i < ii; ++i) {
		for ( it = Newtrans[i].begin(); it != Newtrans[i].end(); ++it ) {
			cout << *it << " ";
		}
		cout << endl;
	}
	// output accepted state
	cout << "accepted state: "; 
	for ( it = dfa_ac.begin(); it != dfa_ac.end() ; ++it ) {
		cout << *it << " "; 
	}
	N = 3;
	dfa_generate.clear();
	nfa_generate.clear();
//	printf ( "\n" );
	freopen("str.txt", "r", stdin);
	freopen("str_out.txt", "w", stdout);
	while ( ~scanf("%d", &N) ) {
		string sad = "";
		dfs_dfa(sad, 0);
		sort(dfa_generate.begin(), dfa_generate.end());
		vector<string>::iterator end_u, sadbu;
		end_u = unique(dfa_generate.begin(), dfa_generate.end());
		dfa_generate.erase(end_u, dfa_generate.end());
		cout << "********************" << endl; 
		cout << "size = " << dfa_generate.size() << endl; 
		for ( sadbu = dfa_generate.begin(); sadbu != dfa_generate.end(); ++sadbu ) {
			cout << *sadbu << endl;
		}
		sad = "";
		dfs_nfa(sad, 1);
		sort(nfa_generate.begin(), nfa_generate.end());
		end_u = unique(nfa_generate.begin(), nfa_generate.end());
		nfa_generate.erase(end_u, nfa_generate.end());
		cout << "********************" << endl; 
		cout << "size = " << nfa_generate.size() << endl; 
		for ( sadbu = nfa_generate.begin(); sadbu != nfa_generate.end(); ++sadbu ) {
			cout << *sadbu << endl;
		}
		bool mark = true;
		if ( dfa_generate.size() != nfa_generate.size() ) {
			mark = false;
			printf ( "size is Different!\n" );
		} else {
			for ( int js = 0; js < dfa_generate.size(); ++js ) {
				if ( dfa_generate[js] != nfa_generate[js] ) {
					mark = false;
					break;
				}
			}
		}
		if ( mark ) {
			cout << "Same!" << endl; 
		} 
		else {
			cout << "Different!" << endl;
		}
	}

		return EXIT_SUCCESS;
}				/* ----------  end of function main  ---------- */

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  dfs_dfa
 *  Description:  dfs to generate string
 * =====================================================================================
 */
	void
dfs_dfa ( string str, int now )
{
	int len = str.length();
	if ( len > N ) {
		return;
	}
	if ( len > 0 && len <= N && ac_check_dfa(now) ) {
		dfa_generate.push_back(str);
//		cout << str << endl;
	}
	vector<int>::iterator jk;
	for ( jk = Newtrans[now].begin(); jk != Newtrans[now].end(); ++jk ) {
		if ( -1 != *jk ) {
			dfs_dfa(str + ch[jk-Newtrans[now].begin()], *jk);
		}
	}
	return ;
}		/* -----  end of function dfs_dfa  ----- */

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  dfs_nfa
 *  Description:  dfs to generate string
 * =====================================================================================
 */
	void
dfs_nfa ( string str, int now )
{
	int len = str.length();
	if ( len > N ) {
		return;
	}
	if ( len > 0 && len <= N && all_check(now) ) {
		nfa_generate.push_back(str);
//		cout << str << endl;
	}
	for ( int i = 1; i < n + 1; ++i ) {
		if ( -1 != trans[now-1][i] ) {
			dfs_nfa(str+ch[i-1], trans[now-1][i]);
		}
	}
	vector<int>::iterator i_t;
	for ( i_t = e_s[now-1].begin(); i_t != e_s[now-1].end(); ++i_t ) {
		if ( -1 != *i_t ) {
			dfs_nfa(str, *i_t);
		}
		else break;
	}
	return ;
}		/* -----  end of function dfs_nfa  ----- */

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  all_check
 *  Description:  enclosure check
 * =====================================================================================
 */
	bool
all_check ( int sta )
{
	bool mark = false;
	mark = ac_check(sta);
	if ( mark ) {
		return mark;
	}
//	vector<int>::iterator i_t;
//	for ( i_t = e_s[sta-1].begin(); i_t != e_s[sta-1].end(); ++i_t ) {
//		if ( -1 != *i_t ) {
//			mark = ac_check(*i_t);
//			if ( mark ) {
//				break;
//			}
//		}
//		else break;
//	}
	return mark;
}		/* -----  end of function all_check  ----- */

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  ac_check
 *  Description:  check whether a state is accepted
 * =====================================================================================
 */
	bool
ac_check ( int sta )
{
	bool mark = false;
	for ( int i = 0; i < nac_state; ++i ) {
		if ( sta == ac[i] ) {
			mark = true;
			break;
		}
	}
	return mark;
}		/* -----  end of function ac_check  ----- */

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  ac_check_dfa
 *  Description:  check whether it is legal
 * =====================================================================================
 */
	bool
ac_check_dfa ( int sta )
{
	bool mark = false;
	vector<int>::iterator it;
	for ( it = dfa_ac.begin(); it != dfa_ac.end(); ++it ) {
		if ( (*it) == sta ) {
			mark = true;
			return mark;
		}
	}
	return mark;
}		/* -----  end of function ac_check_dfa  ----- */

Nice.


Maybe I made a mistake! I forget a case: when I get epsilon of a node, it may have some consistent epsilon transfers! So I should use dfs to get the whole epsilon!

I find this problem when I read this blog: cyjb Thank You! I will fix the problem later.

comments powered by Disqus