馬爾可夫鏈算法(markov算法)的awk、C++、C說話完成代碼。本站提示廣大學習愛好者:(馬爾可夫鏈算法(markov算法)的awk、C++、C說話完成代碼)文章只能為提供參考,不一定能成為您想要的結果。以下是馬爾可夫鏈算法(markov算法)的awk、C++、C說話完成代碼正文
1. 成績描寫
馬爾可夫鏈算法用於生成一段隨機的英文,其思惟異常簡略。起首讀入數據,然後將讀入的數據分紅前綴和後綴兩部門,經由過程前綴來隨機獲得後綴,籍此發生一段可讀的隨機英文。
為了解釋便利,假定我們有以下一段話:
Show your flowcharts and conceal your tables and I will be mystified. Show your tables and your flowcharts will be obvious.
假定前綴的長度為2,則我們處置輸出今後獲得以下數據,我們起首獲得一個前綴,然後在前綴的後綴列表中隨機選擇一個單詞,然後轉變前綴,反復上述進程,如許,我們發生的句子將是可讀的。
上面是處置過的數據:
前綴 後綴
show your flowcharts tables
your flowcharts and will
flowcharts and conceal
flowcharts will be
your tables and and
will be mystified. obvious.
be mystified. show
be obvious. (end)
處置這個文本的馬爾可夫鏈算法將起首帶引show your,然後隨機掏出flowcharts 或許table 兩個單詞,假定選擇的是flowcharts, 則新的前綴就是your flowcharts,同理,選擇table 時,新的前綴就是your table,有了新的前綴your flowcharts 今後,再次隨即選擇它的後綴,此次是在and 和 will 中隨機選擇,反復上述進程,就可以夠發生一段可讀的文本。詳細描寫以下:
設置 w1 和 w2 為文本的前兩個詞
輸入 w1 和 w2
輪回:
隨機地選出 w3,它是文本中 w1 w2 的後綴中的一個
打印 w3
把 w1 和 w2 分離換成 w2 和 w3
反復輪回
2.awk 法式
馬爾可夫鏈算法其實不難,我們會在前面看到,用c說話來處理這個成績會相當費事,而用awk則只須要5分鐘就可以弄定。這的確就是一個演示awk長處的成績。
awk 中有聯系關系數組,正好可以用來表現前綴和後綴的關系。法式以下:
# markov.awk: markov chain algorithm for 2-word prefixes BEGIN { MAXGEN = 10000; NONWORD = "\n"; w1 = w2 = NONWORD } { for (i = 1; i <= NF; i++) { # read all words statetab[w1,w2,++nsuffix[w1,w2]] = $i w1 = w2 w2 = $i } } END { statetab[w1,w2,++nsuffix[w1,w2]] = NONWORD # add tail w1 = w2 = NONWORD for (i = 0; i < MAXGEN; i++) { # generate r = int(rand()*nsuffix[w1,w2]) + 1 # nsuffix >= 1 p = statetab[w1,w2,r] if (p == NONWORD) exit print p w1 = w2 # advance chain w2 = p } }
3. C++ 法式
該成績的重要難點就在於經由過程前綴隨機的獲得後綴,在C++ 中,我們可以借助map 來完成前綴和後綴的對應關系,以此獲得較高的開辟效力。
/* Copyright (C) 1999 Lucent Technologies */ /* Excerpted from 'The Practice of Programming' */ /* by Brian W. Kernighan and Rob Pike */ #include <time.h> #include <iostream> #include <string> #include <deque> #include <map> #include <vector> using namespace std; const int NPREF = 2; const char NONWORD[] = "\n"; // cannot appear as real line: we remove newlines const int MAXGEN = 10000; // maximum words generated typedef deque<string> Prefix; map<Prefix, vector<string> > statetab; // prefix -> suffixes void build(Prefix&, istream&); void generate(int nwords); void add(Prefix&, const string&); // markov main: markov-chain random text generation int main(void) { int nwords = MAXGEN; Prefix prefix; // current input prefix srand(time(NULL)); for (int i = 0; i < NPREF; i++) add(prefix, NONWORD); build(prefix, cin); add(prefix, NONWORD); generate(nwords); return 0; } // build: read input words, build state table void build(Prefix& prefix, istream& in) { string buf; while (in >> buf) add(prefix, buf); } // add: add word to suffix deque, update prefix void add(Prefix& prefix, const string& s) { if (prefix.size() == NPREF) { statetab[prefix].push_back(s); prefix.pop_front(); } prefix.push_back(s); } // generate: produce output, one word per line void generate(int nwords) { Prefix prefix; int i; for (i = 0; i < NPREF; i++) add(prefix, NONWORD); for (i = 0; i < nwords; i++) { vector<string>& suf = statetab[prefix]; const string& w = suf[rand() % suf.size()]; if (w == NONWORD) break; cout << w << "\n"; prefix.pop_front(); // advance prefix.push_back(w); } }
4. c 法式
假如須要法式運轉得足夠快,那就只能用較初級的說話來完成了。當我們用c 說話來完成時,就不能不斟酌各類各樣的成績了。起首,面對的第一個成績就是,若何表現前綴和後綴的關系?
這裡采取前綴的key,後綴為value 的方法存儲前綴與後綴的關系,我們曉得,hash表的查找速度最快,所以,這裡采取hash表也是道理當中的事,只是看你能不克不及想到,用前綴作key,基於下面的思緒,再細心一點,就沒有甚麼年夜成績了。
/* Copyright (C) 1999 Lucent Technologies */ /* Excerpted from 'The Practice of Programming' */ /* by Brian W. Kernighan and Rob Pike */ /* * Markov chain random text generator. */ #include <string.h> #include <stdlib.h> #include <stdio.h> #include <string.h> #include <time.h> #include "eprintf.h" enum { NPREF = 2, /* number of prefix words */ NHASH = 4093, /* size of state hash table array */ MAXGEN = 10000 /* maximum words generated */ }; typedef struct State State; typedef struct Suffix Suffix; struct State { /* prefix + suffix list */ char *pref[NPREF]; /* prefix words */ Suffix *suf; /* list of suffixes */ State *next; /* next in hash table */ }; struct Suffix { /* list of suffixes */ char *word; /* suffix */ Suffix *next; /* next in list of suffixes */ }; State *lookup(char *prefix[], int create); void build(char *prefix[], FILE*); void generate(int nwords); void add(char *prefix[], char *word); State *statetab[NHASH]; /* hash table of states */ char NONWORD[] = "\n"; /* cannot appear as real word */ /* markov main: markov-chain random text generation */ int main(void) { int i, nwords = MAXGEN; char *prefix[NPREF]; /* current input prefix */ int c; long seed; setprogname("markov"); seed = time(NULL); srand(seed); for (i = 0; i < NPREF; i++) /* set up initial prefix */ prefix[i] = NONWORD; build(prefix, stdin); add(prefix, NONWORD); generate(nwords); return 0; } const int MULTIPLIER = 31; /* for hash() */ /* hash: compute hash value for array of NPREF strings */ unsigned int hash(char *s[NPREF]) { unsigned int h; unsigned char *p; int i; h = 0; for (i = 0; i < NPREF; i++) for (p = (unsigned char *) s[i]; *p != '\0'; p++) h = MULTIPLIER * h + *p; return h % NHASH; } /* lookup: search for prefix; create if requested. */ /* returns pointer if present or created; NULL if not. */ /* creation doesn't strdup so strings mustn't change later. */ State* lookup(char *prefix[NPREF], int create) { int i, h; State *sp; h = hash(prefix); for (sp = statetab[h]; sp != NULL; sp = sp->next) { for (i = 0; i < NPREF; i++) if (strcmp(prefix[i], sp->pref[i]) != 0) break; if (i == NPREF) /* found it */ return sp; } if (create) { sp = (State *) emalloc(sizeof(State)); for (i = 0; i < NPREF; i++) sp->pref[i] = prefix[i]; sp->suf = NULL; sp->next = statetab[h]; statetab[h] = sp; } return sp; } /* addsuffix: add to state. suffix must not change later */ void addsuffix(State *sp, char *suffix) { Suffix *suf; suf = (Suffix *) emalloc(sizeof(Suffix)); suf->word = suffix; suf->next = sp->suf; sp->suf = suf; } /* add: add word to suffix list, update prefix */ void add(char *prefix[NPREF], char *suffix) { State *sp; sp = lookup(prefix, 1); /* create if not found */ addsuffix(sp, suffix); /* move the words down the prefix */ memmove(prefix, prefix+1, (NPREF-1)*sizeof(prefix[0])); prefix[NPREF-1] = suffix; } /* build: read input, build prefix table */ void build(char *prefix[NPREF], FILE *f) { char buf[100], fmt[10]; /* create a format string; %s could overflow buf */ sprintf(fmt, "%%%ds", sizeof(buf)-1); while (fscanf(f, fmt, buf) != EOF) add(prefix, estrdup(buf)); } /* generate: produce output, one word per line */ void generate(int nwords) { State *sp; Suffix *suf; char *prefix[NPREF], *w; int i, nmatch; for (i = 0; i < NPREF; i++) /* reset initial prefix */ prefix[i] = NONWORD; for (i = 0; i < nwords; i++) { sp = lookup(prefix, 0); if (sp == NULL) eprintf("internal error: lookup failed"); nmatch = 0; for (suf = sp->suf; suf != NULL; suf = suf->next) if (rand() % ++nmatch == 0) /* prob = 1/nmatch */ w = suf->word; if (nmatch == 0) eprintf("internal error: no suffix %d %s", i, prefix[0]); if (strcmp(w, NONWORD) == 0) break; printf("%s\n", w); memmove(prefix, prefix+1, (NPREF-1)*sizeof(prefix[0])); prefix[NPREF-1] = w; } }