程式師世界 >> 編程語言 >> C語言 >> C++ >> C++入門知識 >> Cracking the coding interview: 查找文中兩個單詞的距離

Cracking the coding interview: 查找文中兩個單詞的距離

編輯：C++入門知識

題目：You have a large text file containing words. Given any two words, find the shortest distance (in terms of number of words) between them in the file. If the operation will be repeated many times for the same file(but different pairs of words), can you optimize your solution?

解法一：只需要一次性查詢的，我們可以利用兩個下標，一個記錄最後一次遇到第一個單詞的下標，另外一個記錄遇到另外一個單詞的下標，然後每次比較兩個單詞的距離，記錄當前最小的兩個單詞的距離，最後就得到最小的距離了。時間效率O(n);

程序如下：

int shortestWordsDistance(vector &words, string &w1, string &w2)
{
	int lastWord1 = -1, lastWord2 = -1;
	int minDistance = INT_MAX;

	for (int i = 0; i < words.size(); i++)
	{
		if (words[i] == w1)
		{
			lastWord1 = i;
			if (lastWord2 != -1) minDistance = 
				min(minDistance, lastWord1 - lastWord2);
		}
		if (words[i] == w2)//不加else是因為當w1==w2的時候，可以計算出為零
		{
			lastWord2 = i;
			if (lastWord1 != -1) minDistance = 
				min(minDistance, lastWord2 - lastWord1);
		}
	}
	return minDistance;
}

解法二：這是個可以優化重復查找效率的算法，利用額外空間存儲文中所有單詞出現的次數，並記錄其出現的位置，形成一個數列，如果需要查找兩個單詞距離的時候，就把兩個單詞的出現位置數列合並起來，遍歷一次這個數列就可以得到兩個單詞的距離了。

那麼需要建立一個表，所用時間效率是：O(n);查找的時候所用時間內效率是O(m), m是兩個單詞出現在文中的次數。因為表不需要重復計算，所以效率可以優化很多。

帶測試實例的程序如下：

struct MarkInt
{
	int a;
	string str;
	MarkInt(int a1, string s1):a(a1), str(s1){}
};

bool operator<(const MarkInt &m1, const MarkInt &m2)
{
	return m1.a < m2.a;
}

class WordsDistacne
{
	map > lookupTable;
	vector words;
public:
	int shortestWordsDistanceRepeated(string &w1, string &w2)
	{
		if (!lookupTable.count(w1) || !lookupTable.count(w2)) return -1;
		vector vm;
		lookupTable[w1].push_back(MarkInt(INT_MAX,w1));
		lookupTable[w2].push_back(MarkInt(INT_MAX,w2));
		int n1 = lookupTable[w1].size();
		int n2 = lookupTable[w2].size();
		
		for (int i = 0, j = 0; i < n1 && j < n2; )
		{
			if (lookupTable[w1][i] < lookupTable[w2][j])
			{
				vm.push_back(lookupTable[w1][i++]);
			}
			else vm.push_back(lookupTable[w2][j++]);
		}
		vm.pop_back();
		lookupTable[w1].pop_back();
		lookupTable[w2].pop_back();

		int lastword1 = -1, lastword2 = -1;
		int minDistance = INT_MAX;
		for (int i = 0; i < vm.size(); i++)
		{
			if (vm[i].str == w1) 
			{
				lastword1 = vm[i].a;
				if (lastword2 >= 0)
				{
					minDistance = min(minDistance, lastword1 - lastword2);
				}
			}
			if(vm[i].str == w2)
			{
				lastword2 = vm[i].a;
				if (lastword1 >= 0)
				{
					minDistance = min(minDistance, lastword2 - lastword1);
				}
			}
		}
		return minDistance;
	}

	void setupTable()
	{
		for (int i = 0; i < words.size(); i++)
		{
			lookupTable[words[i]].push_back(MarkInt(i, words[i]));
		}
	}
	WordsDistacne(vector s):words(s)
	{
		setupTable();
	}
};

int main()
{
	string s[] = {"my", "you", "hi", "you", "hi","my", "go", "not", "yes", "you"};
	int n = sizeof(s)/sizeof(string);
	vector vs(s, s+n);
	string s1 = "my";
	string s2 = "not";
	cout<