Subversion Repositories gelsvn

Rev

Rev 448 | Only display areas with differences | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 448 Rev 595
-
 
1
/* ----------------------------------------------------------------------- *
-
 
2
 * This file is part of GEL, http://www.imm.dtu.dk/GEL
-
 
3
 * Copyright (C) the authors and DTU Informatics
-
 
4
 * For license and list of authors, see ../../doc/intro.pdf
-
 
5
 * ----------------------------------------------------------------------- */
-
 
6
 
-
 
7
/**
-
 
8
 * @file HashTable.h
-
 
9
 * @brief Hash table class.
-
 
10
 */
-
 
11
 
1
#ifndef __UTIL_HASHTABLE_H
12
#ifndef __UTIL_HASHTABLE_H
2
#define __UTIL_HASHTABLE_H
13
#define __UTIL_HASHTABLE_H
3
 
14
 
4
#include "HashKey.h"
15
#include "HashKey.h"
5
 
16
 
6
#include <functional>
17
#include <functional>
7
#include <iostream>
18
#include <iostream>
8
 
19
 
9
namespace Util
20
namespace Util
10
{
21
{
11
 
22
 
12
	namespace
23
	namespace
13
	{
24
	{
14
		const int HASH_EXPAND_LOAD = 80;
25
		const int HASH_EXPAND_LOAD = 80;
15
	};
26
	};
16
 
27
 
17
	/** \brief Hashtable class template. 
28
	/** \brief Hashtable class template. 
18
 
29
 
19
	This class template has taken for too long to get right. That is because
30
	This class template has taken for too long to get right. That is because
20
	it is flexible. It can be used either as a simple (key, value) database
31
	it is flexible. It can be used either as a simple (key, value) database
21
	or as a reference counted database.
32
	or as a reference counted database.
22
 
33
 
23
	There are two template arguments: KEY_T is the hash key type. This type
34
	There are two template arguments: KEY_T is the hash key type. This type
24
	must provide operators == and != and a hash function which takes an 
35
	must provide operators == and != and a hash function which takes an 
25
	integer denoting the table size and returns another integer - the table
36
	integer denoting the table size and returns another integer - the table
26
	argument.
37
	argument.
27
		
38
		
28
	VAL_T is the value type. There are few restrictions on this type but it 
39
	VAL_T is the value type. There are few restrictions on this type but it 
29
	must have default and copy constructors and operator=. The HashTable 
40
	must have default and copy constructors and operator=. The HashTable 
30
	template is intended for use with simple types.
41
	template is intended for use with simple types.
31
	*/
42
	*/
32
	template<class KEY_T, class VAL_T>
43
	template<class KEY_T, class VAL_T>
33
	class HashTable
44
	class HashTable
34
	{
45
	{
35
		/** struct containing hashtable entries.
46
		/** struct containing hashtable entries.
36
				Apart from the value, this struct also contains a reference
47
				Apart from the value, this struct also contains a reference
37
				count and the hash key */
48
				count and the hash key */
38
		struct EntryType
49
		struct EntryType
39
		{
50
		{
40
			// Hash key
51
			// Hash key
41
			KEY_T key;
52
			KEY_T key;
42
 
53
 
43
			/// Table value corresponding to key
54
			/// Table value corresponding to key
44
			VAL_T val;
55
			VAL_T val;
45
 
56
 
46
			/// Reference counter
57
			/// Reference counter
47
			signed char ref;
58
			signed char ref;
48
		
59
		
49
		public:
60
		public:
50
			EntryType(): ref(-1) {}
61
			EntryType(): ref(-1) {}
51
		};
62
		};
52
	
63
	
53
		/// The table itself (array of entries)
64
		/// The table itself (array of entries)
54
		EntryType* table;   
65
		EntryType* table;   
55
 
66
 
56
	private:
67
	private:
57
 
68
 
58
		/// Number of elements in the table
69
		/// Number of elements in the table
59
		int items;          
70
		int items;          
60
  
71
  
61
		/// Size of table
72
		/// Size of table
62
		int size;       
73
		int size;       
63
 
74
 
64
		/// Resize the table (new size is argument)
75
		/// Resize the table (new size is argument)
65
		void resize(int new_size);
76
		void resize(int new_size);
66
 
77
 
67
		/** Private find entry function. Returns true iff entry was found 
78
		/** Private find entry function. Returns true iff entry was found 
68
				and reference non-zero. the second argument is set to the table
79
				and reference non-zero. the second argument is set to the table
69
				index corresponding to key. */
80
				index corresponding to key. */
70
		bool find_entry_priv(const KEY_T&, int&) const;
81
		bool find_entry_priv(const KEY_T&, int&) const;
71
  
82
  
72
	public:
83
	public:
73
 
84
 
74
		/// Construct an empty hash table
85
		/// Construct an empty hash table
75
		HashTable(): items(0), size(1) 
86
		HashTable(): items(0), size(1) 
76
		{
87
		{
77
			table = new EntryType[size];
88
			table = new EntryType[size];
78
		}
89
		}
79
 
90
 
80
		/// Destruct it
91
		/// Destruct it
81
		~HashTable() { delete [] table; }
92
		~HashTable() { delete [] table; }
82
 
93
 
83
		/** Create entry in hash table.
94
		/** Create entry in hash table.
84
				The first argument is the key, and the second is the value
95
				The first argument is the key, and the second is the value
85
				to be inserted. The third argument is the amount that the 
96
				to be inserted. The third argument is the amount that the 
86
				reference count should be increased.
97
				reference count should be increased.
87
			
98
			
88
				If third argument is zero: The key,value pair is inserted
99
				If third argument is zero: The key,value pair is inserted
89
				if key is not found. In either case the reference count is 
100
				if key is not found. In either case the reference count is 
90
				set to 1. function returns true if key was not found (or 
101
				set to 1. function returns true if key was not found (or 
91
				reference count=0 which counts as key begin absent)
102
				reference count=0 which counts as key begin absent)
92
 
103
 
93
				If third argument is non-zero: Behaviour is as above except 
104
				If third argument is non-zero: Behaviour is as above except 
94
				that the reference count is incremented or set to value of 
105
				that the reference count is incremented or set to value of 
95
				third argument depending on whether the key was already present. */
106
				third argument depending on whether the key was already present. */
96
		bool create_entry(const KEY_T&, const VAL_T&,int=0);
107
		bool create_entry(const KEY_T&, const VAL_T&,int=0);
97
 
108
 
98
		bool create_entry(const KEY_T& kv, VAL_T*& val, int incr=0);
109
		bool create_entry(const KEY_T& kv, VAL_T*& val, int incr=0);
99
 
110
 
100
 
111
 
101
 
112
 
102
		/** Removes an entry from database.
113
		/** Removes an entry from database.
103
				This function returns false if the key was not found. Otherwise
114
				This function returns false if the key was not found. Otherwise
104
				the reference count is set to zero and the function returns true
115
				the reference count is set to zero and the function returns true
105
				having first reduced the size of the table, if the size was 
116
				having first reduced the size of the table, if the size was 
106
				below threshold after removal. */
117
				below threshold after removal. */
107
		bool remove_entry(const KEY_T&);
118
		bool remove_entry(const KEY_T&);
108
 
119
 
109
		/** Find entry in database.
120
		/** Find entry in database.
110
				The second argument is set to the value that corresponds to the key
121
				The second argument is set to the value that corresponds to the key
111
				passed as first argument. The function returns true if the key
122
				passed as first argument. The function returns true if the key
112
				was found (and reference non-zero) */
123
				was found (and reference non-zero) */
113
		bool find_entry(const KEY_T&, VAL_T&) const;
124
		bool find_entry(const KEY_T&, VAL_T&) const;
114
 
125
 
115
		/** Decrease reference count. THe key is first argument, the value
126
		/** Decrease reference count. THe key is first argument, the value
116
				is returned in second argument, and the third argument contains 
127
				is returned in second argument, and the third argument contains 
117
				the amount of decrease, 1 is default. The function returns true
128
				the amount of decrease, 1 is default. The function returns true
118
				if the key was found and reference non-zero. False otherwise.
129
				if the key was found and reference non-zero. False otherwise.
119
 
130
 
120
				if the reference count is zero after being decreased, the function
131
				if the reference count is zero after being decreased, the function
121
				may decrease the size of the table. 
132
				may decrease the size of the table. 
122
		*/
133
		*/
123
		bool decr_reference(const KEY_T&, VAL_T&, int=1);
134
		bool decr_reference(const KEY_T&, VAL_T&, int=1);
124
 
135
 
125
 
136
 
126
		/** Change Entry. Does not affect reference count. Returns true
137
		/** Change Entry. Does not affect reference count. Returns true
127
				if the entry exists - false otherwise. */
138
				if the entry exists - false otherwise. */
128
		bool change_entry(const KEY_T&, const VAL_T&);
139
		bool change_entry(const KEY_T&, const VAL_T&);
129
 
140
 
130
		/// Print various statistics for the table 
141
		/// Print various statistics for the table 
131
		void print() const;
142
		void print() const;
132
 
143
 
133
		/** Get the first entry in HashTable. All arguments are passed by 
144
		/** Get the first entry in HashTable. All arguments are passed by 
134
				reference. The first argument is the index. Upon return the 
145
				reference. The first argument is the index. Upon return the 
135
				first argument will contain the first valid index in the table.
146
				first argument will contain the first valid index in the table.
136
				This index should be passed to the get_next function. (see below).
147
				This index should be passed to the get_next function. (see below).
137
				Subsequent arguments are the key, the value and the reference 
148
				Subsequent arguments are the key, the value and the reference 
138
				count. These are set to the values that correspond to the index. 
149
				count. These are set to the values that correspond to the index. 
139
				The function returns false if there are no valid indices - i.e. 
150
				The function returns false if there are no valid indices - i.e. 
140
				the table is empty 
151
				the table is empty 
141
		*/
152
		*/
142
		bool get_first(int& idx, KEY_T&,VAL_T& val,int&) const;
153
		bool get_first(int& idx, KEY_T&,VAL_T& val,int&) const;
143
 
154
 
144
		/** Get the next entry in the HashTable. All arguments are passed
155
		/** Get the next entry in the HashTable. All arguments are passed
145
				by reference.
156
				by reference.
146
				This first argument is a valid index to an element in the table 
157
				This first argument is a valid index to an element in the table 
147
				(from a previous call to either this function or get_first)
158
				(from a previous call to either this function or get_first)
148
				get_next finds the next valid entry and returns key, value and 
159
				get_next finds the next valid entry and returns key, value and 
149
				reference count in the next three arguments. get_next returns 
160
				reference count in the next three arguments. get_next returns 
150
				false if there is no next valid index. */
161
				false if there is no next valid index. */
151
		bool get_next(int& idx, KEY_T&,VAL_T& val,int&) const;
162
		bool get_next(int& idx, KEY_T&,VAL_T& val,int&) const;
152
 
163
 
153
		/// Returns the sum of the reference counts that are > 0
164
		/// Returns the sum of the reference counts that are > 0
154
		int integrate() const; 
165
		int integrate() const; 
155
 
166
 
156
 
167
 
157
		/// Returns no of items in table.
168
		/// Returns no of items in table.
158
		int no_items() const {return items;}
169
		int no_items() const {return items;}
159
 
170
 
160
		/// Returns true if number of items==0
171
		/// Returns true if number of items==0
161
		bool empty() const {return items==0;}
172
		bool empty() const {return items==0;}
162
 
173
 
163
		/// Returns total size of table (in bytes)
174
		/// Returns total size of table (in bytes)
164
		int total_size() const {return sizeof(*this) + sizeof(EntryType)*size;}
175
		int total_size() const {return sizeof(*this) + sizeof(EntryType)*size;}
165
 
176
 
166
		/// Returns the full size of an entry (key + value + ref.count).
177
		/// Returns the full size of an entry (key + value + ref.count).
167
		static int entry_size() {return sizeof(EntryType);}
178
		static int entry_size() {return sizeof(EntryType);}
168
	
179
	
169
	};
180
	};
170
 
181
 
171
 
182
 
172
	template<class KEY_T, class VAL_T>
183
	template<class KEY_T, class VAL_T>
173
	bool HashTable<KEY_T, VAL_T>::find_entry_priv(const KEY_T& kv, int& k) const
184
	bool HashTable<KEY_T, VAL_T>::find_entry_priv(const KEY_T& kv, int& k) const
174
	{
185
	{
175
		k = kv.hash(size);
186
		k = kv.hash(size);
176
		int fk = k;
187
		int fk = k;
177
		while(table[k].ref != -1)
188
		while(table[k].ref != -1)
178
			{
189
			{
179
				if (table[k].key == kv) 
190
				if (table[k].key == kv) 
180
					return (table[k].ref > 0) ? true : false;
191
					return (table[k].ref > 0) ? true : false;
181
				k = (k+1)&(size-1);
192
				k = (k+1)&(size-1);
182
				if (k==fk) break;
193
				if (k==fk) break;
183
			}
194
			}
184
		return false;
195
		return false;
185
	}
196
	}
186
 
197
 
187
	template<class KEY_T, class VAL_T>
198
	template<class KEY_T, class VAL_T>
188
	bool HashTable<KEY_T, VAL_T>::find_entry(const KEY_T& kv, VAL_T& val) const
199
	bool HashTable<KEY_T, VAL_T>::find_entry(const KEY_T& kv, VAL_T& val) const
189
	{
200
	{
190
		int k;
201
		int k;
191
		if(find_entry_priv(kv,k))
202
		if(find_entry_priv(kv,k))
192
			{
203
			{
193
				val = table[k].val;
204
				val = table[k].val;
194
				return true;
205
				return true;
195
			}
206
			}
196
		return false;
207
		return false;
197
	}
208
	}
198
 
209
 
199
	template<class KEY_T, class VAL_T>
210
	template<class KEY_T, class VAL_T>
200
	void HashTable<KEY_T, VAL_T>::resize(int new_size)
211
	void HashTable<KEY_T, VAL_T>::resize(int new_size)
201
	{ 
212
	{ 
202
		int j=0;
213
		int j=0;
203
		EntryType* tmp_table = new EntryType[new_size];
214
		EntryType* tmp_table = new EntryType[new_size];
204
		for(int i=0; i<size;i++)
215
		for(int i=0; i<size;i++)
205
			if (table[i].ref >0)
216
			if (table[i].ref >0)
206
				{
217
				{
207
					j++;
218
					j++;
208
					int k = table[i].key.hash(new_size);
219
					int k = table[i].key.hash(new_size);
209
				
220
				
210
					while(tmp_table[k].ref != -1) 
221
					while(tmp_table[k].ref != -1) 
211
						k=(k+1)&(new_size-1); 
222
						k=(k+1)&(new_size-1); 
212
				
223
				
213
					tmp_table[k].key = table[i].key;
224
					tmp_table[k].key = table[i].key;
214
					tmp_table[k].val = table[i].val;
225
					tmp_table[k].val = table[i].val;
215
					tmp_table[k].ref = table[i].ref;
226
					tmp_table[k].ref = table[i].ref;
216
				}
227
				}
217
      
228
      
218
		delete table;
229
		delete table;
219
		table = tmp_table;
230
		table = tmp_table;
220
		size = new_size;
231
		size = new_size;
221
	}
232
	}
222
 
233
 
223
	template<class KEY_T, class VAL_T>
234
	template<class KEY_T, class VAL_T>
224
	bool HashTable<KEY_T, VAL_T>::create_entry(const KEY_T& kv, 
235
	bool HashTable<KEY_T, VAL_T>::create_entry(const KEY_T& kv, 
225
																						 const VAL_T& val,
236
																						 const VAL_T& val,
226
																						 int incr)
237
																						 int incr)
227
	{
238
	{
228
		if (items*100 >= HASH_EXPAND_LOAD * size) 
239
		if (items*100 >= HASH_EXPAND_LOAD * size) 
229
			resize(size*2);
240
			resize(size*2);
230
 
241
 
231
		int k;
242
		int k;
232
		if(!find_entry_priv(kv,k))
243
		if(!find_entry_priv(kv,k))
233
			{
244
			{
234
				++items;
245
				++items;
235
				table[k].ref = (incr==0? 1: incr);
246
				table[k].ref = (incr==0? 1: incr);
236
				table[k].key = kv;
247
				table[k].key = kv;
237
				table[k].val = val;
248
				table[k].val = val;
238
				return true;
249
				return true;
239
			}
250
			}
240
		table[k].ref += incr;
251
		table[k].ref += incr;
241
		table[k].val = val;
252
		table[k].val = val;
242
		return false;
253
		return false;
243
	}
254
	}
244
 
255
 
245
	template<class KEY_T, class VAL_T>
256
	template<class KEY_T, class VAL_T>
246
	bool HashTable<KEY_T, VAL_T>::create_entry(const KEY_T& kv, 
257
	bool HashTable<KEY_T, VAL_T>::create_entry(const KEY_T& kv, 
247
																						 VAL_T*& val,
258
																						 VAL_T*& val,
248
																						 int incr)
259
																						 int incr)
249
	{
260
	{
250
		if (items*100 >= HASH_EXPAND_LOAD * size) 
261
		if (items*100 >= HASH_EXPAND_LOAD * size) 
251
			resize(size*2);
262
			resize(size*2);
252
 
263
 
253
		int k;
264
		int k;
254
		if(!find_entry_priv(kv,k))
265
		if(!find_entry_priv(kv,k))
255
			{
266
			{
256
				++items;
267
				++items;
257
				table[k].ref = (incr==0? 1: incr);
268
				table[k].ref = (incr==0? 1: incr);
258
				table[k].key = kv;
269
				table[k].key = kv;
259
				val = &table[k].val;
270
				val = &table[k].val;
260
				return true;
271
				return true;
261
			}
272
			}
262
		table[k].ref += incr;
273
		table[k].ref += incr;
263
		val=&table[k].val;
274
		val=&table[k].val;
264
		return false;
275
		return false;
265
	}
276
	}
266
 
277
 
267
 
278
 
268
	template<class KEY_T, class VAL_T>
279
	template<class KEY_T, class VAL_T>
269
	bool HashTable<KEY_T, VAL_T>::remove_entry(const KEY_T& kv)
280
	bool HashTable<KEY_T, VAL_T>::remove_entry(const KEY_T& kv)
270
	{
281
	{
271
		int k;
282
		int k;
272
		if(find_entry_priv(kv,k))
283
		if(find_entry_priv(kv,k))
273
			{
284
			{
274
				table[k].ref = 0;
285
				table[k].ref = 0;
275
				--items;
286
				--items;
276
				if(items*100 < (HASH_EXPAND_LOAD * size) / 2)
287
				if(items*100 < (HASH_EXPAND_LOAD * size) / 2)
277
					resize(size/2);
288
					resize(size/2);
278
				return true;
289
				return true;
279
			}
290
			}
280
		return false;
291
		return false;
281
	}
292
	}
282
 
293
 
283
 
294
 
284
 
295
 
285
	template<class KEY_T, class VAL_T>
296
	template<class KEY_T, class VAL_T>
286
	bool HashTable<KEY_T, VAL_T>::decr_reference(const KEY_T& kv,  
297
	bool HashTable<KEY_T, VAL_T>::decr_reference(const KEY_T& kv,  
287
																							 VAL_T& val, 
298
																							 VAL_T& val, 
288
																							 int decr)
299
																							 int decr)
289
	{
300
	{
290
		int k;
301
		int k;
291
		if(find_entry_priv(kv,k))
302
		if(find_entry_priv(kv,k))
292
			{
303
			{
293
				table[k].ref -= decr;
304
				table[k].ref -= decr;
294
				val = table[k].val;
305
				val = table[k].val;
295
				if(table[k].ref == 0)
306
				if(table[k].ref == 0)
296
					{
307
					{
297
						--items;
308
						--items;
298
						if(items*100 < (HASH_EXPAND_LOAD * size) / 2)
309
						if(items*100 < (HASH_EXPAND_LOAD * size) / 2)
299
							resize(size/2);
310
							resize(size/2);
300
					}
311
					}
301
				return true;
312
				return true;
302
			}
313
			}
303
		return false;
314
		return false;
304
	}
315
	}
305
 
316
 
306
	template<class KEY_T, class VAL_T>
317
	template<class KEY_T, class VAL_T>
307
	bool HashTable<KEY_T, VAL_T>::change_entry(const KEY_T& kv,  
318
	bool HashTable<KEY_T, VAL_T>::change_entry(const KEY_T& kv,  
308
																						 const VAL_T& val)
319
																						 const VAL_T& val)
309
	{
320
	{
310
		int k;
321
		int k;
311
		if(find_entry_priv(kv,k))
322
		if(find_entry_priv(kv,k))
312
			{
323
			{
313
				table[k].val = val;
324
				table[k].val = val;
314
				return true;
325
				return true;
315
			}
326
			}
316
		return false;
327
		return false;
317
	}
328
	}
318
 
329
 
319
 
330
 
320
	template<class KEY_T, class VAL_T>
331
	template<class KEY_T, class VAL_T>
321
	void HashTable<KEY_T, VAL_T>::print() const
332
	void HashTable<KEY_T, VAL_T>::print() const
322
	{
333
	{
323
		std::cout << "Entry size " << sizeof(EntryType) << " bytes\n"
334
		std::cout << "Entry size " << sizeof(EntryType) << " bytes\n"
324
							<< "Max number of items " << size	<< "\n" 
335
							<< "Max number of items " << size	<< "\n" 
325
							<< "Number of items " << items << std::endl;
336
							<< "Number of items " << items << std::endl;
326
	}
337
	}
327
 
338
 
328
	template<class KEY_T, class VAL_T>
339
	template<class KEY_T, class VAL_T>
329
	int HashTable<KEY_T, VAL_T>::integrate() const 
340
	int HashTable<KEY_T, VAL_T>::integrate() const 
330
	{
341
	{
331
		int integral =0;
342
		int integral =0;
332
		for(int i=0;i<size; i++)
343
		for(int i=0;i<size; i++)
333
			if(table[i].ref>0) integral += table[i].ref;
344
			if(table[i].ref>0) integral += table[i].ref;
334
		return integral;
345
		return integral;
335
	}
346
	}
336
 
347
 
337
	template<class KEY_T, class VAL_T>
348
	template<class KEY_T, class VAL_T>
338
	bool HashTable<KEY_T, VAL_T>::get_first(int& idx, 
349
	bool HashTable<KEY_T, VAL_T>::get_first(int& idx, 
339
																					KEY_T& key,
350
																					KEY_T& key,
340
																					VAL_T& val,
351
																					VAL_T& val,
341
																					int& r) const 
352
																					int& r) const 
342
	{
353
	{
343
		if(size==0) return false;
354
		if(size==0) return false;
344
	
355
	
345
		idx=0;
356
		idx=0;
346
		while(table[idx].ref <= 0)
357
		while(table[idx].ref <= 0)
347
			{
358
			{
348
				++idx;
359
				++idx;
349
				if(idx==size) return false;
360
				if(idx==size) return false;
350
			}
361
			}
351
		key = table[idx].key;
362
		key = table[idx].key;
352
		val = table[idx].val;
363
		val = table[idx].val;
353
		r   = table[idx].ref;
364
		r   = table[idx].ref;
354
		return true;
365
		return true;
355
	}
366
	}
356
 
367
 
357
	template<class KEY_T, class VAL_T>
368
	template<class KEY_T, class VAL_T>
358
	bool HashTable<KEY_T, VAL_T>::get_next(int& idx, 
369
	bool HashTable<KEY_T, VAL_T>::get_next(int& idx, 
359
																				 KEY_T& key,
370
																				 KEY_T& key,
360
																				 VAL_T& val,
371
																				 VAL_T& val,
361
																				 int& r) const
372
																				 int& r) const
362
	{
373
	{
363
		idx++;
374
		idx++;
364
		if(idx==size) return false;
375
		if(idx==size) return false;
365
		while(table[idx].ref <= 0)
376
		while(table[idx].ref <= 0)
366
			{
377
			{
367
				++idx;
378
				++idx;
368
				if(idx==size) return false;
379
				if(idx==size) return false;
369
			}
380
			}
370
		key = table[idx].key;
381
		key = table[idx].key;
371
		val = table[idx].val;
382
		val = table[idx].val;
372
		r   = table[idx].ref;
383
		r   = table[idx].ref;
373
		return true;
384
		return true;
374
	}
385
	}
375
 
386
 
376
}
387
}
377
 
388
 
378
 
389
 
379
#endif
390
#endif
380
 
391