散列 - 数据结构（分离链接法、开放定址法） · 数据结构与算法分析-C语言描述

散列是一种用于以常数平均时间执行插入、删除和查找的技术。理想的散列数据结构只不过是一个包含有关键字的具有固定大小的数组。典型情况下，一个关键字就是一个带有相关值（工资信息等）的字符串。散列函数主要的问题就是解决冲突的消除问题。如果当一个元素被插入时另一个元素已经存在（散列值存在），那么就会产生冲突。解决这种冲突的方法有几种，一般用最简单的两种：分离链接法、开放地址法 1.分离链接法 ![](https://box.kancloud.cn/2016-08-17_57b4292056093.jpg) //分离链接散列表的头文件声明 ~~~ #ifndef HASH_H_INCLUDED #define HASH_H_INCLUDED #include <stdio.h> #include <stdlib.h> typedef unsigned int Index; typedef int DataType; struct ListNode; typedef struct ListNode *Position; typedef Position List; struct HashTbl; typedef struct HashTbl *HashTable; HashTable Hash_InitTable(int TableSize); void Hash_Insert(DataType key, HashTable H); void Hash_Delete(DataType key, HashTable H); void Hash_Show(HashTable H); int Hash_Free(HashTable H); #endif // HASH_H_INCLUDED ~~~ //分离链接散列表的功能函数声明 ~~~ #include "hash.h" struct ListNode { DataType Data; Position Next; }; struct HashTbl { int TableSize; List *TheLists; }; void Print_Err(char *str); int NextPrime(int n); Index Hash(const int key, int tablesize) { unsigned int hashval = 0; int x; x = key; if(x < 0) { printf("the key is low 0\n"); exit(-1); } while(x != 0) { hashval = (hashval << 3) + x%10; x /= 10; } return hashval % tablesize; } HashTable Hash_InitTable(int Size) { HashTable H; int i; H = (HashTable)malloc(sizeof(struct HashTbl)); if(H == NULL) Print_Err("no space for malloc H"); H->TableSize = NextPrime(Size); H->TheLists = (List *)malloc(sizeof(List) * H->TableSize); if(H->TheLists == NULL) Print_Err("no space for malloc H->TabLists"); for(i = 0; i < H->TableSize; i++) { H->TheLists[i] = (List)malloc(sizeof(struct ListNode)); if(H->TheLists[i] == NULL) Print_Err("no space for malloc H->TabLists[i]"); else H->TheLists[i]->Next = NULL; } return H; } Position Hash_Find(DataType key, HashTable H) { Position P; List L; L = H->TheLists[Hash(key, H->TableSize)]; P = L->Next; while(P != NULL && P->Data != key) P = P->Next; return P; } void Hash_Insert(DataType key, HashTable H) { Position pos, new_cell; List L; pos = Hash_Find(key, H); if(pos == NULL) { new_cell = malloc(sizeof(struct ListNode)); if(new_cell == NULL) Print_Err("no space in Hash_Insert"); else { L = H->TheLists[Hash(key, H->TableSize)]; new_cell->Next = L->Next; new_cell->Data = key; L->Next = new_cell; } } } void Hash_Delete(DataType key, HashTable H) { Position pos, front_cell, cur_cell; List L; pos = Hash_Find(key, H); if(pos != NULL) { L = H->TheLists[Hash(key, H->TableSize)]; front_cell = L; while(front_cell->Next != NULL && front_cell->Next->Data != key) { front_cell = front_cell->Next; } cur_cell = front_cell->Next; front_cell->Next = cur_cell->Next; free(cur_cell); } } int Hash_Free(HashTable H) { int i; for(i = 0; i < H->TableSize; i++) free(H->TheLists[i]); free(H->TheLists); free(H); return 1; } void Hash_Show(HashTable H) { int i; Position p; printf("H->TableSize:%d\n", H->TableSize); for(i = 0; i < H->TableSize; i++) { printf("H->TheLists[%d]: ", i); p = H->TheLists[i]; p = p->Next; while(p != NULL) { printf("%d ", p->Data); p = p->Next; } printf("\n"); } } /* * hash.c中要用到的子函数 */ void Print_Err(char *str) { printf("%s\n", str); exit(-1); } int IsPrime(unsigned int n) { unsigned int i; if(n < 2) return -1; for(i = 2; i < n; i++) { if(n % i == 0) return 0; } return 1; } int NextPrime(int n) { int i; for(i = n; ; i++) { if(IsPrime(i) == 1) return i; } exit(-1); } ~~~ //main.c函数测试 ~~~ #include <stdio.h> #include <stdlib.h> #include "hash.h" int main() { HashTable H = NULL; H = Hash_InitTable(6); Hash_Insert(1, H); Hash_Insert(2, H); Hash_Insert(3, H); Hash_Insert(24, H); Hash_Insert(5, H); Hash_Insert(6, H); Hash_Show(H); Hash_Delete(24, H); Hash_Show(H); if(Hash_Free(H) == 1) printf("free the HashTable is ok\n"); return 0; } ~~~ 1.开放定址法因为开放定址法都要置入表内，所以开放定址法所需要的表要比分离链接散列用表大。开放定址法分为线性探测法、平方探测法、双散列 ![](https://box.kancloud.cn/2016-08-17_57b429207b797.jpg) 平方探测是消除线性探测中一次聚集问题的冲突解决办法，平方探测就是冲突函数为二次函数的探测方法。流行的选择是F(i)=i^2，上图显示了使用该冲突函数所得到的开放定址散列表。如果使用平方探测法，且表的大小是素数，那么当表至少有一半是空的时候，总能够插入一个新的元素。哪怕表有比一半多一个的位置被填满时，那么插入都有可能失败（虽然这是难以见到的） //开放定址法头文件声明-（平方探测法） ~~~ #ifndef HASH_H_INCLUDED #define HASH_H_INCLUDED #include <stdio.h> #include <stdlib.h> typedef unsigned int Index; typedef Index Position; struct HashTbl; typedef int DataType; typedef struct HashTbl *HashTable; HashTable Hash_Init(int Size); void Hash_Insert(DataType key, HashTable H); void Hash_Delete(DataType key, HashTable H); void Hash_Show(HashTable H); int Hash_Free(HashTable H); #endif // HASH_H_INCLUDED ~~~ //开放定址法功能函数声明-（平方探测法） ~~~ #include "hash.h" enum KindOfEntry {Legitimate, Empty, Deleted}; struct HashEntry { DataType Data; enum KindOfEntry Info; }; typedef struct HashEntry Cell; struct HashTbl { int TableSize; Cell *TheCells; }; void Print_Err(char *str); int NextPrime(int n); HashTable Hash_Init(int Size) { HashTable H; int i; H = (HashTable)malloc(sizeof(struct HashTbl)); if(H == NULL) Print_Err("no space for H in Hash_Init()"); H->TableSize = NextPrime(Size); H->TheCells = (Cell *)malloc(sizeof(Cell) * H->TableSize); if(H->TheCells == NULL) Print_Err("no space for H->TheCells in Hash_Init()"); for(i = 0; i < H->TableSize; i++) H->TheCells[i].Info = Empty; return H; } Index Hash(const int key, int tablesize) { unsigned int hashval = 0; int x; x = key; if(x < 0) { printf("the key is low 0\n"); exit(-1); } while(x != 0) { hashval = (hashval << 3) + x%10; x /= 10; } return hashval % tablesize; } Position Hash_Find(DataType key, HashTable H) { Position cur_pos; int cnt; cnt = 0; cur_pos = Hash(key, H->TableSize); while(H->TheCells[cur_pos].Info != Empty && H->TheCells[cur_pos].Data != key) { cur_pos += (2 * ++cnt - 1); if(cnt >= H->TableSize) cnt -= H->TableSize; } return cur_pos; } void Hash_Insert(DataType key, HashTable H) { Position pos; pos = Hash_Find(key, H); if(H->TheCells[pos].Info != Legitimate) { H->TheCells[pos].Info = Legitimate; H->TheCells[pos].Data = key; } } void Hash_Delete(DataType key, HashTable H) { Position pos; pos = Hash_Find(key, H); if(H->TheCells[pos].Info == Legitimate) { H->TheCells[pos].Info = Empty; //H->TheCells[pos].Data = key; } } void Hash_Show(HashTable H) { int i; printf("H->TableSize: %d\n", H->TableSize); for(i = 0; i < H->TableSize; i++) { if(H->TheCells[i].Info == Legitimate) { printf("H->TheCells[%d]: %d\n", i, H->TheCells[i].Data); } else { printf("H->TheCells[%d]:\n", i); } } printf("\n"); } int Hash_Free(HashTable H) { if(H != NULL) { free(H->TheCells); free(H); } return 1; } /* * hash.c中要用到的子函数 */ void Print_Err(char *str) { printf("%s\n", str); exit(-1); } int IsPrime(unsigned int n) { unsigned int i; if(n < 2) return -1; for(i = 2; i < n; i++) { if(n % i == 0) return 0; } return 1; } int NextPrime(int n) { int i; for(i = n; ; i++) { if(IsPrime(i) == 1) return i; } exit(-1); } ~~~ //main.c测试主函数-（平方探测法） ~~~ #include <stdio.h> #include <stdlib.h> #include "hash.h" int main() { HashTable H; H = Hash_Init(6); Hash_Insert(1, H); Hash_Insert(2, H); Hash_Insert(3, H); Hash_Insert(13, H); Hash_Insert(14, H); Hash_Show(H); Hash_Delete(3, H); Hash_Show(H); Hash_Insert(3, H); Hash_Show(H); if(Hash_Free(H)) printf("free the hash is ok\n"); return 0; } ~~~ 3.其他的散列还有再散列和可扩展列。 ![](https://box.kancloud.cn/2016-08-17_57b42920a2da9.jpg)