C语言实现简单的倒排文件索引
inver.h文件
[cpp]
#ifndef INVERT_FILE_H
#define INVERT_FILE_H
#include<stdio.h>
#include<stdlib.h>
typedef struct _invertfile_ {
unsigned int tablelen;
void **table;
//unsigned int offset;
unsigned int nodecount;
}if_t;
typedef struct _word_{
unsigned int id;
unsigned int refered;//
void *link;
}word_t;
typedef struct _word_frequency_{
unsigned int d_id;
unsigned int refered;//the num of referenced in the document
void *next;
}wf_t;
if_t* invertfile_create(int length);
void invertfile_insert(if_t *h,int w_id,int d_id);
wf_t* invertfile_search(if_t *h,int w_id,int d_id);
void invertfile_traverse(if_t *h);
void invertfile_free(if_t *h);
#endif
invert.cpp
[cpp]
#include"invert.h"
if_t* invertfile_create(int length){
if_t *h;
h = (if_t *)calloc(1,sizeof(if_t));
if (NULL == h) return NULL;
h->table =(void **)calloc(length,sizeof(void *));
h->tablelen=length;
h->nodecount=0;
word_t *w;
for(int i=0;i<length;i++){
h->table[i]=malloc(sizeof(word_t));
w=(word_t*)h->table[i];
w->id=i;
w->refered=0;
w->link=NULL;
}
return h;
}
//check if document d_id have word w_id
wf_t* invertfile_search(if_t *h,int w_id,int d_id){
word_t *w;
wf_t *wf;
w=(word_t*)h->table[w_id];
if(w->refered>0){
wf=(wf_t*)w->link;
while(wf){
if(wf->d_id==d_id)return wf;
wf=(wf_t*)wf->next;
}
}
return NULL;
}
void invertfile_insert(if_t *h,int w_id,int d_id){
word_t * w=(word_t*)h->table[w_id];
wf_t * wf;
if((wf=invertfile_search(h,w_id,d_id))!=NULL){
wf->refered++;
}
else{
wf=(wf_t *)malloc(sizeof(wf_t));
wf->next=w->link;
w->link=wf;
w->refered++;
wf->refered++;
wf->d_id=d_id;
h->nodecount++;
}
}
void invertfile_free(if_t *h){
word_t *w;
wf_t* wf,*cur;
for(int i=0;i<h->tablelen;i++){
w=(word_t*)h->table[i];
wf=(wf_t*)w->link;
while(wf!=NULL){
cur=wf;
wf=(wf_t*)wf->next;
free(cur);
}
free(w);
}
free(h->table);
}
void invertfile_traverse(if_t *h){
word_t *w;
wf_t* wf,*cur;
for(int i=0;i<h->tablelen;i++){
w=(word_t*)h->table[i];
wf=(wf_t*)w->link;
printf("word_id:%d;",w->id);
while(wf!=NULL){
cur=wf;
wf=(wf_t*)wf->next;
printf("d_id:%d,freq:%d;",cur->d_id,cur->refered);
}
printf("\n");
}
}
测试文件main.cpp
[cpp]
#include"invert.h"
int main(){
if_t *f=invertfile_create(10);
invertfile_insert(f,1,1);
invertfile_insert(f,1,1);
invertfile_insert(f,1,3);
invertfile_insert(f,2,5);
invertfile_traverse(f);
invertfile_free(f);
}
实验结果:
[cpp]
word_id:0;
word_id:1;d_id:3,freq:1;d_id:1,freq:2;
word_id:2;d_id:5,freq:1;
word_id:3;
word_id:4;
word_id:5;
word_id:6;
word_id:7;
word_id:8;
word_id:9;
补充:软件开发 , C语言 ,