-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawler.h
96 lines (73 loc) · 1.79 KB
/
crawler.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#ifndef CRAWLER_H
#define CRAWLER_H
#include<string>
#include<vector>
#include <QString>
// Integer codes for the three tree kinds named in the macros (BST, TST, TRIE).
// NOTE(review): presumably these are the values expected by Crawler::build(int type)
// -- confirm against the implementation file.
#define BST_TREE_TYPE 0
#define TST_TREE_TYPE 1
#define TRIE_TREE_TYPE 2
/**
 * @brief Value type describing where a word was seen.
 *
 * Stores three integer identifiers (file, line, sentence) and two strings.
 * Only the constructor declarations live in this header; their definitions
 * are provided elsewhere.
 *
 * NOTE(review): last_word / next_word look like the words surrounding the
 * occurrence -- confirm against the code that fills them in.
 */
class WordPosition
{
public:
    /// Default constructor (defined outside this header).
    WordPosition();

    /**
     * @brief Constructs a position from explicit values.
     * @param file_id      identifier of the file
     * @param line_num     line number
     * @param sentence_num sentence number
     * @param last_word    string stored as the "last" word
     * @param next_word    string stored as the "next" word
     */
    WordPosition(int file_id,
                 int line_num,
                 int sentence_num,
                 std::string last_word,
                 std::string next_word);

private:
    // Data members keep their original declaration order.
    int file_id;
    int line_num;
    int sentence_num;
    std::string last_word;
    std::string next_word;
};
/**
 * @brief A word value together with the list of positions recorded for it.
 *
 * The only operation declared here is add_position(); its definition is
 * outside this header.
 */
class Word
{
public:
    /**
     * @brief Accepts one WordPosition for this word.
     * @param position the position, passed by value
     *
     * NOTE(review): presumably appends to the internal positions vector --
     * confirm against the implementation.
     */
    void add_position(WordPosition position);

private:
    std::string word_value;               ///< text of the word
    std::vector<WordPosition> positions;  ///< positions stored for the word
};
/**
 * @brief Node type carrying one word plus four links to other nodes.
 *
 * Each node stores the word text, the line number and the file identifier,
 * and four raw pointers to other FileWordNode objects (last / next and
 * last_equal / next_equal). Nothing in this header states who owns the
 * pointed-to nodes.
 *
 * NOTE(review): last/next presumably chain the words of one file in order,
 * and last_equal/next_equal presumably chain nodes holding the same word --
 * confirm against the implementation.
 */
struct FileWordNode
{
    /// Default constructor (defined outside this header).
    FileWordNode();

    /**
     * @brief Constructs a node from a word and its location.
     * @param word     text of the word
     * @param line_num line number
     * @param file_id  identifier of the file
     */
    FileWordNode(QString word, int line_num, int file_id);

    // Data members keep their original declaration order.
    QString word;
    FileWordNode* last;
    FileWordNode* next;
    FileWordNode* last_equal;
    FileWordNode* next_equal;
    int line_num;
    int file_id;
};
/**
 * @brief Keeps lists of directory and file paths and the per-file word lists.
 *
 * The constructor is private and the object is obtained by reference through
 * getInstance(). Copying is explicitly disabled: the class exposes a vector of
 * raw FileWordNode pointers and declares freeMemory() and a destructor, so an
 * accidental copy (e.g. `Crawler c = Crawler::getInstance();`) would create a
 * second object sharing the same pointers.
 * NOTE(review): freeMemory()/~Crawler() presumably release those nodes, which
 * would make such a copy a double free -- confirm against the implementation.
 *
 * All member functions are only declared here; their definitions are outside
 * this header.
 */
class Crawler
{
private:
    std::vector<QString> files_paths;
    std::vector<QString> dir_paths;

    /// Private so that instances can only be obtained via getInstance().
    Crawler();

    /**
     * @brief Tokenizer helper working on one line of text.
     * @param tokes list of tokens, passed by non-const reference
     * @param line  the line, passed by non-const reference
     * NOTE(review): presumably fills @p tokes from @p line -- confirm.
     */
    void tokenizeString(QStringList &tokes, QString &line);

    /// NOTE(review): presumably releases the nodes in file_words_starts -- confirm.
    void freeMemory();

public:
    /// Returns a reference to the Crawler instance.
    static Crawler& getInstance();

    // The instance is handed out by reference only; forbid copies.
    Crawler(const Crawler&) = delete;
    Crawler& operator=(const Crawler&) = delete;

    /// Raw pointers to FileWordNode objects, one vector entry per stored start.
    /// NOTE(review): presumably the head node of each crawled file -- confirm.
    std::vector<FileWordNode *> file_words_starts;

    void crawlAll();

    /**
     * @param type integer selector.
     * NOTE(review): presumably one of BST_TREE_TYPE / TST_TREE_TYPE /
     * TRIE_TREE_TYPE -- confirm.
     */
    void build(int type);

    void add_directory(QString dir_path);

    /// @param dir_id index of the directory; defaults to 0.
    QStringList listFiles(int dir_id = 0);

    QStringList listOfCrawledFiles();

    void addFilesOfDirectoriesToFilelist();

    /// @param dir_id index of the directory; defaults to 0.
    QString getDir(int dir_id = 0);

    QString getFileName(int id);

    bool isFileExistedInList(QString path);

    /**
     * @brief crawlFile
     * Parses the given file and adds it to the linked list.
     * (The original note said "parse the tree"; presumably the file's words
     * are what gets added -- confirm against the implementation.)
     * @param file_path path of the file to crawl
     * @return a bool status; presumably true on success -- confirm
     */
    bool crawlFile(QString file_path);

    bool add_file_to_list(QString file_path);

    void buildTree();

    ~Crawler();
};
/**
 * @brief Produces a QString from the given FileWordNode pointer.
 *
 * Declared here only; the definition is outside this header.
 * NOTE(review): judging by the name, this presumably walks the list starting
 * at @p node and builds a textual representation -- confirm against the
 * implementation, including how a null @p node is handled.
 */
QString linkedListToString(FileWordNode *node);
#endif // CRAWLER_H