forked from watmough/DBF
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dbf.c
301 lines (246 loc) · 6.94 KB
/
dbf.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// References:
//
// DBF Specification and considerations
// http://www.digitalpreservation.gov/formats/fdd/fdd000325.shtml
// http://www.esri.com/library/whitepapers/pdfs/shapefile.pdf (see page 25)
// http://webhelp.esri.com/arcgisdesktop/9.3/index.cfm?TopicName=Geoprocessing_considerations_for_shapefile_output
// http://www.clicketyclick.dk/databases/xbase/format/db2_dbf.html#DBII_DBF_STRUCT
// http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_NOTE_15_SOURCE
////////////////////////////////////////////////////////////////////////////////
// overall design
// this is the barest of bare-bones apis
// fields are all fixed length, and we know how many rows we have.
// copy field data verbatim
//
// Reading Interface
//------------------
//
// HANDLE dbfinit( byte * mem )
// map the file into memory, then call this to check the version byte
// and obtain the handle used by the other calls.
//
// dbfIIIfield ** dbfgetcolumns( HANDLE )
// get all the fields as a NULL-terminated array of pointers;
// can be used to populate a list of columns and types
//
// byte * dbfgetrawcolumn( HANDLE, int col, int *collen, int *count )
// returns a pointer to a newly allocated array holding the requested column
// for every record; assuming just a few columns are needed, it is very fast
// just to blit the data.
//
// char *getcharactercolumn( byte column )
// double *getnumericcolumn( byte column )
// char *getdatecolumn( byte column )
// char *gettimecolumn( byte column )
// planned simple type wrappers around dbfgetrawcolumn() (not implemented yet)
//
// Writing Interface
//------------------
// write version
// write number of records
// write last update
// write record length
// write each field specifier
// write terminator
// write records
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
// One raw octet of the DBF file image.
typedef unsigned char byte;
// Opaque handle handed out by dbfinit(); dbfinit() sets it to the address of
// the memory image it was given.
typedef void * HANDLE;
/*
// structs (dBASE II layout, kept for reference only; not compiled)
typedef struct dbfIIheader // supports dbase II
{
byte version[1];
byte numberofrecords[2];
byte lastupdate[3];
byte recordlength[2];
// array of field descriptors
// single byte terminator
// array of records
} dbfIIheader;
typedef struct dbfIIfield
{
byte fieldname[10];
byte fieldtype;
byte fieldlength;
byte fieldaddress[2];
byte fieldcount;
} dbfIIfield;
*/
// File header overlaid on the first bytes of the memory image.
// Every member is a byte array, so multi-byte values are stored as raw bytes;
// the member sizes add up to 32 bytes. The field descriptors are expected to
// start immediately after this structure (see dbfgetcolumns).
typedef struct dbfIIIheader // supports dbase III - V
{
byte version[1]; // first byte of the image; dbfinit accepts the values 3, 4 and 5
byte lastupdate[3];
byte numberofrecords[4]; // number of records; dbfgetrawcolumn reports it through *count
byte headerlength[2]; // record data starts after this
byte recordlength[2]; // record length; dbfgetrawcolumn uses it as the step between records
byte reserved1[2];
byte incompletetransaction[1];
byte encrypted[1];
byte lanfreerecord[4];
byte reserved2[8];
byte mdx[1];
byte language[1];
byte reserved3[2];
// what follows the header in the file:
// array of field descriptors
// single byte terminator
// array of records
} dbfIIIheader;
// One field (column) descriptor, overlaid on the memory image directly after
// the file header. The member sizes add up to 32 bytes, which is the step
// used when walking from one descriptor to the next.
typedef struct dbfIIIfield
{
byte fieldname[11]; // column name; main() prints it as a C string
byte fieldtype;
byte fieldaddress[4]; // do not use
byte fieldlength; // width of the column in bytes inside each record
byte fieldecimalcount;
byte reserved1[2];
byte workareaid[1];
byte reserved2[2];
byte setfields;
byte reserved3[7];
byte indexflag;
} dbfIIIfield;
// HANDLE dbfinit( byte * mem )
// Validate a memory-mapped DBF image and hand it back as an opaque handle.
//
// mem      start of the DBF file image; its first byte is the version tag.
// returns  mem itself (as a HANDLE) when the version byte is 3, 4 or 5.
//          For any other version a message is printed and the process exits
//          with status 1, so a null handle is never actually returned.
HANDLE dbfinit(byte * mem)
{
    byte version = *(byte *)mem;
    int supported = (version == 3 || version == 4 || version == 5);

    if( !supported ) {
        printf("Unsupported DBF version: %0x\n",version);
        exit(1);
    }

    // the handle is simply the address of the mapped image
    return mem;
}
// dbfIIIfield ** dbfgetcolumns( HANDLE handle )
// Build a NULL-terminated array of pointers to the field descriptors.
// Can be used to populate a list of columns and types.
//
// handle   handle obtained from dbfinit().
// returns  a malloc'ed array with one pointer per field followed by a NULL
//          guard entry. The pointers refer to descriptors inside the mapped
//          image (nothing is copied), so they are only valid while the image
//          stays mapped. The caller owns the array and releases it with free().
//
// Errors (null handle, unsupported version byte, allocation failure) print a
// message and terminate the process with status 1.
dbfIIIfield ** dbfgetcolumns( HANDLE handle )
{
    // check handle is known type
    byte * mem = (byte *)handle;
    if( mem==NULL ) {
        fprintf(stderr,"dbfgetcolumns: null handle\n");
        exit(1);
    }
    if( *mem<3 || *mem>5 ) {
        // error, unsupported
        printf("Unknown DBF version: %0x\n",*mem);
        exit(1);
    }

    // descriptors start right after the file header; the list ends at the
    // first descriptor slot whose leading byte is 0 or the 0x0D terminator
    byte * first = mem+sizeof(dbfIIIheader);
    size_t columns = 0;
    for( byte * pos = first; *pos!=0 && *pos!=0xD; pos += sizeof(dbfIIIfield) ) {
        columns++;
    }

    // grab some memory to return pointers, include guard null
    dbfIIIfield ** array = malloc((columns+1)*sizeof *array);
    if( array==NULL ) {
        fprintf(stderr,"dbfgetcolumns: out of memory for %lu column pointers\n",
                (unsigned long)(columns+1));
        exit(1);
    }

    // point each entry at its descriptor inside the image
    for( size_t i=0;i<columns;++i ) {
        array[i] = (dbfIIIfield*)(first+i*sizeof(dbfIIIfield));
    }
    array[columns] = NULL;
    return array;
}
// Decode a 16-bit little-endian value from two raw header bytes.
static unsigned int dbf_le16( const byte * p )
{
    return (unsigned int)p[0] | ((unsigned int)p[1]<<8);
}

// Decode a 32-bit little-endian value from four raw header bytes.
static unsigned long dbf_le32( const byte * p )
{
    return (unsigned long)p[0]
         | ((unsigned long)p[1]<<8)
         | ((unsigned long)p[2]<<16)
         | ((unsigned long)p[3]<<24);
}

// byte * dbfgetrawcolumn( HANDLE handle, int col, int *collen, int *count )
// Copy one column out of every record into a freshly allocated, tightly
// packed array. Assuming just a few columns are needed, it is very fast just
// to blit the data.
//
// handle   handle obtained from dbfinit().
// col      zero-based column index. A negative value selects column 0
//          (the descriptor walk simply does not advance).
// collen   out: width of the column in bytes.
// count    out: number of records.
// returns  a malloc'ed buffer of (*count * *collen) bytes, record i's value at
//          offset i * *collen. Values are copied verbatim and are not
//          NUL-terminated. The caller releases the buffer with free().
//
// Errors (unsupported version byte, column index past the last field, record
// count that does not fit an int, allocation failure) print a message and
// terminate the process with status 1.
byte * dbfgetrawcolumn( HANDLE handle,int col, int *collen, int *count )
{
    // check handle is known type
    byte * mem = (byte *)handle;
    if( *mem<3 || *mem>5 ) {
        // error, unsupported
        printf("Unknown DBF version: %0x\n",*mem);
        exit(1);
    }

    // Header integers are assembled from their bytes instead of being read
    // through int/short pointer casts: that avoids aliasing and alignment
    // problems and does not depend on the byte order of the host.
    dbfIIIheader * header = (dbfIIIheader *)mem;
    unsigned long records = dbf_le32(header->numberofrecords);
    if( records>0x7FFFFFFFUL ) {
        // would not fit the int reported through *count
        fprintf(stderr,"dbfgetrawcolumn: record count %lu is too large\n",records);
        exit(1);
    }
    *count = (int)records;
    size_t headerlength = dbf_le16(header->headerlength);
    size_t recordlength = dbf_le16(header->recordlength);

    // find where in the record we need to copy from;
    // the offset starts at 1 to skip the leading byte of each record
    int colnum = 0;
    int recordoff = 1;
    byte * pos = mem+sizeof(dbfIIIheader);
    dbfIIIfield * field = (dbfIIIfield*)pos;
    while(*pos!=0 && *pos!=0xD && colnum<col) {
        // progress trace kept from the original implementation
        printf("col: %d ",colnum);
        colnum++;
        field = (dbfIIIfield*)pos;
        recordoff += field->fieldlength;
        printf("width: %d cum: %d\n",field->fieldlength,recordoff);
        pos+=sizeof(dbfIIIfield);
    }

    // pos now has to sit on a real descriptor; if it sits on the end-of-list
    // marker the requested column does not exist
    if( *pos==0 || *pos==0xD ) {
        fprintf(stderr,"dbfgetrawcolumn: column %d out of range (file has %d columns)\n",
                col,colnum);
        exit(1);
    }

    // create data
    field = (dbfIIIfield*)pos;
    *collen = field->fieldlength;
    size_t width = field->fieldlength;
    if( width!=0 && records>(size_t)-1/width ) {
        fprintf(stderr,"dbfgetrawcolumn: column data too large to allocate\n");
        exit(1);
    }
    size_t total = (size_t)records*width;
    // never request zero bytes so that a NULL result always means failure
    byte * data = malloc(total ? total : 1);
    if( data==NULL ) {
        fprintf(stderr,"dbfgetrawcolumn: out of memory for %lu bytes\n",(unsigned long)total);
        exit(1);
    }

    // step through and blit data
    const byte * firstvalue = mem+headerlength+recordoff;
    byte * datapos = data;
    for( size_t i=0;i<(size_t)records;++i ) {
        memcpy(datapos,firstvalue+i*recordlength,width);
        datapos += width;
    }
    return data;
}
// test the api
// Lists the column names of the DBF file, then prints every row of one column.
//
// NOTE(review): the input path is still hard-coded and the LAST command line
// argument is parsed as the zero-based column index (atoi, so non-numeric
// text selects column 0). This does not match the usage text below; the
// behaviour is kept as-is here and should be revisited when the option
// parsing is implemented.
int main( int argc, char * argv[] )
{
    // debugging
    printf("sizeof(dbfIIIheader): %zu\n",sizeof(dbfIIIheader));
    printf("sizeof(dbfIIIfield): %zu\n",sizeof(dbfIIIfield));

    // usage
    if( argc<2 ) {
        printf("usage: %s [-c] [-r name] [-v] file\n -c lists the column names\n -r lists all rows for named column\n -v dump as csv.\n",argv[0]);
        exit(1);
    }

    // parse command line args
    // get filename
    const char * filename = "/Users/jonathan/Development/ShapeFiles/TM_WORLD_BORDERS-0.3.dbf";

    // memory map filename
    int fd = open(filename,O_RDONLY);
    if( fd<0 ) {
        perror(filename);
        exit(1);
    }

    // map the whole file rather than a fixed 64 KiB window, so that records
    // beyond the first 64 KiB can be read
    struct stat st;
    if( fstat(fd,&st)!=0 ) {
        perror("fstat");
        exit(1);
    }
    if( st.st_size<(off_t)sizeof(dbfIIIheader) ) {
        fprintf(stderr,"%s: too small to be a DBF file\n",filename);
        exit(1);
    }
    size_t maplen = (size_t)st.st_size;
    byte * mem = mmap(0,maplen,PROT_READ,MAP_SHARED,fd,0);
    if( mem==MAP_FAILED ) {
        perror("mmap");
        exit(1);
    }

    // initialize
    HANDLE handle = dbfinit(mem);

    {
        // list columns
        dbfIIIfield ** columns = dbfgetcolumns(handle);
        for(int i=0;columns[i]!=0;++i) {
            dbfIIIfield * field = columns[i];
            // the name occupies a fixed 11-byte slot, so bound the print in
            // case it is not NUL-terminated
            printf("%.11s\n",(const char *)field->fieldname);
        }
        free(columns);
    }

    {
        // list named column
        int colnum = atoi(argv[argc-1]);
        int collen = 0;
        int count = 0;
        byte * data = dbfgetrawcolumn(handle,colnum,&collen,&count);
        for(int i=0;i<count;++i) {
            const char * value = (const char *)(data+(size_t)i*(size_t)collen);
            printf("row: %d col: %.*s\n",i,collen,value);
        }
        free(data);
    }

    {
        // dump as csv file (not implemented yet)
    }

    munmap(mem,maplen);
    close(fd);
    return 0;
}