This repository has been archived by the owner on Sep 2, 2021. It is now read-only.
forked from bireme/isis2json
-
Notifications
You must be signed in to change notification settings - Fork 6
/
subfield.py
142 lines (108 loc) · 4.03 KB
/
subfield.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# ISIS-DM: the ISIS Data Model API
#
# Copyright (C) 2010 BIREME/PAHO/WHO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 2.1 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from collections import namedtuple
import re
# Key under which expand() files the text found before the first subfield
# marker (or the whole content when splitting is disabled).
MAIN_SUBFIELD_KEY = '_'
# Default subfield marker used by expand(): a caret followed by one character
# in a-z or 0-9, matched case-insensitively; the character is captured as
# group 1.
SUBFIELD_MARKER_RE = re.compile(r'\^([a-z0-9])', re.IGNORECASE)
# Default value of the ``encoding`` argument of CompositeString.
DEFAULT_ENCODING = u'utf-8'
def expand(content, subkeys=None):
    ''' Split a raw field into a list of ``(key, subfield)`` pairs

    A subfield starts at a caret followed by a one-character key.  Text
    found before the first marker is filed under MAIN_SUBFIELD_KEY.  Keys
    are lower-cased and trailing whitespace is stripped from every value.

    ``subkeys`` selects which markers are recognised:

    * ``None`` -- use SUBFIELD_MARKER_RE;
    * ``''`` -- do not split at all, return the content untouched under
      MAIN_SUBFIELD_KEY;
    * any other string -- inserted verbatim into a regular expression
      character class, matched case-insensitively.

    >>> expand('zero^1one^2two^3three')
    [('_', 'zero'), ('1', 'one'), ('2', 'two'), ('3', 'three')]
    '''
    if subkeys == '':
        return [(MAIN_SUBFIELD_KEY, content)]

    if subkeys is None:
        marker = SUBFIELD_MARKER_RE
    else:
        marker = re.compile(r'\^([' + subkeys + '])', re.IGNORECASE)

    # A space is inserted after every doubled caret, so the second caret is
    # no longer directly followed by the character that came after it.
    text = content.replace('^^', '^^ ')

    pairs = []
    current_key = MAIN_SUBFIELD_KEY
    cursor = 0
    for match in marker.finditer(text):
        # Everything between the previous marker and this one belongs to
        # the key announced by the previous marker.
        pairs.append((current_key, text[cursor:match.start()].rstrip()))
        current_key = match.group(1).lower()
        cursor = match.end()
    # The tail after the last marker (or the whole text when none matched).
    pairs.append((current_key, text[cursor:].rstrip()))
    return pairs
class CompositeString(object):
    ''' Represent an Isis field, with subfields, using
        Python native datastructures

    The raw field is kept as unicode text and is also parsed by expand()
    into a list of ``(key, subfield)`` pairs, which backs item access,
    iteration and items().

    >>> author = CompositeString('John Tenniel^xillustrator',
    ...                          subkeys='x')
    >>> unicode(author)
    u'John Tenniel^xillustrator'
    '''

    def __init__(self, isis_raw, subkeys=None, encoding=DEFAULT_ENCODING):
        ''' Store the raw field as unicode and parse its subfields

        isis_raw -- the field content, as a byte string or unicode text
        subkeys  -- passed through to expand() to select the markers
        encoding -- used to decode a byte string ``isis_raw`` and to
                    encode the text again in __str__

        Raises TypeError when ``isis_raw`` is not a string.
        '''
        if not isinstance(isis_raw, basestring):
            # The 1-tuple keeps %-formatting from unpacking a tuple argument.
            raise TypeError('%r value must be unicode or str instance' % (isis_raw,))
        if isinstance(isis_raw, str):
            # Only byte strings are decoded: calling decode() on unicode text
            # makes Python 2 encode it as ASCII first, which fails on any
            # non-ASCII character.
            isis_raw = isis_raw.decode(encoding)
        self.__encoding = encoding
        self.__isis_raw = isis_raw
        self.__expanded = expand(self.__isis_raw, subkeys)

    def __getitem__(self, key):
        ''' Return the first subfield stored under ``key``

        Raises KeyError when no subfield has that key.
        '''
        for subkey, subfield in self.__expanded:
            if subkey == key:
                return subfield
        raise KeyError(key)

    def __iter__(self):
        ''' Iterate over the subfield keys, in field order '''
        return (subfield[0] for subfield in self.__expanded)

    def items(self):
        ''' Return the list of ``(key, subfield)`` pairs built by expand() '''
        return self.__expanded

    def __unicode__(self):
        ''' Return the raw field as unicode text '''
        return self.__isis_raw

    def __str__(self):
        ''' Return the raw field as a byte string in the instance encoding '''
        # str() on unicode text would use the ASCII codec and fail on
        # non-ASCII content, so encode explicitly.
        return self.__isis_raw.encode(self.__encoding)
class CompositeField(object):
    ''' Represent an Isis field, with subfields, using
        Python native datastructures

    ``value`` is an iterable of ``(key, subfield)`` pairs, or any object
    with an ``items()`` method returning such pairs (a dict, for example).
    ``subkeys`` lists the accepted keys and fixes their order; when it is
    omitted the keys of ``value`` are used, in the order they appear.
    Keys listed in ``subkeys`` but absent from ``value`` get ``None``.

    >>> author = CompositeField( [('name','Braz, Marcelo'),('role','writer')] )
    >>> print author['name']
    Braz, Marcelo
    >>> print author['role']
    writer
    >>> author
    CompositeField((('name', 'Braz, Marcelo'), ('role', 'writer')))
    >>> str(author)
    "(('name', 'Braz, Marcelo'), ('role', 'writer'))"
    '''

    def __init__(self, value, subkeys=None):
        ''' Build the ordered tuple of ``(key, subfield)`` pairs

        Raises TypeError when ``value`` is not a key-value structure or
        when it holds a key that is not listed in ``subkeys``.
        '''
        try:
            # Materialise the pairs once, so that one-shot iterables are not
            # exhausted before dict() sees them and so that mappings
            # contribute their real keys rather than pieces of them.
            if hasattr(value, 'items'):
                pairs = list(value.items())
            else:
                pairs = list(value)
            value_as_dict = dict(pairs)
        except TypeError:
            # Report the offending argument; ``self`` has no value yet, so
            # it cannot be formatted here.
            raise TypeError('%r value must be a key-value structure' % (value,))
        if subkeys is None:
            subkeys = [pair[0] for pair in pairs]
        for key in value_as_dict:
            if key not in subkeys:
                raise TypeError('Unexpected keyword %r' % (key,))
        self.value = tuple((key, value_as_dict.get(key)) for key in subkeys)

    def __getitem__(self, key):
        ''' Return the subfield stored under ``key`` (KeyError if absent) '''
        return dict(self.value)[key]

    def __repr__(self):
        return "CompositeField(%s)" % str(self.items())

    def items(self):
        ''' Return the tuple of ``(key, subfield)`` pairs '''
        return self.value

    def __unicode__(self):
        ''' Return the pairs rendered as unicode text '''
        return unicode(self.items())

    def __str__(self):
        ''' Return the pairs rendered as a string '''
        return str(self.items())
def test():
    ''' Run the doctests through doctest.testmod() with default arguments '''
    from doctest import testmod
    testmod()
# Run the doctests when this file is executed as a script.
if __name__=='__main__':
    test()