-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathL16Q4_BetterSplitting.py
58 lines (47 loc) · 1.81 KB
/
L16Q4_BetterSplitting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# 1 Gold Star
# The built-in <string>.split() procedure works
# okay, but fails to find all the words on a page
# because it only uses whitespace to split the
# string. To do better, we should also use punctuation
# marks to split the page into words.
# Define a procedure, split_string, that takes two
# inputs: the string to split and a string containing
# all of the characters considered separators. The
# procedure should return a list of strings that break
# the source string up by the characters in the
# splitlist.
import re
# print re.split("[.+!]", "this.is+a!string")
#>>> 'this', 'is', 'a', 'string']
#my initial solution
def split_string(source,splitlist):
#format for re.split by adding brackets around string
split_list = '['+ splitlist + ']'
#perform re.split procedure
new_split = re.split(split_list, source)
#filter empty values from list
new_split = filter(None, new_split) # fastest
return new_split
#Udacity solution
def split_string_udacity(source,splitlist):
output = []
atsplit = True
for char in source:
if char in splitlist:
atsplit = True
else:
if atsplit:
output.append(char)
atsplit = False
else:
output[-1] += char
return output
out = split_string("This is a test-of the,string separation-code!"," ,!-")
print out
#>>> ['This', 'is', 'a', 'test', 'of', 'the', 'string', 'separation', 'code']
out = split_string("After the flood ... all the colors came out.", " .")
print out
#>>> ['After', 'the', 'flood', 'all', 'the', 'colors', 'came', 'out']
out = split_string("First Name,Last Name,Street Address,City,State,Zip Code",",")
print out
#>>>['First Name', 'Last Name', 'Street Address', 'City', 'State', 'Zip Code']