# Layering quotation marks (when need be)
s = "this is a 'string'"  # good: single quotes nested inside double quotes
print(s)
s = 'this is a "string"'  # good: double quotes nested inside single quotes
print(s)
# Bad: reusing the delimiter inside the literal ends the string early, which is
# a SyntaxError that stops the whole file from running. The offending lines are
# therefore kept only as a commented-out counter-example:
#   s = "this is a "string""
#   print(s)
# Use escape characters when this occurs
s = "this is a \"string\""  # good again: \" is a literal double quote
print(s)
# Multiline strings: a triple-quoted literal keeps every line break exactly as
# typed, including the one right after the opening quotes.
s2 = """
This is a long string!
With many lines
Many. Lines.
"""
print(s2)
# Alternatively, embed \n wherever a new line should start.
s3 = (
    "This is a long string!\n"
    "Now I'm starting a new line."
)
print(s3)
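str types are really useful because so many common text-manipulation methods — covering much of what one might otherwise need regular expressions (regex) for — are baked into the string object when it is created.
Note that some operators, such as addition (+) and multiplication (*), take on new functionality here: + concatenates strings and * repeats them.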
Method | Description |
---|---|
.capitalize() |
S.capitalize() -> str |
.casefold() |
S.casefold() -> str |
.center() |
S.center(width[, fillchar]) -> str |
.count() |
S.count(sub[, start[, end]]) -> int |
.encode() |
S.encode(encoding='utf-8', errors='strict') -> bytes |
.endswith() |
S.endswith(suffix[, start[, end]]) -> bool |
.expandtabs() |
S.expandtabs(tabsize=8) -> str |
.find() |
S.find(sub[, start[, end]]) -> int |
.format() |
S.format(*args, **kwargs) -> str |
.format_map() |
S.format_map(mapping) -> str |
.index() |
S.index(sub[, start[, end]]) -> int |
.isalnum() |
S.isalnum() -> bool |
.isalpha() |
S.isalpha() -> bool |
.isdecimal() |
S.isdecimal() -> bool |
.isdigit() |
S.isdigit() -> bool |
.isidentifier() |
S.isidentifier() -> bool |
.islower() |
S.islower() -> bool |
.isnumeric() |
S.isnumeric() -> bool |
.isprintable() |
S.isprintable() -> bool |
.isspace() |
S.isspace() -> bool |
.istitle() |
S.istitle() -> bool |
.isupper() |
S.isupper() -> bool |
.join() |
S.join(iterable) -> str |
.ljust() |
S.ljust(width[, fillchar]) -> str |
.lower() |
S.lower() -> str |
.lstrip() |
S.lstrip([chars]) -> str |
.maketrans() |
Return a translation table usable for str.translate(). |
.partition() |
S.partition(sep) -> (head, sep, tail) |
.replace() |
S.replace(old, new[, count]) -> str |
.rfind() |
S.rfind(sub[, start[, end]]) -> int |
.rindex() |
S.rindex(sub[, start[, end]]) -> int |
.rjust() |
S.rjust(width[, fillchar]) -> str |
.rpartition() |
S.rpartition(sep) -> (head, sep, tail) |
.rsplit() |
S.rsplit(sep=None, maxsplit=-1) -> list of strings |
.rstrip() |
S.rstrip([chars]) -> str |
.split() |
S.split(sep=None, maxsplit=-1) -> list of strings |
.splitlines() |
S.splitlines([keepends]) -> list of strings |
.startswith() |
S.startswith(prefix[, start[, end]]) -> bool |
.strip() |
S.strip([chars]) -> str |
.swapcase() |
S.swapcase() -> str |
.title() |
S.title() -> str |
.translate() |
S.translate(table) -> str |
.upper() |
S.upper() -> str |
.zfill() |
S.zfill(width) -> str |
my_str = "Regression is a common statistical learning technique"
# Case helpers: each call returns a new value and leaves my_str untouched.
print(
    my_str.lower(),  # convert to lower
    my_str.upper(),  # convert to upper
    my_str.isupper(),  # boolean determination
    sep="\n",
)
# Swap one substring for another (the result is a new string).
my_str.replace("Regression", "Random Forest")

sent = "This is important to remember."
# Break a string into a list of words (splits on whitespace by default).
sent.split()

seq_str = "A A A A B B B B C C C C A A C A B A C"
# Count the number of times a certain pattern occurs.
seq_str.count("A")

# Locate the first "i", then use that position to index back into the string.
ind = sent.find("i")
print(ind, sent[ind], sep="\n")
# In concert, the string methods let us clean up messy text step by step.
sent_ws = " THIS is a Sentence &95#with problems"
cleaning_steps = (
    str.strip,  # strip white space
    lambda text: text.replace("&95#", ""),  # strip problem values by leveraging the pattern
    str.lower,  # convert to lower case
    str.capitalize,  # capitalize the first letter
    lambda text: text + ".",  # finish the sentence with a period
)
for step in cleaning_steps:
    sent_ws = step(sent_ws)
    print(sent_ws)  # show the intermediate result after every step
Often, we need to combine data and strings, either to report results or progress, or compose more versatile text objects. Python makes it easy to integrate data with strings.
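"" % ()
The % operator takes in a tuple of data points. We then need to specify the location where we want each data value to appear and the data type of the incoming value (e.g. %s for a string, %d for an integer, %.3f for a float shown with three decimal places).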
x = 4
y = "dog"
# %s formats any value through str(), so it suits both the number and the word.
"This is a string with a number (%s) and a word (%s)" % (x, y)
# %d only accepts numbers: pairing it with the string "dog" raises a TypeError.
# Catch it and report the message so the remaining examples still run.
try:
    "This is a string with a number (%d) and a word (%d)" % (x, y)
except TypeError as err:
    print(f"TypeError: {err}")
# Matching each placeholder to its value's type works: %d for the integer, %s for the string.
"This is a string with a number (%d) and a word (%s)" % (x, y)
# %.3f renders the number as a float with three decimal places (4 -> 4.000).
"This is a string with a number (%.3f) and a word (%s)" % (x, y)
.format()
# Integer positions: {0} and {1} refer to the positional arguments of .format()
"This is a string with a number ({0}) and a word ({1})".format('4','dog')
# Named fields: each {name} is filled from the matching keyword argument
'This is a {a} in a {b}'.format(a='dog',b='house')
# Can leverage index positions of defined data structures.
ps = [1.0,2.2,3]
# {ps[2]} indexes into the list passed as the keyword argument ps
'This is a field: {ps[2]} and {ps[1]}. '.format(ps=ps)
f-strings emerge from a desire to make string formatting more readable. The above two methods are fine, but they can be difficult to read when the statements become involved. To this end, f-strings provide an easy syntax in which objects can be evaluated directly in the string statement using {}. This increases readability.
# The expression inside each {} is evaluated and its result spliced into the text.
f"This is a field: {ps[2]} and {ps[1]}"
# Any expression is allowed inside the braces, including calls and arithmetic.
f"Progress: {round(44 / 76 * 100, 2)}%"
Note that the default string encoding is UTF-8.
word = "éôü"
word  # the text itself: three accented characters
# Encoding turns the str into bytes; in UTF-8 each of these characters takes two bytes.
en_word = word.encode(encoding='UTF-8')
en_word
# Decoding with the same codec recovers the original text.
en_word.decode(encoding='UTF-8')
Let's briefly explore working with dates in Python using the datetime module from the standard library.
from datetime import datetime
# Capture the current local date and time; with no tz argument the result is a
# naive datetime (it carries no timezone information).
now = datetime.now()
now  # display the datetime object
Method | Description |
---|---|
.astimezone() |
tz -> convert to local time in new timezone tz |
.combine() |
date, time -> datetime with same date and time fields |
.ctime() |
Return ctime() style string. |
.date() |
Return date object with same year, month and day. |
.day |
Attribute (accessed without parentheses): the day of the month as an integer, from 1 to the number of days in that month. |
.dst() |
Return self.tzinfo.dst(self). |
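.fold |
Attribute (accessed without parentheses): 0 or 1; distinguishes the two occurrences of a wall-clock time that repeats (e.g. when clocks are set back at the end of daylight saving time). |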
.fromisoformat() |
string -> datetime from datetime.isoformat() output |
.fromordinal() |
int -> date corresponding to a proleptic Gregorian ordinal. |
.fromtimestamp() |
timestamp[, tz] -> tz's local time from POSIX timestamp. |
.hour |
Attribute (accessed without parentheses): the hour as an integer, 0-23. |
.isocalendar() |
Return a 3-tuple containing ISO year, week number, and weekday. |
.isoformat() |
[sep] -> string in ISO 8601 format, YYYY-MM-DDT[HH[:MM[:SS[.mmm[uuu]]]]][+HH:MM]. sep is used to separate the year from the time, and defaults to 'T'. timespec specifies what components of the time to include (allowed values are 'auto', 'hours', 'minutes', 'seconds', 'milliseconds', and 'microseconds'). |
.isoweekday() |
Return the day of the week represented by the date. Monday == 1 ... Sunday == 7 |
.max |
Class attribute: the latest representable datetime, datetime(9999, 12, 31, 23, 59, 59, 999999). |
.microsecond |
Attribute (accessed without parentheses): the microsecond as an integer, 0-999999. |
.min |
Class attribute: the earliest representable datetime, datetime(1, 1, 1, 0, 0). |
.minute |
Attribute (accessed without parentheses): the minute as an integer, 0-59. |
.month |
Attribute (accessed without parentheses): the month as an integer, 1-12. |
.now() |
Returns new datetime object representing current time local to tz. |
.replace() |
Return datetime with new specified fields. |
.resolution |
Class attribute: the smallest possible difference between two non-equal datetime values, timedelta(microseconds=1). |
.second |
Attribute (accessed without parentheses): the second as an integer, 0-59. |
.strftime() |
format -> strftime() style string. |
.strptime() |
string, format -> new datetime parsed from a string (like time.strptime()). |
.time() |
Return time object with same time but with tzinfo=None. |
.timestamp() |
Return POSIX timestamp as float. |
.timetuple() |
Return time tuple, compatible with time.localtime(). |
.timetz() |
Return time object with same time and tzinfo. |
.today() |
Current date or datetime: same as self.class.fromtimestamp(time.time()). |
.toordinal() |
Return proleptic Gregorian ordinal. January 1 of year 1 is day 1. |
.tzname() |
Return self.tzinfo.tzname(self). |
.utcfromtimestamp() |
Construct a naive UTC datetime from a POSIX timestamp. |
.utcnow() |
Return a new datetime representing UTC day and time. |
.utcoffset() |
Return self.tzinfo.utcoffset(self). |
.utctimetuple() |
Return UTC time tuple, compatible with time.localtime(). |
.weekday() |
Return the day of the week represented by the date. Monday == 0 ... Sunday == 6 |
.year |
Attribute (accessed without parentheses): the year as an integer, between MINYEAR (1) and MAXYEAR (9999). |
# Individual date components are plain attributes, so no parentheses are needed.
now.month  # month component as an integer
now.year  # year component as an integer
Can format Date Time
# strftime renders the datetime as text according to the given format codes.
now.strftime('%Y %m %d')  # four-digit year, month and day separated by spaces
now.strftime('%Y-%m-%d %H:%M:%S')  # date followed by the 24-hour time
Generating dates
# Build a specific date by hand; the time fields that are left out default to midnight.
past = datetime(2008, 4, 20)
past
Comparing dates
diff = now - past  # subtracting one datetime from another yields a timedelta
diff.days  # number of whole days in that difference
from datetime import timedelta
# Adding a timedelta to a datetime shifts it; here, five hours forward.
now + timedelta(hours=5)
Converting raw date strings into datetime objects. The secret is that we need to identify the structure in which the date is formatted.
# Describe the layout of the raw text with format codes:
# %b = abbreviated month name, %d = day of the month, %Y = four-digit year.
raw_date = 'Jun 1 2005'
date_layout = '%b %d %Y'
new_date = datetime.strptime(raw_date, date_layout)
new_date