Python全栈开发学习笔记

时间:2022-12-31 14:03:51

---恢复内容开始---

正则表达式

简介

正则表达式(RE)就其本质而言,是一种小型的、高度专业化的编程语言,它内嵌在 Python 中,并通过 re 模块实现。

 

import re

#字符串的处理
"abcde".find("b") #返回字符串所处的位置,找不到的话返回-1
"abcde".find("bc")
"abcde".split("b") #以b为分割 字符串
"abcd".replace("ab","ee") #把 ab替换成ee

 字符匹配(普通字符,元字符)

 

  普通字符: 大多数字符和字母都会和自身匹配

import re

re.findall(
"bin", "adfadfafsdbindfadf") # 在第二个字符串找到第一个字符串,严格匹配,找到会返回,没找到为空

元字符(11个): 

    .    ^   $   *   +   ?   {     }  [  ]  |  (  )  \

  1.  "." 代表除了换行符以外的任何一个字符. (模糊匹配)

import re

r = re.findall("alex.w", "aaaalexkw")
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[
'alexkw']

Process finished with exit code 0

  2.   "^" 匹配字符串的开头: 模式必须从字符串的起始位置开始匹配

r = re.findall("^alex.w", "aaaalexkw")
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/。。。/PycharmProjects/python_fullstack/day9.py
[]

Process finished with exit code 0
#!/usr/bin/env python
#
-*- coding:utf-8 -*-


import re

r
= re.findall("^alex.w", "alexkw0000")
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/stephen/PycharmProjects/python_fullstack/day9.py
[
'alexkw']

Process finished with exit code 0

 3. "$" 匹配字符串的尾部: 模式必须在字符串的末尾结束

#!/usr/bin/env python
#
-*- coding:utf-8 -*-


import re

r
= re.findall("alex.w$", "99999alexkw")
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[
'alexkw']

Process finished with exit code 0

 4.   "*" 匹配它前面紧邻的那一个字符 0 次或多次 (贪婪匹配, 尽可能多地匹配); 例如 alex* 中的 * 只作用于 x.

如果要重复多个字符, 需要先用括号把它们括成一组, 再在后面加 *, 例如 (ex)*。

import re

r
= re.findall("alex*", "wwwalex")
print(r)

r
= re.findall("alex*", "wwwale")
print(r)


r
= re.findall("alex*", "wwwalexxxxxxxxxxxxxxxxxxx")
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/。。。/PycharmProjects/python_fullstack/day9.py
[
'alex']
[
'ale']
[
'alexxxxxxxxxxxxxxxxxxx']

Process finished with exit code 0

 5.  "+" 和 "*" 类似, 匹配它之前的那个字符, 但要求出现 1 到多次 ("*" 是 0 到多次).

 1 r = re.findall('alex+', "wwwalexxxxxxxxxxxxxx")
2 print(r)
3
4 #输出
5 "C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
6 ['alexxxxxxxxxxxxxx']
7
8
9
10 r = re.findall('alex+', "wwwale")
11 print(r)
12
13 #输出
14 "C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
15 []
16
17
18
19 r = re.findall('alex+', "wwwalexxxxxxxxxxxxxxxxxxxxx")
20 print(r)
21
22 #输出
23 "C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
24 ['alexxxxxxxxxxxxxxxxxxxxx']

6. "?" 匹配它之前的那个字符 0 次或 1 次, 无法匹配多次

r = re.findall("alex?", "wwwale") #匹配0个"x"
print(r)

r
= re.findall('alex?', 'wwwalex') # 匹配1个"x"
print(r)

r
= re.findall('alex?', 'wwwalexxxxxx') #无法匹配多个"x"
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[
'ale']
[
'alex']
[
'alex']

Process finished with exit code 0

 7. "{}" 匹配指定次数: {3} 表示恰好匹配 3 次; {3,5} 表示匹配 3~5 次 (3、4、5 次都可以).

r = re.findall('alex{3}', 'wwwalexxxx')   #
print(r)

r
= re.findall('alex{3,5}', 'wwwalexx') # 3~5次都可以匹配,最大5次
print(r)


r
= re.findall('alex{3,5}', 'wwwalexxx') # 3~5次都可以匹配,最大5次
print(r)

r
= re.findall('alex{3,5}', 'wwwalexxxx') # 3~5次都可以匹配,最大5次
print(r)

r
= re.findall('alex{3,5}', 'wwwalexxxxxxxxxxxxxx') # 3~5次都可以匹配,最大5次
print(r)


#输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[
'alexxx']
[]
[
'alexxx']
[
'alexxxx']
[
'alexxxxx']

Process finished with exit code 0

 8. “ [] ”  是或的意思,例如'a[bc]d', 可以匹配abd,  acd.  abcd不行,仅仅匹配1个b/c的字符。

r = re.findall('a[bc]d', 'wwwacd')
print(r)

r
= re.findall('a[bc]d', 'wwwabd')
print(r)


r
= re.findall('a[bc]d', 'wwwabcd')
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[
'acd']
[
'abd']
[]

Process finished with exit code 0

 ". " 在字符集里失去意义,变成了普通字符

r = re.findall('a[.]d', 'wwwaqd')
print(r)

r
= re.findall('a[.]d', 'wwwa.d')
print(r)

# 输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[]
[
'a.d']

Process finished with exit code 0

 在字符集里,仍然有意义的字符,特例是-,^

r = re.findall('[a-z]', 'wwwa.d')
print(r)

r
= re.findall('[1-9]', 'www3wa8.d')
print(r)

r
= re.findall('[^1-9]', 'www3wa8.d')
print(r)




#输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[
'w', 'w', 'w', 'a', 'd']
[
'3', '8']
['w', 'w', 'w', 'w', 'a', '.', 'd']

Process finished with exit code 0

 9. " \ " 反斜杠是最重要的元字符,作用:

     (1). 反斜杠后面跟元字符去除元字符的特殊功能;

     (2). 后面跟普通字符实现特殊功能。

     (3). 引用序号对应的分组所匹配的字符串

            

r = re.search(r"(alex)(eric)com\2", "alexericcomeric")
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
<_sre.SRE_Match object; span=(0, 15), match='alexericcomeric'>

Process finished with exit code 0

 \d 匹配任何十进制数字; 它相当于类 [0-9].

r = re.findall('\d', 'ww3wa8.d0')
print(r)

# 输出
"C:\Program Files\Python35\python.exe" C:/Users/stephen/PycharmProjects/python_fullstack/day9.py
[
'3', '8', '0']

Process finished with exit code 0

 

\D 匹配任何非数字字符;它相当于类[^0-9]

\s 匹配任何空白字符;它相当于类[ \t\n\r\f\v]

\S 匹配任何非空白字符;相当于类[^ \t\n\r\f\v]

\w 匹配任何字母数字字符;相当于类[a-zA-Z0-9_]

\W 匹配任何非字母数字字符;相当于类[^a-zA-Z0-9_]

\b 匹配一个单词边界,也就是单词字符(\w)与非单词字符(\W,如空格、标点)之间的位置,或单词字符与字符串开头/结尾之间的位置

 

---恢复内容结束---

正则表达式

简介

正则表达式(RE)就其本质而言,是一种小型的、高度专业化的编程语言,它内嵌在 Python 中,并通过 re 模块实现。

 

import re

#字符串的处理
"abcde".find("b") #返回字符串所处的位置,找不到的话返回-1
"abcde".find("bc")
"abcde".split("b") #以b为分割 字符串
"abcd".replace("ab","ee") #把 ab替换成ee

 字符匹配(普通字符,元字符)

 

  普通字符: 大多数字符和字母都会和自身匹配

import re

re.findall(
"bin", "adfadfafsdbindfadf") # 在第二个字符串找到第一个字符串,严格匹配,找到会返回,没找到为空

元字符(11个): 

    .    ^   $   *   +   ?   {     }  [  ]  |  (  )  \

  1.  "." 代表除了换行符以外的任何一个字符. (模糊匹配)

import re

r = re.findall("alex.w", "aaaalexkw")
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[
'alexkw']

Process finished with exit code 0

  2.   "^" 匹配字符串的开头: 模式必须从字符串的起始位置开始匹配

r = re.findall("^alex.w", "aaaalexkw")
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/。。。/PycharmProjects/python_fullstack/day9.py
[]

Process finished with exit code 0
#!/usr/bin/env python
#
-*- coding:utf-8 -*-


import re

r
= re.findall("^alex.w", "alexkw0000")
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/stephen/PycharmProjects/python_fullstack/day9.py
[
'alexkw']

Process finished with exit code 0

 3. "$" 匹配字符串的尾部: 模式必须在字符串的末尾结束

#!/usr/bin/env python
#
-*- coding:utf-8 -*-


import re

r
= re.findall("alex.w$", "99999alexkw")
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[
'alexkw']

Process finished with exit code 0

 4.   "*" 匹配它前面紧邻的那一个字符 0 次或多次 (贪婪匹配, 尽可能多地匹配); 例如 alex* 中的 * 只作用于 x.

如果要重复多个字符, 需要先用括号把它们括成一组, 再在后面加 *, 例如 (ex)*。

import re

r
= re.findall("alex*", "wwwalex")
print(r)

r
= re.findall("alex*", "wwwale")
print(r)


r
= re.findall("alex*", "wwwalexxxxxxxxxxxxxxxxxxx")
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/。。。/PycharmProjects/python_fullstack/day9.py
[
'alex']
[
'ale']
[
'alexxxxxxxxxxxxxxxxxxx']

Process finished with exit code 0

 5.  "+" 和 "*" 类似, 匹配它之前的那个字符, 但要求出现 1 到多次 ("*" 是 0 到多次).

 1 r = re.findall('alex+', "wwwalexxxxxxxxxxxxxx")
2 print(r)
3
4 #输出
5 "C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
6 ['alexxxxxxxxxxxxxx']
7
8
9
10 r = re.findall('alex+', "wwwale")
11 print(r)
12
13 #输出
14 "C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
15 []
16
17
18
19 r = re.findall('alex+', "wwwalexxxxxxxxxxxxxxxxxxxxx")
20 print(r)
21
22 #输出
23 "C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
24 ['alexxxxxxxxxxxxxxxxxxxxx']

6. "?" 匹配它之前的那个字符 0 次或 1 次, 无法匹配多次

r = re.findall("alex?", "wwwale") #匹配0个"x"
print(r)

r
= re.findall('alex?', 'wwwalex') # 匹配1个"x"
print(r)

r
= re.findall('alex?', 'wwwalexxxxxx') #无法匹配多个"x"
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[
'ale']
[
'alex']
[
'alex']

Process finished with exit code 0

 7. "{}" 匹配指定次数: {3} 表示恰好匹配 3 次; {3,5} 表示匹配 3~5 次 (3、4、5 次都可以).

r = re.findall('alex{3}', 'wwwalexxxx')   #
print(r)

r
= re.findall('alex{3,5}', 'wwwalexx') # 3~5次都可以匹配,最大5次
print(r)


r
= re.findall('alex{3,5}', 'wwwalexxx') # 3~5次都可以匹配,最大5次
print(r)

r
= re.findall('alex{3,5}', 'wwwalexxxx') # 3~5次都可以匹配,最大5次
print(r)

r
= re.findall('alex{3,5}', 'wwwalexxxxxxxxxxxxxx') # 3~5次都可以匹配,最大5次
print(r)


#输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[
'alexxx']
[]
[
'alexxx']
[
'alexxxx']
[
'alexxxxx']

Process finished with exit code 0

 8. “ [] ”  是或的意思,例如'a[bc]d', 可以匹配abd,  acd.  abcd不行,仅仅匹配1个b/c的字符。

r = re.findall('a[bc]d', 'wwwacd')
print(r)

r
= re.findall('a[bc]d', 'wwwabd')
print(r)


r
= re.findall('a[bc]d', 'wwwabcd')
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[
'acd']
[
'abd']
[]

Process finished with exit code 0

 ". " 在字符集里失去意义,变成了普通字符

r = re.findall('a[.]d', 'wwwaqd')
print(r)

r
= re.findall('a[.]d', 'wwwa.d')
print(r)

# 输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[]
[
'a.d']

Process finished with exit code 0

 在字符集里,仍然有意义的字符,特例是-,^ \d

r = re.findall('[a-z]', 'wwwa.d')
print(r)

r
= re.findall('[1-9]', 'www3wa8.d')
print(r)

r
= re.findall('[^1-9]', 'www3wa8.d')
print(r)




#输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[
'w', 'w', 'w', 'a', 'd']
[
'3', '8']
['w', 'w', 'w', 'w', 'a', '.', 'd']

Process finished with exit code 0
r = re.findall('[\d]', ' ww3 wa8.d0')
print(r)

#输出

"C:\Program Files\Python35\python.exe" C:/Users/stephen/PycharmProjects/python_fullstack/day9.py
['3', '8', '0']

 

 

 

9. " \ " 反斜杠是最重要的元字符,作用:

     (1). 反斜杠后面跟元字符去除元字符的特殊功能;

     (2). 后面跟普通字符实现特殊功能。

     (3). 引用序号对应的分组所匹配的字符串

            

r = re.search(r"(alex)(eric)com\2", "alexericcomeric")  # \2 引用第二个分组 group(2) ---> "eric"; group(1) 代表 "alex", group(2) 代表 "eric"
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
<_sre.SRE_Match object; span=(0, 15), match='alexericcomeric'>

Process finished with exit code 0

 \d 匹配任何十进制数字; 它相当于类 [0-9].

r = re.findall('\d', 'ww3wa8.d0')  # 匹配1个数字
print(r)
r
= re.findall('\d\d', 'ww23wa8.d0') #匹配2个
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[
'3', '8', '0']
[
'23']

Process finished with exit code 0

 

 

\D 匹配任何非数字字符;它相当于类[^0-9]

r = re.findall('\D', 'ww3wa8.d0')  # 匹配1非数字
print(r)
r
= re.findall('\D\D', 'ww3wa8.d0') # 匹配2非数字
print(r)

# 输出

"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[
'w', 'w', 'w', 'a', '.', 'd']
[
'ww', 'wa', '.d']

Process finished with exit code 0

 

\s 匹配任何空白字符;它相当于类[ \t\n\r\f\v]

r = re.findall('\s', 'ww3 wa8.d0')
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[
' ']

 

\S 匹配任何非空白字符;相当于类[^ \t\n\r\f\v]

\w 匹配任何字母数字字符;相当于类[a-zA-Z0-9_]

r = re.findall('\w', 'ww3wa8.d0')  # 匹配任意字母数字
print(r)
r
= re.findall('\w\w', 'ww3wa8.d0') # 匹配2个任意字母数字
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/.../PycharmProjects/python_fullstack/day9.py
[
'w', 'w', '3', 'w', 'a', '8', 'd', '0']
[
'ww', '3w', 'a8', 'd0']

Process finished with exit code 0

 

\W 匹配任何非字母数字字符;相当于类[^a-zA-Z0-9_]

\b 匹配一个单词边界,也就是单词字符(\w)与非单词字符(\W,如空格、标点)之间的位置,或单词字符与字符串开头/结尾之间的位置

r = re.findall(r"\bads\b", "abc bcd sf!ads!fasdf ads asfasfasdfasdfasdf")
print(r)

#输出
"C:\Program Files\Python35\python.exe" C:/Users/stephen/PycharmProjects/python_fullstack/day9.py
[
'ads', 'ads']

Process finished with exit code 0