Parsing HTML with BeautifulSoup
Install
Common package name: beautifulsoup4 (install with `pip install beautifulsoup4`)
Parse HTML
bs4_parse.py
from bs4 import BeautifulSoup
html = """
<html><body>
<h1 class='title'>Hello</h1>
<a href='https://example.com'>Link</a>
</body></html>
"""
soup = BeautifulSoup(html, "html.parser")
print(soup.h1.get_text(strip=True))
print(soup.a["href"])
bs4_parse.py
from bs4 import BeautifulSoup
html = """
<html><body>
<h1 class='title'>Hello</h1>
<a href='https://example.com'>Link</a>
</body></html>
"""
soup = BeautifulSoup(html, "html.parser")
print(soup.h1.get_text(strip=True))
print(soup.a["href"])
Find elements
bs4_find.py
from bs4 import BeautifulSoup
soup = BeautifulSoup("<p class='x'>A</p><p class='y'>B</p>", "html.parser")
print(soup.find("p", class_="y").text)
print([p.text for p in soup.find_all("p")])
bs4_find.py
from bs4 import BeautifulSoup
soup = BeautifulSoup("<p class='x'>A</p><p class='y'>B</p>", "html.parser")
print(soup.find("p", class_="y").text)
print([p.text for p in soup.find_all("p")])
Use CSS selectors
bs4_select.py
from bs4 import BeautifulSoup
soup = BeautifulSoup("<div><span class='price'>$10</span></div>", "html.parser")
print(soup.select_one("span.price").text)
bs4_select.py
from bs4 import BeautifulSoup
soup = BeautifulSoup("<div><span class='price'>$10</span></div>", "html.parser")
print(soup.select_one("span.price").text)
🧪 Try It Yourself
Exercise 1 — List Files with os.listdir
Exercise 2 — Join Paths with os.path.join
Exercise 3 โ Write and Read a File
If this helped you, consider buying me a coffee ☕
Buy me a coffee
Was this page helpful?
Let us know how we did
