import os
import requests
from bs4 import BeautifulSoup

# Using LLMs to extract structured data from text
def extract_pdf_url(url, timeout=30):
    """Return the PDF link advertised on the page at *url*.

    Downloads the page, parses it with the ``html.parser`` backend and
    looks up the first ``<a>`` element whose ``class`` attribute is
    ``"btn btn-primary btn-lg"``.

    Args:
        url: Address of the HTML page to scan.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The ``href`` value of the matching link, exactly as written in the
        page (it is not resolved against *url*).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an error status.
        Exception: With the message ``"PDF URL not found"`` when the page
            has no matching link or the link has no usable ``href``.
    """
    # Request failures propagate unchanged, so no try/except wrapper is needed.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    link = soup.find("a", {"class": "btn btn-primary btn-lg"})

    # find() yields None when nothing matches; subscripting that result is
    # what produced "TypeError: 'NoneType' object is not subscriptable".
    # .get() also covers an anchor that lacks an href attribute.
    pdf_url = link.get("href") if link is not None else None
    if not pdf_url:
        raise Exception("PDF URL not found")
    return pdf_url


if __name__ == "__main__":
    # Example lookup. Before the missing-link check was added, this call
    # was observed to abort with:
    #   TypeError: 'NoneType' object is not subscriptable
    extract_pdf_url("https://www.csis.org/programs/strategic-technologies-program/significant-cyber-incidents")