import os
import requests
from bs4 import BeautifulSoup
# Using LLMs to extract structured data from text
def extract_pdf_url(url, timeout=30):
    """Return the ``href`` of the PDF link found on the page at *url*.

    The page is fetched with ``requests`` and parsed with BeautifulSoup's
    ``html.parser``. The link is taken from the first ``<a>`` element whose
    ``class`` attribute is ``"btn btn-primary btn-lg"``.

    Args:
        url: Address of the HTML page to scan.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without it a stalled connection
            would block forever.

    Returns:
        The value of the matching anchor's ``href`` attribute, exactly as it
        appears in the page (it is not resolved against *url*).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If no matching anchor exists, or it has no usable
            ``href``. ``ValueError`` is a subclass of ``Exception``, so
            existing ``except Exception`` handlers keep working.
    """
    # Network and HTTP-status errors propagate unchanged; wrapping them in a
    # try/except that only re-raises added nothing.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    # find() returns None when nothing matches, so the anchor must be checked
    # before reading its attributes. Subscripting the result directly is what
    # produced "TypeError: 'NoneType' object is not subscriptable".
    anchor = soup.find("a", {"class": "btn btn-primary btn-lg"})

    # .get() also covers an anchor that lacks an href attribute altogether.
    pdf_url = anchor.get("href") if anchor is not None else None
    if not pdf_url:
        raise ValueError("PDF URL not found")
    return pdf_url
# Example: look up the PDF link on the CSIS "Significant Cyber Incidents" page.
extract_pdf_url(
    "https://www.csis.org/programs/strategic-technologies-program/significant-cyber-incidents"
)
# Output observed when the call above was run:
# TypeError: 'NoneType' object is not subscriptable