23
app/helpers/clean_email_html.py
Normal file
23
app/helpers/clean_email_html.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
def clean_email_html(html_content: str) -> str:
    """Return the visible text of an HTML email as one whitespace-normalized line.

    The markup is parsed with BeautifulSoup's ``html.parser`` backend, the
    non-content elements listed below are removed together with everything
    inside them, and the remaining text is joined with single spaces.

    Args:
        html_content: Raw HTML of the email body. Any falsy value (``""`` or
            ``None``) is treated as "no content".

    Returns:
        The extracted text with every run of whitespace collapsed to a single
        space and no leading or trailing whitespace; ``""`` when
        ``html_content`` is falsy.
    """
    if not html_content:
        return ""

    soup = BeautifulSoup(html_content, "html.parser")

    # Drop elements whose contents are code or metadata rather than message
    # text; otherwise the downstream AI consumer reads CSS/JS as prose.
    for non_content_tag in soup(["script", "style", "head", "title", "meta"]):
        non_content_tag.decompose()

    # A space separator keeps text from adjacent elements from being glued
    # together into a single word.
    text = soup.get_text(separator=" ")

    # str.split() with no argument splits on any run of whitespace (spaces,
    # newlines, tabs, non-breaking spaces) and discards empty pieces, so
    # re-joining with a single space normalizes all of them in one pass.
    return " ".join(text.split())
|
||||
Reference in New Issue
Block a user