ai-news-blogger / utils /blog_writer.py
KavinduHansaka's picture
Upload blog_writer.py
94f62dd verified
import os
import tempfile
from gtts import gTTS
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# Model identifier passed to both the tokenizer and the model loaders below.
model_id = "meta-llama/Llama-3.2-3B-Instruct"
# NOTE: these loads run at import time, so importing this module triggers
# the tokenizer/model loading before any helper is called.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
model_id,
# "auto" leaves the dtype and device placement decisions to the library
# (see the transformers `from_pretrained` documentation for exact semantics).
torch_dtype="auto",
device_map="auto"
)
# Shared text-generation pipeline; the generation helpers in this file call
# `pipe(...)` directly rather than building their own.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
def format_prompt(system, user):
    """Build a single-turn Llama 3 instruct prompt.

    The prompt follows the documented Llama 3 chat layout: every message is
    introduced by a role header followed by a blank line and is terminated
    by ``<|eot_id|>``; the prompt ends with an open assistant header so the
    model generates the assistant's reply.

    Args:
        system: System instruction for the model. Formatted with ``str()``
            semantics and stripped of surrounding whitespace.
        user: User request. Formatted and stripped the same way.

    Returns:
        The complete prompt string, ready to be passed to a text-generation
        call.
    """
    # Trim message bodies: callers pass triple-quoted strings that carry
    # leading/trailing newlines which are not part of the message itself.
    system_text = f"{system}".strip()
    user_text = f"{user}".strip()

    # NOTE(review): the tokenizer used by the pipeline may prepend its own
    # begin-of-text token as well — confirm whether the explicit
    # <|begin_of_text|> here results in a duplicated BOS.
    return (
        "<|begin_of_text|>"
        "<|start_header_id|>system<|end_header_id|>\n\n"
        f"{system_text}<|eot_id|>"
        "<|start_header_id|>user<|end_header_id|>\n\n"
        f"{user_text}<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )
def summarize_news(summary):
    """Ask the model for a two-sentence summary of a news text.

    Args:
        summary: The news text to condense; it is embedded verbatim in the
            user request.

    Returns:
        The ``generated_text`` of the first pipeline result, stripped of
        surrounding whitespace.
    """
    system_msg = "You are a professional summarizer."
    user_msg = f"Summarize this news in 2 sentences:\n{summary}"
    generations = pipe(
        format_prompt(system_msg, user_msg),
        max_new_tokens=100,
        return_full_text=False,
    )
    first = generations[0]
    return first["generated_text"].strip()
def generate_tags(title, summary):
    """Ask the model to suggest SEO hashtags for a blog post.

    Args:
        title: Blog title, quoted inside the request.
        summary: Summary text appended to the request.

    Returns:
        The ``generated_text`` of the first pipeline result, stripped of
        surrounding whitespace (at most 30 new tokens are requested).
    """
    request = f"Suggest 5 SEO hashtags for a blog about '{title}'. Summary: {summary}"
    tag_prompt = format_prompt("You generate SEO hashtags.", request)
    generations = pipe(tag_prompt, max_new_tokens=30, return_full_text=False)
    raw_tags = generations[0]["generated_text"]
    return raw_tags.strip()
def convert_to_audio(text):
    """Synthesize *text* to an MP3 file with gTTS and return the file path.

    Args:
        text: The text handed to ``gTTS``.

    Returns:
        Path of the newly written ``.mp3`` file. The file is created with
        ``delete=False``, so it is not removed automatically; the caller
        owns its cleanup.
    """
    tts = gTTS(text)
    # Reserve a unique path and close the handle immediately: leaving the
    # temporary file object open leaks a descriptor until garbage collection
    # and can prevent re-opening the same path for writing on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        path = tmp.name
    tts.save(path)
    return path
def fact_check_blog(blog_text):
    """Ask the model to fact check a blog post and list inaccuracies.

    Args:
        blog_text: The blog content to review; embedded verbatim in the
            user request.

    Returns:
        The ``generated_text`` of the first pipeline result, stripped of
        surrounding whitespace (at most 250 new tokens are requested).
    """
    request = f"\nFact check this blog and identify inaccuracies:\n{blog_text}\n"
    check_prompt = format_prompt("You are an AI fact checker.", request)
    generations = pipe(check_prompt, max_new_tokens=250, return_full_text=False)
    verdict = generations[0]["generated_text"]
    return verdict.strip()
def chunked_generation(prompt: str, max_tokens_per_chunk: int = 500, total_tokens: int = 1600) -> str:
    """Generate a long completion by calling the pipeline in several chunks.

    Each call asks for the next chunk of text, with everything generated so
    far appended to the original prompt so the model continues from there.

    Args:
        prompt: The base prompt; it is never modified, only extended with
            the text collected so far.
        max_tokens_per_chunk: Upper bound on new tokens requested per call.
        total_tokens: Overall budget of new tokens across all calls. The
            final chunk is shortened so the requested total never exceeds
            this budget (previously a non-multiple budget such as 1600 with
            500-token chunks requested 2000 tokens).

    Returns:
        The stripped chunks joined by newlines, or an empty string if the
        first call produced no text.

    Raises:
        ValueError: If ``max_tokens_per_chunk`` is 0 (raised by ``range``).
    """
    pieces = []
    current_prompt = prompt
    for used in range(0, total_tokens, max_tokens_per_chunk):
        # Cap the last request to whatever is left of the overall budget.
        budget = min(max_tokens_per_chunk, total_tokens - used)
        result = pipe(current_prompt, max_new_tokens=budget, return_full_text=False)[0]["generated_text"].strip()
        if not result:
            # The model produced nothing new; further calls would not help.
            break
        pieces.append(result)
        # NOTE(review): chunks are joined with a newline even when a chunk
        # was cut mid-sentence by the token limit — confirm this is desired.
        current_prompt = prompt + "\n".join(pieces) + "\n"
    return "\n".join(pieces).strip()
def generate_blog_with_tone(title: str, summary: str, tone: str) -> tuple:
    """Write a short (~250-word) blog post about a news item in a given tone.

    The news summary is first condensed with ``summarize_news``; the
    condensed text is what the blog request is built from.

    Args:
        title: Title of the news item.
        summary: Raw news summary to condense before writing.
        tone: Adjective describing the writer persona (used in the system
            message).

    Returns:
        A ``(blog_text, summarized)`` pair: the generated blog and the
        condensed summary it was based on.
    """
    summarized = summarize_news(summary)
    persona = f"You are a {tone} blog writer."
    request_lines = (
        "",
        "Write a 250-word blog post on this news, starting with a hook.",
        "Use markdown formatting and end with 3 SEO hashtags.",
        f"Title: {title}",
        f"Summary: {summarized}",
        "",
    )
    blog_prompt = format_prompt(persona, "\n".join(request_lines))
    blog_text = chunked_generation(blog_prompt, 400, 800)
    return blog_text, summarized
def generate_long_blog_with_tone(title: str, summary: str, tone: str) -> str:
    """Write a long (~600-word) blog post about a news item in a given tone.

    The news summary is first condensed with ``summarize_news``; the
    condensed text is what the blog request is built from.

    Args:
        title: Title of the news item.
        summary: Raw news summary to condense before writing.
        tone: Adjective describing the writer persona (used in the system
            message).

    Returns:
        The generated blog text produced by ``chunked_generation``.
    """
    summarized = summarize_news(summary)
    persona = f"You are a {tone} blog writer."
    request_lines = (
        "",
        "Write a 600-word blog post on this news, including an engaging introduction, detailed body, and conclusion.",
        "Use markdown formatting and end with 5 SEO hashtags.",
        f"Title: {title}",
        f"Summary: {summarized}",
        "",
    )
    blog_prompt = format_prompt(persona, "\n".join(request_lines))
    return chunked_generation(blog_prompt, 500, 1600)
def extend_single_blog(title: str, short_blog: str) -> str:
    """Expand a short blog post into a full-length (~600-word) version.

    Args:
        title: Title of the blog post.
        short_blog: The existing short post to be extended; embedded
            verbatim in the request.

    Returns:
        The extended blog text produced by ``chunked_generation``.
    """
    request_lines = (
        "",
        "Take the following short blog post and extend it into a full-length version (~600 words) with detailed insights, examples, and storytelling.",
        f"Title: {title}",
        "Original Blog:",
        f"{short_blog}",
        "",
    )
    editor_prompt = format_prompt(
        "You are a professional content editor.",
        "\n".join(request_lines),
    )
    return chunked_generation(editor_prompt, 500, 1600)