diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b053218 Binary files /dev/null and b/requirements.txt differ diff --git a/scrape_amazon.py b/scrape_amazon.py index 59b4aae..53bd8be 100644 --- a/scrape_amazon.py +++ b/scrape_amazon.py @@ -6,6 +6,7 @@ from selectorlib import Extractor import re import pyperclip from openai import OpenAI + import os @@ -20,12 +21,12 @@ class AmazonScraper: 'https': f'socks5h://{PROXY_HOST}:{PROXY_PORT}' } - HEADERS = { + self.HEADERS = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36', 'Accept-Language': 'en-US, en;q=0.5' } - self.HEADERS = { + HEADERS = { 'authority': 'www.amazon.com', 'pragma': 'no-cache', 'cache-control': 'no-cache', @@ -100,7 +101,9 @@ class AmazonScraper: print(review_data) print(product_info) print(product_info['images'], type(product_info['images'])) + self.images = eval(product_info['images']) + print(self.images) @@ -135,6 +138,7 @@ class AIInterface: base_url='https://api.together.xyz', ) + # client._proxies chat_completion = client.chat.completions.create( messages=[ { diff --git a/webui.py b/webui.py index 86e5296..86d0826 100644 --- a/webui.py +++ b/webui.py @@ -7,17 +7,17 @@ llms = ['meta-llama/Llama-2-70b-chat-hf', "mistralai/Mixtral-8x7B-Instruct-v0.1" scraper = AmazonScraper() aii = AIInterface() -def replace_img_tag(html_text, img_dict): +def replace_img_tag(url, html_text, img_dict): # Regular expression pattern to find tags img_tag_pattern = r'' # Function to replace tag - def replacer(match): + def replacer(match, url=url): img_url = match.group(1) srcset = ', '.join(f'{url} {w}w' for url, (w, h) in img_dict.items()) replacement = f'''
This page includes " + # f"paid Amazon affiliate links
and Include a link to the product {url} at the very end. Also " + # f"include this image {image} after the second paragraph. Format it nicely and professionally in " + # f"HTML. :\n\n") + text + html_content = aii.ask_ai(prompt_for_ai, model=llms[1]) + + prompt_for_ai = (f"Take the following HTML code and slightly modify it. Underneath the title add this tag 'This page includes paid Amazon affiliate links
'. Include a " + f"link {url} to the product at the end. Also include this image {image} after the first or " + f"second paragraph. Return a nice and professional HTML code:\n") + html_content + html_content = aii.ask_ai(prompt_for_ai, model=llms[1]) + html_content = replace_img_tag(url, html_content, scraper.images) print(html_content) return html_content