from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
import time
from pdfDownloader.abstract import PaperDownloader
from webdriver_manager.chrome import ChromeDriverManager
import requests, os, shutil
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class IEEEExploreDownloader(PaperDownloader):
    """Downloader and metadata scraper for IEEE Xplore article pages.

    Uses ``self.driver_manager``, ``self.download_dir``, ``self.saved_dir``
    and ``self.trigger_and_save_pdf``; none of them are defined in this class
    (presumably they are provided by ``PaperDownloader`` -- confirm there).
    """

    def __init__(self, driver_manager):
        super().__init__(driver_manager)

    def download(self, url: str, paper_idx: str):
        """Resolve the PDF behind an article page and hand it to the saver.

        Args:
            url: Address of the article page.
            paper_idx: Identifier forwarded to ``trigger_and_save_pdf``.

        Returns:
            ``False`` when the PDF link, the viewer page or the PDF iframe
            cannot be resolved; otherwise whatever ``trigger_and_save_pdf``
            returns.
        """
        driver = self.driver_manager.get_driver()
        driver.get(url)

        # Step 1: the article page links to the PDF viewer through an anchor
        # carrying the "xpl-btn-pdf" class.
        try:
            pdf_element = driver.find_element(
                By.XPATH, '//a[contains(@class, "xpl-btn-pdf")]'
            )
            pdf_url = pdf_element.get_attribute("href")
        except Exception as e:
            print("无法定位PDF按钮:", e)
            return False
        if not pdf_url:
            print("未找到PDF跳转链接")
            return False

        # Step 2: open the viewer page and give it time to render.
        try:
            driver.get(pdf_url)
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            return False
        time.sleep(5)

        # Step 3: the viewer embeds the PDF in a top-level iframe; its src is
        # the address that gets downloaded.
        try:
            iframe = driver.find_element(By.XPATH, "/html/body/iframe")
            pdf_src = iframe.get_attribute("src")
        except Exception as e:
            print("未找到包含PDF的iframe:", e)
            return False
        if not pdf_src:
            print("iframe未发现 PDF src")
            return False

        return self.trigger_and_save_pdf(
            driver, pdf_src, self.download_dir, self.saved_dir, paper_idx
        )

    def get_author_institution_journal(self, url: str) -> dict:
        """Scrape title, authors, institutions and journal from an article page.

        Args:
            url: Address of the article page.

        Returns:
            A dict with the keys ``title``, ``authors``, ``institutions``,
            ``journal`` and ``url``. ``institutions`` holds unique entries in
            the order they were first seen; ``journal`` is ``""`` when it
            cannot be located.

        Raises:
            Exception: Whatever the driver raises when the "All Authors"
                link or the title element cannot be found.
        """
        driver = self.driver_manager.get_driver()
        driver.get(url)
        time.sleep(5)

        # Expand the author accordion so the per-author blocks carry text.
        driver.find_element(By.LINK_TEXT, "All Authors").click()

        title = driver.find_element(
            By.XPATH, "//h1[contains(@class, 'document-title')]"
        ).text

        authors = []
        institution_lines = []
        for container in driver.find_elements(
            By.CLASS_NAME, "authors-accordion-container"
        ):
            text_lines = container.text.split("\n")
            # First line of each block is the author name.
            authors.append(text_lines[0])
            # The last paragraph is an instruction to the author, not an
            # affiliation, so drop it when the block has more than two lines.
            if len(text_lines) > 2:
                text_lines = text_lines[:-1]
            # Naive affiliation filter: keep non-empty lines shorter than 150
            # characters.
            institution_lines.extend(
                line for line in text_lines[1:] if line and len(line) < 150
            )
        # De-duplicate while keeping first-seen order so the output is
        # deterministic from run to run.
        institutions = list(dict.fromkeys(institution_lines))

        # The journal is optional. Catch Exception rather than using a bare
        # except so KeyboardInterrupt/SystemExit still propagate.
        try:
            journal_div_element = driver.find_element(
                By.XPATH,
                "//div[contains(@class, 'stats-document-abstract-publishedIn')]",
            )
            journal = journal_div_element.find_element(By.XPATH, ".//a").text
        except Exception:
            journal = ""

        print(f"Title: {title}")
        print(f"Authors: {authors}")
        print(f"Institutions: {institutions}")
        print(f"Journal: {journal}")
        return {
            "title": title,
            "authors": authors,
            "institutions": institutions,
            "journal": journal,
            "url": url,
        }



# ProQuest
class ProQuestDownloader(PaperDownloader):
    """Downloader and metadata scraper for ProQuest document pages.

    Uses ``self.driver_manager``, ``self.download_dir``, ``self.saved_dir``
    and ``self.trigger_and_save_pdf``; none of them are defined in this class
    (presumably they are provided by ``PaperDownloader`` -- confirm there).
    """

    def __init__(self, driver_manager):
        super().__init__(driver_manager)

    def download(self, url: str, paper_idx: str):
        """Read the PDF address from the page and hand it to the saver.

        Args:
            url: Address of the document page.
            paper_idx: Identifier forwarded to ``trigger_and_save_pdf``.

        Returns:
            ``False`` when the element with id ``openViewPdf`` is missing or
            has no ``src``; otherwise whatever ``trigger_and_save_pdf``
            returns.
        """
        driver = self.driver_manager.get_driver()
        driver.get(url)
        time.sleep(4)

        try:
            pdf_element = driver.find_element(By.XPATH, '//*[@id="openViewPdf"]')
            pdf_src = pdf_element.get_attribute("src")
        except Exception as e:
            print("无法定位PDF按钮:", e)
            return False
        if not pdf_src:
            print("未找到PDF链接")
            return False

        return self.trigger_and_save_pdf(
            driver, pdf_src, self.download_dir, self.saved_dir, paper_idx
        )

    def get_author_institution_journal(self, url: str) -> dict:
        """Scrape the title and author names from a document page.

        Args:
            url: Address of the document page.

        Returns:
            A dict with the keys ``title``, ``authors``, ``institutions``,
            ``journal`` and ``url``. ``title`` is ``""`` and ``authors`` is
            ``[]`` when the matching elements are not found;
            ``institutions`` and ``journal`` are always ``""``.
        """
        driver = self.driver_manager.get_driver()
        driver.get(url)
        time.sleep(2)

        # The title is looked up by its "documentTitle" id.
        try:
            title = driver.find_element(By.XPATH, '//*[@id="documentTitle"]').text
        except Exception:
            title = ""

        # Collect every author span. The previous code stored the name in a
        # separate local that was never added to the returned list and that
        # was unbound (NameError on print) whenever the lookup failed.
        authors = []
        try:
            author_elements = driver.find_elements(
                By.XPATH, '//span[@class="auth-tooltip-trigger author-name"]'
            )
            for element in author_elements:
                name = element.text.strip()
                if name:
                    authors.append(name)
        except Exception as e:
            # Best effort: report the problem and return what was collected.
            print("Failed to read authors:", e)

        print(f"Title: {title}")
        print(f"Authors: {authors}")
        return {
            "title": title,
            "authors": authors,
            "institutions": "",
            "journal": "",
            "url": url,
        }

# springerDownloader
class SpringerDownloader(PaperDownloader):
    """Downloader and metadata scraper for Springer article pages.

    Uses ``self.driver_manager``, ``self.download_dir``, ``self.saved_dir``
    and ``self.trigger_and_save_pdf``; none of them are defined in this class
    (presumably they are provided by ``PaperDownloader`` -- confirm there).
    """

    def __init__(self, driver_manager):
        super().__init__(driver_manager)

    def download(self, url: str, paper_idx: str):
        """Read the PDF link from the page and hand it to the saver.

        Args:
            url: Address of the article page.
            paper_idx: Identifier forwarded to ``trigger_and_save_pdf``.

        Returns:
            ``False`` when the PDF anchor is missing or has no ``href``;
            otherwise whatever ``trigger_and_save_pdf`` returns.
        """
        driver = self.driver_manager.get_driver()
        driver.get(url)
        time.sleep(4)

        # NOTE(review): this absolute XPath depends on the exact page layout
        # and will stop matching if the markup changes.
        try:
            pdf_element = driver.find_element(
                By.XPATH, '//*[@id="main"]/section/div/div/div[1]/div/div/div[1]/a'
            )
            pdf_src = pdf_element.get_attribute("href")
        except Exception as e:
            print("无法定位PDF按钮:", e)
            return False
        if not pdf_src:
            print("未找到PDF链接")
            return False

        return self.trigger_and_save_pdf(
            driver, pdf_src, self.download_dir, self.saved_dir, paper_idx
        )

    def get_author_institution_journal(self, url: str) -> dict:
        """Scrape title, authors and journal from an article page.

        Args:
            url: Address of the article page.

        Returns:
            A dict with the keys ``title``, ``authors``, ``institutions``,
            ``journal`` and ``url``. ``title`` and ``journal`` fall back to
            ``""`` when their elements are not found; ``institutions`` is
            always ``""``.
        """
        driver = self.driver_manager.get_driver()
        driver.get(url)

        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt/SystemExit still propagate.
        try:
            title = driver.find_element(
                By.XPATH, "//h1[contains(@class, 'c-article-title')]"
            ).text
        except Exception:
            title = ""

        # One anchor per author, marked with data-test="author-name".
        authors = [
            element.text
            for element in driver.find_elements(
                By.XPATH, '//a[@data-test="author-name"]'
            )
        ]

        # The journal name is read from the second breadcrumb entry.
        try:
            journal = driver.find_element(
                By.CSS_SELECTOR, '[data-track-label="breadcrumb2"]'
            ).text
        except Exception:
            journal = ""

        print(f"Title: {title}")
        print(f"Authors: {authors}")
        print(f"Journal: {journal}")
        return {
            "title": title,
            "authors": authors,
            "institutions": "",
            "journal": journal,
            "url": url,
        }


class acmDownloader(PaperDownloader):
    """Downloader and metadata scraper for ACM Digital Library pages.

    Uses ``self.driver_manager``, ``self.download_dir``, ``self.saved_dir``
    and ``self.trigger_and_save_pdf``; none of them are defined in this class
    (presumably they are provided by ``PaperDownloader`` -- confirm there).
    """

    def __init__(self, driver_manager):
        super().__init__(driver_manager)

    def download(self, url: str, paper_idx: str):
        """Read the PDF link from the page and hand it to the saver.

        Args:
            url: Address of the article page.
            paper_idx: Identifier forwarded to ``trigger_and_save_pdf``.

        Returns:
            ``False`` when the PDF anchor is missing or has no ``href``;
            otherwise whatever ``trigger_and_save_pdf`` returns.
        """
        driver = self.driver_manager.get_driver()
        driver.get(url)
        time.sleep(4)

        # NOTE(review): this positional XPath depends on the exact header
        # layout and will stop matching if the markup changes.
        try:
            pdf_element = driver.find_element(
                By.XPATH,
                '//*[@id="skip-to-main-content"]/main/article/header/div/div[7]/div[2]/div[4]/a[1]',
            )
            pdf_src = pdf_element.get_attribute("href")
        except Exception as e:
            print("无法定位PDF按钮:", e)
            return False
        if not pdf_src:
            print("未找到PDF链接")
            return False

        return self.trigger_and_save_pdf(
            driver, pdf_src, self.download_dir, self.saved_dir, paper_idx
        )

    def get_author_institution_journal(self, url: str) -> dict:
        """Scrape title, authors and journal, retrying the page load.

        The page load plus title lookup is attempted up to three times.

        Args:
            url: Address of the article page.

        Returns:
            A dict with the keys ``title``, ``authors``, ``institutions``,
            ``journal`` and ``url``. When all attempts fail, every value --
            including ``url`` -- is ``""``. Otherwise ``institutions`` is
            ``""`` and ``journal`` falls back to ``""`` when not found.
        """
        max_attempts = 3
        title_element = None

        for _ in range(max_attempts):
            try:
                driver = self.driver_manager.get_driver()
                driver.get(url)
                title_element = driver.find_element(
                    By.XPATH, '//h1[@property="name"]'
                )
            except Exception:
                continue  # try again with a fresh page load
            break  # the title was found; stop retrying
        else:
            # Every attempt failed: return the empty record callers expect.
            return {
                "title": "",
                "authors": "",
                "institutions": "",
                "journal": "",
                "url": "",
            }

        title = title_element.text

        # One span per author, marked with property="author".
        authors = [
            element.text
            for element in driver.find_elements(
                By.XPATH, '//span[@property="author"]'
            )
        ]

        # The journal is optional. Catch Exception rather than using a bare
        # except so KeyboardInterrupt/SystemExit still propagate.
        try:
            journal_div_element = driver.find_element(
                By.XPATH,
                "//div[contains(@class, 'core-self-citation')]",
            )
            journal = journal_div_element.find_element(By.XPATH, ".//a").text
        except Exception:
            journal = ""

        print(f"Title: {title}")
        print(f"Authors: {authors}")
        print(f"Journal: {journal}")
        return {
            "title": title,
            "authors": authors,
            "institutions": "",
            "journal": journal,
            "url": url,
        }


class mdpiDownloader(PaperDownloader):
    """Downloader and metadata scraper for MDPI article pages.

    Uses ``self.driver_manager``, ``self.download_dir``, ``self.saved_dir``
    and ``self.trigger_and_save_pdf``; none of them are defined in this class
    (presumably they are provided by ``PaperDownloader`` -- confirm there).
    """

    def __init__(self, driver_manager):
        super().__init__(driver_manager)

    def download(self, url: str, paper_idx: str):
        """Read the PDF link from the download dropdown and hand it to the saver.

        Args:
            url: Address of the article page.
            paper_idx: Identifier forwarded to ``trigger_and_save_pdf``.

        Returns:
            ``False`` when the PDF anchor is missing or has no ``href``;
            otherwise whatever ``trigger_and_save_pdf`` returns.
        """
        driver = self.driver_manager.get_driver()
        driver.get(url)
        time.sleep(4)

        # Match the dropdown by id prefix. The previous selector hard-coded
        # the full id "drop-download-1026596", whose numeric suffix looks
        # article-specific, so it could only match that one page.
        try:
            pdf_element = driver.find_element(
                By.XPATH, '//*[starts-with(@id, "drop-download-")]/a[1]'
            )
            pdf_src = pdf_element.get_attribute("href")
        except Exception as e:
            print("无法定位PDF按钮:", e)
            return False
        if not pdf_src:
            print("未找到PDF链接")
            return False

        return self.trigger_and_save_pdf(
            driver, pdf_src, self.download_dir, self.saved_dir, paper_idx
        )

    def get_author_institution_journal(self, url: str) -> dict:
        """Scrape title, authors, institutions and journal from an article page.

        Args:
            url: Address of the article page.

        Returns:
            A dict with the keys ``title``, ``authors``, ``institutions``,
            ``journal`` and ``url``. ``institutions`` holds unique entries in
            the order they were first seen; ``journal`` is ``""`` when it
            cannot be located.

        Raises:
            Exception: Whatever the driver raises when the title element or
                the author container cannot be found.
        """
        driver = self.driver_manager.get_driver()
        driver.get(url)

        title = driver.find_element(
            By.XPATH,
            "//h1[contains(@class, 'title hypothesis_container')]",
        ).text

        authors_container = driver.find_element(
            By.XPATH,
            "//div[contains(@class, 'art-authors hypothesis_container')]",
        )
        # The leading "." keeps the search inside the author container; an
        # XPath that starts with "//" is evaluated against the whole document
        # even when it is called on an element.
        authors = [
            element.text
            for element in authors_container.find_elements(
                By.XPATH,
                ".//div[contains(@class, 'profile-card-drop')]",
            )
        ]

        institution_names = []
        for element in driver.find_elements(
            By.XPATH,
            "//div[contains(@class, 'affiliation-name')]",
        ):
            institution = element.text
            # Skip the correspondence footnote, which shares the same class.
            if institution == "Author to whom correspondence should be addressed.":
                continue
            institution_names.append(institution)
        # De-duplicate while keeping first-seen order so the output is
        # deterministic from run to run.
        institutions = list(dict.fromkeys(institution_names))

        # The journal is optional. Catch Exception rather than using a bare
        # except so KeyboardInterrupt/SystemExit still propagate.
        try:
            journal_element_div = driver.find_element(
                By.XPATH,
                "//div[contains(@class, 'bib-identity')]",
            )
            journal = journal_element_div.find_element(By.XPATH, ".//em").text
        except Exception:
            journal = ""

        print(f"Title: {title}")
        print(f"Authors: {authors}")
        print(f"Institutions: {institutions}")
        print(f"Journal: {journal}")
        return {
            "title": title,
            "authors": authors,
            "institutions": institutions,
            "journal": journal,
            "url": url,
        }


class ScienceDirectDownloader(PaperDownloader):
    """Downloader and metadata scraper for ScienceDirect article pages.

    Uses ``self.driver_manager``, ``self.download_dir``, ``self.saved_dir``
    and ``self.trigger_and_save_pdf``; none of them are defined in this class
    (presumably they are provided by ``PaperDownloader`` -- confirm there).
    """

    def __init__(self, driver_manager):
        super().__init__(driver_manager)

    def download(self, url: str, paper_idx: str):
        """Read the PDF link from the page and hand it to the saver.

        Args:
            url: Address of the article page.
            paper_idx: Identifier forwarded to ``trigger_and_save_pdf``.

        Returns:
            ``False`` when the PDF anchor is missing or has no ``href``;
            otherwise whatever ``trigger_and_save_pdf`` returns.
        """
        driver = self.driver_manager.get_driver()
        driver.get(url)
        time.sleep(4)

        # NOTE(review): this positional XPath depends on the exact toolbar
        # layout and will stop matching if the markup changes.
        try:
            pdf_element = driver.find_element(
                By.XPATH,
                '//*[@id="mathjax-container"]/div[1]/div[2]/div/ul/li[1]/a',
            )
            pdf_src = pdf_element.get_attribute("href")
        except Exception as e:
            print("无法定位PDF按钮:", e)
            return False
        if not pdf_src:
            print("未找到PDF链接")
            return False

        return self.trigger_and_save_pdf(
            driver, pdf_src, self.download_dir, self.saved_dir, paper_idx
        )

    def get_author_institution_journal(self, url: str) -> dict:
        """Scrape title, authors, institutions and journal from an article page.

        Args:
            url: Address of the article page.

        Returns:
            A dict with the keys ``title``, ``authors``, ``institutions``,
            ``journal`` and ``url``. ``institutions`` holds unique entries in
            the order they were first seen; ``journal`` is ``""`` when it
            cannot be located.

        Raises:
            Exception: Whatever the driver raises when the "show more"
                button, the title or the author group cannot be found.
        """
        driver = self.driver_manager.get_driver()
        driver.get(url)

        # Click "show more" before reading the author group (presumably this
        # is what reveals the affiliation entries).
        driver.find_element(By.ID, "show-more-btn").click()

        title = driver.find_element(By.ID, "screen-reader-main-title").text

        author_group = driver.find_element(By.ID, "author-group")
        authors = [
            element.text
            for element in author_group.find_elements(
                By.CLASS_NAME, "react-xocs-alternative-link"
            )
        ]

        # Affiliations are the <dd> entries inside the author group.
        # De-duplicate while keeping first-seen order so the output is
        # deterministic from run to run.
        institutions = list(
            dict.fromkeys(
                element.text
                for element in author_group.find_elements(By.TAG_NAME, "dd")
            )
        )

        # The journal is optional. Catch Exception rather than using a bare
        # except so KeyboardInterrupt/SystemExit still propagate.
        try:
            journal = driver.find_element(By.ID, "publication-title").text
        except Exception:
            journal = ""

        print(f"Title: {title}")
        print(f"Authors: {authors}")
        print(f"Institutions: {institutions}")
        print(f"Journal: {journal}")
        return {
            "title": title,
            "authors": authors,
            "institutions": institutions,
            "journal": journal,
            "url": url,
        }


class patentDownloader(PaperDownloader):
    """Downloader and metadata scraper for patent result pages.

    Uses ``self.driver_manager``, ``self.download_dir``, ``self.saved_dir``
    and ``self.clear_download_dir``; none of them are defined in this class
    (presumably they are provided by ``PaperDownloader`` -- confirm there).
    """

    # Upper bound, in seconds, on how long download() waits for the PDF.
    DOWNLOAD_TIMEOUT_SECONDS = 60
    # Suffixes treated as "a download is still being written".
    _PARTIAL_SUFFIXES = (".crdownload", ".part", ".tmp")

    def __init__(self, driver_manager):
        super().__init__(driver_manager)

    def _wait_for_pdf(self, timeout: float):
        """Poll the download directory until a finished PDF shows up.

        A PDF counts as finished once no file with a partial-download suffix
        remains next to it. When the timeout expires, any PDF that is present
        is accepted, which matches the previous fixed-sleep behaviour.

        Args:
            timeout: Maximum number of seconds to wait.

        Returns:
            The file name of the first PDF in sorted order, or ``None`` when
            no PDF appeared within ``timeout`` seconds.
        """
        deadline = time.monotonic() + timeout
        while True:
            names = os.listdir(self.download_dir)
            pdf_names = sorted(name for name in names if name.endswith(".pdf"))
            in_progress = any(
                name.endswith(self._PARTIAL_SUFFIXES) for name in names
            )
            timed_out = time.monotonic() >= deadline
            if pdf_names and (timed_out or not in_progress):
                return pdf_names[0]
            if timed_out:
                return None
            time.sleep(1)

    def download(self, url: str, paper_idx: str):
        """Download the patent PDF and move it into the saved directory.

        Args:
            url: Address of the patent page.
            paper_idx: Prefix for the saved file name
                (``<paper_idx>-<downloaded name>``).

        Returns:
            ``True`` when the PDF was downloaded and moved; ``False`` when the
            link is missing, navigation fails, no PDF appears in time, or the
            move fails.
        """
        driver = self.driver_manager.get_driver()
        driver.get(url)
        time.sleep(8)

        # 1. Read the PDF address from the header link.
        try:
            pdf_element = driver.find_element(
                By.XPATH, '//*[@id="wrapper"]/div[1]/div[2]/section/header/div/a'
            )
            pdf_src = pdf_element.get_attribute("href")
        except Exception as e:
            print("无法定位PDF按钮:", e)
            return False
        if not pdf_src:
            print("未找到PDF链接")
            return False

        # 2. Empty the download directory so the only PDF found afterwards is
        #    the one fetched here.
        self.clear_download_dir(self.download_dir)

        # 3. Navigate to the PDF address, which triggers the download.
        try:
            driver.get(pdf_src)
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            return False

        # 4. Wait for the download, returning as soon as it is complete
        #    instead of always sleeping for the full timeout.
        file_name = self._wait_for_pdf(self.DOWNLOAD_TIMEOUT_SECONDS)
        if file_name is None:
            print("下载超时或失败")
            return False

        # 5. Move the file into the saved directory under its prefixed name.
        file_path = os.path.join(self.download_dir, file_name)
        save_path = os.path.join(self.saved_dir, f"{paper_idx}-{file_name}")
        try:
            shutil.move(file_path, save_path)
        except Exception as e:
            print("重命名或保存失败:", e)
            return False
        print("[✓] PDF saved:", save_path)
        return True

    def get_author_institution_journal(self, url: str) -> dict:
        """Scrape title, inventors and assignees from a patent page.

        Args:
            url: Address of the patent page.

        Returns:
            A dict with the keys ``title``, ``authors``, ``institutions``,
            ``journal`` and ``url``. ``journal`` is always ``""``.
            ``institutions`` is empty when the page has no
            "Current Assignee" entry.

        Raises:
            Exception: Whatever the driver raises when the title or the
                "important people" list cannot be found.
        """
        driver = self.driver_manager.get_driver()
        driver.get(url)
        time.sleep(8)

        title = driver.find_element(By.XPATH, '//h1[@id="title"]').text

        people_list = driver.find_element(
            By.XPATH,
            "//dl[contains(@class, 'important-people style-scope patent-result')]",
        )
        # Labels (<dt>) and values (<dd>) come back interleaved.
        entries = [
            element.text
            for element in people_list.find_elements(
                By.XPATH,
                ".//dt[contains(@class, 'style-scope patent-result')] | .//dd[contains(@class, 'style-scope patent-result')]",
            )
        ]

        # Default to the end of the list so a page without a
        # "Current Assignee" label yields no institutions instead of raising
        # on an unbound variable. As before, the last matching label wins.
        split_idx = len(entries)
        for idx, text in enumerate(entries):
            if text == "Current Assignee":
                split_idx = idx

        # Entry 0 is skipped (presumably the inventor label -- confirm
        # against the page markup).
        authors = entries[1:split_idx]
        institutions = entries[split_idx + 1 :]

        print(f"Title: {title}")
        print(f"Authors: {authors}")
        print(f"Institutions: {institutions}")
        return {
            "title": title,
            "authors": authors,
            "institutions": institutions,
            "journal": "",
            "url": url,
        }


class ArxivDownloader(PaperDownloader):
    """Metadata scraper for arXiv abstract pages; downloading is not implemented.

    Uses ``self.driver_manager``, which is not defined in this class
    (presumably it is provided by ``PaperDownloader`` -- confirm there).
    """

    def __init__(self, driver_manager):
        super().__init__(driver_manager)

    def download(self, url: str, paper_idx: str = ""):
        """Placeholder: arXiv download is not implemented.

        ``paper_idx`` is accepted, with a default, so this method can be
        called as ``download(url, paper_idx)`` like the other downloaders in
        this module without raising ``TypeError``.

        Args:
            url: Address of the abstract page (unused).
            paper_idx: Paper identifier (unused).

        Returns:
            ``False`` always, since nothing is downloaded.
        """
        return False

    def get_author_institution_journal(self, url: str) -> dict:
        """Scrape the title and author names from an abstract page.

        Args:
            url: Address of the abstract page.

        Returns:
            A dict with the keys ``title``, ``authors``, ``institutions``,
            ``journal`` and ``url``. ``institutions`` and ``journal`` are
            always ``""``.

        Raises:
            Exception: Whatever the driver raises when the title or the
                authors container cannot be found.
        """
        driver = self.driver_manager.get_driver()
        driver.get(url)

        title = driver.find_element(By.XPATH, "//h1[@class='title mathjax']").text

        # Every anchor inside the authors container is taken as one author.
        authors_container = driver.find_element(
            By.XPATH,
            "//div[contains(@class, 'authors')]",
        )
        authors = [
            anchor.text
            for anchor in authors_container.find_elements(By.TAG_NAME, "a")
        ]

        print(f"Title: {title}")
        print(f"Authors: {authors}")
        return {
            "title": title,
            "authors": authors,
            "institutions": "",
            "journal": "",
            "url": url,
        }
