From 0dc51333380ed78c9415b21f71848dc5ba035041 Mon Sep 17 00:00:00 2001
From: erik aronesty <erik@q32.com>
Date: Mon, 18 Nov 2024 11:43:32 -0800
Subject: [PATCH 1/8] Apply patch via automation

---
 linkedin_scraper/person.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/linkedin_scraper/person.py b/linkedin_scraper/person.py
index e7ad7e4..04b5c7c 100644
--- a/linkedin_scraper/person.py
+++ b/linkedin_scraper/person.py
@@ -116,7 +116,7 @@ def get_experiences(self):
         main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
         for position in main_list.find_elements(By.CLASS_NAME, "pvs-list__paged-list-item"):
             position = position.find_element(By.CSS_SELECTOR, "div[data-view-name='profile-component-entity']")
-            company_logo_elem, position_details = position.find_elements(By.XPATH, "*")
+            company_logo_elem, position_details, *_  = position.find_elements(By.XPATH, "*")
 
             # company elem
             company_linkedin_url = company_logo_elem.find_element(By.XPATH,"*").get_attribute("href")
@@ -216,7 +216,7 @@ def get_educations(self):
         main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
         for position in main_list.find_elements(By.CLASS_NAME,"pvs-list__paged-list-item"):
             position = position.find_element(By.XPATH,"//div[@data-view-name='profile-component-entity']")
-            institution_logo_elem, position_details = position.find_elements(By.XPATH,"*")
+            institution_logo_elem, position_details, *_ = position.find_elements(By.XPATH,"*")
 
             # company elem
             institution_linkedin_url = institution_logo_elem.find_element(By.XPATH,"*").get_attribute("href")

From a60754890e834f42f3a737fcf151746215a2caa5 Mon Sep 17 00:00:00 2001
From: erik aronesty <erik@q32.com>
Date: Mon, 18 Nov 2024 11:45:39 -0800
Subject: [PATCH 2/8] Apply patch via automation

---
 linkedin_scraper/person.py      |   4 +-
 linkedin_scraper/person.py.orig | 411 ++++++++++++++++++++++++++++++++
 2 files changed, 413 insertions(+), 2 deletions(-)
 create mode 100644 linkedin_scraper/person.py.orig

diff --git a/linkedin_scraper/person.py b/linkedin_scraper/person.py
index 04b5c7c..e7ad7e4 100644
--- a/linkedin_scraper/person.py
+++ b/linkedin_scraper/person.py
@@ -116,7 +116,7 @@ def get_experiences(self):
         main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
         for position in main_list.find_elements(By.CLASS_NAME, "pvs-list__paged-list-item"):
             position = position.find_element(By.CSS_SELECTOR, "div[data-view-name='profile-component-entity']")
-            company_logo_elem, position_details, *_  = position.find_elements(By.XPATH, "*")
+            company_logo_elem, position_details = position.find_elements(By.XPATH, "*")
 
             # company elem
             company_linkedin_url = company_logo_elem.find_element(By.XPATH,"*").get_attribute("href")
@@ -216,7 +216,7 @@ def get_educations(self):
         main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
         for position in main_list.find_elements(By.CLASS_NAME,"pvs-list__paged-list-item"):
             position = position.find_element(By.XPATH,"//div[@data-view-name='profile-component-entity']")
-            institution_logo_elem, position_details, *_ = position.find_elements(By.XPATH,"*")
+            institution_logo_elem, position_details = position.find_elements(By.XPATH,"*")
 
             # company elem
             institution_linkedin_url = institution_logo_elem.find_element(By.XPATH,"*").get_attribute("href")
diff --git a/linkedin_scraper/person.py.orig b/linkedin_scraper/person.py.orig
new file mode 100644
index 0000000..04b5c7c
--- /dev/null
+++ b/linkedin_scraper/person.py.orig
@@ -0,0 +1,411 @@
+import requests
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.common.exceptions import NoSuchElementException
+from .objects import Experience, Education, Scraper, Interest, Accomplishment, Contact
+import os
+from linkedin_scraper import selectors
+
+
+class Person(Scraper):
+
+    __TOP_CARD = "scaffold-layout__main"
+    __WAIT_FOR_ELEMENT_TIMEOUT = 5
+
+    def __init__(
+        self,
+        linkedin_url=None,
+        name=None,
+        about=None,
+        experiences=None,
+        educations=None,
+        interests=None,
+        accomplishments=None,
+        company=None,
+        job_title=None,
+        contacts=None,
+        driver=None,
+        get=True,
+        scrape=True,
+        close_on_complete=True,
+        time_to_wait_after_login=0,
+    ):
+        self.linkedin_url = linkedin_url
+        self.name = name
+        self.about = about or []
+        self.experiences = experiences or []
+        self.educations = educations or []
+        self.interests = interests or []
+        self.accomplishments = accomplishments or []
+        self.also_viewed_urls = []
+        self.contacts = contacts or []
+
+        if driver is None:
+            try:
+                if os.getenv("CHROMEDRIVER") == None:
+                    driver_path = os.path.join(
+                        os.path.dirname(__file__), "drivers/chromedriver"
+                    )
+                else:
+                    driver_path = os.getenv("CHROMEDRIVER")
+
+                driver = webdriver.Chrome(driver_path)
+            except:
+                driver = webdriver.Chrome()
+
+        if get:
+            driver.get(linkedin_url)
+
+        self.driver = driver
+
+        if scrape:
+            self.scrape(close_on_complete)
+
+    def add_about(self, about):
+        self.about.append(about)
+
+    def add_experience(self, experience):
+        self.experiences.append(experience)
+
+    def add_education(self, education):
+        self.educations.append(education)
+
+    def add_interest(self, interest):
+        self.interests.append(interest)
+
+    def add_accomplishment(self, accomplishment):
+        self.accomplishments.append(accomplishment)
+
+    def add_location(self, location):
+        self.location = location
+
+    def add_contact(self, contact):
+        self.contacts.append(contact)
+
+    def scrape(self, close_on_complete=True):
+        if self.is_signed_in():
+            self.scrape_logged_in(close_on_complete=close_on_complete)
+        else:
+            print("you are not logged in!")
+
+    def _click_see_more_by_class_name(self, class_name):
+        try:
+            _ = WebDriverWait(self.driver, self.__WAIT_FOR_ELEMENT_TIMEOUT).until(
+                EC.presence_of_element_located((By.CLASS_NAME, class_name))
+            )
+            div = self.driver.find_element(By.CLASS_NAME, class_name)
+            div.find_element(By.TAG_NAME, "button").click()
+        except Exception as e:
+            pass
+
+    def is_open_to_work(self):
+        try:
+            return "#OPEN_TO_WORK" in self.driver.find_element(By.CLASS_NAME,"pv-top-card-profile-picture").find_element(By.TAG_NAME,"img").get_attribute("title")
+        except:
+            return False
+
+    def get_experiences(self):
+        url = os.path.join(self.linkedin_url, "details/experience")
+        self.driver.get(url)
+        self.focus()
+        main = self.wait_for_element_to_load(by=By.TAG_NAME, name="main")
+        self.scroll_to_half()
+        self.scroll_to_bottom()
+        main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
+        for position in main_list.find_elements(By.CLASS_NAME, "pvs-list__paged-list-item"):
+            position = position.find_element(By.CSS_SELECTOR, "div[data-view-name='profile-component-entity']")
+            company_logo_elem, position_details, *_  = position.find_elements(By.XPATH, "*")
+
+            # company elem
+            company_linkedin_url = company_logo_elem.find_element(By.XPATH,"*").get_attribute("href")
+            if not company_linkedin_url:
+                continue
+
+            # position details
+            position_details_list = position_details.find_elements(By.XPATH,"*")
+            position_summary_details = position_details_list[0] if len(position_details_list) > 0 else None
+            position_summary_text = position_details_list[1] if len(position_details_list) > 1 else None
+            outer_positions = position_summary_details.find_element(By.XPATH,"*").find_elements(By.XPATH,"*")
+
+            if len(outer_positions) == 4:
+                position_title = outer_positions[0].find_element(By.TAG_NAME,"span").text
+                company = outer_positions[1].find_element(By.TAG_NAME,"span").text
+                work_times = outer_positions[2].find_element(By.TAG_NAME,"span").text
+                location = outer_positions[3].find_element(By.TAG_NAME,"span").text
+            elif len(outer_positions) == 3:
+                if "·" in outer_positions[2].text:
+                    position_title = outer_positions[0].find_element(By.TAG_NAME,"span").text
+                    company = outer_positions[1].find_element(By.TAG_NAME,"span").text
+                    work_times = outer_positions[2].find_element(By.TAG_NAME,"span").text
+                    location = ""
+                else:
+                    position_title = ""
+                    company = outer_positions[0].find_element(By.TAG_NAME,"span").text
+                    work_times = outer_positions[1].find_element(By.TAG_NAME,"span").text
+                    location = outer_positions[2].find_element(By.TAG_NAME,"span").text
+            else:
+                position_title = ""
+                company = outer_positions[0].find_element(By.TAG_NAME,"span").text
+                work_times = ""
+                location = ""
+
+
+            times = work_times.split("·")[0].strip() if work_times else ""
+            duration = work_times.split("·")[1].strip() if len(work_times.split("·")) > 1 else None
+
+            from_date = " ".join(times.split(" ")[:2]) if times else ""
+            to_date = " ".join(times.split(" ")[3:]) if times else ""
+            if position_summary_text and any(element.get_attribute("pvs-list__container") for element in position_summary_text.find_elements(By.TAG_NAME, "*")):
+                inner_positions = (position_summary_text.find_element(By.CLASS_NAME,"pvs-list__container")
+                                  .find_element(By.XPATH,"*").find_element(By.XPATH,"*").find_element(By.XPATH,"*")
+                                  .find_elements(By.CLASS_NAME,"pvs-list__paged-list-item"))
+            else:
+                inner_positions = []
+            if len(inner_positions) > 1:
+                descriptions = inner_positions
+                for description in descriptions:
+                    res = description.find_element(By.TAG_NAME,"a").find_elements(By.XPATH,"*")
+                    position_title_elem = res[0] if len(res) > 0 else None
+                    work_times_elem = res[1] if len(res) > 1 else None
+                    location_elem = res[2] if len(res) > 2 else None
+
+
+                    location = location_elem.find_element(By.XPATH,"*").text if location_elem else None
+                    position_title = position_title_elem.find_element(By.XPATH,"*").find_element(By.TAG_NAME,"*").text if position_title_elem else ""
+                    work_times = work_times_elem.find_element(By.XPATH,"*").text if work_times_elem else ""
+                    times = work_times.split("·")[0].strip() if work_times else ""
+                    duration = work_times.split("·")[1].strip() if len(work_times.split("·")) > 1 else None
+                    from_date = " ".join(times.split(" ")[:2]) if times else ""
+                    to_date = " ".join(times.split(" ")[3:]) if times else ""
+
+                    experience = Experience(
+                        position_title=position_title,
+                        from_date=from_date,
+                        to_date=to_date,
+                        duration=duration,
+                        location=location,
+                        description=description,
+                        institution_name=company,
+                        linkedin_url=company_linkedin_url
+                    )
+                    self.add_experience(experience)
+            else:
+                description = position_summary_text.text if position_summary_text else ""
+
+                experience = Experience(
+                    position_title=position_title,
+                    from_date=from_date,
+                    to_date=to_date,
+                    duration=duration,
+                    location=location,
+                    description=description,
+                    institution_name=company,
+                    linkedin_url=company_linkedin_url
+                )
+                self.add_experience(experience)
+
+    def get_educations(self):
+        url = os.path.join(self.linkedin_url, "details/education")
+        self.driver.get(url)
+        self.focus()
+        main = self.wait_for_element_to_load(by=By.TAG_NAME, name="main")
+        self.scroll_to_half()
+        self.scroll_to_bottom()
+        main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
+        for position in main_list.find_elements(By.CLASS_NAME,"pvs-list__paged-list-item"):
+            position = position.find_element(By.XPATH,"//div[@data-view-name='profile-component-entity']")
+            institution_logo_elem, position_details, *_ = position.find_elements(By.XPATH,"*")
+
+            # company elem
+            institution_linkedin_url = institution_logo_elem.find_element(By.XPATH,"*").get_attribute("href")
+
+            # position details
+            position_details_list = position_details.find_elements(By.XPATH,"*")
+            position_summary_details = position_details_list[0] if len(position_details_list) > 0 else None
+            position_summary_text = position_details_list[1] if len(position_details_list) > 1 else None
+            outer_positions = position_summary_details.find_element(By.XPATH,"*").find_elements(By.XPATH,"*")
+
+            institution_name = outer_positions[0].find_element(By.TAG_NAME,"span").text
+            if len(outer_positions) > 1:
+                degree = outer_positions[1].find_element(By.TAG_NAME,"span").text
+            else:
+                degree = None
+
+            if len(outer_positions) > 2:
+                times = outer_positions[2].find_element(By.TAG_NAME,"span").text
+
+                if times != "":
+                    from_date = times.split(" ")[times.split(" ").index("-")-1] if len(times.split(" "))>3 else times.split(" ")[0]
+                    to_date = times.split(" ")[-1]
+            else:
+                from_date = None
+                to_date = None
+
+
+
+            description = position_summary_text.text if position_summary_text else ""
+
+            education = Education(
+                from_date=from_date,
+                to_date=to_date,
+                description=description,
+                degree=degree,
+                institution_name=institution_name,
+                linkedin_url=institution_linkedin_url
+            )
+            self.add_education(education)
+
+    def get_name_and_location(self):
+        top_panel = self.driver.find_element(By.XPATH, "//*[@class='mt2 relative']")
+        self.name = top_panel.find_element(By.TAG_NAME, "h1").text
+        self.location = top_panel.find_element(By.XPATH, "//*[@class='text-body-small inline t-black--light break-words']").text
+
+    def get_about(self):
+        try:
+            about = self.driver.find_element(By.ID,"about").find_element(By.XPATH,"..").find_element(By.CLASS_NAME,"display-flex").text
+        except NoSuchElementException :
+            about=None
+        self.about = about
+
+    def scrape_logged_in(self, close_on_complete=True):
+        driver = self.driver
+        duration = None
+
+        root = WebDriverWait(driver, self.__WAIT_FOR_ELEMENT_TIMEOUT).until(
+            EC.presence_of_element_located(
+                (
+                    By.CLASS_NAME,
+                    self.__TOP_CARD,
+                )
+            )
+        )
+        self.focus()
+        self.wait(5)
+
+        # get name and location
+        self.get_name_and_location()
+
+        self.open_to_work = self.is_open_to_work()
+
+        # get about
+        self.get_about()
+        driver.execute_script(
+            "window.scrollTo(0, Math.ceil(document.body.scrollHeight/2));"
+        )
+        driver.execute_script(
+            "window.scrollTo(0, Math.ceil(document.body.scrollHeight/1.5));"
+        )
+
+        # get experience
+        self.get_experiences()
+
+        # get education
+        self.get_educations()
+
+        driver.get(self.linkedin_url)
+
+        # get interest
+        try:
+
+            _ = WebDriverWait(driver, self.__WAIT_FOR_ELEMENT_TIMEOUT).until(
+                EC.presence_of_element_located(
+                    (
+                        By.XPATH,
+                        "//*[@class='pv-profile-section pv-interests-section artdeco-container-card artdeco-card ember-view']",
+                    )
+                )
+            )
+            interestContainer = driver.find_element(By.XPATH,
+                "//*[@class='pv-profile-section pv-interests-section artdeco-container-card artdeco-card ember-view']"
+            )
+            for interestElement in interestContainer.find_elements(By.XPATH,
+                "//*[@class='pv-interest-entity pv-profile-section__card-item ember-view']"
+            ):
+                interest = Interest(
+                    interestElement.find_element(By.TAG_NAME, "h3").text.strip()
+                )
+                self.add_interest(interest)
+        except:
+            pass
+
+        # get accomplishment
+        try:
+            _ = WebDriverWait(driver, self.__WAIT_FOR_ELEMENT_TIMEOUT).until(
+                EC.presence_of_element_located(
+                    (
+                        By.XPATH,
+                        "//*[@class='pv-profile-section pv-accomplishments-section artdeco-container-card artdeco-card ember-view']",
+                    )
+                )
+            )
+            acc = driver.find_element(By.XPATH,
+                "//*[@class='pv-profile-section pv-accomplishments-section artdeco-container-card artdeco-card ember-view']"
+            )
+            for block in acc.find_elements(By.XPATH,
+                "//div[@class='pv-accomplishments-block__content break-words']"
+            ):
+                category = block.find_element(By.TAG_NAME, "h3")
+                for title in block.find_element(By.TAG_NAME,
+                    "ul"
+                ).find_elements(By.TAG_NAME, "li"):
+                    accomplishment = Accomplishment(category.text, title.text)
+                    self.add_accomplishment(accomplishment)
+        except:
+            pass
+
+        # get connections
+        try:
+            driver.get("https://www.linkedin.com/mynetwork/invite-connect/connections/")
+            _ = WebDriverWait(driver, self.__WAIT_FOR_ELEMENT_TIMEOUT).until(
+                EC.presence_of_element_located((By.CLASS_NAME, "mn-connections"))
+            )
+            connections = driver.find_element(By.CLASS_NAME, "mn-connections")
+            if connections is not None:
+                for conn in connections.find_elements(By.CLASS_NAME, "mn-connection-card"):
+                    anchor = conn.find_element(By.CLASS_NAME, "mn-connection-card__link")
+                    url = anchor.get_attribute("href")
+                    name = conn.find_element(By.CLASS_NAME, "mn-connection-card__details").find_element(By.CLASS_NAME, "mn-connection-card__name").text.strip()
+                    occupation = conn.find_element(By.CLASS_NAME, "mn-connection-card__details").find_element(By.CLASS_NAME, "mn-connection-card__occupation").text.strip()
+
+                    contact = Contact(name=name, occupation=occupation, url=url)
+                    self.add_contact(contact)
+        except:
+            connections = None
+
+        if close_on_complete:
+            driver.quit()
+
+    @property
+    def company(self):
+        if self.experiences:
+            return (
+                self.experiences[0].institution_name
+                if self.experiences[0].institution_name
+                else None
+            )
+        else:
+            return None
+
+    @property
+    def job_title(self):
+        if self.experiences:
+            return (
+                self.experiences[0].position_title
+                if self.experiences[0].position_title
+                else None
+            )
+        else:
+            return None
+
+    def __repr__(self):
+        return "<Person {name}\n\nAbout\n{about}\n\nExperience\n{exp}\n\nEducation\n{edu}\n\nInterest\n{int}\n\nAccomplishments\n{acc}\n\nContacts\n{conn}>".format(
+            name=self.name,
+            about=self.about,
+            exp=self.experiences,
+            edu=self.educations,
+            int=self.interests,
+            acc=self.accomplishments,
+            conn=self.contacts,
+        )

From 311cb81624900e5fdac0d6f5145190cc2ff291c9 Mon Sep 17 00:00:00 2001
From: erik aronesty <erik@q32.com>
Date: Mon, 18 Nov 2024 12:01:16 -0800
Subject: [PATCH 3/8] Apply patch via automation

---
 linkedin_scraper/person.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/linkedin_scraper/person.py b/linkedin_scraper/person.py
index e7ad7e4..04b5c7c 100644
--- a/linkedin_scraper/person.py
+++ b/linkedin_scraper/person.py
@@ -116,7 +116,7 @@ def get_experiences(self):
         main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
         for position in main_list.find_elements(By.CLASS_NAME, "pvs-list__paged-list-item"):
             position = position.find_element(By.CSS_SELECTOR, "div[data-view-name='profile-component-entity']")
-            company_logo_elem, position_details = position.find_elements(By.XPATH, "*")
+            company_logo_elem, position_details, *_  = position.find_elements(By.XPATH, "*")
 
             # company elem
             company_linkedin_url = company_logo_elem.find_element(By.XPATH,"*").get_attribute("href")
@@ -216,7 +216,7 @@ def get_educations(self):
         main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
         for position in main_list.find_elements(By.CLASS_NAME,"pvs-list__paged-list-item"):
             position = position.find_element(By.XPATH,"//div[@data-view-name='profile-component-entity']")
-            institution_logo_elem, position_details = position.find_elements(By.XPATH,"*")
+            institution_logo_elem, position_details, *_ = position.find_elements(By.XPATH,"*")
 
             # company elem
             institution_linkedin_url = institution_logo_elem.find_element(By.XPATH,"*").get_attribute("href")

From f580573d216ea97c88dd880af95c204646622602 Mon Sep 17 00:00:00 2001
From: erik aronesty <erik@q32.com>
Date: Mon, 18 Nov 2024 12:02:43 -0800
Subject: [PATCH 4/8] Apply patch via automation

---
 linkedin_scraper/person.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/linkedin_scraper/person.py b/linkedin_scraper/person.py
index 04b5c7c..e7ad7e4 100644
--- a/linkedin_scraper/person.py
+++ b/linkedin_scraper/person.py
@@ -116,7 +116,7 @@ def get_experiences(self):
         main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
         for position in main_list.find_elements(By.CLASS_NAME, "pvs-list__paged-list-item"):
             position = position.find_element(By.CSS_SELECTOR, "div[data-view-name='profile-component-entity']")
-            company_logo_elem, position_details, *_  = position.find_elements(By.XPATH, "*")
+            company_logo_elem, position_details = position.find_elements(By.XPATH, "*")
 
             # company elem
             company_linkedin_url = company_logo_elem.find_element(By.XPATH,"*").get_attribute("href")
@@ -216,7 +216,7 @@ def get_educations(self):
         main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
         for position in main_list.find_elements(By.CLASS_NAME,"pvs-list__paged-list-item"):
             position = position.find_element(By.XPATH,"//div[@data-view-name='profile-component-entity']")
-            institution_logo_elem, position_details, *_ = position.find_elements(By.XPATH,"*")
+            institution_logo_elem, position_details = position.find_elements(By.XPATH,"*")
 
             # company elem
             institution_linkedin_url = institution_logo_elem.find_element(By.XPATH,"*").get_attribute("href")

From ad4787d4f33aee560eb59386fb06a54784a564de Mon Sep 17 00:00:00 2001
From: erik aronesty <erik@q32.com>
Date: Mon, 18 Nov 2024 12:03:23 -0800
Subject: [PATCH 5/8] Apply patch via automation

---
 linkedin_scraper/person.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/linkedin_scraper/person.py b/linkedin_scraper/person.py
index e7ad7e4..04b5c7c 100644
--- a/linkedin_scraper/person.py
+++ b/linkedin_scraper/person.py
@@ -116,7 +116,7 @@ def get_experiences(self):
         main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
         for position in main_list.find_elements(By.CLASS_NAME, "pvs-list__paged-list-item"):
             position = position.find_element(By.CSS_SELECTOR, "div[data-view-name='profile-component-entity']")
-            company_logo_elem, position_details = position.find_elements(By.XPATH, "*")
+            company_logo_elem, position_details, *_  = position.find_elements(By.XPATH, "*")
 
             # company elem
             company_linkedin_url = company_logo_elem.find_element(By.XPATH,"*").get_attribute("href")
@@ -216,7 +216,7 @@ def get_educations(self):
         main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
         for position in main_list.find_elements(By.CLASS_NAME,"pvs-list__paged-list-item"):
             position = position.find_element(By.XPATH,"//div[@data-view-name='profile-component-entity']")
-            institution_logo_elem, position_details = position.find_elements(By.XPATH,"*")
+            institution_logo_elem, position_details, *_ = position.find_elements(By.XPATH,"*")
 
             # company elem
             institution_linkedin_url = institution_logo_elem.find_element(By.XPATH,"*").get_attribute("href")

From 8287b7599b8e3b39be50fd2acd785ea739fef6c9 Mon Sep 17 00:00:00 2001
From: erik aronesty <erik@q32.com>
Date: Mon, 18 Nov 2024 12:08:14 -0800
Subject: [PATCH 6/8] Apply patch via automation

---
 linkedin_scraper/person.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/linkedin_scraper/person.py b/linkedin_scraper/person.py
index 04b5c7c..e7ad7e4 100644
--- a/linkedin_scraper/person.py
+++ b/linkedin_scraper/person.py
@@ -116,7 +116,7 @@ def get_experiences(self):
         main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
         for position in main_list.find_elements(By.CLASS_NAME, "pvs-list__paged-list-item"):
             position = position.find_element(By.CSS_SELECTOR, "div[data-view-name='profile-component-entity']")
-            company_logo_elem, position_details, *_  = position.find_elements(By.XPATH, "*")
+            company_logo_elem, position_details = position.find_elements(By.XPATH, "*")
 
             # company elem
             company_linkedin_url = company_logo_elem.find_element(By.XPATH,"*").get_attribute("href")
@@ -216,7 +216,7 @@ def get_educations(self):
         main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
         for position in main_list.find_elements(By.CLASS_NAME,"pvs-list__paged-list-item"):
             position = position.find_element(By.XPATH,"//div[@data-view-name='profile-component-entity']")
-            institution_logo_elem, position_details, *_ = position.find_elements(By.XPATH,"*")
+            institution_logo_elem, position_details = position.find_elements(By.XPATH,"*")
 
             # company elem
             institution_linkedin_url = institution_logo_elem.find_element(By.XPATH,"*").get_attribute("href")

From 3218475eab09e09c236269661155aad95ad70d8b Mon Sep 17 00:00:00 2001
From: erik aronesty <erik@q32.com>
Date: Mon, 18 Nov 2024 12:08:28 -0800
Subject: [PATCH 7/8] Apply patch via automation

---
 linkedin_scraper/person.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/linkedin_scraper/person.py b/linkedin_scraper/person.py
index e7ad7e4..04b5c7c 100644
--- a/linkedin_scraper/person.py
+++ b/linkedin_scraper/person.py
@@ -116,7 +116,7 @@ def get_experiences(self):
         main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
         for position in main_list.find_elements(By.CLASS_NAME, "pvs-list__paged-list-item"):
             position = position.find_element(By.CSS_SELECTOR, "div[data-view-name='profile-component-entity']")
-            company_logo_elem, position_details = position.find_elements(By.XPATH, "*")
+            company_logo_elem, position_details, *_  = position.find_elements(By.XPATH, "*")
 
             # company elem
             company_linkedin_url = company_logo_elem.find_element(By.XPATH,"*").get_attribute("href")
@@ -216,7 +216,7 @@ def get_educations(self):
         main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
         for position in main_list.find_elements(By.CLASS_NAME,"pvs-list__paged-list-item"):
             position = position.find_element(By.XPATH,"//div[@data-view-name='profile-component-entity']")
-            institution_logo_elem, position_details = position.find_elements(By.XPATH,"*")
+            institution_logo_elem, position_details, *_ = position.find_elements(By.XPATH,"*")
 
             # company elem
             institution_linkedin_url = institution_logo_elem.find_element(By.XPATH,"*").get_attribute("href")

From 3bf9816818307967a0af44b70fbd14e6aa07bcf2 Mon Sep 17 00:00:00 2001
From: earonesty <erik@q32.com>
Date: Thu, 8 May 2025 12:54:29 -0700
Subject: [PATCH 8/8] Delete linkedin_scraper/person.py.orig

---
 linkedin_scraper/person.py.orig | 411 --------------------------------
 1 file changed, 411 deletions(-)
 delete mode 100644 linkedin_scraper/person.py.orig

diff --git a/linkedin_scraper/person.py.orig b/linkedin_scraper/person.py.orig
deleted file mode 100644
index 04b5c7c..0000000
--- a/linkedin_scraper/person.py.orig
+++ /dev/null
@@ -1,411 +0,0 @@
-import requests
-from selenium import webdriver
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.common.exceptions import NoSuchElementException
-from .objects import Experience, Education, Scraper, Interest, Accomplishment, Contact
-import os
-from linkedin_scraper import selectors
-
-
-class Person(Scraper):
-
-    __TOP_CARD = "scaffold-layout__main"
-    __WAIT_FOR_ELEMENT_TIMEOUT = 5
-
-    def __init__(
-        self,
-        linkedin_url=None,
-        name=None,
-        about=None,
-        experiences=None,
-        educations=None,
-        interests=None,
-        accomplishments=None,
-        company=None,
-        job_title=None,
-        contacts=None,
-        driver=None,
-        get=True,
-        scrape=True,
-        close_on_complete=True,
-        time_to_wait_after_login=0,
-    ):
-        self.linkedin_url = linkedin_url
-        self.name = name
-        self.about = about or []
-        self.experiences = experiences or []
-        self.educations = educations or []
-        self.interests = interests or []
-        self.accomplishments = accomplishments or []
-        self.also_viewed_urls = []
-        self.contacts = contacts or []
-
-        if driver is None:
-            try:
-                if os.getenv("CHROMEDRIVER") == None:
-                    driver_path = os.path.join(
-                        os.path.dirname(__file__), "drivers/chromedriver"
-                    )
-                else:
-                    driver_path = os.getenv("CHROMEDRIVER")
-
-                driver = webdriver.Chrome(driver_path)
-            except:
-                driver = webdriver.Chrome()
-
-        if get:
-            driver.get(linkedin_url)
-
-        self.driver = driver
-
-        if scrape:
-            self.scrape(close_on_complete)
-
-    def add_about(self, about):
-        self.about.append(about)
-
-    def add_experience(self, experience):
-        self.experiences.append(experience)
-
-    def add_education(self, education):
-        self.educations.append(education)
-
-    def add_interest(self, interest):
-        self.interests.append(interest)
-
-    def add_accomplishment(self, accomplishment):
-        self.accomplishments.append(accomplishment)
-
-    def add_location(self, location):
-        self.location = location
-
-    def add_contact(self, contact):
-        self.contacts.append(contact)
-
-    def scrape(self, close_on_complete=True):
-        if self.is_signed_in():
-            self.scrape_logged_in(close_on_complete=close_on_complete)
-        else:
-            print("you are not logged in!")
-
-    def _click_see_more_by_class_name(self, class_name):
-        try:
-            _ = WebDriverWait(self.driver, self.__WAIT_FOR_ELEMENT_TIMEOUT).until(
-                EC.presence_of_element_located((By.CLASS_NAME, class_name))
-            )
-            div = self.driver.find_element(By.CLASS_NAME, class_name)
-            div.find_element(By.TAG_NAME, "button").click()
-        except Exception as e:
-            pass
-
-    def is_open_to_work(self):
-        try:
-            return "#OPEN_TO_WORK" in self.driver.find_element(By.CLASS_NAME,"pv-top-card-profile-picture").find_element(By.TAG_NAME,"img").get_attribute("title")
-        except:
-            return False
-
-    def get_experiences(self):
-        url = os.path.join(self.linkedin_url, "details/experience")
-        self.driver.get(url)
-        self.focus()
-        main = self.wait_for_element_to_load(by=By.TAG_NAME, name="main")
-        self.scroll_to_half()
-        self.scroll_to_bottom()
-        main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
-        for position in main_list.find_elements(By.CLASS_NAME, "pvs-list__paged-list-item"):
-            position = position.find_element(By.CSS_SELECTOR, "div[data-view-name='profile-component-entity']")
-            company_logo_elem, position_details, *_  = position.find_elements(By.XPATH, "*")
-
-            # company elem
-            company_linkedin_url = company_logo_elem.find_element(By.XPATH,"*").get_attribute("href")
-            if not company_linkedin_url:
-                continue
-
-            # position details
-            position_details_list = position_details.find_elements(By.XPATH,"*")
-            position_summary_details = position_details_list[0] if len(position_details_list) > 0 else None
-            position_summary_text = position_details_list[1] if len(position_details_list) > 1 else None
-            outer_positions = position_summary_details.find_element(By.XPATH,"*").find_elements(By.XPATH,"*")
-
-            if len(outer_positions) == 4:
-                position_title = outer_positions[0].find_element(By.TAG_NAME,"span").text
-                company = outer_positions[1].find_element(By.TAG_NAME,"span").text
-                work_times = outer_positions[2].find_element(By.TAG_NAME,"span").text
-                location = outer_positions[3].find_element(By.TAG_NAME,"span").text
-            elif len(outer_positions) == 3:
-                if "·" in outer_positions[2].text:
-                    position_title = outer_positions[0].find_element(By.TAG_NAME,"span").text
-                    company = outer_positions[1].find_element(By.TAG_NAME,"span").text
-                    work_times = outer_positions[2].find_element(By.TAG_NAME,"span").text
-                    location = ""
-                else:
-                    position_title = ""
-                    company = outer_positions[0].find_element(By.TAG_NAME,"span").text
-                    work_times = outer_positions[1].find_element(By.TAG_NAME,"span").text
-                    location = outer_positions[2].find_element(By.TAG_NAME,"span").text
-            else:
-                position_title = ""
-                company = outer_positions[0].find_element(By.TAG_NAME,"span").text
-                work_times = ""
-                location = ""
-
-
-            times = work_times.split("·")[0].strip() if work_times else ""
-            duration = work_times.split("·")[1].strip() if len(work_times.split("·")) > 1 else None
-
-            from_date = " ".join(times.split(" ")[:2]) if times else ""
-            to_date = " ".join(times.split(" ")[3:]) if times else ""
-            if position_summary_text and any(element.get_attribute("pvs-list__container") for element in position_summary_text.find_elements(By.TAG_NAME, "*")):
-                inner_positions = (position_summary_text.find_element(By.CLASS_NAME,"pvs-list__container")
-                                  .find_element(By.XPATH,"*").find_element(By.XPATH,"*").find_element(By.XPATH,"*")
-                                  .find_elements(By.CLASS_NAME,"pvs-list__paged-list-item"))
-            else:
-                inner_positions = []
-            if len(inner_positions) > 1:
-                descriptions = inner_positions
-                for description in descriptions:
-                    res = description.find_element(By.TAG_NAME,"a").find_elements(By.XPATH,"*")
-                    position_title_elem = res[0] if len(res) > 0 else None
-                    work_times_elem = res[1] if len(res) > 1 else None
-                    location_elem = res[2] if len(res) > 2 else None
-
-
-                    location = location_elem.find_element(By.XPATH,"*").text if location_elem else None
-                    position_title = position_title_elem.find_element(By.XPATH,"*").find_element(By.TAG_NAME,"*").text if position_title_elem else ""
-                    work_times = work_times_elem.find_element(By.XPATH,"*").text if work_times_elem else ""
-                    times = work_times.split("·")[0].strip() if work_times else ""
-                    duration = work_times.split("·")[1].strip() if len(work_times.split("·")) > 1 else None
-                    from_date = " ".join(times.split(" ")[:2]) if times else ""
-                    to_date = " ".join(times.split(" ")[3:]) if times else ""
-
-                    experience = Experience(
-                        position_title=position_title,
-                        from_date=from_date,
-                        to_date=to_date,
-                        duration=duration,
-                        location=location,
-                        description=description,
-                        institution_name=company,
-                        linkedin_url=company_linkedin_url
-                    )
-                    self.add_experience(experience)
-            else:
-                description = position_summary_text.text if position_summary_text else ""
-
-                experience = Experience(
-                    position_title=position_title,
-                    from_date=from_date,
-                    to_date=to_date,
-                    duration=duration,
-                    location=location,
-                    description=description,
-                    institution_name=company,
-                    linkedin_url=company_linkedin_url
-                )
-                self.add_experience(experience)
-
-    def get_educations(self):
-        url = os.path.join(self.linkedin_url, "details/education")
-        self.driver.get(url)
-        self.focus()
-        main = self.wait_for_element_to_load(by=By.TAG_NAME, name="main")
-        self.scroll_to_half()
-        self.scroll_to_bottom()
-        main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main)
-        for position in main_list.find_elements(By.CLASS_NAME,"pvs-list__paged-list-item"):
-            position = position.find_element(By.XPATH,"//div[@data-view-name='profile-component-entity']")
-            institution_logo_elem, position_details, *_ = position.find_elements(By.XPATH,"*")
-
-            # company elem
-            institution_linkedin_url = institution_logo_elem.find_element(By.XPATH,"*").get_attribute("href")
-
-            # position details
-            position_details_list = position_details.find_elements(By.XPATH,"*")
-            position_summary_details = position_details_list[0] if len(position_details_list) > 0 else None
-            position_summary_text = position_details_list[1] if len(position_details_list) > 1 else None
-            outer_positions = position_summary_details.find_element(By.XPATH,"*").find_elements(By.XPATH,"*")
-
-            institution_name = outer_positions[0].find_element(By.TAG_NAME,"span").text
-            if len(outer_positions) > 1:
-                degree = outer_positions[1].find_element(By.TAG_NAME,"span").text
-            else:
-                degree = None
-
-            if len(outer_positions) > 2:
-                times = outer_positions[2].find_element(By.TAG_NAME,"span").text
-
-                if times != "":
-                    from_date = times.split(" ")[times.split(" ").index("-")-1] if len(times.split(" "))>3 else times.split(" ")[0]
-                    to_date = times.split(" ")[-1]
-            else:
-                from_date = None
-                to_date = None
-
-
-
-            description = position_summary_text.text if position_summary_text else ""
-
-            education = Education(
-                from_date=from_date,
-                to_date=to_date,
-                description=description,
-                degree=degree,
-                institution_name=institution_name,
-                linkedin_url=institution_linkedin_url
-            )
-            self.add_education(education)
-
-    def get_name_and_location(self):
-        top_panel = self.driver.find_element(By.XPATH, "//*[@class='mt2 relative']")
-        self.name = top_panel.find_element(By.TAG_NAME, "h1").text
-        self.location = top_panel.find_element(By.XPATH, "//*[@class='text-body-small inline t-black--light break-words']").text
-
-    def get_about(self):
-        try:
-            about = self.driver.find_element(By.ID,"about").find_element(By.XPATH,"..").find_element(By.CLASS_NAME,"display-flex").text
-        except NoSuchElementException :
-            about=None
-        self.about = about
-
-    def scrape_logged_in(self, close_on_complete=True):
-        driver = self.driver
-        duration = None
-
-        root = WebDriverWait(driver, self.__WAIT_FOR_ELEMENT_TIMEOUT).until(
-            EC.presence_of_element_located(
-                (
-                    By.CLASS_NAME,
-                    self.__TOP_CARD,
-                )
-            )
-        )
-        self.focus()
-        self.wait(5)
-
-        # get name and location
-        self.get_name_and_location()
-
-        self.open_to_work = self.is_open_to_work()
-
-        # get about
-        self.get_about()
-        driver.execute_script(
-            "window.scrollTo(0, Math.ceil(document.body.scrollHeight/2));"
-        )
-        driver.execute_script(
-            "window.scrollTo(0, Math.ceil(document.body.scrollHeight/1.5));"
-        )
-
-        # get experience
-        self.get_experiences()
-
-        # get education
-        self.get_educations()
-
-        driver.get(self.linkedin_url)
-
-        # get interest
-        try:
-
-            _ = WebDriverWait(driver, self.__WAIT_FOR_ELEMENT_TIMEOUT).until(
-                EC.presence_of_element_located(
-                    (
-                        By.XPATH,
-                        "//*[@class='pv-profile-section pv-interests-section artdeco-container-card artdeco-card ember-view']",
-                    )
-                )
-            )
-            interestContainer = driver.find_element(By.XPATH,
-                "//*[@class='pv-profile-section pv-interests-section artdeco-container-card artdeco-card ember-view']"
-            )
-            for interestElement in interestContainer.find_elements(By.XPATH,
-                "//*[@class='pv-interest-entity pv-profile-section__card-item ember-view']"
-            ):
-                interest = Interest(
-                    interestElement.find_element(By.TAG_NAME, "h3").text.strip()
-                )
-                self.add_interest(interest)
-        except:
-            pass
-
-        # get accomplishment
-        try:
-            _ = WebDriverWait(driver, self.__WAIT_FOR_ELEMENT_TIMEOUT).until(
-                EC.presence_of_element_located(
-                    (
-                        By.XPATH,
-                        "//*[@class='pv-profile-section pv-accomplishments-section artdeco-container-card artdeco-card ember-view']",
-                    )
-                )
-            )
-            acc = driver.find_element(By.XPATH,
-                "//*[@class='pv-profile-section pv-accomplishments-section artdeco-container-card artdeco-card ember-view']"
-            )
-            for block in acc.find_elements(By.XPATH,
-                "//div[@class='pv-accomplishments-block__content break-words']"
-            ):
-                category = block.find_element(By.TAG_NAME, "h3")
-                for title in block.find_element(By.TAG_NAME,
-                    "ul"
-                ).find_elements(By.TAG_NAME, "li"):
-                    accomplishment = Accomplishment(category.text, title.text)
-                    self.add_accomplishment(accomplishment)
-        except:
-            pass
-
-        # get connections
-        try:
-            driver.get("https://www.linkedin.com/mynetwork/invite-connect/connections/")
-            _ = WebDriverWait(driver, self.__WAIT_FOR_ELEMENT_TIMEOUT).until(
-                EC.presence_of_element_located((By.CLASS_NAME, "mn-connections"))
-            )
-            connections = driver.find_element(By.CLASS_NAME, "mn-connections")
-            if connections is not None:
-                for conn in connections.find_elements(By.CLASS_NAME, "mn-connection-card"):
-                    anchor = conn.find_element(By.CLASS_NAME, "mn-connection-card__link")
-                    url = anchor.get_attribute("href")
-                    name = conn.find_element(By.CLASS_NAME, "mn-connection-card__details").find_element(By.CLASS_NAME, "mn-connection-card__name").text.strip()
-                    occupation = conn.find_element(By.CLASS_NAME, "mn-connection-card__details").find_element(By.CLASS_NAME, "mn-connection-card__occupation").text.strip()
-
-                    contact = Contact(name=name, occupation=occupation, url=url)
-                    self.add_contact(contact)
-        except:
-            connections = None
-
-        if close_on_complete:
-            driver.quit()
-
-    @property
-    def company(self):
-        if self.experiences:
-            return (
-                self.experiences[0].institution_name
-                if self.experiences[0].institution_name
-                else None
-            )
-        else:
-            return None
-
-    @property
-    def job_title(self):
-        if self.experiences:
-            return (
-                self.experiences[0].position_title
-                if self.experiences[0].position_title
-                else None
-            )
-        else:
-            return None
-
-    def __repr__(self):
-        return "<Person {name}\n\nAbout\n{about}\n\nExperience\n{exp}\n\nEducation\n{edu}\n\nInterest\n{int}\n\nAccomplishments\n{acc}\n\nContacts\n{conn}>".format(
-            name=self.name,
-            about=self.about,
-            exp=self.experiences,
-            edu=self.educations,
-            int=self.interests,
-            acc=self.accomplishments,
-            conn=self.contacts,
-        )