"""
Utilities
"""
import io
from dataclasses import asdict, fields, is_dataclass
from typing import Any, Dict, List


def _is_private(key: Any) -> bool:
    """Return True when *key* is a string that starts with an underscore.

    Non-string keys (ints, tuples, ...) are never private; checking the type
    first avoids calling ``startswith`` on objects that do not have it.
    """
    return isinstance(key, str) and key.startswith('_')


def custom_asdict(obj) -> Dict[str, Any]:
    """Convert a dataclass instance into a dict of converted field values.

    Every declared field is included (also underscore-prefixed ones) except
    fields whose current value is an ``io.BufferedWriter``, which cannot be
    represented as plain data. Field values are converted with ``to_dict``.

    Args:
        obj: A dataclass instance.

    Returns:
        A mapping of field name to the converted field value.

    Raises:
        TypeError: If *obj* is not a dataclass (raised by
            ``dataclasses.fields``).
    """
    obj_dict: Dict[str, Any] = {}
    # fields() yields only real fields; ClassVar / InitVar pseudo-fields are
    # skipped, so getattr() below cannot fail on an init-only variable.
    for field in fields(obj):
        value = getattr(obj, field.name)
        if isinstance(value, io.BufferedWriter):
            continue
        obj_dict[field.name] = to_dict(value)
    return obj_dict


def to_dict(obj) -> Any:
    """Recursively convert *obj* into plain dicts, lists and scalars.

    Conversion rules, checked in this order:

    * dataclass instance -> dict of its fields, minus underscore-prefixed
      fields and ``io.BufferedWriter`` values
    * ``None``, ``int``, ``str``, ``bool``, ``float`` -> returned unchanged
    * ``list``, ``tuple``, ``set``, ``frozenset`` -> list of converted items
    * ``dict`` -> dict of converted values, minus underscore-prefixed
      string keys
    * any object with a ``__dict__`` -> dict of its converted attributes,
      minus underscore-prefixed names
    * anything else -> ``None``

    Args:
        obj: The value to convert.

    Returns:
        The converted value; a dict for dataclasses, dicts and plain
        objects, a list for sequences/sets, the value itself for scalars,
        otherwise ``None``.
    """
    # is_dataclass() is also true for dataclass *classes*; only instances
    # carry field values, so classes must not take this branch.
    if is_dataclass(obj) and not isinstance(obj, type):
        # custom_asdict() already converted the values, so only the private
        # field names remain to be dropped -- no second recursive pass.
        return {
            key: value
            for key, value in custom_asdict(obj).items()
            if not _is_private(key)
        }

    if obj is None or isinstance(obj, (int, str, bool, float)):
        return obj

    if isinstance(obj, (list, tuple, set, frozenset)):
        return [to_dict(item) for item in obj]

    if isinstance(obj, dict):
        return {
            key: to_dict(value)
            for key, value in obj.items()
            if not _is_private(key)
        }

    if hasattr(obj, '__dict__'):
        return {
            key: to_dict(value)
            for key, value in obj.__dict__.items()
            if not _is_private(key)
        }

    # Unsupported leaf type (bytes, datetime, ...): no plain representation.
    return None
"""
Test Utils
"""
from typing import Dict, Any
from linkedin_scraper.utils import to_dict
from linkedin_scraper.objects import Contact, Institution
from dataclasses import dataclass
import unittest


@dataclass
class SampleClass:
    """Dataclass fixture mixing a private field with two nested objects."""

    _test_var: str
    contact: Contact
    institution: Institution

    def to_dict(self) -> Dict[str, Any]:
        # Delegates to the module-level helper imported above.
        return to_dict(self)


class TestUtils(unittest.TestCase):
    """
    Test Utils
    """

    def test_to_dict(self):
        # Fixture: one underscore-prefixed field plus two nested objects
        # that are built with a single keyword argument each.
        sample = SampleClass(
            _test_var='test var',
            contact=Contact(name='test_name'),
            institution=Institution(institution_name='test_place'),
        )

        # The expected result has no '_test_var' entry and lists every
        # attribute that was not supplied as None.
        expected_contact = {
            'name': 'test_name',
            'occupation': None,
            'url': None,
        }
        expected_institution = {
            'company_size': None,
            'founded': None,
            'headquarters': None,
            'industry': None,
            'institution_name': 'test_place',
            'linkedin_url': None,
            'type': None,
            'website': None,
        }
        expected_output = {
            'contact': expected_contact,
            'institution': expected_institution,
        }

        actual_output = to_dict(sample)
        self.assertEqual(actual_output, expected_output)


if __name__ == "__main__":
    unittest.main()