# test_github_scraper.py (26 KB) — scraped page header and line-number gutter removed.
#!/usr/bin/env python3
"""
Tests for GitHub Scraper (cli/github_scraper.py)
Tests cover:
- GitHubScraper initialization and configuration (C1.1)
- README extraction (C1.2)
- Language detection (C1.4)
- GitHub Issues extraction (C1.7)
- CHANGELOG extraction (C1.8)
- GitHub Releases extraction (C1.9)
- GitHubToSkillConverter and skill building (C1.10)
- Authentication handling
- Error handling and edge cases
"""
import unittest
import sys
import json
import tempfile
import shutil
import os
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
from datetime import datetime

# Optional-dependency guard: every TestCase below checks this flag in setUp()
# and skips itself when PyGithub is not installed.
try:
    from github import Github, GithubException
    PYGITHUB_AVAILABLE = True
except ImportError:
    PYGITHUB_AVAILABLE = False
  29. class TestGitHubScraperInitialization(unittest.TestCase):
  30. """Test GitHubScraper initialization and configuration (C1.1)"""
  31. def setUp(self):
  32. if not PYGITHUB_AVAILABLE:
  33. self.skipTest("PyGithub not installed")
  34. from skill_seekers.cli.github_scraper import GitHubScraper
  35. self.GitHubScraper = GitHubScraper
  36. # Create temporary directory for test output
  37. self.temp_dir = tempfile.mkdtemp()
  38. self.output_dir = Path(self.temp_dir)
  39. def tearDown(self):
  40. # Clean up temporary directory
  41. if hasattr(self, 'temp_dir'):
  42. shutil.rmtree(self.temp_dir, ignore_errors=True)
  43. def test_init_with_repo_name(self):
  44. """Test initialization with repository name"""
  45. config = {
  46. 'repo': 'facebook/react',
  47. 'name': 'react',
  48. 'github_token': None
  49. }
  50. scraper = self.GitHubScraper(config)
  51. self.assertEqual(scraper.repo_name, 'facebook/react')
  52. self.assertEqual(scraper.name, 'react')
  53. self.assertIsNotNone(scraper.github)
  54. def test_init_with_token_from_config(self):
  55. """Test initialization with token from config"""
  56. config = {
  57. 'repo': 'facebook/react',
  58. 'name': 'react',
  59. 'github_token': 'test_token_123'
  60. }
  61. with patch('skill_seekers.cli.github_scraper.Github') as mock_github:
  62. scraper = self.GitHubScraper(config)
  63. mock_github.assert_called_once_with('test_token_123')
  64. def test_init_with_token_from_env(self):
  65. """Test initialization with token from environment variable"""
  66. config = {
  67. 'repo': 'facebook/react',
  68. 'name': 'react',
  69. 'github_token': None
  70. }
  71. with patch.dict(os.environ, {'GITHUB_TOKEN': 'env_token_456'}):
  72. with patch('skill_seekers.cli.github_scraper.Github') as mock_github:
  73. scraper = self.GitHubScraper(config)
  74. mock_github.assert_called_once_with('env_token_456')
  75. def test_init_without_token(self):
  76. """Test initialization without authentication"""
  77. config = {
  78. 'repo': 'facebook/react',
  79. 'name': 'react',
  80. 'github_token': None
  81. }
  82. with patch('skill_seekers.cli.github_scraper.Github') as mock_github:
  83. with patch.dict(os.environ, {}, clear=True):
  84. scraper = self.GitHubScraper(config)
  85. # Should create unauthenticated client
  86. self.assertIsNotNone(scraper.github)
  87. def test_token_priority_env_over_config(self):
  88. """Test that GITHUB_TOKEN env var takes priority over config"""
  89. config = {
  90. 'repo': 'facebook/react',
  91. 'name': 'react',
  92. 'github_token': 'config_token'
  93. }
  94. with patch.dict(os.environ, {'GITHUB_TOKEN': 'env_token'}):
  95. scraper = self.GitHubScraper(config)
  96. token = scraper._get_token()
  97. self.assertEqual(token, 'env_token')
  98. class TestREADMEExtraction(unittest.TestCase):
  99. """Test README extraction (C1.2)"""
  100. def setUp(self):
  101. if not PYGITHUB_AVAILABLE:
  102. self.skipTest("PyGithub not installed")
  103. from skill_seekers.cli.github_scraper import GitHubScraper
  104. self.GitHubScraper = GitHubScraper
  105. def test_extract_readme_success(self):
  106. """Test successful README extraction"""
  107. config = {
  108. 'repo': 'facebook/react',
  109. 'name': 'react',
  110. 'github_token': None
  111. }
  112. mock_content = Mock()
  113. mock_content.decoded_content = b'# React\n\nA JavaScript library'
  114. with patch('skill_seekers.cli.github_scraper.Github'):
  115. scraper = self.GitHubScraper(config)
  116. scraper.repo = Mock()
  117. scraper.repo.get_contents.return_value = mock_content
  118. scraper._extract_readme()
  119. self.assertIn('readme', scraper.extracted_data)
  120. self.assertEqual(scraper.extracted_data['readme'], '# React\n\nA JavaScript library')
  121. def test_extract_readme_tries_multiple_locations(self):
  122. """Test that README extraction tries multiple file locations"""
  123. config = {
  124. 'repo': 'facebook/react',
  125. 'name': 'react',
  126. 'github_token': None
  127. }
  128. with patch('skill_seekers.cli.github_scraper.Github'):
  129. scraper = self.GitHubScraper(config)
  130. scraper.repo = Mock()
  131. # Make first attempts fail, succeed on third
  132. def side_effect(path):
  133. if path in ['README.md', 'README.rst']:
  134. raise GithubException(404, 'Not found')
  135. mock_content = Mock()
  136. mock_content.decoded_content = b'# README'
  137. return mock_content
  138. scraper.repo.get_contents.side_effect = side_effect
  139. scraper._extract_readme()
  140. # Should have tried multiple paths
  141. self.assertGreaterEqual(scraper.repo.get_contents.call_count, 1)
  142. def test_extract_readme_not_found(self):
  143. """Test README extraction when no README exists"""
  144. config = {
  145. 'repo': 'test/norepo',
  146. 'name': 'norepo',
  147. 'github_token': None
  148. }
  149. with patch('skill_seekers.cli.github_scraper.Github'):
  150. scraper = self.GitHubScraper(config)
  151. scraper.repo = Mock()
  152. scraper.repo.get_contents.side_effect = GithubException(404, 'Not found')
  153. scraper._extract_readme()
  154. # Should not crash, just log warning (readme initialized as empty string)
  155. self.assertEqual(scraper.extracted_data['readme'], '')
  156. class TestLanguageDetection(unittest.TestCase):
  157. """Test language detection (C1.4)"""
  158. def setUp(self):
  159. if not PYGITHUB_AVAILABLE:
  160. self.skipTest("PyGithub not installed")
  161. from skill_seekers.cli.github_scraper import GitHubScraper
  162. self.GitHubScraper = GitHubScraper
  163. def test_extract_languages_success(self):
  164. """Test successful language detection"""
  165. config = {
  166. 'repo': 'facebook/react',
  167. 'name': 'react',
  168. 'github_token': None
  169. }
  170. with patch('skill_seekers.cli.github_scraper.Github'):
  171. scraper = self.GitHubScraper(config)
  172. scraper.repo = Mock()
  173. scraper.repo.get_languages.return_value = {
  174. 'JavaScript': 8000,
  175. 'TypeScript': 2000
  176. }
  177. scraper._extract_languages()
  178. self.assertIn('languages', scraper.extracted_data)
  179. self.assertIn('JavaScript', scraper.extracted_data['languages'])
  180. self.assertIn('TypeScript', scraper.extracted_data['languages'])
  181. # Check percentages
  182. js_data = scraper.extracted_data['languages']['JavaScript']
  183. self.assertEqual(js_data['bytes'], 8000)
  184. self.assertEqual(js_data['percentage'], 80.0)
  185. ts_data = scraper.extracted_data['languages']['TypeScript']
  186. self.assertEqual(ts_data['bytes'], 2000)
  187. self.assertEqual(ts_data['percentage'], 20.0)
  188. def test_extract_languages_empty(self):
  189. """Test language detection with no languages"""
  190. config = {
  191. 'repo': 'test/norepo',
  192. 'name': 'norepo',
  193. 'github_token': None
  194. }
  195. with patch('skill_seekers.cli.github_scraper.Github'):
  196. scraper = self.GitHubScraper(config)
  197. scraper.repo = Mock()
  198. scraper.repo.get_languages.return_value = {}
  199. scraper._extract_languages()
  200. self.assertIn('languages', scraper.extracted_data)
  201. self.assertEqual(scraper.extracted_data['languages'], {})
  202. class TestIssuesExtraction(unittest.TestCase):
  203. """Test GitHub Issues extraction (C1.7)"""
  204. def setUp(self):
  205. if not PYGITHUB_AVAILABLE:
  206. self.skipTest("PyGithub not installed")
  207. from skill_seekers.cli.github_scraper import GitHubScraper
  208. self.GitHubScraper = GitHubScraper
  209. def test_extract_issues_success(self):
  210. """Test successful issues extraction"""
  211. config = {
  212. 'repo': 'facebook/react',
  213. 'name': 'react',
  214. 'github_token': None,
  215. 'max_issues': 10
  216. }
  217. # Create mock issues
  218. mock_label1 = Mock()
  219. mock_label1.name = 'bug'
  220. mock_label2 = Mock()
  221. mock_label2.name = 'high-priority'
  222. mock_milestone = Mock()
  223. mock_milestone.title = 'v18.0'
  224. mock_issue1 = Mock()
  225. mock_issue1.number = 123
  226. mock_issue1.title = 'Bug in useState'
  227. mock_issue1.state = 'open'
  228. mock_issue1.labels = [mock_label1, mock_label2]
  229. mock_issue1.milestone = mock_milestone
  230. mock_issue1.created_at = datetime(2023, 1, 1)
  231. mock_issue1.updated_at = datetime(2023, 1, 2)
  232. mock_issue1.closed_at = None
  233. mock_issue1.html_url = 'https://github.com/facebook/react/issues/123'
  234. mock_issue1.body = 'Issue description'
  235. mock_issue1.pull_request = None
  236. mock_label3 = Mock()
  237. mock_label3.name = 'enhancement'
  238. mock_issue2 = Mock()
  239. mock_issue2.number = 124
  240. mock_issue2.title = 'Feature request'
  241. mock_issue2.state = 'closed'
  242. mock_issue2.labels = [mock_label3]
  243. mock_issue2.milestone = None
  244. mock_issue2.created_at = datetime(2023, 1, 3)
  245. mock_issue2.updated_at = datetime(2023, 1, 4)
  246. mock_issue2.closed_at = datetime(2023, 1, 5)
  247. mock_issue2.html_url = 'https://github.com/facebook/react/issues/124'
  248. mock_issue2.body = 'Feature description'
  249. mock_issue2.pull_request = None
  250. with patch('skill_seekers.cli.github_scraper.Github'):
  251. scraper = self.GitHubScraper(config)
  252. scraper.repo = Mock()
  253. scraper.repo.get_issues.return_value = [mock_issue1, mock_issue2]
  254. scraper._extract_issues()
  255. self.assertIn('issues', scraper.extracted_data)
  256. issues = scraper.extracted_data['issues']
  257. self.assertEqual(len(issues), 2)
  258. # Check first issue
  259. self.assertEqual(issues[0]['number'], 123)
  260. self.assertEqual(issues[0]['title'], 'Bug in useState')
  261. self.assertEqual(issues[0]['state'], 'open')
  262. self.assertEqual(issues[0]['labels'], ['bug', 'high-priority'])
  263. self.assertEqual(issues[0]['milestone'], 'v18.0')
  264. # Check second issue
  265. self.assertEqual(issues[1]['number'], 124)
  266. self.assertEqual(issues[1]['state'], 'closed')
  267. self.assertIsNone(issues[1]['milestone'])
  268. def test_extract_issues_filters_pull_requests(self):
  269. """Test that pull requests are filtered out from issues"""
  270. config = {
  271. 'repo': 'facebook/react',
  272. 'name': 'react',
  273. 'github_token': None,
  274. 'max_issues': 10
  275. }
  276. # Create mock issue (need all required attributes)
  277. mock_issue = Mock()
  278. mock_issue.number = 123
  279. mock_issue.title = 'Real issue'
  280. mock_issue.state = 'open'
  281. mock_issue.labels = []
  282. mock_issue.milestone = None
  283. mock_issue.created_at = datetime(2023, 1, 1)
  284. mock_issue.updated_at = datetime(2023, 1, 2)
  285. mock_issue.closed_at = None
  286. mock_issue.html_url = 'https://github.com/test/repo/issues/123'
  287. mock_issue.body = 'Issue body'
  288. mock_issue.pull_request = None
  289. mock_pr = Mock()
  290. mock_pr.number = 124
  291. mock_pr.title = 'Pull request'
  292. mock_pr.pull_request = Mock() # Has pull_request attribute
  293. with patch('skill_seekers.cli.github_scraper.Github'):
  294. scraper = self.GitHubScraper(config)
  295. scraper.repo = Mock()
  296. scraper.repo.get_issues.return_value = [mock_issue, mock_pr]
  297. scraper._extract_issues()
  298. issues = scraper.extracted_data['issues']
  299. # Should only have the real issue, not the PR
  300. self.assertEqual(len(issues), 1)
  301. self.assertEqual(issues[0]['number'], 123)
  302. def test_extract_issues_respects_max_limit(self):
  303. """Test that max_issues limit is respected"""
  304. config = {
  305. 'repo': 'facebook/react',
  306. 'name': 'react',
  307. 'github_token': None,
  308. 'max_issues': 2
  309. }
  310. # Create 5 mock issues
  311. mock_issues = []
  312. for i in range(5):
  313. mock_issue = Mock()
  314. mock_issue.number = i
  315. mock_issue.title = f'Issue {i}'
  316. mock_issue.state = 'open'
  317. mock_issue.labels = []
  318. mock_issue.milestone = None
  319. mock_issue.created_at = datetime(2023, 1, 1)
  320. mock_issue.updated_at = datetime(2023, 1, 2)
  321. mock_issue.closed_at = None
  322. mock_issue.html_url = f'https://github.com/test/repo/issues/{i}'
  323. mock_issue.body = None
  324. mock_issue.pull_request = None
  325. mock_issues.append(mock_issue)
  326. with patch('skill_seekers.cli.github_scraper.Github'):
  327. scraper = self.GitHubScraper(config)
  328. scraper.repo = Mock()
  329. scraper.repo.get_issues.return_value = mock_issues
  330. scraper._extract_issues()
  331. issues = scraper.extracted_data['issues']
  332. # Should only extract first 2 issues
  333. self.assertEqual(len(issues), 2)
  334. class TestChangelogExtraction(unittest.TestCase):
  335. """Test CHANGELOG extraction (C1.8)"""
  336. def setUp(self):
  337. if not PYGITHUB_AVAILABLE:
  338. self.skipTest("PyGithub not installed")
  339. from skill_seekers.cli.github_scraper import GitHubScraper
  340. self.GitHubScraper = GitHubScraper
  341. def test_extract_changelog_success(self):
  342. """Test successful CHANGELOG extraction"""
  343. config = {
  344. 'repo': 'facebook/react',
  345. 'name': 'react',
  346. 'github_token': None
  347. }
  348. mock_content = Mock()
  349. mock_content.decoded_content = b'# Changelog\n\n## v1.0.0\n- Initial release'
  350. with patch('skill_seekers.cli.github_scraper.Github'):
  351. scraper = self.GitHubScraper(config)
  352. scraper.repo = Mock()
  353. scraper.repo.get_contents.return_value = mock_content
  354. scraper._extract_changelog()
  355. self.assertIn('changelog', scraper.extracted_data)
  356. self.assertIn('Initial release', scraper.extracted_data['changelog'])
  357. def test_extract_changelog_tries_multiple_locations(self):
  358. """Test that CHANGELOG extraction tries multiple file locations"""
  359. config = {
  360. 'repo': 'facebook/react',
  361. 'name': 'react',
  362. 'github_token': None
  363. }
  364. with patch('skill_seekers.cli.github_scraper.Github'):
  365. scraper = self.GitHubScraper(config)
  366. scraper.repo = Mock()
  367. # Make first attempts fail
  368. call_count = {'count': 0}
  369. def side_effect(path):
  370. call_count['count'] += 1
  371. if path in ['CHANGELOG.md', 'CHANGES.md']:
  372. raise GithubException(404, 'Not found')
  373. mock_content = Mock()
  374. mock_content.decoded_content = b'# History'
  375. return mock_content
  376. scraper.repo.get_contents.side_effect = side_effect
  377. scraper._extract_changelog()
  378. # Should have tried multiple paths
  379. self.assertGreaterEqual(call_count['count'], 1)
  380. def test_extract_changelog_not_found(self):
  381. """Test CHANGELOG extraction when no changelog exists"""
  382. config = {
  383. 'repo': 'test/norepo',
  384. 'name': 'norepo',
  385. 'github_token': None
  386. }
  387. with patch('skill_seekers.cli.github_scraper.Github'):
  388. scraper = self.GitHubScraper(config)
  389. scraper.repo = Mock()
  390. scraper.repo.get_contents.side_effect = GithubException(404, 'Not found')
  391. scraper._extract_changelog()
  392. # Should not crash, just log warning (changelog initialized as empty string)
  393. self.assertEqual(scraper.extracted_data['changelog'], '')
  394. class TestReleasesExtraction(unittest.TestCase):
  395. """Test GitHub Releases extraction (C1.9)"""
  396. def setUp(self):
  397. if not PYGITHUB_AVAILABLE:
  398. self.skipTest("PyGithub not installed")
  399. from skill_seekers.cli.github_scraper import GitHubScraper
  400. self.GitHubScraper = GitHubScraper
  401. def test_extract_releases_success(self):
  402. """Test successful releases extraction"""
  403. config = {
  404. 'repo': 'facebook/react',
  405. 'name': 'react',
  406. 'github_token': None
  407. }
  408. # Create mock releases
  409. mock_release1 = Mock()
  410. mock_release1.tag_name = 'v18.0.0'
  411. mock_release1.title = 'React 18.0.0'
  412. mock_release1.body = 'New features:\n- Concurrent rendering'
  413. mock_release1.draft = False
  414. mock_release1.prerelease = False
  415. mock_release1.created_at = datetime(2023, 3, 1)
  416. mock_release1.published_at = datetime(2023, 3, 1)
  417. mock_release1.html_url = 'https://github.com/facebook/react/releases/tag/v18.0.0'
  418. mock_release1.tarball_url = 'https://github.com/facebook/react/archive/v18.0.0.tar.gz'
  419. mock_release1.zipball_url = 'https://github.com/facebook/react/archive/v18.0.0.zip'
  420. mock_release2 = Mock()
  421. mock_release2.tag_name = 'v18.0.0-rc.0'
  422. mock_release2.title = 'React 18.0.0 RC'
  423. mock_release2.body = 'Release candidate'
  424. mock_release2.draft = False
  425. mock_release2.prerelease = True
  426. mock_release2.created_at = datetime(2023, 2, 1)
  427. mock_release2.published_at = datetime(2023, 2, 1)
  428. mock_release2.html_url = 'https://github.com/facebook/react/releases/tag/v18.0.0-rc.0'
  429. mock_release2.tarball_url = 'https://github.com/facebook/react/archive/v18.0.0-rc.0.tar.gz'
  430. mock_release2.zipball_url = 'https://github.com/facebook/react/archive/v18.0.0-rc.0.zip'
  431. with patch('skill_seekers.cli.github_scraper.Github'):
  432. scraper = self.GitHubScraper(config)
  433. scraper.repo = Mock()
  434. scraper.repo.get_releases.return_value = [mock_release1, mock_release2]
  435. scraper._extract_releases()
  436. self.assertIn('releases', scraper.extracted_data)
  437. releases = scraper.extracted_data['releases']
  438. self.assertEqual(len(releases), 2)
  439. # Check first release
  440. self.assertEqual(releases[0]['tag_name'], 'v18.0.0')
  441. self.assertEqual(releases[0]['name'], 'React 18.0.0')
  442. self.assertFalse(releases[0]['draft'])
  443. self.assertFalse(releases[0]['prerelease'])
  444. self.assertIn('Concurrent rendering', releases[0]['body'])
  445. # Check second release (prerelease)
  446. self.assertEqual(releases[1]['tag_name'], 'v18.0.0-rc.0')
  447. self.assertTrue(releases[1]['prerelease'])
  448. def test_extract_releases_empty(self):
  449. """Test releases extraction with no releases"""
  450. config = {
  451. 'repo': 'test/norepo',
  452. 'name': 'norepo',
  453. 'github_token': None
  454. }
  455. with patch('skill_seekers.cli.github_scraper.Github'):
  456. scraper = self.GitHubScraper(config)
  457. scraper.repo = Mock()
  458. scraper.repo.get_releases.return_value = []
  459. scraper._extract_releases()
  460. self.assertIn('releases', scraper.extracted_data)
  461. self.assertEqual(scraper.extracted_data['releases'], [])
class TestGitHubToSkillConverter(unittest.TestCase):
    """Test GitHubToSkillConverter and skill building (C1.10)"""
    def setUp(self):
        if not PYGITHUB_AVAILABLE:
            self.skipTest("PyGithub not installed")
        from skill_seekers.cli.github_scraper import GitHubToSkillConverter
        self.GitHubToSkillConverter = GitHubToSkillConverter
        # Create temporary directory for test output
        self.temp_dir = tempfile.mkdtemp()
        self.output_dir = Path(self.temp_dir)
        # Create mock data file shaped like the scraper's JSON output
        # (repo_info, readme, languages, issues, changelog, releases).
        self.data_file = self.output_dir / "test_github_data.json"
        self.mock_data = {
            'repo_info': {
                'name': 'react',
                'full_name': 'facebook/react',
                'description': 'A JavaScript library',
                'stars': 200000,
                'language': 'JavaScript'
            },
            'readme': '# React\n\nA JavaScript library for building user interfaces.',
            'languages': {
                'JavaScript': {'bytes': 8000, 'percentage': 80.0},
                'TypeScript': {'bytes': 2000, 'percentage': 20.0}
            },
            'issues': [
                {
                    'number': 123,
                    'title': 'Bug in useState',
                    'state': 'open',
                    'labels': ['bug'],
                    'milestone': 'v18.0',
                    'created_at': '2023-01-01T10:00:00',
                    'updated_at': '2023-01-02T10:00:00',
                    'closed_at': None,
                    'url': 'https://github.com/facebook/react/issues/123',
                    'body': 'Issue description'
                }
            ],
            'changelog': '# Changelog\n\n## v18.0.0\n- New features',
            'releases': [
                {
                    'tag_name': 'v18.0.0',
                    'name': 'React 18.0.0',
                    'body': 'Release notes',
                    'published_at': '2023-03-01T10:00:00',
                    'prerelease': False,
                    'draft': False,
                    'url': 'https://github.com/facebook/react/releases/tag/v18.0.0'
                }
            ]
        }
        with open(self.data_file, 'w') as f:
            json.dump(self.mock_data, f)
    def tearDown(self):
        # Clean up temporary directory
        if hasattr(self, 'temp_dir'):
            shutil.rmtree(self.temp_dir, ignore_errors=True)
    def test_init_loads_data(self):
        """Test that converter loads data file on initialization"""
        config = {
            'repo': 'facebook/react',
            'name': 'test',
            'description': 'Test skill'
        }
        # Override data file path
        # NOTE: __init__ is patched to a no-op so the converter object is
        # created bare; data_file is then pointed at the fixture written in
        # setUp and _load_data() is driven directly.
        with patch('skill_seekers.cli.github_scraper.GitHubToSkillConverter.__init__') as mock_init:
            mock_init.return_value = None
            converter = self.GitHubToSkillConverter(config)
            converter.data_file = str(self.data_file)
            converter.data = converter._load_data()
            self.assertIn('repo_info', converter.data)
            self.assertEqual(converter.data['repo_info']['name'], 'react')
    def test_build_skill_creates_directory_structure(self):
        """Test that build_skill creates proper directory structure"""
        # Create data file in expected location
        data_file_path = self.output_dir / 'test_github_data.json'
        with open(data_file_path, 'w') as f:
            json.dump(self.mock_data, f)
        config = {
            'repo': 'facebook/react',
            'name': 'test',
            'description': 'Test skill'
        }
        # Patch the paths to use our temp directory
        # (_load_data is stubbed so __init__ never touches the filesystem;
        # skill_dir and data are then forced onto the temp directory before
        # build_skill() runs.)
        with patch('skill_seekers.cli.github_scraper.GitHubToSkillConverter._load_data') as mock_load:
            mock_load.return_value = self.mock_data
            converter = self.GitHubToSkillConverter(config)
            converter.skill_dir = str(self.output_dir / 'test_skill')
            converter.data = self.mock_data
            converter.build_skill()
            # build_skill is expected to create SKILL.md plus a references/
            # subdirectory under skill_dir.
            skill_dir = Path(converter.skill_dir)
            self.assertTrue(skill_dir.exists())
            self.assertTrue((skill_dir / 'SKILL.md').exists())
            self.assertTrue((skill_dir / 'references').exists())
  557. class TestErrorHandling(unittest.TestCase):
  558. """Test error handling and edge cases"""
  559. def setUp(self):
  560. if not PYGITHUB_AVAILABLE:
  561. self.skipTest("PyGithub not installed")
  562. from skill_seekers.cli.github_scraper import GitHubScraper
  563. self.GitHubScraper = GitHubScraper
  564. def test_invalid_repo_name(self):
  565. """Test handling of invalid repository name"""
  566. config = {
  567. 'repo': 'invalid_repo_format',
  568. 'name': 'test',
  569. 'github_token': None
  570. }
  571. with patch('skill_seekers.cli.github_scraper.Github'):
  572. scraper = self.GitHubScraper(config)
  573. scraper.repo = None
  574. scraper.github.get_repo = Mock(side_effect=GithubException(404, 'Not found'))
  575. # Should raise ValueError with helpful message
  576. with self.assertRaises(ValueError) as context:
  577. scraper._fetch_repository()
  578. self.assertIn('Repository not found', str(context.exception))
  579. def test_rate_limit_error(self):
  580. """Test handling of rate limit errors"""
  581. config = {
  582. 'repo': 'facebook/react',
  583. 'name': 'react',
  584. 'github_token': None,
  585. 'max_issues': 10
  586. }
  587. with patch('skill_seekers.cli.github_scraper.Github'):
  588. scraper = self.GitHubScraper(config)
  589. scraper.repo = Mock()
  590. scraper.repo.get_issues.side_effect = GithubException(403, 'Rate limit exceeded')
  591. # Should handle gracefully and log warning
  592. scraper._extract_issues()
  593. # Should not crash, just log warning
# Allow running this module directly: `python test_github_scraper.py`.
if __name__ == '__main__':
    unittest.main()