| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170 |
- import pytest
- from unittest.mock import patch, Mock
- import requests
- from skill_seekers.cli.llms_txt_downloader import LlmsTxtDownloader
def test_successful_download():
    """download() hands back the response text when it is long, valid markdown."""
    fetcher = LlmsTxtDownloader("https://example.com/llms.txt")

    # Markdown sample (headers, list, fenced code) padded well past 100 characters.
    payload = (
        "# Header\n\n"
        "Some content with markdown patterns.\n\n"
        "## Subheader\n\n"
        "- List item\n- Another item\n\n"
        "```python\ncode_block()\n```\n"
    ) + "x" * 200

    fake_response = Mock()
    fake_response.text = payload
    fake_response.raise_for_status = Mock()

    with patch('requests.get', return_value=fake_response) as mock_get:
        result = fetcher.download()

    assert result is not None
    assert isinstance(result, str)
    assert len(result) > 100
    assert "# Header" in result
    mock_get.assert_called_once()
def test_timeout_with_retry():
    """When every request times out, download() retries and finally returns None."""
    fetcher = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=2)
    timeout_error = requests.Timeout("Connection timeout")

    # time.sleep is patched so the pause between attempts costs no wall-clock time.
    with patch('requests.get', side_effect=timeout_error) as mock_get, \
            patch('time.sleep') as mock_sleep:
        result = fetcher.download()

    assert result is None
    assert mock_get.call_count == 2  # two attempts in total: the first try plus one retry
    assert mock_sleep.call_count == 1  # exactly one pause, between the two attempts
def test_empty_content_rejection():
    """A response body shorter than 100 characters makes download() return None."""
    fetcher = LlmsTxtDownloader("https://example.com/llms.txt")

    tiny_response = Mock()
    tiny_response.raise_for_status = Mock()
    tiny_response.text = "# Short"

    with patch('requests.get', return_value=tiny_response):
        result = fetcher.download()

    assert result is None
def test_non_markdown_rejection():
    """Long text with no markdown markers makes download() return None."""
    fetcher = LlmsTxtDownloader("https://example.com/llms.txt")

    plain_response = Mock()
    plain_response.raise_for_status = Mock()
    # Repeated so the body is long; it still contains no markdown syntax.
    plain_response.text = "Plain text without any markdown patterns at all. " * 10

    with patch('requests.get', return_value=plain_response):
        result = fetcher.download()

    assert result is None
def test_http_error_handling():
    """An HTTPError from raise_for_status() is retried, then download() returns None."""
    fetcher = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=2)

    # The request itself succeeds; the failure surfaces from raise_for_status().
    failing_response = Mock()
    failing_response.raise_for_status.side_effect = requests.HTTPError("404 Not Found")

    with patch('requests.get', return_value=failing_response) as mock_get, \
            patch('time.sleep'):
        result = fetcher.download()

    assert result is None
    assert mock_get.call_count == 2  # initial attempt plus one retry
def test_exponential_backoff():
    """Test that exponential backoff delays are correct and occur in order.

    With ``max_retries=3`` and every request timing out, ``download()`` is
    expected to return ``None`` after sleeping twice between attempts: 1s
    first, then 2s (2^0, 2^1).
    """
    # Local import: the file-level import only brings in `patch` and `Mock`.
    from unittest.mock import call

    downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=3)

    with patch('requests.get', side_effect=requests.Timeout("Connection timeout")), \
            patch('time.sleep') as mock_sleep:
        content = downloader.download()

    assert content is None
    assert mock_sleep.call_count == 2
    # Compare the ordered call list rather than using assert_any_call():
    # assert_any_call() ignores order, so a reversed schedule (2s then 1s)
    # would otherwise pass.
    assert mock_sleep.call_args_list == [call(1), call(2)]
def test_markdown_validation():
    """_is_markdown() accepts common markdown constructs and rejects plain prose."""
    fetcher = LlmsTxtDownloader("https://example.com/llms.txt")

    # One sample per markdown construct that must be recognised.
    markdown_samples = (
        "# Header",
        "## Subheader",
        "```code```",
        "- list item",
        "* bullet point",
        "`inline code`",
    )
    for sample in markdown_samples:
        assert fetcher._is_markdown(sample)

    # Text with no markdown markers must be rejected.
    assert not fetcher._is_markdown("Plain text without any markdown patterns")
def test_custom_timeout():
    """The timeout given to the constructor is forwarded to requests.get()."""
    fetcher = LlmsTxtDownloader("https://example.com/llms.txt", timeout=10)

    ok_response = Mock()
    ok_response.raise_for_status = Mock()
    ok_response.text = "# Header\n\nContent " * 50

    with patch('requests.get', return_value=ok_response) as mock_get:
        result = fetcher.download()

    assert result is not None
    # call_args unpacks into (positional args, keyword args) of the last call.
    _, request_kwargs = mock_get.call_args
    assert request_kwargs['timeout'] == 10
def test_custom_max_retries():
    """max_retries=5 results in exactly five request attempts before giving up."""
    fetcher = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=5)
    timeout_error = requests.Timeout("Connection timeout")

    # time.sleep is patched so the retry pauses do not slow the test down.
    with patch('requests.get', side_effect=timeout_error) as mock_get, \
            patch('time.sleep'):
        result = fetcher.download()

    assert result is None
    assert mock_get.call_count == 5  # one request per allowed attempt
def test_user_agent_header():
    """requests.get() receives the downloader's custom User-Agent header."""
    fetcher = LlmsTxtDownloader("https://example.com/llms.txt")

    ok_response = Mock()
    ok_response.raise_for_status = Mock()
    ok_response.text = "# Header\n\nContent " * 50

    with patch('requests.get', return_value=ok_response) as mock_get:
        result = fetcher.download()

    assert result is not None
    # call_args unpacks into (positional args, keyword args) of the last call.
    _, request_kwargs = mock_get.call_args
    sent_headers = request_kwargs['headers']
    assert sent_headers['User-Agent'] == 'Skill-Seekers-llms.txt-Reader/1.0'
def test_get_proper_filename():
    """get_proper_filename() turns llms-full.txt into llms-full.md."""
    fetcher = LlmsTxtDownloader("https://hono.dev/llms-full.txt")
    proper_name = fetcher.get_proper_filename()

    assert not proper_name.endswith('.txt')
    assert proper_name == "llms-full.md"
def test_get_proper_filename_standard():
    """get_proper_filename() turns the standard llms.txt into llms.md."""
    proper_name = LlmsTxtDownloader("https://hono.dev/llms.txt").get_proper_filename()

    assert proper_name == "llms.md"
def test_get_proper_filename_small():
    """get_proper_filename() turns the small variant llms-small.txt into llms-small.md."""
    proper_name = LlmsTxtDownloader("https://hono.dev/llms-small.txt").get_proper_filename()

    assert proper_name == "llms-small.md"
|