2 years ago
#3585
Charalampos Lianos
find_elements only locates the first result
I am trying to scrape https://edition.cnn.com/ for a school project...I define a method that gets a query as an input and I must get the title, snippet, etc. of all the articles from all the pages...I have used a for loop, but it just returns me 10 times the data of the first article on each page...here is my script...
I tried scrolling down the page and defining the results with different locators.
while True:
results=driver.find_elements_by_css_selector('[class="cnn-search__result cnn-search__result--article"]')
time.sleep(5)
for result in results:
link,title,date,snippet='NA','NA','NA','NA'
try:
linkBox=driver.find_element_by_css_selector('[class="cnn-search__result-headline"]')
except NoSuchElementException:
linkBox=None
if linkBox:
link=linkBox.get_attribute('href')
try:
titleBox=driver.find_element_by_css_selector('[class="cnn-search__result-headline"]')
except NoSuchElementException:
titleBox=None
if titleBox:title=titleBox.text
try:
dateBox=driver.find_element_by_css_selector('[class="cnn-search__result-publish-date"]')
except NoSuchElementException:
dateBox=None
if dateBox:date=dateBox.text
try:
snippetBox=driver.find_element_by_css_selector('[class="cnn-search__result-body"]')
except NoSuchElementException:
snippetBox=None
if snippetBox:snippet=snippetBox.text
writer.writerow([link,title,date,snippet])
driver.execute_script("arguments[0].scrollIntoView();",result)
time.sleep(1)
try:
nextButton=driver.find_element_by_css_selector('[class="pagination-arrow pagination-arrow-right cnnSearchPageLink text-active"]')
driver.execute_script("arguments[0].click();",nextButton)
time.sleep(3)
except NoSuchElementException:
driver.quit()
break
fw.close()
python
for-loop
findelement
0 Answers
Your Answer