In [65]:
import pywikibot
from bs4 import BeautifulSoup
import urllib.request
import re
# Connect to enwiki
enwiki = pywikibot.Site('en', 'wikipedia')
# and then to wikidata
enwiki_repo = enwiki.data_repository()
In [77]:
def author_citation(url):
    list1 = []
    with urllib.request.urlopen(url) as response:
        html = response.read()
        soup = BeautifulSoup(html, "html.parser")
        text3 = soup.findAll(attrs={'name' : re.compile('citation_author')})
        print(soup)
        #ti = re.match('citation_author', text3)
        #for text in text3:
            #tip = text['content']
            #m = re.match(r"(?P<first_name>\w+) (?P<last_name>\w+)", tip)
            #t = m.group('first_name')
            #p = m.group('last_name')
            #t = re.match(r'^(.+\.)+\s+(.+)$', tip)
            #new_list = list(t.groups())
            #list1.append(new_list)
        
        #for lis in list1:
            #print('name: '+ lis[0])
            #f_name = new_list[0]
            #l_name = new_list[1]
            
            #list1.append(tip)
    #return list1

author_citation('https://pubmed.ncbi.nlm.nih.gov/22366369')
<!DOCTYPE html>

<html lang="en">
<head itemscope="" itemtype="http://schema.org/WebPage" prefix="og: http://ogp.me/ns#">
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<!-- Mobile properties -->
<meta content="True" name="HandheldFriendly"/>
<meta content="320" name="MobileOptimized"/>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<link href="https://cdn.ncbi.nlm.nih.gov" rel="preconnect"/>
<link href="https://www.ncbi.nlm.nih.gov" rel="preconnect"/>
<link href="https://www.google-analytics.com" rel="preconnect"/>
<link href="https://cdn.ncbi.nlm.nih.gov/pubmed/4d22484f-0ddb-40f4-9209-86f77a0512e5/CACHE/css/output.35e8b192ea09.css" rel="stylesheet" type="text/css"/>
<link href="https://cdn.ncbi.nlm.nih.gov/pubmed/4d22484f-0ddb-40f4-9209-86f77a0512e5/CACHE/css/output.452c70ce66f7.css" rel="stylesheet" type="text/css"/>
<link href="https://cdn.ncbi.nlm.nih.gov/pubmed/4d22484f-0ddb-40f4-9209-86f77a0512e5/CACHE/css/output.dc65705de283.css" rel="stylesheet" type="text/css"/>
<link href="https://cdn.ncbi.nlm.nih.gov/pubmed/4d22484f-0ddb-40f4-9209-86f77a0512e5/CACHE/css/output.1b9d4427b5be.css" rel="stylesheet" type="text/css"/>
<title>Diversification rates and chromosome evolution in the most diverse angiosperm genus of the temperate zone (Carex, Cyperaceae)</title>
<!-- Favicons -->
<link href="https://cdn.ncbi.nlm.nih.gov/coreutils/nwds/img/favicons/favicon.ico" rel="shortcut icon" type="image/ico">
<link href="https://cdn.ncbi.nlm.nih.gov/coreutils/nwds/img/favicons/favicon.png" rel="icon" type="image/png">
<!-- 192x192, as recommended for Android
  http://updates.html5rocks.com/2014/11/Support-for-theme-color-in-Chrome-39-for-Android
  -->
<link href="https://cdn.ncbi.nlm.nih.gov/coreutils/nwds/img/favicons/favicon-192.png" rel="icon" sizes="192x192" type="image/png">
<!-- 57x57 (precomposed) for iPhone 3GS, pre-2011 iPod Touch and older Android devices -->
<link href="https://cdn.ncbi.nlm.nih.gov/coreutils/nwds/img/favicons/favicon-57.png" rel="apple-touch-icon-precomposed"/>
<!-- 72x72 (precomposed) for 1st generation iPad, iPad 2 and iPad mini -->
<link href="https://cdn.ncbi.nlm.nih.gov/coreutils/nwds/img/favicons/favicon-72.png" rel="apple-touch-icon-precomposed" sizes="72x72"/>
<!-- 114x114 (precomposed) for iPhone 4, 4S, 5 and post-2011 iPod Touch -->
<link href="https://cdn.ncbi.nlm.nih.gov/coreutils/nwds/img/favicons/favicon-114.png" rel="apple-touch-icon-precomposed" sizes="114x114"/>
<!-- 144x144 (precomposed) for iPad 3rd and 4th generation -->
<link href="https://cdn.ncbi.nlm.nih.gov/coreutils/nwds/img/favicons/favicon-144.png" rel="apple-touch-icon-precomposed" sizes="144x144"/>
<!-- For Pinger + Google Optimize integration (NS-820) -->
<meta content="" name="ncbi_sg_optimize_id"/>
<!-- Mobile browser address bar color -->
<meta content="#20558a" name="theme-color"/>
<!-- Preserve the Referrer when going from HTTPS to HTTP -->
<meta content="origin-when-cross-origin" name="referrer"/>
<meta content="true" name="ncbi_pinger_gtm_track">
<!-- Logging params: Pinger defaults -->
<meta content="pubmed" name="ncbi_app">
<meta content="pubmed" name="ncbi_db">
<meta content="2D940001D3A8BE6500002FA7B0F60077.1.m_2" name="ncbi_phid">
<meta content="https://www.ncbi.nlm.nih.gov/stat" name="ncbi_pinger_stat_url">
<meta content="literature" name="log_category">
<meta content="pubmed" name="ncbi_cost_center">
<!-- Logging params: Pinger custom -->
<meta content="abstract" name="ncbi_pdid">
<meta content="retrieve" name="log_op"/>
<meta content="22366369" name="uid"/>
<meta content="22366369" name="ncbi_uid"/>
<meta content="22366369" name="log_displayeduids"/>
<meta content="pubmed" name="log_source_db"/>
<meta content="true" name="log_icons_present"/>
<!-- Social meta tags for unfurling urls -->
<meta content="The sedge family (Cyperaceae: Poales; ca. 5600 spp.) is a hyperdiverse cosmopolitan group with centres of species diversity in Africa, Australia, eastern Asia, North America, and the Neotropics. Carex, with ca. 40% of the species in the family, is one of the most species-rich angiosperm genera and t …" name="description"/><meta content="pmid:22366369, doi:10.1016/j.ympev.2012.02.005, Research Support, Non-U.S. Gov't, Research Support, U.S. Gov't, Non-P.H.S., Marcial Escudero, Andrew L Hipp, Luis M Valente, Bayes Theorem, Carex Plant / genetics*, Chromosomes, Plant / genetics*, Climate, Evolution, Molecular*, Genetic Speciation*, Genetic Variation, Phylogeny, PubMed Abstract, NIH, NLM, NCBI, National Institutes of Health, National Center for Biotechnology Information, National Library of Medicine, MEDLINE" name="keywords"/><meta content="index,nofollow,noarchive" name="robots"/><meta content="Diversification rates and chromosome evolution in the most diverse angiosperm genus of the temperate zone (Carex, Cyperaceae) - PubMed" property="og:title"/><meta content="https://pubmed.ncbi.nlm.nih.gov/22366369/" property="og:url"/><meta content="The sedge family (Cyperaceae: Poales; ca. 5600 spp.) is a hyperdiverse cosmopolitan group with centres of species diversity in Africa, Australia, eastern Asia, North America, and the Neotropics. Carex, with ca. 40% of the species in the family, is one of the most species-rich angiosperm genera and t …" property="og:description"/><meta content="https://cdn.ncbi.nlm.nih.gov/pubmed/persistent/pubmed-meta-image.png" property="og:image"/><meta content="https://cdn.ncbi.nlm.nih.gov/pubmed/persistent/pubmed-meta-image.png" property="og:image:secure_url"/><meta content="website" property="og:type"/><meta content="PubMed" property="og:site_name"/><meta content="pubmed.ncbi.nlm.nih.gov" name="twitter:domain"/><meta content="summary" name="twitter:card"/><meta content="Diversification rates and chromosome evolution in the most diverse angiosperm genus of the temperate zone (Carex, Cyperaceae) - PubMed" name="twitter:title"/><meta content="https://pubmed.ncbi.nlm.nih.gov/22366369/" name="twitter:url"/><meta content="The sedge family (Cyperaceae: Poales; ca. 5600 spp.) is a hyperdiverse cosmopolitan group with centres of species diversity in Africa, Australia, eastern Asia, North America, and the Neotropics. Carex, with ca. 40% of the species in the family, is one of the most species-rich angiosperm genera and t …" name="twitter:description"/><meta content="https://cdn.ncbi.nlm.nih.gov/pubmed/persistent/pubmed-meta-image.png" name="twitter:image"/><meta content="Diversification rates and chromosome evolution in the most diverse angiosperm genus of the temperate zone (Carex, Cyperaceae) - PubMed" itemprop="name"/><meta content="https://pubmed.ncbi.nlm.nih.gov/22366369/" itemprop="url"/><meta content="The sedge family (Cyperaceae: Poales; ca. 5600 spp.) is a hyperdiverse cosmopolitan group with centres of species diversity in Africa, Australia, eastern Asia, North America, and the Neotropics. Carex, with ca. 40% of the species in the family, is one of the most species-rich angiosperm genera and t …" itemprop="description"/><meta content="https://cdn.ncbi.nlm.nih.gov/pubmed/persistent/pubmed-meta-image.png" itemprop="image"/>
<!-- OpenSearch XML -->
<link href="https://cdn.ncbi.nlm.nih.gov/pubmed/persistent/opensearch.xml" rel="search" title="PubMed search" type="application/opensearchdescription+xml"/>
<!-- Disables severely broken elements when no JS -->
<noscript>
<link href="https://cdn.ncbi.nlm.nih.gov/pubmed/4d22484f-0ddb-40f4-9209-86f77a0512e5/core/no-script.css" rel="stylesheet" type="text/css"/>
</noscript>
<link href="https://pubmed.ncbi.nlm.nih.gov/22366369/" rel="canonical">
<meta content="Molecular phylogenetics and evolution" name="citation_journal_title">
<meta content="Mol Phylogenet Evol" name="citation_journal_abbrev">
<meta content="Mol Phylogenet Evol" name="citation_publisher">
<meta content="63" name="citation_volume">
<meta content="3" name="citation_issue">
<meta content="650" name="citation_firstpage">
<meta content="655" name="citation_lastpage">
<meta content="2012/6" name="citation_publication_date">
<meta content="Diversification rates and chromosome evolution in the most diverse angiosperm genus of the temperate zone (Carex, Cyperaceae)" name="citation_title">
<meta content="Marcial Escudero" name="citation_author"/>
<meta content="The Morton Arboretum, 4100 Illinois Route 53, Lisle, IL 60532-1293, USA. amesclir@gmail.com" name="citation_author_institution">
<meta content="Andrew L Hipp" name="citation_author"/>
<meta content="Marcia J Waterway" name="citation_author"/>
<meta content="Luis M Valente" name="citation_author"/>
<meta content="22366369" name="citation_pmid"/>
<meta content="10.1016/j.ympev.2012.02.005" name="citation_doi">
<meta content="1095-9513" name="citation_issn">
<meta content="English" name="citation_language">
<meta content="The sedge family (Cyperaceae: Poales; ca. 5600 spp.) is a hyperdiverse cosmopolitan group with centres of species diversity in Africa, Australia, eastern Asia, North America, and the Neotropics. Carex, with ca. 40% of the species in the family, is one of the most species-rich angiosperm genera and the most diverse in temperate regions of the Northern Hemisphere, making it atypical among plants in that it inverts the latitudinal gradient of species richness. Moreover, Carex exhibits high rates of chromosome rearrangement via fission, fusion, and translocation, which distinguishes it from the rest of the Cyperaceae. Here, we use a phylogenetic framework to examine how the onset of contemporary temperate climates and the processes of chromosome evolution have influenced the diversification dynamics of Carex. We provide estimates of diversification rates and map chromosome transitions across the evolutionary history of the main four clades of Carex. We demonstrate that Carex underwent a shift in diversification rates sometime between the Late Eocene and the Oligocene, during a global cooling period, which fits with a transition in diploid chromosome number. We suggest that adaptive radiation to novel temperate climates, aided by a shift in the mode of chromosome evolution, may explain the large-scale radiation of Carex and its latitudinal pattern of species richness." lang="" name="citation_abstract">
</meta></meta></meta></meta></meta></meta></meta></meta></meta></meta></meta></meta></meta></meta></link></meta></meta></meta></meta></meta></meta></meta></meta></link></link></link></head>
<body>
<div class="article-page" data-article-pmid="22366369" id="article-page">
<div class="article-details" id="article-details">
<header class="heading" id="heading">
<h1 class="heading-title">
  
    
    
    
    
      
  Diversification rates and chromosome evolution in the most diverse angiosperm genus of the temperate zone (Carex, Cyperaceae)


    
  
</h1>
<div class="article-citation" style="display:inline-block;">
<span class="article-source">
<span class="journal" style="display:inline-block;">Mol Phylogenet Evol</span><span class="period">. </span><span class="cit">2012 Jun;63(3):650-5.</span>
</span>
<span class="citation-doi">
        doi: 10.1016/j.ympev.2012.02.005.
      </span>
<span class="secondary-date">
        Epub 2012 Feb 19.
      </span>
</div>
<div class="email-authors">
<div class="expanded-authors" id="full-view-expanded-authors">
<div class="authors">
<h3 class="title">
            Authors
          </h3>
<div class="authors-list">
<span class="authors-list-item"><a class="full-name" data-ga-action="author_link" data-ga-category="search" data-ga-label="Marcial Escudero" href="/?term=Escudero+M&amp;cauthor_id=22366369" ref="linksrc=author_name_link">Marcial Escudero</a><sup class="affiliation-links"><span class="author-sup-separator"> </span><a class="affiliation-link" href="#affiliation-1" ref="linksrc=author_aff" title="The Morton Arboretum, 4100 Illinois Route 53, Lisle, IL 60532-1293, USA. amesclir@gmail.com">
                1
              </a></sup><span class="comma">, </span></span><span class="authors-list-item"><a class="full-name" data-ga-action="author_link" data-ga-category="search" data-ga-label="Andrew L Hipp" href="/?term=Hipp+AL&amp;cauthor_id=22366369" ref="linksrc=author_name_link">Andrew L Hipp</a><span class="comma">, </span></span><span class="authors-list-item"><a class="full-name" data-ga-action="author_link" data-ga-category="search" data-ga-label="Marcia J Waterway" href="/?term=Waterway+MJ&amp;cauthor_id=22366369" ref="linksrc=author_name_link">Marcia J Waterway</a><span class="comma">, </span></span><span class="authors-list-item"><a class="full-name" data-ga-action="author_link" data-ga-category="search" data-ga-label="Luis M Valente" href="/?term=Valente+LM&amp;cauthor_id=22366369" ref="linksrc=author_name_link">Luis M Valente</a></span>
</div>
</div>
<div class="affiliations">
<h3 class="title">
            Affiliation
          </h3>
<ul class="item-list">
<li data-affiliation-id="affiliation-1"><sup class="key">1</sup> The Morton Arboretum, 4100 Illinois Route 53, Lisle, IL 60532-1293, USA. amesclir@gmail.com</li>
</ul>
</div>
</div>
</div>
<ul class="identifiers" id="full-view-identifiers">
<li>
<span class="identifier pubmed">
<span class="id-label">
    
      PMID:
    
  </span>
<a class="id-link" data-ga-action="PMID" data-ga-category="full_text" href="http://pubmed.ncbi.nlm.nih.gov/22366369/" ref="linksrc=article_id_link&amp;article_id=22366369&amp;id_type=PubMed" rel="noopener" target="_blank">
      22366369
    </a>
</span>
</li>
<li>
<span class="identifier doi">
<span class="id-label">
    
      DOI:
    
  </span>
<a class="id-link" data-ga-action="DOI" data-ga-category="full_text" href="https://doi.org/10.1016/j.ympev.2012.02.005" ref="linksrc=article_id_link&amp;article_id=10.1016/j.ympev.2012.02.005&amp;id_type=DOI" rel="noopener" target="_blank">
      10.1016/j.ympev.2012.02.005
    </a>
</span>
</li>
</ul>
</header>
<div class="abstract" id="abstract">
<h2 class="title">
        Abstract
        
      </h2>
<div class="abstract-content selected" id="enc-abstract">
<p>
      
      The sedge family (Cyperaceae: Poales; ca. 5600 spp.) is a hyperdiverse cosmopolitan group with centres of species diversity in Africa, Australia, eastern Asia, North America, and the Neotropics. Carex, with ca. 40% of the species in the family, is one of the most species-rich angiosperm genera and the most diverse in temperate regions of the Northern Hemisphere, making it atypical among plants in that it inverts the latitudinal gradient of species richness. Moreover, Carex exhibits high rates of chromosome rearrangement via fission, fusion, and translocation, which distinguishes it from the rest of the Cyperaceae. Here, we use a phylogenetic framework to examine how the onset of contemporary temperate climates and the processes of chromosome evolution have influenced the diversification dynamics of Carex. We provide estimates of diversification rates and map chromosome transitions across the evolutionary history of the main four clades of Carex. We demonstrate that Carex underwent a shift in diversification rates sometime between the Late Eocene and the Oligocene, during a global cooling period, which fits with a transition in diploid chromosome number. We suggest that adaptive radiation to novel temperate climates, aided by a shift in the mode of chromosome evolution, may explain the large-scale radiation of Carex and its latitudinal pattern of species richness.
    </p>
</div>
</div>
<p class="copyright" id="copyright">
    Copyright © 2012 Elsevier Inc. All rights reserved.
  </p>
<div class="publication-types keywords-section" id="publication-types">
<h2 class="title">
        Publication types
      </h2>
<ul class="keywords-list"><li>
                
                  





  Research Support, Non-U.S. Gov't


                
              </li><li>
                
                  





  Research Support, U.S. Gov't, Non-P.H.S.


                
              </li></ul>
</div>
<div class="mesh-terms keywords-section" id="mesh-terms">
<h2 class="title">
        MeSH terms
      </h2>
<ul class="keywords-list"><li>
                  
                    





  Bayes Theorem


                  
                </li><li>
                  
                    





  Carex Plant / genetics*


                  
                </li><li>
                  
                    





  Chromosomes, Plant / genetics*


                  
                </li><li>
                  
                    





  Climate


                  
                </li><li>
                  
                    





  Evolution, Molecular*


                  
                </li><li>
                  
                    





  Genetic Speciation*


                  
                </li><li>
                  
                    





  Genetic Variation


                  
                </li><li>
                  
                    





  Phylogeny


                  
                </li></ul>
</div>
</div>
</div>
</body>
</html>
In [ ]:
>>> m = re.match(r"(?P<first_name>\w+) (?P<last_name>\w+)", "Malcolm Reynolds")
>>> m.group('first_name')
'Malcolm'
>>> m.group('last_name')
'Reynolds'
Named groups can also be referred to by their index:

>>>
>>> m.group(1)
'Malcolm'
>>> m.group(2)
'Reynolds'