Entering edit mode
5.5 years ago
grvpanchal
▴
30
I am trying to get a list of genes in chromosomal order using file_backend.cgi. I first retrieve all gene IDs with esearch.fcgi, then post those IDs to epost.fcgi to obtain a WebEnv, which I later pass to file_backend.cgi. I expect to get the complete gene list, but the output is limited to 2000 entries. Below is the code:
// Download every NCBI Gene record for one taxon as a tabular file sorted by chromosome.
// Flow: esearch (record count) -> esearch (all ids) -> epost (store the ids, get
// WebEnv/QueryKey) -> file_backend.cgi (tabular download), then echo the table.
set_time_limit( 0 ); // remove PHP's execution time limit; the requests below can be slow

$taxid = '559292';

// Single esearch URL built from $taxid, so the count request and the id request
// can never query different taxa.
$esearch_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=gene&term=' . urlencode( $taxid . '[taxid]' ) . '&retmode=json';

// Runs one esearch request and returns the decoded JSON object.
// Throws RuntimeException when the request fails or the reply has no "esearchresult".
$run_esearch = function ( $url ) {
    $body = file_get_contents( $url );
    if ( $body === false ) {
        throw new RuntimeException( 'esearch request failed: ' . $url );
    }
    $decoded = json_decode( $body );
    if ( !isset( $decoded->esearchresult ) ) {
        throw new RuntimeException( 'esearch returned an unexpected response: ' . $url );
    }
    return $decoded;
};

// First pass: only the total number of matching records is needed.
$id_list = $run_esearch( $esearch_url );
if ( !isset( $id_list->esearchresult->count ) ) {
    throw new RuntimeException( 'esearch response does not contain a record count' );
}
$count = $id_list->esearchresult->count;
// $count = 1000;

// Second pass: ask for all ids at once by setting retmax to the total count.
$id_list = $run_esearch( $esearch_url . '&retmax=' . urlencode( $count ) );
if ( empty( $id_list->esearchresult->idlist ) ) {
    throw new RuntimeException( 'esearch returned no gene ids for taxid ' . $taxid );
}

// Comma-separated id list in the order esearch returned it.
$cs_id = implode( ',', $id_list->esearchresult->idlist );

// Upload the ids with epost; the id list is sent in the POST body rather than
// in the query string.
$env_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/epost.fcgi';
$ch = curl_init( $env_url );
$myvars = 'db=gene&id=' . $cs_id;
curl_setopt_array( $ch, array(
    CURLOPT_POST           => 1,
    CURLOPT_POSTFIELDS     => $myvars,
    CURLOPT_FOLLOWLOCATION => 1,
    CURLOPT_HEADER         => 0,
    CURLOPT_RETURNTRANSFER => 1, // return the body from curl_exec() instead of printing it
) );
$response = curl_exec( $ch );
if ( $response === false ) {
    throw new RuntimeException( 'epost request failed: ' . curl_error( $ch ) );
}

// The epost reply is XML; QueryKey and WebEnv identify the uploaded id set.
$xml = simplexml_load_string( $response );
if ( $xml === false || !isset( $xml->QueryKey, $xml->WebEnv ) ) {
    throw new RuntimeException( 'epost response does not contain QueryKey/WebEnv' );
}
$query_key = (string) $xml->QueryKey;
$web_env = (string) $xml->WebEnv;

// The query string must stay in single quotes: it contains the literal parameter
// name "p$l", which double quotes would try to interpolate as a variable.
// NOTE(review): file_backend.cgi is reported to return at most 2000 rows for this
// request; this script does not work around that cap -- confirm against NCBI docs.
$download_url = 'https://www.ncbi.nlm.nih.gov/portal/utils/file_backend.cgi?Db=gene&HistoryId=' . urlencode( $web_env ) . '&QueryKey=' . urlencode( $query_key ) . '&Sort=Chromosome&Filter=all&Mode=file&View=tabular&p$l=Email&BaseUrl=&PortName=live&FileName=&CompleteResultCount=' . urlencode( $count );
$tab = file_get_contents( $download_url );
if ( $tab === false ) {
    throw new RuntimeException( 'file_backend.cgi download failed: ' . $download_url );
}
echo $tab;
Any clue how to resolve it?