Johannes Ursus Posted March 22, 2013 Share Posted March 22, 2013 I would like to list (and later use) the contents of a specific Dropbox folder. What would be the best way to do this? I have tried to write a scraper using cURL to get the links, but this seems very tedious. Is there a better way? Link to comment Share on other sites More sharing options...
0 Johannes Ursus Posted March 22, 2013 Author Share Posted March 22, 2013 This is the scraper that I have so far: <?php// Get drobox folder html.$ch = curl_init(); curl_setopt($ch, CURLOPT_URL, 'https://www.dropbox.com/sh/mneh7sehw63cvk6/VcWEokE4yH' ); // Dropbox shared folder link curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 (FM Scene 4.6.1)'); curl_setopt($ch, CURLOPT_REFERER, 'https://www.dropbox.com/'); curl_setopt($ch, CURLOPT_ENCODING, 'gzip,deflate'); curl_setopt($ch, CURLOPT_AUTOREFERER, true); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); curl_setopt($ch, CURLOPT_TIMEOUT, 5); curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1); $html =curl_exec ($ch);// extract links with DOMDocument$dom = new DOMDocument();@$dom->loadHTML($html);$links = $dom->getElementsByTagName('a');$processed_links = array();foreach ($links as $link){ if ($link->hasAttribute('class')&& $link->hasAttribute('href')) { foreach ($link->attributes as $a) { if ($a->value=='filename-link') { $processed_links[$link->getAttribute('href')] = $link->getAttribute('href'); } } }}echo var_dump($processed_links);?>[/CODE] Link to comment Share on other sites More sharing options...
0 ZakO Posted March 22, 2013 Share Posted March 22, 2013 Why are you scraping a webpage when Dropbox have an API? edit; sorry, my mistake — after looking through the Dropbox API, it doesn't seem like they have a call to access shared/public folders. :huh: Link to comment Share on other sites More sharing options...
0 Johannes Ursus Posted March 22, 2013 Author Share Posted March 22, 2013 Why are you scraping a webpage when Dropbox have an API? edit; sorry, my mistake, after looking through the Dropbox API for some reason it seems like they have a call to access shared/public folders. :huh: I found it's odd that they don't allow to simply list a directory like this without OAuth. I managed to put together something with the scraper to get all images and txt files and display them. Seems to work ok : <?php// Get drobox folder html.$folder = 'https://www.dropbox.com/sh/mneh7sehw63cvk6/VcWEokE4yH'; // Dropbox shared folder link$ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $folder); curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 (FM Scene 4.6.1)'); curl_setopt($ch, CURLOPT_REFERER, 'https://www.dropbox.com/'); curl_setopt($ch, CURLOPT_ENCODING, 'gzip,deflate'); curl_setopt($ch, CURLOPT_AUTOREFERER, true); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); curl_setopt($ch, CURLOPT_TIMEOUT, 5); curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1); $html =curl_exec ($ch);// extract links with DOMDocument$dom = new DOMDocument();@$dom->loadHTML($html);$links = $dom->getElementsByTagName('a');$processed_links = array();foreach ($links as $link){ if ($link->hasAttribute('class')&& $link->hasAttribute('href')) { foreach ($link->attributes as $a) { if ($a->value=='filename-link') { $processed_links[$link->getAttribute('href')] = $link->getAttribute('href'); } } }}$exts = $processed_links;foreach ($exts as $ext){ $url = $ext; $path = parse_url($url, PHP_URL_PATH); $ext2 = pathinfo($path, PATHINFO_EXTENSION); // echo "<br> <a href=https://dl.dropbox.com$path>http://dl.dropbox.com$path</a>";if ($ext2 == 'jpg'){echo "<br> <a href=https://dl.dropbox.com$path><img src=https://dl.dropbox.com$path></img></a>"; //echo $path;}if ($ext2 == 'txt'){ $first = "https://dl.dropbox.com";$ch2 = curl_init(); curl_setopt($ch2, CURLOPT_URL, 
$first.$path); // textfile curl_setopt($ch2, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 (FM Scene 4.6.1)'); curl_setopt($ch2, CURLOPT_REFERER, 'https://www.dropbox.com/'); curl_setopt($ch2, CURLOPT_ENCODING, 'gzip,deflate'); curl_setopt($ch2, CURLOPT_AUTOREFERER, true); curl_setopt($ch2, CURLOPT_FOLLOWLOCATION, true); curl_setopt($ch2, CURLOPT_TIMEOUT, 5); curl_setopt($ch2, CURLOPT_RETURNTRANSFER, 1); $str =curl_exec ($ch2); //echo preg_replace('!\r?\n!', '<br>', $str);echo '<pre>';echo preg_replace('!\r?\n!', '<br>', $str);echo '</pre>';}}?>[/CODE] Link to comment Share on other sites More sharing options...
0 Kami- Posted March 25, 2013 Share Posted March 25, 2013 You can get this info out of the API ;) Link to comment Share on other sites More sharing options...
Question
Johannes Ursus
I would like to list (and later use) the contents of a specific dropbox folder. What would be the best way to do this? I have tried to write a scraper using curl to get the links but this seems very tedious. Is there a better way?
Link to comment
Share on other sites
4 answers to this question
Recommended Posts