Python web scraping (Finviz pull)
The Python code is below:
#!/usr/bin/env python
"""Download the Finviz insider-trading page and save its table as CSV.

Usage:
    <this script> <outputfile.csv>

The page at INPUT_URL is fetched, every data row of the table whose class
is "body-table" is read, and one CSV record per row is written to the
output file given on the command line.

Helper URL:
http://python-docs.readthedocs.io/en/latest/scenarios/scrape.html
"""
import csv
import os
import sys

import humanize
import requests
from lxml import html

INPUT_URL = 'http://finviz.com/insidertrading.ashx?tc=7'

# Without a timeout requests.get() can block forever on a stalled server.
REQUEST_TIMEOUT_SECONDS = 30

# Data rows of the table; the first row is the header and is skipped.
ROW_XPATH = '//table[@class="body-table"]/tr[position()>1]'


def _collapse_whitespace(text):
    """Return text with each whitespace run replaced by a single space."""
    return ' '.join(text.split())


def _strip(text):
    """Return text without leading and trailing whitespace."""
    return text.strip()


# One entry per CSV column:
# (CSV header, XPath relative to a table row, optional clean-up function).
COLUMNS = (
    ('Ticker', 'td[1]/a/text()', None),
    ('Owner', 'td[2]/a/text()', _collapse_whitespace),
    ('Relationship', 'td[3]/text()', None),
    ('Date', 'td[4]/text()', None),
    ('Transaction', 'td[5]/text()', None),
    ('Cost', 'td[6]/text()', _strip),
    ('#Shares', 'td[7]/text()', None),
    ('Value ($)', 'td[8]/text()', None),
    ('#Shares Total', 'td[9]/text()', None),
    ('SEC Form 4', 'td[10]/a/text()', None),
)


def extract_records(tree):
    """Return one list of cell values per data row found in tree.

    Every value is read relative to its own table row, so a cell that is
    empty or holds several text nodes cannot shift the other columns.
    Collecting each column with a separate document-wide query and joining
    the lists by index would misalign the columns in those cases.

    Args:
        tree: parsed HTML document, as returned by lxml.html.fromstring().

    Returns:
        A list of records; each record lists the values in COLUMNS order.
        Rows without a ticker link are skipped.
    """
    ticker_xpath = COLUMNS[0][1]
    records = []
    for row in tree.xpath(ROW_XPATH):
        if not row.xpath(ticker_xpath):
            # Not a data row: the record count follows the ticker links.
            continue
        record = []
        for _header, cell_xpath, clean in COLUMNS:
            # An empty cell yields '' and several text nodes are merged,
            # so each column always contributes exactly one value.
            text = ''.join(row.xpath(cell_xpath))
            record.append(clean(text) if clean else text)
        records.append(record)
    return records


def _open_csv_output(path):
    """Open path for writing in the mode the csv module requires.

    Python 3 needs text mode with newline='' and Python 2 needs binary
    mode; any other mode produces blank lines between records on Windows.
    """
    if sys.version_info[0] >= 3:
        return open(path, 'w', newline='')
    return open(path, 'wb')


def main(argv):
    """Run the scraper.

    Args:
        argv: the full argument vector; argv[1] is the output CSV path.

    Returns:
        The process exit status: 0 on success, 1 if the output file name
        is missing.

    Raises:
        requests.exceptions.RequestException: if the download fails, times
            out, or the server answers with an HTTP error status.
    """
    for position, arg in enumerate(argv, 1):
        print("Argument " + str(position) + ": " + arg)

    if len(argv) < 2:
        print("\nInsufficient arguments! (expected 1)\nUsage: "
              + os.path.basename(__file__) + " <outputfile.csv>\n")
        return 1
    output_filename = argv[1]

    print("\nFetching contents from " + INPUT_URL)
    buy_tx_page = requests.get(INPUT_URL, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail loudly on an HTTP error instead of parsing an error page into a
    # CSV that contains nothing but the header.
    buy_tx_page.raise_for_status()
    print("\nDownloaded "
          + humanize.naturalsize(len(buy_tx_page.content), gnu=True)
          + " bytes.")

    # Extract everything before the file is opened, so a parsing failure
    # cannot leave a truncated CSV behind.
    records = extract_records(html.fromstring(buy_tx_page.content))

    with _open_csv_output(output_filename) as output:
        writer = csv.writer(output)
        writer.writerow([header for header, _xpath, _clean in COLUMNS])
        writer.writerows(records)

    print("\nWrote " + str(len(records)) + " records in " + output_filename)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))