Source code for lnk.bitly.stats

#!/usr/bin/env python
#! -*- coding: utf-8 -*-

"""Statistics and metrics retrieval for bitlinks."""

from __future__ import unicode_literals

import click
import ecstasy
import time

from collections import namedtuple

import lnk.abstract
import lnk.beauty
import lnk.bitly.info
import lnk.countries
import lnk.errors

from lnk.bitly.command import Command

def echo(*args):
	"""
	Runs the stats command and prints whatever it produces.

	A fresh Stats instance is created for every call; its fetch() result
	is handed to click.echo() unchanged.

	Arguments:
		args (variadic): Forwarded positionally, as-is, to Stats.fetch().
	"""
	output = Stats().fetch(*args)
	click.echo(output)

class Stats(Command):
	"""
	Class to retrieve statistics and info for one or more bitlinks.

	The statistics for a link include its referrers (i.e. from where the link
	was opened), the countries from where the link was opened and of course the
	number of clicks. These statistics can be retrieved 'since-forever', but
	also for specific (possibly open-ended) time-ranges, such as for 'the last
	5 months' or 'between 4 days and 2 minutes ago'. Additionally, these
	statistics can be paired with information about each link, retrieved from
	the 'info' command, thereby making the stats command the ultimate
	destination for link statistics *and* information. Output may, as always,
	be in raw format for internal use or in a pretty box. Multiple URLs are
	fully supported.

	Attributes:
		raw (bool): Whether to return the output in raw format for internal
			use, or in a pretty string-representation for outside-display.
		info (lnk.bitly.info.Info): A lnk.bitly.info.Info instance to retrieve
			additional information for a bitlink.
	"""

	# Lightweight, hashable (span, unit) record; instances are stored in sets.
	Timespan = namedtuple('Timespan', ['span', 'unit'])

	def __init__(self, raw=False):
		"""
		Initializes the command under the name 'stats'.

		Arguments:
			raw (bool): Whether fetch() should return raw lines instead of
				a boxified string.
		"""
		super(Stats, self).__init__('stats')

		self.raw = raw
		# The info command is always queried in raw mode, because its lines
		# are merged into this command's own output.
		self.info = lnk.bitly.info.Info(raw=True)
		self.parameters['timezone'] = time.tzname[0]

	def fetch(self, only, hide, times, forever, limit, add_info, full, urls):
		"""
		Fetches statistics for one or more URLs.

		Arguments:
			only (tuple): A tuple of strings representing the sets to include
				in the response ('only' these will be included).
			hide (tuple): A tuple of strings representing the sets to hide
				from the response (either from all possible sets
				if only is empty, or else from those selected).
			times (tuple): A tuple of tuples of the schema (<span>, <unit>),
				representing the timespans for which to fetch statistics.
			forever (bool): Whether to include the statistics 'since forever'.
			limit (int): A limit on the number of items fetched per timespan.
			add_info (bool): Whether or not to add additional information for
				each link.
			full (bool): Whether to show full country names, or short ISO
				abbreviations.
			urls (tuple): A tuple of urls to fetch statistics for.

		Returns:
			If the 'raw' attribute is True, a list with one entry per URL,
			each entry being the list of lines for that URL. Otherwise the
			result of passing that list to lnk.beauty.boxify().

		Raises:
			lnk.errors.InternalError: If additional info was requested but
				the info command returned nothing.
		"""
		self.parameters['limit'] = limit

		sets = lnk.abstract.filter_sets(self.sets, only, hide)
		timespans = self.get_timespans(times, forever)
		info = []
		if add_info:
			info = self.info.fetch([], [], False, urls)
			if not info:
				raise lnk.errors.InternalError('Could not fetch additional info.')

		results = []
		for n, url in enumerate(urls):
			# The n-th info entry is used as the header of the n-th URL.
			header = info[n] if add_info else ['URL: {0}'.format(url)]
			data = self.get_stats(url, timespans, sets)
			lines = self.lineify(data, full)

			results.append(header + lines)

		return results if self.raw else lnk.beauty.boxify(results)

	def get_stats(self, url, timespans, sets):
		"""
		Retrieves the statistics for a single url.

		The statistics returned are for all timespans supplied, filtered
		according to the sets of statistics wanted.

		Note:
			This method works with threads. For each category and each
			timespan, a new request must be made. Each request is made
			in a separate thread.

		Arguments:
			url (str): The relevant URL to fetch statistics for.
			timespans (iterable): Stats.Timespan objects representing the
				timespans for which to fetch statistics.
			sets (tuple): The sets of statistics wanted in the response
				(others are discarded).

		Returns:
			A single 'result', which is a dictionary where the keys are
			the sets (categories) and the values a list of the statistics
			that were fetched for each timespan. Each item in this list
			is in turn a dictionary with one key/value pair for the timespan
			and another for the data retrieved in that category,
			for that timespan. This data is either an integer for the 'clicks'
			category, or a list representing each individual data-point that
			could be fetched. Each of those data-points has a 'clicks' key
			whose value is an integer representing the clicks for that
			data-point. A data-point also has another key, which depends on the
			category, e.g. 'country' for the 'countries' category.
		"""
		result = {}
		threads = []
		for endpoint in sets:
			result[endpoint] = []
			for timespan in timespans:
				unit = timespan.unit
				if unit.endswith('s'):
					# Get rid of the plural s in e.g. 'weeks'
					unit = unit[:-1]

				# Build a fresh dictionary for every request. The workers
				# read it later, from their own threads, so sharing (and
				# re-assigning) a single dictionary would let a worker see
				# the unit/units of a different timespan.
				parameters = {
					'link': url,
					'unit': unit,
					'units': timespan.span
				}

				self.queue.put((url, endpoint, timespan, parameters))
				threads.append(self.new_thread(self.request, result))
		self.join(threads)

		return result

	def request(self, results):
		"""
		Requests statistics for a given configuration.

		The URL, endpoint (set/category), timespan and request parameters
		are all fetched from a queue, because this method is always run in
		a separate thread for each configuration of the above parameters.
		The dictionary to which to insert the data must be passed as an
		argument. The data is appended while holding the instance's lock.

		Arguments:
			results (dict): The results dictionary for the URL, to which to
				insert the retrieved statistics.
		"""
		url, endpoint, timespan, parameters = self.queue.get()

		response = self.get(self.endpoints[endpoint], parameters)
		what = "retrieve {0} for '{1}'".format(endpoint, url)
		response = self.verify(response, what)

		# For 'clicks' the key has a different name than the endpoint
		e = endpoint if endpoint != 'clicks' else 'link_clicks'
		data = {'timespan': timespan, 'data': response[e]}

		with self.lock:
			results[endpoint].append(data)

	def get_timespans(self, times, forever):
		"""
		Parses the timespans passed to fetch and returns Timespan objects.

		The timespans passed to the command-line interface and ultimately
		to the fetch() method are of the schema '(<span>, <unit>)'. These
		are parsed into Stats.Timespan objects. Parsing handles cases such
		as where the unit is 'year': such a timespan is converted into the
		equivalent number of months before it is sent to the API. Also the
		'forever' flag is handled here. Moreover, this method retrieves the
		default timespan from the settings if no timespans were passed to
		the method (and the command on the CLI) and 'forever' is not set.

		Arguments:
			times (tuple): The timespans of the schema '(<span>, <unit>)'.
			forever (bool): Whether to include the 'since forever' timespan.

		Returns:
			A set of Stats.Timespan objects (duplicates are collapsed).

		Raises:
			lnk.errors.InternalError: If the default timespan from the
				settings has to be used, but its unit or span is empty.
		"""
		timespans = set()
		if forever:
			# -1 = since forever (unit could be any)
			timespans.add(Stats.Timespan(-1, 'day'))
		if times:
			for span, unit in times:
				if 'year' in unit:
					span *= 12
					unit = 'months'
				timespans.add(Stats.Timespan(span, unit))
		elif not forever:
			# Get the default from the settings
			unit = self.settings['unit']
			span = self.settings['span']
			if not unit or not span:
				raise lnk.errors.InternalError('Default timespan is invalid!')
			if unit == 'forever':
				timespans.add(Stats.Timespan(-1, 'day'))
			elif 'year' in unit:
				timespans.add(Stats.Timespan(span * 12, 'months'))
			else:
				timespans.add(Stats.Timespan(span, unit))

		return timespans

	def lineify(self, data, full):
		"""
		Turns the data retrieved for a link into a list of lines.

		Each category has a header containing a string-representation of
		the category, followed by the lines listify() produces for the
		timespans of that category.

		Arguments:
			data (dictionary): The data (statistics) to lineify. This data
				should hold key/value pairs where the keys are the
				sets/categories of data and the values lists of
				dictionaries for each timespan.
			full (bool): Whether to show full country names or short
				ISO abbreviations (e.g. 'Germany' or 'DE').

		Returns:
			A list of lines, ready for output.
		"""
		lines = []
		for category, items in data.items():
			lines.append('{0}:'.format(category.title()))
			lines += self.listify(category, items, full)

		return lines

	def listify(self, category, data, full):
		"""
		Formats the per-timespan results of one category into lines.

		Every timespan gets a header line. If there is no data for it,
		' None' is appended to the header; if the data is a list of
		data-points, one formatted line per data-point follows the header;
		otherwise (the plain click count) the value is appended to the
		header. The passed data is not modified.

		Arguments:
			category (str): The category to which the data belongs
				(e.g. referrers).
			data (list): The results for the category, one dictionary with
				a 'timespan' and a 'data' key per timespan.
			full (bool): Whether to show full country names or short
				ISO abbreviations (e.g. 'Germany' or 'DE').

		Returns:
			A list of lines, ready for output.
		"""
		lines = []
		for result in data:
			timespan = result['timespan']
			items = result['data']
			header = self.get_header(timespan.span, timespan.unit)
			lines.append(header)
			if not items:
				lines[-1] += ' None'
			elif isinstance(items, list):
				for item in items:
					# Read instead of pop, so the caller's data stays intact
					# and can be formatted more than once.
					clicks = item['clicks']
					# The first value besides the click count identifies
					# the data-point (e.g. the country or the referrer).
					others = [v for k, v in item.items() if k != 'clicks']
					key = others[0]
					line = self.format(category, key, clicks, full)
					lines.append(line)
			else:
				# for clicks
				lines[-1] += ' {0}'.format(items)

		return lines

	def get_header(self, span, unit):
		"""
		Handles formatting of a timespan header.

		This method handles cases such as the 'since forever' timespan, as
		well as the issue with years (years have to be converted to months
		for bitly).

		Arguments:
			span (int): The span of the timespan (e.g. *4* months).
			unit (str): The unit of the timespan (e.g. 4 *months*).

		Returns:
			The header, inserted into the instance's list_item template.
		"""
		if span == -1:
			header = 'Since forever:'
		else:
			# Do this only for year because years have to be
			# converted to months before requesting stats from
			# the API, but it looks weird to the user if he
			# wanted years and got an equivalent number of months
			if unit == 'months' and span % 12 == 0:
				span //= 12
				unit = 'years' if span > 1 else 'year'
			# A span of one is left out ('Last month:', not 'Last 1 month:')
			span = '{0} '.format(span) if span > 1 else ''
			header = 'Last {0}{1}:'.format(span, unit)

		return self.list_item.format(header)

	@staticmethod
	def format(category, key, value, full):
		"""
		Formats a key/value pair for output.

		Handles special cases such as country-name expansion (the API returns
		them as ISO abbreviations, e.g. 'DE', but often the full name,
		e.g. 'Germany', is really wanted)

		Arguments:
			category (str): The category of the data.
			key (str): The key of the data-point.
			value (str): The value of the data-point.
			full (bool): Whether to show full country names or short
				ISO abbreviations (e.g. 'Germany' or 'DE').

		Returns:
			A pretty list-item.
		"""
		if category == 'countries':
			if key == 'None':
				key = 'Other'
			elif full:
				try:
					key = lnk.countries.names[key]
				except KeyError:
					# Unknown abbreviation: showing the ISO code is better
					# than aborting the whole command.
					pass
		elif key == 'direct':
			key = key.title()
		line = '   <-> {0}: {1}'.format(key, value)
		pretty = ecstasy.beautify(line, ecstasy.Color.Yellow)

		return pretty