Source code for n6sdk.data_spec

# -*- coding: utf-8 -*-

# Copyright (c) 2013-2014 NASK. All rights reserved.

"""
.. note::

   For basic information how to use the classes defined in this module
   -- please consult the :ref:`data_spec_class` chapter of the tutorial.
"""


import collections

from pyramid.decorator import reify

from n6sdk.data_spec.fields import (
    Field,
    AddressField,
    AnonymizedIPv4Field,
    ASNField,
    CCField,
    DateTimeField,
    DomainNameField,
    DomainNameSubstringField,
    IntegerField,
    IPv4Field,
    IPv4NetField,
    MD5Field,
    PortField,
    SHA1Field,
    SourceField,
    UnicodeEnumField,
    UnicodeLimitedField,
    URLField,
    URLSubstringField,
)
from n6sdk.exceptions import (
    FieldValueError,
    ParamKeyCleaningError,
    ParamValueCleaningError,
    ResultKeyCleaningError,
    ResultValueCleaningError,
    _KeyCleaningErrorMixin,
)



#
# Constants

#: A tuple of network incident data distribution restriction qualifiers
#: -- used in the :attr:`DataSpec.restriction` field specification.
RESTRICTION_ENUMS = (
    'public', 'need-to-know', 'internal',
)

#: A tuple of network incident data confidence qualifiers
#: -- used in the :attr:`DataSpec.confidence` field specification.
CONFIDENCE_ENUMS = (
    'low', 'medium', 'high',
)

#: A tuple of network incident category labels
#: -- used in the :attr:`DataSpec.category` field specification.
CATEGORY_ENUMS = (
    'bots',
    'cnc',
    'dos-attacker',
    'dos-victim',
    'malurl',
    'phish',
    'proxy',
    'resolver',
    'sandbox-url',
    'scanning',
    'server-exploit',
    'spam',
    'spam-url',
    'tor',
    'other',
)

#: A tuple of network incident layer-#4-protocol labels
#: -- used in the :attr:`DataSpec.proto` field specification.
PROTO_ENUMS = (
    'tcp', 'udp', 'icmp',
)

#: A tuple of network incident origin labels
#: -- used in the :attr:`DataSpec.origin` field specification.
ORIGIN_ENUMS = (
    'c2',
    'dropzone',
    'proxy',
    'p2p-crawler',
    'p2p-drone',
    'sinkhole',
    'sandbox',
    'honeypot',
    'darknet',
    'av',
    'ids',
    'waf',
)

#: A tuple of black list item status qualifiers
#: -- used in the :attr:`DataSpec.status` field specification.
STATUS_ENUMS = (
    'active', 'delisted', 'expired', 'replaced',
)



#
# The abstract base class

[docs]class BaseDataSpec(object): """ The base class for data specification classes. Typically, it should not be subclassed directly -- use :class:`DataSpec` instead. """ def __init__(self, **kwargs): self._all_param_fields = {} self._required_param_fields = {} self._single_param_fields = {} self._all_result_fields = {} self._required_result_fields = {} self._set_fields() super(BaseDataSpec, self).__init__(**kwargs) # # public properties @reify
[docs] def all_keys(self): """ Instance property: a :class:`frozenset` of all keys. (Includes all legal parameter names and result keys.) """ return self.all_param_keys | self.all_result_keys
@reify
[docs] def all_param_keys(self): """ Instance property: a :class:`frozenset` of all legal parameter names. """ return frozenset(self._all_param_fields)
@reify
[docs] def all_result_keys(self): """ Instance property: a :class:`frozenset` of all legal result keys. """ return frozenset(self._all_result_fields) # # public methods (possibly extendable)
[docs] def clean_param_dict(self, params, # optional keyword arguments: ignored_keys=(), forbidden_keys=(), extra_required_keys=(), discarded_keys=()): keys = self._clean_keys( params.viewkeys() - frozenset(ignored_keys), self._all_param_fields.viewkeys() - frozenset(forbidden_keys), self._required_param_fields.viewkeys() | frozenset(extra_required_keys), frozenset(discarded_keys), exc_class=ParamKeyCleaningError) return dict(self._iter_clean_param_items(params, keys))
[docs] def clean_param_keys(self, params, # optional keyword arguments: ignored_keys=(), forbidden_keys=(), extra_required_keys=(), discarded_keys=()): return self._clean_keys( params.viewkeys() - frozenset(ignored_keys), self._all_param_fields.viewkeys() - frozenset(forbidden_keys), self._required_param_fields.viewkeys() | frozenset(extra_required_keys), frozenset(discarded_keys), exc_class=ParamKeyCleaningError)
[docs] def param_field_specs(self, which='all', multi=True, single=True): field_items = self._filter_by_which(which, self._all_param_fields, self._required_param_fields) if not multi: field_items &= self._single_param_fields.viewitems() if not single: field_items -= self._single_param_fields.viewitems() return dict(field_items)
[docs] def clean_result_dict(self, result, # optional keyword arguments: ignored_keys=(), forbidden_keys=(), extra_required_keys=(), discarded_keys=()): keys = self._clean_keys( result.viewkeys() - frozenset(ignored_keys), self._all_result_fields.viewkeys() - frozenset(forbidden_keys), self._required_result_fields.viewkeys() | frozenset(extra_required_keys), frozenset(discarded_keys), exc_class=ResultKeyCleaningError) return dict(self._iter_clean_result_items(result, keys))
[docs] def clean_result_keys(self, result, # optional keyword arguments: ignored_keys=(), forbidden_keys=(), extra_required_keys=(), discarded_keys=()): return self._clean_keys( result.viewkeys() - frozenset(ignored_keys), self._all_result_fields.viewkeys() - frozenset(forbidden_keys), self._required_result_fields.viewkeys() | frozenset(extra_required_keys), frozenset(discarded_keys), exc_class=ResultKeyCleaningError)
[docs] def result_field_specs(self, which='all'): return dict(self._filter_by_which(which, self._all_result_fields, self._required_result_fields)) # # overridable/extendable methods
[docs] def get_adjusted_field(self, key, field, ext=None): if ext is not None: field = ext.make_extended_field(field) return field # # non-public internals
def _set_fields(self): key_to_field = {} for key, field in self._iter_all_field_specs(): key = key.decode('ascii') key_to_field[key] = field if field.in_params is not None: self._all_param_fields[key] = field if field.in_params == 'required': self._required_param_fields[key] = field else: assert field.in_params == 'optional' if field.single_param: self._single_param_fields[key] = field if field.in_result is not None: self._all_result_fields[key] = field if field.in_result == 'required': self._required_result_fields[key] = field else: assert field.in_result == 'optional' # making all fields (including those Ext-updated) # accessible also as instance attributes vars(self).update(key_to_field) def _iter_all_field_specs(self): key_to_ext = collections.defaultdict(Ext) seen_keys = set() attr_containers = (self,) + self.__class__.__mro__ for ac in attr_containers: for key, obj in vars(ac).iteritems(): if isinstance(obj, Ext): key_to_ext[key].nondestructive_update(obj) continue if key in seen_keys: continue seen_keys.add(key) if isinstance(obj, Field): field_ext = key_to_ext.get(key) field = self.get_adjusted_field(key, obj, field_ext) yield key, field for extra in self._iter_extra_param_specs(key, field): yield extra def _iter_extra_param_specs(self, key, parent_field): for key_suffix, xfield in parent_field.extra_params.iteritems(): if xfield is None: # field was masked ("removed") using Ext, e.g. in a subclass continue if not isinstance(xfield, Field): raise TypeError('{!r} is not a {!r} instance' .format(xfield, Field)) xkey = '{}.{}'.format(key, key_suffix) xfield = self.get_adjusted_field(xkey, xfield) yield xkey, xfield # recursive yielding: for extra in self._iter_extra_param_specs(xkey, xfield): yield extra @staticmethod def _clean_keys(keys, legal_keys, required_keys, discarded_keys, exc_class): illegal_keys = keys - legal_keys missing_keys = required_keys - keys if illegal_keys or missing_keys: assert issubclass(exc_class, _KeyCleaningErrorMixin) raise exc_class(illegal_keys, missing_keys) return {key.decode('ascii') for key in (keys - discarded_keys)} def _iter_clean_param_items(self, params, keys): error_info_seq = [] for key in keys: assert key in self._all_param_fields assert key in params field = self._all_param_fields[key] raw_value = params[key] param_values = field._split_raw_param_value(raw_value) assert param_values and type(param_values) is list assert hasattr(field, 'single_param') try: if field.single_param and len(param_values) > 1: raise FieldValueError(public_message=( u'Multiple values for a single-value-only field.')) yield key, [field.clean_param_value(value) for value in param_values] except Exception as exc: error_info_seq.append((key, raw_value, exc)) if error_info_seq: raise ParamValueCleaningError(error_info_seq) def _iter_clean_result_items(self, result, keys): error_info_seq = [] for key in keys: assert key in self._all_result_fields assert key in result field = self._all_result_fields[key] value = result[key] try: yield key, field.clean_result_value(value) except Exception as exc: error_info_seq.append((key, value, exc)) if error_info_seq: raise ResultValueCleaningError(error_info_seq) @staticmethod def _filter_by_which(which, all_fields, required_fields): # select fields that match the `which` argument if which == 'all': return all_fields.viewitems() elif which == 'required': return required_fields.viewitems() elif which == 'optional': return all_fields.viewitems() - required_fields.viewitems() else: raise ValueError("{!r} is not one of: 'all', 'required', 'optional'" .format(which)) # # The concrete base class
[docs]class DataSpec(BaseDataSpec): """ The basic, ready-to-use, data specification class. You can use it directly or inherit from it. """ # # Identification, categorization and other event metadata id = UnicodeLimitedField( in_params='optional', in_result='required', max_length=64, ) source = SourceField( in_params='optional', in_result='required', ) restriction = UnicodeEnumField( in_params='optional', in_result='required', enum_values=RESTRICTION_ENUMS, ) confidence = UnicodeEnumField( in_params='optional', in_result='required', enum_values=CONFIDENCE_ENUMS, ) category = UnicodeEnumField( in_params='optional', in_result='required', enum_values=CATEGORY_ENUMS, ) time = DateTimeField( in_params=None, in_result='required', extra_params=dict( min=DateTimeField( # `time.min` in_params='optional', single_param=True, ), max=DateTimeField( # `time.max` in_params='optional', single_param=True, ), ), ) origin = UnicodeEnumField( in_params='optional', in_result='optional', enum_values=ORIGIN_ENUMS, ) name = UnicodeLimitedField( in_params='optional', in_result='optional', max_length=255, ) target = UnicodeLimitedField( in_params='optional', in_result='optional', max_length=100, ) # # An `address` is a list of dicts containing `ip` + optionally: `asn`, `cc` address = AddressField( in_params=None, in_result='optional', ) # # Query params related to the components of `address` items ip = IPv4Field( in_params='optional', in_result=None, extra_params=dict( net=IPv4NetField( # `ip.net` in_params='optional', ), ), ) asn = ASNField( in_params='optional', in_result=None, ) cc = CCField( in_params='optional', in_result=None, ) # # Other "technical" event properties url = URLField( in_params='optional', in_result='optional', extra_params=dict( sub=URLSubstringField( # `url.sub` in_params='optional', ), ), ) fqdn = DomainNameField( in_params='optional', in_result='optional', extra_params=dict( sub=DomainNameSubstringField( # `fqdn.sub` in_params='optional', ), ), ) proto = UnicodeEnumField( in_params='optional', in_result='optional', enum_values=PROTO_ENUMS, ) sport = PortField( in_params='optional', in_result='optional', ) dport = PortField( in_params='optional', in_result='optional', ) dip = IPv4Field( in_params='optional', in_result='optional', ) adip = AnonymizedIPv4Field( in_params=None, in_result='optional', ) md5 = MD5Field( in_params='optional', in_result='optional', ) sha1 = SHA1Field( in_params='optional', in_result='optional', ) # # Others... expires = DateTimeField( in_params=None, in_result='optional', ) active = Field( in_params=None, in_result=None, extra_params=dict( min=DateTimeField( # `active.min` in_params='optional', single_param=True, ), max=DateTimeField( # `active.max` in_params='optional', single_param=True, ), ), ) status = UnicodeEnumField( in_params='optional', in_result='optional', enum_values=STATUS_ENUMS, ) replaces = UnicodeLimitedField( in_params='optional', in_result='optional', max_length=64, ) until = DateTimeField( in_params=None, in_result='optional', ) count = IntegerField( in_params=None, in_result='optional', min_value=0, max_value=(2 ** 15 - 1), ) # # Auxiliary classes
[docs]class Ext(dict): """ A :class:`dict`-like class for extending field properties in :class:`DataSpec` subclasses. """ def __repr__(self): return '{}({})'.format(self.__class__.__name__, super(Ext, self).__repr__())
[docs] def copy(self): return self.__class__(self)
[docs] def make_extended_field(self, field): merged_init_kwargs = self.copy() merged_init_kwargs.nondestructive_update(field._init_kwargs) return field.__class__(**merged_init_kwargs)
[docs] def nondestructive_update(self, other): if isinstance(other, collections.Mapping): other = other.iteritems() for key, value in other: stored_value = self.setdefault(key, value) if (stored_value is not value) and isinstance(stored_value, Ext): if isinstance(value, Field): self[key] = stored_value.make_extended_field(value) elif isinstance(value, collections.Mapping): merged_value = stored_value.copy() merged_value.nondestructive_update(value) self[key] = merged_value