Data visualisation with Vega

We love nice dashboards. And if you see a chart somewhere on a webpage, chances are it runs on D3.js. D3.js is a JavaScript library for manipulating documents based on data. It allows you to do a great deal of visualisation, but it comes with a bit of a learning curve. And even though the data can come in any shape and form, the plotting and transformations are still written in JavaScript.

Declarative approach

This is where Vega comes in. The whole visualisation is now a JSON specification, so we can literally build and ship visualisations without touching JavaScript at all!

Step by step

Suppose we’ve got hierarchical animal data represented by the following JSON:

"values": [
        {"id": "1", "parent": null, "title": "Animal"},
        {"id": "2", "parent": "1", "title": "Duck"},
        {"id": "3", "parent": "1", "title": "Fish"},
        {"id": "4", "parent": "1", "title": "Zebra"}
      ]

What we can then do is lay the nodes out in a tree shape (stratify builds the hierarchy and the tree transform lays it out):

"transform": [
        {
          "type": "stratify",
          "key": "id",
          "parentKey": "parent"
        },
        {
          "type": "tree",
          "method": "tidy",
          "separation": true,
          "size": [{"signal": "width"}, {"signal": "height"}]
        }
      ]

Having laid out the nodes, we need to generate the connecting lines; the treelinks + linkpath combo does exactly that:

{
      "name": "links",
      "source": "tree", // take datasource "tree" as input
      "transform": [
        { "type": "treelinks" }, // apply transform 1
        { "type": "linkpath", // follow up with next transform
          "shape": "diagonal"
          }
      ]
    }

Now that we’ve got our data sources, we want to draw actual objects. In Vega these are called marks. For simplicity I’m only drawing a rectangle with a title for each data point and some basic lines to connect them:

"marks": [
    {
      "type": "path",
      "from": {"data": "links"}, // dataset we defined above
      "encode": {
        "enter": {
          "path": {"field": "path"} // linkpath generated a dataset with "path" field in it - we just grab it here
        }
      }
    },
    {
      "type": "rect",
      "from": {"data": "tree"},
      "encode": {
        "enter": {
          "stroke": {"value": "black"},
          "width": {"value": 100},
          "height": {"value": 20},
          "x": {"field": "x"},
          "y": {"field": "y"}
        }
      }
    },
    {
      "type": "text",
      "from": {"data": "tree"}, // use data set we defined earlier
      "encode": {
        "enter": {
          "stroke": {"value": "black"},
          "text": {"field": "title"}, // we can use data fields to display actual values
          "x": {"field": "x"}, // use data fields to draw values from
          "y": {"field": "y"},
          "dx": {"value":50}, // offset the mark to appear in rectangle center
          "dy": {"value":13},
          "align": {"value": "center"}
        }
      }
    }
  ]

All in all we’ve arrived at a very basic hierarchical chart. It looks kinda plain and can definitely be improved: the rectangles should probably be replaced with groups, and the connection paths will need some work too. The complete spec (including the tweaks from the next section) looks like this:

{
  "$schema": "https://vega.github.io/schema/vega/v5.json",
  "width": 800,
  "height": 300,
  "padding": 5,

  "data": [
    {
      "name": "tree",
      "values": [
        {"id": "1", "parent": null, "title": "Animal"},
        {"id": "2", "parent": "1", "title": "Duck"},
        {"id": "3", "parent": "1", "title": "Fish"},
        {"id": "4", "parent": "1", "title": "Zebra"}
      ],
      "transform": [
        {
          "type": "stratify",
          "key": "id",
          "parentKey": "parent"
        },
        {
          "type": "tree",
          "method": "tidy",
          "separation": true,
          "size": [{"signal": "width"}, {"signal": "height"}]
        }
      ]      
    },
    {
      "name": "links",
      "source": "tree",
      "transform": [
        { "type": "treelinks" },
        { "type": "linkpath",
          "shape": "diagonal"
          }
      ]
    }, 
    {
      "name": "tree-boxes",
      "source": "tree",
      "transform": [
          { 
            "type": "filter",
            "expr": "datum.parent == null"
          }
        ]
    },
    {
      "name": "tree-circles",
      "source": "tree",
      "transform": [
        {
          "type": "filter",
          "expr": "datum.parent != null"
        }
      ]
    }
  ],
  "marks": [
    {
      "type": "path",
      "from": {"data": "links"},
      "encode": {
        "enter": {
          "path": {"field": "path"}
        }
      }
    },
    {
      "type": "rect",
      "from": {"data": "tree-boxes"},
      "encode": {
        "enter": {
          "stroke": {"value": "black"},
          "width": {"value": 100},
          "height": {"value": 20},
          "x": {"field": "x"},
          "y": {"field": "y"}
        }
      }
    },
    {
      "type": "symbol",
      "from": {"data": "tree-circles"},
      "encode": {
        "enter": {
          "stroke": {"value": "black"},
          "width": {"value": 100},
          "height": {"value": 20},
          "x": {"field": "x"},
          "y": {"field": "y"}
        }
      }
    },
    {
      "type": "rect",
      "from": {"data": "tree"},
      "encode": {
        "enter": {
          "stroke": {"value": "black"},
          "width": {"value": 100},
          "height": {"value": 20},
          "x": {"field": "x"},
          "y": {"field": "y"}
        }
      }
    },
    {
      "type": "text",
      "from": {"data": "tree"},
      "encode": {
        "enter": {
          "stroke": {"value": "black"},
          "text": {"field": "title"},
          "x": {"field": "x"},
          "y": {"field": "y"},
          "dx": {"value":50},
          "dy": {"value":13},
          "align": {"value": "center"}
        }
      }
    }
  ]
}

Getting a bit fancier

Suppose we would like to render different shapes for the root and leaf nodes of our chart. One way to achieve this is to derive two datasets from the tree data source and filter them accordingly:

    {
      "name": "tree-boxes",
      "source": "tree", // grab the existing data
      "transform": [
          { 
            "type": "filter",
            "expr": "datum.parent == null" // run it through a filter defined by expression
          }
        ]
    },
    {
      "name": "tree-circles",
      "source": "tree",
      "transform": [
        {
          "type": "filter",
          "expr": "datum.parent != null"
        }
      ]
    }

Then, instead of rendering all marks as rect, we use two different mark types for the respective datasets:

{
      "type": "rect",
      "from": {"data": "tree-boxes"},
      "encode": {
        "enter": {
          "stroke": {"value": "black"},
          "width": {"value": 100},
          "height": {"value": 20},
          "x": {"field": "x"},
          "y": {"field": "y"}
        }
      }
    },
    {
      "type": "symbol",
      "from": {"data": "tree-circles"},
      "encode": {
        "enter": {
          "stroke": {"value": "black"},
          "width": {"value": 100},
          "height": {"value": 20},
          "x": {"field": "x"},
          "y": {"field": "y"}
        }
      }
    }

Demo time

Play with Vega in the live editor here.

T-SQL syntax analysis

It is nice to be able to analyze code, whether for code review automation or rule enforcement. While Roslyn is an extremely useful platform for C# code analysis, more often than not .NET applications come backed by SQL Server. And this means one thing…

We need Roslyn for SQL!

We are probably not getting a full-blown SQL dependency tracking system just yet, but Microsoft does offer a SqlParser library that is very capable of handling most static code analysis on SQL. Let us run through a reasonably common scenario. Developers rely on feature flags for all new functionality. The Product Owner explicitly enables features as they become ready. Flag states are kept in the database and managed via simple scripts. Since we release code multiple times a day, unfinished features can ship to production as long as the respective flags are safely turned off. Our task is to ensure that change scripts only create feature flag definitions but never turn them on by default.

A bit of theory

Parsing a language (usually) happens in stages:

  1. Lexical analysis (convert character stream to basic building blocks – tokens)
  2. Syntactic analysis (group tokens into constructs according to selected grammar – parse tree)
  3. Semantic analysis (convert parse tree into abstract syntax tree)

Once we’ve got the Abstract Syntax Tree (AST), we can interact with it and perform certain operations depending on node type and context. Probably the easiest way to traverse such a tree is to implement a Visitor. Both Roslyn and the SQL Parser offer stubs that aid in implementing this pattern.
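To make the pattern concrete, here is a bare-bones sketch of the shape both libraries follow. The node and visitor types below are made up purely for illustration; the real base classes are SqlCodeObjectRecursiveVisitor for the SQL Parser and CSharpSyntaxWalker for Roslyn.

// illustration only: hypothetical node types dispatching to a visitor
public abstract class Node
{
    public abstract void Accept(Visitor visitor); // each node calls back into the matching Visit overload
}

public class InsertNode : Node
{
    public override void Accept(Visitor visitor) => visitor.Visit(this);
}

public class UpdateNode : Node
{
    public override void Accept(Visitor visitor) => visitor.Visit(this);
}

public abstract class Visitor
{
    // the stubs do nothing by default, so a concrete visitor overrides only the nodes it cares about
    public virtual void Visit(InsertNode node) { }
    public virtual void Visit(UpdateNode node) { }
}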

Sample scripts

This particular example revolves around two workflows:

-- case 1 - define a new feature flag
INSERT INTO dbo.FeatureFlag ( [Key], Value ) VALUES ( @featureFlagId, N'true')
-- case 2 - update existing flag
UPDATE dbo.FeatureFlag SET Value = 'true' WHERE [Key] = @featureFlagId -- enable/disable flag state

Let’s imagine we’ve got a directory where developers put new scripts. We need to scan it and figure out whether any scripts attempt to update the Value in our table:

using Microsoft.SqlServer.Management.SqlParser.Parser;
......	
	foreach (var file in Directory.EnumerateFiles(@"C:\ChangeScriptsFolder"))
	{
		var result = Parser.Parse(File.ReadAllText(file)); // parse SQL. That's all code we need to get an Abstract Syntax Tree
		var visitor = new FeatureFlagStateVisitor(file); // prep our visitor
		result.Script.Accept(visitor); // run
		if (!visitor.Result.IsSuccess)
		{
			string errorList = visitor.Result.Errors.Aggregate(new StringBuilder(), (sb, error) => sb.AppendLine(error)).ToString();
			throw new Exception($"FeatureFlag state must be disabled on build. Sql text analysis indicates following issues: {errorList}");
		}
	}
.......

And the visitor itself can look something like this:

    public class SqlAnalysisResult
    {
        public bool IsSuccess { get; set; }
        public IList<string> Errors { get; set; }
    }
    public class FeatureFlagStateVisitorContext
    {
        public bool IsFtUpdating { get; set; }
        public bool IsFtInserting { get; set; }
        public string StatementLocation { get; set; }
        public int InsertColumnRef { get; set; }
    }
    public class FeatureFlagStateVisitor: SqlCodeObjectRecursiveVisitor
    {
        public SqlAnalysisResult Result { get; set; }
        private readonly FeatureFlagStateVisitorContext _context;
        private readonly string _fileName;

        public FeatureFlagStateVisitor(string fileName)
        {
            Result = new SqlAnalysisResult
            {
                Errors = new List<string>(),
                IsSuccess = true
            };
            _context = new FeatureFlagStateVisitorContext();
            _fileName = fileName;
        }

        public override void Visit(SqlInsertSpecification codeObject)
        {
            if (codeObject.Target.Sql.CaseInsensitiveContains("FeatureFlag"))
            {
                _context.InsertColumnRef = codeObject.TargetColumns.IndexOf(codeObject.TargetColumns.FirstOrDefault(c => c.ColumnName.Sql.Contains("Value")));
                if (_context.InsertColumnRef >= 0)
                {
                    _context.IsFtInserting = true;
                    _context.StatementLocation = $"L{codeObject.StartLocation.LineNumber}:{codeObject.StartLocation.Offset} - L{codeObject.EndLocation.LineNumber}:{codeObject.EndLocation.Offset}";
                }
            }
            base.Visit(codeObject);
            _context.IsFtInserting = false;
        }

        public override void Visit(SqlRowConstructorExpression codeObject)
        {
            if (_context.IsFtInserting && codeObject.Values[_context.InsertColumnRef].Sql.CaseInsensitiveContains("true"))
            {
                Result.IsSuccess = false;
                Result.Errors.Add($"INSERT {_fileName} - {_context.StatementLocation}");
            }
            base.Visit(codeObject);
        }

        public override void Visit(SqlUpdateSpecification codeObject)
        {
            if (codeObject.Target.Sql.CaseInsensitiveContains("FeatureFlag"))
            {
                _context.IsFtUpdating = true;
                _context.StatementLocation = $"L{codeObject.StartLocation.LineNumber}:{codeObject.StartLocation.Offset} - L{codeObject.EndLocation.LineNumber}:{codeObject.EndLocation.Offset}";
            }
            base.Visit(codeObject);
            _context.IsFtUpdating = false;
        }

        public override void Visit(SqlColumnAssignment codeObject)
        {
            if (_context.IsFtUpdating && codeObject.Column.Sql.CaseInsensitiveContains("Value") && codeObject.Value.Sql.CaseInsensitiveContains("true"))
            {
                Result.IsSuccess = false;
                Result.Errors.Add($"UPDATE {_fileName} - {_context.StatementLocation}");
            }
            base.Visit(codeObject);
        }
    }

The idea is pretty simple: we keep track of where we are at the moment and refer back to this context when making the final decision at the SqlColumnAssignment/SqlRowConstructorExpression level.

Getting started with Roslyn code analysis

It was going to happen eventually: our research on C# dynamic features ended up with an attempt to parse bits of source code. There are quite a few solutions on the market, with NRefactory having been our preferred tool over the years. It has a few limitations though: it supports neither .NET Core nor C# 6.

It is a big deal

It might seem that support for a newer language spec is not critical. In fact, it gets problematic very quickly, even in more established projects. Luckily for us, Microsoft has chosen to open source Roslyn, the very engine that powers their compiler services. The official documentation covers the platform pretty well and goes into great detail on writing Visual Studio code analysers. We, however, often have to write MSBuild tasks that load the whole solution and run analysis on class hierarchies (for example, to detect whether a single `SQL SELECT` statement is being called inside a foreach loop; we would fail the build and suggest replacing it with a bulk select).

Installing

Roslyn is available via NuGet as a number of Microsoft.CodeAnalysis.* packages. We normally include the following:

Install-Package Microsoft.CodeAnalysis.Workspaces.MSBuild
Install-Package Microsoft.CodeAnalysis
Install-Package Microsoft.CodeAnalysis.CSharp
Install-Package Microsoft.Build # these classes are needed to support MSBuild workspace when it starts to load solution
Install-Package Microsoft.Build.Utilities.Core # these classes are needed to support MSBuild workspace when it starts to load solution
Install-Package Microsoft.Build.Locator # this is a helper to locate correct MSBuild toolchain (in case the machine has more than one installed)

Sometimes the environment gets confused as to which version of MSBuild to use, which is why starting the program with something like this has been pretty much a must since VS2015:

// put this somewhere early in the program
if (!MSBuildLocator.IsRegistered) // alternatively, MSBuildLocator.RegisterDefaults() simply picks the default instance
{
    var vs2022 = MSBuildLocator.QueryVisualStudioInstances().First(x => x.Name == "Visual Studio Community 2022"); // find the correct VS setup. There are many ways to organise the logic here, we'll just assume we want VS2022
    MSBuildLocator.RegisterInstance(vs2022); // register the selected instance
    var _ = typeof(Microsoft.CodeAnalysis.CSharp.Formatting.CSharpFormattingOptions); // this ensures the library is referenced so the compiler will not try to optimise it away (if dynamically loading assemblies or doing other voodoo that can throw the compiler off) - probably less important than the above, but we prefer to follow the cargo cult here and leave it be
}

After these initial steps, a simplistic solution traversal would look something along these lines:

async Task AnalyseSolution()
{
	using (var w = MSBuildWorkspace.Create())
	{
		var solution = await w.OpenSolutionAsync(@"MySolution.sln");		
		foreach (var project in solution.Projects)
		{			
			var docs = project.Documents; // allows for file-level document filtering
			var compilation = await project.GetCompilationAsync(); // allows for assembly-level analysis as well as SemanticModel 
			foreach (var doc in docs)
			{
				CSharpSyntaxWalker walker = new ConstructorSyntaxWalker(await doc.GetSemanticModelAsync()); // CSharpSyntaxWalker itself is abstract, so we plug in a concrete subclass (ConstructorSyntaxWalker is defined in the next section)
				walker.Visit(await doc.GetSyntaxRootAsync()); // traverse the syntax tree
			}
		}
	}
}

Syntax Tree Visitor

As with pretty much every mainstream syntax analyser, the easiest way to traverse syntax trees is with the Visitor pattern. It decouples tree nodes from processing logic, which leaves room for expansion on either side (easy to add new logic, easy to add new tree node types). Roslyn offers the CSharpSyntaxWalker stub, which lets us override only the nodes we need to process; it takes care of everything else.

With the basics out of the way, let’s look at the classes that make up the platform. At the top of the hierarchy sits the MSBuildWorkspace, followed by Solution, Project and Document. Roslyn also makes a distinction between parsing code and compiling it, meaning some analysis is only available through the Compilation class, which can be obtained for a project as well as for individual documents down the track.
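To illustrate that distinction: parsing alone gives us a syntax tree, while anything involving symbols or data flow needs a SemanticModel obtained from a Compilation. A minimal sketch, assuming an ad-hoc in-memory compilation purely for demonstration:

using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;

var tree = CSharpSyntaxTree.ParseText("class C { void M(int x) { if (x > 0) { } } }"); // syntax only - no symbol information yet
var compilation = CSharpCompilation.Create("probe")
    .AddReferences(MetadataReference.CreateFromFile(typeof(object).Assembly.Location))
    .AddSyntaxTrees(tree);
var semanticModel = compilation.GetSemanticModel(tree); // symbol lookups and data flow analysis live here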

Traversing the tree

Just loading the solution is kind of pointless though. We need to come up with processing logic, and the best place for it is a CSharpSyntaxWalker subclass. Suppose we’d like to determine whether a class constructor contains if statements that are driven by constructor parameters. This might mean we’ve got overly complex classes that could benefit from refactoring:

public class ConstructorSyntaxWalker : CSharpSyntaxWalker
{
    public List<IParameterSymbol> Parameters { get; set; }
    public int IfConditions { get; set; }
    
    bool processingConstructor = false;

    SemanticModel sm;

    public ConstructorSyntaxWalker(SemanticModel sm)
    {
        this.sm = sm;
        Parameters = new List<IParameterSymbol>();
    }

    public override void VisitConstructorDeclaration(ConstructorDeclarationSyntax node)
    {
        processingConstructor = true;
        base.VisitConstructorDeclaration(node);
        processingConstructor = false;
    }

    public override void VisitIfStatement(IfStatementSyntax node)
    {
        if (!processingConstructor) return; // we only want to keep traversing if we know we're inside constructor body
        Parameters.AddRange(sm.AnalyzeDataFlow(node).DataFlowsIn.OfType<IParameterSymbol>()); // .AnalyzeDataFlow() is one of the most commonly used parts of the platform: it requires a Compilation to work off and allows tracking dependencies. OfType filters the inflowing symbols down to constructor parameters; we could then decide whether branching on them is allowed
        IfConditions++; // just count for now, nothing fancy
        base.VisitIfStatement(node);
    }
}

Then, somewhere in our solution (or any other solution, really!) we have a class definition like so:

public class TestClass
{
    public TestClass(int a, string o) 
    {
        if (a == 1) DoThis(); else DoSomethingElse();
        if (o == "a") Foo(); else Bar();
    }
}

If we wanted to throw an exception and halt the build, we could invoke our SyntaxWalker like so:

public static async Task Main()
{
    await AnalyseSolution();
}
...
async static Task AnalyseSolution()
{

    using (var w = MSBuildWorkspace.Create())
    {
        var solution = await w.OpenSolutionAsync(@"..\..\..\TestRoslyn.sln"); // let's analyse our own solution. But can be any file on disk
        foreach (var project in solution.Projects)
        {
            var docs = project.Documents; // allows for file-level document filtering
            var compilation = await project.GetCompilationAsync(); // allows for assembly-level analysis as well as SemanticModel 
            foreach (var doc in docs)
            {
                var walker = new ConstructorSyntaxWalker(await doc.GetSemanticModelAsync());
                walker.Visit(await doc.GetSyntaxRootAsync()); // traverse the syntax tree
                if (walker.IfConditions > 0 && walker.Parameters.Any()) throw new Exception("We do not allow branching in constructors.");
            }
        }
    }
}

And there we have it. This is a very simplistic example, but possibilities are endless!
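For instance, the foreach + SQL SELECT check mentioned at the start of this post could start out as a walker along the lines below. This is only a rough sketch: SelectSingle is a made-up marker for whatever convention identifies single-row queries in your codebase, and a real check would resolve the call through the SemanticModel rather than match on text.

using System.Collections.Generic;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;

public class SelectInLoopWalker : CSharpSyntaxWalker
{
    public List<Location> Findings { get; } = new List<Location>();

    bool insideForeach = false;

    public override void VisitForEachStatement(ForEachStatementSyntax node)
    {
        insideForeach = true; // remember the context, same trick as in ConstructorSyntaxWalker above
        base.VisitForEachStatement(node);
        insideForeach = false;
    }

    public override void VisitInvocationExpression(InvocationExpressionSyntax node)
    {
        if (insideForeach && node.Expression.ToString().Contains("SelectSingle")) // naive text match for brevity
            Findings.Add(node.GetLocation()); // collect the locations - the build task can then fail and report them
        base.VisitInvocationExpression(node);
    }
}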

Entity Framework Core 3 – Custom Functions (Using IMethodCallTranslator)

Every now and then Stack Overflow provides fantastic opportunities to learn something new. One user asked whether SQL Server’s DECRYPTBYPASSPHRASE can be implemented with Entity Framework Core 2.2 so they can fetch encrypted strings in SQL.

Continue reading “Entity Framework Core 3 – Custom Functions (Using IMethodCallTranslator)”

EF Core 3: Getting model metadata from dynamically loaded assembly with IL Emit

Yet another Stack Overflow question has sparked a heated discussion and got us thinking whether we can do better.

In a nutshell, the question was about finding a way to query EF Core model metadata without directly referencing the assembly that defines it. Think of an MSBuild task that needs to check whether your model follows your company standards, or a test of some sort.

First stab at it

We were able to help the OP by quickly whipping up the following loader code:

var assembly = Assembly.LoadFrom(@"C:\OnlineShoppingStore\bin\Debug\netcoreapp2.2\OnlineShoppingStore.dll");
var contextType = assembly.GetTypes().First(d => d.Name == "OnlineStoreDbContext");
var ctx = Activator.CreateInstance(contextType) as DbContext; // instantiate your context. this will effectively build your model, so you must have all required EF references in your project
var p = ctx.Model.FindEntityType(assembly.GetTypes().First(d => d.Name == "Product")); // get the type from loaded assembly
//var p = ctx.Model.FindEntityType("OnlineStoreDbContext.Product"); // querying model by type name also works, but you'd need to correctly qualify your type names
var pk = p.FindPrimaryKey().Properties.First().Name; // your PK property name as built by EF model

The answer ended up being accepted, but the OP had a bit of an issue with instantiating the Context:

System.InvalidOperationException: 'No database provider has been configured for this DbContext. 
A provider can be configured by overriding the DbContext.OnConfiguring method or by using AddDbContext on the application service provider. 
If AddDbContext is used, then also ensure that your DbContext type accepts a DbContextOptions object in its constructor and passes it to the base constructor for DbContext.

This is kind of expected: when EF creates the context it invokes the OnConfiguring override to set up the DB provider, connection strings and so on. All of that is necessary for the real application to run, but for the OP it meant having to drag all the DB providers into the test harness. Not ideal.

The idea

After a bit of back and forth I got an idea: what if we subclass the context yet again and override OnConfiguring with a predefined provider (say, InMemory)?
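If we could reference the context type at compile time, the subclass we are after would be as simple as the sketch below (OnlineStoreDbContext stands in for the context from the loaded assembly; since we only discover that type at runtime, we will have to emit the equivalent IL instead):

public class InheritedDbContext : OnlineStoreDbContext // in reality this base type is only known at runtime
{
    protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
    {
        optionsBuilder.UseInMemoryDatabase(Guid.NewGuid().ToString()); // no real database provider required
    }
}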

IL Emit all things

We don’t get to use IL Emit often; it’s meant for pretty specific use cases, and I think this is one of them. The key to getting it right in our case was finding the correct overload of UseInMemoryDatabase. There’s a chance, however, that you might need to tweak it to suit your needs. It is pretty trivial once you know what you’re looking for.

public static MethodBuilder OverrideOnConfiguring(this TypeBuilder tb)
        {
            MethodBuilder onConfiguringMethod = tb.DefineMethod("OnConfiguring",
                MethodAttributes.Public
                | MethodAttributes.HideBySig
                | MethodAttributes.NewSlot
                | MethodAttributes.Virtual,
                CallingConventions.HasThis,
                null,
                new[] { typeof(DbContextOptionsBuilder) });

            // the easiest method to pick will be .UseInMemoryDatabase(this DbContextOptionsBuilder optionsBuilder, string databaseName, Action<InMemoryDbContextOptionsBuilder> inMemoryOptionsAction = null)
            // but since constructing generic delegate seems a bit too much effort we'd rather filter everything else out
            var useInMemoryDatabaseMethodSignature = typeof(InMemoryDbContextOptionsExtensions)
                .GetMethods()
                .Where(m => m.Name == "UseInMemoryDatabase")
                .Where(m => m.GetParameters().Length == 3)
                .Where(m => m.GetParameters().Select(p => p.ParameterType).Contains(typeof(DbContextOptionsBuilder)))
                .Where(m => m.GetParameters().Select(p => p.ParameterType).Contains(typeof(string)))
                .Single();
            
            // emits the equivalent of optionsBuilder.UseInMemoryDatabase("test");
            var gen = onConfiguringMethod.GetILGenerator();
            gen.Emit(OpCodes.Ldarg_1);
            gen.Emit(OpCodes.Ldstr, Guid.NewGuid().ToString());
            gen.Emit(OpCodes.Ldnull);
            gen.Emit(OpCodes.Call, useInMemoryDatabaseMethodSignature);
            gen.Emit(OpCodes.Pop);
            gen.Emit(OpCodes.Ret);

            return onConfiguringMethod;
        }

With the above out of the way, we can now build our dynamic type and plug it into our test harness!

class Program
    {
        static void Main(string[] args)
        {
            // load assembly under test
            var assembly = Assembly.LoadFrom(@"..\ef-metadata-query\OnlineShoppingStore\bin\Debug\netcoreapp3.1\OnlineShoppingStore.dll");
            var contextType = assembly.GetTypes().First(d => d.Name == "OnlineStoreDbContext");

            // create yet another assembly that will hold our dynamically generated type
            var typeBuilder = AssemblyBuilder
                                .DefineDynamicAssembly(new AssemblyName(Guid.NewGuid().ToString()), AssemblyBuilderAccess.RunAndCollect)
                                .DefineDynamicModule(Guid.NewGuid() + ".dll")
                                .DefineType("InheritedDbContext", TypeAttributes.Public, contextType); // make new type inherit from DbContext under test!

            // this is the key here! now our dummy implementation will kick in!
            var onConfiguringMethod = typeBuilder.OverrideOnConfiguring();
            typeBuilder.DefineMethodOverride(onConfiguringMethod, typeof(DbContext).GetMethod("OnConfiguring", BindingFlags.Instance | BindingFlags.NonPublic));
            
            var inheritedDbContext = typeBuilder.CreateType(); // enough config, let's get the type and roll with it

            // instantiate inheritedDbContext with default OnConfiguring implementation
            var context = Activator.CreateInstance(inheritedDbContext) as DbContext; // instantiate your context. this will effectively build your model, so you must have all required EF references in your project
            var p = context?.Model.FindEntityType(assembly.GetTypes().First(d => d.Name == "Product")); // get the type from loaded assembly
            
            //query the as-built model
            //var p = ctx.Model.FindEntityType("OnlineStoreDbContext.Product"); // querying model by type name also works, but you'd need to correctly qualify your type names
            var pk = p.FindPrimaryKey().Properties.First().Name; // your PK property name as built by EF model
            
            Console.WriteLine(pk);
        }
    }

This is runnable

Source code is available on GitHub in case you want to check it out and play a bit

Custom Routing in .NET WebAPI

We all need to do weird things sometimes. One assignment we got was to implement an API that would totally obfuscate all parameters in a Base64 encoded string. This clearly goes against the stock standard routing and action mapping that ASP.NET Web API comes with out of the box, but it got us thinking about ways to achieve it nonetheless.

By default

Normally, the router will:

  1. get the request URI,
  2. match it against given templates (those "{controller}/{action}" things), and
  3. invoke an {action} on {controller} with whatever parameters happen to be passed along (see the example below)
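So with the stock "DefaultApi" template ("api/{controller}/{id}"), a request like GET /api/products/5 ends up calling Get(5) on a controller along these lines (ProductsController is just an illustrative name):

public class ProductsController : ApiController
{
    // GET api/products/5
    public IHttpActionResult Get(int id)
    {
        return Ok(id); // echo the id back for demonstration purposes
    }
}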

Then we realise

We’re constrained to the full .NET Framework on this project, and fancy .NET Core middleware is not a thing yet. Luckily for us a custom message handler is a thing, so theoretically we could bootstrap ourselves through that and override IHttpControllerSelector (and potentially IHttpActionSelector).

Setup

Writing code directly in Global.asax is an option, but since it calls through to WebApiConfig.Register() by default:

 GlobalConfiguration.Configure(WebApiConfig.Register);

WebApiConfig is probably the better place for anything to do with Web API.

App_Start/WebApiConfig.cs

    public static class WebApiConfig
    {
        public static void Register(HttpConfiguration config)
        {
            // Web API configuration and services
            // Web API routes
            config.MessageHandlers.Add(new TestHandler()); // if you define a handler here it will kick in for ALL requests coming into your WebAPI (this does not affect MVC pages though)
            config.MapHttpAttributeRoutes();
            config.Services.Replace(typeof(IHttpControllerSelector), new MyControllerSelector(config)); // you likely will want to override some more services to ensure your logic is supported, this is one example

            // your default routes
            config.Routes.MapHttpRoute(name: "DefaultApi", routeTemplate: "api/{controller}/{id}", defaults: new {id = RouteParameter.Optional});

            //a non-overlapping endpoint to distinguish between requests. you can limit your handler to only kick in to this pipeline
            config.Routes.MapHttpRoute(name: "Base64Api", routeTemplate: "apibase64/{query}", defaults: null, constraints: null
                //, handler: new TestHandler() { InnerHandler = new HttpControllerDispatcher(config) } // here's another option to define a handler
            );
        }
    }

and then define our handler:

TestHandler.cs

    public class TestHandler : DelegatingHandler
    {
        protected override async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
        {
            //suppose we've got a URL like so: http://localhost:60290/apibase64/VmFsdWVzCg==
            var b64Encoded = request.RequestUri.AbsolutePath.Remove(0, "/apibase64/".Length);
            byte[] data = Convert.FromBase64String(b64Encoded);
            string decodedString = Encoding.UTF8.GetString(data); // this will decode to values
            request.Headers.Add("controllerToCall", decodedString); // let us say this is the controller we want to invoke
            HttpResponseMessage resp = await base.SendAsync(request, cancellationToken);
            return resp;
        }
    }

Depending on what exactly we want the handler to do, we might also have to supply a custom controller selector implementation:

WebApiConfig.cs

// add this line in your Register method
config.Services.Replace(typeof(IHttpControllerSelector), new MyControllerSelector(config));

MyControllerSelector.cs

    public class MyControllerSelector : DefaultHttpControllerSelector
    {
        public MyControllerSelector(HttpConfiguration configuration) : base(configuration)
        {
        }

        public override string GetControllerName(HttpRequestMessage request)
        {
            //this is pretty minimal implementation that examines a header set from TestHandler and returns correct value
            if (request.Headers.TryGetValues("controllerToCall", out var candidates))
                return candidates.First();
            else
            {
                return base.GetControllerName(request);
            }
        }
    }

Applying this in real life?

Pretty neat theory. We, however, couldn’t quite figure out a way to pitch it to our customers without raising a few questions on whether we’re doing something shady there.

Programmatically submitting Google Forms with AngularJs

Google Forms is a viable way to do business. We’ve seen a few successful companies that rely on it for day-to-day operations. The flow normally involves users entering data on the go and someone at the back office analysing the responses in a Google Spreadsheet.

Forms are flexible

One huge selling point is that we can design our own forms for all kinds of situations: racing bets, work time/attendance, baby feeding – we’ve seen a few exotic cases. And if static form data is not enough, we can opt for Google Apps Script.

One thing remains the same though

The look and feel of Google Forms and its default validations do leave much to be desired. What if there was a way to swap the form UI out for a custom branded SPA with fancy lookaheads and whatnot?

There is a way

Surely, it all starts with making a form. We’ll go to Google Forms and design a new one. Expect to spend some time getting it right for your needs. For the purposes of this demo we’ll be submitting a table (we’ll cheat a bit and post JSON only though).

We’d also ensure that answers get submitted into a new spreadsheet.

Now we need to grab the field names Google generated for the form (it is a simple HTML form after all!). Open up the form preview and go to the dev tools console in the new tab.

Run the following snippet in the console and note the outputs:

document.querySelectorAll('form').forEach((x) => {console.log(x.action)});
document.querySelectorAll('[name^="entry."]').forEach((x) => {console.log(x.name + '=' + x.closest('[role="listitem"]').querySelector('[role="heading"]').innerText)})

Oh, and one more thing…

Well, we’ve got the fields, but to successfully submit the form we need to know where to submit it to. Apparently, it’s a simple matter of picking up the form id and crafting a URL: https://docs.google.com/forms/d/<your id here>/formResponse

And we are done…almost

Dissecting Google Forms was fun. Now we need to somehow build our own frontend to the form. For our specific use case we wanted to show off how we would go about submitting a table dynamically populated with content. As I’ve got a soft spot for AngularJs, I figured I might as well go for it.

Building a custom form

There are plenty of resources online on how to build SPAs, so I won’t elaborate much on that. There are, however, a couple of considerations that in my opinion make the submission process seamless in an SPA experience. First and foremost, we’d like to stay on the same page when the form gets sent away, and we’d also like to get notified when the form has been submitted so our SPA can take its own action. One way to do it is to submit the form into a hidden iframe and use its onLoad event to report back (that’s the method I ended up implementing in the example snippet).

Talk is cheap, show me the code

Working example of this technique can be found here: https://codepen.io/timur_kh/pen/oNXYNdL

Making Swagger to get the authorization token from URL query string

Swagger is extremely useful when developing and debugging Web APIs. Some dev environments, however, have a bit of security added on top, which can get a bit too painful to work around.

Enter API key

It doesn’t need to be tedious! We’ll be looking at overriding Swagger-UI’s index page so we can plug a custom handler into the onComplete callback. The solution is extremely simple:

  1. Grab the latest index.html from Swashbuckle’s source repo (ideally, get the matching version)
  2. Tweak configObject to add an onComplete callback handler so it calls preauthorizeApiKey when the UI is ready
  3. Override IndexStream in the UseSwaggerUI extension method to serve the custom html

I ended up having the following setup (some bits are omitted for brevity):

wwwroot/swashbuckle.html

<!-- your standard HTML here, nothing special -->
<script>
window.onload = function() {
    // some boilerplate initialisation (the rest of the stock index.html) goes here
    // Begin Swagger UI call region
    configObject.onComplete = () => {

        // get the authorization portion of the query string
        var urlParams = new URLSearchParams(window.location.search);
        if (urlParams.has('authorization')) {
            var apikey = urlParams.get('authorization');

            // this is the important bit, see documentation
            ui.preauthorizeApiKey('api key', apikey); // key name must match the one you defined in AddSecurityDefinition method in Startup.cs
        }
    }
    const ui = SwaggerUIBundle(configObject);
    window.ui = ui
}
</script>

Startup.cs

    public void ConfigureServices(IServiceCollection services)
    {
        .........
        services.AddSwaggerGen(c => {
            c.SwaggerDoc("v1", new Info { Title = "You api title", Version = "v1" });
            c.AddSecurityDefinition("api key", new ApiKeyScheme() // key name must match the one you supply to preauthorizeApiKey call in JS
            {
                Description = "Authorization query string expects API key",
                In = "query",
                Name = "authorization",
                Type = "apiKey"
            });

            var requirements = new Dictionary<string, IEnumerable<string>> {
                { "api key", new List<string>().AsEnumerable() }
            };
            c.AddSecurityRequirement(requirements);
        });
    }

    // This method gets called by the runtime. Use this method to configure the HTTP request pipeline.
    public void Configure(IApplicationBuilder app, IHostingEnvironment env)
    {
        app.UseSwagger();
        app.UseSwaggerUI(c =>
        {
            c.IndexStream = () => File.OpenRead("wwwroot/swashbuckle.html"); // this is the important bit. see documentation https://github.com/domaindrivendev/Swashbuckle.AspNetCore/blob/master/README.md
            c.SwaggerEndpoint("/swagger/v1/swagger.json", "My API V1"); // very standard Swashbuckle init
        });
        app.UseMvc();
    }

Having finished all that, calling the standard Swagger URL with ?authorization=1234567890 appended should automatically authorize the page.

Integration testing aide for MVC core routing

Sometimes unit tests just don’t cut it. This is where integration tests come in. This, however, brings a whole new set of issues around finding the best way to isolate the aspects under test and mock everything else away.

Problem statement

Suppose we’ve got an API and a test that needs to make an HTTP call to our API endpoint, like so:

 [ApiController]
  public class TestController : ControllerBase {

    public IActionResult OkTest() {
      return Ok(true);
    }
  }
.....
public class TestControllerTests {

    private readonly HttpClient _client;

    public TestControllerTests() {
      _client = TestSetup.GetTestClient();
    }

    [Test]
    public async Task OkTest() {
      var path = GetPathHere(nameof(OkTest)); // should return "/api/test/oktest".
      var response = await _client.GetAsync(path);
      response.EnsureSuccessStatusCode();
    }
}

Solution

Knowing that ASP.NET Core is such a lightweight package now and exposes so many extensibility points, one approach we found effective was to build up the whole host and query its properties:

private string GetPathHere(string actionName)
    {
        var host = Program.CreateWebHostBuilder(new string[] { }).Build();
        host.Start();
        IActionDescriptorCollectionProvider provider = (host.Services as ServiceProvider).GetService<IActionDescriptorCollectionProvider>();
        return provider.ActionDescriptors.Items.First(i => (i as ControllerActionDescriptor)?.ActionName == actionName).AttributeRouteInfo.Template;
    }

    [TestMethod]
    public void OkTestShouldBeFine()
    {
        var path = GetPathHere(nameof(ValuesController.OkTest)); // "api/test/oktest"
    }

Applicability

This is a pretty basic case we’ve been dealing with, and the code makes quite a few assumptions. The approach, however, seems to hold up pretty well and will surely be our starting point next time round we test MVC actions!

Moq-ing around existing instance

We love unit testing! Seriously, it makes sense if you consider how many times a simple test has saved us from having to revisit a long-forgotten project we’ve already moved on from. Not fun and not good for business.

Moq: our tool of choice

To be able to test only the code we want, we need to isolate it. Of course, there are heaps of libraries for that already; Moq is just one of them. It allows us to create objects based on given interfaces and set up the expected behaviour in a way that abstracts away all the code we don’t currently test. An extremely powerful tool.
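For anyone new to it, the basic flow looks something like this (IPriceService is just an illustrative interface made up for this example):

using System;
using Moq;

public interface IPriceService
{
    decimal GetPrice(string sku);
}

public class MoqBasicsDemo
{
    public static void Run()
    {
        var priceService = new Mock<IPriceService>();              // a fake built from the interface alone
        priceService.Setup(s => s.GetPrice("ABC")).Returns(42m);   // stub only the behaviour we need
        Console.WriteLine(priceService.Object.GetPrice("ABC"));    // prints 42
        priceService.Verify(s => s.GetPrice("ABC"), Times.Once()); // and verify the interaction afterwards
    }
}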

Sometimes you just need a bit more of that

Suppose we’re testing an object that depends on internal state that’s tricky to abstract away. We would, however, like to use Moq to replace one operation without changing the others:

public class A
{
    public string Test {get;set;}
    public virtual string ReturnTest() => Test;
}
//and some code below:
void Main()
{
    var config = new A() {
        Test = "TEST"
    } ;

    var mockedConfig = new Mock<A>(); // first we run a stock standard mock
    mockedConfig.CallBase = true; // we will enable CallBase just to point out that it makes no difference  
    var o = mockedConfig.Object;
    Console.WriteLine(o.ReturnTest()); // this will be null because Test has not been initialised from constructor
    mockedConfig.Setup(c => c.ReturnTest()).Returns("mocked"); // of course if you set up your mocks - you will get the value
    Console.WriteLine(o.ReturnTest()); // this will be "mocked" now, no surprises
}

The code above illustrates the problem quite nicely. You’ll know if this is your case when you see it.

General sentiment towards these problems

“It can’t be done, use something else”, they say. Some people on Stack Overflow suggest ditching Moq completely and going for its underlying technology, Castle DynamicProxy. And it is a valid idea: create a proxy class around yours and intercept calls to the method under test. Easy!

Kinda easy

One advantage of Moq (which is, by the way, built on top of Castle DynamicProxy) is that it’s not just creating mock objects; it also tracks invocations and allows us to verify those later. Of course, we could opt to write the required bits ourselves, but why reinvent the wheel and introduce so much code that no one will maintain?

How about we mix and match?

We know that Moq internally leverages Castle DynamicProxy, which actually allows generating proxies around existing instances (they call it a class proxy with target). The question, therefore, is how we get Moq to make one for us. It seems there’s no such option out of the box, and simply injecting an override didn’t go well: there’s not much inversion of control inside the library, and most of the types and properties are marked internal, making inheritance virtually impossible.

Castle DynamicProxy is, however, much more user friendly and has quite a few methods exposed and available for overriding. So let us define a ProxyGenerator subclass that takes the method Moq calls and adds the required functionality (just compare the CreateClassProxyWithTarget and CreateClassProxy implementations – they are almost identical!)

MyProxyGenerator.cs

class MyProxyGenerator : ProxyGenerator
{
    object _target;

    public MyProxyGenerator(object target) {
        _target = target; // this is the missing piece, we'll have to pass it on to Castle proxy
    }
    // this method is 90% taken from the library source. I only had to tweak two lines (see below)
    public override object CreateClassProxy(Type classToProxy, Type[] additionalInterfacesToProxy, ProxyGenerationOptions options, object[] constructorArguments, params IInterceptor[] interceptors)
    {
        if (classToProxy == null)
        {
            throw new ArgumentNullException("classToProxy");
        }
        if (options == null)
        {
            throw new ArgumentNullException("options");
        }
        if (!classToProxy.GetTypeInfo().IsClass)
        {
            throw new ArgumentException("'classToProxy' must be a class", "classToProxy");
        }
        CheckNotGenericTypeDefinition(classToProxy, "classToProxy");
        CheckNotGenericTypeDefinitions(additionalInterfacesToProxy, "additionalInterfacesToProxy");
        Type proxyType = CreateClassProxyTypeWithTarget(classToProxy, additionalInterfacesToProxy, options); // these really are the two lines that matter
        List<object> list =  BuildArgumentListForClassProxyWithTarget(_target, options, interceptors);       // these really are the two lines that matter
        if (constructorArguments != null && constructorArguments.Length != 0)
        {
            list.AddRange(constructorArguments);
        }
        return CreateClassProxyInstance(proxyType, list, classToProxy, constructorArguments);
    }
}

If all of the above was relatively straightforward, actually feeding it into Moq is going to be somewhat of a hack. As I mentioned, most of the structures are marked internal, so we’ll have to use reflection to get through:

MyMock.cs

public class MyMock<T> : Mock<T>, IDisposable where T : class
{
    void PopulateFactoryReferences()
    {
        // Moq tries ridiculously hard to protect their internal structures - pretty much every class that could be of interest to us is marked internal
        // All below code is basically serving one simple purpose = to swap a `ProxyGenerator` field on the `ProxyFactory.Instance` singleton
        // all types are internal so reflection it is
        // I will invite you to make this a bit cleaner by obtaining the `_generatorFieldInfo` value once and caching it for later
        var moqAssembly = Assembly.Load(nameof(Moq));
        var proxyFactoryType = moqAssembly.GetType("Moq.ProxyFactory");
        var castleProxyFactoryType = moqAssembly.GetType("Moq.CastleProxyFactory");     
        var proxyFactoryInstanceProperty = proxyFactoryType.GetProperty("Instance");
        _generatorFieldInfo = castleProxyFactoryType.GetField("generator", BindingFlags.NonPublic | BindingFlags.Instance);     
        _castleProxyFactoryInstance = proxyFactoryInstanceProperty.GetValue(null);
        _originalProxyFactory = _generatorFieldInfo.GetValue(_castleProxyFactoryInstance);//save default value to restore it later
    }

    public MyMock(T targetInstance) {       
        PopulateFactoryReferences();
        // this is where we do the trick!
        _generatorFieldInfo.SetValue(_castleProxyFactoryInstance, new MyProxyGenerator(targetInstance));
    }

    private FieldInfo _generatorFieldInfo;
    private object _castleProxyFactoryInstance;
    private object _originalProxyFactory;

    public void Dispose()
    {
         // you will notice I opted to implement IDisposable here. 
         // My goal is to ensure I restore the original value on Moq's internal static class property in case you will want to mix up this class with stock standard implementation
         // there are probably other ways to ensure reference is restored reliably, but I'll leave that as another challenge for you to tackle
        _generatorFieldInfo.SetValue(_castleProxyFactoryInstance, _originalProxyFactory);
    }
}

Then, given we’ve got the above working, the actual solution would look like so:

    var config = new A()
    {
        Test = "TEST"
    };
    using (var superMock = new MyMock<A>(config)) // now we can pass instances!
    {
        superMock.CallBase = true; // you still need this, because as far as Moq is concerned it passes control over to Castle DynamicProxy
        var o1 = superMock.Object;
        Console.WriteLine(o1.ReturnTest()); // but this should return TEST
    }