@@ -310,6 +310,224 @@ static signed char *compute_wNAF(const BIGNUM *scalar, int w, size_t *ret_len)
310310 return r ;
311311}
312312
313+ #define EC_POINT_BN_set_flags (P , flags ) do { \
314+ BN_set_flags(&(P)->X, (flags)); \
315+ BN_set_flags(&(P)->Y, (flags)); \
316+ BN_set_flags(&(P)->Z, (flags)); \
317+ } while(0)
318+
319+ /*-
320+ * This functions computes (in constant time) a point multiplication over the
321+ * EC group.
322+ *
323+ * At a high level, it is Montgomery ladder with conditional swaps.
324+ *
325+ * It performs either a fixed scalar point multiplication
326+ * (scalar * generator)
327+ * when point is NULL, or a generic scalar point multiplication
328+ * (scalar * point)
329+ * when point is not NULL.
330+ *
331+ * scalar should be in the range [0,n) otherwise all constant time bets are off.
332+ *
333+ * NB: This says nothing about EC_POINT_add and EC_POINT_dbl,
334+ * which of course are not constant time themselves.
335+ *
336+ * The product is stored in r.
337+ *
338+ * Returns 1 on success, 0 otherwise.
339+ */
340+ static int ec_mul_consttime (const EC_GROUP * group , EC_POINT * r ,
341+ const BIGNUM * scalar , const EC_POINT * point ,
342+ BN_CTX * ctx )
343+ {
344+ int i , cardinality_bits , group_top , kbit , pbit , Z_is_one ;
345+ EC_POINT * s = NULL ;
346+ BIGNUM * k = NULL ;
347+ BIGNUM * lambda = NULL ;
348+ BIGNUM * cardinality = NULL ;
349+ BN_CTX * new_ctx = NULL ;
350+ int ret = 0 ;
351+
352+ if (ctx == NULL && (ctx = new_ctx = BN_CTX_new ()) == NULL )
353+ return 0 ;
354+
355+ BN_CTX_start (ctx );
356+
357+ s = EC_POINT_new (group );
358+ if (s == NULL )
359+ goto err ;
360+
361+ if (point == NULL ) {
362+ if (!EC_POINT_copy (s , group -> generator ))
363+ goto err ;
364+ } else {
365+ if (!EC_POINT_copy (s , point ))
366+ goto err ;
367+ }
368+
369+ EC_POINT_BN_set_flags (s , BN_FLG_CONSTTIME );
370+
371+ cardinality = BN_CTX_get (ctx );
372+ lambda = BN_CTX_get (ctx );
373+ k = BN_CTX_get (ctx );
374+ if (k == NULL || !BN_mul (cardinality , & group -> order , & group -> cofactor , ctx ))
375+ goto err ;
376+
377+ /*
378+ * Group cardinalities are often on a word boundary.
379+ * So when we pad the scalar, some timing diff might
380+ * pop if it needs to be expanded due to carries.
381+ * So expand ahead of time.
382+ */
383+ cardinality_bits = BN_num_bits (cardinality );
384+ group_top = cardinality -> top ;
385+ if ((bn_wexpand (k , group_top + 2 ) == NULL )
386+ || (bn_wexpand (lambda , group_top + 2 ) == NULL ))
387+ goto err ;
388+
389+ if (!BN_copy (k , scalar ))
390+ goto err ;
391+
392+ BN_set_flags (k , BN_FLG_CONSTTIME );
393+
394+ if ((BN_num_bits (k ) > cardinality_bits ) || (BN_is_negative (k ))) {
395+ /*-
396+ * this is an unusual input, and we don't guarantee
397+ * constant-timeness
398+ */
399+ if (!BN_nnmod (k , k , cardinality , ctx ))
400+ goto err ;
401+ }
402+
403+ if (!BN_add (lambda , k , cardinality ))
404+ goto err ;
405+ BN_set_flags (lambda , BN_FLG_CONSTTIME );
406+ if (!BN_add (k , lambda , cardinality ))
407+ goto err ;
408+ /*
409+ * lambda := scalar + cardinality
410+ * k := scalar + 2*cardinality
411+ */
412+ kbit = BN_is_bit_set (lambda , cardinality_bits );
413+ BN_consttime_swap (kbit , k , lambda , group_top + 2 );
414+
415+ group_top = group -> field .top ;
416+ if ((bn_wexpand (& s -> X , group_top ) == NULL )
417+ || (bn_wexpand (& s -> Y , group_top ) == NULL )
418+ || (bn_wexpand (& s -> Z , group_top ) == NULL )
419+ || (bn_wexpand (& r -> X , group_top ) == NULL )
420+ || (bn_wexpand (& r -> Y , group_top ) == NULL )
421+ || (bn_wexpand (& r -> Z , group_top ) == NULL ))
422+ goto err ;
423+
424+ /* top bit is a 1, in a fixed pos */
425+ if (!EC_POINT_copy (r , s ))
426+ goto err ;
427+
428+ EC_POINT_BN_set_flags (r , BN_FLG_CONSTTIME );
429+
430+ if (!EC_POINT_dbl (group , s , s , ctx ))
431+ goto err ;
432+
433+ pbit = 0 ;
434+
435+ #define EC_POINT_CSWAP (c , a , b , w , t ) do { \
436+ BN_consttime_swap(c, &(a)->X, &(b)->X, w); \
437+ BN_consttime_swap(c, &(a)->Y, &(b)->Y, w); \
438+ BN_consttime_swap(c, &(a)->Z, &(b)->Z, w); \
439+ t = ((a)->Z_is_one ^ (b)->Z_is_one) & (c); \
440+ (a)->Z_is_one ^= (t); \
441+ (b)->Z_is_one ^= (t); \
442+ } while(0)
443+
444+ /*-
445+ * The ladder step, with branches, is
446+ *
447+ * k[i] == 0: S = add(R, S), R = dbl(R)
448+ * k[i] == 1: R = add(S, R), S = dbl(S)
449+ *
450+ * Swapping R, S conditionally on k[i] leaves you with state
451+ *
452+ * k[i] == 0: T, U = R, S
453+ * k[i] == 1: T, U = S, R
454+ *
455+ * Then perform the ECC ops.
456+ *
457+ * U = add(T, U)
458+ * T = dbl(T)
459+ *
460+ * Which leaves you with state
461+ *
462+ * k[i] == 0: U = add(R, S), T = dbl(R)
463+ * k[i] == 1: U = add(S, R), T = dbl(S)
464+ *
465+ * Swapping T, U conditionally on k[i] leaves you with state
466+ *
467+ * k[i] == 0: R, S = T, U
468+ * k[i] == 1: R, S = U, T
469+ *
470+ * Which leaves you with state
471+ *
472+ * k[i] == 0: S = add(R, S), R = dbl(R)
473+ * k[i] == 1: R = add(S, R), S = dbl(S)
474+ *
475+ * So we get the same logic, but instead of a branch it's a
476+ * conditional swap, followed by ECC ops, then another conditional swap.
477+ *
478+ * Optimization: The end of iteration i and start of i-1 looks like
479+ *
480+ * ...
481+ * CSWAP(k[i], R, S)
482+ * ECC
483+ * CSWAP(k[i], R, S)
484+ * (next iteration)
485+ * CSWAP(k[i-1], R, S)
486+ * ECC
487+ * CSWAP(k[i-1], R, S)
488+ * ...
489+ *
490+ * So instead of two contiguous swaps, you can merge the condition
491+ * bits and do a single swap.
492+ *
493+ * k[i] k[i-1] Outcome
494+ * 0 0 No Swap
495+ * 0 1 Swap
496+ * 1 0 Swap
497+ * 1 1 No Swap
498+ *
499+ * This is XOR. pbit tracks the previous bit of k.
500+ */
501+
502+ for (i = cardinality_bits - 1 ; i >= 0 ; i -- ) {
503+ kbit = BN_is_bit_set (k , i ) ^ pbit ;
504+ EC_POINT_CSWAP (kbit , r , s , group_top , Z_is_one );
505+ if (!EC_POINT_add (group , s , r , s , ctx ))
506+ goto err ;
507+ if (!EC_POINT_dbl (group , r , r , ctx ))
508+ goto err ;
509+ /*
510+ * pbit logic merges this cswap with that of the
511+ * next iteration
512+ */
513+ pbit ^= kbit ;
514+ }
515+ /* one final cswap to move the right value into r */
516+ EC_POINT_CSWAP (pbit , r , s , group_top , Z_is_one );
517+ #undef EC_POINT_CSWAP
518+
519+ ret = 1 ;
520+
521+ err :
522+ EC_POINT_free (s );
523+ BN_CTX_end (ctx );
524+ BN_CTX_free (new_ctx );
525+
526+ return ret ;
527+ }
528+
529+ #undef EC_POINT_BN_set_flags
530+
313531/*
314532 * TODO: table should be optimised for the wNAF-based implementation,
315533 * sometimes smaller windows will give better performance (thus the
@@ -369,6 +587,34 @@ int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar,
369587 return EC_POINT_set_to_infinity (group , r );
370588 }
371589
590+ if (!BN_is_zero (& group -> order ) && !BN_is_zero (& group -> cofactor )) {
591+ /*-
592+ * Handle the common cases where the scalar is secret, enforcing a constant
593+ * time scalar multiplication algorithm.
594+ */
595+ if ((scalar != NULL ) && (num == 0 )) {
596+ /*-
597+ * In this case we want to compute scalar * GeneratorPoint: this
598+ * codepath is reached most prominently by (ephemeral) key generation
599+ * of EC cryptosystems (i.e. ECDSA keygen and sign setup, ECDH
600+ * keygen/first half), where the scalar is always secret. This is why
601+ * we ignore if BN_FLG_CONSTTIME is actually set and we always call the
602+ * constant time version.
603+ */
604+ return ec_mul_consttime (group , r , scalar , NULL , ctx );
605+ }
606+ if ((scalar == NULL ) && (num == 1 )) {
607+ /*-
608+ * In this case we want to compute scalar * GenericPoint: this codepath
609+ * is reached most prominently by the second half of ECDH, where the
610+ * secret scalar is multiplied by the peer's public point. To protect
611+ * the secret scalar, we ignore if BN_FLG_CONSTTIME is actually set and
612+ * we always call the constant time version.
613+ */
614+ return ec_mul_consttime (group , r , scalars [0 ], points [0 ], ctx );
615+ }
616+ }
617+
372618 for (i = 0 ; i < num ; i ++ ) {
373619 if (group -> meth != points [i ]-> meth ) {
374620 ECerr (EC_F_EC_WNAF_MUL , EC_R_INCOMPATIBLE_OBJECTS );
0 commit comments