
; *******************************************************
; *                                                     *
; *     Turbo Pascal Runtime Library Version 6.0        *
; *     Real Square Root                                *
; *                                                     *
; *     Copyright (C) 1989-1992 Norbert Juffa           *
; *                                                     *
; *******************************************************

             TITLE   F48FSRT

             INCLUDE SE.ASM


CODE         SEGMENT BYTE PUBLIC

             ASSUME  CS:CODE

; Externals

             EXTRN   HaltError:NEAR

; Publics

             PUBLIC  RSqrt

;-------------------------------------------------------------------------------
; RSqrt computes the square root of it argument. For a negative argument,
; runtime error 207 is invoked. The routine uses a estimate-and-correct method
; similar to that used in the division routine. This algorithm is based on the
; longhand method taught in school. Since there can be no tie case in rounding
; when computing the square root, no guard and sticky flags are needed to round
; to nearest or even in compliance with the IEEE floating point standard.
;
; INPUT:     DX:BX:AX  argument
;
; OUTPUT:    DX:BX:AX  square root of argument
;
; DESTROYS:  AX,BX,CX,DX,SI,DI,Flags
;-------------------------------------------------------------------------------

ROOT_EXT     PROC    FAR
$zero_sqrt:  RET                       ; return argument
$sqrt_err:   MOV     AX, 0CFh          ; load error code
             JMP     HaltError         ; execute error handler
ROOT_EXT     ENDP

             ALIGN   4

RSqrt        PROC    FAR
             OR      AL, AL            ; argument zero ?
             JZ      $zero_sqrt        ; yes, return zero
             OR      DH, DH            ; argument negative ?
             JS      $sqrt_err         ; yes, error
             PUSH    BP                ; save TURBO-Pascal frame pointer
             SHR     AL, 1             ; divide biased exponent by 2
             SBB     CL, CL            ; CL = 0, if expo even, else CL = -1
             ADC     AL, 40h           ; make new biased exponent
             PUSH    AX                ; save new exponent
             XOR     AL, AL            ; clear LSB of a3
             OR      DH, 80h           ; set hidden bit
             NEG     CL                ; CL = 0, if expo even, else CL = 1
             SHR     DX, CL            ; divide argument
             RCR     BX, CL            ;  by 2 if
             RCR     AX, CL            ;   expo odd
             XCHG    AX, CX            ; argument in DX:BX:CX
             MOV     SI, DX            ; save a1
             MOV     DI, BX            ; save a2
             MOV     BP, CX            ; save a3
             MOV     BH, DH            ; get MSB of a1
             STC                       ; generate 4 bit approximation
             RCR     BH, 1             ;  in hi-nibble of BH
             NEG     AL                ; adjust approximation (subtract 10)
             AND     AL, 10            ;  for arguments
             SUB     BH, AL            ;   between 4000 0000 00 and 7FFF FFFF FF
             MOV     AL, 0FFh          ; default quotient = FFh
             CMP     DH, BH            ; division overflow ?
             JNB     $no_div0          ; yes, use default quotient
             MOV     AX, DX            ; get a1
             DIV     BH                ; compute a1 / approximation
$no_div0:    ADD     BH, AL            ; add result to approximation
             RCR     BH, 1             ;  and divide by 2 yields 8 bit approx.
             MOV     BL, 0FFh          ; BX >= Trunc(Sqrt(a1))
             MOV     AX, 0FFFFh        ; default quotient
             CMP     DX, BX            ; division overflow ?
             JNB     $no_div1          ; yes, use default quotient
             MOV     AX, DI            ; get a2
             DIV     BX                ; compute a1:a2 / approximation
$no_div1:    ADD     AX, BX            ; add result to approximation
             RCR     AX, 1             ;  and divide by 2 yields 16 bit approx.
             MOV     BX, AX            ; save q1
             MUL     AX                ; DX:AX = q1*q1
             SUB     DI, AX            ; compute
             SBB     SI, DX            ;  remainder (SI:DI)
             JNC     $quot1_ok         ; remainder must be positive

             ALIGN   4

$corr_quot1: DEC     BX                ; try next smaller quotient q1
             STC                       ; adjust
             ADC     DI, BX            ;  remainder
             ADC     SI, 0             ;   to new
             ADD     DI, BX            ;     value
             ADC     SI, 0             ;      of quotient
             JS      $corr_quot1       ; until remainder becomes positive
$quot1_ok:   XCHG    AX, CX            ; concat rest of argument to remainder
             MOV     DX, DI            ; get remainder lo-word
             SHR     SI, 1             ; divide remainder (from here SI=0 !!!)
             RCR     DX, 1             ;  by two so it fits into 32 bits
             RCR     AX, 1             ; DX:AX = remainder / 2
             MOV     CX, 0FFFFh        ; load default quotient
             CMP     DX, BX            ; division overflow ?
             JNB     $no_div2          ; yes, skip division and use default quot
             DIV     BX                ; AX = q2
             XCHG    AX, CX            ; BX:CX = q1:q2
$no_div2:    MOV     AX, CX            ; get q2
             MUL     BX                ; q1*q2
             SUB     BP, AX            ; subtract product
             SBB     DI, DX            ;  from
             SUB     BP, AX            ;   old
             SBB     DI, DX            ;    remainder
             MOV     AX, CX            ; get q2
             MUL     AX                ; q2*q2
             NEG     AX                ; compute
             SBB     BP, DX            ;   new
             SBB     DI, SI            ;    remainder (DI:BP:AX)
             JNS     $quot2_ok         ; remainder must be positive
$corr_quot2: DEC     CX                ; try next smaller value for q2
             STC                       ; adjust
             ADC     AX, CX            ;  value
             ADC     BP, BX            ;   of remainder
             ADC     DI, SI            ;    according
             ADD     AX, CX            ;     to changed
             ADC     BP, BX            ;      value
             ADC     DI, SI            ;       of q2
             JS      $corr_quot2       ; until remainder positive
$quot2_ok:   MOV     SI, AX            ; DI:BP:SI = remainder, BX:CX = quot
             MOV     DX, BP            ; divide
             SHR     DI, 1             ;  remainder by two and
             RCR     DX, 1             ;   stuff 32 most significant bits
             RCR     AX, 1             ;    into DX:AX
             MOV     DI, 0FFFFh        ; load default quotient q3
             CMP     DX, BX            ; division overflow ?
             JNB     $no_div3          ; yes, use default quotient and skip div.
             DIV     BX                ; AX = q3
             XCHG    AX, DI            ; BX:CX:DI = q1:q2:q3, BP:SI:0:0 = rem
$no_div3:    MOV     AX, DI            ; get q3
             MUL     BX                ; q1*q3
             SUB     SI, AX            ; subtract
             SBB     BP, DX            ;  2*q1*q3
             SUB     SI, AX            ;   from
             SBB     BP, DX            ;    remainder
             MOV     AX, DI            ; get q3
             MUL     CX                ; q2*q3
             PUSH    BX                ; save q1
             XOR     BX, BX            ; BP:SI:BX:0 = remainder
             SUB     BX, AX            ; subtract
             SBB     SI, DX            ;  2*q2*q3
             SBB     BP, 0             ;   from
             SUB     BX, AX            ;    remainder
             SBB     SI, DX            ;     "
             SBB     BP, 0             ;      "
             MOV     AX, DI            ; get q3
             MUL     AX                ; q3*q3
             NEG     AX                ; subtract
             SBB     BX, DX            ;  q3*q3
             SBB     SI, 0             ;   from remainder
             SBB     BP, 0             ; BP:SI:BX:AX = remainder
             POP     DX                ; DX:CX:DI = q1:q2:q3
             JNS     $quot3_ok         ; remainder must be positive
$corr_quot3: DEC     DI                ; try next smaller value for q3
             STC                       ; adjust
             ADC     AX, DI            ;  value
             ADC     BX, CX            ;   of
             ADC     SI, DX            ;    remainder
             ADC     BP, 0             ;     according
             ADD     AX, DI            ;      to
             ADC     BX, CX            ;       changed
             ADC     SI, DX            ;        value of
             ADC     BP, 0             ;         quotient
             JS      $corr_quot3       ; until remainder positive
$quot3_ok:   XCHG    AX, DI            ; get result
             MOV     BX, CX            ;  into DX:BX:AX
             ADD     AX, 80h           ; round result
             ADC     BX, 0             ;  (no tie case and
             ADC     DX, 0             ;   no mantissa overflow possible)
             POP     CX                ; get saved exponent
             MOV     AL, CL            ; stuff exponent into result
             AND     DH, 7Fh           ; clear sign bit
             POP     BP                ; restore TURBO-Pascal frame pointer
             RET                       ; done
RSqrt        ENDP

             ALIGN   4

CODE         ENDS

             END
